Commit 2018acbc by Christine Lytwynec

use pip-accel if it is available, get .pip/download-cache from S3

add script to upload/download pip cache directories from S3

update all-tests.sh to use pip-download-cache from S3

update pip-accel to version 0.22
parent 708fd87b
@@ -6,6 +6,7 @@ import os
 import hashlib
 from distutils import sysconfig
 from paver.easy import *
+import subprocess
 from .utils.envs import Env
@@ -133,8 +134,24 @@ def python_prereqs_installation():
     """
     Installs Python prerequisites
     """
+    try:
+        subprocess.check_call(
+            "pip-accel --version",
+            stdout=open(os.devnull, 'w'),
+            stderr=open(os.devnull, 'w'),
+            shell=True
+        )
+        executable = 'pip-accel'
+    except subprocess.CalledProcessError:
+        executable = 'pip'
     for req_file in PYTHON_REQ_FILES:
-        sh("pip install -q --exists-action w -r {req_file}".format(req_file=req_file))
+        sh("{ex} install -q --exists-action=w -r {req_file}".format(
+            ex=executable,
+            req_file=req_file,
+        ))
 @task
@@ -5,3 +5,7 @@ lazy==1.1
 path.py==3.0.1
 watchdog==0.7.1
 python-memcached
+
+# Requirements to run paver with pip-accel
+-e git+https://github.com/jzoldak/pip.git@v1.4.1patch772#egg=pip
+pip-accel[s3]==0.22
@@ -94,8 +94,22 @@ if [ -e $HOME/edx-venv_clean.tar.gz ]; then
     tar -C $HOME -xf $HOME/edx-venv_clean.tar.gz
 fi
-# Activate the Python virtualenv
-source $HOME/edx-venv/bin/activate
+# Activate a new Python virtualenv
+virtualenv $HOME/edx-venv-$GIT_COMMIT
+source $HOME/edx-venv-$GIT_COMMIT/bin/activate
+
+# boto and path.py are requirements of scripts/pip_cache_store.py, which is used
+# to download the pip download cache from S3.
+# We install just boto and path.py here to avoid installing all of base.txt and
+# paver.txt before getting the download cache. If the versions pinned in the
+# requirements files change, install_prereqs will update them later.
+pip install -q "boto>=2.32.1" "path.py>=3.0.1"
+
+# Download the pip-download-cache
+python scripts/pip_cache_store.py download -b edx-platform.dependency-cache -f v1/master -d $HOME/.pip/download-cache/ -t $HOME/pip-download-cache.tar.gz
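+# (The download is best-effort: pip_cache_store.py logs S3 errors rather than
+# raising them, so a missing cache will not fail the build.)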
+
+# Now install paver requirements
+pip install -q -r requirements/edx/paver.txt

 # If the environment variable 'SHARD' is not set, default to 'all'.
 # This could happen if you are trying to use this script from
@@ -122,7 +136,15 @@ case "$TEST_SUITE" in
         <testcase classname="quality" name="quality" time="0.604"></testcase>
         </testsuite>
 END
-        exit $EXIT
+        exitcode=$EXIT
+
+        # Update the pip-download-cache.tar.gz in S3 if JOB_NAME starts with "edx-all-tests-auto-master/"
+        # (for old jenkins) or "edx-platform-all-tests-master/" (for new jenkins).
+        # The JOB_NAME is something along the lines of "edx-all-tests-auto-master/SHARD=1,TEST_SUITE=quality".
+        if [[ ${JOB_NAME} == 'edx-all-tests-auto-master/'* ]] || [[ ${JOB_NAME} == 'edx-platform-all-tests-master/'* ]]; then
+            python scripts/pip_cache_store.py upload -b edx-platform.dependency-cache -f v1/master -d $HOME/.pip/download-cache/ -t $HOME/pip-download-cache.tar.gz
+        fi
+
+        exit $exitcode
         ;;
     "unit")
@@ -202,3 +224,8 @@ END
         ;;
 esac
+
+# Deactivate and clean up python virtualenv
+deactivate
+rm -r $HOME/edx-venv-$GIT_COMMIT

new file: scripts/pip_cache_store.py
#!/usr/bin/env python
"""
This script stores the ~/.pip/download-cache directory in S3. The primary use
case, as of this writing, is to help speed up Jenkins build times for
edx-platform tests: before running pip-accel install (or pip install) on a
Jenkins worker, this directory is downloaded from S3.

For usage, run: `python pip_cache_store.py -h`.
"""

import argparse
import sys
import tarfile

from boto.s3.connection import S3Connection
from boto.exception import S3ResponseError
from path import path


class S3TarStore(object):
    """
    Stores a directory in S3 as a tar.gz archive, and restores it again.
    """
    def __init__(self, *args, **kwargs):
        self.dirpath = kwargs['dirpath']
        self.tarpath = kwargs['tarpath']
        self.bucket_name = kwargs['bucket_name']
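        # e.g. bucket_folder "v1/master" + "pip-download-cache.tar.gz"
        # -> keyname "v1/master/pip-download-cache.tar.gz"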
        self.keyname = path(kwargs['bucket_folder']) / self.tarpath.basename()

    @staticmethod
    def bucket(bucket_name):
        """
        Returns the bucket matching `bucket_name`, or `None` if no such
        bucket exists or an exception is raised while connecting.
        """
        try:
            conn = S3Connection()
            bucket = conn.get_bucket(bucket_name)
        except S3ResponseError:
            print (
                "Please check that the bucket {} exists and that you have "
                "the proper credentials to access it.".format(bucket_name)
            )
            return None
        except Exception as e:
            print (
                "There was an error while connecting to S3. "
                "Please check the error log for more details."
            )
            sys.stderr.write(e.message)
            return None
        if not bucket:
            print "No such bucket {}.".format(bucket_name)
        return bucket

    @staticmethod
    def download_dir(bucket, tarpath, dirpath, keyname):
        """
        Downloads the file matching `keyname` from `bucket` to `tarpath`,
        then extracts the tar.gz file into `dirpath`. If no matching
        `keyname` is found, it does nothing.

        Note that any exceptions raised while downloading or unpacking
        are logged, but not re-raised.
        """
        key = bucket.lookup(keyname)
        if key:
            try:
                print "Downloading contents of {} from S3.".format(keyname)
                key.get_contents_to_filename(tarpath)
                with tarfile.open(tarpath, mode="r:gz") as tar:
                    print "Unpacking {} to {}".format(tarpath, dirpath)
                    tar.extractall(path=dirpath.parent)
            except Exception as e:
                print "Ignored Exception:\n {}".format(e.message)
        else:
            print (
                "Couldn't find anything matching {} in S3 bucket. "
                "Doing nothing.".format(keyname)
            )

    @staticmethod
    def upload_dir(bucket, tarpath, dirpath, keyname):
        """
        Packs the contents of `dirpath` into a tar.gz file named
        `tarpath.basename()`, then uploads the tar.gz file to `bucket`
        as `keyname`. If `dirpath` is not a directory, it does nothing.

        Note that any exceptions raised while compressing or uploading
        are logged, but not re-raised.
        """
        if dirpath.isdir():
            try:
                with tarfile.open(tarpath, "w:gz") as tar:
                    print "Packing up {} to {}".format(dirpath, tarpath)
                    tar.add(dirpath, arcname='/')
                print "Uploading {} to S3 bucket.".format(keyname)
                existing_key = bucket.lookup(keyname)
                key = existing_key if existing_key else bucket.new_key(keyname)
                key.set_contents_from_filename(tarpath)
            except Exception as e:
                print "Ignored Exception:\n {}".format(e.message)
                sys.stderr.write(e.message)
        else:
            print "Path {} isn't a directory. Doing nothing.".format(dirpath)

    def download(self):
        """
        Checks that the bucket is available and downloads self.keyname to
        self.tarpath, then extracts self.tarpath into self.dirpath.
        """
        bucket = self.bucket(self.bucket_name)
        if not bucket:
            return
        self.download_dir(bucket, self.tarpath, self.dirpath, self.keyname)

    def upload(self):
        """
        Checks that the bucket is available, then compresses self.dirpath into
        self.tarpath and uploads self.tarpath to self.keyname.
        """
        bucket = self.bucket(self.bucket_name)
        if not bucket:
            return
        self.upload_dir(bucket, self.tarpath, self.dirpath, self.keyname)


def main():
    """
    Calls S3TarStore.upload or S3TarStore.download using the command line args.
    """
    parser = argparse.ArgumentParser(description='Upload/download tar.gz files to/from S3.')
    parser.add_argument('action', choices=('upload', 'download'))
    parser.add_argument('--bucket', '-b', dest='bucket_name', required=True,
                        help='Name of the S3 bucket.')
    parser.add_argument('--folder', '-f', dest='bucket_folder', required=True,
                        help='Folder within the S3 bucket. (ex. "v1/my-branch-name/")')
    parser.add_argument('--dir', '-d', dest='dirpath', required=True,
                        help='Directory to be uploaded from or downloaded to. '
                             '(ex. "~/.pip/download-cache/")')
    parser.add_argument('--tar', '-t', dest='tarpath', required=True,
                        help='Path for the newly created or downloaded tarfile. '
                             'Its basename should match the basename of the tarfile '
                             'stored in S3. (ex. "~/pip-download-cache.tar.gz")')
    args = parser.parse_args()

    store = S3TarStore(
        dirpath=path(args.dirpath),
        tarpath=path(args.tarpath),
        bucket_name=args.bucket_name,
        bucket_folder=args.bucket_folder,
    )
    if args.action == 'upload':
        store.upload()
    elif args.action == 'download':
        store.download()


if __name__ == '__main__':
    main()