Commit f6de951f by Jesse Zoldak

WIP file upload and download [no-ci]

parent c4d92b8b
......@@ -3,6 +3,7 @@ tasks for controlling the databases used in tests
"""
from __future__ import print_function
import os
import tarfile
from paver.easy import sh, needs
import boto
......@@ -30,7 +31,8 @@ MIGRATION_OUTPUT_FILES = [
ALL_DB_FILES = BOKCHOY_DB_FILES + MIGRATION_OUTPUT_FILES
CACHE_BUCKET_NAME = 'edx-tools-database-caches'
FINGERPRINT_FILEPATH = '{}/common/test/db_cache/bokchoy_migrations.sha1'.format(Env.REPO_ROOT)
CACHE_FOLDER = 'common/test/db_cache'
FINGERPRINT_FILEPATH = '{}/{}/bokchoy_migrations.sha1'.format(Env.REPO_ROOT, CACHE_FOLDER)
def remove_cached_db_files():
......@@ -39,7 +41,7 @@ def remove_cached_db_files():
for db_file in BOKCHOY_DB_FILES:
try:
db_file_path = os.path.join(
'{}/common/test/db_cache'.format(Env.REPO_ROOT), db_file
'{}/{}'.format(Env.REPO_ROOT, CACHE_FOLDER), db_file
)
os.remove(db_file_path)
print('\tRemoved {}'.format(db_file_path))
......@@ -48,28 +50,50 @@ def remove_cached_db_files():
continue
def verify_files_were_created(files):
    """
    Verify that the files were created.
    This will help notice/prevent breakages due to
    changes to the bash script file.

    Arguments:
        files (list): file names expected to exist within CACHE_FOLDER.

    Raises:
        AssertionError: if any of the expected files is missing.
    """
    for file_name in files:  # renamed from `file` to avoid shadowing the builtin
        file_path = os.path.join(CACHE_FOLDER, file_name)
        # Raise explicitly rather than via a bare `assert`: this survives
        # `python -O` and the message names the missing file.
        if not os.path.isfile(file_path):
            raise AssertionError('Expected file not found: {}'.format(file_path))
def apply_migrations_and_create_cache_files():
    """
    Apply migrations to the test database and create the cache files.
    """
    # Run the repo's reset script, then confirm the expected db cache
    # files actually landed on disk.
    reset_script = '{}/scripts/reset-test-db.sh'.format(Env.REPO_ROOT)
    sh(reset_script)
    verify_files_were_created(BOKCHOY_DB_FILES)
def calculate_bokchoy_migrations():
    """
    Run the calculate-bokchoy-migrations script, which will generate two
    yml files. These tell whether or not we need to run migrations.
    """
    # Invoke the repo script, then confirm its yml output files exist.
    script_path = '{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)
    sh(script_path)
    verify_files_were_created(MIGRATION_OUTPUT_FILES)
def fingerprint_bokchoy_db_files():
    """
    Generate a sha1 checksum for files used to configure the bokchoy databases.
    This checksum will represent the current 'state' of the databases,
    including schema, migrations to be run, and data. It can be used to
    determine if the databases need to be updated.
    WARNING: this will give different results depending on whether the
    bokchoy database has been flushed or not.

    Returns:
        str: the computed fingerprint for the db cache files.
    """
    # Regenerate the migration yml files so they are part of the fingerprint.
    calculate_bokchoy_migrations()
    file_paths = [
        os.path.join(CACHE_FOLDER, db_file) for db_file in ALL_DB_FILES
    ]
    fingerprint = compute_fingerprint(file_paths)
    print("The fingerprint for bokchoy db files is: {}".format(fingerprint))
    return fingerprint
......@@ -82,27 +106,66 @@ def update_bokchoy_db_cache():
* Remove any previously cached database files
* Apply migrations on a fresh db
* Write the collective sha1 checksum for all of these files to disk
WARNING: this method will remove your current cached files
and apply migrations, which could take several minutes.
"""
remove_cached_db_files()
apply_migrations_and_create_cache_files()
# Apply migrations to the test database and create the cache files
sh('{}/scripts/reset-test-db.sh'.format(Env.REPO_ROOT))
# Write the fingerprint of the database files to disk for use in future
# comparisons
# Write the fingerprint of the database files to disk for use
# in future comparisons.
fingerprint = fingerprint_bokchoy_db_files()
with open(FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
fingerprint_file.write(fingerprint)
def extract_bokchoy_db_cache_files(files=BOKCHOY_DB_FILES, path=CACHE_FOLDER):
    """ Extract the files retrieved from S3."""
    # Clear any stale cached db files before extracting fresh copies.
    remove_cached_db_files()
    # NOTE(review): `path` defaults to CACHE_FOLDER (a directory), but
    # tarfile.open's `name` must be a path to an archive *file* (e.g. the
    # downloaded '<fingerprint>.tar.gz'). As written this call cannot open
    # a valid archive; the archive path likely needs to be passed in
    # separately — TODO confirm intended interface (commit is marked WIP).
    with tarfile.open(name=path, mode='r') as tar_file:
        for name in files:
            # Extract each expected member into the cache directory.
            tar_file.extract(name=name, path=path)
    # Sanity-check that every expected db file now exists on disk.
    verify_files_were_created(BOKCHOY_DB_FILES)
def get_bokchoy_db_cache_from_s3(fingerprint, bucket_name=CACHE_BUCKET_NAME, path=CACHE_FOLDER):
    """
    Retrieve the zip file with the fingerprint

    Arguments:
        fingerprint (str): sha1 checksum naming the cached archive.
        bucket_name (str): the s3 bucket to download from.
        path (str): local folder into which the archive is downloaded.

    Raises:
        AssertionError: if no archive for the fingerprint exists in the bucket.
    """
    zipfile_name = '{}.tar.gz'.format(fingerprint)
    # BUG FIX: was `os.path.join(path, zipfile)` — `zipfile` is not defined
    # at this point (NameError); the file *name* string is what is needed.
    zipfile_path = os.path.join(path, zipfile_name)
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)
    # BUG FIX: Key takes the Bucket object, not the bucket's name string
    # (matches the usage in is_fingerprint_in_bucket).
    key = boto.s3.key.Key(bucket=bucket, name=zipfile_name)
    # Explicit raise (not a bare assert) so the failure survives `python -O`
    # and names what was missing.
    if not key.exists():
        raise AssertionError(
            '{} was not found in the {} bucket'.format(zipfile_name, bucket_name)
        )
    # Binary mode: the archive is gzipped binary data.
    with open(zipfile_path, 'wb') as zipfile:
        key.get_contents_to_file(zipfile)
    extract_bokchoy_db_cache_files()
def create_tarfile(fingerprint, files=BOKCHOY_DB_FILES, path=CACHE_FOLDER):
    """ Create a tar.gz file with the current bokchoy DB cache files.

    Arguments:
        fingerprint (str): sha1 checksum used to name the archive.
        files (list): file names (relative to `path`) to include.
        path (str): folder containing the files; the archive is written here.
    """
    zipfile_name = '{}.tar.gz'.format(fingerprint)
    zipfile_path = os.path.join(path, zipfile_name)
    with tarfile.open(name=zipfile_path, mode='w:gz') as tar_file:
        for name in files:
            # BUG FIX: was `tarfile.add(name)` — the tarfile *module* has no
            # `add` function (AttributeError); the TarFile object must be
            # used. The files live under `path`, and storing each member
            # under its bare name lets extract_bokchoy_db_cache_files'
            # `tar_file.extract(name=name, ...)` round-trip them.
            tar_file.add(os.path.join(path, name), arcname=name)
def is_fingerprint_in_bucket(fingerprint, bucket_name=CACHE_BUCKET_NAME):
"""
Test if a zip file matching the given fingerprint is present within an s3 bucket
"""
zipfile_name = '{}.tar.gz'.format(fingerprint)
conn = boto.connect_s3()
bucket = conn.get_bucket(bucket_name)
zip_present = "{}.zip".format(fingerprint) in [
k.name for k in bucket.get_all_keys()
]
key = boto.s3.key.Key(bucket=bucket, name=zipfile_name)
zip_present = key.exists()
msg = "a match in the {} bucket.".format(bucket_name)
if zip_present:
print("Found {}".format(msg))
......@@ -111,19 +174,22 @@ def is_fingerprint_in_bucket(fingerprint, bucket_name=CACHE_BUCKET_NAME):
return zip_present
def get_bokchoy_db_fingerprint_from_file():
    """ Return the value recorded in the fingerprint file, or None if it
    cannot be read."""
    # (Removed a stray leftover `def compare_bokchoy_db_fingerprints():`
    # line from a previous revision — a def with no body is a syntax error.)
    try:
        with open(FINGERPRINT_FILEPATH, 'r') as fingerprint_file:
            cached_fingerprint = fingerprint_file.read().strip()
    except IOError:
        # No cached fingerprint on disk (e.g. first run): nothing to compare.
        return None
    return cached_fingerprint
def do_fingerprints_match():
    """
    Determine if the current state of the bokchoy databases and related files
    have changed since the last time they were updated in the repository by
    comparing their fingerprint to the fingerprint saved in the repo.

    Returns:
        bool: True if the cached fingerprint matches the freshly computed one;
        False if they differ or no cached fingerprint could be read
        (get_bokchoy_db_fingerprint_from_file returns None in that case,
        which can never equal a computed fingerprint string).
    """
    # Removed leftover inline try/except that re-read the fingerprint file
    # directly; that duplicated get_bokchoy_db_fingerprint_from_file and
    # short-circuited with `return False` before the comparison ran.
    current_fingerprint = fingerprint_bokchoy_db_files()
    cached_fingerprint = get_bokchoy_db_fingerprint_from_file()
    return current_fingerprint == cached_fingerprint
......@@ -14,16 +14,16 @@ class TestPaverDatabaseTasks(MockS3Mixin, TestCase):
"""Tests for the Database cache file manipulation."""
def setUp(self):
    """Create the moto test bucket the lookup tests run against."""
    super(TestPaverDatabaseTasks, self).setUp()
    # Removed the duplicated old `self.conn = ...` variant left over from
    # the diff; only the bucket reference is needed by the tests, so the
    # connection stays local.
    conn = boto.connect_s3()
    conn.create_bucket('moto_test_bucket')
    self.bucket = conn.get_bucket('moto_test_bucket')
def test_fingerprint_in_bucket(self):
    """A fingerprint whose .tar.gz key exists in the bucket is found."""
    # Removed the stale '.zip'-named duplicate key line left over from the
    # diff; the code under test now looks for '<fingerprint>.tar.gz'.
    key = boto.s3.key.Key(bucket=self.bucket, name='testfile.tar.gz')
    key.set_contents_from_string('this is a test')
    self.assertTrue(is_fingerprint_in_bucket('testfile', 'moto_test_bucket'))
def test_fingerprint_not_in_bucket(self):
    """A fingerprint with no matching .tar.gz key in the bucket is not found."""
    # Removed the stale '.zip'-named duplicate key line left over from the
    # diff; only the '.tar.gz' upload is relevant to the lookup under test.
    key = boto.s3.key.Key(bucket=self.bucket, name='testfile.tar.gz')
    key.set_contents_from_string('this is a test')
    self.assertFalse(is_fingerprint_in_bucket('otherfile', 'moto_test_bucket'))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment