Commit 088a7137 by Christina Roberts

Merge pull request #2330 from edx/christina/export-commands

Export conversion script
parents 08fb4950 2ee96434
"""
Script for converting a tar.gz file representing an exported course
to the archive format used by a different version of export.
Sample invocation: ./manage.py export_convert_format mycourse.tar.gz ~/newformat/
"""
import os
from path import path
from django.core.management.base import BaseCommand, CommandError
from tempfile import mkdtemp
import tarfile
import shutil
from extract_tar import safetar_extractall
from xmodule.modulestore.xml_exporter import convert_between_versions
class Command(BaseCommand):
    """
    Convert between export formats.

    The source archive is extracted into a temporary directory, converted by
    convert_between_versions into a second temporary directory, and the result
    is written as a new tar.gz archive inside the requested output path.
    """
    help = 'Convert between versions 0 and 1 of the course export format'
    args = '<tar.gz archive file> <output path>'

    def handle(self, *args, **options):
        """
        Execute the command.

        Expects exactly two positional arguments: the tar.gz archive to convert
        and the directory in which the converted archive is created. The new
        archive is named "<source name>_version_<new version>.tar.gz", where
        <source name> is the part of the source file name before its first dot.

        Raises CommandError if the argument count is wrong, or if a ValueError
        is raised while extracting, converting or archiving.
        """
        if len(args) != 2:
            raise CommandError("export requires two arguments: <tar.gz file> <output path>")

        source_archive = args[0]
        output_path = args[1]

        # Create temp directories to extract the source and create the target archive.
        temp_source_dir = mkdtemp()
        temp_target_dir = mkdtemp()
        try:
            extract_source(source_archive, temp_source_dir)

            desired_version = convert_between_versions(temp_source_dir, temp_target_dir)

            # Now zip up the target directory.
            parts = os.path.basename(source_archive).split('.')
            archive_name = path(output_path) / "{source_name}_version_{desired_version}.tar.gz".format(
                source_name=parts[0], desired_version=desired_version
            )
            # tarfile creates (or truncates) the archive itself, so no separate
            # open() of the same file is needed; the context manager closes it.
            with tarfile.open(archive_name, mode='w:gz') as tar_file:
                for item in os.listdir(temp_target_dir):
                    # arcname keeps the temp directory prefix out of the archive.
                    tar_file.add(path(temp_target_dir) / item, arcname=item)

            print("Created archive {0}".format(archive_name))

        except ValueError as err:
            raise CommandError(err)
        finally:
            # Always remove the scratch directories, whether or not conversion succeeded.
            shutil.rmtree(temp_source_dir)
            shutil.rmtree(temp_target_dir)
def extract_source(source_archive, target):
    """
    Unpack the tar archive at source_archive into the target directory.

    Extraction is delegated to safetar_extractall rather than calling
    TarFile.extractall directly; the archive is closed afterwards even
    if extraction fails.
    """
    archive = tarfile.open(source_archive)
    try:
        safetar_extractall(archive, target)
    finally:
        archive.close()
"""
Test for export_convert_format.
"""
from unittest import TestCase
from django.core.management import call_command, CommandError
from tempfile import mkdtemp
import shutil
from path import path
from contentstore.management.commands.export_convert_format import Command, extract_source
from xmodule.tests.helpers import directories_equal
class ConvertExportFormat(TestCase):
    """
    Tests for the export_convert_format management command.
    """
    def setUp(self):
        """ Create the output directory and locate the fixture archives. """
        self.temp_dir = mkdtemp()
        self.data_dir = path(__file__).realpath().parent / 'data'
        self.version0 = self.data_dir / "Version0_drafts.tar.gz"
        self.version1 = self.data_dir / "Version1_drafts.tar.gz"
        self.command = Command()

    def tearDown(self):
        """ Remove the output directory created in setUp. """
        shutil.rmtree(self.temp_dir)

    def test_no_args(self):
        """ Calling the command without arguments raises a CommandError. """
        with self.assertRaisesRegexp(CommandError, "export requires two arguments"):
            self.command.handle()

    def test_version1_archive(self):
        """
        Smoke test: converting the version 0 fixture yields the version 1 fixture.
        """
        call_command('export_convert_format', self.version0, self.temp_dir)
        converted = path(self.temp_dir) / 'Version0_drafts_version_1.tar.gz'
        archives_match = self._verify_archive_equality(converted, self.version1)
        self.assertTrue(archives_match)

    def test_version0_archive(self):
        """
        Smoke test: converting the version 1 fixture yields the version 0 fixture.
        """
        call_command('export_convert_format', self.version1, self.temp_dir)
        converted = path(self.temp_dir) / 'Version1_drafts_version_0.tar.gz'
        archives_match = self._verify_archive_equality(converted, self.version0)
        self.assertTrue(archives_match)

    def _verify_archive_equality(self, file1, file2):
        """
        Extract both archives into scratch directories and report whether
        the extracted directory trees are equal.
        """
        first_extracted = mkdtemp()
        second_extracted = mkdtemp()
        try:
            extract_source(file1, first_extracted)
            extract_source(file2, second_extracted)
            return directories_equal(first_extracted, second_extracted)
        finally:
            shutil.rmtree(first_extracted)
            shutil.rmtree(second_extracted)
...@@ -9,7 +9,14 @@ from fs.osfs import OSFS ...@@ -9,7 +9,14 @@ from fs.osfs import OSFS
from json import dumps from json import dumps
import json import json
import datetime import datetime
import os
from path import path
import shutil
# Name of the directory that holds draft content inside a course export.
DRAFT_DIR = "drafts"
# Name of the directory that holds the published branch in a version 1 export.
PUBLISHED_DIR = "published"
# File in the course directory that records the export format version.
EXPORT_VERSION_FILE = "format.json"
# Key inside EXPORT_VERSION_FILE whose value is the export format version number.
EXPORT_VERSION_KEY = "export_format"
class EdxJSONEncoder(json.JSONEncoder): class EdxJSONEncoder(json.JSONEncoder):
""" """
...@@ -95,7 +102,7 @@ def export_to_xml(modulestore, contentstore, course_location, root_dir, course_d ...@@ -95,7 +102,7 @@ def export_to_xml(modulestore, contentstore, course_location, root_dir, course_d
draft_verticals = draft_modulestore.get_items([None, course_location.org, course_location.course, draft_verticals = draft_modulestore.get_items([None, course_location.org, course_location.course,
'vertical', None, 'draft']) 'vertical', None, 'draft'])
if len(draft_verticals) > 0: if len(draft_verticals) > 0:
draft_course_dir = export_fs.makeopendir('drafts') draft_course_dir = export_fs.makeopendir(DRAFT_DIR)
for draft_vertical in draft_verticals: for draft_vertical in draft_verticals:
parent_locs = draft_modulestore.get_parent_locations(draft_vertical.location, course.location.course_id) parent_locs = draft_modulestore.get_parent_locations(draft_vertical.location, course.location.course_id)
# Don't try to export orphaned items. # Don't try to export orphaned items.
...@@ -117,3 +124,90 @@ def export_extra_content(export_fs, modulestore, course_id, course_location, cat ...@@ -117,3 +124,90 @@ def export_extra_content(export_fs, modulestore, course_id, course_location, cat
for item in items: for item in items:
with item_dir.open(item.location.name + file_suffix, 'w') as item_file: with item_dir.open(item.location.name + file_suffix, 'w') as item_file:
item_file.write(item.data.encode('utf8')) item_file.write(item.data.encode('utf8'))
def convert_between_versions(source_dir, target_dir):
    """
    Converts a version 0 export format to version 1, and vice versa.

    @param source_dir: the directory structure with the course export that should be converted.
       The contents of source_dir will not be altered.
    @param target_dir: the directory where the converted export should be written.
    @return: the version number of the converted export.
    @raise ValueError: if source_dir does not contain exactly one top-level course
       directory, if the export version is neither 0 nor 1, or if a version 1
       export has no published branch.
    """
    def convert_to_version_1():
        """ Convert a version 0 archive to version 1 """
        os.mkdir(copy_root)
        with open(copy_root / EXPORT_VERSION_FILE, 'w') as f:
            f.write('{{"{export_key}": 1}}\n'.format(export_key=EXPORT_VERSION_KEY))

        # If a drafts folder exists, copy it over.
        copy_drafts()

        # Now copy everything into the published directory
        published_dir = copy_root / PUBLISHED_DIR
        shutil.copytree(path(source_dir) / course_name, published_dir)
        # And delete the nested drafts directory, if it exists.
        nested_drafts_dir = published_dir / DRAFT_DIR
        if nested_drafts_dir.isdir():
            shutil.rmtree(nested_drafts_dir)

    def convert_to_version_0():
        """ Convert a version 1 archive to version 0 """
        # Copy everything in "published" up to the top level.
        published_dir = path(source_dir) / course_name / PUBLISHED_DIR
        if not published_dir.isdir():
            raise ValueError("a version 1 archive must contain a published branch")

        shutil.copytree(published_dir, copy_root)

        # If there is a "draft" branch, copy it. All other branches are ignored.
        copy_drafts()

    def copy_drafts():
        """
        Copy drafts directory from the old archive structure to the new.
        """
        draft_dir = path(source_dir) / course_name / DRAFT_DIR
        if draft_dir.isdir():
            shutil.copytree(draft_dir, copy_root / DRAFT_DIR)

    root = os.listdir(source_dir)
    if len(root) != 1 or (path(source_dir) / root[0]).isfile():
        raise ValueError("source archive does not have single course directory at top level")

    course_name = root[0]

    # For this version of the script, we simply convert back and forth between version 0 and 1.
    original_version = get_version(path(source_dir) / course_name)
    if original_version not in [0, 1]:
        raise ValueError("unknown version: " + str(original_version))
    # Compare by value: identity comparison against an int literal is not guaranteed to work.
    desired_version = 1 if original_version == 0 else 0

    # The nested helpers above read copy_root and course_name from this scope.
    copy_root = path(target_dir) / course_name

    if desired_version == 1:
        convert_to_version_1()
    else:
        convert_to_version_0()

    return desired_version
def get_version(course_path):
    """
    Determine the export format version of a course directory.

    course_path is a path instance for the course directory. A directory
    with no EXPORT_VERSION_FILE is treated as version 0. Otherwise the value
    stored under EXPORT_VERSION_KEY in that file is returned, or None when
    the key is absent.
    """
    version_file = course_path / EXPORT_VERSION_FILE
    if version_file.isfile():
        with open(version_file, "r") as version_handle:
            contents = json.load(version_handle)
        if EXPORT_VERSION_KEY in contents:
            return contents[EXPORT_VERSION_KEY]
        return None
    # No format file at all means the original (version 0) layout.
    return 0
"""
Utility methods for unit tests.
"""
import filecmp
from path import path
def directories_equal(directory1, directory2):
    """
    Returns True if the 2 directories have equal content, else false.

    Two directories are considered equal when, at every level, they contain
    the same names, no file differs or fails to compare, and no name is a
    different kind of entry on each side (for example a file in one tree and
    a directory in the other). File comparison is whatever filecmp.dircmp
    performs.
    """
    def compare_dirs(comparison):
        """ Check a dircmp result, and those of its subdirectories, for equality. """
        if comparison.left_only or comparison.right_only:
            return False
        if comparison.funny_files or comparison.diff_files:
            return False
        # Names present on both sides but of different types (or unreadable)
        # end up only in common_funny, so they must be rejected explicitly.
        if comparison.common_funny:
            return False
        # dircmp.subdirs maps each common subdirectory name to its own dircmp object.
        return all(compare_dirs(child) for child in comparison.subdirs.values())

    return compare_dirs(filecmp.dircmp(directory1, directory2))
...@@ -12,11 +12,17 @@ import mock ...@@ -12,11 +12,17 @@ import mock
import pytz import pytz
from fs.osfs import OSFS from fs.osfs import OSFS
from path import path from path import path
import uuid
import tarfile
import os
from xmodule.modulestore import Location from xmodule.modulestore import Location
from xmodule.modulestore.xml import XMLModuleStore from xmodule.modulestore.xml import XMLModuleStore
from xmodule.modulestore.xml_exporter import EdxJSONEncoder from xmodule.modulestore.xml_exporter import (
EdxJSONEncoder, convert_between_versions, get_version
)
from xmodule.tests import DATA_DIR from xmodule.tests import DATA_DIR
from xmodule.tests.helpers import directories_equal
def strip_filenames(descriptor): def strip_filenames(descriptor):
...@@ -195,3 +201,132 @@ class TestEdxJsonEncoder(unittest.TestCase): ...@@ -195,3 +201,132 @@ class TestEdxJsonEncoder(unittest.TestCase):
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
self.encoder.default({}) self.encoder.default({})
class ConvertExportFormat(unittest.TestCase):
    """
    Tests converting between export formats.

    Every fixture archive is expanded into its own subdirectory of a single
    temporary directory, which is removed again in tearDown.
    """
    def setUp(self):
        """ Common setup. """

        # Directory for expanding all the test archives
        self.temp_dir = mkdtemp()

        # Directory where new archive will be created
        self.result_dir = path(self.temp_dir) / uuid.uuid4().hex
        os.mkdir(self.result_dir)

        # Expand all the test archives and store their paths.
        self.data_dir = path(__file__).realpath().parent / 'data'
        self.version0_nodrafts = self._expand_archive('Version0_nodrafts.tar.gz')
        self.version1_nodrafts = self._expand_archive('Version1_nodrafts.tar.gz')
        self.version0_drafts = self._expand_archive('Version0_drafts.tar.gz')
        self.version1_drafts = self._expand_archive('Version1_drafts.tar.gz')
        self.version1_drafts_extra_branch = self._expand_archive('Version1_drafts_extra_branch.tar.gz')
        self.no_version = self._expand_archive('NoVersionNumber.tar.gz')

    def tearDown(self):
        """ Common cleanup. """
        shutil.rmtree(self.temp_dir)

    def _expand_archive(self, name):
        """ Expand archive into a directory and return the directory. """
        # A random directory name keeps each expanded fixture separate.
        target = path(self.temp_dir) / uuid.uuid4().hex
        os.mkdir(target)
        with tarfile.open(self.data_dir / name) as tar_file:
            tar_file.extractall(path=target)

        return target

    def test_no_version(self):
        """ Test error condition of no version number specified. """
        errstring = "unknown version"
        with self.assertRaisesRegexp(ValueError, errstring):
            convert_between_versions(self.no_version, self.result_dir)

    def test_no_published(self):
        """ Test error condition of a version 1 archive with no published branch. """
        errstring = "version 1 archive must contain a published branch"
        no_published = self._expand_archive('Version1_nopublished.tar.gz')
        with self.assertRaisesRegexp(ValueError, errstring):
            convert_between_versions(no_published, self.result_dir)

    def test_empty_course(self):
        """ Test error condition of an archive without a single course directory at top level. """
        errstring = "source archive does not have single course directory at top level"
        empty_course = self._expand_archive('EmptyCourse.tar.gz')
        with self.assertRaisesRegexp(ValueError, errstring):
            convert_between_versions(empty_course, self.result_dir)

    def test_convert_to_1_nodrafts(self):
        """
        Test for converting from version 0 of export format to version 1 in a course with no drafts.
        """
        self._verify_conversion(self.version0_nodrafts, self.version1_nodrafts)

    def test_convert_to_1_drafts(self):
        """
        Test for converting from version 0 of export format to version 1 in a course with drafts.
        """
        self._verify_conversion(self.version0_drafts, self.version1_drafts)

    def test_convert_to_0_nodrafts(self):
        """
        Test for converting from version 1 of export format to version 0 in a course with no drafts.
        """
        self._verify_conversion(self.version1_nodrafts, self.version0_nodrafts)

    def test_convert_to_0_drafts(self):
        """
        Test for converting from version 1 of export format to version 0 in a course with drafts.
        """
        self._verify_conversion(self.version1_drafts, self.version0_drafts)

    def test_convert_to_0_extra_branch(self):
        """
        Test for converting from version 1 of export format to version 0 in a course
        with drafts and an extra branch.
        """
        self._verify_conversion(self.version1_drafts_extra_branch, self.version0_drafts)

    def test_equality_function(self):
        """
        Check equality function returns False for unequal directories.
        """
        self.assertFalse(directories_equal(self.version1_nodrafts, self.version0_nodrafts))
        self.assertFalse(directories_equal(self.version1_drafts_extra_branch, self.version1_drafts))

    def test_version_0(self):
        """
        Check that get_version correctly identifies a version 0 archive (old format).
        """
        self.assertEqual(0, self._version_test(self.version0_nodrafts))

    def test_version_1(self):
        """
        Check that get_version correctly identifies a version 1 archive (new format).
        """
        self.assertEqual(1, self._version_test(self.version1_nodrafts))

    def test_version_missing(self):
        """
        Check that get_version returns None if no version number is specified,
        and the archive is not version 0.
        """
        self.assertIsNone(self._version_test(self.no_version))

    def _version_test(self, archive_dir):
        """
        Helper function for version tests.

        Runs get_version on the first entry of the expanded archive directory.
        """
        root = os.listdir(archive_dir)
        course_directory = archive_dir / root[0]
        return get_version(course_directory)

    def _verify_conversion(self, source_archive, comparison_archive):
        """
        Helper function for conversion tests.

        Converts source_archive into the result directory and asserts that the
        result equals comparison_archive.
        """
        convert_between_versions(source_archive, self.result_dir)
        self.assertTrue(directories_equal(self.result_dir, comparison_archive))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment