Commit 088a7137 by Christina Roberts

Merge pull request #2330 from edx/christina/export-commands

Export conversion script
parents 08fb4950 2ee96434
"""
Script for converting a tar.gz file representing an exported course
to the archive format used by a different version of export.
Sample invocation: ./manage.py export_convert_format mycourse.tar.gz ~/newformat/
"""
import os
from path import path
from django.core.management.base import BaseCommand, CommandError
from tempfile import mkdtemp
import tarfile
import shutil
from extract_tar import safetar_extractall
from xmodule.modulestore.xml_exporter import convert_between_versions
class Command(BaseCommand):
"""
Convert between export formats.
"""
help = 'Convert between versions 0 and 1 of the course export format'
args = '<tar.gz archive file> <output path>'
def handle(self, *args, **options):
"Execute the command"
if len(args) != 2:
raise CommandError("export requires two arguments: <tar.gz file> <output path>")
source_archive = args[0]
output_path = args[1]
# Create temp directories to extract the source and create the target archive.
temp_source_dir = mkdtemp()
temp_target_dir = mkdtemp()
try:
extract_source(source_archive, temp_source_dir)
desired_version = convert_between_versions(temp_source_dir, temp_target_dir)
# New zip up the target directory.
parts = os.path.basename(source_archive).split('.')
archive_name = path(output_path) / "{source_name}_version_{desired_version}.tar.gz".format(
source_name=parts[0], desired_version=desired_version
)
with open(archive_name, "w"):
tar_file = tarfile.open(archive_name, mode='w:gz')
try:
for item in os.listdir(temp_target_dir):
tar_file.add(path(temp_target_dir) / item, arcname=item)
finally:
tar_file.close()
print("Created archive {0}".format(archive_name))
except ValueError as err:
raise CommandError(err)
finally:
shutil.rmtree(temp_source_dir)
shutil.rmtree(temp_target_dir)
def extract_source(source_archive, target):
"""
Extract the archive into the given target directory.
"""
with tarfile.open(source_archive) as tar_file:
safetar_extractall(tar_file, target)
"""
Test for export_convert_format.
"""
from unittest import TestCase
from django.core.management import call_command, CommandError
from tempfile import mkdtemp
import shutil
from path import path
from contentstore.management.commands.export_convert_format import Command, extract_source
from xmodule.tests.helpers import directories_equal
class ConvertExportFormat(TestCase):
"""
Tests converting between export formats.
"""
def setUp(self):
""" Common setup. """
self.temp_dir = mkdtemp()
self.data_dir = path(__file__).realpath().parent / 'data'
self.version0 = self.data_dir / "Version0_drafts.tar.gz"
self.version1 = self.data_dir / "Version1_drafts.tar.gz"
self.command = Command()
def tearDown(self):
""" Common cleanup. """
shutil.rmtree(self.temp_dir)
def test_no_args(self):
""" Test error condition of no arguments. """
errstring = "export requires two arguments"
with self.assertRaisesRegexp(CommandError, errstring):
self.command.handle()
def test_version1_archive(self):
"""
Smoke test for creating a version 1 archive from a version 0.
"""
call_command('export_convert_format', self.version0, self.temp_dir)
output = path(self.temp_dir) / 'Version0_drafts_version_1.tar.gz'
self.assertTrue(self._verify_archive_equality(output, self.version1))
def test_version0_archive(self):
"""
Smoke test for creating a version 0 archive from a version 1.
"""
call_command('export_convert_format', self.version1, self.temp_dir)
output = path(self.temp_dir) / 'Version1_drafts_version_0.tar.gz'
self.assertTrue(self._verify_archive_equality(output, self.version0))
def _verify_archive_equality(self, file1, file2):
"""
Helper function for determining if 2 archives are equal.
"""
temp_dir_1 = mkdtemp()
temp_dir_2 = mkdtemp()
try:
extract_source(file1, temp_dir_1)
extract_source(file2, temp_dir_2)
return directories_equal(temp_dir_1, temp_dir_2)
finally:
shutil.rmtree(temp_dir_1)
shutil.rmtree(temp_dir_2)
......@@ -9,7 +9,14 @@ from fs.osfs import OSFS
from json import dumps
import json
import datetime
import os
from path import path
import shutil
DRAFT_DIR = "drafts"
PUBLISHED_DIR = "published"
EXPORT_VERSION_FILE = "format.json"
EXPORT_VERSION_KEY = "export_format"
class EdxJSONEncoder(json.JSONEncoder):
"""
......@@ -95,7 +102,7 @@ def export_to_xml(modulestore, contentstore, course_location, root_dir, course_d
draft_verticals = draft_modulestore.get_items([None, course_location.org, course_location.course,
'vertical', None, 'draft'])
if len(draft_verticals) > 0:
draft_course_dir = export_fs.makeopendir('drafts')
draft_course_dir = export_fs.makeopendir(DRAFT_DIR)
for draft_vertical in draft_verticals:
parent_locs = draft_modulestore.get_parent_locations(draft_vertical.location, course.location.course_id)
# Don't try to export orphaned items.
......@@ -117,3 +124,90 @@ def export_extra_content(export_fs, modulestore, course_id, course_location, cat
for item in items:
with item_dir.open(item.location.name + file_suffix, 'w') as item_file:
item_file.write(item.data.encode('utf8'))
def convert_between_versions(source_dir, target_dir):
"""
Converts a version 0 export format to version 1, and vice versa.
@param source_dir: the directory structure with the course export that should be converted.
The contents of source_dir will not be altered.
@param target_dir: the directory where the converted export should be written.
@return: the version number of the converted export.
"""
def convert_to_version_1():
""" Convert a version 0 archive to version 0 """
os.mkdir(copy_root)
with open(copy_root / EXPORT_VERSION_FILE, 'w') as f:
f.write('{{"{export_key}": 1}}\n'.format(export_key=EXPORT_VERSION_KEY))
# If a drafts folder exists, copy it over.
copy_drafts()
# Now copy everything into the published directory
published_dir = copy_root / PUBLISHED_DIR
shutil.copytree(path(source_dir) / course_name, published_dir)
# And delete the nested drafts directory, if it exists.
nested_drafts_dir = published_dir / DRAFT_DIR
if nested_drafts_dir.isdir():
shutil.rmtree(nested_drafts_dir)
def convert_to_version_0():
""" Convert a version 1 archive to version 0 """
# Copy everything in "published" up to the top level.
published_dir = path(source_dir) / course_name / PUBLISHED_DIR
if not published_dir.isdir():
raise ValueError("a version 1 archive must contain a published branch")
shutil.copytree(published_dir, copy_root)
# If there is a "draft" branch, copy it. All other branches are ignored.
copy_drafts()
def copy_drafts():
"""
Copy drafts directory from the old archive structure to the new.
"""
draft_dir = path(source_dir) / course_name / DRAFT_DIR
if draft_dir.isdir():
shutil.copytree(draft_dir, copy_root / DRAFT_DIR)
root = os.listdir(source_dir)
if len(root) != 1 or (path(source_dir) / root[0]).isfile():
raise ValueError("source archive does not have single course directory at top level")
course_name = root[0]
# For this version of the script, we simply convert back and forth between version 0 and 1.
original_version = get_version(path(source_dir) / course_name)
if original_version not in [0, 1]:
raise ValueError("unknown version: " + str(original_version))
desired_version = 1 if original_version is 0 else 0
copy_root = path(target_dir) / course_name
if desired_version == 1:
convert_to_version_1()
else:
convert_to_version_0()
return desired_version
def get_version(course_path):
"""
Return the export format version number for the given
archive directory structure (represented as a path instance).
If the archived file does not correspond to a known export
format, None will be returned.
"""
format_file = course_path / EXPORT_VERSION_FILE
if not format_file.isfile():
return 0
with open(format_file, "r") as f:
data = json.load(f)
if EXPORT_VERSION_KEY in data:
return data[EXPORT_VERSION_KEY]
return None
"""
Utility methods for unit tests.
"""
import filecmp
from path import path
def directories_equal(directory1, directory2):
"""
Returns True if the 2 directories have equal content, else false.
"""
def compare_dirs(dir1, dir2):
""" Compare directories for equality. """
comparison = filecmp.dircmp(dir1, dir2)
if (len(comparison.left_only) > 0) or (len(comparison.right_only) > 0):
return False
if (len(comparison.funny_files) > 0) or (len(comparison.diff_files) > 0):
return False
for subdir in comparison.subdirs:
if not compare_dirs(dir1 / subdir, dir2 / subdir):
return False
return True
return compare_dirs(path(directory1), path(directory2))
......@@ -12,11 +12,17 @@ import mock
import pytz
from fs.osfs import OSFS
from path import path
import uuid
import tarfile
import os
from xmodule.modulestore import Location
from xmodule.modulestore.xml import XMLModuleStore
from xmodule.modulestore.xml_exporter import EdxJSONEncoder
from xmodule.modulestore.xml_exporter import (
EdxJSONEncoder, convert_between_versions, get_version
)
from xmodule.tests import DATA_DIR
from xmodule.tests.helpers import directories_equal
def strip_filenames(descriptor):
......@@ -195,3 +201,132 @@ class TestEdxJsonEncoder(unittest.TestCase):
with self.assertRaises(TypeError):
self.encoder.default({})
class ConvertExportFormat(unittest.TestCase):
"""
Tests converting between export formats.
"""
def setUp(self):
""" Common setup. """
# Directory for expanding all the test archives
self.temp_dir = mkdtemp()
# Directory where new archive will be created
self.result_dir = path(self.temp_dir) / uuid.uuid4().hex
os.mkdir(self.result_dir)
# Expand all the test archives and store their paths.
self.data_dir = path(__file__).realpath().parent / 'data'
self.version0_nodrafts = self._expand_archive('Version0_nodrafts.tar.gz')
self.version1_nodrafts = self._expand_archive('Version1_nodrafts.tar.gz')
self.version0_drafts = self._expand_archive('Version0_drafts.tar.gz')
self.version1_drafts = self._expand_archive('Version1_drafts.tar.gz')
self.version1_drafts_extra_branch = self._expand_archive('Version1_drafts_extra_branch.tar.gz')
self.no_version = self._expand_archive('NoVersionNumber.tar.gz')
def tearDown(self):
""" Common cleanup. """
shutil.rmtree(self.temp_dir)
def _expand_archive(self, name):
""" Expand archive into a directory and return the directory. """
target = path(self.temp_dir) / uuid.uuid4().hex
os.mkdir(target)
with tarfile.open(self.data_dir / name) as tar_file:
tar_file.extractall(path=target)
return target
def test_no_version(self):
""" Test error condition of no version number specified. """
errstring = "unknown version"
with self.assertRaisesRegexp(ValueError, errstring):
convert_between_versions(self.no_version, self.result_dir)
def test_no_published(self):
""" Test error condition of a version 1 archive with no published branch. """
errstring = "version 1 archive must contain a published branch"
no_published = self._expand_archive('Version1_nopublished.tar.gz')
with self.assertRaisesRegexp(ValueError, errstring):
convert_between_versions(no_published, self.result_dir)
def test_empty_course(self):
""" Test error condition of a version 1 archive with no published branch. """
errstring = "source archive does not have single course directory at top level"
empty_course = self._expand_archive('EmptyCourse.tar.gz')
with self.assertRaisesRegexp(ValueError, errstring):
convert_between_versions(empty_course, self.result_dir)
def test_convert_to_1_nodrafts(self):
"""
Test for converting from version 0 of export format to version 1 in a course with no drafts.
"""
self._verify_conversion(self.version0_nodrafts, self.version1_nodrafts)
def test_convert_to_1_drafts(self):
"""
Test for converting from version 0 of export format to version 1 in a course with drafts.
"""
self._verify_conversion(self.version0_drafts, self.version1_drafts)
def test_convert_to_0_nodrafts(self):
"""
Test for converting from version 1 of export format to version 0 in a course with no drafts.
"""
self._verify_conversion(self.version1_nodrafts, self.version0_nodrafts)
def test_convert_to_0_drafts(self):
"""
Test for converting from version 1 of export format to version 0 in a course with drafts.
"""
self._verify_conversion(self.version1_drafts, self.version0_drafts)
def test_convert_to_0_extra_branch(self):
"""
Test for converting from version 1 of export format to version 0 in a course
with drafts and an extra branch.
"""
self._verify_conversion(self.version1_drafts_extra_branch, self.version0_drafts)
def test_equality_function(self):
"""
Check equality function returns False for unequal directories.
"""
self.assertFalse(directories_equal(self.version1_nodrafts, self.version0_nodrafts))
self.assertFalse(directories_equal(self.version1_drafts_extra_branch, self.version1_drafts))
def test_version_0(self):
"""
Check that get_version correctly identifies a version 0 archive (old format).
"""
self.assertEqual(0, self._version_test(self.version0_nodrafts))
def test_version_1(self):
"""
Check that get_version correctly identifies a version 1 archive (new format).
"""
self.assertEqual(1, self._version_test(self.version1_nodrafts))
def test_version_missing(self):
"""
Check that get_version returns None if no version number is specified,
and the archive is not version 0.
"""
self.assertIsNone(self._version_test(self.no_version))
def _version_test(self, archive_dir):
"""
Helper function for version tests.
"""
root = os.listdir(archive_dir)
course_directory = archive_dir / root[0]
return get_version(course_directory)
def _verify_conversion(self, source_archive, comparison_archive):
"""
Helper function for conversion tests.
"""
convert_between_versions(source_archive, self.result_dir)
self.assertTrue(directories_equal(self.result_dir, comparison_archive))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment