Commit 84992cdf by Nimisha Asthagiri Committed by Don Mitchell

Refactor xml_importer.py for easier reading.

Remove post-publish step.
parent 47851c50
......@@ -40,7 +40,7 @@ class Command(BaseCommand):
dis=do_import_static))
mstore = modulestore()
_, course_items = import_from_xml(
course_items = import_from_xml(
mstore, ModuleStoreEnum.UserID.mgmt_command, data_dir, course_dirs, load_error_modules=False,
static_content_store=contentstore(), verbose=True,
do_import_static=do_import_static,
......
......@@ -89,7 +89,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
component_types should cause 'Video' to be present.
"""
store = self.store
_, course_items = import_from_xml(store, self.user.id, 'common/test/data/', ['simple'])
course_items = import_from_xml(store, self.user.id, 'common/test/data/', ['simple'])
course = course_items[0]
course.advanced_modules = component_types
store.update_item(course, self.user.id)
......@@ -116,7 +116,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
def test_malformed_edit_unit_request(self):
store = self.store
_, course_items = import_from_xml(store, self.user.id, 'common/test/data/', ['simple'])
course_items = import_from_xml(store, self.user.id, 'common/test/data/', ['simple'])
# just pick one vertical
usage_key = course_items[0].id.make_usage_key('vertical', None)
......@@ -126,7 +126,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
def check_edit_unit(self, test_course_name):
"""Verifies the editing HTML in all the verticals in the given test course"""
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', [test_course_name])
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', [test_course_name])
items = self.store.get_items(course_items[0].id, qualifiers={'category': 'vertical'})
self._check_verticals(items)
......@@ -148,7 +148,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
both draft and non-draft copies.
'''
store = self.store
_, course_items = import_from_xml(store, self.user.id, 'common/test/data/', ['simple'])
course_items = import_from_xml(store, self.user.id, 'common/test/data/', ['simple'])
course_key = course_items[0].id
html_usage_key = course_key.make_usage_key('html', 'test_html')
......@@ -263,7 +263,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
self.assertEqual(num_drafts, 1)
def test_no_static_link_rewrites_on_import(self):
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course = course_items[0]
handouts_usage_key = course.id.make_usage_key('course_info', 'handouts')
......@@ -287,7 +287,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
self.assertGreater(len(course.textbooks), 0)
def test_import_polls(self):
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_key = course_items[0].id
items = self.store.get_items(course_key, qualifiers={'category': 'poll_question'})
......@@ -307,7 +307,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
Tests the ajax callback to render an XModule
"""
direct_store = self.store
_, course_items = import_from_xml(direct_store, self.user.id, 'common/test/data/', ['toy'])
course_items = import_from_xml(direct_store, self.user.id, 'common/test/data/', ['toy'])
usage_key = course_items[0].id.make_usage_key('vertical', 'vertical_test')
# also try a custom response which will trigger the 'is this course in whitelist' logic
resp = self.client.get_json(
......@@ -357,7 +357,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
This test case verifies that a course can use specialized override for about data, e.g. /about/Fall_2012/effort.html
while there is a base definition in /about/effort.html
'''
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_key = course_items[0].id
effort = self.store.get_item(course_key.make_usage_key('about', 'effort'))
self.assertEqual(effort.data, '6 hours')
......@@ -460,7 +460,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
content_store = contentstore()
trash_store = contentstore('trashcan')
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'], static_content_store=content_store)
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'], static_content_store=content_store)
# look up original (and thumbnail) in content store, should be there after import
location = AssetLocation.from_deprecated_string('/c4x/edX/toy/asset/sample_static.txt')
......@@ -618,7 +618,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
"""
content_store = contentstore()
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'], static_content_store=content_store)
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'], static_content_store=content_store)
course_id = course_items[0].id
......@@ -845,7 +845,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
def test_course_handouts_rewrites(self):
# import a test course
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_id = course_items[0].id
handouts_location = course_id.make_usage_key('course_info', 'handouts')
......@@ -895,7 +895,7 @@ class ContentStoreToyCourseTest(ContentStoreTestCase):
# Create toy course
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_id = course_items[0].id
root_dir = path(mkdtemp_clean())
......@@ -1271,7 +1271,7 @@ class ContentStoreTest(ContentStoreTestCase):
)
self.assertEqual(resp.status_code, 200)
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['simple'])
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['simple'])
course_key = course_items[0].id
resp = self._show_course_overview(course_key)
......@@ -1400,7 +1400,7 @@ class ContentStoreTest(ContentStoreTestCase):
self.assertNotEquals(new_discussion_item.discussion_id, '$$GUID$$')
def test_metadata_inheritance(self):
_, course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course_items = import_from_xml(self.store, self.user.id, 'common/test/data/', ['toy'])
course = course_items[0]
verticals = self.store.get_items(course.id, qualifiers={'category': 'vertical'})
......@@ -1466,7 +1466,7 @@ class ContentStoreTest(ContentStoreTestCase):
content_store = contentstore()
# Use conditional_and_poll, as it's got an image already
__, courses = import_from_xml(
courses = import_from_xml(
self.store,
self.user.id,
'common/test/data/',
......
......@@ -64,7 +64,7 @@ class ContentStoreImportTest(ModuleStoreTestCase):
# edx/course can be imported into a namespace with an org/course
# like edx/course_name
module_store, __, course = self.load_test_import_course()
__, course_items = import_from_xml(
course_items = import_from_xml(
module_store,
self.user.id,
'common/test/data',
......@@ -139,7 +139,7 @@ class ContentStoreImportTest(ModuleStoreTestCase):
def test_no_static_link_rewrites_on_import(self):
module_store = modulestore()
_, courses = import_from_xml(module_store, self.user.id, 'common/test/data/', ['toy'], do_import_static=False, verbose=True)
courses = import_from_xml(module_store, self.user.id, 'common/test/data/', ['toy'], do_import_static=False, verbose=True)
course_key = courses[0].id
handouts = module_store.get_item(course_key.make_usage_key('course_info', 'handouts'))
......@@ -157,10 +157,10 @@ class ContentStoreImportTest(ModuleStoreTestCase):
store = modulestore()._get_modulestore_by_type(ModuleStoreEnum.Type.mongo)
# we try to refresh the inheritance tree for each update_item in the import
with check_exact_number_of_calls(store, store.refresh_cached_metadata_inheritance_tree, 46):
with check_exact_number_of_calls(store, store.refresh_cached_metadata_inheritance_tree, 28):
# the post-publish step loads each item in the subtree, which calls _get_cached_metadata_inheritance_tree
with check_exact_number_of_calls(store, store._get_cached_metadata_inheritance_tree, 22):
# _get_cached_metadata_inheritance_tree should be called only once
with check_exact_number_of_calls(store, store._get_cached_metadata_inheritance_tree, 1):
# with bulk-edit in progress, the inheritance tree should be recomputed only at the end of the import
# NOTE: On Jenkins, with memcache enabled, the number of calls here is only 1.
......
......@@ -10,7 +10,7 @@ class DraftReorderTestCase(ModuleStoreTestCase):
def test_order(self):
store = modulestore()
_, course_items = import_from_xml(store, self.user.id, 'common/test/data/', ['import_draft_order'])
course_items = import_from_xml(store, self.user.id, 'common/test/data/', ['import_draft_order'])
course_key = course_items[0].id
sequential = store.get_item(course_key.make_usage_key('sequential', '0f4f7649b10141b0bdc9922dcf94515a'))
verticals = sequential.children
......
......@@ -58,7 +58,7 @@ class XBlockImportTest(ModuleStoreTestCase):
the expected field value set.
"""
_, courses = import_from_xml(
courses = import_from_xml(
self.store, self.user.id, 'common/test/data', [course_dir]
)
......
......@@ -214,7 +214,7 @@ def import_handler(request, course_key_string):
logging.debug('found course.xml at {0}'.format(dirpath))
_module_store, course_items = import_from_xml(
course_items = import_from_xml(
modulestore(),
request.user.id,
settings.GITHUB_REPO_ROOT,
......
......@@ -49,7 +49,7 @@ class BasicAssetsTestCase(AssetsTestCase):
def test_pdf_asset(self):
module_store = modulestore()
_, course_items = import_from_xml(
course_items = import_from_xml(
module_store,
self.user.id,
'common/test/data/',
......@@ -193,7 +193,7 @@ class LockAssetTestCase(AssetsTestCase):
# Load the toy course.
module_store = modulestore()
_, course_items = import_from_xml(
course_items = import_from_xml(
module_store,
self.user.id,
'common/test/data/',
......
......@@ -117,29 +117,36 @@ def import_from_xml(
target_course_id=None, verbose=False,
do_import_static=True, create_new_course_if_not_present=False):
"""
Import the specified xml data_dir into the "store" modulestore,
using org and course as the location org and course.
course_dirs: If specified, the list of course_dirs to load. Otherwise, load
all course dirs
target_course_id is the CourseKey that all modules should be remapped to
after import off disk. We do this remapping as a post-processing step
because there's logic in the importing which expects a 'url_name' as an
identifier to where things are on disk
e.g. ../policies/<url_name>/policy.json as well as metadata keys in
the policy.json. so we need to keep the original url_name during import
:param do_import_static:
if False, then static files are not imported into the static content
store. This can be employed for courses which have substantial
unchanging static content, which is to inefficient to import every
time the course is loaded. Static content for some courses may also be
served directly by nginx, instead of going through django.
: create_new_course_if_not_present:
If True, then a new course is created if it doesn't already exist.
The check for existing courses is case-insensitive.
Import xml-based courses from data_dir into modulestore.
Returns:
list of new course objects
Args:
store: a modulestore implementing ModuleStoreWriteBase in which to store the imported courses.
data_dir: the root directory from which to find the xml courses.
course_dirs: If specified, the list of data_dir subdirectories to load. Otherwise, load
all course dirs
target_course_id: is the CourseKey that all modules should be remapped to
after import off disk. NOTE: this only makes sense if importing only
one course. If more than one course is loaded from data_dir/course_dirs & you
supply this id, this method will raise an AssertionError.
static_content_store: the static asset store
do_import_static: if True, then import the course's static files into static_content_store.
Setting this to False can be employed for courses which have substantial
unchanging static content, which is too inefficient to import every
time the course is loaded. Static content for some courses may also be
served directly by nginx, instead of going through django.
create_new_course_if_not_present: If True, then a new course is created if it doesn't already exist.
Otherwise, it throws an InvalidLocationError for the course.
default_class, load_error_modules: are arguments for constructing the XMLModuleStore (see its doc)
"""
xml_module_store = XMLModuleStore(
......@@ -156,149 +163,47 @@ def import_from_xml(
if target_course_id:
assert(len(xml_module_store.modules) == 1)
# NOTE: the XmlModuleStore does not implement get_items()
# which would be a preferable means to enumerate the entire collection
# of course modules. It will be left as a TBD to implement that
# method on XmlModuleStore.
course_items = []
new_courses = []
for course_key in xml_module_store.modules.keys():
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred, course_key):
if target_course_id is not None:
dest_course_id = target_course_id
else:
dest_course_id = course_key
# Creates a new course if it doesn't already exist
if create_new_course_if_not_present and not store.has_course(dest_course_id, ignore_case=True):
try:
store.create_course(dest_course_id.org, dest_course_id.course, dest_course_id.run, user_id)
except DuplicateCourseError:
# course w/ same org and course exists
# The Mongo modulestore checks *with* the run in has_course, but not in create_course.
log.debug(
"Skipping import of course with id, {0},"
"since it collides with an existing one".format(dest_course_id)
)
continue
with store.bulk_write_operations(dest_course_id):
course_data_path = None
if verbose:
log.debug("Scanning {0} for course module...".format(course_key))
# Quick scan to get course module as we need some info from there.
# Also we need to make sure that the course module is committed
# first into the store
for module in xml_module_store.modules[course_key].itervalues():
if module.scope_ids.block_type == 'course':
course_data_path = path(data_dir) / module.data_dir
log.debug(u'======> IMPORTING course {course_key}'.format(
course_key=course_key,
))
if not do_import_static:
# for old-style xblock where this was actually linked to kvs
module.static_asset_path = module.data_dir
module.save()
log.debug('course static_asset_path={path}'.format(
path=module.static_asset_path
))
log.debug('course data_dir={0}'.format(module.data_dir))
course = _import_module_and_update_references(
module, store, user_id,
course_key,
dest_course_id,
do_import_static=do_import_static
)
if target_course_id is not None:
dest_course_id = target_course_id
else:
dest_course_id = course_key
for entry in course.pdf_textbooks:
for chapter in entry.get('chapters', []):
if StaticContent.is_c4x_path(chapter.get('url', '')):
asset_key = StaticContent.get_location_from_path(chapter['url'])
chapter['url'] = StaticContent.get_static_path_from_location(asset_key)
# Original wiki_slugs had value location.course. To make them unique this was changed to 'org.course.name'.
# If we are importing into a course with a different course_id and wiki_slug is equal to either of these default
# values then remap it so that the wiki does not point to the old wiki.
if course_key != course.id:
original_unique_wiki_slug = u'{0}.{1}.{2}'.format(
course_key.org,
course_key.course,
course_key.run
)
if course.wiki_slug == original_unique_wiki_slug or course.wiki_slug == course_key.course:
course.wiki_slug = u'{0}.{1}.{2}'.format(
course.id.org,
course.id.course,
course.id.run,
)
# cdodge: more hacks (what else). Seems like we have a
# problem when importing a course (like 6.002) which
# does not have any tabs defined in the policy file.
# The import goes fine and then displays fine in LMS,
# but if someone tries to add a new tab in the CMS, then
# the LMS barfs because it expects that -- if there are
# *any* tabs -- then there at least needs to be
# some predefined ones
if course.tabs is None or len(course.tabs) == 0:
CourseTabList.initialize_default(course)
store.update_item(course, user_id)
course_items.append(course)
break
# TODO: shouldn't this raise an exception if course wasn't found?
# then import all the static content
if static_content_store is not None and do_import_static:
# first pass to find everything in /static/
import_static_content(
course_data_path, static_content_store,
dest_course_id, subpath='static', verbose=verbose
)
# Creates a new course if it doesn't already exist
if create_new_course_if_not_present and not store.has_course(dest_course_id, ignore_case=True):
try:
store.create_course(dest_course_id.org, dest_course_id.course, dest_course_id.run, user_id)
except DuplicateCourseError:
# course w/ same org and course exists
log.debug(
"Skipping import of course with id, %s,"
"since it collides with an existing one", dest_course_id
)
continue
elif verbose and not do_import_static:
log.debug(
"Skipping import of static content, "
"since do_import_static={0}".format(do_import_static)
)
with store.bulk_write_operations(dest_course_id):
# STEP 1: find and import course module
course, course_data_path = _import_course_module(
xml_module_store, store, user_id, data_dir, course_key, dest_course_id, do_import_static, verbose
)
new_courses.append(course)
# no matter what do_import_static is, import "static_import" directory
# This is needed because the "about" pages (eg "overview") are
# loaded via load_extra_content, and do not inherit the lms
# metadata from the course module, and thus do not get
# "static_content_store" properly defined. Static content
# referenced in those extra pages thus need to come through the
# c4x:// contentstore, unfortunately. Tell users to copy that
# content into the "static_import" subdir.
simport = 'static_import'
if os.path.exists(course_data_path / simport):
import_static_content(
course_data_path, static_content_store,
dest_course_id, subpath=simport, verbose=verbose
)
# STEP 2: import static content
_import_static_content_wrapper(
static_content_store, do_import_static, course_data_path, dest_course_id, verbose
)
# now loop through all the modules
# STEP 3: import PUBLISHED items
# now loop through all the modules
with store.branch_setting(ModuleStoreEnum.Branch.published_only, dest_course_id):
for module in xml_module_store.modules[course_key].itervalues():
if module.scope_ids.block_type == 'course':
# we've already saved the course module up at the top
# of the loop so just skip over it in the inner loop
# we've already saved the course module up above
continue
if verbose:
log.debug('importing module location {loc}'.format(
loc=module.location
))
log.debug('importing module location {loc}'.format(loc=module.location))
_import_module_and_update_references(
module, store,
......@@ -309,10 +214,8 @@ def import_from_xml(
runtime=course.runtime
)
# finally, publish the course
store.publish(course.location, user_id)
# now import any DRAFT items
# STEP 4: import any DRAFT items
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred, dest_course_id):
_import_course_draft(
xml_module_store,
store,
......@@ -323,8 +226,114 @@ def import_from_xml(
course.runtime
)
return xml_module_store, course_items
return new_courses
def _import_course_module(
        xml_module_store, store, user_id, data_dir, course_key, dest_course_id, do_import_static, verbose
):
    """
    Import the course-level module for course_key into store.

    The course module is imported ahead of every other module because later
    import steps need information from it (its data directory and runtime).

    Args:
        xml_module_store: the XML modulestore holding the parsed modules.
        store: the destination modulestore.
        user_id: id of the user performing the import.
        data_dir: root directory the xml courses were loaded from.
        course_key: key of the course as loaded from disk.
        dest_course_id: key the imported course is remapped to.
        do_import_static: when False, the course's static_asset_path is
            pointed at its data_dir before the module is imported.
        verbose: when True, log the start of the scan.

    Returns:
        a (course, course_data_path) tuple: the imported course module and
        the on-disk directory its data was read from.

    Raises:
        Exception: if no module of block type 'course' is present.
    """
    if verbose:
        log.debug("Scanning {0} for course module...".format(course_key))

    # Pick out the first course-type module; everything below operates on it.
    source_course = next(
        (
            candidate
            for candidate in xml_module_store.modules[course_key].itervalues()
            if candidate.scope_ids.block_type == 'course'
        ),
        None
    )
    if source_course is None:
        # raise an exception if the course wasn't found
        raise Exception("Course module not found in imported modules")

    course_data_path = path(data_dir) / source_course.data_dir

    log.debug(u'======> IMPORTING course {course_key}'.format(course_key=course_key))

    if not do_import_static:
        # for old-style xblock where this was actually linked to kvs
        source_course.static_asset_path = source_course.data_dir
        source_course.save()
        log.debug('course static_asset_path={path}'.format(path=source_course.static_asset_path))

    log.debug('course data_dir={0}'.format(source_course.data_dir))

    course = _import_module_and_update_references(
        source_course, store, user_id,
        course_key,
        dest_course_id,
        do_import_static=do_import_static
    )

    # Rewrite c4x-style textbook chapter urls into static paths.
    for textbook in course.pdf_textbooks:
        for chapter in textbook.get('chapters', []):
            if not StaticContent.is_c4x_path(chapter.get('url', '')):
                continue
            asset_key = StaticContent.get_location_from_path(chapter['url'])
            chapter['url'] = StaticContent.get_static_path_from_location(asset_key)

    # Original wiki_slugs had value location.course. To make them unique this
    # was changed to 'org.course.name'. When the course lands under a different
    # course id and its wiki_slug still equals one of those defaults, remap the
    # slug so that the wiki does not point to the old wiki.
    if course_key != course.id:
        slug_format = u'{0}.{1}.{2}'
        original_unique_wiki_slug = slug_format.format(course_key.org, course_key.course, course_key.run)
        if course.wiki_slug == original_unique_wiki_slug or course.wiki_slug == course_key.course:
            course.wiki_slug = slug_format.format(course.id.org, course.id.course, course.id.run)

    # cdodge: a course (like 6.002) without any tabs defined in its policy
    # file imports and displays fine in LMS, but adding a new tab in the CMS
    # then breaks the LMS, which expects some predefined tabs whenever there
    # are *any* tabs. So give tab-less courses the default set.
    has_tabs = course.tabs is not None and len(course.tabs) != 0
    if not has_tabs:
        CourseTabList.initialize_default(course)

    store.update_item(course, user_id)
    return course, course_data_path
def _import_static_content_wrapper(static_content_store, do_import_static, course_data_path, dest_course_id, verbose):
# then import all the static content
if static_content_store is not None and do_import_static:
# first pass to find everything in /static/
import_static_content(
course_data_path, static_content_store,
dest_course_id, subpath='static', verbose=verbose
)
elif verbose and not do_import_static:
log.debug(
"Skipping import of static content, "
"since do_import_static={0}".format(do_import_static)
)
# no matter what do_import_static is, import "static_import" directory
# This is needed because the "about" pages (eg "overview") are
# loaded via load_extra_content, and do not inherit the lms
# metadata from the course module, and thus do not get
# "static_content_store" properly defined. Static content
# referenced in those extra pages thus need to come through the
# c4x:// contentstore, unfortunately. Tell users to copy that
# content into the "static_import" subdir.
simport = 'static_import'
if os.path.exists(course_data_path / simport):
import_static_content(
course_data_path, static_content_store,
dest_course_id, subpath=simport, verbose=verbose
)
def _import_module_and_update_references(
module, store, user_id,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment