Commit 91a0a3ff by David Baumgold

Merge pull request #1994 from edx/db/reformat-importer

Reformat xml_importer.py to be more readable
parents ebb33be4 36f35ac9
...@@ -16,8 +16,9 @@ from .store_utilities import rewrite_nonportable_content_links ...@@ -16,8 +16,9 @@ from .store_utilities import rewrite_nonportable_content_links
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def import_static_content(modules, course_loc, course_data_path, static_content_store, target_location_namespace, def import_static_content(
subpath='static', verbose=False): modules, course_loc, course_data_path, static_content_store,
target_location_namespace, subpath='static', verbose=False):
remap_dict = {} remap_dict = {}
...@@ -27,8 +28,8 @@ def import_static_content(modules, course_loc, course_data_path, static_content_ ...@@ -27,8 +28,8 @@ def import_static_content(modules, course_loc, course_data_path, static_content_
with open(course_data_path / 'policies/assets.json') as f: with open(course_data_path / 'policies/assets.json') as f:
policy = json.load(f) policy = json.load(f)
except (IOError, ValueError) as err: except (IOError, ValueError) as err:
# xml backed courses won't have this file, only exported courses; so, its absence is not # xml backed courses won't have this file, only exported courses;
# really an exception. # so, its absence is not really an exception.
policy = {} policy = {}
verbose = True verbose = True
...@@ -45,49 +46,60 @@ def import_static_content(modules, course_loc, course_data_path, static_content_ ...@@ -45,49 +46,60 @@ def import_static_content(modules, course_loc, course_data_path, static_content_
data = f.read() data = f.read()
except IOError: except IOError:
if filename.startswith('._'): if filename.startswith('._'):
# OS X "companion files". See http://www.diigo.com/annotated/0c936fda5da4aa1159c189cea227e174 # OS X "companion files". See
# http://www.diigo.com/annotated/0c936fda5da4aa1159c189cea227e174
continue continue
# Not a 'hidden file', then re-raise exception # Not a 'hidden file', then re-raise exception
raise raise
fullname_with_subpath = content_path.replace(static_dir, '') # strip away leading path from the name # strip away leading path from the name
fullname_with_subpath = content_path.replace(static_dir, '')
if fullname_with_subpath.startswith('/'): if fullname_with_subpath.startswith('/'):
fullname_with_subpath = fullname_with_subpath[1:] fullname_with_subpath = fullname_with_subpath[1:]
content_loc = StaticContent.compute_location(target_location_namespace.org, target_location_namespace.course, fullname_with_subpath) content_loc = StaticContent.compute_location(
target_location_namespace.org, target_location_namespace.course,
fullname_with_subpath
)
policy_ele = policy.get(content_loc.name, {}) policy_ele = policy.get(content_loc.name, {})
displayname = policy_ele.get('displayname', filename) displayname = policy_ele.get('displayname', filename)
locked = policy_ele.get('locked', False) locked = policy_ele.get('locked', False)
mime_type = policy_ele.get('contentType', mimetypes.guess_type(filename)[0]) mime_type = policy_ele.get(
'contentType',
mimetypes.guess_type(filename)[0]
)
content = StaticContent( content = StaticContent(
content_loc, displayname, mime_type, data, content_loc, displayname, mime_type, data,
import_path=fullname_with_subpath, locked=locked import_path=fullname_with_subpath, locked=locked
) )
# first let's save a thumbnail so we can get back a thumbnail location # first let's save a thumbnail so we can get back a thumbnail location
(thumbnail_content, thumbnail_location) = static_content_store.generate_thumbnail(content) thumbnail_content, thumbnail_location = static_content_store.generate_thumbnail(content)
if thumbnail_content is not None: if thumbnail_content is not None:
content.thumbnail_location = thumbnail_location content.thumbnail_location = thumbnail_location
#then commit the content # then commit the content
try: try:
static_content_store.save(content) static_content_store.save(content)
except Exception as err: except Exception as err:
log.exception('Error importing {0}, error={1}'.format(fullname_with_subpath, err)) log.exception('Error importing {0}, error={1}'.format(
fullname_with_subpath, err
))
#store the remapping information which will be needed to subsitute in the module data # store the remapping information which will be needed
# to subsitute in the module data
remap_dict[fullname_with_subpath] = content_loc.name remap_dict[fullname_with_subpath] = content_loc.name
return remap_dict return remap_dict
def import_from_xml(store, data_dir, course_dirs=None, def import_from_xml(
default_class='xmodule.raw_module.RawDescriptor', store, data_dir, course_dirs=None,
load_error_modules=True, static_content_store=None, target_location_namespace=None, default_class='xmodule.raw_module.RawDescriptor',
verbose=False, draft_store=None, load_error_modules=True, static_content_store=None,
do_import_static=True): target_location_namespace=None, verbose=False, draft_store=None,
do_import_static=True):
""" """
Import the specified xml data_dir into the "store" modulestore, Import the specified xml data_dir into the "store" modulestore,
using org and course as the location org and course. using org and course as the location org and course.
...@@ -95,14 +107,20 @@ def import_from_xml(store, data_dir, course_dirs=None, ...@@ -95,14 +107,20 @@ def import_from_xml(store, data_dir, course_dirs=None,
course_dirs: If specified, the list of course_dirs to load. Otherwise, load course_dirs: If specified, the list of course_dirs to load. Otherwise, load
all course dirs all course dirs
target_location_namespace is the namespace [passed as Location] (i.e. {tag},{org},{course}) that all modules in the should be remapped to target_location_namespace is the namespace [passed as Location]
after import off disk. We do this remapping as a post-processing step because there's logic in the importing which (i.e. {tag},{org},{course}) that all modules in the should be remapped to
expects a 'url_name' as an identifier to where things are on disk e.g. ../policies/<url_name>/policy.json as well as metadata keys in after import off disk. We do this remapping as a post-processing step
because there's logic in the importing which expects a 'url_name' as an
identifier to where things are on disk
e.g. ../policies/<url_name>/policy.json as well as metadata keys in
the policy.json. so we need to keep the original url_name during import the policy.json. so we need to keep the original url_name during import
do_import_static: if False, then static files are not imported into the static content store. This can be employed for courses which :param do_import_static:
have substantial unchanging static content, which is to inefficient to import every time the course is loaded. if False, then static files are not imported into the static content
Static content for some courses may also be served directly by nginx, instead of going through django. store. This can be employed for courses which have substantial
unchanging static content, which is to inefficient to import every
time the course is loaded. Static content for some courses may also be
served directly by nginx, instead of going through django.
""" """
...@@ -114,21 +132,26 @@ def import_from_xml(store, data_dir, course_dirs=None, ...@@ -114,21 +132,26 @@ def import_from_xml(store, data_dir, course_dirs=None,
xblock_mixins=store.xblock_mixins, xblock_mixins=store.xblock_mixins,
) )
# NOTE: the XmlModuleStore does not implement get_items() which would be a preferable means # NOTE: the XmlModuleStore does not implement get_items()
# to enumerate the entire collection of course modules. It will be left as a TBD to implement that # which would be a preferable means to enumerate the entire collection
# of course modules. It will be left as a TBD to implement that
# method on XmlModuleStore. # method on XmlModuleStore.
course_items = [] course_items = []
for course_id in xml_module_store.modules.keys(): for course_id in xml_module_store.modules.keys():
if target_location_namespace is not None: if target_location_namespace is not None:
pseudo_course_id = '/'.join([target_location_namespace.org, target_location_namespace.course]) pseudo_course_id = '/'.join(
[target_location_namespace.org, target_location_namespace.course]
)
else: else:
course_id_components = course_id.split('/') course_id_components = course_id.split('/')
pseudo_course_id = '/'.join([course_id_components[0], course_id_components[1]]) pseudo_course_id = '/'.join(
[course_id_components[0], course_id_components[1]]
)
try: try:
# turn off all write signalling while importing as this is a high volume operation # turn off all write signalling while importing as this
# on stores that need it # is a high volume operation on stores that need it
if (hasattr(store, 'ignore_write_events_on_courses') and if (hasattr(store, 'ignore_write_events_on_courses') and
pseudo_course_id not in store.ignore_write_events_on_courses): pseudo_course_id not in store.ignore_write_events_on_courses):
store.ignore_write_events_on_courses.append(pseudo_course_id) store.ignore_write_events_on_courses.append(pseudo_course_id)
...@@ -139,69 +162,104 @@ def import_from_xml(store, data_dir, course_dirs=None, ...@@ -139,69 +162,104 @@ def import_from_xml(store, data_dir, course_dirs=None,
if verbose: if verbose:
log.debug("Scanning {0} for course module...".format(course_id)) log.debug("Scanning {0} for course module...".format(course_id))
# Quick scan to get course module as we need some info from there. Also we need to make sure that the # Quick scan to get course module as we need some info from there.
# course module is committed first into the store # Also we need to make sure that the course module is committed
# first into the store
for module in xml_module_store.modules[course_id].itervalues(): for module in xml_module_store.modules[course_id].itervalues():
if module.scope_ids.block_type == 'course': if module.scope_ids.block_type == 'course':
course_data_path = path(data_dir) / module.data_dir course_data_path = path(data_dir) / module.data_dir
course_location = module.location course_location = module.location
log.debug('======> IMPORTING course to location {0}'.format(course_location)) log.debug('======> IMPORTING course to location {loc}'.format(
loc=course_location
))
module = remap_namespace(module, target_location_namespace) module = remap_namespace(module, target_location_namespace)
if not do_import_static: if not do_import_static:
module.static_asset_path = module.data_dir # for old-style xblock where this was actually linked to kvs # for old-style xblock where this was actually linked to kvs
module.static_asset_path = module.data_dir
module.save() module.save()
log.debug('course static_asset_path={0}'.format(module.static_asset_path)) log.debug('course static_asset_path={path}'.format(
path=module.static_asset_path
))
log.debug('course data_dir={0}'.format(module.data_dir)) log.debug('course data_dir={0}'.format(module.data_dir))
# cdodge: more hacks (what else). Seems like we have a problem when importing a course (like 6.002) which # cdodge: more hacks (what else). Seems like we have a
# does not have any tabs defined in the policy file. The import goes fine and then displays fine in LMS, # problem when importing a course (like 6.002) which
# but if someone tries to add a new tab in the CMS, then the LMS barfs because it expects that - # does not have any tabs defined in the policy file.
# if there is *any* tabs - then there at least needs to be some predefined ones # The import goes fine and then displays fine in LMS,
# but if someone tries to add a new tab in the CMS, then
# the LMS barfs because it expects that -- if there are
# *any* tabs -- then there at least needs to be
# some predefined ones
if module.tabs is None or len(module.tabs) == 0: if module.tabs is None or len(module.tabs) == 0:
module.tabs = [{"type": "courseware"}, module.tabs = [
{"type": "course_info", "name": "Course Info"}, {"type": "courseware"},
{"type": "discussion", "name": "Discussion"}, {"type": "course_info", "name": "Course Info"},
{"type": "wiki", "name": "Wiki"}] # note, add 'progress' when we can support it on Edge {"type": "discussion", "name": "Discussion"},
{"type": "wiki", "name": "Wiki"},
import_module(module, store, course_data_path, static_content_store, course_location, # note, add 'progress' when we can support it on Edge
target_location_namespace or course_location, do_import_static=do_import_static) ]
import_module(
module, store, course_data_path, static_content_store,
course_location,
target_location_namespace or course_location,
do_import_static=do_import_static
)
course_items.append(module) course_items.append(module)
# then import all the static content # then import all the static content
if static_content_store is not None and do_import_static: if static_content_store is not None and do_import_static:
_namespace_rename = target_location_namespace if target_location_namespace is not None else course_location if target_location_namespace is not None:
_namespace_rename = target_location_namespace
else:
_namespace_rename = course_location
# first pass to find everything in /static/ # first pass to find everything in /static/
import_static_content(xml_module_store.modules[course_id], course_location, course_data_path, static_content_store, import_static_content(
_namespace_rename, subpath='static', verbose=verbose) xml_module_store.modules[course_id], course_location,
course_data_path, static_content_store,
_namespace_rename, subpath='static', verbose=verbose
)
elif verbose and not do_import_static: elif verbose and not do_import_static:
log.debug('Skipping import of static content, since do_import_static={0}'.format(do_import_static)) log.debug(
"Skipping import of static content, "
"since do_import_static={0}".format(do_import_static)
)
# no matter what do_import_static is, import "static_import" directory # no matter what do_import_static is, import "static_import" directory
# This is needed because the "about" pages (eg "overview") are loaded via load_extra_content, and # This is needed because the "about" pages (eg "overview") are
# do not inherit the lms metadata from the course module, and thus do not get "static_content_store" # loaded via load_extra_content, and do not inherit the lms
# properly defined. Static content referenced in those extra pages thus need to come through the # metadata from the course module, and thus do not get
# c4x:// contentstore, unfortunately. Tell users to copy that content into the "static_import" subdir. # "static_content_store" properly defined. Static content
# referenced in those extra pages thus need to come through the
# c4x:// contentstore, unfortunately. Tell users to copy that
# content into the "static_import" subdir.
simport = 'static_import' simport = 'static_import'
if os.path.exists(course_data_path / simport): if os.path.exists(course_data_path / simport):
_namespace_rename = target_location_namespace if target_location_namespace is not None else course_location if target_location_namespace is not None:
_namespace_rename = target_location_namespace
import_static_content(xml_module_store.modules[course_id], course_location, course_data_path, static_content_store, else:
_namespace_rename, subpath=simport, verbose=verbose) _namespace_rename = course_location
import_static_content(
xml_module_store.modules[course_id], course_location,
course_data_path, static_content_store,
_namespace_rename, subpath=simport, verbose=verbose
)
# finally loop through all the modules # finally loop through all the modules
for module in xml_module_store.modules[course_id].itervalues(): for module in xml_module_store.modules[course_id].itervalues():
if module.scope_ids.block_type == 'course': if module.scope_ids.block_type == 'course':
# we've already saved the course module up at the top of the loop # we've already saved the course module up at the top
# so just skip over it in the inner loop # of the loop so just skip over it in the inner loop
continue continue
# remap module to the new namespace # remap module to the new namespace
...@@ -209,11 +267,16 @@ def import_from_xml(store, data_dir, course_dirs=None, ...@@ -209,11 +267,16 @@ def import_from_xml(store, data_dir, course_dirs=None,
module = remap_namespace(module, target_location_namespace) module = remap_namespace(module, target_location_namespace)
if verbose: if verbose:
log.debug('importing module location {0}'.format(module.location)) log.debug('importing module location {loc}'.format(
loc=module.location
))
import_module(module, store, course_data_path, static_content_store, course_location, import_module(
target_location_namespace if target_location_namespace else course_location, module, store, course_data_path, static_content_store,
do_import_static=do_import_static) course_location,
target_location_namespace if target_location_namespace else course_location,
do_import_static=do_import_static
)
# now import any 'draft' items # now import any 'draft' items
if draft_store is not None: if draft_store is not None:
...@@ -239,11 +302,14 @@ def import_from_xml(store, data_dir, course_dirs=None, ...@@ -239,11 +302,14 @@ def import_from_xml(store, data_dir, course_dirs=None,
return xml_module_store, course_items return xml_module_store, course_items
def import_module(module, store, course_data_path, static_content_store, def import_module(
source_course_location, dest_course_location, allow_not_found=False, module, store, course_data_path, static_content_store,
do_import_static=True): source_course_location, dest_course_location, allow_not_found=False,
do_import_static=True):
logging.debug('processing import of module {0}...'.format(module.location.url())) logging.debug('processing import of module {url}...'.format(
url=module.location.url()
))
content = {} content = {}
for field in module.fields.values(): for field in module.fields.values():
...@@ -262,13 +328,17 @@ def import_module(module, store, course_data_path, static_content_store, ...@@ -262,13 +328,17 @@ def import_module(module, store, course_data_path, static_content_store,
module_data = content module_data = content
if isinstance(module_data, basestring) and do_import_static: if isinstance(module_data, basestring) and do_import_static:
# we want to convert all 'non-portable' links in the module_data (if it is a string) to # we want to convert all 'non-portable' links in the module_data
# portable strings (e.g. /static/) # (if it is a string) to portable strings (e.g. /static/)
module_data = rewrite_nonportable_content_links( module_data = rewrite_nonportable_content_links(
source_course_location.course_id, dest_course_location.course_id, module_data) source_course_location.course_id,
dest_course_location.course_id, module_data
)
if allow_not_found: if allow_not_found:
store.update_item(module.location, module_data, allow_not_found=allow_not_found) store.update_item(
module.location, module_data, allow_not_found=allow_not_found
)
else: else:
store.update_item(module.location, module_data) store.update_item(module.location, module_data)
...@@ -278,23 +348,29 @@ def import_module(module, store, course_data_path, static_content_store, ...@@ -278,23 +348,29 @@ def import_module(module, store, course_data_path, static_content_store,
# NOTE: It's important to use own_metadata here to avoid writing # NOTE: It's important to use own_metadata here to avoid writing
# inherited metadata everywhere. # inherited metadata everywhere.
# remove any export/import only xml_attributes which are used to wire together draft imports # remove any export/import only xml_attributes
if hasattr(module, 'xml_attributes') and 'parent_sequential_url' in module.xml_attributes: # which are used to wire together draft imports
if 'parent_sequential_url' in getattr(module, 'xml_attributes', []):
del module.xml_attributes['parent_sequential_url'] del module.xml_attributes['parent_sequential_url']
if hasattr(module, 'xml_attributes') and 'index_in_children_list' in module.xml_attributes: if 'index_in_children_list' in getattr(module, 'xml_attributes', []):
del module.xml_attributes['index_in_children_list'] del module.xml_attributes['index_in_children_list']
module.save() module.save()
store.update_metadata(module.location, dict(own_metadata(module))) store.update_metadata(module.location, dict(own_metadata(module)))
def import_course_draft(xml_module_store, store, draft_store, course_data_path, static_content_store, source_location_namespace, target_location_namespace): def import_course_draft(
xml_module_store, store, draft_store, course_data_path,
static_content_store, source_location_namespace,
target_location_namespace):
''' '''
This will import all the content inside of the 'drafts' folder, if it exists This will import all the content inside of the 'drafts' folder, if it exists
NOTE: This is not a full course import, basically in our current application only verticals (and downwards) NOTE: This is not a full course import, basically in our current
can be in draft. Therefore, we need to use slightly different call points into the import process_xml application only verticals (and downwards) can be in draft.
as we can't simply call XMLModuleStore() constructor (like we do for importing public content) Therefore, we need to use slightly different call points into
the import process_xml as we can't simply call XMLModuleStore() constructor
(like we do for importing public content)
''' '''
draft_dir = course_data_path + "/drafts" draft_dir = course_data_path + "/drafts"
if not os.path.exists(draft_dir): if not os.path.exists(draft_dir):
...@@ -312,36 +388,42 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path, ...@@ -312,36 +388,42 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path,
load_error_modules=False, load_error_modules=False,
) )
# now walk the /vertical directory where each file in there will be a draft copy of the Vertical # now walk the /vertical directory where each file in there
# will be a draft copy of the Vertical
for dirname, dirnames, filenames in os.walk(draft_dir + "/vertical"): for dirname, dirnames, filenames in os.walk(draft_dir + "/vertical"):
for filename in filenames: for filename in filenames:
module_path = os.path.join(dirname, filename) module_path = os.path.join(dirname, filename)
with open(module_path, 'r') as f: with open(module_path, 'r') as f:
try: try:
# note, on local dev it seems like OSX will put some extra files in # note, on local dev it seems like OSX will put
# the directory with "quarantine" information. These files are # some extra files in the directory with "quarantine"
# binary files and will throw exceptions when we try to parse # information. These files are binary files and will
# the file as an XML string. Let's make sure we're # throw exceptions when we try to parse the file
# as an XML string. Let's make sure we're
# dealing with a string before ingesting # dealing with a string before ingesting
data = f.read() data = f.read()
try: try:
xml = data.decode('utf-8') xml = data.decode('utf-8')
except UnicodeDecodeError, err: except UnicodeDecodeError, err:
# seems like on OSX localdev, the OS is making quarantine files # seems like on OSX localdev, the OS is making
# in the unzip directory when importing courses # quarantine files in the unzip directory
# so if we blindly try to enumerate through the directory, we'll try # when importing courses so if we blindly try to
# to process a bunch of binary quarantine files (which are prefixed with a '._' character # enumerate through the directory, we'll try
# which will dump a bunch of exceptions to the output, although they are harmless. # to process a bunch of binary quarantine files
# # (which are prefixed with a '._' character which
# Reading online docs there doesn't seem to be a good means to detect a 'hidden' # will dump a bunch of exceptions to the output,
# file that works well across all OS environments. So for now, I'm using # although they are harmless.
# OSX's utilization of a leading '.' in the filename to indicate a system hidden
# file.
# #
# Better yet would be a way to figure out if this is a binary file, but I # Reading online docs there doesn't seem to be
# haven't found a good way to do this yet. # a good means to detect a 'hidden' file that works
# well across all OS environments. So for now, I'm using
# OSX's utilization of a leading '.' in the filename
# to indicate a system hidden file.
# #
# Better yet would be a way to figure out if this is
# a binary file, but I haven't found a good way
# to do this yet.
if filename.startswith('._'): if filename.startswith('._'):
continue continue
# Not a 'hidden file', then re-raise exception # Not a 'hidden file', then re-raise exception
...@@ -352,8 +434,9 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path, ...@@ -352,8 +434,9 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path,
def _import_module(module): def _import_module(module):
module.location = module.location._replace(revision='draft') module.location = module.location._replace(revision='draft')
# make sure our parent has us in its list of children # make sure our parent has us in its list of children
# this is to make sure private only verticals show up in the list of children since # this is to make sure private only verticals show up
# they would have been filtered out from the non-draft store export # in the list of children since they would have been
# filtered out from the non-draft store export
if module.location.category == 'vertical': if module.location.category == 'vertical':
non_draft_location = module.location._replace(revision=None) non_draft_location = module.location._replace(revision=None)
sequential_url = module.xml_attributes['parent_sequential_url'] sequential_url = module.xml_attributes['parent_sequential_url']
...@@ -361,41 +444,48 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path, ...@@ -361,41 +444,48 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path,
seq_location = Location(sequential_url) seq_location = Location(sequential_url)
# IMPORTANT: Be sure to update the sequential in the NEW namespace # IMPORTANT: Be sure to update the sequential
seq_location = seq_location._replace(org=target_location_namespace.org, # in the NEW namespace
course=target_location_namespace.course seq_location = seq_location._replace(
) org=target_location_namespace.org,
course=target_location_namespace.course
)
sequential = store.get_item(seq_location, depth=0) sequential = store.get_item(seq_location, depth=0)
if non_draft_location.url() not in sequential.children: if non_draft_location.url() not in sequential.children:
sequential.children.insert(index, non_draft_location.url()) sequential.children.insert(index, non_draft_location.url())
store.update_children(sequential.location, sequential.children) store.update_children(sequential.location, sequential.children)
import_module(module, draft_store, course_data_path, static_content_store, import_module(
source_location_namespace, target_location_namespace, allow_not_found=True) module, draft_store, course_data_path,
static_content_store, source_location_namespace,
target_location_namespace, allow_not_found=True
)
for child in module.get_children(): for child in module.get_children():
_import_module(child) _import_module(child)
# HACK: since we are doing partial imports of drafts # HACK: since we are doing partial imports of drafts
# the vertical doesn't have the 'url-name' set in the attributes (they are normally in the parent # the vertical doesn't have the 'url-name' set in the
# object, aka sequential), so we have to replace the location.name with the XML filename # attributes (they are normally in the parent object,
# that is part of the pack # aka sequential), so we have to replace the location.name
# with the XML filename that is part of the pack
fn, fileExtension = os.path.splitext(filename) fn, fileExtension = os.path.splitext(filename)
descriptor.location = descriptor.location._replace(name=fn) descriptor.location = descriptor.location._replace(name=fn)
_import_module(descriptor) _import_module(descriptor)
except Exception, e: except Exception, e:
logging.exception('There was an error. {0}'.format(unicode(e))) logging.exception('There was an error. {err}'.format(
pass err=unicode(e)
))
def remap_namespace(module, target_location_namespace): def remap_namespace(module, target_location_namespace):
if target_location_namespace is None: if target_location_namespace is None:
return module return module
# This looks a bit wonky as we need to also change the 'name' of the imported course to be what # This looks a bit wonky as we need to also change the 'name' of the
# the caller passed in # imported course to be what the caller passed in
if module.location.category != 'course': if module.location.category != 'course':
module.location = module.location._replace( module.location = module.location._replace(
tag=target_location_namespace.tag, tag=target_location_namespace.tag,
...@@ -413,18 +503,20 @@ def remap_namespace(module, target_location_namespace): ...@@ -413,18 +503,20 @@ def remap_namespace(module, target_location_namespace):
course=target_location_namespace.course, course=target_location_namespace.course,
name=target_location_namespace.name name=target_location_namespace.name
) )
# # There is more re-namespacing work we have to do when
# There is more re-namespacing work we have to do when importing course modules # importing course modules
#
# remap pdf_textbook urls # remap pdf_textbook urls
for entry in module.pdf_textbooks: for entry in module.pdf_textbooks:
for chapter in entry.get('chapters', []): for chapter in entry.get('chapters', []):
if StaticContent.is_c4x_path(chapter.get('url', '')): if StaticContent.is_c4x_path(chapter.get('url', '')):
chapter['url'] = StaticContent.renamespace_c4x_path(chapter['url'], target_location_namespace) chapter['url'] = StaticContent.renamespace_c4x_path(
chapter['url'], target_location_namespace
)
# if there is a wiki_slug which is the same as the original location (aka default value), # if there is a wiki_slug which is the same as the original location
# then remap that so the wiki doesn't point to the old Wiki. # (aka default value), then remap that so the wiki doesn't point to
# the old Wiki.
if module.wiki_slug == original_location.course: if module.wiki_slug == original_location.course:
module.wiki_slug = target_location_namespace.course module.wiki_slug = target_location_namespace.course
...@@ -461,8 +553,9 @@ def allowed_metadata_by_category(category): ...@@ -461,8 +553,9 @@ def allowed_metadata_by_category(category):
def check_module_metadata_editability(module): def check_module_metadata_editability(module):
''' '''
Assert that there is no metadata within a particular module that we can't support editing Assert that there is no metadata within a particular module that
However we always allow 'display_name' and 'xml_attribtues' we can't support editing. However we always allow 'display_name'
and 'xml_attributes'
''' '''
allowed = allowed_metadata_by_category(module.location.category) allowed = allowed_metadata_by_category(module.location.category)
if '*' in allowed: if '*' in allowed:
...@@ -475,7 +568,12 @@ def check_module_metadata_editability(module): ...@@ -475,7 +568,12 @@ def check_module_metadata_editability(module):
if len(illegal_keys) > 0: if len(illegal_keys) > 0:
err_cnt = err_cnt + 1 err_cnt = err_cnt + 1
print ': found non-editable metadata on {0}. These metadata keys are not supported = {1}'. format(module.location.url(), illegal_keys) print(
": found non-editable metadata on {url}. "
"These metadata keys are not supported = {keys}".format(
url=module.location.url(), keys=illegal_keys
)
)
return err_cnt return err_cnt
...@@ -490,7 +588,8 @@ def validate_no_non_editable_metadata(module_store, course_id, category): ...@@ -490,7 +588,8 @@ def validate_no_non_editable_metadata(module_store, course_id, category):
return err_cnt return err_cnt
def validate_category_hierarchy(module_store, course_id, parent_category, expected_child_category): def validate_category_hierarchy(
module_store, course_id, parent_category, expected_child_category):
err_cnt = 0 err_cnt = 0
parents = [] parents = []
...@@ -503,8 +602,14 @@ def validate_category_hierarchy(module_store, course_id, parent_category, expect ...@@ -503,8 +602,14 @@ def validate_category_hierarchy(module_store, course_id, parent_category, expect
for child_loc in [Location(child) for child in parent.children]: for child_loc in [Location(child) for child in parent.children]:
if child_loc.category != expected_child_category: if child_loc.category != expected_child_category:
err_cnt += 1 err_cnt += 1
print 'ERROR: child {0} of parent {1} was expected to be category of {2} but was {3}'.format( print(
child_loc, parent.location, expected_child_category, child_loc.category) "ERROR: child {child} of parent {parent} was expected to be "
"category of {expected} but was {actual}".format(
child=child_loc, parent=parent.location,
expected=expected_child_category,
actual=child_loc.category
)
)
return err_cnt return err_cnt
...@@ -512,8 +617,13 @@ def validate_category_hierarchy(module_store, course_id, parent_category, expect ...@@ -512,8 +617,13 @@ def validate_category_hierarchy(module_store, course_id, parent_category, expect
def validate_data_source_path_existence(path, is_err=True, extra_msg=None): def validate_data_source_path_existence(path, is_err=True, extra_msg=None):
_cnt = 0 _cnt = 0
if not os.path.exists(path): if not os.path.exists(path):
print ("{0}: Expected folder at {1}. {2}".format('ERROR' if is_err == True else 'WARNING', path, extra_msg if print(
extra_msg is not None else '')) "{type}: Expected folder at {path}. {extra}".format(
type='ERROR' if is_err else 'WARNING',
path=path,
extra=extra_msg or "",
)
)
_cnt = 1 _cnt = 1
return _cnt return _cnt
...@@ -524,15 +634,17 @@ def validate_data_source_paths(data_dir, course_dir): ...@@ -524,15 +634,17 @@ def validate_data_source_paths(data_dir, course_dir):
err_cnt = 0 err_cnt = 0
warn_cnt = 0 warn_cnt = 0
err_cnt += validate_data_source_path_existence(course_path / 'static') err_cnt += validate_data_source_path_existence(course_path / 'static')
warn_cnt += validate_data_source_path_existence(course_path / 'static/subs', is_err=False, warn_cnt += validate_data_source_path_existence(
extra_msg='Video captions (if they are used) will not work unless they are static/subs.') course_path / 'static/subs', is_err=False,
extra_msg='Video captions (if they are used) will not work unless they are static/subs.'
)
return err_cnt, warn_cnt return err_cnt, warn_cnt
def validate_course_policy(module_store, course_id): def validate_course_policy(module_store, course_id):
""" """
Validate that the course explicitly sets values for any fields whose defaults may have changed between Validate that the course explicitly sets values for any fields
the export and the import. whose defaults may have changed between the export and the import.
Does not add to error count as these are just warnings. Does not add to error count as these are just warnings.
""" """
...@@ -542,16 +654,25 @@ def validate_course_policy(module_store, course_id): ...@@ -542,16 +654,25 @@ def validate_course_policy(module_store, course_id):
if module.location.category == 'course': if module.location.category == 'course':
if not module._field_data.has(module, 'rerandomize'): if not module._field_data.has(module, 'rerandomize'):
warn_cnt += 1 warn_cnt += 1
print 'WARN: course policy does not specify value for "rerandomize" whose default is now "never". The behavior of your course may change.' print(
'WARN: course policy does not specify value for '
'"rerandomize" whose default is now "never". '
'The behavior of your course may change.'
)
if not module._field_data.has(module, 'showanswer'): if not module._field_data.has(module, 'showanswer'):
warn_cnt += 1 warn_cnt += 1
print 'WARN: course policy does not specify value for "showanswer" whose default is now "finished". The behavior of your course may change.' print(
'WARN: course policy does not specify value for '
'"showanswer" whose default is now "finished". '
'The behavior of your course may change.'
)
return warn_cnt return warn_cnt
def perform_xlint(data_dir, course_dirs, def perform_xlint(
default_class='xmodule.raw_module.RawDescriptor', data_dir, course_dirs,
load_error_modules=True): default_class='xmodule.raw_module.RawDescriptor',
load_error_modules=True):
err_cnt = 0 err_cnt = 0
warn_cnt = 0 warn_cnt = 0
...@@ -581,7 +702,7 @@ def perform_xlint(data_dir, course_dirs, ...@@ -581,7 +702,7 @@ def perform_xlint(data_dir, course_dirs,
for err_log in module_store.errored_courses.itervalues(): for err_log in module_store.errored_courses.itervalues():
for err_log_entry in err_log.errors: for err_log_entry in err_log.errors:
msg = err_log_entry[0] msg = err_log_entry[0]
print msg print(msg)
if msg.startswith('ERROR:'): if msg.startswith('ERROR:'):
err_cnt += 1 err_cnt += 1
else: else:
...@@ -589,33 +710,64 @@ def perform_xlint(data_dir, course_dirs, ...@@ -589,33 +710,64 @@ def perform_xlint(data_dir, course_dirs,
for course_id in module_store.modules.keys(): for course_id in module_store.modules.keys():
# constrain that courses only have 'chapter' children # constrain that courses only have 'chapter' children
err_cnt += validate_category_hierarchy(module_store, course_id, "course", "chapter") err_cnt += validate_category_hierarchy(
module_store, course_id, "course", "chapter"
)
# constrain that chapters only have 'sequentials' # constrain that chapters only have 'sequentials'
err_cnt += validate_category_hierarchy(module_store, course_id, "chapter", "sequential") err_cnt += validate_category_hierarchy(
module_store, course_id, "chapter", "sequential"
)
# constrain that sequentials only have 'verticals' # constrain that sequentials only have 'verticals'
err_cnt += validate_category_hierarchy(module_store, course_id, "sequential", "vertical") err_cnt += validate_category_hierarchy(
# validate the course policy overrides any defaults which have changed over time module_store, course_id, "sequential", "vertical"
)
# validate the course policy overrides any defaults
# which have changed over time
warn_cnt += validate_course_policy(module_store, course_id) warn_cnt += validate_course_policy(module_store, course_id)
# don't allow metadata on verticals, since we can't edit them in studio # don't allow metadata on verticals, since we can't edit them in studio
err_cnt += validate_no_non_editable_metadata(module_store, course_id, "vertical") err_cnt += validate_no_non_editable_metadata(
module_store, course_id, "vertical"
)
# don't allow metadata on chapters, since we can't edit them in studio # don't allow metadata on chapters, since we can't edit them in studio
err_cnt += validate_no_non_editable_metadata(module_store, course_id, "chapter") err_cnt += validate_no_non_editable_metadata(
module_store, course_id, "chapter"
)
# don't allow metadata on sequences that we can't edit # don't allow metadata on sequences that we can't edit
err_cnt += validate_no_non_editable_metadata(module_store, course_id, "sequential") err_cnt += validate_no_non_editable_metadata(
module_store, course_id, "sequential"
)
# check for a presence of a course marketing video # check for a presence of a course marketing video
location_elements = course_id.split('/') location_elements = course_id.split('/')
if Location(['i4x', location_elements[0], location_elements[1], 'about', 'video', None]) not in module_store.modules[course_id]: loc = Location([
print "WARN: Missing course marketing video. It is recommended that every course have a marketing video." 'i4x', location_elements[0], location_elements[1],
'about', 'video', None
])
if loc not in module_store.modules[course_id]:
print(
"WARN: Missing course marketing video. It is recommended "
"that every course have a marketing video."
)
warn_cnt += 1 warn_cnt += 1
print "\n\n------------------------------------------\nVALIDATION SUMMARY: {0} Errors {1} Warnings\n".format(err_cnt, warn_cnt) print("\n")
print("------------------------------------------")
print("VALIDATION SUMMARY: {err} Errors {warn} Warnings".format(
err=err_cnt, warn=warn_cnt)
)
if err_cnt > 0: if err_cnt > 0:
print "This course is not suitable for importing. Please fix courseware according to specifications before importing." print(
"This course is not suitable for importing. Please fix courseware "
"according to specifications before importing."
)
elif warn_cnt > 0: elif warn_cnt > 0:
print "This course can be imported, but some errors may occur during the run of the course. It is recommend that you fix your courseware before importing" print(
"This course can be imported, but some errors may occur "
"during the run of the course. It is recommend that you fix "
"your courseware before importing"
)
else: else:
print "This course can be imported successfully." print("This course can be imported successfully.")
return err_cnt return err_cnt
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment