Commit 4f321897 by chrisndodge

Merge pull request #620 from edx/feature/cdodge/convert-to-portable-links-on-import-and-clone

Feature/cdodge/convert to portable links on import and clone
parents 8b2346fc e23ec4f2
......@@ -644,6 +644,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
content_store = contentstore()
# now do the actual cloning
clone_course(module_store, content_store, source_location, dest_location)
# first assert that all draft content got cloned as well
......@@ -693,6 +694,56 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
expected_children.append(child_loc.url())
self.assertEqual(expected_children, lookup_item.children)
def test_portable_link_rewrites_during_clone_course(self):
course_data = {
'org': 'MITx',
'number': '999',
'display_name': 'Robot Super Course',
'run': '2013_Spring'
}
module_store = modulestore('direct')
draft_store = modulestore('draft')
content_store = contentstore()
import_from_xml(module_store, 'common/test/data/', ['toy'])
source_course_id = 'edX/toy/2012_Fall'
dest_course_id = 'MITx/999/2013_Spring'
source_location = CourseDescriptor.id_to_location(source_course_id)
dest_location = CourseDescriptor.id_to_location(dest_course_id)
# let's force a non-portable link in the clone source
# as a final check, make sure that any non-portable links are rewritten during cloning
html_module_location = Location([
source_location.tag, source_location.org, source_location.course, 'html', 'nonportable'])
html_module = module_store.get_instance(source_location.course_id, html_module_location)
self.assertTrue(isinstance(html_module.data, basestring))
new_data = html_module.data.replace('/static/', '/c4x/{0}/{1}/asset/'.format(
source_location.org, source_location.course))
module_store.update_item(html_module_location, new_data)
html_module = module_store.get_instance(source_location.course_id, html_module_location)
self.assertEqual(new_data, html_module.data)
# create the destination course
resp = self.client.post(reverse('create_new_course'), course_data)
self.assertEqual(resp.status_code, 200)
data = parse_json(resp)
self.assertEqual(data['id'], 'i4x://MITx/999/course/2013_Spring')
# do the actual cloning
clone_course(module_store, content_store, source_location, dest_location)
# make sure that any non-portable links are rewritten during cloning
html_module_location = Location([
dest_location.tag, dest_location.org, dest_location.course, 'html', 'nonportable'])
html_module = module_store.get_instance(dest_location.course_id, html_module_location)
self.assertIn('/static/foo.jpg', html_module.data)
def test_illegal_draft_crud_ops(self):
draft_store = modulestore('draft')
direct_store = modulestore('direct')
......@@ -720,6 +771,22 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
resp = self.client.get('http://localhost:8001/c4x/CDX/123123/asset/&images_circuits_Lab7Solution2.png')
self.assertEqual(resp.status_code, 400)
def test_rewrite_nonportable_links_on_import(self):
module_store = modulestore('direct')
content_store = contentstore()
import_from_xml(module_store, 'common/test/data/', ['toy'], static_content_store=content_store)
# first check a static asset link
html_module_location = Location(['i4x', 'edX', 'toy', 'html', 'nonportable'])
html_module = module_store.get_instance('edX/toy/2012_Fall', html_module_location)
self.assertIn('/static/foo.jpg', html_module.data)
# then check a intra courseware link
html_module_location = Location(['i4x', 'edX', 'toy', 'html', 'nonportable_link'])
html_module = module_store.get_instance('edX/toy/2012_Fall', html_module_location)
self.assertIn('/jump_to_id/nonportable_link', html_module.data)
def test_delete_course(self):
"""
This test will import a course, make a draft item, and delete it. This will also assert that the
......@@ -1384,6 +1451,31 @@ class ContentStoreTest(ModuleStoreTestCase):
json.dumps({'id': del_loc.url()}), "application/json")
self.assertEqual(200, resp.status_code)
def test_import_into_new_course_id(self):
module_store = modulestore('direct')
target_location = Location(['i4x', 'MITx', '999', 'course', '2013_Spring'])
course_data = {
'org': target_location.org,
'number': target_location.course,
'display_name': 'Robot Super Course',
'run': target_location.name
}
resp = self.client.post(reverse('create_new_course'), course_data)
self.assertEqual(resp.status_code, 200)
data = parse_json(resp)
self.assertEqual(data['id'], target_location.url())
import_from_xml(module_store, 'common/test/data/', ['simple'], target_location_namespace=target_location)
modules = module_store.get_items(Location([
target_location.tag, target_location.org, target_location.course, None, None, None]))
# we should have a number of modules in there
# we can't specify an exact number since it'll always be changing
self.assertGreater(len(modules), 10)
def test_import_metadata_with_attempts_empty_string(self):
module_store = modulestore('direct')
import_from_xml(module_store, 'common/test/data/', ['simple'])
......
......@@ -315,6 +315,8 @@ def import_course(request, org, course, name):
create_all_course_groups(request.user, course_items[0].location)
logging.debug('created all course groups at {0}'.format(course_items[0].location))
return HttpResponse(json.dumps({'Status': 'OK'}))
else:
course_module = modulestore().get_item(location)
......
......@@ -150,14 +150,13 @@ DEBUG_TOOLBAR_PANELS = (
'debug_toolbar.panels.sql.SQLDebugPanel',
'debug_toolbar.panels.signals.SignalDebugPanel',
'debug_toolbar.panels.logger.LoggingPanel',
'debug_toolbar_mongo.panel.MongoDebugPanel',
# Enabling the profiler has a weird bug as of django-debug-toolbar==0.9.4 and
# Django=1.3.1/1.4 where requests to views get duplicated (your method gets
# hit twice). So you can uncomment when you need to diagnose performance
# problems, but you shouldn't leave it on.
# 'debug_toolbar.panels.profiling.ProfilingDebugPanel',
)
)
DEBUG_TOOLBAR_CONFIG = {
'INTERCEPT_REDIRECTS': False
......@@ -165,7 +164,7 @@ DEBUG_TOOLBAR_CONFIG = {
# To see stacktraces for MongoDB queries, set this to True.
# Stacktraces slow down page loads drastically (for pages with lots of queries).
DEBUG_TOOLBAR_MONGO_STACKTRACES = True
DEBUG_TOOLBAR_MONGO_STACKTRACES = False
# disable NPS survey in dev mode
MITX_FEATURES['STUDIO_NPS_SURVEY'] = False
......
"""
This configuration is to turn on the Django Toolbar stats for DB access stats, for performance analysis
"""
# We intentionally define lots of variables that aren't used, and
# want to import all variables from base settings files
# pylint: disable=W0401, W0614
from .dev import *
DEBUG_TOOLBAR_PANELS = (
'debug_toolbar.panels.version.VersionDebugPanel',
'debug_toolbar.panels.timer.TimerDebugPanel',
'debug_toolbar.panels.settings_vars.SettingsVarsDebugPanel',
'debug_toolbar.panels.headers.HeaderDebugPanel',
'debug_toolbar.panels.request_vars.RequestVarsDebugPanel',
'debug_toolbar.panels.sql.SQLDebugPanel',
'debug_toolbar.panels.signals.SignalDebugPanel',
'debug_toolbar.panels.logger.LoggingPanel',
'debug_toolbar_mongo.panel.MongoDebugPanel'
# Enabling the profiler has a weird bug as of django-debug-toolbar==0.9.4 and
# Django=1.3.1/1.4 where requests to views get duplicated (your method gets
# hit twice). So you can uncomment when you need to diagnose performance
# problems, but you shouldn't leave it on.
# 'debug_toolbar.panels.profiling.ProfilingDebugPanel',
)
# To see stacktraces for MongoDB queries, set this to True.
# Stacktraces slow down page loads drastically (for pages with lots of queries).
DEBUG_TOOLBAR_MONGO_STACKTRACES = True
import re
from xmodule.contentstore.content import StaticContent
from xmodule.modulestore import Location
from xmodule.modulestore.mongo import MongoModuleStore
......@@ -6,7 +7,105 @@ from xmodule.modulestore.inheritance import own_metadata
import logging
def _clone_modules(modulestore, modules, dest_location):
def _prefix_only_url_replace_regex(prefix):
"""
Match static urls in quotes that don't end in '?raw'.
To anyone contemplating making this more complicated:
http://xkcd.com/1171/
"""
return r"""
(?x) # flags=re.VERBOSE
(?P<quote>\\?['"]) # the opening quotes
(?P<prefix>{prefix}) # the prefix
(?P<rest>.*?) # everything else in the url
(?P=quote) # the first matching closing quote
""".format(prefix=re.escape(prefix))
def _prefix_and_category_url_replace_regex(prefix):
"""
Match static urls in quotes that don't end in '?raw'.
To anyone contemplating making this more complicated:
http://xkcd.com/1171/
"""
return r"""
(?x) # flags=re.VERBOSE
(?P<quote>\\?['"]) # the opening quotes
(?P<prefix>{prefix}) # the prefix
(?P<category>[^/]+)/
(?P<rest>.*?) # everything else in the url
(?P=quote) # the first matching closing quote
""".format(prefix=re.escape(prefix))
def rewrite_nonportable_content_links(source_course_id, dest_course_id, text):
"""
Does a regex replace on non-portable links:
/c4x/<org>/<course>/asset/<name> -> /static/<name>
/jump_to/i4x://<org>/<course>/<category>/<name> -> /jump_to_id/<id>
"""
org, course, run = source_course_id.split("/")
dest_org, dest_course, dest_run = dest_course_id.split("/")
def portable_asset_link_subtitution(match):
quote = match.group('quote')
rest = match.group('rest')
return quote + '/static/' + rest + quote
def portable_jump_to_link_substitution(match):
quote = match.group('quote')
rest = match.group('rest')
return quote + '/jump_to_id/' + rest + quote
def generic_courseware_link_substitution(match):
quote = match.group('quote')
rest = match.group('rest')
dest_generic_courseware_lik_base = '/courses/{org}/{course}/{run}/'.format(
org=dest_org, course=dest_course, run=dest_run
)
return quote + dest_generic_courseware_lik_base + rest + quote
course_location = Location(['i4x', org, course, 'course', run])
# NOTE: ultimately link updating is not a hard requirement, so if something blows up with
# the regex subsitution, log the error and continue
try:
c4x_link_base = '{0}/'.format(StaticContent.get_base_url_path_for_course_assets(course_location))
text = re.sub(_prefix_only_url_replace_regex(c4x_link_base), portable_asset_link_subtitution, text)
except Exception, e:
logging.warning("Error going regex subtituion %r on text = %r.\n\nError msg = %s", c4x_link_base, text, str(e))
try:
jump_to_link_base = '/courses/{org}/{course}/{run}/jump_to/i4x://{org}/{course}/'.format(
org=org, course=course, run=run
)
text = re.sub(_prefix_and_category_url_replace_regex(jump_to_link_base), portable_jump_to_link_substitution, text)
except Exception, e:
logging.warning("Error going regex subtituion %r on text = %r.\n\nError msg = %s", jump_to_link_base, text, str(e))
# Also, there commonly is a set of link URL's used in the format:
# /courses/<org>/<course>/<run> which will be broken if migrated to a different course_id
# so let's rewrite those, but the target will also be non-portable,
#
# Note: we only need to do this if we are changing course-id's
#
if source_course_id != dest_course_id:
try:
generic_courseware_link_base = '/courses/{org}/{course}/{run}/'.format(
org=org, course=course, run=run
)
text = re.sub(_prefix_only_url_replace_regex(generic_courseware_link_base), portable_asset_link_subtitution, text)
except Exception, e:
logging.warning("Error going regex subtituion %r on text = %r.\n\nError msg = %s", generic_courseware_link_base, text, str(e))
return text
def _clone_modules(modulestore, modules, source_location, dest_location):
for module in modules:
original_loc = Location(module.location)
......@@ -21,7 +120,12 @@ def _clone_modules(modulestore, modules, dest_location):
print "Cloning module {0} to {1}....".format(original_loc, module.location)
# NOTE: usage of the the internal module.xblock_kvs._data does not include any 'default' values for the fields
modulestore.update_item(module.location, module.xblock_kvs._data)
data = module.xblock_kvs._data
if isinstance(data, basestring):
data = rewrite_nonportable_content_links(
source_location.course_id, dest_location.course_id, data)
modulestore.update_item(module.location, data)
# repoint children
if module.has_children:
......@@ -73,10 +177,10 @@ def clone_course(modulestore, contentstore, source_location, dest_location, dele
# Get all modules under this namespace which is (tag, org, course) tuple
modules = modulestore.get_items([source_location.tag, source_location.org, source_location.course, None, None, None])
_clone_modules(modulestore, modules, dest_location)
_clone_modules(modulestore, modules, source_location, dest_location)
modules = modulestore.get_items([source_location.tag, source_location.org, source_location.course, None, None, 'draft'])
_clone_modules(modulestore, modules, dest_location)
_clone_modules(modulestore, modules, source_location, dest_location)
# now iterate through all of the assets and clone them
# first the thumbnails
......
......@@ -10,6 +10,7 @@ from xmodule.modulestore import Location
from xmodule.contentstore.content import StaticContent
from .inheritance import own_metadata
from xmodule.errortracker import make_error_tracker
from .store_utilities import rewrite_nonportable_content_links
log = logging.getLogger(__name__)
......@@ -60,54 +61,6 @@ def import_static_content(modules, course_loc, course_data_path, static_content_
return remap_dict
def import_module_from_xml(modulestore, static_content_store, course_data_path, module, target_location_namespace=None, verbose=False):
# remap module to the new namespace
if target_location_namespace is not None:
# This looks a bit wonky as we need to also change the 'name' of the imported course to be what
# the caller passed in
if module.location.category != 'course':
module.location = module.location._replace(tag=target_location_namespace.tag, org=target_location_namespace.org,
course=target_location_namespace.course)
else:
module.location = module.location._replace(tag=target_location_namespace.tag, org=target_location_namespace.org,
course=target_location_namespace.course, name=target_location_namespace.name)
# then remap children pointers since they too will be re-namespaced
if module.has_children:
children_locs = module.children
new_locs = []
for child in children_locs:
child_loc = Location(child)
new_child_loc = child_loc._replace(tag=target_location_namespace.tag, org=target_location_namespace.org,
course=target_location_namespace.course)
new_locs.append(new_child_loc.url())
module.children = new_locs
if hasattr(module, 'data'):
modulestore.update_item(module.location, module.data)
if module.has_children:
modulestore.update_children(module.location, module.children)
modulestore.update_metadata(module.location, own_metadata(module))
def import_course_from_xml(modulestore, static_content_store, course_data_path, module, target_location_namespace=None, verbose=False):
# cdodge: more hacks (what else). Seems like we have a problem when importing a course (like 6.002) which
# does not have any tabs defined in the policy file. The import goes fine and then displays fine in LMS,
# but if someone tries to add a new tab in the CMS, then the LMS barfs because it expects that -
# if there is *any* tabs - then there at least needs to be some predefined ones
if module.tabs is None or len(module.tabs) == 0:
module.tabs = [{"type": "courseware"},
{"type": "course_info", "name": "Course Info"},
{"type": "discussion", "name": "Discussion"},
{"type": "wiki", "name": "Wiki"}] # note, add 'progress' when we can support it on Edge
import_module_from_xml(modulestore, static_content_store, course_data_path, module, target_location_namespace, verbose=verbose)
def import_from_xml(store, data_dir, course_dirs=None,
default_class='xmodule.raw_module.RawDescriptor',
load_error_modules=True, static_content_store=None, target_location_namespace=None,
......@@ -175,7 +128,8 @@ def import_from_xml(store, data_dir, course_dirs=None,
{"type": "discussion", "name": "Discussion"},
{"type": "wiki", "name": "Wiki"}] # note, add 'progress' when we can support it on Edge
import_module(module, store, course_data_path, static_content_store)
import_module(module, store, course_data_path, static_content_store, course_location,
target_location_namespace or course_location)
course_items.append(module)
......@@ -189,7 +143,6 @@ def import_from_xml(store, data_dir, course_dirs=None,
# finally loop through all the modules
for module in xml_module_store.modules[course_id].itervalues():
if module.category == 'course':
# we've already saved the course module up at the top of the loop
# so just skip over it in the inner loop
......@@ -202,25 +155,31 @@ def import_from_xml(store, data_dir, course_dirs=None,
if verbose:
log.debug('importing module location {0}'.format(module.location))
import_module(module, store, course_data_path, static_content_store)
import_module(module, store, course_data_path, static_content_store, course_location,
target_location_namespace if target_location_namespace else course_location)
# now import any 'draft' items
if draft_store is not None:
import_course_draft(xml_module_store, store, draft_store, course_data_path,
static_content_store, target_location_namespace if target_location_namespace is not None
static_content_store, course_location, target_location_namespace if target_location_namespace
else course_location)
finally:
# turn back on all write signalling
if pseudo_course_id in store.ignore_write_events_on_courses:
store.ignore_write_events_on_courses.remove(pseudo_course_id)
store.refresh_cached_metadata_inheritance_tree(target_location_namespace if
target_location_namespace is not None else course_location)
store.refresh_cached_metadata_inheritance_tree(
target_location_namespace if target_location_namespace is not None else course_location
)
return xml_module_store, course_items
def import_module(module, store, course_data_path, static_content_store, allow_not_found=False):
def import_module(module, store, course_data_path, static_content_store,
source_course_location, dest_course_location, allow_not_found=False):
logging.debug('processing import of module {0}...'.format(module.location.url()))
content = {}
for field in module.fields:
if field.scope != Scope.content:
......@@ -237,6 +196,12 @@ def import_module(module, store, course_data_path, static_content_store, allow_n
else:
module_data = content
if isinstance(module_data, basestring):
# we want to convert all 'non-portable' links in the module_data (if it is a string) to
# portable strings (e.g. /static/)
module_data = rewrite_nonportable_content_links(
source_course_location.course_id, dest_course_location.course_id, module_data)
if allow_not_found:
store.update_item(module.location, module_data, allow_not_found=allow_not_found)
else:
......@@ -250,7 +215,7 @@ def import_module(module, store, course_data_path, static_content_store, allow_n
store.update_metadata(module.location, dict(own_metadata(module)))
def import_course_draft(xml_module_store, store, draft_store, course_data_path, static_content_store, target_location_namespace):
def import_course_draft(xml_module_store, store, draft_store, course_data_path, static_content_store, source_location_namespace, target_location_namespace):
'''
This will import all the content inside of the 'drafts' folder, if it exists
NOTE: This is not a full course import, basically in our current application only verticals (and downwards)
......@@ -307,7 +272,8 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path,
del module.xml_attributes['parent_sequential_url']
del module.xml_attributes['index_in_children_list']
import_module(module, draft_store, course_data_path, static_content_store, allow_not_found=True)
import_module(module, draft_store, course_data_path, static_content_store,
source_location_namespace, target_location_namespace, allow_not_found=True)
for child in module.get_children():
_import_module(child)
......@@ -524,3 +490,57 @@ def perform_xlint(data_dir, course_dirs,
print "This course can be imported successfully."
return err_cnt
#
# UNSURE IF THIS IS UNUSED CODE - IF SO NEEDS TO BE PRUNED. TO BE INVESTIGATED.
#
def import_module_from_xml(modulestore, static_content_store, course_data_path, module, target_location_namespace=None, verbose=False):
# remap module to the new namespace
if target_location_namespace is not None:
# This looks a bit wonky as we need to also change the 'name' of the imported course to be what
# the caller passed in
if module.location.category != 'course':
module.location = module.location._replace(tag=target_location_namespace.tag, org=target_location_namespace.org,
course=target_location_namespace.course)
else:
module.location = module.location._replace(tag=target_location_namespace.tag, org=target_location_namespace.org,
course=target_location_namespace.course, name=target_location_namespace.name)
# then remap children pointers since they too will be re-namespaced
if module.has_children:
children_locs = module.children
new_locs = []
for child in children_locs:
child_loc = Location(child)
new_child_loc = child_loc._replace(tag=target_location_namespace.tag, org=target_location_namespace.org,
course=target_location_namespace.course)
new_locs.append(new_child_loc.url())
module.children = new_locs
if hasattr(module, 'data'):
modulestore.update_item(module.location, module.data)
if module.has_children:
modulestore.update_children(module.location, module.children)
modulestore.update_metadata(module.location, own_metadata(module))
def import_course_from_xml(modulestore, static_content_store, course_data_path, module, target_location_namespace=None, verbose=False):
# CDODGE: Is this unused code (along with import_module_from_xml)? I can't find any references to it. If so, then
# we need to delete this apparently duplicate code.
# cdodge: more hacks (what else). Seems like we have a problem when importing a course (like 6.002) which
# does not have any tabs defined in the policy file. The import goes fine and then displays fine in LMS,
# but if someone tries to add a new tab in the CMS, then the LMS barfs because it expects that -
# if there is *any* tabs - then there at least needs to be some predefined ones
if module.tabs is None or len(module.tabs) == 0:
module.tabs = [{"type": "courseware"},
{"type": "course_info", "name": "Course Info"},
{"type": "discussion", "name": "Discussion"},
{"type": "wiki", "name": "Wiki"}] # note, add 'progress' when we can support it on Edge
import_module_from_xml(modulestore, static_content_store, course_data_path, module, target_location_namespace, verbose=verbose)
......@@ -5,6 +5,8 @@
<html url_name="secret:toylab"/>
<html url_name="toyjumpto"/>
<html url_name="toyhtml"/>
<html url_name="nonportable"/>
<html url_name="nonportable_link"/>
<video url_name="Video_Resources" youtube_id_1_0="1bK-WdDi6Qw" display_name="Video Resources"/>
</videosequence>
<video url_name="Welcome" youtube_id_1_0="p2Q6BrNhdh8" display_name="Welcome"/>
......
<a href="/c4x/edX/toy/asset/foo.jpg">link</a>
<html filename="nonportable.html"/>
\ No newline at end of file
<a href="/courses/edX/toy/2012_Fall/jump_to/i4x://edX/toy/html/nonportable_link">link</a>
<html filename="nonportable_link.html"/>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment