Commit a55724d1 by Calen Pennington

Prevent unicode data from being injected into a Location using the _replace function

parent 46109bb3
...@@ -40,6 +40,21 @@ INVALID_HTML_CHARS = re.compile(r"[^\w-]") ...@@ -40,6 +40,21 @@ INVALID_HTML_CHARS = re.compile(r"[^\w-]")
_LocationBase = namedtuple('LocationBase', 'tag org course category name revision') _LocationBase = namedtuple('LocationBase', 'tag org course category name revision')
def _check_location_part(val, regexp):
    """
    Validate one component of a location against a pattern of forbidden
    characters.

    Args:
        val (string): The component to validate. `None` is accepted
            without being checked.
        regexp (re.RegexObject): Pattern matching the characters that are
            not allowed to appear in `val`

    Raises:
        InvalidLocationError: If `regexp` matches anywhere inside `val`
    """
    # A missing component is never searched.
    if val is None:
        return

    # No forbidden character found: the component is acceptable.
    if regexp.search(val) is None:
        return

    raise InvalidLocationError("Invalid characters in {!r}.".format(val))
class Location(_LocationBase): class Location(_LocationBase):
''' '''
Encodes a location. Encodes a location.
...@@ -145,7 +160,6 @@ class Location(_LocationBase): ...@@ -145,7 +160,6 @@ class Location(_LocationBase):
Components may be set to None, which may be interpreted in some contexts Components may be set to None, which may be interpreted in some contexts
to mean wildcard selection. to mean wildcard selection.
""" """
if (org is None and course is None and category is None and name is None and revision is None): if (org is None and course is None and category is None and name is None and revision is None):
location = loc_or_tag location = loc_or_tag
else: else:
...@@ -161,23 +175,18 @@ class Location(_LocationBase): ...@@ -161,23 +175,18 @@ class Location(_LocationBase):
check_list(list_) check_list(list_)
def check_list(list_): def check_list(list_):
def check(val, regexp):
if val is not None and regexp.search(val) is not None:
log.debug('invalid characters val=%r, list_=%r', val, list_)
raise InvalidLocationError("Invalid characters in {!r}.".format(val))
list_ = list(list_) list_ = list(list_)
for val in list_[:4] + [list_[5]]: for val in list_[:4] + [list_[5]]:
check(val, INVALID_CHARS) _check_location_part(val, INVALID_CHARS)
# names allow colons # names allow colons
check(list_[4], INVALID_CHARS_NAME) _check_location_part(list_[4], INVALID_CHARS_NAME)
if isinstance(location, Location): if isinstance(location, Location):
return location return location
elif isinstance(location, basestring): elif isinstance(location, basestring):
match = URL_RE.match(location) match = URL_RE.match(location)
if match is None: if match is None:
log.debug("location %r doesn't match URL" % location) log.debug("location %r doesn't match URL", location)
raise InvalidLocationError(location) raise InvalidLocationError(location)
groups = match.groupdict() groups = match.groupdict()
check_dict(groups) check_dict(groups)
...@@ -249,6 +258,18 @@ class Location(_LocationBase): ...@@ -249,6 +258,18 @@ class Location(_LocationBase):
return "/".join([self.org, self.course, self.name]) return "/".join([self.org, self.course, self.name])
def _replace(self, **kwargs):
    """
    Build a copy of this :class:`Location` in which the fields named in
    `**kwargs` are swapped for the supplied values.

    Every supplied value is passed through `_check_location_part` before
    the copy is made: `name` is checked against `INVALID_CHARS_NAME`, and
    every other field against `INVALID_CHARS`.
    """
    for field, replacement in kwargs.iteritems():
        # The `name` field is validated with its own pattern.
        forbidden = INVALID_CHARS_NAME if field == 'name' else INVALID_CHARS
        _check_location_part(replacement, forbidden)

    return super(Location, self)._replace(**kwargs)
def replace(self, **kwargs): def replace(self, **kwargs):
''' '''
Expose a public method for replacing location elements Expose a public method for replacing location elements
......
...@@ -205,11 +205,11 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): ...@@ -205,11 +205,11 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
descriptor.data_dir = course_dir descriptor.data_dir = course_dir
xmlstore.modules[course_id][descriptor.location] = descriptor xmlstore.modules[course_id][descriptor.scope_ids.usage_id] = descriptor
if descriptor.has_children: if descriptor.has_children:
for child in descriptor.get_children(): for child in descriptor.get_children():
parent_tracker.add_parent(child.location, descriptor.location) parent_tracker.add_parent(child.scope_ids.usage_id, descriptor.scope_ids.usage_id)
# After setting up the descriptor, save any changes that we have # After setting up the descriptor, save any changes that we have
# made to attributes on the descriptor to the underlying KeyValueStore. # made to attributes on the descriptor to the underlying KeyValueStore.
...@@ -412,8 +412,8 @@ class XMLModuleStore(ModuleStoreReadBase): ...@@ -412,8 +412,8 @@ class XMLModuleStore(ModuleStoreReadBase):
if course_descriptor is not None and not isinstance(course_descriptor, ErrorDescriptor): if course_descriptor is not None and not isinstance(course_descriptor, ErrorDescriptor):
self.courses[course_dir] = course_descriptor self.courses[course_dir] = course_descriptor
self._location_errors[course_descriptor.location] = errorlog self._location_errors[course_descriptor.scope_ids.usage_id] = errorlog
self.parent_trackers[course_descriptor.id].make_known(course_descriptor.location) self.parent_trackers[course_descriptor.id].make_known(course_descriptor.scope_ids.usage_id)
else: else:
# Didn't load course. Instead, save the errors elsewhere. # Didn't load course. Instead, save the errors elsewhere.
self.errored_courses[course_dir] = errorlog self.errored_courses[course_dir] = errorlog
...@@ -570,7 +570,7 @@ class XMLModuleStore(ModuleStoreReadBase): ...@@ -570,7 +570,7 @@ class XMLModuleStore(ModuleStoreReadBase):
html = f.read().decode('utf-8') html = f.read().decode('utf-8')
# tabs are referenced in policy.json through a 'slug' which is just the filename without the .html suffix # tabs are referenced in policy.json through a 'slug' which is just the filename without the .html suffix
slug = os.path.splitext(os.path.basename(filepath))[0] slug = os.path.splitext(os.path.basename(filepath))[0]
loc = Location('i4x', course_descriptor.location.org, course_descriptor.location.course, category, slug) loc = course_descriptor.scope_ids.usage_id._replace(category=category, name=slug)
module = system.construct_xblock_from_class( module = system.construct_xblock_from_class(
HtmlDescriptor, HtmlDescriptor,
# We're loading a descriptor, so student_id is meaningless # We're loading a descriptor, so student_id is meaningless
...@@ -588,7 +588,7 @@ class XMLModuleStore(ModuleStoreReadBase): ...@@ -588,7 +588,7 @@ class XMLModuleStore(ModuleStoreReadBase):
module.display_name = tab['name'] module.display_name = tab['name']
module.data_dir = course_dir module.data_dir = course_dir
module.save() module.save()
self.modules[course_descriptor.id][module.location] = module self.modules[course_descriptor.id][module.scope_ids.usage_id] = module
except Exception, e: except Exception, e:
logging.exception("Failed to load %s. Skipping... \ logging.exception("Failed to load %s. Skipping... \
Exception: %s", filepath, unicode(e)) Exception: %s", filepath, unicode(e))
......
...@@ -310,9 +310,7 @@ def import_module( ...@@ -310,9 +310,7 @@ def import_module(
source_course_location, dest_course_location, allow_not_found=False, source_course_location, dest_course_location, allow_not_found=False,
do_import_static=True): do_import_static=True):
logging.debug('processing import of module {url}...'.format( logging.debug('processing import of module {}...'.format(module.location.url()))
url=module.location.url()
))
content = {} content = {}
for field in module.fields.values(): for field in module.fields.values():
......
...@@ -93,7 +93,6 @@ transifex-client==0.9.1 ...@@ -93,7 +93,6 @@ transifex-client==0.9.1
# Used for testing # Used for testing
coverage==3.7 coverage==3.7
ddt==0.4.0
factory_boy==2.2.1 factory_boy==2.2.1
mock==1.0.1 mock==1.0.1
nosexcover==1.0.7 nosexcover==1.0.7
......
...@@ -22,3 +22,6 @@ ...@@ -22,3 +22,6 @@
-e git+https://github.com/edx/django-waffle.git@823a102e48#egg=django-waffle -e git+https://github.com/edx/django-waffle.git@823a102e48#egg=django-waffle
-e git+https://github.com/edx/event-tracking.git@f0211d702d#egg=event-tracking -e git+https://github.com/edx/event-tracking.git@f0211d702d#egg=event-tracking
-e git+https://github.com/edx/bok-choy.git@bc6f1adbe439618162079f1004b2b3db3b6f8916#egg=bok_choy -e git+https://github.com/edx/bok-choy.git@bc6f1adbe439618162079f1004b2b3db3b6f8916#egg=bok_choy
# Move back to upstream release once https://github.com/txels/ddt/pull/13 is merged
-e git+https://github.com/edx/ddt.git@9e8010b8777aa40b848fdb76de6e60081616325a#egg=ddt
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment