Commit a55724d1 by Calen Pennington

Prevent unicode data from being injected into a Location using the _replace function

parent 46109bb3
......@@ -40,6 +40,21 @@ INVALID_HTML_CHARS = re.compile(r"[^\w-]")
_LocationBase = namedtuple('LocationBase', 'tag org course category name revision')
def _check_location_part(val, regexp):
    """
    Validate a single component of a location.

    A value of `None` is accepted without inspection. Any other value is
    searched with `regexp`; a match anywhere inside it is treated as an
    invalid character and rejected.

    Args:
        val (string): The location component to validate (may be None)
        regexp (re.RegexObject): Pattern that matches the disallowed characters

    Raises:
        InvalidLocationError: Raised if `regexp` matches anywhere in `val`
    """
    # Nothing to validate for an unset component.
    if val is None:
        return
    # No disallowed character found: the component is acceptable.
    if regexp.search(val) is None:
        return
    raise InvalidLocationError("Invalid characters in {!r}.".format(val))
class Location(_LocationBase):
'''
Encodes a location.
......@@ -145,7 +160,6 @@ class Location(_LocationBase):
Components may be set to None, which may be interpreted in some contexts
to mean wildcard selection.
"""
if (org is None and course is None and category is None and name is None and revision is None):
location = loc_or_tag
else:
......@@ -161,23 +175,18 @@ class Location(_LocationBase):
check_list(list_)
def check_list(list_):
def check(val, regexp):
if val is not None and regexp.search(val) is not None:
log.debug('invalid characters val=%r, list_=%r', val, list_)
raise InvalidLocationError("Invalid characters in {!r}.".format(val))
list_ = list(list_)
for val in list_[:4] + [list_[5]]:
check(val, INVALID_CHARS)
_check_location_part(val, INVALID_CHARS)
# names allow colons
check(list_[4], INVALID_CHARS_NAME)
_check_location_part(list_[4], INVALID_CHARS_NAME)
if isinstance(location, Location):
return location
elif isinstance(location, basestring):
match = URL_RE.match(location)
if match is None:
log.debug("location %r doesn't match URL" % location)
log.debug("location %r doesn't match URL", location)
raise InvalidLocationError(location)
groups = match.groupdict()
check_dict(groups)
......@@ -249,6 +258,18 @@ class Location(_LocationBase):
return "/".join([self.org, self.course, self.name])
def _replace(self, **kwargs):
    """
    Build a new :class:`Location` with the fields named in `**kwargs`
    overridden by the supplied values.

    Every supplied value is validated before the replacement happens:
    the `name` field is checked against INVALID_CHARS_NAME and every
    other field against INVALID_CHARS, using `_check_location_part`
    (which raises on a disallowed character).
    """
    for field, new_value in kwargs.iteritems():
        # `name` has its own invalid-character pattern; all other fields share one.
        forbidden = INVALID_CHARS_NAME if field == 'name' else INVALID_CHARS
        _check_location_part(new_value, forbidden)

    # Validation passed; delegate the actual replacement to the namedtuple base.
    return super(Location, self)._replace(**kwargs)
def replace(self, **kwargs):
'''
Expose a public method for replacing location elements
......
......@@ -205,11 +205,11 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
descriptor.data_dir = course_dir
xmlstore.modules[course_id][descriptor.location] = descriptor
xmlstore.modules[course_id][descriptor.scope_ids.usage_id] = descriptor
if descriptor.has_children:
for child in descriptor.get_children():
parent_tracker.add_parent(child.location, descriptor.location)
parent_tracker.add_parent(child.scope_ids.usage_id, descriptor.scope_ids.usage_id)
# After setting up the descriptor, save any changes that we have
# made to attributes on the descriptor to the underlying KeyValueStore.
......@@ -412,8 +412,8 @@ class XMLModuleStore(ModuleStoreReadBase):
if course_descriptor is not None and not isinstance(course_descriptor, ErrorDescriptor):
self.courses[course_dir] = course_descriptor
self._location_errors[course_descriptor.location] = errorlog
self.parent_trackers[course_descriptor.id].make_known(course_descriptor.location)
self._location_errors[course_descriptor.scope_ids.usage_id] = errorlog
self.parent_trackers[course_descriptor.id].make_known(course_descriptor.scope_ids.usage_id)
else:
# Didn't load course. Instead, save the errors elsewhere.
self.errored_courses[course_dir] = errorlog
......@@ -570,7 +570,7 @@ class XMLModuleStore(ModuleStoreReadBase):
html = f.read().decode('utf-8')
# tabs are referenced in policy.json through a 'slug' which is just the filename without the .html suffix
slug = os.path.splitext(os.path.basename(filepath))[0]
loc = Location('i4x', course_descriptor.location.org, course_descriptor.location.course, category, slug)
loc = course_descriptor.scope_ids.usage_id._replace(category=category, name=slug)
module = system.construct_xblock_from_class(
HtmlDescriptor,
# We're loading a descriptor, so student_id is meaningless
......@@ -588,7 +588,7 @@ class XMLModuleStore(ModuleStoreReadBase):
module.display_name = tab['name']
module.data_dir = course_dir
module.save()
self.modules[course_descriptor.id][module.location] = module
self.modules[course_descriptor.id][module.scope_ids.usage_id] = module
except Exception, e:
logging.exception("Failed to load %s. Skipping... \
Exception: %s", filepath, unicode(e))
......
......@@ -310,9 +310,7 @@ def import_module(
source_course_location, dest_course_location, allow_not_found=False,
do_import_static=True):
logging.debug('processing import of module {url}...'.format(
url=module.location.url()
))
logging.debug('processing import of module {}...'.format(module.location.url()))
content = {}
for field in module.fields.values():
......
......@@ -93,7 +93,6 @@ transifex-client==0.9.1
# Used for testing
coverage==3.7
ddt==0.4.0
factory_boy==2.2.1
mock==1.0.1
nosexcover==1.0.7
......
......@@ -22,3 +22,6 @@
-e git+https://github.com/edx/django-waffle.git@823a102e48#egg=django-waffle
-e git+https://github.com/edx/event-tracking.git@f0211d702d#egg=event-tracking
-e git+https://github.com/edx/bok-choy.git@bc6f1adbe439618162079f1004b2b3db3b6f8916#egg=bok_choy
# Move back to upstream release once https://github.com/txels/ddt/pull/13 is merged
-e git+https://github.com/edx/ddt.git@9e8010b8777aa40b848fdb76de6e60081616325a#egg=ddt
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment