Prevent unicode data from being injected into a Location using the _replace function

a55724d1 · Calen Pennington · 46109bb3
Commit a55724d1 authored Jan 14, 2014 by Calen Pennington
6 changed files
--- a/common/lib/xmodule/xmodule/modulestore/__init__.py
+++ b/common/lib/xmodule/xmodule/modulestore/__init__.py
@@ -40,6 +40,21 @@ INVALID_HTML_CHARS = re.compile(r"[^\w-]")
 _LocationBase = namedtuple('LocationBase', 'tag org course category name revision')


+def _check_location_part(val, regexp):
+    """
+    Check that `regexp` doesn't match inside `val`. If it does, raise an exception
+
+    Args:
+        val (string or None): The value to check; `None` is always accepted
+        regexp (re.RegexObject): The regular expression specifying invalid characters
+
+    Raises:
+        InvalidLocationError: Raised if any invalid character is found in `val`
+    """
+    if val is not None and regexp.search(val) is not None:
+        raise InvalidLocationError("Invalid characters in {!r}.".format(val))
+
+
 class Location(_LocationBase):
    '''
    Encodes a location.
@@ -145,7 +160,6 @@ class Location(_LocationBase):
        Components may be set to None, which may be interpreted in some contexts
        to mean wildcard selection.
        """
-
        if (org is None and course is None and category is None and name is None and revision is None):
            location = loc_or_tag
        else:
@@ -161,23 +175,18 @@ class Location(_LocationBase):
            check_list(list_)

        def check_list(list_):
-            def check(val, regexp):
-                if val is not None and regexp.search(val) is not None:
-                    log.debug('invalid characters val=%r, list_=%r', val, list_)
-                    raise InvalidLocationError("Invalid characters in {!r}.".format(val))
-
            list_ = list(list_)
            for val in list_[:4] + [list_[5]]:
-                check(val, INVALID_CHARS)
+                _check_location_part(val, INVALID_CHARS)
            # names allow colons
-            check(list_[4], INVALID_CHARS_NAME)
+            _check_location_part(list_[4], INVALID_CHARS_NAME)

        if isinstance(location, Location):
            return location
        elif isinstance(location, basestring):
            match = URL_RE.match(location)
            if match is None:
-                log.debug("location %r doesn't match URL" % location)
+                log.debug("location %r doesn't match URL", location)
                raise InvalidLocationError(location)
            groups = match.groupdict()
            check_dict(groups)
@@ -249,6 +258,18 @@ class Location(_LocationBase):

        return "/".join([self.org, self.course, self.name])

+    def _replace(self, **kwargs):
+        """
+        Return a new :class:`Location` with the fields named in `**kwargs` replaced;
+        raises InvalidLocationError if a new value contains invalid characters
+        """
+        for name, value in kwargs.iteritems():
+            if name == 'name':
+                _check_location_part(value, INVALID_CHARS_NAME)
+            else:
+                _check_location_part(value, INVALID_CHARS)
+        return super(Location, self)._replace(**kwargs)
+
    def replace(self, **kwargs):
        '''
        Expose a public method for replacing location elements

--- a/common/lib/xmodule/xmodule/modulestore/tests/test_location.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_location.py
--- a/common/lib/xmodule/xmodule/modulestore/xml.py
+++ b/common/lib/xmodule/xmodule/modulestore/xml.py
@@ -205,11 +205,11 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):

            descriptor.data_dir = course_dir

-            xmlstore.modules[course_id][descriptor.location] = descriptor
+            xmlstore.modules[course_id][descriptor.scope_ids.usage_id] = descriptor

            if descriptor.has_children:
                for child in descriptor.get_children():
-                    parent_tracker.add_parent(child.location, descriptor.location)
+                    parent_tracker.add_parent(child.scope_ids.usage_id, descriptor.scope_ids.usage_id)

            # After setting up the descriptor, save any changes that we have
            # made to attributes on the descriptor to the underlying KeyValueStore.
@@ -412,8 +412,8 @@ class XMLModuleStore(ModuleStoreReadBase):

        if course_descriptor is not None and not isinstance(course_descriptor, ErrorDescriptor):
            self.courses[course_dir] = course_descriptor
-            self._location_errors[course_descriptor.location] = errorlog
-            self.parent_trackers[course_descriptor.id].make_known(course_descriptor.location)
+            self._location_errors[course_descriptor.scope_ids.usage_id] = errorlog
+            self.parent_trackers[course_descriptor.id].make_known(course_descriptor.scope_ids.usage_id)
        else:
            # Didn't load course.  Instead, save the errors elsewhere.
            self.errored_courses[course_dir] = errorlog
@@ -570,7 +570,7 @@ class XMLModuleStore(ModuleStoreReadBase):
                    html = f.read().decode('utf-8')
                    # tabs are referenced in policy.json through a 'slug' which is just the filename without the .html suffix
                    slug = os.path.splitext(os.path.basename(filepath))[0]
-                    loc = Location('i4x', course_descriptor.location.org, course_descriptor.location.course, category, slug)
+                    loc = course_descriptor.scope_ids.usage_id._replace(category=category, name=slug)
                    module = system.construct_xblock_from_class(
                        HtmlDescriptor,
                        # We're loading a descriptor, so student_id is meaningless
@@ -588,7 +588,7 @@ class XMLModuleStore(ModuleStoreReadBase):
                                module.display_name = tab['name']
                    module.data_dir = course_dir
                    module.save()
-                    self.modules[course_descriptor.id][module.location] = module
+                    self.modules[course_descriptor.id][module.scope_ids.usage_id] = module
                except Exception, e:
                    logging.exception("Failed to load %s. Skipping... \
                            Exception: %s", filepath, unicode(e))

--- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py
+++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py
@@ -310,9 +310,7 @@ def import_module(
        source_course_location, dest_course_location, allow_not_found=False,
        do_import_static=True):

-    logging.debug('processing import of module {url}...'.format(
-        url=module.location.url()
-    ))
+    logging.debug('processing import of module {}...'.format(module.location.url()))

    content = {}
    for field in module.fields.values():

--- a/requirements/edx/base.txt
+++ b/requirements/edx/base.txt
@@ -93,7 +93,6 @@ transifex-client==0.9.1

 # Used for testing
 coverage==3.7
-ddt==0.4.0
 factory_boy==2.2.1
 mock==1.0.1
 nosexcover==1.0.7

--- a/requirements/edx/github.txt
+++ b/requirements/edx/github.txt
@@ -22,3 +22,6 @@
 -e git+https://github.com/edx/django-waffle.git@823a102e48#egg=django-waffle
 -e git+https://github.com/edx/event-tracking.git@f0211d702d#egg=event-tracking
 -e git+https://github.com/edx/bok-choy.git@bc6f1adbe439618162079f1004b2b3db3b6f8916#egg=bok_choy
+
+# Move back to upstream release once https://github.com/txels/ddt/pull/13 is merged
+-e git+https://github.com/edx/ddt.git@9e8010b8777aa40b848fdb76de6e60081616325a#egg=ddt
\ No newline at end of file