Merge pull request #1386 from edx/dhm/bug-courseid

Allow - ~ and . in course, branch, & block ids

Merge pull request #1386 from edx/dhm/bug-courseid
Allow - ~ and . in course, branch, & block ids
2678f2a1 · Don Mitchell · 4b225de3 · 9fc5a8a4 · 2678f2a1 · 2678f2a1
Commit 2678f2a1 authored Oct 17, 2013 by Don Mitchell
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 18 additions and 27 deletions

common/lib/xmodule/xmodule/modulestore/parsers.py
+12 -19

common/lib/xmodule/xmodule/modulestore/tests/test_locators.py
+6 -8

No files found.
--- a/common/lib/xmodule/xmodule/modulestore/parsers.py
+++ b/common/lib/xmodule/xmodule/modulestore/parsers.py
@@ -10,7 +10,7 @@ VERSION_PREFIX = "/version/"
 URL_VERSION_PREFIX = 'version/'
 URL_RE = re.compile(r'^(edx://)?(.+)$', re.IGNORECASE)
+ALLOWED_ID_CHARS = r'[a-zA-Z0-9_\-~.]'
 def parse_url(string, tag_optional=False):
    """
@@ -45,17 +45,12 @@ def parse_url(string, tag_optional=False):
    return parse_course_id(path)
-BLOCK_RE = re.compile(r'^\w+$', re.IGNORECASE)
+BLOCK_RE = re.compile(r'^' + ALLOWED_ID_CHARS + r'+$', re.IGNORECASE)
 def parse_block_ref(string):
    r"""
-    A block_ref is a string of word_chars.
+    A block_ref is a string of url safe characters (see ALLOWED_ID_CHARS)
-    <word_chars> matches one or more Unicode word characters; this includes most
-    characters that can be part of a word in any language, as well as numbers
-    and the underscore. (see definition of \w in python regular expressions,
-    at http://docs.python.org/dev/library/re.html)
    If string is a block_ref, returns a dict with key 'block_ref' and the value,
    otherwise returns None.
@@ -65,7 +60,10 @@ def parse_block_ref(string):
    return None
-GUID_RE = re.compile(r'^(?P<version_guid>[A-F0-9]+)(' + BLOCK_PREFIX + '(?P<block>\w+))?$', re.IGNORECASE)
+GUID_RE = re.compile(
+    r'^(?P<version_guid>[A-F0-9]+)(' + BLOCK_PREFIX + '(?P<block>' + ALLOWED_ID_CHARS + r'+))?$',
+    re.IGNORECASE
+)
 def parse_guid(string):
@@ -83,10 +81,10 @@ def parse_guid(string):
 COURSE_ID_RE = re.compile(
-    r'^(?P<id>(\w+)(\.\w+\w*)*)(' +
+    r'^(?P<id>' + ALLOWED_ID_CHARS + r'+)(' +
-    BRANCH_PREFIX + '(?P<branch>\w+))?(' +
+    BRANCH_PREFIX + r'(?P<branch>' + ALLOWED_ID_CHARS + r'+))?(' +
-    VERSION_PREFIX + '(?P<version_guid>[A-F0-9]+))?(' +
+    VERSION_PREFIX + r'(?P<version_guid>[A-F0-9]+))?(' +
-    BLOCK_PREFIX + '(?P<block>\w+))?$', re.IGNORECASE
+    BLOCK_PREFIX + r'(?P<block>' + ALLOWED_ID_CHARS + r'+))?$', re.IGNORECASE
 )
@@ -117,12 +115,7 @@ def parse_course_id(string):
      block = name
-      name = <word_chars>
+      name = ALLOWED_ID_CHARS
-    <word_chars> matches one or more Unicode word characters; this includes most
-    characters that can be part of a word in any language, as well as numbers
-    and the underscore. (see definition of \w in python regular expressions,
-    at http://docs.python.org/dev/library/re.html)
    If string is a course_id, returns a dict with keys 'id', 'branch', and 'block'.
    Revision is optional: if missing returned_dict['branch'] is None.

--- a/common/lib/xmodule/xmodule/modulestore/tests/test_locators.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_locators.py
@@ -75,15 +75,13 @@ class LocatorTest(TestCase):
        """
        Test all sorts of badly-formed course_ids (and urls with those course_ids)
        """
-        for bad_id in ('mit.',
+        for bad_id in (' mit.eecs',
-                       ' mit.eecs',
                       'mit.eecs ',
                       URL_VERSION_PREFIX + 'mit.eecs',
                       BLOCK_PREFIX + 'block/mit.eecs',
                       'mit.ee cs',
                       'mit.ee,cs',
                       'mit.ee/cs',
-                       'mit.ee$cs',
                       'mit.ee&cs',
                       'mit.ee()cs',
                       BRANCH_PREFIX + 'this',
@@ -130,17 +128,17 @@ class LocatorTest(TestCase):
    def test_course_constructor_url_course_id_and_version_guid(self):
        test_id_loc = '519665f6223ebd6980884f2b'
-        testobj = CourseLocator(url='edx://mit.eecs.6002x' + VERSION_PREFIX + test_id_loc)
+        testobj = CourseLocator(url='edx://mit.eecs-honors.6002x' + VERSION_PREFIX + test_id_loc)
        self.check_course_locn_fields(testobj, 'error parsing url with both course ID and version GUID',
-                                      course_id='mit.eecs.6002x',
+                                      course_id='mit.eecs-honors.6002x',
                                      version_guid=ObjectId(test_id_loc))
    def test_course_constructor_url_course_id_branch_and_version_guid(self):
        test_id_loc = '519665f6223ebd6980884f2b'
-        testobj = CourseLocator(url='edx://mit.eecs.6002x' + BRANCH_PREFIX + 'draft' + VERSION_PREFIX + test_id_loc)
+        testobj = CourseLocator(url='edx://mit.eecs.~6002x' + BRANCH_PREFIX + 'draft-1' + VERSION_PREFIX + test_id_loc)
        self.check_course_locn_fields(testobj, 'error parsing url with both course ID branch, and version GUID',
-                                      course_id='mit.eecs.6002x',
+                                      course_id='mit.eecs.~6002x',
-                                      branch='draft',
+                                      branch='draft-1',
                                      version_guid=ObjectId(test_id_loc))
    def test_course_constructor_course_id_no_branch(self):