diff --git a/common/lib/xmodule/xmodule/modulestore/parsers.py b/common/lib/xmodule/xmodule/modulestore/parsers.py index aeb84b5..d0d7ffb 100644 --- a/common/lib/xmodule/xmodule/modulestore/parsers.py +++ b/common/lib/xmodule/xmodule/modulestore/parsers.py @@ -10,7 +10,7 @@ VERSION_PREFIX = "/version/" URL_VERSION_PREFIX = 'version/' URL_RE = re.compile(r'^(edx://)?(.+)$', re.IGNORECASE) - +ALLOWED_ID_CHARS = r'[a-zA-Z0-9_\-~.]' def parse_url(string, tag_optional=False): """ @@ -45,17 +45,12 @@ def parse_url(string, tag_optional=False): return parse_course_id(path) -BLOCK_RE = re.compile(r'^\w+$', re.IGNORECASE) +BLOCK_RE = re.compile(r'^' + ALLOWED_ID_CHARS + r'+$', re.IGNORECASE) def parse_block_ref(string): r""" - A block_ref is a string of word_chars. - - <word_chars> matches one or more Unicode word characters; this includes most - characters that can be part of a word in any language, as well as numbers - and the underscore. (see definition of \w in python regular expressions, - at http://docs.python.org/dev/library/re.html) + A block_ref is a string of url safe characters (see ALLOWED_ID_CHARS) If string is a block_ref, returns a dict with key 'block_ref' and the value, otherwise returns None. @@ -65,7 +60,10 @@ def parse_block_ref(string): return None -GUID_RE = re.compile(r'^(?P<version_guid>[A-F0-9]+)(' + BLOCK_PREFIX + '(?P<block>\w+))?$', re.IGNORECASE) +GUID_RE = re.compile( + r'^(?P<version_guid>[A-F0-9]+)(' + BLOCK_PREFIX + '(?P<block>' + ALLOWED_ID_CHARS + r'+))?$', + re.IGNORECASE +) def parse_guid(string): @@ -83,10 +81,10 @@ def parse_guid(string): COURSE_ID_RE = re.compile( - r'^(?P<id>(\w+)(\.\w+\w*)*)(' + - BRANCH_PREFIX + '(?P<branch>\w+))?(' + - VERSION_PREFIX + '(?P<version_guid>[A-F0-9]+))?(' + - BLOCK_PREFIX + '(?P<block>\w+))?$', re.IGNORECASE + r'^(?P<id>' + ALLOWED_ID_CHARS + r'+)(' + + BRANCH_PREFIX + r'(?P<branch>' + ALLOWED_ID_CHARS + r'+))?(' + + VERSION_PREFIX + r'(?P<version_guid>[A-F0-9]+))?(' + + BLOCK_PREFIX + r'(?P<block>' + ALLOWED_ID_CHARS + r'+))?$', re.IGNORECASE ) @@ -117,12 +115,7 @@ def parse_course_id(string): block = name - name = <word_chars> - - <word_chars> matches one or more Unicode word characters; this includes most - characters that can be part of a word in any language, as well as numbers - and the underscore. (see definition of \w in python regular expressions, - at http://docs.python.org/dev/library/re.html) + name = ALLOWED_ID_CHARS If string is a course_id, returns a dict with keys 'id', 'branch', and 'block'. Revision is optional: if missing returned_dict['branch'] is None. diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_locators.py b/common/lib/xmodule/xmodule/modulestore/tests/test_locators.py index 1ccb238..be00f34 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_locators.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_locators.py @@ -75,15 +75,13 @@ class LocatorTest(TestCase): """ Test all sorts of badly-formed course_ids (and urls with those course_ids) """ - for bad_id in ('mit.', - ' mit.eecs', + for bad_id in (' mit.eecs', 'mit.eecs ', URL_VERSION_PREFIX + 'mit.eecs', BLOCK_PREFIX + 'block/mit.eecs', 'mit.ee cs', 'mit.ee,cs', 'mit.ee/cs', - 'mit.ee$cs', 'mit.ee&cs', 'mit.ee()cs', BRANCH_PREFIX + 'this', @@ -130,17 +128,17 @@ class LocatorTest(TestCase): def test_course_constructor_url_course_id_and_version_guid(self): test_id_loc = '519665f6223ebd6980884f2b' - testobj = CourseLocator(url='edx://mit.eecs.6002x' + VERSION_PREFIX + test_id_loc) + testobj = CourseLocator(url='edx://mit.eecs-honors.6002x' + VERSION_PREFIX + test_id_loc) self.check_course_locn_fields(testobj, 'error parsing url with both course ID and version GUID', - course_id='mit.eecs.6002x', + course_id='mit.eecs-honors.6002x', version_guid=ObjectId(test_id_loc)) def test_course_constructor_url_course_id_branch_and_version_guid(self): test_id_loc = '519665f6223ebd6980884f2b' - testobj = CourseLocator(url='edx://mit.eecs.6002x' + BRANCH_PREFIX + 'draft' + VERSION_PREFIX + test_id_loc) + testobj = CourseLocator(url='edx://mit.eecs.~6002x' + BRANCH_PREFIX + 'draft-1' + VERSION_PREFIX + test_id_loc) self.check_course_locn_fields(testobj, 'error parsing url with both course ID branch, and version GUID', - course_id='mit.eecs.6002x', - branch='draft', + course_id='mit.eecs.~6002x', + branch='draft-1', version_guid=ObjectId(test_id_loc)) def test_course_constructor_course_id_no_branch(self):