Commit f1f76a9a by Ned Batchelder

Dummy text has more accents, and properly ignores more non-text things.

parent 5e244b1c
...@@ -20,7 +20,16 @@ class Converter(object): ...@@ -20,7 +20,16 @@ class Converter(object):
# matches tags like these: # matches tags like these:
# HTML: <B>, </B>, <BR/>, <textformat leading="10"> # HTML: <B>, </B>, <BR/>, <textformat leading="10">
# Python: %(date)s, %(name)s # Python: %(date)s, %(name)s
tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I) tag_pattern = re.compile(r'''
(<[-\w" .:?=/]*>) | # <tag>
({[^}]*}) | # {tag}
(%\([^)]*\)\w) | # %(tag)s
(&\w+;) | # &entity;
(&\#\d+;) | # &#1234;
(&\#x[0-9a-f]+;) # &#xABCD;
''',
re.IGNORECASE|re.VERBOSE
)
def convert(self, string): def convert(self, string):
"""Returns: a converted tagged string """Returns: a converted tagged string
......
...@@ -34,8 +34,11 @@ TABLE = {'A': u'\xC0', ...@@ -34,8 +34,11 @@ TABLE = {'A': u'\xC0',
'I': U'\xCC', 'I': U'\xCC',
'i': u'\xEF', 'i': u'\xEF',
'O': u'\xD8', 'O': u'\xD8',
'o': u'\xF6', 'o': u'\xF8',
'u': u'\xFC' 'U': u'\xDB',
'u': u'\xFC',
'Y': u'\xDD',
'y': u'\xFD',
} }
......
...@@ -22,7 +22,7 @@ class TestConverter(TestCase): ...@@ -22,7 +22,7 @@ class TestConverter(TestCase):
Assert that embedded HTML and python tags are not converted. Assert that embedded HTML and python tags are not converted.
""" """
c = UpcaseConverter() c = UpcaseConverter()
test_cases = ( test_cases = [
# no tags # no tags
('big bad wolf', 'BIG BAD WOLF'), ('big bad wolf', 'BIG BAD WOLF'),
# one html tag # one html tag
...@@ -36,7 +36,11 @@ class TestConverter(TestCase): ...@@ -36,7 +36,11 @@ class TestConverter(TestCase):
# both kinds of tags # both kinds of tags
('<strong>big</strong> %(adjective)s %(noun)s', ('<strong>big</strong> %(adjective)s %(noun)s',
'<strong>BIG</strong> %(adjective)s %(noun)s'), '<strong>BIG</strong> %(adjective)s %(noun)s'),
) # .format-style tags
for (source, expected) in test_cases: ('The {0} barn is {1!r}.', 'THE {0} BARN IS {1!r}.'),
# HTML entities
('<b>&copy; 2013 edX, &#xa0;</b>', '<b>&copy; 2013 EDX, &#xa0;</b>'),
]
for source, expected in test_cases:
result = c.convert(source) result = c.convert(source)
self.assertEquals(result, expected) self.assertEquals(result, expected)
...@@ -18,23 +18,24 @@ class TestDummy(TestCase): ...@@ -18,23 +18,24 @@ class TestDummy(TestCase):
Tests with a dummy converter (adds spurious accents to strings). Tests with a dummy converter (adds spurious accents to strings).
Assert that embedded HTML and python tags are not converted. Assert that embedded HTML and python tags are not converted.
""" """
test_cases = (("hello my name is Bond, James Bond", test_cases = [
u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'), ("hello my name is Bond, James Bond",
u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'),
('don\'t convert <a href="href">tag ids</a>', ('don\'t convert <a href="href">tag ids</a>',
u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'), u'd\xf8n\'t \xe7\xf8nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'),
('don\'t convert %(name)s tags on %(date)s', ('don\'t convert %(name)s tags on %(date)s',
u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#") u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#")
) ]
for (source, expected) in test_cases: for source, expected in test_cases:
result = self.converter.convert(source) result = self.converter.convert(source)
self.assertEquals(result, expected) self.assertEquals(result, expected)
def test_singular(self): def test_singular(self):
entry = POEntry() entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.' entry.msgid = 'A lovely day for a cup of tea.'
expected = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#' expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
self.converter.convert_msg(entry) self.converter.convert_msg(entry)
self.assertEquals(entry.msgstr, expected) self.assertEquals(entry.msgstr, expected)
...@@ -42,8 +43,8 @@ class TestDummy(TestCase): ...@@ -42,8 +43,8 @@ class TestDummy(TestCase):
entry = POEntry() entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.' entry.msgid = 'A lovely day for a cup of tea.'
entry.msgid_plural = 'A lovely day for some cups of tea.' entry.msgid_plural = 'A lovely day for some cups of tea.'
expected_s = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#' expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
expected_p = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r s\xf6m\xe9 \xe7\xfcps \xf6f t\xe9\xe4. Lorem ip#' expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#'
self.converter.convert_msg(entry) self.converter.convert_msg(entry)
result = entry.msgstr_plural result = entry.msgstr_plural
self.assertEquals(result['0'], expected_s) self.assertEquals(result['0'], expected_s)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment