Lorem is now fancy, and HTML tags are recognized with single-quote attributes.

209f8cc3 · Ned Batchelder · 6d5e13e2 · 209f8cc3 · 209f8cc3 · 209f8cc3
Commit 209f8cc3 authored Dec 27, 2013 by Ned Batchelder
Hide whitespace changes
Inline Side-by-side

Showing with 91 additions and 60 deletions

i18n/converter.py
+3 -3

i18n/dummy.py
+57 -43

i18n/tests/test_converter.py
+3 -0

i18n/tests/test_dummy.py
+27 -13

lms/templates/login.html
+1 -1

No files found.
--- a/i18n/converter.py
+++ b/i18n/converter.py
@@ -21,9 +21,9 @@ class Converter(object):
    #   HTML:   <B>, </B>, <BR/>, <textformat leading="10">
    #   Python: %(date)s, %(name)s
    tag_pattern = re.compile(r'''
-        (<[-\w" .:?=/]*>)   |       # <tag>
-        ({[^}]*})           |       # {tag}
-        (%\([^)]*\)\w)      |       # %(tag)s
+        (<[^>]+>)           |       # <tag>
+        ({[^}]+})           |       # {tag}
+        (%\([\w]+\)\w)      |       # %(tag)s
        (&\w+;)             |       # &entity;
        (&\#\d+;)           |       # &#1234;
        (&\#x[0-9a-f]+;)            # &#xABCD;

--- a/i18n/dummy.py
+++ b/i18n/dummy.py
-from converter import Converter
+# -*- coding: utf-8 -*-
+r"""
+Creates new localization properties files in a dummy language.
+
+Each property file is derived from the equivalent en_US file, with these
+transformations applied:
+
+1. Every vowel (plus b, C and c) is replaced with an accented lookalike.
+
+2. Every string is padded out to +30% length to simulate verbose languages
+   (such as German) to see if layout and flows work properly.

-# Creates new localization properties files in a dummy language
-# Each property file is derived from the equivalent en_US file, except
-# 1. Every vowel is replaced with an equivalent with extra accent marks
-# 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German)
-#    to see if layout and flows work properly
-# 3. Every string is terminated with a '#' character to make it easier to detect truncation
+3. Every string is terminated with a '#' character to make it easier to detect
+   truncation.

+Example use::

-# --------------------------------
-# Example use:
-# >>> from dummy import Dummy
-# >>> c = Dummy()
-# >>> c.convert("hello my name is Bond, James Bond")
-# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'
-#
-# >>> c.convert('don\'t convert <a href="href">tag ids</a>')
-# u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'
-#
-# >>> c.convert('don\'t convert %(name)s tags on %(date)s')
-# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#"
+    >>> from dummy import Dummy
+    >>> c = Dummy()
+    >>> c.convert("My name is Bond, James Bond")
+    u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2c60\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
+    >>> print c.convert("My name is Bond, James Bond")
+    Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
+    >>> print c.convert("don't convert <a href='href'>tag ids</a>")
+    døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
+    >>> print c.convert("don't convert %(name)s tags on %(date)s")
+    døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#

+"""
+
+from converter import Converter

 # Substitute plain characters with accented lookalikes.
 # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
-TABLE = {'A': u'\xC0',
-         'a': u'\xE4',
-         'b': u'\xDF',
-         'C': u'\xc7',
-         'c': u'\xE7',
-         'E': u'\xC9',
-         'e': u'\xE9',
-         'I': U'\xCC',
-         'i': u'\xEF',
-         'O': u'\xD8',
-         'o': u'\xF8',
-         'U': u'\xDB',
-         'u': u'\xFC',
-         'Y': u'\xDD',
-         'y': u'\xFD',
-         }
-
+TABLE = {
+    'A': u'À',
+    'a': u'ä',
+    'b': u'ß',
+    'C': u'Ç',
+    'c': u'ç',
+    'E': u'É',
+    'e': u'é',
+    'I': u'Ì',
+    'i': u'ï',
+    'O': u'Ø',
+    'o': u'ø',
+    'U': u'Û',
+    'u': u'ü',
+    'Y': u'Ý',
+    'y': u'ý',
+}


 # The print industry's standard dummy text, in use since the 1500s
-# see http://www.lipsum.com/
-LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \
-        'do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad ' \
-        'minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ' \
-        'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate ' \
-        'velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat ' \
-        'cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. '
+# see http://www.lipsum.com/, then fed through a "fancy-text" converter.
+# The string should start with a space.
+LOREM = " " + " ".join(     # join and split just make the string easier here.
+    u"""
+    Ⱡσяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
+    тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
+    νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
+    ¢σммσ∂σ ¢σηѕєqυαт.  ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
+    νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
+    ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
+    єѕт łαвσяυм.
+    """.split()
+)

 # To simulate more verbose languages (like German), pad the length of a string
 # by a multiple of PAD_FACTOR

--- a/i18n/tests/test_converter.py
+++ b/i18n/tests/test_converter.py
@@ -29,6 +29,9 @@ class TestConverter(TestCase):
            ('big <strong>bad</strong> wolf', 'BIG <strong>BAD</strong> WOLF'),
            # two html tags
            ('big <b>bad</b> <i>wolf</i>', 'BIG <b>BAD</b> <i>WOLF</i>'),
+            # html tags with attributes
+            ('<a href="foo">bar</a> baz', '<a href="foo">BAR</a> BAZ'),
+            ("<a href='foo'>bar</a> baz", "<a href='foo'>BAR</a> BAZ"),
            # one python tag
            ('big %(adjective)s wolf', 'BIG %(adjective)s WOLF'),
            # two python tags

--- a/i18n/tests/test_dummy.py
+++ b/i18n/tests/test_dummy.py
+# -*- coding: utf-8 -*-
 import os, string, random
 from unittest import TestCase
 from polib import POEntry
@@ -13,39 +14,52 @@ class TestDummy(TestCase):
    def setUp(self):
        self.converter = dummy.Dummy()

+    def assertUnicodeEquals(self, str1, str2):
+        """Just like assertEquals, but doesn't put Unicode into the fail message.
+
+        Either nose, or rake, or something, deals very badly with unusual
+        Unicode characters in the assertions, so we use repr here to keep
+        things safe.
+
+        """
+        self.assertEquals(
+            str1, str2,
+            "Mismatch: %r != %r" % (str1, str2),
+        )
+
    def test_dummy(self):
        """
        Tests with a dummy converter (adds spurious accents to strings).
        Assert that embedded HTML and python tags are not converted.
        """
        test_cases = [
-            ("hello my name is Bond, James Bond",
-             u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'),
+            (u"hello my name is Bond, James Bond",
+             u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ι#"),

-            ('don\'t convert <a href="href">tag ids</a>',
-             u'd\xf8n\'t \xe7\xf8nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'),
+            (u"don't convert <a href='href'>tag ids</a>",
+             u"døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#"),

-            ('don\'t convert %(name)s tags on %(date)s',
-             u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#")
+            (u"don't convert %(name)s tags on %(date)s",
+             u"døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#"),
        ]
        for source, expected in test_cases:
            result = self.converter.convert(source)
-            self.assertEquals(result, expected)
+            self.assertUnicodeEquals(result, expected)

    def test_singular(self):
        entry = POEntry()
        entry.msgid = 'A lovely day for a cup of tea.'
-        expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
+        expected = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#'
        self.converter.convert_msg(entry)
-        self.assertEquals(entry.msgstr, expected)
+        self.assertUnicodeEquals(entry.msgstr, expected)

    def test_plural(self):
        entry = POEntry()
        entry.msgid = 'A lovely day for a cup of tea.'
        entry.msgid_plural = 'A lovely day for some cups of tea.'
-        expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
-        expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#'
+        expected_s = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#'
+        expected_p = u'À løvélý däý før sømé çüps øf téä. Ⱡσяєм ιρ#'
        self.converter.convert_msg(entry)
        result = entry.msgstr_plural
-        self.assertEquals(result['0'], expected_s)
-        self.assertEquals(result['1'], expected_p)
+        self.assertUnicodeEquals(result['0'], expected_s)
+        self.assertUnicodeEquals(result['1'], expected_p)
--- a/lms/templates/login.html
+++ b/lms/templates/login.html
@@ -89,7 +89,7 @@
        $submitButton.
          addClass('is-disabled').
          prop('disabled', true).
-          html(gettext('Processing your account information &hellip;'));
+          html("${_(u'Processing your account information…')}");
      }
    }
  </script>