More than one dummy language

ece123e9 · Ned Batchelder · a60abede · ece123e9 · ece123e9 · ece123e9
Commit ece123e9 authored Feb 07, 2014 by Ned Batchelder
Showing with 117 additions and 118 deletions

.gitignore
+2 -0

conf/locale/config.yaml
+4 -2

i18n/config.py
+1 -12

i18n/dummy.py
+99 -95

i18n/generate.py
+2 -1

i18n/tests/test_config.py
+1 -1

i18n/tests/test_dummy.py
+6 -6

lms/envs/common.py
+2 -1

No files found.
--- a/.gitignore
+++ b/.gitignore
@@ -37,6 +37,8 @@ codekit-config.json
 !djangojs.mo
 conf/locale/en/LC_MESSAGES/*.po
 conf/locale/en/LC_MESSAGES/*.mo
+conf/locale/fake*/LC_MESSAGES/*.po
+conf/locale/fake*/LC_MESSAGES/*.mo
 conf/locale/messages.mo

 ### Testing artifacts

--- a/conf/locale/config.yaml
+++ b/conf/locale/config.yaml
@@ -51,8 +51,10 @@ locales:
    - zh_TW  # Chinese (Taiwan)


-# The locale used for fake-accented English, for testing.
-dummy-locale: eo
+# The locales used for dummy translations of English (fake-accented, upside-down), for testing.
+dummy_locales:
+    - eo
+    - fake2

 # Directories we don't search for strings.
 ignore_dirs:

--- a/i18n/config.py
+++ b/i18n/config.py
@@ -17,6 +17,7 @@ class Configuration(object):
    Reads localization configuration in json format.
    """
    DEFAULTS = {
+        'dummy_locales': [],
        'generate_merge': {},
        'ignore_dirs': [],
        'locales': ['en'],
@@ -42,18 +43,6 @@ class Configuration(object):
            return self._config.get(name, self.DEFAULTS[name])
        raise AttributeError("Configuration has no such setting: {!r}".format(name))

-    @property
-    def dummy_locale(self):
-        """
-        Returns a locale to use for the dummy text, e.g. 'eo'.
-        Throws exception if no dummy-locale is declared.
-        The locale is a string.
-        """
-        dummy = self._config.get('dummy-locale', None)
-        if not dummy:
-            raise Exception('Could not read dummy-locale from configuration file.')
-        return dummy
-
    def get_messages_dir(self, locale):
        """
        Returns the name of the directory holding the po files for locale.

--- a/i18n/dummy.py
+++ b/i18n/dummy.py
@@ -31,98 +31,22 @@ from i18n.config import CONFIGURATION
 from i18n.execute import create_dir_if_necessary
 from i18n.converter import Converter

-# Substitute plain characters with accented lookalikes.
-# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
-TABLE = {
-    'A': u'À',
-    'a': u'ä',
-    'b': u'ß',
-    'C': u'Ç',
-    'c': u'ç',
-    'E': u'É',
-    'e': u'é',
-    'I': u'Ì',
-    'i': u'ï',
-    'O': u'Ø',
-    'o': u'ø',
-    'U': u'Û',
-    'u': u'ü',
-    'Y': u'Ý',
-    'y': u'ý',
-}
-
-
-# The print industry's standard dummy text, in use since the 1500s
-# see http://www.lipsum.com/, then fed through a "fancy-text" converter.
-# The string should start with a space, so that it joins nicely with the text
-# that precedes it.  The Lorem contains an apostrophe since French often does,
-# and translated strings get put into single-quoted strings, which then break.
-LOREM = " " + " ".join(     # join and split just make the string easier here.
-    u"""
-    Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
-    тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
-    νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
-    ¢σммσ∂σ ¢σηѕєqυαт.  ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
-    νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
-    ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
-    єѕт łαвσяυм.
-    """.split()
-)
-
-# To simulate more verbose languages (like German), pad the length of a string
-# by a multiple of PAD_FACTOR
-PAD_FACTOR = 1.33
-
-
-class Dummy(Converter):
-    r"""
-    Creates new localization properties files in a dummy language.
-
-    Each property file is derived from the equivalent en_US file, with these
-    transformations applied:

-    1. Every vowel is replaced with an equivalent with extra accent marks.
+class BaseDummyConverter(Converter):
+    """Base class for dummy converters.

-    2. Every string is padded out to +30% length to simulate verbose languages
-       (such as German) to see if layout and flows work properly.
+    String conversion goes through a character map, then gets padded.

-    3. Every string is terminated with a '#' character to make it easier to detect
-       truncation.
-
-    Example use::
-
-        >>> from dummy import Dummy
-        >>> c = Dummy()
-        >>> c.convert("My name is Bond, James Bond")
-        u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
-        >>> print c.convert("My name is Bond, James Bond")
-        Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
-        >>> print c.convert("don't convert <a href='href'>tag ids</a>")
-        døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
-        >>> print c.convert("don't convert %(name)s tags on %(date)s")
-        døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#
    """
-    def convert(self, string):
-        result = Converter.convert(self, string)
-        return self.pad(result)
+    TABLE = {}

    def inner_convert_string(self, string):
-        for k, v in TABLE.items():
-            string = string.replace(k, v)
-        return string
+        for old, new in self.TABLE.items():
+            string = string.replace(old, new)
+        return self.pad(string)

    def pad(self, string):
-        """add some lorem ipsum text to the end of string"""
-        size = len(string)
-        if size < 7:
-            target = size * 3
-        else:
-            target = int(size*PAD_FACTOR)
-        return string + self.terminate(LOREM[:(target-size)])
-
-    def terminate(self, string):
-        """replaces the final char of string with #"""
-        return string[:-1] + '#'
+        return string

    def convert_msg(self, msg):
        """
@@ -159,15 +83,95 @@ class Dummy(Converter):
        return translated


-def make_dummy(file, locale):
+class Dummy(BaseDummyConverter):
+    r"""
+    Creates new localization properties files in a dummy language.
+
+    Each property file is derived from the equivalent en_US file, with these
+    transformations applied:
+
+    1. The letters in TABLE (the vowels plus b, c and y) are replaced with accented lookalikes.
+
+    2. Every string is padded out to about 133% of its length (three times its
+       length when shorter than 7 characters) to simulate verbose languages
+       (such as German) to see if layout and flows work properly.
+
+    3. Every string is terminated with a '#' character to make it easier to detect
+       truncation.
+
+    Example use::
+
+        >>> from dummy import Dummy
+        >>> c = Dummy()
+        >>> c.convert("My name is Bond, James Bond")
+        u"M\xfd n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd \u2c60'\u03c3\u044f\u0454\u043c#"
+        >>> print c.convert("My name is Bond, James Bond")
+        Mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм#
+        >>> print c.convert("don't convert <a href='href'>tag ids</a>")
+        dön't çönvért <a href='href'>täg ïds</a> Ⱡ'σяєм#
+        >>> print c.convert("don't convert %(name)s tags on %(date)s")
+        dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #
+
+    """
+    # Substitute plain characters with accented lookalikes.
+    # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
+    TABLE = dict(zip(
+        u"AabCcEeIiOoUuYy",
+        u"ÀäßÇçÉéÌïÖöÛüÝý"
+    ))
+
+    # The print industry's standard dummy text, in use since the 1500s
+    # see http://www.lipsum.com/, then fed through a "fancy-text" converter.
+    # The string should start with a space, so that it joins nicely with the text
+    # that precedes it.  The Lorem contains an apostrophe since French often does,
+    # and translated strings get put into single-quoted strings, which then break.
+    LOREM = " " + " ".join(     # join and split just make the string easier here.
+        u"""
+        Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
+        тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
+        νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
+        ¢σммσ∂σ ¢σηѕєqυαт.  ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
+        νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
+        ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
+        єѕт łαвσяυм.
+        """.split()
+    )
+
+    # To simulate more verbose languages (like German), pad the length of a string
+    # by a multiple of PAD_FACTOR
+    PAD_FACTOR = 1.33
+
+    def pad(self, string):
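+        """Add some lorem ipsum text to the end of string, ending with '#'.
+
+        NOTE(review): for an empty string, pad_len is -1, so the slice keeps
+        all of LOREM except its last character -- confirm this is intended.
+        """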
+        size = len(string)
+        if size < 7:
+            target = size * 3
+        else:
+            target = int(size * self.PAD_FACTOR)
+        pad_len = target - size - 1
+        return string + self.LOREM[:pad_len] + "#"
+
+
+class Dummy2(BaseDummyConverter):
+    """A second dummy converter.
+
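+    Like Dummy, but uses a different obvious-but-readable automatic conversion:
+    strikes through many upper-case letters and turns lower-case letters
+    upside-down.  Unlike Dummy, it does not override pad().
+
+    NOTE(review): TABLE swaps b/q, d/p and n/u, and the replacements are applied
+    one key at a time, so a later replacement can undo an earlier one -- verify.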
+
+    """
+    TABLE = dict(zip(
+        u"ABCDEGHIJKLOPRTUYZabcdefghijklmnopqrstuvwxyz",
+        u"ȺɃȻĐɆǤĦƗɈꝀŁØⱣɌŦɄɎƵɐqɔpǝɟƃɥᴉɾʞlɯuødbɹsʇnʌʍxʎz"
+    ))
+
+
+def make_dummy(filename, locale, converter):
    """
    Takes a source po file, reads it, and writes out a new po file
    in :param locale: containing a dummy translation made by :param converter:.
    """
-    if not path(file).exists():
-        raise IOError('File does not exist: %s' % file)
-    pofile = polib.pofile(file)
-    converter = Dummy()
+    if not path(filename).exists():
+        raise IOError('File does not exist: %r' % filename)
+    pofile = polib.pofile(filename)
    for msg in pofile:
        converter.convert_msg(msg)

@@ -175,7 +179,7 @@ def make_dummy(file, locale):
    # do something reasonable.
    pofile.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);'

-    new_file = new_filename(file, locale)
+    new_file = new_filename(filename, locale)
    create_dir_if_necessary(new_file)
    pofile.save(new_file)

@@ -191,12 +195,12 @@ def main():
    """
    Generate dummy strings for all source po files.
    """
-    LOCALE = CONFIGURATION.dummy_locale
    SOURCE_MSGS_DIR = CONFIGURATION.source_messages_dir
-    print "Processing source language files into dummy strings:"
-    for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'):
-        print '   ', source_file.relpath()
-        make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), LOCALE)
+    for locale, converter in zip(CONFIGURATION.dummy_locales, [Dummy(), Dummy2()]):
+        print "Processing source language files into dummy strings, locale {}:".format(locale)
+        for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'):
+            print '   ', source_file.relpath()
+            make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), locale, converter)
    print



--- a/i18n/generate.py
+++ b/i18n/generate.py
@@ -115,7 +115,8 @@ def main(argv=None):
    for locale in CONFIGURATION.translated_locales:
        merge_files(locale, fail_if_missing=args.strict)
    # Dummy text is not required. Don't raise exception if files are missing.
-    merge_files(CONFIGURATION.dummy_locale, fail_if_missing=False)
+    for locale in CONFIGURATION.dummy_locales:
+        merge_files(locale, fail_if_missing=False)

    compile_cmd = 'django-admin.py compilemessages'
    execute(compile_cmd, working_directory=BASE_DIR)

--- a/i18n/tests/test_config.py
+++ b/i18n/tests/test_config.py
@@ -29,5 +29,5 @@ class TestConfiguration(TestCase):
        self.assertIsNotNone(locales)
        self.assertIsInstance(locales, list)
        self.assertIn('en', locales)
-        self.assertEqual('eo', CONFIGURATION.dummy_locale)
+        self.assertEqual('eo', CONFIGURATION.dummy_locales[0])
        self.assertEqual('en', CONFIGURATION.source_locale)
--- a/i18n/tests/test_dummy.py
+++ b/i18n/tests/test_dummy.py
@@ -33,13 +33,13 @@ class TestDummy(TestCase):

    @ddt.data(
        (u"hello my name is Bond, James Bond",
-         u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡ'σяєм ι#"),
+         u"héllö mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм ι#"),

        (u"don't convert <a href='href'>tag ids</a>",
-         u"døn't çønvért <a href='href'>täg ïds</a> Ⱡ'σяєм ιρѕυ#"),
+         u"dön't çönvért <a href='href'>täg ïds</a> Ⱡ'σяєм#"),

        (u"don't convert %(name)s tags on %(date)s",
-         u"døn't çønvért %(name)s tägs øn %(date)s Ⱡ'σяєм ιρѕ#"),
+         u"dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #"),
    )
    def test_dummy(self, data):
        """
@@ -53,7 +53,7 @@ class TestDummy(TestCase):
    def test_singular(self):
        entry = POEntry()
        entry.msgid = "A lovely day for a cup of tea."
-        expected = u"À løvélý däý før ä çüp øf téä. Ⱡ'σяєм #"
+        expected = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #"
        self.converter.convert_msg(entry)
        self.assertUnicodeEquals(entry.msgstr, expected)

@@ -61,8 +61,8 @@ class TestDummy(TestCase):
        entry = POEntry()
        entry.msgid = "A lovely day for a cup of tea."
        entry.msgid_plural = "A lovely day for some cups of tea."
-        expected_s = u"À løvélý däý før ä çüp øf téä. Ⱡ'σяєм #"
-        expected_p = u"À løvélý däý før sømé çüps øf téä. Ⱡ'σяєм ιρ#"
+        expected_s = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #"
+        expected_p = u"À lövélý däý för sömé çüps öf téä. Ⱡ'σяєм ιρ#"
        self.converter.convert_msg(entry)
        result = entry.msgstr_plural
        self.assertUnicodeEquals(result['0'], expected_s)

--- a/lms/envs/common.py
+++ b/lms/envs/common.py
@@ -498,7 +498,8 @@ LANGUAGE_CODE = 'en'  # http://www.i18nguy.com/unicode/language-identifiers.html

 # Sourced from http://www.localeplanet.com/icu/ and wikipedia
 LANGUAGES = (
-    ('eo', u'Dummy Language (Esperanto)'),  # Dummy languaged used for testing
+    ('eo', u'Dummy Language (Esperanto)'),  # Dummy language used for testing
+    ('fake2', u'Fake translations'),        # Another dummy language for testing (not pushed to prod)

    ('ach', u'Acholi'),  # Acoli
    ('ar', u'العربية'),  # Arabic