Commit ece123e9 by Ned Batchelder

More than one dummy language

parent a60abede
......@@ -37,6 +37,8 @@ codekit-config.json
!djangojs.mo
conf/locale/en/LC_MESSAGES/*.po
conf/locale/en/LC_MESSAGES/*.mo
conf/locale/fake*/LC_MESSAGES/*.po
conf/locale/fake*/LC_MESSAGES/*.mo
conf/locale/messages.mo
### Testing artifacts
......
......@@ -51,8 +51,10 @@ locales:
- zh_TW # Chinese (Taiwan)
# The locale used for fake-accented English, for testing.
dummy-locale: eo
# The locales used for fake-accented English, for testing.
dummy_locales:
- eo
- fake2
# Directories we don't search for strings.
ignore_dirs:
......
......@@ -17,6 +17,7 @@ class Configuration(object):
Reads localization configuration in json format.
"""
DEFAULTS = {
'dummy_locales': [],
'generate_merge': {},
'ignore_dirs': [],
'locales': ['en'],
......@@ -42,18 +43,6 @@ class Configuration(object):
return self._config.get(name, self.DEFAULTS[name])
raise AttributeError("Configuration has no such setting: {!r}".format(name))
@property
def dummy_locale(self):
    """
    The locale code used for the dummy text, e.g. 'eo' (a string).

    The value is read from the 'dummy-locale' entry of the loaded
    configuration. An Exception is raised when that entry is missing
    or empty.
    """
    configured = self._config.get('dummy-locale', None)
    if configured:
        return configured
    raise Exception('Could not read dummy-locale from configuration file.')
def get_messages_dir(self, locale):
"""
Returns the name of the directory holding the po files for locale.
......
......@@ -31,98 +31,22 @@ from i18n.config import CONFIGURATION
from i18n.execute import create_dir_if_necessary
from i18n.converter import Converter
# Substitute plain characters with accented lookalikes.
# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
TABLE = {
'A': u'À',
'a': u'ä',
'b': u'ß',
'C': u'Ç',
'c': u'ç',
'E': u'É',
'e': u'é',
'I': u'Ì',
'i': u'ï',
'O': u'Ø',
'o': u'ø',
'U': u'Û',
'u': u'ü',
'Y': u'Ý',
'y': u'ý',
}
# The print industry's standard dummy text, in use since the 1500s
# see http://www.lipsum.com/, then fed through a "fancy-text" converter.
# The string should start with a space, so that it joins nicely with the text
# that precedes it. The Lorem contains an apostrophe since French often does,
# and translated strings get put into single-quoted strings, which then break.
LOREM = " " + " ".join( # join and split just make the string easier here.
u"""
Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
єѕт łαвσяυм.
""".split()
)
# To simulate more verbose languages (like German), pad the length of a string
# by a multiple of PAD_FACTOR
PAD_FACTOR = 1.33
class Dummy(Converter):
r"""
Creates new localization properties files in a dummy language.
Each property file is derived from the equivalent en_US file, with these
transformations applied:
1. Every vowel is replaced with an equivalent with extra accent marks.
class BaseDummyConverter(Converter):
"""Base class for dummy converters.
2. Every string is padded out to +30% length to simulate verbose languages
(such as German) to see if layout and flows work properly.
String conversion goes through a character map, then gets padded.
3. Every string is terminated with a '#' character to make it easier to detect
truncation.
Example use::
>>> from dummy import Dummy
>>> c = Dummy()
>>> c.convert("My name is Bond, James Bond")
u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
>>> print c.convert("My name is Bond, James Bond")
Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
>>> print c.convert("don't convert <a href='href'>tag ids</a>")
døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
>>> print c.convert("don't convert %(name)s tags on %(date)s")
døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#
"""
def convert(self, string):
result = Converter.convert(self, string)
return self.pad(result)
TABLE = {}
def inner_convert_string(self, string):
    """
    Convert one plain string into the dummy language.

    Every character that has an entry in ``self.TABLE`` is swapped for its
    replacement, and the result is then passed through ``self.pad``.

    The substitution is done in a single pass over the input instead of
    one ``replace`` call per table entry. Chained replaces translate the
    output of earlier replaces again, so a table that maps ``b -> q`` and
    ``q -> b`` would produce wrong results that also depend on dict
    iteration order.

    TABLE keys are expected to be single characters.
    """
    table = self.TABLE
    # Characters without a table entry are kept unchanged.
    converted = u"".join(table.get(char, char) for char in string)
    return self.pad(converted)
def pad(self, string):
"""add some lorem ipsum text to the end of string"""
size = len(string)
if size < 7:
target = size * 3
else:
target = int(size*PAD_FACTOR)
return string + self.terminate(LOREM[:(target-size)])
def terminate(self, string):
"""replaces the final char of string with #"""
return string[:-1] + '#'
return string
def convert_msg(self, msg):
"""
......@@ -159,15 +83,95 @@ class Dummy(Converter):
return translated
def make_dummy(file, locale):
class Dummy(BaseDummyConverter):
r"""
Creates new localization properties files in a dummy language.
Each property file is derived from the equivalent en_US file, with these
transformations applied:
1. Every vowel is replaced with an equivalent with extra accent marks.
2. Every string is padded out to +30% length to simulate verbose languages
(such as German) to see if layout and flows work properly.
3. Every string is terminated with a '#' character to make it easier to detect
truncation.
Example use::
>>> from dummy import Dummy
>>> c = Dummy()
>>> c.convert("My name is Bond, James Bond")
u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
>>> print c.convert("My name is Bond, James Bond")
Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
>>> print c.convert("don't convert <a href='href'>tag ids</a>")
døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
>>> print c.convert("don't convert %(name)s tags on %(date)s")
døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#
"""
# Substitute plain characters with accented lookalikes.
# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
TABLE = dict(zip(
u"AabCcEeIiOoUuYy",
u"ÀäßÇçÉéÌïÖöÛüÝý"
))
# The print industry's standard dummy text, in use since the 1500s
# see http://www.lipsum.com/, then fed through a "fancy-text" converter.
# The string should start with a space, so that it joins nicely with the text
# that precedes it. The Lorem contains an apostrophe since French often does,
# and translated strings get put into single-quoted strings, which then break.
LOREM = " " + " ".join( # join and split just make the string easier here.
u"""
Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
єѕт łαвσяυм.
""".split()
)
# To simulate more verbose languages (like German), pad the length of a string
# by a multiple of PAD_FACTOR
PAD_FACTOR = 1.33
def pad(self, string):
    """
    Append lorem ipsum text to `string` and terminate it with '#'.

    Strings shorter than 7 characters are grown to three times their
    length; longer strings are grown by ``self.PAD_FACTOR``. The final
    character of the result is always '#', which makes truncation easy
    to spot.

    An empty string yields just '#'.
    """
    size = len(string)
    if size < 7:
        target = size * 3
    else:
        target = int(size * self.PAD_FACTOR)
    # One character of the target length is reserved for the trailing '#'.
    # Clamp at zero: for an empty string the difference is -1, and a
    # negative slice bound would append nearly all of LOREM instead of
    # nothing.
    pad_len = max(target - size - 1, 0)
    return string + self.LOREM[:pad_len] + "#"
class Dummy2(BaseDummyConverter):
    """Alternative dummy converter for a second fake locale.

    Only the character table is overridden here. The mapping is a
    different obvious-but-readable conversion from the one in Dummy:
    many letters get a stroke through them, and lower-case letters are
    swapped for upside-down lookalikes.
    """
    TABLE = {
        plain: fancy
        for plain, fancy in zip(
            u"ABCDEGHIJKLOPRTUYZabcdefghijklmnopqrstuvwxyz",
            u"ȺɃȻĐɆǤĦƗɈꝀŁØⱣɌŦɄɎƵɐqɔpǝɟƃɥᴉɾʞlɯuødbɹsʇnʌʍxʎz",
        )
    }
def make_dummy(filename, locale, converter):
"""
Takes a source po file, reads it, and writes out a new po file
in :param locale: containing a dummy translation.
"""
if not path(file).exists():
raise IOError('File does not exist: %s' % file)
pofile = polib.pofile(file)
converter = Dummy()
if not path(filename).exists():
raise IOError('File does not exist: %r' % filename)
pofile = polib.pofile(filename)
for msg in pofile:
converter.convert_msg(msg)
......@@ -175,7 +179,7 @@ def make_dummy(file, locale):
# do something reasonable.
pofile.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);'
new_file = new_filename(file, locale)
new_file = new_filename(filename, locale)
create_dir_if_necessary(new_file)
pofile.save(new_file)
......@@ -191,12 +195,12 @@ def main():
"""
Generate dummy strings for all source po files.
"""
LOCALE = CONFIGURATION.dummy_locale
SOURCE_MSGS_DIR = CONFIGURATION.source_messages_dir
print "Processing source language files into dummy strings:"
for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'):
print ' ', source_file.relpath()
make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), LOCALE)
for locale, converter in zip(CONFIGURATION.dummy_locales, [Dummy(), Dummy2()]):
print "Processing source language files into dummy strings, locale {}:".format(locale)
for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'):
print ' ', source_file.relpath()
make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), locale, converter)
print
......
......@@ -115,7 +115,8 @@ def main(argv=None):
for locale in CONFIGURATION.translated_locales:
merge_files(locale, fail_if_missing=args.strict)
# Dummy text is not required. Don't raise exception if files are missing.
merge_files(CONFIGURATION.dummy_locale, fail_if_missing=False)
for locale in CONFIGURATION.dummy_locales:
merge_files(locale, fail_if_missing=False)
compile_cmd = 'django-admin.py compilemessages'
execute(compile_cmd, working_directory=BASE_DIR)
......
......@@ -29,5 +29,5 @@ class TestConfiguration(TestCase):
self.assertIsNotNone(locales)
self.assertIsInstance(locales, list)
self.assertIn('en', locales)
self.assertEqual('eo', CONFIGURATION.dummy_locale)
self.assertEqual('eo', CONFIGURATION.dummy_locales[0])
self.assertEqual('en', CONFIGURATION.source_locale)
......@@ -33,13 +33,13 @@ class TestDummy(TestCase):
@ddt.data(
(u"hello my name is Bond, James Bond",
u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡ'σяєм ι#"),
u"héllö mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм ι#"),
(u"don't convert <a href='href'>tag ids</a>",
u"døn't çønvért <a href='href'>täg ïds</a> Ⱡ'σяєм ιρѕυ#"),
u"dön't çönvért <a href='href'>täg ïds</a> Ⱡ'σяєм#"),
(u"don't convert %(name)s tags on %(date)s",
u"døn't çønvért %(name)s tägs øn %(date)s Ⱡ'σяєм ιρѕ#"),
u"dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #"),
)
def test_dummy(self, data):
"""
......@@ -53,7 +53,7 @@ class TestDummy(TestCase):
def test_singular(self):
entry = POEntry()
entry.msgid = "A lovely day for a cup of tea."
expected = u"À løvélý däý før ä çüp øf téä. Ⱡ'σяєм #"
expected = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #"
self.converter.convert_msg(entry)
self.assertUnicodeEquals(entry.msgstr, expected)
......@@ -61,8 +61,8 @@ class TestDummy(TestCase):
entry = POEntry()
entry.msgid = "A lovely day for a cup of tea."
entry.msgid_plural = "A lovely day for some cups of tea."
expected_s = u"À løvélý däý før ä çüp øf téä. Ⱡ'σяєм #"
expected_p = u"À løvélý däý før sømé çüps øf téä. Ⱡ'σяєм ιρ#"
expected_s = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #"
expected_p = u"À lövélý däý för sömé çüps öf téä. Ⱡ'σяєм ιρ#"
self.converter.convert_msg(entry)
result = entry.msgstr_plural
self.assertUnicodeEquals(result['0'], expected_s)
......
......@@ -498,7 +498,8 @@ LANGUAGE_CODE = 'en' # http://www.i18nguy.com/unicode/language-identifiers.html
# Sourced from http://www.localeplanet.com/icu/ and wikipedia
LANGUAGES = (
('eo', u'Dummy Language (Esperanto)'), # Dummy languaged used for testing
('eo', u'Dummy Language (Esperanto)'), # Dummy language used for testing
('fake2', u'Fake translations'), # Another dummy language for testing (not pushed to prod)
('ach', u'Acholi'), # Acoli
('ar', u'العربية'), # Arabic
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment