Commit 209f8cc3 by Ned Batchelder

Lorem is now fancy, and HTML tags are recognized with single-quote attributes.

parent 6d5e13e2
......@@ -21,9 +21,9 @@ class Converter(object):
# HTML: <B>, </B>, <BR/>, <textformat leading="10">
# Python: %(date)s, %(name)s
tag_pattern = re.compile(r'''
(<[-\w" .:?=/]*>) | # <tag>
({[^}]*}) | # {tag}
(%\([^)]*\)\w) | # %(tag)s
(<[^>]+>) | # <tag>
({[^}]+}) | # {tag}
(%\([\w]+\)\w) | # %(tag)s
(&\w+;) | # &entity;
(&\#\d+;) | # &#1234;
(&\#x[0-9a-f]+;) # &#xABCD;
......
from converter import Converter
# -*- coding: utf-8 -*-
r"""
Creates new localization properties files in a dummy language.
Each property file is derived from the equivalent en_US file, with these
transformations applied:
1. Every vowel is replaced with an equivalent with extra accent marks.
2. Every string is padded out to +30% length to simulate verbose languages
(such as German) to see if layout and flows work properly.
# Creates new localization properties files in a dummy language
# Each property file is derived from the equivalent en_US file, except
# 1. Every vowel is replaced with an equivalent with extra accent marks
# 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German)
# to see if layout and flows work properly
# 3. Every string is terminated with a '#' character to make it easier to detect truncation
3. Every string is terminated with a '#' character to make it easier to detect
truncation.
Example use::
# --------------------------------
# Example use:
# >>> from dummy import Dummy
# >>> c = Dummy()
# >>> c.convert("hello my name is Bond, James Bond")
# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'
#
# >>> c.convert('don\'t convert <a href="href">tag ids</a>')
# u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'
#
# >>> c.convert('don\'t convert %(name)s tags on %(date)s')
# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#"
>>> from dummy import Dummy
>>> c = Dummy()
>>> c.convert("My name is Bond, James Bond")
u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
>>> print c.convert("My name is Bond, James Bond")
Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
>>> print c.convert("don't convert <a href='href'>tag ids</a>")
døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
>>> print c.convert("don't convert %(name)s tags on %(date)s")
døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#
"""
from converter import Converter
# Substitute plain characters with accented lookalikes.
# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
TABLE = {'A': u'\xC0',
'a': u'\xE4',
'b': u'\xDF',
'C': u'\xc7',
'c': u'\xE7',
'E': u'\xC9',
'e': u'\xE9',
'I': U'\xCC',
'i': u'\xEF',
'O': u'\xD8',
'o': u'\xF8',
'U': u'\xDB',
'u': u'\xFC',
'Y': u'\xDD',
'y': u'\xFD',
}
TABLE = {
'A': u'À',
'a': u'ä',
'b': u'ß',
'C': u'Ç',
'c': u'ç',
'E': u'É',
'e': u'é',
'I': u'Ì',
'i': u'ï',
'O': u'Ø',
'o': u'ø',
'U': u'Û',
'u': u'ü',
'Y': u'Ý',
'y': u'ý',
}
# The print industry's standard dummy text, in use since the 1500s
# see http://www.lipsum.com/
LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \
'do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad ' \
'minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ' \
'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate ' \
'velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat ' \
'cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. '
# see http://www.lipsum.com/, then fed through a "fancy-text" converter.
# The string should start with a space.
LOREM = " " + " ".join( # join and split just make the string easier here.
u"""
Ⱡσяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
єѕт łαвσяυм.
""".split()
)
# To simulate more verbose languages (like German), pad the length of a string
# by a multiple of PAD_FACTOR
......
......@@ -29,6 +29,9 @@ class TestConverter(TestCase):
('big <strong>bad</strong> wolf', 'BIG <strong>BAD</strong> WOLF'),
# two html tags
('big <b>bad</b> <i>wolf</i>', 'BIG <b>BAD</b> <i>WOLF</i>'),
# html tags with attributes
('<a href="foo">bar</a> baz', '<a href="foo">BAR</a> BAZ'),
("<a href='foo'>bar</a> baz", "<a href='foo'>BAR</a> BAZ"),
# one python tag
('big %(adjective)s wolf', 'BIG %(adjective)s WOLF'),
# two python tags
......
# -*- coding: utf-8 -*-
import os, string, random
from unittest import TestCase
from polib import POEntry
......@@ -13,39 +14,52 @@ class TestDummy(TestCase):
def setUp(self):
self.converter = dummy.Dummy()
def assertUnicodeEquals(self, str1, str2):
"""Just like assertEquals, but doesn't put Unicode into the fail message.
Either nose, or rake, or something, deals very badly with unusual
Unicode characters in the assertions, so we use repr here to keep
things safe.
"""
self.assertEquals(
str1, str2,
"Mismatch: %r != %r" % (str1, str2),
)
def test_dummy(self):
"""
Tests with a dummy converter (adds spurious accents to strings).
Assert that embedded HTML and python tags are not converted.
"""
test_cases = [
("hello my name is Bond, James Bond",
u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'),
(u"hello my name is Bond, James Bond",
u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ι#"),
('don\'t convert <a href="href">tag ids</a>',
u'd\xf8n\'t \xe7\xf8nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'),
(u"don't convert <a href='href'>tag ids</a>",
u"døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#"),
('don\'t convert %(name)s tags on %(date)s',
u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#")
(u"don't convert %(name)s tags on %(date)s",
u"døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#"),
]
for source, expected in test_cases:
result = self.converter.convert(source)
self.assertEquals(result, expected)
self.assertUnicodeEquals(result, expected)
def test_singular(self):
entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.'
expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
expected = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#'
self.converter.convert_msg(entry)
self.assertEquals(entry.msgstr, expected)
self.assertUnicodeEquals(entry.msgstr, expected)
def test_plural(self):
entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.'
entry.msgid_plural = 'A lovely day for some cups of tea.'
expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#'
expected_s = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#'
expected_p = u'À løvélý däý før sømé çüps øf téä. Ⱡσяєм ιρ#'
self.converter.convert_msg(entry)
result = entry.msgstr_plural
self.assertEquals(result['0'], expected_s)
self.assertEquals(result['1'], expected_p)
self.assertUnicodeEquals(result['0'], expected_s)
self.assertUnicodeEquals(result['1'], expected_p)
......@@ -89,7 +89,7 @@
$submitButton.
addClass('is-disabled').
prop('disabled', true).
html(gettext('Processing your account information &hellip;'));
html("${_(u'Processing your account information…')}");
}
}
</script>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment