Commit 209f8cc3 by Ned Batchelder

Lorem is now fancy, and HTML tags are recognized with single-quote attributes.

parent 6d5e13e2
...@@ -21,9 +21,9 @@ class Converter(object): ...@@ -21,9 +21,9 @@ class Converter(object):
# HTML: <B>, </B>, <BR/>, <textformat leading="10"> # HTML: <B>, </B>, <BR/>, <textformat leading="10">
# Python: %(date)s, %(name)s # Python: %(date)s, %(name)s
tag_pattern = re.compile(r''' tag_pattern = re.compile(r'''
(<[-\w" .:?=/]*>) | # <tag> (<[^>]+>) | # <tag>
({[^}]*}) | # {tag} ({[^}]+}) | # {tag}
(%\([^)]*\)\w) | # %(tag)s (%\([\w]+\)\w) | # %(tag)s
(&\w+;) | # &entity; (&\w+;) | # &entity;
(&\#\d+;) | # &#1234; (&\#\d+;) | # &#1234;
(&\#x[0-9a-f]+;) # &#xABCD; (&\#x[0-9a-f]+;) # &#xABCD;
......
from converter import Converter # -*- coding: utf-8 -*-
r"""
Creates new localization properties files in a dummy language.
Each property file is derived from the equivalent en_US file, with these
transformations applied:
1. Every vowel is replaced with an equivalent with extra accent marks.
2. Every string is padded out to +30% length to simulate verbose languages
(such as German) to see if layout and flows work properly.
# Creates new localization properties files in a dummy language 3. Every string is terminated with a '#' character to make it easier to detect
# Each property file is derived from the equivalent en_US file, except truncation.
# 1. Every vowel is replaced with an equivalent with extra accent marks
# 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German)
# to see if layout and flows work properly
# 3. Every string is terminated with a '#' character to make it easier to detect truncation
Example use::
# -------------------------------- >>> from dummy import Dummy
# Example use: >>> c = Dummy()
# >>> from dummy import Dummy >>> c.convert("My name is Bond, James Bond")
# >>> c = Dummy() u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
# >>> c.convert("hello my name is Bond, James Bond") >>> print c.convert("My name is Bond, James Bond")
# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#' Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
# >>> print c.convert("don't convert <a href='href'>tag ids</a>")
# >>> c.convert('don\'t convert <a href="href">tag ids</a>') døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
# u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#' >>> print c.convert("don't convert %(name)s tags on %(date)s")
# døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#
# >>> c.convert('don\'t convert %(name)s tags on %(date)s')
# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#"
"""
from converter import Converter
# Substitute plain characters with accented lookalikes. # Substitute plain characters with accented lookalikes.
# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
TABLE = {'A': u'\xC0', TABLE = {
'a': u'\xE4', 'A': u'À',
'b': u'\xDF', 'a': u'ä',
'C': u'\xc7', 'b': u'ß',
'c': u'\xE7', 'C': u'Ç',
'E': u'\xC9', 'c': u'ç',
'e': u'\xE9', 'E': u'É',
'I': U'\xCC', 'e': u'é',
'i': u'\xEF', 'I': u'Ì',
'O': u'\xD8', 'i': u'ï',
'o': u'\xF8', 'O': u'Ø',
'U': u'\xDB', 'o': u'ø',
'u': u'\xFC', 'U': u'Û',
'Y': u'\xDD', 'u': u'ü',
'y': u'\xFD', 'Y': u'Ý',
} 'y': u'ý',
}
# The print industry's standard dummy text, in use since the 1500s # The print industry's standard dummy text, in use since the 1500s
# see http://www.lipsum.com/ # see http://www.lipsum.com/, then fed through a "fancy-text" converter.
LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \ # The string should start with a space.
'do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad ' \ LOREM = " " + " ".join( # join and split just make the string easier here.
'minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ' \ u"""
'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate ' \ Ⱡσяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
'velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat ' \ тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
'cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ' νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
єѕт łαвσяυм.
""".split()
)
# To simulate more verbose languages (like German), pad the length of a string # To simulate more verbose languages (like German), pad the length of a string
# by a multiple of PAD_FACTOR # by a multiple of PAD_FACTOR
......
...@@ -29,6 +29,9 @@ class TestConverter(TestCase): ...@@ -29,6 +29,9 @@ class TestConverter(TestCase):
('big <strong>bad</strong> wolf', 'BIG <strong>BAD</strong> WOLF'), ('big <strong>bad</strong> wolf', 'BIG <strong>BAD</strong> WOLF'),
# two html tags # two html tags
('big <b>bad</b> <i>wolf</i>', 'BIG <b>BAD</b> <i>WOLF</i>'), ('big <b>bad</b> <i>wolf</i>', 'BIG <b>BAD</b> <i>WOLF</i>'),
# html tags with attributes
('<a href="foo">bar</a> baz', '<a href="foo">BAR</a> BAZ'),
("<a href='foo'>bar</a> baz", "<a href='foo'>BAR</a> BAZ"),
# one python tag # one python tag
('big %(adjective)s wolf', 'BIG %(adjective)s WOLF'), ('big %(adjective)s wolf', 'BIG %(adjective)s WOLF'),
# two python tags # two python tags
......
# -*- coding: utf-8 -*-
import os, string, random import os, string, random
from unittest import TestCase from unittest import TestCase
from polib import POEntry from polib import POEntry
...@@ -13,39 +14,52 @@ class TestDummy(TestCase): ...@@ -13,39 +14,52 @@ class TestDummy(TestCase):
def setUp(self): def setUp(self):
self.converter = dummy.Dummy() self.converter = dummy.Dummy()
def assertUnicodeEquals(self, str1, str2):
    """Assert that two strings are equal, with an ASCII-safe failure message.

    Behaves like ``assertEqual``, but the failure message is built from the
    ``repr`` of both values instead of the raw text. Either nose, or rake,
    or something else in the toolchain deals very badly with unusual
    Unicode characters in assertion output, so repr keeps things safe.

    ``str1`` is the value under test; ``str2`` is the expected value.
    """
    # assertEqual is the supported name; the assertEquals alias is
    # deprecated and has been removed from newer unittest versions.
    self.assertEqual(
        str1, str2,
        "Mismatch: %r != %r" % (str1, str2),
    )
def test_dummy(self): def test_dummy(self):
""" """
Tests with a dummy converter (adds spurious accents to strings). Tests with a dummy converter (adds spurious accents to strings).
Assert that embedded HTML and python tags are not converted. Assert that embedded HTML and python tags are not converted.
""" """
test_cases = [ test_cases = [
("hello my name is Bond, James Bond", (u"hello my name is Bond, James Bond",
u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'), u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ι#"),
('don\'t convert <a href="href">tag ids</a>', (u"don't convert <a href='href'>tag ids</a>",
u'd\xf8n\'t \xe7\xf8nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'), u"døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#"),
('don\'t convert %(name)s tags on %(date)s', (u"don't convert %(name)s tags on %(date)s",
u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#") u"døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#"),
] ]
for source, expected in test_cases: for source, expected in test_cases:
result = self.converter.convert(source) result = self.converter.convert(source)
self.assertEquals(result, expected) self.assertUnicodeEquals(result, expected)
def test_singular(self): def test_singular(self):
entry = POEntry() entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.' entry.msgid = 'A lovely day for a cup of tea.'
expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#' expected = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#'
self.converter.convert_msg(entry) self.converter.convert_msg(entry)
self.assertEquals(entry.msgstr, expected) self.assertUnicodeEquals(entry.msgstr, expected)
def test_plural(self): def test_plural(self):
entry = POEntry() entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.' entry.msgid = 'A lovely day for a cup of tea.'
entry.msgid_plural = 'A lovely day for some cups of tea.' entry.msgid_plural = 'A lovely day for some cups of tea.'
expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#' expected_s = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#'
expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#' expected_p = u'À løvélý däý før sømé çüps øf téä. Ⱡσяєм ιρ#'
self.converter.convert_msg(entry) self.converter.convert_msg(entry)
result = entry.msgstr_plural result = entry.msgstr_plural
self.assertEquals(result['0'], expected_s) self.assertUnicodeEquals(result['0'], expected_s)
self.assertEquals(result['1'], expected_p) self.assertUnicodeEquals(result['1'], expected_p)
...@@ -89,7 +89,7 @@ ...@@ -89,7 +89,7 @@
$submitButton. $submitButton.
addClass('is-disabled'). addClass('is-disabled').
prop('disabled', true). prop('disabled', true).
html(gettext('Processing your account information &hellip;')); html("${_(u'Processing your account information…')}");
} }
} }
</script> </script>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment