# -*- coding: utf-8 -*-
r"""
Creates new localization properties files in a dummy language.

Each property file is derived from the equivalent en_US file, with these
transformations applied:

1. Every vowel (and a few consonants, such as 'b' and 'c') is replaced with
   an accented lookalike; see TABLE.

2. Every string is padded with lorem ipsum text to roughly PAD_FACTOR times
   its length (strings shorter than 7 characters are tripled instead) to
   simulate verbose languages (such as German) and check that layout and
   flows still work properly.

3. Every string is terminated with a '#' character to make it easier to detect
   truncation.

NOTE(review): the padding shown in the examples below appears to predate the
current LOREM and PAD_FACTOR values (LOREM now starts with an apostrophe
after the first letter), so the exact output may differ -- confirm by
running them.

Example use::

    >>> from dummy import Dummy
    >>> c = Dummy()
    >>> c.convert("My name is Bond, James Bond")
    u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
    >>> print c.convert("My name is Bond, James Bond")
    Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
    >>> print c.convert("don't convert <a href='href'>tag ids</a>")
    døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
    >>> print c.convert("don't convert %(name)s tags on %(date)s")
    døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#

"""

from i18n.converter import Converter

# Substitute plain characters with accented lookalikes.
# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
# Lookup from a plain ASCII letter to the accented lookalike that replaces it.
# Written as (plain, accented) pairs; only the letters listed here are changed.
TABLE = dict([
    ('A', u'À'), ('a', u'ä'),
    ('b', u'ß'),
    ('C', u'Ç'), ('c', u'ç'),
    ('E', u'É'), ('e', u'é'),
    ('I', u'Ì'), ('i', u'ï'),
    ('O', u'Ø'), ('o', u'ø'),
    ('U', u'Û'), ('u', u'ü'),
    ('Y', u'Ý'), ('y', u'ý'),
])


# The print industry's standard dummy text, in use since the 1500s
# (see http://www.lipsum.com/), then fed through a "fancy-text" converter.
# The string should start with a space, so that it joins nicely with the text
# that precedes it.  The Lorem contains an apostrophe since French often does,
# and translated strings get put into single-quoted strings, which then break.
# Padding text: one leading space followed by the lorem ipsum words joined by
# single spaces.  The split/join pair collapses the newlines and indentation
# of the literal below, so the literal must contain only the lorem words
# themselves (no stray tokens), otherwise they end up in the padding.
LOREM = " " + " ".join(     # join and split just make the string easier here.
    u"""
    Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
    тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
    νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
    ¢σммσ∂σ ¢σηѕєqυαт.  ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
    νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
    ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
    єѕт łαвσяυм.
    """.split()
)


# To simulate more verbose languages (like German), grow each string to
# PAD_FACTOR times its original length.
PAD_FACTOR = 1.33  # used by Dummy.pad for strings of 7 or more characters


class Dummy(Converter):
    """
    A string converter that generates dummy strings with fake accents
    and lorem ipsum padding.

    The accent substitution lives in ``inner_convert_string``; it is
    presumably invoked by ``Converter.convert`` for the translatable parts
    of a string (confirm in i18n.converter).  ``convert`` then pads the
    converted result.
    """

    def convert(self, string):
        """Return `string` as converted by the base class, then padded."""
        result = Converter.convert(self, string)
        return self.pad(result)

    def inner_convert_string(self, string):
        """Replace every character listed in TABLE with its accented form."""
        for plain, accented in TABLE.items():
            string = string.replace(plain, accented)
        return string

    def pad(self, string):
        """
        Add some lorem ipsum text to the end of string.

        Strings shorter than 7 characters are grown to three times their
        length; longer ones to int(len(string) * PAD_FACTOR).  The padding
        is a prefix of LOREM whose final character is replaced with '#', so
        it can never be longer than LOREM itself.  An empty string yields
        just '#'.
        """
        size = len(string)
        if size < 7:
            target = size * 3
        else:
            target = int(size * PAD_FACTOR)
        return string + self.terminate(LOREM[:(target - size)])

    def terminate(self, string):
        """Replace the final char of string with '#' ('' becomes '#')."""
        return string[:-1] + '#'

    def convert_msg(self, msg):
        """
        Takes one POEntry object and converts it (adds a dummy translation
        to it).

        msg is an instance of polib.POEntry.  Entries with an empty msgid
        are left untouched.  Entries with a msgid_plural get msgstr_plural
        set to a dict with keys '0' (singular) and '1' (plural); all other
        entries get msgstr set.  Returns None.
        """
        source = msg.msgid
        if not source:
            # don't translate empty string
            return

        plural = msg.msgid_plural
        if plural:
            # translate singular and plural
            foreign_single = self.convert(source)
            foreign_plural = self.convert(plural)
            msg.msgstr_plural = {
                '0': self.final_newline(source, foreign_single),
                '1': self.final_newline(plural, foreign_plural),
            }
        else:
            foreign = self.convert(source)
            msg.msgstr = self.final_newline(source, foreign)

    def final_newline(self, original, translated):
        """
        Returns a new translated string.

        If last char of original is a newline, make sure translation
        has a newline too.  endswith() is used instead of indexing so an
        empty translation gains the newline instead of raising IndexError.
        """
        if original and original[-1] == '\n' and not translated.endswith('\n'):
            translated += '\n'
        return translated