# dummy.py
from converter import Converter

# Creates new localization properties files in a dummy language.
# Each property file is derived from the equivalent en_US file, except:
# 1. Every vowel (and a few consonants) is replaced with an accented lookalike
# 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German)
#    to see if layout and flows work properly
# 3. Every string is terminated with a '#' character to make it easier to detect truncation


# --------------------------------
# Example use:
# >>> from dummy import Dummy
# >>> c = Dummy()
# >>> c.convert("hello my name is Bond, James Bond")
# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'
#
# >>> c.convert('don\'t convert <a href="href">tag ids</a>')
# u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'
#
# >>> c.convert('don\'t convert %(name)s tags on %(date)s')
# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#"


# Lookup of plain ASCII letters -> accented lookalike characters.
# Letters that do not appear as keys are left untouched.
# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
TABLE = {
    'A': u'\xC0',
    'a': u'\xE4',
    'b': u'\xDF',
    'C': u'\xc7',
    'c': u'\xE7',
    'E': u'\xC9',
    'e': u'\xE9',
    'I': u'\xCC',
    'i': u'\xEF',
    'O': u'\xD8',
    'o': u'\xF6',
    'u': u'\xFC',
}


# Source of padding text: the print industry's standard dummy text,
# in use since the 1500s (see http://www.lipsum.com/).
# It starts with a space so that it can be appended directly to a string.
LOREM = (
    ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed '
    'do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad '
    'minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex '
    'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate '
    'velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat '
    'cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. '
)

# Growth factor applied to a string's length when padding it, to simulate
# more verbose languages (like German): 1.3 means "30% longer".
PAD_FACTOR = 1.3


class Dummy(Converter):
    """
    A string converter that generates dummy strings with fake accents
    and lorem ipsum padding.

    The accent substitution itself lives in inner_convert_string(); the
    base Converter (defined elsewhere) decides which parts of a string are
    passed to it.
    """

    def convert(self, string):
        """
        Returns the dummy translation of string: the result of the base
        Converter.convert(), followed by lorem ipsum padding ending in '#'.
        """
        result = Converter.convert(self, string)
        return self.pad(result)

    def inner_convert_string(self, string):
        """
        Replaces every character of string that is a key of TABLE with its
        accented lookalike. Other characters are left unchanged.
        """
        for plain, accented in TABLE.items():
            string = string.replace(plain, accented)
        return string

    def pad(self, string):
        """
        Adds some lorem ipsum text to the end of string.

        Strings shorter than 7 characters are grown to 3x their length;
        longer ones are grown by PAD_FACTOR. The padding is taken from the
        start of LOREM and its last character is replaced with '#'.
        """
        size = len(string)
        if size < 7:
            target = size * 3
        else:
            target = int(size * PAD_FACTOR)
        return string + self.terminate(LOREM[:(target - size)])

    def terminate(self, string):
        """
        Replaces the final char of string with '#'.
        An empty string has no final char, so it simply becomes '#'.
        """
        return string[:-1] + '#'

    def init_msgs(self, msgs):
        """
        Make sure the first msg in msgs has a plural property.
        msgs is list of instances of polib.POEntry

        Does nothing when msgs is empty. Otherwise the 'msgstr' property of
        the first msg is treated as a list of header lines, and a
        Plural-Forms declaration is appended to it if none is present.
        """
        if not msgs:
            return
        headers = msgs[0].get_property('msgstr')
        has_plural = any(header.startswith('Plural-Forms:') for header in headers)
        if not has_plural:
            # Apply declaration for English pluralization rules
            plural = "Plural-Forms: nplurals=2; plural=(n != 1);\\n"
            headers.append(plural)

    def convert_msg(self, msg):
        """
        Takes one POEntry object and converts it (adds a dummy translation to it)
        msg is an instance of polib.POEntry

        Entries with an empty msgid are left untouched. Entries with a
        msgid_plural get msgstr_plural set (keys '0' and '1'); all other
        entries get msgstr set.
        """
        source = msg.msgid
        if not source:
            # don't translate empty string
            return

        plural = msg.msgid_plural
        if plural:
            # translate singular and plural
            foreign_single = self.convert(source)
            foreign_plural = self.convert(plural)
            msg.msgstr_plural = {
                '0': self.final_newline(source, foreign_single),
                '1': self.final_newline(plural, foreign_plural),
            }
            return

        foreign = self.convert(source)
        msg.msgstr = self.final_newline(source, foreign)

    def final_newline(self, original, translated):
        """
        Returns a new translated string.
        If last char of original is a newline, make sure translation
        has a newline too.
        """
        # endswith() is used instead of indexing [-1] so that an original of
        # exactly '\n' is handled and an empty translation cannot raise.
        if original.endswith('\n') and not translated.endswith('\n'):
            return translated + '\n'
        return translated