Commit dfb04bc4 by Ned Batchelder

Merge pull request #2017 from edx/ned/i18n-minor-improvements

Minor i18n improvements
parents 6c4aae73 c37ab385
......@@ -84,7 +84,8 @@ urlpatterns += patterns(
js_info_dict = {
'domain': 'djangojs',
'packages': ('cms',),
# No packages needed, we get LOCALE_PATHS anyway.
'packages': (),
}
urlpatterns += patterns('',
......
......@@ -11,7 +11,7 @@ BASE_DIR = path(__file__).abspath().dirname().joinpath('..').normpath()
LOCALE_DIR = BASE_DIR.joinpath('conf', 'locale')
class Configuration:
class Configuration(object):
"""
# Reads localization configuration in json format
......
import re
import itertools
class Converter:
class Converter(object):
"""Converter is an abstract class that transforms strings.
It hides embedded tags (HTML or Python sequences) from transformation
To implement Converter, provide implementation for inner_convert_string()
Strategy:
......@@ -16,16 +16,25 @@ class Converter:
3. re-insert the extracted tags
"""
# matches tags like these:
# HTML: <B>, </B>, <BR/>, <textformat leading="10">
# Python: %(date)s, %(name)s
tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I)
tag_pattern = re.compile(r'''
(<[-\w" .:?=/]*>) | # <tag>
({[^}]*}) | # {tag}
(%\([^)]*\)\w) | # %(tag)s
(&\w+;) | # &entity;
(&\#\d+;) | # &#1234;
(&\#x[0-9a-f]+;) # &#xABCD;
''',
re.IGNORECASE|re.VERBOSE
)
def convert(self, string):
"""Returns: a converted tagged string
param: string (contains html tags)
Don't replace characters inside tags
"""
(string, tags) = self.detag_string(string)
......@@ -35,7 +44,7 @@ class Converter:
def detag_string(self, string):
"""Extracts tags from string.
returns (string, list) where
string: string has tags replaced by indices (<BR>... => <0>, <1>, <2>, etc.)
list: list of the removed tags ('<BR>', '<I>', '</I>')
......@@ -62,4 +71,3 @@ class Converter:
def inner_convert_string(self, string):
return string # do nothing by default
......@@ -34,8 +34,11 @@ TABLE = {'A': u'\xC0',
'I': U'\xCC',
'i': u'\xEF',
'O': u'\xD8',
'o': u'\xF6',
'u': u'\xFC'
'o': u'\xF8',
'U': u'\xDB',
'u': u'\xFC',
'Y': u'\xDD',
'y': u'\xFD',
}
......@@ -54,49 +57,47 @@ LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \
PAD_FACTOR = 1.3
class Dummy (Converter):
class Dummy(Converter):
"""
A string converter that generates dummy strings with fake accents
and lorem ipsum padding.
"""
"""
def convert(self, string):
result = Converter.convert(self, string)
return self.pad(result)
def inner_convert_string(self, string):
for (k,v) in TABLE.items():
for k, v in TABLE.items():
string = string.replace(k, v)
return string
def pad(self, string):
"""add some lorem ipsum text to the end of string"""
size = len(string)
if size < 7:
target = size*3
target = size * 3
else:
target = int(size*PAD_FACTOR)
return string + self.terminate(LOREM[:(target-size)])
def terminate(self, string):
"""replaces the final char of string with #"""
return string[:-1]+'#'
return string[:-1] + '#'
def init_msgs(self, msgs):
"""
Make sure the first msg in msgs has a plural property.
msgs is list of instances of polib.POEntry
"""
if len(msgs)==0:
if not msgs:
return
headers = msgs[0].get_property('msgstr')
has_plural = len([header for header in headers if header.find('Plural-Forms:') == 0])>0
has_plural = any(header.startswith('Plural-Forms:') for header in headers)
if not has_plural:
# Apply declaration for English pluralization rules
plural = "Plural-Forms: nplurals=2; plural=(n != 1);\\n"
headers.append(plural)
def convert_msg(self, msg):
"""
......@@ -104,19 +105,18 @@ class Dummy (Converter):
msg is an instance of polib.POEntry
"""
source = msg.msgid
if len(source)==0:
if not source:
# don't translate empty string
return
plural = msg.msgid_plural
if len(plural)>0:
if plural:
# translate singular and plural
foreign_single = self.convert(source)
foreign_plural = self.convert(plural)
plural = {'0': self.final_newline(source, foreign_single),
'1': self.final_newline(plural, foreign_plural)}
msg.msgstr_plural = plural
return
else:
foreign = self.convert(source)
msg.msgstr = self.final_newline(source, foreign)
......@@ -126,7 +126,7 @@ class Dummy (Converter):
If last char of original is a newline, make sure translation
has a newline too.
"""
if len(original)>1:
if original[-1]=='\n' and translated[-1]!='\n':
return translated + '\n'
if original:
if original[-1] == '\n' and translated[-1] != '\n':
translated += '\n'
return translated
......@@ -11,13 +11,13 @@ def execute(command, working_directory=BASE_DIR):
Output is ignored.
"""
LOG.info(command)
subprocess.call(command.split(' '), cwd=working_directory)
subprocess.check_output(command.split(' '), cwd=working_directory, stderr=subprocess.STDOUT)
def call(command, working_directory=BASE_DIR):
"""
Executes shell command in a given working_directory.
Command is a string to pass to the shell.
Command is a list of strings to execute as a command line.
Returns a tuple of two strings: (stdout, stderr)
"""
......@@ -25,7 +25,8 @@ def call(command, working_directory=BASE_DIR):
p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_directory)
out, err = p.communicate()
return (out, err)
def create_dir_if_necessary(pathname):
dirname = os.path.dirname(pathname)
if not os.path.exists(dirname):
......
#!/usr/bin/env python
"""
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
This task extracts all English strings from all source code
and produces three human-readable files:
This task extracts all English strings from all source code
and produces three human-readable files:
conf/locale/en/LC_MESSAGES/django-partial.po
conf/locale/en/LC_MESSAGES/djangojs.po
conf/locale/en/LC_MESSAGES/mako.po
This task will clobber any existing django.po file.
This is because django-admin.py makemessages hardcodes this filename
and it cannot be overridden.
This task will clobber any existing django.po file.
This is because django-admin.py makemessages hardcodes this filename
and it cannot be overridden.
"""
import os, sys, logging
......@@ -34,7 +34,7 @@ SOURCE_WARN = 'This English source file is machine-generated. Do not check it in
LOG = logging.getLogger(__name__)
def main ():
def main():
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
create_dir_if_necessary(LOCALE_DIR)
source_msgs_dir = CONFIGURATION.source_messages_dir
......@@ -44,23 +44,28 @@ def main ():
for filename in generated_files:
remove_file(source_msgs_dir.joinpath(filename))
# Extract strings from mako templates
# Extract strings from mako templates.
babel_mako_cmd = 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT)
# Extract strings from django source files
make_django_cmd = 'django-admin.py makemessages -l en --ignore=src/* --ignore=i18n/* ' \
+ '--extension html'
# Extract strings from javascript source files
make_djangojs_cmd = 'django-admin.py makemessages -l en -d djangojs --ignore=src/* ' \
+ '--ignore=i18n/* --extension js'
# Extract strings from django source files.
make_django_cmd = (
'django-admin.py makemessages -l en --ignore=src/* --ignore=i18n/* '
'--extension html'
)
# Extract strings from Javascript source files.
make_djangojs_cmd = (
'django-admin.py makemessages -l en --ignore=src/* --ignore=i18n/* '
'-d djangojs --extension js'
)
execute(babel_mako_cmd, working_directory=BASE_DIR)
execute(make_django_cmd, working_directory=BASE_DIR)
# makemessages creates 'django.po'. This filename is hardcoded.
# Rename it to django-partial.po to enable merging into django.po later.
os.rename(source_msgs_dir.joinpath('django.po'),
source_msgs_dir.joinpath('django-partial.po'))
os.rename(
source_msgs_dir.joinpath('django.po'),
source_msgs_dir.joinpath('django-partial.po')
)
execute(make_djangojs_cmd, working_directory=BASE_DIR)
for filename in generated_files:
......@@ -101,7 +106,7 @@ def fix_header(po):
('FIRST AUTHOR <EMAIL@ADDRESS>',
'EdX Team <info@edx.org>')
)
for (src, dest) in fixes:
for src, dest in fixes:
header = header.replace(src, dest)
po.header = header
......@@ -112,12 +117,12 @@ def fix_header(po):
u'Content-Transfer-Encoding': u'8bit',
u'Project-Id-Version': u'PACKAGE VERSION',
u'Report-Msgid-Bugs-To': u'',
u'Last-Translator': u'FULL NAME <EMAIL@ADDRESS>',
u'Last-Translator': u'FULL NAME <EMAIL@ADDRESS>',
u'Language-Team': u'LANGUAGE <LL@li.org>',
u'POT-Creation-Date': u'2013-04-25 14:14-0400',
u'Content-Type': u'text/plain; charset=UTF-8',
u'MIME-Version': u'1.0'}
"""
"""
def fix_metadata(po):
"""
......@@ -146,7 +151,7 @@ def is_key_string(string):
returns True if string is a key string.
Key strings begin with underscore.
"""
return len(string)>1 and string[0]=='_'
return len(string) > 1 and string[0] == '_'
if __name__ == '__main__':
main()
#!/usr/bin/env python
"""
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
This task merges and compiles the human-readable .po files on the
local filesystem into machine-readable .mo files. This is typically
necessary as part of the build process since these .mo files are
needed by Django when serving the web app.
This task merges and compiles the human-readable .po files on the
local filesystem into machine-readable .mo files. This is typically
necessary as part of the build process since these .mo files are
needed by Django when serving the web app.
The configuration file (in edx-platform/conf/locale/config) specifies which
languages to generate.
The configuration file (in edx-platform/conf/locale/config) specifies which
languages to generate.
"""
import os, sys, logging
......@@ -26,10 +26,13 @@ def merge(locale, target='django.po', fail_if_missing=True):
"""
For the given locale, merge django-partial.po, messages.po, mako.po -> django.po
target is the resulting filename
If fail_if_missing is True, and the files to be merged are missing,
throw an Exception.
If fail_if_missing is False, and the files to be merged are missing,
If fail_if_missing is true, and the files to be merged are missing,
throw an Exception, otherwise return silently.
If fail_if_missing is false, and the files to be merged are missing,
just return silently.
"""
LOG.info('Merging locale={0}'.format(locale))
locale_directory = CONFIGURATION.get_messages_dir(locale)
......
......@@ -51,11 +51,7 @@ def new_filename(original_filename, new_locale):
orig_dir = os.path.dirname(original_filename)
msgs_dir = os.path.basename(orig_dir)
orig_file = os.path.basename(original_filename)
return os.path.abspath(os.path.join(orig_dir,
'../..',
new_locale,
msgs_dir,
orig_file))
return os.path.abspath(os.path.join(orig_dir, '../..', new_locale, msgs_dir, orig_file))
if __name__ == '__main__':
# required arg: file
......
from test_config import TestConfiguration
from test_extract import TestExtract
from test_generate import TestGenerate
from test_converter import TestConverter
from test_dummy import TestDummy
import test_validate
......@@ -17,7 +17,7 @@ class TestConfiguration(TestCase):
config_filename = os.path.normpath(os.path.join(LOCALE_DIR, 'no_such_file'))
with self.assertRaises(Exception):
Configuration(config_filename)
def test_valid_configuration(self):
"""
Make sure we have a valid configuration file,
......
......@@ -3,7 +3,7 @@ from unittest import TestCase
import converter
class UpcaseConverter (converter.Converter):
class UpcaseConverter(converter.Converter):
"""
Converts a string to uppercase. Just used for testing.
"""
......@@ -22,7 +22,7 @@ class TestConverter(TestCase):
Assert that embedded HTML and python tags are not converted.
"""
c = UpcaseConverter()
test_cases = (
test_cases = [
# no tags
('big bad wolf', 'BIG BAD WOLF'),
# one html tag
......@@ -36,7 +36,11 @@ class TestConverter(TestCase):
# both kinds of tags
('<strong>big</strong> %(adjective)s %(noun)s',
'<strong>BIG</strong> %(adjective)s %(noun)s'),
)
for (source, expected) in test_cases:
# .format-style tags
('The {0} barn is {1!r}.', 'THE {0} BARN IS {1!r}.'),
# HTML entities
('<b>&copy; 2013 edX, &#xa0;</b>', '<b>&copy; 2013 EDX, &#xa0;</b>'),
]
for source, expected in test_cases:
result = c.convert(source)
self.assertEquals(result, expected)
......@@ -18,23 +18,24 @@ class TestDummy(TestCase):
Tests with a dummy converter (adds spurious accents to strings).
Assert that embedded HTML and python tags are not converted.
"""
test_cases = (("hello my name is Bond, James Bond",
u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'),
('don\'t convert <a href="href">tag ids</a>',
u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'),
('don\'t convert %(name)s tags on %(date)s',
u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#")
)
for (source, expected) in test_cases:
test_cases = [
("hello my name is Bond, James Bond",
u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'),
('don\'t convert <a href="href">tag ids</a>',
u'd\xf8n\'t \xe7\xf8nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'),
('don\'t convert %(name)s tags on %(date)s',
u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#")
]
for source, expected in test_cases:
result = self.converter.convert(source)
self.assertEquals(result, expected)
def test_singular(self):
entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.'
expected = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#'
expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
self.converter.convert_msg(entry)
self.assertEquals(entry.msgstr, expected)
......@@ -42,8 +43,8 @@ class TestDummy(TestCase):
entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.'
entry.msgid_plural = 'A lovely day for some cups of tea.'
expected_s = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#'
expected_p = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r s\xf6m\xe9 \xe7\xfcps \xf6f t\xe9\xe4. Lorem ip#'
expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#'
self.converter.convert_msg(entry)
result = entry.msgstr_plural
self.assertEquals(result['0'], expected_s)
......
......@@ -4,14 +4,14 @@ from nose.plugins.skip import SkipTest
from config import LOCALE_DIR
from execute import call
def test_po_files(root=LOCALE_DIR):
"""
This is a generator. It yields all of the .po files under root, and tests each one.
"""
log = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
for (dirpath, dirnames, filenames) in os.walk(root):
for name in filenames:
(base, ext) = os.path.splitext(name)
......@@ -24,11 +24,8 @@ def validate_po_file(filename, log):
Call GNU msgfmt -c on each .po file to validate its format.
Any errors caught by msgfmt are logged to log.
"""
# Skip this test for now because it's very noisy
raise SkipTest()
# Use relative paths to make output less noisy.
rfile = os.path.relpath(filename, LOCALE_DIR)
(out, err) = call(['msgfmt','-c', rfile], working_directory=LOCALE_DIR)
if err != '':
log.warn('\n'+err)
......@@ -27,7 +27,7 @@ def clean_translated_locales():
for locale in CONFIGURATION.locales:
if locale != CONFIGURATION.source_locale:
clean_locale(locale)
def clean_locale(locale):
"""
Strips out the warning from all of a locale's translated po files
......@@ -58,7 +58,7 @@ def get_new_header(po):
return TRANSIFEX_HEADER % team
if __name__ == '__main__':
if len(sys.argv)<2:
if len(sys.argv) < 2:
raise Exception("missing argument: push or pull")
arg = sys.argv[1]
if arg == 'push':
......@@ -67,4 +67,3 @@ if __name__ == '__main__':
pull()
else:
raise Exception("unknown argument: (%s)" % arg)
......@@ -72,7 +72,8 @@ urlpatterns += (
js_info_dict = {
'domain': 'djangojs',
'packages': ('lms',),
# No packages needed, we get LOCALE_PATHS anyway.
'packages': (),
}
urlpatterns += (
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment