Commit a6ffe8c1 by Dave St.Germain

Moving these tools to i18n-tools repo

parent dc22e696
#!/usr/bin/env python
"""
Utility for cleaning up your local directory after switching between
branches with different translation levels (eg master branch, with only
reviewed translations, versus dev branch, with all translations)
"""
from __future__ import print_function
import os
from i18n.config import CONFIGURATION
from i18n.execute import execute
def clean_conf_folder(locale):
    """Remove the configuration directory for `locale`.

    Failures are printed and swallowed so that cleanup can continue
    with the remaining locales.
    """
    dirname = CONFIGURATION.get_messages_dir(locale)
    command = "rm -rf {}".format(dirname)
    print(command)
    try:
        execute(command)
    # Broad catch is deliberate: this is best-effort cleanup.
    except Exception as exc:  # pylint: disable=broad-except
        print("Encountered error {}; continuing...".format(exc))
def clean_configuration_directory():
    """
    Remove the configuration directory of every locale listed in
    CONFIGURATION.translated_locales.
    """
    for translated_locale in CONFIGURATION.translated_locales:
        clean_conf_folder(translated_locale)


if __name__ == '__main__':
    clean_configuration_directory()
import os
import yaml
from path import path
# BASE_DIR is the working directory from which to execute django-admin commands.
# Typically this should be the 'edx-platform' directory.
BASE_DIR = path(__file__).abspath().dirname().dirname()

# LOCALE_DIR contains the locale files.
# Typically this should be 'edx-platform/conf/locale'.
LOCALE_DIR = BASE_DIR.joinpath('conf', 'locale')
class Configuration(object):
    """
    Reads localization configuration in YAML format.

    (Note: the previous docstring said "json format", but `read_config`
    loads the file with yaml.safe_load.)

    Settings absent from the file fall back to DEFAULTS, exposed as
    attributes via __getattr__.
    """
    # Fallback values for settings missing from the config file.
    DEFAULTS = {
        'dummy_locales': [],
        'generate_merge': {},
        'ignore_dirs': [],
        'locales': ['en'],
        'segment': {},
        'source_locale': 'en',
        'third_party': [],
    }

    def __init__(self, filename):
        self._filename = filename
        self._config = self.read_config(filename)

    def read_config(self, filename):
        """
        Returns data found in config file (as dict), or raises exception if file not found
        """
        if not os.path.exists(filename):
            raise Exception("Configuration file cannot be found: %s" % filename)
        with open(filename) as stream:
            return yaml.safe_load(stream)

    def __getattr__(self, name):
        # Only settings with declared defaults are exposed as attributes;
        # anything else is a genuine AttributeError.
        if name in self.DEFAULTS:
            return self._config.get(name, self.DEFAULTS[name])
        raise AttributeError("Configuration has no such setting: {!r}".format(name))

    def get_messages_dir(self, locale):
        """
        Returns the name of the directory holding the po files for locale.
        Example: edx-platform/conf/locale/fr/LC_MESSAGES
        """
        return LOCALE_DIR.joinpath(locale, 'LC_MESSAGES')

    @property
    def source_messages_dir(self):
        """
        Returns the name of the directory holding the source-language po files (English).
        Example: edx-platform/conf/locale/en/LC_MESSAGES
        """
        return self.get_messages_dir(self.source_locale)

    @property
    def translated_locales(self):
        """
        Returns the sorted list of locales to be translated
        (all configured locales except source_locale).
        """
        return sorted(set(self.locales) - set([self.source_locale]))
# Module-level singleton, loaded from conf/locale/config.yaml at import time.
CONFIGURATION = Configuration(LOCALE_DIR.joinpath('config.yaml').normpath())
import re
import itertools
class Converter(object):
    """Converter is an abstract class that transforms strings.

    It hides embedded tags (HTML or Python sequences) from transformation.
    To implement Converter, provide an implementation for inner_convert_string().

    Strategy:
        1. extract tags embedded in the string
           a. use the index of each extracted tag to re-insert it later
           b. replace tags in string with numbers (<0>, <1>, etc.)
           c. save extracted tags in a separate list
        2. convert string
        3. re-insert the extracted tags
    """

    # matches tags like these:
    #    HTML:   <B>, </B>, <BR/>, <textformat leading="10">
    #    Python: %(date)s, %(name)s
    tag_pattern = re.compile(
        r'''
        (<[^>]+>)        | # <tag>
        ({[^}]+})        | # {tag}
        (%\([\w]+\)\w)   | # %(tag)s
        (&\w+;)          | # &entity;
        (&\#\d+;)        | # &#1234;
        (&\#x[0-9a-f]+;)   # &#xABCD;
        ''',
        re.IGNORECASE | re.VERBOSE
    )

    def convert(self, string):
        """Returns: a converted tagged string
        param: string (contains html tags)

        Don't replace characters inside tags.
        """
        (string, tags) = self.detag_string(string)
        string = self.inner_convert_string(string)
        string = self.retag_string(string, tags)
        return string

    def detag_string(self, string):
        """Extracts tags from string.

        returns (string, list) where
        string: string has tags replaced by indices (<BR>... => <0>, <1>, <2>, etc.)
        list: list of the removed tags ('<BR>', '<I>', '</I>')
        """
        counter = itertools.count(0)
        # Use the builtin next() instead of counter.next(): the .next()
        # method exists only on Python 2; next() works on 2.6+ and 3.x.
        count = lambda m: '<%s>' % next(counter)
        tags = self.tag_pattern.findall(string)
        # findall returns one tuple per match (one slot per alternative);
        # joining collapses each tuple to the single non-empty group.
        tags = [''.join(tag) for tag in tags]

        (new, nfound) = self.tag_pattern.subn(count, string)
        if len(tags) != nfound:
            raise Exception('tags dont match:' + string)
        return (new, tags)

    def retag_string(self, string, tags):
        """substitutes each tag back into string, into occurrences of <0>, <1> etc"""
        for (i, tag) in enumerate(tags):
            p = '<%s>' % i
            string = re.sub(p, tag, string, 1)
        return string

    # ------------------------------
    # Customize this in subclasses of Converter

    def inner_convert_string(self, string):
        return string  # do nothing by default
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Generate test translation files from human-readable po files.
Dummy language is specified in configuration file (see config.py)
two letter language codes reference:
see http://www.loc.gov/standards/iso639-2/php/code_list.php
Django will not localize in languages that django itself has not been
localized for. So we are using a well-known language (default='eo').
Django languages are listed in django.conf.global_settings.LANGUAGES
po files can be generated with this:
django-admin.py makemessages --all --extension html -l en
Usage:
$ ./dummy.py
generates output conf/locale/$DUMMY_LOCALE/LC_MESSAGES,
where $DUMMY_LOCALE is the dummy_locale value set in the i18n config
"""
from __future__ import print_function
import re
import sys
import argparse
import polib
from path import path
from i18n.config import CONFIGURATION
from i18n.converter import Converter
class BaseDummyConverter(Converter):
    """Base class for dummy converters.

    String conversion goes through a character map, then gets padded.
    """
    # Subclasses override this with a plain-char -> fancy-char mapping.
    TABLE = {}

    def inner_convert_string(self, string):
        for old, new in self.TABLE.items():
            string = string.replace(old, new)
        return self.pad(string)

    def pad(self, string):
        """No-op hook; subclasses may lengthen the string."""
        return string

    def convert_msg(self, msg):
        """
        Takes one POEntry object and converts it (adds a dummy translation to it)

        msg is an instance of polib.POEntry
        """
        source = msg.msgid
        if not source:
            # don't translate empty string
            return

        plural = msg.msgid_plural
        if plural:
            # translate singular and plural
            foreign_single = self.convert(source)
            foreign_plural = self.convert(plural)
            plural = {
                '0': self.final_newline(source, foreign_single),
                '1': self.final_newline(plural, foreign_plural),
            }
            msg.msgstr_plural = plural
        else:
            foreign = self.convert(source)
            msg.msgstr = self.final_newline(source, foreign)

    def final_newline(self, original, translated):
        """Return `translated`, ensuring it ends with a newline whenever
        `original` does.
        """
        # str.endswith avoids the IndexError the old `translated[-1]`
        # indexing raised when `translated` was empty; it also safely
        # handles an empty `original`.
        if original.endswith('\n') and not translated.endswith('\n'):
            translated += '\n'
        return translated
class Dummy(BaseDummyConverter):
    r"""
    Creates new localization properties files in a dummy language.

    Each property file is derived from the equivalent en_US file, with these
    transformations applied:

    1. Every vowel is replaced with an equivalent with extra accent marks.
    2. Every string is padded out to +30% length to simulate verbose languages
       (such as German) to see if layout and flows work properly.
    3. Every string is terminated with a '#' character to make it easier to detect
       truncation.

    Example use::

        >>> from dummy import Dummy
        >>> c = Dummy()
        >>> c.convert("My name is Bond, James Bond")
        u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
        >>> print c.convert("My name is Bond, James Bond")
        Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
        >>> print c.convert("don't convert <a href='href'>tag ids</a>")
        døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
        >>> print c.convert("don't convert %(name)s tags on %(date)s")
        døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#
    """
    # Substitute plain characters with accented lookalikes.
    # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
    TABLE = dict(zip(
        u"AabCcEeIiOoUuYy",
        u"ÀäßÇçÉéÌïÖöÛüÝý"
    ))

    # The print industry's standard dummy text, in use since the 1500s
    # see http://www.lipsum.com/, then fed through a "fancy-text" converter.
    # The string should start with a space, so that it joins nicely with the text
    # that precedes it.  The Lorem contains an apostrophe since French often does,
    # and translated strings get put into single-quoted strings, which then break.
    LOREM = " " + " ".join(  # join and split just make the string easier here.
        u"""
        Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
        тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
        νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
        ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
        νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
        ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
        єѕт łαвσяυм.
        """.split()
    )

    # To simulate more verbose languages (like German), pad the length of a string
    # by a multiple of PAD_FACTOR
    PAD_FACTOR = 1.33

    def pad(self, string):
        """add some lorem ipsum text to the end of string"""
        size = len(string)
        if size < 7:
            target = size * 3
        else:
            target = int(size * self.PAD_FACTOR)
        # Clamp at zero: for an empty string the old `target - size - 1`
        # went negative and the slice LOREM[:-1] appended nearly all of
        # LOREM instead of nothing.
        pad_len = max(target - size - 1, 0)
        return string + self.LOREM[:pad_len] + "#"
class Dummy2(BaseDummyConverter):
    """A second dummy converter.

    Like Dummy, but applies a different obvious-yet-readable automatic
    conversion: many letters are struck through, and lower-case letters
    are turned upside-down.
    """
    # Map each plain character to its struck-through / flipped counterpart.
    TABLE = dict(zip(
        u"ABCDEGHIJKLOPRTUYZabcdefghijklmnopqrstuvwxyz",
        u"ȺɃȻĐɆǤĦƗɈꝀŁØⱣɌŦɄɎƵɐqɔpǝɟƃɥᴉɾʞlɯuødbɹsʇnʌʍxʎz"
    ))
def make_dummy(filename, locale, converter):
    """
    Read the source po file at `filename` and write out a new po file
    under `locale` containing a dummy translation produced by `converter`.
    """
    if not path(filename).exists():
        raise IOError('File does not exist: %r' % filename)

    dummy_po = polib.pofile(filename)
    format_msgid = re.compile(r"^[A-Z_]+_FORMAT$")
    for entry in dummy_po:
        # Some strings are actually formatting strings, don't dummy-ify them,
        # or dates will look like "DÀTÉ_TÌMÉ_FÖRMÀT Ⱡ'σ# EST"
        if format_msgid.match(entry.msgid):
            continue
        converter.convert_msg(entry)

    # Apply declaration for English pluralization rules so that ngettext will
    # do something reasonable.
    dummy_po.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);'

    new_file = new_filename(filename, locale)
    new_file.parent.makedirs_p()
    dummy_po.save(new_file)
def new_filename(original_filename, new_locale):
    """Returns a filename derived from original_filename, using new_locale as the locale"""
    orig = path(original_filename)
    # .../<locale>/LC_MESSAGES/file.po -> .../<new_locale>/LC_MESSAGES/file.po
    relocated = orig.parent.parent.parent / new_locale / orig.parent.name / orig.name
    return relocated.abspath()
def main(verbosity=1):
    """
    Generate dummy strings for all source po files.

    Pairs each locale in CONFIGURATION.dummy_locales with a converter,
    in order: first locale gets Dummy, second gets Dummy2.
    """
    # Hoist the property lookup; also reuse it in the loop instead of
    # re-reading CONFIGURATION.source_messages_dir every iteration.
    source_msgs_dir = CONFIGURATION.source_messages_dir
    for locale, converter in zip(CONFIGURATION.dummy_locales, [Dummy(), Dummy2()]):
        if verbosity:
            print('Processing source language files into dummy strings, locale "{}"'.format(locale))
        for source_file in source_msgs_dir.walkfiles('*.po'):
            if verbosity:
                print(' ', source_file.relpath())
            make_dummy(source_msgs_dir.joinpath(source_file), locale, converter)
    if verbosity:
        print()
if __name__ == '__main__':
    # pylint: disable=invalid-name
    # Repeating -v raises the verbosity level (argparse "count" action).
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--verbose", "-v", action="count", default=0)
    args = parser.parse_args()
    main(verbosity=args.verbose)
"""
Utility library file for executing shell commands
"""
import os
import subprocess
import logging
from i18n.config import BASE_DIR
# Module-level logger; configuration is left to the entry-point scripts.
LOG = logging.getLogger(__name__)
def execute(command, working_directory=BASE_DIR, stderr=subprocess.STDOUT):
    """
    Executes shell command in a given working_directory.
    Command is a string to pass to the shell.
    Output is ignored.

    Raises subprocess.CalledProcessError if the command exits non-zero.
    """
    # NOTE(review): `command` is run through the shell (shell=True); callers
    # must never interpolate untrusted input into the command string.
    LOG.info("Executing in %s ...", working_directory)
    LOG.info(command)
    subprocess.check_call(command, cwd=working_directory, stderr=stderr, shell=True)
def call(command, working_directory=BASE_DIR):
    """
    Executes shell command in a given working_directory.
    Command is a list of strings to execute as a command line.
    Returns a tuple of two strings: (stdout, stderr)
    """
    # NOTE(review): the docstring says `command` is a list, but with
    # shell=True a POSIX shell treats only the first list element as the
    # command (the rest become shell positional parameters). Confirm how
    # callers invoke this before changing shell=True.
    LOG.info(command)
    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_directory, shell=True)
    out, err = p.communicate()
    return (out, err)
def remove_file(filename, verbose=True):
    """
    Attempt to delete `filename`.

    verbose is boolean. If true, removal is logged.
    Logs a warning if the file does not exist.
    Logged filenames are relative to BASE_DIR to cut down on noise in output.
    """
    if verbose:
        # Lazy %-style args defer formatting until the record is emitted.
        LOG.info('Deleting file %s', os.path.relpath(filename, BASE_DIR))
    if not os.path.exists(filename):
        # Logger.warn is deprecated in favor of Logger.warning.
        LOG.warning("File does not exist: %s", os.path.relpath(filename, BASE_DIR))
    else:
        os.remove(filename)
#!/usr/bin/env python
"""
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
This task extracts all English strings from all source code
and produces three human-readable files:
conf/locale/en/LC_MESSAGES/django-partial.po
conf/locale/en/LC_MESSAGES/djangojs-partial.po
conf/locale/en/LC_MESSAGES/mako.po
This task will clobber any existing django.po file.
This is because django-admin.py makemessages hardcodes this filename
and it cannot be overridden.
"""
from datetime import datetime
import importlib
import os
import os.path
import logging
import sys
import argparse
from path import path
from polib import pofile
from i18n.config import BASE_DIR, LOCALE_DIR, CONFIGURATION
from i18n.execute import execute, remove_file
from i18n.segment import segment_pofiles
# Marker inserted into generated po-file headers to identify edX files.
EDX_MARKER = "edX translation file"
LOG = logging.getLogger(__name__)
# Sink used to suppress subprocess output when verbosity is 0.
DEVNULL = open(os.devnull, 'wb')
def base(path1, *paths):
    """Return a relative path from BASE_DIR to path1 / paths[0] / ... """
    joined = path1.joinpath(*paths)
    return BASE_DIR.relpathto(joined)
def main(verbosity=1):
    """
    Main entry point of script.

    Extracts English source strings from mako templates, underscore
    templates, django .py/.html files, javascript files, and configured
    third-party apps, then segments and normalizes the resulting po files.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    LOCALE_DIR.parent.makedirs_p()
    source_msgs_dir = CONFIGURATION.source_messages_dir
    # django.po is regenerated below; remove any stale copy first.
    remove_file(source_msgs_dir.joinpath('django.po'))

    # Extract strings from mako templates.
    verbosity_map = {
        0: "-q",
        1: "",
        2: "-v",
    }
    babel_verbosity = verbosity_map.get(verbosity, "")

    if verbosity:
        stderr = None
    else:
        # Suppress subprocess chatter entirely when not verbose.
        stderr = DEVNULL

    babel_cmd_template = 'pybabel {verbosity} extract -F {config} -c "Translators:" . -o {output}'

    babel_mako_cmd = babel_cmd_template.format(
        verbosity=babel_verbosity,
        config=base(LOCALE_DIR, 'babel_mako.cfg'),
        output=base(CONFIGURATION.source_messages_dir, 'mako.po'),
    )
    execute(babel_mako_cmd, working_directory=BASE_DIR, stderr=stderr)

    # Extract strings from underscore templates.
    babel_underscore_cmd = babel_cmd_template.format(
        verbosity=babel_verbosity,
        config=base(LOCALE_DIR, 'babel_underscore.cfg'),
        output=base(CONFIGURATION.source_messages_dir, 'underscore.po'),
    )
    execute(babel_underscore_cmd, working_directory=BASE_DIR, stderr=stderr)

    makemessages = "django-admin.py makemessages -l en -v{}".format(verbosity)
    ignores = " ".join('--ignore="{}/*"'.format(d) for d in CONFIGURATION.ignore_dirs)
    if ignores:
        makemessages += " " + ignores

    # Extract strings from django source files, including .py files.
    make_django_cmd = makemessages + ' --extension html'
    execute(make_django_cmd, working_directory=BASE_DIR, stderr=stderr)

    # Extract strings from Javascript source files.
    make_djangojs_cmd = makemessages + ' -d djangojs --extension js'
    execute(make_djangojs_cmd, working_directory=BASE_DIR, stderr=stderr)

    # makemessages creates 'django.po'. This filename is hardcoded.
    # Rename it to django-partial.po to enable merging into django.po later.
    os.rename(
        source_msgs_dir.joinpath('django.po'),
        source_msgs_dir.joinpath('django-partial.po')
    )

    # makemessages creates 'djangojs.po'. This filename is hardcoded.
    # Rename it to djangojs-partial.po to enable merging into djangojs.po later.
    os.rename(
        source_msgs_dir.joinpath('djangojs.po'),
        source_msgs_dir.joinpath('djangojs-partial.po')
    )

    files_to_clean = set()

    # Extract strings from third-party applications.
    for app_name in CONFIGURATION.third_party:
        # Import the app to find out where it is. Then use pybabel to extract
        # from that directory.
        app_module = importlib.import_module(app_name)
        app_dir = path(app_module.__file__).dirname().dirname()
        output_file = source_msgs_dir / (app_name + ".po")
        files_to_clean.add(output_file)

        babel_cmd = 'pybabel {verbosity} extract -F {config} -c "Translators:" {app} -o {output}'
        babel_cmd = babel_cmd.format(
            verbosity=babel_verbosity,
            config=LOCALE_DIR / 'babel_third_party.cfg',
            app=app_name,
            output=output_file,
        )
        execute(babel_cmd, working_directory=app_dir, stderr=stderr)

    # Segment the generated files.
    segmented_files = segment_pofiles("en")
    files_to_clean.update(segmented_files)

    # Finish each file.
    for filename in files_to_clean:
        LOG.info('Cleaning %s' % filename)
        po = pofile(source_msgs_dir.joinpath(filename))
        # replace default headers with edX headers
        fix_header(po)
        # replace default metadata with edX metadata
        fix_metadata(po)
        # remove key strings which belong in messages.po
        strip_key_strings(po)
        po.save()
def fix_header(po):
    """
    Replace default headers with edX headers
    """
    # By default, django-admin.py makemessages creates this header:
    #
    #   SOME DESCRIPTIVE TITLE.
    #   Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
    #   This file is distributed under the same license as the PACKAGE package.
    #   FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.

    po.metadata_is_fuzzy = []   # remove [u'fuzzy']
    gnu_license = 'This file is distributed under the GNU AFFERO GENERAL PUBLIC LICENSE.'
    replacements = (
        ('SOME DESCRIPTIVE TITLE', EDX_MARKER),
        ('Translations template for PROJECT.', EDX_MARKER),
        ('YEAR', str(datetime.utcnow().year)),
        ('ORGANIZATION', 'edX'),
        ("THE PACKAGE'S COPYRIGHT HOLDER", "EdX"),
        (
            'This file is distributed under the same license as the PROJECT project.',
            gnu_license
        ),
        (
            'This file is distributed under the same license as the PACKAGE package.',
            gnu_license
        ),
        ('FIRST AUTHOR <EMAIL@ADDRESS>', 'EdX Team <info@edx.org>'),
    )
    new_header = po.header
    for old, new in replacements:
        new_header = new_header.replace(old, new)
    po.header = new_header
def fix_metadata(po):
    """
    Replace default metadata with edX metadata
    """
    # By default, django-admin.py makemessages creates this metadata:
    #
    #   {u'PO-Revision-Date': u'YEAR-MO-DA HO:MI+ZONE',
    #    u'Language': u'',
    #    u'Content-Transfer-Encoding': u'8bit',
    #    u'Project-Id-Version': u'PACKAGE VERSION',
    #    u'Report-Msgid-Bugs-To': u'',
    #    u'Last-Translator': u'FULL NAME <EMAIL@ADDRESS>',
    #    u'Language-Team': u'LANGUAGE <LL@li.org>',
    #    u'POT-Creation-Date': u'2013-04-25 14:14-0400',
    #    u'Content-Type': u'text/plain; charset=UTF-8',
    #    u'MIME-Version': u'1.0'}
    edx_metadata = {
        'PO-Revision-Date': datetime.utcnow(),
        'Report-Msgid-Bugs-To': 'openedx-translation@googlegroups.com',
        'Project-Id-Version': '0.1a',
        'Language': 'en',
        'Last-Translator': '',
        'Language-Team': 'openedx-translation <openedx-translation@googlegroups.com>',
    }
    po.metadata.update(edx_metadata)
def strip_key_strings(po):
    """
    Removes all entries in PO which are key strings.
    These entries should appear only in messages.po, not in any other po files.
    """
    kept_entries = [entry for entry in po if not is_key_string(entry.msgid)]
    # Replace the contents in place so the POFile object itself survives.
    po[:] = kept_entries
def is_key_string(string):
    """
    Returns True if `string` is a key string.
    Key strings begin with an underscore followed by at least one more character.
    """
    return string.startswith('_') and len(string) > 1
if __name__ == '__main__':
    # pylint: disable=invalid-name
    # Repeating -v raises the verbosity level (argparse "count" action).
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--verbose', '-v', action='count', default=0)
    args = parser.parse_args()
    main(verbosity=args.verbose)
#!/usr/bin/env python
"""
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
This task merges and compiles the human-readable .po files on the
local filesystem into machine-readable .mo files. This is typically
necessary as part of the build process since these .mo files are
needed by Django when serving the web app.
The configuration file (in edx-platform/conf/locale/config.yaml) specifies which
languages to generate.
"""
import argparse
import logging
import os
import sys
from polib import pofile
from i18n.config import BASE_DIR, CONFIGURATION
from i18n.execute import execute
# Module-level logger for merge/compile progress.
LOG = logging.getLogger(__name__)
# Sink used to suppress subprocess output when verbosity is 0.
DEVNULL = open(os.devnull, "wb")
def merge(locale, target='django.po', sources=('django-partial.po',), fail_if_missing=True):
    """
    For the given locale, merge the `sources` files to become the `target`
    file.  Note that the target file might also be one of the sources.

    If fail_if_missing is true, and the files to be merged are missing,
    throw an Exception, otherwise return silently.

    If fail_if_missing is false, and the files to be merged are missing,
    just return silently.
    """
    LOG.info('Merging {target} for locale {locale}'.format(target=target, locale=locale))
    locale_directory = CONFIGURATION.get_messages_dir(locale)
    try:
        validate_files(locale_directory, sources)
    # `except Exception:` replaces the Python-2-only `except Exception, e`
    # form; the bound exception object was never used.
    except Exception:
        if not fail_if_missing:
            return
        raise

    # merged file is merged.po
    merge_cmd = 'msgcat -o merged.po ' + ' '.join(sources)
    execute(merge_cmd, working_directory=locale_directory)

    # clean up redundancies in the metadata
    merged_filename = locale_directory.joinpath('merged.po')
    clean_pofile(merged_filename)

    # rename merged.po -> django.po (default)
    target_filename = locale_directory.joinpath(target)
    os.rename(merged_filename, target_filename)
def merge_files(locale, fail_if_missing=True):
    """
    Merge all the files in `locale`, as specified in config.yaml.
    """
    for merge_target, merge_sources in CONFIGURATION.generate_merge.items():
        merge(locale, merge_target, merge_sources, fail_if_missing)
def clean_pofile(file):
    """
    Clean various aspects of a .po file.

    Fixes:

    - Removes the ,fuzzy flag on metadata.

    - Removes occurrence line numbers so that the generated files don't
      generate a lot of line noise when they're committed.

    - Removes any flags ending with "-format".  Mac gettext seems to add
      these flags, Linux does not, and we don't seem to need them.  By
      removing them, we reduce the unimportant differences that clutter
      diffs as different developers work on the files.
    """
    # Reading in the .po file and saving it again fixes redundancies.
    pomsgs = pofile(file)
    # The msgcat tool marks the metadata as fuzzy, but it's ok as it is.
    pomsgs.metadata_is_fuzzy = False
    for entry in pomsgs:
        # Keep only the file names from the occurrences, dropping line numbers.
        entry.occurrences = [(occ_file, None) for (occ_file, _line_num) in entry.occurrences]
        # Drop flags like "python-format" / "c-format".
        entry.flags = [flag for flag in entry.flags if not flag.endswith("-format")]
    pomsgs.save()
def validate_files(dir, files_to_merge):
    """
    Asserts that the given files exist.

    Arguments:
        dir: the directory (a path object from path.py) in which the files
            should appear.
        files_to_merge: a list of file names (no directories).

    Raises:
        Exception: if any of the files are not in dir.
    """
    # Loop variable renamed from `path`, which shadowed the module-level
    # `path` import from path.py.
    for file_name in files_to_merge:
        pathname = dir.joinpath(file_name)
        if not pathname.exists():
            raise Exception("I18N: Cannot generate because file not found: {0}".format(pathname))
def main(strict=True, verbosity=1):
    """
    Main entry point for script
    """
    for locale in CONFIGURATION.translated_locales:
        merge_files(locale, fail_if_missing=strict)

    # Dummy text is not required. Don't raise exception if files are missing.
    for dummy_locale in CONFIGURATION.dummy_locales:
        merge_files(dummy_locale, fail_if_missing=False)

    # Silence compilemessages output entirely unless we're verbose.
    stderr = None if verbosity else DEVNULL
    compile_cmd = 'django-admin.py compilemessages -v{}'.format(verbosity)
    execute(compile_cmd, working_directory=BASE_DIR, stderr=stderr)
if __name__ == '__main__':
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    # pylint: disable=invalid-name
    # --strict makes missing translation files fatal; -v may be repeated.
    parser = argparse.ArgumentParser(description="Generate merged and compiled message files.")
    parser.add_argument("--strict", action='store_true', help="Complain about missing files.")
    parser.add_argument("--verbose", "-v", action="count", default=0)
    args = parser.parse_args()
    main(strict=args.strict, verbosity=args.verbose)
#!/usr/bin/env python
"""
Segment a .po file to produce smaller files based on the locations of the
messages.
"""
import copy
import fnmatch
import logging
import sys
import argparse
import polib
import textwrap
from i18n.config import CONFIGURATION
# Module-level logger for segmentation progress and warnings.
LOG = logging.getLogger(__name__)
def segment_pofiles(locale):
    """Segment all the pofiles for `locale`.

    Returns a set of filenames, all the segment files written.
    """
    files_written = set()
    for po_name, segment_spec in CONFIGURATION.segment.items():
        po_path = CONFIGURATION.get_messages_dir(locale) / po_name
        files_written.update(segment_pofile(po_path, segment_spec))
    return files_written
def segment_pofile(filename, segments):
    """Segment a .po file using patterns in `segments`.

    The .po file at `filename` is read, and the occurrence locations of its
    messages are examined.  `segments` is a dictionary: the keys are segment
    .po filenames, the values are lists of patterns::

        {
            'django-studio.po': [
                'cms/*',
                'some-other-studio-place/*',
            ],
            'django-weird.po': [
                '*/weird_*.*',
            ],
        }

    If all a message's occurrences match the patterns for a segment, then that
    message is written to the new segmented .po file.

    Any message that matches no segments, or more than one, is written back to
    the original file.

    Arguments:
        filename (path.path): a path object referring to the original .po file.
        segments (dict): specification of the segments to create.

    Returns:
        a set of path objects, all the segment files written.

    """
    reading_msg = "Reading {num} entries from {file}"
    writing_msg = "Writing {num} entries to {file}"

    source_po = polib.pofile(filename)
    LOG.info(reading_msg.format(file=filename, num=len(source_po)))

    # A new pofile just like the source, but with no messages. We'll put
    # anything not segmented into this file.
    remaining_po = copy.deepcopy(source_po)
    remaining_po[:] = []

    # Turn the segments dictionary into two structures: segment_patterns is a
    # list of (pattern, segmentfile) pairs.  segment_po_files is a dict mapping
    # segment file names to pofile objects of their contents.
    segment_po_files = {filename: remaining_po}
    segment_patterns = []
    for segmentfile, patterns in segments.items():
        segment_po_files[segmentfile] = copy.deepcopy(remaining_po)
        segment_patterns.extend((pat, segmentfile) for pat in patterns)

    # Examine each message in the source file. If all of its occurrences match
    # a pattern for the same segment, it goes in that segment.  Otherwise, it
    # goes in remaining.
    for msg in source_po:
        msg_segments = set()
        for occ_file, _ in msg.occurrences:
            for pat, segment_file in segment_patterns:
                if fnmatch.fnmatch(occ_file, pat):
                    msg_segments.add(segment_file)
                    break
            else:
                # for/else: no pattern matched this occurrence, so this
                # occurrence votes for the original file.
                msg_segments.add(filename)

        assert msg_segments
        if len(msg_segments) == 1:
            # This message belongs in this segment.
            segment_file = msg_segments.pop()
            segment_po_files[segment_file].append(msg)
        else:
            # It's in more than one segment, so put it back in the main file.
            remaining_po.append(msg)

    # Write out the results.
    files_written = set()
    for segment_file, pofile in segment_po_files.items():
        out_file = filename.dirname() / segment_file
        if len(pofile) == 0:
            LOG.error("No messages to write to {file}, did you run segment twice?".format(file=out_file))
        else:
            LOG.info(writing_msg.format(file=out_file, num=len(pofile)))
            pofile.save(out_file)
            files_written.add(out_file)

    return files_written
def main(locales=None, verbosity=1):  # pylint: disable=unused-argument
    """
    Main entry point of script
    """
    # This is used as a tool only to segment translation files when adding a
    # new segment.  In the regular workflow, the work is done by the extract
    # phase calling the functions above.
    for locale in (locales or []):
        segment_pofiles(locale)
if __name__ == "__main__":
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# pylint: disable=invalid-name
description = textwrap.dedent("""
Segment the .po files in LOCALE(s) based on the segmenting rules in
config.yaml.
Note that segmenting is *not* idempotent: it modifies the input file, so
be careful that you don't run it twice on the same file.
""".strip())
parser = argparse.ArgumentParser(description=description)
parser.add_argument("locale", nargs="+", help="a locale to segment")
parser.add_argument("--verbose", "-v", action="count", default=0)
args = parser.parse_args()
main(locales=args.locale, verbosity=args.verbose)
# This is test data.
#
msgid ""
msgstr ""
"Project-Id-Version: 0.1a\n"
"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n"
"POT-Creation-Date: 2014-01-22 15:35-0500\n"
"PO-Revision-Date: 2014-01-22 20:35:52.096456\n"
"Last-Translator: \n"
"Language-Team: openedx-translation <openedx-translation@googlegroups.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: en\n"
#: cms/djangoapps/contentstore/views/tabs.py:39
#: lms/djangoapps/instructor/views/instructor_dashboard.py:111
msgid "Course Info"
msgstr "stuff about the course"
#: common/djangoapps/course_modes/models.py:43
msgid "Honor Code Certificate"
msgstr "your paper"
#: common/djangoapps/course_modes/views.py:81
#: common/djangoapps/student/views.py:478
msgid "Enrollment is closed"
msgstr "no way, dude"
#: common/static/js/vendor/mathjax-MathJax-c9db6ac/docs/source/mjtheme/layout.html:129
#: lms/templates/wiki/plugins/attachments/index.html:40
msgid "Search"
msgstr "find it!"
#: lms/djangoapps/courseware/features/video.py:111
msgid "ERROR: No playable video sources found!"
msgstr "try youtube, dude!"
# This is test data.
#
msgid ""
msgstr ""
"Project-Id-Version: 0.1a\n"
"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n"
"POT-Creation-Date: 2014-01-22 15:35-0500\n"
"PO-Revision-Date: 2014-01-22 20:35:52.096456\n"
"Last-Translator: \n"
"Language-Team: openedx-translation <openedx-translation@googlegroups.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: en\n"
#: cms/djangoapps/contentstore/views/tabs.py:39
#: lms/djangoapps/instructor/views/instructor_dashboard.py:111
msgid "Course Info"
msgstr "stuff about the course"
#: common/djangoapps/course_modes/models.py:43
msgid "Honor Code Certificate"
msgstr "your paper"
#: common/djangoapps/course_modes/views.py:81
#: common/djangoapps/student/views.py:478
msgid "Enrollment is closed"
msgstr "no way, dude"
#: cms/djangoapps/contentstore/views/course.py:237
msgid ""
"There is already a course defined with the same organization, course number,"
" and course run. Please change either organization or course number to be "
"unique."
msgstr "org/course/run, wtf??"
#: cms/djangoapps/contentstore/views/course.py:243
#: cms/djangoapps/contentstore/views/course.py:247
#: other_cms/djangoapps/contentstore/views/course.py:269
#: cms/djangoapps/contentstore/views/course.py:272
msgid ""
"Please change either the organization or course number so that it is unique."
msgstr "pick again!"
#: common/static/js/vendor/mathjax-MathJax-c9db6ac/docs/source/mjtheme/layout.html:129
#: lms/templates/wiki/plugins/attachments/index.html:40
msgid "Search"
msgstr "find it!"
#: lms/djangoapps/courseware/features/video.py:111
msgid "ERROR: No playable video sources found!"
msgstr "try youtube, dude!"
# This is test data.
#
msgid ""
msgstr ""
"Project-Id-Version: 0.1a\n"
"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n"
"POT-Creation-Date: 2014-01-22 15:35-0500\n"
"PO-Revision-Date: 2014-01-22 20:35:52.096456\n"
"Last-Translator: \n"
"Language-Team: openedx-translation <openedx-translation@googlegroups.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: en\n"
#: cms/djangoapps/contentstore/views/course.py:237
msgid ""
"There is already a course defined with the same organization, course number,"
" and course run. Please change either organization or course number to be "
"unique."
msgstr "org/course/run, wtf??"
#: cms/djangoapps/contentstore/views/course.py:243
#: cms/djangoapps/contentstore/views/course.py:247
#: other_cms/djangoapps/contentstore/views/course.py:269
#: cms/djangoapps/contentstore/views/course.py:272
msgid ""
"Please change either the organization or course number so that it is unique."
msgstr "pick again!"
"""
Test that the compiled .mo files match the translations in the
uncompiled .po files.
This is required because we are checking in the .mo files into
the repo, but compiling them is a manual process. We want to make
sure that we find out if someone forgets the compilation step.
"""
import ddt
import polib
from unittest import TestCase
from i18n.config import CONFIGURATION, LOCALE_DIR
@ddt.ddt
class TestCompiledMessages(TestCase):
    """
    Check that every .mo file is a faithful compilation of its .po source.
    """

    PO_FILES = ['django.po', 'djangojs.po']

    @ddt.data(*CONFIGURATION.translated_locales)
    def test_translated_messages(self, locale):
        """Compare the .po and .mo catalogs for `locale`, entry by entry."""
        message_dir = LOCALE_DIR / locale / 'LC_MESSAGES'
        for po_name in self.PO_FILES:
            po_path = message_dir / po_name
            po_catalog = polib.pofile(po_path)
            mo_catalog = polib.mofile(po_path.stripext() + '.mo')

            po_entries = {entry.msgid: entry for entry in po_catalog.translated_entries()}
            mo_entries = {entry.msgid: entry for entry in mo_catalog.translated_entries()}

            # Both catalogs must contain exactly the same set of msgids.
            self.assertEquals(po_entries.viewkeys(), mo_entries.viewkeys())

            for msgid, po_entry in po_entries.iteritems():
                mo_entry = mo_entries[msgid]
                for attr in ('msgstr', 'msgid_plural', 'msgstr_plural', 'msgctxt', 'obsolete', 'encoding'):
                    po_attr = getattr(po_entry, attr)
                    mo_attr = getattr(mo_entry, attr)
                    if attr == 'msgstr_plural':
                        # polib keys msgstr_plural on strings in .po files but
                        # on ints in .mo files; normalize before comparing.
                        po_attr = {int(key): val for (key, val) in po_attr.items()}
                    self.assertEquals(
                        po_attr,
                        mo_attr,
                        "When comparing {} for entry {!r}, {!r} from the .po file doesn't match {!r} from the .mo file".format(
                            attr,
                            msgid,
                            po_attr,
                            mo_attr,
                        )
                    )
import os
from unittest import TestCase
from i18n.config import Configuration, LOCALE_DIR, CONFIGURATION
class TestConfiguration(TestCase):
    """
    Tests functionality of i18n/config.py
    """

    def test_config(self):
        """A Configuration built from the checked-in config.yaml uses 'en' as its source locale."""
        filename = os.path.normpath(os.path.join(LOCALE_DIR, 'config.yaml'))
        self.assertEqual(Configuration(filename).source_locale, 'en')

    def test_no_config(self):
        """Pointing at a missing file raises rather than silently defaulting."""
        filename = os.path.normpath(os.path.join(LOCALE_DIR, 'no_such_file'))
        with self.assertRaises(Exception):
            Configuration(filename)

    def test_valid_configuration(self):
        """
        Make sure we have a valid configuration file,
        and that it contains an 'en' locale.
        Also check values of dummy_locale and source_locale.
        """
        self.assertIsNotNone(CONFIGURATION)
        locales = CONFIGURATION.locales
        self.assertIsNotNone(locales)
        self.assertIsInstance(locales, list)
        self.assertIn('en', locales)
        self.assertEqual('eo', CONFIGURATION.dummy_locales[0])
        self.assertEqual('en', CONFIGURATION.source_locale)
"""Tests of i18n/converter.py"""
from unittest import TestCase
import ddt
from i18n import converter
class UpcaseConverter(converter.Converter):
    """
    Converts a string to uppercase. Just used for testing.
    """
    def inner_convert_string(self, string):
        # Hook invoked by the Converter base class; receives the text to
        # transform (tag handling is presumably done by the base class —
        # confirm in i18n/converter.py).
        return string.upper()
@ddt.ddt
class TestConverter(TestCase):
    """
    Tests functionality of i18n/converter.py
    """

    @ddt.data(
        # plain text, nothing to protect
        ('big bad wolf',
         'BIG BAD WOLF'),
        # a single HTML tag
        ('big <strong>bad</strong> wolf',
         'BIG <strong>BAD</strong> WOLF'),
        # a pair of HTML tags
        ('big <b>bad</b> gray <i>wolf</i>',
         'BIG <b>BAD</b> GRAY <i>WOLF</i>'),
        # HTML tags carrying attributes
        ('<a href="foo">bar</a> baz',
         '<a href="foo">BAR</a> BAZ'),
        ("<a href='foo'>bar</a> baz",
         "<a href='foo'>BAR</a> BAZ"),
        # a single %-style placeholder
        ('big %(adjective)s wolf',
         'BIG %(adjective)s WOLF'),
        # a pair of %-style placeholders
        ('big %(adjective)s gray %(noun)s',
         'BIG %(adjective)s GRAY %(noun)s'),
        # HTML tags and %-style placeholders together
        ('<strong>big</strong> %(adjective)s %(noun)s',
         '<strong>BIG</strong> %(adjective)s %(noun)s'),
        # str.format-style placeholders
        ('The {0} barn is {1!r}.',
         'THE {0} BARN IS {1!r}.'),
        # HTML entities must pass through untouched
        ('<b>&copy; 2013 edX, &#xa0;</b>',
         '<b>&copy; 2013 EDX, &#xa0;</b>'),
    )
    def test_converter(self, data):
        """
        Run UpcaseConverter over the source string and verify that only
        plain text is upper-cased: HTML tags, entities, %-style and
        .format-style placeholders must come through unchanged.
        """
        source, expected = data
        self.assertEquals(UpcaseConverter().convert(source), expected)
# -*- coding: utf-8 -*-
"""Tests of i18n/dummy.py"""
from unittest import TestCase
import ddt
from polib import POEntry
from i18n import dummy
@ddt.ddt
class TestDummy(TestCase):
    """
    Tests functionality of i18n/dummy.py
    """

    def setUp(self):
        self.converter = dummy.Dummy()

    def assertUnicodeEquals(self, str1, str2):
        """Compare like assertEquals, but keep raw Unicode out of the failure text.

        Some test runners choke on unusual Unicode characters in assertion
        messages, so the mismatch message uses repr of both values instead.
        """
        self.assertEquals(
            str1, str2,
            "Mismatch: %r != %r" % (str1, str2),
        )

    @ddt.data(
        (u"hello my name is Bond, James Bond",
         u"héllö mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм ι#"),
        (u"don't convert <a href='href'>tag ids</a>",
         u"dön't çönvért <a href='href'>täg ïds</a> Ⱡ'σяєм#"),
        (u"don't convert %(name)s tags on %(date)s",
         u"dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #"),
    )
    def test_dummy(self, data):
        """
        Accented "dummy" text is produced for plain words, while embedded
        HTML tags and %-style placeholders are left untouched.
        """
        source, expected = data
        self.assertUnicodeEquals(self.converter.convert(source), expected)

    def test_singular(self):
        """A singular msgid gets a single accented msgstr."""
        entry = POEntry()
        entry.msgid = "A lovely day for a cup of tea."
        self.converter.convert_msg(entry)
        self.assertUnicodeEquals(entry.msgstr, u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #")

    def test_plural(self):
        """A plural msgid fills both slots of msgstr_plural."""
        entry = POEntry()
        entry.msgid = "A lovely day for a cup of tea."
        entry.msgid_plural = "A lovely day for some cups of tea."
        self.converter.convert_msg(entry)
        plurals = entry.msgstr_plural
        self.assertUnicodeEquals(plurals['0'], u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #")
        self.assertUnicodeEquals(plurals['1'], u"À lövélý däý för sömé çüps öf téä. Ⱡ'σяєм ιρ#")
from datetime import datetime, timedelta
import os
from unittest import TestCase
from nose.plugins.skip import SkipTest
import polib
from pytz import UTC
from i18n import extract
from i18n.config import CONFIGURATION
# Make sure setup runs only once
SETUP_HAS_RUN = False
class TestExtract(TestCase):
    """
    Tests functionality of i18n/extract.py
    """
    # Files that extract.main() is expected to (re)generate.
    generated_files = ('django-partial.po', 'djangojs-partial.po', 'mako.po')

    def setUp(self):
        # Skip this test because it takes too long (>1 minute)
        # TODO: figure out how to declare a "long-running" test suite
        # and add this test to it.
        raise SkipTest()
        # NOTE(review): everything below is unreachable while the
        # unconditional SkipTest above is in place; kept for when the
        # test suite is re-enabled.
        global SETUP_HAS_RUN
        # Subtract 1 second to help comparisons with file-modify time succeed,
        # since os.path.getmtime() is not millisecond-accurate
        self.start_time = datetime.now(UTC) - timedelta(seconds=1)
        super(TestExtract, self).setUp()
        if not SETUP_HAS_RUN:
            # Run extraction script. Warning, this takes 1 minute or more
            extract.main(verbosity=0)
            SETUP_HAS_RUN = True

    def get_files(self):
        """
        This is a generator.
        Returns the fully expanded filenames for all extracted files
        Fails assertion if one of the files doesn't exist.
        """
        for filename in self.generated_files:
            path = os.path.join(CONFIGURATION.source_messages_dir, filename)
            exists = os.path.exists(path)
            self.assertTrue(exists, msg='Missing file: %s' % filename)
            if exists:
                yield path

    def test_files(self):
        """
        Asserts that each auto-generated file has been modified since 'extract' was launched.
        Intended to show that the file has been touched by 'extract'.
        """
        for path in self.get_files():
            # mtime comparison uses the padded start_time captured in setUp.
            self.assertTrue(datetime.fromtimestamp(os.path.getmtime(path)) > self.start_time,
                            msg='File not recently modified: %s' % os.path.basename(path))

    def test_is_keystring(self):
        """
        Verifies is_keystring predicate
        """
        entry1 = polib.POEntry()
        entry2 = polib.POEntry()
        # Key strings follow the "_.<dotted.path>" naming convention.
        entry1.msgid = "_.lms.admin.warning.keystring"
        entry2.msgid = "This is not a keystring"
        self.assertTrue(extract.is_key_string(entry1.msgid))
        self.assertFalse(extract.is_key_string(entry2.msgid))

    def test_headers(self):
        """Verify all headers have been modified"""
        for path in self.get_files():
            po = polib.pofile(path)
            header = po.header
            # The marker must appear at the very start of the header.
            self.assertEqual(
                header.find('edX translation file'),
                0,
                msg='Missing header in %s:\n"%s"' % (os.path.basename(path), header)
            )

    def test_metadata(self):
        """Verify all metadata has been modified"""
        for path in self.get_files():
            po = polib.pofile(path)
            metadata = po.metadata
            value = metadata['Report-Msgid-Bugs-To']
            expected = 'openedx-translation@googlegroups.com'
            self.assertEquals(expected, value)
from datetime import datetime, timedelta
import os
import sys
import string
import random
import re
from unittest import TestCase
from mock import patch
from polib import pofile
from pytz import UTC
from i18n import extract
from i18n import generate
from i18n import dummy
from i18n.config import CONFIGURATION
class TestGenerate(TestCase):
    """
    Tests functionality of i18n/generate.py
    """
    # Files produced by the extraction step in setUpClass.
    generated_files = ('django-partial.po', 'djangojs-partial.po', 'mako.po')

    @classmethod
    def setUpClass(cls):
        # Extraction + dummy generation are slow, so do them once per class.
        sys.stderr.write(
            "\nExtracting i18n strings and generating dummy translations; "
            "this may take a few minutes\n"
        )
        sys.stderr.flush()
        extract.main(verbosity=0)
        dummy.main(verbosity=0)

    def setUp(self):
        # Subtract 1 second to help comparisons with file-modify time succeed,
        # since os.path.getmtime() is not millisecond-accurate
        self.start_time = datetime.now(UTC) - timedelta(seconds=1)

    def test_merge(self):
        """
        Tests merge script on English source files.
        """
        # Merge into a uniquely-named temp file, then clean it up.
        filename = os.path.join(CONFIGURATION.source_messages_dir, random_name())
        generate.merge(CONFIGURATION.source_locale, target=filename)
        self.assertTrue(os.path.exists(filename))
        os.remove(filename)

    # Patch dummy_locales to not have esperanto present
    @patch.object(CONFIGURATION, 'dummy_locales', ['fake2'])
    def test_main(self):
        """
        Runs generate.main() which should merge source files,
        then compile all sources in all configured languages.
        Validates output by checking all .mo files in all configured languages.
        .mo files should exist, and be recently created (modified
        after start of test suite)
        """
        generate.main(verbosity=0, strict=False)
        for locale in CONFIGURATION.translated_locales:
            for filename in ('django', 'djangojs'):
                mofile = filename+'.mo'
                path = os.path.join(CONFIGURATION.get_messages_dir(locale), mofile)
                exists = os.path.exists(path)
                self.assertTrue(exists, msg='Missing file in locale %s: %s' % (locale, mofile))
                self.assertTrue(datetime.fromtimestamp(os.path.getmtime(path), UTC) >= self.start_time,
                                msg='File not recently modified: %s' % path)
            # Segmenting means that the merge headers don't work they way they
            # used to, so don't make this check for now. I'm not sure if we'll
            # get the merge header back eventually, or delete this code eventually.
            # self.assert_merge_headers(locale)

    def assert_merge_headers(self, locale):
        """
        This is invoked by test_main to ensure that it runs after
        calling generate.main().
        There should be exactly three merge comment headers
        in our merged .po file. This counts them to be sure.
        A merge comment looks like this:
        # #-#-#-#-# django-partial.po (0.1a) #-#-#-#-#
        """
        path = os.path.join(CONFIGURATION.get_messages_dir(locale), 'django.po')
        po = pofile(path)
        pattern = re.compile('^#-#-#-#-#', re.M)
        match = pattern.findall(po.header)
        self.assertEqual(len(match), 3,
                         msg="Found %s (should be 3) merge comments in the header for %s" % \
                         (len(match), path))
def random_name(size=6, prefix='test-'):
    """
    Return a random filename as a string, like 'test-4BZ81W'.

    Arguments:
        size (int): number of random characters in the suffix.
        prefix (str): fixed leading text. Defaults to 'test-' so existing
            callers keep their current behavior.

    Returns:
        str: `prefix` followed by `size` random uppercase letters/digits.
    """
    chars = string.ascii_uppercase + string.digits
    # `_` instead of an unused loop variable.
    return prefix + ''.join(random.choice(chars) for _ in range(size))
"""Test i18n/segment.py"""
import os.path
import shutil
import unittest
from path import path
import polib
from i18n.segment import segment_pofile
HERE = path(__file__).dirname()
TEST_DATA = HERE / "data"
WORK = HERE / "work"
class SegmentTest(unittest.TestCase):
    """Test segment_pofile."""

    def setUp(self):
        if not os.path.exists(WORK):
            os.mkdir(WORK)
        self.addCleanup(shutil.rmtree, WORK)

    def assert_pofile_same(self, pofile1, pofile2):
        """The paths `pofile1` and `pofile2` should be identical pofiles."""
        self.assertEqual(polib.pofile(pofile1), polib.pofile(pofile2))

    def test_sample_data(self):
        """Segmenting splits cms/other_cms entries out into studio.po."""
        work_file = WORK / "django.po"
        shutil.copyfile(TEST_DATA / "django_before.po", work_file)
        original = polib.pofile(work_file)

        written = segment_pofile(
            work_file,
            {
                'studio.po': [
                    'cms/*',
                    'other_cms/*',
                ],
            }
        )

        # The original file is rewritten in place and one new segment appears.
        self.assertEqual(written, set([WORK / "django.po", WORK / "studio.po"]))

        segments = [polib.pofile(name) for name in written]
        # No entry may be lost or duplicated across the segments.
        self.assertEqual(len(original), sum(len(seg) for seg in segments))
        self.assertEqual(
            set(entry.msgid for entry in original),
            set(entry.msgid for seg in segments for entry in seg),
        )

        self.assert_pofile_same(WORK / "django.po", TEST_DATA / "django_after.po")
        self.assert_pofile_same(WORK / "studio.po", TEST_DATA / "studio.po")
#!/usr/bin/env python
from __future__ import print_function
import sys
from polib import pofile
import argparse
from i18n.config import CONFIGURATION
from i18n.execute import execute
from i18n.extract import EDX_MARKER
TRANSIFEX_HEADER = u'edX community translations have been downloaded from {}'
TRANSIFEX_URL = 'https://www.transifex.com/projects/p/edx-platform/'
def push():
    """Upload the source translation files to Transifex (`tx push -s`)."""
    execute('tx push -s')
def pull():
    """
    Download reviewed translations for all languages from Transifex,
    then strip the English-source warning from the downloaded files.
    """
    print("Pulling languages from transifex...")
    # Pull translations from all languages where there is
    # at least 10% reviewed translations
    execute('tx pull --mode=reviewed --all')
    clean_translated_locales()
def clean_translated_locales():
    """
    Strips out the warning from all translated po files
    about being an English source file.
    """
    # Apply the same per-locale cleanup to every translated locale.
    for locale in CONFIGURATION.translated_locales:
        clean_locale(locale)
def clean_locale(locale):
    """
    Strip the "English source file" warning from one locale's
    machine-generated po files.

    Arguments:
        locale (str): locale code whose message files should be cleaned.
    """
    messages_dir = CONFIGURATION.get_messages_dir(locale)
    # Only the machine-generated catalogs carry the warning.
    for po_name in ('django-partial.po', 'djangojs-partial.po', 'mako.po'):
        clean_file(messages_dir.joinpath(po_name))
def clean_file(filename):
    """
    Strip the "English source file" warning from a single translated po file,
    replacing it with a note about coming from Transifex.

    Files that cannot be parsed are reported and skipped rather than
    aborting the whole run.
    """
    try:
        po = pofile(filename)
    except Exception as exc:
        # An exception can occur when a language is deleted from Transifex.
        # Don't totally fail here.
        print("Encountered error {} with filename {} - language project may no longer exist on Transifex".format(exc, filename))
        return

    if po.header.find(EDX_MARKER) == -1:
        # Nothing to replace; leave the file untouched.
        return

    po.header = po.header.replace(EDX_MARKER, get_new_header(po))
    po.save()
def get_new_header(po):
    """
    Build the Transifex attribution header for `po`, crediting the file's
    Language-Team when present and the project URL otherwise.
    """
    team = po.metadata.get('Language-Team', None)
    # An absent or empty team falls back to the project URL.
    return TRANSIFEX_HEADER.format(team or TRANSIFEX_URL)
if __name__ == '__main__':
    # pylint: disable=invalid-name
    # Command-line dispatcher: `transifex.py push` or `transifex.py pull`.
    parser = argparse.ArgumentParser()
    parser.add_argument("command", help="push or pull")
    parser.add_argument("--verbose", "-v")
    args = parser.parse_args()
    # pylint: enable=invalid-name
    if args.command == "push":
        push()
    elif args.command == "pull":
        pull()
    else:
        raise Exception("unknown command ({cmd})".format(cmd=args.command))
"""Tests that validate .po files."""
import argparse
import codecs
import logging
import os
import sys
import textwrap
import polib
from i18n.config import LOCALE_DIR
from i18n.execute import call
from i18n.converter import Converter
log = logging.getLogger(__name__)
def validate_po_files(root, report_empty=False):
    """
    Validate every .po file found anywhere under `root`.

    Arguments:
        root: directory to walk for .po files.
        report_empty (bool): passed through to check_messages; when True,
            empty translations are reported too.
    """
    for dirpath, __, filenames in os.walk(root):
        for name in filenames:
            __, ext = os.path.splitext(name)
            if ext.lower() != '.po':
                continue
            po_path = os.path.join(dirpath, name)
            # Format check first (msgfmt), then content checks.
            msgfmt_check_po_file(po_path)
            check_messages(po_path, report_empty)
def msgfmt_check_po_file(filename):
    """
    Call GNU msgfmt -c on a .po file to validate its format.

    Any errors caught by msgfmt are logged to `log`; nothing is raised.

    Arguments:
        filename (str): path to the .po file to check.
    """
    # Use relative paths to make output less noisy.
    rfile = os.path.relpath(filename, LOCALE_DIR)
    out, err = call('msgfmt -c {}'.format(rfile), working_directory=LOCALE_DIR)
    if err != '':
        log.info('\n' + out)
        # log.warn is a deprecated alias; use the canonical warning().
        log.warning('\n' + err)
def tags_in_string(msg):
    """
    Return the set of tags in a message string.

    Tags include HTML tags, data placeholders, etc. Tags whose text can
    legitimately change under translation (HTML entities, <abbr>, and so
    on) are excluded from the result.
    """
    def is_linguistic_tag(tag):
        """Is this tag one that can change with the language?"""
        return tag.startswith("&") or any(
            fragment in tag for fragment in ("<abbr>", "<abbr ", "</abbr>")
        )

    __, tags = Converter().detag_string(msg)
    return set(tag for tag in tags if not is_linguistic_tag(tag))
def astral(msg):
    """Does `msg` have characters outside the Basic Multilingual Plane?"""
    # The BMP covers code points up to U+FFFF; anything above is astral.
    for char in msg:
        if ord(char) > 0xFFFF:
            return True
    return False
def check_messages(filename, report_empty=False):
    """
    Checks messages in various ways:
    Translations must have the same slots as the English. Messages can't have astral
    characters in them.
    If report_empty is True, will also report empty translation strings.

    Side effects: writes a `.prob` file next to `filename` when problems
    are found, and logs a one-line summary either way.
    """
    # Don't check English files.
    if "/locale/en/" in filename:
        return
    # problems will be a list of tuples. Each is a description, and a msgid,
    # and then zero or more translations.
    problems = []
    pomsgs = polib.pofile(filename)
    for msg in pomsgs:
        # Check for characters Javascript can't support.
        # https://code.djangoproject.com/ticket/21725
        if astral(msg.msgstr):
            problems.append(("Non-BMP char", msg.msgid, msg.msgstr))
        if msg.msgid_plural:
            # Plurals: two strings in, N strings out.
            source = msg.msgid + " | " + msg.msgid_plural
            translation = " | ".join(v for k, v in sorted(msg.msgstr_plural.items()))
            empty = any(not t.strip() for t in msg.msgstr_plural.values())
        else:
            # Singular: just one string in and one string out.
            source = msg.msgid
            translation = msg.msgstr
            empty = not msg.msgstr.strip()
        if empty:
            if report_empty:
                problems.append(("Empty translation", source))
        else:
            id_tags = tags_in_string(source)
            tx_tags = tags_in_string(translation)
            # Check if tags don't match
            if id_tags != tx_tags:
                # Describe the difference from both directions so the .prob
                # file says whether tags were dropped, added, or both.
                id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags)
                tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags)
                if id_has and tx_has:
                    diff = u"{} vs {}".format(id_has, tx_has)
                elif id_has:
                    diff = u"{} missing".format(id_has)
                else:
                    diff = u"{} added".format(tx_has)
                problems.append((
                    "Different tags in source and translation",
                    source,
                    translation,
                    diff
                ))
    if problems:
        problem_file = filename.replace(".po", ".prob")
        id_filler = textwrap.TextWrapper(width=79, initial_indent=" msgid: ", subsequent_indent=" " * 9)
        tx_filler = textwrap.TextWrapper(width=79, initial_indent=" -----> ", subsequent_indent=" " * 9)
        with codecs.open(problem_file, "w", encoding="utf8") as prob_file:
            for problem in problems:
                desc, msgid = problem[:2]
                prob_file.write(u"{}\n{}\n".format(desc, id_filler.fill(msgid)))
                # Any remaining tuple members are translations / diff details.
                for translation in problem[2:]:
                    prob_file.write(u"{}\n".format(tx_filler.fill(translation)))
                prob_file.write(u"\n")
        log.error(" {0} problems in {1}, details in .prob file".format(len(problems), filename))
    else:
        log.info(" No problems found in {0}".format(filename))
def get_parser():
    """
    Build and return the command-line argument parser for this script.
    """
    arg_parser = argparse.ArgumentParser(description=(
        "Automatically finds translation errors in all edx-platform *.po files, "
        "for all languages, unless one or more language(s) is specified to check."
    ))
    arg_parser.add_argument(
        '-l', '--language',
        type=str,
        nargs='*',
        help="Specify one or more specific language code(s) to check (eg 'ko_KR')."
    )
    arg_parser.add_argument(
        '-e', '--empty',
        action='store_true',
        help="Includes empty translation strings in .prob files."
    )
    arg_parser.add_argument(
        '-v', '--verbose',
        action='count', default=0,
        help="Turns on info-level logging."
    )
    return arg_parser
def main(languages=None, empty=False, verbosity=1):  # pylint: disable=unused-argument
    """
    Main entry point for script.

    With no languages given, validate every .po file under LOCALE_DIR;
    otherwise validate only the listed language directories, skipping
    (with an error log) any code that has no directory.
    """
    if not languages:
        # No specific languages requested: check everything.
        validate_po_files(LOCALE_DIR, empty)
        return

    # languages is a list of language codes; test each language.
    for language in languages:
        root = LOCALE_DIR / language
        if not root.isdir():
            # Unknown code: report it and move on to the next one.
            log.error(" {0} is not a valid directory.\nSkipping language '{1}'".format(root, language))
            continue
        validate_po_files(root, empty)
if __name__ == '__main__':
    # pylint: disable=invalid-name
    parser = get_parser()
    args = parser.parse_args()
    # -v turns on info-level logging; the default shows only warnings/errors.
    if args.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=log_level)
    # pylint: enable=invalid-name
    print("Validating languages...")
    main(languages=args.language, empty=args.empty, verbosity=args.verbose)
    print("Finished validating languages")
......@@ -28,4 +28,5 @@
-e git+https://github.com/edx/acid-block.git@459aff7b63db8f2c5decd1755706c1a64fb4ebb1#egg=acid-xblock
-e git+https://github.com/edx/edx-ora2.git@release-2014-06-13T11.52#egg=edx-ora2
-e git+https://github.com/edx/opaque-keys.git@5929789900b3d0a354ce7274bde74edfd0430f03#egg=opaque-keys
git+https://github.com/edx/ease.git@a990b25ed4238acb1b15ee6f027465db3a10960e#egg=ease
-e git+https://github.com/edx/i18n-tools.git@c186d9d877773734908e49ccc5c01407e6ad8199#egg=i18n-tools
-e git+https://github.com/edx/ease.git@a990b25ed4238acb1b15ee6f027465db3a10960e#egg=ease
......@@ -10,6 +10,5 @@ setup(
packages=[
"lms",
"cms",
"i18n",
],
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment