"""Tests that validate .po files."""

import argparse
import codecs
import logging
import os
import sys
import textwrap

import polib

from i18n.config import LOCALE_DIR
from i18n.execute import call
from i18n.converter import Converter


log = logging.getLogger(__name__)


def validate_po_files(root, report_empty=False):
    """
    Recursively validate every .po file located under `root`.

    A file counts as a .po file when its extension, compared
    case-insensitively, is ``.po``. Each such file is first passed to
    msgfmt_check_po_file and then to check_messages; `report_empty` is
    forwarded unchanged to check_messages.
    """
    for folder, _subfolders, basenames in os.walk(root):
        for basename in basenames:
            extension = os.path.splitext(basename)[1]
            if extension.lower() != '.po':
                continue

            po_path = os.path.join(folder, basename)
            # Format validation runs first ...
            msgfmt_check_po_file(po_path)
            # ... followed by the checks on the translated strings
            # (and, optionally, on empty translations).
            check_messages(po_path, report_empty)


def msgfmt_check_po_file(filename):
    """
    Call GNU msgfmt -c on a single .po file to validate its format.

    `filename` is the path of the .po file to check. The command is run
    through `call` with LOCALE_DIR as the working directory and is handed
    the file's path relative to LOCALE_DIR.

    `call` returns two values, `out` and `err` (presumably the captured
    stdout and stderr of the command -- confirm against i18n.execute).
    When `err` is not the empty string, `out` is logged at info level and
    `err` at warning level. Nothing is returned.
    """
    # Use relative paths to make output less noisy.
    rfile = os.path.relpath(filename, LOCALE_DIR)
    # NOTE(review): the command is assembled by string formatting, so a path
    # containing spaces or shell metacharacters may be misparsed depending on
    # how `call` executes it -- confirm and quote if necessary.
    out, err = call('msgfmt -c {}'.format(rfile), working_directory=LOCALE_DIR)
    if err != '':
        log.info('\n' + out)
        # Logger.warn() is a deprecated alias of Logger.warning() and is no
        # longer available in recent Python releases.
        log.warning('\n' + err)


def tags_in_string(msg):
    """
    Collect the tags that appear in a message string and return them as a set.

    The tags are whatever Converter().detag_string extracts from `msg`
    (HTML tags, data placeholders, etc.).

    Tags that may legitimately differ between languages are left out of the
    result: anything starting with "&" (HTML entities) and anything
    containing an <abbr> opening or closing tag.

    """
    abbr_markers = ("<abbr>", "<abbr ", "</abbr>")

    def varies_with_language(tag):
        """Can a translator legitimately change this tag?"""
        return tag.startswith("&") or any(marker in tag for marker in abbr_markers)

    _detagged, found_tags = Converter().detag_string(msg)
    return {tag for tag in found_tags if not varies_with_language(tag)}


def astral(msg):
    """Report whether `msg` contains any character above U+FFFF (outside the BMP)."""
    for char in msg:
        if ord(char) > 0xFFFF:
            return True
    return False


def check_messages(filename, report_empty=False):
    """
    Check the messages of one .po file and write any problems to a report.

    Translations must have the same slots (tags) as the English source, and
    must not contain astral (non-BMP) characters. For plural entries every
    plural form of the translation is checked.

    If `report_empty` is True, empty translation strings are reported too.

    Files whose path contains "/locale/en/" are skipped. When problems are
    found they are written to a file next to `filename` whose final
    extension is replaced by ".prob", and an error is logged; otherwise an
    info message is logged. Nothing is returned.

    """
    # Don't check English files.
    if "/locale/en/" in filename:
        return

    problems = _find_message_problems(polib.pofile(filename), report_empty)

    if not problems:
        log.info(" No problems found in {0}".format(filename))
        return

    # Swap only the final extension. A plain str.replace(".po", ".prob") would
    # also rewrite ".po" occurring elsewhere in the path, and would leave the
    # name untouched for an extension such as ".PO" -- in which case the
    # report would overwrite the catalog itself.
    problem_file = os.path.splitext(filename)[0] + ".prob"
    _write_problem_report(problem_file, problems)
    log.error(" {0} problems in {1}, details in .prob file".format(len(problems), filename))


def _find_message_problems(pomsgs, report_empty):
    """
    Return the problems found in the entries of a parsed .po file.

    The result is a list of tuples. Each is a description, and a source
    string, and then zero or more further strings (translation, tag diff).
    """
    problems = []
    for msg in pomsgs:
        if msg.msgid_plural:
            # Plurals: two strings in, N strings out.
            source = msg.msgid + " | " + msg.msgid_plural
            translation = " | ".join(v for k, v in sorted(msg.msgstr_plural.items()))
            empty = any(not t.strip() for t in msg.msgstr_plural.values())
        else:
            # Singular: just one string in and one string out.
            source = msg.msgid
            translation = msg.msgstr
            empty = not msg.msgstr.strip()

        # Check for characters Javascript can't support.
        # https://code.djangoproject.com/ticket/21725
        # The combined translation is inspected so that the plural forms are
        # covered as well; for singular entries it is simply msg.msgstr.
        if astral(translation):
            problems.append(("Non-BMP char", source, translation))

        if empty:
            if report_empty:
                problems.append(("Empty translation", source))
            continue

        id_tags = tags_in_string(source)
        tx_tags = tags_in_string(translation)
        if id_tags == tx_tags:
            continue

        # Sort the differences so the report is the same from run to run.
        id_has = u", ".join(u'"{}"'.format(t) for t in sorted(id_tags - tx_tags))
        tx_has = u", ".join(u'"{}"'.format(t) for t in sorted(tx_tags - id_tags))
        if id_has and tx_has:
            diff = u"{} vs {}".format(id_has, tx_has)
        elif id_has:
            diff = u"{} missing".format(id_has)
        else:
            diff = u"{} added".format(tx_has)
        problems.append((
            "Different tags in source and translation",
            source,
            translation,
            diff
        ))

    return problems


def _write_problem_report(problem_file, problems):
    """Write the problem tuples to `problem_file` as wrapped, UTF-8 encoded text."""
    id_filler = textwrap.TextWrapper(width=79, initial_indent="  msgid: ", subsequent_indent=" " * 9)
    tx_filler = textwrap.TextWrapper(width=79, initial_indent="  -----> ", subsequent_indent=" " * 9)
    with codecs.open(problem_file, "w", encoding="utf8") as prob_file:
        for problem in problems:
            desc, msgid = problem[:2]
            prob_file.write(u"{}\n{}\n".format(desc, id_filler.fill(msgid)))
            # Everything after the first two items is printed as a "----->" line.
            for extra in problem[2:]:
                prob_file.write(u"{}\n".format(tx_filler.fill(extra)))
            prob_file.write(u"\n")


def get_parser():
    """
    Build and return the command-line argument parser for this script.

    The parser accepts -l/--language (zero or more language codes),
    -e/--empty (a flag) and -v/--verbose (a repeat count starting at 0).
    """
    summary = (
        "Automatically finds translation errors in all edx-platform *.po files, "
        "for all languages, unless one or more language(s) is specified to check."
    )
    cli = argparse.ArgumentParser(description=summary)

    # (flags, keyword options) pairs, registered in the order listed.
    option_table = [
        (
            ('-l', '--language'),
            dict(
                type=str,
                nargs='*',
                help="Specify one or more specific language code(s) to check (eg 'ko_KR')."
            ),
        ),
        (
            ('-e', '--empty'),
            dict(
                action='store_true',
                help="Includes empty translation strings in .prob files."
            ),
        ),
        (
            ('-v', '--verbose'),
            dict(
                action='count',
                default=0,
                help="Turns on info-level logging."
            ),
        ),
    ]
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli


def main(languages=None, empty=False, verbosity=1):  # pylint: disable=unused-argument
    """
    Main entry point for script.

    With no `languages` (None or an empty list), every .po file under
    LOCALE_DIR is validated. Otherwise each entry is treated as a language
    code naming a subdirectory of LOCALE_DIR; codes without a matching
    directory are logged as errors and skipped. `empty` is passed through to
    validate_po_files; `verbosity` is accepted but not used here.
    """
    if not languages:
        validate_po_files(LOCALE_DIR, empty)
        return

    # An explicit list of language codes was given; test each language.
    for code in languages:
        language_dir = LOCALE_DIR / code
        if language_dir.isdir():
            # The language code's directory exists, so validate its files.
            validate_po_files(language_dir, empty)
        else:
            log.error(" {0} is not a valid directory.\nSkipping language '{1}'".format(language_dir, code))


if __name__ == '__main__':
    # Script entry point: parse the command line, configure logging to
    # stdout, then run the validation.
    # pylint: disable=invalid-name
    args = get_parser().parse_args()
    # Any -v on the command line switches from warning-level to info-level logging.
    log_level = logging.INFO if args.verbose else logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=log_level)
    # pylint: enable=invalid-name

    print("Validating languages...")
    main(languages=args.language, empty=args.empty, verbosity=args.verbose)
    print("Finished validating languages")