#!/usr/bin/env python
"""
Segment a .po file to produce smaller files based on the locations of the
messages.
"""

import copy
import fnmatch
import logging
import sys

import polib

from i18n.config import CONFIGURATION

LOG = logging.getLogger(__name__)


def segment_pofiles(locale):
    """Segment all the pofiles for `locale`.

    Returns a set of filenames, all the segment files written.

    """
    files_written = set()
    for filename, segments in CONFIGURATION.segment.items():
        filename = CONFIGURATION.get_messages_dir(locale) / filename
        files_written.update(segment_pofile(filename, segments))
    return files_written


def segment_pofile(filename, segments):
    """Segment a .po file using patterns in `segments`.

    The .po file at `filename` is read, and the occurrence locations of its
    messages are examined.  `segments` is a dictionary: the keys are segment
    .po filenames, the values are lists of patterns::

        {
            'django-studio.po': [
                'cms/*',
                'some-other-studio-place/*',
            ],
            'django-weird.po': [
                '*/weird_*.*',
            ],
        }

    If all a message's occurrences match the patterns for a segment, then that
    message is written to the new segmented .po file.

    Any message that matches no segments, or more than one, is written back to
    the original file.

    Arguments:
        filename (path.path): a path object referring to the original .po file.
        segments (dict): specification of the segments to create.

    Returns:
        a set of path objects, all the segment files written.

    """
    reading_msg = "Reading {num} entries from {file}"
    writing_msg = "Writing {num} entries to {file}"

    source_po = polib.pofile(filename)
    LOG.info(reading_msg.format(file=filename, num=len(source_po)))

    # A new pofile just like the source, but with no messages. We'll put
    # anything not segmented into this file.
    remaining_po = copy.deepcopy(source_po)
    remaining_po[:] = []

    # Turn the segments dictionary into two structures: segment_patterns is a
    # list of (pattern, segmentfile) pairs.  segment_po_files is a dict mapping
    # segment file names to pofile objects of their contents.
    segment_po_files = {filename: remaining_po}
    segment_patterns = []
    for segmentfile, patterns in segments.items():
        segment_po_files[segmentfile] = copy.deepcopy(remaining_po)
        segment_patterns.extend((pat, segmentfile) for pat in patterns)

    # Examine each message in the source file. If all of its occurrences match
    # a pattern for the same segment, it goes in that segment.  Otherwise, it
    # goes in remaining.
    for msg in source_po:
        msg_segments = set()
        for occ_file, _ in msg.occurrences:
            for pat, segment_file in segment_patterns:
                if fnmatch.fnmatch(occ_file, pat):
                    msg_segments.add(segment_file)
                    break
            else:
                msg_segments.add(filename)

        assert msg_segments
        if len(msg_segments) == 1:
            # This message belongs in this segment.
            segment_file = msg_segments.pop()
            segment_po_files[segment_file].append(msg)
        else:
            # It's in more than one segment, so put it back in the main file.
            remaining_po.append(msg)

    # Write out the results.
    files_written = set()
    for segment_file, pofile in segment_po_files.items():
        out_file = filename.dirname() / segment_file
        if len(pofile) == 0:
            LOG.error("No messages to write to {file}, did you run segment twice?".format(file=out_file))
        else:
            LOG.info(writing_msg.format(file=out_file, num=len(pofile)))
            pofile.save(out_file)
            files_written.add(out_file)

    return files_written


def main(argv):
    """
    $ segment.py LOCALE [...]

    Segment the .po files in LOCALE(s) based on the segmenting rules in
    config.yaml.

    Note that segmenting is *not* idempotent: it modifies the input file, so
    be careful that you don't run it twice on the same file.

    """
    # This is used as a tool only to segment translation files when adding a
    # new segment.  In the regular workflow, the work is done by the extract
    # phase calling the functions above.

    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    if len(argv) < 2:
        sys.exit("Need a locale to segment")
    for locale in argv[1:]:
        segment_pofiles(locale)


if __name__ == "__main__":
    main(sys.argv)