Merge 'parser-cleanup' into development: some refactoring of parsing logic.

32adefb9 · Chris Jerdonek · bc6173e6 · f26c5e7a · 32adefb9 · 32adefb9
Commit 32adefb9 authored Jan 21, 2012 by Chris Jerdonek
Expand all Show whitespace changes
Inline Side-by-side

Showing with 265 additions and 0 deletions

pystache/parser.py
+197 -0

pystache/renderengine.py
+0 -0

pystache/template.py
+45 -0

tests/test_renderengine.py
+23 -0

No files found.
--- a/pystache/parser.py
+++ b/pystache/parser.py
+# coding: utf-8
+"""
+Provides a class for parsing template strings.
+This module is only meant for internal use by the renderengine module.
+"""
+import re
+from template import ParsedTemplate
+# The tag delimiters a Parser uses when none are passed to its constructor.
+DEFAULT_DELIMITERS = ('{{', '}}')
+# Characters treated as line boundaries when deciding whether a tag
+# begins or ends a line.
+END_OF_LINE_CHARACTERS = ['\r', '\n']
+# Matches the first character of every non-empty line (re.M lets "^" match
+# at each line start); used to prefix the lines of a partial with the
+# whitespace that preceded the partial tag.
+NON_BLANK_RE = re.compile(r'^(.)', re.M)
+def _compile_template_re(delimiters):
+    # The possible tag type characters following the opening tag,
+    # excluding "=" and "{".
+    tag_types = "!>&/#^"
+    # TODO: are we following this in the spec?
+    #
+    #   The tag's content MUST be a non-whitespace character sequence
+    #   NOT containing the current closing delimiter.
+    #
+    tag = r"""
+        (?P<whitespace>[\ \t]*)
+        %(otag)s \s*
+        (?:
+          (?P<change>=) \s* (?P<delims>.+?)   \s* = |
+          (?P<raw>{)    \s* (?P<raw_name>.+?) \s* } |
+          (?P<tag>[%(tag_types)s]?)  \s* (?P<tag_key>[\s\S]+?)
+        )
+        \s* %(ctag)s
+    """ % {'tag_types': tag_types, 'otag': re.escape(delimiters[0]), 'ctag': re.escape(delimiters[1])}
+    return re.compile(tag, re.VERBOSE)
+class ParsingError(Exception):
+    pass
+class Parser(object):
+    _delimiters = None
+    _template_re = None
+    def __init__(self, engine, delimiters=None):
+        """
+        Construct an instance.
+        Arguments:
+          engine: a RenderEngine instance.
+        """
+        if delimiters is None:
+            delimiters = DEFAULT_DELIMITERS
+        self._delimiters = delimiters
+        self.engine = engine
+    def compile_template_re(self):
+        self._template_re = _compile_template_re(self._delimiters)
+    def _change_delimiters(self, delimiters):
+        self._delimiters = delimiters
+        self.compile_template_re()
+    def parse(self, template, index=0, section_key=None):
+        """
+        Parse a template string into a ParsedTemplate instance.
+        This method uses the current tag delimiter.
+        Arguments:
+          template: a template string of type unicode.
+        """
+        parse_tree = []
+        start_index = index
+        while True:
+            match = self._template_re.search(template, index)
+            if match is None:
+                break
+            match_index = match.start()
+            end_index = match.end()
+            before_tag = template[index : match_index]
+            parse_tree.append(before_tag)
+            matches = match.groupdict()
+            # Normalize the matches dictionary.
+            if matches['change'] is not None:
+                matches.update(tag='=', tag_key=matches['delims'])
+            elif matches['raw'] is not None:
+                matches.update(tag='&', tag_key=matches['raw_name'])
+            tag_type = matches['tag']
+            tag_key = matches['tag_key']
+            leading_whitespace = matches['whitespace']
+            # Standalone (non-interpolation) tags consume the entire line,
+            # both leading whitespace and trailing newline.
+            did_tag_begin_line = match_index == 0 or template[match_index - 1] in END_OF_LINE_CHARACTERS
+            did_tag_end_line = end_index == len(template) or template[end_index] in END_OF_LINE_CHARACTERS
+            is_tag_interpolating = tag_type in ['', '&']
+            if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
+                if end_index < len(template):
+                    end_index += template[end_index] == '\r' and 1 or 0
+                if end_index < len(template):
+                    end_index += template[end_index] == '\n' and 1 or 0
+            elif leading_whitespace:
+                parse_tree.append(leading_whitespace)
+                match_index += len(leading_whitespace)
+                leading_whitespace = ''
+            if tag_type == '/':
+                if tag_key != section_key:
+                    raise ParsingError("Section end tag mismatch: %s != %s" % (repr(tag_key), repr(section_key)))
+                return ParsedTemplate(parse_tree), template[start_index:match_index], end_index
+            index = self._handle_tag_type(template, parse_tree, tag_type, tag_key, leading_whitespace, end_index)
+        # Save the rest of the template.
+        parse_tree.append(template[index:])
+        return ParsedTemplate(parse_tree)
+    def _parse_section(self, template, index_start, section_key):
+        parsed_template, template, index_end = self.parse(template=template, index=index_start, section_key=section_key)
+        return parsed_template, template, index_end
+    def _handle_tag_type(self, template, parse_tree, tag_type, tag_key, leading_whitespace, end_index):
+        # TODO: switch to using a dictionary instead of a bunch of ifs and elifs.
+        if tag_type == '!':
+            return end_index
+        if tag_type == '=':
+            delimiters = tag_key.split()
+            self._change_delimiters(delimiters)
+            return end_index
+        engine = self.engine
+        if tag_type == '':
+            func = engine._make_get_escaped(tag_key)
+        elif tag_type == '&':
+            func = engine._make_get_literal(tag_key)
+        elif tag_type == '#':
+            parsed_section, template, end_index = self._parse_section(template, end_index, tag_key)
+            func = engine._make_get_section(tag_key, parsed_section, template, self._delimiters)
+        elif tag_type == '^':
+            parsed_section, template, end_index = self._parse_section(template, end_index, tag_key)
+            func = engine._make_get_inverse(tag_key, parsed_section)
+        elif tag_type == '>':
+            template = engine.load_partial(tag_key)
+            # Indent before rendering.
+            template = re.sub(NON_BLANK_RE, leading_whitespace + r'\1', template)
+            func = engine._make_get_partial(template)
+        else:
+            raise Exception("Unrecognized tag type: %s" % repr(tag_type))
+        parse_tree.append(func)
+        return end_index
--- a/pystache/renderengine.py
+++ b/pystache/renderengine.py
--- a/pystache/template.py
+++ b/pystache/template.py
+# coding: utf-8
+"""
+Exposes a class that represents a parsed (or compiled) template.
+This module is meant only for internal use.
+"""
+class ParsedTemplate(object):
+    def __init__(self, parse_tree):
+        """
+        Arguments:
+          parse_tree: a list, each element of which is either--
+            (1) a unicode string, or
+            (2) a "rendering" callable that accepts a Context instance
+                and returns a unicode string.
+        The possible rendering callables are the return values of the
+        following functions:
+        * RenderEngine._make_get_escaped()
+        * RenderEngine._make_get_inverse()
+        * RenderEngine._make_get_literal()
+        * RenderEngine._make_get_partial()
+        * RenderEngine._make_get_section()
+        """
+        self._parse_tree = parse_tree
+    def render(self, context):
+        """
+        Returns: a string of type unicode.
+        """
+        get_unicode = lambda val: val(context) if callable(val) else val
+        parts = map(get_unicode, self._parse_tree)
+        s = ''.join(parts)
+        return unicode(s)
--- a/tests/test_renderengine.py
+++ b/tests/test_renderengine.py
@@ -9,6 +9,7 @@ import cgi
 import unittest
 from pystache.context import Context
+from pystache.parser import ParsingError
 from pystache.renderengine import RenderEngine
 from tests.common import assert_strings
@@ -270,6 +271,28 @@ class RenderTests(unittest.TestCase):
    ## Test cases related specifically to sections.
+    def test_section__end_tag_with_no_start_tag(self):
+        """
+        Check what happens if there is an end tag with no start tag.
+        """
+        template = '{{/section}}'
+        try:
+            self._assert_render(None, template)
+        except ParsingError, err:
+            self.assertEquals(str(err), "Section end tag mismatch: u'section' != None")
+    def test_section__end_tag_mismatch(self):
+        """
+        Check what happens if the end tag doesn't match.
+        """
+        template = '{{#section_start}}{{/section_end}}'
+        try:
+            self._assert_render(None, template)
+        except ParsingError, err:
+            self.assertEquals(str(err), "Section end tag mismatch: u'section_end' != u'section_start'")
    def test_section__context_values(self):
        """
        Test that escape and literal work on context values in sections.