Commit 32adefb9 by Chris Jerdonek

Merge 'parser-cleanup' into development: some refactoring of parsing logic.

parents bc6173e6 f26c5e7a
# coding: utf-8
"""
Provides a class for parsing template strings.
This module is only meant for internal use by the renderengine module.
"""
import re
from template import ParsedTemplate
DEFAULT_DELIMITERS = ('{{', '}}')
END_OF_LINE_CHARACTERS = ['\r', '\n']
NON_BLANK_RE = re.compile(r'^(.)', re.M)
def _compile_template_re(delimiters):
# The possible tag type characters following the opening tag,
# excluding "=" and "{".
tag_types = "!>&/#^"
# TODO: are we following this in the spec?
#
# The tag's content MUST be a non-whitespace character sequence
# NOT containing the current closing delimiter.
#
tag = r"""
(?P<whitespace>[\ \t]*)
%(otag)s \s*
(?:
(?P<change>=) \s* (?P<delims>.+?) \s* = |
(?P<raw>{) \s* (?P<raw_name>.+?) \s* } |
(?P<tag>[%(tag_types)s]?) \s* (?P<tag_key>[\s\S]+?)
)
\s* %(ctag)s
""" % {'tag_types': tag_types, 'otag': re.escape(delimiters[0]), 'ctag': re.escape(delimiters[1])}
return re.compile(tag, re.VERBOSE)
class ParsingError(Exception):
    """Raised by Parser when a template cannot be parsed."""
class Parser(object):

    """
    Parses template strings into ParsedTemplate instances.

    A parser holds the current tag delimiters together with the regular
    expression compiled from them.  Both are replaced whenever a
    "set delimiters" tag is encountered while parsing.

    """

    _delimiters = None
    _template_re = None

    def __init__(self, engine, delimiters=None):
        """
        Construct an instance.

        Arguments:

          engine: a RenderEngine instance.

          delimiters: an optional pair (opening, closing) of tag delimiter
            strings.  Defaults to DEFAULT_DELIMITERS.

        """
        if delimiters is None:
            delimiters = DEFAULT_DELIMITERS

        self._delimiters = delimiters
        self.engine = engine

    def compile_template_re(self):
        """
        Compile the tag regular expression for the current delimiters and
        store it on the instance.

        """
        self._template_re = _compile_template_re(self._delimiters)

    def _change_delimiters(self, delimiters):
        """
        Switch to the given delimiters and recompile the tag regular
        expression to match.

        """
        self._delimiters = delimiters
        self.compile_template_re()

    def parse(self, template, index=0, section_key=None):
        """
        Parse a template string into a ParsedTemplate instance.

        This method uses the current tag delimiter.

        Arguments:

          template: a template string of type unicode.

          index: the position in the template at which to start parsing.

          section_key: the key of the section whose body is being parsed,
            or None when parsing at the top level.

        Returns:

          When the end of the template is reached, a ParsedTemplate for
          the template from index onwards.

          When an end tag matching section_key is found, the 3-tuple
          (parsed_template, section_text, end_index), where section_text
          is the unparsed template text of the section body and end_index
          is the position at which parsing resumes after the end tag.

        Raises:

          ParsingError: if an end tag does not match section_key (this
            includes an end tag at the top level), or if the template
            ends before the section named by section_key is closed.

        """
        # Do not require callers to have called compile_template_re() first.
        if self._template_re is None:
            self.compile_template_re()

        parse_tree = []
        start_index = index

        while True:
            match = self._template_re.search(template, index)

            if match is None:
                break

            match_index = match.start()
            end_index = match.end()

            # Literal text between the previous tag and this one.
            before_tag = template[index : match_index]
            parse_tree.append(before_tag)

            matches = match.groupdict()

            # Normalize the matches dictionary.
            if matches['change'] is not None:
                matches.update(tag='=', tag_key=matches['delims'])
            elif matches['raw'] is not None:
                matches.update(tag='&', tag_key=matches['raw_name'])

            tag_type = matches['tag']
            tag_key = matches['tag_key']
            leading_whitespace = matches['whitespace']

            # Standalone (non-interpolation) tags consume the entire line,
            # both leading whitespace and trailing newline.
            did_tag_begin_line = match_index == 0 or template[match_index - 1] in END_OF_LINE_CHARACTERS
            did_tag_end_line = end_index == len(template) or template[end_index] in END_OF_LINE_CHARACTERS
            is_tag_interpolating = tag_type in ['', '&']

            if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
                # Swallow the line ending: an optional '\r' followed by an
                # optional '\n'.
                if end_index < len(template) and template[end_index] == '\r':
                    end_index += 1
                if end_index < len(template) and template[end_index] == '\n':
                    end_index += 1
            elif leading_whitespace:
                # Not standalone, so the whitespace before the tag is
                # ordinary template text.
                parse_tree.append(leading_whitespace)
                match_index += len(leading_whitespace)
                leading_whitespace = ''

            if tag_type == '/':
                if tag_key != section_key:
                    raise ParsingError("Section end tag mismatch: %s != %s" % (repr(tag_key), repr(section_key)))

                return ParsedTemplate(parse_tree), template[start_index:match_index], end_index

            index = self._handle_tag_type(template, parse_tree, tag_type, tag_key, leading_whitespace, end_index)

        if section_key is not None:
            # The template ran out before the section's end tag.  Callers
            # parsing a section unpack a 3-tuple, so fail clearly here.
            raise ParsingError("Section end tag not found: %s" % repr(section_key))

        # Save the rest of the template.
        parse_tree.append(template[index:])

        return ParsedTemplate(parse_tree)

    def _parse_section(self, template, index_start, section_key):
        """
        Parse the body of the section named section_key, starting at
        index_start.

        Returns the 3-tuple produced by parse() for a section: the parsed
        body, the body's template text, and the index after the end tag.

        """
        parsed_template, template, index_end = self.parse(template=template, index=index_start, section_key=section_key)

        return parsed_template, template, index_end

    def _handle_tag_type(self, template, parse_tree, tag_type, tag_key, leading_whitespace, end_index):
        """
        Process one tag other than a section end tag.

        Comment tags are ignored and delimiter tags change the parser's
        delimiters.  For every other tag type a rendering callable
        obtained from the engine is appended to parse_tree.

        Returns: the index at which parsing should resume.

        Raises:

          ParsingError: if a delimiter tag names fewer than two delimiters.

          Exception: if tag_type is not recognized.

        """
        # TODO: switch to using a dictionary instead of a bunch of ifs and elifs.
        if tag_type == '!':
            return end_index

        if tag_type == '=':
            delimiters = tag_key.split()
            if len(delimiters) < 2:
                # Both an opening and a closing delimiter are needed to
                # compile the tag regular expression.
                raise ParsingError("Invalid delimiter tag: %s" % repr(tag_key))
            self._change_delimiters(delimiters)
            return end_index

        engine = self.engine

        if tag_type == '':
            func = engine._make_get_escaped(tag_key)
        elif tag_type == '&':
            func = engine._make_get_literal(tag_key)
        elif tag_type == '#':
            parsed_section, template, end_index = self._parse_section(template, end_index, tag_key)
            func = engine._make_get_section(tag_key, parsed_section, template, self._delimiters)
        elif tag_type == '^':
            parsed_section, template, end_index = self._parse_section(template, end_index, tag_key)
            func = engine._make_get_inverse(tag_key, parsed_section)
        elif tag_type == '>':
            template = engine.load_partial(tag_key)
            # Indent before rendering.
            template = re.sub(NON_BLANK_RE, leading_whitespace + r'\1', template)
            func = engine._make_get_partial(template)
        else:
            raise Exception("Unrecognized tag type: %s" % repr(tag_type))

        parse_tree.append(func)

        return end_index
......@@ -7,64 +7,7 @@ Defines a class responsible for rendering logic.
import re
DEFAULT_TAG_OPENING = '{{'
DEFAULT_TAG_CLOSING = '}}'
END_OF_LINE_CHARACTERS = ['\r', '\n']
def render_parse_tree(parse_tree, context):
"""
Returns: a string of type unicode.
The elements of parse_tree can be any of the following:
* a unicode string
* the return value of a call to any of the following:
* RenderEngine._make_get_literal():
Args: context
Returns: unicode
* RenderEngine._make_get_escaped():
Args: context
Returns: unicode
* RenderEngine._make_get_partial()
Args: context
Returns: unicode
* RenderEngine._make_get_section()
Args: context
Returns: unicode
* _make_get_inverse()
Args: context
Returns: unicode
"""
get_unicode = lambda val: val(context) if callable(val) else val
parts = map(get_unicode, parse_tree)
s = ''.join(parts)
return unicode(s)
def _make_get_inverse(name, parsed):
def get_inverse(context):
"""
Returns a string with type unicode.
"""
data = context.get(name)
if data:
return u''
return render_parse_tree(parsed, context)
return get_inverse
class EndOfSection(Exception):

    """
    Carries a parse tree, a template string and a position.

    """

    def __init__(self, parse_tree, template, position):
        self.position = position
        self.template = template
        self.parse_tree = parse_tree
from parser import Parser
class RenderEngine(object):
......@@ -86,13 +29,6 @@ class RenderEngine(object):
"""
tag_re = None
otag = DEFAULT_TAG_OPENING
ctag = DEFAULT_TAG_CLOSING
nonblank_re = re.compile(r'^(.)', re.M)
def __init__(self, load_partial=None, literal=None, escape=None):
"""
Arguments:
......@@ -127,80 +63,6 @@ class RenderEngine(object):
self.literal = literal
self.load_partial = load_partial
def render(self, template, context):
    """
    Render a template and return the result as a unicode string.

    Arguments:

      template: a template string of type unicode (but not a proper
        subclass of unicode).

      context: a Context instance.

    """
    # Be strict but not too strict.  In other words, accept str instead
    # of unicode, but don't assume anything about the encoding (e.g.
    # don't use self.literal).
    as_unicode = unicode(template)

    return self._render_template(template=as_unicode, context=context)
def _render_template(self, template, context):
"""
Returns: a string of type unicode.
Arguments:
template: template string
context: a Context instance
"""
if type(template) is not unicode:
raise Exception("Argument 'template' not unicode: %s: %s" % (type(template), repr(template)))
parse_tree = self.parse_string_to_tree(template_string=template)
return render_parse_tree(parse_tree, context)
def parse_string_to_tree(self, template_string, delims=None):
    """
    Parse template_string using a fresh RenderEngine.

    The new engine is given this engine's load_partial, literal and
    escape.  When delims is provided, its first two items become the new
    engine's opening and closing tags.  The tag regular expression is
    then compiled on the new engine.

    Returns: whatever the new engine's parse_to_tree() returns.

    """
    sub_engine = RenderEngine(
        load_partial=self.load_partial,
        literal=self.literal,
        escape=self.escape,
    )

    if delims is not None:
        sub_engine.otag, sub_engine.ctag = delims[0], delims[1]

    sub_engine._compile_regexps()

    return sub_engine.parse_to_tree(template=template_string)
def _compile_regexps(self):
# The possible tag type characters following the opening tag,
# excluding "=" and "{".
tag_types = "!>&/#^"
# TODO: are we following this in the spec?
#
# The tag's content MUST be a non-whitespace character sequence
# NOT containing the current closing delimiter.
#
tag = r"""
(?P<content>[\s\S]*?)
(?P<whitespace>[\ \t]*)
%(otag)s \s*
(?:
(?P<change>=) \s* (?P<delims>.+?) \s* = |
(?P<raw>{) \s* (?P<raw_name>.+?) \s* } |
(?P<tag>[%(tag_types)s]?) \s* (?P<name>[\s\S]+?)
)
\s* %(ctag)s
""" % {'tag_types': tag_types, 'otag': re.escape(self.otag), 'ctag': re.escape(self.ctag)}
self.tag_re = re.compile(tag, re.M | re.X)
def _get_string_value(self, context, tag_name):
"""
Get a value from the given context as a basestring instance.
......@@ -231,7 +93,7 @@ class RenderEngine(object):
template = str(template)
if type(template) is not unicode:
template = self.literal(template)
val = self._render_template(template, context)
val = self._render(template, context)
if not isinstance(val, basestring):
val = str(val)
......@@ -264,34 +126,47 @@ class RenderEngine(object):
return get_escaped
def _make_get_partial(self, name, indentation=''):
def _make_get_partial(self, template):
def get_partial(context):
"""
Returns: a string of type unicode.
"""
template = self.load_partial(name)
# Indent before rendering.
template = re.sub(self.nonblank_re, indentation + r'\1', template)
return self._render_template(template, context)
return self._render(template, context)
return get_partial
def _make_get_section(self, name, parse_tree_, template_, delims):
def _make_get_inverse(self, name, parsed_template):
def get_inverse(context):
"""
Returns a string with type unicode.
"""
data = context.get(name)
if data:
return u''
return parsed_template.render(context)
return get_inverse
# TODO: the template_ and parsed_template_ arguments don't both seem
# to be necessary. Can we remove one of them? For example, if
# callable(data) is True, then the initial parsed_template isn't used.
def _make_get_section(self, name, parsed_template_, template_, delims):
def get_section(context):
"""
Returns: a string of type unicode.
"""
template = template_
parse_tree = parse_tree_
parsed_template = parsed_template_
data = context.get(name)
if not data:
data = []
elif callable(data):
# TODO: should we check the arity?
template = data(template)
parse_tree = self.parse_string_to_tree(template_string=template, delims=delims)
parsed_template = self._parse(template, delimiters=delims)
data = [ data ]
elif type(data) not in [list, tuple]:
data = [ data ]
......@@ -299,107 +174,63 @@ class RenderEngine(object):
parts = []
for element in data:
context.push(element)
parts.append(render_parse_tree(parse_tree, context))
parts.append(parsed_template.render(context))
context.pop()
return unicode(''.join(parts))
return get_section
def parse_to_tree(self, template, index=0):
"""
Parse a template into a syntax tree.
def _parse(self, template, delimiters=None):
"""
parse_tree = []
start_index = index
while True:
match = self.tag_re.search(template, index)
if match is None:
break
captures = match.groupdict()
match_index = match.end('content')
end_index = match.end()
index = self._handle_match(template, parse_tree, captures, start_index, match_index, end_index)
# Save the rest of the template.
parse_tree.append(template[index:])
return parse_tree
def _handle_match(self, template, parse_tree, captures, start_index, match_index, end_index):
# Normalize the captures dictionary.
if captures['change'] is not None:
captures.update(tag='=', name=captures['delims'])
elif captures['raw'] is not None:
captures.update(tag='{', name=captures['raw_name'])
Parse the given template, and return a ParsedTemplate instance.
parse_tree.append(captures['content'])
# Standalone (non-interpolation) tags consume the entire line,
# both leading whitespace and trailing newline.
did_tag_begin_line = match_index == 0 or template[match_index - 1] in END_OF_LINE_CHARACTERS
did_tag_end_line = end_index == len(template) or template[end_index] in END_OF_LINE_CHARACTERS
is_tag_interpolating = captures['tag'] in ['', '&', '{']
if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
if end_index < len(template):
end_index += template[end_index] == '\r' and 1 or 0
if end_index < len(template):
end_index += template[end_index] == '\n' and 1 or 0
elif captures['whitespace']:
parse_tree.append(captures['whitespace'])
match_index += len(captures['whitespace'])
captures['whitespace'] = ''
name = captures['name']
Arguments:
if captures['tag'] == '!':
return end_index
template: a template string of type unicode.
if captures['tag'] == '=':
self.otag, self.ctag = name.split()
self._compile_regexps()
return end_index
"""
parser = Parser(self, delimiters=delimiters)
parser.compile_template_re()
if captures['tag'] == '>':
func = self._make_get_partial(name, captures['whitespace'])
elif captures['tag'] in ['#', '^']:
return parser.parse(template=template)
try:
self.parse_to_tree(template=template, index=end_index)
except EndOfSection as e:
bufr = e.parse_tree
tmpl = e.template
end_index = e.position
def _render(self, template, context):
"""
Returns: a string of type unicode.
if captures['tag'] == '#':
func = self._make_get_section(name, bufr, tmpl, (self.otag, self.ctag))
else:
func = _make_get_inverse(name, bufr)
Arguments:
elif captures['tag'] in ['{', '&']:
template: a template string of type unicode.
context: a Context instance.
func = self._make_get_literal(name)
"""
# We keep this type-check as an added check because this method is
# called with template strings coming from potentially externally-
# supplied functions like self.literal, self.load_partial, etc.
# Beyond this point, we have much better control over the type.
if type(template) is not unicode:
raise Exception("Argument 'template' not unicode: %s: %s" % (type(template), repr(template)))
elif captures['tag'] == '':
parsed_template = self._parse(template)
func = self._make_get_escaped(name)
return parsed_template.render(context)
elif captures['tag'] == '/':
def render(self, template, context):
"""
Return a template rendered as a string with type unicode.
# TODO: don't use exceptions for flow control.
raise EndOfSection(parse_tree, template[start_index:match_index], end_index)
Arguments:
else:
raise Exception("'%s' is an unrecognized type!" % captures['tag'])
template: a template string of type unicode (but not a proper
subclass of unicode).
parse_tree.append(func)
context: a Context instance.
return end_index
"""
# Be strict but not too strict. In other words, accept str instead
# of unicode, but don't assume anything about the encoding (e.g.
# don't use self.literal).
template = unicode(template)
return self._render(template, context)
# coding: utf-8
"""
Exposes a class that represents a parsed (or compiled) template.
This module is meant only for internal use.
"""
class ParsedTemplate(object):
def __init__(self, parse_tree):
"""
Arguments:
parse_tree: a list, each element of which is either--
(1) a unicode string, or
(2) a "rendering" callable that accepts a Context instance
and returns a unicode string.
The possible rendering callables are the return values of the
following functions:
* RenderEngine._make_get_escaped()
* RenderEngine._make_get_inverse()
* RenderEngine._make_get_literal()
* RenderEngine._make_get_partial()
* RenderEngine._make_get_section()
"""
self._parse_tree = parse_tree
def render(self, context):
"""
Returns: a string of type unicode.
"""
get_unicode = lambda val: val(context) if callable(val) else val
parts = map(get_unicode, self._parse_tree)
s = ''.join(parts)
return unicode(s)
......@@ -9,6 +9,7 @@ import cgi
import unittest
from pystache.context import Context
from pystache.parser import ParsingError
from pystache.renderengine import RenderEngine
from tests.common import assert_strings
......@@ -270,6 +271,28 @@ class RenderTests(unittest.TestCase):
## Test cases related specifically to sections.
def test_section__end_tag_with_no_start_tag(self):
    """
    Check that an end tag with no start tag raises a ParsingError.

    """
    template = '{{/section}}'
    try:
        self._assert_render(None, template)
    except ParsingError as err:
        self.assertEquals(str(err), "Section end tag mismatch: u'section' != None")
    else:
        # Without this branch the test would pass silently when no
        # exception is raised at all.
        self.fail("ParsingError not raised for template: %s" % repr(template))
def test_section__end_tag_mismatch(self):
    """
    Check that a ParsingError is raised if the end tag doesn't match the
    start tag.

    """
    template = '{{#section_start}}{{/section_end}}'
    try:
        self._assert_render(None, template)
    except ParsingError as err:
        self.assertEquals(str(err), "Section end tag mismatch: u'section_end' != u'section_start'")
    else:
        # Without this branch the test would pass silently when no
        # exception is raised at all.
        self.fail("ParsingError not raised for template: %s" % repr(template))
def test_section__context_values(self):
"""
Test that escape and literal work on context values in sections.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment