Improved byte-string handling in Python 3.

ebdc702f · Chris Jerdonek · 3e23a0c1 · ebdc702f · ebdc702f · ebdc702f
Commit ebdc702f authored May 04, 2012 by Chris Jerdonek
Hide whitespace changes
Inline Side-by-side

Showing 5 changed files with 68 additions and 26 deletions

HISTORY.rst
+1 -0

pystache/common.py
+27 -0

pystache/renderengine.py
+4 -3

pystache/renderer.py
+2 -14

pystache/tests/test_renderengine.py
+34 -9

No files found.
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -9,6 +9,7 @@ History
 * Bugfix: exceptions raised from a property are no longer swallowed when
  getting a key from a context stack (issue #110).
 * Bugfix: lambda section values can now return non-ascii, non-unicode strings (issue #118).
+* More robust handling of byte strings in Python 3.
 0.5.2 (2012-05-03)
 ------------------

--- a/pystache/common.py
+++ b/pystache/common.py
@@ -5,6 +5,33 @@ Exposes functionality needed throughout the project.
 """
+from sys import version_info
+def _get_string_types():
+    # TODO: come up with a better solution for this.  One of the issues here
+    #   is that in Python 3 there is no common base class for unicode strings
+    #   and byte strings, and 2to3 seems to convert all of "str", "unicode",
+    #   and "basestring" to Python 3's "str".
+    if version_info < (3, ):
+         return basestring
+    # The latter evaluates to "bytes" in Python 3 -- even after conversion by 2to3.
+    return (unicode, type(u"a".encode('utf-8')))
+_STRING_TYPES = _get_string_types()
+def is_string(obj):
+    """
+    Return whether the given object is a byte string or unicode string.
+    This function is provided for compatibility with both Python 2 and 3
+    when using 2to3.
+    """
+    return isinstance(obj, _STRING_TYPES)
 # This function was designed to be portable across Python versions -- both
 # with older versions and with Python 3 after applying 2to3.
 def read(path):

--- a/pystache/renderengine.py
+++ b/pystache/renderengine.py
@@ -7,6 +7,7 @@ Defines a class responsible for rendering logic.
 import re
+from pystache.common import is_string
 from pystache.parser import Parser
@@ -97,7 +98,7 @@ class RenderEngine(object):
            # Return because _render_value() is already a string.
            return self._render_value(val(), context)
-        if not isinstance(val, basestring):
+        if not is_string(val):
            return str(val)
        return val
@@ -190,7 +191,7 @@ class RenderEngine(object):
                    # Then the value does not support iteration.
                    data = [data]
                else:
-                    if isinstance(data, (basestring, dict)):
+                    if is_string(data) or isinstance(data, dict):
                        # Do not treat strings and dicts (which are iterable) as lists.
                        data = [data]
                    # Otherwise, treat the value as a list.
@@ -245,7 +246,7 @@ class RenderEngine(object):
        Render an arbitrary value.
        """
-        if not isinstance(val, basestring):
+        if not is_string(val):
            # In case the template is an integer, for example.
            val = str(val)
        if type(val) is not unicode:

--- a/pystache/renderer.py
+++ b/pystache/renderer.py
@@ -8,7 +8,7 @@ This module provides a Renderer class to render templates.
 import sys
 from pystache import defaults
-from pystache.common import TemplateNotFoundError, MissingTags
+from pystache.common import TemplateNotFoundError, MissingTags, is_string
 from pystache.context import ContextStack, KeyNotFoundError
 from pystache.loader import Loader
 from pystache.renderengine import context_get, RenderEngine
@@ -16,18 +16,6 @@ from pystache.specloader import SpecLoader
 from pystache.template_spec import TemplateSpec
-# TODO: come up with a better solution for this.  One of the issues here
-#   is that in Python 3 there is no common base class for unicode strings
-#   and byte strings, and 2to3 seems to convert all of "str", "unicode",
-#   and "basestring" to Python 3's "str".
-if sys.version_info < (3, ):
-    _STRING_TYPES = basestring
-else:
-    # The latter evaluates to "bytes" in Python 3 -- even after conversion by 2to3.
-    _STRING_TYPES = (unicode, type(u"a".encode('utf-8')))
 class Renderer(object):
    """
@@ -411,7 +399,7 @@ class Renderer(object):
            all items in the *context list.
        """
-        if isinstance(template, _STRING_TYPES):
+        if is_string(template):
            return self._render_string(template, *context, **kwargs)
        # Otherwise, we assume the template is an object.

--- a/pystache/tests/test_renderengine.py
+++ b/pystache/tests/test_renderengine.py
@@ -296,6 +296,16 @@ class RenderTests(unittest.TestCase, AssertStringMixin, AssertExceptionMixin):
        context = {'section': item, attr_name: 7}
        self._assert_render(u'7', template, context)
+    # This test is also important for testing 2to3.
+    def test_interpolation__nonascii_nonunicode(self):
+        """
+        Test a tag whose value is a non-ascii, non-unicode string.
+        """
+        template = '{{nonascii}}'
+        context = {'nonascii': u'abcdé'.encode('utf-8')}
+        self._assert_render(u'abcdé', template, context)
    def test_implicit_iterator__literal(self):
        """
        Test an implicit iterator in a literal tag.
@@ -354,6 +364,28 @@ class RenderTests(unittest.TestCase, AssertStringMixin, AssertExceptionMixin):
        self._assert_render(u'unescaped: < escaped: &lt;', template, context, engine=engine, partials=partials)
+    ## Test cases related specifically to lambdas.
+    # This test is also important for testing 2to3.
+    def test_section__nonascii_nonunicode(self):
+        """
+        Test a section whose value is a non-ascii, non-unicode string.
+        """
+        template = '{{#nonascii}}{{.}}{{/nonascii}}'
+        context = {'nonascii': u'abcdé'.encode('utf-8')}
+        self._assert_render(u'abcdé', template, context)
+    # This test is also important for testing 2to3.
+    def test_lambda__returning_nonascii_nonunicode(self):
+        """
+        Test a lambda tag value returning a non-ascii, non-unicode string.
+        """
+        template = '{{lambda}}'
+        context = {'lambda': lambda: u'abcdé'.encode('utf-8')}
+        self._assert_render(u'abcdé', template, context)
    ## Test cases related specifically to sections.
    def test_section__end_tag_with_no_start_tag(self):
@@ -472,22 +504,15 @@ class RenderTests(unittest.TestCase, AssertStringMixin, AssertExceptionMixin):
        context = {'test': (lambda text: 'Hi %s' % text)}
        self._assert_render(u'Hi Mom', template, context)
+    # This test is also important for testing 2to3.
    def test_section__lambda__returning_nonascii_nonunicode(self):
        """
        Test a lambda section value returning a non-ascii, non-unicode string.
        """
-        def literal(s):
-            if isinstance(s, unicode):
-                return s
-            return unicode(s, encoding='utf8')
-        engine = self._engine()
-        engine.literal = literal
        template = '{{#lambda}}{{/lambda}}'
        context = {'lambda': lambda text: u'abcdé'.encode('utf-8')}
-        self._assert_render(u'abcdé', template, context, engine=engine)
+        self._assert_render(u'abcdé', template, context)
    def test_section__lambda__returning_nonstring(self):
        """