Commit 9112d87e by Gabriel

Python3 compatibility with 2to3, testing with tox.

2to3 is enabled by switching from distutils to Distribute.

LEPL now works on character strings; I'd prefer byte strings,
but this seems to work.
parent 94ddf76a
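
For context, here is a minimal sketch (not part of the commit) of what the 2to3 fixers do to the Python-2-only names this diff relies on; Distribute runs the conversion at build time, so the checked-in source stays Python 2:

    # Python 2 spellings as they appear in the module:
    from urllib import quote, unquote   # 2to3 rewrites to: from urllib.parse import quote, unquote
    from urlparse import urlsplit       # 2to3 rewrites to: from urllib.parse import urlsplit

    # 2to3 also rewrites the other Python-2-only names used in the module,
    # for example xrange -> range and unicode -> str.
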
@@ -29,12 +29,18 @@ Read RFC 6266 section 4.3 for more details.
 # Compatibility
 
-Currently tested under Python 2.7.
+Currently tested under Python 2.7, Python 2.6, and Python 3.2.
 
 # Testing
 
+To test in the current Python implementation:
+
     nosetests --all-modules --detailed-errors
 
+To test with tox:
+
+    tox
+
 # References
 
 * RFC 6266 <https://tools.ietf.org/html/rfc6266>
...
@@ -14,12 +14,13 @@ build_header handles the sender side.
 from lepl import *
 from collections import namedtuple
 from urllib import quote, unquote
+from urlparse import urlsplit
 from string import hexdigits, ascii_letters, digits
 import posixpath
 import os.path
 import re
-import urlparse
+import sys
 
 __all__ = (
     'ContentDisposition',
@@ -30,9 +31,24 @@ __all__ = (
 )
 
+PY3K = sys.version_info >= (3,)
+
 LangTagged = namedtuple('LangTagged', 'string langtag')
 
+if PY3K:
+    percent_encode = quote
+    percent_decode = unquote
+else:
+    def percent_encode(string, **kwargs):
+        encoding = kwargs.pop('encoding')
+        return quote(string.encode(encoding), **kwargs)
+
+    def percent_decode(string, **kwargs):
+        encoding = kwargs.pop('encoding')
+        return unquote(string, **kwargs).decode(encoding)
+
 class ContentDisposition(object):
     """
     Records various indications and hints about content disposition.
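
As a quick illustration of the shim above (a sketch, not part of the diff; it assumes the module is importable as rfc6266), both interpreters end up with the same character-string behaviour:

    from rfc6266 import percent_encode, percent_decode

    # Python 2 encodes the text to bytes first; Python 3's quote() takes the
    # encoding keyword directly. Either way the result is the same.
    percent_encode(u'sol\xe9.pdf', safe='', encoding='utf-8')
    # -> 'sol%C3%A9.pdf'

    # Decoding returns a character string on both versions.
    percent_decode('sol%C3%A9.pdf', encoding='utf-8')
    # -> u'sol\xe9.pdf'
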
@@ -86,8 +102,9 @@ class ContentDisposition(object):
     @property
     def location_path(self):
         if self.location:
-            return unquote(
-                urlparse.urlsplit(self.location, scheme='http').path)
+            return percent_decode(
+                urlsplit(self.location, scheme='http').path,
+                encoding='utf-8')
 
     def filename_sanitized(self, extension, default_filename='file'):
         """Returns a filename that is safer to use on the filesystem.
@@ -135,6 +152,14 @@ class ContentDisposition(object):
             self.disposition, self.assocs, self.location)
 
+def ensure_charset(text, encoding):
+    if isinstance(text, bytes):
+        return text.decode(encoding)
+    else:
+        assert fits_inside_codec(text, encoding)
+        return text
+
 def parse_headers(content_disposition, location=None):
     """Build a ContentDisposition from header values.
     """
@@ -146,7 +171,7 @@ def parse_headers(content_disposition, location=None):
     if False:
         # Require content_disposition to be ascii bytes (0-127),
         # or characters in the ascii range
-        content_disposition = content_disposition.encode('ascii')
+        content_disposition = ensure_charset(content_disposition, 'ascii')
     else:
         # We allow non-ascii here (it will only be parsed inside of
         # qdtext, and rejected by the grammar if it appears in
@@ -155,14 +180,12 @@ def parse_headers(content_disposition, location=None):
         # won't get dismissed because of an unrelated ambiguity
         # in the filename parameter. But it does mean we occasionally
         # give less-than-certain values for some legacy senders.
-        content_disposition = content_disposition.encode('iso-8859-1')
+        content_disposition = ensure_charset(content_disposition, 'iso-8859-1')
 
     # Check the caller already did LWS-folding (normally done
     # when separating header names and values; RFC 2616 section 2.2
     # says it should be done before interpretation at any rate).
-    # Since this is ascii the definition of space is known; I don't know
-    # what Python's definition of space chars will be if we allow
-    # iso-8859-1.
+    # Hopefully space still means what it should in iso-8859-1.
     # This check is a bit stronger that LWS folding, it will
     # remove CR and LF even if they aren't part of a CRLF.
     # However http doesn't allow isolated CR and LF in headers outside
@@ -197,7 +220,9 @@ def parse_ext_value(val):
     else:
         charset, coded = val
         langtag = None
-    decoded = coded.decode(charset)
+    if not PY3K and isinstance(coded, unicode):
+        coded = coded.encode('ascii')
+    decoded = percent_decode(coded, encoding=charset)
     return LangTagged(decoded, langtag)
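
For reference, this is what the new decoding path does once the grammar has split an RFC 5987-style ext-value such as filename*=UTF-8''%c2%a3%20rates into its charset and coded parts (a sketch, not part of the diff):

    from rfc6266 import percent_decode

    # charset='UTF-8', coded='%c2%a3%20rates' after parsing; the percent
    # escapes are undone and the bytes decoded in one step.
    percent_decode('%c2%a3%20rates', encoding='UTF-8')
    # -> u'\xa3 rates'  (i.e. u'£ rates')
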
@@ -253,7 +278,7 @@ char = Any(''.join(chr(i) for i in xrange(128))) # ascii range: 0-127
 quoted_pair = Drop('\\') + char
 quoted_string = (Drop('"') & (quoted_pair | qdtext)[:, ...] & Drop('"')
-    ) > parse_iso
+    ) #> parse_iso
 
 value = token | quoted_string
@@ -267,7 +292,7 @@ language = token
 attr_char = Any(attr_chars)
 hexdig = Any(hexdigits)
-pct_encoded = '%' + hexdig + hexdig >> unquote
+pct_encoded = '%' + hexdig + hexdig
 value_chars = (pct_encoded | attr_char)[...]
 ext_value = (
     charset & Drop("'") & Optional(language) & Drop("'")
@@ -312,6 +337,15 @@ def is_ascii(text):
     return all(ord(ch) < 128 for ch in text)
 
+def fits_inside_codec(text, codec):
+    try:
+        text.encode(codec)
+    except UnicodeEncodeError:
+        return False
+    else:
+        return True
+
 def is_lws_safe(text):
     return ' '.join(text.split()) == text
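
fits_inside_codec simply reports whether a text value can be represented in the given codec; a couple of illustrative calls (not from the source):

    from rfc6266 import fits_inside_codec

    fits_inside_codec(u'file.txt', 'ascii')          # True
    fits_inside_codec(u'sol\xe9.pdf', 'ascii')       # False: é is outside ascii
    fits_inside_codec(u'sol\xe9.pdf', 'iso-8859-1')  # True
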
@@ -371,8 +405,8 @@ def build_header(
         # alnum are already considered always-safe, but the rest isn't.
         # Python encodes ~ when it shouldn't, for example.
-        rv += "; filename*=utf-8''%s" % (quote(
-            filename.encode('utf-8'), safe=attr_chars_nonalnum), )
+        rv += "; filename*=utf-8''%s" % (percent_encode(
+            filename, safe=attr_chars_nonalnum, encoding='utf-8'), )
     # This will only encode filename_compat, if it used non-ascii iso-8859-1.
     return rv.encode('iso-8859-1')
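
A sketch of the filename* parameter the changed build_header code produces for a non-ascii name; safe='' is used here instead of the module's attr_chars_nonalnum, which is defined outside this diff:

    from rfc6266 import percent_encode

    u"; filename*=utf-8''%s" % percent_encode(u'\xa3 rates', safe='', encoding='utf-8')
    # -> u"; filename*=utf-8''%C2%A3%20rates"
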
...
-from distutils.core import setup
+from setuptools import setup
 
 setup(
     name='rfc6266',
     version='0.0.1', # symver
     py_modules=['rfc6266'],
     install_requires=['LEPL'],
+    use_2to3=True,
 )
+[tox]
+envlist=py27,py26,py32
+
+[testenv]
+deps=nose
+# changedir is a hack to prevent nose from finding the non-2to3 source;
+# now nose will use import, which has the converted modules in its path
+changedir=.tox
+commands=nosetests --detailed-errors rfc6266