Commit 8f2ca4ae by Gabriel

Sender side seems to work.

parent 040f262e
...@@ -5,8 +5,8 @@ ...@@ -5,8 +5,8 @@
from lepl import * from lepl import *
from collections import namedtuple from collections import namedtuple
from urllib import unquote from urllib import quote, unquote
from string import hexdigits from string import hexdigits, ascii_letters, digits
import re import re
__all__ = ('ContentDisposition', ) __all__ = ('ContentDisposition', )
...@@ -41,6 +41,9 @@ class ContentDisposition(object): ...@@ -41,6 +41,9 @@ class ContentDisposition(object):
def from_header(cls, hdrval): def from_header(cls, hdrval):
# Require hdrval to be ascii bytes (0-127), # Require hdrval to be ascii bytes (0-127),
# or characters in the ascii range # or characters in the ascii range
# XXX We might allow non-ascii here (see the definition of qdtext),
# but parsing it would still be ambiguous. OTOH, we might allow it
# just so that the non-ambiguous filename* value does get parsed.
hdrval = hdrval.encode('ascii') hdrval = hdrval.encode('ascii')
rv, = content_disposition_value.parse(hdrval) rv, = content_disposition_value.parse(hdrval)
return rv return rv
...@@ -75,20 +78,39 @@ def CaseInsensitiveLiteral(lit): ...@@ -75,20 +78,39 @@ def CaseInsensitiveLiteral(lit):
return Regexp('(?i)' + re.escape(lit)) return Regexp('(?i)' + re.escape(lit))
# To debug, wrap in this block: # RFC 2616
#with TraceVariables():
separator_chars = "()<>@,;:\\\"/[]?={} \t" separator_chars = "()<>@,;:\\\"/[]?={} \t"
ctl_chars = ''.join(chr(i) for i in xrange(32)) + chr(127) ctl_chars = ''.join(chr(i) for i in xrange(32)) + chr(127)
nontoken_chars = separator_chars + ctl_chars nontoken_chars = separator_chars + ctl_chars
# RFC 5987
attr_chars_nonalnum = '!#$&+-.^_`|~'
attr_chars = ascii_letters + digits + attr_chars_nonalnum
# RFC 5987 gives this alternative construction of the token character class
token_chars = attr_chars + "*'%"
# To debug, wrap in this block:
#with TraceVariables():
# Definitions from https://tools.ietf.org/html/rfc2616#section-2.2 # Definitions from https://tools.ietf.org/html/rfc2616#section-2.2
token = AnyBut(nontoken_chars)[1:, ...] # token was redefined from attr_chars to avoid using AnyBut,
# which might include non-ascii octets.
token = Any(token_chars)[1:, ...]
# RFC 2616 says some linear whitespace (LWS) is in fact allowed in text # RFC 2616 says some linear whitespace (LWS) is in fact allowed in text
# and qdtext; however it also mentions folding that whitespace into # and qdtext; however it also mentions folding that whitespace into
# a single SP (which isn't in CTL). # a single SP (which isn't in CTL).
# Assume the caller already did that folding when parsing headers. # Assume the caller already did that folding when parsing headers.
# XXX qdtext also allows non-ascii, which might be
# parsed as ISO-8859-1 (but is ambiguous). We should probably reject it.
# Everything else in this grammar (including RFC 5987 ext values)
# is ascii-safe.
# Because of this, this is the only character class to use AnyBut,
# and all the others are defined with Any.
qdtext = AnyBut('"' + ctl_chars) qdtext = AnyBut('"' + ctl_chars)
char = Any(''.join(chr(i) for i in xrange(128))) # ascii range: 0-127 char = Any(''.join(chr(i) for i in xrange(128))) # ascii range: 0-127
...@@ -102,9 +124,11 @@ value = token | quoted_string ...@@ -102,9 +124,11 @@ value = token | quoted_string
# for future evolutions. # for future evolutions.
charset = (CaseInsensitiveLiteral('UTF-8') charset = (CaseInsensitiveLiteral('UTF-8')
| CaseInsensitiveLiteral('ISO-8859-1')) | CaseInsensitiveLiteral('ISO-8859-1'))
# XXX See RFC 5646 for the correct definition # XXX See RFC 5646 for the correct definition
language = token language = token
attr_char = AnyBut(nontoken_chars + "*'%")
attr_char = Any(attr_chars)
hexdig = Any(hexdigits) hexdig = Any(hexdigits)
pct_encoded = '%' + hexdig + hexdig >> unquote pct_encoded = '%' + hexdig + hexdig >> unquote
value_chars = (pct_encoded | attr_char)[...] value_chars = (pct_encoded | attr_char)[...]
...@@ -132,22 +156,36 @@ def is_token_char(ch): ...@@ -132,22 +156,36 @@ def is_token_char(ch):
return 31 < asciicode < 127 and ch not in separator_chars return 31 < asciicode < 127 and ch not in separator_chars
def usesonlycharsfrom(candidate, chars):
    """Tell whether `candidate` is made up solely of characters from `chars`.

    Uses the same trick as urllib.quote: strip every trailing character
    that belongs to `chars`; if nothing is left over, no other character
    was present anywhere in the string.
    """
    leftover = candidate.rstrip(chars)
    return len(leftover) == 0
def is_token(candidate): def is_token(candidate):
return all(is_token_char(ch) for ch in candidate) return all(is_token_char(ch) for ch in candidate)
def header_for_filename(filename, filename_ascii=None): def header_for_filename(filename, compat='ignore', filename_ascii=None):
# Compat methods (fallback for receivers that can't handle filename*):
# - ignore (give only filename*);
# - strip accents using unicode's decomposing normalisations,
# which can be done with the unicodedata module (stdlib), and keep only ascii;
# - use the ascii transliteration tables from Unidecode (PyPI);
# - use iso-8859-1.
# Ignore is the safest, and can be used to trigger a fallback
# to the document location.
# While this method exists, it could also sanitize the filename
# by rejecting slashes or other weirdness that might upset a receiver.
if compat != 'ignore':
raise NotImplementedError
if is_token(filename): if is_token(filename):
return 'attachment; filename=%s' % filename return 'attachment; filename=%s' % filename
try: return "attachment; filename*=utf-8''%s" % quote(
asc = filename.encode('ascii') filename.encode('utf-8'), safe=attr_chars_nonalnum)
iso = filename.encode('iso-8859-1')
except UnicodeEncodeError:
return 'attachment; filename=%s; filename*=%s' % (fn1, fn2)
else:
# The filename is ascii already
pass
def test_cdfh(): def test_cdfh():
...@@ -160,4 +198,13 @@ def test_cdfh(): ...@@ -160,4 +198,13 @@ def test_cdfh():
'attachment; filename="EURO rates"; filename*=utf-8\'\'%e2%82%ac%20rates') 'attachment; filename="EURO rates"; filename*=utf-8\'\'%e2%82%ac%20rates')
assert cd.filename == u'€ rates' assert cd.filename == u'€ rates'
def roundtrip(filename):
    # Build the header for this filename, parse it back,
    # and hand over the filename the parser recovered.
    header = header_for_filename(filename)
    parsed = ContentDisposition.from_header(header)
    return parsed.filename
def assert_roundtrip(filename):
    # The filename must survive being turned into a header and parsed back.
    recovered = roundtrip(filename)
    assert recovered == filename
assert_roundtrip(u'aéioou"qfsdf!')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment