Commit 2aebc2cb by Gabriel

Allow iso-8859-1.

parent 6d967701
...@@ -14,6 +14,7 @@ import posixpath ...@@ -14,6 +14,7 @@ import posixpath
import os.path import os.path
import re import re
__all__ = ('ContentDisposition', ) __all__ = ('ContentDisposition', )
...@@ -123,12 +124,20 @@ class ContentDisposition(object): ...@@ -123,12 +124,20 @@ class ContentDisposition(object):
if content_disposition is None: if content_disposition is None:
return cls(location=location) return cls(location=location)
# Require content_disposition to be ascii bytes (0-127), # Both alternatives seem valid.
# or characters in the ascii range if False:
# XXX We might allow non-ascii here (see the definition of qdtext), # Require content_disposition to be ascii bytes (0-127),
# but parsing it would still be ambiguous. OTOH, we might allow it # or characters in the ascii range
# just so that the non-ambiguous filename* value does get parsed. content_disposition = content_disposition.encode('ascii')
content_disposition = content_disposition.encode('ascii') else:
# We allow non-ascii here (it will only be parsed inside of
# qdtext, and rejected by the grammar if it appears in
# other places), although parsing it can be ambiguous.
# Parsing it ensures that a non-ambiguous filename* value
# won't get dismissed because of an unrelated ambiguity
# in the filename parameter. But it does mean we occasionally
# give less-than-certain values for some legacy senders.
content_disposition = content_disposition.encode('iso-8859-1')
# Check the caller already did LWS-folding (normally done # Check the caller already did LWS-folding (normally done
# when separating header names and values; RFC 2616 section 2.2 # when separating header names and values; RFC 2616 section 2.2
# says it should be done before interpretation at any rate). # says it should be done before interpretation at any rate).
...@@ -164,6 +173,10 @@ def parse_ext_value(val): ...@@ -164,6 +173,10 @@ def parse_ext_value(val):
return LangTagged(decoded, langtag) return LangTagged(decoded, langtag)
# Concatenate the string pieces in `val` and decode the resulting byte
# string as ISO-8859-1 (Python 2 `str.decode`, so the result is unicode).
# Applied to the matched contents of `quoted_string` in the grammar.
def parse_iso(val):
return ''.join(val).decode('iso-8859-1')
# Currently LEPL doesn't handle case-insensitivity: # Currently LEPL doesn't handle case-insensitivity:
# https://groups.google.com/group/lepl/browse_thread/thread/68e7b136038772ca # https://groups.google.com/group/lepl/browse_thread/thread/68e7b136038772ca
def CaseInsensitiveLiteral(lit): def CaseInsensitiveLiteral(lit):
...@@ -197,10 +210,13 @@ token = Any(token_chars)[1:, ...] ...@@ -197,10 +210,13 @@ token = Any(token_chars)[1:, ...]
# a single SP (which isn't in CTL). # a single SP (which isn't in CTL).
# Assume the caller already did that folding when parsing headers. # Assume the caller already did that folding when parsing headers.
# XXX qdtext also allows non-ascii, which might be # NOTE: qdtext also allows non-ascii, which we choose to parse
# parsed as ISO-8859-1 (but is ambiguous). We should probably reject it. # as ISO-8859-1; rejecting it entirely would also be permitted.
# Some browsers attempt encoding-sniffing instead, which is wrong
# because the spec only allows ISO-8859-1 here, and because
# encoding-sniffing can mangle otherwise valid values.
# Everything else in this grammar (including RFC 5987 ext values) # Everything else in this grammar (including RFC 5987 ext values)
# is ascii-safe. # is in an ascii-safe encoding.
# Because of this, this is the only character class to use AnyBut, # Because of this, this is the only character class to use AnyBut,
# and all the others are defined with Any. # and all the others are defined with Any.
qdtext = AnyBut('"' + ctl_chars) qdtext = AnyBut('"' + ctl_chars)
...@@ -208,7 +224,8 @@ qdtext = AnyBut('"' + ctl_chars) ...@@ -208,7 +224,8 @@ qdtext = AnyBut('"' + ctl_chars)
char = Any(''.join(chr(i) for i in xrange(128))) # ascii range: 0-127 char = Any(''.join(chr(i) for i in xrange(128))) # ascii range: 0-127
quoted_pair = Drop('\\') + char quoted_pair = Drop('\\') + char
quoted_string = Drop('"') & (quoted_pair | qdtext)[:, ...] & Drop('"') quoted_string = (Drop('"') & (quoted_pair | qdtext)[:, ...] & Drop('"')
) > parse_iso
value = token | quoted_string value = token | quoted_string
...@@ -259,6 +276,7 @@ def usesonlycharsfrom(candidate, chars): ...@@ -259,6 +276,7 @@ def usesonlycharsfrom(candidate, chars):
def is_token(candidate): def is_token(candidate):
#return usesonlycharsfrom(candidate, token_chars)
return all(is_token_char(ch) for ch in candidate) return all(is_token_char(ch) for ch in candidate)
...@@ -298,6 +316,11 @@ def test_cdfh(): ...@@ -298,6 +316,11 @@ def test_cdfh():
assert cdfh('attachment').disposition == 'attachment' assert cdfh('attachment').disposition == 'attachment'
assert cdfh('attachment; key=val').assocs['key'] == 'val' assert cdfh('attachment; key=val').assocs['key'] == 'val'
assert cdfh('attachment; filename=simple').filename_unsafe == 'simple' assert cdfh('attachment; filename=simple').filename_unsafe == 'simple'
# test ISO-8859-1
fname = cdfh(u'attachment; filename="oyé"').filename_unsafe
assert fname == u'oyé', repr(fname)
cd = cdfh( cd = cdfh(
'attachment; filename="EURO rates";' 'attachment; filename="EURO rates";'
' filename*=utf-8\'\'%e2%82%ac%20rates') ' filename*=utf-8\'\'%e2%82%ac%20rates')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment