Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
R
rfc6266
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
rfc6266
Commits
8f2ca4ae
Commit
8f2ca4ae
authored
Jan 28, 2012
by
Gabriel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Sender side seems to work.
parent
040f262e
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
63 additions
and
16 deletions
+63
-16
rfc6266.py
+63
-16
No files found.
rfc6266.py
View file @
8f2ca4ae
...
@@ -5,8 +5,8 @@
...
@@ -5,8 +5,8 @@
from
lepl
import
*
from
lepl
import
*
from
collections
import
namedtuple
from
collections
import
namedtuple
from
urllib
import
unquote
from
urllib
import
quote
,
unquote
from
string
import
hexdigits
from
string
import
hexdigits
,
ascii_letters
,
digits
import
re
import
re
__all__
=
(
'ContentDisposition'
,
)
__all__
=
(
'ContentDisposition'
,
)
...
@@ -41,6 +41,9 @@ class ContentDisposition(object):
...
@@ -41,6 +41,9 @@ class ContentDisposition(object):
def
from_header
(
cls
,
hdrval
):
def
from_header
(
cls
,
hdrval
):
# Require hdrval to be ascii bytes (0-127),
# Require hdrval to be ascii bytes (0-127),
# or characters in the ascii range
# or characters in the ascii range
# XXX We might allow non-ascii here (see the definition of qdtext),
# but parsing it would still be ambiguous. OTOH, we might allow it
# just so that the non-ambiguous filename* value does get parsed.
hdrval
=
hdrval
.
encode
(
'ascii'
)
hdrval
=
hdrval
.
encode
(
'ascii'
)
rv
,
=
content_disposition_value
.
parse
(
hdrval
)
rv
,
=
content_disposition_value
.
parse
(
hdrval
)
return
rv
return
rv
...
@@ -75,20 +78,39 @@ def CaseInsensitiveLiteral(lit):
...
@@ -75,20 +78,39 @@ def CaseInsensitiveLiteral(lit):
return
Regexp
(
'(?i)'
+
re
.
escape
(
lit
))
return
Regexp
(
'(?i)'
+
re
.
escape
(
lit
))
# To debug, wrap in this block:
# RFC 2616
#with TraceVariables():
separator_chars
=
"()<>@,;:
\\\"
/[]?={}
\t
"
separator_chars
=
"()<>@,;:
\\\"
/[]?={}
\t
"
ctl_chars
=
''
.
join
(
chr
(
i
)
for
i
in
xrange
(
32
))
+
chr
(
127
)
ctl_chars
=
''
.
join
(
chr
(
i
)
for
i
in
xrange
(
32
))
+
chr
(
127
)
nontoken_chars
=
separator_chars
+
ctl_chars
nontoken_chars
=
separator_chars
+
ctl_chars
# RFC 5987
attr_chars_nonalnum
=
'!#$&+-.^_`|~'
attr_chars
=
ascii_letters
+
digits
+
attr_chars_nonalnum
# RFC 5987 gives this alternative construction of the token character class
token_chars
=
attr_chars
+
"*'
%
"
# To debug, wrap in this block:
#with TraceVariables():
# Definitions from https://tools.ietf.org/html/rfc2616#section-2.2
# Definitions from https://tools.ietf.org/html/rfc2616#section-2.2
token
=
AnyBut
(
nontoken_chars
)[
1
:,
...
]
# token was redefined from attr_chars to avoid using AnyBut,
# which might include non-ascii octets.
token
=
Any
(
token_chars
)[
1
:,
...
]
# RFC 2616 says some linear whitespace (LWS) is in fact allowed in text
# RFC 2616 says some linear whitespace (LWS) is in fact allowed in text
# and qdtext; however it also mentions folding that whitespace into
# and qdtext; however it also mentions folding that whitespace into
# a single SP (which isn't in CTL).
# a single SP (which isn't in CTL).
# Assume the caller already did that folding when parsing headers.
# Assume the caller already did that folding when parsing headers.
# XXX qdtext also allows non-ascii, which might be
# parsed as ISO-8859-1 (but is ambiguous). We should probably reject it.
# Everything else in this grammar (including RFC 5987 ext values)
# is ascii-safe.
# Because of this, this is the only character class to use AnyBut,
# and all the others are defined with Any.
qdtext
=
AnyBut
(
'"'
+
ctl_chars
)
qdtext
=
AnyBut
(
'"'
+
ctl_chars
)
char
=
Any
(
''
.
join
(
chr
(
i
)
for
i
in
xrange
(
128
)))
# ascii range: 0-127
char
=
Any
(
''
.
join
(
chr
(
i
)
for
i
in
xrange
(
128
)))
# ascii range: 0-127
...
@@ -102,9 +124,11 @@ value = token | quoted_string
...
@@ -102,9 +124,11 @@ value = token | quoted_string
# for future evolutions.
# for future evolutions.
charset
=
(
CaseInsensitiveLiteral
(
'UTF-8'
)
charset
=
(
CaseInsensitiveLiteral
(
'UTF-8'
)
|
CaseInsensitiveLiteral
(
'ISO-8859-1'
))
|
CaseInsensitiveLiteral
(
'ISO-8859-1'
))
# XXX See RFC 5646 for the correct definition
# XXX See RFC 5646 for the correct definition
language
=
token
language
=
token
attr_char
=
AnyBut
(
nontoken_chars
+
"*'
%
"
)
attr_char
=
Any
(
attr_chars
)
hexdig
=
Any
(
hexdigits
)
hexdig
=
Any
(
hexdigits
)
pct_encoded
=
'
%
'
+
hexdig
+
hexdig
>>
unquote
pct_encoded
=
'
%
'
+
hexdig
+
hexdig
>>
unquote
value_chars
=
(
pct_encoded
|
attr_char
)[
...
]
value_chars
=
(
pct_encoded
|
attr_char
)[
...
]
...
@@ -132,22 +156,36 @@ def is_token_char(ch):
...
@@ -132,22 +156,36 @@ def is_token_char(ch):
return
31
<
asciicode
<
127
and
ch
not
in
separator_chars
return
31
<
asciicode
<
127
and
ch
not
in
separator_chars
def
usesonlycharsfrom
(
candidate
,
chars
):
# Found that shortcut in urllib.quote
return
not
candidate
.
rstrip
(
chars
)
def
is_token
(
candidate
):
def
is_token
(
candidate
):
return
all
(
is_token_char
(
ch
)
for
ch
in
candidate
)
return
all
(
is_token_char
(
ch
)
for
ch
in
candidate
)
def
header_for_filename
(
filename
,
filename_ascii
=
None
):
def
header_for_filename
(
filename
,
compat
=
'ignore'
,
filename_ascii
=
None
):
# Compat methods (fallback for receivers that can't handle filename*):
# - ignore (give only filename*);
# - strip accents using unicode's decomposing normalisations,
# which can be done from unicode data (stdlib), and keep only ascii;
# - use the ascii transliteration tables from Unidecode (PyPI);
# - use iso-8859-1.
# Ignore is the safest, and can be used to trigger a fallback
# to the document location.
# While this method exists, it could also sanitize the filename
# by rejecting slashes or other weirdness that might upset a receiver.
if
compat
!=
'ignore'
:
raise
NotImplementedError
if
is_token
(
filename
):
if
is_token
(
filename
):
return
'attachment; filename=
%
s'
%
filename
return
'attachment; filename=
%
s'
%
filename
try
:
return
"attachment; filename*=utf-8''
%
s"
%
quote
(
asc
=
filename
.
encode
(
'ascii'
)
filename
.
encode
(
'utf-8'
),
safe
=
attr_chars_nonalnum
)
iso
=
filename
.
encode
(
'iso-8859-1'
)
except
UnicodeEncodeError
:
return
'attachment; filename=
%
s; filename*=
%
s'
%
(
fn1
,
fn2
)
else
:
# The filename is ascii already
pass
def
test_cdfh
():
def
test_cdfh
():
...
@@ -160,4 +198,13 @@ def test_cdfh():
...
@@ -160,4 +198,13 @@ def test_cdfh():
'attachment; filename="EURO rates"; filename*=utf-8
\'\'
%
e2
%82%
ac
%20
rates'
)
'attachment; filename="EURO rates"; filename*=utf-8
\'\'
%
e2
%82%
ac
%20
rates'
)
assert
cd
.
filename
==
u'€ rates'
assert
cd
.
filename
==
u'€ rates'
def
roundtrip
(
filename
):
return
ContentDisposition
.
from_header
(
header_for_filename
(
filename
))
.
filename
def
assert_roundtrip
(
filename
):
assert
roundtrip
(
filename
)
==
filename
assert_roundtrip
(
u'aéioou"qfsdf!'
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment