Commit e63beb87 by Gabriel

Handle the sender side.

parent 821aa805
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
"""Implements RFC 6266, the Content-Disposition HTTP header. """Implements RFC 6266, the Content-Disposition HTTP header.
Currently only the receiver side is handled. ContentDisposition handles the receiver side,
Sender side is a work in progress. header_for_filename handles the sender side.
""" """
from lepl import * from lepl import *
...@@ -15,7 +15,7 @@ import os.path ...@@ -15,7 +15,7 @@ import os.path
import re import re
__all__ = ('ContentDisposition', ) __all__ = ('ContentDisposition', 'header_for_filename', )
LangTagged = namedtuple('LangTagged', 'string langtag') LangTagged = namedtuple('LangTagged', 'string langtag')
...@@ -154,7 +154,7 @@ class ContentDisposition(object): ...@@ -154,7 +154,7 @@ class ContentDisposition(object):
# remove CR and LF even if they aren't part of a CRLF. # remove CR and LF even if they aren't part of a CRLF.
# However http doesn't allow isolated CR and LF in headers outside # However http doesn't allow isolated CR and LF in headers outside
# of LWS. # of LWS.
assert content_disposition == ' '.join(content_disposition.split()) assert is_lws_safe(content_disposition)
parsed = content_disposition_value.parse(content_disposition) parsed = content_disposition_value.parse(content_disposition)
return ContentDisposition( return ContentDisposition(
disposition=parsed[0], assocs=parsed[1:], location=location) disposition=parsed[0], assocs=parsed[1:], location=location)
...@@ -286,34 +286,63 @@ def is_token(candidate): ...@@ -286,34 +286,63 @@ def is_token(candidate):
return all(is_token_char(ch) for ch in candidate) return all(is_token_char(ch) for ch in candidate)
def header_for_filename(filename, disposition='attachment', def is_lws_safe(text):
compat='ignore', filename_compat=None): return ' '.join(text.split()) == text
# https://tools.ietf.org/html/rfc6266#appendix-D
# Compat methods (fallback for receivers that can't handle filename*):
# - ignore (give only filename*); def qd_quote(text):
# - strip accents using unicode's decomposing normalisations, return text.replace('\\', '\\\\').replace('"', '\\"')
# which can be done from unicode data (stdlib), and keep only ascii;
# - use the ascii transliteration tables from Unidecode (PyPI);
# - use iso-8859-1 (can't be handled by the caller then). def header_for_filename(
# Ignore is the safest, and can be used to trigger a fallback filename, disposition='attachment', filename_compat=None
# to the document location. ):
"""Generate a Content-Disposition header for a given filename.
For legacy clients that don't understand the filename* parameter,
a filename_compat value may be given.
It should either be ascii-only (recommended) or iso-8859-1 only.
In the latter case it should be a character string
(unicode in Python 2).
Options for generating filename_compat (only useful for legacy clients):
- ignore (will only send filename*);
- strip accents using unicode's decomposing normalisations,
which can be done from unicode data (stdlib), and keep only ascii;
- use the ascii transliteration tables from Unidecode (PyPI);
- use iso-8859-1
Ignore is the safest, and can be used to trigger a fallback
to the document location (which can be percent-encoded utf-8
if you control the URLs).
See https://tools.ietf.org/html/rfc6266#appendix-D
"""
# While this method exists, it could also sanitize the filename # While this method exists, it could also sanitize the filename
# by rejecting slashes or other weirdness that might upset a receiver. # by rejecting slashes or other weirdness that might upset a receiver.
if compat != 'ignore':
raise NotImplementedError
if disposition != 'attachment': if disposition != 'attachment':
assert is_token(disposition) assert is_token(disposition)
if is_token(filename): if is_token(filename):
return '%s; filename=%s' % (disposition, filename) return '%s; filename=%s' % (disposition, filename)
rv = disposition
if filename_compat:
if is_token(filename_compat):
rv += '; filename=%s' % (filename_compat, )
else:
assert is_lws_safe(filename_compat)
rv += '; filename="%s"' % (qd_quote(filename_compat), )
# alnum are already considered always-safe, but the rest isn't. # alnum are already considered always-safe, but the rest isn't.
# Python encodes ~ when it shouldn't, for example. # Python encodes ~ when it shouldn't, for example.
return "%s; filename*=utf-8''%s" % (disposition, quote( rv += "; filename*=utf-8''%s" % (quote(
filename.encode('utf-8'), safe=attr_chars_nonalnum)) filename.encode('utf-8'), safe=attr_chars_nonalnum), )
# Encoding only alters the filename parameter, and only if it contains non-ascii iso-8859-1 characters.
return rv.encode('iso-8859-1')
def test_cdfh(): def test_cdfh():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment