Handle the sender side.

e63beb87 · Gabriel · 821aa805 · e63beb87
Commit e63beb87 authored Jan 30, 2012 by Gabriel
Hide whitespace changes
Inline Side-by-side

Showing with 49 additions and 20 deletions

rfc6266.py
+49 -20

No files found.
--- a/rfc6266.py
+++ b/rfc6266.py
@@ -2,8 +2,8 @@
 """Implements RFC 6266, the Content-Disposition HTTP header.
-Currently only the receiver side is handled.
+ContentDisposition handles the receiver side,
-Sender side is a work in progress.
+header_for_filename handles the sender side.
 """
 from lepl import *
@@ -15,7 +15,7 @@ import os.path
 import re
-__all__ = ('ContentDisposition', )
+__all__ = ('ContentDisposition', 'header_for_filename', )
 LangTagged = namedtuple('LangTagged', 'string langtag')
@@ -154,7 +154,7 @@ class ContentDisposition(object):
        # remove CR and LF even if they aren't part of a CRLF.
        # However http doesn't allow isolated CR and LF in headers outside
        # of LWS.
-        assert content_disposition == ' '.join(content_disposition.split())
+        assert is_lws_safe(content_disposition)
        parsed = content_disposition_value.parse(content_disposition)
        return ContentDisposition(
            disposition=parsed[0], assocs=parsed[1:], location=location)
@@ -286,34 +286,63 @@ def is_token(candidate):
    return all(is_token_char(ch) for ch in candidate)
-def header_for_filename(filename, disposition='attachment',
+def is_lws_safe(text):
-                        compat='ignore', filename_compat=None):
+    return ' '.join(text.split()) == text
-    # https://tools.ietf.org/html/rfc6266#appendix-D
-    # Compat methods (fallback for receivers that can't handle filename*):
-    # - ignore (give only filename*);
+def qd_quote(text):
-    # - strip accents using unicode's decomposing normalisations,
+    return text.replace('\\', '\\\\').replace('"', '\\"')
-    # which can be done from unicode data (stdlib), and keep only ascii;
-    # - use the ascii transliteration tables from Unidecode (PyPI);
-    # - use iso-8859-1 (can't be handled by the caller then).
+def header_for_filename(
-    # Ignore is the safest, and can be used to trigger a fallback
+    filename, disposition='attachment', filename_compat=None
-    # to the document location.
+):
+    """Generate a Content-Disposition header for a given filename.
+    For legacy clients that don't understand the filename* parameter,
+    a filename_compat value may be given.
+    It should either be ascii-only (recommended) or iso-8859-1 only.
+    In the latter case it should be a character string
+    (unicode in Python 2).
+    Options for generating filename_compat (only useful for legacy clients):
+    - ignore (will only send filename*);
+    - strip accents using unicode's decomposing normalisations,
+    which can be done from unicode data (stdlib), and keep only ascii;
+    - use the ascii transliteration tables from Unidecode (PyPI);
+    - use iso-8859-1.
+    Ignore is the safest, and can be used to trigger a fallback
+    to the document location (which can be percent-encoded utf-8
+    if you control the URLs).
+    See https://tools.ietf.org/html/rfc6266#appendix-D
+    """
    # While this method exists, it could also sanitize the filename
    # by rejecting slashes or other weirdness that might upset a receiver.
-    if compat != 'ignore':
-        raise NotImplementedError
    if disposition != 'attachment':
        assert is_token(disposition)
    if is_token(filename):
        return '%s; filename=%s' % (disposition, filename)
+    rv = disposition
+    if filename_compat:
+        if is_token(filename_compat):
+            rv += '; filename=%s' % (filename_compat, )
+        else:
+            assert is_lws_safe(filename_compat)
+            rv += '; filename="%s"' % (qd_quote(filename_compat), )
    # alnum are already considered always-safe, but the rest isn't.
    # Python encodes ~ when it shouldn't, for example.
-    return "%s; filename*=utf-8''%s" % (disposition, quote(
+    rv += "; filename*=utf-8''%s" % (quote(
-        filename.encode('utf-8'), safe=attr_chars_nonalnum))
+        filename.encode('utf-8'), safe=attr_chars_nonalnum), )
+    # This will only encode filename_compat, if it used non-ascii iso-8859-1.
+    return rv.encode('iso-8859-1')
 def test_cdfh():