Commit ec508f59 by Usman Khalid

StaticContentServer middleware conforms closer to spec for byte range requests.

This commit makes the following changes:
1. If there are syntactic errors in the Range header, it is ignored and
the full content is returned. This conforms to the spec.
2. In case of multiple ranges in the header, the full content is returned.
This behavior differs from the spec which says that a multipart response
should be returned.

PLAT-104
parent 83c54d4e
"""
Middleware to serve assets.
"""
import logging
from django.http import ( from django.http import (
HttpResponse, HttpResponseNotModified, HttpResponseForbidden HttpResponse, HttpResponseNotModified, HttpResponseForbidden
) )
...@@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError ...@@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError
# TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need # TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
# to change this file so instead of using course_id_partial, we're just using asset keys # to change this file so instead of using course_id_partial, we're just using asset keys
log = logging.getLogger(__name__)
class StaticContentServer(object): class StaticContentServer(object):
def process_request(self, request): def process_request(self, request):
...@@ -82,53 +89,100 @@ class StaticContentServer(object): ...@@ -82,53 +89,100 @@ class StaticContentServer(object):
# Add Content-Range in the response if Range is structurally correct # Add Content-Range in the response if Range is structurally correct
# Request -> Range attribute structure: "Range: bytes=first-[last]" # Request -> Range attribute structure: "Range: bytes=first-[last]"
# Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength" # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
response = None response = None
if request.META.get('HTTP_RANGE'): if request.META.get('HTTP_RANGE'):
# Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream) # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
if type(content) == StaticContent: if type(content) == StaticContent:
content = contentstore().find(loc, as_stream=True) content = contentstore().find(loc, as_stream=True)
# Let's parse the Range header, bytes=first-[last] header_value = request.META['HTTP_RANGE']
range_header = request.META['HTTP_RANGE'] try:
if '=' in range_header: unit, ranges = parse_range_header(header_value, content.length)
unit, byte_range = range_header.split('=') except ValueError as exception:
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed # If the header field is syntactically invalid it should be ignored.
if unit == 'bytes' and '-' in byte_range: log.exception(
first, last = byte_range.split('-') u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
# "first" must be a valid integer )
try: else:
first = int(first) if unit != 'bytes':
except ValueError: # Only accept ranges in bytes
pass log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
if type(first) is int: elif len(ranges) > 1:
# "last" default value is the last byte of the file # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
# Users can ask "bytes=0-" to request the whole file when they don't know the length # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
try: # But we send back the full content.
last = int(last) log.warning(
except ValueError: u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
last = content.length - 1 )
else:
if 0 <= first <= last < content.length: first, last = ranges[0]
# Valid Range attribute
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # HTTP_206_PARTIAL_CONTENT
if not response:
# Malformed Range attribute
response = HttpResponse()
response.status_code = 400 # HTTP_400_BAD_REQUEST
return response
else: if 0 <= first <= last < content.length:
# No Range attribute # If the byte range is satisfiable
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # Partial Content
else:
log.warning(
u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
)
return HttpResponse(status=416) # Requested Range Not Satisfiable
# If Range header is absent or syntactically invalid return a full content response.
if response is None:
response = HttpResponse(content.stream_data()) response = HttpResponse(content.stream_data())
response['Content-Length'] = content.length response['Content-Length'] = content.length
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
response['Accept-Ranges'] = 'bytes' response['Accept-Ranges'] = 'bytes'
response['Content-Type'] = content.content_type response['Content-Type'] = content.content_type
response['Last-Modified'] = last_modified_at_str response['Last-Modified'] = last_modified_at_str
return response return response
def parse_range_header(header_value, content_length):
"""
Returns the unit and a list of (start, end) tuples of ranges.
Raises ValueError if header is syntactically invalid or does not contain a range.
See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
"""
unit = None
ranges = []
if '=' in header_value:
unit, byte_ranges_string = header_value.split('=')
# Parse the byte ranges.
for byte_range_string in byte_ranges_string.split(','):
byte_range_string = byte_range_string.strip()
# Case 0:
if '-' not in byte_range_string: # Invalid syntax of header value.
raise ValueError('Invalid syntax.')
# Case 1: -500
elif byte_range_string.startswith('-'):
first = max(0, (content_length + int(byte_range_string)))
last = content_length - 1
# Case 2: 500-
elif byte_range_string.endswith('-'):
first = int(byte_range_string[0:-1])
last = content_length - 1
# Case 3: 500-999
else:
first, last = byte_range_string.split('-')
first = int(first)
last = min(int(last), content_length - 1)
ranges.append((first, last))
if len(ranges) == 0:
raise ValueError('Invalid syntax')
return unit, ranges
...@@ -137,38 +137,57 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): ...@@ -137,38 +137,57 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
first=first_byte, last=last_byte, length=self.length_unlocked)) first=first_byte, last=last_byte, length=self.length_unlocked))
self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1)) self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1))
def test_range_request_multiple_ranges(self):
"""
Test that multiple ranges in request outputs the full content.
"""
first_byte = self.length_unlocked / 4
last_byte = self.length_unlocked / 2
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}, -100'.format(
first=first_byte, last=last_byte)
)
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
self.assertEqual(resp['Content-Length'], str(self.length_unlocked))
def test_range_request_malformed_missing_equal(self): def test_range_request_malformed_missing_equal(self):
""" """
Test that a range request with malformed Range (missing '=') outputs status 400. Test that a range request with malformed Range (missing '=') outputs a 200 OK full content response.
""" """
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-') resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
def test_range_request_malformed_not_bytes(self): def test_range_request_malformed_not_bytes(self):
""" """
Test that a range request with malformed Range (not "bytes") outputs status 400. Test that a range request with malformed Range (not "bytes") outputs a 200 OK full content response.
"Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
""" """
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-') resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
def test_range_request_malformed_missing_minus(self): def test_range_request_malformed_missing_minus(self):
""" """
Test that a range request with malformed Range (missing '-') outputs status 400. Test that a range request with malformed Range (missing '-') outputs a 200 OK full content response.
""" """
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0') resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
def test_range_request_malformed_first_not_integer(self): def test_range_request_malformed_first_not_integer(self):
""" """
Test that a range request with malformed Range (first is not an integer) outputs status 400. Test that a range request with malformed Range (first is not an integer) outputs a 200 OK full content response.
""" """
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-') resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
def test_range_request_malformed_invalid_range(self): def test_range_request_malformed_invalid_range(self):
""" """
Test that a range request with malformed Range (first_byte > last_byte) outputs status 400. Test that a range request with malformed Range (first_byte > last_byte) outputs
416 Requested Range Not Satisfiable.
""" """
first_byte = self.length_unlocked / 2 first_byte = self.length_unlocked / 2
last_byte = self.length_unlocked / 4 last_byte = self.length_unlocked / 4
...@@ -176,16 +195,16 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): ...@@ -176,16 +195,16 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
first=first_byte, last=last_byte) first=first_byte, last=last_byte)
) )
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST self.assertEqual(resp.status_code, 416)
def test_range_request_malformed_out_of_bounds(self): def test_range_request_malformed_out_of_bounds(self):
""" """
Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error) Test that a range request with malformed Range (first_byte, last_byte == totalLength, offset by 1 error)
outputs status 400. outputs 416 Requested Range Not Satisfiable.
""" """
last_byte = self.length_unlocked last_byte = self.length_unlocked
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format( resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
last=last_byte) first=last_byte, last=last_byte)
) )
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST self.assertEqual(resp.status_code, 416)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment