Commit ed84c1b2 by Usman Khalid

Merge pull request #5246 from edx/usman/plat104-static-content-server

Improvements to byte range handling in StaticContentServer.
parents 44f952da 5af162eb
"""
Middleware to serve assets.
"""
import logging
from django.http import ( from django.http import (
HttpResponse, HttpResponseNotModified, HttpResponseForbidden HttpResponse, HttpResponseNotModified, HttpResponseForbidden
) )
...@@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError ...@@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError
# TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need # TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
# to change this file so instead of using course_id_partial, we're just using asset keys # to change this file so instead of using course_id_partial, we're just using asset keys
log = logging.getLogger(__name__)
class StaticContentServer(object): class StaticContentServer(object):
def process_request(self, request): def process_request(self, request):
...@@ -82,53 +89,100 @@ class StaticContentServer(object): ...@@ -82,53 +89,100 @@ class StaticContentServer(object):
# Add Content-Range in the response if Range is structurally correct # Add Content-Range in the response if Range is structurally correct
# Request -> Range attribute structure: "Range: bytes=first-[last]" # Request -> Range attribute structure: "Range: bytes=first-[last]"
# Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength" # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
response = None response = None
if request.META.get('HTTP_RANGE'): if request.META.get('HTTP_RANGE'):
# Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream) # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
if type(content) == StaticContent: if type(content) == StaticContent:
content = contentstore().find(loc, as_stream=True) content = contentstore().find(loc, as_stream=True)
# Let's parse the Range header, bytes=first-[last] header_value = request.META['HTTP_RANGE']
range_header = request.META['HTTP_RANGE'] try:
if '=' in range_header: unit, ranges = parse_range_header(header_value, content.length)
unit, byte_range = range_header.split('=') except ValueError as exception:
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed # If the header field is syntactically invalid it should be ignored.
if unit == 'bytes' and '-' in byte_range: log.exception(
first, last = byte_range.split('-') u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
# "first" must be a valid integer )
try: else:
first = int(first) if unit != 'bytes':
except ValueError: # Only accept ranges in bytes
pass log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
if type(first) is int: elif len(ranges) > 1:
# "last" default value is the last byte of the file # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
# Users can ask "bytes=0-" to request the whole file when they don't know the length # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
try: # But we send back the full content.
last = int(last) log.warning(
except ValueError: u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
last = content.length - 1 )
else:
if 0 <= first <= last < content.length: first, last = ranges[0]
# Valid Range attribute
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # HTTP_206_PARTIAL_CONTENT
if not response:
# Malformed Range attribute
response = HttpResponse()
response.status_code = 400 # HTTP_400_BAD_REQUEST
return response
else: if 0 <= first <= last < content.length:
# No Range attribute # If the byte range is satisfiable
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # Partial Content
else:
log.warning(
u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
)
return HttpResponse(status=416) # Requested Range Not Satisfiable
# If Range header is absent or syntactically invalid return a full content response.
if response is None:
response = HttpResponse(content.stream_data()) response = HttpResponse(content.stream_data())
response['Content-Length'] = content.length response['Content-Length'] = content.length
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
response['Accept-Ranges'] = 'bytes' response['Accept-Ranges'] = 'bytes'
response['Content-Type'] = content.content_type response['Content-Type'] = content.content_type
response['Last-Modified'] = last_modified_at_str response['Last-Modified'] = last_modified_at_str
return response return response
def parse_range_header(header_value, content_length):
"""
Returns the unit and a list of (start, end) tuples of ranges.
Raises ValueError if header is syntactically invalid or does not contain a range.
See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
"""
unit = None
ranges = []
if '=' in header_value:
unit, byte_ranges_string = header_value.split('=')
# Parse the byte ranges.
for byte_range_string in byte_ranges_string.split(','):
byte_range_string = byte_range_string.strip()
# Case 0:
if '-' not in byte_range_string: # Invalid syntax of header value.
raise ValueError('Invalid syntax.')
# Case 1: -500
elif byte_range_string.startswith('-'):
first = max(0, (content_length + int(byte_range_string)))
last = content_length - 1
# Case 2: 500-
elif byte_range_string.endswith('-'):
first = int(byte_range_string[0:-1])
last = content_length - 1
# Case 3: 500-999
else:
first, last = byte_range_string.split('-')
first = int(first)
last = min(int(last), content_length - 1)
ranges.append((first, last))
if len(ranges) == 0:
raise ValueError('Invalid syntax')
return unit, ranges
...@@ -2,27 +2,31 @@ ...@@ -2,27 +2,31 @@
Tests for StaticContentServer Tests for StaticContentServer
""" """
import copy import copy
import ddt
import logging import logging
import unittest
from uuid import uuid4 from uuid import uuid4
from django.conf import settings from django.conf import settings
from django.test.client import Client from django.test.client import Client
from django.test.utils import override_settings from django.test.utils import override_settings
from student.models import CourseEnrollment
from xmodule.contentstore.django import contentstore from xmodule.contentstore.django import contentstore
from xmodule.modulestore.django import modulestore from xmodule.modulestore.django import modulestore
from opaque_keys.edx.locations import SlashSeparatedCourseKey from opaque_keys.edx.locations import SlashSeparatedCourseKey
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.xml_importer import import_from_xml from xmodule.modulestore.xml_importer import import_from_xml
from contentserver.middleware import parse_range_header
from student.models import CourseEnrollment
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
@ddt.ddt
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE) @override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
class ContentStoreToyCourseTest(ModuleStoreTestCase): class ContentStoreToyCourseTest(ModuleStoreTestCase):
""" """
...@@ -137,55 +141,95 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): ...@@ -137,55 +141,95 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
first=first_byte, last=last_byte, length=self.length_unlocked)) first=first_byte, last=last_byte, length=self.length_unlocked))
self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1)) self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1))
def test_range_request_malformed_missing_equal(self): def test_range_request_multiple_ranges(self):
"""
Test that a range request with malformed Range (missing '=') outputs status 400.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_not_bytes(self):
""" """
Test that a range request with malformed Range (not "bytes") outputs status 400. Test that multiple ranges in request outputs the full content.
"Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
""" """
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-') first_byte = self.length_unlocked / 4
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST last_byte = self.length_unlocked / 2
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}, -100'.format(
first=first_byte, last=last_byte)
)
def test_range_request_malformed_missing_minus(self): self.assertEqual(resp.status_code, 200)
""" self.assertNotIn('Content-Range', resp)
Test that a range request with malformed Range (missing '-') outputs status 400. self.assertEqual(resp['Content-Length'], str(self.length_unlocked))
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_first_not_integer(self): @ddt.data(
'bytes 0-',
'bits=0-',
'bytes=0',
'bytes=one-',
)
def test_syntax_errors_in_range(self, header_value):
""" """
Test that a range request with malformed Range (first is not an integer) outputs status 400. Test that syntactically invalid Range values result in a 200 OK full content response.
""" """
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-') resp = self.client.get(self.url_unlocked, HTTP_RANGE=header_value)
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
def test_range_request_malformed_invalid_range(self): def test_range_request_malformed_invalid_range(self):
""" """
Test that a range request with malformed Range (first_byte > last_byte) outputs status 400. Test that a range request with malformed Range (first_byte > last_byte) outputs
416 Requested Range Not Satisfiable.
""" """
first_byte = self.length_unlocked / 2
last_byte = self.length_unlocked / 4
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
first=first_byte, last=last_byte) first=(self.length_unlocked / 2), last=(self.length_unlocked / 4))
) )
self.assertEqual(resp.status_code, 416)
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_out_of_bounds(self): def test_range_request_malformed_out_of_bounds(self):
""" """
Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error) Test that a range request with malformed Range (first_byte, last_byte == totalLength, offset by 1 error)
outputs status 400. outputs 416 Requested Range Not Satisfiable.
""" """
last_byte = self.length_unlocked resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format( first=(self.length_unlocked), last=(self.length_unlocked))
last=last_byte)
) )
self.assertEqual(resp.status_code, 416)
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST @ddt.ddt
class ParseRangeHeaderTestCase(unittest.TestCase):
"""
Tests for the parse_range_header function.
"""
def setUp(self):
self.content_length = 10000
def test_bytes_unit(self):
unit, __ = parse_range_header('bytes=100-', self.content_length)
self.assertEqual(unit, 'bytes')
@ddt.data(
('bytes=100-', 1, [(100, 9999)]),
('bytes=1000-', 1, [(1000, 9999)]),
('bytes=100-199, 200-', 2, [(100, 199), (200, 9999)]),
('bytes=100-199, 200-499', 2, [(100, 199), (200, 499)]),
('bytes=-100', 1, [(9900, 9999)]),
('bytes=-100, -200', 2, [(9900, 9999), (9800, 9999)])
)
@ddt.unpack
def test_valid_syntax(self, header_value, excepted_ranges_length, expected_ranges):
__, ranges = parse_range_header(header_value, self.content_length)
self.assertEqual(len(ranges), excepted_ranges_length)
self.assertEqual(ranges, expected_ranges)
@ddt.data(
('bytes=one-20', ValueError, 'invalid literal for int()'),
('bytes=-one', ValueError, 'invalid literal for int()'),
('bytes=-', ValueError, 'invalid literal for int()'),
('bytes=--', ValueError, 'invalid literal for int()'),
('bytes', ValueError, 'Invalid syntax'),
('bytes=', ValueError, 'Invalid syntax'),
('bytes=0', ValueError, 'Invalid syntax'),
('bytes=0-10,0', ValueError, 'Invalid syntax'),
('bytes=0=', ValueError, 'too many values to unpack'),
)
@ddt.unpack
def test_invalid_syntax(self, header_value, exception_class, exception_message_regex):
self.assertRaisesRegexp(
exception_class, exception_message_regex, parse_range_header, header_value, self.content_length
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment