Commit ed84c1b2 by Usman Khalid

Merge pull request #5246 from edx/usman/plat104-static-content-server

Improvements to byte range handling in StaticContentServer.
parents 44f952da 5af162eb
"""
Middleware to serve assets.
"""
import logging
from django.http import (
HttpResponse, HttpResponseNotModified, HttpResponseForbidden
)
......@@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError
# TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
# to change this file so instead of using course_id_partial, we're just using asset keys
log = logging.getLogger(__name__)
class StaticContentServer(object):
def process_request(self, request):
......@@ -82,53 +89,100 @@ class StaticContentServer(object):
# Add Content-Range in the response if Range is structurally correct
# Request -> Range attribute structure: "Range: bytes=first-[last]"
# Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
response = None
if request.META.get('HTTP_RANGE'):
# Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
if type(content) == StaticContent:
content = contentstore().find(loc, as_stream=True)
# Let's parse the Range header, bytes=first-[last]
range_header = request.META['HTTP_RANGE']
if '=' in range_header:
unit, byte_range = range_header.split('=')
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
if unit == 'bytes' and '-' in byte_range:
first, last = byte_range.split('-')
# "first" must be a valid integer
try:
first = int(first)
except ValueError:
pass
if type(first) is int:
# "last" default value is the last byte of the file
# Users can ask "bytes=0-" to request the whole file when they don't know the length
try:
last = int(last)
except ValueError:
last = content.length - 1
if 0 <= first <= last < content.length:
# Valid Range attribute
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # HTTP_206_PARTIAL_CONTENT
if not response:
# Malformed Range attribute
response = HttpResponse()
response.status_code = 400 # HTTP_400_BAD_REQUEST
return response
header_value = request.META['HTTP_RANGE']
try:
unit, ranges = parse_range_header(header_value, content.length)
except ValueError as exception:
# If the header field is syntactically invalid it should be ignored.
log.exception(
u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
)
else:
if unit != 'bytes':
# Only accept ranges in bytes
log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
elif len(ranges) > 1:
# According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
# But we send back the full content.
log.warning(
u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
)
else:
first, last = ranges[0]
else:
# No Range attribute
if 0 <= first <= last < content.length:
# If the byte range is satisfiable
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # Partial Content
else:
log.warning(
u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
)
return HttpResponse(status=416) # Requested Range Not Satisfiable
# If Range header is absent or syntactically invalid return a full content response.
if response is None:
response = HttpResponse(content.stream_data())
response['Content-Length'] = content.length
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
response['Accept-Ranges'] = 'bytes'
response['Content-Type'] = content.content_type
response['Last-Modified'] = last_modified_at_str
return response
def parse_range_header(header_value, content_length):
"""
Returns the unit and a list of (start, end) tuples of ranges.
Raises ValueError if header is syntactically invalid or does not contain a range.
See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
"""
unit = None
ranges = []
if '=' in header_value:
unit, byte_ranges_string = header_value.split('=')
# Parse the byte ranges.
for byte_range_string in byte_ranges_string.split(','):
byte_range_string = byte_range_string.strip()
# Case 0:
if '-' not in byte_range_string: # Invalid syntax of header value.
raise ValueError('Invalid syntax.')
# Case 1: -500
elif byte_range_string.startswith('-'):
first = max(0, (content_length + int(byte_range_string)))
last = content_length - 1
# Case 2: 500-
elif byte_range_string.endswith('-'):
first = int(byte_range_string[0:-1])
last = content_length - 1
# Case 3: 500-999
else:
first, last = byte_range_string.split('-')
first = int(first)
last = min(int(last), content_length - 1)
ranges.append((first, last))
if len(ranges) == 0:
raise ValueError('Invalid syntax')
return unit, ranges
......@@ -2,27 +2,31 @@
Tests for StaticContentServer
"""
import copy
import ddt
import logging
import unittest
from uuid import uuid4
from django.conf import settings
from django.test.client import Client
from django.test.utils import override_settings
from student.models import CourseEnrollment
from xmodule.contentstore.django import contentstore
from xmodule.modulestore.django import modulestore
from opaque_keys.edx.locations import SlashSeparatedCourseKey
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.xml_importer import import_from_xml
from contentserver.middleware import parse_range_header
from student.models import CourseEnrollment
log = logging.getLogger(__name__)
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
@ddt.ddt
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
class ContentStoreToyCourseTest(ModuleStoreTestCase):
"""
......@@ -137,55 +141,95 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
first=first_byte, last=last_byte, length=self.length_unlocked))
self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1))
def test_range_request_malformed_missing_equal(self):
"""
Test that a range request with malformed Range (missing '=') outputs status 400.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
def test_range_request_malformed_not_bytes(self):
def test_range_request_multiple_ranges(self):
"""
Test that a range request with malformed Range (not "bytes") outputs status 400.
"Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
Test that multiple ranges in request outputs the full content.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
first_byte = self.length_unlocked / 4
last_byte = self.length_unlocked / 2
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}, -100'.format(
first=first_byte, last=last_byte)
)
def test_range_request_malformed_missing_minus(self):
"""
Test that a range request with malformed Range (missing '-') outputs status 400.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
self.assertEqual(resp['Content-Length'], str(self.length_unlocked))
def test_range_request_malformed_first_not_integer(self):
@ddt.data(
'bytes 0-',
'bits=0-',
'bytes=0',
'bytes=one-',
)
def test_syntax_errors_in_range(self, header_value):
"""
Test that a range request with malformed Range (first is not an integer) outputs status 400.
Test that syntactically invalid Range values result in a 200 OK full content response.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-')
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
resp = self.client.get(self.url_unlocked, HTTP_RANGE=header_value)
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
def test_range_request_malformed_invalid_range(self):
"""
Test that a range request with malformed Range (first_byte > last_byte) outputs status 400.
Test that a range request with malformed Range (first_byte > last_byte) outputs
416 Requested Range Not Satisfiable.
"""
first_byte = self.length_unlocked / 2
last_byte = self.length_unlocked / 4
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
first=first_byte, last=last_byte)
first=(self.length_unlocked / 2), last=(self.length_unlocked / 4))
)
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
self.assertEqual(resp.status_code, 416)
def test_range_request_malformed_out_of_bounds(self):
"""
Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error)
outputs status 400.
Test that a range request with malformed Range (first_byte, last_byte == totalLength, offset by 1 error)
outputs 416 Requested Range Not Satisfiable.
"""
last_byte = self.length_unlocked
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format(
last=last_byte)
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
first=(self.length_unlocked), last=(self.length_unlocked))
)
self.assertEqual(resp.status_code, 416)
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
@ddt.ddt
class ParseRangeHeaderTestCase(unittest.TestCase):
"""
Tests for the parse_range_header function.
"""
def setUp(self):
self.content_length = 10000
def test_bytes_unit(self):
unit, __ = parse_range_header('bytes=100-', self.content_length)
self.assertEqual(unit, 'bytes')
@ddt.data(
('bytes=100-', 1, [(100, 9999)]),
('bytes=1000-', 1, [(1000, 9999)]),
('bytes=100-199, 200-', 2, [(100, 199), (200, 9999)]),
('bytes=100-199, 200-499', 2, [(100, 199), (200, 499)]),
('bytes=-100', 1, [(9900, 9999)]),
('bytes=-100, -200', 2, [(9900, 9999), (9800, 9999)])
)
@ddt.unpack
def test_valid_syntax(self, header_value, excepted_ranges_length, expected_ranges):
__, ranges = parse_range_header(header_value, self.content_length)
self.assertEqual(len(ranges), excepted_ranges_length)
self.assertEqual(ranges, expected_ranges)
@ddt.data(
('bytes=one-20', ValueError, 'invalid literal for int()'),
('bytes=-one', ValueError, 'invalid literal for int()'),
('bytes=-', ValueError, 'invalid literal for int()'),
('bytes=--', ValueError, 'invalid literal for int()'),
('bytes', ValueError, 'Invalid syntax'),
('bytes=', ValueError, 'Invalid syntax'),
('bytes=0', ValueError, 'Invalid syntax'),
('bytes=0-10,0', ValueError, 'Invalid syntax'),
('bytes=0=', ValueError, 'too many values to unpack'),
)
@ddt.unpack
def test_invalid_syntax(self, header_value, exception_class, exception_message_regex):
self.assertRaisesRegexp(
exception_class, exception_message_regex, parse_range_header, header_value, self.content_length
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment