"""
Middleware to serve assets.
"""
import logging

from django.http import (
    HttpResponse, HttpResponseNotModified, HttpResponseForbidden
)
from student.models import CourseEnrollment
from xmodule.assetstore.assetmgr import AssetManager
from xmodule.contentstore.content import StaticContent, XASSET_LOCATION_TAG
from xmodule.modulestore import InvalidLocationError
from opaque_keys import InvalidKeyError
from opaque_keys.edx.locator import AssetLocator
from cache_toolbox.core import get_cached_content, set_cached_content
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.exceptions import NotFoundError

# TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
# to change this file so instead of using course_id_partial, we're just using asset keys

log = logging.getLogger(__name__)

# HTTP-date in the RFC 1123 form that servers are required to generate
# (e.g. "Sun, 06 Nov 1994 08:49:37 GMT"). The date parts are separated by
# spaces, not dashes.
HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"


class StaticContentServer(object):
    """
    Middleware that answers requests for course assets directly.

    Requests whose path does not carry an asset prefix are left untouched
    (``process_request`` returns None for them).
    """

    def process_request(self, request):
        """
        Serve the asset addressed by ``request.path``, if it is an asset path.

        Returns:
            None when the path is not an asset path; otherwise an HTTP
            response with one of these statuses:

            * 400 - the path could not be parsed into a location
            * 404 - the asset was not found
            * 403 - the asset is locked and the user may not see it
            * 304 - the client's If-Modified-Since matches the asset timestamp
            * 206 - a single satisfiable byte range was requested
            * 416 - a single byte range was requested but cannot be satisfied
            * 200 - full content (also used when the Range header is
              invalid, uses an unknown unit, or lists several ranges)
        """
        # look to see if the request is prefixed with an asset prefix tag
        is_asset_request = (
            request.path.startswith('/' + XASSET_LOCATION_TAG + '/') or
            request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE)
        )
        if not is_asset_request:
            return None

        if AssetLocator.CANONICAL_NAMESPACE in request.path:
            request.path = request.path.replace('block/', 'block@', 1)

        try:
            loc = StaticContent.get_location_from_path(request.path)
        except (InvalidLocationError, InvalidKeyError):
            # return a 'Bad Request' to browser as we have a malformed Location
            response = HttpResponse()
            response.status_code = 400
            return response

        # first look in our cache so we don't have to round-trip to the DB
        content = get_cached_content(loc)
        if content is None:
            # nope, not in cache, let's fetch from DB
            try:
                content = AssetManager.find(loc, as_stream=True)
            except (ItemNotFoundError, NotFoundError):
                response = HttpResponse()
                response.status_code = 404
                return response

            # since we fetched it from DB, let's cache it going forward, but only if it's < 1MB
            # this is because I haven't been able to find a means to stream data out of memcached
            if content.length is not None:
                if content.length < 1048576:
                    # since we've queried as a stream, let's read in the stream into memory to set in cache
                    content = content.copy_to_in_mem()
                    set_cached_content(content)
        else:
            # NOP here, but we may wish to add a "cache-hit" counter in the future
            pass

        # Check that user has access to content
        if getattr(content, "locked", False):
            if not hasattr(request, "user") or not request.user.is_authenticated():
                return HttpResponseForbidden('Unauthorized')
            if not request.user.is_staff:
                # Deprecated locations are checked with the partial-key lookup,
                # all others with the regular enrollment lookup.
                if getattr(loc, 'deprecated', False):
                    is_enrolled = CourseEnrollment.is_enrolled_by_partial(
                        request.user, loc.course_key
                    )
                else:
                    is_enrolled = CourseEnrollment.is_enrolled(
                        request.user, loc.course_key
                    )
                if not is_enrolled:
                    return HttpResponseForbidden('Unauthorized')

        # convert over the DB persistent last modified timestamp to a HTTP compatible
        # timestamp, so we can simply compare the strings
        last_modified_at_str = content.last_modified_at.strftime(HTTP_DATE_FORMAT)

        # see if the client has cached this content, if so then compare the
        # timestamps, if they are the same then just return a 304 (Not Modified)
        if 'HTTP_IF_MODIFIED_SINCE' in request.META:
            if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
            if if_modified_since == last_modified_at_str:
                return HttpResponseNotModified()

        # *** File streaming within a byte range ***
        # If a Range is provided, parse Range attribute of the request
        # Add Content-Range in the response if Range is structurally correct
        # Request -> Range attribute structure: "Range: bytes=first-[last]"
        # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
        response = None
        if request.META.get('HTTP_RANGE'):
            # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
            # NOTE(review): the exact-type comparison is kept on purpose; if the
            # stream class derives from StaticContent, isinstance() would also
            # match streams and re-fetch them needlessly - confirm before changing.
            if type(content) == StaticContent:
                content = AssetManager.find(loc, as_stream=True)

            header_value = request.META['HTTP_RANGE']
            try:
                unit, ranges = parse_range_header(header_value, content.length)
            except ValueError as exception:
                # If the header field is syntactically invalid it should be ignored.
                log.exception(
                    u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
                )
            else:
                if unit != 'bytes':
                    # Only accept ranges in bytes
                    log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
                elif len(ranges) > 1:
                    # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                    # But we send back the full content.
                    log.warning(
                        u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
                    )
                else:
                    first, last = ranges[0]

                    if 0 <= first <= last < content.length:
                        # If the byte range is satisfiable
                        response = HttpResponse(content.stream_data_in_range(first, last))
                        response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                            first=first, last=last, length=content.length
                        )
                        response['Content-Length'] = str(last - first + 1)
                        response.status_code = 206  # Partial Content
                    else:
                        log.warning(
                            u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
                        )
                        return HttpResponse(status=416)  # Requested Range Not Satisfiable

        # If Range header is absent or syntactically invalid return a full content response.
        if response is None:
            response = HttpResponse(content.stream_data())
            response['Content-Length'] = content.length

        # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
        response['Accept-Ranges'] = 'bytes'
        response['Content-Type'] = content.content_type
        response['Last-Modified'] = last_modified_at_str

        return response


def parse_range_header(header_value, content_length):
    """
    Returns the unit and a list of (start, end) tuples of ranges.

    Each range is resolved against ``content_length``:

    * ``-N``  (last N bytes) -> (max(0, content_length - N), content_length - 1)
    * ``N-``  (from N on)    -> (N, content_length - 1)
    * ``N-M``                -> (N, min(M, content_length - 1))

    The returned ranges are not checked for satisfiability (e.g. start may
    exceed end); that is left to the caller.

    Raises ValueError if header is syntactically invalid or does not contain a range.

    See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
    """
    if '=' not in header_value:
        # No "<unit>=<ranges>" structure, hence no range at all.
        raise ValueError('Invalid syntax')

    # More than one '=' makes this unpacking raise ValueError, which is the
    # documented failure mode for malformed headers.
    unit, byte_ranges_string = header_value.split('=')

    ranges = []
    # Parse the byte ranges.
    for byte_range_string in byte_ranges_string.split(','):
        byte_range_string = byte_range_string.strip()
        # Case 0:
        if '-' not in byte_range_string:  # Invalid syntax of header value.
            raise ValueError('Invalid syntax.')
        # Case 1: -500
        elif byte_range_string.startswith('-'):
            # int() sees the leading minus, so this adds a negative offset.
            first = max(0, (content_length + int(byte_range_string)))
            last = content_length - 1
        # Case 2: 500-
        elif byte_range_string.endswith('-'):
            first = int(byte_range_string[0:-1])
            last = content_length - 1
        # Case 3: 500-999
        else:
            first, last = byte_range_string.split('-')
            first = int(first)
            last = min(int(last), content_length - 1)

        ranges.append((first, last))

    return unit, ranges