middleware.py 8.79 KB
Newer Older
1 2 3 4 5 6
"""
Middleware to serve assets.
"""

import logging

7 8 9
from django.http import (
    HttpResponse, HttpResponseNotModified, HttpResponseForbidden
)
10
from student.models import CourseEnrollment
11 12

from xmodule.contentstore.django import contentstore
13
from xmodule.contentstore.content import StaticContent, XASSET_LOCATION_TAG
14 15
from xmodule.modulestore import InvalidLocationError
from opaque_keys import InvalidKeyError
16
from opaque_keys.edx.locator import AssetLocator
17 18 19
from cache_toolbox.core import get_cached_content, set_cached_content
from xmodule.exceptions import NotFoundError

20 21
# TODO: As soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
# to change this file so that, instead of using course_id_partial, we just use asset keys.
22

23
log = logging.getLogger(__name__)
24

25 26
class StaticContentServer(object):
    """
    Middleware that serves course assets for requests whose path carries an asset prefix.
    """

    def process_request(self, request):
        """
        Serve the asset addressed by request.path, or return None to let the request continue.

        Responses produced here:
          * 400 when the path cannot be turned into a location
          * 404 when the contentstore has no such asset
          * 403 when the asset is locked and the user is anonymous, or is neither staff
            nor enrolled in the asset's course
          * 304 when If-Modified-Since equals the asset's last-modified string
          * 206 for a single satisfiable byte range, 416 for a single unsatisfiable one
          * otherwise the full content
        """
        # Anything that does not start with one of the asset prefixes is not ours to handle.
        is_asset_path = (
            request.path.startswith('/' + XASSET_LOCATION_TAG + '/') or
            request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE)
        )
        if not is_asset_path:
            return None

        try:
            loc = StaticContent.get_location_from_path(request.path)
        except (InvalidLocationError, InvalidKeyError):
            # Malformed Location: tell the browser it sent a 'Bad Request'.
            bad_request = HttpResponse()
            bad_request.status_code = 400
            return bad_request

        # Try the cache first so we can skip the round-trip to the DB.
        content = get_cached_content(loc)
        if content is None:
            # Cache miss: fetch from the DB as a stream.
            try:
                content = contentstore().find(loc, as_stream=True)
            except NotFoundError:
                not_found = HttpResponse()
                not_found.status_code = 404
                return not_found

            # Only content under 1MB is cached: it has to be read fully into memory first,
            # since no way was found to stream data back out of memcached.
            if content.length is not None and content.length < 1048576:
                content = content.copy_to_in_mem()
                set_cached_content(content)
        # (A cache hit needs no extra work; a "cache-hit" counter could be added here later.)

        # Locked content is restricted to authenticated staff or enrolled users.
        if getattr(content, "locked", False):
            if not (hasattr(request, "user") and request.user.is_authenticated()):
                return HttpResponseForbidden('Unauthorized')
            if not request.user.is_staff:
                # Deprecated locations use the partial-key enrollment lookup.
                if getattr(loc, 'deprecated', False):
                    enrolled = CourseEnrollment.is_enrolled_by_partial(request.user, loc.course_key)
                else:
                    enrolled = CourseEnrollment.is_enrolled(request.user, loc.course_key)
                if not enrolled:
                    return HttpResponseForbidden('Unauthorized')

        # Render the persisted last-modified timestamp as a string so that it can be
        # compared directly with what the client sent.
        last_modified_at_str = content.last_modified_at.strftime("%a, %d-%b-%Y %H:%M:%S GMT")

        # If the client's cached copy carries the same timestamp, answer 304 (Not Modified).
        if request.META.get('HTTP_IF_MODIFIED_SINCE') == last_modified_at_str:
            return HttpResponseNotModified()

        # *** File streaming within a byte range ***
        # Request  -> "Range: bytes=first-[last]"
        # Response -> "Content-Range: bytes first-last/totalLength"
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
        response = None
        header_value = request.META.get('HTTP_RANGE')
        if header_value:
            # Cached data (StaticContent) has no easy byte management, so go back to the DB
            # for a stream. The exact-type comparison is kept as written.
            # NOTE(review): confirm how the stream type relates to StaticContent before
            # replacing this with isinstance().
            if type(content) == StaticContent:
                content = contentstore().find(loc, as_stream=True)

            try:
                unit, ranges = parse_range_header(header_value, content.length)
            except ValueError as exception:
                # A syntactically invalid header is ignored; the full content is sent below.
                log.exception(
                    u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
                )
            else:
                if unit != 'bytes':
                    # Only byte ranges are accepted.
                    log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
                elif len(ranges) > 1:
                    # HTTP/1.1 wants multiple ranges sent as a multipart message
                    # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16),
                    # but we send back the full content instead.
                    log.warning(
                        u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
                    )
                else:
                    first, last = ranges[0]
                    satisfiable = 0 <= first <= last < content.length
                    if not satisfiable:
                        log.warning(
                            u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
                        )
                        return HttpResponse(status=416)  # Requested Range Not Satisfiable

                    response = HttpResponse(content.stream_data_in_range(first, last))
                    response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                        first=first, last=last, length=content.length
                    )
                    response['Content-Length'] = str(last - first + 1)
                    response.status_code = 206  # Partial Content

        # No Range header, or one that was ignored above: send everything.
        if response is None:
            response = HttpResponse(content.stream_data())
            response['Content-Length'] = content.length

        # "Accept-Ranges: bytes" tells the client that only "bytes" ranges are allowed.
        response['Accept-Ranges'] = 'bytes'
        response['Content-Type'] = content.content_type
        response['Last-Modified'] = last_modified_at_str

        return response
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188


def parse_range_header(header_value, content_length):
    """
    Split a Range header value into its unit and a list of inclusive (first, last) tuples.

    Suffix ranges ("-500") and open-ended ranges ("500-") are resolved against
    content_length; an explicit last position is capped at content_length - 1.

    Raises ValueError if the value has no "=" or more than one, if any range lacks a "-",
    has too many "-" separated parts, or holds a position that is not an integer.

    See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
    """
    # Without a "unit=" prefix there is nothing to parse.
    if '=' not in header_value:
        raise ValueError('Invalid syntax')

    unit, range_specs = header_value.split('=')

    def resolve(spec):
        """Turn one stripped range spec into a (first, last) tuple."""
        if '-' not in spec:
            # Not a range at all.
            raise ValueError('Invalid syntax.')

        if spec.startswith('-'):
            # Suffix form, e.g. "-500": the spec itself parses as a negative offset.
            start = max(0, (content_length + int(spec)))
            return start, content_length - 1

        if spec.endswith('-'):
            # Open-ended form, e.g. "500-": runs to the final byte.
            start = int(spec[0:-1])
            return start, content_length - 1

        # Fully specified form, e.g. "500-999".
        start_text, end_text = spec.split('-')
        start = int(start_text)
        end = min(int(end_text), content_length - 1)
        return start, end

    return unit, [resolve(raw_spec.strip()) for raw_spec in range_specs.split(',')]