Commit 531347b4 by Toby Lawrence

Merge pull request #11277 from edx/PERF-251

Add the basis of configuring a cache TTL for course assets.
parents 7c8de429 01a9ad23
......@@ -306,6 +306,7 @@ simplefilter('ignore')
MIDDLEWARE_CLASSES = (
'request_cache.middleware.RequestCache',
'clean_headers.middleware.CleanHeadersMiddleware',
'django.middleware.cache.UpdateCacheMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
......@@ -749,6 +750,7 @@ INSTALLED_APPS = (
# For CMS
'contentstore',
'contentserver',
'course_creators',
'external_auth',
'student', # misleading name due to sharing with lms
......
"""
This middleware is used for cleaning headers from a response before it is sent to the end user.
Due to the nature of how middleware runs, a piece of middleware high in the chain cannot ensure
that response headers won't be present on the final response body, as middleware further down
the chain could be adding them.
This middleware is intended to sit as close as possible to the top of the list, so that it has
a chance on the reponse going out to strip the intended headers.
"""
def remove_headers_from_response(response, *headers):
"""Removes the given headers from the response using the clean_headers middleware."""
response.clean_headers = headers
"""
Middleware decorator for removing headers.
"""
from functools import wraps
def clean_headers(*headers):
"""
Decorator that removes any headers specified from the response.
Usage:
@clean_headers("Vary")
def myview(request):
...
The CleanHeadersMiddleware must be used and placed as closely as possible to the top
of the middleware chain, ideally after any caching middleware but before everything else.
This decorator is not safe for multiple uses: each call will overwrite any previously set values.
"""
def _decorator(func):
"""
Decorates the given function.
"""
@wraps(func)
def _inner(*args, **kwargs):
"""
Alters the response.
"""
response = func(*args, **kwargs)
response.clean_headers = headers
return response
return _inner
return _decorator
"""
Middleware used for cleaning headers from a response before it is sent to the end user.
"""
class CleanHeadersMiddleware(object):
"""
Middleware that can drop headers present in a response.
This can be used, for example, to remove headers i.e. drop any Vary headers to improve cache performance.
"""
def process_response(self, _request, response):
"""
Processes the given response, potentially stripping out any unwanted headers.
"""
if len(getattr(response, 'clean_headers', [])) > 0:
for header in response.clean_headers:
try:
del response[header]
except KeyError:
pass
return response
"""Tests for clean_headers decorator. """
from django.http import HttpResponse, HttpRequest
from django.test import TestCase
from clean_headers.decorators import clean_headers
def fake_view(_request):
"""Fake view that returns an empty response."""
return HttpResponse()
class TestCleanHeaders(TestCase):
"""Test the `clean_headers` decorator."""
def test_clean_headers(self):
request = HttpRequest()
wrapper = clean_headers('Vary', 'Accept-Encoding')
wrapped_view = wrapper(fake_view)
response = wrapped_view(request)
self.assertEqual(len(response.clean_headers), 2)
"""Tests for clean_headers middleware."""
from django.http import HttpResponse, HttpRequest
from django.test import TestCase
from clean_headers.middleware import CleanHeadersMiddleware
class TestCleanHeadersMiddlewareProcessResponse(TestCase):
"""Test the `clean_headers` middleware. """
def setUp(self):
super(TestCleanHeadersMiddlewareProcessResponse, self).setUp()
self.middleware = CleanHeadersMiddleware()
def test_cleans_intended_headers(self):
fake_request = HttpRequest()
fake_response = HttpResponse()
fake_response['Vary'] = 'Cookie'
fake_response['Accept-Encoding'] = 'gzip'
fake_response.clean_headers = ['Vary']
result = self.middleware.process_response(fake_request, fake_response)
self.assertNotIn('Vary', result)
self.assertEquals('gzip', result['Accept-Encoding'])
def test_does_not_mangle_undecorated_response(self):
fake_request = HttpRequest()
fake_response = HttpResponse()
fake_response['Vary'] = 'Cookie'
fake_response['Accept-Encoding'] = 'gzip'
result = self.middleware.process_response(fake_request, fake_response)
self.assertEquals('Cookie', result['Vary'])
self.assertEquals('gzip', result['Accept-Encoding'])
"""
Django admin page for CourseAssetCacheTtlConfig, which allows you to configure the TTL
that gets used when sending cachability headers back with request course assets.
"""
from django.contrib import admin
from config_models.admin import ConfigurationModelAdmin
from .models import CourseAssetCacheTtlConfig
class CourseAssetCacheTtlConfigAdmin(ConfigurationModelAdmin):
"""
Basic configuration for cache TTL.
"""
list_display = [
'cache_ttl'
]
def get_list_display(self, request):
"""
Restore default list_display behavior.
ConfigurationModelAdmin overrides this, but in a way that doesn't
respect the ordering. This lets us customize it the usual Django admin
way.
"""
return self.list_display
admin.site.register(CourseAssetCacheTtlConfig, CourseAssetCacheTtlConfigAdmin)
......@@ -4,11 +4,14 @@ Middleware to serve assets.
import logging
import datetime
from django.http import (
HttpResponse, HttpResponseNotModified, HttpResponseForbidden
)
HttpResponse, HttpResponseNotModified, HttpResponseForbidden,
HttpResponseBadRequest, HttpResponseNotFound)
from student.models import CourseEnrollment
from contentserver.models import CourseAssetCacheTtlConfig
from clean_headers import remove_headers_from_response
from xmodule.assetstore.assetmgr import AssetManager
from xmodule.contentstore.content import StaticContent, XASSET_LOCATION_TAG
from xmodule.modulestore import InvalidLocationError
......@@ -22,67 +25,43 @@ from xmodule.exceptions import NotFoundError
# to change this file so instead of using course_id_partial, we're just using asset keys
log = logging.getLogger(__name__)
HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"
class StaticContentServer(object):
def process_request(self, request):
# look to see if the request is prefixed with an asset prefix tag
if (
request.path.startswith('/' + XASSET_LOCATION_TAG + '/') or
def is_asset_request(self, request):
"""Determines whether the given request is an asset request"""
return (
request.path.startswith('/' + XASSET_LOCATION_TAG + '/')
or
request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE)
):
)
def process_request(self, request):
"""Process the given request"""
if self.is_asset_request(request):
# Make sure we can convert this request into a location.
if AssetLocator.CANONICAL_NAMESPACE in request.path:
request.path = request.path.replace('block/', 'block@', 1)
try:
loc = StaticContent.get_location_from_path(request.path)
except (InvalidLocationError, InvalidKeyError):
# return a 'Bad Request' to browser as we have a malformed Location
response = HttpResponse()
response.status_code = 400
return response
# first look in our cache so we don't have to round-trip to the DB
content = get_cached_content(loc)
if content is None:
# nope, not in cache, let's fetch from DB
try:
content = AssetManager.find(loc, as_stream=True)
except (ItemNotFoundError, NotFoundError):
response = HttpResponse()
response.status_code = 404
return response
# since we fetched it from DB, let's cache it going forward, but only if it's < 1MB
# this is because I haven't been able to find a means to stream data out of memcached
if content.length is not None:
if content.length < 1048576:
# since we've queried as a stream, let's read in the stream into memory to set in cache
content = content.copy_to_in_mem()
set_cached_content(content)
else:
# NOP here, but we may wish to add a "cache-hit" counter in the future
pass
# Check that user has access to content
if getattr(content, "locked", False):
if not hasattr(request, "user") or not request.user.is_authenticated():
return HttpResponseForbidden('Unauthorized')
if not request.user.is_staff:
if getattr(loc, 'deprecated', False) and not CourseEnrollment.is_enrolled_by_partial(
request.user, loc.course_key
):
return HttpResponseForbidden('Unauthorized')
if not getattr(loc, 'deprecated', False) and not CourseEnrollment.is_enrolled(
request.user, loc.course_key
):
return HttpResponseForbidden('Unauthorized')
# convert over the DB persistent last modified timestamp to a HTTP compatible
# timestamp, so we can simply compare the strings
last_modified_at_str = content.last_modified_at.strftime("%a, %d-%b-%Y %H:%M:%S GMT")
# see if the client has cached this content, if so then compare the
# timestamps, if they are the same then just return a 304 (Not Modified)
return HttpResponseBadRequest()
# Try and load the asset.
content = None
try:
content = self.load_asset_from_location(loc)
except (ItemNotFoundError, NotFoundError):
return HttpResponseNotFound()
# Check that user has access to the content.
if not self.is_user_authorized(request, content, loc):
return HttpResponseForbidden('Unauthorized')
# Figure out if the client sent us a conditional request, and let them know
# if this asset has changed since then.
last_modified_at_str = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
if 'HTTP_IF_MODIFIED_SINCE' in request.META:
if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
if if_modified_since == last_modified_at_str:
......@@ -96,7 +75,7 @@ class StaticContentServer(object):
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
response = None
if request.META.get('HTTP_RANGE'):
# Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
# If we have a StaticContent, get a StaticContentStream. Can't manipulate the bytes otherwise.
if type(content) == StaticContent:
content = AssetManager.find(loc, as_stream=True)
......@@ -144,10 +123,89 @@ class StaticContentServer(object):
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
response['Accept-Ranges'] = 'bytes'
response['Content-Type'] = content.content_type
response['Last-Modified'] = last_modified_at_str
# Set any caching headers, and do any response cleanup needed. Based on how much
# middleware we have in place, there's no easy way to use the built-in Django
# utilities and properly sanitize and modify a response to ensure that it is as
# cacheable as possible, which is why we do it ourselves.
self.set_caching_headers(content, response)
return response
def set_caching_headers(self, content, response):
"""
Sets caching headers based on whether or not the asset is locked.
"""
is_locked = getattr(content, "locked", False)
# We want to signal to the end user's browser, and to any intermediate proxies/caches,
# whether or not this asset is cacheable. If we have a TTL configured, we inform the
# caller, for unlocked assets, how long they are allowed to cache it. Since locked
# assets should be restricted to enrolled students, we simply send headers that
# indicate there should be no caching whatsoever.
cache_ttl = CourseAssetCacheTtlConfig.get_cache_ttl()
if cache_ttl > 0 and not is_locked:
response['Expires'] = StaticContentServer.get_expiration_value(datetime.datetime.utcnow(), cache_ttl)
response['Cache-Control'] = "public, max-age={ttl}, s-maxage={ttl}".format(ttl=cache_ttl)
elif is_locked:
response['Cache-Control'] = "private, no-cache, no-store"
response['Last-Modified'] = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
remove_headers_from_response(response, "Vary")
@staticmethod
def get_expiration_value(now, cache_ttl):
"""Generates an RFC1123 datetime string based on a future offset."""
expire_dt = now + datetime.timedelta(seconds=cache_ttl)
return expire_dt.strftime(HTTP_DATE_FORMAT)
def is_user_authorized(self, request, content, location):
"""
Determines whether or not the user for this request is authorized to view the given asset.
"""
is_locked = getattr(content, "locked", False)
if not is_locked:
return True
if not hasattr(request, "user") or not request.user.is_authenticated():
return False
if not request.user.is_staff:
deprecated = getattr(location, 'deprecated', False)
if deprecated and not CourseEnrollment.is_enrolled_by_partial(request.user, location.course_key):
return False
if not deprecated and not CourseEnrollment.is_enrolled(request.user, location.course_key):
return False
return True
def load_asset_from_location(self, location):
"""
Loads an asset based on its location, either retrieving it from a cache
or loading it directly from the contentstore.
"""
# See if we can load this item from cache.
content = get_cached_content(location)
if content is None:
# Not in cache, so just try and load it from the asset manager.
try:
content = AssetManager.find(location, as_stream=True)
except (ItemNotFoundError, NotFoundError):
raise
# Now that we fetched it, let's go ahead and try to cache it. We cap this at 1MB
# because it's the default for memcached and also we don't want to do too much
# buffering in memory when we're serving an actual request.
if content.length is not None and content.length < 1048576:
content = content.copy_to_in_mem()
set_cached_content(content)
return content
def parse_range_header(header_value, content_length):
"""
......
# -*- coding: utf-8 -*-
#pylint: skip-file
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
from django.conf import settings
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.CreateModel(
name='CourseAssetCacheTtlConfig',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('change_date', models.DateTimeField(auto_now_add=True, verbose_name='Change date')),
('enabled', models.BooleanField(default=False, verbose_name='Enabled')),
('cache_ttl', models.PositiveIntegerField(default=0, help_text=b'The time, in seconds, to report that a course asset is allowed to be cached for.')),
('changed_by', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, editable=False, to=settings.AUTH_USER_MODEL, null=True, verbose_name='Changed by')),
],
),
]
"""
Models for contentserver
"""
from django.db.models.fields import PositiveIntegerField
from config_models.models import ConfigurationModel
class CourseAssetCacheTtlConfig(ConfigurationModel):
"""Configuration for the TTL of course assets."""
class Meta(object):
app_label = 'contentserver'
cache_ttl = PositiveIntegerField(
default=0,
help_text="The time, in seconds, to report that a course asset is allowed to be cached for."
)
@classmethod
def get_cache_ttl(cls):
"""Gets the cache TTL for course assets, if present"""
return cls.current().cache_ttl
def __repr__(self):
return '<CourseAssetCacheTtlConfig(cache_ttl={})>'.format(self.get_cache_ttl())
def __unicode__(self):
return unicode(repr(self))
......@@ -2,6 +2,8 @@
Tests for StaticContentServer
"""
import copy
import datetime
import ddt
import logging
import unittest
......@@ -10,6 +12,7 @@ from uuid import uuid4
from django.conf import settings
from django.test.client import Client
from django.test.utils import override_settings
from mock import patch
from xmodule.contentstore.django import contentstore
from xmodule.modulestore.django import modulestore
......@@ -17,7 +20,7 @@ from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.xml_importer import import_course_from_xml
from contentserver.middleware import parse_range_header
from contentserver.middleware import parse_range_header, HTTP_DATE_FORMAT, StaticContentServer
from student.models import CourseEnrollment
log = logging.getLogger(__name__)
......@@ -136,8 +139,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
first_byte = self.length_unlocked / 4
last_byte = self.length_unlocked / 2
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
first=first_byte, last=last_byte)
)
first=first_byte, last=last_byte))
self.assertEqual(resp.status_code, 206) # HTTP_206_PARTIAL_CONTENT
self.assertEqual(resp['Content-Range'], 'bytes {first}-{last}/{length}'.format(
......@@ -151,8 +153,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
first_byte = self.length_unlocked / 4
last_byte = self.length_unlocked / 2
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}, -100'.format(
first=first_byte, last=last_byte)
)
first=first_byte, last=last_byte))
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Content-Range', resp)
......@@ -178,8 +179,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
416 Requested Range Not Satisfiable.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
first=(self.length_unlocked / 2), last=(self.length_unlocked / 4))
)
first=(self.length_unlocked / 2), last=(self.length_unlocked / 4)))
self.assertEqual(resp.status_code, 416)
def test_range_request_malformed_out_of_bounds(self):
......@@ -188,10 +188,88 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
outputs 416 Requested Range Not Satisfiable.
"""
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
first=(self.length_unlocked), last=(self.length_unlocked))
)
first=(self.length_unlocked), last=(self.length_unlocked)))
self.assertEqual(resp.status_code, 416)
@patch('contentserver.models.CourseAssetCacheTtlConfig.get_cache_ttl')
def test_cache_headers_with_ttl_unlocked(self, mock_get_cache_ttl):
"""
Tests that when a cache TTL is set, an unlocked asset will be sent back with
the correct cache control/expires headers.
"""
mock_get_cache_ttl.return_value = 10
resp = self.client.get(self.url_unlocked)
self.assertEqual(resp.status_code, 200)
self.assertIn('Expires', resp)
self.assertEquals('public, max-age=10, s-maxage=10', resp['Cache-Control'])
@patch('contentserver.models.CourseAssetCacheTtlConfig.get_cache_ttl')
def test_cache_headers_with_ttl_locked(self, mock_get_cache_ttl):
"""
Tests that when a cache TTL is set, a locked asset will be sent back without
any cache control/expires headers.
"""
mock_get_cache_ttl.return_value = 10
CourseEnrollment.enroll(self.non_staff_usr, self.course_key)
self.assertTrue(CourseEnrollment.is_enrolled(self.non_staff_usr, self.course_key))
self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd)
resp = self.client.get(self.url_locked)
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Expires', resp)
self.assertEquals('private, no-cache, no-store', resp['Cache-Control'])
@patch('contentserver.models.CourseAssetCacheTtlConfig.get_cache_ttl')
def test_cache_headers_without_ttl_unlocked(self, mock_get_cache_ttl):
"""
Tests that when a cache TTL is not set, an unlocked asset will be sent back without
any cache control/expires headers.
"""
mock_get_cache_ttl.return_value = 0
resp = self.client.get(self.url_unlocked)
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Expires', resp)
self.assertNotIn('Cache-Control', resp)
@patch('contentserver.models.CourseAssetCacheTtlConfig.get_cache_ttl')
def test_cache_headers_without_ttl_locked(self, mock_get_cache_ttl):
"""
Tests that when a cache TTL is not set, a locked asset will be sent back with a
cache-control header that indicates this asset should not be cached.
"""
mock_get_cache_ttl.return_value = 0
CourseEnrollment.enroll(self.non_staff_usr, self.course_key)
self.assertTrue(CourseEnrollment.is_enrolled(self.non_staff_usr, self.course_key))
self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd)
resp = self.client.get(self.url_locked)
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Expires', resp)
self.assertEquals('private, no-cache, no-store', resp['Cache-Control'])
def test_get_expiration_value(self):
start_dt = datetime.datetime.strptime("Thu, 01 Dec 1983 20:00:00 GMT", HTTP_DATE_FORMAT)
near_expire_dt = StaticContentServer.get_expiration_value(start_dt, 55)
self.assertEqual("Thu, 01 Dec 1983 20:00:55 GMT", near_expire_dt)
def test_response_no_vary_header_unlocked(self):
resp = self.client.get(self.url_unlocked)
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Vary', resp)
def test_response_no_vary_header_locked(self):
CourseEnrollment.enroll(self.non_staff_usr, self.course_key)
self.assertTrue(CourseEnrollment.is_enrolled(self.non_staff_usr, self.course_key))
self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd)
resp = self.client.get(self.url_locked)
self.assertEqual(resp.status_code, 200)
self.assertNotIn('Vary', resp)
@ddt.ddt
class ParseRangeHeaderTestCase(unittest.TestCase):
......
......@@ -1072,6 +1072,7 @@ simplefilter('ignore')
MIDDLEWARE_CLASSES = (
'request_cache.middleware.RequestCache',
'clean_headers.middleware.CleanHeadersMiddleware',
'microsite_configuration.middleware.MicrositeMiddleware',
'django_comment_client.middleware.AjaxExceptionMiddleware',
'django.middleware.common.CommonMiddleware',
......@@ -1756,6 +1757,9 @@ INSTALLED_APPS = (
'pipeline',
'static_replace',
# For content serving
'contentserver',
# Theming
'openedx.core.djangoapps.theming',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment