Commit 9967b6fd by Kevin Falcone

Revert "[PERF-325] Add versioned course asset URLs when canonicalizing asset paths."

We're seeing errors in NR from objects read out of the cache lacking the
'StaticContent' object has no attribute 'content_digest'
File "/edx/app/edxapp/edx-platform/common/djangoapps/contentserver/middleware.py",
    line 70, in process_request

This reverts commit 849ebc5f.
parent 8f69f62c
......@@ -1000,7 +1000,7 @@ class MiscCourseTests(ContentStoreTestCase):
3) computing thumbnail location of asset
4) deleting the asset from the course
"""
asset_key = self.course.id.make_asset_key('asset', 'sample_static.html')
asset_key = self.course.id.make_asset_key('asset', 'sample_static.txt')
content = StaticContent(
asset_key, "Fake asset", "application/text", "test",
)
......@@ -1072,7 +1072,7 @@ class MiscCourseTests(ContentStoreTestCase):
draft content is also deleted
"""
# add an asset
asset_key = self.course.id.make_asset_key('asset', 'sample_static.html')
asset_key = self.course.id.make_asset_key('asset', 'sample_static.txt')
content = StaticContent(
asset_key, "Fake asset", "application/text", "test",
)
......
......@@ -121,7 +121,7 @@ class CourseTestCase(ProceduralCourseTestMixin, ModuleStoreTestCase):
SEQUENTIAL = 'vertical_sequential'
DRAFT_HTML = 'draft_html'
DRAFT_VIDEO = 'draft_video'
LOCKED_ASSET_KEY = AssetLocation.from_deprecated_string('/c4x/edX/toy/asset/sample_static.html')
LOCKED_ASSET_KEY = AssetLocation.from_deprecated_string('/c4x/edX/toy/asset/sample_static.txt')
def import_and_populate_course(self):
"""
......
......@@ -126,7 +126,7 @@ class BasicAssetsTestCase(AssetsTestCase):
)
course = module_store.get_course(course_id)
filename = 'sample_static.html'
filename = 'sample_static.txt'
html_src_attribute = '"/static/{}"'.format(filename)
asset_url = replace_static_urls(html_src_attribute, course_id=course.id)
url = asset_url.replace('"', '')
......@@ -379,7 +379,7 @@ class LockAssetTestCase(AssetsTestCase):
"""
def verify_asset_locked_state(locked):
""" Helper method to verify lock state in the contentstore """
asset_location = StaticContent.get_location_from_path('/c4x/edX/toy/asset/sample_static.html')
asset_location = StaticContent.get_location_from_path('/c4x/edX/toy/asset/sample_static.txt')
content = contentstore().find(asset_location)
self.assertEqual(content.locked, locked)
......@@ -387,14 +387,14 @@ class LockAssetTestCase(AssetsTestCase):
""" Helper method for posting asset update. """
content_type = 'application/txt'
upload_date = datetime(2013, 6, 1, 10, 30, tzinfo=UTC)
asset_location = course.id.make_asset_key('asset', 'sample_static.html')
asset_location = course.id.make_asset_key('asset', 'sample_static.txt')
url = reverse_course_url('assets_handler', course.id, kwargs={'asset_key_string': unicode(asset_location)})
resp = self.client.post(
url,
# pylint: disable=protected-access
json.dumps(assets._get_asset_json(
"sample_static.html", content_type, upload_date, asset_location, None, lock)),
"sample_static.txt", content_type, upload_date, asset_location, None, lock)),
"application/json"
)
......
......@@ -3,11 +3,12 @@ Middleware to serve assets.
"""
import logging
import datetime
import newrelic.agent
from django.http import (
HttpResponse, HttpResponseNotModified, HttpResponseForbidden,
HttpResponseBadRequest, HttpResponseNotFound, HttpResponsePermanentRedirect)
HttpResponseBadRequest, HttpResponseNotFound)
from student.models import CourseEnrollment
from contentserver.models import CourseAssetCacheTtlConfig, CdnUserAgentsConfig
......@@ -29,54 +30,32 @@ HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"
class StaticContentServer(object):
"""
Serves course assets to end users. Colloquially referred to as "contentserver."
"""
def is_asset_request(self, request):
"""Determines whether the given request is an asset request"""
return (
request.path.startswith('/' + XASSET_LOCATION_TAG + '/')
or
request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE)
or
StaticContent.is_versioned_asset_path(request.path)
)
def process_request(self, request):
"""Process the given request"""
asset_path = request.path
if self.is_asset_request(request):
# Make sure we can convert this request into a location.
if AssetLocator.CANONICAL_NAMESPACE in asset_path:
asset_path = asset_path.replace('block/', 'block@', 1)
# If this is a versioned request, pull out the digest and chop off the prefix.
requested_digest = None
if StaticContent.is_versioned_asset_path(asset_path):
requested_digest, asset_path = StaticContent.parse_versioned_asset_path(asset_path)
# Make sure we have a valid location value for this asset.
if AssetLocator.CANONICAL_NAMESPACE in request.path:
request.path = request.path.replace('block/', 'block@', 1)
try:
loc = StaticContent.get_location_from_path(asset_path)
loc = StaticContent.get_location_from_path(request.path)
except (InvalidLocationError, InvalidKeyError):
return HttpResponseBadRequest()
# Attempt to load the asset to make sure it exists, and grab the asset digest
# if we're able to load it.
actual_digest = None
# Try and load the asset.
content = None
try:
content = self.load_asset_from_location(loc)
actual_digest = content.content_digest
except (ItemNotFoundError, NotFoundError):
return HttpResponseNotFound()
# If this was a versioned asset, and the digest doesn't match, redirect
# them to the actual version.
if requested_digest is not None and (actual_digest != requested_digest):
actual_asset_path = StaticContent.add_version_to_asset_path(asset_path, actual_digest)
return HttpResponsePermanentRedirect(actual_asset_path)
# Set the basics for this request. Make sure that the course key for this
# asset has a run, which old-style courses do not. Otherwise, this will
# explode when the key is serialized to be sent to NR.
......
......@@ -16,13 +16,9 @@ from django.test.utils import override_settings
from mock import patch
from xmodule.contentstore.django import contentstore
from xmodule.contentstore.content import StaticContent
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
from xmodule.modulestore.xml_importer import import_course_from_xml
from xmodule.assetstore.assetmgr import AssetManager
from opaque_keys import InvalidKeyError
from xmodule.modulestore.exceptions import ItemNotFoundError
from contentserver.middleware import parse_range_header, HTTP_DATE_FORMAT, StaticContentServer
from student.models import CourseEnrollment
......@@ -32,23 +28,8 @@ log = logging.getLogger(__name__)
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
TEST_DATA_DIR = settings.COMMON_TEST_DATA_ROOT
FAKE_MD5_HASH = 'ffffffffffffffffffffffffffffffff'
def get_versioned_asset_url(asset_path):
"""
Creates a versioned asset URL.
"""
try:
locator = StaticContent.get_location_from_path(asset_path)
content = AssetManager.find(locator, as_stream=True)
return StaticContent.add_version_to_asset_path(asset_path, content.content_digest)
except (InvalidKeyError, ItemNotFoundError):
pass
return asset_path
TEST_DATA_DIR = settings.COMMON_TEST_DATA_ROOT
@ddt.ddt
......@@ -73,15 +54,13 @@ class ContentStoreToyCourseTest(SharedModuleStoreTestCase):
)
# A locked asset
cls.locked_asset = cls.course_key.make_asset_key('asset', 'sample_static.html')
cls.locked_asset = cls.course_key.make_asset_key('asset', 'sample_static.txt')
cls.url_locked = unicode(cls.locked_asset)
cls.url_locked_versioned = get_versioned_asset_url(cls.url_locked)
cls.contentstore.set_attr(cls.locked_asset, 'locked', True)
# An unlocked asset
cls.unlocked_asset = cls.course_key.make_asset_key('asset', 'another_static.txt')
cls.url_unlocked = unicode(cls.unlocked_asset)
cls.url_unlocked_versioned = get_versioned_asset_url(cls.url_unlocked)
cls.length_unlocked = cls.contentstore.get_attr(cls.unlocked_asset, 'length')
def setUp(self):
......@@ -102,37 +81,6 @@ class ContentStoreToyCourseTest(SharedModuleStoreTestCase):
resp = self.client.get(self.url_unlocked)
self.assertEqual(resp.status_code, 200)
def test_unlocked_versioned_asset(self):
"""
Test that unlocked assets that are versioned are being served.
"""
self.client.logout()
resp = self.client.get(self.url_unlocked_versioned)
self.assertEqual(resp.status_code, 200)
def test_unlocked_versioned_asset_with_nonexistent_version(self):
"""
Test that unlocked assets that are versioned, but have a nonexistent version,
are sent back as a 301 redirect which tells the caller the correct URL.
"""
url_unlocked_versioned_old = StaticContent.add_version_to_asset_path(self.url_unlocked, FAKE_MD5_HASH)
self.client.logout()
resp = self.client.get(url_unlocked_versioned_old)
self.assertEqual(resp.status_code, 301)
self.assertTrue(resp.url.endswith(self.url_unlocked_versioned)) # pylint: disable=no-member
def test_locked_versioned_asset(self):
"""
Test that locked assets that are versioned are being served.
"""
CourseEnrollment.enroll(self.non_staff_usr, self.course_key)
self.assertTrue(CourseEnrollment.is_enrolled(self.non_staff_usr, self.course_key))
self.client.login(username=self.non_staff_usr, password='test')
resp = self.client.get(self.url_locked_versioned)
self.assertEqual(resp.status_code, 200)
def test_locked_asset_not_logged_in(self):
"""
Test that locked assets behave appropriately in case the user is not
......
......@@ -5,16 +5,16 @@ from xmodule.assetstore.assetmgr import AssetManager
XASSET_LOCATION_TAG = 'c4x'
XASSET_SRCREF_PREFIX = 'xasset:'
XASSET_THUMBNAIL_TAIL_NAME = '.jpg'
STREAM_DATA_CHUNK_SIZE = 1024
VERSIONED_ASSETS_PREFIX = '/assets/courseware'
VERSIONED_ASSETS_PATTERN = r'/assets/courseware/([a-f0-9]{32})'
import os
import logging
import StringIO
from urlparse import urlparse, urlunparse, parse_qsl
from urllib import urlencode, quote_plus
from urllib import urlencode
from opaque_keys.edx.locator import AssetLocator
from opaque_keys.edx.keys import CourseKey, AssetKey
......@@ -26,7 +26,7 @@ from PIL import Image
class StaticContent(object):
def __init__(self, loc, name, content_type, data, last_modified_at=None, thumbnail_location=None, import_path=None,
length=None, locked=False, content_digest=None):
length=None, locked=False):
self.location = loc
self.name = name # a display string which can be edited, and thus not part of the location which needs to be fixed
self.content_type = content_type
......@@ -38,7 +38,6 @@ class StaticContent(object):
# cycles
self.import_path = import_path
self.locked = locked
self.content_digest = content_digest
@property
def is_thumbnail(self):
......@@ -147,40 +146,6 @@ class StaticContent(object):
return AssetKey.from_string(path[1:])
@staticmethod
def is_versioned_asset_path(path):
"""Determines whether the given asset path is versioned."""
return path.startswith(VERSIONED_ASSETS_PREFIX)
@staticmethod
def parse_versioned_asset_path(path):
"""
Examines an asset path and breaks it apart if it is versioned,
returning both the asset digest and the unversioned asset path,
which will normally be an AssetKey.
"""
asset_digest = None
asset_path = path
if StaticContent.is_versioned_asset_path(asset_path):
result = re.match(VERSIONED_ASSETS_PATTERN, asset_path)
if result is not None:
asset_digest = result.groups()[0]
asset_path = re.sub(VERSIONED_ASSETS_PATTERN, '', asset_path)
return (asset_digest, asset_path)
@staticmethod
def add_version_to_asset_path(path, version):
"""
Adds a prefix to an asset path indicating the asset's version.
"""
# Don't version an already-versioned path.
if StaticContent.is_versioned_asset_path(path):
return path
return VERSIONED_ASSETS_PREFIX + '/' + version + path
@staticmethod
def get_asset_key_from_path(course_key, path):
"""
Parses a path, extracting an asset key or creating one.
......@@ -207,16 +172,7 @@ class StaticContent(object):
return StaticContent.compute_location(course_key, path)
@staticmethod
def is_excluded_asset_type(path, excluded_exts):
"""
Check if this is an allowed file extension to serve.
Some files aren't served through the CDN in order to avoid same-origin policy/CORS-related issues.
"""
return any(path.lower().endswith(excluded_ext.lower()) for excluded_ext in excluded_exts)
@staticmethod
def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts, encode=True):
def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts):
"""
Returns a fully-qualified path to a piece of static content.
......@@ -232,27 +188,25 @@ class StaticContent(object):
"""
# Break down the input path.
_, _, relative_path, params, query_string, _ = urlparse(path)
_, _, relative_path, params, query_string, fragment = urlparse(path)
# Convert our path to an asset key if it isn't one already.
asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path)
# Check the status of the asset to see if this can be served via CDN aka publicly.
serve_from_cdn = False
content_digest = None
try:
content = AssetManager.find(asset_key, as_stream=True)
serve_from_cdn = not getattr(content, "locked", True)
content_digest = content.content_digest
is_locked = getattr(content, "locked", True)
serve_from_cdn = not is_locked
except (ItemNotFoundError, NotFoundError):
# If we can't find the item, just treat it as if it's locked.
serve_from_cdn = False
# Do a generic check to see if anything about this asset disqualifies it from being CDN'd.
is_excluded = False
if StaticContent.is_excluded_asset_type(relative_path, excluded_exts):
# See if this is an allowed file extension to serve. Some files aren't served through the
# CDN in order to avoid same-origin policy/CORS-related issues.
if any(relative_path.lower().endswith(excluded_ext.lower()) for excluded_ext in excluded_exts):
serve_from_cdn = False
is_excluded = True
# Update any query parameter values that have asset paths in them. This is for assets that
# require their own after-the-fact values, like a Flash file that needs the path of a config
......@@ -261,29 +215,15 @@ class StaticContent(object):
updated_query_params = []
for query_name, query_val in query_params:
if query_val.startswith("/static/"):
new_val = StaticContent.get_canonicalized_asset_path(
course_key, query_val, base_url, excluded_exts, encode=False)
new_val = StaticContent.get_canonicalized_asset_path(course_key, query_val, base_url, excluded_exts)
updated_query_params.append((query_name, new_val))
else:
# Make sure we're encoding Unicode strings down to their byte string
# representation so that `urlencode` can handle it.
updated_query_params.append((query_name, query_val.encode('utf-8')))
updated_query_params.append((query_name, query_val))
serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key)
base_url = base_url if serve_from_cdn else ''
asset_path = serialized_asset_key
# If the content has a digest (i.e. md5sum) value specified, create a versioned path to the asset using it.
if not is_excluded and content_digest:
asset_path = StaticContent.add_version_to_asset_path(serialized_asset_key, content_digest)
# Only encode this if told to. Important so that we don't double encode
# when working with paths that are in query parameters.
asset_path = asset_path.encode('utf-8')
if encode:
asset_path = quote_plus(asset_path, '/:+@')
return urlunparse((None, base_url.encode('utf-8'), asset_path, params, urlencode(updated_query_params), None))
return urlunparse((None, base_url, serialized_asset_key, params, urlencode(updated_query_params), fragment))
def stream_data(self):
yield self._data
......@@ -302,10 +242,10 @@ class StaticContent(object):
class StaticContentStream(StaticContent):
def __init__(self, loc, name, content_type, stream, last_modified_at=None, thumbnail_location=None, import_path=None,
length=None, locked=False, content_digest=None):
length=None, locked=False):
super(StaticContentStream, self).__init__(loc, name, content_type, None, last_modified_at=last_modified_at,
thumbnail_location=thumbnail_location, import_path=import_path,
length=length, locked=locked, content_digest=content_digest)
length=length, locked=locked)
self._stream = stream
def stream_data(self):
......@@ -337,8 +277,7 @@ class StaticContentStream(StaticContent):
self._stream.seek(0)
content = StaticContent(self.location, self.name, self.content_type, self._stream.read(),
last_modified_at=self.last_modified_at, thumbnail_location=self.thumbnail_location,
import_path=self.import_path, length=self.length, locked=self.locked,
content_digest=self.content_digest)
import_path=self.import_path, length=self.length, locked=self.locked)
return content
......
......@@ -128,8 +128,7 @@ class MongoContentStore(ContentStore):
location, fp.displayname, fp.content_type, fp, last_modified_at=fp.uploadDate,
thumbnail_location=thumbnail_location,
import_path=getattr(fp, 'import_path', None),
length=fp.length, locked=getattr(fp, 'locked', False),
content_digest=getattr(fp, 'md5', None),
length=fp.length, locked=getattr(fp, 'locked', False)
)
else:
with self.fs.get(content_id) as fp:
......@@ -143,8 +142,7 @@ class MongoContentStore(ContentStore):
location, fp.displayname, fp.content_type, fp.read(), last_modified_at=fp.uploadDate,
thumbnail_location=thumbnail_location,
import_path=getattr(fp, 'import_path', None),
length=fp.length, locked=getattr(fp, 'locked', False),
content_digest=getattr(fp, 'md5', None),
length=fp.length, locked=getattr(fp, 'locked', False)
)
except NoFile:
if throw_on_not_found:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment