Commit 41ea1383 by David Ormsbee Committed by GitHub

Merge pull request #14399 from edx/ormsbee/csm_crawler_locking

Disable student state writes for crawlers.
parents 1c8cf285 608fceb0
...@@ -687,7 +687,7 @@ class FieldDataCache(object): ...@@ -687,7 +687,7 @@ class FieldDataCache(object):
A cache of django model objects needed to supply the data A cache of django model objects needed to supply the data
for a module and its descendants for a module and its descendants
""" """
def __init__(self, descriptors, course_id, user, select_for_update=False, asides=None): def __init__(self, descriptors, course_id, user, asides=None, read_only=False):
""" """
Find any courseware.models objects that are needed by any descriptor Find any courseware.models objects that are needed by any descriptor
in descriptors. Attempts to minimize the number of queries to the database. in descriptors. Attempts to minimize the number of queries to the database.
...@@ -698,8 +698,8 @@ class FieldDataCache(object): ...@@ -698,8 +698,8 @@ class FieldDataCache(object):
descriptors: A list of XModuleDescriptors. descriptors: A list of XModuleDescriptors.
course_id: The id of the current course course_id: The id of the current course
user: The user for which to cache data user: The user for which to cache data
select_for_update: Ignored
asides: The list of aside types to load, or None to prefetch no asides. asides: The list of aside types to load, or None to prefetch no asides.
read_only: We should not perform writes (they become a no-op).
""" """
if asides is None: if asides is None:
self.asides = [] self.asides = []
...@@ -709,6 +709,7 @@ class FieldDataCache(object): ...@@ -709,6 +709,7 @@ class FieldDataCache(object):
assert isinstance(course_id, CourseKey) assert isinstance(course_id, CourseKey)
self.course_id = course_id self.course_id = course_id
self.user = user self.user = user
self.read_only = read_only
self.cache = { self.cache = {
Scope.user_state: UserStateCache( Scope.user_state: UserStateCache(
...@@ -783,7 +784,7 @@ class FieldDataCache(object): ...@@ -783,7 +784,7 @@ class FieldDataCache(object):
@classmethod @classmethod
def cache_for_descriptor_descendents(cls, course_id, user, descriptor, depth=None, def cache_for_descriptor_descendents(cls, course_id, user, descriptor, depth=None,
descriptor_filter=lambda descriptor: True, descriptor_filter=lambda descriptor: True,
select_for_update=False, asides=None): asides=None, read_only=False):
""" """
course_id: the course in the context of which we want StudentModules. course_id: the course in the context of which we want StudentModules.
user: the django user for whom to load modules. user: the django user for whom to load modules.
...@@ -792,9 +793,8 @@ class FieldDataCache(object): ...@@ -792,9 +793,8 @@ class FieldDataCache(object):
the supplied descriptor. If depth is None, load all descendant StudentModules the supplied descriptor. If depth is None, load all descendant StudentModules
descriptor_filter is a function that accepts a descriptor and return whether the field data descriptor_filter is a function that accepts a descriptor and return whether the field data
should be cached should be cached
select_for_update: Ignored
""" """
cache = FieldDataCache([], course_id, user, select_for_update, asides=asides) cache = FieldDataCache([], course_id, user, asides=asides, read_only=read_only)
cache.add_descriptor_descendents(descriptor, depth, descriptor_filter) cache.add_descriptor_descendents(descriptor, depth, descriptor_filter)
return cache return cache
...@@ -840,6 +840,8 @@ class FieldDataCache(object): ...@@ -840,6 +840,8 @@ class FieldDataCache(object):
kv_dict (dict): dict mapping from `DjangoKeyValueStore.Key`s to field values kv_dict (dict): dict mapping from `DjangoKeyValueStore.Key`s to field values
Raises: DatabaseError if any fields fail to save Raises: DatabaseError if any fields fail to save
""" """
if self.read_only:
return
saved_fields = [] saved_fields = []
by_scope = defaultdict(dict) by_scope = defaultdict(dict)
...@@ -875,6 +877,8 @@ class FieldDataCache(object): ...@@ -875,6 +877,8 @@ class FieldDataCache(object):
Raises: KeyError if key isn't found in the cache Raises: KeyError if key isn't found in the cache
""" """
if self.read_only:
return
if key.scope.user == UserScope.ONE and not self.user.is_anonymous(): if key.scope.user == UserScope.ONE and not self.user.is_anonymous():
# If we're getting user data, we expect that the key matches the # If we're getting user data, we expect that the key matches the
......
...@@ -49,6 +49,7 @@ from lms.djangoapps.lms_xblock.models import XBlockAsidesConfig ...@@ -49,6 +49,7 @@ from lms.djangoapps.lms_xblock.models import XBlockAsidesConfig
from lms.djangoapps.lms_xblock.runtime import LmsModuleSystem from lms.djangoapps.lms_xblock.runtime import LmsModuleSystem
from lms.djangoapps.verify_student.services import VerificationService, ReverificationService from lms.djangoapps.verify_student.services import VerificationService, ReverificationService
from openedx.core.djangoapps.bookmarks.services import BookmarksService from openedx.core.djangoapps.bookmarks.services import BookmarksService
from openedx.core.djangoapps.crawlers.models import CrawlersConfig
from openedx.core.djangoapps.credit.services import CreditService from openedx.core.djangoapps.credit.services import CreditService
from openedx.core.djangoapps.util.user_utils import SystemUser from openedx.core.djangoapps.util.user_utils import SystemUser
from openedx.core.lib.xblock_utils import ( from openedx.core.lib.xblock_utils import (
...@@ -917,7 +918,8 @@ def get_module_by_usage_id(request, course_id, usage_id, disable_staff_debug_inf ...@@ -917,7 +918,8 @@ def get_module_by_usage_id(request, course_id, usage_id, disable_staff_debug_inf
field_data_cache = FieldDataCache.cache_for_descriptor_descendents( field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
course_id, course_id,
user, user,
descriptor descriptor,
read_only=CrawlersConfig.is_crawler(request),
) )
instance = get_module_for_descriptor( instance = get_module_for_descriptor(
user, user,
......
...@@ -50,6 +50,7 @@ from lms.djangoapps.commerce.utils import EcommerceService # pylint: disable=im ...@@ -50,6 +50,7 @@ from lms.djangoapps.commerce.utils import EcommerceService # pylint: disable=im
from milestones.tests.utils import MilestonesTestCaseMixin from milestones.tests.utils import MilestonesTestCaseMixin
from openedx.core.djangoapps.self_paced.models import SelfPacedConfiguration from openedx.core.djangoapps.self_paced.models import SelfPacedConfiguration
from openedx.core.lib.gating import api as gating_api from openedx.core.lib.gating import api as gating_api
from openedx.core.djangoapps.crawlers.models import CrawlersConfig
from student.models import CourseEnrollment from student.models import CourseEnrollment
from student.tests.factories import AdminFactory, UserFactory, CourseEnrollmentFactory from student.tests.factories import AdminFactory, UserFactory, CourseEnrollmentFactory
from util.tests.test_date_utils import fake_ugettext, fake_pgettext from util.tests.test_date_utils import fake_ugettext, fake_pgettext
...@@ -58,7 +59,7 @@ from util.views import ensure_valid_course_key ...@@ -58,7 +59,7 @@ from util.views import ensure_valid_course_key
from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import modulestore from xmodule.modulestore.django import modulestore
from xmodule.modulestore.tests.django_utils import TEST_DATA_MIXED_MODULESTORE from xmodule.modulestore.tests.django_utils import TEST_DATA_MIXED_MODULESTORE
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase, SharedModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, check_mongo_calls from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, check_mongo_calls
from openedx.core.djangoapps.credit.api import set_credit_requirements from openedx.core.djangoapps.credit.api import set_credit_requirements
from openedx.core.djangoapps.credit.models import CreditCourse, CreditProvider from openedx.core.djangoapps.credit.models import CreditCourse, CreditProvider
...@@ -2065,3 +2066,83 @@ class TestRenderXBlockSelfPaced(TestRenderXBlock): ...@@ -2065,3 +2066,83 @@ class TestRenderXBlockSelfPaced(TestRenderXBlock):
def course_options(self): def course_options(self):
return {'self_paced': True} return {'self_paced': True}
class TestIndexViewCrawlerStudentStateWrites(SharedModuleStoreTestCase):
"""
Ensure that courseware index requests do not trigger student state writes.
This is to prevent locking issues that have caused latency spikes in the
courseware_studentmodule table when concurrent requests each try to update
the same rows for sequence, section, and course positions.
"""
@classmethod
def setUpClass(cls):
"""Set up the simplest course possible."""
# setUpClassAndTestData() already calls setUpClass on SharedModuleStoreTestCase
# pylint: disable=super-method-not-called
with super(TestIndexViewCrawlerStudentStateWrites, cls).setUpClassAndTestData():
cls.course = CourseFactory.create()
with cls.store.bulk_operations(cls.course.id):
cls.chapter = ItemFactory.create(category='chapter', parent_location=cls.course.location)
cls.section = ItemFactory.create(category='sequential', parent_location=cls.chapter.location)
cls.vertical = ItemFactory.create(category='vertical', parent_location=cls.section.location)
@classmethod
def setUpTestData(cls):
"""Set up and enroll our fake user in the course."""
cls.password = 'test'
cls.user = UserFactory(password=cls.password)
CourseEnrollment.enroll(cls.user, cls.course.id)
def setUp(self):
"""Do the client login."""
super(TestIndexViewCrawlerStudentStateWrites, self).setUp()
self.client.login(username=self.user.username, password=self.password)
def test_write_by_default(self):
"""By default, always write student state, regardless of user agent."""
with patch('courseware.model_data.UserStateCache.set_many') as patched_state_client_set_many:
# Simulate someone using Chrome
self._load_courseware('Mozilla/5.0 AppleWebKit/537.36')
self.assertTrue(patched_state_client_set_many.called)
patched_state_client_set_many.reset_mock()
# Common crawler user agent
self._load_courseware('edX-downloader/0.1')
self.assertTrue(patched_state_client_set_many.called)
def test_writes_with_config(self):
"""Test state writes (or lack thereof) based on config values."""
CrawlersConfig.objects.create(known_user_agents='edX-downloader,crawler_foo', enabled=True)
with patch('courseware.model_data.UserStateCache.set_many') as patched_state_client_set_many:
# Exact matching of crawler user agent
self._load_courseware('crawler_foo')
self.assertFalse(patched_state_client_set_many.called)
# Partial matching of crawler user agent
self._load_courseware('edX-downloader/0.1')
self.assertFalse(patched_state_client_set_many.called)
# Simulate an actual browser hitting it (we should write)
self._load_courseware('Mozilla/5.0 AppleWebKit/537.36')
self.assertTrue(patched_state_client_set_many.called)
# Disabling the crawlers config should revert us to default behavior
CrawlersConfig.objects.create(enabled=False)
self.test_write_by_default()
def _load_courseware(self, user_agent):
"""Helper to load the actual courseware page."""
url = reverse(
'courseware_section',
kwargs={
'course_id': unicode(self.course.id),
'chapter': unicode(self.chapter.location.name),
'section': unicode(self.section.location.name),
}
)
response = self.client.get(url, HTTP_USER_AGENT=user_agent)
# Make sure we get back an actual 200, and aren't redirected because we
# messed up the setup somehow (e.g. didn't enroll properly)
self.assertEqual(response.status_code, 200)
...@@ -26,6 +26,7 @@ from xblock.fragment import Fragment ...@@ -26,6 +26,7 @@ from xblock.fragment import Fragment
from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.keys import CourseKey
from openedx.core.djangoapps.lang_pref import LANGUAGE_KEY from openedx.core.djangoapps.lang_pref import LANGUAGE_KEY
from openedx.core.djangoapps.user_api.preferences.api import get_user_preference from openedx.core.djangoapps.user_api.preferences.api import get_user_preference
from openedx.core.djangoapps.crawlers.models import CrawlersConfig
from shoppingcart.models import CourseRegistrationCode from shoppingcart.models import CourseRegistrationCode
from student.models import CourseEnrollment from student.models import CourseEnrollment
from student.views import is_course_blocked from student.views import is_course_blocked
...@@ -101,7 +102,7 @@ class CoursewareIndex(View): ...@@ -101,7 +102,7 @@ class CoursewareIndex(View):
self.course = get_course_with_access(request.user, 'load', self.course_key, depth=CONTENT_DEPTH) self.course = get_course_with_access(request.user, 'load', self.course_key, depth=CONTENT_DEPTH)
self.is_staff = has_access(request.user, 'staff', self.course) self.is_staff = has_access(request.user, 'staff', self.course)
self._setup_masquerade_for_effective_user() self._setup_masquerade_for_effective_user()
return self._get() return self._get(request)
except Redirect as redirect_error: except Redirect as redirect_error:
return redirect(redirect_error.url) return redirect(redirect_error.url)
except UnicodeEncodeError: except UnicodeEncodeError:
...@@ -127,12 +128,12 @@ class CoursewareIndex(View): ...@@ -127,12 +128,12 @@ class CoursewareIndex(View):
# Set the user in the request to the effective user. # Set the user in the request to the effective user.
self.request.user = self.effective_user self.request.user = self.effective_user
def _get(self): def _get(self, request):
""" """
Render the index page. Render the index page.
""" """
self._redirect_if_needed_to_access_course() self._redirect_if_needed_to_access_course()
self._prefetch_and_bind_course() self._prefetch_and_bind_course(request)
if self.course.has_children_at_depth(CONTENT_DEPTH): if self.course.has_children_at_depth(CONTENT_DEPTH):
self._reset_section_to_exam_if_required() self._reset_section_to_exam_if_required()
...@@ -324,13 +325,17 @@ class CoursewareIndex(View): ...@@ -324,13 +325,17 @@ class CoursewareIndex(View):
if self.chapter: if self.chapter:
return self._find_block(self.chapter, self.section_url_name, 'section') return self._find_block(self.chapter, self.section_url_name, 'section')
def _prefetch_and_bind_course(self): def _prefetch_and_bind_course(self, request):
""" """
Prefetches all descendant data for the requested section and Prefetches all descendant data for the requested section and
sets up the runtime, which binds the request user to the section. sets up the runtime, which binds the request user to the section.
""" """
self.field_data_cache = FieldDataCache.cache_for_descriptor_descendents( self.field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
self.course_key, self.effective_user, self.course, depth=CONTENT_DEPTH, self.course_key,
self.effective_user,
self.course,
depth=CONTENT_DEPTH,
read_only=CrawlersConfig.is_crawler(request),
) )
self.course = get_module_for_descriptor( self.course = get_module_for_descriptor(
......
...@@ -2160,6 +2160,9 @@ INSTALLED_APPS = ( ...@@ -2160,6 +2160,9 @@ INSTALLED_APPS = (
# Customized celery tasks, including persisting failed tasks so they can # Customized celery tasks, including persisting failed tasks so they can
# be retried # be retried
'openedx.core.djangoapps.celery_utils', 'openedx.core.djangoapps.celery_utils',
# Ability to detect and special-case crawler behavior
'openedx.core.djangoapps.crawlers',
) )
# Migrations which are not in the standard module "migrations" # Migrations which are not in the standard module "migrations"
......
"""Admin panel for configuring which user agents we consider to be Crawlers."""
from django.contrib import admin
from config_models.admin import ConfigurationModelAdmin
from .models import CrawlersConfig
admin.site.register(CrawlersConfig, ConfigurationModelAdmin)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
from django.conf import settings
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.CreateModel(
name='CrawlersConfig',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('change_date', models.DateTimeField(auto_now_add=True, verbose_name='Change date')),
('enabled', models.BooleanField(default=False, verbose_name='Enabled')),
('known_user_agents', models.TextField(default=b'edX-downloader', help_text=b'A comma-separated list of known crawler user agents.', blank=True)),
('changed_by', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, editable=False, to=settings.AUTH_USER_MODEL, null=True, verbose_name='Changed by')),
],
options={
'ordering': ('-change_date',),
'abstract': False,
},
),
]
"""
This module handles the detection of crawlers, so that we can handle them
appropriately in other parts of the code.
"""
from django.db import models
from config_models.models import ConfigurationModel
class CrawlersConfig(ConfigurationModel):
"""Configuration for the crawlers django app."""
known_user_agents = models.TextField(
blank=True,
help_text="A comma-separated list of known crawler user agents.",
default='edX-downloader',
)
def __unicode__(self):
return u'CrawlersConfig("{}")'.format(self.known_user_agents)
@classmethod
def is_crawler(cls, request):
"""Determine if the request came from a crawler or not.
This method is simplistic and only looks at the user agent header at the
moment, but could later be improved to be smarter about detection.
"""
current = cls.current()
if not current.enabled:
return False
req_user_agent = request.META.get('HTTP_USER_AGENT')
crawler_agents = current.known_user_agents.split(",")
# If there was no user agent detected or no crawler agents configured,
# then just return False.
if (not req_user_agent) or (not crawler_agents):
return False
# We perform prefix matching of the crawler agent here so that we don't
# have to worry about version bumps.
return any(
req_user_agent.startswith(crawler_agent)
for crawler_agent in crawler_agents
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment