Commit 5368a9ad by chrisndodge

Merge pull request #1252 from MITx/feature/cale/lms-mongo-perf

Decrease the number of queries needed for LMS courseware
parents 4eca8254 cb3b17d4
......@@ -475,7 +475,7 @@ def preview_module_system(request, preview_id, descriptor):
)
def get_preview_module(request, preview_id, location):
def get_preview_module(request, preview_id, descriptor):
"""
Returns a preview XModule at the specified location. The preview_data is chosen arbitrarily
from the set of preview data for the descriptor specified by Location
......@@ -484,7 +484,6 @@ def get_preview_module(request, preview_id, location):
preview_id (str): An identifier specifying which preview this module is used for
location: A Location
"""
descriptor = modulestore().get_item(location)
instance_state, shared_state = descriptor.get_sample_state()[0]
return load_preview_module(request, preview_id, descriptor, instance_state, shared_state)
......
......@@ -52,9 +52,10 @@ class ABTestModule(XModule):
def get_shared_state(self):
return json.dumps({'group': self.group})
def get_children_locations(self):
return self.definition['data']['group_content'][self.group]
def get_child_descriptors(self):
active_locations = set(self.definition['data']['group_content'][self.group])
return [desc for desc in self.descriptor.get_children() if desc.location.url() in active_locations]
def displayable_items(self):
# Most modules return "self" as the displayable_item. We never display ourself
# (which is why we don't implement get_html). We only display our children.
......
......@@ -265,7 +265,7 @@ class ModuleStore(object):
"""
raise NotImplementedError
def get_instance(self, course_id, location):
def get_instance(self, course_id, location, depth=0):
"""
Get an instance of this location, with policy for course_id applied.
TODO (vshnayder): this may want to live outside the modulestore eventually
......
......@@ -82,17 +82,26 @@ def location_to_query(location, wildcard=True):
If `wildcard` is True, then a None in a location is treated as a wildcard
query. Otherwise, it is searched for literally
"""
query = SON()
# Location dict is ordered by specificity, and SON
# will preserve that order for queries
for key, val in Location(location).dict().iteritems():
if wildcard and val is None:
continue
query['_id.{key}'.format(key=key)] = val
query = namedtuple_to_son(Location(location), prefix='_id.')
if wildcard:
for key, value in query.items():
if value is None:
del query[key]
return query
def namedtuple_to_son(namedtuple, prefix=''):
"""
Converts a namedtuple into a SON object with the same key order
"""
son = SON()
for idx, field_name in enumerate(namedtuple._fields):
son[prefix + field_name] = namedtuple[idx]
return son
class MongoModuleStore(ModuleStoreBase):
"""
A Mongodb backed ModuleStore
......@@ -149,6 +158,7 @@ class MongoModuleStore(ModuleStoreBase):
If depth is None, will load all the children.
This will make a number of queries that is linear in the depth.
"""
data = {}
to_process = list(items)
while to_process and depth is None or depth >= 0:
......@@ -162,8 +172,10 @@ class MongoModuleStore(ModuleStoreBase):
# http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or
# for or-query syntax
if children:
to_process = list(self.collection.find(
{'_id': {'$in': [Location(child).dict() for child in children]}}))
query = {
'_id': {'$in': [namedtuple_to_son(Location(child)) for child in children]}
}
to_process = self.collection.find(query)
else:
to_process = []
# If depth is None, then we just recurse until we hit all the descendents
......@@ -255,12 +267,17 @@ class MongoModuleStore(ModuleStoreBase):
item = self._find_one(location)
return self._load_items([item], depth)[0]
def get_instance(self, course_id, location):
def get_instance(self, course_id, location, depth=0):
"""
TODO (vshnayder): implement policy tracking in mongo.
For now, just delegate to get_item and ignore policy.
depth (int): An argument that some module stores may use to prefetch
descendents of the queried modules for more efficient results later
in the request. The depth is counted in the number of
calls to get_children() to cache. None indicates to cache all descendents.
"""
return self.get_item(location)
return self.get_item(location, depth=depth)
def get_items(self, location, course_id=None, depth=0):
items = self.collection.find(
......
......@@ -61,7 +61,7 @@ class CustomTagDescriptor(RawDescriptor):
# cdodge: look up the template as a module
template_loc = self.location._replace(category='custom_tag_template', name=template_name)
template_module = modulestore().get_instance(system.course_id, template_loc)
template_module = self.system.load_item(template_loc)
template_module_data = template_module.definition['data']
template = Template(template_module_data)
return template.render(**params)
......
......@@ -241,17 +241,17 @@ class XModule(HTMLSnippet):
Return module instances for all the children of this module.
'''
if self._loaded_children is None:
child_locations = self.get_children_locations()
children = [self.system.get_module(loc) for loc in child_locations]
child_descriptors = self.get_child_descriptors()
children = [self.system.get_module(descriptor) for descriptor in child_descriptors]
# get_module returns None if the current user doesn't have access
# to the location.
self._loaded_children = [c for c in children if c is not None]
return self._loaded_children
def get_children_locations(self):
def get_child_descriptors(self):
'''
Returns the locations of each of child modules.
Returns the descriptors of the child modules
Overriding this changes the behavior of get_children and
anything that uses get_children, such as get_display_items.
......@@ -262,7 +262,16 @@ class XModule(HTMLSnippet):
These children will be the same children returned by the
descriptor unless descriptor.has_dynamic_children() is true.
'''
return self.definition.get('children', [])
return self.descriptor.get_children()
def get_child_by(self, selector):
"""
Return a child XModuleDescriptor with the specified url_name, if it exists, and None otherwise.
"""
for child in self.get_children():
if selector(child):
return child
return None
def get_display_items(self):
'''
......@@ -577,13 +586,13 @@ class XModuleDescriptor(Plugin, HTMLSnippet, ResourceTemplates):
return self._child_instances
def get_child_by_url_name(self, url_name):
def get_child_by(self, selector):
"""
Return a child XModuleDescriptor with the specified url_name, if it exists, and None otherwise.
"""
for c in self.get_children():
if c.url_name == url_name:
return c
for child in self.get_children():
if selector(child):
return child
return None
def xmodule_constructor(self, system):
......@@ -847,7 +856,7 @@ class ModuleSystem(object):
TODO: Not used, and has inconsistent args in different
files. Update or remove.
get_module - function that takes (location) and returns a corresponding
get_module - function that takes a descriptor and returns a corresponding
module instance object. If the current user does not have
access to that location, returns None.
......
......@@ -42,28 +42,31 @@ def get_request_for_thread():
del frame
def get_course_by_id(course_id):
def get_course_by_id(course_id, depth=0):
"""
Given a course id, return the corresponding course descriptor.
If course_id is not valid, raises a 404.
depth: The number of levels of children for the modulestore to cache. None means infinite depth
"""
try:
course_loc = CourseDescriptor.id_to_location(course_id)
return modulestore().get_instance(course_id, course_loc)
return modulestore().get_instance(course_id, course_loc, depth=depth)
except (KeyError, ItemNotFoundError):
raise Http404("Course not found.")
def get_course_with_access(user, course_id, action):
def get_course_with_access(user, course_id, action, depth=0):
"""
Given a course_id, look up the corresponding course descriptor,
check that the user has the access to perform the specified action
on the course, and return the descriptor.
Raises a 404 if the course_id is invalid, or the user doesn't have access.
depth: The number of levels of children for the modulestore to cache. None means infinite depth
"""
course = get_course_by_id(course_id)
course = get_course_by_id(course_id, depth=depth)
if not has_access(user, course, action):
# Deliberately return a non-specific error message to avoid
# leaking info about access control settings
......
......@@ -36,8 +36,7 @@ def yield_dynamic_descriptor_descendents(descriptor, module_creator):
def get_dynamic_descriptor_children(descriptor):
if descriptor.has_dynamic_children():
module = module_creator(descriptor)
child_locations = module.get_children_locations()
return [descriptor.system.load_item(child_location) for child_location in child_locations ]
return module.get_child_descriptors()
else:
return descriptor.get_children()
......@@ -291,7 +290,7 @@ def progress_summary(student, request, course, student_module_cache):
graded = section_module.metadata.get('graded', False)
scores = []
module_creator = lambda descriptor : section_module.system.get_module(descriptor.location)
module_creator = section_module.system.get_module
for module_descriptor in yield_dynamic_descriptor_descendents(section_module.descriptor, module_creator):
......
......@@ -82,7 +82,8 @@ def toc_for_course(user, request, course, active_chapter, active_section):
student_module_cache = StudentModuleCache.cache_for_descriptor_descendents(
course.id, user, course, depth=2)
course_module = get_module(user, request, course.location, student_module_cache, course.id)
course_module = get_module_for_descriptor(user, request, course,
student_module_cache, course.id)
if course_module is None:
return None
......@@ -115,7 +116,9 @@ def toc_for_course(user, request, course, active_chapter, active_section):
return chapters
def get_module(user, request, location, student_module_cache, course_id, position=None, not_found_ok = False, wrap_xmodule_display = True):
def get_module(user, request, location, student_module_cache, course_id,
position=None, not_found_ok=False, wrap_xmodule_display=True,
depth=0):
"""
Get an instance of the xmodule class identified by location,
setting the state based on an existing StudentModule, or creating one if none
......@@ -130,13 +133,19 @@ def get_module(user, request, location, student_module_cache, course_id, positio
- course_id : the course_id in the context of which to load module
- position : extra information from URL for user-specified
position within module
- depth : number of levels of descendents to cache when loading this module.
None means cache all descendents
Returns: xmodule instance, or None if the user does not have access to the
module. If there's an error, will try to return an instance of ErrorModule
if possible. If not possible, return None.
"""
try:
return _get_module(user, request, location, student_module_cache, course_id, position, wrap_xmodule_display)
location = Location(location)
descriptor = modulestore().get_instance(course_id, location, depth=depth)
return get_module_for_descriptor(user, request, descriptor, student_module_cache, course_id,
position=position, not_found_ok=not_found_ok,
wrap_xmodule_display=wrap_xmodule_display)
except ItemNotFoundError:
if not not_found_ok:
log.exception("Error in get_module")
......@@ -146,12 +155,20 @@ def get_module(user, request, location, student_module_cache, course_id, positio
log.exception("Error in get_module")
return None
def _get_module(user, request, location, student_module_cache, course_id, position=None, wrap_xmodule_display = True):
def get_module_for_descriptor(user, request, descriptor, student_module_cache, course_id,
position=None, not_found_ok=False, wrap_xmodule_display=True):
"""
Actually implement get_module. See docstring there for details.
"""
return _get_module(user, request, descriptor, student_module_cache, course_id,
position=position, wrap_xmodule_display=wrap_xmodule_display)
def _get_module(user, request, descriptor, student_module_cache, course_id,
position=None, wrap_xmodule_display=True):
"""
Actually implement get_module. See docstring there for details.
"""
location = Location(location)
descriptor = modulestore().get_instance(course_id, location)
# Short circuit--if the user shouldn't have access, bail without doing any work
if not has_access(user, descriptor, 'load', course_id):
......@@ -206,12 +223,12 @@ def _get_module(user, request, location, student_module_cache, course_id, positi
'waittime': settings.XQUEUE_WAITTIME_BETWEEN_REQUESTS
}
def inner_get_module(location):
def inner_get_module(descriptor):
"""
Delegate to get_module. It does an access check, so may return None
"""
return get_module(user, request, location,
student_module_cache, course_id, position)
return get_module_for_descriptor(user, request, descriptor,
student_module_cache, course_id, position)
# TODO (cpennington): When modules are shared between courses, the static
# prefix is going to have to be specific to the module, not the directory
......@@ -246,7 +263,7 @@ def _get_module(user, request, location, student_module_cache, course_id, positi
# make an ErrorDescriptor -- assuming that the descriptor's system is ok
import_system = descriptor.system
if has_access(user, location, 'staff', course_id):
if has_access(user, descriptor.location, 'staff', course_id):
err_descriptor = ErrorDescriptor.from_xml(str(descriptor), import_system,
error_msg=exc_info_to_str(sys.exc_info()))
else:
......
......@@ -20,7 +20,7 @@ from courseware.access import has_access
from courseware.courses import (get_courses, get_course_with_access, get_courses_by_university)
import courseware.tabs as tabs
from courseware.models import StudentModuleCache
from module_render import toc_for_course, get_module, get_instance_module
from module_render import toc_for_course, get_module, get_instance_module, get_module_for_descriptor
from django_comment_client.utils import get_discussion_title
......@@ -180,7 +180,7 @@ def index(request, course_id, chapter=None, section=None,
- HTTPresponse
"""
course = get_course_with_access(request.user, course_id, 'load')
course = get_course_with_access(request.user, course_id, 'load', depth=2)
staff_access = has_access(request.user, course, 'staff')
registered = registered_for_course(course, request.user)
if not registered:
......@@ -195,7 +195,8 @@ def index(request, course_id, chapter=None, section=None,
# Has this student been in this course before?
first_time = student_module_cache.lookup(course_id, 'course', course.location.url()) is None
course_module = get_module(request.user, request, course.location, student_module_cache, course.id)
# Load the module for the course
course_module = get_module_for_descriptor(request.user, request, course, student_module_cache, course.id)
if course_module is None:
log.warning('If you see this, something went wrong: if we got this'
' far, should have gotten a course module for this user')
......@@ -215,30 +216,28 @@ def index(request, course_id, chapter=None, section=None,
'xqa_server': settings.MITX_FEATURES.get('USE_XQA_SERVER','http://xqa:server@content-qa.mitx.mit.edu/xqa')
}
chapter_descriptor = course.get_child_by_url_name(chapter)
chapter_descriptor = course.get_child_by(lambda m: m.url_name == chapter)
if chapter_descriptor is not None:
instance_module = get_instance_module(course_id, request.user, course_module, student_module_cache)
save_child_position(course_module, chapter, instance_module)
else:
raise Http404
chapter_module = get_module(request.user, request, chapter_descriptor.location,
student_module_cache, course_id)
chapter_module = course_module.get_child_by(lambda m: m.url_name == chapter)
if chapter_module is None:
# User may be trying to access a chapter that isn't live yet
raise Http404
if section is not None:
section_descriptor = chapter_descriptor.get_child_by_url_name(section)
section_descriptor = chapter_descriptor.get_child_by(lambda m: m.url_name == section)
if section_descriptor is None:
# Specifically asked-for section doesn't exist
raise Http404
section_student_module_cache = StudentModuleCache.cache_for_descriptor_descendents(
course_id, request.user, section_descriptor)
section_module = get_module(request.user, request,
section_descriptor.location,
section_student_module_cache, course_id, position)
# Load all descendents of the section, because we're going to display it's
# html, which in general will need all of its children
section_module = get_module(request.user, request, section_descriptor.location,
student_module_cache, course.id, depth=None)
if section_module is None:
# User may be trying to be clever and access something
# they don't have access to.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment