Commit 5368a9ad by chrisndodge

Merge pull request #1252 from MITx/feature/cale/lms-mongo-perf

Decrease the number of queries needed for LMS courseware
parents 4eca8254 cb3b17d4
...@@ -475,7 +475,7 @@ def preview_module_system(request, preview_id, descriptor): ...@@ -475,7 +475,7 @@ def preview_module_system(request, preview_id, descriptor):
) )
def get_preview_module(request, preview_id, location): def get_preview_module(request, preview_id, descriptor):
""" """
Returns a preview XModule at the specified location. The preview_data is chosen arbitrarily Returns a preview XModule at the specified location. The preview_data is chosen arbitrarily
from the set of preview data for the descriptor specified by Location from the set of preview data for the descriptor specified by Location
...@@ -484,7 +484,6 @@ def get_preview_module(request, preview_id, location): ...@@ -484,7 +484,6 @@ def get_preview_module(request, preview_id, location):
preview_id (str): An identifier specifying which preview this module is used for preview_id (str): An identifier specifying which preview this module is used for
location: A Location location: A Location
""" """
descriptor = modulestore().get_item(location)
instance_state, shared_state = descriptor.get_sample_state()[0] instance_state, shared_state = descriptor.get_sample_state()[0]
return load_preview_module(request, preview_id, descriptor, instance_state, shared_state) return load_preview_module(request, preview_id, descriptor, instance_state, shared_state)
......
...@@ -52,9 +52,10 @@ class ABTestModule(XModule): ...@@ -52,9 +52,10 @@ class ABTestModule(XModule):
def get_shared_state(self): def get_shared_state(self):
return json.dumps({'group': self.group}) return json.dumps({'group': self.group})
def get_children_locations(self): def get_child_descriptors(self):
return self.definition['data']['group_content'][self.group] active_locations = set(self.definition['data']['group_content'][self.group])
return [desc for desc in self.descriptor.get_children() if desc.location.url() in active_locations]
def displayable_items(self): def displayable_items(self):
# Most modules return "self" as the displayable_item. We never display ourself # Most modules return "self" as the displayable_item. We never display ourself
# (which is why we don't implement get_html). We only display our children. # (which is why we don't implement get_html). We only display our children.
......
...@@ -265,7 +265,7 @@ class ModuleStore(object): ...@@ -265,7 +265,7 @@ class ModuleStore(object):
""" """
raise NotImplementedError raise NotImplementedError
def get_instance(self, course_id, location): def get_instance(self, course_id, location, depth=0):
""" """
Get an instance of this location, with policy for course_id applied. Get an instance of this location, with policy for course_id applied.
TODO (vshnayder): this may want to live outside the modulestore eventually TODO (vshnayder): this may want to live outside the modulestore eventually
......
...@@ -82,17 +82,26 @@ def location_to_query(location, wildcard=True): ...@@ -82,17 +82,26 @@ def location_to_query(location, wildcard=True):
If `wildcard` is True, then a None in a location is treated as a wildcard If `wildcard` is True, then a None in a location is treated as a wildcard
query. Otherwise, it is searched for literally query. Otherwise, it is searched for literally
""" """
query = SON() query = namedtuple_to_son(Location(location), prefix='_id.')
# Location dict is ordered by specificity, and SON
# will preserve that order for queries if wildcard:
for key, val in Location(location).dict().iteritems(): for key, value in query.items():
if wildcard and val is None: if value is None:
continue del query[key]
query['_id.{key}'.format(key=key)] = val
return query return query
def namedtuple_to_son(namedtuple, prefix=''):
"""
Converts a namedtuple into a SON object with the same key order
"""
son = SON()
for idx, field_name in enumerate(namedtuple._fields):
son[prefix + field_name] = namedtuple[idx]
return son
class MongoModuleStore(ModuleStoreBase): class MongoModuleStore(ModuleStoreBase):
""" """
A Mongodb backed ModuleStore A Mongodb backed ModuleStore
...@@ -149,6 +158,7 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -149,6 +158,7 @@ class MongoModuleStore(ModuleStoreBase):
If depth is None, will load all the children. If depth is None, will load all the children.
This will make a number of queries that is linear in the depth. This will make a number of queries that is linear in the depth.
""" """
data = {} data = {}
to_process = list(items) to_process = list(items)
while to_process and depth is None or depth >= 0: while to_process and depth is None or depth >= 0:
...@@ -162,8 +172,10 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -162,8 +172,10 @@ class MongoModuleStore(ModuleStoreBase):
# http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or # http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or
# for or-query syntax # for or-query syntax
if children: if children:
to_process = list(self.collection.find( query = {
{'_id': {'$in': [Location(child).dict() for child in children]}})) '_id': {'$in': [namedtuple_to_son(Location(child)) for child in children]}
}
to_process = self.collection.find(query)
else: else:
to_process = [] to_process = []
# If depth is None, then we just recurse until we hit all the descendents # If depth is None, then we just recurse until we hit all the descendents
...@@ -255,12 +267,17 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -255,12 +267,17 @@ class MongoModuleStore(ModuleStoreBase):
item = self._find_one(location) item = self._find_one(location)
return self._load_items([item], depth)[0] return self._load_items([item], depth)[0]
def get_instance(self, course_id, location): def get_instance(self, course_id, location, depth=0):
""" """
TODO (vshnayder): implement policy tracking in mongo. TODO (vshnayder): implement policy tracking in mongo.
For now, just delegate to get_item and ignore policy. For now, just delegate to get_item and ignore policy.
depth (int): An argument that some module stores may use to prefetch
descendents of the queried modules for more efficient results later
in the request. The depth is counted in the number of
calls to get_children() to cache. None indicates to cache all descendents.
""" """
return self.get_item(location) return self.get_item(location, depth=depth)
def get_items(self, location, course_id=None, depth=0): def get_items(self, location, course_id=None, depth=0):
items = self.collection.find( items = self.collection.find(
......
...@@ -61,7 +61,7 @@ class CustomTagDescriptor(RawDescriptor): ...@@ -61,7 +61,7 @@ class CustomTagDescriptor(RawDescriptor):
# cdodge: look up the template as a module # cdodge: look up the template as a module
template_loc = self.location._replace(category='custom_tag_template', name=template_name) template_loc = self.location._replace(category='custom_tag_template', name=template_name)
template_module = modulestore().get_instance(system.course_id, template_loc) template_module = self.system.load_item(template_loc)
template_module_data = template_module.definition['data'] template_module_data = template_module.definition['data']
template = Template(template_module_data) template = Template(template_module_data)
return template.render(**params) return template.render(**params)
......
...@@ -241,17 +241,17 @@ class XModule(HTMLSnippet): ...@@ -241,17 +241,17 @@ class XModule(HTMLSnippet):
Return module instances for all the children of this module. Return module instances for all the children of this module.
''' '''
if self._loaded_children is None: if self._loaded_children is None:
child_locations = self.get_children_locations() child_descriptors = self.get_child_descriptors()
children = [self.system.get_module(loc) for loc in child_locations] children = [self.system.get_module(descriptor) for descriptor in child_descriptors]
# get_module returns None if the current user doesn't have access # get_module returns None if the current user doesn't have access
# to the location. # to the location.
self._loaded_children = [c for c in children if c is not None] self._loaded_children = [c for c in children if c is not None]
return self._loaded_children return self._loaded_children
def get_children_locations(self): def get_child_descriptors(self):
''' '''
Returns the locations of each of child modules. Returns the descriptors of the child modules
Overriding this changes the behavior of get_children and Overriding this changes the behavior of get_children and
anything that uses get_children, such as get_display_items. anything that uses get_children, such as get_display_items.
...@@ -262,7 +262,16 @@ class XModule(HTMLSnippet): ...@@ -262,7 +262,16 @@ class XModule(HTMLSnippet):
These children will be the same children returned by the These children will be the same children returned by the
descriptor unless descriptor.has_dynamic_children() is true. descriptor unless descriptor.has_dynamic_children() is true.
''' '''
return self.definition.get('children', []) return self.descriptor.get_children()
def get_child_by(self, selector):
"""
Return the first child module for which selector(child) is true, or None if no child matches.
"""
for child in self.get_children():
if selector(child):
return child
return None
def get_display_items(self): def get_display_items(self):
''' '''
...@@ -577,13 +586,13 @@ class XModuleDescriptor(Plugin, HTMLSnippet, ResourceTemplates): ...@@ -577,13 +586,13 @@ class XModuleDescriptor(Plugin, HTMLSnippet, ResourceTemplates):
return self._child_instances return self._child_instances
def get_child_by_url_name(self, url_name): def get_child_by(self, selector):
""" """
Return a child XModuleDescriptor with the specified url_name, if it exists, and None otherwise. Return a child XModuleDescriptor with the specified url_name, if it exists, and None otherwise.
""" """
for c in self.get_children(): for child in self.get_children():
if c.url_name == url_name: if selector(child):
return c return child
return None return None
def xmodule_constructor(self, system): def xmodule_constructor(self, system):
...@@ -847,7 +856,7 @@ class ModuleSystem(object): ...@@ -847,7 +856,7 @@ class ModuleSystem(object):
TODO: Not used, and has inconsistent args in different TODO: Not used, and has inconsistent args in different
files. Update or remove. files. Update or remove.
get_module - function that takes (location) and returns a corresponding get_module - function that takes a descriptor and returns a corresponding
module instance object. If the current user does not have module instance object. If the current user does not have
access to that location, returns None. access to that location, returns None.
......
...@@ -42,28 +42,31 @@ def get_request_for_thread(): ...@@ -42,28 +42,31 @@ def get_request_for_thread():
del frame del frame
def get_course_by_id(course_id): def get_course_by_id(course_id, depth=0):
""" """
Given a course id, return the corresponding course descriptor. Given a course id, return the corresponding course descriptor.
If course_id is not valid, raises a 404. If course_id is not valid, raises a 404.
depth: The number of levels of children for the modulestore to cache. None means infinite depth
""" """
try: try:
course_loc = CourseDescriptor.id_to_location(course_id) course_loc = CourseDescriptor.id_to_location(course_id)
return modulestore().get_instance(course_id, course_loc) return modulestore().get_instance(course_id, course_loc, depth=depth)
except (KeyError, ItemNotFoundError): except (KeyError, ItemNotFoundError):
raise Http404("Course not found.") raise Http404("Course not found.")
def get_course_with_access(user, course_id, action): def get_course_with_access(user, course_id, action, depth=0):
""" """
Given a course_id, look up the corresponding course descriptor, Given a course_id, look up the corresponding course descriptor,
check that the user has the access to perform the specified action check that the user has the access to perform the specified action
on the course, and return the descriptor. on the course, and return the descriptor.
Raises a 404 if the course_id is invalid, or the user doesn't have access. Raises a 404 if the course_id is invalid, or the user doesn't have access.
depth: The number of levels of children for the modulestore to cache. None means infinite depth
""" """
course = get_course_by_id(course_id) course = get_course_by_id(course_id, depth=depth)
if not has_access(user, course, action): if not has_access(user, course, action):
# Deliberately return a non-specific error message to avoid # Deliberately return a non-specific error message to avoid
# leaking info about access control settings # leaking info about access control settings
......
...@@ -36,8 +36,7 @@ def yield_dynamic_descriptor_descendents(descriptor, module_creator): ...@@ -36,8 +36,7 @@ def yield_dynamic_descriptor_descendents(descriptor, module_creator):
def get_dynamic_descriptor_children(descriptor): def get_dynamic_descriptor_children(descriptor):
if descriptor.has_dynamic_children(): if descriptor.has_dynamic_children():
module = module_creator(descriptor) module = module_creator(descriptor)
child_locations = module.get_children_locations() return module.get_child_descriptors()
return [descriptor.system.load_item(child_location) for child_location in child_locations ]
else: else:
return descriptor.get_children() return descriptor.get_children()
...@@ -291,7 +290,7 @@ def progress_summary(student, request, course, student_module_cache): ...@@ -291,7 +290,7 @@ def progress_summary(student, request, course, student_module_cache):
graded = section_module.metadata.get('graded', False) graded = section_module.metadata.get('graded', False)
scores = [] scores = []
module_creator = lambda descriptor : section_module.system.get_module(descriptor.location) module_creator = section_module.system.get_module
for module_descriptor in yield_dynamic_descriptor_descendents(section_module.descriptor, module_creator): for module_descriptor in yield_dynamic_descriptor_descendents(section_module.descriptor, module_creator):
......
...@@ -82,7 +82,8 @@ def toc_for_course(user, request, course, active_chapter, active_section): ...@@ -82,7 +82,8 @@ def toc_for_course(user, request, course, active_chapter, active_section):
student_module_cache = StudentModuleCache.cache_for_descriptor_descendents( student_module_cache = StudentModuleCache.cache_for_descriptor_descendents(
course.id, user, course, depth=2) course.id, user, course, depth=2)
course_module = get_module(user, request, course.location, student_module_cache, course.id) course_module = get_module_for_descriptor(user, request, course,
student_module_cache, course.id)
if course_module is None: if course_module is None:
return None return None
...@@ -115,7 +116,9 @@ def toc_for_course(user, request, course, active_chapter, active_section): ...@@ -115,7 +116,9 @@ def toc_for_course(user, request, course, active_chapter, active_section):
return chapters return chapters
def get_module(user, request, location, student_module_cache, course_id, position=None, not_found_ok = False, wrap_xmodule_display = True): def get_module(user, request, location, student_module_cache, course_id,
position=None, not_found_ok=False, wrap_xmodule_display=True,
depth=0):
""" """
Get an instance of the xmodule class identified by location, Get an instance of the xmodule class identified by location,
setting the state based on an existing StudentModule, or creating one if none setting the state based on an existing StudentModule, or creating one if none
...@@ -130,13 +133,19 @@ def get_module(user, request, location, student_module_cache, course_id, positio ...@@ -130,13 +133,19 @@ def get_module(user, request, location, student_module_cache, course_id, positio
- course_id : the course_id in the context of which to load module - course_id : the course_id in the context of which to load module
- position : extra information from URL for user-specified - position : extra information from URL for user-specified
position within module position within module
- depth : number of levels of descendents to cache when loading this module.
None means cache all descendents
Returns: xmodule instance, or None if the user does not have access to the Returns: xmodule instance, or None if the user does not have access to the
module. If there's an error, will try to return an instance of ErrorModule module. If there's an error, will try to return an instance of ErrorModule
if possible. If not possible, return None. if possible. If not possible, return None.
""" """
try: try:
return _get_module(user, request, location, student_module_cache, course_id, position, wrap_xmodule_display) location = Location(location)
descriptor = modulestore().get_instance(course_id, location, depth=depth)
return get_module_for_descriptor(user, request, descriptor, student_module_cache, course_id,
position=position, not_found_ok=not_found_ok,
wrap_xmodule_display=wrap_xmodule_display)
except ItemNotFoundError: except ItemNotFoundError:
if not not_found_ok: if not not_found_ok:
log.exception("Error in get_module") log.exception("Error in get_module")
...@@ -146,12 +155,20 @@ def get_module(user, request, location, student_module_cache, course_id, positio ...@@ -146,12 +155,20 @@ def get_module(user, request, location, student_module_cache, course_id, positio
log.exception("Error in get_module") log.exception("Error in get_module")
return None return None
def _get_module(user, request, location, student_module_cache, course_id, position=None, wrap_xmodule_display = True):
def get_module_for_descriptor(user, request, descriptor, student_module_cache, course_id,
position=None, not_found_ok=False, wrap_xmodule_display=True):
"""
Actually implement get_module. See docstring there for details.
"""
return _get_module(user, request, descriptor, student_module_cache, course_id,
position=position, wrap_xmodule_display=wrap_xmodule_display)
def _get_module(user, request, descriptor, student_module_cache, course_id,
position=None, wrap_xmodule_display=True):
""" """
Actually implement get_module. See docstring there for details. Actually implement get_module. See docstring there for details.
""" """
location = Location(location)
descriptor = modulestore().get_instance(course_id, location)
# Short circuit--if the user shouldn't have access, bail without doing any work # Short circuit--if the user shouldn't have access, bail without doing any work
if not has_access(user, descriptor, 'load', course_id): if not has_access(user, descriptor, 'load', course_id):
...@@ -206,12 +223,12 @@ def _get_module(user, request, location, student_module_cache, course_id, positi ...@@ -206,12 +223,12 @@ def _get_module(user, request, location, student_module_cache, course_id, positi
'waittime': settings.XQUEUE_WAITTIME_BETWEEN_REQUESTS 'waittime': settings.XQUEUE_WAITTIME_BETWEEN_REQUESTS
} }
def inner_get_module(location): def inner_get_module(descriptor):
""" """
Delegate to get_module. It does an access check, so may return None Delegate to get_module. It does an access check, so may return None
""" """
return get_module(user, request, location, return get_module_for_descriptor(user, request, descriptor,
student_module_cache, course_id, position) student_module_cache, course_id, position)
# TODO (cpennington): When modules are shared between courses, the static # TODO (cpennington): When modules are shared between courses, the static
# prefix is going to have to be specific to the module, not the directory # prefix is going to have to be specific to the module, not the directory
...@@ -246,7 +263,7 @@ def _get_module(user, request, location, student_module_cache, course_id, positi ...@@ -246,7 +263,7 @@ def _get_module(user, request, location, student_module_cache, course_id, positi
# make an ErrorDescriptor -- assuming that the descriptor's system is ok # make an ErrorDescriptor -- assuming that the descriptor's system is ok
import_system = descriptor.system import_system = descriptor.system
if has_access(user, location, 'staff', course_id): if has_access(user, descriptor.location, 'staff', course_id):
err_descriptor = ErrorDescriptor.from_xml(str(descriptor), import_system, err_descriptor = ErrorDescriptor.from_xml(str(descriptor), import_system,
error_msg=exc_info_to_str(sys.exc_info())) error_msg=exc_info_to_str(sys.exc_info()))
else: else:
......
...@@ -20,7 +20,7 @@ from courseware.access import has_access ...@@ -20,7 +20,7 @@ from courseware.access import has_access
from courseware.courses import (get_courses, get_course_with_access, get_courses_by_university) from courseware.courses import (get_courses, get_course_with_access, get_courses_by_university)
import courseware.tabs as tabs import courseware.tabs as tabs
from courseware.models import StudentModuleCache from courseware.models import StudentModuleCache
from module_render import toc_for_course, get_module, get_instance_module from module_render import toc_for_course, get_module, get_instance_module, get_module_for_descriptor
from django_comment_client.utils import get_discussion_title from django_comment_client.utils import get_discussion_title
...@@ -180,7 +180,7 @@ def index(request, course_id, chapter=None, section=None, ...@@ -180,7 +180,7 @@ def index(request, course_id, chapter=None, section=None,
- HTTPresponse - HTTPresponse
""" """
course = get_course_with_access(request.user, course_id, 'load') course = get_course_with_access(request.user, course_id, 'load', depth=2)
staff_access = has_access(request.user, course, 'staff') staff_access = has_access(request.user, course, 'staff')
registered = registered_for_course(course, request.user) registered = registered_for_course(course, request.user)
if not registered: if not registered:
...@@ -195,7 +195,8 @@ def index(request, course_id, chapter=None, section=None, ...@@ -195,7 +195,8 @@ def index(request, course_id, chapter=None, section=None,
# Has this student been in this course before? # Has this student been in this course before?
first_time = student_module_cache.lookup(course_id, 'course', course.location.url()) is None first_time = student_module_cache.lookup(course_id, 'course', course.location.url()) is None
course_module = get_module(request.user, request, course.location, student_module_cache, course.id) # Load the module for the course
course_module = get_module_for_descriptor(request.user, request, course, student_module_cache, course.id)
if course_module is None: if course_module is None:
log.warning('If you see this, something went wrong: if we got this' log.warning('If you see this, something went wrong: if we got this'
' far, should have gotten a course module for this user') ' far, should have gotten a course module for this user')
...@@ -215,30 +216,28 @@ def index(request, course_id, chapter=None, section=None, ...@@ -215,30 +216,28 @@ def index(request, course_id, chapter=None, section=None,
'xqa_server': settings.MITX_FEATURES.get('USE_XQA_SERVER','http://xqa:server@content-qa.mitx.mit.edu/xqa') 'xqa_server': settings.MITX_FEATURES.get('USE_XQA_SERVER','http://xqa:server@content-qa.mitx.mit.edu/xqa')
} }
chapter_descriptor = course.get_child_by_url_name(chapter) chapter_descriptor = course.get_child_by(lambda m: m.url_name == chapter)
if chapter_descriptor is not None: if chapter_descriptor is not None:
instance_module = get_instance_module(course_id, request.user, course_module, student_module_cache) instance_module = get_instance_module(course_id, request.user, course_module, student_module_cache)
save_child_position(course_module, chapter, instance_module) save_child_position(course_module, chapter, instance_module)
else: else:
raise Http404 raise Http404
chapter_module = get_module(request.user, request, chapter_descriptor.location, chapter_module = course_module.get_child_by(lambda m: m.url_name == chapter)
student_module_cache, course_id)
if chapter_module is None: if chapter_module is None:
# User may be trying to access a chapter that isn't live yet # User may be trying to access a chapter that isn't live yet
raise Http404 raise Http404
if section is not None: if section is not None:
section_descriptor = chapter_descriptor.get_child_by_url_name(section) section_descriptor = chapter_descriptor.get_child_by(lambda m: m.url_name == section)
if section_descriptor is None: if section_descriptor is None:
# Specifically asked-for section doesn't exist # Specifically asked-for section doesn't exist
raise Http404 raise Http404
section_student_module_cache = StudentModuleCache.cache_for_descriptor_descendents( # Load all descendents of the section, because we're going to display its
course_id, request.user, section_descriptor) # html, which in general will need all of its children
section_module = get_module(request.user, request, section_module = get_module(request.user, request, section_descriptor.location,
section_descriptor.location, student_module_cache, course.id, depth=None)
section_student_module_cache, course_id, position)
if section_module is None: if section_module is None:
# User may be trying to be clever and access something # User may be trying to be clever and access something
# they don't have access to. # they don't have access to.
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment