Commit 8abba395 by chrisndodge

Merge pull request #1746 from MITx/feature/cale/metadata-inheritance-caching

Use get_many and set_many to cut down on the number of metadata trees to...
parents f70511eb b975d4d9
...@@ -556,7 +556,7 @@ class ContentStoreTest(ModuleStoreTestCase): ...@@ -556,7 +556,7 @@ class ContentStoreTest(ModuleStoreTestCase):
module_store.update_children(parent.location, parent.children + [new_component_location.url()]) module_store.update_children(parent.location, parent.children + [new_component_location.url()])
# flush the cache # flush the cache
module_store.get_cached_metadata_inheritance_tree(new_component_location, -1) module_store.refresh_cached_metadata_inheritance_tree(new_component_location)
new_module = module_store.get_item(new_component_location) new_module = module_store.get_item(new_component_location)
# check for grace period definition which should be defined at the course level # check for grace period definition which should be defined at the course level
...@@ -571,7 +571,7 @@ class ContentStoreTest(ModuleStoreTestCase): ...@@ -571,7 +571,7 @@ class ContentStoreTest(ModuleStoreTestCase):
module_store.update_metadata(new_module.location, own_metadata(new_module)) module_store.update_metadata(new_module.location, own_metadata(new_module))
# flush the cache and refetch # flush the cache and refetch
module_store.get_cached_metadata_inheritance_tree(new_component_location, -1) module_store.refresh_cached_metadata_inheritance_tree(new_component_location)
new_module = module_store.get_item(new_component_location) new_module = module_store.get_item(new_component_location)
self.assertEqual(timedelta(1), new_module.lms.graceperiod) self.assertEqual(timedelta(1), new_module.lms.graceperiod)
......
...@@ -9,6 +9,7 @@ from fs.osfs import OSFS ...@@ -9,6 +9,7 @@ from fs.osfs import OSFS
from itertools import repeat from itertools import repeat
from path import path from path import path
from datetime import datetime from datetime import datetime
from operator import attrgetter
from importlib import import_module from importlib import import_module
from xmodule.errortracker import null_error_tracker, exc_info_to_str from xmodule.errortracker import null_error_tracker, exc_info_to_str
...@@ -96,6 +97,7 @@ class MongoKeyValueStore(KeyValueStore): ...@@ -96,6 +97,7 @@ class MongoKeyValueStore(KeyValueStore):
else: else:
return False return False
MongoUsage = namedtuple('MongoUsage', 'id, def_id') MongoUsage = namedtuple('MongoUsage', 'id, def_id')
...@@ -107,7 +109,7 @@ class CachingDescriptorSystem(MakoDescriptorSystem): ...@@ -107,7 +109,7 @@ class CachingDescriptorSystem(MakoDescriptorSystem):
references to metadata_inheritance_tree references to metadata_inheritance_tree
""" """
def __init__(self, modulestore, module_data, default_class, resources_fs, def __init__(self, modulestore, module_data, default_class, resources_fs,
error_tracker, render_template, metadata_inheritance_tree = None): error_tracker, render_template, metadata_cache=None):
""" """
modulestore: the module store that can be used to retrieve additional modules modulestore: the module store that can be used to retrieve additional modules
...@@ -132,9 +134,12 @@ class CachingDescriptorSystem(MakoDescriptorSystem): ...@@ -132,9 +134,12 @@ class CachingDescriptorSystem(MakoDescriptorSystem):
# cdodge: other Systems have a course_id attribute defined. To keep things consistent, let's # cdodge: other Systems have a course_id attribute defined. To keep things consistent, let's
# define an attribute here as well, even though it's None # define an attribute here as well, even though it's None
self.course_id = None self.course_id = None
self.metadata_inheritance_tree = metadata_inheritance_tree self.metadata_cache = metadata_cache
def load_item(self, location): def load_item(self, location):
"""
Return an XModule instance for the specified location
"""
location = Location(location) location = Location(location)
json_data = self.module_data.get(location) json_data = self.module_data.get(location)
if json_data is None: if json_data is None:
...@@ -165,8 +170,8 @@ class CachingDescriptorSystem(MakoDescriptorSystem): ...@@ -165,8 +170,8 @@ class CachingDescriptorSystem(MakoDescriptorSystem):
model_data = DbModel(kvs, class_, None, MongoUsage(self.course_id, location)) model_data = DbModel(kvs, class_, None, MongoUsage(self.course_id, location))
module = class_(self, location, model_data) module = class_(self, location, model_data)
if self.metadata_inheritance_tree is not None: if self.metadata_cache is not None:
metadata_to_inherit = self.metadata_inheritance_tree.get('parent_metadata', {}).get(location.url(), {}) metadata_to_inherit = self.metadata_cache.get(metadata_cache_key(location), {}).get('parent_metadata', {}).get(location.url(), {})
inherit_metadata(module, metadata_to_inherit) inherit_metadata(module, metadata_to_inherit)
return module return module
except: except:
...@@ -196,16 +201,19 @@ def location_to_query(location, wildcard=True): ...@@ -196,16 +201,19 @@ def location_to_query(location, wildcard=True):
return query return query
def namedtuple_to_son(namedtuple, prefix=''): def namedtuple_to_son(ntuple, prefix=''):
""" """
Converts a namedtuple into a SON object with the same key order Converts a namedtuple into a SON object with the same key order
""" """
son = SON() son = SON()
for idx, field_name in enumerate(namedtuple._fields): for idx, field_name in enumerate(ntuple._fields):
son[prefix + field_name] = namedtuple[idx] son[prefix + field_name] = ntuple[idx]
return son return son
metadata_cache_key = attrgetter('org', 'course')
class MongoModuleStore(ModuleStoreBase): class MongoModuleStore(ModuleStoreBase):
""" """
A Mongodb backed ModuleStore A Mongodb backed ModuleStore
...@@ -228,7 +236,6 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -228,7 +236,6 @@ class MongoModuleStore(ModuleStoreBase):
if user is not None and password is not None: if user is not None and password is not None:
self.collection.database.authenticate(user, password) self.collection.database.authenticate(user, password)
# Force mongo to report errors, at the expense of performance # Force mongo to report errors, at the expense of performance
self.collection.safe = True self.collection.safe = True
...@@ -258,7 +265,7 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -258,7 +265,7 @@ class MongoModuleStore(ModuleStoreBase):
query = { query = {
'_id.org': location.org, '_id.org': location.org,
'_id.course': location.course, '_id.course': location.course,
'_id.category': {'$in': [ 'course', 'chapter', 'sequential', 'vertical']} '_id.category': {'$in': ['course', 'chapter', 'sequential', 'vertical']}
} }
# we just want the Location, children, and metadata # we just want the Location, children, and metadata
record_filter = {'_id': 1, 'definition.children': 1, 'metadata': 1} record_filter = {'_id': 1, 'definition.children': 1, 'metadata': 1}
...@@ -278,7 +285,11 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -278,7 +285,11 @@ class MongoModuleStore(ModuleStoreBase):
# now traverse the tree and compute down the inherited metadata # now traverse the tree and compute down the inherited metadata
metadata_to_inherit = {} metadata_to_inherit = {}
def _compute_inherited_metadata(url): def _compute_inherited_metadata(url):
"""
Helper method for computing inherited metadata for a specific location url
"""
my_metadata = {} my_metadata = {}
# check for presence of metadata key. Note that a given module may not yet be fully formed. # check for presence of metadata key. Note that a given module may not yet be fully formed.
# example: update_item -> update_children -> update_metadata sequence on new item create # example: update_item -> update_children -> update_metadata sequence on new item create
...@@ -293,7 +304,7 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -293,7 +304,7 @@ class MongoModuleStore(ModuleStoreBase):
# go through all the children and recurse, but only if we have # go through all the children and recurse, but only if we have
# in the result set. Remember results will not contain leaf nodes # in the result set. Remember results will not contain leaf nodes
for child in results_by_url[url].get('definition',{}).get('children',[]): for child in results_by_url[url].get('definition', {}).get('children', []):
if child in results_by_url: if child in results_by_url:
new_child_metadata = copy.deepcopy(my_metadata) new_child_metadata = copy.deepcopy(my_metadata)
new_child_metadata.update(results_by_url[child].get('metadata', {})) new_child_metadata.update(results_by_url[child].get('metadata', {}))
...@@ -304,42 +315,52 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -304,42 +315,52 @@ class MongoModuleStore(ModuleStoreBase):
# this is likely a leaf node, so let's record what metadata we need to inherit # this is likely a leaf node, so let's record what metadata we need to inherit
metadata_to_inherit[child] = my_metadata metadata_to_inherit[child] = my_metadata
if root is not None: if root is not None:
_compute_inherited_metadata(root) _compute_inherited_metadata(root)
return {'parent_metadata': metadata_to_inherit, return {'parent_metadata': metadata_to_inherit,
'timestamp' : datetime.now()} 'timestamp': datetime.now()}
def get_cached_metadata_inheritance_tree(self, location, force_refresh=False): def get_cached_metadata_inheritance_trees(self, locations, force_refresh=False):
''' '''
TODO (cdodge) This method can be deleted when the 'split module store' work has been completed TODO (cdodge) This method can be deleted when the 'split module store' work has been completed
''' '''
key_name = '{0}/{1}'.format(location.org, location.course)
tree = None trees = {}
if self.metadata_inheritance_cache is not None: if locations and self.metadata_inheritance_cache is not None and not force_refresh:
tree = self.metadata_inheritance_cache.get(key_name) trees = self.metadata_inheritance_cache.get_many(list(set([metadata_cache_key(loc) for loc in locations])))
else: else:
# This is to help guard against an accident prod runtime without a cache # This is to help guard against an accident prod runtime without a cache
logging.warning('Running MongoModuleStore without metadata_inheritance_cache. This should not happen in production!') logging.warning('Running MongoModuleStore without metadata_inheritance_cache. '
'This should not happen in production!')
if tree is None or force_refresh: to_cache = {}
tree = self.get_metadata_inheritance_tree(location) for loc in locations:
if self.metadata_inheritance_cache is not None: cache_key = metadata_cache_key(loc)
self.metadata_inheritance_cache.set(key_name, tree) if cache_key not in trees:
to_cache[cache_key] = trees[cache_key] = self.get_metadata_inheritance_tree(loc)
if to_cache and self.metadata_inheritance_cache is not None:
self.metadata_inheritance_cache.set_many(to_cache)
return tree return trees
def refresh_cached_metadata_inheritance_tree(self, location): def refresh_cached_metadata_inheritance_tree(self, location):
"""
Refresh the cached metadata inheritance tree for the org/course combination
for location
"""
pseudo_course_id = '/'.join([location.org, location.course]) pseudo_course_id = '/'.join([location.org, location.course])
if pseudo_course_id not in self.ignore_write_events_on_courses: if pseudo_course_id not in self.ignore_write_events_on_courses:
self.get_cached_metadata_inheritance_tree(location, force_refresh = True) self.get_cached_metadata_inheritance_trees([location], force_refresh=True)
def clear_cached_metadata_inheritance_tree(self, location): def clear_cached_metadata_inheritance_tree(self, location):
key_name = '{0}/{1}'.format(location.org, location.course) """
Delete the cached metadata inheritance tree for the org/course combination
for location
"""
if self.metadata_inheritance_cache is not None: if self.metadata_inheritance_cache is not None:
self.metadata_inheritance_cache.delete(key_name) self.metadata_inheritance_cache.delete(metadata_cache_key(location))
def _clean_item_data(self, item): def _clean_item_data(self, item):
""" """
...@@ -367,7 +388,7 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -367,7 +388,7 @@ class MongoModuleStore(ModuleStoreBase):
data[Location(item['location'])] = item data[Location(item['location'])] = item
if depth == 0: if depth == 0:
break; break
# Load all children by id. See # Load all children by id. See
# http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or # http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or
...@@ -385,7 +406,18 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -385,7 +406,18 @@ class MongoModuleStore(ModuleStoreBase):
return data return data
def _load_item(self, item, data_cache, should_apply_metadata_inheritence=True): def _cache_metadata_inheritance(self, items, depth, force_refresh=False):
"""
Retrieves all course metadata inheritance trees needed to load items
"""
locations = [
Location(item['location']) for item in items
if not (item['location']['category'] == 'course' and depth == 0)
]
return self.get_cached_metadata_inheritance_trees(locations, force_refresh=force_refresh)
def _load_item(self, item, data_cache, metadata_cache):
""" """
Load an XModuleDescriptor from item, using the children stored in data_cache Load an XModuleDescriptor from item, using the children stored in data_cache
""" """
...@@ -397,11 +429,6 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -397,11 +429,6 @@ class MongoModuleStore(ModuleStoreBase):
resource_fs = OSFS(root) resource_fs = OSFS(root)
metadata_inheritance_tree = None
if should_apply_metadata_inheritence:
metadata_inheritance_tree = self.get_cached_metadata_inheritance_tree(Location(item['location']))
# TODO (cdodge): When the 'split module store' work has been completed, we should remove # TODO (cdodge): When the 'split module store' work has been completed, we should remove
# the 'metadata_inheritance_tree' parameter # the 'metadata_inheritance_tree' parameter
system = CachingDescriptorSystem( system = CachingDescriptorSystem(
...@@ -411,7 +438,7 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -411,7 +438,7 @@ class MongoModuleStore(ModuleStoreBase):
resource_fs, resource_fs,
self.error_tracker, self.error_tracker,
self.render_template, self.render_template,
metadata_inheritance_tree = metadata_inheritance_tree metadata_cache,
) )
return system.load_item(item['location']) return system.load_item(item['location'])
...@@ -421,11 +448,11 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -421,11 +448,11 @@ class MongoModuleStore(ModuleStoreBase):
to specified depth to specified depth
""" """
data_cache = self._cache_children(items, depth) data_cache = self._cache_children(items, depth)
inheritance_cache = self._cache_metadata_inheritance(items, depth)
# if we are loading a course object, if we're not prefetching children (depth != 0) then don't # if we are loading a course object, if we're not prefetching children (depth != 0) then don't
# bother with the metadata inheritence # bother with the metadata inheritence
return [self._load_item(item, data_cache, return [self._load_item(item, data_cache, inheritance_cache) for item in items]
should_apply_metadata_inheritence=(item['location']['category'] != 'course' or depth != 0)) for item in items]
def get_courses(self): def get_courses(self):
''' '''
...@@ -559,7 +586,8 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -559,7 +586,8 @@ class MongoModuleStore(ModuleStoreBase):
raise Exception('Could not find course at {0}'.format(course_search_location)) raise Exception('Could not find course at {0}'.format(course_search_location))
if found_cnt > 1: if found_cnt > 1:
raise Exception('Found more than one course at {0}. There should only be one!!! Dump = {1}'.format(course_search_location, courses)) raise Exception('Found more than one course at {0}. There should only be one!!! '
'Dump = {1}'.format(course_search_location, courses))
return courses[0] return courses[0]
...@@ -675,4 +703,10 @@ class MongoModuleStore(ModuleStoreBase): ...@@ -675,4 +703,10 @@ class MongoModuleStore(ModuleStoreBase):
# DraftModuleStore is first, because it needs to intercept calls to MongoModuleStore # DraftModuleStore is first, because it needs to intercept calls to MongoModuleStore
class DraftMongoModuleStore(DraftModuleStore, MongoModuleStore): class DraftMongoModuleStore(DraftModuleStore, MongoModuleStore):
"""
Version of MongoModuleStore with draft capability mixed in
"""
"""
Version of MongoModuleStore with draft capability mixed in
"""
pass pass
import pymongo import pymongo
from nose.tools import assert_equals, assert_raises, assert_not_equals, with_setup from mock import Mock
from nose.tools import assert_equals, assert_raises, assert_not_equals, with_setup, assert_false
from pprint import pprint from pprint import pprint
from xmodule.modulestore import Location from xmodule.modulestore import Location
...@@ -102,3 +103,58 @@ class TestMongoModuleStore(object): ...@@ -102,3 +103,58 @@ class TestMongoModuleStore(object):
def test_path_to_location(self): def test_path_to_location(self):
'''Make sure that path_to_location works''' '''Make sure that path_to_location works'''
check_path_to_location(self.store) check_path_to_location(self.store)
def test_metadata_inheritance_query_count(self):
'''
When retrieving items from mongo, we should only query the cache a number of times
equal to the number of courses being retrieved from.
We should also not query
'''
self.store.metadata_inheritance_cache = Mock()
get_many = self.store.metadata_inheritance_cache.get_many
set_many = self.store.metadata_inheritance_cache.set_many
get_many.return_value = {('edX', 'toy'): {}}
self.store.get_item(Location("i4x://edX/toy/course/2012_Fall"), depth=0)
assert_false(get_many.called)
assert_false(set_many.called)
get_many.reset_mock()
self.store.get_item(Location("i4x://edX/toy/course/2012_Fall"), depth=3)
get_many.assert_called_with([('edX', 'toy')])
assert_equals(0, set_many.call_count)
get_many.reset_mock()
self.store.get_items(Location('i4x', 'edX', None, 'course', None), depth=0)
assert_false(get_many.called)
assert_false(set_many.called)
get_many.reset_mock()
self.store.get_items(Location('i4x', 'edX', None, 'course', None), depth=3)
assert_equals(1, get_many.call_count)
assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(get_many.call_args[0][0]))
assert_equals(1, set_many.call_count)
assert_equals([('edX', 'simple')], sorted(set_many.call_args[0][0].keys()))
get_many.reset_mock()
self.store.get_items(Location('i4x', 'edX', None, None, None), depth=0)
assert_equals(1, get_many.call_count)
assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(get_many.call_args[0][0]))
assert_equals(1, set_many.call_count)
assert_equals([('edX', 'simple')], sorted(set_many.call_args[0][0].keys()))
get_many.reset_mock()
def test_metadata_inheritance_query_count_forced_refresh(self):
self.store.metadata_inheritance_cache = Mock()
get_many = self.store.metadata_inheritance_cache.get_many
set_many = self.store.metadata_inheritance_cache.set_many
get_many.return_value = {('edX', 'toy'): {}}
self.store.get_cached_metadata_inheritance_trees(
[Location("i4x://edX/toy/course/2012_Fall"), Location("i4x://edX/simple/course/2012_Fall")],
True
)
assert_false(get_many.called)
assert_equals(1, set_many.call_count)
assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(set_many.call_args[0][0].keys()))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment