Commit 8abba395 by chrisndodge

Merge pull request #1746 from MITx/feature/cale/metadata-inheritance-caching

Use get_many and set_many to cut down on the number of metadata trees to...
parents f70511eb b975d4d9
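This merge replaces one cache get/set per module with a single get_many for every course in a batch and a single set_many for whatever missed. A minimal sketch of that batched read-through pattern, assuming only a Django-style cache object exposing get_many/set_many; get_trees, compute_tree, and the (org, course) key function are illustrative names, not code from this diff:

```python
# Batched read-through caching: one get_many for the whole batch, recompute only
# the misses, one set_many to backfill. Assumes a Django-cache-like object.
def get_trees(cache, locations, compute_tree, key=lambda loc: (loc.org, loc.course)):
    keys = list(set(key(loc) for loc in locations))
    trees = cache.get_many(keys)              # one round trip for all cached courses
    missing = {}
    for loc in locations:
        k = key(loc)
        if k not in trees:                    # only courses that missed are recomputed
            missing[k] = trees[k] = compute_tree(loc)
    if missing:
        cache.set_many(missing)               # one round trip to store the new trees
    return trees
```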
......@@ -211,7 +211,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
new_loc = descriptor.location._replace(org='MITx', course='999')
print "Checking {0} should now also be at {1}".format(descriptor.location.url(), new_loc.url())
resp = self.client.get(reverse('edit_unit', kwargs={'location': new_loc.url()}))
self.assertEqual(resp.status_code, 200)
def test_delete_course(self):
import_from_xml(modulestore(), 'common/test/data/', ['full'])
......@@ -328,11 +328,11 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
self.assertEqual(wrapper.counter, 4)
# make sure we pre-fetched a known sequential which should be at depth=2
self.assertTrue(Location(['i4x', 'edX', 'full', 'sequential',
'Administrivia_and_Circuit_Elements', None]) in course.system.module_data)
# make sure we don't have a specific vertical which should be at depth=3
self.assertFalse(Location(['i4x', 'edX', 'full', 'vertical', 'vertical_58',
None]) in course.system.module_data)
def test_export_course_with_unknown_metadata(self):
......@@ -556,7 +556,7 @@ class ContentStoreTest(ModuleStoreTestCase):
module_store.update_children(parent.location, parent.children + [new_component_location.url()])
# flush the cache
- module_store.get_cached_metadata_inheritance_tree(new_component_location, -1)
+ module_store.refresh_cached_metadata_inheritance_tree(new_component_location)
new_module = module_store.get_item(new_component_location)
# check for grace period definition which should be defined at the course level
......@@ -571,7 +571,7 @@ class ContentStoreTest(ModuleStoreTestCase):
module_store.update_metadata(new_module.location, own_metadata(new_module))
# flush the cache and refetch
- module_store.get_cached_metadata_inheritance_tree(new_component_location, -1)
+ module_store.refresh_cached_metadata_inheritance_tree(new_component_location)
new_module = module_store.get_item(new_component_location)
self.assertEqual(timedelta(1), new_module.lms.graceperiod)
......
......@@ -9,6 +9,7 @@ from fs.osfs import OSFS
from itertools import repeat
from path import path
from datetime import datetime
+ from operator import attrgetter
from importlib import import_module
from xmodule.errortracker import null_error_tracker, exc_info_to_str
......@@ -96,6 +97,7 @@ class MongoKeyValueStore(KeyValueStore):
else:
return False
MongoUsage = namedtuple('MongoUsage', 'id, def_id')
......@@ -107,7 +109,7 @@ class CachingDescriptorSystem(MakoDescriptorSystem):
references to metadata_inheritance_tree
"""
def __init__(self, modulestore, module_data, default_class, resources_fs,
- error_tracker, render_template, metadata_inheritance_tree = None):
+ error_tracker, render_template, metadata_cache=None):
"""
modulestore: the module store that can be used to retrieve additional modules
......@@ -132,9 +134,12 @@ class CachingDescriptorSystem(MakoDescriptorSystem):
# cdodge: other Systems have a course_id attribute defined. To keep things consistent, let's
# define an attribute here as well, even though it's None
self.course_id = None
- self.metadata_inheritance_tree = metadata_inheritance_tree
+ self.metadata_cache = metadata_cache
def load_item(self, location):
"""
Return an XModule instance for the specified location
"""
location = Location(location)
json_data = self.module_data.get(location)
if json_data is None:
......@@ -165,8 +170,8 @@ class CachingDescriptorSystem(MakoDescriptorSystem):
model_data = DbModel(kvs, class_, None, MongoUsage(self.course_id, location))
module = class_(self, location, model_data)
- if self.metadata_inheritance_tree is not None:
- metadata_to_inherit = self.metadata_inheritance_tree.get('parent_metadata', {}).get(location.url(), {})
+ if self.metadata_cache is not None:
+ metadata_to_inherit = self.metadata_cache.get(metadata_cache_key(location), {}).get('parent_metadata', {}).get(location.url(), {})
inherit_metadata(module, metadata_to_inherit)
return module
except:
......@@ -196,16 +201,19 @@ def location_to_query(location, wildcard=True):
return query
- def namedtuple_to_son(namedtuple, prefix=''):
+ def namedtuple_to_son(ntuple, prefix=''):
"""
Converts a namedtuple into a SON object with the same key order
"""
son = SON()
- for idx, field_name in enumerate(namedtuple._fields):
- son[prefix + field_name] = namedtuple[idx]
+ for idx, field_name in enumerate(ntuple._fields):
+ son[prefix + field_name] = ntuple[idx]
return son
+ metadata_cache_key = attrgetter('org', 'course')
class MongoModuleStore(ModuleStoreBase):
"""
A Mongodb backed ModuleStore
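In the hunk above, metadata_cache_key = attrgetter('org', 'course') builds a callable that returns the (org, course) tuple for a location, so every module in a course shares one cache key. A small illustration; Loc is a stand-in namedtuple, not the real Location class:

```python
from collections import namedtuple
from operator import attrgetter

# Stand-in for Location, only to show the shape of the key.
Loc = namedtuple('Loc', 'org course category name')
metadata_cache_key = attrgetter('org', 'course')

key = metadata_cache_key(Loc('edX', 'toy', 'course', '2012_Fall'))
print(key)  # ('edX', 'toy') -- shared by every item in the course
```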
......@@ -228,7 +236,6 @@ class MongoModuleStore(ModuleStoreBase):
if user is not None and password is not None:
self.collection.database.authenticate(user, password)
# Force mongo to report errors, at the expense of performance
self.collection.safe = True
......@@ -258,7 +265,7 @@ class MongoModuleStore(ModuleStoreBase):
query = {
'_id.org': location.org,
'_id.course': location.course,
- '_id.category': {'$in': [ 'course', 'chapter', 'sequential', 'vertical']}
+ '_id.category': {'$in': ['course', 'chapter', 'sequential', 'vertical']}
}
# we just want the Location, children, and metadata
record_filter = {'_id': 1, 'definition.children': 1, 'metadata': 1}
......@@ -278,7 +285,11 @@ class MongoModuleStore(ModuleStoreBase):
# now traverse the tree and compute down the inherited metadata
metadata_to_inherit = {}
def _compute_inherited_metadata(url):
"""
Helper method for computing inherited metadata for a specific location url
"""
my_metadata = {}
# check for presence of metadata key. Note that a given module may not yet be fully formed.
# example: update_item -> update_children -> update_metadata sequence on new item create
......@@ -293,7 +304,7 @@ class MongoModuleStore(ModuleStoreBase):
# go through all the children and recurse, but only if we have
# in the result set. Remember results will not contain leaf nodes
- for child in results_by_url[url].get('definition',{}).get('children',[]):
+ for child in results_by_url[url].get('definition', {}).get('children', []):
if child in results_by_url:
new_child_metadata = copy.deepcopy(my_metadata)
new_child_metadata.update(results_by_url[child].get('metadata', {}))
......@@ -304,42 +315,52 @@ class MongoModuleStore(ModuleStoreBase):
# this is likely a leaf node, so let's record what metadata we need to inherit
metadata_to_inherit[child] = my_metadata
if root is not None:
_compute_inherited_metadata(root)
return {'parent_metadata': metadata_to_inherit,
- 'timestamp' : datetime.now()}
+ 'timestamp': datetime.now()}
- def get_cached_metadata_inheritance_tree(self, location, force_refresh=False):
+ def get_cached_metadata_inheritance_trees(self, locations, force_refresh=False):
'''
TODO (cdodge) This method can be deleted when the 'split module store' work has been completed
'''
- key_name = '{0}/{1}'.format(location.org, location.course)
- tree = None
- if self.metadata_inheritance_cache is not None:
- tree = self.metadata_inheritance_cache.get(key_name)
+ trees = {}
+ if locations and self.metadata_inheritance_cache is not None and not force_refresh:
+ trees = self.metadata_inheritance_cache.get_many(list(set([metadata_cache_key(loc) for loc in locations])))
else:
# This is to help guard against an accident prod runtime without a cache
- logging.warning('Running MongoModuleStore without metadata_inheritance_cache. This should not happen in production!')
+ logging.warning('Running MongoModuleStore without metadata_inheritance_cache. '
+ 'This should not happen in production!')
+ to_cache = {}
+ for loc in locations:
+ cache_key = metadata_cache_key(loc)
+ if cache_key not in trees:
+ to_cache[cache_key] = trees[cache_key] = self.get_metadata_inheritance_tree(loc)
- if tree is None or force_refresh:
- tree = self.get_metadata_inheritance_tree(location)
- if self.metadata_inheritance_cache is not None:
- self.metadata_inheritance_cache.set(key_name, tree)
+ if to_cache and self.metadata_inheritance_cache is not None:
+ self.metadata_inheritance_cache.set_many(to_cache)
- return tree
+ return trees
def refresh_cached_metadata_inheritance_tree(self, location):
"""
Refresh the cached metadata inheritance tree for the org/course combination
for location
"""
pseudo_course_id = '/'.join([location.org, location.course])
if pseudo_course_id not in self.ignore_write_events_on_courses:
- self.get_cached_metadata_inheritance_tree(location, force_refresh = True)
+ self.get_cached_metadata_inheritance_trees([location], force_refresh=True)
def clear_cached_metadata_inheritance_tree(self, location):
- key_name = '{0}/{1}'.format(location.org, location.course)
+ """
+ Delete the cached metadata inheritance tree for the org/course combination
+ for location
+ """
if self.metadata_inheritance_cache is not None:
- self.metadata_inheritance_cache.delete(key_name)
+ self.metadata_inheritance_cache.delete(metadata_cache_key(location))
def _clean_item_data(self, item):
"""
......@@ -367,7 +388,7 @@ class MongoModuleStore(ModuleStoreBase):
data[Location(item['location'])] = item
if depth == 0:
- break;
+ break
# Load all children by id. See
# http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or
......@@ -385,7 +406,18 @@ class MongoModuleStore(ModuleStoreBase):
return data
- def _load_item(self, item, data_cache, should_apply_metadata_inheritence=True):
+ def _cache_metadata_inheritance(self, items, depth, force_refresh=False):
+ """
+ Retrieves all course metadata inheritance trees needed to load items
+ """
+ locations = [
+ Location(item['location']) for item in items
+ if not (item['location']['category'] == 'course' and depth == 0)
+ ]
+ return self.get_cached_metadata_inheritance_trees(locations, force_refresh=force_refresh)
+ def _load_item(self, item, data_cache, metadata_cache):
"""
Load an XModuleDescriptor from item, using the children stored in data_cache
"""
......@@ -397,11 +429,6 @@ class MongoModuleStore(ModuleStoreBase):
resource_fs = OSFS(root)
- metadata_inheritance_tree = None
- if should_apply_metadata_inheritence:
- metadata_inheritance_tree = self.get_cached_metadata_inheritance_tree(Location(item['location']))
- # TODO (cdodge): When the 'split module store' work has been completed, we should remove
- # the 'metadata_inheritance_tree' parameter
system = CachingDescriptorSystem(
......@@ -411,7 +438,7 @@ class MongoModuleStore(ModuleStoreBase):
resource_fs,
self.error_tracker,
self.render_template,
- metadata_inheritance_tree = metadata_inheritance_tree
+ metadata_cache,
)
return system.load_item(item['location'])
......@@ -421,11 +448,11 @@ class MongoModuleStore(ModuleStoreBase):
to specified depth
"""
data_cache = self._cache_children(items, depth)
+ inheritance_cache = self._cache_metadata_inheritance(items, depth)
# if we are loading a course object, if we're not prefetching children (depth != 0) then don't
# bother with the metadata inheritence
- return [self._load_item(item, data_cache,
- should_apply_metadata_inheritence=(item['location']['category'] != 'course' or depth != 0)) for item in items]
+ return [self._load_item(item, data_cache, inheritance_cache) for item in items]
def get_courses(self):
'''
......@@ -559,7 +586,8 @@ class MongoModuleStore(ModuleStoreBase):
raise Exception('Could not find course at {0}'.format(course_search_location))
if found_cnt > 1:
- raise Exception('Found more than one course at {0}. There should only be one!!! Dump = {1}'.format(course_search_location, courses))
+ raise Exception('Found more than one course at {0}. There should only be one!!! '
+ 'Dump = {1}'.format(course_search_location, courses))
return courses[0]
......@@ -631,7 +659,7 @@ class MongoModuleStore(ModuleStoreBase):
self._update_single_item(location, {'metadata': metadata})
# recompute (and update) the metadata inheritance tree which is cached
self.refresh_cached_metadata_inheritance_tree(loc)
def delete_item(self, location):
"""
......@@ -654,7 +682,7 @@ class MongoModuleStore(ModuleStoreBase):
# from overriding our default value set in the init method.
safe=self.collection.safe)
# recompute (and update) the metadata inheritance tree which is cached
self.refresh_cached_metadata_inheritance_tree(Location(location))
def get_parent_locations(self, location, course_id):
'''Find all locations that are the parents of this location in this
......@@ -675,4 +703,10 @@ class MongoModuleStore(ModuleStoreBase):
# DraftModuleStore is first, because it needs to intercept calls to MongoModuleStore
class DraftMongoModuleStore(DraftModuleStore, MongoModuleStore):
"""
Version of MongoModuleStore with draft capability mixed in
"""
"""
Version of MongoModuleStore with draft capability mixed in
"""
pass
import pymongo
- from nose.tools import assert_equals, assert_raises, assert_not_equals, with_setup
+ from mock import Mock
+ from nose.tools import assert_equals, assert_raises, assert_not_equals, with_setup, assert_false
from pprint import pprint
from xmodule.modulestore import Location
......@@ -102,3 +103,58 @@ class TestMongoModuleStore(object):
def test_path_to_location(self):
'''Make sure that path_to_location works'''
check_path_to_location(self.store)
def test_metadata_inheritance_query_count(self):
'''
When retrieving items from mongo, we should only query the cache a number of times
equal to the number of courses being retrieved from.
We should also not query the cache at all when loading a course at depth=0, and should only write back courses that missed the cache.
'''
self.store.metadata_inheritance_cache = Mock()
get_many = self.store.metadata_inheritance_cache.get_many
set_many = self.store.metadata_inheritance_cache.set_many
get_many.return_value = {('edX', 'toy'): {}}
self.store.get_item(Location("i4x://edX/toy/course/2012_Fall"), depth=0)
assert_false(get_many.called)
assert_false(set_many.called)
get_many.reset_mock()
self.store.get_item(Location("i4x://edX/toy/course/2012_Fall"), depth=3)
get_many.assert_called_with([('edX', 'toy')])
assert_equals(0, set_many.call_count)
get_many.reset_mock()
self.store.get_items(Location('i4x', 'edX', None, 'course', None), depth=0)
assert_false(get_many.called)
assert_false(set_many.called)
get_many.reset_mock()
self.store.get_items(Location('i4x', 'edX', None, 'course', None), depth=3)
assert_equals(1, get_many.call_count)
assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(get_many.call_args[0][0]))
assert_equals(1, set_many.call_count)
assert_equals([('edX', 'simple')], sorted(set_many.call_args[0][0].keys()))
get_many.reset_mock()
self.store.get_items(Location('i4x', 'edX', None, None, None), depth=0)
assert_equals(1, get_many.call_count)
assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(get_many.call_args[0][0]))
assert_equals(1, set_many.call_count)
assert_equals([('edX', 'simple')], sorted(set_many.call_args[0][0].keys()))
get_many.reset_mock()
def test_metadata_inheritance_query_count_forced_refresh(self):
self.store.metadata_inheritance_cache = Mock()
get_many = self.store.metadata_inheritance_cache.get_many
set_many = self.store.metadata_inheritance_cache.set_many
get_many.return_value = {('edX', 'toy'): {}}
self.store.get_cached_metadata_inheritance_trees(
[Location("i4x://edX/toy/course/2012_Fall"), Location("i4x://edX/simple/course/2012_Fall")],
True
)
assert_false(get_many.called)
assert_equals(1, set_many.call_count)
assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(set_many.call_args[0][0].keys()))