Commit 4e7ed93b by John Eskew

Rollback eager definition loading.

Pass 'lazy' parameter down properly into low-level definition caching.
Adjust mongo call numbers in tests and add missing bulk_op wrapper.
Avoids the loading of all definitions in a large course when only
specific, filtered definitions are needed to be loaded for an endpoint.
parent c72cc5dc
...@@ -423,6 +423,7 @@ class SplitBulkWriteMixin(BulkOperationsMixin): ...@@ -423,6 +423,7 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
ids.remove(definition_id) ids.remove(definition_id)
definitions.append(definition) definitions.append(definition)
if len(ids):
# Query the db for the definitions. # Query the db for the definitions.
defs_from_db = self.db_connection.get_definitions(list(ids)) defs_from_db = self.db_connection.get_definitions(list(ids))
# Add the retrieved definitions to the cache. # Add the retrieved definitions to the cache.
...@@ -683,7 +684,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -683,7 +684,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
connection.close() connection.close()
def cache_items(self, system, base_block_ids, course_key, depth=0, lazy=True): def cache_items(self, system, base_block_ids, course_key, depth=0, lazy=True):
''' """
Handles caching of items once inheritance and any other one time Handles caching of items once inheritance and any other one time
per course per fetch operations are done. per course per fetch operations are done.
...@@ -692,8 +693,8 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -692,8 +693,8 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
base_block_ids: list of BlockIds to fetch base_block_ids: list of BlockIds to fetch
course_key: the destination course providing the context course_key: the destination course providing the context
depth: how deep below these to prefetch depth: how deep below these to prefetch
lazy: whether to fetch definitions or use placeholders lazy: whether to load definitions now or later
''' """
with self.bulk_operations(course_key, emit_signals=False): with self.bulk_operations(course_key, emit_signals=False):
new_module_data = {} new_module_data = {}
for block_id in base_block_ids: for block_id in base_block_ids:
...@@ -704,13 +705,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -704,13 +705,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
new_module_data new_module_data
) )
# This code supports lazy loading, where the descendent definitions aren't loaded # This method supports lazy loading, where the descendent definitions aren't loaded
# until they're actually needed. # until they're actually needed.
# However, assume that depth == 0 means no depth is specified and depth != 0 means if not lazy:
# a depth *is* specified. If a non-zero depth is specified, force non-lazy definition
# loading in order to populate the definition cache for later access.
load_definitions_now = depth != 0 or not lazy
if load_definitions_now:
# Non-lazy loading: Load all descendants by id. # Non-lazy loading: Load all descendants by id.
descendent_definitions = self.get_definitions( descendent_definitions = self.get_definitions(
course_key, course_key,
...@@ -734,15 +731,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -734,15 +731,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return system.module_data return system.module_data
@contract(course_entry=CourseEnvelope, block_keys="list(BlockKey)", depth="int | None") @contract(course_entry=CourseEnvelope, block_keys="list(BlockKey)", depth="int | None")
def _load_items(self, course_entry, block_keys, depth=0, lazy=True, **kwargs): def _load_items(self, course_entry, block_keys, depth=0, **kwargs):
''' """
Load & cache the given blocks from the course. May return the blocks in any order. Load & cache the given blocks from the course. May return the blocks in any order.
Load the definitions into each block if lazy is False; Load the definitions into each block if lazy is in kwargs and is False;
otherwise, use the lazy definition placeholder. otherwise, do not load the definitions - they'll be loaded later when needed.
''' """
runtime = self._get_cache(course_entry.structure['_id']) runtime = self._get_cache(course_entry.structure['_id'])
if runtime is None: if runtime is None:
lazy = kwargs.pop('lazy', True)
runtime = self.create_runtime(course_entry, lazy) runtime = self.create_runtime(course_entry, lazy)
self._add_cache(course_entry.structure['_id'], runtime) self._add_cache(course_entry.structure['_id'], runtime)
self.cache_items(runtime, block_keys, course_entry.course_key, depth, lazy) self.cache_items(runtime, block_keys, course_entry.course_key, depth, lazy)
...@@ -786,7 +784,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -786,7 +784,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
self.request_cache.data['course_cache'] = {} self.request_cache.data['course_cache'] = {}
def _lookup_course(self, course_key): def _lookup_course(self, course_key):
''' """
Decode the locator into the right series of db access. Does not Decode the locator into the right series of db access. Does not
return the CourseDescriptor! It returns the actual db json from return the CourseDescriptor! It returns the actual db json from
structures. structures.
...@@ -797,7 +795,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -797,7 +795,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
reference) reference)
:param course_key: any subclass of CourseLocator :param course_key: any subclass of CourseLocator
''' """
if course_key.org and course_key.course and course_key.run: if course_key.org and course_key.course and course_key.run:
if course_key.branch is None: if course_key.branch is None:
raise InsufficientSpecificationError(course_key) raise InsufficientSpecificationError(course_key)
...@@ -864,7 +862,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -864,7 +862,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
locator = locator_factory(structure_info, branch) locator = locator_factory(structure_info, branch)
envelope = CourseEnvelope(locator, entry) envelope = CourseEnvelope(locator, entry)
root = entry['root'] root = entry['root']
structures_list = self._load_items(envelope, [root], 0, lazy=True, **kwargs) structures_list = self._load_items(envelope, [root], depth=0, **kwargs)
if not isinstance(structures_list[0], ErrorDescriptor): if not isinstance(structures_list[0], ErrorDescriptor):
result.append(structures_list[0]) result.append(structures_list[0])
return result return result
...@@ -929,13 +927,13 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -929,13 +927,13 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
""" """
structure_entry = self._lookup_course(structure_id) structure_entry = self._lookup_course(structure_id)
root = structure_entry.structure['root'] root = structure_entry.structure['root']
result = self._load_items(structure_entry, [root], depth, lazy=True, **kwargs) result = self._load_items(structure_entry, [root], depth, **kwargs)
return result[0] return result[0]
def get_course(self, course_id, depth=0, **kwargs): def get_course(self, course_id, depth=0, **kwargs):
''' """
Gets the course descriptor for the course identified by the locator Gets the course descriptor for the course identified by the locator
''' """
if not isinstance(course_id, CourseLocator) or course_id.deprecated: if not isinstance(course_id, CourseLocator) or course_id.deprecated:
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore. # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
raise ItemNotFoundError(course_id) raise ItemNotFoundError(course_id)
...@@ -951,14 +949,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -951,14 +949,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return self._get_structure(library_id, depth, **kwargs) return self._get_structure(library_id, depth, **kwargs)
def has_course(self, course_id, ignore_case=False, **kwargs): def has_course(self, course_id, ignore_case=False, **kwargs):
''' """
Does this course exist in this modulestore. This method does not verify that the branch &/or Does this course exist in this modulestore. This method does not verify that the branch &/or
version in the course_id exists. Use get_course_index_info to check that. version in the course_id exists. Use get_course_index_info to check that.
Returns the course_id of the course if it was found, else None Returns the course_id of the course if it was found, else None
Note: we return the course_id instead of a boolean here since the found course may have Note: we return the course_id instead of a boolean here since the found course may have
a different id than the given course_id when ignore_case is True. a different id than the given course_id when ignore_case is True.
''' """
if not isinstance(course_id, CourseLocator) or course_id.deprecated: if not isinstance(course_id, CourseLocator) or course_id.deprecated:
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore. # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
return False return False
...@@ -967,12 +965,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -967,12 +965,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return CourseLocator(course_index['org'], course_index['course'], course_index['run'], course_id.branch) if course_index else None return CourseLocator(course_index['org'], course_index['course'], course_index['run'], course_id.branch) if course_index else None
def has_library(self, library_id, ignore_case=False, **kwargs): def has_library(self, library_id, ignore_case=False, **kwargs):
''' """
Does this library exist in this modulestore. This method does not verify that the branch &/or Does this library exist in this modulestore. This method does not verify that the branch &/or
version in the library_id exists. version in the library_id exists.
Returns the library_id of the course if it was found, else None. Returns the library_id of the course if it was found, else None.
''' """
if not isinstance(library_id, LibraryLocator): if not isinstance(library_id, LibraryLocator):
return None return None
...@@ -1017,7 +1015,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -1017,7 +1015,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
with self.bulk_operations(usage_key.course_key): with self.bulk_operations(usage_key.course_key):
course = self._lookup_course(usage_key.course_key) course = self._lookup_course(usage_key.course_key)
items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, lazy=True, **kwargs) items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, **kwargs)
if len(items) == 0: if len(items) == 0:
raise ItemNotFoundError(usage_key) raise ItemNotFoundError(usage_key)
elif len(items) > 1: elif len(items) > 1:
...@@ -1078,7 +1076,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -1078,7 +1076,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
if block_name == block_id.id and _block_matches_all(block): if block_name == block_id.id and _block_matches_all(block):
block_ids.append(block_id) block_ids.append(block_id)
return self._load_items(course, block_ids, lazy=True, **kwargs) return self._load_items(course, block_ids, **kwargs)
if 'category' in qualifiers: if 'category' in qualifiers:
qualifiers['block_type'] = qualifiers.pop('category') qualifiers['block_type'] = qualifiers.pop('category')
...@@ -1091,18 +1089,18 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -1091,18 +1089,18 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
items.append(block_id) items.append(block_id)
if len(items) > 0: if len(items) > 0:
return self._load_items(course, items, 0, lazy=True, **kwargs) return self._load_items(course, items, depth=0, **kwargs)
else: else:
return [] return []
def get_parent_location(self, locator, **kwargs): def get_parent_location(self, locator, **kwargs):
''' """
Return the location (Locators w/ block_ids) for the parent of this location in this Return the location (Locators w/ block_ids) for the parent of this location in this
course. Could use get_items(location, {'children': block_id}) but this is slightly faster. course. Could use get_items(location, {'children': block_id}) but this is slightly faster.
NOTE: the locator must contain the block_id, and this code does not actually ensure block_id exists NOTE: the locator must contain the block_id, and this code does not actually ensure block_id exists
:param locator: BlockUsageLocator restricting search scope :param locator: BlockUsageLocator restricting search scope
''' """
if not isinstance(locator, BlockUsageLocator) or locator.deprecated: if not isinstance(locator, BlockUsageLocator) or locator.deprecated:
# The supplied locator is of the wrong type, so it can't possibly be stored in this modulestore. # The supplied locator is of the wrong type, so it can't possibly be stored in this modulestore.
raise ItemNotFoundError(locator) raise ItemNotFoundError(locator)
...@@ -1208,12 +1206,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -1208,12 +1206,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return definition['edit_info'] return definition['edit_info']
def get_course_successors(self, course_locator, version_history_depth=1): def get_course_successors(self, course_locator, version_history_depth=1):
''' """
Find the version_history_depth next versions of this course. Return as a VersionTree Find the version_history_depth next versions of this course. Return as a VersionTree
Mostly makes sense when course_locator uses a version_guid, but because it finds all relevant Mostly makes sense when course_locator uses a version_guid, but because it finds all relevant
next versions, these do include those created for other courses. next versions, these do include those created for other courses.
:param course_locator: :param course_locator:
''' """
if not isinstance(course_locator, CourseLocator) or course_locator.deprecated: if not isinstance(course_locator, CourseLocator) or course_locator.deprecated:
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore. # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
raise ItemNotFoundError(course_locator) raise ItemNotFoundError(course_locator)
...@@ -1244,14 +1242,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -1244,14 +1242,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return VersionTree(course_locator, result) return VersionTree(course_locator, result)
def get_block_generations(self, block_locator): def get_block_generations(self, block_locator):
''' """
Find the history of this block. Return as a VersionTree of each place the block changed (except Find the history of this block. Return as a VersionTree of each place the block changed (except
deletion). deletion).
The block's history tracks its explicit changes but not the changes in its children starting The block's history tracks its explicit changes but not the changes in its children starting
from when the block was created. from when the block was created.
''' """
# course_agnostic means we don't care if the head and version don't align, trust the version # course_agnostic means we don't care if the head and version don't align, trust the version
course_struct = self._lookup_course(block_locator.course_key.course_agnostic()).structure course_struct = self._lookup_course(block_locator.course_key.course_agnostic()).structure
block_key = BlockKey.from_usage_key(block_locator) block_key = BlockKey.from_usage_key(block_locator)
...@@ -1296,9 +1294,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -1296,9 +1294,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
) )
def get_definition_successors(self, definition_locator, version_history_depth=1): def get_definition_successors(self, definition_locator, version_history_depth=1):
''' """
Find the version_history_depth next versions of this definition. Return as a VersionTree Find the version_history_depth next versions of this definition. Return as a VersionTree
''' """
# TODO implement # TODO implement
pass pass
......
...@@ -87,14 +87,39 @@ class CountMongoCallsCourseTraversal(TestCase): ...@@ -87,14 +87,39 @@ class CountMongoCallsCourseTraversal(TestCase):
to the leaf nodes. to the leaf nodes.
""" """
# Suppose you want to traverse a course - maybe accessing the fields of each XBlock in the course,
# maybe not. What parameters should one use for get_course() in order to minimize the number of
# mongo calls? The tests below both ensure that code changes don't increase the number of mongo calls
# during traversal -and- demonstrate how to minimize the number of calls.
@ddt.data( @ddt.data(
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, 189), # The way this traversal *should* be done. # These two lines show the way this traversal *should* be done
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, 387), # The pathological case - do *not* query a course this way! # (if you'll eventually access all the fields and load all the definitions anyway).
(MIXED_SPLIT_MODULESTORE_BUILDER, None, 7), # The way this traversal *should* be done. # 'lazy' does not matter in old Mongo.
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, 145) # The pathological case - do *not* query a course this way! (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, True, 189),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, True, 189),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, True, 387),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, True, 387),
# As shown in these two lines: whether or not the XBlock fields are accessed,
# the same number of mongo calls are made in old Mongo for depth=None.
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, False, 189),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, False, 189),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, False, 387),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, False, 387),
# The line below shows the way this traversal *should* be done
# (if you'll eventually access all the fields and load all the definitions anyway).
(MIXED_SPLIT_MODULESTORE_BUILDER, None, False, True, 4),
(MIXED_SPLIT_MODULESTORE_BUILDER, None, True, True, 143),
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, True, 143),
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, True, 143),
(MIXED_SPLIT_MODULESTORE_BUILDER, None, False, False, 4),
(MIXED_SPLIT_MODULESTORE_BUILDER, None, True, False, 4),
# TODO: The call count below seems like a bug - should be 4?
# Seems to be related to using self.lazy in CachingDescriptorSystem.get_module_data().
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, False, 143),
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, False, 4)
) )
@ddt.unpack @ddt.unpack
def test_number_mongo_calls(self, store, depth, num_mongo_calls): def test_number_mongo_calls(self, store, depth, lazy, access_all_block_fields, num_mongo_calls):
with store.build() as (source_content, source_store): with store.build() as (source_content, source_store):
source_course_key = source_store.make_course_key('a', 'course', 'course') source_course_key = source_store.make_course_key('a', 'course', 'course')
...@@ -116,10 +141,20 @@ class CountMongoCallsCourseTraversal(TestCase): ...@@ -116,10 +141,20 @@ class CountMongoCallsCourseTraversal(TestCase):
# Starting at the root course block, do a breadth-first traversal using # Starting at the root course block, do a breadth-first traversal using
# get_children() to retrieve each block's children. # get_children() to retrieve each block's children.
with check_mongo_calls(num_mongo_calls): with check_mongo_calls(num_mongo_calls):
start_block = source_store.get_course(source_course_key, depth=depth) with source_store.bulk_operations(source_course_key):
start_block = source_store.get_course(source_course_key, depth=depth, lazy=lazy)
all_blocks = []
stack = [start_block] stack = [start_block]
while stack: while stack:
curr_block = stack.pop() curr_block = stack.pop()
all_blocks.append(curr_block)
if curr_block.has_children: if curr_block.has_children:
for block in reversed(curr_block.get_children()): for block in reversed(curr_block.get_children()):
stack.append(block) stack.append(block)
if access_all_block_fields:
# Read the fields on each block in order to ensure each block and its definition is loaded.
for xblock in all_blocks:
for __, field in xblock.fields.iteritems():
if field.is_set_on(xblock):
__ = field.read_from(xblock)
...@@ -405,7 +405,12 @@ class TestBulkWriteMixinFindMethods(TestBulkWriteMixin): ...@@ -405,7 +405,12 @@ class TestBulkWriteMixinFindMethods(TestBulkWriteMixin):
self.conn.get_definitions.return_value = db_definitions self.conn.get_definitions.return_value = db_definitions
results = self.bulk.get_definitions(self.course_key, search_ids) results = self.bulk.get_definitions(self.course_key, search_ids)
self.conn.get_definitions.assert_called_once_with(list(set(search_ids) - set(active_ids))) definitions_gotten = list(set(search_ids) - set(active_ids))
if len(definitions_gotten) > 0:
self.conn.get_definitions.assert_called_once_with(definitions_gotten)
else:
# If no definitions to get, then get_definitions() should *not* have been called.
self.assertEquals(self.conn.get_definitions.call_count, 0)
for _id in active_ids: for _id in active_ids:
if _id in search_ids: if _id in search_ids:
self.assertIn(active_definition(_id), results) self.assertIn(active_definition(_id), results)
......
...@@ -41,7 +41,12 @@ def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir): ...@@ -41,7 +41,12 @@ def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir):
with modulestore.bulk_operations(course_key): with modulestore.bulk_operations(course_key):
course = modulestore.get_course(course_key, depth=None) # None means infinite # depth = None: Traverses down the entire course structure.
# lazy = False: Loads and caches all block definitions during traversal for fast access later
# -and- to eliminate many round-trips to read individual definitions.
# Why these parameters? Because a course export needs to access all the course block information
# eventually. Accessing it all now at the beginning increases performance of the export.
course = modulestore.get_course(course_key, depth=None, lazy=False)
fsm = OSFS(root_dir) fsm = OSFS(root_dir)
export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir) export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir)
root_course_dir = root_dir + '/' + course_dir root_course_dir = root_dir + '/' + course_dir
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment