Commit 4e7ed93b by John Eskew

Rollback eager definition loading.

Pass 'lazy' parameter down properly into low-level definition caching.
Adjust mongo call numbers in tests and add missing bulk_op wrapper.
Avoids the loading of all definitions in a large course when only
specific, filtered definitions are needed to be loaded for an endpoint.
parent c72cc5dc
......@@ -423,6 +423,7 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
ids.remove(definition_id)
definitions.append(definition)
if len(ids):
# Query the db for the definitions.
defs_from_db = self.db_connection.get_definitions(list(ids))
# Add the retrieved definitions to the cache.
......@@ -683,7 +684,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
connection.close()
def cache_items(self, system, base_block_ids, course_key, depth=0, lazy=True):
'''
"""
Handles caching of items once inheritance and any other one time
per course per fetch operations are done.
......@@ -692,8 +693,8 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
base_block_ids: list of BlockIds to fetch
course_key: the destination course providing the context
depth: how deep below these to prefetch
lazy: whether to fetch definitions or use placeholders
'''
lazy: whether to load definitions now or later
"""
with self.bulk_operations(course_key, emit_signals=False):
new_module_data = {}
for block_id in base_block_ids:
......@@ -704,13 +705,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
new_module_data
)
# This code supports lazy loading, where the descendent definitions aren't loaded
# This method supports lazy loading, where the descendent definitions aren't loaded
# until they're actually needed.
# However, assume that depth == 0 means no depth is specified and depth != 0 means
# a depth *is* specified. If a non-zero depth is specified, force non-lazy definition
# loading in order to populate the definition cache for later access.
load_definitions_now = depth != 0 or not lazy
if load_definitions_now:
if not lazy:
# Non-lazy loading: Load all descendants by id.
descendent_definitions = self.get_definitions(
course_key,
......@@ -734,15 +731,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return system.module_data
@contract(course_entry=CourseEnvelope, block_keys="list(BlockKey)", depth="int | None")
def _load_items(self, course_entry, block_keys, depth=0, lazy=True, **kwargs):
'''
def _load_items(self, course_entry, block_keys, depth=0, **kwargs):
"""
Load & cache the given blocks from the course. May return the blocks in any order.
Load the definitions into each block if lazy is False;
otherwise, use the lazy definition placeholder.
'''
Load the definitions into each block if lazy is in kwargs and is False;
otherwise, do not load the definitions - they'll be loaded later when needed.
"""
runtime = self._get_cache(course_entry.structure['_id'])
if runtime is None:
lazy = kwargs.pop('lazy', True)
runtime = self.create_runtime(course_entry, lazy)
self._add_cache(course_entry.structure['_id'], runtime)
self.cache_items(runtime, block_keys, course_entry.course_key, depth, lazy)
......@@ -786,7 +784,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
self.request_cache.data['course_cache'] = {}
def _lookup_course(self, course_key):
'''
"""
Decode the locator into the right series of db access. Does not
return the CourseDescriptor! It returns the actual db json from
structures.
......@@ -797,7 +795,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
reference)
:param course_key: any subclass of CourseLocator
'''
"""
if course_key.org and course_key.course and course_key.run:
if course_key.branch is None:
raise InsufficientSpecificationError(course_key)
......@@ -864,7 +862,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
locator = locator_factory(structure_info, branch)
envelope = CourseEnvelope(locator, entry)
root = entry['root']
structures_list = self._load_items(envelope, [root], 0, lazy=True, **kwargs)
structures_list = self._load_items(envelope, [root], depth=0, **kwargs)
if not isinstance(structures_list[0], ErrorDescriptor):
result.append(structures_list[0])
return result
......@@ -929,13 +927,13 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
"""
structure_entry = self._lookup_course(structure_id)
root = structure_entry.structure['root']
result = self._load_items(structure_entry, [root], depth, lazy=True, **kwargs)
result = self._load_items(structure_entry, [root], depth, **kwargs)
return result[0]
def get_course(self, course_id, depth=0, **kwargs):
'''
"""
Gets the course descriptor for the course identified by the locator
'''
"""
if not isinstance(course_id, CourseLocator) or course_id.deprecated:
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
raise ItemNotFoundError(course_id)
......@@ -951,14 +949,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return self._get_structure(library_id, depth, **kwargs)
def has_course(self, course_id, ignore_case=False, **kwargs):
'''
"""
Does this course exist in this modulestore. This method does not verify that the branch &/or
version in the course_id exists. Use get_course_index_info to check that.
Returns the course_id of the course if it was found, else None
Note: we return the course_id instead of a boolean here since the found course may have
a different id than the given course_id when ignore_case is True.
'''
"""
if not isinstance(course_id, CourseLocator) or course_id.deprecated:
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
return False
......@@ -967,12 +965,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return CourseLocator(course_index['org'], course_index['course'], course_index['run'], course_id.branch) if course_index else None
def has_library(self, library_id, ignore_case=False, **kwargs):
'''
"""
Does this library exist in this modulestore. This method does not verify that the branch &/or
version in the library_id exists.
Returns the library_id of the course if it was found, else None.
'''
"""
if not isinstance(library_id, LibraryLocator):
return None
......@@ -1017,7 +1015,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
with self.bulk_operations(usage_key.course_key):
course = self._lookup_course(usage_key.course_key)
items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, lazy=True, **kwargs)
items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, **kwargs)
if len(items) == 0:
raise ItemNotFoundError(usage_key)
elif len(items) > 1:
......@@ -1078,7 +1076,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
if block_name == block_id.id and _block_matches_all(block):
block_ids.append(block_id)
return self._load_items(course, block_ids, lazy=True, **kwargs)
return self._load_items(course, block_ids, **kwargs)
if 'category' in qualifiers:
qualifiers['block_type'] = qualifiers.pop('category')
......@@ -1091,18 +1089,18 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
items.append(block_id)
if len(items) > 0:
return self._load_items(course, items, 0, lazy=True, **kwargs)
return self._load_items(course, items, depth=0, **kwargs)
else:
return []
def get_parent_location(self, locator, **kwargs):
'''
"""
Return the location (Locators w/ block_ids) for the parent of this location in this
course. Could use get_items(location, {'children': block_id}) but this is slightly faster.
NOTE: the locator must contain the block_id, and this code does not actually ensure block_id exists
:param locator: BlockUsageLocator restricting search scope
'''
"""
if not isinstance(locator, BlockUsageLocator) or locator.deprecated:
# The supplied locator is of the wrong type, so it can't possibly be stored in this modulestore.
raise ItemNotFoundError(locator)
......@@ -1208,12 +1206,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return definition['edit_info']
def get_course_successors(self, course_locator, version_history_depth=1):
'''
"""
Find the version_history_depth next versions of this course. Return as a VersionTree
Mostly makes sense when course_locator uses a version_guid, but because it finds all relevant
next versions, these do include those created for other courses.
:param course_locator:
'''
"""
if not isinstance(course_locator, CourseLocator) or course_locator.deprecated:
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
raise ItemNotFoundError(course_locator)
......@@ -1244,14 +1242,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
return VersionTree(course_locator, result)
def get_block_generations(self, block_locator):
'''
"""
Find the history of this block. Return as a VersionTree of each place the block changed (except
deletion).
The block's history tracks its explicit changes but not the changes in its children starting
from when the block was created.
'''
"""
# course_agnostic means we don't care if the head and version don't align, trust the version
course_struct = self._lookup_course(block_locator.course_key.course_agnostic()).structure
block_key = BlockKey.from_usage_key(block_locator)
......@@ -1296,9 +1294,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
)
def get_definition_successors(self, definition_locator, version_history_depth=1):
'''
"""
Find the version_history_depth next versions of this definition. Return as a VersionTree
'''
"""
# TODO implement
pass
......
......@@ -87,14 +87,39 @@ class CountMongoCallsCourseTraversal(TestCase):
to the leaf nodes.
"""
# Suppose you want to traverse a course - maybe accessing the fields of each XBlock in the course,
# maybe not. What parameters should one use for get_course() in order to minimize the number of
# mongo calls? The tests below both ensure that code changes don't increase the number of mongo calls
# during traversal -and- demonstrate how to minimize the number of calls.
@ddt.data(
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, 189), # The way this traversal *should* be done.
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, 387), # The pathological case - do *not* query a course this way!
(MIXED_SPLIT_MODULESTORE_BUILDER, None, 7), # The way this traversal *should* be done.
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, 145) # The pathological case - do *not* query a course this way!
# These two lines show the way this traversal *should* be done
# (if you'll eventually access all the fields and load all the definitions anyway).
# 'lazy' does not matter in old Mongo.
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, True, 189),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, True, 189),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, True, 387),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, True, 387),
# As shown in these two lines: whether or not the XBlock fields are accessed,
# the same number of mongo calls are made in old Mongo for depth=None.
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, False, 189),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, False, 189),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, False, 387),
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, False, 387),
# The line below shows the way this traversal *should* be done
# (if you'll eventually access all the fields and load all the definitions anyway).
(MIXED_SPLIT_MODULESTORE_BUILDER, None, False, True, 4),
(MIXED_SPLIT_MODULESTORE_BUILDER, None, True, True, 143),
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, True, 143),
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, True, 143),
(MIXED_SPLIT_MODULESTORE_BUILDER, None, False, False, 4),
(MIXED_SPLIT_MODULESTORE_BUILDER, None, True, False, 4),
# TODO: The call count below seems like a bug - should be 4?
# Seems to be related to using self.lazy in CachingDescriptorSystem.get_module_data().
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, False, 143),
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, False, 4)
)
@ddt.unpack
def test_number_mongo_calls(self, store, depth, num_mongo_calls):
def test_number_mongo_calls(self, store, depth, lazy, access_all_block_fields, num_mongo_calls):
with store.build() as (source_content, source_store):
source_course_key = source_store.make_course_key('a', 'course', 'course')
......@@ -116,10 +141,20 @@ class CountMongoCallsCourseTraversal(TestCase):
# Starting at the root course block, do a breadth-first traversal using
# get_children() to retrieve each block's children.
with check_mongo_calls(num_mongo_calls):
start_block = source_store.get_course(source_course_key, depth=depth)
with source_store.bulk_operations(source_course_key):
start_block = source_store.get_course(source_course_key, depth=depth, lazy=lazy)
all_blocks = []
stack = [start_block]
while stack:
curr_block = stack.pop()
all_blocks.append(curr_block)
if curr_block.has_children:
for block in reversed(curr_block.get_children()):
stack.append(block)
if access_all_block_fields:
# Read the fields on each block in order to ensure each block and its definition is loaded.
for xblock in all_blocks:
for __, field in xblock.fields.iteritems():
if field.is_set_on(xblock):
__ = field.read_from(xblock)
......@@ -405,7 +405,12 @@ class TestBulkWriteMixinFindMethods(TestBulkWriteMixin):
self.conn.get_definitions.return_value = db_definitions
results = self.bulk.get_definitions(self.course_key, search_ids)
self.conn.get_definitions.assert_called_once_with(list(set(search_ids) - set(active_ids)))
definitions_gotten = list(set(search_ids) - set(active_ids))
if len(definitions_gotten) > 0:
self.conn.get_definitions.assert_called_once_with(definitions_gotten)
else:
# If no definitions to get, then get_definitions() should *not* have been called.
self.assertEquals(self.conn.get_definitions.call_count, 0)
for _id in active_ids:
if _id in search_ids:
self.assertIn(active_definition(_id), results)
......
......@@ -41,7 +41,12 @@ def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir):
with modulestore.bulk_operations(course_key):
course = modulestore.get_course(course_key, depth=None) # None means infinite
# depth = None: Traverses down the entire course structure.
# lazy = False: Loads and caches all block definitions during traversal for fast access later
# -and- to eliminate many round-trips to read individual definitions.
# Why these parameters? Because a course export needs to access all the course block information
# eventually. Accessing it all now at the beginning increases performance of the export.
course = modulestore.get_course(course_key, depth=None, lazy=False)
fsm = OSFS(root_dir)
export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir)
root_course_dir = root_dir + '/' + course_dir
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment