Rollback eager definition loading.

Pass 'lazy' parameter down properly into low-level definition caching. Adjust mongo call numbers in tests and add missing bulk_op wrapper. Avoids the loading of all definitions in a large course when only specific, filtered definitions are needed to be loaded for an endpoint.

Rollback eager definition loading.
Pass 'lazy' parameter down properly into low-level definition caching. Adjust mongo call numbers in tests and add missing bulk_op wrapper. Avoids the loading of all definitions in a large course when only specific, filtered definitions are needed to be loaded for an endpoint.
4e7ed93b · John Eskew · c72cc5dc · 4e7ed93b · 4e7ed93b · 4e7ed93b
Commit 4e7ed93b authored Feb 18, 2015 by John Eskew
4 changed files
--- a/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py
+++ b/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py
@@ -423,6 +423,7 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
                    ids.remove(definition_id)
                    definitions.append(definition)
+        if len(ids):
            # Query the db for the definitions.
            defs_from_db = self.db_connection.get_definitions(list(ids))
            # Add the retrieved definitions to the cache.
@@ -683,7 +684,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        connection.close()
    def cache_items(self, system, base_block_ids, course_key, depth=0, lazy=True):
-        '''
+        """
        Handles caching of items once inheritance and any other one time
        per course per fetch operations are done.
@@ -692,8 +693,8 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
            base_block_ids: list of BlockIds to fetch
            course_key: the destination course providing the context
            depth: how deep below these to prefetch
-            lazy: whether to fetch definitions or use placeholders
+            lazy: whether to load definitions now or later
-        '''
+        """
        with self.bulk_operations(course_key, emit_signals=False):
            new_module_data = {}
            for block_id in base_block_ids:
@@ -704,13 +705,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
                    new_module_data
                )
-            # This code supports lazy loading, where the descendent definitions aren't loaded
+            # This method supports lazy loading, where the descendent definitions aren't loaded
            # until they're actually needed.
-            # However, assume that depth == 0 means no depth is specified and depth != 0 means
+            if not lazy:
-            # a depth *is* specified. If a non-zero depth is specified, force non-lazy definition
-            # loading in order to populate the definition cache for later access.
-            load_definitions_now = depth != 0 or not lazy
-            if load_definitions_now:
                # Non-lazy loading: Load all descendants by id.
                descendent_definitions = self.get_definitions(
                    course_key,
@@ -734,15 +731,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
            return system.module_data
    @contract(course_entry=CourseEnvelope, block_keys="list(BlockKey)", depth="int | None")
-    def _load_items(self, course_entry, block_keys, depth=0, lazy=True, **kwargs):
+    def _load_items(self, course_entry, block_keys, depth=0, **kwargs):
-        '''
+        """
        Load & cache the given blocks from the course. May return the blocks in any order.
-        Load the definitions into each block if lazy is False;
+        Load the definitions into each block if lazy is in kwargs and is False;
-        otherwise, use the lazy definition placeholder.
+        otherwise, do not load the definitions - they'll be loaded later when needed.
-        '''
+        """
        runtime = self._get_cache(course_entry.structure['_id'])
        if runtime is None:
+            lazy = kwargs.pop('lazy', True)
            runtime = self.create_runtime(course_entry, lazy)
            self._add_cache(course_entry.structure['_id'], runtime)
            self.cache_items(runtime, block_keys, course_entry.course_key, depth, lazy)
@@ -786,7 +784,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
            self.request_cache.data['course_cache'] = {}
    def _lookup_course(self, course_key):
-        '''
+        """
        Decode the locator into the right series of db access. Does not
        return the CourseDescriptor! It returns the actual db json from
        structures.
@@ -797,7 +795,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        reference)
        :param course_key: any subclass of CourseLocator
-        '''
+        """
        if course_key.org and course_key.course and course_key.run:
            if course_key.branch is None:
                raise InsufficientSpecificationError(course_key)
@@ -864,7 +862,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
            locator = locator_factory(structure_info, branch)
            envelope = CourseEnvelope(locator, entry)
            root = entry['root']
-            structures_list = self._load_items(envelope, [root], 0, lazy=True, **kwargs)
+            structures_list = self._load_items(envelope, [root], depth=0, **kwargs)
            if not isinstance(structures_list[0], ErrorDescriptor):
                result.append(structures_list[0])
        return result
@@ -929,13 +927,13 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        """
        structure_entry = self._lookup_course(structure_id)
        root = structure_entry.structure['root']
-        result = self._load_items(structure_entry, [root], depth, lazy=True, **kwargs)
+        result = self._load_items(structure_entry, [root], depth, **kwargs)
        return result[0]
    def get_course(self, course_id, depth=0, **kwargs):
-        '''
+        """
        Gets the course descriptor for the course identified by the locator
-        '''
+        """
        if not isinstance(course_id, CourseLocator) or course_id.deprecated:
            # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
            raise ItemNotFoundError(course_id)
@@ -951,14 +949,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        return self._get_structure(library_id, depth, **kwargs)
    def has_course(self, course_id, ignore_case=False, **kwargs):
-        '''
+        """
        Does this course exist in this modulestore. This method does not verify that the branch &/or
        version in the course_id exists. Use get_course_index_info to check that.
        Returns the course_id of the course if it was found, else None
        Note: we return the course_id instead of a boolean here since the found course may have
           a different id than the given course_id when ignore_case is True.
-        '''
+        """
        if not isinstance(course_id, CourseLocator) or course_id.deprecated:
            # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
            return False
@@ -967,12 +965,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        return CourseLocator(course_index['org'], course_index['course'], course_index['run'], course_id.branch) if course_index else None
    def has_library(self, library_id, ignore_case=False, **kwargs):
-        '''
+        """
        Does this library exist in this modulestore. This method does not verify that the branch &/or
        version in the library_id exists.
        Returns the library_id of the course if it was found, else None.
-        '''
+        """
        if not isinstance(library_id, LibraryLocator):
            return None
@@ -1017,7 +1015,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        with self.bulk_operations(usage_key.course_key):
            course = self._lookup_course(usage_key.course_key)
-            items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, lazy=True, **kwargs)
+            items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, **kwargs)
            if len(items) == 0:
                raise ItemNotFoundError(usage_key)
            elif len(items) > 1:
@@ -1078,7 +1076,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
                if block_name == block_id.id and _block_matches_all(block):
                    block_ids.append(block_id)
-            return self._load_items(course, block_ids, lazy=True, **kwargs)
+            return self._load_items(course, block_ids, **kwargs)
        if 'category' in qualifiers:
            qualifiers['block_type'] = qualifiers.pop('category')
@@ -1091,18 +1089,18 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
                items.append(block_id)
        if len(items) > 0:
-            return self._load_items(course, items, 0, lazy=True, **kwargs)
+            return self._load_items(course, items, depth=0, **kwargs)
        else:
            return []
    def get_parent_location(self, locator, **kwargs):
-        '''
+        """
        Return the location (Locators w/ block_ids) for the parent of this location in this
        course. Could use get_items(location, {'children': block_id}) but this is slightly faster.
        NOTE: the locator must contain the block_id, and this code does not actually ensure block_id exists
        :param locator: BlockUsageLocator restricting search scope
-        '''
+        """
        if not isinstance(locator, BlockUsageLocator) or locator.deprecated:
            # The supplied locator is of the wrong type, so it can't possibly be stored in this modulestore.
            raise ItemNotFoundError(locator)
@@ -1208,12 +1206,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        return definition['edit_info']
    def get_course_successors(self, course_locator, version_history_depth=1):
-        '''
+        """
        Find the version_history_depth next versions of this course. Return as a VersionTree
        Mostly makes sense when course_locator uses a version_guid, but because it finds all relevant
        next versions, these do include those created for other courses.
        :param course_locator:
-        '''
+        """
        if not isinstance(course_locator, CourseLocator) or course_locator.deprecated:
            # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
            raise ItemNotFoundError(course_locator)
@@ -1244,14 +1242,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        return VersionTree(course_locator, result)
    def get_block_generations(self, block_locator):
-        '''
+        """
        Find the history of this block. Return as a VersionTree of each place the block changed (except
        deletion).
        The block's history tracks its explicit changes but not the changes in its children starting
        from when the block was created.
-        '''
+        """
        # course_agnostic means we don't care if the head and version don't align, trust the version
        course_struct = self._lookup_course(block_locator.course_key.course_agnostic()).structure
        block_key = BlockKey.from_usage_key(block_locator)
@@ -1296,9 +1294,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
        )
    def get_definition_successors(self, definition_locator, version_history_depth=1):
-        '''
+        """
        Find the version_history_depth next versions of this definition. Return as a VersionTree
-        '''
+        """
        # TODO implement
        pass

--- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py
@@ -87,14 +87,39 @@ class CountMongoCallsCourseTraversal(TestCase):
    to the leaf nodes.
    """
+    # Suppose you want to traverse a course - maybe accessing the fields of each XBlock in the course,
+    # maybe not. What parameters should one use for get_course() in order to minimize the number of
+    # mongo calls? The tests below both ensure that code changes don't increase the number of mongo calls
+    # during traversal -and- demonstrate how to minimize the number of calls.
    @ddt.data(
-        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, 189),  # The way this traversal *should* be done.
+        # These two lines show the way this traversal *should* be done
-        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, 387),     # The pathological case - do *not* query a course this way!
+        # (if you'll eventually access all the fields and load all the definitions anyway).
-        (MIXED_SPLIT_MODULESTORE_BUILDER, None, 7),  # The way this traversal *should* be done.
+        # 'lazy' does not matter in old Mongo.
-        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, 145)    # The pathological case - do *not* query a course this way!
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, True, 189),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, True, 189),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, True, 387),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, True, 387),
+        # As shown in these two lines: whether or not the XBlock fields are accessed,
+        # the same number of mongo calls are made in old Mongo for depth=None.
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, False, 189),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, False, 189),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, False, 387),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, False, 387),
+        # The line below shows the way this traversal *should* be done
+        # (if you'll eventually access all the fields and load all the definitions anyway).
+        (MIXED_SPLIT_MODULESTORE_BUILDER, None, False, True, 4),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, None, True, True, 143),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, True, 143),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, True, 143),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, None, False, False, 4),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, None, True, False, 4),
+        # TODO: The call count below seems like a bug - should be 4?
+        # Seems to be related to using self.lazy in CachingDescriptorSystem.get_module_data().
+        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, False, 143),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, False, 4)
    )
    @ddt.unpack
-    def test_number_mongo_calls(self, store, depth, num_mongo_calls):
+    def test_number_mongo_calls(self, store, depth, lazy, access_all_block_fields, num_mongo_calls):
        with store.build() as (source_content, source_store):
            source_course_key = source_store.make_course_key('a', 'course', 'course')
@@ -116,10 +141,20 @@ class CountMongoCallsCourseTraversal(TestCase):
            # Starting at the root course block, do a breadth-first traversal using
            # get_children() to retrieve each block's children.
            with check_mongo_calls(num_mongo_calls):
-                start_block = source_store.get_course(source_course_key, depth=depth)
+                with source_store.bulk_operations(source_course_key):
+                    start_block = source_store.get_course(source_course_key, depth=depth, lazy=lazy)
+                    all_blocks = []
                    stack = [start_block]
                    while stack:
                        curr_block = stack.pop()
+                        all_blocks.append(curr_block)
                        if curr_block.has_children:
                            for block in reversed(curr_block.get_children()):
                                stack.append(block)
+                    if access_all_block_fields:
+                        # Read the fields on each block in order to ensure each block and its definition is loaded.
+                        for xblock in all_blocks:
+                            for __, field in xblock.fields.iteritems():
+                                if field.is_set_on(xblock):
+                                    __ = field.read_from(xblock)
--- a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py
@@ -405,7 +405,12 @@ class TestBulkWriteMixinFindMethods(TestBulkWriteMixin):
        self.conn.get_definitions.return_value = db_definitions
        results = self.bulk.get_definitions(self.course_key, search_ids)
-        self.conn.get_definitions.assert_called_once_with(list(set(search_ids) - set(active_ids)))
+        definitions_gotten = list(set(search_ids) - set(active_ids))
+        if len(definitions_gotten) > 0:
+            self.conn.get_definitions.assert_called_once_with(definitions_gotten)
+        else:
+            # If no definitions to get, then get_definitions() should *not* have been called.
+            self.assertEquals(self.conn.get_definitions.call_count, 0)
        for _id in active_ids:
            if _id in search_ids:
                self.assertIn(active_definition(_id), results)

--- a/common/lib/xmodule/xmodule/modulestore/xml_exporter.py
+++ b/common/lib/xmodule/xmodule/modulestore/xml_exporter.py
@@ -41,7 +41,12 @@ def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir):
    with modulestore.bulk_operations(course_key):
-        course = modulestore.get_course(course_key, depth=None)  # None means infinite
+        # depth = None: Traverses down the entire course structure.
+        # lazy = False: Loads and caches all block definitions during traversal for fast access later
+        #               -and- to eliminate many round-trips to read individual definitions.
+        # Why these parameters? Because a course export needs to access all the course block information
+        # eventually. Accessing it all now at the beginning increases performance of the export.
+        course = modulestore.get_course(course_key, depth=None, lazy=False)
        fsm = OSFS(root_dir)
        export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir)
        root_course_dir = root_dir + '/' + course_dir