Commit d0c9daa4 by John Eskew

Merge pull request #6920 from edx/jeskew/eager_loading_xblocks_to_depth

Improve performance of import/export and course traversal with Split modulestore.
parents 3a28c570 2a4a89a9
...@@ -402,13 +402,18 @@ class SplitBulkWriteMixin(BulkOperationsMixin): ...@@ -402,13 +402,18 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
bulk_write_record = self._get_bulk_ops_record(course_key) bulk_write_record = self._get_bulk_ops_record(course_key)
if bulk_write_record.active: if bulk_write_record.active:
# Only query for the definitions that aren't already cached.
for definition in bulk_write_record.definitions.values(): for definition in bulk_write_record.definitions.values():
definition_id = definition.get('_id') definition_id = definition.get('_id')
if definition_id in ids: if definition_id in ids:
ids.remove(definition_id) ids.remove(definition_id)
definitions.append(definition) definitions.append(definition)
definitions.extend(self.db_connection.get_definitions(list(ids))) # Query the db for the definitions.
defs_from_db = self.db_connection.get_definitions(list(ids))
# Add the retrieved definitions to the cache.
bulk_write_record.definitions.update({d.get('_id'): d for d in defs_from_db})
definitions.extend(defs_from_db)
return definitions return definitions
def update_definition(self, course_key, definition): def update_definition(self, course_key, definition):
...@@ -683,23 +688,29 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -683,23 +688,29 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
new_module_data new_module_data
) )
if not lazy: # This code supports lazy loading, where the descendent definitions aren't loaded
# Load all descendants by id # until they're actually needed.
# However, assume that depth == 0 means no depth is specified and depth != 0 means
# a depth *is* specified. If a non-zero depth is specified, force non-lazy definition
# loading in order to populate the definition cache for later access.
load_definitions_now = depth != 0 or not lazy
if load_definitions_now:
# Non-lazy loading: Load all descendants by id.
descendent_definitions = self.get_definitions( descendent_definitions = self.get_definitions(
course_key, course_key,
[ [
block['definition'] block.definition
for block in new_module_data.itervalues() for block in new_module_data.itervalues()
] ]
) )
# turn into a map # Turn definitions into a map.
definitions = {definition['_id']: definition definitions = {definition['_id']: definition
for definition in descendent_definitions} for definition in descendent_definitions}
for block in new_module_data.itervalues(): for block in new_module_data.itervalues():
if block.definition in definitions: if block.definition in definitions:
definition = definitions[block.definition] definition = definitions[block.definition]
# convert_fields was being done here, but it gets done later in the runtime's xblock_from_json # convert_fields gets done later in the runtime's xblock_from_json
block.fields.update(definition.get('fields')) block.fields.update(definition.get('fields'))
block.definition_loaded = True block.definition_loaded = True
......
...@@ -34,7 +34,7 @@ class TestAsidesXmlStore(TestCase): ...@@ -34,7 +34,7 @@ class TestAsidesXmlStore(TestCase):
""" """
Check that the xml modulestore read in all the asides with their values Check that the xml modulestore read in all the asides with their values
""" """
with XmlModulestoreBuilder().build(course_ids=['edX/aside_test/2012_Fall']) as store: with XmlModulestoreBuilder().build(course_ids=['edX/aside_test/2012_Fall']) as (__, store):
def check_block(block): def check_block(block):
""" """
Check whether block has the expected aside w/ its fields and then recurse to the block's children Check whether block has the expected aside w/ its fields and then recurse to the block's children
......
...@@ -76,12 +76,66 @@ class MemoryCache(object): ...@@ -76,12 +76,66 @@ class MemoryCache(object):
self._data[key] = value self._data[key] = value
class MongoModulestoreBuilder(object): class MongoContentstoreBuilder(object):
"""
A builder class for a MongoContentStore.
"""
@contextmanager
def build(self):
    """
    A contextmanager that returns a MongoContentStore, and deletes its contents
    when the context closes.

    The backing database gets a unique name per call, so content stores that
    are alive at the same time (nested builds, parallel test processes) can
    never share a database and drop each other's data on exit.
    """
    # Imported locally so this block does not depend on a module-level import.
    from uuid import uuid4

    contentstore = MongoContentStore(
        # A uuid4 hex suffix replaces the former random.randint(0, 10000),
        # which could hand two live stores the same database name.
        db='contentstore{}'.format(uuid4().hex),
        collection='content',
        **COMMON_DOCSTORE_CONFIG
    )
    try:
        # Inside the try so that a failure while creating indexes still
        # drops the database instead of leaking it.
        contentstore.ensure_indexes()
        yield contentstore
    finally:
        # Delete the created database
        contentstore._drop_database()  # pylint: disable=protected-access
def __repr__(self):
    """
    Return a fixed string identifying this builder.
    """
    return 'MongoContentstoreBuilder()'
class StoreBuilderBase(object):
    """
    Base class for all modulestore builders.

    Subclasses provide ``build_with_contentstore(contentstore, **kwargs)``, a
    context manager that yields a modulestore.
    """
    @contextmanager
    def build(self, **kwargs):
        """
        Build the modulestore, optionally building the contentstore as well.

        Keyword arguments:
            contentstore: an existing contentstore to build on. When it is
                supplied (and truthy) only the modulestore is yielded. When
                it is omitted a new contentstore is built and a
                ``(contentstore, modulestore)`` tuple is yielded.

        Every other keyword argument (e.g. ``course_ids``) is forwarded to
        ``build_with_contentstore`` rather than being silently discarded.
        """
        contentstore = kwargs.pop('contentstore', None)
        if not contentstore:
            with self.build_without_contentstore(**kwargs) as (contentstore, modulestore):
                yield contentstore, modulestore
        else:
            with self.build_with_contentstore(contentstore, **kwargs) as modulestore:
                yield modulestore

    @contextmanager
    def build_without_contentstore(self, **kwargs):
        """
        Build both the contentstore and the modulestore.

        Yields a ``(contentstore, modulestore)`` tuple. Keyword arguments are
        passed through to ``build_with_contentstore``.
        """
        with MongoContentstoreBuilder().build() as contentstore:
            with self.build_with_contentstore(contentstore, **kwargs) as modulestore:
                yield contentstore, modulestore
class MongoModulestoreBuilder(StoreBuilderBase):
""" """
A builder class for a DraftModuleStore. A builder class for a DraftModuleStore.
""" """
@contextmanager @contextmanager
def build(self, contentstore): def build_with_contentstore(self, contentstore):
""" """
A contextmanager that returns an isolated mongo modulestore, and then deletes A contextmanager that returns an isolated mongo modulestore, and then deletes
all of its data at the end of the context. all of its data at the end of the context.
...@@ -125,12 +179,12 @@ class MongoModulestoreBuilder(object): ...@@ -125,12 +179,12 @@ class MongoModulestoreBuilder(object):
return 'MongoModulestoreBuilder()' return 'MongoModulestoreBuilder()'
class VersioningModulestoreBuilder(object): class VersioningModulestoreBuilder(StoreBuilderBase):
""" """
A builder class for a VersioningModuleStore. A builder class for a VersioningModuleStore.
""" """
@contextmanager @contextmanager
def build(self, contentstore): def build_with_contentstore(self, contentstore):
""" """
A contextmanager that returns an isolated versioning modulestore, and then deletes A contextmanager that returns an isolated versioning modulestore, and then deletes
all of its data at the end of the context. all of its data at the end of the context.
...@@ -170,13 +224,13 @@ class VersioningModulestoreBuilder(object): ...@@ -170,13 +224,13 @@ class VersioningModulestoreBuilder(object):
return 'SplitModulestoreBuilder()' return 'SplitModulestoreBuilder()'
class XmlModulestoreBuilder(object): class XmlModulestoreBuilder(StoreBuilderBase):
""" """
A builder class for a XMLModuleStore. A builder class for a XMLModuleStore.
""" """
# pylint: disable=unused-argument # pylint: disable=unused-argument
@contextmanager @contextmanager
def build(self, contentstore=None, course_ids=None): def build_with_contentstore(self, contentstore=None, course_ids=None):
""" """
A contextmanager that returns an isolated xml modulestore A contextmanager that returns an isolated xml modulestore
...@@ -194,7 +248,7 @@ class XmlModulestoreBuilder(object): ...@@ -194,7 +248,7 @@ class XmlModulestoreBuilder(object):
yield modulestore yield modulestore
class MixedModulestoreBuilder(object): class MixedModulestoreBuilder(StoreBuilderBase):
""" """
A builder class for a MixedModuleStore. A builder class for a MixedModuleStore.
""" """
...@@ -210,7 +264,7 @@ class MixedModulestoreBuilder(object): ...@@ -210,7 +264,7 @@ class MixedModulestoreBuilder(object):
self.mixed_modulestore = None self.mixed_modulestore = None
@contextmanager @contextmanager
def build(self, contentstore): def build_with_contentstore(self, contentstore):
""" """
A contextmanager that returns a mixed modulestore built on top of modulestores A contextmanager that returns a mixed modulestore built on top of modulestores
generated by other builder classes. generated by other builder classes.
...@@ -221,7 +275,7 @@ class MixedModulestoreBuilder(object): ...@@ -221,7 +275,7 @@ class MixedModulestoreBuilder(object):
""" """
names, generators = zip(*self.store_builders) names, generators = zip(*self.store_builders)
with nested(*(gen.build(contentstore) for gen in generators)) as modulestores: with nested(*(gen.build_with_contentstore(contentstore) for gen in generators)) as modulestores:
# Make the modulestore creation function just return the already-created modulestores # Make the modulestore creation function just return the already-created modulestores
store_iterator = iter(modulestores) store_iterator = iter(modulestores)
create_modulestore_instance = lambda *args, **kwargs: store_iterator.next() create_modulestore_instance = lambda *args, **kwargs: store_iterator.next()
...@@ -261,32 +315,6 @@ class MixedModulestoreBuilder(object): ...@@ -261,32 +315,6 @@ class MixedModulestoreBuilder(object):
return store.db_connection.structures return store.db_connection.structures
class MongoContentstoreBuilder(object):
    """
    A builder class for a MongoContentStore.
    """
    @contextmanager
    def build(self):
        """
        A contextmanager that returns a MongoContentStore, and deletes its contents
        when the context closes.

        Each call uses a uniquely named database so that content stores alive
        at the same time cannot collide and drop each other's data.
        """
        # Imported locally so this block does not depend on a module-level import.
        from uuid import uuid4

        contentstore = MongoContentStore(
            # A uuid4 hex suffix replaces the former random.randint(0, 10000),
            # which could hand two live stores the same database name.
            db='contentstore{}'.format(uuid4().hex),
            collection='content',
            **COMMON_DOCSTORE_CONFIG
        )
        try:
            # Inside the try so that a failure while creating indexes still
            # drops the database instead of leaking it.
            contentstore.ensure_indexes()
            yield contentstore
        finally:
            # Delete the created database
            contentstore._drop_database()  # pylint: disable=protected-access

    def __repr__(self):
        """
        Return a fixed string identifying this builder.
        """
        return 'MongoContentstoreBuilder()'
MIXED_MODULESTORE_BOTH_SETUP = MixedModulestoreBuilder([ MIXED_MODULESTORE_BOTH_SETUP = MixedModulestoreBuilder([
('draft', MongoModulestoreBuilder()), ('draft', MongoModulestoreBuilder()),
('split', VersioningModulestoreBuilder()) ('split', VersioningModulestoreBuilder())
...@@ -345,11 +373,11 @@ class CrossStoreXMLRoundtrip(CourseComparisonTest, PartitionTestCase): ...@@ -345,11 +373,11 @@ class CrossStoreXMLRoundtrip(CourseComparisonTest, PartitionTestCase):
# Construct the contentstore for storing the first import # Construct the contentstore for storing the first import
with source_content_builder.build() as source_content: with source_content_builder.build() as source_content:
# Construct the modulestore for storing the first import (using the previously created contentstore) # Construct the modulestore for storing the first import (using the previously created contentstore)
with source_builder.build(source_content) as source_store: with source_builder.build(contentstore=source_content) as source_store:
# Construct the contentstore for storing the second import # Construct the contentstore for storing the second import
with dest_content_builder.build() as dest_content: with dest_content_builder.build() as dest_content:
# Construct the modulestore for storing the second import (using the second contentstore) # Construct the modulestore for storing the second import (using the second contentstore)
with dest_builder.build(dest_content) as dest_store: with dest_builder.build(contentstore=dest_content) as dest_store:
source_course_key = source_store.make_course_key('a', 'course', 'course') source_course_key = source_store.make_course_key('a', 'course', 'course')
dest_course_key = dest_store.make_course_key('a', 'course', 'course') dest_course_key = dest_store.make_course_key('a', 'course', 'course')
......
"""
Tests to verify correct number of MongoDB calls during course import/export and traversal
when using the Split modulestore.
"""
from tempfile import mkdtemp
from shutil import rmtree
from unittest import TestCase
import ddt
from xmodule.modulestore.xml_importer import import_from_xml
from xmodule.modulestore.xml_exporter import export_to_xml
from xmodule.modulestore.tests.factories import check_mongo_calls
from xmodule.modulestore.tests.test_cross_modulestore_import_export import (
MixedModulestoreBuilder, VersioningModulestoreBuilder,
MongoModulestoreBuilder, TEST_DATA_DIR
)
# Mixed modulestore builder whose only underlying store is the 'draft' store
# produced by MongoModulestoreBuilder.
MIXED_OLD_MONGO_MODULESTORE_BUILDER = MixedModulestoreBuilder([('draft', MongoModulestoreBuilder())])
# Mixed modulestore builder whose only underlying store is the 'split' store
# produced by VersioningModulestoreBuilder.
MIXED_SPLIT_MODULESTORE_BUILDER = MixedModulestoreBuilder([('split', VersioningModulestoreBuilder())])
@ddt.ddt
class CountMongoCallsXMLRoundtrip(TestCase):
    """
    Counts the MongoDB calls made while a course is imported from XML,
    exported back to XML, and then imported again into a second store.
    """

    def setUp(self):
        super(CountMongoCallsXMLRoundtrip, self).setUp()
        # Scratch directory that receives the exported course; removed on cleanup.
        self.export_dir = mkdtemp()
        self.addCleanup(rmtree, self.export_dir, ignore_errors=True)

    def _import_course(self, modulestore, contentstore, data_dir, course_dir, course_key):
        """
        Import the single course found in `data_dir`/`course_dir` into `modulestore`
        under `course_key`, creating the course if it does not exist yet.
        """
        import_from_xml(
            modulestore,
            'test_user',
            data_dir,
            course_dirs=[course_dir],
            static_content_store=contentstore,
            target_course_id=course_key,
            create_course_if_not_present=True,
            raise_on_failure=True,
        )

    @ddt.data(
        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 287, 780, 702, 702),
        (MIXED_SPLIT_MODULESTORE_BUILDER, 37, 16, 190, 189),
    )
    @ddt.unpack
    def test_import_export(
            self, store_builder, export_reads, import_reads, first_import_writes, second_import_writes
    ):
        """
        Import, export and re-import a course, checking the Mongo call counts of each phase.
        """
        with store_builder.build() as (source_content, source_store):
            with store_builder.build() as (dest_content, dest_store):
                source_course_key = source_store.make_course_key('a', 'course', 'course')
                dest_course_key = dest_store.make_course_key('a', 'course', 'course')

                # An extra import write occurs in the first Split import due to the mismatch between
                # the course id and the wiki_slug in the test XML course. The course must be updated
                # with the correct wiki_slug during import.
                with check_mongo_calls(import_reads, first_import_writes):
                    self._import_course(
                        source_store,
                        source_content,
                        TEST_DATA_DIR,
                        'manual-testing-complete',
                        source_course_key,
                    )

                # Exporting is only checked for its expected number of reads.
                with check_mongo_calls(export_reads):
                    export_to_xml(
                        source_store,
                        source_content,
                        source_course_key,
                        self.export_dir,
                        'exported_source_course',
                    )

                # Re-import the freshly exported course into the destination store.
                with check_mongo_calls(import_reads, second_import_writes):
                    self._import_course(
                        dest_store,
                        dest_content,
                        self.export_dir,
                        'exported_source_course',
                        dest_course_key,
                    )
@ddt.ddt
class CountMongoCallsCourseTraversal(TestCase):
    """
    Tests the number of Mongo calls made when traversing a course tree from the top course root
    to the leaf nodes.
    """

    @ddt.data(
        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, 189),  # The way this traversal *should* be done.
        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, 387),  # The pathological case - do *not* query a course this way!
        (MIXED_SPLIT_MODULESTORE_BUILDER, None, 7),  # The way this traversal *should* be done.
        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, 145)  # The pathological case - do *not* query a course this way!
    )
    @ddt.unpack
    def test_number_mongo_calls(self, store, depth, num_mongo_calls):
        """
        Import a course, then walk its whole block tree and check the Mongo call count.

        Arguments:
            store: the modulestore *builder* used to create the store under test.
            depth: the `depth` argument handed to `get_course`.
            num_mongo_calls: the count passed to `check_mongo_calls` for the traversal.
        """
        with store.build() as (source_content, source_store):
            source_course_key = source_store.make_course_key('a', 'course', 'course')

            # First, import a course.
            import_from_xml(
                source_store,
                'test_user',
                TEST_DATA_DIR,
                course_dirs=['manual-testing-complete'],
                static_content_store=source_content,
                target_course_id=source_course_key,
                create_course_if_not_present=True,
                raise_on_failure=True,
            )

            # Course traversal modeled after the traversal done here:
            # lms/djangoapps/mobile_api/video_outlines/serializers.py:BlockOutline
            # Starting at the root course block, do a depth-first traversal with an
            # explicit stack, using get_children() to retrieve each block's children.
            with check_mongo_calls(num_mongo_calls):
                start_block = source_store.get_course(source_course_key, depth=depth)
                stack = [start_block]
                while stack:
                    curr_block = stack.pop()
                    if curr_block.has_children:
                        # Push children reversed so they are popped in their original order.
                        stack.extend(reversed(curr_block.get_children()))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment