Commit 3ec3bbbe by Calen Pennington

Merge pull request #5162 from cpennington/split/import-export-performance

Performance improvements to split mongo and old-mongo, based on import/export tests.
parents 35d92f42 b9908ec2
...@@ -287,3 +287,10 @@ class ContentStore(object): ...@@ -287,3 +287,10 @@ class ContentStore(object):
logging.exception(u"Failed to generate thumbnail for {0}. Exception: {1}".format(content.location, str(e))) logging.exception(u"Failed to generate thumbnail for {0}. Exception: {1}".format(content.location, str(e)))
return thumbnail_content, thumbnail_file_location return thumbnail_content, thumbnail_file_location
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
"""
pass
...@@ -375,6 +375,36 @@ class MongoContentStore(ContentStore): ...@@ -375,6 +375,36 @@ class MongoContentStore(ContentStore):
fs_entry['_id'] = dbkey fs_entry['_id'] = dbkey
return dbkey return dbkey
def ensure_indexes(self):
# Index needed thru 'category' by `_get_all_content_for_course` and others. That query also takes a sort
# which can be `uploadDate`, `display_name`,
self.fs_files.create_index(
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('_id.name', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('content_son.name', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('uploadDate', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('display_name', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('uploadDate', pymongo.ASCENDING)],
sparse=True
)
self.fs_files.create_index(
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('display_name', pymongo.ASCENDING)],
sparse=True
)
def query_for_course(course_key, category=None): def query_for_course(course_key, category=None):
""" """
......
...@@ -486,6 +486,16 @@ class ModuleStoreRead(object): ...@@ -486,6 +486,16 @@ class ModuleStoreRead(object):
""" """
yield yield
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
pass
class ModuleStoreWrite(ModuleStoreRead): class ModuleStoreWrite(ModuleStoreRead):
""" """
......
...@@ -653,3 +653,14 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase): ...@@ -653,3 +653,14 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
store = self._get_modulestore_for_courseid(course_id) store = self._get_modulestore_for_courseid(course_id)
with store.bulk_operations(course_id): with store.bulk_operations(course_id):
yield yield
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
for store in self.modulestores:
store.ensure_indexes()
...@@ -1440,3 +1440,29 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo ...@@ -1440,3 +1440,29 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
return {ModuleStoreEnum.Type.mongo: True} return {ModuleStoreEnum.Type.mongo: True}
else: else:
raise HeartbeatFailure("Can't connect to {}".format(self.database.name), 'mongo') raise HeartbeatFailure("Can't connect to {}".format(self.database.name), 'mongo')
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
# Because we often query for some subset of the id, we define this index:
self.collection.create_index([
('_id.org', pymongo.ASCENDING),
('_id.course', pymongo.ASCENDING),
('_id.category', pymongo.ASCENDING),
('_id.name', pymongo.ASCENDING),
])
# Because we often scan for all category='course' regardless of the value of the other fields:
self.collection.create_index('_id.category')
# Because lms calls get_parent_locations frequently (for path generation):
self.collection.create_index('definition.children', sparse=True)
# To allow prioritizing draft vs published material
self.collection.create_index('_id.revision')
from opaque_keys.edx.locator import DefinitionLocator from opaque_keys.edx.locator import DefinitionLocator
from bson import SON
class DefinitionLazyLoader(object): class DefinitionLazyLoader(object):
...@@ -25,9 +24,3 @@ class DefinitionLazyLoader(object): ...@@ -25,9 +24,3 @@ class DefinitionLazyLoader(object):
loader pointer with the result so as not to fetch more than once loader pointer with the result so as not to fetch more than once
""" """
return self.modulestore.db_connection.get_definition(self.definition_locator.definition_id) return self.modulestore.db_connection.get_definition(self.definition_locator.definition_id)
def as_son(self):
return SON((
('block_type', self.definition_locator.block_type),
('definition', self.definition_locator.definition_id)
))
...@@ -82,7 +82,6 @@ class MongoConnection(object): ...@@ -82,7 +82,6 @@ class MongoConnection(object):
host=host, host=host,
port=port, port=port,
tz_aware=tz_aware, tz_aware=tz_aware,
document_class=son.SON,
**kwargs **kwargs
), ),
db db
...@@ -167,10 +166,10 @@ class MongoConnection(object): ...@@ -167,10 +166,10 @@ class MongoConnection(object):
""" """
case_regex = ur"(?i)^{}$" if ignore_case else ur"{}" case_regex = ur"(?i)^{}$" if ignore_case else ur"{}"
return self.course_index.find_one( return self.course_index.find_one(
son.SON([ {
(key_attr, re.compile(case_regex.format(getattr(key, key_attr)))) key_attr: re.compile(case_regex.format(getattr(key, key_attr)))
for key_attr in ('org', 'course', 'run') for key_attr in ('org', 'course', 'run')
]) }
) )
def find_matching_course_indexes(self, branch=None, search_targets=None): def find_matching_course_indexes(self, branch=None, search_targets=None):
...@@ -182,7 +181,7 @@ class MongoConnection(object): ...@@ -182,7 +181,7 @@ class MongoConnection(object):
search_targets: If specified, this must be a dictionary specifying field values search_targets: If specified, this must be a dictionary specifying field values
that must exist in the search_targets of the returned courses that must exist in the search_targets of the returned courses
""" """
query = son.SON() query = {}
if branch is not None: if branch is not None:
query['versions.{}'.format(branch)] = {'$exists': True} query['versions.{}'.format(branch)] = {'$exists': True}
...@@ -206,11 +205,11 @@ class MongoConnection(object): ...@@ -206,11 +205,11 @@ class MongoConnection(object):
from_index: If set, only update an index if it matches the one specified in `from_index`. from_index: If set, only update an index if it matches the one specified in `from_index`.
""" """
self.course_index.update( self.course_index.update(
from_index or son.SON([ from_index or {
('org', course_index['org']), 'org': course_index['org'],
('course', course_index['course']), 'course': course_index['course'],
('run', course_index['run']) 'run': course_index['run'],
]), },
course_index, course_index,
upsert=False, upsert=False,
) )
...@@ -219,11 +218,11 @@ class MongoConnection(object): ...@@ -219,11 +218,11 @@ class MongoConnection(object):
""" """
Delete the course_index from the persistence mechanism whose id is the given course_index Delete the course_index from the persistence mechanism whose id is the given course_index
""" """
return self.course_index.remove(son.SON([ return self.course_index.remove({
('org', course_index['org']), 'org': course_index['org'],
('course', course_index['course']), 'course': course_index['course'],
('run', course_index['run']) 'run': course_index['run'],
])) })
def get_definition(self, key): def get_definition(self, key):
""" """
...@@ -244,4 +243,20 @@ class MongoConnection(object): ...@@ -244,4 +243,20 @@ class MongoConnection(object):
""" """
self.definitions.insert(definition) self.definitions.insert(definition)
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
self.course_index.create_index(
[
('org', pymongo.ASCENDING),
('course', pymongo.ASCENDING),
('run', pymongo.ASCENDING)
],
unique=True
)
...@@ -2360,6 +2360,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ...@@ -2360,6 +2360,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
services=self.services, services=self.services,
) )
def ensure_indexes(self):
"""
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
an exception if unable to.
This method is intended for use by tests and administrative commands, and not
to be run during server startup.
"""
self.db_connection.ensure_indexes()
class SparseList(list): class SparseList(list):
""" """
Enable inserting items into a list in arbitrary order and then retrieving them. Enable inserting items into a list in arbitrary order and then retrieving them.
......
...@@ -96,6 +96,7 @@ class MongoModulestoreBuilder(object): ...@@ -96,6 +96,7 @@ class MongoModulestoreBuilder(object):
branch_setting_func=lambda: ModuleStoreEnum.Branch.draft_preferred, branch_setting_func=lambda: ModuleStoreEnum.Branch.draft_preferred,
metadata_inheritance_cache_subsystem=MemoryCache(), metadata_inheritance_cache_subsystem=MemoryCache(),
) )
modulestore.ensure_indexes()
try: try:
yield modulestore yield modulestore
...@@ -139,6 +140,7 @@ class VersioningModulestoreBuilder(object): ...@@ -139,6 +140,7 @@ class VersioningModulestoreBuilder(object):
fs_root, fs_root,
render_template=repr, render_template=repr,
) )
modulestore.ensure_indexes()
try: try:
yield modulestore yield modulestore
...@@ -210,6 +212,7 @@ class MongoContentstoreBuilder(object): ...@@ -210,6 +212,7 @@ class MongoContentstoreBuilder(object):
collection='content', collection='content',
**COMMON_DOCSTORE_CONFIG **COMMON_DOCSTORE_CONFIG
) )
contentstore.ensure_indexes()
try: try:
yield contentstore yield contentstore
......
...@@ -310,7 +310,13 @@ class CourseComparisonTest(BulkAssertionTest): ...@@ -310,7 +310,13 @@ class CourseComparisonTest(BulkAssertionTest):
if actual_item is None and expected_item.location.category == 'course': if actual_item is None and expected_item.location.category == 'course':
actual_item_location = actual_item_location.replace(name='course') actual_item_location = actual_item_location.replace(name='course')
actual_item = actual_item_map.get(map_key(actual_item_location)) actual_item = actual_item_map.get(map_key(actual_item_location))
self.assertIsNotNone(actual_item, u'cannot find {} in {}'.format(map_key(actual_item_location), actual_item_map))
# Formatting the message slows down tests of large courses significantly, so only do it if it would be used
if actual_item is None:
msg = u'cannot find {} in {}'.format(map_key(actual_item_location), actual_item_map)
else:
msg = None
self.assertIsNotNone(actual_item, msg)
# compare fields # compare fields
self.assertEqual(expected_item.fields, actual_item.fields) self.assertEqual(expected_item.fields, actual_item.fields)
...@@ -328,17 +334,18 @@ class CourseComparisonTest(BulkAssertionTest): ...@@ -328,17 +334,18 @@ class CourseComparisonTest(BulkAssertionTest):
exp_value = map_references(field.read_from(expected_item), field, actual_course_key) exp_value = map_references(field.read_from(expected_item), field, actual_course_key)
actual_value = field.read_from(actual_item) actual_value = field.read_from(actual_item)
self.assertEqual( # Formatting the message slows down tests of large courses significantly, so only do it if it would be used
exp_value, if exp_value != actual_value:
actual_value, msg = "Field {!r} doesn't match between usages {} and {}: {!r} != {!r}".format(
"Field {!r} doesn't match between usages {} and {}: {!r} != {!r}".format(
field_name, field_name,
expected_item.scope_ids.usage_id, expected_item.scope_ids.usage_id,
actual_item.scope_ids.usage_id, actual_item.scope_ids.usage_id,
exp_value, exp_value,
actual_value, actual_value,
) )
) else:
msg = None
self.assertEqual(exp_value, actual_value, msg)
# compare children # compare children
self.assertEqual(expected_item.has_children, actual_item.has_children) self.assertEqual(expected_item.has_children, actual_item.has_children)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment