Commit a4bbed3b by John Eskew

Add performance test which finds the BSON size of varying amounts

of asset metadata in both the old Mongo and Split modulestores.
parent 284e78c2
...@@ -6,12 +6,15 @@ Generates fake XML for asset metadata. ...@@ -6,12 +6,15 @@ Generates fake XML for asset metadata.
""" """
import random import random
#import click
from lxml import etree from lxml import etree
from datetime import datetime, timedelta from datetime import datetime, timedelta
from xmodule.assetstore import AssetMetadata from xmodule.assetstore import AssetMetadata
from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.keys import CourseKey
try:
import click
except ImportError:
click = None
# Name of the asset metadata XML schema definition file. # Name of the asset metadata XML schema definition file.
ASSET_XSD_FILE = 'assets.xsd' ASSET_XSD_FILE = 'assets.xsd'
...@@ -145,7 +148,7 @@ def generate_random_asset_md(): ...@@ -145,7 +148,7 @@ def generate_random_asset_md():
return AssetMetadata( return AssetMetadata(
asset_key, asset_key,
pathname=pathname(), pathname=pathname(),
internal_name=filename(), internal_name=str([filename() for __ in xrange(10)]),
locked=locked(), locked=locked(),
contenttype=contenttype(), contenttype=contenttype(),
thumbnail=filename(), thumbnail=filename(),
...@@ -199,33 +202,37 @@ def validate_xml(xsd_filename, xml_filename): ...@@ -199,33 +202,37 @@ def validate_xml(xsd_filename, xml_filename):
with open(xml_filename, 'r') as f: with open(xml_filename, 'r') as f:
etree.fromstring(f.read(), xmlparser) etree.fromstring(f.read(), xmlparser)
if click is not None:
    # The command is only defined when the optional 'click' dependency is importable.
    # pylint: disable=bad-continuation
    @click.command()
    @click.option('--num_assets',
        type=click.INT,
        default=10,
        help="Number of assets to be generated by the script.",
        required=False
    )
    @click.option('--output_xml',
        # Pass a filename (not an open file handle): validate_xml() opens the XML file by name.
        type=click.Path(dir_okay=False, writable=True),
        default=AssetMetadata.EXPORTED_ASSET_FILENAME,
        help="Filename for the output XML file.",
        required=False
    )
    @click.option('--input_xsd',
        # Pass a filename here as well, matching how validate_xml() is called elsewhere.
        type=click.Path(exists=True, dir_okay=False, readable=True),
        default=ASSET_XSD_FILE,
        help="Filename for the XSD (schema) file to read in.",
        required=False
    )
    def cli(num_assets, output_xml, input_xsd):
        """
        Generates a number of fake asset metadata items as XML - and validates the XML against the schema.

        Arguments:
            num_assets: number of fake asset metadata items to generate.
            output_xml: filename of the XML file to write.
            input_xsd: filename of the XSD (schema) file used for validation.
        """
        make_asset_xml(num_assets, output_xml)
        # Now - validate the XML against the XSD.
        validate_xml(input_xsd, output_xml)
# @click.command() if __name__ == '__main__':
# @click.option('--numAssets', if click is not None:
# type=click.INT, cli() # pylint: disable=no-value-for-parameter
# default=10, else:
# help="Number of assets to be generated by the script.", print "Aborted! Module 'click' is not installed."
# required=False
# )
# @click.option('--outputXml',
# type=click.File('w'),
# default=AssetMetadata.EXPORTED_ASSET_FILENAME,
# help="Filename for the output XML file.",
# required=False
# )
# @click.option('--inputXsd',
# type=click.File('r'),
# default=ASSET_XSD_FILE,
# help="Filename for the XSD (schema) file to read in.",
# required=False
# )
# def cli(numAssets, outputXml, inputXsd):
# """
# Generates a number of fake asset metadata items as XML - and validates the XML against the schema.
# """
# make_asset_xml(numAssets, outputXml)
# # Now - validate the XML against the XSD.
# validate_xml(inputXsd, outputXml)
# if __name__ == '__main__':
# cli()
...@@ -6,7 +6,8 @@ import unittest ...@@ -6,7 +6,8 @@ import unittest
from tempfile import mkdtemp from tempfile import mkdtemp
import itertools import itertools
from shutil import rmtree from shutil import rmtree
from bson.code import Code
import datetime
import ddt import ddt
#from nose.plugins.attrib import attr #from nose.plugins.attrib import attr
...@@ -31,7 +32,7 @@ except ImportError: ...@@ -31,7 +32,7 @@ except ImportError:
CodeBlockTimer = None CodeBlockTimer = None
# Number of assets saved in the modulestore per test run. # Number of assets saved in the modulestore per test run.
ASSET_AMOUNT_PER_TEST = (1, 10, 100, 1000, 10000) ASSET_AMOUNT_PER_TEST = (0, 1, 10, 100, 1000, 10000)
# Use only this course in asset metadata performance testing. # Use only this course in asset metadata performance testing.
COURSE_NAME = 'manual-testing-complete' COURSE_NAME = 'manual-testing-complete'
...@@ -160,7 +161,7 @@ class FindAssetTest(unittest.TestCase): ...@@ -160,7 +161,7 @@ class FindAssetTest(unittest.TestCase):
classes with different amounts of asset metadata. classes with different amounts of asset metadata.
""" """
# Use this attribute to skip this test on regular unittest CI runs. # Use this attr to skip this test on regular unittest CI runs.
perf_test = True perf_test = True
def setUp(self): def setUp(self):
...@@ -233,3 +234,79 @@ class FindAssetTest(unittest.TestCase): ...@@ -233,3 +234,79 @@ class FindAssetTest(unittest.TestCase):
__ = source_store.get_all_asset_metadata( __ = source_store.get_all_asset_metadata(
source_course_key, 'asset', start=start_middle, sort=sort, maxresults=50 source_course_key, 'asset', start=start_middle, sort=sort, maxresults=50
) )
@ddt.ddt
# Eventually, exclude this attribute from regular unittests while running *only* tests
# with this attribute during regular performance tests.
# @attr("perf_test")
# NOTE: unittest.skip must be *called* with a reason. Used bare on a class, it treats the
# class as the reason and returns a decorator function in place of the TestCase.
@unittest.skip("Performance test - not meant to run during regular unittest CI runs.")
class TestModulestoreAssetSize(unittest.TestCase):
    """
    This class exists to measure the size of asset metadata in different modulestore
    classes with different amounts of asset metadata.
    """

    # Use this attribute to skip this test on regular unittest CI runs.
    perf_test = True

    # Evaluated once, when the class is defined, so every result line written by
    # one run carries the same timestamp.
    test_run_time = datetime.datetime.now()

    @ddt.data(*itertools.product(
        MODULESTORE_SETUPS,
        ASSET_AMOUNT_PER_TEST
    ))
    @ddt.unpack
    def test_asset_sizes(self, source_ms, num_assets):
        """
        Record the largest BSON document size found in the asset metadata collection
        for different amounts of asset metadata and different modulestores.

        One result line per run is appended to the file "bson_sizes.txt".

        Arguments:
            source_ms: modulestore builder under test.
            num_assets: number of fake asset metadata items to generate.
        """
        # First, make the fake asset metadata.
        make_asset_xml(num_assets, ASSET_XML_PATH)
        validate_xml(ASSET_XSD_PATH, ASSET_XML_PATH)

        # Construct the contentstore for storing the first import
        with MongoContentstoreBuilder().build() as source_content:
            # Construct the modulestore for storing the first import (using the previously created contentstore)
            with source_ms.build(source_content) as source_store:
                source_course_key = source_store.make_course_key('a', 'course', 'course')

                import_from_xml(
                    source_store,
                    'test_user',
                    TEST_DATA_ROOT,
                    course_dirs=TEST_COURSE,
                    static_content_store=source_content,
                    target_course_id=source_course_key,
                    create_course_if_not_present=True,
                    raise_on_failure=True,
                )

                asset_collection = source_ms.asset_collection()
                # A builder may return None when it cannot name a single collection;
                # in that case there is nothing to measure.
                # Otherwise, ensure the asset collection exists before querying it.
                if (
                    asset_collection is not None and
                    asset_collection.name in asset_collection.database.collection_names()
                ):
                    # Map gets the size of each structure.
                    mapper = Code("""
                        function() { emit("size", (this == null) ? 0 : Object.bsonsize(this)) }
                        """)

                    # Reduce finds the largest structure size and returns only it.
                    reducer = Code("""
                        function(key, values) {
                            var max_size = 0;
                            for (var i=0; i < values.length; i++) {
                                if (values[i] > max_size) {
                                    max_size = values[i];
                                }
                            }
                            return max_size;
                        }
                    """)

                    # The map/reduce output is stored in the "size_results" collection.
                    results = asset_collection.map_reduce(mapper, reducer, "size_results")
                    result_str = "{} - Store: {:<15} - Num Assets: {:>6} - Result: {}\n".format(
                        self.test_run_time, SHORT_NAME_MAP[source_ms], num_assets, list(results.find())
                    )
                    # Append, so results from successive runs accumulate in one file.
                    with open("bson_sizes.txt", "a") as f:
                        f.write(result_str)
...@@ -100,7 +100,8 @@ class MongoModulestoreBuilder(object): ...@@ -100,7 +100,8 @@ class MongoModulestoreBuilder(object):
# Set up a temp directory for storing filesystem content created during import # Set up a temp directory for storing filesystem content created during import
fs_root = mkdtemp() fs_root = mkdtemp()
modulestore = DraftModuleStore( # pylint: disable=attribute-defined-outside-init
self.modulestore = DraftModuleStore(
contentstore, contentstore,
doc_store_config, doc_store_config,
fs_root, fs_root,
...@@ -109,13 +110,13 @@ class MongoModulestoreBuilder(object): ...@@ -109,13 +110,13 @@ class MongoModulestoreBuilder(object):
metadata_inheritance_cache_subsystem=MemoryCache(), metadata_inheritance_cache_subsystem=MemoryCache(),
xblock_mixins=XBLOCK_MIXINS, xblock_mixins=XBLOCK_MIXINS,
) )
modulestore.ensure_indexes() self.modulestore.ensure_indexes()
try: try:
yield modulestore yield self.modulestore
finally: finally:
# Delete the created database # Delete the created database
modulestore._drop_database() self.modulestore._drop_database() # pylint: disable=protected-access
# Delete the created directory on the filesystem # Delete the created directory on the filesystem
rmtree(fs_root, ignore_errors=True) rmtree(fs_root, ignore_errors=True)
...@@ -123,6 +124,12 @@ class MongoModulestoreBuilder(object): ...@@ -123,6 +124,12 @@ class MongoModulestoreBuilder(object):
def __repr__(self): def __repr__(self):
return 'MongoModulestoreBuilder()' return 'MongoModulestoreBuilder()'
def asset_collection(self):
    """
    Return the collection in which the built modulestore keeps asset metadata.

    Reads ``self.modulestore``, so a modulestore must already be assigned
    on this builder before calling.
    """
    mongo_store = self.modulestore
    return mongo_store.asset_collection
class VersioningModulestoreBuilder(object): class VersioningModulestoreBuilder(object):
""" """
...@@ -160,7 +167,7 @@ class VersioningModulestoreBuilder(object): ...@@ -160,7 +167,7 @@ class VersioningModulestoreBuilder(object):
yield modulestore yield modulestore
finally: finally:
# Delete the created database # Delete the created database
modulestore._drop_database() modulestore._drop_database() # pylint: disable=protected-access
# Delete the created directory on the filesystem # Delete the created directory on the filesystem
rmtree(fs_root, ignore_errors=True) rmtree(fs_root, ignore_errors=True)
...@@ -206,6 +213,7 @@ class MixedModulestoreBuilder(object): ...@@ -206,6 +213,7 @@ class MixedModulestoreBuilder(object):
""" """
self.store_builders = store_builders self.store_builders = store_builders
self.mappings = mappings or {} self.mappings = mappings or {}
self.modulestore = None
@contextmanager @contextmanager
def build(self, contentstore): def build(self, contentstore):
...@@ -227,7 +235,7 @@ class MixedModulestoreBuilder(object): ...@@ -227,7 +235,7 @@ class MixedModulestoreBuilder(object):
# Generate a fake list of stores to give the already generated stores appropriate names # Generate a fake list of stores to give the already generated stores appropriate names
stores = [{'NAME': name, 'ENGINE': 'This space deliberately left blank'} for name in names] stores = [{'NAME': name, 'ENGINE': 'This space deliberately left blank'} for name in names]
modulestore = MixedModuleStore( self.modulestore = MixedModuleStore(
contentstore, contentstore,
self.mappings, self.mappings,
stores, stores,
...@@ -235,11 +243,29 @@ class MixedModulestoreBuilder(object): ...@@ -235,11 +243,29 @@ class MixedModulestoreBuilder(object):
xblock_mixins=XBLOCK_MIXINS, xblock_mixins=XBLOCK_MIXINS,
) )
yield modulestore yield self.modulestore
def __repr__(self): def __repr__(self):
return 'MixedModulestoreBuilder({!r}, {!r})'.format(self.store_builders, self.mappings) return 'MixedModulestoreBuilder({!r}, {!r})'.format(self.store_builders, self.mappings)
def asset_collection(self):
    """
    Returns the collection storing the asset metadata.

    Returns None unless the mixed modulestore wraps exactly one store, since
    otherwise no single collection can be named.
    """
    all_stores = self.modulestore.modulestores
    # Guard both "several stores" and "no stores at all"; indexing an empty
    # list below would raise IndexError.
    if len(all_stores) != 1:
        return None

    store = all_stores[0]
    if hasattr(store, 'asset_collection'):
        # Mongo modulestore beneath mixed.
        # Returns the entire collection with *all* courses' asset metadata.
        return store.asset_collection

    # Split modulestore beneath mixed.
    # Split stores all asset metadata in the structure collection.
    return store.db_connection.structures
class MongoContentstoreBuilder(object): class MongoContentstoreBuilder(object):
""" """
...@@ -276,7 +302,8 @@ MIXED_MODULESTORE_SETUPS = ( ...@@ -276,7 +302,8 @@ MIXED_MODULESTORE_SETUPS = (
MixedModulestoreBuilder([('split', VersioningModulestoreBuilder())]), MixedModulestoreBuilder([('split', VersioningModulestoreBuilder())]),
) )
MIXED_MS_SETUPS_SHORT = ( MIXED_MS_SETUPS_SHORT = (
'mixed_mongo', 'mixed_split' 'mixed_mongo',
'mixed_split',
) )
DIRECT_MODULESTORE_SETUPS = ( DIRECT_MODULESTORE_SETUPS = (
MongoModulestoreBuilder(), MongoModulestoreBuilder(),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment