Commit a4bbed3b by John Eskew

Add performance test which finds the BSON size of varying amounts

of asset metadata in both the old Mongo and Split modulestores.
parent 284e78c2
......@@ -6,12 +6,15 @@ Generates fake XML for asset metadata.
"""
import random
#import click
from lxml import etree
from datetime import datetime, timedelta
from xmodule.assetstore import AssetMetadata
from opaque_keys.edx.keys import CourseKey
try:
import click
except ImportError:
click = None
# Name of the asset metadata XML schema definition file.
ASSET_XSD_FILE = 'assets.xsd'
......@@ -145,7 +148,7 @@ def generate_random_asset_md():
return AssetMetadata(
asset_key,
pathname=pathname(),
internal_name=filename(),
internal_name=str([filename() for __ in xrange(10)]),
locked=locked(),
contenttype=contenttype(),
thumbnail=filename(),
......@@ -199,33 +202,37 @@ def validate_xml(xsd_filename, xml_filename):
with open(xml_filename, 'r') as f:
etree.fromstring(f.read(), xmlparser)
# @click.command()
# @click.option('--numAssets',
# type=click.INT,
# default=10,
# help="Number of assets to be generated by the script.",
# required=False
# )
# @click.option('--outputXml',
# type=click.File('w'),
# default=AssetMetadata.EXPORTED_ASSET_FILENAME,
# help="Filename for the output XML file.",
# required=False
# )
# @click.option('--inputXsd',
# type=click.File('r'),
# default=ASSET_XSD_FILE,
# help="Filename for the XSD (schema) file to read in.",
# required=False
# )
# def cli(numAssets, outputXml, inputXsd):
# """
# Generates a number of fake asset metadata items as XML - and validates the XML against the schema.
# """
# make_asset_xml(numAssets, outputXml)
# # Now - validate the XML against the XSD.
# validate_xml(inputXsd, outputXml)
# if __name__ == '__main__':
# cli()
if click is not None:
    # The command-line entry point is only defined when the optional 'click'
    # package was importable; otherwise the name 'cli' does not exist.

    # pylint: disable=bad-continuation
    @click.command()
    @click.option('--num_assets',
        type=click.INT,
        default=10,
        help="Number of assets to be generated by the script.",
        required=False
    )
    @click.option('--output_xml',
        # A path, not click.File: validate_xml() calls open() on this value, so
        # it must be a filename rather than an already-open file object.
        type=click.Path(dir_okay=False),
        default=AssetMetadata.EXPORTED_ASSET_FILENAME,
        help="Filename for the output XML file.",
        required=False
    )
    @click.option('--input_xsd',
        # Also a path; it has to exist because it is read during validation.
        type=click.Path(exists=True, dir_okay=False),
        default=ASSET_XSD_FILE,
        help="Filename for the XSD (schema) file to read in.",
        required=False
    )
    def cli(num_assets, output_xml, input_xsd):
        """
        Generates a number of fake asset metadata items as XML - and validates the XML against the schema.

        Arguments:
            num_assets (int): number of fake asset metadata items to generate.
            output_xml (str): filename the generated XML is written to.
            input_xsd (str): filename of the XSD schema used for validation.
        """
        make_asset_xml(num_assets, output_xml)

        # Now - validate the XML against the XSD.
        validate_xml(input_xsd, output_xml)
if __name__ == '__main__':
    # 'cli' is only defined when the optional 'click' import succeeded, so
    # report the missing dependency instead of calling an undefined name.
    if click is None:
        print("Aborted! Module 'click' is not installed.")
    else:
        cli()  # pylint: disable=no-value-for-parameter
......@@ -6,7 +6,8 @@ import unittest
from tempfile import mkdtemp
import itertools
from shutil import rmtree
from bson.code import Code
import datetime
import ddt
#from nose.plugins.attrib import attr
......@@ -31,7 +32,7 @@ except ImportError:
CodeBlockTimer = None
# Number of assets saved in the modulestore per test run.
ASSET_AMOUNT_PER_TEST = (1, 10, 100, 1000, 10000)
ASSET_AMOUNT_PER_TEST = (0, 1, 10, 100, 1000, 10000)
# Use only this course in asset metadata performance testing.
COURSE_NAME = 'manual-testing-complete'
......@@ -160,7 +161,7 @@ class FindAssetTest(unittest.TestCase):
classes with different amounts of asset metadata.
"""
# Use this attribute to skip this test on regular unittest CI runs.
# Use this attr to skip this test on regular unittest CI runs.
perf_test = True
def setUp(self):
......@@ -233,3 +234,79 @@ class FindAssetTest(unittest.TestCase):
__ = source_store.get_all_asset_metadata(
source_course_key, 'asset', start=start_middle, sort=sort, maxresults=50
)
@ddt.ddt
# Eventually, exclude this attribute from regular unittests while running *only* tests
# with this attribute during regular performance tests.
# @attr("perf_test")
# NOTE: unittest.skip must be *called* with a reason. Applied bare to a class it
# rebinds the class name to unittest's inner decorator function instead of
# marking the TestCase as skipped.
@unittest.skip("Asset metadata size measurement is run manually, not in regular unittest runs.")
class TestModulestoreAssetSize(unittest.TestCase):
    """
    This class exists to measure the size of asset metadata in different modulestore
    classes with different amounts of asset metadata.
    """

    # Use this attribute to skip this test on regular unittest CI runs.
    perf_test = True

    # Evaluated once, when the class body executes, so every result line written
    # by the same run carries the same timestamp.
    test_run_time = datetime.datetime.now()

    @ddt.data(*itertools.product(
        MODULESTORE_SETUPS,
        ASSET_AMOUNT_PER_TEST
    ))
    @ddt.unpack
    def test_asset_sizes(self, source_ms, num_assets):
        """
        Record the largest BSON document size for different amounts of asset metadata
        and different modulestores.

        Arguments:
            source_ms: modulestore builder used to create the store under test.
            num_assets (int): number of fake asset metadata items to generate.

        One result line per measured combination is appended to "bson_sizes.txt".
        """
        # First, make the fake asset metadata.
        make_asset_xml(num_assets, ASSET_XML_PATH)
        validate_xml(ASSET_XSD_PATH, ASSET_XML_PATH)

        # Construct the contentstore for storing the first import
        with MongoContentstoreBuilder().build() as source_content:
            # Construct the modulestore for storing the first import (using the previously created contentstore)
            with source_ms.build(source_content) as source_store:
                source_course_key = source_store.make_course_key('a', 'course', 'course')

                import_from_xml(
                    source_store,
                    'test_user',
                    TEST_DATA_ROOT,
                    course_dirs=TEST_COURSE,
                    static_content_store=source_content,
                    target_course_id=source_course_key,
                    create_course_if_not_present=True,
                    raise_on_failure=True,
                )

                asset_collection = source_ms.asset_collection()
                # Ensure the asset collection exists. A builder may report None when
                # it cannot name a single collection, so guard before dereferencing.
                if (
                    asset_collection is not None and
                    asset_collection.name in asset_collection.database.collection_names()
                ):
                    # Map gets the size of each structure.
                    mapper = Code("""
                        function() { emit("size", (this == null) ? 0 : Object.bsonsize(this)) }
                    """)

                    # Reduce finds the largest structure size and returns only it.
                    reducer = Code("""
                        function(key, values) {
                            var max_size = 0;
                            for (var i=0; i < values.length; i++) {
                                if (values[i] > max_size) {
                                    max_size = values[i];
                                }
                            }
                            return max_size;
                        }
                    """)

                    results = asset_collection.map_reduce(mapper, reducer, "size_results")
                    result_str = "{} - Store: {:<15} - Num Assets: {:>6} - Result: {}\n".format(
                        self.test_run_time, SHORT_NAME_MAP[source_ms], num_assets, list(results.find())
                    )
                    # Append so that results from successive runs accumulate in one file.
                    with open("bson_sizes.txt", "a") as f:
                        f.write(result_str)
......@@ -100,7 +100,8 @@ class MongoModulestoreBuilder(object):
# Set up a temp directory for storing filesystem content created during import
fs_root = mkdtemp()
modulestore = DraftModuleStore(
# pylint: disable=attribute-defined-outside-init
self.modulestore = DraftModuleStore(
contentstore,
doc_store_config,
fs_root,
......@@ -109,13 +110,13 @@ class MongoModulestoreBuilder(object):
metadata_inheritance_cache_subsystem=MemoryCache(),
xblock_mixins=XBLOCK_MIXINS,
)
modulestore.ensure_indexes()
self.modulestore.ensure_indexes()
try:
yield modulestore
yield self.modulestore
finally:
# Delete the created database
modulestore._drop_database()
self.modulestore._drop_database() # pylint: disable=protected-access
# Delete the created directory on the filesystem
rmtree(fs_root, ignore_errors=True)
......@@ -123,6 +124,12 @@ class MongoModulestoreBuilder(object):
def __repr__(self):
    """
    Return the constructor-style text that identifies this builder.
    """
    description = 'MongoModulestoreBuilder()'
    return description
def asset_collection(self):
    """
    Return the collection in which this builder's modulestore keeps asset metadata.
    """
    store = self.modulestore
    return store.asset_collection
class VersioningModulestoreBuilder(object):
"""
......@@ -160,7 +167,7 @@ class VersioningModulestoreBuilder(object):
yield modulestore
finally:
# Delete the created database
modulestore._drop_database()
modulestore._drop_database() # pylint: disable=protected-access
# Delete the created directory on the filesystem
rmtree(fs_root, ignore_errors=True)
......@@ -206,6 +213,7 @@ class MixedModulestoreBuilder(object):
"""
self.store_builders = store_builders
self.mappings = mappings or {}
self.modulestore = None
@contextmanager
def build(self, contentstore):
......@@ -227,7 +235,7 @@ class MixedModulestoreBuilder(object):
# Generate a fake list of stores to give the already generated stores appropriate names
stores = [{'NAME': name, 'ENGINE': 'This space deliberately left blank'} for name in names]
modulestore = MixedModuleStore(
self.modulestore = MixedModuleStore(
contentstore,
self.mappings,
stores,
......@@ -235,11 +243,29 @@ class MixedModulestoreBuilder(object):
xblock_mixins=XBLOCK_MIXINS,
)
yield modulestore
yield self.modulestore
def __repr__(self):
    """
    Return constructor-style text showing the store builders and mappings.
    """
    template = 'MixedModulestoreBuilder({!r}, {!r})'
    return template.format(self.store_builders, self.mappings)
def asset_collection(self):
    """
    Returns the collection storing the asset metadata.

    Returns None unless the mixed modulestore wraps exactly one store, because
    there is then no single collection to report.
    """
    all_stores = self.modulestore.modulestores
    if len(all_stores) != 1:
        # Several stores are ambiguous; zero stores would make the [0] lookup
        # below raise IndexError.
        return None

    store = all_stores[0]
    if hasattr(store, 'asset_collection'):
        # Mongo modulestore beneath mixed.
        # Returns the entire collection with *all* courses' asset metadata.
        return store.asset_collection

    # Split modulestore beneath mixed.
    # Split stores all asset metadata in the structure collection.
    return store.db_connection.structures
class MongoContentstoreBuilder(object):
"""
......@@ -276,7 +302,8 @@ MIXED_MODULESTORE_SETUPS = (
MixedModulestoreBuilder([('split', VersioningModulestoreBuilder())]),
)
MIXED_MS_SETUPS_SHORT = (
'mixed_mongo', 'mixed_split'
'mixed_mongo',
'mixed_split',
)
DIRECT_MODULESTORE_SETUPS = (
MongoModulestoreBuilder(),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment