Commit b14eaa03 by John Eskew

Merge pull request #6627 from edx/jeskew/perf_test_asset_metadata_find_mongo

Add performance test when finding asset metadata.
parents e64d45b7 01f2e1fd
......@@ -8,7 +8,10 @@ various parts of the system.
import sqlite3
from lxml.builder import E
import lxml.html
#import click
import click
except ImportError:
click = None
DB_NAME = 'block_times.db'
......@@ -70,105 +73,217 @@ class HTMLDocument(object):
return lxml.html.tostring(self.html, pretty_print=pretty_print)
def read_timing_data():
class ReportGenerator(object):
Read in the timing data from the sqlite DB and save into a dict.
Base class for report generation.
run_data = {}
# Read data from all modulestore combos.
conn = sqlite3.connect(DB_NAME)
conn.row_factory = sqlite3.Row
sel_sql = 'select id, run_id, block_desc, elapsed, timestamp FROM block_times ORDER BY run_id DESC'
cur = conn.cursor()
all_modulestore_combos = set()
for row in cur.fetchall():
time_taken = row[3]
# Split apart the description into its parts.
desc_parts = row[2].split(':')
modulestores = desc_parts[1]
amount_md = desc_parts[2]
test_phase = 'all'
if len(desc_parts) > 3:
test_phase = desc_parts[3]
# Save the data in a multi-level dict - { phase1: { amount1: {ms1->ms2: duration, ...}, ...}, ...}.
phase_data = run_data.setdefault(test_phase, {})
amount_data = phase_data.setdefault(amount_md, {})
__ = amount_data.setdefault(modulestores, time_taken)
return all_modulestore_combos, run_data
def generate_html(all_ms_combos, run_data):
def __init__(self, db_name):
    """
    Load every timing row from the sqlite database into ``self.all_rows``.

    Arguments:
        db_name: path of the sqlite database file holding the ``block_times`` table.
    """
    # Read data from all modulestore combos, most recent run first.
    sel_sql = 'select id, run_id, block_desc, elapsed, timestamp FROM block_times ORDER BY run_id DESC'
    conn = sqlite3.connect(db_name)
    try:
        # sqlite3.Row allows columns to be read by position or by name.
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        # The statement must be executed first; fetchall() on an unused cursor yields no rows.
        cur.execute(sel_sql)
        self.all_rows = cur.fetchall()
    finally:
        # All rows are already in memory, so release the connection.
        conn.close()
class ImportExportReportGen(ReportGenerator):
Class which generates report for course import/export performance test data.
def __init__(self, db_name):
    """
    Load the raw timing rows, then organize them for the import/export report.

    Arguments:
        db_name: path of the sqlite database file to read.
    """
    super(ImportExportReportGen, self).__init__(db_name)
    # generate_html() reads self.run_data and self.all_modulestore_combos,
    # which only _read_timing_data() sets, so populate them at construction.
    self._read_timing_data()
def _read_timing_data(self):
    """
    Organize the import/export timing rows held in ``self.all_rows``.

    Sets ``self.run_data`` to a multi-level dict -
    { phase1: { amount1: {ms1->ms2: duration, ...}, ...}, ...} -
    and ``self.all_modulestore_combos`` to the set of modulestore combos seen.
    """
    self.run_data = {}
    self.all_modulestore_combos = set()
    for row in self.all_rows:
        time_taken = row[3]

        # Split apart the description into its parts.
        desc_parts = row[2].split(':')
        if desc_parts[0] != 'XMLRoundTrip':
            # The table also holds rows from other perf tests (e.g. FindAssetTest).
            continue
        modulestores = desc_parts[1]
        # Record the combo so generate_html() can build one column per combo.
        self.all_modulestore_combos.add(modulestores)
        amount_md = desc_parts[2]
        # A description without a fourth part times the whole test run.
        test_phase = 'all'
        if len(desc_parts) > 3:
            test_phase = desc_parts[3]

        # Save the data in the multi-level dict. setdefault() keeps the first
        # value seen for a combo, so a later row never overwrites it.
        phase_data = self.run_data.setdefault(test_phase, {})
        amount_data = phase_data.setdefault(amount_md, {})
        amount_data.setdefault(modulestores, time_taken)
def generate_html(self):
Generate HTML.
html = HTMLDocument("Results")
# Output comparison of each phase to a different table.
for phase in self.run_data.keys():
if phase in ('fake_assets',):
per_phase = self.run_data[phase]
html.add_header(1, phase)
title_map = {
'duration': 'Total Duration (ms)',
'ratio': 'Total Duration Per Number of Assets (ms/asset)',
'variable_cost': 'Asset Export Duration Per Number of Assets (ms/asset)'
for table_type in ('duration', 'ratio', 'variable_cost'):
if phase == 'all' and table_type in ('ratio', 'variable_cost'):
# Make the table header columns and the table.
columns = ["Asset Metadata Amount", ]
ms_keys = sorted(self.all_modulestore_combos)
for k in ms_keys:
columns.append("{} ({})".format(k, table_type))
phase_table = HTMLTable(columns)
# Make a row for each amount of asset metadata.
for amount in sorted(per_phase.keys()):
per_amount = per_phase[amount]
num_assets = int(amount)
row = [amount, ]
for modulestore in ms_keys:
if table_type == 'duration':
value = per_amount[modulestore]
elif table_type == 'ratio':
if num_assets != 0:
value = per_amount[modulestore] / float(amount)
value = 0
elif table_type == 'variable_cost':
if num_assets == 0:
value = 0
value = (per_amount[modulestore] - per_phase['0'][modulestore]) / float(amount)
# Add the table title and the table.
html.add_header(2, title_map[table_type])
return html
class FindReportGen(ReportGenerator):
Generate HTML.
Class which generates report for asset access performance test data.
def __init__(self, db_name):
    """
    Load the raw timing rows, then organize them for the asset-find report.

    Arguments:
        db_name: path of the sqlite database file to read.
    """
    super(FindReportGen, self).__init__(db_name)
    # generate_html() reads self.run_data and self.all_modulestores,
    # which only _read_timing_data() sets, so populate them at construction.
    self._read_timing_data()
html = HTMLDocument("Results")
# Output comparison of each phase to a different table.
for phase in run_data.keys():
if phase in ('fake_assets',):
per_phase = run_data[phase]
html.add_header(1, phase)
title_map = {
'duration': 'Total Duration (ms)',
'ratio': 'Total Duration Per Number of Assets (ms/asset)',
'variable_cost': 'Asset Export Duration Per Number of Assets (ms/asset)'
for table_type in ('duration', 'ratio', 'variable_cost'):
if phase == 'all' and table_type in ('ratio', 'variable_cost'):
def _read_timing_data(self):
    """
    Organize the asset-find timing rows held in ``self.all_rows``.

    Sets ``self.run_data`` to a multi-level dict -
    { phase1: { [sort1: {] amount1: { modulestore1: duration, ...}, ...}, ...} -
    where the sort level exists only for the 'get_asset_list' phase, and
    ``self.all_modulestores`` to the set of modulestores seen.
    """
    self.run_data = {}
    self.all_modulestores = set()
    for row in self.all_rows:
        time_taken = row[3]

        # Split apart the description into its parts.
        desc_parts = row[2].split(':')
        if desc_parts[0] != 'FindAssetTest':
            # Rows written by other perf tests do not belong in this report.
            continue
        modulestore, amount_md = desc_parts[1:3]
        # Record the modulestore so generate_html() can build one column per store.
        self.all_modulestores.add(modulestore)
        # A description without a fourth part times the whole test run.
        test_phase = 'all'
        sort = None
        if len(desc_parts) >= 4:
            test_phase = desc_parts[3]
        if len(desc_parts) >= 5:
            sort = desc_parts[4]

        # Save the data in the multi-level dict.
        phase_data = self.run_data.setdefault(test_phase, {})
        if test_phase == 'get_asset_list':
            # Add a level here for the sort.
            phase_data = phase_data.setdefault(sort, {})
        amount_data = phase_data.setdefault(amount_md, {})
        # setdefault() keeps the first value seen, so a later row never overwrites it.
        amount_data.setdefault(modulestore, time_taken)
def generate_html(self):
Generate HTML.
html = HTMLDocument("Results")
# Output comparison of each phase to a different table.
# for store in self.run_data.keys():
# per_phase = self.run_data[store]
# html.add_header(1, store)
for phase in self.run_data.keys():
per_phase = self.run_data[phase]
# Make the table header columns and the table.
columns = ["Asset Metadata Amount", ]
ms_keys = sorted(all_ms_combos)
ms_keys = sorted(self.all_modulestores)
for k in ms_keys:
columns.append("{} ({})".format(k, table_type))
columns.append("Time Taken (ms) ({})".format(k))
phase_table = HTMLTable(columns)
if phase != 'get_asset_list':
for amount in sorted(per_phase.keys()):
per_amount = per_phase[amount]
row = [amount, ]
for modulestore in ms_keys:
time_taken = per_amount[modulestore]
html.add_header(2, phase)
# get_asset_list phase includes the sort as well.
html.add_header(2, phase)
for sort in per_phase.keys():
sort_table = HTMLTable(columns)
per_sort = per_phase[sort]
for amount in sorted(per_sort.keys()):
per_amount = per_sort[amount]
row = [amount, ]
for modulestore in ms_keys:
# Each sort has two different ranges retrieved.
time_taken = per_amount[modulestore] / 2.0
html.add_header(3, sort)
return html
if click is not None:
    # The command is only defined when the optional 'click' dependency imported.
    @click.command()
    @click.argument('outfile', type=click.File('w'), default='-', required=False)
    @click.option('--db_name', help='Name of sqlite database from which to read data.', default=DB_NAME)
    @click.option('--data_type', help='Data type to process. One of: "imp_exp" or "find"', default="find")
    def cli(outfile, db_name, data_type):
        """
        Generate an HTML report from the sqlite timing data.
        """
        if data_type == 'imp_exp':
            report_gen = ImportExportReportGen(db_name)
        elif data_type == 'find':
            report_gen = FindReportGen(db_name)
        else:
            # Reject unknown values explicitly instead of failing later on an unbound name.
            raise click.BadParameter(
                'must be one of: "imp_exp" or "find"', param_hint='--data_type'
            )
        html = report_gen.generate_html()
        click.echo(html.tostring(), file=outfile)
# Make a row for each amount of asset metadata.
for amount in sorted(per_phase.keys()):
per_amount = per_phase[amount]
num_assets = int(amount)
row = [amount, ]
for modulestore in ms_keys:
if table_type == 'duration':
value = per_amount[modulestore]
elif table_type == 'ratio':
if num_assets != 0:
value = per_amount[modulestore] / float(amount)
value = 0
elif table_type == 'variable_cost':
if num_assets == 0:
value = 0
value = (per_amount[modulestore] - per_phase['0'][modulestore]) / float(amount)
# Add the table title and the table.
html.add_header(2, title_map[table_type])
return html
# @click.command()
# @click.argument('outfile', type=click.File('w'), default='-', required=False)
# def cli(outfile):
# """
# Generate an HTML report from the sqlite timing data.
# """
# all_ms_combos, run_data = read_timing_data()
# html = generate_html(all_ms_combos, run_data)
# click.echo(html.tostring(), file=outfile)
# if __name__ == '__main__':
# cli() # pylint: disable=no-value-for-parameter
if __name__ == '__main__':
    if click is not None:
        cli()  # pylint: disable=no-value-for-parameter
    else:
        # 'click' is optional; without it the command-line interface is not defined.
        # The parenthesised single-argument form prints the same text on Python 2 and 3.
        print("Aborted! Module 'click' is not installed.")
......@@ -10,7 +10,9 @@ from shutil import rmtree
import ddt
#from nose.plugins.attrib import attr
from nose.plugins.skip import SkipTest
from xmodule.assetstore import AssetMetadata
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.xml_importer import import_from_xml
from xmodule.modulestore.xml_exporter import export_to_xml
from xmodule.modulestore.tests.test_cross_modulestore_import_export import (
......@@ -23,15 +25,10 @@ from xmodule.modulestore.perf_tests.generate_asset_xml import make_asset_xml, va
# The dependency below needs to be installed manually from the development.txt file, which doesn't
# get installed during unit tests!
#from code_block_timer import CodeBlockTimer
class CodeBlockTimer(object):
To fake out the tests below, this class definition is used. Remove it when uncommenting above.
def __init__(self, desc):
from code_block_timer import CodeBlockTimer
except ImportError:
CodeBlockTimer = None
# Number of assets saved in the modulestore per test run.
ASSET_AMOUNT_PER_TEST = (1, 10, 100, 1000, 10000)
......@@ -42,6 +39,13 @@ COURSE_NAME = 'manual-testing-complete'
# A list of courses to test - only one.
('displayname', ModuleStoreEnum.SortOrder.ascending),
('displayname', ModuleStoreEnum.SortOrder.descending),
('uploadDate', ModuleStoreEnum.SortOrder.ascending),
('uploadDate', ModuleStoreEnum.SortOrder.descending),
# pylint: disable=invalid-name
TEST_DIR = path(__file__).dirname()
PLATFORM_ROOT = TEST_DIR.parent.parent.parent.parent.parent.parent
......@@ -80,10 +84,13 @@ class CrossStoreXMLRoundtrip(unittest.TestCase):
def test_generate_timings(self, source_ms, dest_ms, num_assets):
def test_generate_import_export_timings(self, source_ms, dest_ms, num_assets):
Generate timings for different amounts of asset metadata and different modulestores.
if CodeBlockTimer is None:
raise SkipTest("CodeBlockTimer undefined.")
desc = "XMLRoundTrip:{}->{}:{}".format(
......@@ -140,3 +147,89 @@ class CrossStoreXMLRoundtrip(unittest.TestCase):
# Eventually, exclude this attribute from regular unittests while running *only* tests
# with this attribute during regular performance tests.
# @attr("perf_test")
class FindAssetTest(unittest.TestCase):
This class exists to time asset finding in different modulestore
classes with different amounts of asset metadata.
# Use this attribute to skip this test on regular unittest CI runs.
perf_test = True
def setUp(self):
super(FindAssetTest, self).setUp()
self.export_dir = mkdtemp()
self.addCleanup(rmtree, self.export_dir, ignore_errors=True)*itertools.product(
def test_generate_find_timings(self, source_ms, num_assets):
Generate timings for different amounts of asset metadata and different modulestores.
if CodeBlockTimer is None:
raise SkipTest("CodeBlockTimer undefined.")
desc = "FindAssetTest:{}:{}".format(
with CodeBlockTimer(desc):
with CodeBlockTimer("fake_assets"):
# First, make the fake asset metadata.
make_asset_xml(num_assets, ASSET_XML_PATH)
# Construct the contentstore for storing the first import
with MongoContentstoreBuilder().build() as source_content:
# Construct the modulestore for storing the first import (using the previously created contentstore)
with as source_store:
source_course_key = source_store.make_course_key('a', 'course', 'course')
asset_key = source_course_key.make_asset_key(
AssetMetadata.GENERAL_ASSET_TYPE, 'silly_cat_picture.gif'
with CodeBlockTimer("initial_import"):
with CodeBlockTimer("find_nonexistent_asset"):
# More correct would be using the AssetManager.find() - but since the test
# has created its own test modulestore, the AssetManager can't be used.
__ = source_store.find_asset_metadata(asset_key)
# Perform get_all_asset_metadata for each sort.
for sort in ALL_SORTS:
with CodeBlockTimer("get_asset_list:{}-{}".format(
'asc' if sort[1] == ModuleStoreEnum.SortOrder.ascending else 'desc'
# Grab two ranges of 50 assets using different sorts.
# Why 50? That's how many are displayed on the current Studio "Files & Uploads" page.
start_middle = num_assets / 2
__ = source_store.get_all_asset_metadata(
source_course_key, 'asset', start=0, sort=sort, maxresults=50
__ = source_store.get_all_asset_metadata(
source_course_key, 'asset', start=start_middle, sort=sort, maxresults=50
