Commit 6873c045 by John Eskew

First iteration of code which tests performance of asset metadata
during course import/export. Uses the external code_block_timer module.
Also, add a script which validates an XML file against an XSD file.
Uses the unittest framework - but is currently skipped.
parent c3f78902
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Generates fake XML for asset metadata.
"""
import random
#import click
from lxml import etree
from datetime import datetime, timedelta
from xmodule.assetstore import AssetMetadata
from opaque_keys.edx.keys import CourseKey
# Name of the asset metadata XML schema definition file.
ASSET_XSD_FILE = 'assets.xsd'
# Characters used in name generation below.
# NAME_CHARS is the plain-ASCII alphabet that pathname() draws from.
NAME_CHARS = u'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-'
# Same alphabet plus a handful of non-ASCII characters; filename() draws from
# this one, so generated file names can contain non-ASCII text.
NAME_CHARS_W_UNICODE = NAME_CHARS + u'àĚŘDžΦШΩΣӔ'
def coin_flip():
    """
    Return True or False, each with equal probability.
    """
    outcomes = (True, False)
    return random.choice(outcomes)
def asset_type():
    """
    Pick an asset type at random.

    Rolls a 100-sided die and walks a table of cumulative upper bounds, so
    "asset" is returned 95% of the time and "video" 5% of the time.
    """
    # Each entry is (cumulative upper bound on the die roll, asset type).
    asset_type_choices = (
        (95, "asset"),
        (100, "video")
    )
    # Roll 1..100 inclusive. Rolling from 0 would give 101 outcomes and
    # skew the split slightly away from the percentages in the table.
    d100 = random.randint(1, 100)
    for upper_bound, type_name in asset_type_choices:
        if d100 <= upper_bound:
            return type_name
    # Only reachable if the table is edited so its last bound is below 100.
    return asset_type_choices[-1][1]
def filename():
    """
    Build a fake file name: 10-30 random characters (possibly non-ASCII)
    followed by one of a few common extensions.
    """
    stem_length = random.randint(10, 30)
    stem = u''.join(random.choice(NAME_CHARS_W_UNICODE) for __ in xrange(stem_length))
    extension = random.choice(('.jpg', '.pdf', '.png', '.txt'))
    return stem + extension
def pathname():
    """
    Build a fake path made of 2-3 random segments, each 5-10 characters
    long and each terminated by a slash.
    """
    segment_count = random.randint(2, 3)
    segments = []
    for __ in xrange(segment_count):
        segment_length = random.randint(5, 10)
        chars = [random.choice(NAME_CHARS) for __ in xrange(segment_length)]
        segments.append(u''.join(chars) + '/')
    return u''.join(segments)
def locked():
    """
    Randomly decide whether an asset is locked (True) or unlocked (False).
    """
    is_locked = coin_flip()
    return is_locked
def fields():
    """
    Build a dict of fake extra fields.

    Half of the time the dict is empty; otherwise each of 'copyrighted',
    'size' and 'color' is independently included on a coin flip.
    """
    extra = {}
    # First flip decides whether there are any extra fields at all.
    if not coin_flip():
        return extra
    if coin_flip():
        extra['copyrighted'] = coin_flip()
    if coin_flip():
        extra['size'] = random.randint(100, 10000000)
    if coin_flip():
        palette = ('blue', 'pink', 'fuchsia', 'rose', 'mauve', 'black')
        extra['color'] = random.choice(palette)
    return extra
def user_id():
    """
    Return a random fake user id between 1 and 100000000 inclusive.
    """
    lowest, highest = 1, 100000000
    return random.randint(lowest, highest)
def versions():
    """
    Return a (current, previous) pair of fake version strings.

    The current version number is random in 1..500 and the previous one is
    exactly one lower; both are rendered as 'v<N>.0'.
    """
    current = random.randint(1, 500)
    template = 'v{}.0'
    return (template.format(current), template.format(current - 1))
def date_and_time():
    """
    Return a fake datetime somewhere in the 15 years before now.
    """
    now = datetime.now()
    max_seconds_back = 473040000  # 15 year interval
    offset = timedelta(seconds=random.randint(0, max_seconds_back))
    return now - offset
def contenttype():
    """
    Return one MIME type chosen at random from a fixed list.
    """
    mime_types = (
        'image/jpeg',
        'text/html',
        'audio/aiff',
        'video/avi',
        'text/plain',
        'application/msword',
        'application/x-gzip',
        'application/javascript',
    )
    return random.choice(mime_types)
def generate_random_asset_md():
    """
    Build one AssetMetadata object populated with semi-random values.

    The asset always belongs to the fixed course 'org/course/run'; every
    other attribute comes from the fake-data helpers in this module.
    """
    course_key = CourseKey.from_string('org/course/run')
    asset_key = course_key.make_asset_key(asset_type(), filename())
    curr_version, prev_version = versions()
    # The values are generated in the same order as the keyword arguments
    # are listed, so the sequence of random draws is unchanged.
    asset_attrs = {
        'pathname': pathname(),
        'internal_name': filename(),
        'locked': locked(),
        'contenttype': contenttype(),
        'thumbnail': filename(),
        'fields': fields(),
        'curr_version': curr_version,
        'prev_version': prev_version,
        'edited_by': user_id(),
        'edited_by_email': 'staff@edx.org',
        'edited_on': date_and_time(),
        'created_by': user_id(),
        'created_by_email': 'staff@edx.org',
        'created_on': date_and_time(),
    }
    return AssetMetadata(asset_key, **asset_attrs)
def make_asset_md(amount):
    """
    Return a list containing `amount` freshly generated fake AssetMetadata objects.
    """
    return [generate_random_asset_md() for __ in xrange(amount)]
# pylint: disable=no-member
def make_asset_xml(amount, xml_filename):
    """
    Generate `amount` fake AssetMetadata objects and write them to
    `xml_filename` as <asset> children of a single <assets> root element.
    """
    xml_root = etree.Element("assets")
    for mdata in make_asset_md(amount):
        # Each metadata object serializes itself into its own <asset> node.
        mdata.to_xml(etree.SubElement(xml_root, "asset"))
    tree = etree.ElementTree(xml_root)
    with open(xml_filename, "w") as xml_file:
        tree.write(xml_file)
def validate_xml(xsd_filename, xml_filename):
    """
    Parse the XML file with a parser bound to the schema in the XSD file.

    Nothing is returned; the parse is done purely so that lxml checks the
    document against the schema.
    """
    with open(xsd_filename, 'r') as xsd_file:
        xsd_text = xsd_file.read()
    schema = etree.XMLSchema(etree.XML(xsd_text))
    validating_parser = etree.XMLParser(schema=schema)
    with open(xml_filename, 'r') as xml_file:
        xml_text = xml_file.read()
    etree.fromstring(xml_text, validating_parser)
# @click.command()
# @click.option('--numAssets',
# type=click.INT,
# default=10,
# help="Number of assets to be generated by the script.",
# required=False
# )
# @click.option('--outputXml',
# type=click.File('w'),
# default=AssetMetadata.EXPORTED_ASSET_FILENAME,
# help="Filename for the output XML file.",
# required=False
# )
# @click.option('--inputXsd',
# type=click.File('r'),
# default=ASSET_XSD_FILE,
# help="Filename for the XSD (schema) file to read in.",
# required=False
# )
# def cli(numAssets, outputXml, inputXsd):
# """
# Generates a number of fake asset metadata items as XML - and validates the XML against the schema.
# """
# make_asset_xml(numAssets, outputXml)
# # Now - validate the XML against the XSD.
# validate_xml(inputXsd, outputXml)
# if __name__ == '__main__':
# cli()
"""
Reads the data generated by performance tests and generates a savable
report which can be viewed over time to examine the performance effects of code changes on
various parts of the system.
"""
import sqlite3
from lxml.builder import E
import lxml.html
#import click
# File name of the sqlite database that read_timing_data() opens to load
# the recorded block timings.
DB_NAME = 'block_times.db'
class HTMLTable(object):
    """
    Thin wrapper around an lxml TABLE element that starts with a header row.
    """
    def __init__(self, hdr_columns):
        header_cells = [E.TH(label) for label in hdr_columns]
        self.table = E.TABLE(E.TR(*header_cells))

    def add_row(self, items):
        """Append one data row whose cells are the given items."""
        cells = (E.TD(cell) for cell in items)
        self.table.append(E.TR(*cells))

    def tostring(self):
        """Serialize the table to an HTML string."""
        return lxml.html.tostring(self.table)

    @staticmethod
    def style():
        """Return a style element holding the hard-coded table CSS."""
        css = """
table, th, td {
border: 1px solid black;
border-collapse: collapse;
}
th, td {
padding: 5px;
}"""
        return E.style(css)
class HTMLDocument(object):
    """
    Thin wrapper around a whole HTML document: a head carrying the title
    and table style, and a body that content is appended to.
    """
    def __init__(self, title):
        head = E.head(E.title(title), HTMLTable.style())
        self.body = E.body()
        self.html = E.html(head, self.body)

    def add_header(self, level, text):
        """Append a heading of the given level (H1, H2, ...) to the body."""
        heading_factory = getattr(E, "H{}".format(level))
        self.body.append(heading_factory(text))

    def add_to_body(self, elem):
        """Append an arbitrary element to the body."""
        self.body.append(elem)

    def tostring(self, pretty_print=False):
        """Serialize the whole document to an HTML string."""
        return lxml.html.tostring(self.html, pretty_print=pretty_print)
def read_timing_data():
"""
Read in the timing data from the sqlite DB and save into a dict.
"""
run_data = {}
# Read data from all modulestore combos.
conn = sqlite3.connect(DB_NAME)
conn.row_factory = sqlite3.Row
sel_sql = 'select id, run_id, block_desc, elapsed, timestamp FROM block_times ORDER BY run_id DESC'
cur = conn.cursor()
cur.execute(sel_sql)
all_modulestore_combos = set()
for row in cur.fetchall():
time_taken = row[3]
# Split apart the description into its parts.
desc_parts = row[2].split(':')
modulestores = desc_parts[1]
all_modulestore_combos.add(modulestores)
amount_md = desc_parts[2]
test_phase = 'all'
if len(desc_parts) > 3:
test_phase = desc_parts[3]
# Save the data in a multi-level dict - { phase1: { amount1: {ms1->ms2: duration, ...}, ...}, ...}.
phase_data = run_data.setdefault(test_phase, {})
amount_data = phase_data.setdefault(amount_md, {})
__ = amount_data.setdefault(modulestores, time_taken)
return all_modulestore_combos, run_data
def generate_html(all_ms_combos, run_data):
    """
    Generate the HTML report.

    For every phase except 'fake_assets' a level-1 header is emitted,
    followed by up to three tables (total duration, duration per asset, and
    duration above the zero-asset baseline per asset). The 'all' phase only
    gets the total duration table. Each table has one row per asset amount
    and one column per modulestore combination.

    Arguments:
        all_ms_combos: iterable of modulestore combination names.
        run_data: dict shaped { phase: { amount: { combo: duration } } },
            where each amount key is a string holding an integer.

    Returns:
        The populated HTMLDocument.

    A cell is rendered as 'N/A' when the data needed to compute it is
    absent - for example when a combination has no timing for an amount,
    or when no zero-asset baseline run was recorded for the phase.
    """
    title_map = {
        'duration': 'Total Duration (ms)',
        'ratio': 'Total Duration Per Number of Assets (ms/asset)',
        'variable_cost': 'Asset Export Duration Per Number of Assets (ms/asset)'
    }
    missing = 'N/A'
    ms_keys = sorted(all_ms_combos)
    html = HTMLDocument("Results")

    # Output comparison of each phase to a different table.
    for phase in run_data.keys():
        if phase in ('fake_assets',):
            continue
        per_phase = run_data[phase]
        html.add_header(1, phase)

        # Timings of the zero-asset run, used as the fixed-cost baseline.
        baseline = per_phase.get('0', {})
        # Sort numerically; a plain string sort would put '10' before '2'.
        amounts = sorted(per_phase.keys(), key=int)

        for table_type in ('duration', 'ratio', 'variable_cost'):
            if phase == 'all' and table_type in ('ratio', 'variable_cost'):
                continue

            # Make the table header columns and the table.
            columns = ["Asset Metadata Amount", ]
            columns.extend("{} ({})".format(k, table_type) for k in ms_keys)
            phase_table = HTMLTable(columns)

            # Make a row for each amount of asset metadata.
            for amount in amounts:
                per_amount = per_phase[amount]
                num_assets = int(amount)
                row = [amount, ]
                for modulestore in ms_keys:
                    duration = per_amount.get(modulestore)
                    if duration is None:
                        value = missing
                    elif table_type == 'duration':
                        value = duration
                    elif num_assets == 0:
                        # Per-asset figures are meaningless with no assets.
                        value = 0
                    elif table_type == 'ratio':
                        value = duration / float(num_assets)
                    else:
                        # variable_cost: time above the zero-asset baseline.
                        base = baseline.get(modulestore)
                        if base is None:
                            value = missing
                        else:
                            value = (duration - base) / float(num_assets)
                    row.append("{}".format(value))
                phase_table.add_row(row)

            # Add the table title and the table.
            html.add_header(2, title_map[table_type])
            html.add_to_body(phase_table.table)

    return html
# @click.command()
# @click.argument('outfile', type=click.File('w'), default='-', required=False)
# def cli(outfile):
# """
# Generate an HTML report from the sqlite timing data.
# """
# all_ms_combos, run_data = read_timing_data()
# html = generate_html(all_ms_combos, run_data)
# click.echo(html.tostring(), file=outfile)
# if __name__ == '__main__':
# cli() # pylint: disable=no-value-for-parameter
"""
Performance test for asset metadata in the modulestore.
"""
from path import path
import unittest
from tempfile import mkdtemp
import itertools
from shutil import rmtree
import ddt
#from nose.plugins.attrib import attr
from xmodule.assetstore import AssetMetadata
from xmodule.modulestore.xml_importer import import_from_xml
from xmodule.modulestore.xml_exporter import export_to_xml
from xmodule.modulestore.tests.test_cross_modulestore_import_export import (
MODULESTORE_SETUPS,
SHORT_NAME_MAP,
TEST_DATA_DIR,
MongoContentstoreBuilder,
)
from xmodule.modulestore.perf_tests.generate_asset_xml import make_asset_xml, validate_xml, ASSET_XSD_FILE
# The dependency below needs to be installed manually from the development.txt file, which doesn't
# get installed during unit tests!
#from code_block_timer import CodeBlockTimer
class CodeBlockTimer(object):
    """
    To fake out the tests below, this class definition is used. Remove it when uncommenting above.

    The stand-in records nothing. It only has to be usable in a ``with``
    statement, because that is how the tests below use the timer.
    """
    def __init__(self, desc):
        pass

    def __enter__(self):
        """Enter the (no-op) timed block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave the block; returning False lets any exception propagate."""
        return False
# Number of assets saved in the modulestore per test run.
# Each value becomes one parametrized case of the timing test below.
ASSET_AMOUNT_PER_TEST = (1, 10, 100, 1000, 10000)
# Use only this course in asset metadata performance testing.
COURSE_NAME = 'manual-testing-complete'
# A list of courses to test - only one.
# Passed as the course_dirs argument of the initial import.
TEST_COURSE = (COURSE_NAME, )
# pylint: disable=invalid-name
# Directory containing this test module.
TEST_DIR = path(__file__).dirname()
# Six directory levels above this module.
# NOTE(review): presumably the platform repository root - confirm if this file moves.
PLATFORM_ROOT = TEST_DIR.parent.parent.parent.parent.parent.parent
# Root of the test course data, and the directory of the single course used here.
TEST_DATA_ROOT = PLATFORM_ROOT / TEST_DATA_DIR
COURSE_DATA_DIR = TEST_DATA_ROOT / COURSE_NAME
# Path where generated asset file is saved.
ASSET_XML_PATH = COURSE_DATA_DIR / AssetMetadata.EXPORTED_ASSET_DIR / AssetMetadata.EXPORTED_ASSET_FILENAME
# Path where asset XML schema definition file is located.
ASSET_XSD_PATH = PLATFORM_ROOT / "common" / "lib" / "xmodule" / "xmodule" / "assetstore" / "tests" / ASSET_XSD_FILE
@ddt.ddt
# Eventually, exclude this attribute from regular unittests while running *only* tests
# with this attribute during regular performance tests.
# @attr("perf_test")
# unittest.skip must be called with a reason. Used bare, it would receive the
# class itself as the "reason" and replace the class with a decorator function.
@unittest.skip("Performance test - not run as part of the regular unit tests.")
class CrossStoreXMLRoundtrip(unittest.TestCase):
    """
    This class exists to time XML import and export between different modulestore
    classes with different amount of asset metadata.
    """

    # Use this attribute to skip this test on regular unittest CI runs.
    perf_test = True

    def setUp(self):
        """Create a temporary export directory that is removed after the test."""
        super(CrossStoreXMLRoundtrip, self).setUp()
        self.export_dir = mkdtemp()
        self.addCleanup(rmtree, self.export_dir, ignore_errors=True)

    @ddt.data(*itertools.product(
        MODULESTORE_SETUPS,
        MODULESTORE_SETUPS,
        ASSET_AMOUNT_PER_TEST
    ))
    @ddt.unpack
    def test_generate_timings(self, source_ms, dest_ms, num_assets):
        """
        Generate timings for different amounts of asset metadata and different modulestores.

        One case runs per (source modulestore, destination modulestore, asset
        amount) combination. The whole round trip is timed, and so is each
        phase inside it: fake asset generation, initial import, export and
        second import.
        """
        desc = "XMLRoundTrip:{}->{}:{}".format(
            SHORT_NAME_MAP[source_ms],
            SHORT_NAME_MAP[dest_ms],
            num_assets
        )

        with CodeBlockTimer(desc):

            with CodeBlockTimer("fake_assets"):
                # First, make the fake asset metadata.
                make_asset_xml(num_assets, ASSET_XML_PATH)
                validate_xml(ASSET_XSD_PATH, ASSET_XML_PATH)

            # Construct the contentstore for storing the first import
            with MongoContentstoreBuilder().build() as source_content:
                # Construct the modulestore for storing the first import (using the previously created contentstore)
                with source_ms.build(source_content) as source_store:
                    # Construct the contentstore for storing the second import
                    with MongoContentstoreBuilder().build() as dest_content:
                        # Construct the modulestore for storing the second import (using the second contentstore)
                        with dest_ms.build(dest_content) as dest_store:
                            source_course_key = source_store.make_course_key('a', 'course', 'course')
                            dest_course_key = dest_store.make_course_key('a', 'course', 'course')

                            with CodeBlockTimer("initial_import"):
                                import_from_xml(
                                    source_store,
                                    'test_user',
                                    TEST_DATA_ROOT,
                                    course_dirs=TEST_COURSE,
                                    static_content_store=source_content,
                                    target_course_id=source_course_key,
                                    create_course_if_not_present=True,
                                    raise_on_failure=True,
                                )

                            with CodeBlockTimer("export"):
                                export_to_xml(
                                    source_store,
                                    source_content,
                                    source_course_key,
                                    self.export_dir,
                                    'exported_source_course',
                                )

                            with CodeBlockTimer("second_import"):
                                import_from_xml(
                                    dest_store,
                                    'test_user',
                                    self.export_dir,
                                    course_dirs=['exported_source_course'],
                                    static_content_store=dest_content,
                                    target_course_id=dest_course_key,
                                    create_course_if_not_present=True,
                                    raise_on_failure=True,
                                )
......@@ -275,11 +275,20 @@ MIXED_MODULESTORE_SETUPS = (
MixedModulestoreBuilder([('draft', MongoModulestoreBuilder())]),
MixedModulestoreBuilder([('split', VersioningModulestoreBuilder())]),
)
# Short names for the mixed modulestore setups.
MIXED_MS_SETUPS_SHORT = (
'mixed_mongo', 'mixed_split'
)
# Modulestore builders that are used directly, without the mixed wrapper.
DIRECT_MODULESTORE_SETUPS = (
MongoModulestoreBuilder(),
# VersioningModulestoreBuilder(), # FUTUREDO: LMS-11227
)
# Short names for the direct setups; 'split' is commented out to match the
# commented-out builder above.
DIRECT_MS_SETUPS_SHORT = (
'mongo',
#'split',
)
MODULESTORE_SETUPS = DIRECT_MODULESTORE_SETUPS + MIXED_MODULESTORE_SETUPS
MODULESTORE_SHORTNAMES = DIRECT_MS_SETUPS_SHORT + MIXED_MS_SETUPS_SHORT
# Builders and short names are paired purely by position, so the setup tuples
# and the short-name tuples must list their entries in the same order.
SHORT_NAME_MAP = dict(zip(MODULESTORE_SETUPS, MODULESTORE_SHORTNAMES))
# Contentstore builders available to the tests.
CONTENTSTORE_SETUPS = (MongoContentstoreBuilder(),)
COURSE_DATA_NAMES = (
......@@ -312,7 +321,6 @@ class CrossStoreXMLRoundtrip(CourseComparisonTest, PartitionTestCase):
))
@ddt.unpack
def test_round_trip(self, source_builder, dest_builder, source_content_builder, dest_content_builder, course_data_name):
# Construct the contentstore for storing the first import
with source_content_builder.build() as source_content:
# Construct the modulestore for storing the first import (using the previously created contentstore)
......@@ -354,14 +362,14 @@ class CrossStoreXMLRoundtrip(CourseComparisonTest, PartitionTestCase):
raise_on_failure=True,
)
# NOT CURRENTLY USED
# export_to_xml(
# dest_store,
# dest_content,
# dest_course_key,
# self.export_dir,
# 'exported_dest_course',
# )
# NOT CURRENTLY USED
# export_to_xml(
# dest_store,
# dest_content,
# dest_course_key,
# self.export_dir,
# 'exported_dest_course',
# )
self.exclude_field(None, 'wiki_slug')
self.exclude_field(None, 'xml_attributes')
......
#
# Dependencies that are used in development only - and are *NOT* needed to be installed in staging/production.
#
# Python libraries to install directly from github / PyPi
click==3.3
# Third-party:
-e git+https://github.com/doctoryes/code_block_timer.git@f3d0629f086bcc649c3c77f4bc5b9c2c8172c3bf#egg=code_block_timer
......@@ -2,7 +2,7 @@
#
# If you open a pull request that adds a new dependency, you should notify:
# * @mollydb - to check licensing
# * One of @e0d, @jarv, or @feanil - to check system requirements
# * One of @e0d or @feanil - to check system requirements
# Python libraries to install directly from github
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment