Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
b14eaa03
Commit
b14eaa03
authored
Jan 16, 2015
by
John Eskew
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #6627 from edx/jeskew/perf_test_asset_metadata_find_mongo
Add performance test when finding asset metadata.
parents
e64d45b7
01f2e1fd
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
310 additions
and
102 deletions
+310
-102
common/lib/xmodule/xmodule/modulestore/perf_tests/generate_report.py
+207
-92
common/lib/xmodule/xmodule/modulestore/perf_tests/test_asset_import_export.py
+103
-10
No files found.
common/lib/xmodule/xmodule/modulestore/perf_tests/generate_report.py
View file @
b14eaa03
...
...
@@ -8,7 +8,10 @@ various parts of the system.
import
sqlite3
from
lxml.builder
import
E
import
lxml.html
#import click
try
:
import
click
except
ImportError
:
click
=
None
DB_NAME
=
'block_times.db'
...
...
@@ -70,105 +73,217 @@ class HTMLDocument(object):
return
lxml
.
html
.
tostring
(
self
.
html
,
pretty_print
=
pretty_print
)
def
read_timing_data
(
):
class
ReportGenerator
(
object
):
"""
Read in the timing data from the sqlite DB and save into a dict
.
Base class for report generation
.
"""
run_data
=
{}
# Read data from all modulestore combos.
conn
=
sqlite3
.
connect
(
DB_NAME
)
conn
.
row_factory
=
sqlite3
.
Row
sel_sql
=
'select id, run_id, block_desc, elapsed, timestamp FROM block_times ORDER BY run_id DESC'
cur
=
conn
.
cursor
()
cur
.
execute
(
sel_sql
)
all_modulestore_combos
=
set
()
for
row
in
cur
.
fetchall
():
time_taken
=
row
[
3
]
# Split apart the description into its parts.
desc_parts
=
row
[
2
]
.
split
(
':'
)
modulestores
=
desc_parts
[
1
]
all_modulestore_combos
.
add
(
modulestores
)
amount_md
=
desc_parts
[
2
]
test_phase
=
'all'
if
len
(
desc_parts
)
>
3
:
test_phase
=
desc_parts
[
3
]
# Save the data in a multi-level dict - { phase1: { amount1: {ms1->ms2: duration, ...}, ...}, ...}.
phase_data
=
run_data
.
setdefault
(
test_phase
,
{})
amount_data
=
phase_data
.
setdefault
(
amount_md
,
{})
__
=
amount_data
.
setdefault
(
modulestores
,
time_taken
)
return
all_modulestore_combos
,
run_data
def
generate_html
(
all_ms_combos
,
run_data
):
def __init__(self, db_name):
    """
    Load every recorded timing row from the sqlite database.

    Arguments:
        db_name: name of the sqlite database file containing the `block_times` table.

    After construction, `self.all_rows` holds all rows of `block_times`
    (id, run_id, block_desc, elapsed, timestamp) ordered by descending run_id.
    """
    sel_sql = 'select id, run_id, block_desc, elapsed, timestamp FROM block_times ORDER BY run_id DESC'
    # Read data from all modulestore combos.
    conn = sqlite3.connect(db_name)
    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        cur.execute(sel_sql)
        # fetchall() copies the rows out, so they stay usable after the connection is closed.
        self.all_rows = cur.fetchall()
    finally:
        # Release the database handle even if the query fails.
        conn.close()
class ImportExportReportGen(ReportGenerator):
    """
    Class which generates report for course import/export performance test data.
    """
    # First component of the block descriptions written by the import/export
    # round-trip test ("XMLRoundTrip:{source}->{dest}:{amount}[:{phase}]").
    # Rows whose description starts with anything else are ignored.
    DESC_PREFIX = 'XMLRoundTrip'

    # Text placed in a table cell for which no timing was recorded.
    MISSING_VALUE = 'n/a'

    def __init__(self, db_name):
        """
        Load all timing rows from `db_name` and index the import/export ones.
        """
        super(ImportExportReportGen, self).__init__(db_name)
        self._read_timing_data()

    def _read_timing_data(self):
        """
        Read in the timing data from the sqlite DB and save into a dict.

        Populates:
            self.all_modulestore_combos: set of "source->dest" modulestore names seen.
            self.run_data: { phase: { amount: { "source->dest": duration, ...}, ...}, ...}
        """
        self.run_data = {}
        self.all_modulestore_combos = set()
        for row in self.all_rows:
            time_taken = row[3]

            # Split apart the description into its parts.
            desc_parts = row[2].split(':')
            # Skip rows written by other tests, and rows too short to carry
            # both a modulestore combo and an amount.
            if desc_parts[0] != self.DESC_PREFIX or len(desc_parts) < 3:
                continue

            modulestores, amount_md = desc_parts[1:3]
            self.all_modulestore_combos.add(modulestores)
            test_phase = desc_parts[3] if len(desc_parts) > 3 else 'all'

            # Rows arrive ordered by descending run_id; setdefault keeps the first
            # duration seen for each (phase, amount, combo) and ignores later rows.
            phase_data = self.run_data.setdefault(test_phase, {})
            amount_data = phase_data.setdefault(amount_md, {})
            amount_data.setdefault(modulestores, time_taken)

    def _cell_value(self, table_type, per_amount, baseline, modulestore, num_assets):
        """
        Return the text of one table cell.

        Arguments:
            table_type: 'duration', 'ratio' or 'variable_cost'.
            per_amount: { modulestore combo: duration } for the row's asset amount.
            baseline: { modulestore combo: duration } for the '0' amount, or None.
            modulestore: the modulestore combo (column) being rendered.
            num_assets: the row's asset amount as an int.
        """
        if modulestore not in per_amount:
            return self.MISSING_VALUE
        duration = per_amount[modulestore]

        if table_type == 'duration':
            value = duration
        elif num_assets == 0:
            # Per-asset figures are undefined for zero assets; report 0.
            value = 0
        elif table_type == 'ratio':
            value = duration / float(num_assets)
        else:
            # variable_cost: time beyond the zero-asset baseline, per asset.
            if baseline is None or modulestore not in baseline:
                return self.MISSING_VALUE
            value = (duration - baseline[modulestore]) / float(num_assets)
        return "{}".format(value)

    def generate_html(self):
        """
        Generate HTML.

        Returns an HTMLDocument with one section per test phase (except
        'fake_assets'), each holding up to three tables: total duration,
        duration per asset, and duration above the zero-asset baseline per asset.
        """
        html = HTMLDocument("Results")
        title_map = {
            'duration': 'Total Duration (ms)',
            'ratio': 'Total Duration Per Number of Assets (ms/asset)',
            'variable_cost': 'Asset Export Duration Per Number of Assets (ms/asset)'
        }
        ms_keys = sorted(self.all_modulestore_combos)

        # Output comparison of each phase to a different table.
        # Phases are sorted so the report layout is the same on every run.
        for phase in sorted(self.run_data):
            if phase in ('fake_assets',):
                continue
            per_phase = self.run_data[phase]
            baseline = per_phase.get('0')
            html.add_header(1, phase)

            for table_type in ('duration', 'ratio', 'variable_cost'):
                if phase == 'all' and table_type in ('ratio', 'variable_cost'):
                    continue
                if table_type == 'variable_cost' and baseline is None:
                    # No zero-asset run was recorded for this phase, so there is
                    # nothing to subtract; skip the table instead of raising KeyError.
                    continue

                # Make the table header columns and the table.
                columns = ["Asset Metadata Amount", ]
                columns.extend("{} ({})".format(k, table_type) for k in ms_keys)
                phase_table = HTMLTable(columns)

                # Make a row for each amount of asset metadata, in numeric order
                # (the amounts are stored as strings).
                for amount in sorted(per_phase, key=int):
                    num_assets = int(amount)
                    row = [amount, ]
                    for modulestore in ms_keys:
                        row.append(
                            self._cell_value(table_type, per_phase[amount], baseline, modulestore, num_assets)
                        )
                    phase_table.add_row(row)

                # Add the table title and the table.
                html.add_header(2, title_map[table_type])
                html.add_to_body(phase_table.table)

        return html
class FindReportGen(ReportGenerator):
    """
    Class which generates report for asset access performance test data.
    """
    # Text placed in a table cell for which no timing was recorded.
    MISSING_VALUE = 'n/a'

    def __init__(self, db_name):
        """
        Load all timing rows from `db_name` and index the asset-find ones.
        """
        super(FindReportGen, self).__init__(db_name)
        self._read_timing_data()

    def _read_timing_data(self):
        """
        Read in the timing data from the sqlite DB and save into a dict.

        Populates:
            self.all_modulestores: set of modulestore names seen.
            self.run_data:
                { phase1: { [sort1: {] amount1: { modulestore1: duration, ...}, ...}, ...}
                The extra sort level exists only under the 'get_asset_list' phase.
        """
        self.run_data = {}
        self.all_modulestores = set()
        for row in self.all_rows:
            time_taken = row[3]

            # Split apart the description into its parts.
            desc_parts = row[2].split(':')
            # Skip rows written by other tests, and rows too short to carry
            # both a modulestore and an amount.
            if desc_parts[0] != 'FindAssetTest' or len(desc_parts) < 3:
                continue

            modulestore, amount_md = desc_parts[1:3]
            self.all_modulestores.add(modulestore)
            test_phase = desc_parts[3] if len(desc_parts) >= 4 else 'all'
            sort = desc_parts[4] if len(desc_parts) >= 5 else None

            # Save the data in a multi-level dict:
            # { phase1: { [sort1: {] amount1: { modulestore1: duration, ...}, ...}, ...}.
            phase_data = self.run_data.setdefault(test_phase, {})
            if test_phase == 'get_asset_list':
                # Add a level here for the sort.
                phase_data = phase_data.setdefault(sort, {})
            amount_data = phase_data.setdefault(amount_md, {})
            # Rows arrive ordered by descending run_id; setdefault keeps the first
            # duration seen for each slot and ignores later rows.
            amount_data.setdefault(modulestore, time_taken)

    def _build_table(self, columns, ms_keys, per_amounts, halve):
        """
        Build an HTMLTable with one row per asset amount.

        Arguments:
            columns: header labels for the table.
            ms_keys: modulestore names, in column order.
            per_amounts: { amount: { modulestore: duration } }.
            halve: when True each duration is divided by 2.0 before display.
        """
        table = HTMLTable(columns)
        # Amounts are stored as strings; order the rows numerically.
        for amount in sorted(per_amounts, key=int):
            per_amount = per_amounts[amount]
            row = [amount, ]
            for modulestore in ms_keys:
                if modulestore not in per_amount:
                    row.append(self.MISSING_VALUE)
                    continue
                time_taken = per_amount[modulestore]
                if halve:
                    time_taken = time_taken / 2.0
                row.append("{}".format(time_taken))
            table.add_row(row)
        return table

    def generate_html(self):
        """
        Generate HTML.

        Returns an HTMLDocument with one table per test phase; the
        'get_asset_list' phase gets one table per sort instead.
        """
        html = HTMLDocument("Results")

        # Make the table header columns (shared by every table).
        ms_keys = sorted(self.all_modulestores)
        columns = ["Asset Metadata Amount", ]
        columns.extend("Time Taken (ms) ({})".format(k) for k in ms_keys)

        # Output comparison of each phase to a different table.
        # Phases are sorted so the report layout is the same on every run.
        for phase in sorted(self.run_data):
            per_phase = self.run_data[phase]
            html.add_header(2, phase)

            if phase != 'get_asset_list':
                phase_table = self._build_table(columns, ms_keys, per_phase, halve=False)
                html.add_to_body(phase_table.table)
                continue

            # get_asset_list phase includes the sort as well.
            # key=str keeps the ordering well-defined if a row had no sort (None).
            for sort in sorted(per_phase, key=str):
                # Each sort has two different ranges retrieved, so halve the
                # recorded time to report the cost of a single retrieval.
                sort_table = self._build_table(columns, ms_keys, per_phase[sort], halve=True)
                html.add_header(3, sort)
                html.add_to_body(sort_table.table)

        return html
if click is not None:
    # The command is only defined when the optional `click` package imported successfully.
    # --data_type is restricted to the two supported report kinds so that an
    # unrecognised value is rejected by click with a usage error, instead of
    # reaching click.echo() with `html` never assigned.
    @click.command()
    @click.argument('outfile', type=click.File('w'), default='-', required=False)
    @click.option('--db_name', help='Name of sqlite database from which to read data.', default=DB_NAME)
    @click.option(
        '--data_type',
        help='Data type to process. One of: "imp_exp" or "find"',
        type=click.Choice(['imp_exp', 'find']),
        default="find",
    )
    def cli(outfile, db_name, data_type):
        """
        Generate an HTML report from the sqlite timing data.
        """
        # Map each supported data type onto the report generator that handles it.
        generator_classes = {
            'imp_exp': ImportExportReportGen,
            'find': FindReportGen,
        }
        generator = generator_classes[data_type](db_name)
        html = generator.generate_html()
        click.echo(html.tostring(), file=outfile)
# Make a row for each amount of asset metadata.
for
amount
in
sorted
(
per_phase
.
keys
()):
per_amount
=
per_phase
[
amount
]
num_assets
=
int
(
amount
)
row
=
[
amount
,
]
for
modulestore
in
ms_keys
:
if
table_type
==
'duration'
:
value
=
per_amount
[
modulestore
]
elif
table_type
==
'ratio'
:
if
num_assets
!=
0
:
value
=
per_amount
[
modulestore
]
/
float
(
amount
)
else
:
value
=
0
elif
table_type
==
'variable_cost'
:
if
num_assets
==
0
:
value
=
0
else
:
value
=
(
per_amount
[
modulestore
]
-
per_phase
[
'0'
][
modulestore
])
/
float
(
amount
)
row
.
append
(
"{}"
.
format
(
value
))
phase_table
.
add_row
(
row
)
# Add the table title and the table.
html
.
add_header
(
2
,
title_map
[
table_type
])
html
.
add_to_body
(
phase_table
.
table
)
return
html
# @click.command()
# @click.argument('outfile', type=click.File('w'), default='-', required=False)
# def cli(outfile):
# """
# Generate an HTML report from the sqlite timing data.
# """
# all_ms_combos, run_data = read_timing_data()
# html = generate_html(all_ms_combos, run_data)
# click.echo(html.tostring(), file=outfile)
# if __name__ == '__main__':
# cli() # pylint: disable=no-value-for-parameter
if __name__ == '__main__':
    if click is not None:
        # click supplies the command's arguments from sys.argv.
        cli()  # pylint: disable=no-value-for-parameter
    else:
        # The parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Aborted! Module 'click' is not installed.")
common/lib/xmodule/xmodule/modulestore/perf_tests/test_asset_import_export.py
View file @
b14eaa03
...
...
@@ -10,7 +10,9 @@ from shutil import rmtree
import
ddt
#from nose.plugins.attrib import attr
from
nose.plugins.skip
import
SkipTest
from
xmodule.assetstore
import
AssetMetadata
from
xmodule.modulestore
import
ModuleStoreEnum
from
xmodule.modulestore.xml_importer
import
import_from_xml
from
xmodule.modulestore.xml_exporter
import
export_to_xml
from
xmodule.modulestore.tests.test_cross_modulestore_import_export
import
(
...
...
@@ -23,15 +25,10 @@ from xmodule.modulestore.perf_tests.generate_asset_xml import make_asset_xml, va
# The dependency below needs to be installed manually from the development.txt file, which doesn't
# get installed during unit tests!
#from code_block_timer import CodeBlockTimer
class CodeBlockTimer(object):
    """
    To fake out the tests below, this class definition is used. Remove it when uncommenting above.

    The tests use the timer in `with` statements, so this stand-in implements
    the context-manager protocol as a no-op.
    """
    def __init__(self, desc):
        # The description is accepted for signature compatibility and ignored.
        pass

    def __enter__(self):
        # NOTE(review): the tests in this file never bind the `with` target, so
        # returning self is an assumption about the real timer - confirm if needed.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Never suppress an exception raised inside the timed block.
        return False
try
:
from
code_block_timer
import
CodeBlockTimer
except
ImportError
:
CodeBlockTimer
=
None
# Number of assets saved in the modulestore per test run.
ASSET_AMOUNT_PER_TEST
=
(
1
,
10
,
100
,
1000
,
10000
)
...
...
@@ -42,6 +39,13 @@ COURSE_NAME = 'manual-testing-complete'
# A list of courses to test - only one.
TEST_COURSE
=
(
COURSE_NAME
,
)
ALL_SORTS
=
(
(
'displayname'
,
ModuleStoreEnum
.
SortOrder
.
ascending
),
(
'displayname'
,
ModuleStoreEnum
.
SortOrder
.
descending
),
(
'uploadDate'
,
ModuleStoreEnum
.
SortOrder
.
ascending
),
(
'uploadDate'
,
ModuleStoreEnum
.
SortOrder
.
descending
),
)
# pylint: disable=invalid-name
TEST_DIR
=
path
(
__file__
)
.
dirname
()
PLATFORM_ROOT
=
TEST_DIR
.
parent
.
parent
.
parent
.
parent
.
parent
.
parent
...
...
@@ -80,10 +84,13 @@ class CrossStoreXMLRoundtrip(unittest.TestCase):
ASSET_AMOUNT_PER_TEST
))
@ddt.unpack
def
test_generate_timings
(
self
,
source_ms
,
dest_ms
,
num_assets
):
def
test_generate_
import_export_
timings
(
self
,
source_ms
,
dest_ms
,
num_assets
):
"""
Generate timings for different amounts of asset metadata and different modulestores.
"""
if
CodeBlockTimer
is
None
:
raise
SkipTest
(
"CodeBlockTimer undefined."
)
desc
=
"XMLRoundTrip:{}->{}:{}"
.
format
(
SHORT_NAME_MAP
[
source_ms
],
SHORT_NAME_MAP
[
dest_ms
],
...
...
@@ -140,3 +147,89 @@ class CrossStoreXMLRoundtrip(unittest.TestCase):
create_course_if_not_present
=
True
,
raise_on_failure
=
True
,
)
@ddt.ddt
# Eventually, exclude this attribute from regular unittests while running *only* tests
# with this attribute during regular performance tests.
# @attr("perf_test")
# unittest.skip must be called with a reason: applied bare to a class, it replaces
# the class with its inner decorator function instead of marking the tests as skipped.
@unittest.skip("Performance test - not part of regular unittest runs.")
class FindAssetTest(unittest.TestCase):
    """
    This class exists to time asset finding in different modulestore
    classes with different amounts of asset metadata.
    """

    # Use this attribute to skip this test on regular unittest CI runs.
    perf_test = True

    def setUp(self):
        """
        Create a temporary export directory that is removed after the test.
        """
        super(FindAssetTest, self).setUp()
        self.export_dir = mkdtemp()
        self.addCleanup(rmtree, self.export_dir, ignore_errors=True)

    @ddt.data(*itertools.product(
        MODULESTORE_SETUPS,
        ASSET_AMOUNT_PER_TEST,
    ))
    @ddt.unpack
    def test_generate_find_timings(self, source_ms, num_assets):
        """
        Generate timings for different amounts of asset metadata and different modulestores.
        """
        if CodeBlockTimer is None:
            raise SkipTest("CodeBlockTimer undefined.")

        desc = "FindAssetTest:{}:{}".format(
            SHORT_NAME_MAP[source_ms],
            num_assets,
        )

        with CodeBlockTimer(desc):

            with CodeBlockTimer("fake_assets"):
                # First, make the fake asset metadata.
                make_asset_xml(num_assets, ASSET_XML_PATH)
                validate_xml(ASSET_XSD_PATH, ASSET_XML_PATH)

            # Construct the contentstore for storing the first import
            with MongoContentstoreBuilder().build() as source_content:
                # Construct the modulestore for storing the first import (using the previously created contentstore)
                with source_ms.build(source_content) as source_store:
                    source_course_key = source_store.make_course_key('a', 'course', 'course')
                    asset_key = source_course_key.make_asset_key(
                        AssetMetadata.GENERAL_ASSET_TYPE, 'silly_cat_picture.gif'
                    )

                    with CodeBlockTimer("initial_import"):
                        import_from_xml(
                            source_store,
                            'test_user',
                            TEST_DATA_ROOT,
                            course_dirs=TEST_COURSE,
                            static_content_store=source_content,
                            target_course_id=source_course_key,
                            create_course_if_not_present=True,
                            raise_on_failure=True,
                        )

                    with CodeBlockTimer("find_nonexistent_asset"):
                        # More correct would be using the AssetManager.find() - but since the test
                        # has created its own test modulestore, the AssetManager can't be used.
                        __ = source_store.find_asset_metadata(asset_key)

                    # Perform get_all_asset_metadata for each sort.
                    for sort in ALL_SORTS:
                        sort_direction = 'asc' if sort[1] == ModuleStoreEnum.SortOrder.ascending else 'desc'
                        with CodeBlockTimer("get_asset_list:{}-{}".format(sort[0], sort_direction)):
                            # Grab two ranges of 50 assets using different sorts.
                            # Why 50? That's how many are displayed on the current Studio "Files & Uploads" page.
                            # Floor division keeps the start index an integer on Python 2 and 3 alike.
                            start_middle = num_assets // 2
                            __ = source_store.get_all_asset_metadata(
                                source_course_key, 'asset', start=0, sort=sort, maxresults=50
                            )
                            __ = source_store.get_all_asset_metadata(
                                source_course_key, 'asset', start=start_middle, sort=sort, maxresults=50
                            )
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment