Commit 062e0b21 by John Eskew

Implement course-wide asset paging methods.

Add SortedCollection class to help sort lists efficiently.
https://openedx.atlassian.net/browse/PLAT-74
parent fece2b9d
......@@ -5,6 +5,7 @@ Classes representing asset & asset thumbnail metadata.
from datetime import datetime
import pytz
from contracts import contract, new_contract
from bisect import bisect_left, bisect_right
from opaque_keys.edx.keys import CourseKey, AssetKey
new_contract('AssetKey', AssetKey)
......@@ -33,8 +34,8 @@ class AssetMetadata(object):
# All AssetMetadata objects should have AssetLocators with this type.
ASSET_TYPE = 'asset'
@contract(asset_id='AssetKey', basename='basestring | None', internal_name='str | None', locked='bool | None', contenttype='basestring | None',
md5='str | None', curr_version='str | None', prev_version='str | None', edited_by='int | None', edited_on='datetime | None')
@contract(asset_id='AssetKey', basename='basestring|None', internal_name='basestring|None', locked='bool|None', contenttype='basestring|None',
md5='basestring|None', curr_version='basestring|None', prev_version='basestring|None', edited_by='int|None', edited_on='datetime|None')
def __init__(self, asset_id,
basename=None, internal_name=None,
locked=None, contenttype=None, md5=None,
......@@ -99,12 +100,10 @@ class AssetMetadata(object):
'locked': self.locked,
'contenttype': self.contenttype,
'md5': self.md5,
'edit_info': {
'curr_version': self.curr_version,
'prev_version': self.prev_version,
'edited_by': self.edited_by,
'edited_on': self.edited_on
}
'curr_version': self.curr_version,
'prev_version': self.prev_version,
'edited_by': self.edited_by,
'edited_on': self.edited_on
}
@contract(asset_doc='dict | None')
......@@ -121,11 +120,10 @@ class AssetMetadata(object):
self.locked = asset_doc['locked']
self.contenttype = asset_doc['contenttype']
self.md5 = asset_doc['md5']
edit_info = asset_doc['edit_info']
self.curr_version = edit_info['curr_version']
self.prev_version = edit_info['prev_version']
self.edited_by = edit_info['edited_by']
self.edited_on = edit_info['edited_on']
self.curr_version = asset_doc['curr_version']
self.prev_version = asset_doc['prev_version']
self.edited_by = asset_doc['edited_by']
self.edited_on = asset_doc['edited_on']
class AssetThumbnailMetadata(object):
......
......@@ -14,6 +14,8 @@ import collections
from contextlib import contextmanager
import functools
import threading
from operator import itemgetter
from sortedcontainers import SortedListWithKey
from abc import ABCMeta, abstractmethod
from contracts import contract, new_contract
......@@ -292,19 +294,23 @@ class ModuleStoreAssetInterface(object):
if course_assets is None:
return None, None
if get_thumbnail:
all_assets = course_assets['thumbnails']
else:
all_assets = course_assets['assets']
info = 'thumbnails' if get_thumbnail else 'assets'
all_assets = SortedListWithKey([], key=itemgetter('filename'))
# Assets should be pre-sorted, so add them efficiently without sorting.
# extend() will raise a ValueError if the passed-in list is not sorted.
all_assets.extend(course_assets.get(info, []))
# See if this asset already exists by checking the external_filename.
# Studio doesn't currently support using multiple course assets with the same filename.
# So use the filename as the unique identifier.
for idx, asset in enumerate(all_assets):
if asset['filename'] == filename:
return course_assets, idx
idx = None
idx_left = all_assets.bisect_left({'filename': filename})
idx_right = all_assets.bisect_right({'filename': filename})
if idx_left != idx_right:
# Asset was found in the list.
idx = idx_left
return course_assets, None
return course_assets, idx
@contract(asset_key='AssetKey')
def _find_asset_info(self, asset_key, thumbnail=False, **kwargs):
......@@ -358,14 +364,14 @@ class ModuleStoreAssetInterface(object):
"""
return self._find_asset_info(asset_key, thumbnail=True, **kwargs)
@contract(course_key='CourseKey', start='int | None', maxresults='int | None', sort='list | None', get_thumbnails='bool')
@contract(course_key='CourseKey', start='int|None', maxresults='int|None', sort='tuple(str,str)|None', get_thumbnails='bool')
def _get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, get_thumbnails=False, **kwargs):
"""
Returns a list of static asset (or thumbnail) metadata for a course.
Args:
course_key (CourseKey): course identifier
start (int): optional - start at this asset number
start (int): optional - start at this asset number. Zero-based!
maxresults (int): optional - return at most this many, -1 means no limit
sort (array): optional - None means no sort
(sort_by (str), sort_order (str))
......@@ -382,35 +388,60 @@ class ModuleStoreAssetInterface(object):
# to distinguish zero assets from "not able to retrieve assets".
return None
if get_thumbnails:
all_assets = course_assets.get('thumbnails', [])
else:
all_assets = course_assets.get('assets', [])
# DO_NEXT: Add start/maxresults/sort functionality as part of https://openedx.atlassian.net/browse/PLAT-74
if start and maxresults and sort:
pass
# Determine the proper sort - with defaults of ('displayname', 'ascending').
sort_field = 'filename'
sort_order = 'ascending'
if sort:
if sort[0] == 'uploadDate':
sort_field = 'edited_on'
if sort[1] == 'descending':
sort_order = 'descending'
info = 'thumbnails' if get_thumbnails else 'assets'
all_assets = SortedListWithKey(course_assets.get(info, []), key=itemgetter(sort_field))
num_assets = len(all_assets)
start_idx = start
end_idx = min(num_assets, start + maxresults)
if maxresults < 0:
# No limit on the results.
end_idx = num_assets
step_incr = 1
if sort_order == 'descending':
# Flip the indices and iterate backwards.
step_incr = -1
start_idx = (num_assets - 1) - start_idx
end_idx = (num_assets - 1) - end_idx
ret_assets = []
for asset in all_assets:
for idx in range(start_idx, end_idx, step_incr):
asset = all_assets[idx]
if get_thumbnails:
thumb = AssetThumbnailMetadata(
course_key.make_asset_key('thumbnail', asset['filename']),
internal_name=asset['filename'], **kwargs
internal_name=asset['filename'],
**kwargs
)
ret_assets.append(thumb)
else:
asset = AssetMetadata(
new_asset = AssetMetadata(
course_key.make_asset_key('asset', asset['filename']),
basename=asset['filename'],
edited_on=asset['edit_info']['edited_on'],
internal_name=asset['internal_name'],
locked=asset['locked'],
contenttype=asset['contenttype'],
md5=str(asset['md5']), **kwargs
md5=asset['md5'],
curr_version=asset['curr_version'],
prev_version=asset['prev_version'],
edited_on=asset['edited_on'],
edited_by=asset['edited_by'],
**kwargs
)
ret_assets.append(asset)
ret_assets.append(new_asset)
return ret_assets
@contract(course_key='CourseKey', start='int | None', maxresults='int | None', sort='list | None')
@contract(course_key='CourseKey', start='int|None', maxresults='int|None', sort='tuple(str,str)|None')
def get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, **kwargs):
"""
Returns a list of static assets for a course.
......
......@@ -370,7 +370,7 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
return store.find_asset_thumbnail_metadata(asset_key, **kwargs)
@strip_key
@contract(course_key='CourseKey', start=int, maxresults=int, sort='list | None')
@contract(course_key='CourseKey', start=int, maxresults=int, sort='tuple|None')
def get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, **kwargs):
"""
Returns a list of static assets for a course.
......
......@@ -25,6 +25,8 @@ from path import path
from datetime import datetime
from pytz import UTC
from contracts import contract, new_contract
from operator import itemgetter
from sortedcontainers import SortedListWithKey
from importlib import import_module
from xmodule.errortracker import null_error_tracker, exc_info_to_str
......@@ -1493,7 +1495,10 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
course_assets, asset_idx = self._find_course_asset(course_key, asset_metadata.asset_id.path, thumbnail)
info = 'thumbnails' if thumbnail else 'assets'
all_assets = course_assets[info]
all_assets = SortedListWithKey([], key=itemgetter('filename'))
# Assets should be pre-sorted, so add them efficiently without sorting.
# extend() will raise a ValueError if the passed-in list is not sorted.
all_assets.extend(course_assets[info])
# Set the edited information for assets only - not thumbnails.
if not thumbnail:
......@@ -1502,15 +1507,15 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
# Translate metadata to Mongo format.
metadata_to_insert = asset_metadata.to_mongo()
if asset_idx is None:
# Append new metadata.
# Future optimization: Insert in order & binary search to retrieve.
all_assets.append(metadata_to_insert)
# Add new metadata sorted into the list.
all_assets.add(metadata_to_insert)
else:
# Replace existing metadata.
all_assets[asset_idx] = metadata_to_insert
all_assets.pop(asset_idx)
all_assets.insert(asset_idx, metadata_to_insert)
# Update the document.
self.asset_collection.update({'_id': course_assets['_id']}, {'$set': {info: all_assets}})
self.asset_collection.update({'_id': course_assets['_id']}, {'$set': {info: all_assets.as_list()}})
return True
@contract(asset_key='AssetKey', attr_dict=dict)
......
......@@ -6,6 +6,7 @@ from datetime import datetime, timedelta
import pytz
import unittest
import ddt
from time import sleep
from xmodule.assetstore import AssetMetadata, AssetThumbnailMetadata
from xmodule.modulestore import ModuleStoreEnum
......@@ -79,12 +80,23 @@ class TestMongoAssetMetadataStorage(unittest.TestCase):
asset6_vals = ('asset.txt', 'JJJCCC747858', '/dev/null', False, ModuleStoreEnum.UserID.test * 4, datetime.now(pytz.utc), '50', '49')
asset6 = dict(zip(asset_fields[1:], asset6_vals[1:]))
# More assets.
asset7_vals = ('roman_history.pdf', 'JASDUNSADK', 'texts/italy', True, ModuleStoreEnum.UserID.test * 7, datetime.now(pytz.utc), '1.1', '1.01')
asset8_vals = ('weather_patterns.bmp', '928SJXX2EB', 'science', False, ModuleStoreEnum.UserID.test * 8, datetime.now(pytz.utc), '52', '51')
asset9_vals = ('demo.swf', 'DFDFGGGG14', 'demos/easy', False, ModuleStoreEnum.UserID.test * 9, datetime.now(pytz.utc), '5', '4')
asset7 = dict(zip(asset_fields[1:], asset7_vals[1:]))
asset8 = dict(zip(asset_fields[1:], asset8_vals[1:]))
asset9 = dict(zip(asset_fields[1:], asset9_vals[1:]))
asset1_key = course1_key.make_asset_key('asset', asset1_vals[0])
asset2_key = course1_key.make_asset_key('asset', asset2_vals[0])
asset3_key = course2_key.make_asset_key('asset', asset3_vals[0])
asset4_key = course2_key.make_asset_key('asset', asset4_vals[0])
asset5_key = course2_key.make_asset_key('asset', asset5_vals[0])
asset6_key = course2_key.make_asset_key('asset', asset6_vals[0])
asset7_key = course2_key.make_asset_key('asset', asset7_vals[0])
asset8_key = course2_key.make_asset_key('asset', asset8_vals[0])
asset9_key = course2_key.make_asset_key('asset', asset9_vals[0])
asset1_md = AssetMetadata(asset1_key, **asset1)
asset2_md = AssetMetadata(asset2_key, **asset2)
......@@ -92,13 +104,26 @@ class TestMongoAssetMetadataStorage(unittest.TestCase):
asset4_md = AssetMetadata(asset4_key, **asset4)
asset5_md = AssetMetadata(asset5_key, **non_existent_asset)
asset6_md = AssetMetadata(asset6_key, **asset6)
asset7_md = AssetMetadata(asset7_key, **asset7)
asset8_md = AssetMetadata(asset8_key, **asset8)
asset9_md = AssetMetadata(asset9_key, **asset9)
if store is not None:
# Sleeps are to ensure that edited_on order is correct.
store.save_asset_metadata(course1_key, asset1_md, ModuleStoreEnum.UserID.test)
store.save_asset_metadata(course1_key, asset2_md, ModuleStoreEnum.UserID.test)
store.save_asset_metadata(course2_key, asset3_md, ModuleStoreEnum.UserID.test)
store.save_asset_metadata(course2_key, asset4_md, ModuleStoreEnum.UserID.test)
sleep(0.0001)
store.save_asset_metadata(course1_key, asset2_md, ModuleStoreEnum.UserID.test * 2)
sleep(0.0001)
store.save_asset_metadata(course2_key, asset3_md, ModuleStoreEnum.UserID.test * 3)
sleep(0.0001)
store.save_asset_metadata(course2_key, asset4_md, ModuleStoreEnum.UserID.test * 4)
sleep(0.0001)
# 5 & 6 are not saved on purpose!
store.save_asset_metadata(course2_key, asset7_md, ModuleStoreEnum.UserID.test * 7)
sleep(0.0001)
store.save_asset_metadata(course2_key, asset8_md, ModuleStoreEnum.UserID.test * 8)
sleep(0.0001)
store.save_asset_metadata(course2_key, asset9_md, ModuleStoreEnum.UserID.test * 9)
return (asset1_md, asset2_md, asset3_md, asset4_md, asset5_md, asset6_md)
......@@ -387,8 +412,67 @@ class TestMongoAssetMetadataStorage(unittest.TestCase):
store.delete_all_asset_metadata(course.id, ModuleStoreEnum.UserID.test)
self.assertEquals(len(store.get_all_asset_thumbnail_metadata(course.id)), 0)
def test_get_all_assets_with_paging(self):
pass
@ddt.data(*MODULESTORE_SETUPS)
def test_get_all_assets_with_paging(self, storebuilder):
"""
Save multiple metadata in each store and retrieve it singularly, as all assets, and after deleting all.
"""
# Temporarily only perform this test for Old Mongo - not Split.
if not isinstance(storebuilder, MongoModulestoreBuilder):
raise unittest.SkipTest
with MongoContentstoreBuilder().build() as contentstore:
with storebuilder.build(contentstore) as store:
course1 = CourseFactory.create(modulestore=store)
course2 = CourseFactory.create(modulestore=store)
self.setup_assets(course1.id, course2.id, store)
expected_sorts_by_2 = (
(
('displayname', 'ascending'),
('code.tgz', 'demo.swf', 'dog.png', 'roman_history.pdf', 'weather_patterns.bmp'),
(2, 2, 1)
),
(
('displayname', 'descending'),
('weather_patterns.bmp', 'roman_history.pdf', 'dog.png', 'demo.swf', 'code.tgz'),
(2, 2, 1)
),
(
('uploadDate', 'ascending'),
('code.tgz', 'dog.png', 'roman_history.pdf', 'weather_patterns.bmp', 'demo.swf'),
(2, 2, 1)
),
(
('uploadDate', 'descending'),
('demo.swf', 'weather_patterns.bmp', 'roman_history.pdf', 'dog.png', 'code.tgz'),
(2, 2, 1)
),
)
# First, with paging across all sorts.
for sort_test in expected_sorts_by_2:
for i in xrange(3):
asset_page = store.get_all_asset_metadata(course2.id, start=2 * i, maxresults=2, sort=sort_test[0])
self.assertEquals(len(asset_page), sort_test[2][i])
self.assertEquals(asset_page[0].asset_id.path, sort_test[1][2 * i])
if sort_test[2][i] == 2:
self.assertEquals(asset_page[1].asset_id.path, sort_test[1][(2 * i) + 1])
# Now fetch everything.
asset_page = store.get_all_asset_metadata(course2.id, start=0, sort=('displayname', 'ascending'))
self.assertEquals(len(asset_page), 5)
self.assertEquals(asset_page[0].asset_id.path, 'code.tgz')
self.assertEquals(asset_page[1].asset_id.path, 'demo.swf')
self.assertEquals(asset_page[2].asset_id.path, 'dog.png')
self.assertEquals(asset_page[3].asset_id.path, 'roman_history.pdf')
self.assertEquals(asset_page[4].asset_id.path, 'weather_patterns.bmp')
# Some odd conditions.
asset_page = store.get_all_asset_metadata(course2.id, start=100, sort=('displayname', 'ascending'))
self.assertEquals(len(asset_page), 0)
asset_page = store.get_all_asset_metadata(course2.id, start=3, maxresults=0, sort=('displayname', 'ascending'))
self.assertEquals(len(asset_page), 0)
asset_page = store.get_all_asset_metadata(course2.id, start=3, maxresults=-12345, sort=('displayname', 'descending'))
self.assertEquals(len(asset_page), 2)
def test_copy_all_assets(self):
pass
......
......@@ -56,6 +56,7 @@ PyYAML==3.10
requests==2.3.0
Shapely==1.2.16
sorl-thumbnail==11.12
sortedcontainers==0.9.2
South==0.7.6
sympy==0.7.1
xmltodict==0.4.1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment