Commit 062e0b21 by John Eskew

Implement course-wide asset paging methods.

Add SortedCollection class to help sort lists efficiently.
https://openedx.atlassian.net/browse/PLAT-74
parent fece2b9d
...@@ -5,6 +5,7 @@ Classes representing asset & asset thumbnail metadata. ...@@ -5,6 +5,7 @@ Classes representing asset & asset thumbnail metadata.
from datetime import datetime from datetime import datetime
import pytz import pytz
from contracts import contract, new_contract from contracts import contract, new_contract
from bisect import bisect_left, bisect_right
from opaque_keys.edx.keys import CourseKey, AssetKey from opaque_keys.edx.keys import CourseKey, AssetKey
new_contract('AssetKey', AssetKey) new_contract('AssetKey', AssetKey)
...@@ -33,8 +34,8 @@ class AssetMetadata(object): ...@@ -33,8 +34,8 @@ class AssetMetadata(object):
# All AssetMetadata objects should have AssetLocators with this type. # All AssetMetadata objects should have AssetLocators with this type.
ASSET_TYPE = 'asset' ASSET_TYPE = 'asset'
@contract(asset_id='AssetKey', basename='basestring | None', internal_name='str | None', locked='bool | None', contenttype='basestring | None', @contract(asset_id='AssetKey', basename='basestring|None', internal_name='basestring|None', locked='bool|None', contenttype='basestring|None',
md5='str | None', curr_version='str | None', prev_version='str | None', edited_by='int | None', edited_on='datetime | None') md5='basestring|None', curr_version='basestring|None', prev_version='basestring|None', edited_by='int|None', edited_on='datetime|None')
def __init__(self, asset_id, def __init__(self, asset_id,
basename=None, internal_name=None, basename=None, internal_name=None,
locked=None, contenttype=None, md5=None, locked=None, contenttype=None, md5=None,
...@@ -99,12 +100,10 @@ class AssetMetadata(object): ...@@ -99,12 +100,10 @@ class AssetMetadata(object):
'locked': self.locked, 'locked': self.locked,
'contenttype': self.contenttype, 'contenttype': self.contenttype,
'md5': self.md5, 'md5': self.md5,
'edit_info': { 'curr_version': self.curr_version,
'curr_version': self.curr_version, 'prev_version': self.prev_version,
'prev_version': self.prev_version, 'edited_by': self.edited_by,
'edited_by': self.edited_by, 'edited_on': self.edited_on
'edited_on': self.edited_on
}
} }
@contract(asset_doc='dict | None') @contract(asset_doc='dict | None')
...@@ -121,11 +120,10 @@ class AssetMetadata(object): ...@@ -121,11 +120,10 @@ class AssetMetadata(object):
self.locked = asset_doc['locked'] self.locked = asset_doc['locked']
self.contenttype = asset_doc['contenttype'] self.contenttype = asset_doc['contenttype']
self.md5 = asset_doc['md5'] self.md5 = asset_doc['md5']
edit_info = asset_doc['edit_info'] self.curr_version = asset_doc['curr_version']
self.curr_version = edit_info['curr_version'] self.prev_version = asset_doc['prev_version']
self.prev_version = edit_info['prev_version'] self.edited_by = asset_doc['edited_by']
self.edited_by = edit_info['edited_by'] self.edited_on = asset_doc['edited_on']
self.edited_on = edit_info['edited_on']
class AssetThumbnailMetadata(object): class AssetThumbnailMetadata(object):
......
...@@ -14,6 +14,8 @@ import collections ...@@ -14,6 +14,8 @@ import collections
from contextlib import contextmanager from contextlib import contextmanager
import functools import functools
import threading import threading
from operator import itemgetter
from sortedcontainers import SortedListWithKey
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
from contracts import contract, new_contract from contracts import contract, new_contract
...@@ -292,19 +294,23 @@ class ModuleStoreAssetInterface(object): ...@@ -292,19 +294,23 @@ class ModuleStoreAssetInterface(object):
if course_assets is None: if course_assets is None:
return None, None return None, None
if get_thumbnail: info = 'thumbnails' if get_thumbnail else 'assets'
all_assets = course_assets['thumbnails'] all_assets = SortedListWithKey([], key=itemgetter('filename'))
else: # Assets should be pre-sorted, so add them efficiently without sorting.
all_assets = course_assets['assets'] # extend() will raise a ValueError if the passed-in list is not sorted.
all_assets.extend(course_assets.get(info, []))
# See if this asset already exists by checking the external_filename. # See if this asset already exists by checking the external_filename.
# Studio doesn't currently support using multiple course assets with the same filename. # Studio doesn't currently support using multiple course assets with the same filename.
# So use the filename as the unique identifier. # So use the filename as the unique identifier.
for idx, asset in enumerate(all_assets): idx = None
if asset['filename'] == filename: idx_left = all_assets.bisect_left({'filename': filename})
return course_assets, idx idx_right = all_assets.bisect_right({'filename': filename})
if idx_left != idx_right:
# Asset was found in the list.
idx = idx_left
return course_assets, None return course_assets, idx
@contract(asset_key='AssetKey') @contract(asset_key='AssetKey')
def _find_asset_info(self, asset_key, thumbnail=False, **kwargs): def _find_asset_info(self, asset_key, thumbnail=False, **kwargs):
...@@ -358,14 +364,14 @@ class ModuleStoreAssetInterface(object): ...@@ -358,14 +364,14 @@ class ModuleStoreAssetInterface(object):
""" """
return self._find_asset_info(asset_key, thumbnail=True, **kwargs) return self._find_asset_info(asset_key, thumbnail=True, **kwargs)
@contract(course_key='CourseKey', start='int | None', maxresults='int | None', sort='list | None', get_thumbnails='bool') @contract(course_key='CourseKey', start='int|None', maxresults='int|None', sort='tuple(str,str)|None', get_thumbnails='bool')
def _get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, get_thumbnails=False, **kwargs): def _get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, get_thumbnails=False, **kwargs):
""" """
Returns a list of static asset (or thumbnail) metadata for a course. Returns a list of static asset (or thumbnail) metadata for a course.
Args: Args:
course_key (CourseKey): course identifier course_key (CourseKey): course identifier
start (int): optional - start at this asset number start (int): optional - start at this asset number. Zero-based!
maxresults (int): optional - return at most this many, -1 means no limit maxresults (int): optional - return at most this many, -1 means no limit
sort (array): optional - None means no sort sort (array): optional - None means no sort
(sort_by (str), sort_order (str)) (sort_by (str), sort_order (str))
...@@ -382,35 +388,60 @@ class ModuleStoreAssetInterface(object): ...@@ -382,35 +388,60 @@ class ModuleStoreAssetInterface(object):
# to distinguish zero assets from "not able to retrieve assets". # to distinguish zero assets from "not able to retrieve assets".
return None return None
if get_thumbnails: # Determine the proper sort - with defaults of ('displayname', 'ascending').
all_assets = course_assets.get('thumbnails', []) sort_field = 'filename'
else: sort_order = 'ascending'
all_assets = course_assets.get('assets', []) if sort:
if sort[0] == 'uploadDate':
# DO_NEXT: Add start/maxresults/sort functionality as part of https://openedx.atlassian.net/browse/PLAT-74 sort_field = 'edited_on'
if start and maxresults and sort: if sort[1] == 'descending':
pass sort_order = 'descending'
info = 'thumbnails' if get_thumbnails else 'assets'
all_assets = SortedListWithKey(course_assets.get(info, []), key=itemgetter(sort_field))
num_assets = len(all_assets)
start_idx = start
end_idx = min(num_assets, start + maxresults)
if maxresults < 0:
# No limit on the results.
end_idx = num_assets
step_incr = 1
if sort_order == 'descending':
# Flip the indices and iterate backwards.
step_incr = -1
start_idx = (num_assets - 1) - start_idx
end_idx = (num_assets - 1) - end_idx
ret_assets = [] ret_assets = []
for asset in all_assets: for idx in range(start_idx, end_idx, step_incr):
asset = all_assets[idx]
if get_thumbnails: if get_thumbnails:
thumb = AssetThumbnailMetadata( thumb = AssetThumbnailMetadata(
course_key.make_asset_key('thumbnail', asset['filename']), course_key.make_asset_key('thumbnail', asset['filename']),
internal_name=asset['filename'], **kwargs internal_name=asset['filename'],
**kwargs
) )
ret_assets.append(thumb) ret_assets.append(thumb)
else: else:
asset = AssetMetadata( new_asset = AssetMetadata(
course_key.make_asset_key('asset', asset['filename']), course_key.make_asset_key('asset', asset['filename']),
basename=asset['filename'], basename=asset['filename'],
edited_on=asset['edit_info']['edited_on'], internal_name=asset['internal_name'],
locked=asset['locked'],
contenttype=asset['contenttype'], contenttype=asset['contenttype'],
md5=str(asset['md5']), **kwargs md5=asset['md5'],
curr_version=asset['curr_version'],
prev_version=asset['prev_version'],
edited_on=asset['edited_on'],
edited_by=asset['edited_by'],
**kwargs
) )
ret_assets.append(asset) ret_assets.append(new_asset)
return ret_assets return ret_assets
@contract(course_key='CourseKey', start='int | None', maxresults='int | None', sort='list | None') @contract(course_key='CourseKey', start='int|None', maxresults='int|None', sort='tuple(str,str)|None')
def get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, **kwargs): def get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, **kwargs):
""" """
Returns a list of static assets for a course. Returns a list of static assets for a course.
......
...@@ -370,7 +370,7 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase): ...@@ -370,7 +370,7 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
return store.find_asset_thumbnail_metadata(asset_key, **kwargs) return store.find_asset_thumbnail_metadata(asset_key, **kwargs)
@strip_key @strip_key
@contract(course_key='CourseKey', start=int, maxresults=int, sort='list | None') @contract(course_key='CourseKey', start=int, maxresults=int, sort='tuple|None')
def get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, **kwargs): def get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, **kwargs):
""" """
Returns a list of static assets for a course. Returns a list of static assets for a course.
......
...@@ -25,6 +25,8 @@ from path import path ...@@ -25,6 +25,8 @@ from path import path
from datetime import datetime from datetime import datetime
from pytz import UTC from pytz import UTC
from contracts import contract, new_contract from contracts import contract, new_contract
from operator import itemgetter
from sortedcontainers import SortedListWithKey
from importlib import import_module from importlib import import_module
from xmodule.errortracker import null_error_tracker, exc_info_to_str from xmodule.errortracker import null_error_tracker, exc_info_to_str
...@@ -1493,7 +1495,10 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo ...@@ -1493,7 +1495,10 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
course_assets, asset_idx = self._find_course_asset(course_key, asset_metadata.asset_id.path, thumbnail) course_assets, asset_idx = self._find_course_asset(course_key, asset_metadata.asset_id.path, thumbnail)
info = 'thumbnails' if thumbnail else 'assets' info = 'thumbnails' if thumbnail else 'assets'
all_assets = course_assets[info] all_assets = SortedListWithKey([], key=itemgetter('filename'))
# Assets should be pre-sorted, so add them efficiently without sorting.
# extend() will raise a ValueError if the passed-in list is not sorted.
all_assets.extend(course_assets[info])
# Set the edited information for assets only - not thumbnails. # Set the edited information for assets only - not thumbnails.
if not thumbnail: if not thumbnail:
...@@ -1502,15 +1507,15 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo ...@@ -1502,15 +1507,15 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
# Translate metadata to Mongo format. # Translate metadata to Mongo format.
metadata_to_insert = asset_metadata.to_mongo() metadata_to_insert = asset_metadata.to_mongo()
if asset_idx is None: if asset_idx is None:
# Append new metadata. # Add new metadata sorted into the list.
# Future optimization: Insert in order & binary search to retrieve. all_assets.add(metadata_to_insert)
all_assets.append(metadata_to_insert)
else: else:
# Replace existing metadata. # Replace existing metadata.
all_assets[asset_idx] = metadata_to_insert all_assets.pop(asset_idx)
all_assets.insert(asset_idx, metadata_to_insert)
# Update the document. # Update the document.
self.asset_collection.update({'_id': course_assets['_id']}, {'$set': {info: all_assets}}) self.asset_collection.update({'_id': course_assets['_id']}, {'$set': {info: all_assets.as_list()}})
return True return True
@contract(asset_key='AssetKey', attr_dict=dict) @contract(asset_key='AssetKey', attr_dict=dict)
......
...@@ -6,6 +6,7 @@ from datetime import datetime, timedelta ...@@ -6,6 +6,7 @@ from datetime import datetime, timedelta
import pytz import pytz
import unittest import unittest
import ddt import ddt
from time import sleep
from xmodule.assetstore import AssetMetadata, AssetThumbnailMetadata from xmodule.assetstore import AssetMetadata, AssetThumbnailMetadata
from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore import ModuleStoreEnum
...@@ -79,12 +80,23 @@ class TestMongoAssetMetadataStorage(unittest.TestCase): ...@@ -79,12 +80,23 @@ class TestMongoAssetMetadataStorage(unittest.TestCase):
asset6_vals = ('asset.txt', 'JJJCCC747858', '/dev/null', False, ModuleStoreEnum.UserID.test * 4, datetime.now(pytz.utc), '50', '49') asset6_vals = ('asset.txt', 'JJJCCC747858', '/dev/null', False, ModuleStoreEnum.UserID.test * 4, datetime.now(pytz.utc), '50', '49')
asset6 = dict(zip(asset_fields[1:], asset6_vals[1:])) asset6 = dict(zip(asset_fields[1:], asset6_vals[1:]))
# More assets.
asset7_vals = ('roman_history.pdf', 'JASDUNSADK', 'texts/italy', True, ModuleStoreEnum.UserID.test * 7, datetime.now(pytz.utc), '1.1', '1.01')
asset8_vals = ('weather_patterns.bmp', '928SJXX2EB', 'science', False, ModuleStoreEnum.UserID.test * 8, datetime.now(pytz.utc), '52', '51')
asset9_vals = ('demo.swf', 'DFDFGGGG14', 'demos/easy', False, ModuleStoreEnum.UserID.test * 9, datetime.now(pytz.utc), '5', '4')
asset7 = dict(zip(asset_fields[1:], asset7_vals[1:]))
asset8 = dict(zip(asset_fields[1:], asset8_vals[1:]))
asset9 = dict(zip(asset_fields[1:], asset9_vals[1:]))
asset1_key = course1_key.make_asset_key('asset', asset1_vals[0]) asset1_key = course1_key.make_asset_key('asset', asset1_vals[0])
asset2_key = course1_key.make_asset_key('asset', asset2_vals[0]) asset2_key = course1_key.make_asset_key('asset', asset2_vals[0])
asset3_key = course2_key.make_asset_key('asset', asset3_vals[0]) asset3_key = course2_key.make_asset_key('asset', asset3_vals[0])
asset4_key = course2_key.make_asset_key('asset', asset4_vals[0]) asset4_key = course2_key.make_asset_key('asset', asset4_vals[0])
asset5_key = course2_key.make_asset_key('asset', asset5_vals[0]) asset5_key = course2_key.make_asset_key('asset', asset5_vals[0])
asset6_key = course2_key.make_asset_key('asset', asset6_vals[0]) asset6_key = course2_key.make_asset_key('asset', asset6_vals[0])
asset7_key = course2_key.make_asset_key('asset', asset7_vals[0])
asset8_key = course2_key.make_asset_key('asset', asset8_vals[0])
asset9_key = course2_key.make_asset_key('asset', asset9_vals[0])
asset1_md = AssetMetadata(asset1_key, **asset1) asset1_md = AssetMetadata(asset1_key, **asset1)
asset2_md = AssetMetadata(asset2_key, **asset2) asset2_md = AssetMetadata(asset2_key, **asset2)
...@@ -92,13 +104,26 @@ class TestMongoAssetMetadataStorage(unittest.TestCase): ...@@ -92,13 +104,26 @@ class TestMongoAssetMetadataStorage(unittest.TestCase):
asset4_md = AssetMetadata(asset4_key, **asset4) asset4_md = AssetMetadata(asset4_key, **asset4)
asset5_md = AssetMetadata(asset5_key, **non_existent_asset) asset5_md = AssetMetadata(asset5_key, **non_existent_asset)
asset6_md = AssetMetadata(asset6_key, **asset6) asset6_md = AssetMetadata(asset6_key, **asset6)
asset7_md = AssetMetadata(asset7_key, **asset7)
asset8_md = AssetMetadata(asset8_key, **asset8)
asset9_md = AssetMetadata(asset9_key, **asset9)
if store is not None: if store is not None:
# Sleeps are to ensure that edited_on order is correct.
store.save_asset_metadata(course1_key, asset1_md, ModuleStoreEnum.UserID.test) store.save_asset_metadata(course1_key, asset1_md, ModuleStoreEnum.UserID.test)
store.save_asset_metadata(course1_key, asset2_md, ModuleStoreEnum.UserID.test) sleep(0.0001)
store.save_asset_metadata(course2_key, asset3_md, ModuleStoreEnum.UserID.test) store.save_asset_metadata(course1_key, asset2_md, ModuleStoreEnum.UserID.test * 2)
store.save_asset_metadata(course2_key, asset4_md, ModuleStoreEnum.UserID.test) sleep(0.0001)
store.save_asset_metadata(course2_key, asset3_md, ModuleStoreEnum.UserID.test * 3)
sleep(0.0001)
store.save_asset_metadata(course2_key, asset4_md, ModuleStoreEnum.UserID.test * 4)
sleep(0.0001)
# 5 & 6 are not saved on purpose! # 5 & 6 are not saved on purpose!
store.save_asset_metadata(course2_key, asset7_md, ModuleStoreEnum.UserID.test * 7)
sleep(0.0001)
store.save_asset_metadata(course2_key, asset8_md, ModuleStoreEnum.UserID.test * 8)
sleep(0.0001)
store.save_asset_metadata(course2_key, asset9_md, ModuleStoreEnum.UserID.test * 9)
return (asset1_md, asset2_md, asset3_md, asset4_md, asset5_md, asset6_md) return (asset1_md, asset2_md, asset3_md, asset4_md, asset5_md, asset6_md)
...@@ -387,8 +412,67 @@ class TestMongoAssetMetadataStorage(unittest.TestCase): ...@@ -387,8 +412,67 @@ class TestMongoAssetMetadataStorage(unittest.TestCase):
store.delete_all_asset_metadata(course.id, ModuleStoreEnum.UserID.test) store.delete_all_asset_metadata(course.id, ModuleStoreEnum.UserID.test)
self.assertEquals(len(store.get_all_asset_thumbnail_metadata(course.id)), 0) self.assertEquals(len(store.get_all_asset_thumbnail_metadata(course.id)), 0)
def test_get_all_assets_with_paging(self): @ddt.data(*MODULESTORE_SETUPS)
pass def test_get_all_assets_with_paging(self, storebuilder):
"""
Save multiple metadata in each store and retrieve it singularly, as all assets, and after deleting all.
"""
# Temporarily only perform this test for Old Mongo - not Split.
if not isinstance(storebuilder, MongoModulestoreBuilder):
raise unittest.SkipTest
with MongoContentstoreBuilder().build() as contentstore:
with storebuilder.build(contentstore) as store:
course1 = CourseFactory.create(modulestore=store)
course2 = CourseFactory.create(modulestore=store)
self.setup_assets(course1.id, course2.id, store)
expected_sorts_by_2 = (
(
('displayname', 'ascending'),
('code.tgz', 'demo.swf', 'dog.png', 'roman_history.pdf', 'weather_patterns.bmp'),
(2, 2, 1)
),
(
('displayname', 'descending'),
('weather_patterns.bmp', 'roman_history.pdf', 'dog.png', 'demo.swf', 'code.tgz'),
(2, 2, 1)
),
(
('uploadDate', 'ascending'),
('code.tgz', 'dog.png', 'roman_history.pdf', 'weather_patterns.bmp', 'demo.swf'),
(2, 2, 1)
),
(
('uploadDate', 'descending'),
('demo.swf', 'weather_patterns.bmp', 'roman_history.pdf', 'dog.png', 'code.tgz'),
(2, 2, 1)
),
)
# First, with paging across all sorts.
for sort_test in expected_sorts_by_2:
for i in xrange(3):
asset_page = store.get_all_asset_metadata(course2.id, start=2 * i, maxresults=2, sort=sort_test[0])
self.assertEquals(len(asset_page), sort_test[2][i])
self.assertEquals(asset_page[0].asset_id.path, sort_test[1][2 * i])
if sort_test[2][i] == 2:
self.assertEquals(asset_page[1].asset_id.path, sort_test[1][(2 * i) + 1])
# Now fetch everything.
asset_page = store.get_all_asset_metadata(course2.id, start=0, sort=('displayname', 'ascending'))
self.assertEquals(len(asset_page), 5)
self.assertEquals(asset_page[0].asset_id.path, 'code.tgz')
self.assertEquals(asset_page[1].asset_id.path, 'demo.swf')
self.assertEquals(asset_page[2].asset_id.path, 'dog.png')
self.assertEquals(asset_page[3].asset_id.path, 'roman_history.pdf')
self.assertEquals(asset_page[4].asset_id.path, 'weather_patterns.bmp')
# Some odd conditions.
asset_page = store.get_all_asset_metadata(course2.id, start=100, sort=('displayname', 'ascending'))
self.assertEquals(len(asset_page), 0)
asset_page = store.get_all_asset_metadata(course2.id, start=3, maxresults=0, sort=('displayname', 'ascending'))
self.assertEquals(len(asset_page), 0)
asset_page = store.get_all_asset_metadata(course2.id, start=3, maxresults=-12345, sort=('displayname', 'descending'))
self.assertEquals(len(asset_page), 2)
def test_copy_all_assets(self): def test_copy_all_assets(self):
pass pass
......
...@@ -56,6 +56,7 @@ PyYAML==3.10 ...@@ -56,6 +56,7 @@ PyYAML==3.10
requests==2.3.0 requests==2.3.0
Shapely==1.2.16 Shapely==1.2.16
sorl-thumbnail==11.12 sorl-thumbnail==11.12
sortedcontainers==0.9.2
South==0.7.6 South==0.7.6
sympy==0.7.1 sympy==0.7.1
xmltodict==0.4.1 xmltodict==0.4.1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment