Commit 5ffc6ac2 by John Eskew

Merge pull request #4854 from edx/jeskew/assetstore_modulestore_work

Phase 1 of adding asset metadata saving to old Mongo
parents 3d1c54fe b857a0ed
"""
Classes representing asset & asset thumbnail metadata.
"""
from datetime import datetime
from contracts import contract, new_contract
from opaque_keys.edx.keys import CourseKey, AssetKey
# Register these types with PyContracts under string names so that @contract
# specifications can refer to them by name (e.g. asset_id='AssetKey').
new_contract('AssetKey', AssetKey)
new_contract('datetime', datetime)
class IncorrectAssetIdType(Exception):
    """
    Signals that the asset key supplied when constructing an AssetMetadata or
    AssetThumbnailMetadata object carries the wrong asset type.
    """
class AssetMetadata(object):
    """
    Metadata describing a single course asset. Instances are converted to and
    from the dict form that the modulestore persists.
    """
    TOP_LEVEL_ATTRS = ['basename', 'internal_name', 'locked', 'contenttype', 'md5']
    EDIT_INFO_ATTRS = ['curr_version', 'prev_version', 'edited_by', 'edited_on']
    ALLOWED_ATTRS = TOP_LEVEL_ATTRS + EDIT_INFO_ATTRS

    # Asset keys handed to AssetMetadata must carry this asset_type.
    ASSET_TYPE = 'asset'

    @contract(asset_id='AssetKey', basename='str | unicode | None', internal_name='str | None', locked='bool | None',
              contenttype='str | unicode | None', md5='str | None', curr_version='str | None', prev_version='str | None')
    def __init__(self, asset_id,
                 basename=None, internal_name=None,
                 locked=None, contenttype=None, md5=None,
                 curr_version=None, prev_version=None):
        """
        Build an AssetMetadata object.

        Arguments:
            asset_id (AssetKey): Key identifying this particular asset.
            basename (str): Original path to file at asset upload time.
            internal_name (str): Name under which the file is stored internally.
            locked (bool): If True, only course participants can access the asset.
            contenttype (str): MIME type of the asset.
            md5 (str): MD5 hash of the asset.
            curr_version (str): Current version of the asset.
            prev_version (str): Previous version of the asset.

        Raises:
            IncorrectAssetIdType: if asset_id.asset_type is not ASSET_TYPE.
        """
        # Reject keys of the wrong type before any state is set.
        if asset_id.asset_type != self.ASSET_TYPE:
            raise IncorrectAssetIdType()

        self.asset_id = asset_id

        # Top-level attributes.
        # NOTE(review): basename was previously annotated inline as "path w/o filename",
        # which differs from the docstring wording above - confirm which is intended.
        self.basename = basename
        self.internal_name = internal_name
        self.locked = locked
        self.contenttype = contenttype
        self.md5 = md5

        # Edit-info attributes. The editor and edit time are never supplied at
        # construction; they start out unset.
        self.curr_version = curr_version
        self.prev_version = prev_version
        self.edited_by = None
        self.edited_on = None

    def __repr__(self):
        """
        Return the class name followed by the repr of a tuple of all fields.
        """
        field_values = (
            self.asset_id,
            self.basename, self.internal_name,
            self.locked, self.contenttype, self.md5,
            self.curr_version, self.prev_version,
            self.edited_by, self.edited_on
        )
        return """AssetMetadata{!r}""".format(field_values)

    def update(self, attr_dict):
        """
        Copy known attributes from attr_dict onto this object; entries whose
        name is not in ALLOWED_ATTRS are silently skipped.

        Arguments:
            attr_dict: Prop, val dictionary of all attributes to set.
        """
        for name, value in attr_dict.iteritems():
            if name not in self.ALLOWED_ATTRS:
                continue
            setattr(self, name, value)

    def to_mongo(self):
        """
        Return this metadata as a dict suitable for storing in MongoDB.

        Edit-related fields are nested under the 'edit_info' key; the asset
        key's path is stored under 'filename'.
        """
        edit_info = {
            'curr_version': self.curr_version,
            'prev_version': self.prev_version,
            'edited_by': self.edited_by,
            'edited_on': self.edited_on
        }
        mongo_doc = {
            'filename': self.asset_id.path,
            'basename': self.basename,
            'internal_name': self.internal_name,
            'locked': self.locked,
            'contenttype': self.contenttype,
            'md5': self.md5,
            'edit_info': edit_info
        }
        return mongo_doc

    @contract(asset_doc='dict | None')
    def from_mongo(self, asset_doc):
        """
        Populate every metadata field from a MongoDB document; a None document
        leaves the object untouched.

        The asset_id prop is initialized upon construction only.
        """
        if asset_doc is None:
            return

        # TOP_LEVEL_ATTRS and EDIT_INFO_ATTRS list exactly the fields stored at
        # the top level and under 'edit_info' respectively.
        for name in self.TOP_LEVEL_ATTRS:
            setattr(self, name, asset_doc[name])

        edit_info = asset_doc['edit_info']
        for name in self.EDIT_INFO_ATTRS:
            setattr(self, name, edit_info[name])
class AssetThumbnailMetadata(object):
    """
    Stores the metadata associated with the thumbnail of a course asset.
    """
    # All AssetThumbnailMetadata objects should have AssetLocators with this type.
    ASSET_TYPE = 'thumbnail'

    @contract(asset_id='AssetKey', internal_name='str | unicode | None')
    def __init__(self, asset_id, internal_name=None):
        """
        Construct a AssetThumbnailMetadata object.

        Arguments:
            asset_id (AssetKey): Key identifying this particular asset.
            internal_name (str): Name under which the file is stored internally.

        Raises:
            IncorrectAssetIdType: if asset_id.asset_type is not ASSET_TYPE.
        """
        if asset_id.asset_type != self.ASSET_TYPE:
            raise IncorrectAssetIdType()
        self.asset_id = asset_id
        self.internal_name = internal_name

    def __repr__(self):
        """
        Return the class name followed by the repr of a tuple of all fields.
        """
        # Use this class's own name so thumbnail metadata is distinguishable
        # from AssetMetadata in logs and debug output.
        return """AssetThumbnailMetadata{!r}""".format((self.asset_id, self.internal_name))

    def to_mongo(self):
        """
        Converts metadata properties into a MongoDB-storable dict.
        """
        return {
            'filename': self.asset_id.path,
            'internal_name': self.internal_name
        }

    @contract(thumbnail_doc='dict | None')
    def from_mongo(self, thumbnail_doc):
        """
        Fill in all metadata fields from a MongoDB document; a None document
        leaves the object untouched.

        The asset_id prop is initialized upon construction only.
        """
        if thumbnail_doc is None:
            return
        self.internal_name = thumbnail_doc['internal_name']
......@@ -9,10 +9,12 @@ import logging
from contextlib import contextmanager
import itertools
import functools
from contracts import contract, new_contract
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.keys import CourseKey, AssetKey
from opaque_keys.edx.locations import SlashSeparatedCourseKey
from xmodule.assetstore import AssetMetadata, AssetThumbnailMetadata
from . import ModuleStoreWriteBase
from . import ModuleStoreEnum
......@@ -20,6 +22,10 @@ from .exceptions import ItemNotFoundError, DuplicateCourseError
from .draft_and_published import ModuleStoreDraftAndPublished
from .split_migrator import SplitMigrator
# Register these types with PyContracts under string names so that the
# @contract decorators in this module can refer to them by name.
new_contract('CourseKey', CourseKey)
new_contract('AssetKey', AssetKey)
new_contract('AssetMetadata', AssetMetadata)
new_contract('AssetThumbnailMetadata', AssetThumbnailMetadata)
log = logging.getLogger(__name__)
......@@ -309,6 +315,209 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
store = self._get_modulestore_for_courseid(course_key)
return store.delete_course(course_key, user_id)
def _save_asset_info(self, course_key, asset_metadata, user_id, thumbnail=False):
"""
Base method to over-ride in modulestore.
"""
raise NotImplementedError()
def _delete_asset_data(self, asset_key, thumbnail=False):
"""
Base method to over-ride in modulestore.
"""
raise NotImplementedError()
def _find_course_assets(self, course_key):
"""
Base method to override.
"""
raise NotImplementedError()
@contract(course_key='CourseKey', asset_metadata='AssetMetadata')
def save_asset_metadata(self, course_key, asset_metadata, user_id):
    """
    Save one asset's metadata by delegating to the modulestore that backs
    the given course.

    Args:
        course_key (CourseKey): course identifier
        asset_metadata (AssetMetadata): data about the course asset data
        user_id: passed through unchanged to the underlying store

    Returns:
        bool: True if metadata save was successful, else False
    """
    return self._get_modulestore_for_courseid(course_key).save_asset_metadata(
        course_key, asset_metadata, user_id
    )
@contract(course_key='CourseKey', asset_thumbnail_metadata='AssetThumbnailMetadata')
def save_asset_thumbnail_metadata(self, course_key, asset_thumbnail_metadata):
    """
    Saves the asset thumbnail metadata for a particular course asset's thumbnail
    by delegating to the modulestore that backs the given course.

    Arguments:
        course_key (CourseKey): course identifier
        asset_thumbnail_metadata (AssetThumbnailMetadata): data about the course asset thumbnail

    Returns:
        True if thumbnail metadata save was successful, else False
    """
    store = self._get_modulestore_for_courseid(course_key)
    # Delegate to the thumbnail-specific method. save_asset_metadata takes an
    # additional user_id argument and is contracted to AssetMetadata, so it
    # cannot accept a thumbnail with these arguments.
    return store.save_asset_thumbnail_metadata(course_key, asset_thumbnail_metadata)
@contract(asset_key='AssetKey')
def find_asset_metadata(self, asset_key):
    """
    Look up the metadata for one course asset in the modulestore that backs
    the asset's course.

    Args:
        asset_key (AssetKey): locator containing original asset filename

    Returns:
        asset metadata (AssetMetadata) -or- None if not found
    """
    owning_store = self._get_modulestore_for_courseid(asset_key.course_key)
    found = owning_store.find_asset_metadata(asset_key)
    return found
@contract(asset_key='AssetKey')
def find_asset_thumbnail_metadata(self, asset_key):
    """
    Look up the thumbnail metadata for one course asset in the modulestore
    that backs the asset's course.

    Arguments:
        asset_key (AssetKey): key containing original asset filename

    Returns:
        Whatever the underlying store's find_asset_thumbnail_metadata returns
        (presumably thumbnail metadata -or- None if not found; confirm against
        the store implementation).
    """
    owning_store = self._get_modulestore_for_courseid(asset_key.course_key)
    found = owning_store.find_asset_thumbnail_metadata(asset_key)
    return found
@contract(course_key='CourseKey', start=int, maxresults=int, sort='list | None')
def get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None):
    """
    Return the static assets of a course, as reported by the modulestore
    that backs it.

    All assets are returned by default; start and maxresults may be given to
    limit the query.

    Args:
        course_key (CourseKey): course identifier
        start (int): optional - start at this asset number
        maxresults (int): optional - return at most this many, -1 means no limit
        sort (array): optional - None means no sort
            (sort_by (str), sort_order (str))
            sort_by - one of 'uploadDate' or 'displayname'
            sort_order - one of 'ascending' or 'descending'

    Returns:
        List of asset data dictionaries, which have the following keys:
            asset_key (AssetKey): asset identifier
            displayname: The human-readable name of the asset
            uploadDate (datetime.datetime): The date and time that the file was uploaded
            contentType: The mimetype string of the asset
            md5: An md5 hash of the asset content
    """
    owning_store = self._get_modulestore_for_courseid(course_key)
    all_metadata = owning_store.get_all_asset_metadata(course_key, start, maxresults, sort)
    return all_metadata
@contract(course_key='CourseKey')
def get_all_asset_thumbnail_metadata(self, course_key):
    """
    Return the thumbnails of all of a course's assets, as reported by the
    modulestore that backs the course.

    Args:
        course_key (CourseKey): course identifier

    Returns:
        List of AssetThumbnailMetadata objects.
    """
    return self._get_modulestore_for_courseid(course_key).get_all_asset_thumbnail_metadata(course_key)
@contract(asset_key='AssetKey')
def delete_asset_metadata(self, asset_key):
    """
    Remove the metadata of a single asset via the modulestore that backs the
    asset's course.

    Arguments:
        asset_key (AssetKey): locator containing original asset filename

    Returns:
        Number of asset metadata entries deleted (0 or 1)
    """
    owning_store = self._get_modulestore_for_courseid(asset_key.course_key)
    deleted_count = owning_store.delete_asset_metadata(asset_key)
    return deleted_count
@contract(asset_key='AssetKey')
def delete_asset_thumbnail_metadata(self, asset_key):
    """
    Deletes a single asset's thumbnail metadata via the modulestore that backs
    the asset's course.

    Arguments:
        asset_key (AssetKey): locator containing original asset filename

    Returns:
        Number of thumbnail metadata entries deleted (0 or 1)
    """
    store = self._get_modulestore_for_courseid(asset_key.course_key)
    # Delegate to the thumbnail-specific delete. Calling delete_asset_metadata
    # here would target the asset's own metadata entry instead of its thumbnail.
    return store.delete_asset_thumbnail_metadata(asset_key)
@contract(course_key='CourseKey')
def delete_all_asset_metadata(self, course_key):
    """
    Remove every asset that uses this course_key as an identifier, via the
    modulestore that backs the course.

    Arguments:
        course_key (CourseKey): course_identifier
    """
    return self._get_modulestore_for_courseid(course_key).delete_all_asset_metadata(course_key)
@contract(source_course_key='CourseKey', dest_course_key='CourseKey')
def copy_all_asset_metadata(self, source_course_key, dest_course_key):
    """
    Copy every course asset from source_course_key to dest_course_key.

    The copy is delegated to the modulestore that backs the source course.

    Arguments:
        source_course_key (CourseKey): identifier of course to copy from
        dest_course_key (CourseKey): identifier of course to copy to
    """
    # When implementing this in https://openedx.atlassian.net/browse/PLAT-78 , consider this:
    #  Check the modulestores of both the source and dest course_keys. If in different modulestores,
    #  export all asset data from one modulestore and import it into the dest one.
    source_store = self._get_modulestore_for_courseid(source_course_key)
    result = source_store.copy_all_asset_metadata(source_course_key, dest_course_key)
    return result
@contract(asset_key='AssetKey', attr=str)
def set_asset_metadata_attr(self, asset_key, attr, value, user_id):
    """
    Add/set the given attr on the asset at the given location. Value can be any type which pymongo accepts.

    Arguments:
        asset_key (AssetKey): asset identifier
        attr (str): which attribute to set
        value: the value to set it to (any type pymongo accepts such as datetime, number, string)
        user_id: passed through unchanged to the underlying store

    Raises:
        NotFoundError if no such item exists
        AttributeError is attr is one of the build in attrs.
        (NOTE(review): the exact exceptions depend on the underlying store - confirm.)
    """
    store = self._get_modulestore_for_courseid(asset_key.course_key)
    # set_asset_metadata_attrs takes (asset_key, attr_dict, user_id), so the
    # single attribute/value pair is wrapped in a one-entry dict.
    return store.set_asset_metadata_attrs(asset_key, {attr: value}, user_id)
@contract(asset_key='AssetKey', attr_dict=dict)
def set_asset_metadata_attrs(self, asset_key, attr_dict, user_id):
    """
    Add/set a dict of attrs on the asset at the given location, via the
    modulestore that backs the asset's course. Values can be any type which
    pymongo accepts.

    Arguments:
        asset_key (AssetKey): asset identifier
        attr_dict (dict): attribute/value pairs to set
        user_id: passed through unchanged to the underlying store

    Raises:
        NotFoundError if no such item exists
        AttributeError is attr is one of the build in attrs.
    """
    owning_store = self._get_modulestore_for_courseid(asset_key.course_key)
    result = owning_store.set_asset_metadata_attrs(asset_key, attr_dict, user_id)
    return result
@strip_key
def get_parent_location(self, location, **kwargs):
"""
......
......@@ -24,6 +24,7 @@ from fs.osfs import OSFS
from path import path
from datetime import datetime
from pytz import UTC
from contracts import contract, new_contract
from importlib import import_module
from xmodule.errortracker import null_error_tracker, exc_info_to_str
......@@ -41,12 +42,18 @@ from xmodule.modulestore.inheritance import InheritanceMixin, inherit_metadata,
from xblock.core import XBlock
from opaque_keys.edx.locations import SlashSeparatedCourseKey
from opaque_keys.edx.locator import CourseLocator
from opaque_keys.edx.keys import UsageKey, CourseKey
from opaque_keys.edx.keys import UsageKey, CourseKey, AssetKey
from xmodule.exceptions import HeartbeatFailure
from xmodule.modulestore.edit_info import EditInfoRuntimeMixin
from xmodule.assetstore import AssetMetadata, AssetThumbnailMetadata
log = logging.getLogger(__name__)
# Register these types with PyContracts under string names so that the
# @contract decorators in this module can refer to them by name.
new_contract('CourseKey', CourseKey)
new_contract('AssetKey', AssetKey)
new_contract('AssetMetadata', AssetMetadata)
new_contract('AssetThumbnailMetadata', AssetThumbnailMetadata)
# sort order that returns DRAFT items first
SORT_REVISION_FAVOR_DRAFT = ('_id.revision', pymongo.DESCENDING)
......@@ -195,7 +202,6 @@ class CachingDescriptorSystem(MakoDescriptorSystem, EditInfoRuntimeMixin):
category = json_data['location']['category']
class_ = self.load_block_type(category)
definition = json_data.get('definition', {})
metadata = json_data.get('metadata', {})
for old_name, new_name in getattr(class_, 'metadata_translations', {}).items():
......@@ -443,7 +449,7 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
super(MongoModuleStore, self).__init__(contentstore=contentstore, **kwargs)
def do_connection(
db, collection, host, port=27017, tz_aware=True, user=None, password=None, **kwargs
db, collection, host, port=27017, tz_aware=True, user=None, password=None, asset_collection=None, **kwargs
):
"""
Create & open the connection, authenticate, and provide pointers to the collection
......@@ -460,6 +466,11 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
)
self.collection = self.database[collection]
# Collection which stores asset metadata.
self.asset_collection = None
if asset_collection is not None:
self.asset_collection = self.database[asset_collection]
if user is not None and password is not None:
self.database.authenticate(user, password)
......@@ -1436,6 +1447,147 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
field_data = KvsFieldData(kvs)
return field_data
def _find_course_assets(self, course_key):
"""
Internal; finds (or creates) course asset info about all assets for a particular course
Arguments:
course_key (CourseKey): course identifier
Returns:
Asset info for the course
"""
if self.asset_collection is None:
return None
# Using the course_key, find or insert the course asset metadata document.
# A single document exists per course to store the course asset metadata.
course_assets = self.asset_collection.find_one(
{'course_id': unicode(course_key)},
fields=('course_id', 'storage', 'assets', 'thumbnails')
)
if course_assets is None:
# Not found, so create.
course_assets = {'course_id': unicode(course_key), 'storage': 'FILLMEIN-TMP', 'assets': [], 'thumbnails': []}
course_assets['_id'] = self.asset_collection.insert(course_assets)
return course_assets
@contract(course_key='CourseKey', asset_metadata='AssetMetadata | AssetThumbnailMetadata', user_id='str | unicode')
def _save_asset_info(self, course_key, asset_metadata, user_id, thumbnail=False):
    """
    Store the info for one asset -or- thumbnail of a course, replacing any
    existing entry for the same asset path and appending otherwise.

    Arguments:
        course_key (CourseKey): course identifier
        asset_metadata (AssetMetadata/AssetThumbnailMetadata): data about the course asset/thumbnail
        user_id: recorded as 'edited_by' on asset metadata (not used for thumbnails)
        thumbnail (bool): True if saving thumbnail metadata, False if saving asset metadata

    Returns:
        True if info save was successful, else False
        (False is returned when no asset collection is configured)
    """
    if self.asset_collection is None:
        return False

    course_assets, asset_idx = self._find_course_asset(course_key, asset_metadata.asset_id.path, thumbnail)

    # Thumbnails and assets live in separate lists of the course's document.
    if thumbnail:
        doc_field = 'thumbnails'
    else:
        doc_field = 'assets'
    entries = course_assets[doc_field]

    # Only assets record who edited them and when - thumbnails do not.
    if not thumbnail:
        asset_metadata.update({'edited_by': user_id, 'edited_on': datetime.now(UTC)})

    mongo_entry = asset_metadata.to_mongo()
    if asset_idx is not None:
        # An entry for this asset already exists: overwrite it in place.
        entries[asset_idx] = mongo_entry
    else:
        # First time this asset is saved: add it at the end.
        # Future optimization: Insert in order & binary search to retrieve.
        entries.append(mongo_entry)

    # Write the whole modified list back to the course's document.
    self.asset_collection.update({'_id': course_assets['_id']}, {'$set': {doc_field: entries}})
    return True
@contract(asset_key='AssetKey', attr_dict=dict)
def set_asset_metadata_attrs(self, asset_key, attr_dict, user_id):
    """
    Add/set a dict of attrs on the stored metadata of the asset at the given
    location. Values can be any type which pymongo accepts.

    Does nothing when no asset collection is configured.

    Arguments:
        asset_key (AssetKey): asset identifier
        attr_dict (dict): attribute: value pairs to set
        user_id: recorded as 'edited_by' on the asset metadata

    Raises:
        ItemNotFoundError if no such item exists
    """
    if self.asset_collection is None:
        return

    course_assets, asset_idx = self._find_course_asset(asset_key.course_key, asset_key.path)
    if asset_idx is None:
        raise ItemNotFoundError(asset_key)

    # Rebuild an AssetMetadata from the stored entry, apply the requested
    # changes, then stamp the edit information.
    stored_assets = course_assets['assets']
    metadata = AssetMetadata(asset_key, asset_key.path)
    metadata.from_mongo(stored_assets[asset_idx])
    metadata.update(attr_dict)
    metadata.update({'edited_by': user_id, 'edited_on': datetime.now(UTC)})

    # Replace the stored entry and write the whole asset list back.
    stored_assets[asset_idx] = metadata.to_mongo()
    self.asset_collection.update({'_id': course_assets['_id']}, {"$set": {'assets': stored_assets}})
@contract(asset_key='AssetKey')
def _delete_asset_data(self, asset_key, thumbnail=False):
    """
    Internal; remove the stored entry for a single asset's metadata -or- thumbnail.

    Arguments:
        asset_key (AssetKey): key containing original asset/thumbnail filename
        thumbnail: True if thumbnail deletion, False if asset metadata deletion

    Returns:
        Number of asset metadata/thumbnail entries deleted (0 or 1)
    """
    if self.asset_collection is None:
        return 0

    course_assets, asset_idx = self._find_course_asset(asset_key.course_key, asset_key.path, get_thumbnail=thumbnail)
    if asset_idx is None:
        # No matching entry, so there is nothing to delete.
        return 0

    # Thumbnails and assets live in separate lists of the course's document.
    if thumbnail:
        doc_field = 'thumbnails'
    else:
        doc_field = 'assets'
    entries = course_assets[doc_field]
    del entries[asset_idx]

    # Write the shortened list back to the course's document.
    self.asset_collection.update({'_id': course_assets['_id']}, {'$set': {doc_field: entries}})
    return 1
@contract(course_key='CourseKey')
def delete_all_asset_metadata(self, course_key):
    """
    Remove the document holding all asset metadata for the given course.

    Does nothing when no asset collection is configured.

    Arguments:
        course_key (CourseKey): course_identifier
    """
    if self.asset_collection is None:
        return

    # All of a course's asset metadata lives in one document, so removing
    # that document removes everything. Note that _find_course_assets creates
    # the document first if the course does not have one yet.
    course_doc = self._find_course_assets(course_key)
    doc_id = course_doc['_id']
    self.asset_collection.remove(doc_id)
def heartbeat(self):
"""
Check that the db is reachable.
......
......@@ -99,7 +99,7 @@ class MongoConnection(object):
Segregation of pymongo functions from the data modeling mechanisms for split modulestore.
"""
def __init__(
self, db, collection, host, port=27017, tz_aware=True, user=None, password=None, **kwargs
self, db, collection, host, port=27017, tz_aware=True, user=None, password=None, asset_collection=None, **kwargs
):
"""
Create & open the connection, authenticate, and provide pointers to the collections
......@@ -114,6 +114,10 @@ class MongoConnection(object):
db
)
# Remove when adding official Split support for asset metadata storage.
if asset_collection:
pass
if user is not None and password is not None:
self.database.authenticate(user, password)
......
......@@ -47,6 +47,7 @@ class TestMixedModuleStore(CourseComparisonTest):
PORT = MONGO_PORT_NUM
DB = 'test_mongo_%s' % uuid4().hex[:5]
COLLECTION = 'modulestore'
ASSET_COLLECTION = 'assetstore'
FS_ROOT = DATA_DIR
DEFAULT_CLASS = 'xmodule.raw_module.RawDescriptor'
RENDER_TEMPLATE = lambda t_n, d, ctx = None, nsp = 'main': ''
......@@ -67,6 +68,7 @@ class TestMixedModuleStore(CourseComparisonTest):
'port': PORT,
'db': DB,
'collection': COLLECTION,
'asset_collection': ASSET_COLLECTION,
}
OPTIONS = {
'mappings': {
......
......@@ -566,6 +566,7 @@ DOC_STORE_CONFIG = {
'host': 'localhost',
'db': 'xmodule',
'collection': 'modulestore',
'asset_collection': 'assetstore',
}
MODULESTORE = {
'default': {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment