Commit df7b917b by Don Mitchell Committed by Calen Pennington

Implement next generation modulestore

A new modulestore backed by mongo that changes the data format to
facilitate easy versioning, sharing content between courses, and fast
lookup of course structure and Scope.settings data.

Conflicts:
	cms/djangoapps/contentstore/tests/test_contentstore.py
parent 4a127e51
......@@ -33,6 +33,10 @@ MODULESTORE = {
'direct': {
'ENGINE': 'xmodule.modulestore.mongo.MongoModuleStore',
'OPTIONS': modulestore_options
},
'split': {
'ENGINE': 'xmodule.modulestore.split_mongo.SplitMongoModuleStore',
'OPTIONS': modulestore_options
}
}
......
......@@ -63,6 +63,10 @@ MODULESTORE = {
'draft': {
'ENGINE': 'xmodule.modulestore.draft.DraftModuleStore',
'OPTIONS': MODULESTORE_OPTIONS
},
'split': {
'ENGINE': 'xmodule.modulestore.split_mongo.SplitMongoModuleStore',
'OPTIONS': MODULESTORE_OPTIONS
}
}
......
......@@ -15,6 +15,7 @@ import json
from xblock.core import Scope, List, String, Dict, Boolean
from .fields import Date
from xmodule.modulestore.locator import CourseLocator
from django.utils.timezone import UTC
from xmodule.util import date_utils
......@@ -373,7 +374,10 @@ class CourseDescriptor(CourseFields, SequenceDescriptor):
super(CourseDescriptor, self).__init__(*args, **kwargs)
if self.wiki_slug is None:
self.wiki_slug = self.location.course
if isinstance(self.location, Location):
self.wiki_slug = self.location.course
elif isinstance(self.location, CourseLocator):
self.wiki_slug = self.location.course_id or self.display_name
msg = None
......
......@@ -79,8 +79,10 @@ class ErrorDescriptor(ErrorFields, JSONEditingDescriptor):
@classmethod
def _construct(cls, system, contents, error_msg, location):
if location.name is None:
location = location._replace(
if isinstance(location, dict) and 'course' in location:
location = Location(location)
if isinstance(location, Location) and location.name is None:
location = location.replace(
category='error',
# Pick a unique url_name -- the sha1 hash of the contents.
# NOTE: We could try to pull out the url_name of the errored descriptor,
......@@ -94,7 +96,7 @@ class ErrorDescriptor(ErrorFields, JSONEditingDescriptor):
model_data = {
'error_msg': str(error_msg),
'contents': contents,
'display_name': 'Error: ' + location.name,
'display_name': 'Error: ' + location.url(),
'location': location,
'category': 'error'
}
......
......@@ -7,9 +7,15 @@ class ItemNotFoundError(Exception):
pass
class ItemWriteConflictError(Exception):
    """
    Modulestore error signalling a conflict while writing an item.

    NOTE(review): no raise site is visible alongside this definition; confirm
    the exact conditions against the modulestore implementations.
    """
class InsufficientSpecificationError(Exception):
    """
    The caller did not supply enough information to carry out the request.

    x_module raises this, for example, when a block's location is a
    BlockUsageLocator but no category was provided, or when the location is
    of a type it does not recognise.
    """
class OverSpecificationError(Exception):
    """
    Modulestore error for a request that was over-specified.

    NOTE(review): presumably the converse of InsufficientSpecificationError
    (too many, possibly conflicting, identifying fields); no raise site is
    visible here, so confirm against the locator code.
    """
class InvalidLocationError(Exception):
    """
    A value could not be interpreted as a valid location.

    LocationField.from_json catches this from ``Location(value)`` and falls
    back to treating the value as a BlockUsageLocator.
    """
......@@ -21,3 +27,12 @@ class NoPathToItem(Exception):
class DuplicateItemError(Exception):
    """
    Modulestore error signalling that an item already exists.

    NOTE(review): no raise site is visible alongside this definition; confirm
    the exact conditions against the modulestore implementations.
    """
class VersionConflictError(Exception):
    """
    The caller asked for either draft or published head and gave a version which conflicted with it.

    Attributes:
        requestedLocation: the location (including version) the caller asked for
        currentHead: the head the requested version conflicted with
    """
    def __init__(self, requestedLocation, currentHead):
        """
        :param requestedLocation: the location the caller requested
        :param currentHead: the current head that the request conflicts with
        """
        # Give the base class a message so str(exc), logs and tracebacks
        # identify the conflict instead of being empty.
        super(VersionConflictError, self).__init__(
            "Requested {0!r} conflicts with current head {1!r}".format(
                requestedLocation, currentHead
            )
        )
        self.requestedLocation = requestedLocation
        self.currentHead = currentHead
......@@ -50,6 +50,8 @@ def inherit_metadata(descriptor, model_data):
def own_metadata(module):
# IN SPLIT MONGO this is just ['metadata'] as it keeps ['_inherited_metadata'] separate!
# FIXME move into kvs? will that work for xml mongo?
"""
Return a dictionary that contains only non-inherited field keys,
mapped to their values
......
import re
URL_RE = re.compile(r'^edx://(.+)$', re.IGNORECASE)


def parse_url(string):
    """
    Parse an 'edx://' url into its locator components.

    A url must begin with 'edx://' (case-insensitive match),
    followed by either '@' and a version_guid, or a course_id.

    Examples:
        'edx://@0123FFFF'
        'edx://edu.mit.eecs.6002x'
        'edx://edu.mit.eecs.6002x;published'
        'edx://edu.mit.eecs.6002x;published#HW3'

    Returns None if string does not start with the 'edx://' prefix or the
    remainder cannot be parsed.
    Otherwise the remainder is delegated: a leading '@' sends the rest to
    parse_guid, anything else goes to parse_course_id, and that parser's
    result (a dict, or None) is returned unchanged.
    """
    found = URL_RE.match(string)
    if found is None:
        return None
    remainder = found.group(1)
    # '@' marks a version guid; everything else is treated as a course id
    if remainder.startswith('@'):
        return parse_guid(remainder[1:])
    return parse_course_id(remainder)
# \Z (not $) so that a trailing newline is rejected: '$' also matches just
# before a final '\n', which would let 'HW3\n' through with the newline
# included in the returned block value.
BLOCK_RE = re.compile(r'^\w+\Z', re.IGNORECASE)


def parse_block_ref(string):
    """
    A block_ref is a string of word_chars.

    <word_chars> matches one or more word characters as defined by \\w in
    python regular expressions: letters, digits and the underscore
    (see http://docs.python.org/dev/library/re.html).

    If string is a block_ref, returns a dict with key 'block' and the
    string as its value, otherwise returns None.
    """
    # '\w+' already requires at least one character, so the empty string
    # needs no separate length check.
    if BLOCK_RE.match(string):
        return {'block': string}
    return None
GUID_RE = re.compile(r'^(?P<version_guid>[A-F0-9]+)(#(?P<block>\w+))?$', re.IGNORECASE)


def parse_guid(string):
    """
    Parse a version_guid with an optional '#block' suffix.

    A version_guid is a string of hex digits (0-F, case-insensitive).

    If string matches, returns a dict with keys 'version_guid' and 'block'
    ('block' is None when no '#block' suffix was given),
    otherwise returns None.
    """
    found = GUID_RE.match(string)
    return found.groupdict() if found is not None else None
# Each dotted component is a single '\w+'. The earlier form '\w+\w*' matched
# exactly the same strings but let the regex engine split every component in
# several ways, so a non-matching input with many dotted components caused
# exponential backtracking.
COURSE_ID_RE = re.compile(
    r'^(?P<id>(\w+)(\.\w+)*)(;(?P<revision>\w+))?(#(?P<block>\w+))?$',
    re.IGNORECASE
)


def parse_course_id(string):
    """
    A course_id has a main id component.
    There may also be an optional revision (;published or ;draft).
    There may also be an optional block (#HW3 or #Quiz2).

    Examples of valid course_ids:
        'edu.mit.eecs.6002x'
        'edu.mit.eecs.6002x;published'
        'edu.mit.eecs.6002x#HW3'
        'edu.mit.eecs.6002x;published#HW3'

    Syntax:
        course_id = main_id [; revision] [# block]
        main_id = name [. name]*
        revision = name
        block = name
        name = <word_chars>

    <word_chars> matches one or more word characters as defined by \\w in
    python regular expressions: letters, digits and the underscore
    (see http://docs.python.org/dev/library/re.html).

    If string is a course_id, returns a dict with keys 'id', 'revision', and 'block'.
    Revision is optional: if missing returned_dict['revision'] is None.
    Block is optional: if missing returned_dict['block'] is None.
    Else returns None.
    """
    match = COURSE_ID_RE.match(string)
    if match is None:
        return None
    return match.groupdict()
from split import SplitMongoModuleStore
\ No newline at end of file
import sys
import logging
from xmodule.mako_module import MakoDescriptorSystem
from xmodule.x_module import XModuleDescriptor
from xmodule.modulestore.locator import BlockUsageLocator
from xmodule.error_module import ErrorDescriptor
from xmodule.errortracker import exc_info_to_str
from xblock.runtime import DbModel
from ..exceptions import ItemNotFoundError
from .split_mongo_kvs import SplitMongoKVS, SplitMongoKVSid
log = logging.getLogger(__name__)
# TODO should this be here or w/ x_module or ???
class CachingDescriptorSystem(MakoDescriptorSystem):
    """
    A system that has a cache of a course version's json that it will use to load modules
    from, with a backup of calling to the underlying modulestore for more data.

    Computes the metadata inheritance upon creation.
    """
    def __init__(self, modulestore, course_entry, module_data, lazy,
                 default_class, error_tracker, render_template):
        """
        Computes the metadata inheritance and sets up the cache.

        modulestore: the module store that can be used to retrieve additional
            modules

        course_entry: the course version's structure dict. This class reads
            its 'blocks', 'root' and '_id' entries and, when present,
            'course_id' and 'revision'.

        module_data: a dict mapping usage_id -> json that was cached from the
            underlying modulestore

        lazy: forwarded to modulestore.cache_items when a block missing from
            module_data has to be fetched

        default_class: The default_class to use when loading an
            XModuleDescriptor from the module_data

        error_tracker: a function that logs errors for later display to users

        render_template: a function for rendering templates, as per
            MakoDescriptorSystem
        """
        # TODO find all references to resources_fs and make handle None
        # (None is passed below in the resources_fs position)
        super(CachingDescriptorSystem, self).__init__(
            self._load_item, None, error_tracker, render_template)
        self.modulestore = modulestore
        self.course_entry = course_entry
        self.lazy = lazy
        self.module_data = module_data
        self.default_class = default_class
        # TODO see if self.course_id is needed: is already in course_entry but could be > 1 value

        # Compute inheritance, starting from the course's root block
        blocks = course_entry.get('blocks', {})
        modulestore.inherit_metadata(blocks, blocks.get(course_entry.get('root')))

    def _load_item(self, usage_id, course_entry_override=None):
        """
        Load the xblock for usage_id from the cache, asking the modulestore to
        cache it first when it is not there yet.

        usage_id: the id of the block within the course structure
        course_entry_override: optional course entry to build the block's
            locator from instead of self.course_entry

        Raises ItemNotFoundError (carrying usage_id) if the block is still
        unknown after asking the modulestore.
        """
        # TODO ensure all callers of system.load_item pass just the id
        json_data = self.module_data.get(usage_id)
        if json_data is None:
            # deeper than initial descendant fetch or doesn't exist
            self.modulestore.cache_items(self, [usage_id], lazy=self.lazy)
            json_data = self.module_data.get(usage_id)
            if json_data is None:
                # name the missing block so the failure can be diagnosed
                raise ItemNotFoundError(usage_id)

        class_ = XModuleDescriptor.load_class(
            json_data.get('category'),
            self.default_class
        )
        return self.xblock_from_json(class_, usage_id, json_data, course_entry_override)

    def xblock_from_json(self, class_, usage_id, json_data, course_entry_override=None):
        """
        Build an instance of class_ from a block's json.

        class_: the descriptor class to instantiate
        usage_id: the block's id within the course (may be None)
        json_data: the block's json; 'definition', 'metadata', 'children',
            '_inherited_metadata', 'category' and the edit-info keys are read
        course_entry_override: course entry to take '_id', 'course_id' and
            'revision' from; defaults to self.course_entry

        Returns the new descriptor, or an ErrorDescriptor if constructing
        class_ raised.
        """
        if course_entry_override is None:
            course_entry_override = self.course_entry
        # most likely a lazy loader but not the id directly
        definition = json_data.get('definition', {})
        metadata = json_data.get('metadata', {})

        block_locator = BlockUsageLocator(
            version_guid=course_entry_override['_id'],
            usage_id=usage_id,
            course_id=course_entry_override.get('course_id'),
            revision=course_entry_override.get('revision')
        )

        kvs = SplitMongoKVS(
            definition,
            json_data.get('children', []),
            metadata,
            json_data.get('_inherited_metadata'),
            block_locator,
            json_data.get('category'))

        model_data = DbModel(
            kvs, class_, None,
            SplitMongoKVSid(
                # DbModel req's that these support .url()
                block_locator,
                self.modulestore.definition_locator(definition)))

        try:
            module = class_(self, model_data)
        except Exception:
            # Deliberately broad: any construction failure is turned into an
            # ErrorDescriptor rather than aborting the load.
            log.warning("Failed to load descriptor", exc_info=True)
            if usage_id is None:
                usage_id = "MISSING"
            return ErrorDescriptor.from_json(
                json_data,
                self,
                BlockUsageLocator(version_guid=course_entry_override['_id'],
                                  usage_id=usage_id),
                error_msg=exc_info_to_str(sys.exc_info())
            )

        # Copy the block's edit history onto the descriptor
        module.edited_by = json_data.get('edited_by')
        module.edited_on = json_data.get('edited_on')
        module.previous_version = json_data.get('previous_version')
        module.update_version = json_data.get('update_version')
        module.definition_locator = self.modulestore.definition_locator(definition)
        return module
from xmodule.modulestore.locator import DescriptionLocator
class DefinitionLazyLoader(object):
    """
    Stand-in for an xblock's definition that knows how to retrieve the real
    content on demand. It only pays off when the owning object does not touch
    the definition during init but defers until a client asks for it.
    Only works if the modulestore is a split mongo store.
    """
    def __init__(self, modulestore, definition_id):
        """
        Record where the yet-to-be-fetched definition lives.

        :param modulestore: the store whose ``definitions`` collection holds the record
        :param definition_id: the id of the definition record to fetch; it is
            wrapped in a DescriptionLocator
        """
        self.modulestore = modulestore
        self.definition_locator = DescriptionLocator(definition_id)

    def fetch(self):
        """
        Look the definition up in the store and return it. The result is not
        cached here: the caller should swap this loader for the returned
        value so the lookup happens only once.
        """
        definitions = self.modulestore.definitions
        wanted_id = self.definition_locator.definition_id
        return definitions.find_one({'_id': wanted_id})
import copy
from xblock.core import Scope
from collections import namedtuple
from xblock.runtime import KeyValueStore, InvalidScopeError
from .definition_lazy_loader import DefinitionLazyLoader
# id is a BlockUsageLocator, def_id is the definition's guid
SplitMongoKVSid = namedtuple('SplitMongoKVSid', 'id, def_id')
# TODO should this be here or w/ x_module or ???
class SplitMongoKVS(KeyValueStore):
    """
    A KeyValueStore that maps keyed data access to one of the 3 data areas
    known to the MongoModuleStore (data, children, and metadata)

    Scope.children reads/writes the children list, Scope.settings the
    metadata (falling back to inherited metadata on read), and Scope.content
    the definition's 'data' plus the synthetic 'location' and 'category'
    fields. A definition's 'data' is either a dict of named fields or a
    single raw value that is exposed as the field named 'data'.
    """
    def __init__(self, definition, children, metadata, _inherited_metadata, location, category):
        """
        :param definition: the definition dict, or a DefinitionLazyLoader
            that is resolved on first content access
        :param children: the list of children for this block
        :param metadata: the locally defined value for each metadata field
        :param _inherited_metadata: the value of each inheritable field from above this.
            Note, metadata may override and disagree w/ this b/c this says what the value
            should be if metadata is undefined for this field. May be None.
        :param location: the value served for the 'location' content field
        :param category: the value served for the 'category' content field
        """
        # ensure kvs's don't share objects w/ others so that changes can't appear in separate ones
        # the particular use case was that changes to kvs's were polluting caches. My thinking was
        # that kvs's should be independent thus responsible for the isolation.
        # NOTE(review): these are shallow copies, so a nested 'data' dict is
        # still shared with the source object.
        if isinstance(definition, DefinitionLazyLoader):
            self._definition = definition
        else:
            self._definition = copy.copy(definition)
        self._children = copy.copy(children)
        self._metadata = copy.copy(metadata)
        # None means "nothing inherited"; store an empty dict so that the
        # membership tests in get()/has() cannot raise TypeError.
        self._inherited_metadata = {} if _inherited_metadata is None else _inherited_metadata
        self._location = location
        self._category = category

    def _resolved_definition(self):
        """Return the definition dict, fetching it first if it is still a lazy loader."""
        if isinstance(self._definition, DefinitionLazyLoader):
            self._definition = self._definition.fetch()
        return self._definition

    @staticmethod
    def _is_raw_data(definition, field_name):
        """True when field_name is 'data' and the definition's data is a single raw value, not a dict."""
        return field_name == 'data' and not isinstance(definition.get('data'), dict)

    def get(self, key):
        """
        Return the value stored for key.

        Raises KeyError when a settings or content field has no value, and
        InvalidScopeError for scopes this store does not serve.
        """
        if key.scope == Scope.children:
            return self._children
        elif key.scope == Scope.parent:
            return None
        elif key.scope == Scope.settings:
            # local value wins over the inherited one
            if key.field_name in self._metadata:
                return self._metadata[key.field_name]
            elif key.field_name in self._inherited_metadata:
                return self._inherited_metadata[key.field_name]
            else:
                raise KeyError(key.field_name)
        elif key.scope == Scope.content:
            if key.field_name == 'location':
                return self._location
            elif key.field_name == 'category':
                return self._category
            else:
                definition = self._resolved_definition()
                data = definition.get('data')
                if self._is_raw_data(definition, key.field_name):
                    return data
                # Raw (non-dict) data has no named fields: report the field as
                # missing rather than substring-testing a string or indexing None.
                if not isinstance(data, dict) or key.field_name not in data:
                    raise KeyError(key.field_name)
                return data[key.field_name]
        else:
            raise InvalidScopeError(key.scope)

    def set(self, key, value):
        """
        Store value for key. Raises InvalidScopeError for scopes this store
        does not serve (including Scope.parent).
        """
        # TODO cache db update implications & add method to invoke
        if key.scope == Scope.children:
            self._children = value
            # TODO remove inheritance from any orphaned exchildren
            # TODO add inheritance to any new children
        elif key.scope == Scope.settings:
            # TODO if inheritable, push down to children who don't override
            self._metadata[key.field_name] = value
        elif key.scope == Scope.content:
            if key.field_name == 'location':
                self._location = value
            elif key.field_name == 'category':
                self._category = value
            else:
                definition = self._resolved_definition()
                if self._is_raw_data(definition, key.field_name):
                    # raw data is replaced wholesale (the earlier code only
                    # read the value here, so the write was silently dropped)
                    definition['data'] = value
                else:
                    definition.setdefault('data', {})[key.field_name] = value
        else:
            raise InvalidScopeError(key.scope)

    def delete(self, key):
        """
        Remove the locally stored value for key; deleting a field that has no
        value is a no-op. 'location' and 'category' cannot be deleted.
        Raises InvalidScopeError for scopes this store does not serve.
        """
        # TODO cache db update implications & add method to invoke
        if key.scope == Scope.children:
            self._children = []
        elif key.scope == Scope.settings:
            # TODO if inheritable, ensure _inherited_metadata has value from above and
            # revert children to that value
            if key.field_name in self._metadata:
                del self._metadata[key.field_name]
        elif key.scope == Scope.content:
            # don't allow deletion of location nor category
            if key.field_name == 'location':
                pass
            elif key.field_name == 'category':
                pass
            else:
                definition = self._resolved_definition()
                if self._is_raw_data(definition, key.field_name):
                    # assign, don't setdefault: setdefault leaves an existing
                    # value in place, so nothing would be deleted
                    definition['data'] = None
                else:
                    try:
                        del definition['data'][key.field_name]
                    except (KeyError, TypeError):
                        # no 'data', no such field, or raw data without named
                        # fields: nothing to delete
                        pass
        else:
            raise InvalidScopeError(key.scope)

    def has(self, key):
        """
        Return whether get(key) would find a value. Unknown scopes yield False
        rather than raising.
        """
        if key.scope in (Scope.children, Scope.parent):
            return True
        elif key.scope == Scope.settings:
            return key.field_name in self._metadata or key.field_name in self._inherited_metadata
        elif key.scope == Scope.content:
            if key.field_name == 'location':
                return True
            elif key.field_name == 'category':
                return self._category is not None
            else:
                definition = self._resolved_definition()
                data = definition.get('data')
                if self._is_raw_data(definition, key.field_name):
                    return data is not None
                return isinstance(data, dict) and key.field_name in data
        else:
            return False

    def get_data(self):
        """
        Intended only for use by persistence layer to get the native definition['data'] rep
        """
        return self._resolved_definition().get('data')

    def get_own_metadata(self):
        """
        Get the metadata explicitly set on this element.
        """
        return self._metadata

    def get_inherited_metadata(self):
        """
        Get the metadata set by the ancestors (which own metadata may override or not)
        """
        return self._inherited_metadata
from xmodule.modulestore.django import modulestore
from xmodule.course_module import CourseDescriptor
from xmodule.x_module import XModuleDescriptor
import factory
# [dhm] I'm not sure why we're using factory_boy if we're not following its pattern. If anyone
# assumes they can call build, it will completely fail, for example.
# pylint: disable=W0232
class PersistentCourseFactory(factory.Factory):
    """
    Create a new course (not a new version of a course, but a whole new index entry).

    keywords:
    * org: defaults to testX
    * prettyid: defaults to 999
    * display_name
    * user_id
    * data (optional) the data payload to save in the course item
    * metadata (optional) the metadata payload. If display_name is in the metadata, that takes
    precedence over any display_name provided directly.
    * master_version: defaults to 'draft'
    """
    FACTORY_FOR = CourseDescriptor

    org = 'testX'
    prettyid = '999'
    display_name = 'Robot Super Course'
    user_id = "test_user"
    data = None
    metadata = None
    master_version = 'draft'

    # pylint: disable=W0613
    @classmethod
    def _create(cls, target_class, *args, **kwargs):
        """
        Create the course in the 'split' modulestore and return it.
        Reads the keywords listed on the class from kwargs.
        """
        org = kwargs.get('org')
        prettyid = kwargs.get('prettyid')
        display_name = kwargs.get('display_name')
        user_id = kwargs.get('user_id')
        data = kwargs.get('data')
        # Work on a copy so the caller's dict is never modified; a missing or
        # None metadata becomes an empty dict.
        metadata = dict(kwargs.get('metadata') or {})
        # display_name inside metadata takes precedence over the keyword
        metadata.setdefault('display_name', display_name)

        # Write the data to the mongo datastore
        new_course = modulestore('split').create_course(
            org, prettyid, user_id, metadata=metadata, course_data=data, id_root=prettyid,
            master_version=kwargs.get('master_version'))

        return new_course

    @classmethod
    def _build(cls, target_class, *args, **kwargs):
        """Building without persisting is not supported."""
        raise NotImplementedError()
class ItemFactory(factory.Factory):
    """
    Create a new item in the 'split' modulestore under an existing course or parent.
    """
    FACTORY_FOR = XModuleDescriptor

    category = 'chapter'
    user_id = 'test_user'
    display_name = factory.LazyAttributeSequence(lambda o, n: "{} {}".format(o.category, n))

    # pylint: disable=W0613
    @classmethod
    def _create(cls, target_class, *args, **kwargs):
        """
        Uses *kwargs*:

        *parent_location* (required): the location of the course & possibly parent

        *category* (defaults to 'chapter')

        *user_id* (defaults to 'test_user')

        *data* (optional): the data for the item

        definition_locator (optional): the DescriptionLocator for the definition this uses or branches

        *display_name* (optional): the display name of the item

        *metadata* (optional): dictionary of metadata attributes (display_name here takes
        precedence over the above attr)
        """
        # Work on a copy so the caller's dict is never modified; a missing or
        # None metadata becomes an empty dict (as PersistentCourseFactory does).
        metadata = dict(kwargs.get('metadata') or {})
        if 'display_name' not in metadata and 'display_name' in kwargs:
            metadata['display_name'] = kwargs['display_name']

        return modulestore('split').create_item(
            kwargs['parent_location'], kwargs['category'], kwargs['user_id'],
            definition_locator=kwargs.get('definition_locator'),
            new_def_data=kwargs.get('data'), metadata=metadata)

    @classmethod
    def _build(cls, target_class, *args, **kwargs):
        """Building without persisting is not supported."""
        raise NotImplementedError()
......@@ -8,9 +8,10 @@ from collections import namedtuple
from pkg_resources import resource_listdir, resource_string, resource_isdir
from xmodule.modulestore import inheritance, Location
from xmodule.modulestore.exceptions import ItemNotFoundError, InsufficientSpecificationError
from xmodule.modulestore.exceptions import ItemNotFoundError, InsufficientSpecificationError, InvalidLocationError
from xblock.core import XBlock, Scope, String, Integer, Float, ModelType
from xmodule.modulestore.locator import BlockUsageLocator
log = logging.getLogger(__name__)
......@@ -27,7 +28,13 @@ class LocationField(ModelType):
"""
Parse the json value as a Location
"""
return Location(value)
try:
return Location(value)
except InvalidLocationError:
if isinstance(value, BlockUsageLocator):
return value
else:
return BlockUsageLocator(value)
def to_json(self, value):
"""
......@@ -166,6 +173,10 @@ class XModule(XModuleFields, HTMLSnippet, XBlock):
self.url_name = self.location.name
if not hasattr(self, 'category'):
self.category = self.location.category
elif isinstance(self.location, BlockUsageLocator):
self.url_name = self.location.usage_id
if not hasattr(self, 'category'):
raise InsufficientSpecificationError()
else:
raise InsufficientSpecificationError()
self._loaded_children = None
......@@ -436,8 +447,17 @@ class XModuleDescriptor(XModuleFields, HTMLSnippet, ResourceTemplates, XBlock):
self.url_name = self.location.name
if not hasattr(self, 'category'):
self.category = self.location.category
elif isinstance(self.location, BlockUsageLocator):
self.url_name = self.location.usage_id
if not hasattr(self, 'category'):
raise InsufficientSpecificationError()
else:
raise InsufficientSpecificationError()
# update_version is the version which last updated this xblock v prev being the penultimate updater
# leaving off original_version since it complicates creation w/o any obv value yet and is computable
# by following previous until None
# definition_locator is only used by mongostores which separate definitions from blocks
self.edited_by = self.edited_on = self.previous_version = self.update_version = self.definition_locator = None
self._child_instances = None
@property
......@@ -514,22 +534,30 @@ class XModuleDescriptor(XModuleFields, HTMLSnippet, ResourceTemplates, XBlock):
# ================================= JSON PARSING ===========================
@staticmethod
def load_from_json(json_data, system, default_class=None):
def load_from_json(json_data, system, default_class=None, parent_xblock=None):
"""
This method instantiates the correct subclass of XModuleDescriptor based
on the contents of json_data.
on the contents of json_data. It does not persist it and can create one which
has no usage id.
json_data must contain a 'location' element, and must be suitable to be
passed into the subclasses `from_json` method as model_data
parent_xblock is used to compute inherited metadata as well as to append the new xblock.
json_data:
- 'location' : must have this field
- 'category': the xmodule category (required or location must be a Location)
- 'metadata': a dict of locally set metadata (not inherited)
- 'children': a list of children's usage_ids w/in this course
- 'definition':
- '_id' (optional): the usage_id of this. Will generate one if not given one.
"""
class_ = XModuleDescriptor.load_class(
json_data['location']['category'],
json_data.get('category', json_data.get('location', {}).get('category')),
default_class
)
return class_.from_json(json_data, system)
return class_.from_json(json_data, system, parent_xblock)
@classmethod
def from_json(cls, json_data, system):
def from_json(cls, json_data, system, parent_xblock=None):
"""
Creates an instance of this descriptor from the supplied json_data.
This may be overridden by subclasses
......@@ -547,28 +575,25 @@ class XModuleDescriptor(XModuleFields, HTMLSnippet, ResourceTemplates, XBlock):
Otherwise, it contains the single field 'data'
4) Any value later in this list overrides a value earlier in this list
system: A DescriptorSystem for interacting with external resources
"""
model_data = {}
for key, value in json_data.get('metadata', {}).items():
model_data[cls._translate(key)] = value
model_data.update(json_data.get('metadata', {}))
definition = json_data.get('definition', {})
if 'children' in definition:
model_data['children'] = definition['children']
if 'data' in definition:
if isinstance(definition['data'], dict):
model_data.update(definition['data'])
else:
model_data['data'] = definition['data']
model_data['location'] = json_data['location']
return cls(system, model_data)
json_data:
- 'category': the xmodule category (required)
- 'metadata': a dict of locally set metadata (not inherited)
- 'children': a list of children's usage_ids w/in this course
- 'definition':
- '_id' (optional): the usage_id of this. Will generate one if not given one.
"""
usage_id = json_data.get('_id', None)
if not '_inherited_metadata' in json_data and parent_xblock is not None:
json_data['_inherited_metadata'] = parent_xblock.xblock_kvs.get_inherited_metadata().copy()
json_metadata = json_data.get('metadata', {})
for field in inheritance.INHERITABLE_METADATA:
if field in json_metadata:
json_data['_inherited_metadata'][field] = json_metadata[field]
new_block = system.xblock_from_json(cls, usage_id, json_data)
if parent_xblock is not None:
parent_xblock.children.append(new_block)
return new_block
@classmethod
def _translate(cls, key):
......
[{"_id" : "GreekHero",
"org" : "testx",
"prettyid" : "test_course",
"versions" : {
"draft" : { "$oid" : "1d00000000000000dddd0000" }
},
"edited_on" : {"$date" : 1364481713238},
"edited_by" : "test@edx.org"},
{"_id" : "wonderful",
"org" : "testx",
"prettyid" : "another_course",
"versions" : {
"draft" : { "$oid" : "1d00000000000000dddd2222" },
"published" : { "$oid" : "1d00000000000000eeee0000" }
},
"edited_on" : {"$date" : 1364481313238},
"edited_by" : "test@edx.org"},
{"_id" : "contender",
"org" : "guestx",
"prettyid" : "test_course",
"versions" : {
"draft" : { "$oid" : "1d00000000000000dddd5555" }},
"edited_on" : {"$date" : 1364491313238},
"edited_by" : "test@guestx.edu"}
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment