Commit b4fa3162 by Don Mitchell

Migrate data from old mongo to split mongo

Create location mapping
Create draft and published course with proper content in split.
parent 379a147a
'''
Code for migrating from other modulestores to the split_mongo modulestore.
Exists at the top level of modulestore b/c it needs to know about and access each modulestore.
In general, its strategy is to treat the other modulestores as read-only and to never directly
manipulate storage but use existing APIs.
'''
from xmodule.modulestore import Location
from xmodule.modulestore.locator import CourseLocator
from xmodule.modulestore.mongo import draft
class SplitMigrator(object):
    """
    Copies courses from old mongo to split mongo and sets up location mapping so any references to the old
    name will be able to find the new elements.

    The migrator only reads from the 'direct' (published) and 'draft' old-mongo stores; every write
    goes through the split modulestore and the location mapper handed to the constructor.
    """
    def __init__(self, split_modulestore, direct_modulestore, draft_modulestore, loc_mapper):
        """
        :param split_modulestore: the destination store; receives the new course and all of its items
        :param direct_modulestore: the old mongo store from which the published items are read
        :param draft_modulestore: the old mongo store from which the draft items are read
        :param loc_mapper: translates old Locations to new locators and records the mapping
        """
        super(SplitMigrator, self).__init__()
        self.split_modulestore = split_modulestore
        self.direct_modulestore = direct_modulestore
        self.draft_modulestore = draft_modulestore
        self.loc_mapper = loc_mapper

    def migrate_mongo_course(self, course_location, user_id, new_course_id=None):
        """
        Create a new course in split_mongo representing the published and draft versions of the course from the
        original mongo store, and return the new_course_id (which the caller can also get by calling
        self.loc_mapper.translate_location(old_course_location)).

        If the new course already exists, this raises DuplicateItemError

        :param course_location: a Location whose category is 'course' and points to the course
        :param user_id: the user whose action is causing this migration
        :param new_course_id: (optional) the Locator.course_id for the new course. Defaults to
            whatever translate_location_to_locator returns
        :return: the course_id under which the course was registered with the location mapper
        """
        new_course_id = self.loc_mapper.create_map_entry(course_location, course_id=new_course_id)
        old_course_id = course_location.course_id
        # the only difference in data between the old and split_mongo xblocks are the locations;
        # so, any field which holds a location must change to a Locator; otherwise, the persistence
        # layer and kvs's know how to store it.
        # locations are in location, children, conditionals, course.tab

        # create the course: set fields to explicitly_set for each scope, id_root = new_course_id,
        # master_branch = 'production'
        original_course = self.direct_modulestore.get_item(course_location)
        new_course_root_locator = self.loc_mapper.translate_location(old_course_id, course_location)
        new_course = self.split_modulestore.create_course(
            course_location.org, original_course.display_name,
            user_id, id_root=new_course_id,
            fields=self._get_json_fields_translate_children(original_course, old_course_id, True),
            root_usage_id=new_course_root_locator.usage_id,
            master_branch=new_course_root_locator.branch
        )

        # published content must go in first: the draft pass updates or extends what it created
        self._copy_published_modules_to_course(new_course, course_location, old_course_id, user_id)
        self._add_draft_modules_to_course(new_course_id, old_course_id, course_location, user_id)

        return new_course_id

    def _copy_published_modules_to_course(self, new_course, old_course_loc, old_course_id, user_id):
        """
        Copy all of the modules from the 'direct' version of the course to the new split course.

        :param new_course: the course object returned by the split store's create_course
        :param old_course_loc: the old Location of the course root
        :param old_course_id: the old course id, used for querying and for location translation
        :param user_id: the user to record as the author of the created items
        """
        course_version_locator = new_course.location.as_course_locator()

        # iterate over published course elements. Wildcarding rather than descending b/c some elements
        # are orphaned (e.g., course about pages, conditionals)
        for module in self.direct_modulestore.get_items(
            old_course_loc.replace(category=None, name=None, revision=None),
            old_course_id
        ):
            # don't copy the course again. No drafts should get here but check
            if module.location != old_course_loc and not getattr(module, 'is_draft', False):
                # create split_xblock using split.create_item
                # where usage_id is computed by translate_location_to_locator
                new_locator = self.loc_mapper.translate_location(
                    old_course_id, module.location, True, add_entry_if_missing=True
                )
                self.split_modulestore.create_item(
                    course_version_locator, module.category, user_id,
                    usage_id=new_locator.usage_id,
                    fields=self._get_json_fields_translate_children(module, old_course_id, True),
                    continue_version=True
                )

        # after done w/ published items, add version for 'draft' pointing to the published structure
        index_info = self.split_modulestore.get_course_index_info(course_version_locator)
        versions = index_info['versions']
        versions['draft'] = versions['published']
        self.split_modulestore.update_course_index(
            course_version_locator, {'versions': versions}, update_versions=True
        )

        # clean up orphans in published version: in old mongo, parents pointed to the union of their
        # published and draft children which meant some pointers were to non-existent locations in 'direct'
        self.split_modulestore.internal_clean_children(course_version_locator)

    def _add_draft_modules_to_course(self, new_course_id, old_course_id, old_course_loc, user_id):
        """
        Update each draft. Create any which don't exist in published and attach to their parents.

        :param new_course_id: the course_id of the new split course
        :param old_course_id: the old course id, used for querying and for location translation
        :param old_course_loc: the old Location of the course root
        :param user_id: the user to record as the author of the changes
        """
        # each true update below will trigger a new version of the structure. We may want to just have
        # one new version but that's for a later date.
        new_draft_course_loc = CourseLocator(course_id=new_course_id, branch='draft')
        # to prevent race conditions of grandchildren being added before their parents and thus having
        # no parent to add to, draft-only items are collected here and attached once all exist
        awaiting_adoption = {}
        for module in self.draft_modulestore.get_items(
            old_course_loc.replace(category=None, name=None, revision=draft.DRAFT),
            old_course_id
        ):
            if not getattr(module, 'is_draft', False):
                continue
            new_locator = self.loc_mapper.translate_location(
                old_course_id, module.location, False, add_entry_if_missing=True
            )
            if self.split_modulestore.has_item(new_course_id, new_locator):
                # was in 'direct' so draft is a new version
                split_module = self.split_modulestore.get_item(new_locator)
                # need to remove any no-longer-explicitly-set values and add/update any now set values.
                for name, field in split_module.fields.iteritems():
                    if field.is_set_on(split_module) and not module.fields[name].is_set_on(module):
                        field.delete_from(split_module)
                for name, field in module.fields.iteritems():
                    # draft children will insert themselves and the others are here already; so,
                    # don't do it 2x
                    if name != 'children' and field.is_set_on(module):
                        field.write_to(split_module, field.read_from(module))

                self.split_modulestore.update_item(split_module, user_id)
            else:
                # only a draft version (aka, 'private'). parent needs updated too.
                # create a new course version just in case the current head is also the prod head
                self.split_modulestore.create_item(
                    new_draft_course_loc, module.category, user_id,
                    usage_id=new_locator.usage_id,
                    fields=self._get_json_fields_translate_children(module, old_course_id, True)
                )
                awaiting_adoption[module.location] = new_locator.usage_id

        self._adopt_draft_only_modules(awaiting_adoption, old_course_id, user_id)

    def _adopt_draft_only_modules(self, awaiting_adoption, old_course_id, user_id):
        """
        Insert each newly created draft-only item into the children of its migrated parent(s).

        :param awaiting_adoption: dict mapping the old Location of a draft-only item to its new usage_id
        :param old_course_id: the old course id, used for parent lookup and location translation
        :param user_id: the user to record as the author of the parent updates
        """
        for draft_location, new_usage_id in awaiting_adoption.iteritems():
            # old parents list their children as url strings. Compute the string once, outside the
            # parent loop: rebinding draft_location itself would make a second parent call .url()
            # on a string.
            draft_url = draft_location.url()
            for parent_loc in self.draft_modulestore.get_parent_locations(draft_location, old_course_id):
                old_parent = self.draft_modulestore.get_item(parent_loc)
                new_parent = self.split_modulestore.get_item(
                    self.loc_mapper.translate_location(old_course_id, old_parent.location, False)
                )
                # this only occurs if the parent was also awaiting adoption; skip just this parent
                # so that any other parent is still examined
                if new_usage_id in new_parent.children:
                    continue
                # find index for module: new_parent may be missing quite a few of old_parent's children
                new_parent_cursor = 0
                for old_child_loc in old_parent.children:
                    if old_child_loc == draft_url:
                        break
                    sibling_loc = self.loc_mapper.translate_location(
                        old_course_id, Location(old_child_loc), False
                    )
                    # sibling may move cursor: place the new item after the last preceding sibling
                    # which made it into the new parent
                    for idx in range(new_parent_cursor, len(new_parent.children)):
                        if new_parent.children[idx] == sibling_loc.usage_id:
                            new_parent_cursor = idx + 1
                            break
                new_parent.children.insert(new_parent_cursor, new_usage_id)
                self.split_modulestore.update_item(new_parent, user_id)

    def _get_json_fields_translate_children(self, xblock, old_course_id, published):
        """
        Get the explicitly set fields of xblock as json, with any 'children' converted from old
        location strings to the usage_ids the location mapper assigns them.

        :param xblock: the old-mongo xblock being migrated
        :param old_course_id: the old course id, used for location translation
        :param published: passed through to the location mapper's translate_location
        """
        fields = self.get_json_fields_explicitly_set(xblock)
        # this will too generously copy the children even for ones that don't exist in the published
        # b/c the old mongo had no way of not having parents point to draft only children :-(
        if 'children' in fields:
            fields['children'] = [
                self.loc_mapper.translate_location(
                    old_course_id, Location(child), published, add_entry_if_missing=True
                ).usage_id
                for child in fields['children']]
        return fields

    def get_json_fields_explicitly_set(self, xblock):
        """
        Get the json repr for fields set on this specific xblock

        :param xblock: the xblock whose explicitly set fields are wanted
        :return: dict of field name to json value, holding only fields for which is_set_on is true
        """
        return {field.name: field.read_json(xblock) for field in xblock.fields.itervalues() if field.is_set_on(xblock)}
......@@ -1201,7 +1201,12 @@ class SplitMongoModuleStore(ModuleStoreBase):
inheriting_settings[field_name] = block_fields[field_name]
for child in block_fields.get('children', []):
self.inherit_settings(block_map, block_map[child], inheriting_settings)
try:
self.inherit_settings(block_map, block_map[child], inheriting_settings)
except KeyError:
# here's where we need logic for looking up in other structures when we allow cross pointers
# but it's also getting this during course creation if creating top down w/ children set.
pass
def descendants(self, block_map, usage_id, depth, descendent_map):
"""
......@@ -1236,6 +1241,22 @@ class SplitMongoModuleStore(ModuleStoreBase):
else:
return DescriptionLocator(definition['_id'])
def internal_clean_children(self, course_locator):
    """
    Low-level maintenance helper: prune dangling child references from a course structure.

    Every block which has a 'children' entry in its 'fields' gets that list filtered down to the
    usage_ids which are themselves blocks of the same structure. No new course version is
    generated; the stored structure document is overwritten in place.

    :param course_locator: the course to clean
    """
    structure = self._lookup_course(course_locator)
    known_blocks = structure['blocks']
    for block in known_blocks.itervalues():
        # nothing to prune on blocks which carry no children list
        if 'fields' not in block or 'children' not in block['fields']:
            continue
        surviving = []
        for child_id in block['fields']["children"]:
            if child_id in known_blocks:
                surviving.append(child_id)
        block['fields']["children"] = surviving
    # persist over the existing document rather than creating a new version
    self.structures.update({'_id': structure['_id']}, structure)
def _block_matches(self, value, qualifiers):
'''
......
"""
Created on Sep 10, 2013
@author: dmitchell
Tests for split_migrator
"""
import unittest
import uuid
import random
import mock
import datetime
from xmodule.fields import Date
from xmodule.modulestore import Location
from xmodule.modulestore.inheritance import InheritanceMixin
from xmodule.modulestore.loc_mapper_store import LocMapperStore
from xmodule.modulestore.mongo.draft import DraftModuleStore
from xmodule.modulestore.split_mongo.split import SplitMongoModuleStore
from xmodule.modulestore.mongo.base import MongoModuleStore
from xmodule.modulestore.split_migrator import SplitMigrator
from xmodule.modulestore.mongo import draft
class TestMigration(unittest.TestCase):
    """
    Builds a course in the old mongo stores (direct and draft), migrates it with SplitMigrator,
    and checks the split course against the originals.
    """
    # Snippet of what would be in the django settings envs file
    modulestore_options = {
        'default_class': 'xmodule.raw_module.RawDescriptor',
        'host': 'localhost',
        'db': 'test_xmodule',
        'collection': 'modulestore{0}'.format(uuid.uuid4().hex),
        'fs_root': '',
        'render_template': mock.Mock(return_value=""),
        'xblock_mixins': (InheritanceMixin,)
    }

    def setUp(self):
        """
        Instantiate the stores and the migrator from the shared options and create the source course.
        """
        super(TestMigration, self).setUp()
        self.loc_mapper = LocMapperStore(**self.modulestore_options)
        self.old_mongo = MongoModuleStore(**self.modulestore_options)
        self.draft_mongo = DraftModuleStore(**self.modulestore_options)
        self.split_mongo = SplitMongoModuleStore(
            loc_mapper=self.loc_mapper, **self.modulestore_options
        )
        self.migrator = SplitMigrator(self.split_mongo, self.old_mongo, self.draft_mongo, self.loc_mapper)
        self.course_location = None
        self.create_source_course()

    def tearDown(self):
        """
        Drop every collection the test touched and close the connection.
        """
        dbref = self.loc_mapper.db
        dbref.drop_collection(self.loc_mapper.location_map)
        split_db = self.split_mongo.db
        split_db.drop_collection(split_db.course_index)
        split_db.drop_collection(split_db.structures)
        split_db.drop_collection(split_db.definitions)
        # old_mongo doesn't give a db attr, but all of the dbs are the same
        dbref.drop_collection(self.old_mongo.collection)

        dbref.connection.close()

        super(TestMigration, self).tearDown()

    def _create_and_get_item(self, store, location, data, metadata, runtime=None):
        """
        Create and save an xmodule at location in store, then return it as fetched back from the store.
        """
        store.create_and_save_xmodule(location, data, metadata, runtime)
        return store.get_item(location)

    def create_source_course(self):
        """
        A course testing all of the conversion mechanisms:
        * some inheritable settings
        * sequences w/ draft and live intermixed children to ensure all get to the draft but
        only the live ones get to published. Some are only draft, some are both, some are only live.
        * about, static_tab, and conditional documents
        """
        location = Location('i4x', 'test_org', 'test_course', 'course', 'runid')
        self.course_location = location
        date_proxy = Date()
        metadata = {
            'start': date_proxy.to_json(datetime.datetime(2000, 3, 13, 4)),
            'display_name': 'Migration test course',
        }
        data = {
            'wiki_slug': 'test_course_slug'
        }
        course_root = self._create_and_get_item(self.old_mongo, location, data, metadata)
        runtime = course_root.runtime
        # chapters
        location = location.replace(category='chapter', name=uuid.uuid4().hex)
        chapter1 = self._create_and_get_item(self.old_mongo, location, {}, {'display_name': 'Chapter 1'}, runtime)
        course_root.children.append(chapter1.location.url())
        location = location.replace(category='chapter', name=uuid.uuid4().hex)
        chapter2 = self._create_and_get_item(self.old_mongo, location, {}, {'display_name': 'Chapter 2'}, runtime)
        course_root.children.append(chapter2.location.url())
        self.old_mongo.update_children(course_root.location, course_root.children)
        # vertical in live only
        location = location.replace(category='vertical', name=uuid.uuid4().hex)
        live_vert = self._create_and_get_item(self.old_mongo, location, {}, {'display_name': 'Live vertical'}, runtime)
        chapter1.children.append(live_vert.location.url())
        self.create_random_units(self.old_mongo, live_vert)
        # vertical in both live and draft
        location = location.replace(category='vertical', name=uuid.uuid4().hex)
        both_vert = self._create_and_get_item(
            self.old_mongo, location, {}, {'display_name': 'Both vertical'}, runtime
        )
        draft_both = self._create_and_get_item(
            self.draft_mongo, location, {}, {'display_name': 'Both vertical renamed'}, runtime
        )
        chapter1.children.append(both_vert.location.url())
        self.create_random_units(self.old_mongo, both_vert, self.draft_mongo, draft_both)
        # vertical in draft only (x2)
        location = location.replace(category='vertical', name=uuid.uuid4().hex)
        draft_vert = self._create_and_get_item(self.draft_mongo,
            location, {}, {'display_name': 'Draft vertical'}, runtime)
        chapter1.children.append(draft_vert.location.url())
        self.create_random_units(self.draft_mongo, draft_vert)
        location = location.replace(category='vertical', name=uuid.uuid4().hex)
        draft_vert = self._create_and_get_item(self.draft_mongo,
            location, {}, {'display_name': 'Draft vertical2'}, runtime)
        chapter1.children.append(draft_vert.location.url())
        self.create_random_units(self.draft_mongo, draft_vert)

        # and finally one in live only (so published has to skip 2)
        location = location.replace(category='vertical', name=uuid.uuid4().hex)
        live_vert = self._create_and_get_item(self.old_mongo,
            location, {}, {'display_name': 'Live vertical end'}, runtime)
        chapter1.children.append(live_vert.location.url())
        self.create_random_units(self.old_mongo, live_vert)

        # update the chapter
        self.old_mongo.update_children(chapter1.location, chapter1.children)

        # now the other one w/ the conditional
        # first create some show children
        indirect1 = self._create_and_get_item(self.old_mongo,
            location.replace(category='discussion', name=uuid.uuid4().hex),
            "", {'display_name': 'conditional show 1'}, runtime
        )
        indirect2 = self._create_and_get_item(self.old_mongo,
            location.replace(category='html', name=uuid.uuid4().hex),
            "", {'display_name': 'conditional show 2'}, runtime
        )

        location = location.replace(category='conditional', name=uuid.uuid4().hex)
        metadata = {
            'xml_attributes': {
                'sources': [live_vert.location.url(), ],
                'completed': True,
            },
        }
        data = {
            'show_tag_list': [indirect1.location.url(), indirect2.location.url()]
        }
        conditional = self._create_and_get_item(self.old_mongo, location, data, metadata, runtime)
        conditional.children = [indirect1.location.url(), indirect2.location.url()]
        # add direct children
        self.create_random_units(self.old_mongo, conditional)
        chapter2.children.append(conditional.location.url())
        self.old_mongo.update_children(chapter2.location, chapter2.children)

        # and the ancillary docs (not children)
        location = location.replace(category='static_tab', name=uuid.uuid4().hex)
        # the below automatically adds the tab to the course
        _tab = self._create_and_get_item(self.old_mongo, location, "", {'display_name': 'Tab uno'}, runtime)

        location = location.replace(category='about', name='overview')
        _overview = self._create_and_get_item(self.old_mongo, location, "<p>test</p>", {}, runtime)
        location = location.replace(category='course_info', name='updates')
        _updates = self._create_and_get_item(self.old_mongo,
            location, "<ol><li><h2>Sep 22</h2><p>test</p></li></ol>", {}, runtime
        )

    def create_random_units(self, store, parent, cc_store=None, cc_parent=None):
        """
        Create a random selection of units under the given parent w/ random names & attrs
        :param store: which store (e.g., direct/draft) to create them in
        :param parent: the parent to have point to them
        :param cc_store: (optional) if given, make a small change and save also to this store but w/ same location
        (only makes sense if store is 'direct' and this is 'draft' or vice versa)
        :param cc_parent: the parent in cc_store to have point to the copies; must be given whenever
        cc_store is given
        """
        for _ in range(random.randrange(6)):
            location = parent.location.replace(
                category=random.choice(['html', 'video', 'problem', 'discussion']),
                name=uuid.uuid4().hex
            )
            metadata = {'display_name': str(uuid.uuid4()), 'graded': True}
            data = {}
            element = self._create_and_get_item(store, location, data, metadata, parent.runtime)
            parent.children.append(element.location.url())
            if cc_store is not None:
                # change display_name and remove graded to test the delta
                element = self._create_and_get_item(
                    cc_store, location, data, {'display_name': str(uuid.uuid4())}, parent.runtime
                )
                cc_parent.children.append(element.location.url())
        store.update_children(parent.location, parent.children)
        if cc_store is not None:
            cc_store.update_children(cc_parent.location, cc_parent.children)

    def compare_courses(self, presplit, published):
        """
        Compare the course held in presplit with its migrated counterpart in the split store.

        :param presplit: the old store (direct or draft) holding the original course
        :param published: passed to the location mapper when translating old locations
        """
        # descend via children to do comparison
        old_root = presplit.get_item(self.course_location, depth=None)
        new_root_locator = self.loc_mapper.translate_location(
            self.course_location.course_id, self.course_location, published, add_entry_if_missing=False
        )
        new_root = self.split_mongo.get_course(new_root_locator)
        self.compare_dags(presplit, old_root, new_root, published)

        # grab the detached items to compare they should be in both published and draft
        for category in ['conditional', 'about', 'course_info', 'static_tab']:
            location = self.course_location.replace(name=None, category=category)
            for detached in presplit.get_items(location):
                locator = self.loc_mapper.translate_location(
                    self.course_location.course_id,
                    detached.location, published, add_entry_if_missing=False
                )
                self.compare_dags(presplit, detached, self.split_mongo.get_item(locator), published)

    def compare_dags(self, presplit, presplit_dag_root, split_dag_root, published):
        """
        Recursively check that the subtree rooted at presplit_dag_root matches the one rooted at
        split_dag_root: same location, same field values, and the same children in the same order.
        """
        # check that locations match
        self.assertEqual(
            presplit_dag_root.location,
            self.loc_mapper.translate_locator_to_location(split_dag_root.location).replace(revision=None)
        )
        # compare all fields but children
        for name in presplit_dag_root.fields.iterkeys():
            if name != 'children':
                self.assertEqual(
                    getattr(presplit_dag_root, name),
                    getattr(split_dag_root, name),
                    "{}/{}: {} != {}".format(
                        split_dag_root.location, name, getattr(presplit_dag_root, name), getattr(split_dag_root, name)
                    )
                )
        # test split get_item using old Location: old draft store didn't set revision for things above vertical
        # but split does distinguish these; so, set revision if not published
        if not published:
            location = draft.as_draft(presplit_dag_root.location)
        else:
            location = presplit_dag_root.location
        refetched = self.split_mongo.get_item(location)
        self.assertEqual(
            refetched.location, split_dag_root.location,
            "Fetch from split via old Location {} not same as new {}".format(
                refetched.location, split_dag_root.location
            )
        )
        # compare children
        if presplit_dag_root.has_children:
            # fetch each side's children once and reuse them for the count check and the descent
            presplit_children = presplit_dag_root.get_children()
            split_children = split_dag_root.get_children()
            self.assertEqual(
                len(presplit_children), len(split_children),
                # report counts on both sides, as the message text promises
                "{0.category} '{0.display_name}': children count {1} != {2}".format(
                    presplit_dag_root, len(presplit_children), len(split_children)
                )
            )
            for pre_child, split_child in zip(presplit_children, split_children):
                self.compare_dags(presplit, pre_child, split_child, published)

    def test_migrator(self):
        """
        Migrate the source course, then compare it to both the published and the draft originals.
        """
        self.migrator.migrate_mongo_course(self.course_location, random.getrandbits(32))
        # now compare the migrated to the original course
        self.compare_courses(self.old_mongo, True)
        self.compare_courses(self.draft_mongo, False)
......@@ -648,6 +648,13 @@ class TestItemCrud(SplitModuleTest):
continue_version=True
)
# start a new transaction
new_ele = modulestore().create_item(
new_course.location, 'chapter', user,
fields={'display_name': 'chapter 2'},
continue_version=False
)
transaction_guid = new_ele.location.version_guid
# ensure force w/ continue gives exception
with self.assertRaises(VersionConflictError):
_fail = modulestore().create_item(
......@@ -656,13 +663,6 @@ class TestItemCrud(SplitModuleTest):
force=True, continue_version=True
)
# start a new transaction
new_ele = modulestore().create_item(
new_course.location, 'chapter', user,
fields={'display_name': 'chapter 2'},
continue_version=False
)
transaction_guid = new_ele.location.version_guid
# ensure trying to continue the old one gives exception
with self.assertRaises(VersionConflictError):
_fail = modulestore().create_item(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment