Commit 92ca64fa by Clinton Blackburn

Finishing async course structure work

- Added tests
- Updated model field specification
- Fixed issue of multiple event emission
- Updated admin page
- Added management command to manually generate course structures
parent 88681ba9
"""Models for the util app. """
import cStringIO
import gzip
import logging
from django.db import models
from django.db.models.signals import post_init
from django.utils.text import compress_string
from config_models.models import ConfigurationModel
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
class RateLimitConfiguration(ConfigurationModel):
"""Configuration flag to enable/disable rate limiting.
......@@ -12,3 +23,65 @@ class RateLimitConfiguration(ConfigurationModel):
with the `can_disable_rate_limit` class decorator.
"""
pass
def uncompress_string(s):
    """
    Helper function to reverse CompressedTextField.get_prep_value.

    Expects a base64-encoded, gzip-compressed string. If decoding or
    decompression fails for any reason, the error is logged and the
    original value is returned unchanged as a best-effort fallback.
    """
    try:
        compressed = s.encode('utf').decode('base64')
        zipped_buffer = cStringIO.StringIO(compressed)
        unzipper = gzip.GzipFile(fileobj=zipped_buffer)
        result = unzipper.read()
        unzipper.close()
    except Exception as exc:  # pylint: disable=broad-except
        # Deliberately broad: corrupted rows should degrade, not crash.
        logger.error('String decompression failed. There may be corrupted data in the database: %s', exc)
        result = s
    return result
class CompressedTextField(models.TextField):
    """transparently compress data before hitting the db and uncompress after fetching"""

    def get_prep_value(self, value):
        """
        Compress and base64-encode *value* so it is safe to store in a text column.

        None is passed through untouched so NULLs survive round-tripping.
        """
        if value is not None:
            if isinstance(value, unicode):
                # gzip works on bytes, so encode unicode before compressing.
                value = value.encode('utf8')
            value = compress_string(value)
            # Base64 keeps the binary gzip payload representable as text.
            value = value.encode('base64').decode('utf8')
        return value

    def post_init(self, instance=None, **kwargs):  # pylint: disable=unused-argument
        """Signal handler: replace the stored (compressed) value with the uncompressed one."""
        value = self._get_val_from_obj(instance)
        if value:
            setattr(instance, self.attname, value)

    def contribute_to_class(self, cls, name):
        """Attach the field and hook post_init so loaded instances are auto-decompressed."""
        super(CompressedTextField, self).contribute_to_class(cls, name)
        post_init.connect(self.post_init, sender=cls)

    def _get_val_from_obj(self, obj):
        """
        Return the uncompressed field value read from *obj*, or the field
        default when obj/value is missing.
        """
        if obj:
            value = uncompress_string(getattr(obj, self.attname))
            if value is not None:
                try:
                    value = value.decode('utf8')
                except UnicodeDecodeError:
                    # Value may already be unicode (e.g. the decompression
                    # fallback returned the original string); keep it as-is.
                    pass
                return value
            else:
                return self.get_default()
        else:
            return self.get_default()

    def south_field_triple(self):
        """Returns a suitable description of this field for South."""
        # We'll just introspect the _actual_ field.
        from south.modelsinspector import introspector
        field_class = "django.db.models.fields.TextField"
        args, kwargs = introspector(self)
        # That's our definition!
        return field_class, args, kwargs
......@@ -99,7 +99,8 @@ class OpaqueKeyField(models.CharField):
if value is self.Empty or value is None:
return None
assert isinstance(value, (basestring, self.KEY_CLASS))
assert isinstance(value, (basestring, self.KEY_CLASS)), \
"%s is not an instance of basestring or %s" % (value, self.KEY_CLASS)
if value == '':
# handle empty string for models being created w/o fields populated
return None
......@@ -123,7 +124,7 @@ class OpaqueKeyField(models.CharField):
if value is self.Empty or value is None:
return '' # CharFields should use '' as their empty value, rather than None
assert isinstance(value, self.KEY_CLASS)
assert isinstance(value, self.KEY_CLASS), "%s is not an instance of %s" % (value, self.KEY_CLASS)
return unicode(_strip_value(value))
def validate(self, value, model_instance):
......
......@@ -170,7 +170,7 @@ class BulkOperationsMixin(object):
self._active_bulk_ops = ActiveBulkThread(self._bulk_ops_record_type)
@contextmanager
def bulk_operations(self, course_id):
def bulk_operations(self, course_id, emit_signals=True):
"""
A context manager for notifying the store of bulk operations. This affects only the current thread.
......@@ -181,7 +181,7 @@ class BulkOperationsMixin(object):
self._begin_bulk_operation(course_id)
yield
finally:
self._end_bulk_operation(course_id)
self._end_bulk_operation(course_id, emit_signals)
# the relevant type of bulk_ops_record for the mixin (overriding classes should override
# this variable)
......@@ -197,12 +197,14 @@ class BulkOperationsMixin(object):
# Retrieve the bulk record based on matching org/course/run (possibly ignoring case)
if ignore_case:
for key, record in self._active_bulk_ops.records.iteritems():
if (
# Shortcut: check basic equivalence for cases where org/course/run might be None.
if key == course_key or (
key.org.lower() == course_key.org.lower() and
key.course.lower() == course_key.course.lower() and
key.run.lower() == course_key.run.lower()
):
return record
return self._active_bulk_ops.records[course_key.for_branch(None)]
@property
......@@ -242,7 +244,7 @@ class BulkOperationsMixin(object):
if bulk_ops_record.is_root:
self._start_outermost_bulk_operation(bulk_ops_record, course_key)
def _end_outermost_bulk_operation(self, bulk_ops_record, course_key):
def _end_outermost_bulk_operation(self, bulk_ops_record, course_key, emit_signals=True):
"""
The outermost nested bulk_operation call: do the actual end of the bulk operation.
......@@ -250,7 +252,7 @@ class BulkOperationsMixin(object):
"""
pass
def _end_bulk_operation(self, course_key):
def _end_bulk_operation(self, course_key, emit_signals=True):
"""
End the active bulk operation on course_key.
"""
......@@ -266,7 +268,7 @@ class BulkOperationsMixin(object):
if bulk_ops_record.active:
return
self._end_outermost_bulk_operation(bulk_ops_record, course_key)
self._end_outermost_bulk_operation(bulk_ops_record, course_key, emit_signals)
self._clear_bulk_ops_record(course_key)
......@@ -900,7 +902,7 @@ class ModuleStoreRead(ModuleStoreAssetBase):
pass
@contextmanager
def bulk_operations(self, course_id):
def bulk_operations(self, course_id, emit_signals=True): # pylint: disable=unused-argument
"""
A context manager for notifying the store of bulk operations. This affects only the current thread.
"""
......@@ -1242,10 +1244,11 @@ class ModuleStoreWriteBase(ModuleStoreReadBase, ModuleStoreWrite):
This base method just copies the assets. The lower level impls must do the actual cloning of
content.
"""
# copy the assets
if self.contentstore:
self.contentstore.copy_all_course_assets(source_course_id, dest_course_id)
return dest_course_id
with self.bulk_operations(dest_course_id):
# copy the assets
if self.contentstore:
self.contentstore.copy_all_course_assets(source_course_id, dest_course_id)
return dest_course_id
def delete_course(self, course_key, user_id, **kwargs):
"""
......
......@@ -7,25 +7,30 @@ Passes settings.MODULESTORE as kwargs to MongoModuleStore
from __future__ import absolute_import
from importlib import import_module
import logging
import re
from django.conf import settings
# This configuration must be executed BEFORE any additional Django imports. Otherwise, the imports may fail due to
# Django not being configured properly. This mostly applies to tests.
if not settings.configured:
settings.configure()
from django.core.cache import get_cache, InvalidCacheBackendError
import django.dispatch
import django.utils
import re
from xmodule.util.django import get_current_request_hostname
import xmodule.modulestore # pylint: disable=unused-import
from xmodule.modulestore.mixed import MixedModuleStore
from xmodule.modulestore.draft_and_published import BranchSettingMixin
from xmodule.contentstore.django import contentstore
from xmodule.modulestore.draft_and_published import BranchSettingMixin
from xmodule.modulestore.mixed import MixedModuleStore
from xmodule.util.django import get_current_request_hostname
import xblock.reference.plugins
# We may not always have the request_cache module available
try:
# We may not always have the request_cache module available
from request_cache.middleware import RequestCache
HAS_REQUEST_CACHE = True
except ImportError:
HAS_REQUEST_CACHE = False
......@@ -34,12 +39,15 @@ except ImportError:
try:
from xblock_django.user_service import DjangoXBlockUserService
from crum import get_current_user
HAS_USER_SERVICE = True
except ImportError:
HAS_USER_SERVICE = False
log = logging.getLogger(__name__)
ASSET_IGNORE_REGEX = getattr(settings, "ASSET_IGNORE_REGEX", r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)")
class SignalHandler(object):
"""
This class is to allow the modulestores to emit signals that can be caught
......@@ -55,7 +63,7 @@ class SignalHandler(object):
@receiver(SignalHandler.course_published)
def listen_for_course_publish(sender, course_key, **kwargs):
do_my_expensive_update(course_key)
do_my_expensive_update.delay(course_key)
@task()
def do_my_expensive_update(course_key):
......@@ -67,7 +75,7 @@ class SignalHandler(object):
2. The sender is going to be the class of the modulestore sending it.
3. Always have **kwargs in your signal handler, as new things may be added.
4. The thing that listens for the signal lives in process, but should do
almost no work. It's main job is to kick off the celery task that will
almost no work. Its main job is to kick off the celery task that will
do the actual work.
"""
......@@ -81,8 +89,14 @@ class SignalHandler(object):
self.modulestore_class = modulestore_class
def send(self, signal_name, **kwargs):
"""
Send the signal to the receivers.
"""
signal = self._mapping[signal_name]
signal.send_robust(sender=self.modulestore_class, **kwargs)
responses = signal.send_robust(sender=self.modulestore_class, **kwargs)
for receiver, response in responses:
log.info('Sent %s signal to %s with kwargs %s. Response was: %s', signal_name, receiver, kwargs, response)
def load_function(path):
......@@ -196,6 +210,7 @@ class ModuleI18nService(object):
i18n service.
"""
def __getattr__(self, name):
return getattr(django.utils.translation, name)
......@@ -213,6 +228,7 @@ class ModuleI18nService(object):
# right there. If you are reading this comment after April 1, 2014,
# then Cale was a liar.
from util.date_utils import strftime_localized
return strftime_localized(*args, **kwargs)
......@@ -224,6 +240,7 @@ def _get_modulestore_branch_setting():
The value of the branch setting is cached in a thread-local variable so it is not repeatedly recomputed
"""
def get_branch_setting():
"""
Finds and returns the branch setting based on the Django request and the configuration settings
......
......@@ -637,6 +637,7 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
* copy the assets
* migrate the courseware
"""
source_modulestore = self._get_modulestore_for_courselike(source_course_id)
# for a temporary period of time, we may want to hardcode dest_modulestore as split if there's a split
# to have only course re-runs go to split. This code, however, uses the config'd priority
......@@ -646,9 +647,9 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
if dest_modulestore.get_modulestore_type() == ModuleStoreEnum.Type.split:
split_migrator = SplitMigrator(dest_modulestore, source_modulestore)
split_migrator.migrate_mongo_course(
source_course_id, user_id, dest_course_id.org, dest_course_id.course, dest_course_id.run, fields, **kwargs
)
split_migrator.migrate_mongo_course(source_course_id, user_id, dest_course_id.org,
dest_course_id.course, dest_course_id.run, fields, **kwargs)
# the super handles assets and any other necessities
super(MixedModuleStore, self).clone_course(source_course_id, dest_course_id, user_id, fields, **kwargs)
else:
......@@ -918,13 +919,13 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
yield
@contextmanager
def bulk_operations(self, course_id):
def bulk_operations(self, course_id, emit_signals=True):
"""
A context manager for notifying the store of bulk operations.
If course_id is None, the default store is used.
"""
store = self._get_modulestore_for_courselike(course_id)
with store.bulk_operations(course_id):
with store.bulk_operations(course_id, emit_signals):
yield
def ensure_indexes(self):
......
......@@ -448,13 +448,17 @@ class MongoBulkOpsMixin(BulkOperationsMixin):
# ensure it starts clean
bulk_ops_record.dirty = False
def _end_outermost_bulk_operation(self, bulk_ops_record, course_id):
def _end_outermost_bulk_operation(self, bulk_ops_record, course_id, emit_signals=True):
"""
Restart updating the meta-data inheritance cache for the given course.
Refresh the meta-data inheritance cache now since it was temporarily disabled.
"""
if bulk_ops_record.dirty:
self.refresh_cached_metadata_inheritance_tree(course_id)
if emit_signals and self.signal_handler:
self.signal_handler.send("course_published", course_key=course_id)
bulk_ops_record.dirty = False # brand spanking clean now
def _is_in_bulk_operation(self, course_id, ignore_case=False):
......@@ -1119,14 +1123,15 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
if courses.count() > 0:
raise DuplicateCourseError(course_id, courses[0]['_id'])
xblock = self.create_item(user_id, course_id, 'course', course_id.run, fields=fields, **kwargs)
with self.bulk_operations(course_id):
xblock = self.create_item(user_id, course_id, 'course', course_id.run, fields=fields, **kwargs)
# create any other necessary things as a side effect
super(MongoModuleStore, self).create_course(
org, course, run, user_id, runtime=xblock.runtime, **kwargs
)
# create any other necessary things as a side effect
super(MongoModuleStore, self).create_course(
org, course, run, user_id, runtime=xblock.runtime, **kwargs
)
return xblock
return xblock
def create_xblock(
self, runtime, course_key, block_type, block_id=None, fields=None,
......@@ -1307,6 +1312,8 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
is_publish_root: when publishing, this indicates whether xblock is the root of the publish and should
therefore propagate subtree edit info up the tree
"""
course_key = xblock.location.course_key
try:
definition_data = self._serialize_scope(xblock, Scope.content)
now = datetime.now(UTC)
......@@ -1358,8 +1365,8 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
except ItemNotFoundError:
if not allow_not_found:
raise
elif not self.has_course(xblock.location.course_key):
raise ItemNotFoundError(xblock.location.course_key)
elif not self.has_course(course_key):
raise ItemNotFoundError(course_key)
return xblock
......
......@@ -167,44 +167,46 @@ class DraftModuleStore(MongoModuleStore):
if not self.has_course(source_course_id):
raise ItemNotFoundError("Cannot find a course at {0}. Aborting".format(source_course_id))
# verify that the dest_location really is an empty course
# b/c we don't want the payload, I'm copying the guts of get_items here
query = self._course_key_to_son(dest_course_id)
query['_id.category'] = {'$nin': ['course', 'about']}
if self.collection.find(query).limit(1).count() > 0:
raise DuplicateCourseError(
dest_course_id,
"Course at destination {0} is not an empty course. You can only clone into an empty course. Aborting...".format(
dest_course_id
with self.bulk_operations(dest_course_id):
# verify that the dest_location really is an empty course
# b/c we don't want the payload, I'm copying the guts of get_items here
query = self._course_key_to_son(dest_course_id)
query['_id.category'] = {'$nin': ['course', 'about']}
if self.collection.find(query).limit(1).count() > 0:
raise DuplicateCourseError(
dest_course_id,
"Course at destination {0} is not an empty course. "
"You can only clone into an empty course. Aborting...".format(
dest_course_id
)
)
)
# clone the assets
super(DraftModuleStore, self).clone_course(source_course_id, dest_course_id, user_id, fields)
# clone the assets
super(DraftModuleStore, self).clone_course(source_course_id, dest_course_id, user_id, fields)
# get the whole old course
new_course = self.get_course(dest_course_id)
if new_course is None:
# create_course creates the about overview
new_course = self.create_course(
dest_course_id.org, dest_course_id.course, dest_course_id.run, user_id, fields=fields
)
else:
# update fields on existing course
for key, value in fields.iteritems():
setattr(new_course, key, value)
self.update_item(new_course, user_id)
# get the whole old course
new_course = self.get_course(dest_course_id)
if new_course is None:
# create_course creates the about overview
new_course = self.create_course(
dest_course_id.org, dest_course_id.course, dest_course_id.run, user_id, fields=fields
)
else:
# update fields on existing course
for key, value in fields.iteritems():
setattr(new_course, key, value)
self.update_item(new_course, user_id)
# Get all modules under this namespace which is (tag, org, course) tuple
modules = self.get_items(source_course_id, revision=ModuleStoreEnum.RevisionOption.published_only)
self._clone_modules(modules, dest_course_id, user_id)
course_location = dest_course_id.make_usage_key('course', dest_course_id.run)
self.publish(course_location, user_id)
# Get all modules under this namespace which is (tag, org, course) tuple
modules = self.get_items(source_course_id, revision=ModuleStoreEnum.RevisionOption.published_only)
self._clone_modules(modules, dest_course_id, user_id)
course_location = dest_course_id.make_usage_key('course', dest_course_id.run)
self.publish(course_location, user_id)
modules = self.get_items(source_course_id, revision=ModuleStoreEnum.RevisionOption.draft_only)
self._clone_modules(modules, dest_course_id, user_id)
modules = self.get_items(source_course_id, revision=ModuleStoreEnum.RevisionOption.draft_only)
self._clone_modules(modules, dest_course_id, user_id)
return True
return True
def _clone_modules(self, modules, dest_course_id, user_id):
"""Clones each module into the given course"""
......@@ -447,7 +449,12 @@ class DraftModuleStore(MongoModuleStore):
# if the revision is published, defer to base
if draft_loc.revision == MongoRevisionKey.published:
return super(DraftModuleStore, self).update_item(xblock, user_id, allow_not_found)
item = super(DraftModuleStore, self).update_item(xblock, user_id, allow_not_found)
course_key = xblock.location.course_key
bulk_record = self._get_bulk_ops_record(course_key)
if self.signal_handler and not bulk_record.active:
self.signal_handler.send("course_published", course_key=course_key)
return item
if not super(DraftModuleStore, self).has_item(draft_loc):
try:
......@@ -715,15 +722,17 @@ class DraftModuleStore(MongoModuleStore):
_verify_revision_is_published(location)
_internal_depth_first(location, True)
course_key = location.course_key
bulk_record = self._get_bulk_ops_record(course_key)
if len(to_be_deleted) > 0:
bulk_record = self._get_bulk_ops_record(location.course_key)
bulk_record.dirty = True
self.collection.remove({'_id': {'$in': to_be_deleted}})
if self.signal_handler and not bulk_record.active:
self.signal_handler.send("course_published", course_key=course_key)
# Now it's been published, add the object to the courseware search index so that it appears in search results
CoursewareSearchIndexer.add_to_search_index(self, location)
if self.signal_handler:
self.signal_handler.send("course_published", course_key=location.course_key)
return self.get_item(as_published(location))
......@@ -737,6 +746,11 @@ class DraftModuleStore(MongoModuleStore):
self._verify_branch_setting(ModuleStoreEnum.Branch.draft_preferred)
self._convert_to_draft(location, user_id, delete_published=True)
course_key = location.course_key
bulk_record = self._get_bulk_ops_record(course_key)
if self.signal_handler and not bulk_record.active:
self.signal_handler.send("course_published", course_key=course_key)
def revert_to_published(self, location, user_id=None):
"""
Reverts an item to its last published version (recursively traversing all of its descendants).
......
......@@ -229,12 +229,17 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
# Ensure that any edits to the index don't pollute the initial_index
bulk_write_record.index = copy.deepcopy(bulk_write_record.initial_index)
def _end_outermost_bulk_operation(self, bulk_write_record, course_key):
def _end_outermost_bulk_operation(self, bulk_write_record, course_key, emit_signals=True):
"""
End the active bulk write operation on course_key.
"""
dirty = False
# If the content is dirty, then update the database
for _id in bulk_write_record.structures.viewkeys() - bulk_write_record.structures_in_db:
dirty = True
try:
self.db_connection.insert_structure(bulk_write_record.structures[_id])
except DuplicateKeyError:
......@@ -244,6 +249,8 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
log.debug("Attempted to insert duplicate structure %s", _id)
for _id in bulk_write_record.definitions.viewkeys() - bulk_write_record.definitions_in_db:
dirty = True
try:
self.db_connection.insert_definition(bulk_write_record.definitions[_id])
except DuplicateKeyError:
......@@ -253,11 +260,18 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
log.debug("Attempted to insert duplicate definition %s", _id)
if bulk_write_record.index is not None and bulk_write_record.index != bulk_write_record.initial_index:
dirty = True
if bulk_write_record.initial_index is None:
self.db_connection.insert_course_index(bulk_write_record.index)
else:
self.db_connection.update_course_index(bulk_write_record.index, from_index=bulk_write_record.initial_index)
if dirty and emit_signals:
signal_handler = getattr(self, 'signal_handler', None)
if signal_handler:
signal_handler.send("course_published", course_key=course_key)
def get_course_index(self, course_key, ignore_case=False):
"""
Return the index for course_key.
......@@ -675,7 +689,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
depth: how deep below these to prefetch
lazy: whether to fetch definitions or use placeholders
'''
with self.bulk_operations(course_key):
with self.bulk_operations(course_key, emit_signals=False):
new_module_data = {}
for block_id in base_block_ids:
new_module_data = self.descendants(
......@@ -1563,18 +1577,20 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
source_index = self.get_course_index_info(source_course_id)
if source_index is None:
raise ItemNotFoundError("Cannot find a course at {0}. Aborting".format(source_course_id))
new_course = self.create_course(
dest_course_id.org, dest_course_id.course, dest_course_id.run,
user_id,
fields=fields,
versions_dict=source_index['versions'],
search_targets=source_index['search_targets'],
skip_auto_publish=True,
**kwargs
)
# don't copy assets until we create the course in case something's awry
super(SplitMongoModuleStore, self).clone_course(source_course_id, dest_course_id, user_id, fields, **kwargs)
return new_course
with self.bulk_operations(dest_course_id):
new_course = self.create_course(
dest_course_id.org, dest_course_id.course, dest_course_id.run,
user_id,
fields=fields,
versions_dict=source_index['versions'],
search_targets=source_index['search_targets'],
skip_auto_publish=True,
**kwargs
)
# don't copy assets until we create the course in case something's awry
super(SplitMongoModuleStore, self).clone_course(source_course_id, dest_course_id, user_id, fields, **kwargs)
return new_course
DEFAULT_ROOT_BLOCK_ID = 'course'
......
......@@ -118,7 +118,10 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli
def update_item(self, descriptor, user_id, allow_not_found=False, force=False, **kwargs):
old_descriptor_locn = descriptor.location
descriptor.location = self._map_revision_to_branch(old_descriptor_locn)
with self.bulk_operations(descriptor.location.course_key):
emit_signals = descriptor.location.branch == ModuleStoreEnum.BranchName.published \
or descriptor.location.block_type in DIRECT_ONLY_CATEGORIES
with self.bulk_operations(descriptor.location.course_key, emit_signals=emit_signals):
item = super(DraftVersioningModuleStore, self).update_item(
descriptor,
user_id,
......@@ -139,7 +142,9 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli
See :py:meth `ModuleStoreDraftAndPublished.create_item`
"""
course_key = self._map_revision_to_branch(course_key)
with self.bulk_operations(course_key):
emit_signals = course_key.branch == ModuleStoreEnum.BranchName.published \
or block_type in DIRECT_ONLY_CATEGORIES
with self.bulk_operations(course_key, emit_signals=emit_signals):
item = super(DraftVersioningModuleStore, self).create_item(
user_id, course_key, block_type, block_id=block_id,
definition_locator=definition_locator, fields=fields,
......@@ -354,11 +359,7 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli
# Now it's been published, add the object to the courseware search index so that it appears in search results
CoursewareSearchIndexer.add_to_search_index(self, location)
published_location = location.for_branch(ModuleStoreEnum.BranchName.published)
if self.signal_handler:
self.signal_handler.send("course_published", course_key=published_location.course_key)
return self.get_item(published_location, **kwargs)
return self.get_item(location.for_branch(ModuleStoreEnum.BranchName.published), **kwargs)
def unpublish(self, location, user_id, **kwargs):
"""
......
......@@ -22,7 +22,7 @@ class TestPublish(SplitWMongoCourseBoostrapper):
# There are 12 created items and 7 parent updates
# create course: finds: 1 to verify uniqueness, 1 to find parents
# sends: 1 to create course, 1 to create overview
with check_mongo_calls(5, 2):
with check_mongo_calls(4, 2):
super(TestPublish, self)._create_course(split=False) # 2 inserts (course and overview)
# with bulk will delay all inheritance computations which won't be added into the mongo_calls
......
......@@ -48,6 +48,7 @@ def create_modulestore_instance(
return class_(
doc_store_config=doc_store_config,
contentstore=contentstore,
signal_handler=signal_handler,
**options
)
......
......@@ -294,8 +294,8 @@ class XMLModuleStore(ModuleStoreReadBase):
"""
def __init__(
self, data_dir, default_class=None, course_dirs=None, course_ids=None,
load_error_modules=True, i18n_service=None, fs_service=None, user_service=None,
signal_handler=None, **kwargs
load_error_modules=True, i18n_service=None, fs_service=None, user_service=None,
signal_handler=None, **kwargs # pylint: disable=unused-argument
):
"""
Initialize an XMLModuleStore from data_dir
......
import json
from ratelimitbackend import admin
from .models import CourseStructure
class CourseStructureAdmin(admin.ModelAdmin):
    """
    Admin configuration for the CourseStructure model.

    Note: the stale duplicate `search_fields`/`list_display` assignments and the
    `list_display_links = ('id', 'course_id')` entry were removed — the latter
    referenced 'id', which is no longer part of `list_display`, making the
    ModelAdmin configuration invalid.
    """
    search_fields = ('course_id',)
    list_display = ('course_id', 'modified')
    ordering = ('course_id', '-modified')


admin.site.register(CourseStructure, CourseStructureAdmin)
import logging
from optparse import make_option
from django.core.management.base import BaseCommand
from opaque_keys.edx.keys import CourseKey
from xmodule.modulestore.django import modulestore
from openedx.core.djangoapps.content.course_structures.models import update_course_structure
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Management command that generates and stores course structures on demand."""
    args = '<course_id course_id ...>'
    help = 'Generates and stores course structure for one or more courses.'

    option_list = BaseCommand.option_list + (
        make_option('--all',
                    action='store_true',
                    default=False,
                    help='Generate structures for all courses.'),
    )

    def handle(self, *args, **options):
        """
        Generate course structures for the requested courses.

        With --all, every course in the modulestore is processed; otherwise each
        positional argument is parsed as a course key. Per-course failures are
        logged and skipped so one bad course does not abort the run.
        """
        if options['all']:
            course_keys = [course.id for course in modulestore().get_courses()]
        else:
            course_keys = [CourseKey.from_string(arg) for arg in args]

        if not course_keys:
            logger.fatal('No courses specified.')
            return

        logger.info('Generating course structures for %d courses.', len(course_keys))
        # Fixed: this previously called the root logger via `logging.debug`,
        # bypassing this module's named logger used by every other log call.
        logger.debug('Generating course structure(s) for the following courses: %s', course_keys)

        for course_key in course_keys:
            try:
                update_course_structure(course_key)
            except Exception as e:  # pylint: disable=broad-except
                logger.error('An error occurred while generating course structure for %s: %s', unicode(course_key), e)

        logger.info('Finished generating course structures.')
......@@ -13,9 +13,8 @@ class Migration(SchemaMigration):
('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
('created', self.gf('model_utils.fields.AutoCreatedField')(default=datetime.datetime.now)),
('modified', self.gf('model_utils.fields.AutoLastModifiedField')(default=datetime.datetime.now)),
('course_id', self.gf('xmodule_django.models.CourseKeyField')(max_length=255, db_index=True)),
('version', self.gf('django.db.models.fields.CharField')(max_length=255)),
('structure_json', self.gf('django.db.models.fields.TextField')()),
('course_id', self.gf('xmodule_django.models.CourseKeyField')(unique=True, max_length=255, db_index=True)),
('structure_json', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
))
db.send_create_signal('course_structures', ['CourseStructure'])
......@@ -28,12 +27,11 @@ class Migration(SchemaMigration):
models = {
'course_structures.coursestructure': {
'Meta': {'object_name': 'CourseStructure'},
'course_id': ('xmodule_django.models.CourseKeyField', [], {'max_length': '255', 'db_index': 'True'}),
'course_id': ('xmodule_django.models.CourseKeyField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
'created': ('model_utils.fields.AutoCreatedField', [], {'default': 'datetime.datetime.now'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'modified': ('model_utils.fields.AutoLastModifiedField', [], {'default': 'datetime.datetime.now'}),
'structure_json': ('django.db.models.fields.TextField', [], {}),
'version': ('django.db.models.fields.CharField', [], {'max_length': '255'})
'structure_json': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
}
}
......
import json
import logging
from django.db import models
from django.dispatch import receiver
from celery.task import task
from django.dispatch import receiver
from model_utils.models import TimeStampedModel
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import CourseLocator
from xmodule.modulestore.django import modulestore, SignalHandler
from util.models import CompressedTextField
from xmodule_django.models import CourseKeyField
class CourseStructure(TimeStampedModel):
course_id = CourseKeyField(max_length=255, db_index=True)
version = models.CharField(max_length=255, blank=True, default="")
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
class CourseStructure(TimeStampedModel):
    """
    Stores the generated structure document for a course, one row per course.

    Note: a stale duplicate `structure_json = models.TextField()` assignment
    (left over from the previous schema) was removed — it immediately shadowed
    the CompressedTextField below. The old "(course_id, version)" index comment
    was dropped with it, since the `version` field no longer exists.
    """
    # One row per course; the unique constraint backs get_or_create updates.
    course_id = CourseKeyField(max_length=255, db_index=True, unique=True, verbose_name='Course ID')

    # Right now the only thing we do with the structure doc is store it and
    # send it on request. If we need to store a more complex data model later,
    # we can do so and build a migration. The only problem with a normalized
    # data model for this is that it will likely involve hundreds of rows, and
    # we'd have to be careful about caching.
    structure_json = CompressedTextField(verbose_name='Structure JSON', blank=True, null=True)

    @property
    def structure(self):
        """Return the JSON-parsed course structure dict, or None if unset."""
        if self.structure_json:
            return json.loads(self.structure_json)
        return None
def course_structure(course_key):
def generate_course_structure(course_key):
"""
Generates a course structure dictionary for the specified course.
"""
course = modulestore().get_course(course_key, depth=None)
blocks_stack = [course]
blocks_dict = {}
while blocks_stack:
curr_block = blocks_stack.pop()
children = curr_block.get_children() if curr_block.has_children else []
children = curr_block.get_children() if curr_block.has_children else []
blocks_dict[unicode(curr_block.scope_ids.usage_id)] = {
"usage_key": unicode(curr_block.scope_ids.usage_id),
"block_type": curr_block.category,
"display_name": curr_block.display_name,
"graded": curr_block.graded,
"format": curr_block.format,
"children": [unicode(ch.scope_ids.usage_id) for ch in children]
"children": [unicode(child.scope_ids.usage_id) for child in children]
}
blocks_stack.extend(children)
return {
......@@ -48,15 +55,38 @@ def course_structure(course_key):
"blocks": blocks_dict
}
@receiver(SignalHandler.course_published)
def listen_for_course_publish(sender, course_key, **kwargs):  # pylint: disable=unused-argument
    """
    Signal receiver: kick off an async course-structure update when a course is published.
    """
    # Note: The countdown=0 option is set to ensure the task below does not attempt to access the course
    # before the signal emitter has finished all operations. This is also necessary to ensure all tests pass.
    #
    # Fixed: `countdown` is a celery *scheduling* option and must be passed via
    # apply_async. Passing it through delay() forwards it as a keyword argument
    # to update_course_structure itself, which takes only course_key, raising a
    # TypeError when the worker executes the task.
    update_course_structure.apply_async([course_key], countdown=0)
@task()
def update_course_structure(course_key):
    """
    Regenerates and updates the course structure (in the database) for the specified course.

    Returns the saved CourseStructure row, or None (with an error logged) when
    course_key is not a CourseLocator. Generation errors are logged and re-raised
    so celery can record the failure.

    Note: the stale pre-refactor body (an unconditional objects.create() with the
    removed `version` field) left interleaved here by the diff was deleted; it
    would have produced duplicate rows on every publish.
    """
    if not isinstance(course_key, CourseLocator):
        logger.error('update_course_structure requires a CourseLocator. Given %s.', type(course_key))
        return

    try:
        structure = generate_course_structure(course_key)
    except Exception as e:  # pylint: disable=broad-except
        logger.error('An error occurred while generating course structure: %s', e)
        raise

    structure_json = json.dumps(structure)

    # course_id is unique, so either create the row or refresh the existing one.
    cs, created = CourseStructure.objects.get_or_create(
        course_id=course_key,
        defaults={'structure_json': structure_json}
    )

    if not created:
        cs.structure_json = structure_json
        cs.save()

    return cs
import json
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
from openedx.core.djangoapps.content.course_structures.models import generate_course_structure, CourseStructure
class CourseStructureTests(ModuleStoreTestCase):
    """Tests for course structure generation and the CourseStructure model."""

    def setUp(self, **kwargs):
        super(CourseStructureTests, self).setUp()
        # A minimal course tree: one course with a single chapter.
        self.course = CourseFactory.create()
        self.section = ItemFactory.create(parent=self.course, category='chapter', display_name='Test Section')
        # Start from an empty table so each test's create/get assertions are unambiguous.
        CourseStructure.objects.all().delete()

    def test_generate_course_structure(self):
        """
        generate_course_structure should return a dict with the course root usage
        key and an entry for every block reachable from the course root.
        """
        blocks = {}

        def add_block(block):
            # Recursively build the expected entry for `block` and its subtree,
            # mirroring the shape produced by generate_course_structure.
            children = block.get_children() if block.has_children else []
            blocks[unicode(block.location)] = {
                "usage_key": unicode(block.location),
                "block_type": block.category,
                "display_name": block.display_name,
                "graded": block.graded,
                "format": block.format,
                "children": [unicode(child.location) for child in children]
            }
            for child in children:
                add_block(child)

        add_block(self.course)

        expected = {
            'root': unicode(self.course.location),
            'blocks': blocks
        }

        # Show the full dict diff on failure; these structures can be large.
        self.maxDiff = None
        actual = generate_course_structure(self.course.id)
        self.assertDictEqual(actual, expected)

    def test_structure_json(self):
        """
        Although stored as compressed data, CourseStructure.structure_json should always return the uncompressed string.
        """
        course_id = 'a/b/c'
        structure = {
            'root': course_id,
            'blocks': {
                course_id: {
                    'id': course_id
                }
            }
        }
        structure_json = json.dumps(structure)
        cs = CourseStructure.objects.create(course_id=self.course.id, structure_json=structure_json)
        self.assertEqual(cs.structure_json, structure_json)

        # Reload the data to ensure the init signal is fired to decompress the data.
        cs = CourseStructure.objects.get(course_id=self.course.id)
        self.assertEqual(cs.structure_json, structure_json)

    def test_structure(self):
        """
        CourseStructure.structure should return the uncompressed, JSON-parsed course structure.
        """
        structure = {
            'root': 'a/b/c',
            'blocks': {
                'a/b/c': {
                    'id': 'a/b/c'
                }
            }
        }
        structure_json = json.dumps(structure)
        cs = CourseStructure.objects.create(course_id=self.course.id, structure_json=structure_json)
        self.assertDictEqual(cs.structure, structure)
......@@ -135,18 +135,19 @@ django_nose==1.3
factory_boy==2.2.1
freezegun==0.1.11
lettuce==0.2.20
mock-django==0.6.6
mock==1.0.1
nose-exclude
nose-ignore-docstring
nosexcover==1.0.7
pep8==1.5.7
PyContracts==1.7.1
pylint==1.4.1
python-subunit==0.0.16
rednose==0.3
selenium==2.42.1
splinter==0.5.4
testtools==0.9.34
PyContracts==1.7.1
# Used for Segment.io analytics
analytics-python==0.4.4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment