Commit 64722bfc by Adam Palay

Moves dump_to_neo4j cache backend to neo4j instead of memcached (SUST-76)

* Instead of caching when a course is last published, we get this information
  from the CourseStructure table
* This commit introduces a mock py2neo Graph to be used for testing
parent 9749bbab
......@@ -12,9 +12,3 @@ class CoursegraphConfig(AppConfig):
AppConfig for the coursegraph app
name = 'openedx.core.djangoapps.coursegraph'
def ready(self):
Import signals on startup
from openedx.core.djangoapps.coursegraph import signals # pylint: disable=unused-variable
......@@ -7,18 +7,14 @@ from __future__ import unicode_literals, print_function
import logging
from import BaseCommand
from django.utils import six
from django.utils import six, timezone
from opaque_keys.edx.keys import CourseKey
from py2neo import Graph, Node, Relationship, authenticate
from py2neo import Graph, Node, Relationship, authenticate, NodeSelector
from py2neo.compat import integer, string, unicode as neo4j_unicode
from request_cache.middleware import RequestCache
from xmodule.modulestore.django import modulestore
from openedx.core.djangoapps.coursegraph.utils import (
from openedx.core.djangoapps.content.course_structures.models import CourseStructure
log = logging.getLogger(__name__)
......@@ -29,9 +25,6 @@ bolt_log.setLevel(logging.ERROR)
PRIMITIVE_NEO4J_TYPES = (integer, string, neo4j_unicode, float, bool)
COMMAND_LAST_RUN_CACHE = CommandLastRunCache()
COURSE_LAST_PUBLISHED_CACHE = CourseLastPublishedCache()
class ModuleStoreSerializer(object):
......@@ -45,8 +38,10 @@ class ModuleStoreSerializer(object):
If that parameter isn't furnished, loads all course_keys from the
Filters out course_keys in the `skip` parameter, if provided.
:param courses: string serialization of course keys
:param skip: string serialization of course keys
courses: A list of string serializations of course keys.
For example, ["course-v1:org+course+run"].
skip: Also a list of string serializations of course keys.
if courses:
course_keys = [CourseKey.from_string(course.strip()) for course in courses]
......@@ -67,7 +62,7 @@ class ModuleStoreSerializer(object):
fields: a dictionary of an XBlock's field names and values
label: the name of the XBlock's type (e.g. 'course'
block_type: the name of the XBlock's type (e.g. 'course'
or 'problem')
# convert all fields to a dict and filter out parent and children field
......@@ -88,25 +83,27 @@ class ModuleStoreSerializer(object):
fields['course_key'] = six.text_type(course_key)
fields['location'] = six.text_type(item.location)
label = item.scope_ids.block_type
block_type = item.scope_ids.block_type
# prune some fields
if label == 'course':
if block_type == 'course':
# prune the checklists field
if 'checklists' in fields:
del fields['checklists']
return fields, label
# record the time this command was run
fields['time_last_dumped_to_neo4j'] = six.text_type(
return fields, block_type
def serialize_course(self, course_id):
Serializes a course into py2neo Nodes and Relationships
course_id: CourseKey of the course we want to serialize
nodes: a list of py2neo Node objects
relationships: a list of py2neo Relationships objects
Serializes a course into Nodes and Relationships
# create a location to node mapping we'll need later for
# writing relationships
......@@ -116,12 +113,12 @@ class ModuleStoreSerializer(object):
# create nodes
nodes = []
for item in items:
fields, label = self.serialize_item(item)
fields, block_type = self.serialize_item(item)
for field_name, value in six.iteritems(fields):
fields[field_name] = self.coerce_types(value)
node = Node(label, 'item', **fields)
node = Node(block_type, 'item', **fields)
location_to_node[item.location] = node
......@@ -144,7 +141,7 @@ class ModuleStoreSerializer(object):
value: the value of an xblock's field
Returns: either the value, a text version of the value, or, if the
value is a list, a list where each element is converted to text.
value is a list, a list where each element is converted to text.
coerced_value = value
if isinstance(value, list):
......@@ -168,44 +165,92 @@ class ModuleStoreSerializer(object):
def should_dump_course(course_key):
def get_command_last_run(course_key, graph):
This information is stored on the course node of a course in neo4j
course_key: a CourseKey
graph: a py2neo Graph
Returns: The datetime that the command was last run, converted into
text, or None, if there's no record of this command last being run.
selector = NodeSelector(graph)
course_node =
last_this_command_was_run = None
if course_node:
last_this_command_was_run = course_node['time_last_dumped_to_neo4j']
return last_this_command_was_run
def get_course_last_published(course_key):
We use the CourseStructure table to get when this course was last
course_key: a CourseKey
Returns: The datetime the course was last published at, converted into
text, or None, if there's no record of the last time this course
was published.
structure = CourseStructure.objects.get(course_id=course_key)
course_last_published_date = six.text_type(structure.modified)
except CourseStructure.DoesNotExist:
course_last_published_date = None
return course_last_published_date
def should_dump_course(self, course_key, graph):
Only dump the course if it's been changed since the last time it's been
:param course_key: a CourseKey object.
:return: bool. Whether or not this course should be dumped to neo4j.
course_key: a CourseKey object.
graph: a py2neo Graph object.
Returns: bool of whether this course should be dumped to neo4j.
last_this_command_was_run = COMMAND_LAST_RUN_CACHE.get(course_key)
last_course_had_published_event = COURSE_LAST_PUBLISHED_CACHE.get(
last_this_command_was_run = self.get_command_last_run(course_key, graph)
course_last_published_date = self.get_course_last_published(course_key)
# if we have no record of this course being serialized, serialize it
# if we don't have a record of the last time this command was run,
# we should serialize the course and dump it
if last_this_command_was_run is None:
return True
# if we've serialized the course recently and we have no published
# events, we can skip re-serializing it
if last_this_command_was_run and last_course_had_published_event is None:
# events, we will not dump it, and so we can skip serializing it
# again here
if last_this_command_was_run and course_last_published_date is None:
return False
# otherwise, serialize if the command was run before the course's last
# published event
return last_this_command_was_run < last_course_had_published_event
# otherwise, serialize and dump the course if the command was run
# before the course's last published event
return last_this_command_was_run < course_last_published_date
def dump_courses_to_neo4j(self, graph, override_cache=False):
graph: py2neo graph object
override_cache: serialize the courses even if they've been recently
Returns two lists: one of the courses that were successfully written
to neo4j, and one of courses that were not.
Method that iterates through a list of courses in a modulestore,
serializes them, then writes them to neo4j
graph: py2neo graph object
override_cache: serialize the courses even if they've been recently
Returns: two lists--one of the courses that were successfully written
to neo4j and one of courses that were not.
total_number_of_courses = len(self.course_keys)
successful_courses = []
......@@ -222,7 +267,7 @@ class ModuleStoreSerializer(object):
if not (override_cache or self.should_dump_course(course_key)):
if not (override_cache or self.should_dump_course(course_key, graph)):"skipping dumping %s, since it hasn't changed", course_key)
......@@ -258,7 +303,6 @@ class ModuleStoreSerializer(object):
return successful_courses, unsuccessful_courses
Utilities for testing the dump_to_neo4j management command
from __future__ import unicode_literals
from py2neo import Node
class MockGraph(object):
A stubbed out version of py2neo's Graph object, used for testing.
transaction_errors: a bool for whether transactions should throw
an error.
def __init__(self, transaction_errors=False, **kwargs): # pylint: disable=unused-argument
self.nodes = set()
self.number_commits = 0
self.number_rollbacks = 0
self.transaction_errors = transaction_errors
def begin(self):
A stub of the method that generates transactions
Returns: a MockTransaction object (instead of a py2neo Transaction)
return MockTransaction(self)
class MockTransaction(object):
A stubbed out version of py2neo's Transaction object, used for testing.
def __init__(self, graph):
self.temp = set()
self.graph = graph
def run(self, query):
Deletes all nodes associated with a course. Normally `run` executes
an arbitrary query, but in our code, we only use it to delete nodes
associated with a course.
query: query string to be executed (in this case, to delete all
nodes associated with a course)
start_string = "WHERE n.course_key='"
start = query.index(start_string) + len(start_string)
query = query[start:]
end = query.find("'")
course_key = query[:end]
self.graph.nodes = set([
node for node in self.graph.nodes if node['course_key'] != course_key
def create(self, element):
Adds elements to the transaction's temporary backend storage
element: a py2neo Node object
if isinstance(element, Node):
def commit(self):
Takes elements in the transaction's temporary storage and adds them
to the mock graph's storage. Throws an error if the graph's
transaction_errors param is set to True.
if self.graph.transaction_errors:
raise Exception("fake exception while trying to commit")
for element in self.temp:
self.graph.number_commits += 1
def rollback(self):
Clears the transaction's temporary storage
self.graph.number_rollbacks += 1
class MockNodeSelector(object):
Mocks out py2neo's NodeSelector class. Used to select a node from a graph.
py2neo's NodeSelector expects a real graph object to run queries against,
so, rather than have to mock out MockGraph to accommodate those queries,
it seemed simpler to mock out NodeSelector as well.
def __init__(self, graph):
self.graph = graph
def select(self, label, course_key):
Selects nodes that match a label and course_key
label: the string of the label we're selecting nodes by
course_key: the string of the course key we're selecting nodes by
Returns: a MockNodeSelection of matching nodes
nodes = []
for node in self.graph.nodes:
if node.has_label(label) and node["course_key"] == course_key:
return MockNodeSelection(nodes)
class MockNodeSelection(list):
Mocks out py2neo's NodeSelection class: this is the type of what
MockNodeSelector's `select` method returns.
def first(self):
Returns: the first element of a list if the list has elements.
Otherwise, None.
return self[0] if self else None
Signal handlers for the CourseGraph application
from django.dispatch.dispatcher import receiver
from xmodule.modulestore.django import SignalHandler
from openedx.core.djangoapps.coursegraph.utils import CourseLastPublishedCache
def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable=unused-argument
Register when the course was published on a course publish event
Tests for coursegraph's signal handler on course publish
from __future__ import unicode_literals
from opaque_keys.edx.keys import CourseKey
from openedx.core.djangoapps.coursegraph.signals import _listen_for_course_publish
from openedx.core.djangoapps.coursegraph.utils import CourseLastPublishedCache
from openedx.core.djangolib.testing.utils import CacheIsolationTestCase
class TestCourseGraphSignalHandler(CacheIsolationTestCase):
Tests for the course publish signal handler
ENABLED_CACHES = ['default']
def test_cache_set_on_course_publish(self):
Tests that the last published cache is set on course publish
course_key = CourseKey.from_string('course-v1:org+course+run')
last_published_cache = CourseLastPublishedCache()
_listen_for_course_publish(None, course_key)
Helpers for the CourseGraph app
from django.core.cache import cache
from django.utils import timezone
class TimeRecordingCacheBase(object):
A base class for caching the current time for some key.
# cache_prefix should be defined in children classes
cache_prefix = None
_cache = cache
def _key(self, course_key):
Make a cache key from the prefix and a course_key
:param course_key: CourseKey object
:return: a cache key
return self.cache_prefix + unicode(course_key)
def get(self, course_key):
Gets the time value associated with the CourseKey.
:param course_key: a CourseKey object.
:return: the time the key was last set.
return self._cache.get(self._key(course_key))
def set(self, course_key):
Sets the current time for a CourseKey key.
:param course_key: a CourseKey object.
return self._cache.set(self._key(course_key),
class CourseLastPublishedCache(TimeRecordingCacheBase):
Used to record the last time that a course had a publish event run on it.
cache_prefix = u'course_last_published'
class CommandLastRunCache(TimeRecordingCacheBase):
Used to record the last time that the dump_to_neo4j command was run on a
cache_prefix = u'dump_to_neo4j_command_last_run'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment