Commit 261eb31d by Adam Committed by GitHub

Merge pull request #13992 from edx/adam/refactor-dump-to-neo4j-caching

Moves dump_to_neo4j cache backend to neo4j instead of memcached (SUST…
parents c67ee94d 64722bfc
...@@ -12,9 +12,3 @@ class CoursegraphConfig(AppConfig): ...@@ -12,9 +12,3 @@ class CoursegraphConfig(AppConfig):
AppConfig for courseware app AppConfig for courseware app
""" """
name = 'openedx.core.djangoapps.coursegraph' name = 'openedx.core.djangoapps.coursegraph'
def ready(self):
"""
Import signals on startup
"""
from openedx.core.djangoapps.coursegraph import signals # pylint: disable=unused-variable
...@@ -7,18 +7,14 @@ from __future__ import unicode_literals, print_function ...@@ -7,18 +7,14 @@ from __future__ import unicode_literals, print_function
import logging import logging
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.utils import six from django.utils import six, timezone
from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.keys import CourseKey
from py2neo import Graph, Node, Relationship, authenticate from py2neo import Graph, Node, Relationship, authenticate, NodeSelector
from py2neo.compat import integer, string, unicode as neo4j_unicode from py2neo.compat import integer, string, unicode as neo4j_unicode
from request_cache.middleware import RequestCache from request_cache.middleware import RequestCache
from xmodule.modulestore.django import modulestore from xmodule.modulestore.django import modulestore
from openedx.core.djangoapps.coursegraph.utils import ( from openedx.core.djangoapps.content.course_structures.models import CourseStructure
CommandLastRunCache,
CourseLastPublishedCache,
)
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -29,9 +25,6 @@ bolt_log.setLevel(logging.ERROR) ...@@ -29,9 +25,6 @@ bolt_log.setLevel(logging.ERROR)
PRIMITIVE_NEO4J_TYPES = (integer, string, neo4j_unicode, float, bool) PRIMITIVE_NEO4J_TYPES = (integer, string, neo4j_unicode, float, bool)
COMMAND_LAST_RUN_CACHE = CommandLastRunCache()
COURSE_LAST_PUBLISHED_CACHE = CourseLastPublishedCache()
class ModuleStoreSerializer(object): class ModuleStoreSerializer(object):
""" """
...@@ -45,8 +38,10 @@ class ModuleStoreSerializer(object): ...@@ -45,8 +38,10 @@ class ModuleStoreSerializer(object):
If that parameter isn't furnished, loads all course_keys from the If that parameter isn't furnished, loads all course_keys from the
modulestore. modulestore.
Filters out course_keys in the `skip` parameter, if provided. Filters out course_keys in the `skip` parameter, if provided.
:param courses: string serialization of course keys Args:
:param skip: string serialization of course keys courses: A list of string serializations of course keys.
For example, ["course-v1:org+course+run"].
skip: Also a list of string serializations of course keys.
""" """
if courses: if courses:
course_keys = [CourseKey.from_string(course.strip()) for course in courses] course_keys = [CourseKey.from_string(course.strip()) for course in courses]
...@@ -67,7 +62,7 @@ class ModuleStoreSerializer(object): ...@@ -67,7 +62,7 @@ class ModuleStoreSerializer(object):
Returns: Returns:
fields: a dictionary of an XBlock's field names and values fields: a dictionary of an XBlock's field names and values
label: the name of the XBlock's type (i.e. 'course' block_type: the name of the XBlock's type (i.e. 'course'
or 'problem') or 'problem')
""" """
# convert all fields to a dict and filter out parent and children field # convert all fields to a dict and filter out parent and children field
...@@ -88,25 +83,27 @@ class ModuleStoreSerializer(object): ...@@ -88,25 +83,27 @@ class ModuleStoreSerializer(object):
fields['course_key'] = six.text_type(course_key) fields['course_key'] = six.text_type(course_key)
fields['location'] = six.text_type(item.location) fields['location'] = six.text_type(item.location)
label = item.scope_ids.block_type block_type = item.scope_ids.block_type
# prune some fields if block_type == 'course':
if label == 'course': # prune the checklists field
if 'checklists' in fields: if 'checklists' in fields:
del fields['checklists'] del fields['checklists']
return fields, label # record the time this command was run
fields['time_last_dumped_to_neo4j'] = six.text_type(timezone.now())
return fields, block_type
def serialize_course(self, course_id): def serialize_course(self, course_id):
""" """
Serializes a course into py2neo Nodes and Relationships
Args: Args:
course_id: CourseKey of the course we want to serialize course_id: CourseKey of the course we want to serialize
Returns: Returns:
nodes: a list of py2neo Node objects nodes: a list of py2neo Node objects
relationships: a list of py2neo Relationships objects relationships: a list of py2neo Relationships objects
Serializes a course into Nodes and Relationships
""" """
# create a location to node mapping we'll need later for # create a location to node mapping we'll need later for
# writing relationships # writing relationships
...@@ -116,12 +113,12 @@ class ModuleStoreSerializer(object): ...@@ -116,12 +113,12 @@ class ModuleStoreSerializer(object):
# create nodes # create nodes
nodes = [] nodes = []
for item in items: for item in items:
fields, label = self.serialize_item(item) fields, block_type = self.serialize_item(item)
for field_name, value in six.iteritems(fields): for field_name, value in six.iteritems(fields):
fields[field_name] = self.coerce_types(value) fields[field_name] = self.coerce_types(value)
node = Node(label, 'item', **fields) node = Node(block_type, 'item', **fields)
nodes.append(node) nodes.append(node)
location_to_node[item.location] = node location_to_node[item.location] = node
...@@ -144,7 +141,7 @@ class ModuleStoreSerializer(object): ...@@ -144,7 +141,7 @@ class ModuleStoreSerializer(object):
value: the value of an xblock's field value: the value of an xblock's field
Returns: either the value, a text version of the value, or, if the Returns: either the value, a text version of the value, or, if the
value is a list, a list where each element is converted to text. value is a list, a list where each element is converted to text.
""" """
coerced_value = value coerced_value = value
if isinstance(value, list): if isinstance(value, list):
...@@ -168,44 +165,92 @@ class ModuleStoreSerializer(object): ...@@ -168,44 +165,92 @@ class ModuleStoreSerializer(object):
transaction.create(entity) transaction.create(entity)
@staticmethod @staticmethod
def should_dump_course(course_key): def get_command_last_run(course_key, graph):
"""
This information is stored on the course node of a course in neo4j
Args:
course_key: a CourseKey
graph: a py2neo Graph
Returns: The datetime that the command was last run, converted into
text, or None, if there's no record of this command last being run.
"""
selector = NodeSelector(graph)
course_node = selector.select(
"course",
course_key=six.text_type(course_key)
).first()
last_this_command_was_run = None
if course_node:
last_this_command_was_run = course_node['time_last_dumped_to_neo4j']
return last_this_command_was_run
@staticmethod
def get_course_last_published(course_key):
"""
We use the CourseStructure table to get when this course was last
published.
Args:
course_key: a CourseKey
Returns: The datetime the course was last published at, converted into
text, or None, if there's no record of the last time this course
was published.
"""
try:
structure = CourseStructure.objects.get(course_id=course_key)
course_last_published_date = six.text_type(structure.modified)
except CourseStructure.DoesNotExist:
course_last_published_date = None
return course_last_published_date
def should_dump_course(self, course_key, graph):
""" """
Only dump the course if it's been changed since the last time it's been Only dump the course if it's been changed since the last time it's been
dumped. dumped.
:param course_key: a CourseKey object. Args:
:return: bool. Whether or not this course should be dumped to neo4j. course_key: a CourseKey object.
graph: a py2neo Graph object.
Returns: bool of whether this course should be dumped to neo4j.
""" """
last_this_command_was_run = COMMAND_LAST_RUN_CACHE.get(course_key) last_this_command_was_run = self.get_command_last_run(course_key, graph)
last_course_had_published_event = COURSE_LAST_PUBLISHED_CACHE.get(
course_key course_last_published_date = self.get_course_last_published(course_key)
)
# if we have no record of this course being serialized, serialize it # if we don't have a record of the last time this command was run,
# we should serialize the course and dump it
if last_this_command_was_run is None: if last_this_command_was_run is None:
return True return True
# if we've serialized the course recently and we have no published # if we've serialized the course recently and we have no published
# events, we can skip re-serializing it # events, we will not dump it, and so we can skip serializing it
if last_this_command_was_run and last_course_had_published_event is None: # again here
if last_this_command_was_run and course_last_published_date is None:
return False return False
# otherwise, serialize if the command was run before the course's last # otherwise, serialize and dump the course if the command was run
# published event # before the course's last published event
return last_this_command_was_run < last_course_had_published_event return last_this_command_was_run < course_last_published_date
def dump_courses_to_neo4j(self, graph, override_cache=False): def dump_courses_to_neo4j(self, graph, override_cache=False):
""" """
Parameters Method that iterates through a list of courses in a modulestore,
---------- serializes them, then writes them to neo4j
graph: py2neo graph object Args:
override_cache: serialize the courses even if they've been recently graph: py2neo graph object
serialized override_cache: serialize the courses even if they've been recently
serialized
Returns two lists: one of the courses that were successfully written
to neo4j, and one of courses that were not. Returns: two lists--one of the courses that were successfully written
------- to neo4j and one of courses that were not.
""" """
total_number_of_courses = len(self.course_keys) total_number_of_courses = len(self.course_keys)
successful_courses = [] successful_courses = []
...@@ -222,7 +267,7 @@ class ModuleStoreSerializer(object): ...@@ -222,7 +267,7 @@ class ModuleStoreSerializer(object):
total_number_of_courses, total_number_of_courses,
) )
if not (override_cache or self.should_dump_course(course_key)): if not (override_cache or self.should_dump_course(course_key, graph)):
log.info("skipping dumping %s, since it hasn't changed", course_key) log.info("skipping dumping %s, since it hasn't changed", course_key)
continue continue
...@@ -258,7 +303,6 @@ class ModuleStoreSerializer(object): ...@@ -258,7 +303,6 @@ class ModuleStoreSerializer(object):
unsuccessful_courses.append(course_string) unsuccessful_courses.append(course_string)
else: else:
COMMAND_LAST_RUN_CACHE.set(course_key)
successful_courses.append(course_string) successful_courses.append(course_string)
return successful_courses, unsuccessful_courses return successful_courses, unsuccessful_courses
......
...@@ -16,7 +16,13 @@ from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory ...@@ -16,7 +16,13 @@ from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
from openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j import ( from openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j import (
ModuleStoreSerializer, ModuleStoreSerializer,
) )
from openedx.core.djangoapps.coursegraph.signals import _listen_for_course_publish from openedx.core.djangoapps.coursegraph.management.commands.tests.utils import (
MockGraph,
MockNodeSelector,
)
from openedx.core.djangoapps.content.course_structures.signals import (
listen_for_course_publish
)
class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase): class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
...@@ -39,6 +45,43 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase): ...@@ -39,6 +45,43 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
cls.course_strings = [six.text_type(cls.course.id), six.text_type(cls.course2.id)] cls.course_strings = [six.text_type(cls.course.id), six.text_type(cls.course2.id)]
@staticmethod
def setup_mock_graph(mock_selector_class, mock_graph_class, transaction_errors=False):
"""
Replaces the py2neo Graph object with a MockGraph; similarly replaces
NodeSelector with MockNodeSelector.
Args:
mock_selector_class: a mocked NodeSelector class
mock_graph_class: a mocked Graph class
transaction_errors: a bool for whether we should get errors
when transactions try to commit
Returns: an instance of MockGraph
"""
mock_graph = MockGraph(transaction_errors=transaction_errors)
mock_graph_class.return_value = mock_graph
mock_node_selector = MockNodeSelector(mock_graph)
mock_selector_class.return_value = mock_node_selector
return mock_graph
def assertCourseDump(self, mock_graph, number_of_courses, number_commits, number_rollbacks):
"""
Asserts that we have the expected number of courses, commits, and
rollbacks after we dump the modulestore to neo4j
Args:
mock_graph: a MockGraph backend
number_of_courses: number of courses we expect to find
number_commits: number of commits we expect against the graph
number_rollbacks: number of commit rollbacks we expect
"""
courses = set([node['course_key'] for node in mock_graph.nodes])
self.assertEqual(len(courses), number_of_courses)
self.assertEqual(mock_graph.number_commits, number_commits)
self.assertEqual(mock_graph.number_rollbacks, number_rollbacks)
@ddt.ddt @ddt.ddt
class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
...@@ -46,15 +89,14 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): ...@@ -46,15 +89,14 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
Tests for the dump to neo4j management command Tests for the dump to neo4j management command
""" """
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph') @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph')
@ddt.data(1, 2) @ddt.data(1, 2)
def test_dump_specific_courses(self, number_of_courses, mock_graph_class): def test_dump_specific_courses(self, number_of_courses, mock_graph_class, mock_selector_class):
""" """
Test that you can specify which courses you want to dump. Test that you can specify which courses you want to dump.
""" """
mock_graph = mock_graph_class.return_value mock_graph = self.setup_mock_graph(mock_selector_class, mock_graph_class)
mock_transaction = mock.Mock()
mock_graph.begin.return_value = mock_transaction
call_command( call_command(
'dump_to_neo4j', 'dump_to_neo4j',
...@@ -65,18 +107,22 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): ...@@ -65,18 +107,22 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
password='mock_password', password='mock_password',
) )
self.assertEqual(mock_graph.begin.call_count, number_of_courses) self.assertCourseDump(
self.assertEqual(mock_transaction.commit.call_count, number_of_courses) mock_graph,
self.assertEqual(mock_transaction.commit.rollback.call_count, 0) number_of_courses=number_of_courses,
number_commits=number_of_courses,
number_rollbacks=0
)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph') @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph')
def test_dump_skip_course(self, mock_graph_class): def test_dump_skip_course(self, mock_graph_class, mock_selector_class):
""" """
Test that you can skip courses. Test that you can skip courses.
""" """
mock_graph = mock_graph_class.return_value mock_graph = self.setup_mock_graph(
mock_transaction = mock.Mock() mock_selector_class, mock_graph_class
mock_graph.begin.return_value = mock_transaction )
call_command( call_command(
'dump_to_neo4j', 'dump_to_neo4j',
...@@ -87,18 +133,22 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): ...@@ -87,18 +133,22 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
password='mock_password', password='mock_password',
) )
self.assertEqual(mock_graph.begin.call_count, 1) self.assertCourseDump(
self.assertEqual(mock_transaction.commit.call_count, 1) mock_graph,
self.assertEqual(mock_transaction.commit.rollback.call_count, 0) number_of_courses=1,
number_commits=1,
number_rollbacks=0,
)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph') @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph')
def test_dump_skip_beats_specifying(self, mock_graph_class): def test_dump_skip_beats_specifying(self, mock_graph_class, mock_selector_class):
""" """
Test that if you skip and specify the same course, you'll skip it. Test that if you skip and specify the same course, you'll skip it.
""" """
mock_graph = mock_graph_class.return_value mock_graph = self.setup_mock_graph(
mock_transaction = mock.Mock() mock_selector_class, mock_graph_class
mock_graph.begin.return_value = mock_transaction )
call_command( call_command(
'dump_to_neo4j', 'dump_to_neo4j',
...@@ -110,31 +160,38 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): ...@@ -110,31 +160,38 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
password='mock_password', password='mock_password',
) )
self.assertEqual(mock_graph.begin.call_count, 0) self.assertCourseDump(
self.assertEqual(mock_transaction.commit.call_count, 0) mock_graph,
self.assertEqual(mock_transaction.commit.rollback.call_count, 0) number_of_courses=0,
number_commits=0,
number_rollbacks=0,
)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph') @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph')
def test_dump_all_courses(self, mock_graph_class): def test_dump_all_courses(self, mock_graph_class, mock_selector_class):
""" """
Test if you don't specify which courses to dump, then you'll dump Test if you don't specify which courses to dump, then you'll dump
all of them. all of them.
""" """
mock_graph = mock_graph_class.return_value mock_graph = self.setup_mock_graph(
mock_transaction = mock.Mock() mock_selector_class, mock_graph_class
mock_graph.begin.return_value = mock_transaction )
call_command( call_command(
'dump_to_neo4j', 'dump_to_neo4j',
host='mock_host', host='mock_host',
http_port=7474, http_port=7474,
user='mock_user', user='mock_user',
password='mock_password', password='mock_password'
) )
self.assertEqual(mock_graph.begin.call_count, 2) self.assertCourseDump(
self.assertEqual(mock_transaction.commit.call_count, 2) mock_graph,
self.assertEqual(mock_transaction.commit.rollback.call_count, 0) number_of_courses=2,
number_commits=2,
number_rollbacks=0,
)
@ddt.ddt @ddt.ddt
...@@ -167,9 +224,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -167,9 +224,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
""" """
Tests the serialize_course method. Tests the serialize_course method.
""" """
nodes, relationships = self.mss.serialize_course( nodes, relationships = self.mss.serialize_course(self.course.id)
self.course.id
)
self.assertEqual(len(nodes), 9) self.assertEqual(len(nodes), 9)
self.assertEqual(len(relationships), 7) self.assertEqual(len(relationships), 7)
...@@ -194,63 +249,68 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -194,63 +249,68 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
coerced_value = self.mss.coerce_types(original_value) coerced_value = self.mss.coerce_types(original_value)
self.assertEqual(coerced_value, coerced_expected) self.assertEqual(coerced_value, coerced_expected)
def test_dump_to_neo4j(self): @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
def test_dump_to_neo4j(self, mock_selector_class):
""" """
Tests the dump_to_neo4j method works against a mock Tests the dump_to_neo4j method works against a mock
py2neo Graph py2neo Graph
""" """
mock_graph = mock.Mock() mock_graph = MockGraph()
mock_transaction = mock.Mock() mock_selector_class.return_value = MockNodeSelector(mock_graph)
mock_graph.begin.return_value = mock_transaction
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(mock_graph.begin.call_count, 2) self.assertCourseDump(
self.assertEqual(mock_transaction.commit.call_count, 2) mock_graph,
self.assertEqual(mock_transaction.rollback.call_count, 0) number_of_courses=2,
number_commits=2,
number_rollbacks=0,
)
# 7 nodes + 9 relationships from the first course # 9 nodes + 7 relationships from the first course
# 2 nodes and no relationships from the second # 2 nodes and no relationships from the second
self.assertEqual(mock_transaction.create.call_count, 18)
self.assertEqual(mock_transaction.run.call_count, 2) self.assertEqual(len(mock_graph.nodes), 11)
self.assertEqual(len(unsuccessful), 0) self.assertEqual(len(unsuccessful), 0)
self.assertItemsEqual(successful, self.course_strings) self.assertItemsEqual(successful, self.course_strings)
def test_dump_to_neo4j_rollback(self): @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
def test_dump_to_neo4j_rollback(self, mock_selector_class):
""" """
Tests that the dump_to_neo4j method handles the case where there's Tests that the dump_to_neo4j method handles the case where there's
an exception trying to write to the neo4j database. an exception trying to write to the neo4j database.
""" """
mock_graph = mock.Mock() mock_graph = MockGraph(transaction_errors=True)
mock_transaction = mock.Mock() mock_selector_class.return_value = MockNodeSelector(mock_graph)
mock_graph.begin.return_value = mock_transaction
mock_transaction.run.side_effect = ValueError('Something went wrong!')
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(mock_graph.begin.call_count, 2) self.assertCourseDump(
self.assertEqual(mock_transaction.commit.call_count, 0) mock_graph,
self.assertEqual(mock_transaction.rollback.call_count, 2) number_of_courses=0,
number_commits=0,
number_rollbacks=2,
)
self.assertEqual(len(successful), 0) self.assertEqual(len(successful), 0)
self.assertItemsEqual(unsuccessful, self.course_strings) self.assertItemsEqual(unsuccessful, self.course_strings)
@ddt.data( @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
(True, 2), @ddt.data((True, 2), (False, 0))
(False, 0),
)
@ddt.unpack @ddt.unpack
def test_dump_to_neo4j_cache(self, override_cache, expected_number_courses): def test_dump_to_neo4j_cache(self, override_cache, expected_number_courses, mock_selector_class):
""" """
Tests the caching mechanism and override to make sure we only publish Tests the caching mechanism and override to make sure we only publish
recently updated courses. recently updated courses.
""" """
mock_graph = mock.Mock() mock_graph = MockGraph()
mock_selector_class.return_value = MockNodeSelector(mock_graph)
# run once to warm the cache # run once to warm the cache
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) self.mss.dump_courses_to_neo4j(
self.assertEqual(len(successful + unsuccessful), len(self.course_strings)) mock_graph, override_cache=override_cache
)
# when run the second time, only dump courses if the cache override # when run the second time, only dump courses if the cache override
# is enabled # is enabled
...@@ -259,19 +319,21 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -259,19 +319,21 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
) )
self.assertEqual(len(successful + unsuccessful), expected_number_courses) self.assertEqual(len(successful + unsuccessful), expected_number_courses)
def test_dump_to_neo4j_published(self): @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
def test_dump_to_neo4j_published(self, mock_selector_class):
""" """
Tests that we only dump those courses that have been published after Tests that we only dump those courses that have been published after
the last time the command was run. the last time the command was run.
""" """
mock_graph = mock.Mock() mock_graph = MockGraph()
mock_selector_class.return_value = MockNodeSelector(mock_graph)
# run once to warm the cache # run once to warm the cache
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(len(successful + unsuccessful), len(self.course_strings)) self.assertEqual(len(successful + unsuccessful), len(self.course_strings))
# simulate one of the courses being published # simulate one of the courses being published
_listen_for_course_publish(None, self.course.id) listen_for_course_publish(None, self.course.id)
# make sure only the published course was dumped # make sure only the published course was dumped
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
...@@ -280,31 +342,24 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -280,31 +342,24 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
self.assertEqual(successful[0], unicode(self.course.id)) self.assertEqual(successful[0], unicode(self.course.id))
@ddt.data( @ddt.data(
(datetime(2016, 3, 30), datetime(2016, 3, 31), True), (six.text_type(datetime(2016, 3, 30)), six.text_type(datetime(2016, 3, 31)), True),
(datetime(2016, 3, 31), datetime(2016, 3, 30), False), (six.text_type(datetime(2016, 3, 31)), six.text_type(datetime(2016, 3, 30)), False),
(datetime(2016, 3, 31), None, False), (six.text_type(datetime(2016, 3, 31)), None, False),
(None, datetime(2016, 3, 30), True), (None, six.text_type(datetime(2016, 3, 30)), True),
(None, None, True), (None, None, True),
) )
@ddt.unpack @ddt.unpack
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.COMMAND_LAST_RUN_CACHE') def test_should_dump_course(self, last_command_run, last_course_published, should_dump):
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.COURSE_LAST_PUBLISHED_CACHE')
def test_should_dump_course(
self,
last_command_run,
last_course_published,
should_dump,
mock_course_last_published_cache,
mock_command_last_run_cache,
):
""" """
Tests whether a course should be dumped given the last time it was Tests whether a course should be dumped given the last time it was
dumped and the last time it was published. dumped and the last time it was published.
""" """
mock_command_last_run_cache.get.return_value = last_command_run mss = ModuleStoreSerializer()
mock_course_last_published_cache.get.return_value = last_course_published mss.get_command_last_run = lambda course_key, graph: last_command_run
mss.get_course_last_published = lambda course_key: last_course_published
mock_course_key = mock.Mock mock_course_key = mock.Mock
mock_graph = mock.Mock()
self.assertEqual( self.assertEqual(
self.mss.should_dump_course(mock_course_key), mss.should_dump_course(mock_course_key, mock_graph),
should_dump should_dump,
) )
"""
Utilities for testing the dump_to_neo4j management command
"""
from __future__ import unicode_literals
from py2neo import Node
class MockGraph(object):
    """
    Test double standing in for py2neo's Graph object.

    Holds committed nodes in an in-memory set and keeps running counts of
    the commits and rollbacks performed against it.

    Args:
        transaction_errors: a bool, stored on the instance, for whether
            transactions should throw an error.
        **kwargs: accepted and ignored.
    """
    def __init__(self, transaction_errors=False, **kwargs):  # pylint: disable=unused-argument
        self.transaction_errors = transaction_errors
        self.number_rollbacks = 0
        self.number_commits = 0
        self.nodes = set()

    def begin(self):
        """
        Opens a new transaction bound to this graph.

        Returns: a MockTransaction object (instead of a py2neo Transaction)
        """
        transaction = MockTransaction(self)
        return transaction
class MockTransaction(object):
    """
    A stubbed out version of py2neo's Transaction object, used for testing.

    Both node creations and course deletions are staged on the transaction
    and are only applied to the graph by `commit`. A `rollback`, or a commit
    that raises, therefore leaves the graph's nodes exactly as they were.
    """
    def __init__(self, graph):
        # nodes created in this transaction, not yet visible on the graph
        self.temp = set()
        self.graph = graph
        # course keys whose existing graph nodes are removed on commit
        self.deleted_course_keys = set()

    def run(self, query):
        """
        Stages the deletion of all nodes associated with a course. Normally
        `run` executes an arbitrary query, but in our code, we only use it
        to delete nodes associated with a course.

        The course key is read from between the single quotes that follow
        "WHERE n.course_key=" in the query.

        Args:
            query: query string to be executed (in this case, to delete all
                nodes associated with a course)

        Raises:
            ValueError: if the query has no quoted course_key clause.
        """
        start_string = "WHERE n.course_key='"
        start = query.index(start_string) + len(start_string)
        # `index` (not `find`) so an unterminated quote fails loudly rather
        # than silently producing a truncated course key
        end = query.index("'", start)
        self.deleted_course_keys.add(query[start:end])

    def create(self, element):
        """
        Adds elements to the transaction's temporary backend storage.
        Anything that is not a py2neo Node is ignored.

        Args:
            element: a py2neo Node object
        """
        if isinstance(element, Node):
            self.temp.add(element)

    def commit(self):
        """
        Applies the staged deletions to the mock graph's storage, then adds
        the elements in the transaction's temporary storage to it, and
        counts the commit on the graph.

        Raises:
            Exception: if the graph's transaction_errors param is set to
                True. The graph's nodes are left untouched in that case.
        """
        if self.graph.transaction_errors:
            raise Exception("fake exception while trying to commit")

        if self.deleted_course_keys:
            self.graph.nodes = {
                node for node in self.graph.nodes
                if node['course_key'] not in self.deleted_course_keys
            }
        # deletions are applied before additions, so nodes created in this
        # transaction survive a delete of the same course
        self.graph.nodes.update(self.temp)

        self.temp.clear()
        self.deleted_course_keys.clear()
        self.graph.number_commits += 1

    def rollback(self):
        """
        Discards everything staged on the transaction (created nodes and
        pending deletions) and counts the rollback on the graph.
        """
        self.temp.clear()
        self.deleted_course_keys.clear()
        self.graph.number_rollbacks += 1
class MockNodeSelector(object):
    """
    Test double for py2neo's NodeSelector, which picks nodes out of a graph.

    The real NodeSelector runs queries against a real graph object. Rather
    than teach MockGraph to answer those queries, this class is swapped in
    and reads the mock graph's `nodes` directly.
    """
    def __init__(self, graph):
        self.graph = graph

    def select(self, label, course_key):
        """
        Collects the graph's nodes that carry the given label and whose
        "course_key" property equals the given course key.

        Args:
            label: the string of the label we're selecting nodes by
            course_key: the string of the course key we're selecting node by

        Returns: a MockNodeSelection of matching nodes
        """
        matches = [
            candidate for candidate in self.graph.nodes
            if candidate.has_label(label) and candidate["course_key"] == course_key
        ]
        return MockNodeSelection(matches)
class MockNodeSelection(list):
    """
    Test double for py2neo's NodeSelection class: a plain list with the
    extra `first` accessor. This is the type that MockNodeSelector's
    `select` method returns.
    """
    def first(self):
        """
        Returns: None when the list is empty; otherwise its first element.
        """
        if not self:
            return None
        return self[0]
"""
Signal handlers for the CourseGraph application
"""
from django.dispatch.dispatcher import receiver
from xmodule.modulestore.django import SignalHandler
from openedx.core.djangoapps.coursegraph.utils import CourseLastPublishedCache
@receiver(SignalHandler.course_published)
def _listen_for_course_publish(sender, course_key, **kwargs):  # pylint: disable=unused-argument
    """
    Handler for the course_published signal: records the publish by
    calling `set` on a CourseLastPublishedCache for the given course key.
    """
    last_published_cache = CourseLastPublishedCache()
    last_published_cache.set(course_key)
"""
Tests for coursegraph's signal handler on course publish
"""
from __future__ import unicode_literals
from opaque_keys.edx.keys import CourseKey
from openedx.core.djangoapps.coursegraph.signals import _listen_for_course_publish
from openedx.core.djangoapps.coursegraph.utils import CourseLastPublishedCache
from openedx.core.djangolib.testing.utils import CacheIsolationTestCase
class TestCourseGraphSignalHandler(CacheIsolationTestCase):
    """
    Exercises coursegraph's course-publish signal handler
    """
    ENABLED_CACHES = ['default']

    def test_cache_set_on_course_publish(self):
        """
        The last-published cache has no entry for a course before the
        publish handler is invoked for it, and has one afterwards
        """
        published_course = CourseKey.from_string('course-v1:org+course+run')
        cache_under_test = CourseLastPublishedCache()

        value_before = cache_under_test.get(published_course)
        self.assertIsNone(value_before)

        # call the handler directly instead of sending the signal
        _listen_for_course_publish(None, published_course)

        value_after = cache_under_test.get(published_course)
        self.assertIsNotNone(value_after)
"""
Helpers for the CourseGraph app
"""
from django.core.cache import cache
from django.utils import timezone
class TimeRecordingCacheBase(object):
    """
    Base class that records the current time against a per-course cache key.
    """
    # subclasses are expected to override cache_prefix; it is None here
    cache_prefix = None
    _cache = cache

    def _key(self, course_key):
        """
        Builds the cache key for a course: this class's cache_prefix
        followed by the text form of the course key.
        :param course_key: CourseKey object
        :return: a cache key
        """
        prefix = self.cache_prefix
        return prefix + unicode(course_key)

    def get(self, course_key):
        """
        Looks up the value cached for the CourseKey.
        :param course_key: a CourseKey object.
        :return: whatever the cache holds under this course's key, i.e.
            the time written by `set`.
        """
        cache_key = self._key(course_key)
        return self._cache.get(cache_key)

    def set(self, course_key):
        """
        Stores the current time (timezone.now()) under the CourseKey's key.
        :param course_key: a CourseKey object.
        :return: the return value of the underlying cache's `set`.
        """
        cache_key = self._key(course_key)
        now = timezone.now()
        return self._cache.set(cache_key, now)
class CourseLastPublishedCache(TimeRecordingCacheBase):
    """
    Time-recording cache for the most recent publish event of a course.
    Differs from the base class only in its key prefix.
    """
    cache_prefix = u'course_last_published'
class CommandLastRunCache(TimeRecordingCacheBase):
    """
    Time-recording cache for the most recent run of the dump_to_neo4j
    command on a course. Differs from the base class only in its key prefix.
    """
    cache_prefix = u'dump_to_neo4j_command_last_run'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment