Commit e798b2e8 by Adam Palay

add precedes relationship to coursegraph (EDUCATOR-484)

parent 8df5c941
...@@ -33,16 +33,25 @@ class ModuleStoreSerializer(object): ...@@ -33,16 +33,25 @@ class ModuleStoreSerializer(object):
one graph per course. one graph per course.
""" """
def __init__(self, courses=None, skip=None): def __init__(self, course_keys):
self.course_keys = course_keys
@classmethod
def create(cls, courses=None, skip=None):
""" """
Sets the object's course_keys attribute from the `courses` parameter. Sets the object's course_keys attribute from the `courses` parameter.
If that parameter isn't furnished, loads all course_keys from the If that parameter isn't furnished, loads all course_keys from the
modulestore. modulestore.
Filters out course_keys in the `skip` parameter, if provided. Filters out course_keys in the `skip` parameter, if provided.
Args:
Arguments:
courses: A list of string serializations of course keys. courses: A list of string serializations of course keys.
For example, ["course-v1:org+course+run"]. For example, ["course-v1:org+course+run"].
skip: Also a list of string serializations of course keys. skip: Also a list of string serializations of course keys.
Returns:
a ModulestoreSerializer instance
""" """
if courses: if courses:
course_keys = [CourseKey.from_string(course.strip()) for course in courses] course_keys = [CourseKey.from_string(course.strip()) for course in courses]
...@@ -51,14 +60,14 @@ class ModuleStoreSerializer(object): ...@@ -51,14 +60,14 @@ class ModuleStoreSerializer(object):
course.id for course in modulestore().get_course_summaries() course.id for course in modulestore().get_course_summaries()
] ]
if skip is not None: if skip is not None:
skip_keys = [CourseKey.from_string(course.strip()) for course in skip] skip_keys = set([CourseKey.from_string(course.strip()) for course in skip])
course_keys = [course_key for course_key in course_keys if course_key not in skip_keys] course_keys = [course_key for course_key in course_keys if course_key not in skip_keys]
self.course_keys = course_keys return cls(course_keys)
@staticmethod @staticmethod
def serialize_item(item): def serialize_item(item):
""" """
Args: Arguments:
item: an XBlock item: an XBlock
Returns: Returns:
...@@ -100,7 +109,7 @@ class ModuleStoreSerializer(object): ...@@ -100,7 +109,7 @@ class ModuleStoreSerializer(object):
def serialize_course(self, course_id): def serialize_course(self, course_id):
""" """
Serializes a course into py2neo Nodes and Relationships Serializes a course into py2neo Nodes and Relationships
Args: Arguments:
course_id: CourseKey of the course we want to serialize course_id: CourseKey of the course we want to serialize
Returns: Returns:
...@@ -125,6 +134,7 @@ class ModuleStoreSerializer(object): ...@@ -125,6 +134,7 @@ class ModuleStoreSerializer(object):
# create relationships # create relationships
relationships = [] relationships = []
for item in items: for item in items:
previous_child_node = None
for index, child_loc in enumerate(item.get_children()): for index, child_loc in enumerate(item.get_children()):
parent_node = location_to_node.get(item.location) parent_node = location_to_node.get(item.location)
child_node = location_to_node.get(child_loc.location) child_node = location_to_node.get(child_loc.location)
...@@ -133,13 +143,20 @@ class ModuleStoreSerializer(object): ...@@ -133,13 +143,20 @@ class ModuleStoreSerializer(object):
relationship = Relationship(parent_node, "PARENT_OF", child_node) relationship = Relationship(parent_node, "PARENT_OF", child_node)
relationships.append(relationship) relationships.append(relationship)
if previous_child_node:
ordering_relationship = Relationship(
previous_child_node, "PRECEDES", child_node
)
relationships.append(ordering_relationship)
previous_child_node = child_node
nodes = location_to_node.values() nodes = location_to_node.values()
return nodes, relationships return nodes, relationships
@staticmethod @staticmethod
def coerce_types(value): def coerce_types(value):
""" """
Args: Arguments:
value: the value of an xblock's field value: the value of an xblock's field
Returns: either the value, a text version of the value, or, if the Returns: either the value, a text version of the value, or, if the
...@@ -159,7 +176,7 @@ class ModuleStoreSerializer(object): ...@@ -159,7 +176,7 @@ class ModuleStoreSerializer(object):
@staticmethod @staticmethod
def add_to_transaction(neo4j_entities, transaction): def add_to_transaction(neo4j_entities, transaction):
""" """
Args: Arguments:
neo4j_entities: a list of Nodes or Relationships neo4j_entities: a list of Nodes or Relationships
transaction: a neo4j transaction transaction: a neo4j transaction
""" """
...@@ -170,7 +187,7 @@ class ModuleStoreSerializer(object): ...@@ -170,7 +187,7 @@ class ModuleStoreSerializer(object):
def get_command_last_run(course_key, graph): def get_command_last_run(course_key, graph):
""" """
This information is stored on the course node of a course in neo4j This information is stored on the course node of a course in neo4j
Args: Arguments:
course_key: a CourseKey course_key: a CourseKey
graph: a py2neo Graph graph: a py2neo Graph
...@@ -195,7 +212,7 @@ class ModuleStoreSerializer(object): ...@@ -195,7 +212,7 @@ class ModuleStoreSerializer(object):
""" """
We use the CourseStructure table to get when this course was last We use the CourseStructure table to get when this course was last
published. published.
Args: Arguments:
course_key: a CourseKey course_key: a CourseKey
Returns: The datetime the course was last published at, converted into Returns: The datetime the course was last published at, converted into
...@@ -214,7 +231,7 @@ class ModuleStoreSerializer(object): ...@@ -214,7 +231,7 @@ class ModuleStoreSerializer(object):
""" """
Only dump the course if it's been changed since the last time it's been Only dump the course if it's been changed since the last time it's been
dumped. dumped.
Args: Arguments:
course_key: a CourseKey object. course_key: a CourseKey object.
graph: a py2neo Graph object. graph: a py2neo Graph object.
...@@ -240,11 +257,50 @@ class ModuleStoreSerializer(object): ...@@ -240,11 +257,50 @@ class ModuleStoreSerializer(object):
# before the course's last published event # before the course's last published event
return last_this_command_was_run < course_last_published_date return last_this_command_was_run < course_last_published_date
def dump_course_to_neo4j(self, course_key, graph):
"""
Serializes a course and writes it to neo4j.
Arguments:
course_key: course key for the course to be exported
graph: py2neo graph object
"""
nodes, relationships = self.serialize_course(course_key)
log.info(
"%d nodes and %d relationships in %s",
len(nodes),
len(relationships),
course_key,
)
transaction = graph.begin()
course_string = six.text_type(course_key)
try:
# first, delete existing course
transaction.run(
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
course_string
)
)
# now, re-add it
self.add_to_transaction(nodes, transaction)
self.add_to_transaction(relationships, transaction)
transaction.commit()
except Exception: # pylint: disable=broad-except
log.exception(
"Error trying to dump course %s to neo4j, rolling back",
course_string
)
transaction.rollback()
def dump_courses_to_neo4j(self, graph, override_cache=False): def dump_courses_to_neo4j(self, graph, override_cache=False):
""" """
Method that iterates through a list of courses in a modulestore, Method that iterates through a list of courses in a modulestore,
serializes them, then writes them to neo4j serializes them, then submits tasks to write them to neo4j.
Args: Arguments:
graph: py2neo graph object graph: py2neo graph object
override_cache: serialize the courses even if they'be been recently override_cache: serialize the courses even if they'be been recently
serialized serialized
...@@ -255,8 +311,8 @@ class ModuleStoreSerializer(object): ...@@ -255,8 +311,8 @@ class ModuleStoreSerializer(object):
total_number_of_courses = len(self.course_keys) total_number_of_courses = len(self.course_keys)
successful_courses = [] submitted_courses = []
unsuccessful_courses = [] skipped_courses = []
for index, course_key in enumerate(self.course_keys): for index, course_key in enumerate(self.course_keys):
# first, clear the request cache to prevent memory leaks # first, clear the request cache to prevent memory leaks
...@@ -271,43 +327,13 @@ class ModuleStoreSerializer(object): ...@@ -271,43 +327,13 @@ class ModuleStoreSerializer(object):
if not (override_cache or self.should_dump_course(course_key, graph)): if not (override_cache or self.should_dump_course(course_key, graph)):
log.info("skipping dumping %s, since it hasn't changed", course_key) log.info("skipping dumping %s, since it hasn't changed", course_key)
continue skipped_courses.append(unicode(course_key))
nodes, relationships = self.serialize_course(course_key)
log.info(
"%d nodes and %d relationships in %s",
len(nodes),
len(relationships),
course_key,
)
transaction = graph.begin()
course_string = six.text_type(course_key)
try:
# first, delete existing course
transaction.run(
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
course_string
)
)
# now, re-add it
self.add_to_transaction(nodes, transaction)
self.add_to_transaction(relationships, transaction)
transaction.commit()
except Exception: # pylint: disable=broad-except
log.exception(
"Error trying to dump course %s to neo4j, rolling back",
course_string
)
transaction.rollback()
unsuccessful_courses.append(course_string)
else: else:
successful_courses.append(course_string) self.dump_course_to_neo4j(course_key, graph)
submitted_courses.append(unicode(course_key))
return successful_courses, unsuccessful_courses return submitted_courses, skipped_courses
class Command(BaseCommand): class Command(BaseCommand):
...@@ -374,28 +400,24 @@ class Command(BaseCommand): ...@@ -374,28 +400,24 @@ class Command(BaseCommand):
secure=secure, secure=secure,
) )
mss = ModuleStoreSerializer(options['courses'], options['skip']) mss = ModuleStoreSerializer.create(options['courses'], options['skip'])
successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j( submitted_courses, skipped_courses = mss.dump_courses_to_neo4j(
graph, override_cache=options['override'] graph, override_cache=options['override']
) )
if not successful_courses and not unsuccessful_courses: log.info(
"%d courses submitted for export to neo4j. %d courses skipped.",
len(submitted_courses),
len(skipped_courses),
)
if not submitted_courses:
print("No courses exported to neo4j at all!") print("No courses exported to neo4j at all!")
return return
if successful_courses: if submitted_courses:
print( print(
"These courses exported to neo4j successfully:\n\t" + "These courses were submitted for export to neo4j successfully:\n\t" +
"\n\t".join(successful_courses) "\n\t".join(submitted_courses)
) )
else:
print("No courses exported to neo4j successfully.")
if unsuccessful_courses:
print(
"These courses did not export to neo4j successfully:\n\t" +
"\n\t".join(unsuccessful_courses)
)
else:
print("All courses exported to neo4j successfully.")
...@@ -31,6 +31,26 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase): ...@@ -31,6 +31,26 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
""" """
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
"""
Creates two courses; one that's just a course module, and one that
looks like:
course
|
chapter
|
sequential
|
vertical
/ | \ \
/ | \ ----------
/ | \ \
/ | --- \
/ | \ \
html -> problem -> video -> video2
The side-pointing arrows (->) are PRECEDES relationships; the more
vertical lines are PARENT_OF relationships.
"""
super(TestDumpToNeo4jCommandBase, cls).setUpClass() super(TestDumpToNeo4jCommandBase, cls).setUpClass()
cls.course = CourseFactory.create() cls.course = CourseFactory.create()
cls.chapter = ItemFactory.create(parent=cls.course, category='chapter') cls.chapter = ItemFactory.create(parent=cls.course, category='chapter')
...@@ -51,7 +71,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase): ...@@ -51,7 +71,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
Replaces the py2neo Graph object with a MockGraph; similarly replaces Replaces the py2neo Graph object with a MockGraph; similarly replaces
NodeSelector with MockNodeSelector. NodeSelector with MockNodeSelector.
Args: Arguments:
mock_selector_class: a mocked NodeSelector class mock_selector_class: a mocked NodeSelector class
mock_graph_class: a mocked Graph class mock_graph_class: a mocked Graph class
transaction_errors: a bool for whether we should get errors transaction_errors: a bool for whether we should get errors
...@@ -71,7 +91,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase): ...@@ -71,7 +91,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
""" """
Asserts that we have the expected number of courses, commits, and Asserts that we have the expected number of courses, commits, and
rollbacks after we dump the modulestore to neo4j rollbacks after we dump the modulestore to neo4j
Args: Arguments:
mock_graph: a MockGraph backend mock_graph: a MockGraph backend
number_of_courses: number of courses we expect to find number_of_courses: number of courses we expect to find
number_commits: number of commits we expect against the graph number_commits: number of commits we expect against the graph
...@@ -203,7 +223,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -203,7 +223,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
def setUpClass(cls): def setUpClass(cls):
"""Any ModuleStore course/content operations can go here.""" """Any ModuleStore course/content operations can go here."""
super(TestModuleStoreSerializer, cls).setUpClass() super(TestModuleStoreSerializer, cls).setUpClass()
cls.mss = ModuleStoreSerializer() cls.mss = ModuleStoreSerializer.create()
def test_serialize_item(self): def test_serialize_item(self):
""" """
...@@ -228,7 +248,81 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -228,7 +248,81 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
""" """
nodes, relationships = self.mss.serialize_course(self.course.id) nodes, relationships = self.mss.serialize_course(self.course.id)
self.assertEqual(len(nodes), 9) self.assertEqual(len(nodes), 9)
self.assertEqual(len(relationships), 7) # the course has 7 "PARENT_OF" relationships and 3 "PRECEDES"
self.assertEqual(len(relationships), 10)
@staticmethod
def _extract_relationship_pairs(relationships, relationship_type):
"""
Extracts a list of XBlock location tuples from a list of Relationships.
Arguments:
relationships: list of py2neo `Relationship` objects
relationship_type: the type of relationship to filter `relationships`
by.
Returns:
List of tuples of the locations of of the relationships'
constituent nodes.
"""
relationship_pairs = [
tuple([node["location"] for node in rel.nodes()])
for rel in relationships if rel.type() == relationship_type
]
return relationship_pairs
@staticmethod
def _extract_location_pair(xblock1, xblock2):
"""
Returns a tuple of locations from two XBlocks.
Arguments:
xblock1: an xblock
xblock2: also an xblock
Returns:
A tuple of the string representations of those XBlocks' locations.
"""
return (unicode(xblock1.location), unicode(xblock2.location))
def assertBlockPairIsRelationship(self, xblock1, xblock2, relationships, relationship_type):
"""
Helper assertion that a pair of xblocks have a certain kind of
relationship with one another.
"""
relationship_pairs = self._extract_relationship_pairs(relationships, relationship_type)
location_pair = self._extract_location_pair(xblock1, xblock2)
self.assertIn(location_pair, relationship_pairs)
def assertBlockPairIsNotRelationship(self, xblock1, xblock2, relationships, relationship_type):
"""
The opposite of `assertBlockPairIsRelationship`: asserts that a pair
of xblocks do NOT have a certain kind of relationship.
"""
relationship_pairs = self._extract_relationship_pairs(relationships, relationship_type)
location_pair = self._extract_location_pair(xblock1, xblock2)
self.assertNotIn(location_pair, relationship_pairs)
def test_precedes_relationship(self):
"""
Tests that two nodes that should have a precedes relationship have it.
"""
__, relationships = self.mss.serialize_course(self.course.id)
self.assertBlockPairIsRelationship(self.video, self.video2, relationships, "PRECEDES")
self.assertBlockPairIsNotRelationship(self.video2, self.video, relationships, "PRECEDES")
self.assertBlockPairIsNotRelationship(self.vertical, self.video, relationships, "PRECEDES")
self.assertBlockPairIsNotRelationship(self.html, self.video, relationships, "PRECEDES")
def test_parent_relationship(self):
"""
Test that two nodes that should have a parent_of relationship have it.
"""
__, relationships = self.mss.serialize_course(self.course.id)
self.assertBlockPairIsRelationship(self.vertical, self.video, relationships, "PARENT_OF")
self.assertBlockPairIsRelationship(self.vertical, self.html, relationships, "PARENT_OF")
self.assertBlockPairIsRelationship(self.course, self.chapter, relationships, "PARENT_OF")
self.assertBlockPairIsNotRelationship(self.course, self.video, relationships, "PARENT_OF")
self.assertBlockPairIsNotRelationship(self.video, self.vertical, relationships, "PARENT_OF")
self.assertBlockPairIsNotRelationship(self.video, self.html, relationships, "PARENT_OF")
def test_nodes_have_indices(self): def test_nodes_have_indices(self):
""" """
...@@ -277,7 +371,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -277,7 +371,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_graph = MockGraph() mock_graph = MockGraph()
mock_selector_class.return_value = MockNodeSelector(mock_graph) mock_selector_class.return_value = MockNodeSelector(mock_graph)
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertCourseDump( self.assertCourseDump(
mock_graph, mock_graph,
...@@ -290,9 +384,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -290,9 +384,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# 2 nodes and no relationships from the second # 2 nodes and no relationships from the second
self.assertEqual(len(mock_graph.nodes), 11) self.assertEqual(len(mock_graph.nodes), 11)
self.assertItemsEqual(submitted, self.course_strings)
self.assertEqual(len(unsuccessful), 0)
self.assertItemsEqual(successful, self.course_strings)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector') @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
def test_dump_to_neo4j_rollback(self, mock_selector_class): def test_dump_to_neo4j_rollback(self, mock_selector_class):
...@@ -303,7 +395,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -303,7 +395,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_graph = MockGraph(transaction_errors=True) mock_graph = MockGraph(transaction_errors=True)
mock_selector_class.return_value = MockNodeSelector(mock_graph) mock_selector_class.return_value = MockNodeSelector(mock_graph)
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertCourseDump( self.assertCourseDump(
mock_graph, mock_graph,
...@@ -312,8 +404,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -312,8 +404,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
number_rollbacks=2, number_rollbacks=2,
) )
self.assertEqual(len(successful), 0) self.assertItemsEqual(submitted, self.course_strings)
self.assertItemsEqual(unsuccessful, self.course_strings)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector') @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
@ddt.data((True, 2), (False, 0)) @ddt.data((True, 2), (False, 0))
...@@ -333,10 +424,10 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -333,10 +424,10 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# when run the second time, only dump courses if the cache override # when run the second time, only dump courses if the cache override
# is enabled # is enabled
successful, unsuccessful = self.mss.dump_courses_to_neo4j( submitted, __ = self.mss.dump_courses_to_neo4j(
mock_graph, override_cache=override_cache mock_graph, override_cache=override_cache
) )
self.assertEqual(len(successful + unsuccessful), expected_number_courses) self.assertEqual(len(submitted), expected_number_courses)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector') @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
def test_dump_to_neo4j_published(self, mock_selector_class): def test_dump_to_neo4j_published(self, mock_selector_class):
...@@ -348,17 +439,16 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -348,17 +439,16 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_selector_class.return_value = MockNodeSelector(mock_graph) mock_selector_class.return_value = MockNodeSelector(mock_graph)
# run once to warm the cache # run once to warm the cache
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(len(successful + unsuccessful), len(self.course_strings)) self.assertEqual(len(submitted), len(self.course_strings))
# simulate one of the courses being published # simulate one of the courses being published
listen_for_course_publish(None, self.course.id) listen_for_course_publish(None, self.course.id)
# make sure only the published course was dumped # make sure only the published course was dumped
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) submitted, __ = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(len(unsuccessful), 0) self.assertEqual(len(submitted), 1)
self.assertEqual(len(successful), 1) self.assertEqual(submitted[0], unicode(self.course.id))
self.assertEqual(successful[0], unicode(self.course.id))
@ddt.data( @ddt.data(
(six.text_type(datetime(2016, 3, 30)), six.text_type(datetime(2016, 3, 31)), True), (six.text_type(datetime(2016, 3, 30)), six.text_type(datetime(2016, 3, 31)), True),
...@@ -373,7 +463,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): ...@@ -373,7 +463,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
Tests whether a course should be dumped given the last time it was Tests whether a course should be dumped given the last time it was
dumped and the last time it was published. dumped and the last time it was published.
""" """
mss = ModuleStoreSerializer() mss = ModuleStoreSerializer.create()
mss.get_command_last_run = lambda course_key, graph: last_command_run mss.get_command_last_run = lambda course_key, graph: last_command_run
mss.get_course_last_published = lambda course_key: last_course_published mss.get_course_last_published = lambda course_key: last_course_published
mock_course_key = mock.Mock mock_course_key = mock.Mock
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment