Commit e798b2e8 by Adam Palay

add precedes relationship to coursegraph (EDUCATOR-484)

parent 8df5c941
......@@ -33,16 +33,25 @@ class ModuleStoreSerializer(object):
one graph per course.
"""
def __init__(self, courses=None, skip=None):
def __init__(self, course_keys):
self.course_keys = course_keys
@classmethod
def create(cls, courses=None, skip=None):
"""
Sets the object's course_keys attribute from the `courses` parameter.
If that parameter isn't furnished, loads all course_keys from the
modulestore.
Filters out course_keys in the `skip` parameter, if provided.
Args:
Arguments:
courses: A list of string serializations of course keys.
For example, ["course-v1:org+course+run"].
skip: Also a list of string serializations of course keys.
Returns:
a ModulestoreSerializer instance
"""
if courses:
course_keys = [CourseKey.from_string(course.strip()) for course in courses]
......@@ -51,14 +60,14 @@ class ModuleStoreSerializer(object):
course.id for course in modulestore().get_course_summaries()
]
if skip is not None:
skip_keys = [CourseKey.from_string(course.strip()) for course in skip]
skip_keys = set([CourseKey.from_string(course.strip()) for course in skip])
course_keys = [course_key for course_key in course_keys if course_key not in skip_keys]
self.course_keys = course_keys
return cls(course_keys)
@staticmethod
def serialize_item(item):
"""
Args:
Arguments:
item: an XBlock
Returns:
......@@ -100,7 +109,7 @@ class ModuleStoreSerializer(object):
def serialize_course(self, course_id):
"""
Serializes a course into py2neo Nodes and Relationships
Args:
Arguments:
course_id: CourseKey of the course we want to serialize
Returns:
......@@ -125,6 +134,7 @@ class ModuleStoreSerializer(object):
# create relationships
relationships = []
for item in items:
previous_child_node = None
for index, child_loc in enumerate(item.get_children()):
parent_node = location_to_node.get(item.location)
child_node = location_to_node.get(child_loc.location)
......@@ -133,13 +143,20 @@ class ModuleStoreSerializer(object):
relationship = Relationship(parent_node, "PARENT_OF", child_node)
relationships.append(relationship)
if previous_child_node:
ordering_relationship = Relationship(
previous_child_node, "PRECEDES", child_node
)
relationships.append(ordering_relationship)
previous_child_node = child_node
nodes = location_to_node.values()
return nodes, relationships
@staticmethod
def coerce_types(value):
"""
Args:
Arguments:
value: the value of an xblock's field
Returns: either the value, a text version of the value, or, if the
......@@ -159,7 +176,7 @@ class ModuleStoreSerializer(object):
@staticmethod
def add_to_transaction(neo4j_entities, transaction):
"""
Args:
Arguments:
neo4j_entities: a list of Nodes or Relationships
transaction: a neo4j transaction
"""
......@@ -170,7 +187,7 @@ class ModuleStoreSerializer(object):
def get_command_last_run(course_key, graph):
"""
This information is stored on the course node of a course in neo4j
Args:
Arguments:
course_key: a CourseKey
graph: a py2neo Graph
......@@ -195,7 +212,7 @@ class ModuleStoreSerializer(object):
"""
We use the CourseStructure table to get when this course was last
published.
Args:
Arguments:
course_key: a CourseKey
Returns: The datetime the course was last published at, converted into
......@@ -214,7 +231,7 @@ class ModuleStoreSerializer(object):
"""
Only dump the course if it's been changed since the last time it's been
dumped.
Args:
Arguments:
course_key: a CourseKey object.
graph: a py2neo Graph object.
......@@ -240,39 +257,15 @@ class ModuleStoreSerializer(object):
# before the course's last published event
return last_this_command_was_run < course_last_published_date
def dump_courses_to_neo4j(self, graph, override_cache=False):
def dump_course_to_neo4j(self, course_key, graph):
"""
Method that iterates through a list of courses in a modulestore,
serializes them, then writes them to neo4j
Args:
graph: py2neo graph object
override_cache: serialize the courses even if they've been recently
serialized
Serializes a course and writes it to neo4j.
Returns: two lists--one of the courses that were successfully written
to neo4j and one of courses that were not.
Arguments:
course_key: course key for the course to be exported
graph: py2neo graph object
"""
total_number_of_courses = len(self.course_keys)
successful_courses = []
unsuccessful_courses = []
for index, course_key in enumerate(self.course_keys):
# first, clear the request cache to prevent memory leaks
RequestCache.clear_request_cache()
log.info(
"Now exporting %s to neo4j: course %d of %d total courses",
course_key,
index + 1,
total_number_of_courses,
)
if not (override_cache or self.should_dump_course(course_key, graph)):
log.info("skipping dumping %s, since it hasn't changed", course_key)
continue
nodes, relationships = self.serialize_course(course_key)
log.info(
"%d nodes and %d relationships in %s",
......@@ -302,12 +295,45 @@ class ModuleStoreSerializer(object):
course_string
)
transaction.rollback()
unsuccessful_courses.append(course_string)
def dump_courses_to_neo4j(self, graph, override_cache=False):
    """
    Method that iterates through this serializer's course keys and dumps
    each course that needs it to neo4j.

    A course is skipped when `override_cache` is falsy and
    `should_dump_course` returns a falsy value for it; otherwise it is
    handed to `dump_course_to_neo4j`.

    Arguments:
        graph: py2neo graph object
        override_cache: serialize the courses even if they've been recently
            serialized

    Returns: two lists of course key strings--one of the courses that were
        submitted for export to neo4j and one of the courses that were
        skipped.
    """
    total_number_of_courses = len(self.course_keys)

    submitted_courses = []
    skipped_courses = []

    for index, course_key in enumerate(self.course_keys):
        # first, clear the request cache to prevent memory leaks
        RequestCache.clear_request_cache()

        log.info(
            "Now exporting %s to neo4j: course %d of %d total courses",
            course_key,
            index + 1,
            total_number_of_courses,
        )

        if not (override_cache or self.should_dump_course(course_key, graph)):
            log.info("skipping dumping %s, since it hasn't changed", course_key)
            skipped_courses.append(unicode(course_key))
        else:
            self.dump_course_to_neo4j(course_key, graph)
            submitted_courses.append(unicode(course_key))

    return submitted_courses, skipped_courses
class Command(BaseCommand):
......@@ -374,28 +400,24 @@ class Command(BaseCommand):
secure=secure,
)
mss = ModuleStoreSerializer(options['courses'], options['skip'])
mss = ModuleStoreSerializer.create(options['courses'], options['skip'])
successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j(
submitted_courses, skipped_courses = mss.dump_courses_to_neo4j(
graph, override_cache=options['override']
)
if not successful_courses and not unsuccessful_courses:
log.info(
"%d courses submitted for export to neo4j. %d courses skipped.",
len(submitted_courses),
len(skipped_courses),
)
if not submitted_courses:
print("No courses exported to neo4j at all!")
return
if successful_courses:
print(
"These courses exported to neo4j successfully:\n\t" +
"\n\t".join(successful_courses)
)
else:
print("No courses exported to neo4j successfully.")
if unsuccessful_courses:
if submitted_courses:
print(
"These courses did not export to neo4j successfully:\n\t" +
"\n\t".join(unsuccessful_courses)
"These courses were submitted for export to neo4j successfully:\n\t" +
"\n\t".join(submitted_courses)
)
else:
print("All courses exported to neo4j successfully.")
......@@ -31,6 +31,26 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
"""
@classmethod
def setUpClass(cls):
"""
Creates two courses; one that's just a course module, and one that
looks like:
course
|
chapter
|
sequential
|
vertical
/ | \ \
/ | \ ----------
/ | \ \
/ | --- \
/ | \ \
html -> problem -> video -> video2
The side-pointing arrows (->) are PRECEDES relationships; the more
vertical lines are PARENT_OF relationships.
"""
super(TestDumpToNeo4jCommandBase, cls).setUpClass()
cls.course = CourseFactory.create()
cls.chapter = ItemFactory.create(parent=cls.course, category='chapter')
......@@ -51,7 +71,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
Replaces the py2neo Graph object with a MockGraph; similarly replaces
NodeSelector with MockNodeSelector.
Args:
Arguments:
mock_selector_class: a mocked NodeSelector class
mock_graph_class: a mocked Graph class
transaction_errors: a bool for whether we should get errors
......@@ -71,7 +91,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
"""
Asserts that we have the expected number of courses, commits, and
rollbacks after we dump the modulestore to neo4j
Args:
Arguments:
mock_graph: a MockGraph backend
number_of_courses: number of courses we expect to find
number_commits: number of commits we expect against the graph
......@@ -203,7 +223,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
def setUpClass(cls):
"""Any ModuleStore course/content operations can go here."""
super(TestModuleStoreSerializer, cls).setUpClass()
cls.mss = ModuleStoreSerializer()
cls.mss = ModuleStoreSerializer.create()
def test_serialize_item(self):
"""
......@@ -228,7 +248,81 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
nodes, relationships = self.mss.serialize_course(self.course.id)
self.assertEqual(len(nodes), 9)
self.assertEqual(len(relationships), 7)
# the course has 7 "PARENT_OF" relationships and 3 "PRECEDES"
self.assertEqual(len(relationships), 10)
@staticmethod
def _extract_relationship_pairs(relationships, relationship_type):
    """
    Extracts a list of XBlock location tuples from a list of Relationships.

    Arguments:
        relationships: list of py2neo `Relationship` objects
        relationship_type: the type of relationship to filter `relationships`
            by.

    Returns:
        List of tuples of the locations of the relationships'
        constituent nodes, one tuple per relationship whose `type()`
        equals `relationship_type`, in the order the relationships
        were given.
    """
    return [
        # a generator is enough here; no need for an intermediate list
        tuple(node["location"] for node in rel.nodes())
        for rel in relationships
        if rel.type() == relationship_type
    ]
@staticmethod
def _extract_location_pair(xblock1, xblock2):
    """
    Builds the location pair for two XBlocks, in the order given.

    Arguments:
        xblock1: the XBlock whose location comes first in the pair
        xblock2: the XBlock whose location comes second in the pair

    Returns:
        A 2-tuple holding the unicode representation of each XBlock's
        `location` attribute.
    """
    first_location = unicode(xblock1.location)
    second_location = unicode(xblock2.location)
    return (first_location, second_location)
def assertBlockPairIsRelationship(self, xblock1, xblock2, relationships, relationship_type):
    """
    Assertion helper: passes when the ordered pair (xblock1, xblock2)
    appears among the relationships of the given type.

    Arguments:
        xblock1: the XBlock expected on the first side of the relationship
        xblock2: the XBlock expected on the second side of the relationship
        relationships: the relationships to search
        relationship_type: the relationship type to filter by
    """
    pairs_of_type = self._extract_relationship_pairs(relationships, relationship_type)
    self.assertIn(self._extract_location_pair(xblock1, xblock2), pairs_of_type)
def assertBlockPairIsNotRelationship(self, xblock1, xblock2, relationships, relationship_type):
    """
    Assertion helper: passes when the ordered pair (xblock1, xblock2) does
    NOT appear among the relationships of the given type.

    Arguments:
        xblock1: the XBlock on the first side of the pair being checked
        xblock2: the XBlock on the second side of the pair being checked
        relationships: the relationships to search
        relationship_type: the relationship type to filter by
    """
    pairs_of_type = self._extract_relationship_pairs(relationships, relationship_type)
    self.assertNotIn(self._extract_location_pair(xblock1, xblock2), pairs_of_type)
def test_precedes_relationship(self):
"""
Tests that two nodes that should have a precedes relationship have it.
"""
__, relationships = self.mss.serialize_course(self.course.id)
self.assertBlockPairIsRelationship(self.video, self.video2, relationships, "PRECEDES")
self.assertBlockPairIsNotRelationship(self.video2, self.video, relationships, "PRECEDES")
self.assertBlockPairIsNotRelationship(self.vertical, self.video, relationships, "PRECEDES")
self.assertBlockPairIsNotRelationship(self.html, self.video, relationships, "PRECEDES")
def test_parent_relationship(self):
"""
Test that two nodes that should have a parent_of relationship have it.
"""
__, relationships = self.mss.serialize_course(self.course.id)
self.assertBlockPairIsRelationship(self.vertical, self.video, relationships, "PARENT_OF")
self.assertBlockPairIsRelationship(self.vertical, self.html, relationships, "PARENT_OF")
self.assertBlockPairIsRelationship(self.course, self.chapter, relationships, "PARENT_OF")
self.assertBlockPairIsNotRelationship(self.course, self.video, relationships, "PARENT_OF")
self.assertBlockPairIsNotRelationship(self.video, self.vertical, relationships, "PARENT_OF")
self.assertBlockPairIsNotRelationship(self.video, self.html, relationships, "PARENT_OF")
def test_nodes_have_indices(self):
"""
......@@ -277,7 +371,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_graph = MockGraph()
mock_selector_class.return_value = MockNodeSelector(mock_graph)
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertCourseDump(
mock_graph,
......@@ -290,9 +384,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# 2 nodes and no relationships from the second
self.assertEqual(len(mock_graph.nodes), 11)
self.assertEqual(len(unsuccessful), 0)
self.assertItemsEqual(successful, self.course_strings)
self.assertItemsEqual(submitted, self.course_strings)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
def test_dump_to_neo4j_rollback(self, mock_selector_class):
......@@ -303,7 +395,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_graph = MockGraph(transaction_errors=True)
mock_selector_class.return_value = MockNodeSelector(mock_graph)
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertCourseDump(
mock_graph,
......@@ -312,8 +404,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
number_rollbacks=2,
)
self.assertEqual(len(successful), 0)
self.assertItemsEqual(unsuccessful, self.course_strings)
self.assertItemsEqual(submitted, self.course_strings)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
@ddt.data((True, 2), (False, 0))
......@@ -333,10 +424,10 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# when run the second time, only dump courses if the cache override
# is enabled
successful, unsuccessful = self.mss.dump_courses_to_neo4j(
submitted, __ = self.mss.dump_courses_to_neo4j(
mock_graph, override_cache=override_cache
)
self.assertEqual(len(successful + unsuccessful), expected_number_courses)
self.assertEqual(len(submitted), expected_number_courses)
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
def test_dump_to_neo4j_published(self, mock_selector_class):
......@@ -348,17 +439,16 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_selector_class.return_value = MockNodeSelector(mock_graph)
# run once to warm the cache
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(len(successful + unsuccessful), len(self.course_strings))
submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(len(submitted), len(self.course_strings))
# simulate one of the courses being published
listen_for_course_publish(None, self.course.id)
# make sure only the published course was dumped
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(len(unsuccessful), 0)
self.assertEqual(len(successful), 1)
self.assertEqual(successful[0], unicode(self.course.id))
submitted, __ = self.mss.dump_courses_to_neo4j(mock_graph)
self.assertEqual(len(submitted), 1)
self.assertEqual(submitted[0], unicode(self.course.id))
@ddt.data(
(six.text_type(datetime(2016, 3, 30)), six.text_type(datetime(2016, 3, 31)), True),
......@@ -373,7 +463,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
Tests whether a course should be dumped given the last time it was
dumped and the last time it was published.
"""
mss = ModuleStoreSerializer()
mss = ModuleStoreSerializer.create()
mss.get_command_last_run = lambda course_key, graph: last_command_run
mss.get_course_last_published = lambda course_key: last_course_published
mock_course_key = mock.Mock
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment