Commit e798b2e8 by Adam Palay

add precedes relationship to coursegraph (EDUCATOR-484)

parent 8df5c941
...@@ -33,16 +33,25 @@ class ModuleStoreSerializer(object): ...@@ -33,16 +33,25 @@ class ModuleStoreSerializer(object):
one graph per course. one graph per course.
""" """
def __init__(self, courses=None, skip=None): def __init__(self, course_keys):
self.course_keys = course_keys
@classmethod
def create(cls, courses=None, skip=None):
""" """
Sets the object's course_keys attribute from the `courses` parameter. Sets the object's course_keys attribute from the `courses` parameter.
If that parameter isn't furnished, loads all course_keys from the If that parameter isn't furnished, loads all course_keys from the
modulestore. modulestore.
Filters out course_keys in the `skip` parameter, if provided. Filters out course_keys in the `skip` parameter, if provided.
Args:
Arguments:
courses: A list of string serializations of course keys. courses: A list of string serializations of course keys.
For example, ["course-v1:org+course+run"]. For example, ["course-v1:org+course+run"].
skip: Also a list of string serializations of course keys. skip: Also a list of string serializations of course keys.
Returns:
a ModuleStoreSerializer instance
""" """
if courses: if courses:
course_keys = [CourseKey.from_string(course.strip()) for course in courses] course_keys = [CourseKey.from_string(course.strip()) for course in courses]
...@@ -51,14 +60,14 @@ class ModuleStoreSerializer(object): ...@@ -51,14 +60,14 @@ class ModuleStoreSerializer(object):
course.id for course in modulestore().get_course_summaries() course.id for course in modulestore().get_course_summaries()
] ]
if skip is not None: if skip is not None:
skip_keys = [CourseKey.from_string(course.strip()) for course in skip] skip_keys = set([CourseKey.from_string(course.strip()) for course in skip])
course_keys = [course_key for course_key in course_keys if course_key not in skip_keys] course_keys = [course_key for course_key in course_keys if course_key not in skip_keys]
self.course_keys = course_keys return cls(course_keys)
@staticmethod @staticmethod
def serialize_item(item): def serialize_item(item):
""" """
Args: Arguments:
item: an XBlock item: an XBlock
Returns: Returns:
...@@ -100,7 +109,7 @@ class ModuleStoreSerializer(object): ...@@ -100,7 +109,7 @@ class ModuleStoreSerializer(object):
def serialize_course(self, course_id): def serialize_course(self, course_id):
""" """
Serializes a course into py2neo Nodes and Relationships Serializes a course into py2neo Nodes and Relationships
Args: Arguments:
course_id: CourseKey of the course we want to serialize course_id: CourseKey of the course we want to serialize
Returns: Returns:
...@@ -125,6 +134,7 @@ class ModuleStoreSerializer(object): ...@@ -125,6 +134,7 @@ class ModuleStoreSerializer(object):
# create relationships # create relationships
relationships = [] relationships = []
for item in items: for item in items:
previous_child_node = None
for index, child_loc in enumerate(item.get_children()): for index, child_loc in enumerate(item.get_children()):
parent_node = location_to_node.get(item.location) parent_node = location_to_node.get(item.location)
child_node = location_to_node.get(child_loc.location) child_node = location_to_node.get(child_loc.location)
...@@ -133,13 +143,20 @@ class ModuleStoreSerializer(object): ...@@ -133,13 +143,20 @@ class ModuleStoreSerializer(object):
relationship = Relationship(parent_node, "PARENT_OF", child_node) relationship = Relationship(parent_node, "PARENT_OF", child_node)
relationships.append(relationship) relationships.append(relationship)
if previous_child_node:
ordering_relationship = Relationship(
previous_child_node, "PRECEDES", child_node
)
relationships.append(ordering_relationship)
previous_child_node = child_node
nodes = location_to_node.values() nodes = location_to_node.values()
return nodes, relationships return nodes, relationships
@staticmethod @staticmethod
def coerce_types(value): def coerce_types(value):
""" """
Args: Arguments:
value: the value of an xblock's field value: the value of an xblock's field
Returns: either the value, a text version of the value, or, if the Returns: either the value, a text version of the value, or, if the
...@@ -159,7 +176,7 @@ class ModuleStoreSerializer(object): ...@@ -159,7 +176,7 @@ class ModuleStoreSerializer(object):
@staticmethod @staticmethod
def add_to_transaction(neo4j_entities, transaction): def add_to_transaction(neo4j_entities, transaction):
""" """
Args: Arguments:
neo4j_entities: a list of Nodes or Relationships neo4j_entities: a list of Nodes or Relationships
transaction: a neo4j transaction transaction: a neo4j transaction
""" """
...@@ -170,7 +187,7 @@ class ModuleStoreSerializer(object): ...@@ -170,7 +187,7 @@ class ModuleStoreSerializer(object):
def get_command_last_run(course_key, graph): def get_command_last_run(course_key, graph):
""" """
This information is stored on the course node of a course in neo4j This information is stored on the course node of a course in neo4j
Args: Arguments:
course_key: a CourseKey course_key: a CourseKey
graph: a py2neo Graph graph: a py2neo Graph
...@@ -195,7 +212,7 @@ class ModuleStoreSerializer(object): ...@@ -195,7 +212,7 @@ class ModuleStoreSerializer(object):
""" """
We use the CourseStructure table to get when this course was last We use the CourseStructure table to get when this course was last
published. published.
Args: Arguments:
course_key: a CourseKey course_key: a CourseKey
Returns: The datetime the course was last published at, converted into Returns: The datetime the course was last published at, converted into
...@@ -214,7 +231,7 @@ class ModuleStoreSerializer(object): ...@@ -214,7 +231,7 @@ class ModuleStoreSerializer(object):
""" """
Only dump the course if it's been changed since the last time it's been Only dump the course if it's been changed since the last time it's been
dumped. dumped.
Args: Arguments:
course_key: a CourseKey object. course_key: a CourseKey object.
graph: a py2neo Graph object. graph: a py2neo Graph object.
...@@ -240,11 +257,50 @@ class ModuleStoreSerializer(object): ...@@ -240,11 +257,50 @@ class ModuleStoreSerializer(object):
# before the course's last published event # before the course's last published event
return last_this_command_was_run < course_last_published_date return last_this_command_was_run < course_last_published_date
def dump_course_to_neo4j(self, course_key, graph):
    """
    Replace one course's data in neo4j with a freshly serialized copy.

    The course is serialized first, then, inside a single transaction,
    every existing `item` node whose `course_key` property matches this
    course is detach-deleted and the new nodes and relationships are
    added. If any step inside the transaction raises, the error is
    logged and the transaction is rolled back; the exception is not
    re-raised by this method.

    Arguments:
        course_key: course key for the course to be exported
        graph: py2neo graph object
    """
    nodes, relationships = self.serialize_course(course_key)
    log.info(
        "%d nodes and %d relationships in %s",
        len(nodes),
        len(relationships),
        course_key,
    )

    transaction = graph.begin()
    course_string = six.text_type(course_key)

    try:
        # Clear out whatever is currently stored for this course...
        delete_existing_course = (
            "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n"
        ).format(course_string)
        transaction.run(delete_existing_course)

        # ...then write the new serialization, nodes before relationships.
        for neo4j_entities in (nodes, relationships):
            self.add_to_transaction(neo4j_entities, transaction)

        transaction.commit()
    except Exception:  # pylint: disable=broad-except
        log.exception(
            "Error trying to dump course %s to neo4j, rolling back",
            course_string
        )
        transaction.rollback()
def dump_courses_to_neo4j(self, graph, override_cache=False): def dump_courses_to_neo4j(self, graph, override_cache=False):
""" """
Method that iterates through a list of courses in a modulestore, Method that iterates through a list of courses in a modulestore,
serializes them, then writes them to neo4j serializes them, then submits tasks to write them to neo4j.
Args: Arguments:
graph: py2neo graph object graph: py2neo graph object
override_cache: serialize the courses even if they've been recently override_cache: serialize the courses even if they've been recently
serialized serialized
...@@ -255,8 +311,8 @@ class ModuleStoreSerializer(object): ...@@ -255,8 +311,8 @@ class ModuleStoreSerializer(object):
total_number_of_courses = len(self.course_keys) total_number_of_courses = len(self.course_keys)
successful_courses = [] submitted_courses = []
unsuccessful_courses = [] skipped_courses = []
for index, course_key in enumerate(self.course_keys): for index, course_key in enumerate(self.course_keys):
# first, clear the request cache to prevent memory leaks # first, clear the request cache to prevent memory leaks
...@@ -271,43 +327,13 @@ class ModuleStoreSerializer(object): ...@@ -271,43 +327,13 @@ class ModuleStoreSerializer(object):
if not (override_cache or self.should_dump_course(course_key, graph)): if not (override_cache or self.should_dump_course(course_key, graph)):
log.info("skipping dumping %s, since it hasn't changed", course_key) log.info("skipping dumping %s, since it hasn't changed", course_key)
continue skipped_courses.append(unicode(course_key))
nodes, relationships = self.serialize_course(course_key)
log.info(
"%d nodes and %d relationships in %s",
len(nodes),
len(relationships),
course_key,
)
transaction = graph.begin()
course_string = six.text_type(course_key)
try:
# first, delete existing course
transaction.run(
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
course_string
)
)
# now, re-add it
self.add_to_transaction(nodes, transaction)
self.add_to_transaction(relationships, transaction)
transaction.commit()
except Exception: # pylint: disable=broad-except
log.exception(
"Error trying to dump course %s to neo4j, rolling back",
course_string
)
transaction.rollback()
unsuccessful_courses.append(course_string)
else: else:
successful_courses.append(course_string) self.dump_course_to_neo4j(course_key, graph)
submitted_courses.append(unicode(course_key))
return successful_courses, unsuccessful_courses return submitted_courses, skipped_courses
class Command(BaseCommand): class Command(BaseCommand):
...@@ -374,28 +400,24 @@ class Command(BaseCommand): ...@@ -374,28 +400,24 @@ class Command(BaseCommand):
secure=secure, secure=secure,
) )
mss = ModuleStoreSerializer(options['courses'], options['skip']) mss = ModuleStoreSerializer.create(options['courses'], options['skip'])
successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j( submitted_courses, skipped_courses = mss.dump_courses_to_neo4j(
graph, override_cache=options['override'] graph, override_cache=options['override']
) )
if not successful_courses and not unsuccessful_courses: log.info(
"%d courses submitted for export to neo4j. %d courses skipped.",
len(submitted_courses),
len(skipped_courses),
)
if not submitted_courses:
print("No courses exported to neo4j at all!") print("No courses exported to neo4j at all!")
return return
if successful_courses: if submitted_courses:
print( print(
"These courses exported to neo4j successfully:\n\t" + "These courses were submitted for export to neo4j successfully:\n\t" +
"\n\t".join(successful_courses) "\n\t".join(submitted_courses)
) )
else:
print("No courses exported to neo4j successfully.")
if unsuccessful_courses:
print(
"These courses did not export to neo4j successfully:\n\t" +
"\n\t".join(unsuccessful_courses)
)
else:
print("All courses exported to neo4j successfully.")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment