Commit 01a0f6d6 by Adam Palay

allow ability to specify which courses to write to neo4j

parent cf2c48ab
......@@ -13,6 +13,7 @@ from py2neo import Graph, Node, Relationship, authenticate
from py2neo.compat import integer, string, unicode as neo4j_unicode
from request_cache.middleware import RequestCache
from xmodule.modulestore.django import modulestore
from opaque_keys.edx.keys import CourseKey
log = logging.getLogger(__name__)
......@@ -30,8 +31,20 @@ class ModuleStoreSerializer(object):
Class with functionality to serialize a modulestore into subgraphs,
one graph per course.
"""
def __init__(self):
self.all_courses = modulestore().get_course_summaries()
def load_course_keys(self, courses=None):
    """
    Sets the object's course_keys attribute from the `courses` parameter.
    If that parameter isn't furnished (or is empty), loads all course_keys
    from the modulestore.

    :param courses: an iterable of serialized course key strings. A single
        non-empty string is treated as one serialized course key. Leading
        and trailing whitespace is stripped from each entry before parsing.
    """
    if courses and isinstance(courses, six.string_types):
        # A bare string would otherwise be iterated character by character
        # and each character handed to CourseKey.from_string.
        courses = [courses]

    if courses:
        course_keys = [
            CourseKey.from_string(course.strip()) for course in courses
        ]
    else:
        course_keys = [
            course.id for course in modulestore().get_course_summaries()
        ]
    self.course_keys = course_keys
@staticmethod
def serialize_item(item):
......@@ -136,6 +149,79 @@ class ModuleStoreSerializer(object):
return coerced_value
@staticmethod
def add_to_transaction(neo4j_entities, transaction):
    """
    Calls `transaction.create` on each of the given entities, in order.

    Args:
        neo4j_entities: a list of Nodes or Relationships
        transaction: a neo4j transaction
    """
    for neo4j_entity in neo4j_entities:
        transaction.create(neo4j_entity)
def dump_courses_to_neo4j(self, graph):
    """
    Writes each course in `self.course_keys` to neo4j, using a separate
    transaction per course. If anything raises while a course is being
    written, that course's transaction is rolled back and the loop moves
    on to the next course.

    Parameters
    ----------
    graph: py2neo graph object

    Returns
    -------
    Two lists of course key strings: one of the courses that were
    successfully written to neo4j, and one of courses that were not.
    """
    course_count = len(self.course_keys)
    successful_courses, unsuccessful_courses = [], []

    for position, course_key in enumerate(self.course_keys, 1):
        # first, clear the request cache to prevent memory leaks
        RequestCache.clear_request_cache()

        log.info(
            "Now exporting %s to neo4j: course %d of %d total courses",
            course_key,
            position,
            course_count,
        )
        # Serialization happens outside the try block below, so an error
        # raised here propagates to the caller instead of being recorded
        # as an unsuccessful course.
        nodes, relationships = self.serialize_course(course_key)
        log.info(
            "%d nodes and %d relationships in %s",
            len(nodes),
            len(relationships),
            course_key,
        )

        transaction = graph.begin()
        course_string = six.text_type(course_key)

        try:
            # first, delete existing course
            # NOTE(review): the course key is interpolated directly into
            # the Cypher statement; consider a parameterized query.
            delete_statement = (
                "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
                    course_string
                )
            )
            transaction.run(delete_statement)

            # now, re-add it
            for entities in (nodes, relationships):
                self.add_to_transaction(entities, transaction)
            transaction.commit()
        except Exception:  # pylint: disable=broad-except
            log.exception(
                "Error trying to dump course %s to neo4j, rolling back",
                course_string
            )
            transaction.rollback()
            unsuccessful_courses.append(course_string)
        else:
            successful_courses.append(course_string)

    return successful_courses, unsuccessful_courses
class Command(BaseCommand):
"""
Command to dump modulestore data to neo4j
......@@ -155,16 +241,7 @@ class Command(BaseCommand):
parser.add_argument('--port', type=int)
parser.add_argument('--user', type=unicode)
parser.add_argument('--password', type=unicode)
@staticmethod
def add_to_transaction(neo4j_entities, transaction):
"""
Args:
neo4j_entities: a list of Nodes or Relationships
transaction: a neo4j transaction
"""
for entity in neo4j_entities:
transaction.create(entity)
parser.add_argument('--courses', type=unicode, nargs='*')
def handle(self, *args, **options): # pylint: disable=unused-argument
"""
......@@ -192,44 +269,22 @@ class Command(BaseCommand):
)
mss = ModuleStoreSerializer()
mss.load_course_keys(options['courses'])
total_number_of_courses = len(mss.all_courses)
successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j(graph)
for index, course in enumerate(mss.all_courses):
# first, clear the request cache to prevent memory leaks
RequestCache.clear_request_cache()
log.info(
"Now exporting %s to neo4j: course %d of %d total courses",
course.id,
index + 1,
total_number_of_courses
)
nodes, relationships = mss.serialize_course(course.id)
log.info(
"%d nodes and %d relationships in %s",
len(nodes),
len(relationships),
course.id
if successful_courses:
print(
"These courses exported to neo4j successfully:\n\t" +
"\n\t".join(successful_courses)
)
else:
print("No courses exported to neo4j successfully.")
transaction = graph.begin()
try:
# first, delete existing course
transaction.run(
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
six.text_type(course.id)
)
)
# now, re-add it
self.add_to_transaction(nodes, transaction)
self.add_to_transaction(relationships, transaction)
transaction.commit()
except Exception: # pylint: disable=broad-except
log.exception(
"Error trying to dump course %s to neo4j, rolling back",
six.text_type(course.id)
)
transaction.rollback()
if unsuccessful_courses:
print(
"These courses did not export to neo4j successfully:\n\t" +
"\n\t".join(unsuccessful_courses)
)
else:
print("All courses exported to neo4j successfully.")
......@@ -35,49 +35,49 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
cls.course2 = CourseFactory.create()
cls.course_strings = [six.text_type(cls.course.id), six.text_type(cls.course2.id)]
@ddt.ddt
class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
"""
Tests for the dump to neo4j management command
"""
@mock.patch('courseware.management.commands.dump_to_neo4j.Graph')
@ddt.data(1, 2)
def test_dump_specific_courses(self, number_of_courses, mock_graph_class):
    """
    Test that you can specify which courses you want to dump.

    Runs the command with the first `number_of_courses` course strings and
    checks that exactly that many transactions were begun and committed,
    and that none were rolled back.
    """
    mock_graph = mock_graph_class.return_value
    mock_transaction = mock.Mock()
    mock_graph.begin.return_value = mock_transaction

    call_command(
        'dump_to_neo4j',
        courses=self.course_strings[:number_of_courses],
        host='mock_host',
        port=7473,
        user='mock_user',
        password='mock_password',
    )

    self.assertEqual(mock_graph.begin.call_count, number_of_courses)
    self.assertEqual(mock_transaction.commit.call_count, number_of_courses)
    # Assert on the transaction's own `rollback` mock. `commit.rollback`
    # is an auto-created child attribute of the `commit` mock that nothing
    # ever calls, so asserting on it would pass even if a rollback happened.
    self.assertEqual(mock_transaction.rollback.call_count, 0)
@mock.patch('courseware.management.commands.dump_to_neo4j.Graph')
def test_dump_to_neo4j_rollback(self, mock_graph_class):
def test_dump_all_courses(self, mock_graph_class):
"""
Tests that the management command handles the case where there's
an exception trying to write to the neo4j database.
Test if you don't specify which courses to dump, then you'll dump
all of them.
"""
mock_graph = mock_graph_class.return_value
mock_transaction = mock.Mock()
mock_graph.begin.return_value = mock_transaction
mock_transaction.run.side_effect = ValueError('Something went wrong!')
call_command(
'dump_to_neo4j',
......@@ -88,8 +88,8 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
)
self.assertEqual(mock_graph.begin.call_count, 2)
self.assertEqual(mock_transaction.commit.call_count, 0)
self.assertEqual(mock_transaction.rollback.call_count, 2)
self.assertEqual(mock_transaction.commit.call_count, 2)
self.assertEqual(mock_transaction.commit.rollback.call_count, 0)
@ddt.ddt
......@@ -97,15 +97,13 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests for the ModuleStoreSerializer
"""
def setUp(self):
super(TestModuleStoreSerializer, self).setUp()
self.modulestore_serializer = ModuleStoreSerializer()
def test_serialize_item(self):
"""
Tests the serialize_item method.
"""
fields, label = self.modulestore_serializer.serialize_item(self.course)
mss = ModuleStoreSerializer()
mss.load_course_keys()
fields, label = mss.serialize_item(self.course)
self.assertEqual(label, "course")
self.assertIn("edited_on", fields.keys())
self.assertIn("display_name", fields.keys())
......@@ -119,7 +117,9 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests the serialize_course method.
"""
nodes, relationships = self.modulestore_serializer.serialize_course(
mss = ModuleStoreSerializer()
mss.load_course_keys()
nodes, relationships = mss.serialize_course(
self.course.id
)
self.assertEqual(len(nodes), 9)
......@@ -135,7 +135,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# each element in the iterable is not unicode:
self.assertFalse(any(isinstance(tab, six.text_type) for tab in example_iterable))
# but after they are coerced, they are:
coerced = self.modulestore_serializer.coerce_types(example_iterable)
coerced = ModuleStoreSerializer().coerce_types(example_iterable)
self.assertTrue(all(isinstance(tab, six.text_type) for tab in coerced))
# finally, make sure we haven't changed the type:
self.assertEqual(type(coerced), iterable_type)
......@@ -154,5 +154,52 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests the coerce_types helper for the neo4j base types
"""
coerced_value = self.modulestore_serializer.coerce_types(original_value)
coerced_value = ModuleStoreSerializer().coerce_types(original_value)
self.assertEqual(coerced_value, coerced_expected)
def test_dump_to_neo4j(self):
    """
    Tests that dump_courses_to_neo4j works against a mock py2neo Graph:
    every course is committed and reported as successful.
    """
    transaction = mock.Mock()
    graph = mock.Mock()
    graph.begin.return_value = transaction

    serializer = ModuleStoreSerializer()
    serializer.load_course_keys()
    successful, unsuccessful = serializer.dump_courses_to_neo4j(graph)

    # one transaction per course, each committed and none rolled back
    self.assertEqual(graph.begin.call_count, 2)
    self.assertEqual(transaction.commit.call_count, 2)
    self.assertEqual(transaction.rollback.call_count, 0)

    # 7 nodes + 9 relationships from the first course
    # 2 nodes and no relationships from the second
    self.assertEqual(transaction.create.call_count, 18)
    self.assertEqual(transaction.run.call_count, 2)

    self.assertEqual(len(unsuccessful), 0)
    self.assertItemsEqual(successful, self.course_strings)
def test_dump_to_neo4j_rollback(self):
    """
    Tests that dump_courses_to_neo4j handles the case where there's an
    exception trying to write to the neo4j database: every transaction is
    rolled back and every course is reported as unsuccessful.
    """
    transaction = mock.Mock()
    # make the very first write in each transaction blow up
    transaction.run.side_effect = ValueError('Something went wrong!')
    graph = mock.Mock()
    graph.begin.return_value = transaction

    serializer = ModuleStoreSerializer()
    serializer.load_course_keys()
    successful, unsuccessful = serializer.dump_courses_to_neo4j(graph)

    self.assertEqual(graph.begin.call_count, 2)
    self.assertEqual(transaction.commit.call_count, 0)
    self.assertEqual(transaction.rollback.call_count, 2)

    self.assertEqual(len(successful), 0)
    self.assertItemsEqual(unsuccessful, self.course_strings)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment