Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
01a0f6d6
Commit
01a0f6d6
authored
Aug 31, 2016
by
Adam Palay
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
allow ability to specify which courses to write to neo4j
parent
cf2c48ab
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
176 additions
and
74 deletions
+176
-74
lms/djangoapps/courseware/management/commands/dump_to_neo4j.py
+104
-49
lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py
+72
-25
No files found.
lms/djangoapps/courseware/management/commands/dump_to_neo4j.py
View file @
01a0f6d6
...
...
@@ -13,6 +13,7 @@ from py2neo import Graph, Node, Relationship, authenticate
from
py2neo.compat
import
integer
,
string
,
unicode
as
neo4j_unicode
from
request_cache.middleware
import
RequestCache
from
xmodule.modulestore.django
import
modulestore
from
opaque_keys.edx.keys
import
CourseKey
log
=
logging
.
getLogger
(
__name__
)
...
...
@@ -30,8 +31,20 @@ class ModuleStoreSerializer(object):
Class with functionality to serialize a modulestore into subgraphs,
one graph per course.
"""
def
__init__
(
self
):
self
.
all_courses
=
modulestore
()
.
get_course_summaries
()
def
load_course_keys
(
self
,
courses
=
None
):
"""
Sets the object's course_keys attribute from the `courses` parameter.
If that parameter isn't furnished, loads all course_keys from the
modulestore.
:param courses: string serialization of course keys
"""
if
courses
:
course_keys
=
[
CourseKey
.
from_string
(
course
.
strip
())
for
course
in
courses
]
else
:
course_keys
=
[
course
.
id
for
course
in
modulestore
()
.
get_course_summaries
()
]
self
.
course_keys
=
course_keys
@staticmethod
def
serialize_item
(
item
):
...
...
@@ -136,6 +149,79 @@ class ModuleStoreSerializer(object):
return
coerced_value
@staticmethod
def
add_to_transaction
(
neo4j_entities
,
transaction
):
"""
Args:
neo4j_entities: a list of Nodes or Relationships
transaction: a neo4j transaction
"""
for
entity
in
neo4j_entities
:
transaction
.
create
(
entity
)
def
dump_courses_to_neo4j
(
self
,
graph
):
"""
Parameters
----------
graph: py2neo graph object
Returns two lists: one of the courses that were successfully written
to neo4j, and one of courses that were not.
-------
"""
total_number_of_courses
=
len
(
self
.
course_keys
)
successful_courses
=
[]
unsuccessful_courses
=
[]
for
index
,
course_key
in
enumerate
(
self
.
course_keys
):
# first, clear the request cache to prevent memory leaks
RequestCache
.
clear_request_cache
()
log
.
info
(
"Now exporting
%
s to neo4j: course
%
d of
%
d total courses"
,
course_key
,
index
+
1
,
total_number_of_courses
,
)
nodes
,
relationships
=
self
.
serialize_course
(
course_key
)
log
.
info
(
"
%
d nodes and
%
d relationships in
%
s"
,
len
(
nodes
),
len
(
relationships
),
course_key
,
)
transaction
=
graph
.
begin
()
course_string
=
six
.
text_type
(
course_key
)
try
:
# first, delete existing course
transaction
.
run
(
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n"
.
format
(
course_string
)
)
# now, re-add it
self
.
add_to_transaction
(
nodes
,
transaction
)
self
.
add_to_transaction
(
relationships
,
transaction
)
transaction
.
commit
()
except
Exception
:
# pylint: disable=broad-except
log
.
exception
(
"Error trying to dump course
%
s to neo4j, rolling back"
,
course_string
)
transaction
.
rollback
()
unsuccessful_courses
.
append
(
course_string
)
else
:
successful_courses
.
append
(
course_string
)
return
successful_courses
,
unsuccessful_courses
class
Command
(
BaseCommand
):
"""
Command to dump modulestore data to neo4j
...
...
@@ -155,16 +241,7 @@ class Command(BaseCommand):
parser
.
add_argument
(
'--port'
,
type
=
int
)
parser
.
add_argument
(
'--user'
,
type
=
unicode
)
parser
.
add_argument
(
'--password'
,
type
=
unicode
)
@staticmethod
def
add_to_transaction
(
neo4j_entities
,
transaction
):
"""
Args:
neo4j_entities: a list of Nodes or Relationships
transaction: a neo4j transaction
"""
for
entity
in
neo4j_entities
:
transaction
.
create
(
entity
)
parser
.
add_argument
(
'--courses'
,
type
=
unicode
,
nargs
=
'*'
)
def
handle
(
self
,
*
args
,
**
options
):
# pylint: disable=unused-argument
"""
...
...
@@ -192,44 +269,22 @@ class Command(BaseCommand):
)
mss
=
ModuleStoreSerializer
()
mss
.
load_course_keys
(
options
[
'courses'
])
total_number_of_courses
=
len
(
mss
.
all_courses
)
successful_courses
,
unsuccessful_courses
=
mss
.
dump_courses_to_neo4j
(
graph
)
for
index
,
course
in
enumerate
(
mss
.
all_courses
):
# first, clear the request cache to prevent memory leaks
RequestCache
.
clear_request_cache
()
log
.
info
(
"Now exporting
%
s to neo4j: course
%
d of
%
d total courses"
,
course
.
id
,
index
+
1
,
total_number_of_courses
)
nodes
,
relationships
=
mss
.
serialize_course
(
course
.
id
)
log
.
info
(
"
%
d nodes and
%
d relationships in
%
s"
,
len
(
nodes
),
len
(
relationships
),
course
.
id
if
successful_courses
:
print
(
"These courses exported to neo4j successfully:
\n\t
"
+
"
\n\t
"
.
join
(
successful_courses
)
)
else
:
print
(
"No courses exported to neo4j successfully."
)
transaction
=
graph
.
begin
()
try
:
# first, delete existing course
transaction
.
run
(
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n"
.
format
(
six
.
text_type
(
course
.
id
)
)
)
# now, re-add it
self
.
add_to_transaction
(
nodes
,
transaction
)
self
.
add_to_transaction
(
relationships
,
transaction
)
transaction
.
commit
()
except
Exception
:
# pylint: disable=broad-except
log
.
exception
(
"Error trying to dump course
%
s to neo4j, rolling back"
,
six
.
text_type
(
course
.
id
)
)
transaction
.
rollback
()
if
unsuccessful_courses
:
print
(
"These courses did not export to neo4j successfully:
\n\t
"
+
"
\n\t
"
.
join
(
unsuccessful_courses
)
)
else
:
print
(
"All courses exported to neo4j successfully."
)
lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py
View file @
01a0f6d6
...
...
@@ -35,49 +35,49 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
cls
.
course2
=
CourseFactory
.
create
()
cls
.
course_strings
=
[
six
.
text_type
(
cls
.
course
.
id
),
six
.
text_type
(
cls
.
course2
.
id
)]
@ddt.ddt
class
TestDumpToNeo4jCommand
(
TestDumpToNeo4jCommandBase
):
"""
Tests for the dump to neo4j management command
"""
@mock.patch
(
'courseware.management.commands.dump_to_neo4j.Graph'
)
def
test_dump_to_neo4j
(
self
,
mock_graph_class
):
@ddt.data
(
1
,
2
)
def
test_dump_specific_courses
(
self
,
number_of_courses
,
mock_graph_class
):
"""
Tests the dump_to_neo4j management command works against a mock
py2neo Graph
Test that you can specify which courses you want to dump.
"""
mock_graph
=
mock_graph_class
.
return_value
mock_transaction
=
mock
.
Mock
()
mock_graph
.
begin
.
return_value
=
mock_transaction
call_command
(
'dump_to_neo4j'
,
courses
=
self
.
course_strings
[:
number_of_courses
],
host
=
'mock_host'
,
port
=
7473
,
user
=
'mock_user'
,
password
=
'mock_password'
,
)
self
.
assertEqual
(
mock_graph
.
begin
.
call_count
,
2
)
self
.
assertEqual
(
mock_transaction
.
commit
.
call_count
,
2
)
self
.
assertEqual
(
mock_transaction
.
rollback
.
call_count
,
0
)
# 7 nodes + 9 relationships from the first course
# 2 nodes and no relationships from the second
self
.
assertEqual
(
mock_transaction
.
create
.
call_count
,
18
)
self
.
assertEqual
(
mock_transaction
.
run
.
call_count
,
2
)
self
.
assertEqual
(
mock_graph
.
begin
.
call_count
,
number_of_courses
)
self
.
assertEqual
(
mock_transaction
.
commit
.
call_count
,
number_of_courses
)
self
.
assertEqual
(
mock_transaction
.
commit
.
rollback
.
call_count
,
0
)
@mock.patch
(
'courseware.management.commands.dump_to_neo4j.Graph'
)
def
test_dump_
to_neo4j_rollback
(
self
,
mock_graph_class
):
def
test_dump_
all_courses
(
self
,
mock_graph_class
):
"""
Test
s that the management command handles the case where there's
a
n exception trying to write to the neo4j database
.
Test
if you don't specify which courses to dump, then you'll dump
a
ll of them
.
"""
mock_graph
=
mock_graph_class
.
return_value
mock_transaction
=
mock
.
Mock
()
mock_graph
.
begin
.
return_value
=
mock_transaction
mock_transaction
.
run
.
side_effect
=
ValueError
(
'Something went wrong!'
)
call_command
(
'dump_to_neo4j'
,
...
...
@@ -88,8 +88,8 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
)
self
.
assertEqual
(
mock_graph
.
begin
.
call_count
,
2
)
self
.
assertEqual
(
mock_transaction
.
commit
.
call_count
,
0
)
self
.
assertEqual
(
mock_transaction
.
rollback
.
call_count
,
2
)
self
.
assertEqual
(
mock_transaction
.
commit
.
call_count
,
2
)
self
.
assertEqual
(
mock_transaction
.
commit
.
rollback
.
call_count
,
0
)
@ddt.ddt
...
...
@@ -97,15 +97,13 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests for the ModuleStoreSerializer
"""
def
setUp
(
self
):
super
(
TestModuleStoreSerializer
,
self
)
.
setUp
()
self
.
modulestore_serializer
=
ModuleStoreSerializer
()
def
test_serialize_item
(
self
):
"""
Tests the serialize_item method.
"""
fields
,
label
=
self
.
modulestore_serializer
.
serialize_item
(
self
.
course
)
mss
=
ModuleStoreSerializer
()
mss
.
load_course_keys
()
fields
,
label
=
mss
.
serialize_item
(
self
.
course
)
self
.
assertEqual
(
label
,
"course"
)
self
.
assertIn
(
"edited_on"
,
fields
.
keys
())
self
.
assertIn
(
"display_name"
,
fields
.
keys
())
...
...
@@ -119,7 +117,9 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests the serialize_course method.
"""
nodes
,
relationships
=
self
.
modulestore_serializer
.
serialize_course
(
mss
=
ModuleStoreSerializer
()
mss
.
load_course_keys
()
nodes
,
relationships
=
mss
.
serialize_course
(
self
.
course
.
id
)
self
.
assertEqual
(
len
(
nodes
),
9
)
...
...
@@ -135,7 +135,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# each element in the iterable is not unicode:
self
.
assertFalse
(
any
(
isinstance
(
tab
,
six
.
text_type
)
for
tab
in
example_iterable
))
# but after they are coerced, they are:
coerced
=
self
.
modulestore_serializer
.
coerce_types
(
example_iterable
)
coerced
=
ModuleStoreSerializer
()
.
coerce_types
(
example_iterable
)
self
.
assertTrue
(
all
(
isinstance
(
tab
,
six
.
text_type
)
for
tab
in
coerced
))
# finally, make sure we haven't changed the type:
self
.
assertEqual
(
type
(
coerced
),
iterable_type
)
...
...
@@ -154,5 +154,52 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests the coerce_types helper for the neo4j base types
"""
coerced_value
=
self
.
modulestore_serializer
.
coerce_types
(
original_value
)
coerced_value
=
ModuleStoreSerializer
()
.
coerce_types
(
original_value
)
self
.
assertEqual
(
coerced_value
,
coerced_expected
)
def
test_dump_to_neo4j
(
self
):
"""
Tests the dump_to_neo4j method works against a mock
py2neo Graph
"""
mock_graph
=
mock
.
Mock
()
mock_transaction
=
mock
.
Mock
()
mock_graph
.
begin
.
return_value
=
mock_transaction
mss
=
ModuleStoreSerializer
()
mss
.
load_course_keys
()
successful
,
unsuccessful
=
mss
.
dump_courses_to_neo4j
(
mock_graph
)
self
.
assertEqual
(
mock_graph
.
begin
.
call_count
,
2
)
self
.
assertEqual
(
mock_transaction
.
commit
.
call_count
,
2
)
self
.
assertEqual
(
mock_transaction
.
rollback
.
call_count
,
0
)
# 7 nodes + 9 relationships from the first course
# 2 nodes and no relationships from the second
self
.
assertEqual
(
mock_transaction
.
create
.
call_count
,
18
)
self
.
assertEqual
(
mock_transaction
.
run
.
call_count
,
2
)
self
.
assertEqual
(
len
(
unsuccessful
),
0
)
self
.
assertItemsEqual
(
successful
,
self
.
course_strings
)
def
test_dump_to_neo4j_rollback
(
self
):
"""
Tests that the the dump_to_neo4j method handles the case where there's
an exception trying to write to the neo4j database.
"""
mock_graph
=
mock
.
Mock
()
mock_transaction
=
mock
.
Mock
()
mock_graph
.
begin
.
return_value
=
mock_transaction
mock_transaction
.
run
.
side_effect
=
ValueError
(
'Something went wrong!'
)
mss
=
ModuleStoreSerializer
()
mss
.
load_course_keys
()
successful
,
unsuccessful
=
mss
.
dump_courses_to_neo4j
(
mock_graph
)
self
.
assertEqual
(
mock_graph
.
begin
.
call_count
,
2
)
self
.
assertEqual
(
mock_transaction
.
commit
.
call_count
,
0
)
self
.
assertEqual
(
mock_transaction
.
rollback
.
call_count
,
2
)
self
.
assertEqual
(
len
(
successful
),
0
)
self
.
assertItemsEqual
(
unsuccessful
,
self
.
course_strings
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment