Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
e798b2e8
Commit
e798b2e8
authored
Mar 27, 2017
by
Adam Palay
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add precedes relationship to coursegraph (EDUCATOR-484)
parent
8df5c941
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
191 additions
and
79 deletions
+191
-79
openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py
+81
-59
openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py
+110
-20
No files found.
openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py
View file @
e798b2e8
...
...
@@ -33,16 +33,25 @@ class ModuleStoreSerializer(object):
one graph per course.
"""
def
__init__
(
self
,
courses
=
None
,
skip
=
None
):
def
__init__
(
self
,
course_keys
):
self
.
course_keys
=
course_keys
@classmethod
def
create
(
cls
,
courses
=
None
,
skip
=
None
):
"""
Sets the object's course_keys attribute from the `courses` parameter.
If that parameter isn't furnished, loads all course_keys from the
modulestore.
Filters out course_keys in the `skip` parameter, if provided.
Args:
Arguments:
courses: A list of string serializations of course keys.
For example, ["course-v1:org+course+run"].
skip: Also a list of string serializations of course keys.
Returns:
a ModulestoreSerializer instance
"""
if
courses
:
course_keys
=
[
CourseKey
.
from_string
(
course
.
strip
())
for
course
in
courses
]
...
...
@@ -51,14 +60,14 @@ class ModuleStoreSerializer(object):
course
.
id
for
course
in
modulestore
()
.
get_course_summaries
()
]
if
skip
is
not
None
:
skip_keys
=
[
CourseKey
.
from_string
(
course
.
strip
())
for
course
in
skip
]
skip_keys
=
set
([
CourseKey
.
from_string
(
course
.
strip
())
for
course
in
skip
])
course_keys
=
[
course_key
for
course_key
in
course_keys
if
course_key
not
in
skip_keys
]
self
.
course_keys
=
course_keys
return
cls
(
course_keys
)
@staticmethod
def
serialize_item
(
item
):
"""
Args:
Arg
ument
s:
item: an XBlock
Returns:
...
...
@@ -100,7 +109,7 @@ class ModuleStoreSerializer(object):
def
serialize_course
(
self
,
course_id
):
"""
Serializes a course into py2neo Nodes and Relationships
Args:
Arg
ument
s:
course_id: CourseKey of the course we want to serialize
Returns:
...
...
@@ -125,6 +134,7 @@ class ModuleStoreSerializer(object):
# create relationships
relationships
=
[]
for
item
in
items
:
previous_child_node
=
None
for
index
,
child_loc
in
enumerate
(
item
.
get_children
()):
parent_node
=
location_to_node
.
get
(
item
.
location
)
child_node
=
location_to_node
.
get
(
child_loc
.
location
)
...
...
@@ -133,13 +143,20 @@ class ModuleStoreSerializer(object):
relationship
=
Relationship
(
parent_node
,
"PARENT_OF"
,
child_node
)
relationships
.
append
(
relationship
)
if
previous_child_node
:
ordering_relationship
=
Relationship
(
previous_child_node
,
"PRECEDES"
,
child_node
)
relationships
.
append
(
ordering_relationship
)
previous_child_node
=
child_node
nodes
=
location_to_node
.
values
()
return
nodes
,
relationships
@staticmethod
def
coerce_types
(
value
):
"""
Args:
Arg
ument
s:
value: the value of an xblock's field
Returns: either the value, a text version of the value, or, if the
...
...
@@ -159,7 +176,7 @@ class ModuleStoreSerializer(object):
@staticmethod
def
add_to_transaction
(
neo4j_entities
,
transaction
):
"""
Args:
Arg
ument
s:
neo4j_entities: a list of Nodes or Relationships
transaction: a neo4j transaction
"""
...
...
@@ -170,7 +187,7 @@ class ModuleStoreSerializer(object):
def
get_command_last_run
(
course_key
,
graph
):
"""
This information is stored on the course node of a course in neo4j
Args:
Arg
ument
s:
course_key: a CourseKey
graph: a py2neo Graph
...
...
@@ -195,7 +212,7 @@ class ModuleStoreSerializer(object):
"""
We use the CourseStructure table to get when this course was last
published.
Args:
Arg
ument
s:
course_key: a CourseKey
Returns: The datetime the course was last published at, converted into
...
...
@@ -214,7 +231,7 @@ class ModuleStoreSerializer(object):
"""
Only dump the course if it's been changed since the last time it's been
dumped.
Args:
Arg
ument
s:
course_key: a CourseKey object.
graph: a py2neo Graph object.
...
...
@@ -240,39 +257,15 @@ class ModuleStoreSerializer(object):
# before the course's last published event
return
last_this_command_was_run
<
course_last_published_date
def
dump_course
s_to_neo4j
(
self
,
graph
,
override_cache
=
False
):
def
dump_course
_to_neo4j
(
self
,
course_key
,
graph
):
"""
Method that iterates through a list of courses in a modulestore,
serializes them, then writes them to neo4j
Args:
graph: py2neo graph object
override_cache: serialize the courses even if they'be been recently
serialized
Serializes a course and writes it to neo4j.
Returns: two lists--one of the courses that were successfully written
to neo4j and one of courses that were not.
Arguments:
course_key: course key for the course to be exported
graph: py2neo graph object
"""
total_number_of_courses
=
len
(
self
.
course_keys
)
successful_courses
=
[]
unsuccessful_courses
=
[]
for
index
,
course_key
in
enumerate
(
self
.
course_keys
):
# first, clear the request cache to prevent memory leaks
RequestCache
.
clear_request_cache
()
log
.
info
(
"Now exporting
%
s to neo4j: course
%
d of
%
d total courses"
,
course_key
,
index
+
1
,
total_number_of_courses
,
)
if
not
(
override_cache
or
self
.
should_dump_course
(
course_key
,
graph
)):
log
.
info
(
"skipping dumping
%
s, since it hasn't changed"
,
course_key
)
continue
nodes
,
relationships
=
self
.
serialize_course
(
course_key
)
log
.
info
(
"
%
d nodes and
%
d relationships in
%
s"
,
...
...
@@ -302,12 +295,45 @@ class ModuleStoreSerializer(object):
course_string
)
transaction
.
rollback
()
unsuccessful_courses
.
append
(
course_string
)
def
dump_courses_to_neo4j
(
self
,
graph
,
override_cache
=
False
):
"""
Method that iterates through a list of courses in a modulestore,
serializes them, then submits tasks to write them to neo4j.
Arguments:
graph: py2neo graph object
override_cache: serialize the courses even if they'be been recently
serialized
Returns: two lists--one of the courses that were successfully written
to neo4j and one of courses that were not.
"""
total_number_of_courses
=
len
(
self
.
course_keys
)
submitted_courses
=
[]
skipped_courses
=
[]
for
index
,
course_key
in
enumerate
(
self
.
course_keys
):
# first, clear the request cache to prevent memory leaks
RequestCache
.
clear_request_cache
()
log
.
info
(
"Now exporting
%
s to neo4j: course
%
d of
%
d total courses"
,
course_key
,
index
+
1
,
total_number_of_courses
,
)
if
not
(
override_cache
or
self
.
should_dump_course
(
course_key
,
graph
)):
log
.
info
(
"skipping dumping
%
s, since it hasn't changed"
,
course_key
)
skipped_courses
.
append
(
unicode
(
course_key
))
else
:
successful_courses
.
append
(
course_string
)
self
.
dump_course_to_neo4j
(
course_key
,
graph
)
submitted_courses
.
append
(
unicode
(
course_key
))
return
su
ccessful_courses
,
unsuccessful
_courses
return
su
bmitted_courses
,
skipped
_courses
class
Command
(
BaseCommand
):
...
...
@@ -374,28 +400,24 @@ class Command(BaseCommand):
secure
=
secure
,
)
mss
=
ModuleStoreSerializer
(
options
[
'courses'
],
options
[
'skip'
])
mss
=
ModuleStoreSerializer
.
create
(
options
[
'courses'
],
options
[
'skip'
])
su
ccessful_courses
,
unsuccessful
_courses
=
mss
.
dump_courses_to_neo4j
(
su
bmitted_courses
,
skipped
_courses
=
mss
.
dump_courses_to_neo4j
(
graph
,
override_cache
=
options
[
'override'
]
)
if
not
successful_courses
and
not
unsuccessful_courses
:
log
.
info
(
"
%
d courses submitted for export to neo4j.
%
d courses skipped."
,
len
(
submitted_courses
),
len
(
skipped_courses
),
)
if
not
submitted_courses
:
print
(
"No courses exported to neo4j at all!"
)
return
if
successful_courses
:
print
(
"These courses exported to neo4j successfully:
\n\t
"
+
"
\n\t
"
.
join
(
successful_courses
)
)
else
:
print
(
"No courses exported to neo4j successfully."
)
if
unsuccessful_courses
:
if
submitted_courses
:
print
(
"These courses
did not
export to neo4j successfully:
\n\t
"
+
"
\n\t
"
.
join
(
unsuccessful
_courses
)
"These courses
were submitted for
export to neo4j successfully:
\n\t
"
+
"
\n\t
"
.
join
(
submitted
_courses
)
)
else
:
print
(
"All courses exported to neo4j successfully."
)
openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py
View file @
e798b2e8
...
...
@@ -31,6 +31,26 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
"""
@classmethod
def
setUpClass
(
cls
):
"""
Creates two courses; one that's just a course module, and one that
looks like:
course
|
chapter
|
sequential
|
vertical
/ |
\
\
/ |
\
----------
/ |
\
\
/ | ---
\
/ |
\
\
html -> problem -> video -> video2
The side-pointing arrows (->) are PRECEDES relationships; the more
vertical lines are PARENT_OF relationships.
"""
super
(
TestDumpToNeo4jCommandBase
,
cls
)
.
setUpClass
()
cls
.
course
=
CourseFactory
.
create
()
cls
.
chapter
=
ItemFactory
.
create
(
parent
=
cls
.
course
,
category
=
'chapter'
)
...
...
@@ -51,7 +71,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
Replaces the py2neo Graph object with a MockGraph; similarly replaces
NodeSelector with MockNodeSelector.
Args:
Arg
ument
s:
mock_selector_class: a mocked NodeSelector class
mock_graph_class: a mocked Graph class
transaction_errors: a bool for whether we should get errors
...
...
@@ -71,7 +91,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
"""
Asserts that we have the expected number of courses, commits, and
rollbacks after we dump the modulestore to neo4j
Args:
Arg
ument
s:
mock_graph: a MockGraph backend
number_of_courses: number of courses we expect to find
number_commits: number of commits we expect against the graph
...
...
@@ -203,7 +223,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
def
setUpClass
(
cls
):
"""Any ModuleStore course/content operations can go here."""
super
(
TestModuleStoreSerializer
,
cls
)
.
setUpClass
()
cls
.
mss
=
ModuleStoreSerializer
()
cls
.
mss
=
ModuleStoreSerializer
.
create
()
def
test_serialize_item
(
self
):
"""
...
...
@@ -228,7 +248,81 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
nodes
,
relationships
=
self
.
mss
.
serialize_course
(
self
.
course
.
id
)
self
.
assertEqual
(
len
(
nodes
),
9
)
self
.
assertEqual
(
len
(
relationships
),
7
)
# the course has 7 "PARENT_OF" relationships and 3 "PRECEDES"
self
.
assertEqual
(
len
(
relationships
),
10
)
@staticmethod
def
_extract_relationship_pairs
(
relationships
,
relationship_type
):
"""
Extracts a list of XBlock location tuples from a list of Relationships.
Arguments:
relationships: list of py2neo `Relationship` objects
relationship_type: the type of relationship to filter `relationships`
by.
Returns:
List of tuples of the locations of of the relationships'
constituent nodes.
"""
relationship_pairs
=
[
tuple
([
node
[
"location"
]
for
node
in
rel
.
nodes
()])
for
rel
in
relationships
if
rel
.
type
()
==
relationship_type
]
return
relationship_pairs
@staticmethod
def
_extract_location_pair
(
xblock1
,
xblock2
):
"""
Returns a tuple of locations from two XBlocks.
Arguments:
xblock1: an xblock
xblock2: also an xblock
Returns:
A tuple of the string representations of those XBlocks' locations.
"""
return
(
unicode
(
xblock1
.
location
),
unicode
(
xblock2
.
location
))
def
assertBlockPairIsRelationship
(
self
,
xblock1
,
xblock2
,
relationships
,
relationship_type
):
"""
Helper assertion that a pair of xblocks have a certain kind of
relationship with one another.
"""
relationship_pairs
=
self
.
_extract_relationship_pairs
(
relationships
,
relationship_type
)
location_pair
=
self
.
_extract_location_pair
(
xblock1
,
xblock2
)
self
.
assertIn
(
location_pair
,
relationship_pairs
)
def
assertBlockPairIsNotRelationship
(
self
,
xblock1
,
xblock2
,
relationships
,
relationship_type
):
"""
The opposite of `assertBlockPairIsRelationship`: asserts that a pair
of xblocks do NOT have a certain kind of relationship.
"""
relationship_pairs
=
self
.
_extract_relationship_pairs
(
relationships
,
relationship_type
)
location_pair
=
self
.
_extract_location_pair
(
xblock1
,
xblock2
)
self
.
assertNotIn
(
location_pair
,
relationship_pairs
)
def
test_precedes_relationship
(
self
):
"""
Tests that two nodes that should have a precedes relationship have it.
"""
__
,
relationships
=
self
.
mss
.
serialize_course
(
self
.
course
.
id
)
self
.
assertBlockPairIsRelationship
(
self
.
video
,
self
.
video2
,
relationships
,
"PRECEDES"
)
self
.
assertBlockPairIsNotRelationship
(
self
.
video2
,
self
.
video
,
relationships
,
"PRECEDES"
)
self
.
assertBlockPairIsNotRelationship
(
self
.
vertical
,
self
.
video
,
relationships
,
"PRECEDES"
)
self
.
assertBlockPairIsNotRelationship
(
self
.
html
,
self
.
video
,
relationships
,
"PRECEDES"
)
def
test_parent_relationship
(
self
):
"""
Test that two nodes that should have a parent_of relationship have it.
"""
__
,
relationships
=
self
.
mss
.
serialize_course
(
self
.
course
.
id
)
self
.
assertBlockPairIsRelationship
(
self
.
vertical
,
self
.
video
,
relationships
,
"PARENT_OF"
)
self
.
assertBlockPairIsRelationship
(
self
.
vertical
,
self
.
html
,
relationships
,
"PARENT_OF"
)
self
.
assertBlockPairIsRelationship
(
self
.
course
,
self
.
chapter
,
relationships
,
"PARENT_OF"
)
self
.
assertBlockPairIsNotRelationship
(
self
.
course
,
self
.
video
,
relationships
,
"PARENT_OF"
)
self
.
assertBlockPairIsNotRelationship
(
self
.
video
,
self
.
vertical
,
relationships
,
"PARENT_OF"
)
self
.
assertBlockPairIsNotRelationship
(
self
.
video
,
self
.
html
,
relationships
,
"PARENT_OF"
)
def
test_nodes_have_indices
(
self
):
"""
...
...
@@ -277,7 +371,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_graph
=
MockGraph
()
mock_selector_class
.
return_value
=
MockNodeSelector
(
mock_graph
)
su
ccessful
,
unsuccessful
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
)
su
bmitted
,
skipped
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
)
self
.
assertCourseDump
(
mock_graph
,
...
...
@@ -290,9 +384,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# 2 nodes and no relationships from the second
self
.
assertEqual
(
len
(
mock_graph
.
nodes
),
11
)
self
.
assertEqual
(
len
(
unsuccessful
),
0
)
self
.
assertItemsEqual
(
successful
,
self
.
course_strings
)
self
.
assertItemsEqual
(
submitted
,
self
.
course_strings
)
@mock.patch
(
'openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector'
)
def
test_dump_to_neo4j_rollback
(
self
,
mock_selector_class
):
...
...
@@ -303,7 +395,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_graph
=
MockGraph
(
transaction_errors
=
True
)
mock_selector_class
.
return_value
=
MockNodeSelector
(
mock_graph
)
su
ccessful
,
unsuccessful
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
)
su
bmitted
,
skipped
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
)
self
.
assertCourseDump
(
mock_graph
,
...
...
@@ -312,8 +404,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
number_rollbacks
=
2
,
)
self
.
assertEqual
(
len
(
successful
),
0
)
self
.
assertItemsEqual
(
unsuccessful
,
self
.
course_strings
)
self
.
assertItemsEqual
(
submitted
,
self
.
course_strings
)
@mock.patch
(
'openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector'
)
@ddt.data
((
True
,
2
),
(
False
,
0
))
...
...
@@ -333,10 +424,10 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# when run the second time, only dump courses if the cache override
# is enabled
su
ccessful
,
unsuccessful
=
self
.
mss
.
dump_courses_to_neo4j
(
su
bmitted
,
__
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
,
override_cache
=
override_cache
)
self
.
assertEqual
(
len
(
su
ccessful
+
unsuccessful
),
expected_number_courses
)
self
.
assertEqual
(
len
(
su
bmitted
),
expected_number_courses
)
@mock.patch
(
'openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector'
)
def
test_dump_to_neo4j_published
(
self
,
mock_selector_class
):
...
...
@@ -348,17 +439,16 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
mock_selector_class
.
return_value
=
MockNodeSelector
(
mock_graph
)
# run once to warm the cache
su
ccessful
,
unsuccessful
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
)
self
.
assertEqual
(
len
(
su
ccessful
+
unsuccessful
),
len
(
self
.
course_strings
))
su
bmitted
,
skipped
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
)
self
.
assertEqual
(
len
(
su
bmitted
),
len
(
self
.
course_strings
))
# simulate one of the courses being published
listen_for_course_publish
(
None
,
self
.
course
.
id
)
# make sure only the published course was dumped
successful
,
unsuccessful
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
)
self
.
assertEqual
(
len
(
unsuccessful
),
0
)
self
.
assertEqual
(
len
(
successful
),
1
)
self
.
assertEqual
(
successful
[
0
],
unicode
(
self
.
course
.
id
))
submitted
,
__
=
self
.
mss
.
dump_courses_to_neo4j
(
mock_graph
)
self
.
assertEqual
(
len
(
submitted
),
1
)
self
.
assertEqual
(
submitted
[
0
],
unicode
(
self
.
course
.
id
))
@ddt.data
(
(
six
.
text_type
(
datetime
(
2016
,
3
,
30
)),
six
.
text_type
(
datetime
(
2016
,
3
,
31
)),
True
),
...
...
@@ -373,7 +463,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
Tests whether a course should be dumped given the last time it was
dumped and the last time it was published.
"""
mss
=
ModuleStoreSerializer
()
mss
=
ModuleStoreSerializer
.
create
()
mss
.
get_command_last_run
=
lambda
course_key
,
graph
:
last_command_run
mss
.
get_course_last_published
=
lambda
course_key
:
last_course_published
mock_course_key
=
mock
.
Mock
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment