Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
course-discovery
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
course-discovery
Commits
fe5f29da
Commit
fe5f29da
authored
Apr 05, 2016
by
Peter Fogg
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Ingest data from Drupal.
ECOM-3983
parent
39214f4d
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
340 additions
and
12 deletions
+340
-12
course_discovery/apps/course_metadata/data_loaders.py
+106
-4
course_discovery/apps/course_metadata/management/commands/refresh_course_metadata.py
+6
-1
course_discovery/apps/course_metadata/migrations/0002_auto_20160406_1644.py
+24
-0
course_discovery/apps/course_metadata/models.py
+1
-0
course_discovery/apps/course_metadata/tests/test_data_loaders.py
+200
-7
course_discovery/settings/base.py
+2
-0
requirements/base.txt
+1
-0
No files found.
course_discovery/apps/course_metadata/data_loaders.py
View file @
fe5f29da
""" Data loaders. """
import
abc
import
logging
from
urllib.parse
import
urljoin
from
dateutil.parser
import
parse
from
django.conf
import
settings
from
edx_rest_api_client.client
import
EdxRestApiClient
import
html2text
from
opaque_keys.edx.keys
import
CourseKey
from
course_discovery.apps.course_metadata.models
import
(
Organization
,
Image
,
Course
,
CourseRun
,
CourseOrganization
,
Video
Course
,
CourseOrganization
,
CourseRun
,
Image
,
LanguageTag
,
LevelType
,
Organization
,
Subject
,
Video
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -24,7 +27,7 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
PAGE_SIZE
=
50
def
__init__
(
self
,
api_url
,
access_token
):
def
__init__
(
self
,
api_url
,
access_token
=
None
):
"""
Arguments:
api_url (str): URL of the API from which data is loaded
...
...
@@ -68,6 +71,21 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
return
None
@classmethod
def convert_course_run_key(cls, course_run_key_str):
    """
    Derive the serialized course key from a serialized course run key.

    The run key is parsed with `CourseKey.from_string`, and the parsed key's
    `org` and `course` attributes are joined as `<org>+<course>`.

    Args:
        course_run_key_str (str): The serialized course run key.

    Returns:
        str
    """
    parsed_key = CourseKey.from_string(course_run_key_str)
    template = '{org}+{course}'
    return template.format(course=parsed_key.course, org=parsed_key.org)
class
OrganizationsApiDataLoader
(
AbstractDataLoader
):
""" Loads organizations from the Organizations API. """
...
...
@@ -141,9 +159,10 @@ class CoursesApiDataLoader(AbstractDataLoader):
def
update_course
(
self
,
body
):
# NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
# which may not be unique for an organization.
course_run_key
=
CourseKey
.
from_string
(
body
[
'id'
])
course_run_key_str
=
body
[
'id'
]
course_run_key
=
CourseKey
.
from_string
(
course_run_key_str
)
organization
,
__
=
Organization
.
objects
.
get_or_create
(
key
=
course_run_key
.
org
)
course_key
=
'{org}+{course}'
.
format
(
org
=
organization
.
key
,
course
=
course_run_key
.
course
)
course_key
=
self
.
convert_course_run_key
(
course_run_key_str
)
defaults
=
{
'title'
:
body
[
'name'
]
}
...
...
@@ -202,3 +221,86 @@ class CoursesApiDataLoader(AbstractDataLoader):
video
,
__
=
Video
.
objects
.
get_or_create
(
src
=
video_url
)
return
video
class DrupalApiDataLoader(AbstractDataLoader):
    """
    Loads course runs from the Drupal API.

    This loader only enriches records that already exist: `update_course` and
    `update_course_run` look objects up with `.get()` and log a warning (returning
    None) when nothing matches; they never create a Course or CourseRun.
    """

    def ingest(self):
        """
        Fetch the `courses` resource from the API at `self.api_url` and apply every
        returned item to the matching Course and CourseRun.

        The response is read in a single request from its `items` key.
        """
        client = EdxRestApiClient(self.api_url)
        logger.info('Refreshing Courses and CourseRuns from %s...', self.api_url)
        response = client.courses.get()

        data = response['items']
        logger.info('Retrieved %d course runs...', len(data))

        for body in data:
            # clean_strings is inherited; presumably it normalizes string values
            # in the payload -- its definition is not part of this class.
            cleaned_body = self.clean_strings(body)
            course = self.update_course(cleaned_body)
            self.update_course_run(course, cleaned_body)

        logger.info('Retrieved %d course runs from %s.', len(data), self.api_url)

    def update_course(self, body):
        """
        Create or update a course from Drupal data given by `body`.

        Args:
            body (dict): One item of the Drupal response. Reads the keys
                `course_id`, `description`, `subtitle`, `course_about_uri`,
                `level` (a dict with `title`) and `subjects`.

        Returns:
            Course: The saved course, or None if no course matches the key derived
                from `body['course_id']`.
        """
        course_key = self.convert_course_run_key(body['course_id'])

        try:
            course = Course.objects.get(key=course_key)
        except Course.DoesNotExist:
            logger.warning('Could not find course [%s]', course_key)
            return None

        course.full_description = self.clean_html(body['description'])
        course.short_description = self.clean_html(body['subtitle'])
        course.marketing_url = urljoin(settings.MARKETING_URL_ROOT, body['course_about_uri'])

        level_type, __ = LevelType.objects.get_or_create(name=body['level']['title'])
        course.level_type = level_type

        self.set_subjects(course, body)

        course.save()
        return course

    def set_subjects(self, course, body):
        """
        Update `course` with subjects from `body`.

        The existing subject associations are cleared first, so after the call the
        course has exactly the subjects listed in `body['subjects']`.
        """
        course.subjects.clear()
        for subject_data in body['subjects']:
            # Normalize subject names with title case
            subject, __ = Subject.objects.get_or_create(name=subject_data['title'].title())
            course.subjects.add(subject)

    def update_course_run(self, course, body):
        """
        Create or update a run of `course` from Drupal data given by `body`.

        Args:
            course (Course): The course the run belongs to. May be None when
                `update_course` found no matching course; the run then keeps
                its current course instead of having it overwritten with None.
            body (dict): One item of the Drupal response. Reads `course_id` and
                `current_language`.

        Returns:
            CourseRun: The saved course run, or None if no run has the key
                `body['course_id']`.
        """
        course_run_key = body['course_id']

        try:
            course_run = CourseRun.objects.get(key=course_run_key)
        except CourseRun.DoesNotExist:
            logger.warning('Could not find course run [%s]', course_run_key)
            return None

        course_run.language = self.get_language_tag(body)
        if course is not None:
            # Never replace an existing association with None.
            course_run.course = course
        course_run.save()
        return course_run

    def get_language_tag(self, body):
        """
        Get a language tag from Drupal data given by `body`.

        Returns:
            LanguageTag: The tag whose `code` equals `body['current_language']`, or
                None when the code is None or empty, or when no such tag exists.
        """
        iso_code = body['current_language']

        # None and '' both mean "no language": skip the lookup and the warning.
        if not iso_code:
            return None

        try:
            return LanguageTag.objects.get(code=iso_code)
        except LanguageTag.DoesNotExist:
            logger.warning('Could not find language with ISO code [%s].', iso_code)
            return None

    def clean_html(self, content):
        """
        Cleans HTML from a string and returns a Markdown version.

        Args:
            content (str): HTML (or plain text) to convert.

        Returns:
            str: The html2text rendering of `content`, stripped of surrounding
                whitespace.
        """
        # Remove non-breaking spaces, both as the HTML entity and as the decoded
        # character. Ordinary spaces are kept so that words are not run together.
        stripped = content.replace('&nbsp;', '').replace('\xa0', '')

        html_converter = html2text.HTML2Text()
        # Keep each link on one line and disable hard wrapping of the output.
        html_converter.wrap_links = False
        html_converter.body_width = None
        return html_converter.handle(stripped).strip()
course_discovery/apps/course_metadata/management/commands/refresh_course_metadata.py
View file @
fe5f29da
...
...
@@ -4,7 +4,11 @@ from django.conf import settings
from
django.core.management
import
BaseCommand
from
edx_rest_api_client.client
import
EdxRestApiClient
from
course_discovery.apps.course_metadata.data_loaders
import
OrganizationsApiDataLoader
,
CoursesApiDataLoader
from
course_discovery.apps.course_metadata.data_loaders
import
(
CoursesApiDataLoader
,
DrupalApiDataLoader
,
OrganizationsApiDataLoader
,
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -39,3 +43,4 @@ class Command(BaseCommand):
OrganizationsApiDataLoader
(
settings
.
ORGANIZATIONS_API_URL
,
access_token
)
.
ingest
()
CoursesApiDataLoader
(
settings
.
COURSES_API_URL
,
access_token
)
.
ingest
()
DrupalApiDataLoader
(
settings
.
MARKETING_API_URL
)
.
ingest
()
course_discovery/apps/course_metadata/migrations/0002_auto_20160406_1644.py
0 → 100644
View file @
fe5f29da
# -*- coding: utf-8 -*-
from
__future__
import
unicode_literals
from
django.db
import
migrations
,
models
class Migration(migrations.Migration):
    """Adds an optional `marketing_url` field to the `course` and `historicalcourse` models."""

    dependencies = [('course_metadata', '0001_initial')]

    # The same nullable URL column is added to both models, in this order.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='marketing_url',
            field=models.URLField(null=True, max_length=255, blank=True),
        )
        for target_model in ('course', 'historicalcourse')
    ]
course_discovery/apps/course_metadata/models.py
View file @
fe5f29da
...
...
@@ -127,6 +127,7 @@ class Course(TimeStampedModel):
expected_learning_items
=
SortedManyToManyField
(
ExpectedLearningItem
,
blank
=
True
)
image
=
models
.
ForeignKey
(
Image
,
default
=
None
,
null
=
True
,
blank
=
True
)
video
=
models
.
ForeignKey
(
Video
,
default
=
None
,
null
=
True
,
blank
=
True
)
marketing_url
=
models
.
URLField
(
max_length
=
255
,
null
=
True
,
blank
=
True
)
history
=
HistoricalRecords
()
...
...
course_discovery/apps/course_metadata/tests/test_data_loaders.py
View file @
fe5f29da
""" Tests for data loaders. """
import
datetime
import
json
from
urllib.parse
import
parse_qs
,
urlparse
from
urllib.parse
import
parse_qs
,
urlparse
,
urljoin
import
ddt
import
responses
from
django.conf
import
settings
from
django.test
import
TestCase
,
override_settings
from
opaque_keys.edx.keys
import
CourseKey
from
course_discovery.apps.course_metadata.data_loaders
import
OrganizationsApiDataLoader
,
CoursesApiDataLoader
,
\
AbstractDataLoader
from
course_discovery.apps.course_metadata.models
import
Organization
,
Image
,
Course
,
CourseRun
from
course_discovery.apps.course_metadata.data_loaders
import
(
OrganizationsApiDataLoader
,
CoursesApiDataLoader
,
AbstractDataLoader
,
DrupalApiDataLoader
)
from
course_discovery.apps.course_metadata.models
import
(
Course
,
CourseRun
,
Image
,
LanguageTag
,
Organization
,
Subject
)
ACCESS_TOKEN
=
'secret'
COURSES_API_URL
=
'https://lms.example.com/api/courses/v1'
ORGANIZATIONS_API_URL
=
'https://lms.example.com/api/organizations/v0'
MARKETING_API_URL
=
'https://example.com/api/catalog/v2/'
JSON
=
'application/json'
...
...
@@ -53,10 +58,11 @@ class DataLoaderTestMixin(object):
super
(
DataLoaderTestMixin
,
self
)
.
setUp
()
self
.
loader
=
self
.
loader_class
(
self
.
api_url
,
ACCESS_TOKEN
)
# pylint: disable=not-callable
def
assert_api_called
(
self
,
expected_num_calls
):
def
assert_api_called
(
self
,
expected_num_calls
,
check_auth
=
True
):
""" Asserts the API was called with the correct number of calls, and the appropriate Authorization header. """
self
.
assertEqual
(
len
(
responses
.
calls
),
expected_num_calls
)
self
.
assertEqual
(
responses
.
calls
[
0
]
.
request
.
headers
[
'Authorization'
],
'Bearer {}'
.
format
(
ACCESS_TOKEN
))
if
check_auth
:
self
.
assertEqual
(
responses
.
calls
[
0
]
.
request
.
headers
[
'Authorization'
],
'Bearer {}'
.
format
(
ACCESS_TOKEN
))
def
test_init
(
self
):
""" Verify the constructor sets the appropriate attributes. """
...
...
@@ -287,7 +293,7 @@ class CoursesApiDataLoaderTests(DataLoaderTestMixin, TestCase):
expected_num_course_runs
=
len
(
data
)
self
.
assert_api_called
(
expected_num_course_runs
)
# Verify the
Organizatio
ns were created correctly
# Verify the
CourseRu
ns were created correctly
self
.
assertEqual
(
CourseRun
.
objects
.
count
(),
expected_num_course_runs
)
for
datum
in
data
:
...
...
@@ -350,3 +356,190 @@ class CoursesApiDataLoaderTests(DataLoaderTestMixin, TestCase):
self
.
assertEqual
(
actual
.
src
,
expected_video_src
)
else
:
self
.
assertIsNone
(
actual
)
@override_settings(MARKETING_API_URL=MARKETING_API_URL)
@ddt.ddt
class DrupalApiDataLoaderTests(DataLoaderTestMixin, TestCase):
    """Tests for DrupalApiDataLoader, run against a mocked Drupal `courses/` endpoint."""

    # Courses that are created in setUp together with one course run each.
    EXISTING_COURSE_AND_RUN_DATA = (
        {
            'course_run_key': 'course-v1:SC+BreadX+3T2015',
            'course_key': 'SC+BreadX',
            'title': 'Bread Baking 101',
            'current_language': 'en-us',
        },
        {
            'course_run_key': 'course-v1:TX+T201+3T2015',
            'course_key': 'TX+T201',
            'title': 'Testing 201',
            'current_language': '',
        },
    )

    # A course which exists, but has no associated runs
    EXISTING_COURSE = {
        'course_key': 'PartialX+P102',
        'title': 'A partial course',
    }

    api_url = MARKETING_API_URL
    loader_class = DrupalApiDataLoader

    def setUp(self):
        """Create the courses and course runs the mocked API payload refers to."""
        super(DrupalApiDataLoaderTests, self).setUp()

        for existing in self.EXISTING_COURSE_AND_RUN_DATA:
            parent = Course.objects.create(key=existing['course_key'], title=existing['title'])
            language = self.loader.get_language_tag(existing)
            CourseRun.objects.create(key=existing['course_run_key'], language=language, course=parent)

        run_less = self.EXISTING_COURSE
        Course.objects.create(key=run_less['course_key'], title=run_less['title'])

    @staticmethod
    def _api_item(title, level, about_uri, course_id, subject, language, subtitle, description):
        """Build one item of the mocked Drupal response."""
        return {
            'title': title,
            'level': {
                'title': level,
            },
            'course_about_uri': about_uri,
            'course_id': course_id,
            'subjects': [{
                'title': subject,
            }],
            'current_language': language,
            'subtitle': subtitle,
            'description': description,
        }

    def mock_api(self):
        """Mock out the Drupal API. Returns a list of mocked-out course runs."""
        bread, testing = self.EXISTING_COURSE_AND_RUN_DATA
        partial = self.EXISTING_COURSE

        items = [
            self._api_item(
                bread['title'],
                'Introductory',
                '/course/bread-baking-101',
                bread['course_run_key'],
                'Bread baking',
                bread['current_language'],
                'Learn about Bread',
                '<p><b>Bread</b> is a <a href="/wiki/Staple_food" title="Staple food">staple food</a>.',
            ),
            self._api_item(
                testing['title'],
                'Intermediate',
                '/course/testing-201',
                testing['course_run_key'],
                'testing',
                testing['current_language'],
                'Testing 201',
                "how to test better",
            ),
            # Create a course which exists in LMS/Otto, but without course runs
            self._api_item(
                partial['title'],
                'Advanced',
                '/course/partial-101',
                'course-v1:{course_key}+run'.format(course_key=partial['course_key']),
                'partially fake',
                'en-us',
                'Nope',
                'what is fake?',
            ),
            # Create a fake course run which doesn't exist in LMS/Otto
            self._api_item(
                'A partial course',
                'Advanced',
                '/course/partial-101',
                'course-v1:fakeX+fake+reallyfake',
                'seriously fake',
                'en-us',
                'Nope',
                'what is real?',
            ),
        ]
        payload = {'items': items}

        responses.add(
            responses.GET,
            settings.MARKETING_API_URL + 'courses/',
            body=json.dumps(payload),
            status=200,
            content_type='application/json'
        )
        return payload['items']

    def assert_course_run_loaded(self, body):
        """
        Verify that the course run corresponding to `body` has been saved
        correctly.
        """
        run_key_str = body['course_id']
        run_key = CourseKey.from_string(run_key_str)
        expected_course_key = '{org}+{course}'.format(org=run_key.org, course=run_key.course)

        course = Course.objects.get(key=expected_course_key)
        course_run = CourseRun.objects.get(key=run_key_str)

        self.assertEqual(course_run.course, course)
        self.assert_course_loaded(course, body)

        if not course_run.language:
            # A run without a language is only expected for an empty language code.
            self.assertEqual(body['current_language'], '')
        else:
            self.assertEqual(course_run.language.code, body['current_language'])

    def assert_course_loaded(self, course, body):
        """Verify that the course has been loaded correctly."""
        clean = self.loader.clean_html
        expected_marketing_url = urljoin(settings.MARKETING_URL_ROOT, body['course_about_uri'])

        self.assertEqual(course.title, body['title'])
        self.assertEqual(course.full_description, clean(body['description']))
        self.assertEqual(course.short_description, clean(body['subtitle']))
        self.assertEqual(course.marketing_url, expected_marketing_url)
        self.assertEqual(course.level_type.name, body['level']['title'])

        self.assert_subjects_loaded(course, body)

    def assert_subjects_loaded(self, course, body):
        """Verify that subjects have been loaded correctly."""
        course_subjects = course.subjects.all()
        api_subjects = body['subjects']

        self.assertEqual(len(course_subjects), len(api_subjects))
        for api_subject in api_subjects:
            # Subject names are compared in title case.
            expected_name = api_subject['title'].title()
            loaded_subject = Subject.objects.get(name=expected_name)
            self.assertIn(loaded_subject, course_subjects)

    @responses.activate
    def test_ingest(self):
        """Verify the data loader ingests data from Drupal."""
        data = self.mock_api()
        # The faked course should not be loaded from Drupal
        loaded_data = data[:-2]

        self.loader.ingest()

        # Drupal does not paginate its response or check authorization
        self.assert_api_called(1, check_auth=False)

        # Assert that the fake course was not created
        self.assertEqual(CourseRun.objects.count(), len(loaded_data))

        for datum in loaded_data:
            self.assert_course_run_loaded(datum)

        partial = self.EXISTING_COURSE
        Course.objects.get(key=partial['course_key'], title=partial['title'])

    @ddt.data(
        ('', ''),
        ('<h1>foo</h1>', '# foo'),
        ('<a href="http://example.com">link</a>', '[link](http://example.com)'),
        ('<strong>foo</strong>', '**foo**'),
        ('<em>foo</em>', '_foo_'),
        ('\nfoo\n', 'foo'),
        ('<span>foo</span>', 'foo'),
        ('<div>foo</div>', 'foo'),
    )
    @ddt.unpack
    def test_clean_html(self, to_clean, expected):
        """Verify clean_html returns the expected Markdown for each HTML snippet."""
        actual = self.loader.clean_html(to_clean)
        self.assertEqual(actual, expected)

    @ddt.data(
        ({'current_language': ''}, None),
        ({'current_language': 'not-real'}, None),
        ({'current_language': 'en-us'}, LanguageTag(code='en-us', name='English - United States')),
        ({'current_language': None}, None),
    )
    @ddt.unpack
    def test_get_language_tag(self, body, expected):
        """Verify get_language_tag returns the expected tag, or None, for each payload."""
        actual = self.loader.get_language_tag(body)
        self.assertEqual(actual, expected)
course_discovery/settings/base.py
View file @
fe5f29da
...
...
@@ -312,6 +312,8 @@ HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
COURSES_API_URL
=
'http://127.0.0.1:8000/api/courses/v1/'
ECOMMERCE_API_URL
=
'http://127.0.0.1:8002/api/v2/'
ORGANIZATIONS_API_URL
=
'http://127.0.0.1:8000/api/organizations/v0/'
MARKETING_API_URL
=
'http://example.org/api/catalog/v2/'
MARKETING_URL_ROOT
=
'http://example.org/'
EDX_DRF_EXTENSIONS
=
{
'OAUTH2_USER_INFO_URL'
:
'http://localhost:8000/oauth2/user_info'
,
...
...
requirements/base.txt
View file @
fe5f29da
...
...
@@ -15,6 +15,7 @@ edx-drf-extensions==0.2.0
edx-opaque-keys==0.3.0
edx-rest-api-client==1.5.0
elasticsearch>=1.0.0,<2.0.0
html2text==2016.4.2
pycountry==1.20
python-dateutil==2.5.2
pytz==2015.7
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment