Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
course-discovery
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
course-discovery
Commits
fe5f29da
Commit
fe5f29da
authored
Apr 05, 2016
by
Peter Fogg
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Ingest data from Drupal.
ECOM-3983
parent
39214f4d
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
340 additions
and
12 deletions
+340
-12
course_discovery/apps/course_metadata/data_loaders.py
+106
-4
course_discovery/apps/course_metadata/management/commands/refresh_course_metadata.py
+6
-1
course_discovery/apps/course_metadata/migrations/0002_auto_20160406_1644.py
+24
-0
course_discovery/apps/course_metadata/models.py
+1
-0
course_discovery/apps/course_metadata/tests/test_data_loaders.py
+200
-7
course_discovery/settings/base.py
+2
-0
requirements/base.txt
+1
-0
No files found.
course_discovery/apps/course_metadata/data_loaders.py
View file @
fe5f29da
""" Data loaders. """
import
abc
import
logging
from
urllib.parse
import
urljoin
from
dateutil.parser
import
parse
from
django.conf
import
settings
from
edx_rest_api_client.client
import
EdxRestApiClient
import
html2text
from
opaque_keys.edx.keys
import
CourseKey
from
course_discovery.apps.course_metadata.models
import
(
Organization
,
Image
,
Course
,
CourseRun
,
CourseOrganization
,
Video
Course
,
CourseOrganization
,
CourseRun
,
Image
,
LanguageTag
,
LevelType
,
Organization
,
Subject
,
Video
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -24,7 +27,7 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
PAGE_SIZE
=
50
def
__init__
(
self
,
api_url
,
access_token
):
def
__init__
(
self
,
api_url
,
access_token
=
None
):
"""
Arguments:
api_url (str): URL of the API from which data is loaded
...
...
@@ -68,6 +71,21 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
return
None
@classmethod
def convert_course_run_key(cls, course_run_key_str):
    """
    Derive the serialized course key from a serialized course run key.

    The run key is parsed with ``CourseKey.from_string`` and its ``org`` and
    ``course`` parts are joined with a ``+``.

    Args:
        course_run_key_str (str): The serialized course run key.

    Returns:
        str: The course key, formatted as ``<org>+<course>``.
    """
    parsed_key = CourseKey.from_string(course_run_key_str)
    org_part = parsed_key.org
    course_part = parsed_key.course
    return '{org}+{course}'.format(org=org_part, course=course_part)
class
OrganizationsApiDataLoader
(
AbstractDataLoader
):
""" Loads organizations from the Organizations API. """
...
...
@@ -141,9 +159,10 @@ class CoursesApiDataLoader(AbstractDataLoader):
def
update_course
(
self
,
body
):
# NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
# which may not be unique for an organization.
course_run_key
=
CourseKey
.
from_string
(
body
[
'id'
])
course_run_key_str
=
body
[
'id'
]
course_run_key
=
CourseKey
.
from_string
(
course_run_key_str
)
organization
,
__
=
Organization
.
objects
.
get_or_create
(
key
=
course_run_key
.
org
)
course_key
=
'{org}+{course}'
.
format
(
org
=
organization
.
key
,
course
=
course_run_key
.
course
)
course_key
=
self
.
convert_course_run_key
(
course_run_key_str
)
defaults
=
{
'title'
:
body
[
'name'
]
}
...
...
@@ -202,3 +221,86 @@ class CoursesApiDataLoader(AbstractDataLoader):
video
,
__
=
Video
.
objects
.
get_or_create
(
src
=
video_url
)
return
video
class DrupalApiDataLoader(AbstractDataLoader):
    """Loads course runs from the Drupal API.

    Only records that already exist locally are updated: a Course or
    CourseRun whose key cannot be found is logged with a warning and skipped.
    """

    def ingest(self):
        """Fetch all items from the Drupal API and update matching records.

        Reads the ``items`` list from the ``courses`` endpoint of
        ``self.api_url`` and, for each item, updates the corresponding course
        and then the corresponding course run.
        """
        client = EdxRestApiClient(self.api_url)

        logger.info('Refreshing Courses and CourseRuns from %s...', self.api_url)
        response = client.courses.get()

        data = response['items']
        logger.info('Retrieved %d course runs...', len(data))

        for body in data:
            # NOTE(review): clean_strings is not defined in this class; it is
            # presumably inherited from AbstractDataLoader -- confirm.
            cleaned_body = self.clean_strings(body)
            course = self.update_course(cleaned_body)
            self.update_course_run(course, cleaned_body)

        logger.info('Retrieved %d course runs from %s.', len(data), self.api_url)

    def update_course(self, body):
        """Update an existing course from Drupal data given by `body`.

        Args:
            body (dict): A single item from the Drupal API. The keys read here
                are ``course_id``, ``description``, ``subtitle``,
                ``course_about_uri``, ``level`` (with a ``title``) and
                ``subjects``.

        Returns:
            Course: The saved course, or None if no course with the derived
            key exists.
        """
        course_key = self.convert_course_run_key(body['course_id'])

        try:
            course = Course.objects.get(key=course_key)
        except Course.DoesNotExist:
            logger.warning('Could not find course [%s]', course_key)
            return None

        course.full_description = self.clean_html(body['description'])
        course.short_description = self.clean_html(body['subtitle'])
        course.marketing_url = urljoin(settings.MARKETING_URL_ROOT, body['course_about_uri'])

        level_type, __ = LevelType.objects.get_or_create(name=body['level']['title'])
        course.level_type = level_type

        self.set_subjects(course, body)
        course.save()
        return course

    def set_subjects(self, course, body):
        """Update `course` with subjects from `body`.

        The course's existing subjects are cleared first, so after this call
        the course has exactly the subjects listed in ``body['subjects']``.
        """
        course.subjects.clear()
        for entry in body['subjects']:
            # Normalize subject names with title case
            subject, __ = Subject.objects.get_or_create(name=entry['title'].title())
            course.subjects.add(subject)

    def update_course_run(self, course, body):
        """
        Update an existing run of `course` from Drupal data given by `body`.

        Args:
            course (Course): The course the run belongs to. May be None when
                `update_course` could not find the course; in that case the
                run's current course is left untouched.
            body (dict): A single item from the Drupal API; ``course_id`` is
                used as the course run key.

        Returns:
            CourseRun: The saved course run, or None if no run with that key
            exists.
        """
        course_run_key = body['course_id']

        try:
            course_run = CourseRun.objects.get(key=course_run_key)
        except CourseRun.DoesNotExist:
            logger.warning('Could not find course run [%s]', course_run_key)
            return None

        course_run.language = self.get_language_tag(body)
        # update_course returns None for an unknown course; never overwrite
        # the run's existing course link with None.
        if course is not None:
            course_run.course = course
        course_run.save()
        return course_run

    def get_language_tag(self, body):
        """Get a language tag from Drupal data given by `body`.

        Returns:
            LanguageTag: The tag whose code equals ``body['current_language']``,
            or None if that value is None or no such tag exists.
        """
        iso_code = body['current_language']

        if iso_code is None:
            return None

        try:
            return LanguageTag.objects.get(code=iso_code)
        except LanguageTag.DoesNotExist:
            logger.warning('Could not find language with ISO code [%s].', iso_code)
            return None

    def clean_html(self, content):
        """Cleans HTML from a string and returns a Markdown version."""
        # Drop non-breaking spaces, both as the HTML entity and as the literal
        # U+00A0 character. Ordinary spaces are kept: removing them would run
        # words together.
        stripped = content.replace('&nbsp;', '').replace('\xa0', '')
        html_converter = html2text.HTML2Text()
        html_converter.wrap_links = False
        # NOTE(review): presumably disables line wrapping of the output --
        # confirm against the html2text documentation.
        html_converter.body_width = None
        return html_converter.handle(stripped).strip()
course_discovery/apps/course_metadata/management/commands/refresh_course_metadata.py
View file @
fe5f29da
...
...
@@ -4,7 +4,11 @@ from django.conf import settings
from
django.core.management
import
BaseCommand
from
edx_rest_api_client.client
import
EdxRestApiClient
from
course_discovery.apps.course_metadata.data_loaders
import
OrganizationsApiDataLoader
,
CoursesApiDataLoader
from
course_discovery.apps.course_metadata.data_loaders
import
(
CoursesApiDataLoader
,
DrupalApiDataLoader
,
OrganizationsApiDataLoader
,
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -39,3 +43,4 @@ class Command(BaseCommand):
OrganizationsApiDataLoader
(
settings
.
ORGANIZATIONS_API_URL
,
access_token
)
.
ingest
()
CoursesApiDataLoader
(
settings
.
COURSES_API_URL
,
access_token
)
.
ingest
()
DrupalApiDataLoader
(
settings
.
MARKETING_API_URL
)
.
ingest
()
course_discovery/apps/course_metadata/migrations/0002_auto_20160406_1644.py
0 → 100644
View file @
fe5f29da
# -*- coding: utf-8 -*-
from
__future__
import
unicode_literals
from
django.db
import
migrations
,
models
class Migration(migrations.Migration):
    """Add a nullable, blank-able ``marketing_url`` URL field (max length 255)
    to the ``course`` and ``historicalcourse`` models."""

    dependencies = [('course_metadata', '0001_initial')]

    # The same field definition is added to the model and to its history model.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='marketing_url',
            field=models.URLField(max_length=255, null=True, blank=True),
        )
        for target_model in ('course', 'historicalcourse')
    ]
course_discovery/apps/course_metadata/models.py
View file @
fe5f29da
...
...
@@ -127,6 +127,7 @@ class Course(TimeStampedModel):
expected_learning_items
=
SortedManyToManyField
(
ExpectedLearningItem
,
blank
=
True
)
image
=
models
.
ForeignKey
(
Image
,
default
=
None
,
null
=
True
,
blank
=
True
)
video
=
models
.
ForeignKey
(
Video
,
default
=
None
,
null
=
True
,
blank
=
True
)
marketing_url
=
models
.
URLField
(
max_length
=
255
,
null
=
True
,
blank
=
True
)
history
=
HistoricalRecords
()
...
...
course_discovery/apps/course_metadata/tests/test_data_loaders.py
View file @
fe5f29da
This diff is collapsed.
Click to expand it.
course_discovery/settings/base.py
View file @
fe5f29da
...
...
@@ -312,6 +312,8 @@ HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
COURSES_API_URL
=
'http://127.0.0.1:8000/api/courses/v1/'
ECOMMERCE_API_URL
=
'http://127.0.0.1:8002/api/v2/'
ORGANIZATIONS_API_URL
=
'http://127.0.0.1:8000/api/organizations/v0/'
MARKETING_API_URL
=
'http://example.org/api/catalog/v2/'
MARKETING_URL_ROOT
=
'http://example.org/'
EDX_DRF_EXTENSIONS
=
{
'OAUTH2_USER_INFO_URL'
:
'http://localhost:8000/oauth2/user_info'
,
...
...
requirements/base.txt
View file @
fe5f29da
...
...
@@ -15,6 +15,7 @@ edx-drf-extensions==0.2.0
edx-opaque-keys==0.3.0
edx-rest-api-client==1.5.0
elasticsearch>=1.0.0,<2.0.0
html2text==2016.4.2
pycountry==1.20
python-dateutil==2.5.2
pytz==2015.7
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment