Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
course-discovery
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
course-discovery
Commits
fe5f29da
Commit
fe5f29da
authored
Apr 05, 2016
by
Peter Fogg
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Ingest data from Drupal.
ECOM-3983
parent
39214f4d
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
340 additions
and
12 deletions
+340
-12
course_discovery/apps/course_metadata/data_loaders.py
+106
-4
course_discovery/apps/course_metadata/management/commands/refresh_course_metadata.py
+6
-1
course_discovery/apps/course_metadata/migrations/0002_auto_20160406_1644.py
+24
-0
course_discovery/apps/course_metadata/models.py
+1
-0
course_discovery/apps/course_metadata/tests/test_data_loaders.py
+200
-7
course_discovery/settings/base.py
+2
-0
requirements/base.txt
+1
-0
No files found.
course_discovery/apps/course_metadata/data_loaders.py
View file @
fe5f29da
""" Data loaders. """
""" Data loaders. """
import
abc
import
abc
import
logging
import
logging
from
urllib.parse
import
urljoin
from
dateutil.parser
import
parse
from
dateutil.parser
import
parse
from
django.conf
import
settings
from
edx_rest_api_client.client
import
EdxRestApiClient
from
edx_rest_api_client.client
import
EdxRestApiClient
import
html2text
from
opaque_keys.edx.keys
import
CourseKey
from
opaque_keys.edx.keys
import
CourseKey
from
course_discovery.apps.course_metadata.models
import
(
from
course_discovery.apps.course_metadata.models
import
(
Organization
,
Image
,
Course
,
CourseRun
,
CourseOrganization
,
Video
Course
,
CourseOrganization
,
CourseRun
,
Image
,
LanguageTag
,
LevelType
,
Organization
,
Subject
,
Video
)
)
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -24,7 +27,7 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
...
@@ -24,7 +27,7 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
PAGE_SIZE
=
50
PAGE_SIZE
=
50
def
__init__
(
self
,
api_url
,
access_token
):
def
__init__
(
self
,
api_url
,
access_token
=
None
):
"""
"""
Arguments:
Arguments:
api_url (str): URL of the API from which data is loaded
api_url (str): URL of the API from which data is loaded
...
@@ -68,6 +71,21 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
...
@@ -68,6 +71,21 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
return
None
return
None
@classmethod
def convert_course_run_key(cls, course_run_key_str):
    """
    Derive the serialized course key from a serialized course run key.

    The run key is parsed with `CourseKey.from_string`, and its `org` and
    `course` parts are joined with a `+`.

    Args:
        course_run_key_str (str): The serialized course run key.

    Returns:
        str: The course key, in the form `<org>+<course>`.
    """
    parsed_key = CourseKey.from_string(course_run_key_str)
    org, course = parsed_key.org, parsed_key.course
    return '{org}+{course}'.format(org=org, course=course)
class
OrganizationsApiDataLoader
(
AbstractDataLoader
):
class
OrganizationsApiDataLoader
(
AbstractDataLoader
):
""" Loads organizations from the Organizations API. """
""" Loads organizations from the Organizations API. """
...
@@ -141,9 +159,10 @@ class CoursesApiDataLoader(AbstractDataLoader):
...
@@ -141,9 +159,10 @@ class CoursesApiDataLoader(AbstractDataLoader):
def
update_course
(
self
,
body
):
def
update_course
(
self
,
body
):
# NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
# NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
# which may not be unique for an organization.
# which may not be unique for an organization.
course_run_key
=
CourseKey
.
from_string
(
body
[
'id'
])
course_run_key_str
=
body
[
'id'
]
course_run_key
=
CourseKey
.
from_string
(
course_run_key_str
)
organization
,
__
=
Organization
.
objects
.
get_or_create
(
key
=
course_run_key
.
org
)
organization
,
__
=
Organization
.
objects
.
get_or_create
(
key
=
course_run_key
.
org
)
course_key
=
'{org}+{course}'
.
format
(
org
=
organization
.
key
,
course
=
course_run_key
.
course
)
course_key
=
self
.
convert_course_run_key
(
course_run_key_str
)
defaults
=
{
defaults
=
{
'title'
:
body
[
'name'
]
'title'
:
body
[
'name'
]
}
}
...
@@ -202,3 +221,86 @@ class CoursesApiDataLoader(AbstractDataLoader):
...
@@ -202,3 +221,86 @@ class CoursesApiDataLoader(AbstractDataLoader):
video
,
__
=
Video
.
objects
.
get_or_create
(
src
=
video_url
)
video
,
__
=
Video
.
objects
.
get_or_create
(
src
=
video_url
)
return
video
return
video
class DrupalApiDataLoader(AbstractDataLoader):
    """Loads course runs from the Drupal API.

    Each item returned by the API's `courses` endpoint is used to enrich an
    already-existing `Course` (descriptions, marketing URL, level, subjects)
    and an already-existing `CourseRun` (language, parent course). Records
    that do not exist locally are logged and skipped, never created.
    """

    def ingest(self):
        """Fetch all items from the Drupal API and apply them to local data."""
        client = EdxRestApiClient(self.api_url)

        logger.info('Refreshing Courses and CourseRuns from %s...', self.api_url)
        response = client.courses.get()

        data = response['items']
        logger.info('Retrieved %d course runs...', len(data))

        for body in data:
            cleaned_body = self.clean_strings(body)
            course = self.update_course(cleaned_body)
            self.update_course_run(course, cleaned_body)

        logger.info('Retrieved %d course runs from %s.', len(data), self.api_url)

    def update_course(self, body):
        """Update an existing course from Drupal data given by `body`.

        Args:
            body (dict): One item from the Drupal API response. Reads the
                keys `course_id`, `description`, `subtitle`,
                `course_about_uri`, `level` and `subjects`.

        Returns:
            Course: The saved course, or None if no course with the derived
                key exists.
        """
        course_key = self.convert_course_run_key(body['course_id'])

        try:
            course = Course.objects.get(key=course_key)
        except Course.DoesNotExist:
            logger.warning('Could not find course [%s]', course_key)
            return None

        course.full_description = self.clean_html(body['description'])
        course.short_description = self.clean_html(body['subtitle'])
        course.marketing_url = urljoin(settings.MARKETING_URL_ROOT, body['course_about_uri'])

        level_type, __ = LevelType.objects.get_or_create(name=body['level']['title'])
        course.level_type = level_type

        self.set_subjects(course, body)
        course.save()
        return course

    def set_subjects(self, course, body):
        """Replace the subjects of `course` with those listed in `body`.

        Existing subject links are cleared first, so afterwards the course is
        linked only to the subjects named in `body['subjects']`.
        """
        course.subjects.clear()

        for entry in body['subjects']:
            # Normalize subject names with title case
            subject, __ = Subject.objects.get_or_create(name=entry['title'].title())
            course.subjects.add(subject)

    def update_course_run(self, course, body):
        """
        Update an existing run of `course` from Drupal data given by `body`.

        Args:
            course (Course): The parent course, or None when the course
                could not be found.
            body (dict): One item from the Drupal API response. Reads the
                keys `course_id` and `current_language`.

        Returns:
            CourseRun: The saved course run, or None if no run with the key
                `body['course_id']` exists.
        """
        course_run_key = body['course_id']

        try:
            course_run = CourseRun.objects.get(key=course_run_key)
        except CourseRun.DoesNotExist:
            logger.warning('Could not find course run [%s]', course_run_key)
            return None

        course_run.language = self.get_language_tag(body)
        # `update_course` returns None for unknown courses; keep the run's
        # current parent in that case instead of overwriting it with None.
        if course is not None:
            course_run.course = course
        course_run.save()
        return course_run

    def get_language_tag(self, body):
        """Get a language tag from Drupal data given by `body`.

        Returns:
            LanguageTag: The tag whose `code` equals
                `body['current_language']`, or None when that value is None
                or no matching tag exists.
        """
        iso_code = body['current_language']

        if iso_code is None:
            return None

        try:
            return LanguageTag.objects.get(code=iso_code)
        except LanguageTag.DoesNotExist:
            logger.warning('Could not find language with ISO code [%s].', iso_code)
            return None

    def clean_html(self, content):
        """Cleans HTML from a string and returns a Markdown version.

        Args:
            content (str): HTML markup.

        Returns:
            str: The html2text rendering of `content`, with links left
                unwrapped, no line wrapping, and surrounding whitespace
                stripped.
        """
        # Drop non-breaking spaces before conversion, in both the entity and
        # the literal U+00A0 form.
        # NOTE(review): the previous literal was whitespace-only and
        # ambiguous; confirm which form the Drupal API actually emits.
        stripped = content.replace('&nbsp;', '').replace('\xa0', '')
        html_converter = html2text.HTML2Text()
        html_converter.wrap_links = False
        html_converter.body_width = None
        return html_converter.handle(stripped).strip()
course_discovery/apps/course_metadata/management/commands/refresh_course_metadata.py
View file @
fe5f29da
...
@@ -4,7 +4,11 @@ from django.conf import settings
...
@@ -4,7 +4,11 @@ from django.conf import settings
from
django.core.management
import
BaseCommand
from
django.core.management
import
BaseCommand
from
edx_rest_api_client.client
import
EdxRestApiClient
from
edx_rest_api_client.client
import
EdxRestApiClient
from
course_discovery.apps.course_metadata.data_loaders
import
OrganizationsApiDataLoader
,
CoursesApiDataLoader
from
course_discovery.apps.course_metadata.data_loaders
import
(
CoursesApiDataLoader
,
DrupalApiDataLoader
,
OrganizationsApiDataLoader
,
)
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -39,3 +43,4 @@ class Command(BaseCommand):
...
@@ -39,3 +43,4 @@ class Command(BaseCommand):
OrganizationsApiDataLoader
(
settings
.
ORGANIZATIONS_API_URL
,
access_token
)
.
ingest
()
OrganizationsApiDataLoader
(
settings
.
ORGANIZATIONS_API_URL
,
access_token
)
.
ingest
()
CoursesApiDataLoader
(
settings
.
COURSES_API_URL
,
access_token
)
.
ingest
()
CoursesApiDataLoader
(
settings
.
COURSES_API_URL
,
access_token
)
.
ingest
()
DrupalApiDataLoader
(
settings
.
MARKETING_API_URL
)
.
ingest
()
course_discovery/apps/course_metadata/migrations/0002_auto_20160406_1644.py
0 → 100644
View file @
fe5f29da
# -*- coding: utf-8 -*-
from
__future__
import
unicode_literals
from
django.db
import
migrations
,
models
class Migration(migrations.Migration):
    """Add an optional `marketing_url` URL field to the course models."""

    # Must run after the app's initial schema migration.
    dependencies = [
        ('course_metadata', '0001_initial'),
    ]

    operations = [
        # Nullable/blank so that existing rows need no value.
        migrations.AddField(
            model_name='course',
            name='marketing_url',
            field=models.URLField(null=True, max_length=255, blank=True),
        ),
        # Same column on `historicalcourse` -- presumably the history-tracking
        # counterpart of `course`; verify against the model definitions.
        migrations.AddField(
            model_name='historicalcourse',
            name='marketing_url',
            field=models.URLField(null=True, max_length=255, blank=True),
        ),
    ]
course_discovery/apps/course_metadata/models.py
View file @
fe5f29da
...
@@ -127,6 +127,7 @@ class Course(TimeStampedModel):
...
@@ -127,6 +127,7 @@ class Course(TimeStampedModel):
expected_learning_items
=
SortedManyToManyField
(
ExpectedLearningItem
,
blank
=
True
)
expected_learning_items
=
SortedManyToManyField
(
ExpectedLearningItem
,
blank
=
True
)
image
=
models
.
ForeignKey
(
Image
,
default
=
None
,
null
=
True
,
blank
=
True
)
image
=
models
.
ForeignKey
(
Image
,
default
=
None
,
null
=
True
,
blank
=
True
)
video
=
models
.
ForeignKey
(
Video
,
default
=
None
,
null
=
True
,
blank
=
True
)
video
=
models
.
ForeignKey
(
Video
,
default
=
None
,
null
=
True
,
blank
=
True
)
marketing_url
=
models
.
URLField
(
max_length
=
255
,
null
=
True
,
blank
=
True
)
history
=
HistoricalRecords
()
history
=
HistoricalRecords
()
...
...
course_discovery/apps/course_metadata/tests/test_data_loaders.py
View file @
fe5f29da
This diff is collapsed.
Click to expand it.
course_discovery/settings/base.py
View file @
fe5f29da
...
@@ -312,6 +312,8 @@ HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
...
@@ -312,6 +312,8 @@ HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
COURSES_API_URL
=
'http://127.0.0.1:8000/api/courses/v1/'
COURSES_API_URL
=
'http://127.0.0.1:8000/api/courses/v1/'
ECOMMERCE_API_URL
=
'http://127.0.0.1:8002/api/v2/'
ECOMMERCE_API_URL
=
'http://127.0.0.1:8002/api/v2/'
ORGANIZATIONS_API_URL
=
'http://127.0.0.1:8000/api/organizations/v0/'
ORGANIZATIONS_API_URL
=
'http://127.0.0.1:8000/api/organizations/v0/'
MARKETING_API_URL
=
'http://example.org/api/catalog/v2/'
MARKETING_URL_ROOT
=
'http://example.org/'
EDX_DRF_EXTENSIONS
=
{
EDX_DRF_EXTENSIONS
=
{
'OAUTH2_USER_INFO_URL'
:
'http://localhost:8000/oauth2/user_info'
,
'OAUTH2_USER_INFO_URL'
:
'http://localhost:8000/oauth2/user_info'
,
...
...
requirements/base.txt
View file @
fe5f29da
...
@@ -15,6 +15,7 @@ edx-drf-extensions==0.2.0
...
@@ -15,6 +15,7 @@ edx-drf-extensions==0.2.0
edx-opaque-keys==0.3.0
edx-opaque-keys==0.3.0
edx-rest-api-client==1.5.0
edx-rest-api-client==1.5.0
elasticsearch>=1.0.0,<2.0.0
elasticsearch>=1.0.0,<2.0.0
html2text==2016.4.2
pycountry==1.20
pycountry==1.20
python-dateutil==2.5.2
python-dateutil==2.5.2
pytz==2015.7
pytz==2015.7
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment