Commit 49f31bd5 by Clinton Blackburn Committed by Clinton Blackburn

Added Courses API data loader

This commit adds support for loading courses and course runs from the Courses API.

ECOM-3982
parent 5a2272fc
...@@ -2,9 +2,13 @@ ...@@ -2,9 +2,13 @@
import abc import abc
import logging import logging
from dateutil.parser import parse
from edx_rest_api_client.client import EdxRestApiClient from edx_rest_api_client.client import EdxRestApiClient
from opaque_keys.edx.keys import CourseKey
from course_discovery.apps.course_metadata.models import Organization, Image from course_discovery.apps.course_metadata.models import (
Organization, Image, Course, CourseRun, CourseOrganization, Video
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -34,10 +38,35 @@ class AbstractDataLoader(metaclass=abc.ABCMeta): ...@@ -34,10 +38,35 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
""" Load data for all supported objects (e.g. courses, runs). """ """ Load data for all supported objects (e.g. courses, runs). """
pass pass
def clean_strings(self, data): @classmethod
def clean_string(cls, s):
    """ Strips surrounding whitespace from a string value.

    Args:
        s: Value to clean. Anything that is not a `str` is handed back untouched.

    Returns:
        The stripped string, None if nothing remains after stripping, or the
        original value when it is not a string.
    """
    if isinstance(s, str):
        stripped = s.strip()
        # An all-whitespace (or empty) string collapses to None.
        return stripped if stripped else None

    return s
@classmethod
def clean_strings(cls, data):
""" Iterates over all string values, removing leading and trailing spaces, """ Iterates over all string values, removing leading and trailing spaces,
and replacing empty strings with None. """ and replacing empty strings with None. """
return {k: v.strip() or None for k, v in data.items() if isinstance(v, str)} return {k: cls.clean_string(v) for k, v in data.items()}
@classmethod
def parse_date(cls, date_string):
    """
    Converts a date string into a datetime using dateutil's parser.

    Args:
        date_string (str): String to be parsed. May be None or empty.

    Returns:
        datetime, or None when `date_string` is falsy
    """
    # Falsy input (None, '') never reaches the parser.
    return parse(date_string) if date_string else None
class OrganizationsApiDataLoader(AbstractDataLoader): class OrganizationsApiDataLoader(AbstractDataLoader):
...@@ -70,15 +99,106 @@ class OrganizationsApiDataLoader(AbstractDataLoader): ...@@ -70,15 +99,106 @@ class OrganizationsApiDataLoader(AbstractDataLoader):
def update_organization(self, body): def update_organization(self, body):
image = None image = None
image_url = body['logo'] image_url = body['logo']
if image_url: if image_url:
image_url = image_url.lower() image_url = image_url.lower()
image, __ = Image.objects.get_or_create(src=image_url) image, __ = Image.objects.get_or_create(src=image_url)
defaults = { defaults = {
'name': body['name'], 'name': body['name'],
'description': body['description'], 'description': body['description'],
'logo_image': image, 'logo_image': image,
} }
Organization.objects.update_or_create(key=body['short_name'], defaults=defaults) Organization.objects.update_or_create(key=body['short_name'], defaults=defaults)
class CoursesApiDataLoader(AbstractDataLoader):
    """ Loads course runs from the Courses API. """

    # Maps the (lower-cased) `pacing` value from the API onto CourseRun pacing constants.
    PACING_TYPES = {
        'instructor': CourseRun.INSTRUCTOR_PACED,
        'self': CourseRun.SELF_PACED,
    }

    def ingest(self):
        """ Pages through the Courses API, creating/updating a Course and CourseRun for every result. """
        client = EdxRestApiClient(self.api_url, oauth_access_token=self.access_token)
        count = None
        page = 1

        logger.info('Refreshing Courses and CourseRuns from %s....', self.api_url)

        while page:
            response = client.courses().get(page=page, page_size=self.PAGE_SIZE)
            pagination = response['pagination']
            count = pagination['count']
            results = response['results']
            logger.info('Retrieved %d course runs...', len(results))

            # Advance to the next page, or stop once the API reports there is none.
            page = page + 1 if pagination['next'] else None

            for body in results:
                # Normalize string values before they are written to the models.
                body = self.clean_strings(body)
                course = self.update_course(body)
                self.update_course_run(course, body)

        logger.info('Retrieved %d course runs from %s.', count, self.api_url)

    def update_course(self, body):
        """ Creates or updates the Course (and its owning Organization) for a course run.

        Args:
            body (dict): Course run data; `id` must be a parseable course run key and
                `name` supplies the course title.

        Returns:
            Course
        """
        # NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
        # which may not be unique for an organization.
        course_run_key = CourseKey.from_string(body['id'])

        organization, __ = Organization.objects.get_or_create(key=course_run_key.org)
        course_key = '{org}+{course}'.format(org=organization.key, course=course_run_key.course)
        defaults = {
            'title': body['name'],
        }
        course, __ = Course.objects.update_or_create(key=course_key, defaults=defaults)

        # Drop every existing organization link, then re-create the owner relation from the key.
        course.organizations.clear()
        CourseOrganization.objects.create(
            course=course, organization=organization, relation_type=CourseOrganization.OWNER)

        return course

    def update_course_run(self, course, body):
        """ Creates or updates the CourseRun identified by `body['id']`.

        Args:
            course (Course): Course to which the run belongs.
            body (dict): Course run data.
        """
        defaults = {
            'course': course,
            'start': self.parse_date(body['start']),
            'end': self.parse_date(body['end']),
            'enrollment_start': self.parse_date(body['enrollment_start']),
            'enrollment_end': self.parse_date(body['enrollment_end']),
            'title': body['name'],
            'short_description': body['short_description'],
            'video': self.get_courserun_video(body),
            'pacing_type': self.get_pacing_type(body),
            'image': self.get_courserun_image(body),
        }
        CourseRun.objects.update_or_create(key=body['id'], defaults=defaults)

    def get_pacing_type(self, body):
        """ Returns the CourseRun pacing constant for `body['pacing']`, or None if missing/unrecognized. """
        pacing = body.get('pacing')

        if not pacing:
            return None

        return self.PACING_TYPES.get(pacing.lower())

    @staticmethod
    def _get_media_url(body, media_key, url_key):
        """ Returns `body['media'][media_key][url_key]`, or None if any level is missing or null.

        A `.get(key, {})` default only covers an absent key; a key that is present with a
        null value would still yield None and break the chained lookup, hence the `or {}`.
        """
        media = body.get('media') or {}
        entry = media.get(media_key) or {}
        return entry.get(url_key)

    def get_courserun_image(self, body):
        """ Returns the Image for the run's raw image URL, creating it if necessary; None if there is no URL. """
        image = None
        image_url = self._get_media_url(body, 'image', 'raw')

        if image_url:
            # NOTE(review): lower-casing mirrors the logo handling in update_organization, but URL paths
            # can be case-sensitive -- confirm this normalization is intended.
            image_url = image_url.lower()
            image, __ = Image.objects.get_or_create(src=image_url)

        return image

    def get_courserun_video(self, body):
        """ Returns the Video for the run's course video URI, creating it if necessary; None if there is no URI. """
        video = None
        video_url = self._get_media_url(body, 'course_video', 'uri')

        if video_url:
            # NOTE(review): lower-casing may alter case-sensitive parts of the URL (e.g. video IDs) -- confirm.
            video_url = video_url.lower()
            video, __ = Video.objects.get_or_create(src=video_url)

        return video
...@@ -3,6 +3,8 @@ from os.path import join, abspath, dirname ...@@ -3,6 +3,8 @@ from os.path import join, abspath, dirname
from sys import path from sys import path
# PATH vars # PATH vars
from urllib.parse import urljoin
here = lambda *x: join(abspath(dirname(__file__)), *x) here = lambda *x: join(abspath(dirname(__file__)), *x)
PROJECT_ROOT = here("..") PROJECT_ROOT = here("..")
root = lambda *x: join(abspath(PROJECT_ROOT), *x) root = lambda *x: join(abspath(PROJECT_ROOT), *x)
...@@ -307,6 +309,7 @@ HAYSTACK_CONNECTIONS = { ...@@ -307,6 +309,7 @@ HAYSTACK_CONNECTIONS = {
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
COURSES_API_URL = 'http://127.0.0.1:8000/api/courses/v1/'
ECOMMERCE_API_URL = 'http://127.0.0.1:8002/api/v2/' ECOMMERCE_API_URL = 'http://127.0.0.1:8002/api/v2/'
ORGANIZATIONS_API_URL = 'http://127.0.0.1:8000/api/organizations/v0/' ORGANIZATIONS_API_URL = 'http://127.0.0.1:8000/api/organizations/v0/'
......
...@@ -10,8 +10,11 @@ djangorestframework-jwt==1.7.2 ...@@ -10,8 +10,11 @@ djangorestframework-jwt==1.7.2
django-rest-swagger[reST]==0.3.4 django-rest-swagger[reST]==0.3.4
dry-rest-permissions==0.1.6 dry-rest-permissions==0.1.6
edx-auth-backends==0.1.3 edx-auth-backends==0.1.3
edx-ccx-keys==0.2.0
edx-drf-extensions==0.2.0 edx-drf-extensions==0.2.0
edx-opaque-keys==0.3.0
edx-rest-api-client==1.5.0 edx-rest-api-client==1.5.0
elasticsearch>=1.0.0,<2.0.0 elasticsearch>=1.0.0,<2.0.0
pycountry==1.20 pycountry==1.20
python-dateutil==2.5.2
pytz==2015.7 pytz==2015.7
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment