Commit 49f31bd5 by Clinton Blackburn Committed by Clinton Blackburn

Added Courses API data loader

This commit adds support for loading courses and course runs from the Courses API.

ECOM-3982
parent 5a2272fc
......@@ -2,9 +2,13 @@
import abc
import logging
from dateutil.parser import parse
from edx_rest_api_client.client import EdxRestApiClient
from opaque_keys.edx.keys import CourseKey
from course_discovery.apps.course_metadata.models import Organization, Image
from course_discovery.apps.course_metadata.models import (
Organization, Image, Course, CourseRun, CourseOrganization, Video
)
logger = logging.getLogger(__name__)
......@@ -34,10 +38,35 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
""" Load data for all supported objects (e.g. courses, runs). """
pass
def clean_strings(self, data):
@classmethod
def clean_string(cls, s):
    """ Strip surrounding whitespace from a string value.

    Args:
        s: Value to clean. Anything that is not a `str` is returned untouched.

    Returns:
        The stripped string, None if nothing is left after stripping, or the
        original value when it is not a string.
    """
    if isinstance(s, str):
        stripped = s.strip()
        return stripped if stripped else None

    return s
@classmethod
def clean_strings(cls, data):
    """ Return a copy of `data` in which every string value has been cleaned.

    String values have leading and trailing whitespace removed and become None
    when nothing is left. Values of any other type are kept unchanged.

    Args:
        data (dict): Mapping whose values may or may not be strings.

    Returns:
        dict: New dict with the same keys as `data`.
    """
    # Every key must survive cleaning. The superseded implementation filtered on
    # isinstance(v, str), which silently dropped non-string values (nested dicts,
    # numbers, None) and returned before this pass-through version could run.
    return {key: cls.clean_string(value) for key, value in data.items()}
@classmethod
def parse_date(cls, date_string):
    """
    Convert a date string into a datetime.

    Args:
        date_string (str): Text to parse. May be empty or None.

    Returns:
        datetime, or None when `date_string` is empty or None.
    """
    return parse(date_string) if date_string else None
class OrganizationsApiDataLoader(AbstractDataLoader):
......@@ -70,15 +99,106 @@ class OrganizationsApiDataLoader(AbstractDataLoader):
def update_organization(self, body):
    """ Create or update the Organization described by `body`.

    Args:
        body (dict): Organization data. Reads the `logo`, `name`, `description`
            and `short_name` keys; `short_name` is used as the organization key.
    """
    image = None
    image_url = body['logo']

    if image_url:
        # Keep the URL exactly as received. Only the scheme and host of a URL are
        # case-insensitive; lowercasing the whole string can break the path or query.
        image, __ = Image.objects.get_or_create(src=image_url)

    defaults = {
        'name': body['name'],
        'description': body['description'],
        'logo_image': image,
    }

    Organization.objects.update_or_create(key=body['short_name'], defaults=defaults)
class CoursesApiDataLoader(AbstractDataLoader):
""" Loads course runs from the Courses API. """
def ingest(self):
    """ Page through the Courses API and store every course run it returns.

    Each result is cleaned with `clean_strings`, then passed to `update_course`
    and `update_course_run`. Paging stops when the response's pagination data
    has no `next` value.
    """
    api = EdxRestApiClient(self.api_url, oauth_access_token=self.access_token)
    page_number = 1

    logger.info('Refreshing Courses and CourseRuns from %s....', self.api_url)

    while True:
        payload = api.courses().get(page=page_number, page_size=self.PAGE_SIZE)
        total = payload['pagination']['count']
        course_runs = payload['results']
        logger.info('Retrieved %d course runs...', len(course_runs))

        # Read the paging marker before processing, as a malformed response
        # should fail before any record on this page is written.
        has_next_page = bool(payload['pagination']['next'])

        for raw in course_runs:
            cleaned = self.clean_strings(raw)
            course = self.update_course(cleaned)
            self.update_course_run(course, cleaned)

        if not has_next_page:
            break
        page_number += 1

    logger.info('Retrieved %d course runs from %s.', total, self.api_url)
def update_course(self, body):
    """ Create or update the Course that owns the course run described by `body`.

    Args:
        body (dict): Course run data. Reads `id` (a course run key string) and `name`.

    Returns:
        Course: The created or updated course, linked to its owning organization.
    """
    # NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
    # which may not be unique for an organization.
    run_key = CourseKey.from_string(body['id'])
    owner, __ = Organization.objects.get_or_create(key=run_key.org)

    course, __ = Course.objects.update_or_create(
        key='{}+{}'.format(owner.key, run_key.course),
        defaults={'title': body['name']},
    )

    # Drop any existing organization links, then record the owner again.
    course.organizations.clear()
    CourseOrganization.objects.create(
        course=course,
        organization=owner,
        relation_type=CourseOrganization.OWNER,
    )

    return course
def update_course_run(self, course, body):
    """ Create or update the CourseRun keyed by `body['id']`.

    Args:
        course (Course): Course the run belongs to.
        body (dict): Course run data. Reads `id`, `name`, `short_description`,
            the four date fields, and whatever the video, pacing and image
            helpers read.
    """
    date_fields = ('start', 'end', 'enrollment_start', 'enrollment_end')

    run_fields = {'course': course}
    run_fields.update((field, self.parse_date(body[field])) for field in date_fields)
    run_fields.update(
        title=body['name'],
        short_description=body['short_description'],
        video=self.get_courserun_video(body),
        pacing_type=self.get_pacing_type(body),
        image=self.get_courserun_image(body),
    )

    CourseRun.objects.update_or_create(key=body['id'], defaults=run_fields)
def get_pacing_type(self, body):
    """ Map the `pacing` value in `body` to a CourseRun pacing constant.

    Args:
        body (dict): Course run data; `pacing` is optional and matched case-insensitively.

    Returns:
        CourseRun.INSTRUCTOR_PACED for 'instructor', CourseRun.SELF_PACED for 'self',
        otherwise None.
    """
    # A missing or empty value normalizes to '' and falls through to None.
    normalized = (body.get('pacing') or '').lower()

    if normalized == 'instructor':
        return CourseRun.INSTRUCTOR_PACED
    if normalized == 'self':
        return CourseRun.SELF_PACED
    return None
def get_courserun_image(self, body):
    """ Return the Image for the course run's raw image URL, creating it if needed.

    Args:
        body (dict): Course run data. The URL is read from
            `body['media']['image']['raw']`.

    Returns:
        Image, or None when no image URL is available.
    """
    # Tolerate a null `media` value or a null `image` entry: a `.get(key, {})`
    # default only applies when the key is missing, not when its value is None.
    media = body['media'] or {}
    image_url = (media.get('image') or {}).get('raw')

    if not image_url:
        return None

    # Store the URL exactly as received. Path and query components are
    # case-sensitive, so lowercasing the whole string can break the link.
    image, __ = Image.objects.get_or_create(src=image_url)
    return image
def get_courserun_video(self, body):
    """ Return the Video for the course run's video URI, creating it if needed.

    Args:
        body (dict): Course run data. The URI is read from
            `body['media']['course_video']['uri']`.

    Returns:
        Video, or None when no video URI is available.
    """
    # Tolerate a null `media` value or a null `course_video` entry: a
    # `.get(key, {})` default only applies when the key is missing.
    media = body['media'] or {}
    video_url = (media.get('course_video') or {}).get('uri')

    if not video_url:
        return None

    # Store the URI exactly as received. Video identifiers in the path or query
    # (e.g. YouTube IDs) are case-sensitive, so lowercasing would point the
    # record at a different or non-existent video.
    video, __ = Video.objects.get_or_create(src=video_url)
    return video
......@@ -3,6 +3,8 @@ from os.path import join, abspath, dirname
from sys import path
# PATH vars
from urllib.parse import urljoin
def here(*x):
    """Join the given path components onto the directory containing this file."""
    return join(abspath(dirname(__file__)), *x)


PROJECT_ROOT = here("..")


def root(*x):
    """Join the given path components onto the absolute form of PROJECT_ROOT."""
    return join(abspath(PROJECT_ROOT), *x)
......@@ -307,6 +309,7 @@ HAYSTACK_CONNECTIONS = {
# Dotted path of the Haystack signal processor class to use.
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
# Base URLs of the external APIs. The values set here all point at loopback
# (127.0.0.1) ports.
COURSES_API_URL = 'http://127.0.0.1:8000/api/courses/v1/'
ECOMMERCE_API_URL = 'http://127.0.0.1:8002/api/v2/'
ORGANIZATIONS_API_URL = 'http://127.0.0.1:8000/api/organizations/v0/'
......
......@@ -10,8 +10,11 @@ djangorestframework-jwt==1.7.2
django-rest-swagger[reST]==0.3.4
dry-rest-permissions==0.1.6
edx-auth-backends==0.1.3
edx-ccx-keys==0.2.0
edx-drf-extensions==0.2.0
edx-opaque-keys==0.3.0
edx-rest-api-client==1.5.0
elasticsearch>=1.0.0,<2.0.0
pycountry==1.20
python-dateutil==2.5.2
pytz==2015.7
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment