Commit 6434b37a by Clinton Blackburn

Merge pull request #48 from edx/clintonb/data-loaders

Restored LMS Data Loaders
parents 6c6f49f0 accd69eb
""" Data loaders. """
import abc
import logging
from dateutil.parser import parse
from edx_rest_api_client.client import EdxRestApiClient
from opaque_keys.edx.keys import CourseKey
from course_discovery.apps.course_metadata.models import (
Organization, Image, Course, CourseRun, CourseOrganization, Video
)
logger = logging.getLogger(__name__)
class AbstractDataLoader(metaclass=abc.ABCMeta):
    """ Common foundation for every data loader.

    Attributes:
        api_url (str): URL of the API from which data is loaded
        access_token (str): OAuth2 access token
        PAGE_SIZE (int): Number of items to load per API call
    """

    PAGE_SIZE = 50

    def __init__(self, api_url, access_token):
        """ Stores the API location and the credentials for later use.

        Arguments:
            api_url (str): URL of the API from which data is loaded
            access_token (str): OAuth2 access token
        """
        self.api_url = api_url
        self.access_token = access_token

    @abc.abstractmethod
    def ingest(self):  # pragma: no cover
        """ Load data for all supported objects (e.g. courses, runs). """

    @classmethod
    def clean_string(cls, s):
        """ Strips leading and trailing whitespace from a string.

        Non-string values are handed back untouched. A string that is empty
        once stripped is reported as None.
        """
        if isinstance(s, str):
            stripped = s.strip()
            return stripped if stripped else None
        return s

    @classmethod
    def clean_strings(cls, data):
        """ Builds a new dict in which every top-level value has been passed through `clean_string`.

        Nested containers are not descended into; only direct string values are cleaned.
        """
        cleaned = {}
        for key, value in data.items():
            cleaned[key] = cls.clean_string(value)
        return cleaned

    @classmethod
    def parse_date(cls, date_string):
        """ Converts a date string to a datetime.

        Args:
            date_string (str): String to be parsed.

        Returns:
            datetime, or None when `date_string` is empty or None
        """
        return parse(date_string) if date_string else None
class OrganizationsApiDataLoader(AbstractDataLoader):
    """ Data loader that pulls organizations from the Organizations API. """

    def ingest(self):
        """ Walks every page of the Organizations API and saves each organization found. """
        api = EdxRestApiClient(self.api_url, oauth_access_token=self.access_token)
        total = None
        page_number = 1

        logger.info('Refreshing Organizations from %s....', self.api_url)

        while page_number:
            payload = api.organizations().get(page=page_number, page_size=self.PAGE_SIZE)
            total = payload['count']
            organizations = payload['results']
            logger.info('Retrieved %d organizations...', len(organizations))

            # A falsy `next` means this was the final page, which ends the loop.
            page_number = page_number + 1 if payload['next'] else None

            for raw_organization in organizations:
                self.update_organization(self.clean_strings(raw_organization))

        logger.info('Retrieved %d organizations from %s.', total, self.api_url)

    def update_organization(self, body):
        """ Creates or updates the Organization keyed by `body['short_name']`.

        Arguments:
            body (dict): Organization data; the `logo`, `name`, `description`
                and `short_name` keys are read.
        """
        logo_url = body['logo']
        logo = None

        if logo_url:
            # The URL is lowercased before the Image lookup/creation.
            logo, __ = Image.objects.get_or_create(src=logo_url.lower())

        fields = dict(
            name=body['name'],
            description=body['description'],
            logo_image=logo,
        )
        Organization.objects.update_or_create(key=body['short_name'], defaults=fields)
class CoursesApiDataLoader(AbstractDataLoader):
    """ Loads course runs from the Courses API. """

    def ingest(self):
        """ Pages through the Courses API, saving a Course and a CourseRun for every result. """
        client = EdxRestApiClient(self.api_url, oauth_access_token=self.access_token)
        count = None
        page = 1

        logger.info('Refreshing Courses and CourseRuns from %s....', self.api_url)

        while page:
            response = client.courses().get(page=page, page_size=self.PAGE_SIZE)
            count = response['pagination']['count']
            results = response['results']
            logger.info('Retrieved %d course runs...', len(results))

            # A falsy `next` means this was the final page, which ends the loop.
            if response['pagination']['next']:
                page += 1
            else:
                page = None

            for body in results:
                body = self.clean_strings(body)
                course = self.update_course(body)
                self.update_course_run(course, body)

        logger.info('Retrieved %d course runs from %s.', count, self.api_url)

    def update_course(self, body):
        """ Creates or updates the Course that owns the course run described by `body`.

        Arguments:
            body (dict): Course run data; the `id` and `name` keys are read.

        Returns:
            Course: The created or updated course.
        """
        # NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
        # which may not be unique for an organization.
        course_run_key = CourseKey.from_string(body['id'])

        organization, __ = Organization.objects.get_or_create(key=course_run_key.org)
        course_key = '{org}+{course}'.format(org=organization.key, course=course_run_key.course)
        defaults = {
            'title': body['name']
        }
        course, __ = Course.objects.update_or_create(key=course_key, defaults=defaults)

        # Drop any existing organization links, then record this organization as the owner.
        course.organizations.clear()
        CourseOrganization.objects.create(
            course=course, organization=organization, relation_type=CourseOrganization.OWNER)

        return course

    def update_course_run(self, course, body):
        """ Creates or updates the CourseRun keyed by `body['id']`.

        Arguments:
            course (Course): Course to which the run belongs.
            body (dict): Course run data from the API.
        """
        defaults = {
            'course': course,
            'start': self.parse_date(body['start']),
            'end': self.parse_date(body['end']),
            'enrollment_start': self.parse_date(body['enrollment_start']),
            'enrollment_end': self.parse_date(body['enrollment_end']),
            'title': body['name'],
            'short_description': body['short_description'],
            'video': self.get_courserun_video(body),
            'pacing_type': self.get_pacing_type(body),
            'image': self.get_courserun_image(body),
        }
        CourseRun.objects.update_or_create(key=body['id'], defaults=defaults)

    def get_pacing_type(self, body):
        """ Maps the `pacing` value in `body` to a CourseRun pacing constant.

        Returns:
            CourseRun.INSTRUCTOR_PACED or CourseRun.SELF_PACED; None when the
            value is missing, empty, or not recognized.
        """
        pacing = body.get('pacing')
        if not pacing:
            return None

        pacing = pacing.lower()
        if pacing == 'instructor':
            return CourseRun.INSTRUCTOR_PACED
        if pacing == 'self':
            return CourseRun.SELF_PACED
        return None

    @staticmethod
    def _get_media_url(body, media_type, field):
        """ Returns `body['media'][media_type][field]`, or None if the entry is absent or null.

        `dict.get(key, {})` only falls back to the default when the key is missing;
        a key that is present with a null value would otherwise yield None and make
        the chained `.get()` raise AttributeError.
        """
        entry = body['media'].get(media_type) or {}
        return entry.get(field)

    def get_courserun_image(self, body):
        """ Returns the Image for the run's raw image URL, or None when no URL is supplied. """
        image = None
        image_url = self._get_media_url(body, 'image', 'raw')

        if image_url:
            # NOTE(review): the URL is lowercased before lookup, so sources that differ only
            # by case share one row -- confirm this is intended for case-sensitive URL paths.
            image_url = image_url.lower()
            image, __ = Image.objects.get_or_create(src=image_url)

        return image

    def get_courserun_video(self, body):
        """ Returns the Video for the run's course video URI, or None when no URI is supplied. """
        video = None
        video_url = self._get_media_url(body, 'course_video', 'uri')

        if video_url:
            video_url = video_url.lower()
            video, __ = Video.objects.get_or_create(src=video_url)

        return video
import logging
from django.conf import settings
from django.core.management import BaseCommand
from edx_rest_api_client.client import EdxRestApiClient
from course_discovery.apps.course_metadata.data_loaders import OrganizationsApiDataLoader, CoursesApiDataLoader
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """ Management command that runs the organization and course data loaders. """

    help = 'Refresh course metadata from external sources.'

    def add_arguments(self, parser):
        """ Registers the optional `--access_token` argument. """
        parser.add_argument(
            '--access_token',
            dest='access_token',
            action='store',
            default=None,
            help='OAuth2 access token used to authenticate API calls.',
        )

    def handle(self, *args, **options):
        """ Obtains an access token if none was supplied, then runs both data loaders in order. """
        token = options.get('access_token')

        if not token:
            logger.info('No access token provided. Retrieving access token using client_credential flow...')

            try:
                token_url = '{root}/access_token'.format(root=settings.SOCIAL_AUTH_EDX_OIDC_URL_ROOT)
                token, __ = EdxRestApiClient.get_oauth_access_token(
                    token_url,
                    settings.SOCIAL_AUTH_EDX_OIDC_KEY,
                    settings.SOCIAL_AUTH_EDX_OIDC_SECRET,
                )
            except Exception:
                # Log with traceback, then let the failure propagate to the caller.
                logger.exception('No access token provided or acquired through client_credential flow.')
                raise

        # Organizations are ingested before courses.
        organizations_loader = OrganizationsApiDataLoader(settings.ORGANIZATIONS_API_URL, token)
        organizations_loader.ingest()

        courses_loader = CoursesApiDataLoader(settings.COURSES_API_URL, token)
        courses_loader.ingest()
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """ Alters `Video.image` into a nullable, blank-allowed foreign key to `Image`. """

    dependencies = [('course_metadata', '0003_auto_20160404_1734')]

    operations = [
        migrations.AlterField(
            model_name='video',
            name='image',
            field=models.ForeignKey(
                blank=True,
                null=True,
                to='course_metadata.Image',
            ),
        ),
    ]
...@@ -51,7 +51,7 @@ class Image(AbstractMediaModel): ...@@ -51,7 +51,7 @@ class Image(AbstractMediaModel):
class Video(AbstractMediaModel): class Video(AbstractMediaModel):
""" Video model. """ """ Video model. """
image = models.ForeignKey(Image) image = models.ForeignKey(Image, null=True, blank=True)
class LevelType(AbstractNamedModel): class LevelType(AbstractNamedModel):
......
...@@ -3,6 +3,8 @@ from os.path import join, abspath, dirname ...@@ -3,6 +3,8 @@ from os.path import join, abspath, dirname
from sys import path from sys import path
# PATH vars # PATH vars
from urllib.parse import urljoin
here = lambda *x: join(abspath(dirname(__file__)), *x) here = lambda *x: join(abspath(dirname(__file__)), *x)
PROJECT_ROOT = here("..") PROJECT_ROOT = here("..")
root = lambda *x: join(abspath(PROJECT_ROOT), *x) root = lambda *x: join(abspath(PROJECT_ROOT), *x)
...@@ -231,6 +233,16 @@ LOGGING = { ...@@ -231,6 +233,16 @@ LOGGING = {
'propagate': True, 'propagate': True,
'level': 'WARNING' 'level': 'WARNING'
}, },
'elasticsearch': {
'handlers': ['console'],
'propagate': True,
'level': 'WARNING'
},
'urllib3': {
'handlers': ['console'],
'propagate': True,
'level': 'WARNING'
},
'django.request': { 'django.request': {
'handlers': ['console'], 'handlers': ['console'],
'propagate': True, 'propagate': True,
...@@ -297,9 +309,9 @@ HAYSTACK_CONNECTIONS = { ...@@ -297,9 +309,9 @@ HAYSTACK_CONNECTIONS = {
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
# TODO Replace with None and document. COURSES_API_URL = 'http://127.0.0.1:8000/api/courses/v1/'
ECOMMERCE_API_URL = 'https://ecommerce.stage.edx.org/api/v2/' ECOMMERCE_API_URL = 'http://127.0.0.1:8002/api/v2/'
COURSES_API_URL = 'https://courses.stage.edx.org/api/courses/v1/' ORGANIZATIONS_API_URL = 'http://127.0.0.1:8000/api/organizations/v0/'
EDX_DRF_EXTENSIONS = { EDX_DRF_EXTENSIONS = {
'OAUTH2_USER_INFO_URL': 'http://localhost:8000/oauth2/user_info', 'OAUTH2_USER_INFO_URL': 'http://localhost:8000/oauth2/user_info',
......
...@@ -58,8 +58,6 @@ ENABLE_AUTO_AUTH = True ...@@ -58,8 +58,6 @@ ENABLE_AUTO_AUTH = True
JWT_AUTH['JWT_SECRET_KEY'] = 'course-discovery-jwt-secret-key' JWT_AUTH['JWT_SECRET_KEY'] = 'course-discovery-jwt-secret-key'
ECOMMERCE_API_URL = 'http://localhost:8002/api/v2/'
COURSES_API_URL = 'http://localhost:8000/api/courses/v1/'
##################################################################### #####################################################################
# Lastly, see if the developer has any local overrides. # Lastly, see if the developer has any local overrides.
......
...@@ -10,8 +10,11 @@ djangorestframework-jwt==1.7.2 ...@@ -10,8 +10,11 @@ djangorestframework-jwt==1.7.2
django-rest-swagger[reST]==0.3.4 django-rest-swagger[reST]==0.3.4
dry-rest-permissions==0.1.6 dry-rest-permissions==0.1.6
edx-auth-backends==0.1.3 edx-auth-backends==0.1.3
edx-ccx-keys==0.2.0
edx-drf-extensions==0.2.0 edx-drf-extensions==0.2.0
edx-opaque-keys==0.3.0
edx-rest-api-client==1.5.0 edx-rest-api-client==1.5.0
elasticsearch>=1.0.0,<2.0.0 elasticsearch>=1.0.0,<2.0.0
pycountry==1.20 pycountry==1.20
python-dateutil==2.5.2
pytz==2015.7 pytz==2015.7
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment