Commit 1a323089 by Clinton Blackburn Committed by GitHub

Reorganized the data loaders (#222)

These files have grown too large. I have split them up to make them a bit more manageable.

ECOM-5099
parent 5a6cbf5f
import abc
from dateutil.parser import parse
from django.utils.functional import cached_property
from edx_rest_api_client.client import EdxRestApiClient
from opaque_keys.edx.keys import CourseKey
from course_discovery.apps.core.utils import delete_orphans
from course_discovery.apps.course_metadata.models import Image, Person, Video
class AbstractDataLoader(metaclass=abc.ABCMeta):
    """ Base class for all data loaders.

    Subclasses must implement `ingest`. The remaining members are shared helpers for
    building the API client and normalizing the values read from the API.

    Attributes:
        api_url (str): URL of the API from which data is loaded
        partner (Partner): Partner which owns the data for this data loader
        access_token (str): OAuth2 access token
        token_type (str): Lower-cased type of the access token, or None if none was supplied
        PAGE_SIZE (int): Number of items to load per API call
        SUPPORTED_TOKEN_TYPES (tuple): Lower-cased token types accepted by the constructor
    """

    PAGE_SIZE = 50
    SUPPORTED_TOKEN_TYPES = ('bearer', 'jwt',)

    def __init__(self, partner, api_url, access_token=None, token_type=None):
        """
        Arguments:
            partner (Partner): Partner which owns the APIs and data being loaded
            api_url (str): URL of the API from which data is loaded
            access_token (str): OAuth2 access token
            token_type (str): The type of access token passed in (e.g. Bearer, JWT)

        Raises:
            ValueError: If a token type is given and it is not one of SUPPORTED_TOKEN_TYPES
                (the comparison is case-insensitive).
        """
        if token_type:
            # Normalize before validating so that 'Bearer', 'BEARER' and 'bearer' are equivalent.
            token_type = token_type.lower()

            if token_type not in self.SUPPORTED_TOKEN_TYPES:
                raise ValueError('The token type {token_type} is invalid!'.format(token_type=token_type))

        self.access_token = access_token
        self.token_type = token_type
        self.partner = partner
        # Leading and trailing slashes are removed from the stored URL.
        self.api_url = api_url.strip('/')

    @cached_property
    def api_client(self):
        """
        Returns an authenticated API client ready to call the API from which data is loaded.

        The access token is passed as `jwt` when the token type is 'jwt'; for every other
        token type (including no token type at all) it is passed as `oauth_access_token`.

        Returns:
            EdxRestApiClient
        """
        kwargs = {}

        if self.token_type == 'jwt':
            kwargs['jwt'] = self.access_token
        else:
            kwargs['oauth_access_token'] = self.access_token

        return EdxRestApiClient(self.api_url, **kwargs)

    @abc.abstractmethod
    def ingest(self):  # pragma: no cover
        """ Load data for all supported objects (e.g. courses, runs). """

    @classmethod
    def clean_string(cls, s):
        """ Removes all leading and trailing whitespace. Returns None if the resulting string is empty.

        Values that are not strings are returned unchanged.
        """
        if not isinstance(s, str):
            return s

        return s.strip() or None

    @classmethod
    def clean_strings(cls, data):
        """ Iterates over all string values, removing leading and trailing whitespace,
        and replacing empty strings with None.

        Args:
            data (dict): Mapping whose values should be cleaned.

        Returns:
            dict: A new dict with the same keys; `data` itself is not modified.
        """
        return {k: cls.clean_string(v) for k, v in data.items()}

    @classmethod
    def parse_date(cls, date_string):
        """
        Returns a parsed date.

        Args:
            date_string (str): String to be parsed.

        Returns:
            datetime, or None if `date_string` is empty or None
        """
        if date_string:
            return parse(date_string)

        return None

    @classmethod
    def convert_course_run_key(cls, course_run_key_str):
        """
        Given a serialized course run key, return the corresponding
        serialized course key.

        Args:
            course_run_key_str (str): The serialized course run key.

        Returns:
            str: The course key, formatted as '{org}+{course}'.
        """
        course_run_key = CourseKey.from_string(course_run_key_str)
        return '{org}+{course}'.format(org=course_run_key.org, course=course_run_key.course)

    @classmethod
    def delete_orphans(cls):
        """ Remove orphaned objects from the database. """
        # Inside the method body this name resolves to the module-level `delete_orphans`
        # helper, not to this classmethod.
        for model in (Image, Person, Video):
            delete_orphans(model)
import ddt
import responses
from edx_rest_api_client.auth import BearerAuth, SuppliedJwtAuth
from edx_rest_api_client.client import EdxRestApiClient
from course_discovery.apps.course_metadata.tests.factories import PartnerFactory
# Credentials handed to every loader built by the test mixins in this module.
# ACCESS_TOKEN is also the value expected in the 'Bearer ...' Authorization header check.
ACCESS_TOKEN = 'secret'
ACCESS_TOKEN_TYPE = 'Bearer'
@ddt.ddt
class ApiClientTestMixin(object):
    """ Mixin that verifies a data loader's `api_client` property.

    The host test case is expected to provide `loader_class`, `partner` and `api_url`.
    """

    @ddt.unpack
    @ddt.data(
        ('Bearer', BearerAuth),
        ('JWT', SuppliedJwtAuth),
    )
    def test_api_client(self, token_type, expected_auth_class):
        """ Verify the property returns an API client with the correct authentication. """
        loader = self.loader_class(self.partner, self.api_url, ACCESS_TOKEN, token_type)
        client = loader.api_client
        self.assertIsInstance(client, EdxRestApiClient)

        # NOTE (CCB): My initial preference was to mock the constructor and ensure the correct auth arguments
        # were passed. However, that seems nearly impossible. This is the next best alternative. It is brittle, and
        # may break if we ever change the underlying request class of EdxRestApiClient.
        self.assertIsInstance(client._store['session'].auth, expected_auth_class)  # pylint: disable=protected-access
# pylint: disable=not-callable
class DataLoaderTestMixin(object):
    """ Shared set-up, assertions and constructor tests for data loader test cases.

    Concrete test cases are expected to set `loader_class` to the loader under test
    and to override `api_url`.
    """
    loader_class = None
    partner = None

    def setUp(self):
        """ Create a partner and a loader instance authenticated with the shared test token. """
        super(DataLoaderTestMixin, self).setUp()
        self.partner = PartnerFactory()
        self.loader = self.loader_class(self.partner, self.api_url, ACCESS_TOKEN, ACCESS_TOKEN_TYPE)

    @property
    def api_url(self):  # pragma: no cover
        """ URL of the API used by the loader under test. Must be overridden. """
        raise NotImplementedError

    def assert_api_called(self, expected_num_calls, check_auth=True):
        """ Asserts the API was called with the correct number of calls, and the appropriate Authorization header.

        Arguments:
            expected_num_calls (int): Number of calls `responses` should have recorded
            check_auth (bool): Whether to also verify the Authorization header of the first recorded call
        """
        self.assertEqual(len(responses.calls), expected_num_calls)

        # Only inspect the first request when one was actually recorded. Otherwise a
        # legitimate "zero calls expected" assertion would fail with an IndexError.
        if check_auth and len(responses.calls) > 0:
            self.assertEqual(responses.calls[0].request.headers['Authorization'], 'Bearer {}'.format(ACCESS_TOKEN))

    def test_init(self):
        """ Verify the constructor sets the appropriate attributes. """
        self.assertEqual(self.loader.partner.short_code, self.partner.short_code)
        self.assertEqual(self.loader.access_token, ACCESS_TOKEN)
        self.assertEqual(self.loader.token_type, ACCESS_TOKEN_TYPE.lower())

    def test_init_with_unsupported_token_type(self):
        """ Verify the constructor raises an error if an unsupported token type is passed in. """
        with self.assertRaises(ValueError):
            self.loader_class(self.partner, self.api_url, ACCESS_TOKEN, 'not-supported')
......@@ -4,9 +4,11 @@ from django.core.management import BaseCommand, CommandError
from edx_rest_api_client.client import EdxRestApiClient
from course_discovery.apps.core.models import Partner
from course_discovery.apps.course_metadata.data_loaders import (
CoursesApiDataLoader, DrupalApiDataLoader, OrganizationsApiDataLoader, EcommerceApiDataLoader,
ProgramsApiDataLoader, MarketingSiteDataLoader
from course_discovery.apps.course_metadata.data_loaders.api import (
CoursesApiDataLoader, OrganizationsApiDataLoader, EcommerceApiDataLoader, ProgramsApiDataLoader,
)
from course_discovery.apps.course_metadata.data_loaders.marketing_site import (
DrupalApiDataLoader, MarketingSiteDataLoader,
)
logger = logging.getLogger(__name__)
......
......@@ -7,9 +7,11 @@ from django.test import TestCase
from course_discovery.apps.core.tests.factories import PartnerFactory
from course_discovery.apps.core.tests.utils import mock_api_callback
from course_discovery.apps.course_metadata.data_loaders import (
CoursesApiDataLoader, DrupalApiDataLoader, OrganizationsApiDataLoader, EcommerceApiDataLoader,
ProgramsApiDataLoader, MarketingSiteDataLoader
from course_discovery.apps.course_metadata.data_loaders.api import (
CoursesApiDataLoader, OrganizationsApiDataLoader, EcommerceApiDataLoader, ProgramsApiDataLoader,
)
from course_discovery.apps.course_metadata.data_loaders.marketing_site import (
DrupalApiDataLoader, MarketingSiteDataLoader,
)
from course_discovery.apps.course_metadata.models import Course, CourseRun, Organization, Program
from course_discovery.apps.course_metadata.tests import mock_data
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment