Commit 5e8c2ad7 by Clinton Blackburn Committed by GitHub

Added new marketing site data loader (#193)

This will eventually replace the existing Drupal data loader. A more extensive API is now used to get much more data.

ECOM-5099
parent a0dbf812
......@@ -53,7 +53,8 @@ class PartnerAdmin(admin.ModelAdmin):
}),
(_('Marketing Site Configuration'), {
'description': _('Configure the marketing site URLs that will be used to retrieve data and create URLs.'),
'fields': ('marketing_site_url_root', 'marketing_site_api_url',)
'fields': ('marketing_site_url_root', 'marketing_site_api_url', 'marketing_site_api_username',
'marketing_site_api_password',)
}),
)
list_display = ('name', 'short_code',)
......
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """ Adds the nullable marketing site API username/password fields to the `partner` model. """

    dependencies = [('core', '0009_auto_20160730_2131')]

    operations = [
        # Password field for the marketing site API.
        migrations.AddField(
            model_name='partner',
            name='marketing_site_api_password',
            field=models.CharField(
                max_length=255,
                null=True,
                blank=True,
                verbose_name='Marketing Site API Password',
            ),
        ),
        # Username field for the marketing site API.
        migrations.AddField(
            model_name='partner',
            name='marketing_site_api_username',
            field=models.CharField(
                max_length=255,
                null=True,
                blank=True,
                verbose_name='Marketing Site API Username',
            ),
        ),
    ]
......@@ -70,6 +70,10 @@ class Partner(TimeStampedModel):
verbose_name=_('Marketing Site API URL'))
marketing_site_url_root = models.URLField(max_length=255, null=True, blank=True,
verbose_name=_('Marketing Site URL'))
marketing_site_api_username = models.CharField(max_length=255, null=True, blank=True,
verbose_name=_('Marketing Site API Username'))
marketing_site_api_password = models.CharField(max_length=255, null=True, blank=True,
verbose_name=_('Marketing Site API Password'))
oidc_url_root = models.CharField(max_length=255, null=True, verbose_name=_('OpenID Connect URL'))
oidc_key = models.CharField(max_length=255, null=True, verbose_name=_('OpenID Connect Key'))
oidc_secret = models.CharField(max_length=255, null=True, verbose_name=_('OpenID Connect Secret'))
......
......@@ -27,6 +27,8 @@ class PartnerFactory(factory.DjangoModelFactory):
programs_api_url = '{root}/api/programs/v1/'.format(root=FuzzyUrlRoot().fuzz())
marketing_site_api_url = '{root}/api/courses/v1/'.format(root=FuzzyUrlRoot().fuzz())
marketing_site_url_root = '{root}/'.format(root=FuzzyUrlRoot().fuzz())
marketing_site_api_username = FuzzyText().fuzz()
marketing_site_api_password = FuzzyText().fuzz()
oidc_url_root = '{root}'.format(root=FuzzyUrlRoot().fuzz())
oidc_key = FuzzyText().fuzz()
oidc_secret = FuzzyText().fuzz()
......
......@@ -2,9 +2,10 @@
import abc
import logging
from decimal import Decimal
from urllib.parse import urljoin
from urllib.parse import urljoin, urlencode
import html2text
import requests
from dateutil.parser import parse
from django.utils.functional import cached_property
from edx_rest_api_client.client import EdxRestApiClient
......@@ -33,7 +34,7 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
PAGE_SIZE = 50
SUPPORTED_TOKEN_TYPES = ('bearer', 'jwt',)
def __init__(self, partner, api_url, access_token, token_type):
def __init__(self, partner, api_url, access_token=None, token_type=None):
"""
Arguments:
partner (Partner): Partner which owns the APIs and data being loaded
......@@ -41,15 +42,16 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
access_token (str): OAuth2 access token
token_type (str): The type of access token passed in (e.g. Bearer, JWT)
"""
token_type = token_type.lower()
if token_type:
token_type = token_type.lower()
if token_type not in self.SUPPORTED_TOKEN_TYPES:
raise ValueError('The token type {token_type} is invalid!'.format(token_type=token_type))
if token_type not in self.SUPPORTED_TOKEN_TYPES:
raise ValueError('The token type {token_type} is invalid!'.format(token_type=token_type))
self.access_token = access_token
self.token_type = token_type
self.partner = partner
self.api_url = api_url
self.api_url = api_url.strip('/')
@cached_property
def api_client(self):
......@@ -573,3 +575,101 @@ class ProgramsApiDataLoader(AbstractDataLoader):
image, __ = Image.objects.update_or_create(src=image_url, defaults=defaults)
return image
class MarketingSiteDataLoader(AbstractDataLoader):
    """
    Data loader that reads node data from a partner's marketing site.

    Requests are made through a `requests.Session` that is authenticated by posting the partner's
    marketing site credentials to the site's login form. Only `xseries` nodes are ingested so far.
    """

    def __init__(self, partner, api_url, access_token=None, token_type=None):
        """
        Arguments:
            partner (Partner): Partner whose marketing site is being read
            api_url (str): Root URL of the marketing site
            access_token (str): Passed through to AbstractDataLoader
            token_type (str): Passed through to AbstractDataLoader

        Raises:
            Exception: if the partner is missing a marketing site API username or password
        """
        super(MarketingSiteDataLoader, self).__init__(partner, api_url, access_token, token_type)

        if not (self.partner.marketing_site_api_username and self.partner.marketing_site_api_password):
            msg = 'Marketing Site API credentials are not properly configured for Partner [{partner}]!'.format(
                partner=partner.short_code)
            raise Exception(msg)

    @cached_property
    def api_client(self):
        """
        Returns a `requests.Session` that has been logged in to the marketing site.

        The login happens on first access; `cached_property` reuses the session afterwards.

        Raises:
            Exception: if the login response is not a 200 located at `{root}/users/{username}`
        """
        username = self.partner.marketing_site_api_username

        # Login by posting to the login form
        login_data = {
            'name': username,
            'pass': self.partner.marketing_site_api_password,
            'form_id': 'user_login',
            'op': 'Log in',
        }

        session = requests.Session()
        login_url = '{root}/user'.format(root=self.api_url)
        response = session.post(login_url, data=login_data)

        # The final response URL (after any redirects) is compared against the user's page.
        expected_url = '{root}/users/{username}'.format(root=self.api_url, username=username)
        if not (response.status_code == 200 and response.url == expected_url):
            raise Exception('Login failed!')

        return session

    def ingest(self):  # pragma: no cover
        """ Load data for all supported objects (e.g. courses, runs). """
        # TODO Ingest schools
        # TODO Ingest instructors
        # TODO Ingest course runs (courses)
        self.retrieve_and_ingest_node_type('xseries', self.update_xseries)

    def retrieve_and_ingest_node_type(self, node_type, update_method):
        """
        Retrieves all nodes of the specified type, and calls `update_method` for each node.

        Pages are requested starting at 0 until a response no longer contains a `next` key.
        A failure while processing a single node is logged and does not stop the ingest.

        Args:
            node_type (str): Type of node to retrieve (e.g. course, xseries, school, instructor)
            update_method: Method to which the retrieved data should be passed.

        Raises:
            Exception: if a page request returns a status code other than 200
        """
        page = 0

        while True:
            params = {
                'type': node_type,
                'max-depth': 2,
                'load-entity-refs': 'subject,file,taxonomy_term,taxonomy_vocabulary,node,field_collection_item',
                'page': page,
            }
            page_url = '{root}/node.json?{qs}'.format(root=self.api_url, qs=urlencode(params))

            response = self.api_client.get(page_url)

            status_code = response.status_code
            # Compare by value; identity comparison of ints only works by accident of interning.
            if status_code != 200:
                msg = 'Failed to retrieve data from {url}\nStatus Code: {status}\nBody: {body}'.format(
                    url=page_url, status=status_code, body=response.content)
                logger.error(msg)
                raise Exception(msg)

            data = response.json()

            for datum in data['list']:
                # Fall back to the page URL in the log message if the node's own URL cannot be read.
                node_url = page_url
                try:
                    node_url = datum['url']
                    datum = self.clean_strings(datum)
                    update_method(datum)
                except Exception:  # pylint: disable=broad-except
                    # Best-effort: log and continue with the next node. KeyboardInterrupt and
                    # SystemExit are intentionally allowed to propagate.
                    logger.exception('Failed to load %s.', node_url)

            if 'next' not in data:
                break

            page += 1

    def update_xseries(self, data):
        """
        Creates or updates the Program identified by the node's marketing slug.

        Args:
            data (dict): Node data; must contain `url` and `title`.
        """
        # The slug is the last path segment of the node's URL.
        marketing_slug = data['url'].split('/')[-1]

        # Tolerate a card image field that is present but empty (e.g. null or an empty list).
        card_image_url = (data.get('field_card_image') or {}).get('url')

        defaults = {
            'title': data['title'],
            'subtitle': data.get('field_xseries_subtitle_short'),
            'category': 'XSeries',
            'partner': self.partner,
        }

        if card_image_url:
            card_image, __ = Image.objects.get_or_create(src=card_image_url)
            defaults['image'] = card_image

        Program.objects.update_or_create(marketing_slug=marketing_slug, defaults=defaults)
......@@ -5,7 +5,8 @@ from edx_rest_api_client.client import EdxRestApiClient
from course_discovery.apps.core.models import Partner
from course_discovery.apps.course_metadata.data_loaders import (
CoursesApiDataLoader, DrupalApiDataLoader, OrganizationsApiDataLoader, EcommerceApiDataLoader, ProgramsApiDataLoader
CoursesApiDataLoader, DrupalApiDataLoader, OrganizationsApiDataLoader, EcommerceApiDataLoader,
ProgramsApiDataLoader, MarketingSiteDataLoader
)
logger = logging.getLogger(__name__)
......@@ -80,6 +81,7 @@ class Command(BaseCommand):
(partner.ecommerce_api_url, EcommerceApiDataLoader,),
(partner.marketing_site_api_url, DrupalApiDataLoader,),
(partner.programs_api_url, ProgramsApiDataLoader,),
(partner.marketing_site_url_root, MarketingSiteDataLoader,),
)
for api_url, loader_class in data_loaders:
......
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """ Alters `program.marketing_slug` so that the field is declared with `db_index=True`. """

    dependencies = [('course_metadata', '0009_auto_20160725_1751')]

    operations = [
        migrations.AlterField(
            model_name='program',
            name='marketing_slug',
            field=models.CharField(
                max_length=255,
                blank=True,
                db_index=True,
                help_text='Slug used to generate links to the marketing site',
            ),
        ),
    ]
......@@ -490,7 +490,8 @@ class Program(TimeStampedModel):
marketing_slug = models.CharField(
help_text=_('Slug used to generate links to the marketing site'),
blank=True,
max_length=255
max_length=255,
db_index=True
)
image = models.ForeignKey(Image, default=None, null=True, blank=True)
......
......@@ -2,6 +2,8 @@
import datetime
import json
from decimal import Decimal
from urllib.parse import parse_qs
from urllib.parse import urlparse
import ddt
import mock
......@@ -15,7 +17,7 @@ from pytz import UTC
from course_discovery.apps.core.tests.utils import mock_api_callback
from course_discovery.apps.course_metadata.data_loaders import (
OrganizationsApiDataLoader, CoursesApiDataLoader, DrupalApiDataLoader, EcommerceApiDataLoader, AbstractDataLoader,
ProgramsApiDataLoader
ProgramsApiDataLoader, MarketingSiteDataLoader
)
from course_discovery.apps.course_metadata.models import (
Course, CourseOrganization, CourseRun, Image, LanguageTag, Organization, Person, Seat, Subject, Program
......@@ -66,8 +68,25 @@ class AbstractDataLoaderTest(TestCase):
self.assertFalse(instance.__class__.objects.filter(pk=instance.pk).exists()) # pylint: disable=no-member
# pylint: disable=not-callable
@ddt.ddt
class ApiClientTestMixin(object):
    """ Mixin providing the `api_client` authentication test for token-based data loaders. """

    @ddt.unpack
    @ddt.data(
        ('Bearer', BearerAuth),
        ('JWT', SuppliedJwtAuth),
    )
    def test_api_client(self, token_type, expected_auth_class):
        """ The api_client property should be an EdxRestApiClient using the auth class for the token type. """
        api_client = self.loader_class(self.partner, self.api_url, ACCESS_TOKEN, token_type).api_client
        self.assertIsInstance(api_client, EdxRestApiClient)

        # NOTE (CCB): My initial preference was to mock the constructor and ensure the correct auth arguments
        # were passed. However, that seems nearly impossible. This is the next best alternative. It is brittle, and
        # may break if we ever change the underlying request class of EdxRestApiClient.
        session = api_client._store['session']  # pylint: disable=protected-access
        self.assertIsInstance(session.auth, expected_auth_class)
# pylint: disable=not-callable
class DataLoaderTestMixin(object):
loader_class = None
partner = None
......@@ -98,24 +117,9 @@ class DataLoaderTestMixin(object):
with self.assertRaises(ValueError):
self.loader_class(self.partner, self.api_url, ACCESS_TOKEN, 'not-supported')
@ddt.unpack
@ddt.data(
('Bearer', BearerAuth),
('JWT', SuppliedJwtAuth),
)
def test_api_client(self, token_type, expected_auth_class):
""" Verify the property returns an API client with the correct authentication. """
loader = self.loader_class(self.partner, self.api_url, ACCESS_TOKEN, token_type)
client = loader.api_client
self.assertIsInstance(client, EdxRestApiClient)
# NOTE (CCB): My initial preference was to mock the constructor and ensure the correct auth arguments
# were passed. However, that seems nearly impossible. This is the next best alternative. It is brittle, and
# may break if we ever change the underlying request class of EdxRestApiClient.
self.assertIsInstance(client._store['session'].auth, expected_auth_class) # pylint: disable=protected-access
@ddt.ddt
class OrganizationsApiDataLoaderTests(DataLoaderTestMixin, TestCase):
class OrganizationsApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCase):
loader_class = OrganizationsApiDataLoader
@property
......@@ -169,7 +173,7 @@ class OrganizationsApiDataLoaderTests(DataLoaderTestMixin, TestCase):
@ddt.ddt
class CoursesApiDataLoaderTests(DataLoaderTestMixin, TestCase):
class CoursesApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCase):
loader_class = CoursesApiDataLoader
@property
......@@ -308,7 +312,7 @@ class CoursesApiDataLoaderTests(DataLoaderTestMixin, TestCase):
@ddt.ddt
class DrupalApiDataLoaderTests(DataLoaderTestMixin, TestCase):
class DrupalApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCase):
loader_class = DrupalApiDataLoader
@property
......@@ -487,7 +491,7 @@ class DrupalApiDataLoaderTests(DataLoaderTestMixin, TestCase):
@ddt.ddt
class EcommerceApiDataLoaderTests(DataLoaderTestMixin, TestCase):
class EcommerceApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCase):
loader_class = EcommerceApiDataLoader
@property
......@@ -601,7 +605,7 @@ class EcommerceApiDataLoaderTests(DataLoaderTestMixin, TestCase):
@ddt.ddt
class ProgramsApiDataLoaderTests(DataLoaderTestMixin, TestCase):
class ProgramsApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCase):
loader_class = ProgramsApiDataLoader
@property
......@@ -652,10 +656,134 @@ class ProgramsApiDataLoaderTests(DataLoaderTestMixin, TestCase):
self.assert_api_called(1)
# Verify the Programs were created correctly
expected_num_programs = len(api_data)
self.assertEqual(Program.objects.count(), expected_num_programs)
self.assertEqual(Program.objects.count(), len(api_data))
for datum in api_data:
self.assert_program_loaded(datum)
self.loader.ingest()
class MarketingSiteDataLoaderTests(DataLoaderTestMixin, TestCase):
    """ Tests for MarketingSiteDataLoader, with marketing site HTTP traffic mocked via `responses`. """
    loader_class = MarketingSiteDataLoader
    # NOTE(review): LOGIN_COOKIE is not referenced anywhere in this class -- confirm whether it is still needed.
    LOGIN_COOKIE = ('session_id', 'abc123')

    @property
    def api_url(self):
        """ The loader is pointed at the partner's marketing site root URL. """
        return self.partner.marketing_site_url_root

    def mock_login_response(self, failure=False):
        """
        Registers mock responses for the login form POST and the page the login redirects to.

        Args:
            failure (bool): If True, the login POST responds with a 500 and no redirect;
                otherwise it responds with a 302 whose Location is the user's landing page.
        """
        # The URLs are built by plain concatenation, so this assumes api_url ends with a slash.
        url = self.api_url + 'user'
        landing_url = '{base}users/{username}'.format(base=self.api_url,
                                                      username=self.partner.marketing_site_api_username)
        status = 500 if failure else 302
        adding_headers = {}

        if not failure:
            adding_headers['Location'] = landing_url
        responses.add(responses.POST, url, status=status, adding_headers=adding_headers)
        # The landing page is registered in both the success and failure cases.
        responses.add(responses.GET, landing_url)

    def mock_api_callback(self, url, data):
        """ Paginate the data, one item per page. """

        def request_callback(request):
            count = len(data)

            # Use the querystring to determine which page should be returned. Pages are zero-indexed, and
            # the default is page 0. Note that the values of the dict returned by `parse_qs` are lists,
            # hence the `[0]` default value.
            qs = parse_qs(urlparse(request.path_url).query)
            page = int(qs.get('page', [0])[0])
            page_size = 1

            body = {
                'list': [data[page]]
            }

            # Only advertise a next page while there are items left after the current one.
            if (page * page_size) < count - 1:
                next_page = page + 1
                next_url = '{}?page={}'.format(url, next_page)
                body['next'] = next_url

            return 200, {}, json.dumps(body)

        return request_callback

    def mock_api(self):
        """ Registers the paginated mock for `node.json` and returns the bodies it serves. """
        bodies = mock_data.MARKETING_SITE_API_XSERIES_BODIES
        url = self.api_url + 'node.json'

        responses.add_callback(
            responses.GET,
            url,
            callback=self.mock_api_callback(url, bodies),
            content_type=JSON
        )

        return bodies

    def mock_api_failure(self):
        """ Registers a mock for `node.json` that responds with a 500. """
        url = self.api_url + 'node.json'
        responses.add(responses.GET, url, status=500)

    def assert_program_loaded(self, data):
        """ Asserts a Program exists for the node data, with matching title, subtitle, category, partner and image. """
        # The slug is derived the same way the loader derives it: the last path segment of the URL.
        marketing_slug = data['url'].split('/')[-1]
        program = Program.objects.get(marketing_slug=marketing_slug)

        self.assertEqual(program.title, data['title'])
        self.assertEqual(program.subtitle, data.get('field_xseries_subtitle_short'))
        self.assertEqual(program.category, 'XSeries')
        self.assertEqual(program.partner, self.partner)

        card_image_url = data.get('field_card_image', {}).get('url')

        if card_image_url:
            card_image = Image.objects.get(src=card_image_url)
            self.assertEqual(program.image, card_image)
        else:
            self.assertIsNone(program.image)

    def test_constructor_without_credentials(self):
        """ Verify the constructor raises an exception if the Partner has no marketing site credentials set. """
        self.partner.marketing_site_api_username = None

        with self.assertRaises(Exception):
            self.loader_class(self.partner, self.api_url)

    @responses.activate
    def test_api_client_login_failure(self):
        """ Verify accessing `api_client` raises an exception when the login POST returns a 500. """
        self.mock_login_response(failure=True)

        with self.assertRaises(Exception):
            self.loader.api_client  # pylint: disable=pointless-statement

    @responses.activate
    def test_ingest(self):
        """ Verify `ingest` loads a Program for every item served by the mocked API. """
        self.mock_login_response()
        api_data = self.mock_api()

        self.assertEqual(Program.objects.count(), 0)

        self.loader.ingest()

        for datum in api_data:
            self.assert_program_loaded(datum)

    @responses.activate
    def test_ingest_with_api_failure(self):
        """ Verify `ingest` raises an exception when the node API responds with a 500. """
        self.mock_login_response()
        self.mock_api_failure()

        with self.assertRaises(Exception):
            self.loader.ingest()

    @responses.activate
    def test_ingest_exception_handling(self):
        """ Verify the data loader properly handles exceptions during processing of the data from the API. """
        self.mock_login_response()
        api_data = self.mock_api()

        # Force every item to fail during processing; each failure should be logged, not raised.
        with mock.patch.object(self.loader, 'clean_strings', side_effect=Exception):
            with mock.patch('course_discovery.apps.course_metadata.data_loaders.logger') as mock_logger:
                self.loader.ingest()
                self.assertEqual(mock_logger.exception.call_count, len(api_data))
                calls = [mock.call('Failed to load %s.', datum['url']) for datum in api_data]
                mock_logger.exception.assert_has_calls(calls)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment