Commit 9bd55913 by Bill DeRusha

Update data loaders to respect canonical_course_runs

Update data loaders to only update courses for which the course_run
is set as the canonical run for that course.

ECOM-6226
parent f2cfa6a4
...@@ -107,8 +107,20 @@ class CoursesApiDataLoader(AbstractDataLoader): ...@@ -107,8 +107,20 @@ class CoursesApiDataLoader(AbstractDataLoader):
try: try:
body = self.clean_strings(body) body = self.clean_strings(body)
course = self.update_course(body) course_run = self.get_course_run(body)
self.update_course_run(course, body)
if course_run:
self.update_course_run(course_run, body)
course = getattr(course_run, 'canonical_for_course', False)
if course:
course = self.update_course(course, body)
logger.info('Processed course with key [%s].', course.key)
else:
course, created = self.get_or_create_course(body)
course_run = self.create_course_run(course, body)
if created:
course.canonical_course_run = course_run
course.save()
except: # pylint: disable=bare-except except: # pylint: disable=bare-except
msg = 'An error occurred while updating {course_run} from {api_url}'.format( msg = 'An error occurred while updating {course_run} from {api_url}'.format(
course_run=course_run_id, course_run=course_run_id,
...@@ -116,14 +128,33 @@ class CoursesApiDataLoader(AbstractDataLoader): ...@@ -116,14 +128,33 @@ class CoursesApiDataLoader(AbstractDataLoader):
) )
logger.exception(msg) logger.exception(msg)
def update_course(self, body): def get_course_run(self, body):
course_run_key = body['id']
try:
return CourseRun.objects.get(key__iexact=course_run_key)
except CourseRun.DoesNotExist:
return None
def update_course_run(self, course_run, body):
validated_data = self.format_course_run_data(body)
self._update_instance(course_run, validated_data)
logger.info('Processed course run with UUID [%s].', course_run.uuid)
def create_course_run(self, course, body):
defaults = self.format_course_run_data(body, course=course)
return CourseRun.objects.create(**defaults)
def get_or_create_course(self, body):
course_run_key = CourseKey.from_string(body['id']) course_run_key = CourseKey.from_string(body['id'])
course_key = self.get_course_key_from_course_run_key(course_run_key) course_key = self.get_course_key_from_course_run_key(course_run_key)
defaults = self.format_course_data(body)
# We need to add the key to the defaults because django ignores kwargs with __
# separators when constructing the create request
defaults['key'] = course_key
defaults['partner'] = self.partner
defaults = {
'key': course_key,
'title': body['name'],
}
course, created = Course.objects.get_or_create(key__iexact=course_key, partner=self.partner, defaults=defaults) course, created = Course.objects.get_or_create(key__iexact=course_key, partner=self.partner, defaults=defaults)
if created: if created:
...@@ -133,16 +164,27 @@ class CoursesApiDataLoader(AbstractDataLoader): ...@@ -133,16 +164,27 @@ class CoursesApiDataLoader(AbstractDataLoader):
defaults = {'key': key} defaults = {'key': key}
organization, __ = Organization.objects.get_or_create(key__iexact=key, partner=self.partner, organization, __ = Organization.objects.get_or_create(key__iexact=key, partner=self.partner,
defaults=defaults) defaults=defaults)
course.authoring_organizations.add(organization) course.authoring_organizations.add(organization)
logger.info('Processed course with key [%s].', course_key) return (course, created)
def update_course(self, course, body):
validated_data = self.format_course_data(body)
self._update_instance(course, validated_data)
logger.info('Processed course with key [%s].', course.key)
return course return course
def update_course_run(self, course, body): def _update_instance(self, instance, validated_data):
key = body['id'] for attr, value in validated_data.items():
setattr(instance, attr, value)
instance.save()
def format_course_run_data(self, body, course=None):
defaults = { defaults = {
'key': key, 'key': body['id'],
'end': self.parse_date(body['end']), 'end': self.parse_date(body['end']),
'enrollment_start': self.parse_date(body['enrollment_start']), 'enrollment_start': self.parse_date(body['enrollment_start']),
'enrollment_end': self.parse_date(body['enrollment_end']), 'enrollment_end': self.parse_date(body['enrollment_end']),
...@@ -162,10 +204,17 @@ class CoursesApiDataLoader(AbstractDataLoader): ...@@ -162,10 +204,17 @@ class CoursesApiDataLoader(AbstractDataLoader):
'mobile_available': body.get('mobile_available') or False, 'mobile_available': body.get('mobile_available') or False,
}) })
course_run, __ = course.course_runs.update_or_create(key__iexact=key, defaults=defaults) if course:
defaults['course'] = course
return defaults
def format_course_data(self, body):
defaults = {
'title': body['name'],
}
logger.info('Processed course run with key [%s].', course_run.key) return defaults
return course_run
def get_pacing_type(self, body): def get_pacing_type(self, body):
pacing = body.get('pacing') pacing = body.get('pacing')
......
...@@ -377,73 +377,78 @@ class CourseMarketingSiteDataLoader(AbstractMarketingSiteDataLoader): ...@@ -377,73 +377,78 @@ class CourseMarketingSiteDataLoader(AbstractMarketingSiteDataLoader):
return kwargs return kwargs
def process_node(self, data): def process_node(self, data):
course_run_key = CourseKey.from_string(data['field_course_id']) course_run = self.get_course_run(data)
key = self.get_course_key_from_course_run_key(course_run_key)
# Clean the title for the course and course run if course_run:
data['field_course_course_title']['value'] = self.clean_html(data['field_course_course_title']['value']) self.update_course_run(course_run, data)
try:
course = self.update_course(course_run.canonical_for_course, data)
self.set_subjects(course, data)
self.set_authoring_organizations(course, data)
logger.info('Processed course with key [%s].', course.key)
except AttributeError:
pass
else:
course, created = self.get_or_create_course(data)
course_run = self.create_course_run(course, data)
if created:
course.canonical_course_run = course_run
course.save()
def get_course_run(self, data):
course_run_key = data['field_course_id']
try:
return CourseRun.objects.get(key__iexact=course_run_key)
except CourseRun.DoesNotExist:
return None
defaults = { def update_course_run(self, course_run, data):
'key': key, validated_data = self.format_course_run_data(data, course_run.course)
'title': self.clean_html(data['field_course_course_title']['value']), self._update_instance(course_run, validated_data)
'number': data['field_course_code'], self.set_course_run_staff(course_run, data)
'full_description': self.get_description(data), self.set_course_run_transcript_languages(course_run, data)
'video': self.get_video(data),
'short_description': self.clean_html(data['field_course_sub_title_short']),
'level_type': self.get_level_type(data['field_course_level']),
'card_image_url': self._get_nested_url(data.get('field_course_image_promoted')),
}
course, created = Course.objects.get_or_create(key__iexact=key, partner=self.partner, defaults=defaults)
# If the course already exists update the fields only if the course_run we got from drupal is published. logger.info('Processed course run with UUID [%s].', course_run.uuid)
# People often put temp data into required drupal fields for unpublished courses. We don't want to overwrite
# the course info with this data, so we only update course info from published sources.
published = self.get_course_run_status(data) == CourseRunStatus.Published
if not created and published:
for attr, value in defaults.items():
setattr(course, attr, value)
course.save()
self.set_subjects(course, data) def create_course_run(self, course, data):
self.set_authoring_organizations(course, data) defaults = self.format_course_run_data(data, course)
self.create_course_run(course, data)
logger.info('Processed course with key [%s].', key) course_run = CourseRun.objects.create(**defaults)
return course self.set_course_run_staff(course_run, data)
self.set_course_run_transcript_languages(course_run, data)
def get_description(self, data): return course_run
description = (data.get('field_course_body', {}) or {}).get('value')
description = description or (data.get('field_course_description', {}) or {}).get('value')
description = description or ''
description = self.clean_html(description)
return description
def get_course_run_status(self, data): def get_or_create_course(self, data):
return CourseRunStatus.Published if bool(int(data['status'])) else CourseRunStatus.Unpublished course_run_key = CourseKey.from_string(data['field_course_id'])
key = self.get_course_key_from_course_run_key(course_run_key)
defaults = self.format_course_data(data, key=key)
def get_level_type(self, name): course, created = Course.objects.get_or_create(key__iexact=key, partner=self.partner, defaults=defaults)
level_type = None
if name: if created:
level_type, __ = LevelType.objects.get_or_create(name=name) self.set_subjects(course, data)
self.set_authoring_organizations(course, data)
return level_type return (course, created)
def get_video(self, data): def update_course(self, course, data):
video_url = self._get_nested_url(data.get('field_course_video') or data.get('field_product_video')) validated_data = self.format_course_data(data)
image_url = self._get_nested_url(data.get('field_course_image_featured_card')) self._update_instance(course, validated_data)
return self.get_or_create_video(video_url, image_url)
def get_pacing_type(self, data): if self.get_course_run_status(data) != CourseRunStatus.Published:
self_paced = data.get('field_course_self_paced', False) logger.warning(
return CourseRunPacing.Self if self_paced else CourseRunPacing.Instructor 'Updating course [%s] with data from unpublished course_run [%s].', course.uuid, data['field_course_id']
)
def get_hidden(self, data): return course
# 'couse' [sic]. The field is misspelled on Drupal. ಠ_ಠ
hidden = data.get('field_couse_is_hidden', False)
return hidden is True
def create_course_run(self, course, data): def _update_instance(self, instance, validated_data):
for attr, value in validated_data.items():
setattr(instance, attr, value)
instance.save()
def format_course_run_data(self, data, course):
uuid = data['uuid'] uuid = data['uuid']
key = data['field_course_id'] key = data['field_course_id']
slug = data['url'].split('/')[-1] slug = data['url'].split('/')[-1]
...@@ -457,7 +462,6 @@ class CourseMarketingSiteDataLoader(AbstractMarketingSiteDataLoader): ...@@ -457,7 +462,6 @@ class CourseMarketingSiteDataLoader(AbstractMarketingSiteDataLoader):
defaults = { defaults = {
'key': key, 'key': key,
'course': course,
'uuid': uuid, 'uuid': uuid,
'title_override': self.clean_html(data['field_course_course_title']['value']), 'title_override': self.clean_html(data['field_course_course_title']['value']),
'language': language, 'language': language,
...@@ -470,6 +474,7 @@ class CourseMarketingSiteDataLoader(AbstractMarketingSiteDataLoader): ...@@ -470,6 +474,7 @@ class CourseMarketingSiteDataLoader(AbstractMarketingSiteDataLoader):
'weeks_to_complete': None, 'weeks_to_complete': None,
'mobile_available': data.get('field_course_enrollment_mobile') or False, 'mobile_available': data.get('field_course_enrollment_mobile') or False,
'video': course.video, 'video': course.video,
'course': course,
} }
if weeks_to_complete: if weeks_to_complete:
...@@ -478,18 +483,57 @@ class CourseMarketingSiteDataLoader(AbstractMarketingSiteDataLoader): ...@@ -478,18 +483,57 @@ class CourseMarketingSiteDataLoader(AbstractMarketingSiteDataLoader):
weeks_to_complete = rrule.rrule(rrule.WEEKLY, dtstart=start, until=end).count() weeks_to_complete = rrule.rrule(rrule.WEEKLY, dtstart=start, until=end).count()
defaults['weeks_to_complete'] = int(weeks_to_complete) defaults['weeks_to_complete'] = int(weeks_to_complete)
try: return defaults
course_run, __ = CourseRun.objects.update_or_create(key__iexact=key, defaults=defaults)
except TypeError:
# TODO Fix the data in Drupal (ECOM-5304)
logger.error('Multiple course runs are identified by the key [%s] or UUID [%s].', key, uuid)
return None
self.set_course_run_staff(course_run, data) def format_course_data(self, data, key=None):
self.set_course_run_transcript_languages(course_run, data) if not key:
course_run_key = CourseKey.from_string(data['field_course_id'])
key = self.get_course_key_from_course_run_key(course_run_key)
logger.info('Processed course run with UUID [%s].', uuid) defaults = {
return course_run 'key': key,
'title': self.clean_html(data['field_course_course_title']['value']),
'number': data['field_course_code'],
'full_description': self.get_description(data),
'video': self.get_video(data),
'short_description': self.clean_html(data['field_course_sub_title_short']),
'level_type': self.get_level_type(data['field_course_level']),
'card_image_url': self._get_nested_url(data.get('field_course_image_promoted')),
}
return defaults
def get_description(self, data):
description = (data.get('field_course_body', {}) or {}).get('value')
description = description or (data.get('field_course_description', {}) or {}).get('value')
description = description or ''
description = self.clean_html(description)
return description
def get_course_run_status(self, data):
return CourseRunStatus.Published if bool(int(data['status'])) else CourseRunStatus.Unpublished
def get_level_type(self, name):
level_type = None
if name:
level_type, __ = LevelType.objects.get_or_create(name=name)
return level_type
def get_video(self, data):
video_url = self._get_nested_url(data.get('field_course_video') or data.get('field_product_video'))
image_url = self._get_nested_url(data.get('field_course_image_featured_card'))
return self.get_or_create_video(video_url, image_url)
def get_pacing_type(self, data):
self_paced = data.get('field_course_self_paced', False)
return CourseRunPacing.Self if self_paced else CourseRunPacing.Instructor
def get_hidden(self, data):
# 'couse' [sic]. The field is misspelled on Drupal. ಠ_ಠ
hidden = data.get('field_couse_is_hidden', False)
return hidden is True
def _get_objects_by_uuid(self, object_type, raw_objects_data): def _get_objects_by_uuid(self, object_type, raw_objects_data):
uuids = [_object.get('uuid') for _object in raw_objects_data] uuids = [_object.get('uuid') for _object in raw_objects_data]
......
...@@ -133,8 +133,9 @@ class CoursesApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCas ...@@ -133,8 +133,9 @@ class CoursesApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCas
def api_url(self): def api_url(self):
return self.partner.courses_api_url return self.partner.courses_api_url
def mock_api(self): def mock_api(self, bodies=None):
bodies = mock_data.COURSES_API_BODIES if not bodies:
bodies = mock_data.COURSES_API_BODIES
url = self.api_url + 'courses/' url = self.api_url + 'courses/'
responses.add_callback( responses.add_callback(
responses.GET, responses.GET,
...@@ -185,6 +186,8 @@ class CoursesApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCas ...@@ -185,6 +186,8 @@ class CoursesApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCas
for field, value in expected_values.items(): for field, value in expected_values.items():
self.assertEqual(getattr(course_run, field), value, 'Field {} is invalid.'.format(field)) self.assertEqual(getattr(course_run, field), value, 'Field {} is invalid.'.format(field))
return course_run
@responses.activate @responses.activate
@ddt.data(True, False) @ddt.data(True, False)
def test_ingest(self, partner_has_marketing_site): def test_ingest(self, partner_has_marketing_site):
...@@ -227,6 +230,39 @@ class CoursesApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCas ...@@ -227,6 +230,39 @@ class CoursesApiDataLoaderTests(ApiClientTestMixin, DataLoaderTestMixin, TestCas
) )
mock_logger.exception.assert_called_with(msg) mock_logger.exception.assert_called_with(msg)
@responses.activate
def test_ingest_canonical(self):
""" Verify the method ingests data from the Courses API. """
self.assertEqual(Course.objects.count(), 0)
self.assertEqual(CourseRun.objects.count(), 0)
self.mock_api([
mock_data.COURSES_API_BODY_ORIGINAL,
mock_data.COURSES_API_BODY_SECOND,
mock_data.COURSES_API_BODY_UPDATED,
])
self.loader.ingest()
# Verify the CourseRun was created correctly by no errors raised
course_run_orig = CourseRun.objects.get(key=mock_data.COURSES_API_BODY_ORIGINAL['id'])
# Verify that a course has been created and set as canonical by no errors raised
course = course_run_orig.canonical_for_course
# Verify the CourseRun was created correctly by no errors raised
course_run_second = CourseRun.objects.get(key=mock_data.COURSES_API_BODY_SECOND['id'])
# Verify not set as canonical
with self.assertRaises(AttributeError):
course_run_second.canonical_for_course # pylint: disable=pointless-statement
# Verify second course not used to update course
self.assertNotEqual(mock_data.COURSES_API_BODY_SECOND['name'], course.title)
# Verify udpated canonical course used to update course
self.assertEqual(mock_data.COURSES_API_BODY_UPDATED['name'], course.title)
# Verify the updated course run updated the original course run
self.assertEqual(mock_data.COURSES_API_BODY_UPDATED['hidden'], course_run_orig.hidden)
def test_get_pacing_type_field_missing(self): def test_get_pacing_type_field_missing(self):
""" Verify the method returns None if the API response does not include a pacing field. """ """ Verify the method returns None if the API response does not include a pacing field. """
self.assertIsNone(self.loader.get_pacing_type({})) self.assertIsNone(self.loader.get_pacing_type({}))
......
...@@ -325,9 +325,7 @@ class PersonMarketingSiteDataLoaderTests(AbstractMarketingSiteDataLoaderTestMixi ...@@ -325,9 +325,7 @@ class PersonMarketingSiteDataLoaderTests(AbstractMarketingSiteDataLoaderTestMixi
@ddt.ddt @ddt.ddt
class CourseMarketingSiteDataLoaderTests(AbstractMarketingSiteDataLoaderTestMixin, TestCase): class CourseMarketingSiteDataLoaderTests(AbstractMarketingSiteDataLoaderTestMixin, TestCase):
loader_class = CourseMarketingSiteDataLoader loader_class = CourseMarketingSiteDataLoader
mocked_data = mock_data.MARKETING_SITE_API_COURSE_BODIES mocked_data = mock_data.UNIQUE_MARKETING_SITE_API_COURSE_BODIES
mocked_data.append(mock_data.MARKETING_SITE_API_UNPUBLISHED_COPY_COURSE_BODY)
mocked_data.append(mock_data.MARKETING_SITE_API_PUBLISHED_COPY_COURSE_BODY)
def _get_uuids(self, items): def _get_uuids(self, items):
return [item['uuid'] for item in items] return [item['uuid'] for item in items]
...@@ -513,6 +511,8 @@ class CourseMarketingSiteDataLoaderTests(AbstractMarketingSiteDataLoaderTestMixi ...@@ -513,6 +511,8 @@ class CourseMarketingSiteDataLoaderTests(AbstractMarketingSiteDataLoaderTestMixi
expected_transcript_languages = self.loader.get_language_tags_from_names(language_names) expected_transcript_languages = self.loader.get_language_tags_from_names(language_names)
self.assertEqual(list(course_run.transcript_languages.all()), list(expected_transcript_languages)) self.assertEqual(list(course_run.transcript_languages.all()), list(expected_transcript_languages))
return course_run
def _get_course(self, data): def _get_course(self, data):
course_run_key = CourseKey.from_string(data['field_course_id']) course_run_key = CourseKey.from_string(data['field_course_id'])
return Course.objects.get(key=self.loader.get_course_key_from_course_run_key(course_run_key), return Course.objects.get(key=self.loader.get_course_key_from_course_run_key(course_run_key),
...@@ -522,20 +522,33 @@ class CourseMarketingSiteDataLoaderTests(AbstractMarketingSiteDataLoaderTestMixi ...@@ -522,20 +522,33 @@ class CourseMarketingSiteDataLoaderTests(AbstractMarketingSiteDataLoaderTestMixi
def test_ingest(self): def test_ingest(self):
self.mock_login_response() self.mock_login_response()
data = self.mock_api() data = self.mock_api()
published_course_run_key = mock_data.MARKETING_SITE_API_PUBLISHED_COPY_COURSE_BODY['field_course_id']
self.loader.ingest() self.loader.ingest()
for datum in data: for datum in data:
self.assert_course_run_loaded(datum) self.assert_course_run_loaded(datum)
self.assert_course_loaded(datum)
@responses.activate
def test_canonical(self):
self.mocked_data = [
mock_data.ORIGINAL_MARKETING_SITE_API_COURSE_BODY,
mock_data.NEW_RUN_MARKETING_SITE_API_COURSE_BODY,
mock_data.UPDATED_MARKETING_SITE_API_COURSE_BODY,
]
self.mock_login_response()
self.mock_api()
self.loader.ingest()
course_run = self.assert_course_run_loaded(mock_data.UPDATED_MARKETING_SITE_API_COURSE_BODY)
self.assert_course_loaded(mock_data.UPDATED_MARKETING_SITE_API_COURSE_BODY)
self.assertTrue(course_run.canonical_for_course)
if datum['field_course_code'] == mock_data.MULTI_COURSE_RUN_COURSE_NUMBER: course_run = self.assert_course_run_loaded(mock_data.NEW_RUN_MARKETING_SITE_API_COURSE_BODY)
# For the original course and the unpublished course ensure course fields are not present. course = course_run.course
if datum['field_course_id'] != published_course_run_key:
self.assert_no_override_unpublished_course_fields(datum)
# For the latest published course ensure course fields match the latest saved course.
else:
self.assert_course_loaded(datum)
else: new_run_title = mock_data.NEW_RUN_MARKETING_SITE_API_COURSE_BODY['field_course_course_title']['value']
self.assert_course_loaded(datum) self.assertNotEqual(course.title, new_run_title)
with self.assertRaises(AttributeError):
course_run.canonical_for_course # pylint: disable=pointless-statement
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment