Commit 5d289dc5 by Clinton Blackburn Committed by Clinton Blackburn

Deleting orphaned media and people models after loading data

ECOM-4266
parent 5def6946
from django.db import models
from django.test import TestCase
from course_discovery.apps.core.utils import get_all_related_field_names
class UnrelatedModel(models.Model):
class Meta:
app_label = 'core'
managed = False
class RelatedModel(models.Model):
class Meta:
app_label = 'core'
managed = False
class ForeignRelatedModel(models.Model):
fk = models.ForeignKey(RelatedModel)
class Meta:
app_label = 'core'
managed = False
class M2MRelatedModel(models.Model):
m2m = models.ManyToManyField(RelatedModel)
class Meta:
app_label = 'core'
managed = False
class ModelUtilTests(TestCase):
def test_get_all_related_field_names(self):
""" Verify the method returns the names of all relational fields for a model. """
self.assertEqual(get_all_related_field_names(UnrelatedModel), [])
self.assertEqual(set(get_all_related_field_names(RelatedModel)), {'foreignrelatedmodel', 'm2mrelatedmodel'})
......@@ -28,3 +28,33 @@ class ElasticsearchUtils(object):
}
es.indices.update_aliases(body)
logger.info('...alias updated.')
def get_all_related_field_names(model):
"""
Returns the names of all related fields (e.g. ForeignKey ManyToMany)
Args:
model (Model): Model whose field names should be returned
Returns:
list[str]
"""
fields = model._meta._get_fields(forward=False) # pylint: disable=protected-access
names = set([field.name for field in fields])
return list(names)
def delete_orphans(model):
"""
Deletes all instances of the given model with no relationships to other models.
Args:
model (Model): Model whose instances should be deleted
Returns:
None
"""
field_names = get_all_related_field_names(model)
kwargs = {'{0}__isnull'.format(field_name): True for field_name in field_names}
model.objects.filter(**kwargs).delete()
......@@ -4,13 +4,14 @@ import logging
from decimal import Decimal
from urllib.parse import urljoin
import html2text
from dateutil.parser import parse
from django.conf import settings
from edx_rest_api_client.client import EdxRestApiClient
import html2text
from opaque_keys.edx.keys import CourseKey
from course_discovery.apps.core.models import Currency
from course_discovery.apps.core.utils import delete_orphans
from course_discovery.apps.course_metadata.models import (
Course, CourseOrganization, CourseRun, Image, LanguageTag, LevelType, Organization, Person, Seat, Subject, Video
)
......@@ -88,6 +89,12 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
course_run_key = CourseKey.from_string(course_run_key_str)
return '{org}+{course}'.format(org=course_run_key.org, course=course_run_key.course)
@classmethod
def delete_orphans(cls):
""" Remove orphaned objects from the database. """
for model in (Image, Person, Video):
delete_orphans(model)
class OrganizationsApiDataLoader(AbstractDataLoader):
""" Loads organizations from the Organizations API. """
......@@ -116,6 +123,8 @@ class OrganizationsApiDataLoader(AbstractDataLoader):
logger.info('Retrieved %d organizations from %s.', count, self.api_url)
self.delete_orphans()
def update_organization(self, body):
image = None
image_url = body['logo']
......@@ -157,6 +166,8 @@ class CoursesApiDataLoader(AbstractDataLoader):
logger.info('Retrieved %d course runs from %s.', count, self.api_url)
self.delete_orphans()
def update_course(self, body):
# NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
# which may not be unique for an organization.
......@@ -238,9 +249,10 @@ class DrupalApiDataLoader(AbstractDataLoader):
course = self.update_course(cleaned_body)
self.update_course_run(course, cleaned_body)
# Clean up orphan data on the end of many-to-many relationships
Person.objects.filter(courses_staffed=None).delete()
# Clean Organizations separately from other orphaned instances to avoid removing all orgnaziations
# after an initial data load on an empty table.
Organization.objects.filter(courseorganization__isnull=True).delete()
self.delete_orphans()
logger.info('Retrieved %d course runs from %s.', len(data), self.api_url)
......@@ -370,6 +382,8 @@ class EcommerceApiDataLoader(AbstractDataLoader):
logger.info('Retrieved %d course seats from %s.', count, self.api_url)
self.delete_orphans()
def update_seats(self, body):
course_run_key = body['id']
try:
......
......@@ -17,7 +17,9 @@ from course_discovery.apps.course_metadata.data_loaders import (
from course_discovery.apps.course_metadata.models import (
Course, CourseOrganization, CourseRun, Image, LanguageTag, Organization, Person, Seat, Subject
)
from course_discovery.apps.course_metadata.tests.factories import CourseRunFactory, SeatFactory
from course_discovery.apps.course_metadata.tests.factories import (
CourseRunFactory, SeatFactory, ImageFactory, PersonFactory, VideoFactory
)
ACCESS_TOKEN = 'secret'
COURSES_API_URL = 'https://lms.example.com/api/courses/v1'
......@@ -53,6 +55,14 @@ class AbstractDataLoaderTest(TestCase):
dt = datetime.datetime.utcnow()
self.assertEqual(AbstractDataLoader.parse_date(dt.isoformat()), dt)
def test_delete_orphans(self):
""" Verify the delete_orphans method deletes orphaned instances. """
instances = (ImageFactory(), PersonFactory(), VideoFactory(),)
AbstractDataLoader.delete_orphans()
for instance in instances:
self.assertFalse(instance.__class__.objects.filter(pk=instance.pk).exists()) # pylint: disable=no-member
class DataLoaderTestMixin(object):
api_url = None
......@@ -371,18 +381,20 @@ class CoursesApiDataLoaderTests(DataLoaderTestMixin, TestCase):
@override_settings(MARKETING_API_URL=MARKETING_API_URL)
@ddt.ddt
class DrupalApiDataLoaderTests(DataLoaderTestMixin, TestCase):
EXISTING_COURSE_AND_RUN_DATA = ({
'course_run_key': 'course-v1:SC+BreadX+3T2015',
'course_key': 'SC+BreadX',
'title': 'Bread Baking 101',
'current_language': 'en-us',
}, {
'course_run_key': 'course-v1:TX+T201+3T2015',
'course_key': 'TX+T201',
'title': 'Testing 201',
'current_language': ''
})
EXISTING_COURSE_AND_RUN_DATA = (
{
'course_run_key': 'course-v1:SC+BreadX+3T2015',
'course_key': 'SC+BreadX',
'title': 'Bread Baking 101',
'current_language': 'en-us',
},
{
'course_run_key': 'course-v1:TX+T201+3T2015',
'course_key': 'TX+T201',
'title': 'Testing 201',
'current_language': ''
}
)
# A course which exists, but has no associated runs
EXISTING_COURSE = {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment