Commit a03d7675 by Clinton Blackburn

Merge pull request #79 from edx/clintonb/remove-orphans

Deleting orphaned media and people models after loading data
parents 5def6946 5d289dc5
from django.db import models
from django.test import TestCase
from course_discovery.apps.core.utils import get_all_related_field_names
class UnrelatedModel(models.Model):
class Meta:
app_label = 'core'
managed = False
class RelatedModel(models.Model):
class Meta:
app_label = 'core'
managed = False
class ForeignRelatedModel(models.Model):
fk = models.ForeignKey(RelatedModel)
class Meta:
app_label = 'core'
managed = False
class M2MRelatedModel(models.Model):
m2m = models.ManyToManyField(RelatedModel)
class Meta:
app_label = 'core'
managed = False
class ModelUtilTests(TestCase):
def test_get_all_related_field_names(self):
""" Verify the method returns the names of all relational fields for a model. """
self.assertEqual(get_all_related_field_names(UnrelatedModel), [])
self.assertEqual(set(get_all_related_field_names(RelatedModel)), {'foreignrelatedmodel', 'm2mrelatedmodel'})
...@@ -28,3 +28,33 @@ class ElasticsearchUtils(object): ...@@ -28,3 +28,33 @@ class ElasticsearchUtils(object):
} }
es.indices.update_aliases(body) es.indices.update_aliases(body)
logger.info('...alias updated.') logger.info('...alias updated.')
def get_all_related_field_names(model):
"""
Returns the names of all related fields (e.g. ForeignKey ManyToMany)
Args:
model (Model): Model whose field names should be returned
Returns:
list[str]
"""
fields = model._meta._get_fields(forward=False) # pylint: disable=protected-access
names = set([field.name for field in fields])
return list(names)
def delete_orphans(model):
"""
Deletes all instances of the given model with no relationships to other models.
Args:
model (Model): Model whose instances should be deleted
Returns:
None
"""
field_names = get_all_related_field_names(model)
kwargs = {'{0}__isnull'.format(field_name): True for field_name in field_names}
model.objects.filter(**kwargs).delete()
...@@ -4,13 +4,14 @@ import logging ...@@ -4,13 +4,14 @@ import logging
from decimal import Decimal from decimal import Decimal
from urllib.parse import urljoin from urllib.parse import urljoin
import html2text
from dateutil.parser import parse from dateutil.parser import parse
from django.conf import settings from django.conf import settings
from edx_rest_api_client.client import EdxRestApiClient from edx_rest_api_client.client import EdxRestApiClient
import html2text
from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.keys import CourseKey
from course_discovery.apps.core.models import Currency from course_discovery.apps.core.models import Currency
from course_discovery.apps.core.utils import delete_orphans
from course_discovery.apps.course_metadata.models import ( from course_discovery.apps.course_metadata.models import (
Course, CourseOrganization, CourseRun, Image, LanguageTag, LevelType, Organization, Person, Seat, Subject, Video Course, CourseOrganization, CourseRun, Image, LanguageTag, LevelType, Organization, Person, Seat, Subject, Video
) )
...@@ -88,6 +89,12 @@ class AbstractDataLoader(metaclass=abc.ABCMeta): ...@@ -88,6 +89,12 @@ class AbstractDataLoader(metaclass=abc.ABCMeta):
course_run_key = CourseKey.from_string(course_run_key_str) course_run_key = CourseKey.from_string(course_run_key_str)
return '{org}+{course}'.format(org=course_run_key.org, course=course_run_key.course) return '{org}+{course}'.format(org=course_run_key.org, course=course_run_key.course)
@classmethod
def delete_orphans(cls):
""" Remove orphaned objects from the database. """
for model in (Image, Person, Video):
delete_orphans(model)
class OrganizationsApiDataLoader(AbstractDataLoader): class OrganizationsApiDataLoader(AbstractDataLoader):
""" Loads organizations from the Organizations API. """ """ Loads organizations from the Organizations API. """
...@@ -116,6 +123,8 @@ class OrganizationsApiDataLoader(AbstractDataLoader): ...@@ -116,6 +123,8 @@ class OrganizationsApiDataLoader(AbstractDataLoader):
logger.info('Retrieved %d organizations from %s.', count, self.api_url) logger.info('Retrieved %d organizations from %s.', count, self.api_url)
self.delete_orphans()
def update_organization(self, body): def update_organization(self, body):
image = None image = None
image_url = body['logo'] image_url = body['logo']
...@@ -157,6 +166,8 @@ class CoursesApiDataLoader(AbstractDataLoader): ...@@ -157,6 +166,8 @@ class CoursesApiDataLoader(AbstractDataLoader):
logger.info('Retrieved %d course runs from %s.', count, self.api_url) logger.info('Retrieved %d course runs from %s.', count, self.api_url)
self.delete_orphans()
def update_course(self, body): def update_course(self, body):
# NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number, # NOTE (CCB): Use the data from the CourseKey since the Course API exposes display names for org and number,
# which may not be unique for an organization. # which may not be unique for an organization.
...@@ -238,9 +249,10 @@ class DrupalApiDataLoader(AbstractDataLoader): ...@@ -238,9 +249,10 @@ class DrupalApiDataLoader(AbstractDataLoader):
course = self.update_course(cleaned_body) course = self.update_course(cleaned_body)
self.update_course_run(course, cleaned_body) self.update_course_run(course, cleaned_body)
# Clean up orphan data on the end of many-to-many relationships # Clean Organizations separately from other orphaned instances to avoid removing all orgnaziations
Person.objects.filter(courses_staffed=None).delete() # after an initial data load on an empty table.
Organization.objects.filter(courseorganization__isnull=True).delete() Organization.objects.filter(courseorganization__isnull=True).delete()
self.delete_orphans()
logger.info('Retrieved %d course runs from %s.', len(data), self.api_url) logger.info('Retrieved %d course runs from %s.', len(data), self.api_url)
...@@ -370,6 +382,8 @@ class EcommerceApiDataLoader(AbstractDataLoader): ...@@ -370,6 +382,8 @@ class EcommerceApiDataLoader(AbstractDataLoader):
logger.info('Retrieved %d course seats from %s.', count, self.api_url) logger.info('Retrieved %d course seats from %s.', count, self.api_url)
self.delete_orphans()
def update_seats(self, body): def update_seats(self, body):
course_run_key = body['id'] course_run_key = body['id']
try: try:
......
...@@ -17,7 +17,9 @@ from course_discovery.apps.course_metadata.data_loaders import ( ...@@ -17,7 +17,9 @@ from course_discovery.apps.course_metadata.data_loaders import (
from course_discovery.apps.course_metadata.models import ( from course_discovery.apps.course_metadata.models import (
Course, CourseOrganization, CourseRun, Image, LanguageTag, Organization, Person, Seat, Subject Course, CourseOrganization, CourseRun, Image, LanguageTag, Organization, Person, Seat, Subject
) )
from course_discovery.apps.course_metadata.tests.factories import CourseRunFactory, SeatFactory from course_discovery.apps.course_metadata.tests.factories import (
CourseRunFactory, SeatFactory, ImageFactory, PersonFactory, VideoFactory
)
ACCESS_TOKEN = 'secret' ACCESS_TOKEN = 'secret'
COURSES_API_URL = 'https://lms.example.com/api/courses/v1' COURSES_API_URL = 'https://lms.example.com/api/courses/v1'
...@@ -53,6 +55,14 @@ class AbstractDataLoaderTest(TestCase): ...@@ -53,6 +55,14 @@ class AbstractDataLoaderTest(TestCase):
dt = datetime.datetime.utcnow() dt = datetime.datetime.utcnow()
self.assertEqual(AbstractDataLoader.parse_date(dt.isoformat()), dt) self.assertEqual(AbstractDataLoader.parse_date(dt.isoformat()), dt)
def test_delete_orphans(self):
""" Verify the delete_orphans method deletes orphaned instances. """
instances = (ImageFactory(), PersonFactory(), VideoFactory(),)
AbstractDataLoader.delete_orphans()
for instance in instances:
self.assertFalse(instance.__class__.objects.filter(pk=instance.pk).exists()) # pylint: disable=no-member
class DataLoaderTestMixin(object): class DataLoaderTestMixin(object):
api_url = None api_url = None
...@@ -371,18 +381,20 @@ class CoursesApiDataLoaderTests(DataLoaderTestMixin, TestCase): ...@@ -371,18 +381,20 @@ class CoursesApiDataLoaderTests(DataLoaderTestMixin, TestCase):
@override_settings(MARKETING_API_URL=MARKETING_API_URL) @override_settings(MARKETING_API_URL=MARKETING_API_URL)
@ddt.ddt @ddt.ddt
class DrupalApiDataLoaderTests(DataLoaderTestMixin, TestCase): class DrupalApiDataLoaderTests(DataLoaderTestMixin, TestCase):
EXISTING_COURSE_AND_RUN_DATA = (
EXISTING_COURSE_AND_RUN_DATA = ({ {
'course_run_key': 'course-v1:SC+BreadX+3T2015', 'course_run_key': 'course-v1:SC+BreadX+3T2015',
'course_key': 'SC+BreadX', 'course_key': 'SC+BreadX',
'title': 'Bread Baking 101', 'title': 'Bread Baking 101',
'current_language': 'en-us', 'current_language': 'en-us',
}, { },
'course_run_key': 'course-v1:TX+T201+3T2015', {
'course_key': 'TX+T201', 'course_run_key': 'course-v1:TX+T201+3T2015',
'title': 'Testing 201', 'course_key': 'TX+T201',
'current_language': '' 'title': 'Testing 201',
}) 'current_language': ''
}
)
# A course which exists, but has no associated runs # A course which exists, but has no associated runs
EXISTING_COURSE = { EXISTING_COURSE = {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment