Commit 616a0f79 by Renzo Lucioni

Optimize queries made by the catalog courses endpoint

filter() and exclude() calls made to narrow the set of course runs after prefetching were causing prefetched data to be discarded. This resulted in the endpoint making many duplicate queries. These expensive duplicate queries are eliminated by prefetching the filtered set of course runs instead of prefetching all course runs and then trying to filter them.

ECOM-6473
parent 57a8c10d
...@@ -387,8 +387,12 @@ class MinimalCourseRunSerializer(TimestampModelSerializer): ...@@ -387,8 +387,12 @@ class MinimalCourseRunSerializer(TimestampModelSerializer):
seats = SeatSerializer(many=True) seats = SeatSerializer(many=True)
@classmethod @classmethod
def prefetch_queryset(cls): def prefetch_queryset(cls, queryset=None):
return CourseRun.objects.all().select_related('course').prefetch_related( # Explicitly check for None to avoid returning all CourseRuns when the
# queryset passed in happens to be empty.
queryset = queryset if queryset is not None else CourseRun.objects.all()
return queryset.select_related('course').prefetch_related(
'course__partner', 'course__partner',
Prefetch('seats', queryset=SeatSerializer.prefetch_queryset()), Prefetch('seats', queryset=SeatSerializer.prefetch_queryset()),
) )
...@@ -421,8 +425,9 @@ class CourseRunSerializer(MinimalCourseRunSerializer): ...@@ -421,8 +425,9 @@ class CourseRunSerializer(MinimalCourseRunSerializer):
level_type = serializers.SlugRelatedField(read_only=True, slug_field='name') level_type = serializers.SlugRelatedField(read_only=True, slug_field='name')
@classmethod @classmethod
def prefetch_queryset(cls): def prefetch_queryset(cls, queryset=None):
queryset = super().prefetch_queryset() queryset = super().prefetch_queryset(queryset=queryset)
return queryset.select_related('language', 'video').prefetch_related( return queryset.select_related('language', 'video').prefetch_related(
'transcript_languages', 'transcript_languages',
Prefetch('staff', queryset=PersonSerializer.prefetch_queryset()), Prefetch('staff', queryset=PersonSerializer.prefetch_queryset()),
...@@ -597,12 +602,34 @@ class CatalogCourseSerializer(CourseSerializer): ...@@ -597,12 +602,34 @@ class CatalogCourseSerializer(CourseSerializer):
""" """
course_runs = serializers.SerializerMethodField() course_runs = serializers.SerializerMethodField()
@classmethod
def prefetch_queryset(cls, queryset=None):
    """
    Return a Course queryset with this serializer's related data prefetched.

    Similar to the CourseSerializer's prefetch_queryset, except that only a
    filtered set of course runs (active, enrollable and marketable) is
    prefetched, and it is stored on each course as `available_course_runs`
    rather than in the `course_runs` related manager's cache.

    Arguments:
        queryset: Optional Course queryset to build on. When None, all
            courses are used.
    """
    # Compare against None explicitly so that an empty queryset passed in
    # by the caller is kept instead of being replaced by all courses.
    if queryset is None:
        queryset = Course.objects.all()

    enrollable_runs = CourseRun.objects.active().enrollable().marketable()
    courses = queryset.select_related('level_type', 'video', 'partner')

    # to_attr keeps the filtered result under its own attribute name, which
    # is less ambiguous than placing a filtered result in the related
    # manager's cache and reading it back through all().
    course_runs_prefetch = Prefetch(
        'course_runs',
        queryset=CourseRunSerializer.prefetch_queryset(queryset=enrollable_runs),
        to_attr='available_course_runs'
    )

    return courses.prefetch_related(
        'expected_learning_items',
        'prerequisites',
        'subjects',
        course_runs_prefetch,
        Prefetch('authoring_organizations', queryset=OrganizationSerializer.prefetch_queryset()),
        Prefetch('sponsoring_organizations', queryset=OrganizationSerializer.prefetch_queryset()),
    )
def get_course_runs(self, course): def get_course_runs(self, course):
return CourseRunSerializer( return CourseRunSerializer(
# TODO: These queryset methods chain filter() and exclude() calls, course.available_course_runs,
# causing prefetched results to be discarded. They should be replaced
# with Python-based filtering that preserves the prefetched data.
course.course_runs.active().enrollable().marketable(),
many=True, many=True,
context=self.context context=self.context
).data ).data
......
...@@ -172,9 +172,12 @@ class CatalogViewSetTests(ElasticsearchTestMixin, SerializationMixin, OAuth2Mixi ...@@ -172,9 +172,12 @@ class CatalogViewSetTests(ElasticsearchTestMixin, SerializationMixin, OAuth2Mixi
# to be included. # to be included.
CourseRunFactory(course=course) CourseRunFactory(course=course)
with self.assertNumQueries(26): with self.assertNumQueries(18):
response = self.client.get(url) response = self.client.get(url)
# Prefetched results are assigned to a custom attribute.
course.available_course_runs = [course_run]
assert response.status_code == 200 assert response.status_code == 200
assert response.data['results'] == self.serialize_catalog_course([course], many=True) assert response.data['results'] == self.serialize_catalog_course([course], many=True)
......
...@@ -84,7 +84,8 @@ class CatalogViewSet(viewsets.ModelViewSet): ...@@ -84,7 +84,8 @@ class CatalogViewSet(viewsets.ModelViewSet):
""" """
Retrieve the list of courses contained within this catalog. Retrieve the list of courses contained within this catalog.
Only courses with at least one active and marketable course run are returned. Only courses with at least one course run that can be enrolled in immediately,
is ongoing or yet to start, and appears on the marketing site are returned.
--- ---
serializer: serializers.CatalogCourseSerializer serializer: serializers.CatalogCourseSerializer
""" """
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment