cache results when requesting facet counts

7f9e7bd2 · Anthony Mangano · 90598c77 · 7f9e7bd2 · 7f9e7bd2 · 7f9e7bd2
Commit 7f9e7bd2 authored Mar 16, 2017 by Anthony Mangano
Showing with 65 additions and 0 deletions

course_discovery/apps/edx_haystack_extensions/distinct_counts/query.py
+23 -0

course_discovery/apps/edx_haystack_extensions/tests/test_distinct_counts/test_query.py
+37 -0

course_discovery/settings/base.py
+5 -0

No files found.
--- a/course_discovery/apps/edx_haystack_extensions/distinct_counts/query.py
+++ b/course_discovery/apps/edx_haystack_extensions/distinct_counts/query.py
+from django.conf import settings
 from haystack.query import SearchQuerySet
 from course_discovery.apps.edx_haystack_extensions.distinct_counts.backends import DistinctCountsSearchQuery
@@ -49,3 +50,25 @@ class DistinctCountsSearchQuerySet(SearchQuerySet):
        if self._distinct_result_count is None:
            self._distinct_result_count = self.query.get_distinct_count()
        return self._distinct_result_count
+    def facet_counts(self):
+        """
+        Return the facet counts for this queryset, running the query first if it has not run yet.
+        This overrides the original haystack implementation: when we are forced to run the query,
+        the results that come back with it are cached on this queryset, so no further request is
+        needed to fetch them later. Original implementation:
+        https://github.com/django-haystack/django-haystack/blob/master/haystack/query.py#L532
+        """
+        if not self.query.has_run():
+            # Warm the cache with the first page of results. The facet counts are cached along
+            # with those results, which could reduce the number of queries required to complete
+            # a search request.
+            #
+            # Note: if the query matches fewer results than the requested count, ES simply returns
+            # what it has found without raising an exception.
+            warming_count = settings.DISTINCT_COUNTS_QUERY_CACHE_WARMING_COUNT
+            self._fill_cache(0, warming_count)
+        return self.query.get_facet_counts()
--- a/course_discovery/apps/edx_haystack_extensions/tests/test_distinct_counts/test_query.py
+++ b/course_discovery/apps/edx_haystack_extensions/tests/test_distinct_counts/test_query.py
 import datetime
+import mock
 import pytest
 from django.test import TestCase
@@ -123,3 +124,39 @@ class DistinctCountsSearchQuerySetTests(ElasticsearchTestMixin, TestCase):
        hidden_count, hidden_distinct_count = facet_counts['queries']['hidden']
        assert hidden_count == 2
        assert hidden_distinct_count == 1
+    def test_facet_counts_caches_results(self):
+        """ Verify that facet_counts caches results when it is forced to run the query."""
+        # Three runs of a single course, all titled 'foo' so that the title filter below matches each of them.
+        course = CourseFactory()
+        runs = [
+            CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course),
+            CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course),
+            CourseRunFactory(title='foo', pacing_type='instructor_paced', hidden=False, course=course),
+        ]
+        queryset = SearchQuerySet().filter(title='foo').models(CourseRun)
+        queryset = queryset.facet('pacing_type').query_facet('hidden', 'hidden:true')
+        dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset).with_distinct_counts('aggregation_key')
+        # This should force the query to run and the results to be cached
+        facet_counts = dc_queryset.facet_counts()
+        # Patch run() from here on so that any further attempt to execute the query is recorded by the mock.
+        with mock.patch.object(DistinctCountsSearchQuery, 'run') as mock_run:
+            # Calling facet_counts again shouldn't result in an additional query
+            cached_facet_counts = dc_queryset.facet_counts()
+            assert not mock_run.called
+            assert facet_counts == cached_facet_counts
+            # Calling count shouldn't result in another query, as we should have already cached it with the
+            # first request.
+            count = dc_queryset.count()
+            assert not mock_run.called
+            assert count == len(runs)
+            # Fetching the results shouldn't result in another query, as we should have already cached them
+            # with the initial request.
+            results = dc_queryset[:]
+            assert not mock_run.called
+            # Compare by key as sets, since the order of the returned results is not being checked here.
+            expected = {run.key for run in runs}
+            actual = {run.key for run in results}
+            assert expected == actual
--- a/course_discovery/settings/base.py
+++ b/course_discovery/settings/base.py
@@ -454,6 +454,11 @@ SEARCH_FACET_LIMIT = 10000
 DISTINCT_COUNTS_HIT_PRECISION = 1500
 DISTINCT_COUNTS_FACET_PRECISION = 250
+# The number of records that should be requested when warming the SearchQuerySet cache, i.e. when facet counts
+# are requested before the query has run. Set this to equal the number of records typically requested with each
+# search query in order to reduce the number of queries that need to be executed.
+DISTINCT_COUNTS_QUERY_CACHE_WARMING_COUNT = 20
 DEFAULT_PARTNER_ID = None
 # See: https://docs.djangoproject.com/en/dev/ref/settings/#site-id