Commit 7f9e7bd2 by Anthony Mangano

cache results when requesting facet counts

parent 90598c77
from django.conf import settings
from haystack.query import SearchQuerySet
from course_discovery.apps.edx_haystack_extensions.distinct_counts.backends import DistinctCountsSearchQuery
......@@ -49,3 +50,25 @@ class DistinctCountsSearchQuerySet(SearchQuerySet):
if self._distinct_result_count is None:
self._distinct_result_count = self.query.get_distinct_count()
return self._distinct_result_count
def facet_counts(self):
    """
    Return the facet counts for this queryset, running the query first if necessary.

    This replaces the stock haystack implementation so that, when the query has not been
    executed yet, the request made to obtain the facet counts also fills the result cache.
    Later calls can then reuse what came back instead of issuing another request.
    Original implementation:
    https://github.com/django-haystack/django-haystack/blob/master/haystack/query.py#L532
    """
    if not self.query.has_run():
        # Nothing has been fetched yet: run the query by loading the first page of results
        # into the cache. The facet counts come back with that page and are cached with it,
        # which can reduce the number of queries needed to complete a search request.
        #
        # Note: when the query matches fewer results than the requested count, ES simply
        # returns what it found rather than raising an exception.
        self._fill_cache(0, settings.DISTINCT_COUNTS_QUERY_CACHE_WARMING_COUNT)

    return self.query.get_facet_counts()
import datetime
import mock
import pytest
from django.test import TestCase
......@@ -123,3 +124,39 @@ class DistinctCountsSearchQuerySetTests(ElasticsearchTestMixin, TestCase):
hidden_count, hidden_distinct_count = facet_counts['queries']['hidden']
assert hidden_count == 2
assert hidden_distinct_count == 1
def test_facet_counts_caches_results(self):
""" Verify that facet_counts caches results when it is forced to run the query."""
# Create three runs of a single course that all share the title 'foo', which is the value the
# queryset below filters on.
course = CourseFactory()
runs = [
CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course),
CourseRunFactory(title='foo', pacing_type='self_paced', hidden=True, course=course),
CourseRunFactory(title='foo', pacing_type='instructor_paced', hidden=False, course=course),
]
# Build a CourseRun query with a pacing_type field facet and a 'hidden' query facet, then wrap it
# in a DistinctCountsSearchQuerySet with distinct counts keyed on 'aggregation_key'.
queryset = SearchQuerySet().filter(title='foo').models(CourseRun)
queryset = queryset.facet('pacing_type').query_facet('hidden', 'hidden:true')
dc_queryset = DistinctCountsSearchQuerySet.from_queryset(queryset).with_distinct_counts('aggregation_key')
# This should force the query to run and the results to be cached
facet_counts = dc_queryset.facet_counts()
# Replace DistinctCountsSearchQuery.run with a mock so that any further attempt to execute the
# query is recorded on mock_run instead of going unnoticed.
with mock.patch.object(DistinctCountsSearchQuery, 'run') as mock_run:
# Calling facet_counts again shouldn't result in an additional query
cached_facet_counts = dc_queryset.facet_counts()
assert not mock_run.called
assert facet_counts == cached_facet_counts
# Calling count shouldn't result in another query, as we should have already cached it with the
# first request.
count = dc_queryset.count()
assert not mock_run.called
assert count == len(runs)
# Fetching the results shouldn't result in another query, as we should have already cached them
# with the initial request.
results = dc_queryset[:]
assert not mock_run.called
# Compare the run keys as sets so the check does not depend on the order of the results.
expected = {run.key for run in runs}
actual = {run.key for run in results}
assert expected == actual
......@@ -454,6 +454,11 @@ SEARCH_FACET_LIMIT = 10000
DISTINCT_COUNTS_HIT_PRECISION = 1500
DISTINCT_COUNTS_FACET_PRECISION = 250
# The number of records requested when warming the SearchQuerySet cache; it is used as the end index of
# the first page of results fetched when a facet count request forces the query to run. Set this to the
# number of records typically requested with each search query, so that the warmed page can serve the
# request and fewer queries need to be executed.
DISTINCT_COUNTS_QUERY_CACHE_WARMING_COUNT = 20
DEFAULT_PARTNER_ID = None
# See: https://docs.djangoproject.com/en/dev/ref/settings/#site-id
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment