Commit e14d8cb1 by Matthew Piatetsky

Move ElasticSearchBoostConfig into settings

ECOM-7079
parent dd6cd90f
......@@ -46,7 +46,7 @@ class AffiliateWindowViewSetTests(ElasticsearchTestMixin, SerializationMixin, AP
def test_affiliate_with_supported_seats(self):
""" Verify that endpoint returns course runs for verified and professional seats only. """
with self.assertNumQueries(8):
with self.assertNumQueries(7):
response = self.client.get(self.affiliate_url)
self.assertEqual(response.status_code, 200)
......@@ -130,7 +130,7 @@ class AffiliateWindowViewSetTests(ElasticsearchTestMixin, SerializationMixin, AP
# Superusers can view all catalogs
self.client.force_authenticate(superuser)
with self.assertNumQueries(5):
with self.assertNumQueries(4):
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
......@@ -140,7 +140,7 @@ class AffiliateWindowViewSetTests(ElasticsearchTestMixin, SerializationMixin, AP
self.assertEqual(response.status_code, 403)
catalog.viewers = [self.user]
with self.assertNumQueries(8):
with self.assertNumQueries(7):
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
......
......@@ -147,7 +147,7 @@ class CatalogViewSetTests(ElasticsearchTestMixin, SerializationMixin, OAuth2Mixi
for course_run in excluded_runs:
SeatFactory(course_run=course_run)
with self.assertNumQueries(28):
with self.assertNumQueries(27):
response = self.client.get(url)
assert response.status_code == 200
assert response.data['results'] == self.serialize_catalog_course(courses, many=True)
......@@ -173,7 +173,7 @@ class CatalogViewSetTests(ElasticsearchTestMixin, SerializationMixin, OAuth2Mixi
url = reverse('api:v1:catalog-csv', kwargs={'id': self.catalog.id})
with self.assertNumQueries(18):
with self.assertNumQueries(17):
response = self.client.get(url)
course_run = self.serialize_catalog_flat_course_run(self.course_run)
......
......@@ -162,7 +162,7 @@ class CourseRunViewSetTests(SerializationMixin, ElasticsearchTestMixin, APITestC
query = 'title:Some random title'
url = '{root}?q={query}'.format(root=reverse('api:v1:course_run-list'), query=query)
with self.assertNumQueries(38):
with self.assertNumQueries(36):
response = self.client.get(url)
actual_sorted = sorted(response.data['results'], key=lambda course_run: course_run['key'])
......
......@@ -170,7 +170,7 @@ class CourseViewSetTests(SerializationMixin, APITestCase):
query = 'title:' + title
url = '{root}?q={query}'.format(root=reverse('api:v1:course-list'), query=query)
with self.assertNumQueries(59):
with self.assertNumQueries(58):
response = self.client.get(url)
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))
......
......@@ -3,12 +3,9 @@ import json
import urllib.parse
import ddt
import pytz
from django.conf import settings
from django.core.urlresolvers import reverse
from django.test import TestCase
from haystack.query import SearchQuerySet
from mock import patch
from rest_framework.test import APITestCase
from course_discovery.apps.api.serializers import (CourseRunSearchSerializer, ProgramSearchSerializer,
......@@ -17,7 +14,7 @@ from course_discovery.apps.api.v1.views.search import TypeaheadSearchView
from course_discovery.apps.core.tests.factories import USER_PASSWORD, PartnerFactory, UserFactory
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.course_metadata.choices import CourseRunStatus, ProgramStatus
from course_discovery.apps.course_metadata.models import CourseRun, Program, ProgramType
from course_discovery.apps.course_metadata.models import CourseRun, Program
from course_discovery.apps.course_metadata.tests.factories import (CourseFactory, CourseRunFactory, OrganizationFactory,
ProgramFactory)
......@@ -269,7 +266,7 @@ class CourseRunSearchViewSetTests(DefaultPartnerMixin, SerializationMixin, Login
ProgramFactory(courses=course_list, status=ProgramStatus.Active, excluded_course_runs=excluded_course_run_list)
with self.assertNumQueries(6):
with self.assertNumQueries(4):
response = self.get_response('software', faceted=False)
self.assertEqual(response.status_code, 200)
......@@ -292,7 +289,7 @@ class CourseRunSearchViewSetTests(DefaultPartnerMixin, SerializationMixin, Login
active_program = ProgramFactory(courses=[course_run.course], status=ProgramStatus.Active)
ProgramFactory(courses=[course_run.course], status=program_status)
with self.assertNumQueries(8):
with self.assertNumQueries(5):
response = self.get_response('software', faceted=False)
self.assertEqual(response.status_code, 200)
......@@ -586,125 +583,3 @@ class TypeaheadSearchViewTests(DefaultPartnerMixin, TypeaheadSerializationMixin,
edx_program = programs[0]
self.assertDictEqual(response.data, {'course_runs': [self.serialize_course_run(edx_course_run)],
'programs': [self.serialize_program(edx_program)]})
@ddt.ddt
class SearchBoostingTests(ElasticsearchTestMixin, TestCase):
    """ Tests of search result ordering and scores produced by the boost configuration."""

    def build_normalized_course_run(self, **kwargs):
        """ Create a CourseRun via CourseRunFactory with baseline values for boost-related fields.

        Any keyword argument overrides the matching baseline value and is forwarded to the factory.
        """
        factory_kwargs = dict(
            pacing_type='instructor_paced',
            start=datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(weeks=52),
            enrollment_start=datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(weeks=50),
            enrollment_end=None,
        )
        factory_kwargs.update(kwargs)
        return CourseRunFactory(**factory_kwargs)

    def _assert_title_boost(self, hits, candidate, expects_boost):
        """ Check the two search hits: either ``candidate`` ranks first with a higher score, or scores tie."""
        if not expects_boost:
            self.assertEqual(hits[0].score, hits[1].score)
            return

        self.assertGreater(hits[0].score, hits[1].score)
        self.assertEqual(candidate.title, hits[0].title)

    def test_start_date_boosting(self):
        """ Verify a course run starting sooner is boosted over one starting later."""
        current_time = datetime.datetime.now(pytz.timezone('utc'))
        self.build_normalized_course_run(start=current_time + datetime.timedelta(weeks=10))
        sooner_run = self.build_normalized_course_run(start=current_time + datetime.timedelta(weeks=1))

        hits = SearchQuerySet().models(CourseRun).all()
        self.assertEqual(2, len(hits))
        self.assertGreater(hits[0].score, hits[1].score)
        # Compare whole seconds only; the indexed value may not keep sub-second precision.
        expected_start = int(sooner_run.start.timestamp())
        actual_start = int(hits[0].start.timestamp())  # pylint: disable=no-member
        self.assertEqual(expected_start, actual_start)

    def test_self_paced_boosting(self):
        """ Verify that self paced courses are boosted over instructor led courses."""
        self.build_normalized_course_run(pacing_type='instructor_paced')
        self_paced_run = self.build_normalized_course_run(pacing_type='self_paced')

        hits = SearchQuerySet().models(CourseRun).all()
        self.assertEqual(2, len(hits))
        self.assertGreater(hits[0].score, hits[1].score)
        self.assertEqual(self_paced_run.pacing_type, hits[0].pacing_type)

    @ddt.data(
        # Case 1: Should not get boost if has_enrollable_paid_seats is False, has_enrollable_paid_seats is None or
        #   paid_seat_enrollment_end is in the past.
        (False, None, False),
        (None, None, False),
        (True, datetime.datetime.now(pytz.timezone('utc')) - datetime.timedelta(days=15), False),

        # Case 2: Should get boost if has_enrollable_paid_seats is True and paid_seat_enrollment_end is None or
        #   in the future.
        (True, None, True),
        (True, datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(days=15), True)
    )
    @ddt.unpack
    def test_enrollable_paid_seat_boosting(self, has_enrollable_paid_seats, paid_seat_enrollment_end, expects_boost):
        """ Verify that CourseRuns for which an unenrolled user may enroll and purchase a paid Seat are boosted."""
        # Control record: patched so that it should never receive the paid-seat boost.
        with patch.object(CourseRun, 'has_enrollable_paid_seats', return_value=False), \
                patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=None):
            self.build_normalized_course_run(title='test1')

        # Record under test: patched with the parametrized values, so it may be boosted.
        with patch.object(CourseRun, 'has_enrollable_paid_seats', return_value=has_enrollable_paid_seats), \
                patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=paid_seat_enrollment_end):
            candidate = self.build_normalized_course_run(title='test2')

        hits = SearchQuerySet().models(CourseRun).all()
        self.assertEqual(2, len(hits))
        self._assert_title_boost(hits, candidate, expects_boost)

    @ddt.data('MicroMasters', 'Professional Certificate')
    def test_program_type_boosting(self, program_type):
        """ Verify MicroMasters and Professional Certificate are boosted over XSeries."""
        ProgramFactory(type=ProgramType.objects.get(name='XSeries'))
        boosted_program = ProgramFactory(type=ProgramType.objects.get(name=program_type))

        hits = SearchQuerySet().models(Program).all()
        self.assertEqual(2, len(hits))
        self.assertGreater(hits[0].score, hits[1].score)
        self.assertEqual(str(boosted_program.type), str(hits[0].type))

    @ddt.data(
        # Case 1: Should get boost if enrollment_start and enrollment_end unspecified.
        (None, None, True),

        # Case 2: Should get boost if enrollment_start unspecified and enrollment_end in future.
        (None, datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(days=15), True),

        # Case 3: Should get boost if enrollment_start in past and enrollment_end unspecified.
        (datetime.datetime.now(pytz.timezone('utc')) - datetime.timedelta(days=15), None, True),

        # Case 4: Should get boost if enrollment_start in past and enrollment_end in future.
        (datetime.datetime.now(pytz.timezone('utc')) - datetime.timedelta(days=15),
         datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(days=15),
         True),

        # Case 5: Should not get boost if enrollment_start in future.
        (datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(days=15), None, False),

        # Case 6: Should not get boost if enrollment_end in past.
        (None, datetime.datetime.now(pytz.timezone('utc')) - datetime.timedelta(days=15), False),
    )
    @ddt.unpack
    def test_enrollable_course_run_boosting(self, enrollment_start, enrollment_end, expects_boost):
        """ Verify that enrollable CourseRuns are boosted."""
        # Control record: built with the baseline values, so it should never be boosted.
        self.build_normalized_course_run(title='test1')

        # Record under test: uses the parametrized enrollment window.
        candidate = self.build_normalized_course_run(
            title='test2',
            enrollment_start=enrollment_start,
            enrollment_end=enrollment_end
        )

        hits = SearchQuerySet().models(CourseRun).all()
        self.assertEqual(2, len(hits))
        self._assert_title_boost(hits, candidate, expects_boost)
from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend, ElasticsearchSearchEngine
from course_discovery.apps.edx_haystack_extensions.models import ElasticsearchBoostConfig
from course_discovery.apps.edx_haystack_extensions.elasticsearch_boost_config import get_elasticsearch_boost_config
class SimpleQuerySearchBackendMixin(object):
......@@ -37,8 +37,7 @@ class SimpleQuerySearchBackendMixin(object):
}
# https://www.elastic.co/guide/en/elasticsearch/reference/1.7/query-dsl-function-score-query.html
function_score_config = ElasticsearchBoostConfig.get_solo().function_score
function_score_config = get_elasticsearch_boost_config()['function_score']
function_score_config['query'] = {
'query_string': simple_query
}
......
def get_elasticsearch_boost_config():
    """Return the default Elasticsearch ``function_score`` boost configuration.

    The dictionary is rebuilt from scratch on every call, so a caller may
    mutate the result (for example by adding a ``query`` entry) without
    affecting the value returned by later calls.

    Returns:
        dict: A single-key mapping, ``{'function_score': {...}}``, holding the
        boost/score modes and the ordered list of boost functions.
    """
    def exists(field_name):
        # Clause matching documents in which ``field_name`` is present.
        return {'exists': {'field': field_name}}

    def compared_to_now(field_name, operator):
        # Range clause comparing ``field_name`` against 'now' with ``operator``.
        return {'range': {field_name: {operator: 'now'}}}

    def term_boost(term_field, term_value):
        # Boost function with a weight of 1.0 for documents whose ``term_field`` equals ``term_value``.
        return {'filter': {'term': {term_field: term_value}}, 'weight': 1.0}

    # Linear decay on the ``start`` field, centred on the current time.
    start_date_boost = {
        'linear': {'start': {'origin': 'now', 'decay': 0.95, 'scale': '1d'}},
        'weight': 5.0,
    }

    # Boost function for CourseRuns with enrollable paid Seats.
    # We want to boost if:
    #   - The course run has at least one enrollable paid Seat (has_enrollable_paid_seats is True)
    # AND one of the following two conditions is true
    #   - The paid_seat_enrollment_end is unspecified.
    #   - The paid_seat_enrollment_end is in the future.
    # We apply a weight of 1.0 to match the boost given for self paced courses.
    paid_seat_boost = {
        'filter': {
            'bool': {
                'must': [
                    exists('has_enrollable_paid_seats'),
                    {'term': {'has_enrollable_paid_seats': True}},
                ],
                'should': [
                    {'bool': {'must_not': exists('paid_seat_enrollment_end')}},
                    compared_to_now('paid_seat_enrollment_end', 'gte'),
                ],
            },
        },
        'weight': 1.0,
    }

    # Boost function for enrollable CourseRuns.
    # We want to boost if:
    #   - enrollment_start and enrollment_end are unspecified
    #   - enrollment_start is unspecified and enrollment_end is in the future
    #   - enrollment_end is unspecified and enrollment_start is in the past
    #   - enrollment_start is in the past and enrollment_end is in the future
    # We apply a weight of 1.0 to match the boost given for self paced and enrollable paid courses.
    enrollable_boost = {
        'filter': {
            'bool': {
                'should': [
                    {'bool': {
                        'must_not': [
                            exists('enrollment_start'),
                            exists('enrollment_end'),
                        ],
                    }},
                    {'bool': {
                        'must_not': exists('enrollment_start'),
                        'must': [
                            exists('enrollment_end'),
                            compared_to_now('enrollment_end', 'gt'),
                        ],
                    }},
                    {'bool': {
                        'must_not': exists('enrollment_end'),
                        'must': [
                            exists('enrollment_start'),
                            compared_to_now('enrollment_start', 'lte'),
                        ],
                    }},
                    {'bool': {
                        'must': [
                            exists('enrollment_start'),
                            exists('enrollment_end'),
                            compared_to_now('enrollment_start', 'lte'),
                            compared_to_now('enrollment_end', 'gt'),
                        ],
                    }},
                ],
            },
        },
        'weight': 1.0,
    }

    return {
        'function_score': {
            'boost_mode': 'sum',
            'boost': 1.0,
            'score_mode': 'sum',
            'functions': [
                term_boost('pacing_type_exact', 'self_paced'),
                term_boost('type_exact', 'Professional Certificate'),
                term_boost('type_exact', 'MicroMasters'),
                start_date_boost,
                paid_seat_boost,
                enrollable_boost,
            ],
        },
    }
......@@ -4,7 +4,7 @@ from haystack.backends import BaseSearchBackend
from mock import patch
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.edx_haystack_extensions.models import ElasticsearchBoostConfig
from course_discovery.apps.edx_haystack_extensions.elasticsearch_boost_config import get_elasticsearch_boost_config
class SearchBackendTestMixin(ElasticsearchTestMixin):
......@@ -37,9 +37,9 @@ class SimpleQuerySearchBackendMixinTestMixin(SearchBackendTestMixin):
}
def _default_function_score(self):
function_score = {'function_score': ElasticsearchBoostConfig.get_solo().function_score}
function_score['function_score']['query'] = {'query_string': self.simple_query}
return function_score
boost_config = get_elasticsearch_boost_config()
boost_config['function_score']['query'] = {'query_string': self.simple_query}
return boost_config
def test_build_search_kwargs_all_qs_with_filter(self):
with patch.object(BaseSearchBackend, 'build_models_list', return_value=['course_metadata.course']):
......@@ -70,27 +70,29 @@ class SimpleQuerySearchBackendMixinTestMixin(SearchBackendTestMixin):
self.assertDictEqual(kwargs['query'], self._default_function_score())
def test_build_search_kwargs_function_score(self):
function_score = {
'functions': [
{
'filter': {
'term': {
'type': 'micromasters'
}
},
'weight': 10.0
}
],
'boost': 5.0,
'score_mode': 'multiply',
'boost_mode': 'sum'
test_elasticsearch_boost_config = {
'function_score': {
'functions': [
{
'filter': {
'term': {
'type': 'micromasters'
}
},
'weight': 10.0
}
],
'boost': 5.0,
'score_mode': 'multiply',
'boost_mode': 'sum'
}
}
boost_config = ElasticsearchBoostConfig.get_solo()
boost_config.function_score = function_score
boost_config.save()
with patch('course_discovery.apps.edx_haystack_extensions.backends.get_elasticsearch_boost_config',
return_value=test_elasticsearch_boost_config):
with patch.object(BaseSearchBackend, 'build_models_list', return_value=[]):
kwargs = self.backend.build_search_kwargs(self.specific_query_string)
with patch.object(BaseSearchBackend, 'build_models_list', return_value=[]):
kwargs = self.backend.build_search_kwargs(self.specific_query_string)
function_score = test_elasticsearch_boost_config['function_score']
expected_function_score = {
'function_score': function_score
......
import datetime
import ddt
import pytz
from django.test import TestCase
from haystack.query import SearchQuerySet
from mock import patch
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.course_metadata.models import CourseRun, Program, ProgramType
from course_discovery.apps.course_metadata.tests.factories import CourseRunFactory, ProgramFactory
@ddt.ddt
class SearchBoostingTests(ElasticsearchTestMixin, TestCase):
    """ Tests of search result ordering and scores produced by the boost configuration."""

    def build_normalized_course_run(self, **kwargs):
        """ Create a CourseRun via CourseRunFactory with baseline values for boost-related fields.

        Any keyword argument overrides the matching baseline value and is forwarded to the factory.
        """
        factory_kwargs = dict(
            pacing_type='instructor_paced',
            start=datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(weeks=52),
            enrollment_start=datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(weeks=50),
            enrollment_end=None,
        )
        factory_kwargs.update(kwargs)
        return CourseRunFactory(**factory_kwargs)

    def _assert_title_boost(self, hits, candidate, expects_boost):
        """ Check the two search hits: either ``candidate`` ranks first with a higher score, or scores tie."""
        if not expects_boost:
            self.assertEqual(hits[0].score, hits[1].score)
            return

        self.assertGreater(hits[0].score, hits[1].score)
        self.assertEqual(candidate.title, hits[0].title)

    def test_start_date_boosting(self):
        """ Verify a course run starting sooner is boosted over one starting later."""
        current_time = datetime.datetime.now(pytz.timezone('utc'))
        self.build_normalized_course_run(start=current_time + datetime.timedelta(weeks=10))
        sooner_run = self.build_normalized_course_run(start=current_time + datetime.timedelta(weeks=1))

        hits = SearchQuerySet().models(CourseRun).all()
        self.assertEqual(2, len(hits))
        self.assertGreater(hits[0].score, hits[1].score)
        # Compare whole seconds only; the indexed value may not keep sub-second precision.
        expected_start = int(sooner_run.start.timestamp())
        actual_start = int(hits[0].start.timestamp())  # pylint: disable=no-member
        self.assertEqual(expected_start, actual_start)

    def test_self_paced_boosting(self):
        """ Verify that self paced courses are boosted over instructor led courses."""
        self.build_normalized_course_run(pacing_type='instructor_paced')
        self_paced_run = self.build_normalized_course_run(pacing_type='self_paced')

        hits = SearchQuerySet().models(CourseRun).all()
        self.assertEqual(2, len(hits))
        self.assertGreater(hits[0].score, hits[1].score)
        self.assertEqual(self_paced_run.pacing_type, hits[0].pacing_type)

    @ddt.data(
        # Case 1: Should not get boost if has_enrollable_paid_seats is False, has_enrollable_paid_seats is None or
        #   paid_seat_enrollment_end is in the past.
        (False, None, False),
        (None, None, False),
        (True, datetime.datetime.now(pytz.timezone('utc')) - datetime.timedelta(days=15), False),

        # Case 2: Should get boost if has_enrollable_paid_seats is True and paid_seat_enrollment_end is None or
        #   in the future.
        (True, None, True),
        (True, datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(days=15), True)
    )
    @ddt.unpack
    def test_enrollable_paid_seat_boosting(self, has_enrollable_paid_seats, paid_seat_enrollment_end, expects_boost):
        """ Verify that CourseRuns for which an unenrolled user may enroll and purchase a paid Seat are boosted."""
        # Control record: patched so that it should never receive the paid-seat boost.
        with patch.object(CourseRun, 'has_enrollable_paid_seats', return_value=False), \
                patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=None):
            self.build_normalized_course_run(title='test1')

        # Record under test: patched with the parametrized values, so it may be boosted.
        with patch.object(CourseRun, 'has_enrollable_paid_seats', return_value=has_enrollable_paid_seats), \
                patch.object(CourseRun, 'get_paid_seat_enrollment_end', return_value=paid_seat_enrollment_end):
            candidate = self.build_normalized_course_run(title='test2')

        hits = SearchQuerySet().models(CourseRun).all()
        self.assertEqual(2, len(hits))
        self._assert_title_boost(hits, candidate, expects_boost)

    @ddt.data('MicroMasters', 'Professional Certificate')
    def test_program_type_boosting(self, program_type):
        """ Verify MicroMasters and Professional Certificate are boosted over XSeries."""
        ProgramFactory(type=ProgramType.objects.get(name='XSeries'))
        boosted_program = ProgramFactory(type=ProgramType.objects.get(name=program_type))

        hits = SearchQuerySet().models(Program).all()
        self.assertEqual(2, len(hits))
        self.assertGreater(hits[0].score, hits[1].score)
        self.assertEqual(str(boosted_program.type), str(hits[0].type))

    @ddt.data(
        # Case 1: Should get boost if enrollment_start and enrollment_end unspecified.
        (None, None, True),

        # Case 2: Should get boost if enrollment_start unspecified and enrollment_end in future.
        (None, datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(days=15), True),

        # Case 3: Should get boost if enrollment_start in past and enrollment_end unspecified.
        (datetime.datetime.now(pytz.timezone('utc')) - datetime.timedelta(days=15), None, True),

        # Case 4: Should get boost if enrollment_start in past and enrollment_end in future.
        (datetime.datetime.now(pytz.timezone('utc')) - datetime.timedelta(days=15),
         datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(days=15),
         True),

        # Case 5: Should not get boost if enrollment_start in future.
        (datetime.datetime.now(pytz.timezone('utc')) + datetime.timedelta(days=15), None, False),

        # Case 6: Should not get boost if enrollment_end in past.
        (None, datetime.datetime.now(pytz.timezone('utc')) - datetime.timedelta(days=15), False),
    )
    @ddt.unpack
    def test_enrollable_course_run_boosting(self, enrollment_start, enrollment_end, expects_boost):
        """ Verify that enrollable CourseRuns are boosted."""
        # Control record: built with the baseline values, so it should never be boosted.
        self.build_normalized_course_run(title='test1')

        # Record under test: uses the parametrized enrollment window.
        candidate = self.build_normalized_course_run(
            title='test2',
            enrollment_start=enrollment_start,
            enrollment_end=enrollment_end
        )

        hits = SearchQuerySet().models(CourseRun).all()
        self.assertEqual(2, len(hits))
        self._assert_title_boost(hits, candidate, expects_boost)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment