Commit 20d2736d by Bill DeRusha Committed by GitHub

Merge pull request #264 from edx/bderusha/mm-facets

WIP elasticsearch boosting
parents e31b77ac 99fc74c1
from django.contrib import admin
from solo.admin import SingletonModelAdmin
from course_discovery.apps.edx_haystack_extensions.models import ElasticsearchBoostConfig
# Expose the boost configuration in the Django admin, using django-solo's
# SingletonModelAdmin as the admin class for the model.
admin.site.register(ElasticsearchBoostConfig, SingletonModelAdmin)
from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend, ElasticsearchSearchEngine
from course_discovery.apps.edx_haystack_extensions.models import ElasticsearchBoostConfig
class SimpleQuerySearchBackendMixin(object):
"""
......@@ -34,10 +36,21 @@ class SimpleQuerySearchBackendMixin(object):
'auto_generate_phrase_queries': True,
}
if search_kwargs['query'].get('filtered', {}).get('query', {}).get('query_string'):
search_kwargs['query']['filtered']['query']['query_string'] = simple_query
# https://www.elastic.co/guide/en/elasticsearch/reference/1.7/query-dsl-function-score-query.html
function_score_config = ElasticsearchBoostConfig.get_solo().function_score
function_score_config['query'] = {
'query_string': simple_query
}
function_score = {
'function_score': function_score_config
}
if search_kwargs['query'].get('filtered', {}).get('query'):
search_kwargs['query']['filtered']['query'] = function_score
elif search_kwargs['query'].get('query_string'):
search_kwargs['query']['query_string'] = simple_query
search_kwargs['query'] = function_score
return search_kwargs
......
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
import jsonfield.fields
class Migration(migrations.Migration):
    """Create the table that stores the ``ElasticsearchBoostConfig`` model."""

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='ElasticsearchBoostConfig',
            fields=[
                ('id', models.AutoField(serialize=False, auto_created=True, verbose_name='ID', primary_key=True)),
                # The default mirrors the model field's default (boost of 1.0) so that
                # the migration state and the model definition do not drift apart.
                ('function_score', jsonfield.fields.JSONField(
                    help_text='JSON string containing an elasticsearch function score config.',
                    verbose_name='Function Score',
                    default={'boost': 1.0, 'boost_mode': 'multiply', 'functions': [], 'score_mode': 'multiply'},
                )),
            ],
            options={
                'abstract': False,
            },
        ),
    ]
from django.utils.translation import ugettext_lazy as _
from jsonfield.fields import JSONField
from solo.models import SingletonModel
class ElasticsearchBoostConfig(SingletonModel):
    """
    Singleton model holding the elasticsearch function_score configuration.

    The field default is a function_score config with no functions, a boost
    of 1.0 and 'multiply' for both score_mode and boost_mode.
    """
    function_score = JSONField(
        default={
            'boost': 1.0,
            'boost_mode': 'multiply',
            'score_mode': 'multiply',
            'functions': [],
        },
        blank=False,
        null=False,
        help_text=_('JSON string containing an elasticsearch function score config.'),
        verbose_name=_('Function Score'),
    )
......@@ -4,6 +4,7 @@ from haystack.backends import BaseSearchBackend
from mock import patch
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.edx_haystack_extensions.models import ElasticsearchBoostConfig
class SearchBackendTestMixin(ElasticsearchTestMixin):
......@@ -34,6 +35,17 @@ class SimpleQuerySearchBackendMixinTestMixin(SearchBackendTestMixin):
'analyze_wildcard': True,
'auto_generate_phrase_queries': True,
}
# Query the tests expect from the backend while the boost config holds its
# default values: the simple query string wrapped in a function_score block.
default_function_score = {
'function_score': {
'query': {
'query_string': simple_query
},
'functions': [],
'boost': 1.0,
'score_mode': 'multiply',
'boost_mode': 'multiply'
}
}
def test_build_search_kwargs_all_qs_with_filter(self):
with patch.object(BaseSearchBackend, 'build_models_list', return_value=['course_metadata.course']):
......@@ -47,7 +59,7 @@ class SimpleQuerySearchBackendMixinTestMixin(SearchBackendTestMixin):
kwargs = self.backend.build_search_kwargs(self.specific_query_string)
self.assertIsNone(kwargs['query'].get('query_string'))
self.assertDictEqual(kwargs['query']['filtered']['query'].get('query_string'), self.simple_query)
self.assertDictEqual(kwargs['query']['filtered'].get('query'), self.default_function_score)
def test_build_search_kwargs_all_qs_no_filter(self):
with patch.object(BaseSearchBackend, 'build_models_list', return_value=[]):
......@@ -61,7 +73,38 @@ class SimpleQuerySearchBackendMixinTestMixin(SearchBackendTestMixin):
kwargs = self.backend.build_search_kwargs(self.specific_query_string)
self.assertIsNone(kwargs['query'].get('filtered'))
self.assertDictEqual(kwargs['query'].get('query_string'), self.simple_query)
self.assertDictEqual(kwargs['query'], self.default_function_score)
def test_build_search_kwargs_function_score(self):
    """ Verify the saved boost configuration is wrapped around the simple query string. """
    function_score = {
        'functions': [
            {
                'filter': {
                    'term': {
                        'type': 'micromasters'
                    }
                },
                'weight': 10.0
            }
        ],
        'boost': 5.0,
        'score_mode': 'multiply',
        'boost_mode': 'sum'
    }
    boost_config = ElasticsearchBoostConfig.get_solo()
    boost_config.function_score = function_score
    boost_config.save()

    # Build the expectation from a copy so the dict that was assigned to the
    # model instance is not mutated after it has been saved.
    expected_config = dict(function_score)
    expected_config['query'] = {
        'query_string': self.simple_query
    }
    expected_function_score = {
        'function_score': expected_config
    }

    with patch.object(BaseSearchBackend, 'build_models_list', return_value=[]):
        kwargs = self.backend.build_search_kwargs(self.specific_query_string)

    self.assertDictEqual(kwargs['query'], expected_function_score)
class NonClearingSearchBackendMixinTestMixin(SearchBackendTestMixin):
......
......@@ -48,6 +48,7 @@ THIRD_PARTY_APPS = [
'django_comments',
'taggit',
'taggit_serializer',
'solo',
]
PROJECT_APPS = [
......
......@@ -35,6 +35,7 @@ EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
if os.environ.get('ENABLE_DJANGO_TOOLBAR', False):
INSTALLED_APPS += [
'debug_toolbar',
'elastic_panel',
]
MIDDLEWARE_CLASSES += (
......@@ -43,6 +44,10 @@ if os.environ.get('ENABLE_DJANGO_TOOLBAR', False):
DEBUG_TOOLBAR_PATCH_SETTINGS = False
DEBUG_TOOLBAR_PANELS = [
'elastic_panel.panel.ElasticDebugPanel'
]
INTERNAL_IPS = ('127.0.0.1',)
# END TOOLBAR CONFIGURATION
......
......@@ -32,3 +32,41 @@ We use the query string syntax to search for courses. See `the Elasticsearch doc
query string syntax, and :doc:`course_metadata` for a list of fields which can be searched.
.. _the Elasticsearch documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax
Result Boosting
---------------
We use the Elasticsearch `function_score` query to boost relevance based on configurable factors. See `the function_score documentation`_
for a guide to how this function can be used to provide more relevant results.
Example functions:
Closest to today's date
.. code-block:: json
{
"linear":{
"start":{
"origin":"now",
"scale":"1d",
"decay":0.5
}
},
"weight":10.0
}
Specific result type
.. code-block:: json
{
"filter":{
"term":{
"type":"micromaster"
}
},
"weight": 5.0
}
.. _the function_score documentation: https://www.elastic.co/guide/en/elasticsearch/reference/1.7/query-dsl-function-score-query.html
......@@ -11,6 +11,7 @@ django-guardian==1.4.5
django-haystack==2.5.0
django-libsass==0.7
django-simple-history==1.8.1
django-solo==1.1.2
django-sortedm2m==1.3.2
django-stdimage==2.3.3
django-storages==1.5.0
......@@ -32,6 +33,7 @@ edx-opaque-keys==0.3.1
edx-rest-api-client==1.6.0
elasticsearch>=1.0.0,<2.0.0
html2text==2016.5.29
jsonfield==1.0.3
pillow==3.3.0
pycountry==1.20
python-dateutil==2.5.3
......
......@@ -3,7 +3,7 @@
-r docs.txt
django-debug-toolbar==1.5
django-elasticsearch-debug-toolbar==1.0.4
# i18n
transifex-client==0.12.2
git+https://github.com/edx/i18n-tools.git@v0.3.2#egg=i18n_tools==0.3.2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment