Commit 2b11a14c by Clinton Blackburn Committed by GitHub

Haystack management updates (#135)

- Reorganized the Haystack changes into a separate app
- Modified update_index command to create a new index and re-point an alias, rather than destroy the existing index

ECOM-4746
parent 66bd850d
......@@ -49,7 +49,7 @@ class CourseRunViewSetTests(ElasticsearchTestMixin, APITestCase):
def test_list_query(self):
""" Verify the endpoint returns a filtered list of courses """
title = 'Some random course'
title = 'Some random title'
course_runs = CourseRunFactory.create_batch(3, title=title)
CourseRunFactory(title='non-matching name')
query = 'title:' + title
......
......@@ -36,7 +36,7 @@ class CourseViewSetTests(SerializationMixin, APITestCase):
def test_list_query(self):
""" Verify the endpoint returns a filtered list of courses """
title = 'Some random course'
title = 'Some random title'
courses = CourseFactory.create_batch(3, title=title)
courses = sorted(courses, key=lambda course: course.key.lower())
query = 'title:' + title
......
......@@ -74,7 +74,7 @@ class CourseTests(TestCase):
def test_search(self):
""" Verify the method returns a filtered queryset of courses. """
title = 'Some random course'
title = 'Some random title'
courses = factories.CourseFactory.create_batch(3, title=title)
courses = sorted(courses, key=lambda course: course.key)
query = 'title:' + title
......@@ -118,7 +118,7 @@ class CourseRunTests(TestCase):
def test_search(self):
""" Verify the method returns a filtered queryset of course runs. """
title = 'Some random course run'
title = 'Some random title'
course_runs = factories.CourseRunFactory.create_batch(3, title=title)
query = 'title:' + title
actual_sorted = sorted(SearchQuerySetWrapper(CourseRun.search(query)), key=lambda course_run: course_run.key)
......
from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend, ElasticsearchSearchEngine
# pylint: disable=abstract-method
class SimplifiedElasticsearchSearchBackend(ElasticsearchSearchBackend):
class SimpleQuerySearchBackendMixin(object):
"""
Mixin for simplifying Elasticsearch queries.
Uses a basic query string query.
"""
def build_search_kwargs(self, *args, **kwargs):
"""
Override default `build_search_kwargs` method to set simpler default search query settings.
......@@ -21,7 +26,7 @@ class SimplifiedElasticsearchSearchBackend(ElasticsearchSearchBackend):
}
"""
query_string = args[0]
search_kwargs = super(SimplifiedElasticsearchSearchBackend, self).build_search_kwargs(*args, **kwargs)
search_kwargs = super(SimpleQuerySearchBackendMixin, self).build_search_kwargs(*args, **kwargs)
simple_query = {
'query': query_string,
......@@ -37,5 +42,29 @@ class SimplifiedElasticsearchSearchBackend(ElasticsearchSearchBackend):
return search_kwargs
class SimplifiedElasticsearchSearchEngine(ElasticsearchSearchEngine):
backend = SimplifiedElasticsearchSearchBackend
class NonClearingSearchBackendMixin(object):
    """
    Search backend mixin whose `clear()` is a logged no-op.

    Mix this into a backend when indexes should be rebuilt into a brand-new index instead of being
    emptied and refilled in place, which is what Haystack normally does.
    """

    def clear(self, models=None, commit=True):  # pylint: disable=unused-argument
        """ Leave the index untouched and log that this backend does not clear indexes.

        The log entry advises the operator to clear the index manually with the tools appropriate
        for the search service.

        Args:
            models: Unused.
            commit: Unused.

        Returns:
            None
        """
        backend_name = type(self).__name__
        message = ('%s does NOT clear indexes. Indexes should be manually cleared using the APIs/tools appropriate '
                   'for this search service.')
        # `self.log` is expected to be provided by the class this mixin is combined with.
        self.log.info(message, backend_name)
# pylint: disable=abstract-method
class EdxElasticsearchSearchBackend(
        SimpleQuerySearchBackendMixin,
        NonClearingSearchBackendMixin,
        ElasticsearchSearchBackend
):
    """ Elasticsearch backend combining simplified query-string queries with a non-clearing `clear()`. """
class EdxElasticsearchSearchEngine(ElasticsearchSearchEngine):
    """ Haystack search engine that uses `EdxElasticsearchSearchBackend` as its backend. """

    backend = EdxElasticsearchSearchBackend
import datetime
import logging
from haystack import connections as haystack_connections
from haystack.management.commands.update_index import Command as HaystackCommand
logger = logging.getLogger(__name__)
class Command(HaystackCommand):
    """
    `update_index` variant that indexes into a new timestamped index and then re-points an alias.

    The index name configured for each backend is treated as an alias. The existing index is not
    destroyed; a new index is created and the alias is moved to it once the parent command has run.
    """
    # Names of the Haystack connections processed by the current run.
    backends = []

    def handle(self, *items, **options):
        """ Create timestamped indexes, run the parent command, then move the aliases.

        The backends are taken from the `using` option, or from every configured Haystack
        connection when that option is empty. Each backend is pointed at a newly created
        timestamped index before the parent `handle` runs. Afterwards the originally configured
        index name is set as an alias of the new index.
        """
        self.backends = options.get('using') or list(haystack_connections.connections_info.keys())

        # Swap each backend over to a timestamped index instead of the default from settings.
        pending_aliases = []
        for connection_name in self.backends:
            search_backend = haystack_connections[connection_name].get_backend()
            alias, new_index = self.prepare_backend_index(search_backend)
            pending_aliases.append((search_backend, new_index, alias))

        super(Command, self).handle(*items, **options)

        # Point each alias (the name from settings) at its timestamped index.
        for search_backend, new_index, alias in pending_aliases:
            self.set_alias(search_backend, alias, new_index)

    def set_alias(self, backend, alias, index):
        """
        Make `alias` refer to `index`.

        The alias is removed from every index (`*`) and added to `index` in a single
        update-aliases request. The indexes themselves are not otherwise modified.

        Args:
            backend (ElasticsearchSearchBackend): Elasticsearch backend with an open connection.
            alias (str): Name of the alias to set.
            index (str): Name of the index the alias should point to.

        Returns:
            None
        """
        detach_everywhere = {'remove': {'alias': alias, 'index': '*'}}
        attach_to_target = {'add': {'alias': alias, 'index': index}}
        backend.conn.indices.update_aliases({'actions': [detach_everywhere, attach_to_target]})

    def prepare_backend_index(self, backend):
        """
        Create a new index for the backend and make the backend write to it.

        Args:
            backend (ElasticsearchSearchBackend): Backend to update. Its `index_name` is
                overwritten with the name of the new index.

        Returns:
            (tuple): tuple containing:

                alias(str): The backend's previous `index_name`, recommended as the alias.
                index_name(str): Name of the newly-created index.
        """
        configured_name = backend.index_name
        new_index = self.create_timestamped_index(backend, configured_name)
        backend.index_name = new_index
        return configured_name, new_index

    def create_timestamped_index(self, backend, prefix):
        """
        Create an index named `<prefix>_<UTC timestamp>`.

        The timestamp has the form `YYYYMMDD_HHMMSS`.

        Args:
            backend (ElasticsearchSearchBackend): Backend through which to connect to Elasticsearch.
            prefix (str): Prefix for the index name.

        Returns:
            index_name (str): Name of the new index.
        """
        now = datetime.datetime.utcnow()
        index_name = '{alias}_{timestamp}'.format(alias=prefix, timestamp=now.strftime('%Y%m%d_%H%M%S'))
        backend.conn.indices.create(index=index_name)
        return index_name
""" Haystack backend tests. """
from django.conf import settings
from elasticsearch.helpers import bulk
from haystack.backends import BaseSearchBackend
from mock import patch
from django.test import TestCase
from haystack.backends import BaseSearchBackend
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.core.haystack_backends import SimplifiedElasticsearchSearchBackend
class SearchBackendTestMixin(ElasticsearchTestMixin):
backend = None
backend_class = None
class SimplifiedElasticsearchSearchEngineTests(TestCase):
""" Tests for core.context_processors.core """
def setUp(self):
super(SimplifiedElasticsearchSearchEngineTests, self).setUp()
self.all_query_string = "*:*"
self.specific_query_string = "tests:test query"
self.simple_query = {
'query': self.specific_query_string,
'analyze_wildcard': True,
'auto_generate_phrase_queries': True,
}
self.backend = SimplifiedElasticsearchSearchBackend(
'default',
URL='http://test-es.example.com',
INDEX_NAME='testing'
)
super(SearchBackendTestMixin, self).setUp()
self.backend = self.get_backend()
def get_backend(self, connection_alias='default', **connection_options):
    """ Build a `backend_class` instance for the given connection alias.

    The options start from the `settings.HAYSTACK_CONNECTIONS` entry for `connection_alias`
    (an empty dict if there is none). Any `connection_options` passed in override them.
    """
    options = dict(settings.HAYSTACK_CONNECTIONS.get(connection_alias, {}))
    options.update(connection_options)
    return self.backend_class(connection_alias, **options)  # pylint: disable=not-callable
def record_count(self):
    """ Return the `count` value reported for the backend's current index. """
    response = self.backend.conn.count(index=self.backend.index_name)
    return response['count']
class SimpleQuerySearchBackendMixinTestMixin(SearchBackendTestMixin):
""" Test class mixin for testing children of SimpleQuerySearchBackendMixin. """
all_query_string = '*:*'
specific_query_string = 'tests:test query'
simple_query = {
'query': specific_query_string,
'analyze_wildcard': True,
'auto_generate_phrase_queries': True,
}
def test_build_search_kwargs_all_qs_with_filter(self):
with patch.object(BaseSearchBackend, 'build_models_list', return_value=['course_metadata.course']):
......@@ -51,3 +62,20 @@ class SimplifiedElasticsearchSearchEngineTests(TestCase):
self.assertIsNone(kwargs['query'].get('filtered'))
self.assertDictEqual(kwargs['query'].get('query_string'), self.simple_query)
class NonClearingSearchBackendMixinTestMixin(SearchBackendTestMixin):
    """ Shared test for backends that inherit NonClearingSearchBackendMixin. """

    def test_clear(self):
        """ Verify clear() leaves existing documents in the index. """
        search_backend = self.backend

        # Seed the index with a single document and make it visible to counts.
        bulk(search_backend.conn, [{'text': 'Testing!'}], index=search_backend.index_name, doc_type='test')
        self.refresh_index()
        count_before = self.record_count()
        self.assertGreater(count_before, 0)

        # Clearing must be a no-op, so the count is unchanged.
        search_backend.clear()
        self.assertEqual(self.record_count(), count_before)
from django.test import TestCase
from course_discovery.apps.edx_haystack_extensions.backends import EdxElasticsearchSearchBackend
from course_discovery.apps.edx_haystack_extensions.tests.mixins import (
SimpleQuerySearchBackendMixinTestMixin, NonClearingSearchBackendMixinTestMixin
)
class EdxElasticsearchSearchBackendTests(
        NonClearingSearchBackendMixinTestMixin,
        SimpleQuerySearchBackendMixinTestMixin,
        TestCase
):
    """ Runs the shared backend mixin tests against EdxElasticsearchSearchBackend. """

    backend_class = EdxElasticsearchSearchBackend
from django.conf import settings
from django.core.management import call_command
from django.test import TestCase
from elasticsearch import Elasticsearch
from freezegun import freeze_time
class UpdateIndexTests(TestCase):
    """ Tests for the update_index management command. """

    @freeze_time('2016-06-21')
    def test_handle(self):
        """ Verify the command creates a timestamped index and repoints the alias. """
        call_command('update_index')

        default_connection = settings.HAYSTACK_CONNECTIONS['default']
        alias = default_connection['INDEX_NAME']
        # Time is frozen at midnight on 2016-06-21, so the index suffix is fixed.
        expected_index = '{alias}_20160621_000000'.format(alias=alias)

        client = Elasticsearch(default_connection['URL'])
        self.assertDictEqual(
            client.indices.get_alias(name=alias),
            {expected_index: {'aliases': {alias: {}}}}
        )
......@@ -37,7 +37,6 @@ THIRD_PARTY_APPS = (
'waffle',
'sortedm2m',
'simple_history',
'haystack',
'guardian',
'dry_rest_permissions',
)
......@@ -48,11 +47,17 @@ PROJECT_APPS = (
'course_discovery.apps.api',
'course_discovery.apps.catalogs',
'course_discovery.apps.course_metadata',
'course_discovery.apps.edx_haystack_extensions',
)
INSTALLED_APPS += THIRD_PARTY_APPS
INSTALLED_APPS += PROJECT_APPS
# NOTE: Haystack must be listed after the project apps (including edx_haystack_extensions) so that our management commands take precedence over Haystack's.
INSTALLED_APPS += ('haystack',)
MIDDLEWARE_CLASSES = (
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.locale.LocaleMiddleware',
......@@ -316,7 +321,7 @@ SWAGGER_SETTINGS = {
HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': 'course_discovery.apps.core.haystack_backends.SimplifiedElasticsearchSearchEngine',
'ENGINE': 'course_discovery.apps.edx_haystack_extensions.backends.EdxElasticsearchSearchEngine',
'URL': 'http://localhost:9200/',
'INDEX_NAME': 'catalog',
},
......
......@@ -31,7 +31,7 @@ DATABASES = {
HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
'ENGINE': 'course_discovery.apps.edx_haystack_extensions.backends.EdxElasticsearchSearchEngine',
'URL': os.environ.get('TEST_ELASTICSEARCH_URL', 'http://localhost:9200/'),
'INDEX_NAME': 'catalog_test',
},
......
......@@ -6,6 +6,7 @@ ddt==1.0.1
django-nose==1.4.2
edx-lint==0.5.0
factory-boy==2.6.0
freezegun==0.3.7
lxml==3.4.2
mock==1.3.0
nose-ignore-docstring==0.2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment