Commit 4f6550be by Matthew Piatetsky

Add sanity check before moving index alias

ECOM-7572
parent db16cb4f
import logging import logging
from django.conf import settings
from django.core.management import CommandError
from haystack import connections as haystack_connections from haystack import connections as haystack_connections
from haystack.management.commands.update_index import Command as HaystackCommand from haystack.management.commands.update_index import Command as HaystackCommand
...@@ -11,6 +13,9 @@ logger = logging.getLogger(__name__) ...@@ -11,6 +13,9 @@ logger = logging.getLogger(__name__)
class Command(HaystackCommand): class Command(HaystackCommand):
backends = [] backends = []
def get_record_count(self, conn, index_name):
    """ Return the record count reported by ``conn`` for ``index_name``.

    ``conn.count(index_name)`` is expected to return a mapping; the value stored
    under its ``'count'`` key is returned, or ``None`` if that key is missing.
    """
    count_response = conn.count(index_name)
    return count_response.get('count')
def handle(self, *items, **options): def handle(self, *items, **options):
self.backends = options.get('using') self.backends = options.get('using')
if not self.backends: if not self.backends:
...@@ -22,6 +27,7 @@ class Command(HaystackCommand): ...@@ -22,6 +27,7 @@ class Command(HaystackCommand):
for backend_name in self.backends: for backend_name in self.backends:
connection = haystack_connections[backend_name] connection = haystack_connections[backend_name]
backend = connection.get_backend() backend = connection.get_backend()
record_count = self.get_record_count(backend.conn, backend.index_name)
alias, index_name = self.prepare_backend_index(backend) alias, index_name = self.prepare_backend_index(backend)
alias_mappings.append((backend, index_name, alias)) alias_mappings.append((backend, index_name, alias))
...@@ -29,8 +35,34 @@ class Command(HaystackCommand): ...@@ -29,8 +35,34 @@ class Command(HaystackCommand):
# Set the alias (from settings) to the timestamped catalog. # Set the alias (from settings) to the timestamped catalog.
for backend, index, alias in alias_mappings: for backend, index, alias in alias_mappings:
record_count_is_sane, index_info_string = self.sanity_check_new_index(backend.conn, index, record_count)
if not record_count_is_sane:
raise CommandError('Sanity check failed for new index. ' + index_info_string)
self.set_alias(backend, alias, index) self.set_alias(backend, alias, index)
def percentage_change(self, current, previous):
    """ Return the relative change between ``previous`` and ``current``.

    The result is the absolute difference divided by ``previous``, i.e. a
    fraction (0.1 means 10%), not a value already multiplied by 100.
    When ``previous`` is zero the division is impossible and ``1`` is returned.
    """
    difference = abs(current - previous)
    try:
        relative_change = difference / previous
    except ZeroDivisionError:
        # No meaningful ratio exists against an empty previous index, so report
        # a large change (100%) in order to fail the sanity check.
        return 1
    return relative_change
def sanity_check_new_index(self, conn, index, previous_record_count):
    """ Ensure that we do not point to an index that looks like it has missing data.

    Compares the record count of ``index`` (queried through ``conn``) against
    ``previous_record_count`` and returns a ``(record_count_is_sane, index_info_string)``
    tuple. The boolean is True when the relative change is strictly below
    ``settings.INDEX_SIZE_CHANGE_THRESHOLD``; the string summarises both counts
    and the change for use in log or error messages.
    """
    new_record_count = conn.count(index).get('count')
    change = self.percentage_change(new_record_count, previous_record_count)

    # A large swing in either direction is treated as suspicious.
    is_sane = change < settings.INDEX_SIZE_CHANGE_THRESHOLD

    summary_template = (
        'The previous index contained [{}] records. '
        'The new index contains [{}] records, a [{:.2f}%] change.'
    )
    # percentage_change() yields a fraction, so scale it for display.
    summary = summary_template.format(previous_record_count, new_record_count, change * 100)

    return is_sane, summary
def set_alias(self, backend, alias, index): def set_alias(self, backend, alias, index):
""" """
Points the alias to the specified index. Points the alias to the specified index.
......
import mock
import pytest
from django.conf import settings from django.conf import settings
from django.core.management import call_command from django.core.management import CommandError, call_command
from django.test import TestCase from django.test import TestCase, override_settings
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
from freezegun import freeze_time from freezegun import freeze_time
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.course_metadata.tests.factories import CourseRunFactory
from course_discovery.apps.edx_haystack_extensions.tests.mixins import SearchIndexTestMixin from course_discovery.apps.edx_haystack_extensions.tests.mixins import SearchIndexTestMixin
class UpdateIndexTests(SearchIndexTestMixin, TestCase): @override_settings(HAYSTACK_SIGNAL_PROCESSOR='haystack.signals.BaseSignalProcessor')
class UpdateIndexTests(ElasticsearchTestMixin, SearchIndexTestMixin, TestCase):
@freeze_time('2016-06-21') @freeze_time('2016-06-21')
def test_handle(self): def test_handle(self):
""" Verify the command creates a timestamped index and repoints the alias. """ """ Verify the command creates a timestamped index and repoints the alias. """
call_command('update_index') with mock.patch('course_discovery.apps.edx_haystack_extensions.management.commands.'
'update_index.Command.sanity_check_new_index', return_value=(True, '')):
call_command('update_index')
alias = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME'] alias = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME']
index = '{alias}_20160621_000000'.format(alias=alias) index = '{alias}_20160621_000000'.format(alias=alias)
...@@ -27,3 +34,32 @@ class UpdateIndexTests(SearchIndexTestMixin, TestCase): ...@@ -27,3 +34,32 @@ class UpdateIndexTests(SearchIndexTestMixin, TestCase):
} }
} }
self.assertDictEqual(response, expected) self.assertDictEqual(response, expected)
def test_sanity_check_error(self):
    """ Verify the command raises a CommandError if new index fails the sanity check. """
    CourseRunFactory()
    record_count = 2
    additional_runs = int(100 * settings.INDEX_SIZE_CHANGE_THRESHOLD + 1)
    CourseRunFactory.create_batch(additional_runs)

    command_path = (
        'course_discovery.apps.edx_haystack_extensions.management.commands.'
        'update_index.Command'
    )
    # Stub out the alias switch and report a fixed count for the previous index.
    alias_patch = mock.patch(command_path + '.set_alias', return_value=True)
    count_patch = mock.patch(command_path + '.get_record_count', return_value=record_count)

    # Ensure that an error is raised if the sanity check does not pass
    with pytest.raises(CommandError), alias_patch, count_patch:
        call_command('update_index')
def test_sanity_check_success(self):
    """ Verify the command does not raise a CommandError error if the new index passes the sanity check. """
    CourseRunFactory.create_batch(30)
    record_count = 60
    # NOTE(review): if INDEX_SIZE_CHANGE_THRESHOLD is .1 this is int(0.0) == 0,
    # so no additional runs would be created - confirm that is intended.
    additional_runs = int(10 * settings.INDEX_SIZE_CHANGE_THRESHOLD - 1)
    CourseRunFactory.create_batch(additional_runs)

    command_path = (
        'course_discovery.apps.edx_haystack_extensions.management.commands.'
        'update_index.Command'
    )
    # Stub out the alias switch and report a fixed count for the previous index.
    alias_patch = mock.patch(command_path + '.set_alias', return_value=True)
    count_patch = mock.patch(command_path + '.get_record_count', return_value=record_count)

    # Ensure that no error is raised and the sanity check passes the second time
    with alias_patch, count_patch:
        call_command('update_index')
...@@ -435,6 +435,10 @@ HAYSTACK_CONNECTIONS = { ...@@ -435,6 +435,10 @@ HAYSTACK_CONNECTIONS = {
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor' HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor'
HAYSTACK_INDEX_RETENTION_LIMIT = 3 HAYSTACK_INDEX_RETENTION_LIMIT = 3
# Update Index Settings
# Make sure the size of the new index does not change by more than this fraction of the previous size (.1 means 10%)
INDEX_SIZE_CHANGE_THRESHOLD = .1
# Elasticsearch search query facet "size" option to increase from the default value of "100" # Elasticsearch search query facet "size" option to increase from the default value of "100"
# See https://www.elastic.co/guide/en/elasticsearch/reference/1.5/search-facets-terms-facet.html#_accuracy_control # See https://www.elastic.co/guide/en/elasticsearch/reference/1.5/search-facets-terms-facet.html#_accuracy_control
SEARCH_FACET_LIMIT = 10000 SEARCH_FACET_LIMIT = 10000
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment