Commit 4f6550be by Matthew Piatetsky

Add sanity check before moving index alias

ECOM-7572
parent db16cb4f
import logging
from django.conf import settings
from django.core.management import CommandError
from haystack import connections as haystack_connections
from haystack.management.commands.update_index import Command as HaystackCommand
......@@ -11,6 +13,9 @@ logger = logging.getLogger(__name__)
class Command(HaystackCommand):
backends = []
def get_record_count(self, conn, index_name):
    """
    Return the number of records reported for the given index.

    Arguments:
        conn: Client object exposing a `count(index_name)` method that
            returns a dict-like response.
        index_name: Name of the index to count.

    Returns:
        The value stored under the `count` key of the response, or None
        if the response has no such key.
    """
    count_response = conn.count(index_name)
    return count_response.get('count')
def handle(self, *items, **options):
self.backends = options.get('using')
if not self.backends:
......@@ -22,6 +27,7 @@ class Command(HaystackCommand):
for backend_name in self.backends:
connection = haystack_connections[backend_name]
backend = connection.get_backend()
record_count = self.get_record_count(backend.conn, backend.index_name)
alias, index_name = self.prepare_backend_index(backend)
alias_mappings.append((backend, index_name, alias))
......@@ -29,8 +35,34 @@ class Command(HaystackCommand):
# Set the alias (from settings) to the timestamped catalog.
for backend, index, alias in alias_mappings:
record_count_is_sane, index_info_string = self.sanity_check_new_index(backend.conn, index, record_count)
if not record_count_is_sane:
raise CommandError('Sanity check failed for new index. ' + index_info_string)
self.set_alias(backend, alias, index)
def percentage_change(self, current, previous):
    """
    Return the relative size of the change from `previous` to `current`.

    The result is a ratio, not a value scaled to 100: 0.5 means a 50%
    change. The direction of the change is discarded.

    Arguments:
        current: The new value.
        previous: The baseline value the change is measured against.

    Returns:
        abs(current - previous) / previous, or 1 when `previous` is zero.
    """
    difference = abs(current - previous)
    try:
        change = difference / previous
    except ZeroDivisionError:
        # No ratio can be computed against a zero baseline, so report a
        # large change (100%). This is done to fail the sanity check.
        change = 1
    return change
def sanity_check_new_index(self, conn, index, previous_record_count):
    """
    Ensure that we do not point to an index that looks like it has missing data.

    Arguments:
        conn: Client object exposing a `count(index)` method that returns a
            dict-like response with a `count` key.
        index: Name of the newly built index to inspect.
        previous_record_count: Record count the new index is compared against.

    Returns:
        A `(record_count_is_sane, index_info_string)` tuple. The first item is
        True only when the relative change in record count is strictly below
        `settings.INDEX_SIZE_CHANGE_THRESHOLD`; the second is a human-readable
        summary of both counts and the change between them.
    """
    new_record_count = conn.count(index).get('count')
    change = self.percentage_change(new_record_count, previous_record_count)

    # A big shift in record count suggests the new index is missing data.
    is_sane = change < settings.INDEX_SIZE_CHANGE_THRESHOLD

    summary_template = (
        'The previous index contained [{}] records. '
        'The new index contains [{}] records, a [{:.2f}%] change.'
    )
    # `change` is a ratio, so scale it to a percentage for display only.
    summary = summary_template.format(previous_record_count, new_record_count, change * 100)

    return is_sane, summary
def set_alias(self, backend, alias, index):
"""
Points the alias to the specified index.
......
import mock
import pytest
from django.conf import settings
from django.core.management import call_command
from django.test import TestCase
from django.core.management import CommandError, call_command
from django.test import TestCase, override_settings
from elasticsearch import Elasticsearch
from freezegun import freeze_time
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.course_metadata.tests.factories import CourseRunFactory
from course_discovery.apps.edx_haystack_extensions.tests.mixins import SearchIndexTestMixin
class UpdateIndexTests(SearchIndexTestMixin, TestCase):
@override_settings(HAYSTACK_SIGNAL_PROCESSOR='haystack.signals.BaseSignalProcessor')
class UpdateIndexTests(ElasticsearchTestMixin, SearchIndexTestMixin, TestCase):
@freeze_time('2016-06-21')
def test_handle(self):
""" Verify the command creates a timestamped index and repoints the alias. """
call_command('update_index')
with mock.patch('course_discovery.apps.edx_haystack_extensions.management.commands.'
'update_index.Command.sanity_check_new_index', return_value=(True, '')):
call_command('update_index')
alias = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME']
index = '{alias}_20160621_000000'.format(alias=alias)
......@@ -27,3 +34,32 @@ class UpdateIndexTests(SearchIndexTestMixin, TestCase):
}
}
self.assertDictEqual(response, expected)
def test_sanity_check_error(self):
    """ Verify that a new index failing the sanity check makes the command raise a CommandError. """
    command_path = 'course_discovery.apps.edx_haystack_extensions.management.commands.update_index.Command'

    CourseRunFactory()
    record_count = 2
    CourseRunFactory.create_batch(int(100 * settings.INDEX_SIZE_CHANGE_THRESHOLD + 1))

    # Patch out the alias switch and report a fixed previous record count.
    patched_set_alias = mock.patch(command_path + '.set_alias', return_value=True)
    patched_record_count = mock.patch(command_path + '.get_record_count', return_value=record_count)

    # Ensure that an error is raised if the sanity check does not pass
    with pytest.raises(CommandError), patched_set_alias, patched_record_count:
        call_command('update_index')
def test_sanity_check_success(self):
    """ Verify that no CommandError is raised when the new index passes the sanity check. """
    command_path = 'course_discovery.apps.edx_haystack_extensions.management.commands.update_index.Command'

    CourseRunFactory.create_batch(30)
    record_count = 60
    CourseRunFactory.create_batch(int(10 * settings.INDEX_SIZE_CHANGE_THRESHOLD - 1))

    # Patch out the alias switch and report a fixed previous record count.
    patched_set_alias = mock.patch(command_path + '.set_alias', return_value=True)
    patched_record_count = mock.patch(command_path + '.get_record_count', return_value=record_count)

    # Ensure that no error is raised and the sanity check passes the second time
    with patched_set_alias, patched_record_count:
        call_command('update_index')
......@@ -435,6 +435,10 @@ HAYSTACK_CONNECTIONS = {
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor'
HAYSTACK_INDEX_RETENTION_LIMIT = 3

# Update Index Settings
# Largest allowed relative change in record count between the previous index and a
# newly built one, expressed as a fraction rather than a percentage (0.1 == 10%).
INDEX_SIZE_CHANGE_THRESHOLD = 0.1

# Value for the Elasticsearch search query facet "size" option, raised from the default of "100".
# See https://www.elastic.co/guide/en/elasticsearch/reference/1.5/search-facets-terms-facet.html#_accuracy_control
SEARCH_FACET_LIMIT = 10000
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment