Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
course-discovery
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
course-discovery
Commits
4f6550be
Commit
4f6550be
authored
Mar 23, 2017
by
Matthew Piatetsky
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add sanity check before moving index alias
ECOM-7572
parent
db16cb4f
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
76 additions
and
4 deletions
+76
-4
course_discovery/apps/edx_haystack_extensions/management/commands/update_index.py
+32
-0
course_discovery/apps/edx_haystack_extensions/tests/test_update_index.py
+40
-4
course_discovery/settings/base.py
+4
-0
No files found.
course_discovery/apps/edx_haystack_extensions/management/commands/update_index.py
View file @
4f6550be
import
logging
from
django.conf
import
settings
from
django.core.management
import
CommandError
from
haystack
import
connections
as
haystack_connections
from
haystack.management.commands.update_index
import
Command
as
HaystackCommand
...
...
@@ -11,6 +13,9 @@ logger = logging.getLogger(__name__)
class
Command
(
HaystackCommand
):
backends
=
[]
def get_record_count(self, conn, index_name):
    """
    Return the number of records reported for the given index.

    Arguments:
        conn: Client object exposing ``count(index_name)``; presumably the
            search backend's Elasticsearch connection (verify against caller).
        index_name: Name of the index (or alias) to count.

    Returns:
        The value stored under the ``'count'`` key of the count response,
        or None if that key is absent.
    """
    count_response = conn.count(index_name)
    return count_response.get('count')
def
handle
(
self
,
*
items
,
**
options
):
self
.
backends
=
options
.
get
(
'using'
)
if
not
self
.
backends
:
...
...
@@ -22,6 +27,7 @@ class Command(HaystackCommand):
for
backend_name
in
self
.
backends
:
connection
=
haystack_connections
[
backend_name
]
backend
=
connection
.
get_backend
()
record_count
=
self
.
get_record_count
(
backend
.
conn
,
backend
.
index_name
)
alias
,
index_name
=
self
.
prepare_backend_index
(
backend
)
alias_mappings
.
append
((
backend
,
index_name
,
alias
))
...
...
@@ -29,8 +35,34 @@ class Command(HaystackCommand):
# Set the alias (from settings) to the timestamped catalog.
for
backend
,
index
,
alias
in
alias_mappings
:
record_count_is_sane
,
index_info_string
=
self
.
sanity_check_new_index
(
backend
.
conn
,
index
,
record_count
)
if
not
record_count_is_sane
:
raise
CommandError
(
'Sanity check failed for new index. '
+
index_info_string
)
self
.
set_alias
(
backend
,
alias
,
index
)
def percentage_change(self, current, previous):
    """
    Return the absolute change between two counts as a fraction of ``previous``.

    Arguments:
        current: The new value.
        previous: The baseline value the change is measured against.

    Returns:
        ``abs(current - previous) / previous``, or 1 when ``previous`` is zero.
    """
    try:
        delta = abs(current - previous)
        ratio = delta / previous
    except ZeroDivisionError:
        # A zero baseline makes the ratio undefined; report a large change
        # (100%) so that the sanity check fails.
        return 1
    return ratio
def sanity_check_new_index(self, conn, index, previous_record_count):
    """
    Check that the new index's record count has not shifted too far from the previous count.

    Arguments:
        conn: Client object exposing ``count(index)``.
        index: Name of the newly built index.
        previous_record_count: Record count to compare the new index against.

    Returns:
        A ``(record_count_is_sane, index_info_string)`` tuple. The boolean is
        True when the fractional change is strictly below
        ``settings.INDEX_SIZE_CHANGE_THRESHOLD``; the string summarises both
        counts and the change as a percentage.
    """
    count_response = conn.count(index)
    current_record_count = count_response.get('count')
    change_ratio = self.percentage_change(current_record_count, previous_record_count)

    # Verify there was not a big shift in record count
    threshold = settings.INDEX_SIZE_CHANGE_THRESHOLD
    record_count_is_sane = change_ratio < threshold

    message_template = (
        'The previous index contained [{}] records. '
        'The new index contains [{}] records, a [{:.2f}%] change.'
    )
    index_info_string = message_template.format(
        previous_record_count,
        current_record_count,
        change_ratio * 100,
    )
    return record_count_is_sane, index_info_string
def
set_alias
(
self
,
backend
,
alias
,
index
):
"""
Points the alias to the specified index.
...
...
course_discovery/apps/edx_haystack_extensions/tests/test_update_index.py
View file @
4f6550be
import
mock
import
pytest
from
django.conf
import
settings
from
django.core.management
import
call_command
from
django.test
import
TestCase
from
django.core.management
import
CommandError
,
call_command
from
django.test
import
TestCase
,
override_settings
from
elasticsearch
import
Elasticsearch
from
freezegun
import
freeze_time
from
course_discovery.apps.core.tests.mixins
import
ElasticsearchTestMixin
from
course_discovery.apps.course_metadata.tests.factories
import
CourseRunFactory
from
course_discovery.apps.edx_haystack_extensions.tests.mixins
import
SearchIndexTestMixin
class
UpdateIndexTests
(
SearchIndexTestMixin
,
TestCase
):
@override_settings
(
HAYSTACK_SIGNAL_PROCESSOR
=
'haystack.signals.BaseSignalProcessor'
)
class
UpdateIndexTests
(
ElasticsearchTestMixin
,
SearchIndexTestMixin
,
TestCase
):
@freeze_time
(
'2016-06-21'
)
def
test_handle
(
self
):
""" Verify the command creates a timestamped index and repoints the alias. """
call_command
(
'update_index'
)
with
mock
.
patch
(
'course_discovery.apps.edx_haystack_extensions.management.commands.'
'update_index.Command.sanity_check_new_index'
,
return_value
=
(
True
,
''
)):
call_command
(
'update_index'
)
alias
=
settings
.
HAYSTACK_CONNECTIONS
[
'default'
][
'INDEX_NAME'
]
index
=
'{alias}_20160621_000000'
.
format
(
alias
=
alias
)
...
...
@@ -27,3 +34,32 @@ class UpdateIndexTests(SearchIndexTestMixin, TestCase):
}
}
self
.
assertDictEqual
(
response
,
expected
)
def test_sanity_check_error(self):
    """ Verify the command raises a CommandError if new index fails the sanity check. """
    CourseRunFactory()
    record_count = 2
    additional_runs = int(100 * settings.INDEX_SIZE_CHANGE_THRESHOLD + 1)
    CourseRunFactory.create_batch(additional_runs)

    # Build the patchers up front; nothing is patched until they are entered below.
    set_alias_patch = mock.patch(
        'course_discovery.apps.edx_haystack_extensions.management.commands.'
        'update_index.Command.set_alias',
        return_value=True
    )
    record_count_patch = mock.patch(
        'course_discovery.apps.edx_haystack_extensions.management.commands.'
        'update_index.Command.get_record_count',
        return_value=record_count
    )

    # Ensure that an error is raised if the sanity check does not pass
    with pytest.raises(CommandError):
        with set_alias_patch, record_count_patch:
            call_command('update_index')
def test_sanity_check_success(self):
    """ Verify the command does not raise a CommandError if the new index passes the sanity check. """
    CourseRunFactory.create_batch(30)
    record_count = 60
    additional_runs = int(10 * settings.INDEX_SIZE_CHANGE_THRESHOLD - 1)
    CourseRunFactory.create_batch(additional_runs)

    # Build the patchers up front; nothing is patched until they are entered below.
    set_alias_patch = mock.patch(
        'course_discovery.apps.edx_haystack_extensions.management.commands.'
        'update_index.Command.set_alias',
        return_value=True
    )
    record_count_patch = mock.patch(
        'course_discovery.apps.edx_haystack_extensions.management.commands.'
        'update_index.Command.get_record_count',
        return_value=record_count
    )

    # Ensure that no error is raised and the sanity check passes
    with set_alias_patch, record_count_patch:
        call_command('update_index')
course_discovery/settings/base.py
View file @
4f6550be
...
...
@@ -435,6 +435,10 @@ HAYSTACK_CONNECTIONS = {
HAYSTACK_SIGNAL_PROCESSOR
=
'haystack.signals.BaseSignalProcessor'
HAYSTACK_INDEX_RETENTION_LIMIT
=
3
# Update Index Settings
# Make sure the size of the new index does not change by more than this fraction of the previous size (0.1 = 10%)
INDEX_SIZE_CHANGE_THRESHOLD
=
.
1
# Elasticsearch search query facet "size" option to increase from the default value of "100"
# See https://www.elastic.co/guide/en/elasticsearch/reference/1.5/search-facets-terms-facet.html#_accuracy_control
SEARCH_FACET_LIMIT
=
10000
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment