Commit 862262bd by Dino Cikatic

SOL-974 make index and remove calls use ES bulk API

parent 29df7506
......@@ -111,8 +111,7 @@ class SearchIndexerBase(object):
exclude_dictionary={"id": list(exclude_items)}
)
result_ids = [result["data"]["id"] for result in response["results"]]
for result_id in result_ids:
searcher.remove(cls.DOCUMENT_TYPE, result_id)
searcher.remove(cls.DOCUMENT_TYPE, result_ids)
@classmethod
def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
......@@ -142,7 +141,7 @@ class SearchIndexerBase(object):
structure_key = cls.normalize_structure_key(structure_key)
location_info = cls._get_location_info(structure_key)
# Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item`
# Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
indexed_count = {
"count": 0
}
......@@ -153,15 +152,20 @@ class SearchIndexerBase(object):
# list - those are ready to be destroyed
indexed_items = set()
# items_index is a list of all the items index dictionaries.
# it is used to collect all indexes and index them using bulk API,
# instead of per item index API call.
items_index = []
def get_item_location(item):
"""
Gets the version agnostic item location
"""
return item.location.version_agnostic().replace(branch=None)
def index_item(item, skip_index=False, groups_usage_info=None):
def prepare_item_index(item, skip_index=False, groups_usage_info=None):
"""
Add this item to the search index and indexed_items list
Add this item to the items_index and indexed_items list
Arguments:
item - item to add to index, its children will be processed recursively
......@@ -212,7 +216,7 @@ class SearchIndexerBase(object):
for child_item in item.get_children():
if modulestore.has_published_version(child_item):
children_groups_usage.append(
index_item(
prepare_item_index(
child_item,
skip_index=skip_child_index,
groups_usage_info=groups_usage_info
......@@ -234,7 +238,7 @@ class SearchIndexerBase(object):
item_index['start_date'] = item.start
item_index['content_groups'] = item_content_groups if item_content_groups else None
item_index.update(cls.supplemental_fields(item))
searcher.index(cls.DOCUMENT_TYPE, item_index)
items_index.append(item_index)
indexed_count["count"] += 1
return item_content_groups
except Exception as err: # pylint: disable=broad-except
......@@ -252,7 +256,8 @@ class SearchIndexerBase(object):
# Now index the content
for item in structure.get_children():
index_item(item, groups_usage_info=groups_usage_info)
prepare_item_index(item, groups_usage_info=groups_usage_info)
searcher.index(cls.DOCUMENT_TYPE, items_index)
cls.remove_deleted_items(searcher, structure_key, indexed_items)
except Exception as err: # pylint: disable=broad-except
# broad exception so that index operation does not prevent the rest of the application from working
......@@ -623,7 +628,7 @@ class CourseAboutSearchIndexer(object):
# Broad exception handler to protect around and report problems with indexing
try:
searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, course_info)
searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
except: # pylint: disable=bare-except
log.exception(
"Course discovery indexing error encountered, course discovery index may be out of date %s",
......
......@@ -1167,95 +1167,91 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
def _html_group_result(self, html_unit, content_groups):
"""
Return call object with arguments and content group for html_unit.
Return object with arguments and content group for html_unit.
"""
return call(
'courseware_content',
{
'course_name': unicode(self.course.display_name),
'id': unicode(html_unit.location),
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)},
'course': unicode(self.course.id),
'location': [
unicode(self.chapter.display_name),
unicode(self.sequential.display_name),
unicode(html_unit.parent.display_name)
],
'content_type': 'Text',
'org': self.course.org,
'content_groups': content_groups,
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
}
)
return {
'course_name': self.course.display_name,
'id': unicode(html_unit.location),
'content': {'html_content': '', 'display_name': html_unit.display_name},
'course': unicode(self.course.id),
'location': [
self.chapter.display_name,
self.sequential.display_name,
html_unit.parent.display_name
],
'content_type': 'Text',
'org': self.course.org,
'content_groups': content_groups,
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
}
def _html_experiment_group_result(self, html_unit, content_groups):
"""
Return call object with arguments and content group for html_unit.
Return object with arguments and content group for html_unit.
"""
return call(
'courseware_content',
{
'course_name': unicode(self.course.display_name),
'id': unicode(html_unit.location),
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)},
'course': unicode(self.course.id),
'location': [
unicode(self.chapter.display_name),
unicode(self.sequential2.display_name),
unicode(self.vertical3.display_name)
],
'content_type': 'Text',
'org': self.course.org,
'content_groups': content_groups,
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
}
)
return {
'course_name': self.course.display_name,
'id': unicode(html_unit.location),
'content': {'html_content': '', 'display_name': html_unit.display_name},
'course': unicode(self.course.id),
'location': [
self.chapter.display_name,
self.sequential2.display_name,
self.vertical3.display_name
],
'content_type': 'Text',
'org': self.course.org,
'content_groups': content_groups,
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
}
def _vertical_experiment_group_result(self, vertical, content_groups):
"""
Return call object with arguments and content group for split_test vertical.
Return object with arguments and content group for split_test vertical.
"""
return call(
'courseware_content',
{
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()),
'content': {'display_name': unicode(vertical.display_name)},
'course': unicode(self.course.id),
'location': [
unicode(self.chapter.display_name),
unicode(self.sequential2.display_name),
unicode(vertical.parent.display_name)
],
'content_type': 'Sequence',
'content_groups': content_groups,
'id': unicode(vertical.location),
'course_name': unicode(self.course.display_name),
'org': self.course.org
}
)
return {
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()),
'content': {'display_name': vertical.display_name},
'course': unicode(self.course.id),
'location': [
self.chapter.display_name,
self.sequential2.display_name,
vertical.parent.display_name
],
'content_type': 'Sequence',
'content_groups': content_groups,
'id': unicode(vertical.location),
'course_name': self.course.display_name,
'org': self.course.org
}
def _html_nogroup_result(self, html_unit):
"""
Return call object with arguments and content group set to empty array for html_unit.
Return object with arguments and content group set to empty array for html_unit.
"""
return call(
'courseware_content',
{
'course_name': unicode(self.course.display_name),
'id': unicode(html_unit.location),
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)},
'course': unicode(self.course.id),
'location': [
unicode(self.chapter.display_name),
unicode(self.sequential.display_name),
unicode(html_unit.parent.display_name)
],
'content_type': 'Text',
'org': self.course.org,
'content_groups': None,
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
}
)
return {
'course_name': self.course.display_name,
'id': unicode(html_unit.location),
'content': {'html_content': '', 'display_name': html_unit.display_name},
'course': unicode(self.course.id),
'location': [
self.chapter.display_name,
self.sequential.display_name,
html_unit.parent.display_name
],
'content_type': 'Text',
'org': self.course.org,
'content_groups': None,
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
}
def _get_index_values_from_call_args(self, mock_index):
"""
Return content values from args tuple in a mocked calls list.
"""
kall = mock_index.call_args
args, kwargs = kall # pylint: disable=unused-variable
return args[1]
def reindex_course(self, store):
""" kick off complete reindex of the course """
......@@ -1283,46 +1279,47 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store)
self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls)
self.assertIn(self._html_experiment_group_result(self.html_unit4, [unicode(2)]), mock_index.mock_calls)
self.assertIn(self._html_experiment_group_result(self.html_unit5, [unicode(3)]), mock_index.mock_calls)
self.assertIn(self._html_experiment_group_result(self.html_unit6, [unicode(4)]), mock_index.mock_calls)
self.assertNotIn(self._html_experiment_group_result(self.html_unit6, [unicode(5)]), mock_index.mock_calls)
indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
self.assertIn(self._html_experiment_group_result(self.html_unit4, [unicode(2)]), indexed_content)
self.assertIn(self._html_experiment_group_result(self.html_unit5, [unicode(3)]), indexed_content)
self.assertIn(self._html_experiment_group_result(self.html_unit6, [unicode(4)]), indexed_content)
self.assertNotIn(self._html_experiment_group_result(self.html_unit6, [unicode(5)]), indexed_content)
self.assertIn(
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(2)]),
mock_index.mock_calls
indexed_content
)
self.assertNotIn(
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(2)]),
mock_index.mock_calls
indexed_content
)
self.assertNotIn(
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(2)]),
mock_index.mock_calls
indexed_content
)
self.assertNotIn(
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(3)]),
mock_index.mock_calls
indexed_content
)
self.assertIn(
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(3)]),
mock_index.mock_calls
indexed_content
)
self.assertNotIn(
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(3)]),
mock_index.mock_calls
indexed_content
)
self.assertNotIn(
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(4)]),
mock_index.mock_calls
indexed_content
)
self.assertNotIn(
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(4)]),
mock_index.mock_calls
indexed_content
)
self.assertIn(
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(4)]),
mock_index.mock_calls
indexed_content
)
mock_index.reset_mock()
......@@ -1332,7 +1329,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store)
self.assertTrue(mock_index.called)
self.assertIn(self._html_nogroup_result(self.html_unit1), mock_index.mock_calls)
indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_nogroup_result(self.html_unit1), indexed_content)
mock_index.reset_mock()
def test_content_group_not_indexed_on_delete(self):
......@@ -1351,7 +1349,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store)
self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls)
indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
mock_index.reset_mock()
empty_group_access = {'group_access': {}}
......@@ -1367,7 +1366,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store)
self.assertTrue(mock_index.called)
self.assertIn(self._html_nogroup_result(self.html_unit1), mock_index.mock_calls)
indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_nogroup_result(self.html_unit1), indexed_content)
mock_index.reset_mock()
def test_group_indexed_only_on_assigned_html_block(self):
......@@ -1383,8 +1383,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store)
self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls)
self.assertIn(self._html_nogroup_result(self.html_unit2), mock_index.mock_calls)
indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
self.assertIn(self._html_nogroup_result(self.html_unit2), indexed_content)
mock_index.reset_mock()
def test_different_groups_indexed_on_assigned_html_blocks(self):
......@@ -1407,8 +1408,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store)
self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls)
self.assertIn(self._html_group_result(self.html_unit2, [0]), mock_index.mock_calls)
indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
self.assertIn(self._html_group_result(self.html_unit2, [0]), indexed_content)
mock_index.reset_mock()
def test_different_groups_indexed_on_same_vertical_html_blocks(self):
......@@ -1435,8 +1437,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store)
self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit2, [1]), mock_index.mock_calls)
self.assertIn(self._html_group_result(self.html_unit3, [0]), mock_index.mock_calls)
indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_group_result(self.html_unit2, [1]), indexed_content)
self.assertIn(self._html_group_result(self.html_unit3, [0]), indexed_content)
mock_index.reset_mock()
......
......@@ -47,7 +47,7 @@ git+https://github.com/edx/ease.git@release-2015-07-14#egg=ease==0.1.3
git+https://github.com/edx/edx-oauth2-provider.git@0.5.2#egg=oauth2-provider==0.5.2
-e git+https://github.com/edx/edx-val.git@v0.0.5#egg=edx-val
-e git+https://github.com/pmitros/RecommenderXBlock.git@518234bc354edbfc2651b9e534ddb54f96080779#egg=recommender-xblock
-e git+https://github.com/edx/edx-search.git@release-2015-07-03#egg=edx-search
-e git+https://github.com/edx/edx-search.git@release-2015-07-14#egg=edx-search
-e git+https://github.com/edx/edx-milestones.git@release-2015-06-17#egg=edx-milestones
git+https://github.com/edx/edx-lint.git@ed8c8d2a0267d4d42f43642d193e25f8bd575d9b#egg=edx_lint==0.2.3
-e git+https://github.com/edx/xblock-utils.git@213a97a50276d6a2504d8133650b2930ead357a0#egg=xblock-utils
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment