Commit 862262bd by Dino Cikatic

SOL-974 make index and remove calls use ES bulk API

parent 29df7506
...@@ -111,8 +111,7 @@ class SearchIndexerBase(object): ...@@ -111,8 +111,7 @@ class SearchIndexerBase(object):
exclude_dictionary={"id": list(exclude_items)} exclude_dictionary={"id": list(exclude_items)}
) )
result_ids = [result["data"]["id"] for result in response["results"]] result_ids = [result["data"]["id"] for result in response["results"]]
for result_id in result_ids: searcher.remove(cls.DOCUMENT_TYPE, result_ids)
searcher.remove(cls.DOCUMENT_TYPE, result_id)
@classmethod @classmethod
def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE): def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
...@@ -142,7 +141,7 @@ class SearchIndexerBase(object): ...@@ -142,7 +141,7 @@ class SearchIndexerBase(object):
structure_key = cls.normalize_structure_key(structure_key) structure_key = cls.normalize_structure_key(structure_key)
location_info = cls._get_location_info(structure_key) location_info = cls._get_location_info(structure_key)
# Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item` # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
indexed_count = { indexed_count = {
"count": 0 "count": 0
} }
...@@ -153,15 +152,20 @@ class SearchIndexerBase(object): ...@@ -153,15 +152,20 @@ class SearchIndexerBase(object):
# list - those are ready to be destroyed # list - those are ready to be destroyed
indexed_items = set() indexed_items = set()
# items_index is a list of all the items index dictionaries.
# it is used to collect all indexes and index them using bulk API,
# instead of per item index API call.
items_index = []
def get_item_location(item): def get_item_location(item):
""" """
Gets the version agnostic item location Gets the version agnostic item location
""" """
return item.location.version_agnostic().replace(branch=None) return item.location.version_agnostic().replace(branch=None)
def index_item(item, skip_index=False, groups_usage_info=None): def prepare_item_index(item, skip_index=False, groups_usage_info=None):
""" """
Add this item to the search index and indexed_items list Add this item to the items_index and indexed_items list
Arguments: Arguments:
item - item to add to index, its children will be processed recursively item - item to add to index, its children will be processed recursively
...@@ -212,7 +216,7 @@ class SearchIndexerBase(object): ...@@ -212,7 +216,7 @@ class SearchIndexerBase(object):
for child_item in item.get_children(): for child_item in item.get_children():
if modulestore.has_published_version(child_item): if modulestore.has_published_version(child_item):
children_groups_usage.append( children_groups_usage.append(
index_item( prepare_item_index(
child_item, child_item,
skip_index=skip_child_index, skip_index=skip_child_index,
groups_usage_info=groups_usage_info groups_usage_info=groups_usage_info
...@@ -234,7 +238,7 @@ class SearchIndexerBase(object): ...@@ -234,7 +238,7 @@ class SearchIndexerBase(object):
item_index['start_date'] = item.start item_index['start_date'] = item.start
item_index['content_groups'] = item_content_groups if item_content_groups else None item_index['content_groups'] = item_content_groups if item_content_groups else None
item_index.update(cls.supplemental_fields(item)) item_index.update(cls.supplemental_fields(item))
searcher.index(cls.DOCUMENT_TYPE, item_index) items_index.append(item_index)
indexed_count["count"] += 1 indexed_count["count"] += 1
return item_content_groups return item_content_groups
except Exception as err: # pylint: disable=broad-except except Exception as err: # pylint: disable=broad-except
...@@ -252,7 +256,8 @@ class SearchIndexerBase(object): ...@@ -252,7 +256,8 @@ class SearchIndexerBase(object):
# Now index the content # Now index the content
for item in structure.get_children(): for item in structure.get_children():
index_item(item, groups_usage_info=groups_usage_info) prepare_item_index(item, groups_usage_info=groups_usage_info)
searcher.index(cls.DOCUMENT_TYPE, items_index)
cls.remove_deleted_items(searcher, structure_key, indexed_items) cls.remove_deleted_items(searcher, structure_key, indexed_items)
except Exception as err: # pylint: disable=broad-except except Exception as err: # pylint: disable=broad-except
# broad exception so that index operation does not prevent the rest of the application from working # broad exception so that index operation does not prevent the rest of the application from working
...@@ -623,7 +628,7 @@ class CourseAboutSearchIndexer(object): ...@@ -623,7 +628,7 @@ class CourseAboutSearchIndexer(object):
# Broad exception handler to protect around and report problems with indexing # Broad exception handler to protect around and report problems with indexing
try: try:
searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, course_info) searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
except: # pylint: disable=bare-except except: # pylint: disable=bare-except
log.exception( log.exception(
"Course discovery indexing error encountered, course discovery index may be out of date %s", "Course discovery indexing error encountered, course discovery index may be out of date %s",
......
...@@ -1167,95 +1167,91 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): ...@@ -1167,95 +1167,91 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
def _html_group_result(self, html_unit, content_groups): def _html_group_result(self, html_unit, content_groups):
""" """
Return call object with arguments and content group for html_unit. Return object with arguments and content group for html_unit.
""" """
return call( return {
'courseware_content', 'course_name': self.course.display_name,
{ 'id': unicode(html_unit.location),
'course_name': unicode(self.course.display_name), 'content': {'html_content': '', 'display_name': html_unit.display_name},
'id': unicode(html_unit.location), 'course': unicode(self.course.id),
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)}, 'location': [
'course': unicode(self.course.id), self.chapter.display_name,
'location': [ self.sequential.display_name,
unicode(self.chapter.display_name), html_unit.parent.display_name
unicode(self.sequential.display_name), ],
unicode(html_unit.parent.display_name) 'content_type': 'Text',
], 'org': self.course.org,
'content_type': 'Text', 'content_groups': content_groups,
'org': self.course.org, 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
'content_groups': content_groups, }
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
}
)
def _html_experiment_group_result(self, html_unit, content_groups): def _html_experiment_group_result(self, html_unit, content_groups):
""" """
Return call object with arguments and content group for html_unit. Return object with arguments and content group for html_unit.
""" """
return call( return {
'courseware_content', 'course_name': self.course.display_name,
{ 'id': unicode(html_unit.location),
'course_name': unicode(self.course.display_name), 'content': {'html_content': '', 'display_name': html_unit.display_name},
'id': unicode(html_unit.location), 'course': unicode(self.course.id),
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)}, 'location': [
'course': unicode(self.course.id), self.chapter.display_name,
'location': [ self.sequential2.display_name,
unicode(self.chapter.display_name), self.vertical3.display_name
unicode(self.sequential2.display_name), ],
unicode(self.vertical3.display_name) 'content_type': 'Text',
], 'org': self.course.org,
'content_type': 'Text', 'content_groups': content_groups,
'org': self.course.org, 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
'content_groups': content_groups, }
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
}
)
def _vertical_experiment_group_result(self, vertical, content_groups): def _vertical_experiment_group_result(self, vertical, content_groups):
""" """
Return call object with arguments and content group for split_test vertical. Return object with arguments and content group for split_test vertical.
""" """
return call( return {
'courseware_content', 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()),
{ 'content': {'display_name': vertical.display_name},
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()), 'course': unicode(self.course.id),
'content': {'display_name': unicode(vertical.display_name)}, 'location': [
'course': unicode(self.course.id), self.chapter.display_name,
'location': [ self.sequential2.display_name,
unicode(self.chapter.display_name), vertical.parent.display_name
unicode(self.sequential2.display_name), ],
unicode(vertical.parent.display_name) 'content_type': 'Sequence',
], 'content_groups': content_groups,
'content_type': 'Sequence', 'id': unicode(vertical.location),
'content_groups': content_groups, 'course_name': self.course.display_name,
'id': unicode(vertical.location), 'org': self.course.org
'course_name': unicode(self.course.display_name), }
'org': self.course.org
}
)
def _html_nogroup_result(self, html_unit): def _html_nogroup_result(self, html_unit):
""" """
Return call object with arguments and content group set to empty array for html_unit. Return object with arguments and content group set to empty array for html_unit.
""" """
return call( return {
'courseware_content', 'course_name': self.course.display_name,
{ 'id': unicode(html_unit.location),
'course_name': unicode(self.course.display_name), 'content': {'html_content': '', 'display_name': html_unit.display_name},
'id': unicode(html_unit.location), 'course': unicode(self.course.id),
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)}, 'location': [
'course': unicode(self.course.id), self.chapter.display_name,
'location': [ self.sequential.display_name,
unicode(self.chapter.display_name), html_unit.parent.display_name
unicode(self.sequential.display_name), ],
unicode(html_unit.parent.display_name) 'content_type': 'Text',
], 'org': self.course.org,
'content_type': 'Text', 'content_groups': None,
'org': self.course.org, 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
'content_groups': None, }
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
} def _get_index_values_from_call_args(self, mock_index):
) """
Return content values from args tuple in a mocked calls list.
"""
kall = mock_index.call_args
args, kwargs = kall # pylint: disable=unused-variable
return args[1]
def reindex_course(self, store): def reindex_course(self, store):
""" kick off complete reindex of the course """ """ kick off complete reindex of the course """
...@@ -1283,46 +1279,47 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): ...@@ -1283,46 +1279,47 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index: with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store) self.reindex_course(self.store)
self.assertTrue(mock_index.called) self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls) indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_experiment_group_result(self.html_unit4, [unicode(2)]), mock_index.mock_calls) self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
self.assertIn(self._html_experiment_group_result(self.html_unit5, [unicode(3)]), mock_index.mock_calls) self.assertIn(self._html_experiment_group_result(self.html_unit4, [unicode(2)]), indexed_content)
self.assertIn(self._html_experiment_group_result(self.html_unit6, [unicode(4)]), mock_index.mock_calls) self.assertIn(self._html_experiment_group_result(self.html_unit5, [unicode(3)]), indexed_content)
self.assertNotIn(self._html_experiment_group_result(self.html_unit6, [unicode(5)]), mock_index.mock_calls) self.assertIn(self._html_experiment_group_result(self.html_unit6, [unicode(4)]), indexed_content)
self.assertNotIn(self._html_experiment_group_result(self.html_unit6, [unicode(5)]), indexed_content)
self.assertIn( self.assertIn(
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(2)]), self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(2)]),
mock_index.mock_calls indexed_content
) )
self.assertNotIn( self.assertNotIn(
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(2)]), self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(2)]),
mock_index.mock_calls indexed_content
) )
self.assertNotIn( self.assertNotIn(
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(2)]), self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(2)]),
mock_index.mock_calls indexed_content
) )
self.assertNotIn( self.assertNotIn(
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(3)]), self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(3)]),
mock_index.mock_calls indexed_content
) )
self.assertIn( self.assertIn(
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(3)]), self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(3)]),
mock_index.mock_calls indexed_content
) )
self.assertNotIn( self.assertNotIn(
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(3)]), self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(3)]),
mock_index.mock_calls indexed_content
) )
self.assertNotIn( self.assertNotIn(
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(4)]), self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(4)]),
mock_index.mock_calls indexed_content
) )
self.assertNotIn( self.assertNotIn(
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(4)]), self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(4)]),
mock_index.mock_calls indexed_content
) )
self.assertIn( self.assertIn(
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(4)]), self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(4)]),
mock_index.mock_calls indexed_content
) )
mock_index.reset_mock() mock_index.reset_mock()
...@@ -1332,7 +1329,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): ...@@ -1332,7 +1329,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index: with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store) self.reindex_course(self.store)
self.assertTrue(mock_index.called) self.assertTrue(mock_index.called)
self.assertIn(self._html_nogroup_result(self.html_unit1), mock_index.mock_calls) indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_nogroup_result(self.html_unit1), indexed_content)
mock_index.reset_mock() mock_index.reset_mock()
def test_content_group_not_indexed_on_delete(self): def test_content_group_not_indexed_on_delete(self):
...@@ -1351,7 +1349,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): ...@@ -1351,7 +1349,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index: with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store) self.reindex_course(self.store)
self.assertTrue(mock_index.called) self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls) indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
mock_index.reset_mock() mock_index.reset_mock()
empty_group_access = {'group_access': {}} empty_group_access = {'group_access': {}}
...@@ -1367,7 +1366,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): ...@@ -1367,7 +1366,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index: with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store) self.reindex_course(self.store)
self.assertTrue(mock_index.called) self.assertTrue(mock_index.called)
self.assertIn(self._html_nogroup_result(self.html_unit1), mock_index.mock_calls) indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_nogroup_result(self.html_unit1), indexed_content)
mock_index.reset_mock() mock_index.reset_mock()
def test_group_indexed_only_on_assigned_html_block(self): def test_group_indexed_only_on_assigned_html_block(self):
...@@ -1383,8 +1383,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): ...@@ -1383,8 +1383,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index: with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store) self.reindex_course(self.store)
self.assertTrue(mock_index.called) self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls) indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_nogroup_result(self.html_unit2), mock_index.mock_calls) self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
self.assertIn(self._html_nogroup_result(self.html_unit2), indexed_content)
mock_index.reset_mock() mock_index.reset_mock()
def test_different_groups_indexed_on_assigned_html_blocks(self): def test_different_groups_indexed_on_assigned_html_blocks(self):
...@@ -1407,8 +1408,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): ...@@ -1407,8 +1408,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index: with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store) self.reindex_course(self.store)
self.assertTrue(mock_index.called) self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls) indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_group_result(self.html_unit2, [0]), mock_index.mock_calls) self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
self.assertIn(self._html_group_result(self.html_unit2, [0]), indexed_content)
mock_index.reset_mock() mock_index.reset_mock()
def test_different_groups_indexed_on_same_vertical_html_blocks(self): def test_different_groups_indexed_on_same_vertical_html_blocks(self):
...@@ -1435,8 +1437,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): ...@@ -1435,8 +1437,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
with patch(settings.SEARCH_ENGINE + '.index') as mock_index: with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
self.reindex_course(self.store) self.reindex_course(self.store)
self.assertTrue(mock_index.called) self.assertTrue(mock_index.called)
self.assertIn(self._html_group_result(self.html_unit2, [1]), mock_index.mock_calls) indexed_content = self._get_index_values_from_call_args(mock_index)
self.assertIn(self._html_group_result(self.html_unit3, [0]), mock_index.mock_calls) self.assertIn(self._html_group_result(self.html_unit2, [1]), indexed_content)
self.assertIn(self._html_group_result(self.html_unit3, [0]), indexed_content)
mock_index.reset_mock() mock_index.reset_mock()
......
...@@ -47,7 +47,7 @@ git+https://github.com/edx/ease.git@release-2015-07-14#egg=ease==0.1.3 ...@@ -47,7 +47,7 @@ git+https://github.com/edx/ease.git@release-2015-07-14#egg=ease==0.1.3
git+https://github.com/edx/edx-oauth2-provider.git@0.5.2#egg=oauth2-provider==0.5.2 git+https://github.com/edx/edx-oauth2-provider.git@0.5.2#egg=oauth2-provider==0.5.2
-e git+https://github.com/edx/edx-val.git@v0.0.5#egg=edx-val -e git+https://github.com/edx/edx-val.git@v0.0.5#egg=edx-val
-e git+https://github.com/pmitros/RecommenderXBlock.git@518234bc354edbfc2651b9e534ddb54f96080779#egg=recommender-xblock -e git+https://github.com/pmitros/RecommenderXBlock.git@518234bc354edbfc2651b9e534ddb54f96080779#egg=recommender-xblock
-e git+https://github.com/edx/edx-search.git@release-2015-07-03#egg=edx-search -e git+https://github.com/edx/edx-search.git@release-2015-07-14#egg=edx-search
-e git+https://github.com/edx/edx-milestones.git@release-2015-06-17#egg=edx-milestones -e git+https://github.com/edx/edx-milestones.git@release-2015-06-17#egg=edx-milestones
git+https://github.com/edx/edx-lint.git@ed8c8d2a0267d4d42f43642d193e25f8bd575d9b#egg=edx_lint==0.2.3 git+https://github.com/edx/edx-lint.git@ed8c8d2a0267d4d42f43642d193e25f8bd575d9b#egg=edx_lint==0.2.3
-e git+https://github.com/edx/xblock-utils.git@213a97a50276d6a2504d8133650b2930ead357a0#egg=xblock-utils -e git+https://github.com/edx/xblock-utils.git@213a97a50276d6a2504d8133650b2930ead357a0#egg=xblock-utils
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment