Commit 3f831178 by Ben McMorran

Merge pull request #24 from edx/benmcmorran/search-tags

Search and highlight tags in student notes TNL-1927
parents 4631c73e 8e05ad10
Oleg Marshev <oleg@edx.org> Oleg Marshev <oleg@edx.org>
Tim Babych <tim.babych@gmail.com> Tim Babych <tim.babych@gmail.com>
Christina Roberts <christina@edx.org> Christina Roberts <christina@edx.org>
Ben McMorran <ben.mcmorran@gmail.com>
...@@ -12,6 +12,7 @@ class NoteIndex(indexes.SearchIndex, indexes.Indexable): ...@@ -12,6 +12,7 @@ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
created = indexes.DateTimeField(model_attr='created') created = indexes.DateTimeField(model_attr='created')
updated = indexes.DateTimeField(model_attr='updated') updated = indexes.DateTimeField(model_attr='updated')
tags = indexes.CharField(model_attr='tags') tags = indexes.CharField(model_attr='tags')
data = indexes.CharField(use_template=True)
def get_model(self): def get_model(self):
return Note return Note
......
...@@ -10,6 +10,7 @@ from django.core.management import call_command ...@@ -10,6 +10,7 @@ from django.core.management import call_command
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from django.conf import settings from django.conf import settings
from django.http import QueryDict from django.http import QueryDict
from django.test.utils import override_settings
from rest_framework import status from rest_framework import status
from rest_framework.test import APITestCase from rest_framework.test import APITestCase
...@@ -481,11 +482,13 @@ class AnnotationSearchViewTests(BaseAnnotationViewTests): ...@@ -481,11 +482,13 @@ class AnnotationSearchViewTests(BaseAnnotationViewTests):
results = self._get_search_results(text="first", highlight=True, highlight_tag='tag', highlight_class='klass') results = self._get_search_results(text="first", highlight=True, highlight_tag='tag', highlight_class='klass')
self.assertEqual(results['rows'][0]['text'], '<tag class="klass">First</tag> note') self.assertEqual(results['rows'][0]['text'], '<tag class="klass">First</tag> note')
def test_search_ordering(self):
@override_settings(ES_DISABLED=True)
def test_search_ordering_in_db(self):
""" """
Tests ordering of search results. Tests ordering of search results from MySQL.
Sorting is by descending order by updated field (most recent first). MySQL sorting is by descending order by updated field (most recent first).
""" """
self._create_annotation(text=u'First one') self._create_annotation(text=u'First one')
note = self._create_annotation(text=u'Second note') note = self._create_annotation(text=u'Second note')
...@@ -502,6 +505,24 @@ class AnnotationSearchViewTests(BaseAnnotationViewTests): ...@@ -502,6 +505,24 @@ class AnnotationSearchViewTests(BaseAnnotationViewTests):
self.assertEqual(results['rows'][1]['text'], 'Third note') self.assertEqual(results['rows'][1]['text'], 'Third note')
self.assertEqual(results['rows'][2]['text'], 'First one') self.assertEqual(results['rows'][2]['text'], 'First one')
@unittest.skipIf(settings.ES_DISABLED, "MySQL does not do relevance ordering")
def test_search_ordering_in_es(self):
"""
Tests order of search results from ElasticSearch.
ElasticSearch sorting is based on the computed relevance of each hit.
"""
self._create_annotation(text=u'fox of the foxes')
self._create_annotation(text=u'a very long entry that contains the word fox')
self._create_annotation(text=u'the lead fox')
self._create_annotation(text=u'does not mention the word')
results = self._get_search_results(text='fox')
self.assertEqual(results['total'], 3)
self.assertEqual(results['rows'][0]['text'], 'fox of the foxes')
self.assertEqual(results['rows'][1]['text'], 'the lead fox')
self.assertEqual(results['rows'][2]['text'], 'a very long entry that contains the word fox')
@unittest.skipIf(settings.ES_DISABLED, "Unicode support in MySQL is limited") @unittest.skipIf(settings.ES_DISABLED, "Unicode support in MySQL is limited")
def test_search_unicode(self): def test_search_unicode(self):
""" """
...@@ -546,6 +567,103 @@ class AnnotationSearchViewTests(BaseAnnotationViewTests): ...@@ -546,6 +567,103 @@ class AnnotationSearchViewTests(BaseAnnotationViewTests):
self.assertEqual(results['total'], 1) self.assertEqual(results['total'], 1)
self.assertEqual(results['rows'][0]['text'], u'Third note') self.assertEqual(results['rows'][0]['text'], u'Third note')
def test_search_tag(self):
"""
Tests searching for tags
"""
self._create_annotation(text=u'First note', tags=[u'foo', u'bar'])
self._create_annotation(text=u'Another one', tags=[u'bar'])
self._create_annotation(text=u'A third note', tags=[u'bar', u'baz'])
self._create_annotation(text=u'One final note', tags=[])
results = self._get_search_results(text='Foo')
self.assertEqual(results['total'], 1)
self.assertEqual(results['rows'][0]['text'], 'First note')
results = self._get_search_results(text='bar')
self.assertEqual(results['total'], 3)
self._has_text(results['rows'], ['First note', 'Another one', 'A third note'])
results = self._get_search_results(text='baz')
self.assertEqual(results['total'], 1)
self.assertEqual(results['rows'][0]['text'], 'A third note')
def test_search_tag_or_text(self):
"""
Tests that searches can match against tags or text
"""
self._search_tag_or_text()
@override_settings(ES_DISABLED=True)
def test_search_tag_or_text_in_db(self):
"""
Tests that searches can match against tags or text without ElasticSearch
"""
self._search_tag_or_text()
def _search_tag_or_text(self):
"""
Tests that searches can match against tags or text
"""
self._create_annotation(text=u'A great comment', tags=[])
self._create_annotation(text=u'Another comment', tags=['good'])
self._create_annotation(text=u'Not as good', tags=['comment'])
self._create_annotation(text=u'Last note', tags=[])
results = self._get_search_results(text='note')
self.assertEqual(results['total'], 1)
self._has_text(results['rows'], ['Last note'])
results = self._get_search_results(text='good')
self.assertEquals(results['total'], 2)
self._has_text(results['rows'], ['Another comment', 'Not as good'])
results = self._get_search_results(text='comment')
self.assertEquals(results['total'], 3)
self._has_text(results['rows'], ['A great comment', 'Another comment', 'Not as good'])
def _has_text(self, rows, expected):
"""
Tests that the set of expected text is exactly the text in rows, ignoring order.
"""
self.assertEqual(set(row['text'] for row in rows), set(expected))
@unittest.skipIf(settings.ES_DISABLED, "MySQL does not do data templating")
def test_search_across_tag_and_text(self):
"""
Tests that searches can match if some terms are in the text and the rest are in the tags.
"""
self._create_annotation(text=u'Comment with foo', tags=[u'bar'])
self._create_annotation(text=u'Another comment', tags=[u'foo'])
self._create_annotation(text=u'A longer comment with bar', tags=[u'foo'])
results = self._get_search_results(text='foo bar')
self.assertEqual(results['total'], 2)
self.assertEqual(results['rows'][0]['text'], 'Comment with foo')
self.assertEqual(results['rows'][1]['text'], 'A longer comment with bar')
@unittest.skipIf(settings.ES_DISABLED, "MySQL does not do highlighing")
def test_search_highlight_tag(self):
"""
Tests highlighting in tags
"""
self._create_annotation(text=u'First note', tags=[u'foo', u'bar'])
self._create_annotation(text=u'Second note', tags=[u'baz'])
results = self._get_search_results()
self.assertEqual(results['total'], 2)
results = self._get_search_results(text="bar", highlight=True)
self.assertEqual(results['total'], 1)
self.assertEqual(len(results['rows']), 1)
self.assertEqual(results['rows'][0]['tags'], ['foo', '<em>bar</em>'])
results = self._get_search_results(text="bar", highlight=True, highlight_tag='tag')
self.assertEqual(results['rows'][0]['tags'], ['foo', '<tag>bar</tag>'])
results = self._get_search_results(text="bar", highlight=True, highlight_tag='tag', highlight_class='klass')
self.assertEqual(results['rows'][0]['tags'], ['foo', '<tag class="klass">bar</tag>'])
@patch('django.conf.settings.DISABLE_TOKEN_CHECK', True) @patch('django.conf.settings.DISABLE_TOKEN_CHECK', True)
class AllowAllAnnotationViewTests(BaseAnnotationViewTests): class AllowAllAnnotationViewTests(BaseAnnotationViewTests):
......
...@@ -4,11 +4,14 @@ import json ...@@ -4,11 +4,14 @@ import json
from django.conf import settings from django.conf import settings
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from django.db.models import Q
from rest_framework import status from rest_framework import status
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.views import APIView from rest_framework.views import APIView
from haystack.query import SQ
from notesapi.v1.models import Note from notesapi.v1.models import Note
if not settings.ES_DISABLED: if not settings.ES_DISABLED:
...@@ -45,7 +48,7 @@ class AnnotationSearchView(APIView): ...@@ -45,7 +48,7 @@ class AnnotationSearchView(APIView):
query = query.filter(user_id=params['user']) query = query.filter(user_id=params['user'])
if 'text' in params: if 'text' in params:
query = query.filter(text__icontains=params['text']) query = query.filter(Q(text__icontains=params['text']) | Q(tags__icontains=params['text']))
return [note.as_dict() for note in query] return [note.as_dict() for note in query]
...@@ -55,8 +58,12 @@ class AnnotationSearchView(APIView): ...@@ -55,8 +58,12 @@ class AnnotationSearchView(APIView):
""" """
params = self.request.QUERY_PARAMS.dict() params = self.request.QUERY_PARAMS.dict()
query = SearchQuerySet().models(Note).filter( query = SearchQuerySet().models(Note).filter(
**{f: v for (f, v) in params.items() if f in ('user', 'course_id', 'usage_id', 'text')} **{f: v for (f, v) in params.items() if f in ('user', 'course_id', 'usage_id')}
).order_by('-updated') )
if 'text' in params:
clean_text = query.query.clean(params['text'])
query = query.filter(SQ(data=clean_text))
if params.get('highlight'): if params.get('highlight'):
tag = params.get('highlight_tag', 'em') tag = params.get('highlight_tag', 'em')
...@@ -64,7 +71,7 @@ class AnnotationSearchView(APIView): ...@@ -64,7 +71,7 @@ class AnnotationSearchView(APIView):
opts = { opts = {
'pre_tags': ['<{tag}{klass_str}>'.format( 'pre_tags': ['<{tag}{klass_str}>'.format(
tag=tag, tag=tag,
klass_str=' class="{}"'.format(klass) if klass else '' klass_str=' class=\\"{}\\"'.format(klass) if klass else ''
)], )],
'post_tags': ['</{tag}>'.format(tag=tag)], 'post_tags': ['</{tag}>'.format(tag=tag)],
} }
...@@ -78,7 +85,9 @@ class AnnotationSearchView(APIView): ...@@ -78,7 +85,9 @@ class AnnotationSearchView(APIView):
note_dict['tags'] = json.loads(item.tags) if item.tags else [] note_dict['tags'] = json.loads(item.tags) if item.tags else []
note_dict['id'] = str(item.pk) note_dict['id'] = str(item.pk)
if item.highlighted: if item.highlighted:
note_dict['text'] = item.highlighted[0] note_dict['text'] = item.highlighted[0].decode('unicode_escape')
if item.highlighted_tags:
note_dict['tags'] = json.loads(item.highlighted_tags[0])
results.append(note_dict) results.append(note_dict)
return results return results
......
...@@ -39,13 +39,30 @@ class ElasticsearchSearchBackend(OrigElasticsearchSearchBackend): ...@@ -39,13 +39,30 @@ class ElasticsearchSearchBackend(OrigElasticsearchSearchBackend):
highlight_options = { highlight_options = {
'fields': { 'fields': {
content_field: {'store': 'yes'}, content_field: {'store': 'yes'},
} 'tags': {'store': 'yes'},
},
} }
if isinstance(highlight, dict): if isinstance(highlight, dict):
highlight_options.update(highlight) highlight_options.update(highlight)
res['highlight'] = highlight_options res['highlight'] = highlight_options
return res return res
def _process_results(
self, raw_results, highlight=False, result_class=None, distance_point=None, geo_sort=False
):
"""
Overrides _process_results from Haystack's ElasticsearchSearchBackend to add highlighted tags to the result
"""
result = super(ElasticsearchSearchBackend, self)._process_results(
raw_results, highlight, result_class, distance_point, geo_sort
)
for i, raw_result in enumerate(raw_results.get('hits', {}).get('hits', [])):
if 'highlight' in raw_result:
result['results'][i].highlighted_tags = raw_result['highlight'].get('tags', '')
return result
class ElasticsearchSearchEngine(OrigElasticsearchSearchEngine): class ElasticsearchSearchEngine(OrigElasticsearchSearchEngine):
backend = ElasticsearchSearchBackend backend = ElasticsearchSearchBackend
......
...@@ -74,3 +74,7 @@ CORS_ALLOW_HEADERS = ( ...@@ -74,3 +74,7 @@ CORS_ALLOW_HEADERS = (
'x-csrftoken', 'x-csrftoken',
'x-annotator-auth-token', 'x-annotator-auth-token',
) )
TEMPLATE_DIRS = (
'templates',
)
{{ object.text }}
{{ object.tags }}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment