Commit 25bc26c7 by Tim Babych

haystack 2

parent 5f1f8026
from optparse import make_option
from django.conf import settings
from django.core.management.base import BaseCommand
from elasticutils.contrib.django import get_es
from notesapi.v1.models import NoteMappingType
class Command(BaseCommand):
    """
    Management command that creates the Elasticsearch index and its mapping.

    The index name is read from ``settings.ES_INDEXES['default']`` and the
    mapping is taken from ``NoteMappingType``. With ``--drop`` the existing
    index is deleted first, so the index is recreated.
    """
    help = 'Creates index and the mapping.'
    option_list = BaseCommand.option_list + (
        make_option(
            '--drop',
            action='store_true',
            dest='drop',
            default=False,
            help='Recreate index'
        ),
    )

    def handle(self, *args, **options):
        """
        Create the default index with the Note mapping.

        When the ``drop`` option is set, the index is deleted beforehand.
        """
        index_name = settings.ES_INDEXES['default']

        if options['drop']:
            # Recreating must also work when the index does not exist yet
            # (e.g. first run), so a 404 from the delete call is ignored
            # instead of aborting the command before the index is created.
            get_es().indices.delete(index=index_name, ignore=404)

        get_es().indices.create(
            index=index_name,
            body={
                'mappings': {
                    NoteMappingType.get_mapping_type_name(): NoteMappingType.get_mapping()
                }
            },
        )
...@@ -4,8 +4,6 @@ from django.core.exceptions import ValidationError ...@@ -4,8 +4,6 @@ from django.core.exceptions import ValidationError
from django.conf import settings from django.conf import settings
from django.db.models import signals from django.db.models import signals
from django.dispatch import receiver from django.dispatch import receiver
from elasticutils.contrib.django import Indexable, MappingType
class Note(models.Model): class Note(models.Model):
...@@ -60,79 +58,3 @@ class Note(models.Model): ...@@ -60,79 +58,3 @@ class Note(models.Model):
'created': created, 'created': created,
'updated': updated, 'updated': updated,
} }
@receiver(signals.post_save, sender=Note)
def update_in_index(sender, instance, **kwargs):
    """
    post_save handler for Note: index the saved instance.

    The instance's ``as_dict()`` output is indexed under ``instance.id``
    with ``overwrite_existing=True``. Nothing is done when
    ``settings.ES_DISABLED`` is truthy.
    """
    if not settings.ES_DISABLED:
        document = instance.as_dict()
        NoteMappingType.index(document, id_=instance.id, overwrite_existing=True)
@receiver(signals.post_delete, sender=Note)
def delete_in_index(sender, instance, **kwargs):
    """
    post_delete handler for Note: unindex the deleted instance by its id.

    Nothing is done when ``settings.ES_DISABLED`` is truthy.
    """
    if not settings.ES_DISABLED:
        NoteMappingType.unindex(id_=instance.id)
class NoteMappingType(MappingType, Indexable):
    """
    Elasticsearch mapping type backed by the Note model.
    """

    @classmethod
    def get_model(cls):
        """
        Return the model class this mapping type is built on.
        """
        return Note

    @classmethod
    def get_mapping(cls):
        """
        Build the Elasticsearch mapping for the Note mapping type.
        """
        # A single definition object is shared by all not-analyzed string fields.
        keyword = {'type': 'string', 'index': 'not_analyzed', 'store': True}
        properties = dict.fromkeys(('id', 'user', 'course_id', 'usage_id'), keyword)

        # Free-text fields use the snowball analyzer.
        for analyzed_field in ('text', 'quote'):
            properties[analyzed_field] = {'type': 'string', 'analyzer': 'snowball', 'store': True}

        for date_field in ('created', 'updated'):
            properties[date_field] = {'type': 'date', 'store': True}

        return {'properties': properties}

    @classmethod
    def extract_document(cls, obj_id, obj=None):
        """
        Return the dictionary form of a note, suitable for indexing.

        When ``obj`` is not supplied, the note is loaded from the model
        by primary key ``obj_id``.
        """
        note = obj if obj is not None else cls.get_model().objects.get(pk=obj_id)
        return note.as_dict()

    @staticmethod
    def process_result(data):
        """
        Unlistify search results and swap in the highlighted `text`.

        Unlistification: ElasticUtils returns data as [{field: value, ...}, ...]
        with values wrapped in lists, which is not what is needed; every
        non-empty list value (except `ranges`) is replaced by its first element.
        Also filed https://github.com/mozilla/elasticutils/pull/285 to make it unnecessary.

        ``data`` is modified in place and returned.
        """
        for index, entry in enumerate(data):
            if not isinstance(entry, dict):
                continue

            for key, value in entry.items():
                is_wrapped = key != 'ranges' and isinstance(value, list) and len(value) > 0
                if is_wrapped:
                    data[index][key] = value[0]

                # Substitute the value of text field by highlighted result.
                if len(entry.es_meta.highlight) and key == 'text':
                    data[index][key] = entry.es_meta.highlight['text'][0]

        return data
# Module-level searcher obtained from NoteMappingType.search().
note_searcher = NoteMappingType.search()
from unittest import TestCase from unittest import TestCase
from notesapi.v1.models import Note, NoteMappingType from notesapi.v1.models import Note
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
...@@ -46,12 +46,4 @@ class NoteTest(TestCase): ...@@ -46,12 +46,4 @@ class NoteTest(TestCase):
with self.assertRaises(ValidationError): with self.assertRaises(ValidationError):
note = Note.create(payload) note = Note.create(payload)
note.full_clean() note.full_clean()
\ No newline at end of file
def test_extract_document(self):
note = Note.create(self.note_dict.copy())
note.save()
self.assertEqual(NoteMappingType.extract_document(note.id), note.as_dict())
def test_get_model(self):
self.assertIsInstance(NoteMappingType.get_model()(), Note)
...@@ -4,6 +4,7 @@ from calendar import timegm ...@@ -4,6 +4,7 @@ from calendar import timegm
from datetime import datetime, timedelta from datetime import datetime, timedelta
from mock import patch from mock import patch
from django.core.management import call_command
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from django.conf import settings from django.conf import settings
from django.http import QueryDict from django.http import QueryDict
...@@ -11,10 +12,9 @@ from django.http import QueryDict ...@@ -11,10 +12,9 @@ from django.http import QueryDict
from rest_framework import status from rest_framework import status
from rest_framework.test import APITestCase from rest_framework.test import APITestCase
from elasticutils.contrib.django import get_es
from .helpers import get_id_token from .helpers import get_id_token
from notesapi.v1.models import NoteMappingType, note_searcher, Note from notesapi.v1.models import Note
from notesapi.management.commands.create_index import Command as CreateIndexCommand
TEST_USER = "test_user_id" TEST_USER = "test_user_id"
...@@ -24,6 +24,9 @@ class BaseAnnotationViewTests(APITestCase): ...@@ -24,6 +24,9 @@ class BaseAnnotationViewTests(APITestCase):
Abstract class for testing annotation views. Abstract class for testing annotation views.
""" """
def setUp(self): def setUp(self):
call_command('clear_index', interactive=False)
call_command('update_index')
token = get_id_token(TEST_USER) token = get_id_token(TEST_USER)
self.client.credentials(HTTP_X_ANNOTATOR_AUTH_TOKEN=token) self.client.credentials(HTTP_X_ANNOTATOR_AUTH_TOKEN=token)
self.headers = {"user": TEST_USER} self.headers = {"user": TEST_USER}
...@@ -44,37 +47,6 @@ class BaseAnnotationViewTests(APITestCase): ...@@ -44,37 +47,6 @@ class BaseAnnotationViewTests(APITestCase):
], ],
} }
def tearDown(self):
# Deletes the documents listed by note_searcher from the default index
# and refreshes the indices.
for note_id in note_searcher.all().values_list('id'):
get_es().delete(
index=settings.ES_INDEXES['default'],
doc_type=NoteMappingType.get_mapping_type_name(),
# NOTE(review): each row is presumably a tuple of list-wrapped values,
# hence the double [0][0] to reach the bare id -- confirm.
id=note_id[0][0]
)
get_es().indices.refresh()
@classmethod
def setUpClass(cls):
    """
    Recreate the default index with the Note mapping before the tests run.

    Any existing index is deleted first (a 404 is ignored), then the
    indices are refreshed and the cluster is waited on until it reports
    at least 'yellow' status.
    """
    index_name = settings.ES_INDEXES['default']
    get_es().indices.delete(index=index_name, ignore=404)

    mappings = {NoteMappingType.get_mapping_type_name(): NoteMappingType.get_mapping()}
    get_es().indices.create(index=index_name, body={'mappings': mappings})

    get_es().indices.refresh()
    get_es().cluster.health(wait_for_status='yellow')
@classmethod
def tearDownClass(cls):
    """
    Delete the test index, then refresh the indices.
    """
    index_name = settings.ES_INDEXES['default']
    get_es().indices.delete(index=index_name)
    get_es().indices.refresh()
def _create_annotation(self, **kwargs): def _create_annotation(self, **kwargs):
""" """
Create annotation Create annotation
...@@ -84,14 +56,14 @@ class BaseAnnotationViewTests(APITestCase): ...@@ -84,14 +56,14 @@ class BaseAnnotationViewTests(APITestCase):
url = reverse('api:v1:annotations') url = reverse('api:v1:annotations')
response = self.client.post(url, opts, format='json') response = self.client.post(url, opts, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED) self.assertEqual(response.status_code, status.HTTP_201_CREATED)
get_es().indices.refresh() call_command('update_index')
return response.data.copy() return response.data.copy()
def _get_annotation(self, annotation_id): def _get_annotation(self, annotation_id):
""" """
Fetch annotation directly from elasticsearch. Fetch annotation directly from elasticsearch.
""" """
get_es().indices.refresh() call_command('update_index')
url = reverse('api:v1:annotations_detail', kwargs={'annotation_id': annotation_id}) url = reverse('api:v1:annotations_detail', kwargs={'annotation_id': annotation_id})
response = self.client.get(url, self.headers) response = self.client.get(url, self.headers)
self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.status_code, status.HTTP_200_OK)
...@@ -139,30 +111,30 @@ class AnnotationViewTests(BaseAnnotationViewTests): ...@@ -139,30 +111,30 @@ class AnnotationViewTests(BaseAnnotationViewTests):
self.assertEqual(response.data['user'], TEST_USER) self.assertEqual(response.data['user'], TEST_USER)
@patch('django.conf.settings.ES_DISABLED', True) # @patch('django.conf.settings.ES_DISABLED', True)
def test_create_es_disabled(self): # def test_create_es_disabled(self):
""" # """
Ensure we can create note in database when elasticsearch is disabled. # Ensure we can create note in database when elasticsearch is disabled.
""" # """
url = reverse('api:v1:annotations') # url = reverse('api:v1:annotations')
response = self.client.post(url, self.payload, format='json') # response = self.client.post(url, self.payload, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED) # self.assertEqual(response.status_code, status.HTTP_201_CREATED)
Note.objects.get(id=response.data['id']) # Note.objects.get(id=response.data['id'])
self.assertEqual(note_searcher.filter(id=response.data['id']).count(), 0) # self.assertEqual(note_searcher.filter(id=response.data['id']).count(), 0)
def test_delete_es_disabled(self): # def test_delete_es_disabled(self):
""" # """
Ensure we can delete note in database when elasticsearch is disabled. # Ensure we can delete note in database when elasticsearch is disabled.
""" # """
url = reverse('api:v1:annotations') # url = reverse('api:v1:annotations')
response = self.client.post(url, self.payload, format='json') # response = self.client.post(url, self.payload, format='json')
get_es().indices.refresh() # call_command('update_index')
self.assertEqual(note_searcher.filter(id=response.data['id']).count(), 1) # self.assertEqual(note_searcher.filter(id=response.data['id']).count(), 1)
with patch('django.conf.settings.ES_DISABLED', True): # with patch('django.conf.settings.ES_DISABLED', True):
Note.objects.get(id=response.data['id']).delete() # Note.objects.get(id=response.data['id']).delete()
self.assertEqual(note_searcher.filter(id=response.data['id']).count(), 1) # self.assertEqual(note_searcher.filter(id=response.data['id']).count(), 1)
def test_create_ignore_created(self): def test_create_ignore_created(self):
""" """
...@@ -269,7 +241,7 @@ class AnnotationViewTests(BaseAnnotationViewTests): ...@@ -269,7 +241,7 @@ class AnnotationViewTests(BaseAnnotationViewTests):
payload.update(self.headers) payload.update(self.headers)
url = reverse('api:v1:annotations_detail', kwargs={'annotation_id': data['id']}) url = reverse('api:v1:annotations_detail', kwargs={'annotation_id': data['id']})
response = self.client.put(url, payload, format='json') response = self.client.put(url, payload, format='json')
get_es().indices.refresh() call_command('update_index')
self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.status_code, status.HTTP_200_OK)
annotation = self._get_annotation(data['id']) annotation = self._get_annotation(data['id'])
...@@ -343,7 +315,7 @@ class AnnotationViewTests(BaseAnnotationViewTests): ...@@ -343,7 +315,7 @@ class AnnotationViewTests(BaseAnnotationViewTests):
response = self.client.delete(url, self.headers) response = self.client.delete(url, self.headers)
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT, "response should be 204 NO CONTENT") self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT, "response should be 204 NO CONTENT")
get_es().indices.refresh() call_command('update_index')
url = reverse('api:v1:annotations_detail', kwargs={'annotation_id': note['id']}) url = reverse('api:v1:annotations_detail', kwargs={'annotation_id': note['id']})
response = self.client.get(url, self.headers) response = self.client.get(url, self.headers)
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
...@@ -382,10 +354,11 @@ class AnnotationViewTests(BaseAnnotationViewTests): ...@@ -382,10 +354,11 @@ class AnnotationViewTests(BaseAnnotationViewTests):
results = self._get_search_results() results = self._get_search_results()
self.assertEqual(results['total'], 2) self.assertEqual(results['total'], 2)
# FIXME class and tag
results = self._get_search_results(text="first", highlight=True, highlight_class='class', highlight_tag='tag') results = self._get_search_results(text="first", highlight=True, highlight_class='class', highlight_tag='tag')
self.assertEqual(results['total'], 1) self.assertEqual(results['total'], 1)
self.assertEqual(len(results['rows']), 1) self.assertEqual(len(results['rows']), 1)
self.assertEqual(results['rows'][0]['text'], '<span>First</span> note') self.assertEqual(results['rows'][0]['text'], '<em>First</em> note')
def test_search_ordering(self): def test_search_ordering(self):
""" """
......
import logging import logging
import json
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
...@@ -7,7 +8,8 @@ from rest_framework import status ...@@ -7,7 +8,8 @@ from rest_framework import status
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.views import APIView from rest_framework.views import APIView
from notesapi.v1.models import Note, NoteMappingType, note_searcher from notesapi.v1.models import Note
from haystack.query import SearchQuerySet
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -22,27 +24,20 @@ class AnnotationSearchView(APIView): ...@@ -22,27 +24,20 @@ class AnnotationSearchView(APIView):
Search annotations. Search annotations.
""" """
params = self.request.QUERY_PARAMS.dict() params = self.request.QUERY_PARAMS.dict()
for field in ('text', 'quote'): query = SearchQuerySet().models(Note).filter(
if field in params: **{f:v for (f,v) in params.items() if f in ('user', 'course_id', 'usage_id', 'text')}
params[field + "__match"] = params[field] ).order_by('-updated')
del params[field]
if params.get('highlight'): if params.get('highlight'):
query = query.highlight()
# Currently we do not use highlight_class and highlight_tag in service.
for param in ['highlight', 'highlight_class', 'highlight_tag']: results = []
params.pop(param, None) for item in query:
note_dict = item.get_stored_fields()
results = NoteMappingType.process_result( note_dict['range'] = json.loads(item.ranges)
list( if params.get('highlight'):
note_searcher.query(**params).order_by("-created").values_dict("_source") note_dict['text'] = item.highlighted[0]
.highlight("text", pre_tags=['<span>'], post_tags=['</span>']) results.append(note_dict)
)
)
else:
results = NoteMappingType.process_result(
list(note_searcher.query(**params).order_by("-created").values_dict("_source"))
)
return Response({'total': len(results), 'rows': results}) return Response({'total': len(results), 'rows': results})
......
...@@ -18,9 +18,14 @@ SECRET_KEY = '*^owi*4%!%9=#h@app!l^$jz8(c*q297^)4&4yn^#_m#fq=z#l' ...@@ -18,9 +18,14 @@ SECRET_KEY = '*^owi*4%!%9=#h@app!l^$jz8(c*q297^)4&4yn^#_m#fq=z#l'
CLIENT_ID = 'edx-notes-id' CLIENT_ID = 'edx-notes-id'
CLIENT_SECRET = 'edx-notes-secret' CLIENT_SECRET = 'edx-notes-secret'
ES_URLS = ['http://localhost:9200'] HAYSTACK_CONNECTIONS = {
ES_INDEXES = {'default': 'notes_index'} 'default': {
ES_DISABLED = False 'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
'URL': 'http://127.0.0.1:9200/',
'INDEX_NAME': 'notes_index',
},
}
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
# Number of rows to return by default in result. # Number of rows to return by default in result.
RESULTS_DEFAULT_SIZE = 25 RESULTS_DEFAULT_SIZE = 25
...@@ -41,6 +46,7 @@ INSTALLED_APPS = ( ...@@ -41,6 +46,7 @@ INSTALLED_APPS = (
'rest_framework', 'rest_framework',
'rest_framework_swagger', 'rest_framework_swagger',
'corsheaders', 'corsheaders',
'haystack',
'notesapi', 'notesapi',
'notesapi.v1', 'notesapi.v1',
) )
......
...@@ -10,7 +10,13 @@ TEST_RUNNER = 'django_nose.NoseTestSuiteRunner' ...@@ -10,7 +10,13 @@ TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
DISABLE_TOKEN_CHECK = False DISABLE_TOKEN_CHECK = False
INSTALLED_APPS += ('django_nose',) INSTALLED_APPS += ('django_nose',)
ES_INDEXES = {'default': 'notes_index_test'} HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
'URL': 'http://127.0.0.1:9200/',
'INDEX_NAME': 'notes_index_test',
},
}
LOGGING = { LOGGING = {
'version': 1, 'version': 1,
......
...@@ -8,7 +8,11 @@ from rest_framework.response import Response ...@@ -8,7 +8,11 @@ from rest_framework.response import Response
from rest_framework.decorators import api_view, permission_classes from rest_framework.decorators import api_view, permission_classes
from elasticsearch.exceptions import TransportError from elasticsearch.exceptions import TransportError
from elasticutils import get_es from haystack import connections
def get_es():
    """
    Return the ``conn`` attribute of the backend behind the 'default'
    Haystack connection.
    """
    backend = connections['default'].get_backend()
    return backend.conn
@api_view(['GET']) @api_view(['GET'])
......
...@@ -2,9 +2,9 @@ Django==1.7.1 ...@@ -2,9 +2,9 @@ Django==1.7.1
requests==2.4.3 requests==2.4.3
djangorestframework==3.0.2 djangorestframework==3.0.2
django-rest-swagger==0.2.0 django-rest-swagger==0.2.0
elasticutils==0.10.2 django-haystack==2.3.1
elasticsearch==1.2.0 elasticsearch==1.2.0
django-cors-headers==0.13 django-cors-headers==0.13
PyJWT==0.3.0 PyJWT==0.3.0
MySQL-python==1.2.5 # GPL License MySQL-python==1.2.5 # GPL License
gunicorn==19.1.1 # MIT gunicorn==19.1.1 # MIT
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment