Commit f099c72b by Clinton Blackburn

Added Search API

The API now exposes endpoints for searching courses and course runs.

ECOM-4747
parent 65cbb2de
# pylint: disable=abstract-method
import datetime
from urllib.parse import urlencode
from django.contrib.auth import get_user_model
from django.utils.translation import ugettext_lazy as _
from drf_haystack.serializers import HaystackSerializer, HaystackFacetSerializer
from rest_framework import serializers
from course_discovery.apps.catalogs.models import Catalog
from course_discovery.apps.course_metadata.models import (
Course, CourseRun, Image, Organization, Person, Prerequisite, Seat, Subject, Video
)
from course_discovery.apps.course_metadata.search_indexes import CourseIndex, CourseRunIndex
User = get_user_model()
# Used as ``Meta.ignore_fields`` by the search and facet serializers in this module.
COMMON_IGNORED_FIELDS = ('text',)

# Used as ``Meta.field_aliases`` by the search and facet serializers: ``q`` is an alias for ``text``.
COMMON_SEARCH_FIELD_ALIASES = {'q': 'text'}

# Per-field facet options shared by the course run and aggregate facet serializers.
# NOTE: the ``start`` date window is computed once, when this module is imported, so it
# does not move forward while the process keeps running.
COURSE_RUN_FACET_FIELD_OPTIONS = {
    'level_type': {},
    'organizations': {},
    'prerequisites': {},
    'subjects': {},
    'language': {},
    'transcript_languages': {},
    'pacing_type': {},
    'start': {
        # One-month buckets covering the 365 days before import time.
        'start_date': datetime.datetime.now() - datetime.timedelta(days=365),
        'end_date': datetime.datetime.now(),
        'gap_by': 'month',
        'gap_amount': 1,
    },
    'content_type': {},
}

# Field names exposed by the course run and aggregate search serializers.
COURSE_RUN_SEARCH_FIELDS = (
    'key',
    'title',
    'short_description',
    'full_description',
    'start',
    'end',
    'enrollment_start',
    'enrollment_end',
    'pacing_type',
    'language',
    'transcript_languages',
    'marketing_url',
    'text',
)
def get_marketing_url_for_user(user, marketing_url):
"""
......@@ -47,12 +77,14 @@ class NamedModelSerializer(serializers.ModelSerializer):
class SubjectSerializer(NamedModelSerializer):
    """Serializes ``Subject`` instances using the ``NamedModelSerializer`` configuration."""

    class Meta(NamedModelSerializer.Meta):
        # Only the model changes; all other options come from the parent Meta.
        model = Subject
class PrerequisiteSerializer(NamedModelSerializer):
    """Serializes ``Prerequisite`` instances using the ``NamedModelSerializer`` configuration."""

    class Meta(NamedModelSerializer.Meta):
        # Only the model changes; all other options come from the parent Meta.
        model = Prerequisite
......@@ -169,7 +201,7 @@ class CourseRunSerializer(TimestampModelSerializer):
return get_marketing_url_for_user(self.context['request'].user, obj.marketing_url)
class ContainedCourseRunsSerializer(serializers.Serializer): # pylint: disable=abstract-method
class ContainedCourseRunsSerializer(serializers.Serializer):
"""Serializer used to represent course runs contained by a catalog."""
course_runs = serializers.DictField(
child=serializers.BooleanField(),
......@@ -207,7 +239,7 @@ class CourseSerializerExcludingClosedRuns(CourseSerializer):
course_runs = CourseRunSerializer(many=True, source='active_course_runs')
class ContainedCoursesSerializer(serializers.Serializer): # pylint: disable=abstract-method
class ContainedCoursesSerializer(serializers.Serializer):
"""Serializer used to represent courses contained by a catalog."""
courses = serializers.DictField(
child=serializers.BooleanField(),
......@@ -330,3 +362,70 @@ class FlattenedCourseRunWithCourseSerializer(CourseRunSerializer):
def get_course_key(self, obj):
    """Return the key of the course that the given course run belongs to."""
    parent_course = obj.course
    return parent_course.key
class CourseSearchSerializer(HaystackSerializer):
    """Serializer for search results backed by ``CourseIndex``."""

    # Exposes the result's ``model_name`` under the name ``content_type``.
    content_type = serializers.CharField(source='model_name')

    class Meta:
        index_classes = [CourseIndex]
        fields = ('key', 'title', 'short_description', 'full_description', 'text',)
        field_aliases = COMMON_SEARCH_FIELD_ALIASES
        ignore_fields = COMMON_IGNORED_FIELDS
class CourseFacetSerializer(HaystackFacetSerializer):
    """Facet serializer for course searches."""

    serialize_objects = True

    class Meta:
        # Fields to facet on; none of them takes extra options.
        field_options = {
            'level_type': {},
            'organizations': {},
            'prerequisites': {},
            'subjects': {},
        }
        field_aliases = COMMON_SEARCH_FIELD_ALIASES
        ignore_fields = COMMON_IGNORED_FIELDS
class CourseRunSearchSerializer(HaystackSerializer):
    """Serializer for search results backed by ``CourseRunIndex``."""

    # Exposes the result's ``model_name`` under the name ``content_type``.
    content_type = serializers.CharField(source='model_name')

    class Meta:
        index_classes = [CourseRunIndex]
        fields = COURSE_RUN_SEARCH_FIELDS
        field_aliases = COMMON_SEARCH_FIELD_ALIASES
        ignore_fields = COMMON_IGNORED_FIELDS
class CourseRunFacetSerializer(HaystackFacetSerializer):
    """Facet serializer for course run searches."""

    serialize_objects = True

    class Meta:
        # Facet configuration is shared with the aggregate facet serializer.
        field_options = COURSE_RUN_FACET_FIELD_OPTIONS
        field_aliases = COMMON_SEARCH_FIELD_ALIASES
        ignore_fields = COMMON_IGNORED_FIELDS
class AggregateSearchSerializer(HaystackSerializer):
    """Serializer for searches that span both courses and course runs."""

    class Meta:
        fields = COURSE_RUN_SEARCH_FIELDS
        field_aliases = COMMON_SEARCH_FIELD_ALIASES
        ignore_fields = COMMON_IGNORED_FIELDS
        # Pairs each search index with the serializer declared for its results.
        serializers = {
            CourseRunIndex: CourseRunSearchSerializer,
            CourseIndex: CourseSearchSerializer,
        }
class AggregateFacetSearchSerializer(HaystackFacetSerializer):
    """Facet serializer for searches that span both courses and course runs."""

    serialize_objects = True

    class Meta:
        field_options = COURSE_RUN_FACET_FIELD_OPTIONS
        field_aliases = COMMON_SEARCH_FIELD_ALIASES
        ignore_fields = COMMON_IGNORED_FIELDS
        # Pairs each search index with the facet serializer declared for it.
        serializers = {
            CourseRunIndex: CourseRunFacetSerializer,
            CourseIndex: CourseFacetSerializer,
        }
import json
import urllib.parse
import ddt
from django.core.urlresolvers import reverse
from rest_framework.test import APITestCase
from course_discovery.apps.core.tests.factories import UserFactory, USER_PASSWORD
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.course_metadata.tests.factories import CourseRunFactory
@ddt.ddt
class CourseRunSearchViewSetTests(ElasticsearchTestMixin, APITestCase):
    """ Tests for CourseRunSearchViewSet. """
    faceted_path = reverse('api:v1:search-course_runs-facets')
    list_path = reverse('api:v1:search-course_runs-list')

    def setUp(self):
        """ Create a user and log it in, since the endpoints require authentication. """
        super(CourseRunSearchViewSetTests, self).setUp()
        self.user = UserFactory()
        self.client.login(username=self.user.username, password=USER_PASSWORD)

    def get_search_response(self, query=None, faceted=False):
        """ Issue a GET against the list (or facets) endpoint, optionally with a ``q`` parameter. """
        qs = ''

        if query:
            qs = urllib.parse.urlencode({'q': query})

        path = self.faceted_path if faceted else self.list_path
        url = '{path}?{qs}'.format(path=path, qs=qs)
        return self.client.get(url)

    def serialize_date(self, d):
        """ Format a datetime the way the tests expect the API to render it; ``None`` for empty values. """
        return d.strftime('%Y-%m-%dT%H:%M:%S') if d else None

    def serialize_language(self, language):
        """ Return the language's name, the representation the tests expect in search results. """
        return language.name

    def serialize_course_run(self, course_run):
        """ Build the expected search-result representation of a course run. """
        return {
            'transcript_languages': [
                self.serialize_language(language) for language in course_run.transcript_languages.all()
            ],
            'short_description': course_run.short_description,
            'start': self.serialize_date(course_run.start),
            'end': self.serialize_date(course_run.end),
            'enrollment_start': self.serialize_date(course_run.enrollment_start),
            'enrollment_end': self.serialize_date(course_run.enrollment_end),
            'key': course_run.key,
            'marketing_url': course_run.marketing_url,
            'pacing_type': course_run.pacing_type,
            'language': self.serialize_language(course_run.language),
            'full_description': course_run.full_description,
            'title': course_run.title,
            'content_type': 'courserun'
        }

    def assert_contains_subset(self, expected, actual):
        """
        Assert every key/value pair of ``expected`` is present in ``actual``.

        Replacement for ``assertDictContainsSubset``, which has been deprecated since
        Python 3.2 and was removed in Python 3.12.
        """
        for key, value in expected.items():
            self.assertIn(key, actual)
            self.assertEqual(actual[key], value)

    @ddt.data(True, False)
    def test_authentication(self, faceted):
        """ Verify the endpoint requires authentication. """
        self.client.logout()
        response = self.get_search_response(faceted=faceted)
        self.assertEqual(response.status_code, 403)

    def test_search(self):
        """ Verify the view returns search results. """
        self.assert_successful_search(faceted=False)

    def test_faceted_search(self):
        """ Verify the view returns results and facets. """
        course_run, response_data = self.assert_successful_search(faceted=True)

        # Validate the pacing facet
        expected = {
            'text': course_run.pacing_type,
            'count': 1,
        }
        self.assert_contains_subset(expected, response_data['fields']['pacing_type'][0])

    def assert_successful_search(self, faceted=False):
        """ Asserts the search functionality returns results for a generated query. """

        # Generate data that should be indexed and returned by the query
        course_run = CourseRunFactory(course__title='Software Testing')
        response = self.get_search_response('software', faceted=faceted)

        self.assertEqual(response.status_code, 200)
        response_data = json.loads(response.content.decode('utf-8'))

        # Validate the search results
        expected = {
            'count': 1,
            'results': [
                self.serialize_course_run(course_run)
            ]
        }
        # The faceted endpoint nests the paginated results under "objects".
        actual = response_data['objects'] if faceted else response_data
        self.assert_contains_subset(expected, actual)

        return course_run, response_data
""" API v1 URLs. """
from rest_framework import routers
from django.conf.urls import include, url
from rest_framework import routers
from course_discovery.apps.api.v1 import views
# Router for the partner-facing endpoints; its URLs are exposed separately as ``partners_urls``.
partners_router = routers.SimpleRouter()
partners_router.register(r'affiliate_window/catalogs', views.AffiliateWindowViewSet, base_name='affiliate_window')
partners_urls = partners_router.urls
......@@ -17,5 +16,8 @@ router.register(r'catalogs', views.CatalogViewSet)
router.register(r'courses', views.CourseViewSet, base_name='course')
router.register(r'course_runs', views.CourseRunViewSet, base_name='course_run')
router.register(r'management', views.ManagementViewSet, base_name='management')
# Search endpoints: one across all content types, plus one each for courses and course runs.
router.register(r'search/all', views.AggregateSearchViewSet, base_name='search-all')
router.register(r'search/courses', views.CourseSearchViewSet, base_name='search-courses')
router.register(r'search/course_runs', views.CourseRunSearchViewSet, base_name='search-course_runs')
# Append the routes registered on ``router`` to the module's URL patterns.
urlpatterns += router.urls
......@@ -11,21 +11,21 @@ from django.db.models import Q
from django.db.models.functions import Lower
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from drf_haystack.filters import HaystackFacetFilter, HaystackFilter
from drf_haystack.mixins import FacetMixin
from drf_haystack.viewsets import HaystackViewSet
from dry_rest_permissions.generics import DRYPermissions
from edx_rest_framework_extensions.permissions import IsSuperuser
from rest_framework import status, viewsets
from rest_framework.decorators import detail_route, list_route
from rest_framework.exceptions import PermissionDenied
from rest_framework.pagination import PageNumberPagination
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from course_discovery.apps.api import serializers
from course_discovery.apps.api.filters import PermissionsFilter
from course_discovery.apps.api.renderers import AffiliateWindowXMLRenderer, CourseRunCSVRenderer
from course_discovery.apps.api.serializers import (
CatalogSerializer, CourseSerializer, CourseRunSerializer, ContainedCoursesSerializer,
CourseSerializerExcludingClosedRuns, AffiliateWindowSerializer, ContainedCourseRunsSerializer,
FlattenedCourseRunWithCourseSerializer
)
from course_discovery.apps.catalogs.models import Catalog
from course_discovery.apps.core.utils import SearchQuerySetWrapper
from course_discovery.apps.course_metadata.constants import COURSE_ID_REGEX, COURSE_RUN_ID_REGEX
......@@ -43,7 +43,7 @@ class CatalogViewSet(viewsets.ModelViewSet):
lookup_field = 'id'
permission_classes = (DRYPermissions,)
queryset = Catalog.objects.all()
serializer_class = CatalogSerializer
serializer_class = serializers.CatalogSerializer
@transaction.atomic
def create(self, request, *args, **kwargs):
......@@ -103,14 +103,14 @@ class CatalogViewSet(viewsets.ModelViewSet):
Only courses with active course runs are returned. A course run is considered active if it is currently
open for enrollment, or will open in the future.
---
serializer: CourseSerializerExcludingClosedRuns
serializer: serializers.CourseSerializerExcludingClosedRuns
"""
catalog = self.get_object()
queryset = catalog.courses().active()
page = self.paginate_queryset(queryset)
serializer = CourseSerializerExcludingClosedRuns(page, many=True, context={'request': request})
serializer = serializers.CourseSerializerExcludingClosedRuns(page, many=True, context={'request': request})
return self.get_paginated_response(serializer.data)
@detail_route()
......@@ -120,7 +120,7 @@ class CatalogViewSet(viewsets.ModelViewSet):
A dictionary mapping course IDs to booleans, indicating course presence, will be returned.
---
serializer: ContainedCoursesSerializer
serializer: serializers.ContainedCoursesSerializer
parameters:
- name: course_id
description: Course IDs to check for existence in the Catalog.
......@@ -136,7 +136,7 @@ class CatalogViewSet(viewsets.ModelViewSet):
courses = catalog.contains(course_ids)
instance = {'courses': courses}
serializer = ContainedCoursesSerializer(instance)
serializer = serializers.ContainedCoursesSerializer(instance)
return Response(serializer.data)
@detail_route()
......@@ -147,7 +147,7 @@ class CatalogViewSet(viewsets.ModelViewSet):
Only active course runs are returned. A course run is considered active if it is currently
open for enrollment, or will be open for enrollment in the future.
---
serializer: FlattenedCourseRunWithCourseSerializer
serializer: serializers.FlattenedCourseRunWithCourseSerializer
"""
catalog = self.get_object()
courses = catalog.courses().active()
......@@ -158,7 +158,9 @@ class CatalogViewSet(viewsets.ModelViewSet):
for acr in active_course_runs:
course_runs.append(acr)
serializer = FlattenedCourseRunWithCourseSerializer(course_runs, many=True, context={'request': request})
serializer = serializers.FlattenedCourseRunWithCourseSerializer(
course_runs, many=True, context={'request': request}
)
data = CourseRunCSVRenderer().render(serializer.data)
response = HttpResponse(data, content_type='text/csv')
......@@ -174,7 +176,7 @@ class CourseViewSet(viewsets.ReadOnlyModelViewSet):
lookup_value_regex = COURSE_ID_REGEX
queryset = Course.objects.all()
permission_classes = (IsAuthenticated,)
serializer_class = CourseSerializer
serializer_class = serializers.CourseSerializer
def get_queryset(self):
q = self.request.query_params.get('q', None)
......@@ -210,7 +212,7 @@ class CourseRunViewSet(viewsets.ReadOnlyModelViewSet):
lookup_value_regex = COURSE_RUN_ID_REGEX
queryset = CourseRun.objects.all().order_by(Lower('key'))
permission_classes = (IsAuthenticated,)
serializer_class = CourseRunSerializer
serializer_class = serializers.CourseRunSerializer
def get_queryset(self):
q = self.request.query_params.get('q', None)
......@@ -244,7 +246,7 @@ class CourseRunViewSet(viewsets.ReadOnlyModelViewSet):
A dictionary mapping course run keys to booleans,
indicating course run presence, will be returned.
---
serializer: ContainedCourseRunsSerializer
serializer: serializers.ContainedCourseRunsSerializer
parameters:
- name: query
description: Elasticsearch querystring query
......@@ -270,7 +272,7 @@ class CourseRunViewSet(viewsets.ReadOnlyModelViewSet):
contains[course_run.key] = True
instance = {'course_runs': contains}
serializer = ContainedCourseRunsSerializer(instance)
serializer = serializers.ContainedCourseRunsSerializer(instance)
return Response(serializer.data)
return Response(status=status.HTTP_400_BAD_REQUEST)
......@@ -321,7 +323,7 @@ class AffiliateWindowViewSet(viewsets.ViewSet):
""" AffiliateWindow Resource. """
permission_classes = (IsAuthenticated,)
renderer_classes = (AffiliateWindowXMLRenderer,)
serializer_class = AffiliateWindowSerializer
serializer_class = serializers.AffiliateWindowSerializer
def retrieve(self, request, pk=None): # pylint: disable=redefined-builtin,unused-argument
"""
......@@ -345,5 +347,63 @@ class AffiliateWindowViewSet(viewsets.ViewSet):
Q(course_run__enrollment_end__gte=datetime.datetime.now(pytz.UTC)))
)
serializer = AffiliateWindowSerializer(seats, many=True)
serializer = serializers.AffiliateWindowSerializer(seats, many=True)
return Response(serializer.data)
# Shared configuration for the search viewsets: authenticated access, page-number
# pagination, and a ``facets`` list route in addition to the regular list view.
class BaseCourseHaystackViewSet(FacetMixin, HaystackViewSet):
    permission_classes = (IsAuthenticated,)

    # Documents are identified and looked up by their ``key`` field.
    document_uid_field = 'key'
    lookup_field = 'key'
    load_all = True

    facet_filter_backends = [HaystackFacetFilter, HaystackFilter]

    # NOTE: We use PageNumberPagination because drf-haystack's facet serializer relies on the page_query_param
    # attribute, and it is more appropriate for search results than our default limit-offset pagination.
    pagination_class = PageNumberPagination

    # ``list`` and ``facets`` are overridden only to carry the API documentation
    # below; both delegate straight to the parent implementation.
    def list(self, request, *args, **kwargs):
        """
        Search.
        ---
        parameters:
            - name: q
              description: Search text
              paramType: query
              type: string
              required: false
        """
        return super(BaseCourseHaystackViewSet, self).list(request, *args, **kwargs)

    @list_route(methods=["get"], url_path="facets")
    def facets(self, request):
        """
        Returns faceted search results
        ---
        parameters:
            - name: q
              description: Search text
              paramType: query
              type: string
              required: false
        """
        return super(BaseCourseHaystackViewSet, self).facets(request)
# Search endpoint restricted to the ``Course`` model.
class CourseSearchViewSet(BaseCourseHaystackViewSet):
    index_models = (Course,)
    serializer_class = serializers.CourseSearchSerializer
    facet_serializer_class = serializers.CourseFacetSerializer
# Search endpoint restricted to the ``CourseRun`` model.
class CourseRunSearchViewSet(BaseCourseHaystackViewSet):
    index_models = (CourseRun,)
    serializer_class = serializers.CourseRunSearchSerializer
    facet_serializer_class = serializers.CourseRunFacetSerializer
# TODO Remove the detail routes. They don't work, and make no sense here given that we cannot specify the type.
class AggregateSearchViewSet(BaseCourseHaystackViewSet):
    """ Search all content types. """
    # No ``index_models`` is set here, unlike the per-model search viewsets.
    serializer_class = serializers.AggregateSearchSerializer
    facet_serializer_class = serializers.AggregateFacetSearchSerializer
......@@ -267,6 +267,14 @@ class CourseRun(TimeStampedModel):
value = value or None
self.full_description_override = value
@property
def subjects(self):
    """Subjects of the parent course, exposed directly on the course run."""
    parent_course = self.course
    return parent_course.subjects
@property
def organizations(self):
    """Organizations of the parent course, exposed directly on the course run."""
    parent_course = self.course
    return parent_course.organizations
@classmethod
def search(cls, query):
""" Queries the search index.
......
......@@ -4,18 +4,49 @@ from opaque_keys.edx.keys import CourseKey
from course_discovery.apps.course_metadata.models import Course, CourseRun
class CourseIndex(indexes.SearchIndex, indexes.Indexable):
class BaseIndex(indexes.SearchIndex):
    """
    Common base for this module's search indexes.

    Subclasses set ``model``; the model lookup, indexed queryset and
    ``content_type`` value are all derived from it.
    """
    model = None

    text = indexes.CharField(document=True, use_template=True)
    content_type = indexes.CharField(faceted=True)

    def get_model(self):
        return self.model

    def index_queryset(self, using=None):
        # Index every instance of the model.
        return self.model.objects.all()

    def get_updated_field(self):  # pragma: no cover
        return 'modified'

    def prepare_content_type(self, obj):  # pylint: disable=unused-argument
        # The content type is the lower-cased model class name, e.g. "courserun".
        model_name = self.model.__name__
        return model_name.lower()
class BaseCourseIndex(BaseIndex):
    """Fields and preparation logic shared by the course and course run indexes."""

    key = indexes.CharField(model_attr='key', stored=True)
    title = indexes.CharField(model_attr='title')
    short_description = indexes.CharField(model_attr='short_description', null=True)
    full_description = indexes.CharField(model_attr='full_description', null=True)
    # NOTE(review): not faceted here, while CourseIndex re-declares this field with
    # faceted=True -- confirm whether course run facets on level_type need the same.
    level_type = indexes.CharField(model_attr='level_type__name', null=True)
    subjects = indexes.MultiValueField(faceted=True)
    organizations = indexes.MultiValueField(faceted=True)

    def prepare_subjects(self, obj):
        """Return the names of the object's subjects."""
        names = []
        for subject in obj.subjects.all():
            names.append(subject.name)
        return names

    def prepare_organizations(self, obj):
        """Return one "<key>: <name>" label per organization of the object."""
        labels = []
        for organization in obj.organizations.all():
            labels.append('{key}: {name}'.format(key=organization.key, name=organization.name))
        return labels
class CourseIndex(BaseCourseIndex, indexes.Indexable):
model = Course
level_type = indexes.CharField(model_attr='level_type__name', null=True, faceted=True)
course_runs = indexes.MultiValueField()
expected_learning_items = indexes.MultiValueField()
organizations = indexes.MultiValueField()
prerequisites = indexes.MultiValueField()
subjects = indexes.MultiValueField()
prerequisites = indexes.MultiValueField(faceted=True)
def prepare_course_runs(self, obj):
return [course_run.key for course_run in obj.course_runs.all()]
......@@ -23,46 +54,30 @@ class CourseIndex(indexes.SearchIndex, indexes.Indexable):
def prepare_expected_learning_items(self, obj):
return [item.value for item in obj.expected_learning_items.all()]
def prepare_organizations(self, obj):
return ['{key}: {name}'.format(key=organization.key, name=organization.name) for organization in
obj.organizations.all()]
def prepare_prerequisites(self, obj):
return [prerequisite.name for prerequisite in obj.prerequisites.all()]
def prepare_subjects(self, obj):
return [subject.name for subject in obj.subjects.all()]
def get_model(self):
return Course
class CourseRunIndex(BaseCourseIndex, indexes.Indexable):
model = CourseRun
def index_queryset(self, using=None):
return self.get_model().objects.all()
def get_updated_field(self): # pragma: no cover
return 'modified'
class CourseRunIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.CharField(document=True, use_template=True)
course_key = indexes.CharField(model_attr='course__key', stored=True)
key = indexes.CharField(model_attr='key', stored=True)
org = indexes.CharField()
number = indexes.CharField()
title = indexes.CharField(model_attr='title_override', null=True)
start = indexes.DateTimeField(model_attr='start', null=True)
start = indexes.DateTimeField(model_attr='start', null=True, faceted=True)
end = indexes.DateTimeField(model_attr='end', null=True)
enrollment_start = indexes.DateTimeField(model_attr='enrollment_start', null=True)
enrollment_end = indexes.DateTimeField(model_attr='enrollment_end', null=True)
announcement = indexes.DateTimeField(model_attr='announcement', null=True)
min_effort = indexes.IntegerField(model_attr='min_effort', null=True)
max_effort = indexes.IntegerField(model_attr='max_effort', null=True)
language = indexes.CharField(null=True)
transcript_languages = indexes.MultiValueField()
pacing_type = indexes.CharField(model_attr='pacing_type', null=True)
language = indexes.CharField(null=True, faceted=True)
transcript_languages = indexes.MultiValueField(faceted=True)
pacing_type = indexes.CharField(model_attr='pacing_type', null=True, faceted=True)
marketing_url = indexes.CharField(model_attr='marketing_url', null=True)
def _prepare_language(self, language):
return '{code}: {name}'.format(code=language.code, name=language.name)
return language.name
def prepare_language(self, obj):
if obj.language:
......@@ -79,9 +94,3 @@ class CourseRunIndex(indexes.SearchIndex, indexes.Indexable):
def prepare_transcript_languages(self, obj):
return [self._prepare_language(language) for language in obj.transcript_languages.all()]
def get_model(self):
return CourseRun
def get_updated_field(self): # pragma: no cover
return 'modified'
......@@ -2,4 +2,8 @@ from django.views.generic import TemplateView
class QueryPreviewView(TemplateView):
    """Renders the query preview demo page."""
    # The earlier 'catalogs/preview.html' assignment was immediately overwritten
    # by this one, so only the effective value is kept.
    template_name = 'demo/query_preview.html'
class SearchDemoView(TemplateView):
    """Renders the search demo page."""
    template_name = 'demo/search.html'
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Search Demo</title>
</head>
<body>
<h2>Search</h2>

{# Search form; results and pagination are rendered only when a query is present. #}
<form method="get" action=".">
    <table>
        {{ form.as_table }}
        <tr>
            <td>&nbsp;</td>
            <td>
                <input type="submit" value="Search">
            </td>
        </tr>
    </table>

    {% if query %}
        <h3>Results</h3>

        {% for result in page.object_list %}
            <p>
                <a href="{{ result.object.get_absolute_url }}">{{ result.object.title }}</a>
            </p>
        {% empty %}
            <p>No results found.</p>
        {% endfor %}

        {% if page.has_previous or page.has_next %}
            {# The query is URL-encoded so characters such as "&", "#" or spaces do not break the link. #}
            <div>
                {% if page.has_previous %}
                    <a href="?q={{ query|urlencode }}&amp;page={{ page.previous_page_number }}">{% endif %}&laquo; Previous
                {% if page.has_previous %}</a>{% endif %}
                |
                {% if page.has_next %}<a href="?q={{ query|urlencode }}&amp;page={{ page.next_page_number }}">{% endif %}
                Next &raquo;{% if page.has_next %}</a>{% endif %}
            </div>
        {% endif %}
    {% else %}
        {# Show some example queries to run, maybe query syntax, something else? #}
    {% endif %}
</form>
</body>
</html>
\ No newline at end of file
......@@ -3,3 +3,7 @@
{{ object.short_description|default:'' }}
{{ object.full_description|default:'' }}
{{ object.pacing_type|default:'' }}
{% for language in object.transcript_languages.all %}
{{ language }}
{% endfor %}
......@@ -11,6 +11,7 @@ djangorestframework-csv==1.4.1
djangorestframework-jwt==1.8.0
djangorestframework-xml==1.3.0
django-rest-swagger[reST]==0.3.7
drf-haystack==1.6.0rc1
dry-rest-permissions==0.1.6
edx-auth-backends==0.5.0
edx-ccx-keys==0.2.0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment