Commit 79733edf by Clinton Blackburn

Merge pull request #6 from edx/clintonb/courses

Course Ingestion
parents c7361777 4cdecab5
# Travis CI build configuration.
language: python
python:
- "3.5"
# Docker is required for the Elasticsearch container started in before_install.
services:
- docker
sudo: false
# Cache the pip directory. "cache: pip" doesn't work due to install override. See https://github.com/travis-ci/travis-ci/issues/3239.
cache:
- directories:
- $HOME/.cache/pip
before_install:
# Point DISPLAY at display :99.0 and start xvfb.
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
# Run Elasticsearch 1.5.2 detached, publishing ports 9200 and 9300 to the host.
- docker run --detach --publish 9200:9200 --publish 9300:9300 elasticsearch:1.5.2
install:
- pip install -U pip wheel codecov
- pip install -r requirements/test.txt
before_script:
# Give Elasticsearch time to start
- sleep 10
script:
- make validate
# Restrict builds to the master branch.
branches:
only:
- master
# Run the codecov tool (installed above) once the build has succeeded.
after_success:
- codecov
......@@ -80,4 +80,5 @@ start-devstack:
docker-compose --x-networking up
open-devstack:
docker exec -it course-discovery /edx/app/course_discovery/devstack.sh open
docker-compose --x-networking up -d
docker exec -it course-discovery env TERM=$(TERM) /edx/app/course_discovery/devstack.sh open
from rest_framework.pagination import LimitOffsetPagination
class ElasticsearchLimitOffsetPagination(LimitOffsetPagination):
    """ Limit/offset paginator fed by an Elasticsearch response dict instead of a queryset. """

    def paginate_queryset(self, queryset, request, view=None):
        """
        Record the pagination state for a search response and hand back its results.

        The `results` entry is returned as-is; this method performs no slicing itself.

        Args:
            queryset (dict): Elasticsearch response; must provide `total` and `results`
            request (Request): HTTP request
            view: Unused.

        Returns:
            The value stored under `results` in the response.
        """
        # pylint: disable=attribute-defined-outside-init
        self.limit = self.get_limit(request)
        self.offset = self.get_offset(request)
        self.count = queryset['total']
        self.request = request

        # Page controls are only shown when the results span more than one page
        # and a template is configured.
        spans_multiple_pages = self.count > self.limit
        if spans_multiple_pages and self.template is not None:
            self.display_page_controls = True

        return queryset['results']
......@@ -5,14 +5,17 @@ from course_discovery.apps.catalogs.models import Catalog
class CatalogSerializer(serializers.ModelSerializer):
url = serializers.HyperlinkedIdentityField(view_name='api:v1:catalog-detail', lookup_field='id')
class Meta(object):
model = Catalog
fields = ('id', 'name', 'query',)
fields = ('id', 'name', 'query', 'url',)
class CourseSerializer(serializers.Serializer): # pylint: disable=abstract-method
id = serializers.CharField(help_text=_('Course ID'))
name = serializers.CharField(help_text=_('Course name'))
url = serializers.HyperlinkedIdentityField(view_name='api:v1:course-detail', lookup_field='id')
class ContainedCoursesSerializer(serializers.Serializer): # pylint: disable=abstract-method
......
from django.test import TestCase
from django.core.urlresolvers import reverse
from django.test import TestCase, RequestFactory
from course_discovery.apps.api.serializers import CatalogSerializer, CourseSerializer, ContainedCoursesSerializer
from course_discovery.apps.catalogs.tests.factories import CatalogFactory
from course_discovery.apps.courses.tests.factories import CourseFactory
class CatalogSerializerTests(TestCase):
def test_data(self):
catalog = CatalogFactory()
serializer = CatalogSerializer(catalog)
path = reverse('api:v1:catalog-detail', kwargs={'id': catalog.id})
request = RequestFactory().get(path)
serializer = CatalogSerializer(catalog, context={'request': request})
expected = {
'id': catalog.id,
'name': catalog.name,
'query': catalog.query,
'url': request.build_absolute_uri(),
}
self.assertDictEqual(serializer.data, expected)
class CourseSerializerTests(TestCase):
def test_data(self):
course = {
'id': 'course-v1:edX+DemoX+Demo_Course',
'name': 'edX Demo Course',
course = CourseFactory()
path = reverse('api:v1:course-detail', kwargs={'id': course.id})
request = RequestFactory().get(path)
serializer = CourseSerializer(course, context={'request': request})
expected = {
'id': course.id,
'name': course.name,
'url': request.build_absolute_uri(),
}
serializer = CourseSerializer(course)
self.assertDictEqual(serializer.data, course)
self.assertDictEqual(serializer.data, expected)
class ContainedCoursesSerializerTests(TestCase):
......
# pylint: disable=redefined-builtin
import json
import urllib
import ddt
from django.test import TestCase
from django.utils.encoding import force_text
from rest_framework.reverse import reverse
from rest_framework.test import APITestCase, APIRequestFactory
from course_discovery.apps.api.serializers import CatalogSerializer
from course_discovery.apps.api.serializers import CatalogSerializer, CourseSerializer
from course_discovery.apps.catalogs.models import Catalog
from course_discovery.apps.catalogs.tests.factories import CatalogFactory
from course_discovery.apps.core.tests.factories import UserFactory, USER_PASSWORD
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.courses.tests.factories import CourseFactory
JSON = 'application/json'
class SerializationMixin(object):
    """ Helpers that serialize objects with a request in the serializer context. """

    def _get_request(self, format=None):
        """ Build a GET request for `/`, adding a `format` query parameter when one is given. """
        query_data = {'format': format} if format else {}
        return APIRequestFactory().get('/', query_data)

    def _serialize_object(self, serializer, obj, many=False, format=None):
        """ Serialize `obj` with the given serializer class and return the resulting data. """
        context = {'request': self._get_request(format)}
        return serializer(obj, many=many, context=context).data

    def serialize_catalog(self, catalog, many=False, format=None):
        """ Serialize one catalog (or several when `many` is True) with CatalogSerializer. """
        return self._serialize_object(CatalogSerializer, catalog, many=many, format=format)

    def serialize_course(self, course, many=False, format=None):
        """ Serialize one course (or several when `many` is True) with CourseSerializer. """
        return self._serialize_object(CourseSerializer, course, many=many, format=format)
@ddt.ddt
class CatalogViewSetTests(TestCase):
class CatalogViewSetTests(ElasticsearchTestMixin, SerializationMixin, APITestCase):
""" Tests for the catalog resource.
Read-only (GET) endpoints should NOT require authentication.
......@@ -24,21 +42,29 @@ class CatalogViewSetTests(TestCase):
super(CatalogViewSetTests, self).setUp()
self.user = UserFactory(is_staff=True, is_superuser=True)
self.client.login(username=self.user.username, password=USER_PASSWORD)
self.catalog = CatalogFactory()
def test_session_auth(self):
# TODO Setup auth
# TODO assert_create()
# TODO assert_update()
# TODO assert_update()
pass
query = {
'query': {
'bool': {
'must': [
{
'wildcard': {
'course.name': 'abc*'
}
}
]
}
}
}
self.catalog = CatalogFactory(query=json.dumps(query))
self.course = CourseFactory(id='a/b/c', name='ABC Test Course')
self.refresh_index()
def test_create_without_authentication(self):
""" Verify authentication is required when creating, updating, or deleting a catalog. """
self.client.logout()
Catalog.objects.all().delete()
response = self.client.post(reverse('api:v1:catalog-list'), data='{}', content_type=JSON)
response = self.client.post(reverse('api:v1:catalog-list'), {}, format='json')
self.assertEqual(response.status_code, 403)
self.assertEqual(Catalog.objects.count(), 0)
......@@ -48,7 +74,7 @@ class CatalogViewSetTests(TestCase):
self.client.logout()
url = reverse('api:v1:catalog-detail', kwargs={'id': self.catalog.id})
response = getattr(self.client, http_method)(url, data='{}', content_type=JSON)
response = getattr(self.client, http_method)(url, {}, format='json')
self.assertEqual(response.status_code, 403)
def test_create(self):
......@@ -60,31 +86,32 @@ class CatalogViewSetTests(TestCase):
'query': query
}
response = self.client.post(reverse('api:v1:catalog-list'), data=json.dumps(data), content_type=JSON)
response = self.client.post(reverse('api:v1:catalog-list'), data, format='json')
self.assertEqual(response.status_code, 201)
catalog = Catalog.objects.latest()
self.assertDictEqual(response.data, CatalogSerializer(catalog).data)
self.assertDictEqual(response.data, self.serialize_catalog(catalog))
self.assertEqual(catalog.name, name)
self.assertEqual(catalog.query, query)
def test_courses(self):
""" Verify the endpoint returns the list of courses contained in the catalog. """
# TODO Use actual filtering!
url = reverse('api:v1:catalog-courses', kwargs={'id': self.catalog.id})
courses = [self.course]
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
self.assertListEqual(json.loads(force_text(response.content))['results'], [])
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))
def test_contains(self):
""" Verify the endpoint returns a filtered list of courses contained in the catalog. """
# TODO Use actual filtering!
url = reverse('api:v1:catalog-contains', kwargs={'id': self.catalog.id}) + '?course_id=a,b,c'
course_id = self.course.id
qs = urllib.parse.urlencode({'course_id': course_id})
url = '{}?{}'.format(reverse('api:v1:catalog-contains', kwargs={'id': self.catalog.id}), qs)
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data, {'courses': {}})
self.assertEqual(response.data, {'courses': {course_id: True}})
def test_get(self):
""" Verify the endpoint returns the details for a single catalog. """
......@@ -92,7 +119,7 @@ class CatalogViewSetTests(TestCase):
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data, CatalogSerializer(self.catalog).data)
self.assertEqual(response.data, self.serialize_catalog(self.catalog))
def test_list(self):
""" Verify the endpoint returns a list of all catalogs. """
......@@ -100,7 +127,7 @@ class CatalogViewSetTests(TestCase):
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
self.assertListEqual(response.data['results'], CatalogSerializer(Catalog.objects.all(), many=True).data)
self.assertListEqual(response.data['results'], self.serialize_catalog(Catalog.objects.all(), many=True))
def test_destroy(self):
""" Verify the endpoint deletes a catalog. """
......@@ -121,7 +148,7 @@ class CatalogViewSetTests(TestCase):
'query': query
}
response = self.client.put(url, data=json.dumps(data), content_type=JSON)
response = self.client.put(url, data, format='json')
self.assertEqual(response.status_code, 200)
catalog = Catalog.objects.get(id=self.catalog.id)
......@@ -137,9 +164,72 @@ class CatalogViewSetTests(TestCase):
'name': name
}
response = self.client.patch(url, data=json.dumps(data), content_type=JSON)
response = self.client.patch(url, data, format='json')
self.assertEqual(response.status_code, 200)
catalog = Catalog.objects.get(id=self.catalog.id)
self.assertEqual(catalog.name, name)
self.assertEqual(catalog.query, query)
@ddt.ddt
class CourseViewSetTests(ElasticsearchTestMixin, SerializationMixin, APITestCase):
    """ Tests for the course resource. """

    def setUp(self):
        super(CourseViewSetTests, self).setUp()
        # Every test runs as a logged-in staff superuser.
        self.user = UserFactory(is_staff=True, is_superuser=True)
        self.client.login(username=self.user.username, password=USER_PASSWORD)

    @ddt.data('json', 'api')
    def test_list(self, format):
        """ Verify the endpoint returns a list of all courses. """
        courses = CourseFactory.create_batch(10)
        # Expected order: ascending by lowercased course ID.
        courses.sort(key=lambda course: course.id.lower())
        url = reverse('api:v1:course-list')
        limit = 3
        self.refresh_index()

        response = self.client.get(url, {'format': format, 'limit': limit})
        self.assertEqual(response.status_code, 200)

        expected = self.serialize_course(courses[:limit], many=True, format=format)
        self.assertListEqual(response.data['results'], expected)

        # Render explicitly so a failure in the requested renderer surfaces here.
        response.render()

    def test_list_query(self):
        """ Verify the endpoint returns a filtered list of courses. """
        # Create courses that should NOT match our query
        CourseFactory.create_batch(3)

        # Create courses that SHOULD match our query
        name = 'query test'
        courses = [CourseFactory(name=name) for __ in range(2)]
        courses.sort(key=lambda course: course.id.lower())
        self.refresh_index()

        term_clause = {
            "term": {
                "course.name.lowercase_sort": name
            }
        }
        query = {
            "query": {
                "bool": {
                    "must": [term_clause]
                }
            }
        }
        qs = urllib.parse.urlencode({'q': json.dumps(query)})
        url = '{}?{}'.format(reverse('api:v1:course-list'), qs)

        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['count'], len(courses))
        self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))

    def test_retrieve(self):
        """ Verify the endpoint returns a single course. """
        course = CourseFactory()
        detail_url = reverse('api:v1:course-detail', kwargs={'id': course.id})

        response = self.client.get(detail_url)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data, self.serialize_course(course))
......@@ -7,5 +7,6 @@ urlpatterns = []
router = routers.SimpleRouter()
router.register(r'catalogs', views.CatalogViewSet)
router.register(r'courses', views.CourseViewSet, base_name='course')
urlpatterns += router.urls
import json
import logging
from rest_framework import viewsets
from rest_framework.authentication import SessionAuthentication
from rest_framework.decorators import detail_route
from rest_framework.permissions import DjangoModelPermissionsOrAnonReadOnly
from rest_framework.permissions import DjangoModelPermissionsOrAnonReadOnly, IsAuthenticatedOrReadOnly
from rest_framework.response import Response
from course_discovery.apps.api.pagination import ElasticsearchLimitOffsetPagination
from course_discovery.apps.api.serializers import CatalogSerializer, CourseSerializer, ContainedCoursesSerializer
from course_discovery.apps.catalogs.models import Catalog
from course_discovery.apps.courses.constants import COURSE_ID_REGEX
from course_discovery.apps.courses.models import Course
logger = logging.getLogger(__name__)
......@@ -59,7 +63,7 @@ class CatalogViewSet(viewsets.ModelViewSet):
queryset = catalog.courses()
page = self.paginate_queryset(queryset)
serializer = CourseSerializer(page, many=True)
serializer = CourseSerializer(page, many=True, context={'request': request})
return self.get_paginated_response(serializer.data)
@detail_route()
......@@ -87,3 +91,55 @@ class CatalogViewSet(viewsets.ModelViewSet):
instance = {'courses': courses}
serializer = ContainedCoursesSerializer(instance)
return Response(serializer.data)
class CourseViewSet(viewsets.ReadOnlyModelViewSet):
    """ Course resource. """
    authentication_classes = (SessionAuthentication,)
    lookup_field = 'id'
    lookup_value_regex = COURSE_ID_REGEX
    permission_classes = (IsAuthenticatedOrReadOnly,)
    serializer_class = CourseSerializer
    pagination_class = ElasticsearchLimitOffsetPagination

    def get_object(self):
        """
        Return a single course.

        Raises:
            Http404: if no course exists for the ID captured from the URL.
        """
        # Local imports: neither name is imported at module level.
        from django.http import Http404
        from course_discovery.apps.courses.exceptions import CourseNotFoundError

        course_id = self.kwargs[self.lookup_url_kwarg or self.lookup_field]

        try:
            return Course.get(course_id)
        except CourseNotFoundError:
            # Translate the data-store error into a 404 rather than letting it surface as a 500.
            raise Http404

    def get_queryset(self):
        # Note (CCB): This is solely here to appease DRF. It is not actually used.
        return []

    def get_data(self, limit, offset):
        """
        Return all courses, or only those matching the `q` query-string parameter when it is supplied.

        Args:
            limit (int): Maximum number of results to return
            offset (int): Index of first result to return

        Returns:
            dict: Paginated search response produced by `Course.all` or `Course.search`.

        Raises:
            ParseError: if `q` is supplied but is not a JSON object.
        """
        # Local import: this name is not imported at module level.
        from rest_framework.exceptions import ParseError

        query = self.request.GET.get('q', None)

        if not query:
            return Course.all(limit=limit, offset=offset)

        # `q` is untrusted client input; reject anything that is not a JSON object with a 400.
        try:
            query = json.loads(query)
        except ValueError:
            raise ParseError('The q parameter must contain valid JSON.')

        if not isinstance(query, dict):
            raise ParseError('The q parameter must contain a JSON object.')

        return Course.search(query, limit=limit, offset=offset)

    def list(self, request, *args, **kwargs):  # pylint: disable=unused-argument
        """
        List all courses.
        ---
        parameters:
            - name: q
              description: Query to filter the courses
              required: false
              type: string
              paramType: query
              multiple: false
        """
        # The limit and offset are passed to the search itself; the paginator
        # then only records the totals and returns the results it is given.
        limit = self.paginator.get_limit(self.request)
        offset = self.paginator.get_offset(self.request)
        data = self.get_data(limit, offset)

        page = self.paginate_queryset(data)
        serializer = self.get_serializer(page, many=True)
        return self.get_paginated_response(serializer.data)

    def retrieve(self, request, *args, **kwargs):
        """ Retrieve details for a course. """
        return super(CourseViewSet, self).retrieve(request, *args, **kwargs)
......@@ -7,3 +7,6 @@ from course_discovery.apps.catalogs.models import Catalog
class CatalogAdmin(admin.ModelAdmin):
list_display = ('name',)
readonly_fields = ('created', 'modified',)
class Media(object):
js = ('js/catalogs-change-form.js',)
import json
from django.db import models
from django.utils.translation import ugettext_lazy as _
from django_extensions.db.models import TimeStampedModel
from course_discovery.apps.courses.models import Course
class Catalog(TimeStampedModel):
name = models.CharField(max_length=255, null=False, blank=False, help_text=_('Catalog name'))
......@@ -10,13 +14,18 @@ class Catalog(TimeStampedModel):
def __str__(self):
return 'Catalog #{id}: {name}'.format(id=self.id, name=self.name) # pylint: disable=no-member
@property
def query_as_dict(self):
return json.loads(self.query)
def courses(self):
""" Returns the list of courses contained within this catalog.
Returns:
List of courses contained in this catalog.
Course[]
"""
return []
return Course.search(self.query_as_dict)['results']
def contains(self, course_ids): # pylint: disable=unused-argument
""" Determines if the given courses are contained in this catalog.
......@@ -28,4 +37,26 @@ class Catalog(TimeStampedModel):
dict: Mapping of course IDs to booleans indicating if course is
contained in this catalog.
"""
return {}
query = self.query_as_dict['query']
# Create a filtered query that applies the catalog's query to the
# collection of courses whose IDs were passed in.
filtered_query = {
"query": {
"filtered": {
"query": query,
"filter": {
"ids": {
"values": course_ids
}
}
}
}
}
contains = {course_id: False for course_id in course_ids}
courses = Course.search(filtered_query)['results']
for course in courses:
contains[course.id] = True
return contains
......@@ -9,4 +9,4 @@ class CatalogFactory(factory.DjangoModelFactory):
model = Catalog
name = FuzzyText(prefix='catalog-name-')
query = FuzzyText(prefix='catalog-query-')
query = '{"query": {"match_all": {}}}'
import json
from django.test import TestCase
from course_discovery.apps.catalogs.tests import factories
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.courses.tests.factories import CourseFactory
class CatalogTests(TestCase):
class CatalogTests(ElasticsearchTestMixin, TestCase):
""" Catalog model tests. """
def setUp(self):
super(CatalogTests, self).setUp()
self.catalog = factories.CatalogFactory()
query = {
'query': {
'bool': {
'must': [
{
'wildcard': {
'course.name': 'abc*'
}
}
]
}
}
}
self.catalog = factories.CatalogFactory(query=json.dumps(query))
self.course = CourseFactory(id='a/b/c', name='ABCs of Ͳҽʂէìղց')
self.refresh_index()
def test_unicode(self):
""" Validate the output of the __unicode__ method. """
......@@ -21,14 +40,9 @@ class CatalogTests(TestCase):
def test_courses(self):
""" Verify the method returns a list of courses contained in the catalog. """
# TODO Setup/mock Elasticsearch
# TODO Set catalog query
# TODO Validate value of catalog.courses()
self.assertListEqual(self.catalog.courses(), [])
self.assertEqual(self.catalog.courses(), [self.course])
def test_contains(self):
""" Verify the method returns a mapping of course IDs to booleans. """
# TODO Setup/mock Elasticsearch
# TODO Set catalog query
# TODO Validate value of catalog.contains()
self.assertDictEqual(self.catalog.contains([]), {})
other_id = 'd/e/f'
self.assertDictEqual(self.catalog.contains([self.course.id, other_id]), {self.course.id: True, other_id: False})
import logging
from django.conf import settings
from elasticsearch import Elasticsearch
from course_discovery.apps.courses.config import COURSES_INDEX_CONFIG
logger = logging.getLogger(__name__)
class ElasticsearchTestMixin(object):
    """ Test mixin that gives every test a freshly created Elasticsearch index. """

    @classmethod
    def setUpClass(cls):
        super(ElasticsearchTestMixin, cls).setUpClass()
        es_host = settings.ELASTICSEARCH['host']
        cls.index = settings.ELASTICSEARCH['index']
        cls.es = Elasticsearch(es_host)

    def setUp(self):
        super(ElasticsearchTestMixin, self).setUp()
        # Start each test from an empty index.
        self.reset_index()
        self.refresh_index()

    def reset_index(self):
        """ Deletes and re-creates the Elasticsearch index. """
        logger.info('Deleting index [%s]...', self.index)
        # A missing index (404) is not an error here.
        self.es.indices.delete(index=self.index, ignore=404)  # pylint: disable=unexpected-keyword-arg
        logger.info('...index deleted.')

        logger.info('Recreating index [%s]...', self.index)
        self.es.indices.create(index=self.index, body=COURSES_INDEX_CONFIG)
        logger.info('...done!')

    def refresh_index(self):
        """
        Refreshes the index, then waits for it to report at least yellow health.

        https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-refresh.html
        """
        # pylint: disable=unexpected-keyword-arg
        index_name = self.index
        self.es.indices.refresh(index=index_name)
        self.es.cluster.health(index=index_name, wait_for_status='yellow', request_timeout=1)
default_app_config = 'course_discovery.apps.courses.apps.CoursesConfig'
import logging
from django.apps import AppConfig
from django.conf import settings
from elasticsearch import Elasticsearch, TransportError
from course_discovery.apps.courses.config import COURSES_INDEX_CONFIG
logger = logging.getLogger(__name__)
class CoursesConfig(AppConfig):
    """ App configuration that makes sure the courses index exists when the app is ready. """
    name = 'courses'
    verbose_name = 'Courses'

    def ready(self):
        """
        Connect to Elasticsearch and create the courses index if it is missing.

        Nothing happens when the `connect_on_startup` entry of `settings.ELASTICSEARCH`
        is falsy; a missing entry counts as True.

        Raises:
            TransportError: if index creation fails with a status other than 400.
        """
        if not settings.ELASTICSEARCH.get('connect_on_startup', True):
            return

        host = settings.ELASTICSEARCH['host']
        index = settings.ELASTICSEARCH['index']

        logger.info('Attempting to establish initial connection to Elasticsearch host [%s]...', host)
        client = Elasticsearch(host, sniff_on_start=True)
        logger.info('...success!')

        logger.info('Making sure index [%s] exists...', index)
        try:
            client.indices.create(index=index, body=COURSES_INDEX_CONFIG)
        except TransportError as error:
            # A 400 from index creation is treated as "the index is already there".
            if error.status_code != 400:
                raise
            logger.info('...index already exists.')
        else:
            logger.info('...index created.')
def _text_field_with_lowercase_sort():
    """ Return a new mapping for an English-analyzed string field with a `lowercase_sort` sub-field. """
    return {
        'type': 'string',
        'analyzer': 'english',
        'fields': {
            'lowercase_sort': {
                'type': 'string',
                'analyzer': 'case_insensitive_sort'
            }
        }
    }


# Settings and mappings used as the body when the courses index is created.
COURSES_INDEX_CONFIG = {
    'settings': {
        'analysis': {
            'analyzer': {
                # Keeps the whole value as one token (keyword tokenizer) and lowercases it.
                'case_insensitive_sort': {
                    'tokenizer': 'keyword',
                    'filter': ['lowercase']
                }
            }
        }
    },
    'mappings': {
        'course': {
            'properties': {
                'id': _text_field_with_lowercase_sort(),
                'name': _text_field_with_lowercase_sort(),
            }
        }
    }
}
# Three non-empty segments joined by "/" or "+". The first two segments may not
# contain "/" or "+"; the last one may not contain "/".
COURSE_ID_REGEX = r'[^/+]+(/|\+)[^/+]+(/|\+)[^/]+'
# The same expression wrapped in a named group called "id".
COURSE_ID_PATTERN = r'(?P<id>' + COURSE_ID_REGEX + r')'
class CourseNotFoundError(Exception):
    """ Raised when the requested course does not exist in the data store. """
import logging
from optparse import make_option
from django.core.management import BaseCommand, CommandError
from course_discovery.apps.courses.models import Course
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """ Management command that refreshes all course data via `Course.refresh_all`. """
    help = 'Refresh course data from external sources.'

    option_list = BaseCommand.option_list + (
        make_option(
            '--access_token',
            action='store',
            dest='access_token',
            default=None,
            help='OAuth2 access token used to authenticate API calls.'
        ),
    )

    def handle(self, *args, **options):
        """
        Refresh every course using the supplied access token.

        Raises:
            CommandError: if no access token was supplied.
        """
        token = options.get('access_token')

        if token:
            Course.refresh_all(access_token=token)
            return

        # Without a token the API cannot be called: log the problem, then abort.
        error_message = 'Courses cannot be migrated if no access token is supplied.'
        logger.error(error_message)
        raise CommandError(error_message)
import logging
from django.conf import settings
from edx_rest_api_client.client import EdxRestApiClient
from elasticsearch import Elasticsearch, NotFoundError
from course_discovery.apps.courses.exceptions import CourseNotFoundError
logger = logging.getLogger(__name__)
class Course(object):
    """
    Course model.

    This model is backed by Elasticsearch.
    """

    # Elasticsearch document type for courses.
    doc_type = 'course'

    # Elasticsearch index where course data is stored
    _index = settings.ELASTICSEARCH['index']

    @classmethod
    def _es_client(cls):
        """ Elasticsearch client. """
        return Elasticsearch(settings.ELASTICSEARCH['host'])

    @classmethod
    def _hit_to_course(cls, hit):
        """ Build a Course from an Elasticsearch hit (or `get` response) using its `_source` document. """
        source = hit['_source']
        return Course(source['id'], source)

    @classmethod
    def all(cls, limit=10, offset=0):
        """
        Return a list of all courses.

        Args:
            limit (int): Maximum number of results to return
            offset (int): Starting index from which to return results

        Returns:
            dict: Representation of data suitable for pagination

        Examples:
            {
                'limit': 10,
                'offset': 0,
                'total': 2,
                'results': [`Course`, `Course`],
            }
        """
        query = {
            'query': {
                'match_all': {}
            }
        }

        return cls.search(query, limit=limit, offset=offset)

    @classmethod
    def get(cls, id):  # pylint: disable=redefined-builtin
        """
        Retrieve a single course.

        Args:
            id (str): Course ID

        Returns:
            Course: The course corresponding to the given ID.

        Raises:
            CourseNotFoundError: if the course is not found.
        """
        try:
            response = cls._es_client().get(index=cls._index, doc_type=cls.doc_type, id=id)
            return cls._hit_to_course(response)
        except NotFoundError:
            raise CourseNotFoundError('Course [{}] was not found in the data store.'.format(id))

    @classmethod
    def search(cls, query, limit=10, offset=0):
        """
        Search the data store for courses.

        The passed-in `query` is not modified. Defaults for `from`, `size` and `sort`
        are applied to a copy, and only for keys the query does not already define.

        Args:
            query (dict): Elasticsearch query used to find courses.
            limit (int): Maximum number of results to return
            offset (int): Index of first result to return

        Returns:
            dict: Representation of data suitable for pagination

        Examples:
            {
                'limit': 10,
                'offset': 0,
                'total': 2,
                'results': [`Course`, `Course`],
            }
        """
        # Work on a shallow copy so the defaults below never leak into the caller's dict.
        # Only top-level keys are added, so a shallow copy is sufficient.
        query = dict(query)
        query.setdefault('from', offset)
        query.setdefault('size', limit)
        query.setdefault('sort', {'id.lowercase_sort': 'asc'})

        logger.debug('Querying [%s]: %s', cls._index, query)
        response = cls._es_client().search(index=cls._index, doc_type=cls.doc_type, body=query)
        hits = response['hits']
        total = hits['total']
        logger.info('Course search returned [%d] courses.', total)

        return {
            'limit': limit,
            'offset': offset,
            'total': total,
            'results': [cls._hit_to_course(hit) for hit in hits['hits']]
        }

    @classmethod
    def refresh(cls, course_id, access_token):
        """
        Refresh the course data from the raw data sources.

        Args:
            course_id (str): Course ID
            access_token (str): OAuth access token

        Returns:
            Course
        """
        client = EdxRestApiClient(settings.ECOMMERCE_API_URL, oauth_access_token=access_token)
        body = client.courses(course_id).get(include_products=True)
        course = Course(course_id, body)
        course.save()
        return course

    @classmethod
    def refresh_all(cls, access_token):
        """
        Refresh all course data.

        Courses are fetched from the API at `settings.ECOMMERCE_API_URL` one page at a time
        (50 per page) and each one is saved to the data store.

        Args:
            access_token (str): OAuth access token

        Returns:
            None
        """
        client = EdxRestApiClient(settings.ECOMMERCE_API_URL, oauth_access_token=access_token)

        logger.info('Refreshing course data from %s....', settings.ECOMMERCE_API_URL)

        count = None
        page = 1

        while page:
            response = client.courses().get(include_products=True, page=page, page_size=50)
            count = response['count']
            results = response['results']
            logger.info('Retrieved %d courses...', len(results))

            # A truthy `next` means another page exists; otherwise this was the last one.
            page = page + 1 if response['next'] else None

            for body in results:
                Course(body['id'], body).save()

        logger.info('Retrieved %d courses.', count)

    def __init__(self, id, body=None):  # pylint: disable=redefined-builtin
        """
        Args:
            id (str): Course ID; must be non-empty.
            body (dict): Course document. Defaults to an empty dict.

        Raises:
            ValueError: if `id` is empty or None.
        """
        if not id:
            raise ValueError('Course ID cannot be empty or None.')

        self.id = id
        self.body = body or {}

    def __eq__(self, other):
        """
        Determine if this Course equals another.

        Args:
            other (Course): object with which to compare

        Returns: True iff. `other` is a Course whose `id` and `body` both equal
            this Course's; otherwise, False.
        """
        if not isinstance(other, Course):
            return False

        return self.id is not None and self.id == other.id and self.body == other.body

    def __repr__(self):
        return 'Course {id}: {name}'.format(id=self.id, name=self.name)

    @property
    def name(self):
        """ Value stored under `name` in the body, or None when absent. """
        return self.body.get('name')

    def save(self):
        """ Save the course to the data store. """
        logger.info('Indexing course %s...', self.id)
        self._es_client().index(index=self._index, doc_type=self.doc_type, id=self.id, body=self.body)
        logger.info('Finished indexing course %s.', self.id)
import factory
from factory.fuzzy import FuzzyText
from course_discovery.apps.courses.models import Course
class CourseFactory(factory.Factory):
    """ Factory that builds Course objects and saves them on creation. """

    class Meta(object):
        model = Course
        # `name` only feeds `body`; it is not passed to the Course constructor.
        exclude = ('name',)

    id = FuzzyText(prefix='course-id/', suffix='/fake')
    name = FuzzyText(prefix="էҽʂէ çօմɾʂҽ ")
    # Body mirrors the generated id and name.
    body = factory.LazyAttribute(lambda course: {
        'id': course.id,
        'name': course.name
    })

    @classmethod
    def _create(cls, model_class, *args, **kwargs):
        """ Instantiate the model, save it, and return the saved instance. """
        course = model_class(*args, **kwargs)
        course.save()
        return course
import mock
from django.apps import AppConfig
from django.conf import settings
from django.test import TestCase, override_settings
from elasticsearch import TransportError
from elasticsearch.client import IndicesClient
from testfixtures import LogCapture
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
LOGGER_NAME = 'course_discovery.apps.courses.apps'
class CoursesConfigTests(ElasticsearchTestMixin, TestCase):
    """ Tests for the courses app configuration's `ready` hook. """

    def setUp(self):
        super(CoursesConfigTests, self).setUp()
        self.app_config = AppConfig.create('course_discovery.apps.courses')

    def test_ready_create_index(self):
        """ Verify the app creates the Elasticsearch index if it does not already exist. """
        host = settings.ELASTICSEARCH['host']
        index = settings.ELASTICSEARCH['index']

        # Delete the index
        self.es.indices.delete(index=index, ignore=404)  # pylint: disable=unexpected-keyword-arg
        self.assertFalse(self.es.indices.exists(index=index))

        with LogCapture(LOGGER_NAME) as log_capture:
            self.app_config.ready()

            # Verify the index was created
            self.assertTrue(self.es.indices.exists(index=index))

            log_capture.check(
                (LOGGER_NAME, 'INFO',
                 'Attempting to establish initial connection to Elasticsearch host [{}]...'.format(host)),
                (LOGGER_NAME, 'INFO', '...success!'),
                (LOGGER_NAME, 'INFO', 'Making sure index [{}] exists...'.format(index)),
                (LOGGER_NAME, 'INFO', '...index created.')
            )

    def test_ready_index_exists(self):
        """ Verify the app does not setup a new Elasticsearch index if one exists already. """
        host = settings.ELASTICSEARCH['host']
        index = settings.ELASTICSEARCH['index']

        # Verify the index exists
        self.assertTrue(self.es.indices.exists(index=index))

        with mock.patch.object(IndicesClient, 'create') as mock_create:
            # The app treats a 400 from index creation as "index already exists".
            mock_create.side_effect = TransportError(400)

            with LogCapture(LOGGER_NAME) as log_capture:
                # This call should NOT raise an exception.
                self.app_config.ready()

        # Verify the index still exists
        self.assertTrue(self.es.indices.exists(index=index))

        log_capture.check(
            (LOGGER_NAME, 'INFO',
             'Attempting to establish initial connection to Elasticsearch host [{}]...'.format(host)),
            (LOGGER_NAME, 'INFO', '...success!'),
            (LOGGER_NAME, 'INFO', 'Making sure index [{}] exists...'.format(index)),
            (LOGGER_NAME, 'INFO', '...index already exists.')
        )

    def test_ready_es_failure(self):
        """ Verify Elasticsearch errors are raised if the app fails to create the index. """
        with mock.patch.object(IndicesClient, 'create') as mock_create:
            mock_create.side_effect = TransportError(500)

            with self.assertRaises(TransportError):
                self.app_config.ready()

    @override_settings(ELASTICSEARCH={'connect_on_startup': False})
    def test_ready_without_connect_on_startup(self):
        """
        Verify the app does not attempt to connect to Elasticsearch if the connect_on_startup setting is False.
        """
        with mock.patch.object(IndicesClient, 'create') as mock_create:
            self.app_config.ready()
            mock_create.assert_not_called()
import json
from urllib.parse import urlparse, parse_qs
import responses
from django.test import TestCase, override_settings
from course_discovery.apps.core.tests.mixins import ElasticsearchTestMixin
from course_discovery.apps.courses.exceptions import CourseNotFoundError
from course_discovery.apps.courses.models import Course
from course_discovery.apps.courses.tests.factories import CourseFactory
ACCESS_TOKEN = 'secret'
ECOMMERCE_API_URL = 'https://ecommerce.example.com/api/v2'
JSON = 'application/json'
@override_settings(ECOMMERCE_API_URL=ECOMMERCE_API_URL)
class CourseTests(ElasticsearchTestMixin, TestCase):
def assert_course_attrs(self, course, attrs):
    """
    Assert that each named attribute of a Course has the expected value.

    Args:
        course (Course): object under inspection
        attrs (dict): mapping of attribute name to expected value
    """
    for attr_name in attrs:
        self.assertEqual(getattr(course, attr_name), attrs[attr_name])
@responses.activate  # pylint: disable=no-member
def mock_refresh_all(self):
    """
    Mock the E-Commerce API and refresh all course data.

    The mocked endpoint serves one course per page, so the refresh has to
    follow pagination to load every course.

    Returns:
        [dict]: List of dictionaries representing course content bodies.
    """
    # Mock the call to the E-Commerce API, simulating multiple pages of data
    url = '{host}/courses/'.format(host=ECOMMERCE_API_URL)
    course_bodies = [
        {
            'id': 'a/b/c',
            'url': 'https://ecommerce.example.com/api/v2/courses/a/b/c/',
            'name': 'aaaaa',
            'verification_deadline': '2022-01-01T01:00:00Z',
            'type': 'verified',
            'last_edited': '2015-08-19T15:47:24Z'
        },
        {
            'id': 'aaa/bbb/ccc',
            'url': 'https://ecommerce.example.com/api/v2/courses/aaa/bbb/ccc/',
            'name': 'Introduction to Biology - The Secret of Life',
            'verification_deadline': None,
            'type': 'audit',
            'last_edited': '2015-08-06T19:11:19Z'
        }
    ]

    def request_callback(request):
        # pylint: disable=redefined-builtin
        next = None
        count = len(course_bodies)

        # Use the querystring to determine which page should be returned. Default to page 1.
        # Note that the values of the dict returned by `parse_qs` are lists, hence the `[1]` default value.
        qs = parse_qs(urlparse(request.path_url).query)
        page = int(qs.get('page', [1])[0])

        if page < count:
            # Link to the page FOLLOWING the one being served.
            next = '{}?page={}'.format(url, page + 1)

        body = {
            'count': count,
            'next': next,
            'previous': None,
            'results': [course_bodies[page - 1]]
        }

        return 200, {}, json.dumps(body)

    # pylint: disable=no-member
    responses.add_callback(responses.GET, url, callback=request_callback, content_type=JSON)

    # Refresh all course data
    Course.refresh_all(ACCESS_TOKEN)
    self.refresh_index()

    return course_bodies
def test_init(self):
""" Verify the constructor requires a non-empty string for the ID. """
msg = 'Course ID cannot be empty or None.'
with self.assertRaisesRegex(ValueError, msg):
Course(None)
with self.assertRaisesRegex(ValueError, msg):
Course('')
def test_eq(self):
""" Verify the __eq__ method returns True if two Course objects have the same `id`. """
course = CourseFactory()
# Both objects must be of type Course
self.assertNotEqual(course, 1)
# A Course should be equal to itself
self.assertEqual(course, course)
# Two Courses are equal if their id attributes match
self.assertEqual(course, Course(id=course.id, body=course.body))
def test_str(self):
""" Verify the __str__ method returns a string representation of the Course. """
course = CourseFactory()
expected = 'Course {id}: {name}'.format(id=course.id, name=course.name)
self.assertEqual(str(course), expected)
def test_all(self):
""" Verify the method returns a list of all courses. """
course_bodies = self.mock_refresh_all()
courses = []
for body in course_bodies:
courses.append(Course.get(body['id']))
expected = {
'limit': 10,
'offset': 0,
'total': 2,
'results': courses,
}
self.assertDictEqual(Course.all(), expected)
def test_all_with_limit_and_offset(self):
""" Verify the method supports limit-offset pagination. """
limit = 1
courses = [CourseFactory(id='1'), CourseFactory(id='2')]
self.refresh_index()
for offset, course in enumerate(courses):
expected = {
'limit': limit,
'offset': offset,
'total': len(courses),
'results': [course],
}
self.assertDictEqual(Course.all(limit=limit, offset=offset), expected)
def test_get(self):
""" Verify the method returns a single course. """
course = CourseFactory()
retrieved = Course.get(course.id)
self.assertEqual(course, retrieved)
def test_get_with_missing_course(self):
"""
Verify the method raises a CourseNotFoundError if the specified course does not exist in the data store.
"""
# Note (CCB): This consistently fails on Travis with the error below. Trying index refresh as a last-ditch
# effort to resolve.
#
# elasticsearch.exceptions.TransportError: TransportError(503,
# 'NoShardAvailableActionException[[course_discovery_test][1] null]; nested:
# IllegalIndexShardStateException[[course_discovery_test][1] CurrentState[POST_RECOVERY] operations only
# allowed when started/relocated]; ')
#
self.refresh_index()
course_id = 'fake.course'
expected_msg_regexp = r'Course \[{}\] was not found in the data store.'.format(course_id)
with self.assertRaisesRegex(CourseNotFoundError, expected_msg_regexp):
Course.get(course_id)
def test_search(self):
""" Verify the method returns query results from the data store. """
prefix = 'test'
query = {
'query': {
'bool': {
'must': [
{
'wildcard': {
'course.name': prefix + '*'
}
}
]
}
}
}
courses = []
for i in range(3):
courses.append(CourseFactory.create(name=prefix + str(i)))
CourseFactory.create()
courses.sort(key=lambda course: course.id.lower())
self.refresh_index()
expected = {
'limit': 10,
'offset': 0,
'total': len(courses),
'results': courses,
}
self.assertEqual(Course.search(query), expected)
@responses.activate # pylint: disable=no-member
def test_refresh(self):
""" Verify the method refreshes data for a single course. """
course_id = 'SesameStreetX/Cookies/1T2016'
name = 'C is for Cookie'
body = {
'id': course_id,
'name': name
}
# Mock the call to the E-Commerce API
url = '{host}/courses/{course_id}/'.format(host=ECOMMERCE_API_URL, course_id=course_id)
responses.add(responses.GET, url, body=json.dumps(body), content_type=JSON) # pylint: disable=no-member
# Refresh the course, and ensure the attributes are correct.
course = Course.refresh(course_id, ACCESS_TOKEN)
attrs = {
'id': course_id,
'body': body,
'name': name,
}
self.assert_course_attrs(course, attrs)
# Ensure the data is persisted to the data store
course = Course.get(course_id)
self.assert_course_attrs(course, attrs)
def test_refresh_all(self):
""" Verify the method refreshes data for all courses. """
course_bodies = self.mock_refresh_all()
self.refresh_index()
# Ensure the data is persisted to the data store
for body in course_bodies:
course_id = body['id']
attrs = {
'id': course_id,
'body': body,
'name': body['name'],
}
course = Course.get(course_id)
self.assert_course_attrs(course, attrs)
def test_name(self):
""" Verify the method returns the course name. """
name = 'ABC Course'
course = Course('a/b/c', {'name': name})
self.assertEqual(course.name, name)
def test_save(self):
""" Verify the method creates and/or updates new courses. """
course_id = 'TestX/Saving/4T2015'
body = {
'id': course_id,
'name': 'Save Me!'
}
self.assertFalse(self.es.exists(index=self.index, doc_type=Course.doc_type, id=course_id))
Course(course_id, body).save()
self.refresh_index()
self.assertTrue(self.es.exists(index=self.index, doc_type=Course.doc_type, id=course_id))
course = Course.get(course_id)
self.assertEqual(course.id, course_id)
self.assertEqual(course.body, body)
import mock
from django.core.management import CommandError, call_command
from django.test import TestCase
class RefreshAllCoursesCommandTests(TestCase):
    """ Tests for the refresh_all_courses management command. """
    cmd = 'refresh_all_courses'

    @mock.patch('course_discovery.apps.courses.models.Course.refresh_all')
    def test_call(self, patched_refresh_all):
        """ Verify the management command calls Course.refresh_all(). """
        token = 'secret'

        call_command(self.cmd, access_token=token)

        patched_refresh_all.assert_called_once_with(access_token=token)

    def test_call_without_access_token(self):
        """ Verify the command requires an access token. """
        expected_error = 'Courses cannot be migrated if no access token is supplied.'

        with self.assertRaisesRegex(CommandError, expected_error):
            call_command(self.cmd)
import os
from os.path import join, abspath, dirname
from sys import path
# PATH vars
def here(*x):
    """ Join the given path components onto the absolute directory containing this file. """
    return join(abspath(dirname(__file__)), *x)


PROJECT_ROOT = here("..")


def root(*x):
    """ Join the given path components onto the absolute PROJECT_ROOT. """
    return join(abspath(PROJECT_ROOT), *x)


# Add the apps directory to the module search path.
path.append(root('apps'))

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.environ.get('COURSE_DISCOVERY_SECRET_KEY', 'insecure-secret-key')
......@@ -37,6 +39,7 @@ PROJECT_APPS = (
'course_discovery.apps.core',
'course_discovery.apps.api',
'course_discovery.apps.catalogs',
'course_discovery.apps.courses',
)
INSTALLED_APPS += THIRD_PARTY_APPS
......@@ -193,7 +196,7 @@ LOGGING = {
},
'handlers': {
'console': {
'level': 'INFO',
'level': 'DEBUG',
'class': 'logging.StreamHandler',
'formatter': 'standard',
'stream': 'ext://sys.stdout',
......@@ -232,10 +235,24 @@ LOGGING = {
# Django REST Framework configuration.
REST_FRAMEWORK = {
    'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.LimitOffsetPagination',
    'PAGE_SIZE': 20,
    # Listed exactly once: a second, comma-less copy of this entry made the dict literal a syntax error.
    'VIEW_DESCRIPTION_FUNCTION': 'rest_framework_swagger.views.get_restructuredtext',
    # Renderer classes for test requests.
    'TEST_REQUEST_RENDERER_CLASSES': (
        'rest_framework.renderers.MultiPartRenderer',
        'rest_framework.renderers.JSONRenderer',
        'rest_framework.renderers.BrowsableAPIRenderer',
    )
}
# Settings for the Swagger API documentation.
SWAGGER_SETTINGS = dict(
    api_version='v1',
    doc_expansion='list',
)

# Elasticsearch host, index name, and whether to connect at startup.
ELASTICSEARCH = dict(
    host='es',
    index='course_discovery',
    connect_on_startup=True,
)

# TODO Replace with None and document.
ECOMMERCE_API_URL = 'https://ecommerce.stage.edx.org/api/v2/'
from os import environ
import platform
import sys
from os import environ
import yaml
from logging.handlers import SysLogHandler
......
import os
from course_discovery.settings.base import *
# TEST SETTINGS
INSTALLED_APPS += (
'django_nose',
......@@ -13,6 +11,7 @@ TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
# Command-line arguments for the nose test runner.
NOSE_ARGS = [
    # Presumably enables the nose-ignore-docstring plugin listed in the test requirements -- confirm.
    '--with-ignore-docstrings',
    # NOTE(review): these two look like nose log-capture options (capture level and clearing of
    # existing log handlers) -- confirm against the nose documentation.
    '--logging-level=DEBUG',
    '--logging-clear-handlers',
]
# END TEST SETTINGS
......@@ -30,3 +29,9 @@ DATABASES = {
},
}
# END IN-MEMORY TEST DATABASE
# Elasticsearch settings for the test suite. The host can be overridden with the
# TEST_ELASTICSEARCH_HOST environment variable and defaults to localhost.
ELASTICSEARCH = dict(
    host=os.environ.get('TEST_ELASTICSEARCH_HOST', 'localhost'),
    index='course_discovery_test',
    connect_on_startup=True,
)
var $ = django.jQuery;

// Adds "Prettify" and "Preview" buttons beneath the query field of the form.
$(function () {
    var $prettifyBtn,
        $previewBtn,
        $previewRow,
        $queryRow = $('.form-row.field-query'),
        $queryField = $('#id_query');

    /**
     * Re-serialize a JSON document with the given indentation.
     *
     * Going through JSON.parse/JSON.stringify changes only insignificant whitespace;
     * whitespace inside string values is left untouched.
     *
     * @param {string} text - JSON document to re-serialize.
     * @param {number} indent - Spaces per indentation level; 0 yields compact output.
     * @returns {?string} The re-serialized JSON, or null if `text` is not valid JSON.
     */
    function reformatJson(text, indent) {
        try {
            return JSON.stringify(JSON.parse(text), null, indent);
        } catch (err) {
            return null;
        }
    }

    /** Tell the user that the query could not be parsed. */
    function reportInvalidQuery() {
        window.alert(gettext('The query is not valid JSON.'));
    }

    // Create a wrapping <div> for the button, and add an empty label
    // to align the button with the text input field.
    $previewRow = $('<div><label></label></div>');
    $queryRow.append($previewRow);

    // Create a prettify button
    $prettifyBtn = $('<button/>', {
        // Translators: "Prettify" means formatting the JSON, fixing alignment issues.
        text: gettext('Prettify'),
        click: function (e) {
            var formatted,
                query = $queryField.val();

            // Keep the button from submitting the surrounding form.
            e.preventDefault();

            if (query) {
                formatted = reformatJson(query, 2);

                if (formatted === null) {
                    // Leave the field untouched rather than failing with an uncaught SyntaxError.
                    reportInvalidQuery();
                } else {
                    $queryField.val(formatted);
                }
            }
        }
    });

    $previewRow.append($prettifyBtn);

    // Create a preview button
    $previewBtn = $('<button/>', {
        text: gettext('Preview'),
        click: function (e) {
            var url,
                compact,
                query = $queryField.val();

            // Keep the button from submitting the surrounding form.
            e.preventDefault();

            if (query) {
                // Compact the JSON. Stripping whitespace with a regular expression would also
                // remove spaces inside string values and change the meaning of the query.
                compact = reformatJson(query, 0);

                if (compact === null) {
                    reportInvalidQuery();
                    return;
                }

                // URL encode
                url = '/api/v1/courses/?q=' + encodeURIComponent(compact);
                window.open(url, 'catalog_preview');
            }
        }
    });

    $previewRow.append($previewBtn);
});
......@@ -43,6 +43,6 @@ urlpatterns = [
]
# Expose the Django Debug Toolbar URLs only when DEBUG is on and ENABLE_DJANGO_TOOLBAR is set in the environment.
if settings.DEBUG and os.environ.get('ENABLE_DJANGO_TOOLBAR', False):  # pragma: no cover
    # Imported inside the branch, so debug_toolbar is only needed when the toolbar is enabled.
    import debug_toolbar  # pylint: disable=wrong-import-order,wrong-import-position,import-error

    urlpatterns.append(url(r'^__debug__/', include(debug_toolbar.urls)))
......@@ -34,6 +34,8 @@ course-discovery:
volumes:
- .:/edx/app/course_discovery/course_discovery
command: /edx/app/course_discovery/devstack.sh start
environment:
TEST_ELASTICSEARCH_HOST: "es"
ports:
- "18381:18381"
- "8381:8381"
......
......@@ -2,6 +2,8 @@ django == 1.8.7
django-extensions == 1.5.9
django-waffle == 0.11
djangorestframework == 3.3.1
django-rest-swagger==0.3.4
django-rest-swagger[reST]==0.3.4
edx-auth-backends == 0.1.3
edx-rest-api-client==1.2.1
elasticsearch>=1.0.0,<2.0.0
pytz == 2015.7
......@@ -10,3 +10,5 @@ factory-boy==2.6.0
mock == 1.3.0
nose-ignore-docstring == 0.2
pep8 == 1.6.2
responses==0.5.0
testfixtures==4.7.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment