Commit 761f23dd by Tyler Hallada Committed by GitHub

Merge pull request #131 from open-craft/haikuginger/report-download-endpoint

Adds endpoint to download course reports
parents 8cb6a33f 73225ece
import datetime import datetime
from importlib import import_module from importlib import import_module
import re
from django.db.models import Q from django.db.models import Q
from django.conf import settings
from django.core.files.storage import default_storage
from django.core.exceptions import SuspiciousFileOperation, SuspiciousOperation
from rest_framework.authtoken.models import Token from rest_framework.authtoken.models import Token
from opaque_keys.edx.locator import CourseKey
from opaque_keys import InvalidKeyError
from analytics_data_api.v0.exceptions import (
ReportFileNotFoundError,
CannotCreateReportDownloadLinkError
)
def get_filename_safe_course_id(course_id, replacement_char='_'):
    """
    Build a stand-in for a course_id that is safe to embed in a file path.

    A course_id that parses as a course key is reduced to its org, course and
    run parts joined by `replacement_char`; one that does not parse is used as
    given. In both cases every character other than a word character, a
    period or a hyphen is then replaced with `replacement_char`.
    """
    separator = unicode(replacement_char)
    try:
        key = CourseKey.from_string(course_id)
        candidate = separator.join((key.org, key.course, key.run))
    except InvalidKeyError:
        # An unparseable course_id is not fatal: fall back to sanitizing the raw value.
        candidate = course_id
    # Only A-Z, a-z, 0-9 and underscore (all matched by \w), plus period and
    # hyphen, are allowed through untouched.
    # TODO: Once we support courses with unicode characters, we will need to revisit this.
    return re.sub(r'[^\w\.\-]', separator, candidate)
def delete_user_auth_token(username): def delete_user_auth_token(username):
...@@ -84,3 +112,121 @@ def date_range(start_date, end_date, delta=datetime.timedelta(days=1)): ...@@ -84,3 +112,121 @@ def date_range(start_date, end_date, delta=datetime.timedelta(days=1)):
while cur_date < end_date: while cur_date < end_date:
yield cur_date yield cur_date
cur_date += delta cur_date += delta
def get_course_report_download_details(course_id, report_name):
    """
    Locate a course report in the default storage and describe how to download it.

    Returns a dict that always contains `course_id` (in its filename-safe
    form), `report_name` and `download_url`. The keys `last_modified`,
    `expiration_date` and `file_size` are added only when a value for them
    could be obtained.

    Raises ReportFileNotFoundError when storage says the file does not exist,
    and CannotCreateReportDownloadLinkError when existence cannot be checked
    or no download URL can be produced.
    """
    location_template = getattr(
        settings,
        'COURSE_REPORT_FILE_LOCATION_TEMPLATE',
        '{course_id}_{report_name}.csv'
    )

    # Raw course IDs may hold characters that some filesystems reject, so the
    # sanitized form is used both to find the file and to name the download.
    course_id = get_filename_safe_course_id(course_id)
    report_location = location_template.format(course_id=course_id, report_name=report_name)

    try:
        if not default_storage.exists(report_location):
            raise ReportFileNotFoundError(course_id=course_id, report_name=report_name)
    except (
            AttributeError,
            NotImplementedError,
            ImportError,
            SuspiciousFileOperation,
            SuspiciousOperation
    ):
        # Give up when the storage backend:
        # - has no way of checking for file existence,
        # - declares one but does not implement it,
        # - cannot be imported at all, or
        # - refuses access to the requested location.
        raise CannotCreateReportDownloadLinkError

    # Modification time and size are nice-to-have; backends lacking them are tolerated.
    try:
        last_modified = default_storage.modified_time(report_location)
    except (NotImplementedError, AttributeError):
        last_modified = None

    try:
        download_size = default_storage.size(report_location)
    except (NotImplementedError, AttributeError):
        download_size = None

    # The download filename carries a timestamp; without a known modification
    # time, the current UTC date and time is used instead.
    filename_stamp = (last_modified or datetime.datetime.utcnow()).strftime('%Y%m%dT%H%M%SZ')
    download_filename = '{}-{}-{}.csv'.format(course_id, report_name, filename_stamp)

    url, expiration_date = get_file_object_url(report_location, download_filename)

    details = {
        'course_id': course_id,
        'report_name': report_name,
        'download_url': url
    }

    # Everything below is optional. The URL is not guaranteed either, but an
    # exception has already been raised by now if it could not be created.
    if last_modified is not None:
        details['last_modified'] = last_modified.strftime(settings.DATETIME_FORMAT)
    if expiration_date is not None:
        details['expiration_date'] = expiration_date.strftime(settings.DATETIME_FORMAT)
    if download_size is not None:
        details['file_size'] = download_size
    return details
def get_file_object_url(filename, download_filename):
    """
    Ask the default storage for a download URL for `filename`.

    Returns a `(url, expires_at)` pair. `expires_at` is the datetime at which
    the URL stops working, or None when the URL had to be requested without
    any expiry information.

    The first attempt passes S3-style optional response parameters (expiry
    time, content disposition and content type) so that they apply to any
    download made through the generated link. If the storage rejects those
    arguments with a TypeError, a plain URL is requested instead.

    Raises CannotCreateReportDownloadLinkError when no URL can be obtained.
    """
    # Links live for two minutes unless configured otherwise.
    lifetime = getattr(settings, 'COURSE_REPORT_DOWNLOAD_EXPIRY_TIME', 120)
    expires_at = get_expiration_date(lifetime)

    try:
        response_headers = {
            'response-content-disposition': 'attachment; filename={}'.format(download_filename),
            'response-content-type': 'text/csv',
            # The Expires header requires a very particular timestamp format
            'response-expires': expires_at.strftime('%a, %d %b %Y %H:%M:%S GMT')
        }
        url = default_storage.url(
            name=filename,
            response_headers=response_headers,
            expire=lifetime
        )
    except TypeError:
        # A TypeError from `.url()` typically means the extra arguments are not
        # accepted by this backend. Retry with the file name alone.
        try:
            url = default_storage.url(name=filename)
        except (AttributeError, TypeError, NotImplementedError):
            # Failed again for an unknown reason; there is nothing left to try.
            raise CannotCreateReportDownloadLinkError
        # A plain URL carries no expiry that we know of.
        expires_at = None
    except (AttributeError, NotImplementedError):
        # The backend has no `.url()` method, or has one that cannot be used.
        raise CannotCreateReportDownloadLinkError

    return url, expires_at
def get_expiration_date(seconds):
    """
    Return the UTC datetime that lies `seconds` seconds after the current moment.
    """
    now = datetime.datetime.utcnow()
    lifetime = datetime.timedelta(seconds=seconds)
    return now + lifetime
...@@ -72,3 +72,26 @@ class ParameterValueError(BaseError): ...@@ -72,3 +72,26 @@ class ParameterValueError(BaseError):
def __init__(self, message, *args, **kwargs): def __init__(self, message, *args, **kwargs):
super(ParameterValueError, self).__init__(*args, **kwargs) super(ParameterValueError, self).__init__(*args, **kwargs)
self.message = message self.message = message
class ReportFileNotFoundError(BaseError):
    """
    Raised when the file backing a requested course report cannot be found.

    Both `course_id` and `report_name` must be supplied as keyword arguments;
    they are used to build `message` and are not forwarded to the base class.
    """

    def __init__(self, *args, **kwargs):
        # Remove our own keywords before delegating, so the base class only
        # receives the arguments it was actually given.
        course_id = kwargs.pop('course_id')
        report_name = kwargs.pop('report_name')
        super(ReportFileNotFoundError, self).__init__(*args, **kwargs)
        template = self.message_template
        self.message = template.format(course_id=course_id, report_name=report_name)

    @property
    def message_template(self):
        """Format string for the error message; expects `report_name` and `course_id`."""
        return 'Could not find report \'{report_name}\' for course {course_id}.'
class CannotCreateReportDownloadLinkError(BaseError):
    """
    Raised when a download link for a report file cannot be produced.
    """

    # Fixed message; unlike ReportFileNotFoundError it takes no per-request details.
    message = 'Could not create a downloadable link to the report.'
...@@ -8,6 +8,8 @@ from analytics_data_api.v0.exceptions import ( ...@@ -8,6 +8,8 @@ from analytics_data_api.v0.exceptions import (
LearnerEngagementTimelineNotFoundError, LearnerEngagementTimelineNotFoundError,
LearnerNotFoundError, LearnerNotFoundError,
ParameterValueError, ParameterValueError,
ReportFileNotFoundError,
CannotCreateReportDownloadLinkError,
) )
...@@ -129,3 +131,39 @@ class ParameterValueErrorMiddleware(BaseProcessErrorMiddleware): ...@@ -129,3 +131,39 @@ class ParameterValueErrorMiddleware(BaseProcessErrorMiddleware):
@property @property
def status_code(self): def status_code(self):
return status.HTTP_400_BAD_REQUEST return status.HTTP_400_BAD_REQUEST
class ReportFileNotFoundErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Answer with a 404 when the requested report file is not present.

    Supplies the exception class, error code and HTTP status for that case;
    how they are turned into a response is up to BaseProcessErrorMiddleware.
    """

    @property
    def error(self):
        """Exception class this middleware is responsible for."""
        return ReportFileNotFoundError

    @property
    def status_code(self):
        """HTTP status to respond with."""
        return status.HTTP_404_NOT_FOUND

    @property
    def error_code(self):
        """Identifier for this kind of failure."""
        return 'report_file_not_found'
class CannotCreateDownloadLinkErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Answer with a 501 when the storage backend cannot create download links.

    Supplies the exception class, error code and HTTP status for that case;
    how they are turned into a response is up to BaseProcessErrorMiddleware.
    """

    @property
    def error(self):
        """Exception class this middleware is responsible for."""
        return CannotCreateReportDownloadLinkError

    @property
    def status_code(self):
        """HTTP status to respond with."""
        return status.HTTP_501_NOT_IMPLEMENTED

    @property
    def error_code(self):
        """Identifier for this kind of failure."""
        return 'cannot_create_report_download_link'
...@@ -3,7 +3,10 @@ import json ...@@ -3,7 +3,10 @@ import json
from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.keys import CourseKey
from rest_framework import status from rest_framework import status
from analytics_data_api.utils import get_filename_safe_course_id
DEMO_COURSE_ID = u'course-v1:edX+DemoX+Demo_2014' DEMO_COURSE_ID = u'course-v1:edX+DemoX+Demo_2014'
SANITIZED_DEMO_COURSE_ID = get_filename_safe_course_id(DEMO_COURSE_ID)
class DemoCourseMixin(object): class DemoCourseMixin(object):
......
...@@ -12,13 +12,14 @@ import urllib ...@@ -12,13 +12,14 @@ import urllib
from django.conf import settings from django.conf import settings
from django_dynamic_fixture import G from django_dynamic_fixture import G
import pytz import pytz
from mock import patch, Mock
from analytics_data_api.constants.country import get_country from analytics_data_api.constants.country import get_country
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
from analytics_data_api.constants import country, enrollment_modes, genders from analytics_data_api.constants import country, enrollment_modes, genders
from analytics_data_api.v0.models import CourseActivityWeekly from analytics_data_api.v0.models import CourseActivityWeekly
from analytics_data_api.v0.tests.utils import flatten from analytics_data_api.v0.tests.utils import flatten
from analytics_data_api.v0.tests.views import DemoCourseMixin, DEMO_COURSE_ID from analytics_data_api.v0.tests.views import DemoCourseMixin, DEMO_COURSE_ID, SANITIZED_DEMO_COURSE_ID
from analyticsdataserver.tests import TestCaseWithAuthentication from analyticsdataserver.tests import TestCaseWithAuthentication
...@@ -785,3 +786,119 @@ class CourseVideosListViewTests(DemoCourseMixin, TestCaseWithAuthentication): ...@@ -785,3 +786,119 @@ class CourseVideosListViewTests(DemoCourseMixin, TestCaseWithAuthentication):
def test_get_404(self): def test_get_404(self):
response = self._get_data('foo/bar/course') response = self._get_data('foo/bar/course')
self.assertEquals(response.status_code, 404) self.assertEquals(response.status_code, 404)
class CourseReportDownloadViewTests(DemoCourseMixin, TestCaseWithAuthentication):
    """
    Tests for the course report download endpoint, run against patched storage methods.
    """
    path = '/api/v0/courses/{course_id}/reports/{report_name}'

    def _request_report(self, report_name):
        """Perform an authenticated GET for `report_name` of the demo course."""
        url = self.path.format(course_id=DEMO_COURSE_ID, report_name=report_name)
        return self.authenticated_get(url)

    def _fixed_timestamp(self):
        """The datetime returned by the patched storage/expiry calls, formatted as the API does."""
        return datetime.datetime(2014, 1, 1, tzinfo=pytz.utc).strftime(settings.DATETIME_FORMAT)

    def _assert_working_link(self, response, **optional_details):
        """Check for a 200 whose body is the mandatory fields plus `optional_details`."""
        self.assertEqual(response.status_code, 200)
        expected = {
            'course_id': SANITIZED_DEMO_COURSE_ID,
            'report_name': 'problem_response',
            'download_url': 'http://fake'
        }
        expected.update(optional_details)
        self.assertEqual(response.data, expected)

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=False))
    def test_report_file_not_found(self):
        # Storage says the file is missing.
        response = self._request_report('problem_response')
        self.assertEqual(response.status_code, 404)

    def test_report_not_supported(self):
        # The report name is well-formed but is not an enabled report.
        response = self._request_report('fake_problem_that_we_dont_support')
        self.assertEqual(response.status_code, 404)

    @patch('analytics_data_api.utils.default_storage', object())
    def test_incompatible_storage_provider(self):
        # A bare object() has none of the storage methods the endpoint relies on.
        response = self._request_report('problem_response')
        self.assertEqual(response.status_code, 501)

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch(
        'django.core.files.storage.default_storage.modified_time',
        Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))
    )
    @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000))
    @patch(
        'analytics_data_api.utils.get_expiration_date',
        Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))
    )
    def test_make_working_link(self):
        # Every optional detail is available.
        response = self._request_report('problem_response')
        self._assert_working_link(
            response,
            last_modified=self._fixed_timestamp(),
            expiration_date=self._fixed_timestamp(),
            file_size=1000
        )

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch(
        'django.core.files.storage.default_storage.modified_time',
        Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))
    )
    @patch('django.core.files.storage.default_storage.size', Mock(side_effect=NotImplementedError()))
    @patch(
        'analytics_data_api.utils.get_expiration_date',
        Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))
    )
    def test_make_working_link_with_missing_size(self):
        # Storage cannot report a size, so `file_size` must be absent.
        response = self._request_report('problem_response')
        self._assert_working_link(
            response,
            last_modified=self._fixed_timestamp(),
            expiration_date=self._fixed_timestamp()
        )

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch('django.core.files.storage.default_storage.modified_time', Mock(side_effect=NotImplementedError()))
    @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000))
    @patch(
        'analytics_data_api.utils.get_expiration_date',
        Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))
    )
    def test_make_working_link_with_missing_last_modified_date(self):
        # Storage cannot report a modification time, so `last_modified` must be absent.
        response = self._request_report('problem_response')
        self._assert_working_link(
            response,
            file_size=1000,
            expiration_date=self._fixed_timestamp()
        )
...@@ -14,7 +14,8 @@ COURSE_URLS = [ ...@@ -14,7 +14,8 @@ COURSE_URLS = [
('enrollment/location', views.CourseEnrollmentByLocationView, 'enrollment_by_location'), ('enrollment/location', views.CourseEnrollmentByLocationView, 'enrollment_by_location'),
('problems', views.ProblemsListView, 'problems'), ('problems', views.ProblemsListView, 'problems'),
('problems_and_tags', views.ProblemsAndTagsListView, 'problems_and_tags'), ('problems_and_tags', views.ProblemsAndTagsListView, 'problems_and_tags'),
('videos', views.VideosListView, 'videos') ('videos', views.VideosListView, 'videos'),
('reports/(?P<report_name>[a-zA-Z0-9_]+)', views.ReportDownloadView, 'reports'),
] ]
urlpatterns = [] urlpatterns = []
......
...@@ -9,11 +9,14 @@ from django.db.models import Max ...@@ -9,11 +9,14 @@ from django.db.models import Max
from django.http import Http404 from django.http import Http404
from django.utils.timezone import make_aware, utc from django.utils.timezone import make_aware, utc
from rest_framework import generics from rest_framework import generics
from rest_framework.response import Response
from rest_framework.views import APIView
from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.keys import CourseKey
from analytics_data_api.constants import enrollment_modes from analytics_data_api.constants import enrollment_modes
from analytics_data_api.utils import dictfetchall from analytics_data_api.utils import dictfetchall, get_course_report_download_details
from analytics_data_api.v0 import models, serializers from analytics_data_api.v0 import models, serializers
from analytics_data_api.v0.exceptions import ReportFileNotFoundError
from analytics_data_api.v0.views.utils import raise_404_if_none from analytics_data_api.v0.views.utils import raise_404_if_none
...@@ -772,3 +775,35 @@ class VideosListView(BaseCourseView): ...@@ -772,3 +775,35 @@ class VideosListView(BaseCourseView):
def apply_date_filtering(self, queryset): def apply_date_filtering(self, queryset):
# no date filtering for videos -- just return the queryset # no date filtering for videos -- just return the queryset
return queryset return queryset
class ReportDownloadView(APIView):
    """
    Get information needed to download a CSV report

    **Example request**

        GET /api/v0/courses/{course_id}/reports/{report_name}/

    **Response Values**

        Returns a single object with data about the report, with the following data:

            * course_id: The ID of the course
            * report_name: The name of the report
            * download_url: The Internet location from which the report can be downloaded

        The object may also return these items, if supported by the storage backend:

            * last_modified: The date the report was last updated
            * expiration_date: The date through which the link will be valid
            * file_size: The size in bytes of the CSV download
    """
    # Report names this endpoint will serve; read from settings when the class is defined.
    enabled_reports = settings.ENABLED_REPORT_IDENTIFIERS

    def get(self, _request, course_id, report_name):
        # A report that is not enabled is reported the same way as a missing file.
        if report_name not in self.enabled_reports:
            raise ReportFileNotFoundError(course_id=course_id, report_name=report_name)
        details = get_course_report_download_details(course_id, report_name)
        return Response(details)
...@@ -85,15 +85,6 @@ USE_TZ = True ...@@ -85,15 +85,6 @@ USE_TZ = True
########## END GENERAL CONFIGURATION ########## END GENERAL CONFIGURATION
########## MEDIA CONFIGURATION
# See: https://docs.djangoproject.com/en/dev/ref/settings/#media-root
MEDIA_ROOT = normpath(join(SITE_ROOT, 'media'))
# See: https://docs.djangoproject.com/en/dev/ref/settings/#media-url
MEDIA_URL = '/media/'
########## END MEDIA CONFIGURATION
########## STATIC FILE CONFIGURATION ########## STATIC FILE CONFIGURATION
# See: https://docs.djangoproject.com/en/dev/ref/settings/#static-root # See: https://docs.djangoproject.com/en/dev/ref/settings/#static-root
STATIC_ROOT = normpath(join(SITE_ROOT, 'assets')) STATIC_ROOT = normpath(join(SITE_ROOT, 'assets'))
...@@ -177,6 +168,8 @@ MIDDLEWARE_CLASSES = ( ...@@ -177,6 +168,8 @@ MIDDLEWARE_CLASSES = (
'analytics_data_api.v0.middleware.CourseNotSpecifiedErrorMiddleware', 'analytics_data_api.v0.middleware.CourseNotSpecifiedErrorMiddleware',
'analytics_data_api.v0.middleware.CourseKeyMalformedErrorMiddleware', 'analytics_data_api.v0.middleware.CourseKeyMalformedErrorMiddleware',
'analytics_data_api.v0.middleware.ParameterValueErrorMiddleware', 'analytics_data_api.v0.middleware.ParameterValueErrorMiddleware',
'analytics_data_api.v0.middleware.ReportFileNotFoundErrorMiddleware',
'analytics_data_api.v0.middleware.CannotCreateDownloadLinkErrorMiddleware',
) )
########## END MIDDLEWARE CONFIGURATION ########## END MIDDLEWARE CONFIGURATION
...@@ -204,6 +197,7 @@ THIRD_PARTY_APPS = ( ...@@ -204,6 +197,7 @@ THIRD_PARTY_APPS = (
'rest_framework.authtoken', 'rest_framework.authtoken',
'rest_framework_swagger', 'rest_framework_swagger',
'django_countries', 'django_countries',
'storages'
) )
LOCAL_APPS = ( LOCAL_APPS = (
...@@ -306,6 +300,13 @@ ENABLE_ADMIN_SITE = False ...@@ -306,6 +300,13 @@ ENABLE_ADMIN_SITE = False
# base url to generate link to user api # base url to generate link to user api
LMS_USER_ACCOUNT_BASE_URL = None LMS_USER_ACCOUNT_BASE_URL = None
# settings for report downloads
DEFAULT_FILE_STORAGE = 'django.core.files.storage.FileSystemStorage'
MEDIA_ROOT = normpath(join(SITE_ROOT, 'static', 'reports'))
MEDIA_URL = 'http://localhost:8100/static/reports/'
COURSE_REPORT_FILE_LOCATION_TEMPLATE = '{course_id}_{report_name}.csv'
ENABLED_REPORT_IDENTIFIERS = ('problem_response',)
########## END ANALYTICS DATA API CONFIGURATION ########## END ANALYTICS DATA API CONFIGURATION
......
...@@ -31,7 +31,10 @@ CONFIG_FILE=get_env_setting('ANALYTICS_API_CFG') ...@@ -31,7 +31,10 @@ CONFIG_FILE=get_env_setting('ANALYTICS_API_CFG')
with open(CONFIG_FILE) as f: with open(CONFIG_FILE) as f:
config_from_yaml = yaml.load(f) config_from_yaml = yaml.load(f)
REPORT_DOWNLOAD_BACKEND = config_from_yaml.pop('REPORT_DOWNLOAD_BACKEND', {})
vars().update(config_from_yaml) vars().update(config_from_yaml)
vars().update(REPORT_DOWNLOAD_BACKEND)
DB_OVERRIDES = dict( DB_OVERRIDES = dict(
PASSWORD=environ.get('DB_MIGRATION_PASS', DATABASES['default']['PASSWORD']), PASSWORD=environ.get('DB_MIGRATION_PASS', DATABASES['default']['PASSWORD']),
......
...@@ -29,3 +29,14 @@ TEST_RUNNER = 'django_nose.NoseTestSuiteRunner' ...@@ -29,3 +29,14 @@ TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
ELASTICSEARCH_LEARNERS_HOST = 'http://localhost:9223/' ELASTICSEARCH_LEARNERS_HOST = 'http://localhost:9223/'
ELASTICSEARCH_LEARNERS_INDEX = 'roster_test' ELASTICSEARCH_LEARNERS_INDEX = 'roster_test'
ELASTICSEARCH_LEARNERS_UPDATE_INDEX = 'index_update_test' ELASTICSEARCH_LEARNERS_UPDATE_INDEX = 'index_update_test'
# Default the django-storage settings so we can test easily
DEFAULT_FILE_STORAGE = 'storages.backends.s3boto.S3BotoStorage'
AWS_ACCESS_KEY_ID = 'xxxxx'
AWS_SECRET_ACCESS_KEY = 'xxxxx'
AWS_STORAGE_BUCKET_NAME = 'fake-bucket'
FTP_STORAGE_LOCATION = 'ftp://localhost:80/path'
# Default settings for report download endpoint
COURSE_REPORT_FILE_LOCATION_TEMPLATE = '/{course_id}_{report_name}.csv'
COURSE_REPORT_DOWNLOAD_EXPIRY_TIME = 120
...@@ -12,3 +12,4 @@ elasticsearch-dsl==0.0.11 # Apache 2.0 ...@@ -12,3 +12,4 @@ elasticsearch-dsl==0.0.11 # Apache 2.0
Markdown==2.6.6 # BSD Markdown==2.6.6 # BSD
-e git+https://github.com/edx/opaque-keys.git@d45d0bd8d64c69531be69178b9505b5d38806ce0#egg=opaque-keys -e git+https://github.com/edx/opaque-keys.git@d45d0bd8d64c69531be69178b9505b5d38806ce0#egg=opaque-keys
django-storages==1.4.1 # BSD
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment