Commit 80ac20ca by Clinton Blackburn

Merge pull request #10 from edx/csv

Added support for CSV output
parents f3b8c56f 3f674cc9
......@@ -62,7 +62,10 @@ disable=
# E1103: maybe no member
E1103,
# C0111: missing docstring (handled by pep257)
C0111
C0111,
# We can decide if names are invalid on our own
invalid-name,
[REPORTS]
......
......@@ -2,7 +2,9 @@
edX Analytics API Client
========================
The edX Analytics API Client (henceforth, client) allows users to retrieve data from the edX data warehouse.
The edX Analytics API Client (henceforth, client) allows users to retrieve data from the edX data warehouse. Currently,
the client supports retrieving course activity and enrollment data. By default, all data is returned in the JSON format.
Enrollment data may also be retrieved in the CSV format by changing the data_format argument.
Testing
=======
......
......@@ -2,6 +2,7 @@ import logging
import requests
import requests.exceptions
from analyticsclient import data_format as DF
from analyticsclient.course import Course
from analyticsclient.exceptions import ClientError, InvalidRequestError, NotFoundError, TimeoutError
......@@ -36,7 +37,7 @@ class Client(object):
self.status = Status(self)
self.courses = lambda course_id: Course(self, course_id)
def get(self, resource, timeout=None):
def get(self, resource, timeout=None, data_format=DF.JSON):
"""
Retrieve the data for a resource.
......@@ -45,13 +46,17 @@ class Client(object):
resource (str): Path in the form of slash separated strings.
timeout (float): Continue to attempt to retrieve a resource for this many seconds before giving up and
raising an error.
data_format (str): Format in which data should be returned
Returns: A structure consisting of simple python types (dict, list, int, str etc).
Returns: API response data in specified data_format
Raises: ClientError if the resource cannot be retrieved for any reason.
"""
response = self._request(resource, timeout=timeout)
response = self._request(resource, timeout=timeout, data_format=data_format)
if data_format == DF.CSV:
return response.text
try:
return response.json()
......@@ -82,13 +87,18 @@ class Client(object):
return False
# pylint: disable=no-member
def _request(self, resource, timeout=None):
def _request(self, resource, timeout=None, data_format=DF.JSON):
if timeout is None:
timeout = self.timeout
accept_format = 'application/json'
if data_format == DF.CSV:
accept_format = 'text/csv'
headers = {
'Accept': 'application/json',
'Accept': accept_format,
}
if self.auth_token:
headers['Authorization'] = 'Token ' + self.auth_token
......@@ -114,7 +124,7 @@ class Client(object):
return response
except requests.exceptions.Timeout:
message = "Response from {0} exceeded timeout of {1}s."
message = "Response from {0} exceeded timeout of {1}s.".format(resource, timeout)
log.exception(message)
raise TimeoutError(message)
......
import urllib
import analyticsclient.activity_type as at
import analyticsclient.activity_type as AT
import analyticsclient.data_format as DF
class Course(object):
......@@ -18,7 +19,7 @@ class Course(object):
self.client = client
self.course_id = unicode(course_id)
def enrollment(self, demographic=None, start_date=None, end_date=None):
def enrollment(self, demographic=None, start_date=None, end_date=None, data_format=DF.JSON):
"""
Get course enrollment data.
......@@ -33,6 +34,7 @@ class Course(object):
demographic (str): Demographic by which enrollment data should be grouped.
start_date (str): Minimum date for returned enrollment data
end_date (str): Maximum date for returned enrollment data
data_format (str): Format in which data should be returned
"""
path = 'courses/{0}/enrollment/'.format(self.course_id)
if demographic:
......@@ -49,13 +51,15 @@ class Course(object):
if querystring:
path += '?{0}'.format(querystring)
return self.client.get(path)
return self.client.get(path, data_format=data_format)
def recent_activity(self, activity_type=at.ANY):
def recent_activity(self, activity_type=AT.ANY, data_format=DF.JSON):
"""
Get the recent course activity.
Arguments:
activity_type (str): The type of recent activity to return. Defaults to ANY.
data_format (str): Format in which data should be returned
"""
return self.client.get('courses/{0}/recent_activity/?activity_type={1}'.format(self.course_id, activity_type))
path = 'courses/{0}/recent_activity/?activity_type={1}'.format(self.course_id, activity_type)
return self.client.get(path, data_format=data_format)
# Identifiers for the formats in which API data can be returned; callers pass
# one of these as the data_format argument (JSON is the default).
CSV = 'csv'
JSON = 'json'
......@@ -3,7 +3,9 @@ import json
import httpretty
import mock
import requests.exceptions
from testfixtures import log_capture
from analyticsclient import data_format
from analyticsclient.client import Client
from analyticsclient.exceptions import ClientError, TimeoutError
from analyticsclient.tests import ClientTestCase
......@@ -18,6 +20,7 @@ class ClientTests(ClientTestCase):
def tearDown(self):
httpretty.disable()
httpretty.reset()
def test_has_resource(self):
httpretty.register_uri(httpretty.GET, self.test_url, body='')
......@@ -58,14 +61,38 @@ class ClientTests(ClientTestCase):
# pylint: disable=protected-access
@mock.patch('requests.get', side_effect=requests.exceptions.Timeout)
def test_request_timeout(self, mock_get):
@log_capture()
def test_request_timeout(self, mock_get, lc):
url = self.test_url
timeout = None
self.assertRaises(TimeoutError, self.client._request, self.test_endpoint, timeout=timeout)
headers = {'Accept': 'application/json'}
self.assertRaises(TimeoutError, self.client._request, self.test_endpoint, timeout=timeout)
msg = 'Response from {0} exceeded timeout of {1}s.'.format(self.test_endpoint, self.client.timeout)
lc.check(('analyticsclient.client', 'ERROR', msg))
lc.clear()
mock_get.assert_called_once_with(url, headers=headers, timeout=self.client.timeout)
mock_get.reset_mock()
timeout = 10
self.assertRaises(TimeoutError, self.client._request, self.test_endpoint, timeout=timeout)
mock_get.assert_called_once_with(url, headers=headers, timeout=timeout)
msg = 'Response from {0} exceeded timeout of {1}s.'.format(self.test_endpoint, timeout)
lc.check(('analyticsclient.client', 'ERROR', msg))
def test_request_format(self):
    """Check the Accept header sent, and the value returned, by self.client.get for each data format."""
    # No data_format argument: the request should ask for JSON and the body is parsed into a dict.
    httpretty.register_uri(httpretty.GET, self.test_url, body='{}')
    response = self.client.get(self.test_endpoint)
    self.assertEqual(httpretty.last_request().headers['Accept'], 'application/json')
    self.assertDictEqual(response, {})

    # CSV: the request should ask for text/csv and the body is returned as-is.
    # The mocked body is not valid JSON, so this also shows no JSON parsing is attempted.
    httpretty.register_uri(httpretty.GET, self.test_url, body='not-json')
    response = self.client.get(self.test_endpoint, data_format=data_format.CSV)
    self.assertEqual(httpretty.last_request().headers['Accept'], 'text/csv')
    self.assertEqual(response, 'not-json')

    # Explicit JSON: same expectations as the default call.
    httpretty.register_uri(httpretty.GET, self.test_url, body='{}')
    response = self.client.get(self.test_endpoint, data_format=data_format.JSON)
    self.assertEqual(httpretty.last_request().headers['Accept'], 'application/json')
    self.assertDictEqual(response, {})
......@@ -3,6 +3,7 @@ import json
import httpretty
import re
from analyticsclient import activity_type as at
from analyticsclient import data_format
from analyticsclient import demographic as demo
from analyticsclient.exceptions import NotFoundError, InvalidRequestError
......@@ -91,3 +92,15 @@ class CoursesTests(ClientTestCase):
self.assertCorrectEnrollmentUrl(self.course, demo.EDUCATION)
self.assertCorrectEnrollmentUrl(self.course, demo.GENDER)
self.assertCorrectEnrollmentUrl(self.course, demo.LOCATION)
def test_enrollment_data_format(self):
    """Check that self.course.enrollment requests the content type matching its data_format argument."""
    uri = self.get_api_url('courses/{0}/enrollment/'.format(self.course.course_id))

    # Default call (no data_format): the Accept header should request JSON.
    httpretty.register_uri(httpretty.GET, uri, body='{}')
    self.course.enrollment()
    self.assertEqual(httpretty.last_request().headers['Accept'], 'application/json')

    # CSV call: the Accept header should request text/csv; the mocked body is deliberately not JSON.
    httpretty.register_uri(httpretty.GET, uri, body='not-json')
    self.course.enrollment(data_format=data_format.CSV)
    self.assertEqual(httpretty.last_request().headers['Accept'], 'text/csv')
......@@ -8,3 +8,4 @@ pep8==1.5.7
pylint==1.2.1
pep257==0.3.2
mock==1.0.1
testfixtures==4.0.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment