Commit 6fc3d10a by Clinton Blackburn

Merge pull request #5 from edx/csv-and-fake-data

Added CSV Support and Fake Data Command
parents acefe9fc 88eb164b
[run] [run]
omit = analyticsdataserver/settings* omit = analyticsdataserver/settings*
*wsgi.py *wsgi.py
analytics_data_api/management/commands/generate_fake_enrollment_data.py
[report] [report]
# Regexes for lines to exclude from consideration # Regexes for lines to exclude from consideration
......
...@@ -50,7 +50,8 @@ syncdb: ...@@ -50,7 +50,8 @@ syncdb:
$(foreach db_name,$(DATABASES),./manage.py syncdb --migrate --noinput --database=$(db_name);) $(foreach db_name,$(DATABASES),./manage.py syncdb --migrate --noinput --database=$(db_name);)
loaddata: syncdb loaddata: syncdb
python manage.py loaddata courses education_levels single_course_activity course_enrollment_birth_year course_enrollment_education course_enrollment_gender problem_response_answer_distribution course_enrollment_daily countries course_enrollment_country --database=analytics python manage.py loaddata courses education_levels countries single_course_activity problem_response_answer_distribution --database=analytics
python manage.py generate_fake_enrollment_data
demo: clean requirements loaddata demo: clean requirements loaddata
python manage.py set_api_key analytics analytics python manage.py set_api_key edx edx
...@@ -32,12 +32,12 @@ Getting Started ...@@ -32,12 +32,12 @@ Getting Started
Loading Data Loading Data
------------ ------------
The fixtures directory contains demo data. This data can be loaded with the following commands: The fixtures directory contains demo data and the `generate_fake_enrollment_data` management command can generate
enrollment data. Run the command below to load/generate this data in the database.
$ make loaddata $ make loaddata
Running Tests Running Tests
------------- -------------
Run `make validate` to install the requirements, run the tests, and run lint. If you want to get the API server ready for Run `make validate` to install the requirements, run the tests, and run lint.
acceptance testing or demos, `make demo` will install requirements and load the fixture data.
[
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-01",
"count": 100
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-01",
"count": 240
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-02",
"count": 106
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-02",
"count": 199
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-03",
"count": 200
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-03",
"count": 300
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IS",
"date": "2014-06-03",
"count": 6
}
}
]
[
{
"model": "v0.CourseEnrollmentDaily",
"pk": 1,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-01",
"count": 100
}
},
{
"model": "v0.CourseEnrollmentDaily",
"pk": 2,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-02",
"count": 150
}
},
{
"model": "v0.CourseEnrollmentDaily",
"pk": 3,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-03",
"count": 300
}
}
]
[
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 5,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 6,
"count": 12255,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 7,
"count": 70885,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 7,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 3,
"count": 981,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 8,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 5,
"count": 51591,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 9,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 4,
"count": 6051,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 10,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 8,
"count": 53216,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 11,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 1,
"count": 667,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 12,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 2,
"count": 5722,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 13,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 9,
"count": 9940,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 14,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 6,
"count": 12295,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 7,
"count": 70885,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 7,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 3,
"count": 981,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 8,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 5,
"count": 51591,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 9,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 4,
"count": 6051,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 10,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 8,
"count": 53216,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 11,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 1,
"count": 667,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 12,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 2,
"count": 5722,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 13,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 9,
"count": 9940,
"date": "2014-07-01"
}
}
]
[
{
"model": "v0.CourseEnrollmentByGender",
"pk": 1,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "m",
"count": 133240,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 2,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "f",
"count": 77495,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 3,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "o",
"count": 423,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 4,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "m",
"count": 1332,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 5,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "f",
"count": 77445,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "o",
"count": 34,
"date": "2014-07-02"
}
}
]
import datetime
import random
from django.core.management.base import BaseCommand
from analytics_data_api.v0 import models
# http://stackoverflow.com/a/3590105
def constrained_sum_sample_pos(num_values, total):
    """Return a randomly chosen list of ``num_values`` positive integers summing to ``total``.

    Each such list is equally likely to occur.

    Args:
        num_values: How many integers to return; must satisfy
            ``1 <= num_values <= total``.
        total: The value the returned integers must add up to.

    Returns:
        A list of ``num_values`` integers, each at least 1, whose sum is ``total``.

    Raises:
        ValueError: If ``total`` cannot be split into ``num_values`` positive parts.
    """
    if num_values < 1 or num_values > total:
        raise ValueError(
            'Cannot split %r into %r positive integers.' % (total, num_values))

    # Pick num_values - 1 distinct cut points strictly between 0 and total; the
    # gaps between consecutive cut points are the positive parts.
    # ``range`` (rather than ``xrange``) keeps this runnable on Python 2 and 3.
    dividers = sorted(random.sample(range(1, total), num_values - 1))
    upper_bounds = dividers + [total]
    lower_bounds = [0] + dividers
    return [upper - lower for upper, lower in zip(upper_bounds, lower_bounds)]
def get_count(start):
    """Return ``start`` shifted by a random amount, truncated to an integer.

    The shift is 25 times a draw from ``random.gauss(0, 1)``.
    """
    noise = random.gauss(0, 1)
    return int(start + 25 * noise)
class Command(BaseCommand):
    """Replace all course enrollment rows with randomly generated data.

    The command deletes every existing row of the daily, gender, education,
    birth-year and country enrollment models, then writes 120 consecutive days
    of data (starting 2014-01-01) for the first course in the database.
    """

    help = 'Deletes existing enrollment data and generates fake enrollment data for the first course.'

    def handle(self, *args, **options):
        """Delete the existing enrollment data and generate a fresh data set.

        Raises:
            CommandError: If there is no course to attach the data to.
        """
        # Imported here so the block does not rely on the module-level imports.
        from django.core.management.base import CommandError

        days = 120
        course = models.Course.objects.first()
        if course is None:
            raise CommandError('No course found. Load the course fixtures before generating enrollment data.')

        start_date = datetime.date(year=2014, month=1, day=1)

        # Each table maps a category to its share of the daily total. The
        # shares in every table sum to 1.0 so the per-category counts never
        # exceed the daily total.
        genders = {
            'm': 0.6107,
            'f': 0.3870,
            'o': 0.0023
        }
        education_levels = {
            'associates': 0.058,
            'bachelors': 0.3355,
            'primary': 0.0046,
            'secondary': 0.2442,
            'junior_secondary': 0.0286,
            'masters': 0.2518,
            'none': 0.0032,
            'other': 0.0271,
            'doctorate': 0.0470
        }
        countries = {
            'US': 0.34,
            'GH': 0.12,
            'IN': 0.10,
            'CA': 0.14,
            'CN': 0.22,
            'DE': 0.08
        }

        # Split 1000 into one random positive part per birth year and scale the
        # parts down to ratios that sum to 1.0.
        birth_years = range(1960, 2005)
        ratios = [n / 1000.0 for n in constrained_sum_sample_pos(len(birth_years), 1000)]
        birth_years = dict(zip(birth_years, ratios))

        # Resolve the related rows once instead of once per generated day.
        education_level_ratios = [
            (models.EducationLevel.objects.get(short_name=short_name), ratio)
            for short_name, ratio in education_levels.items()
        ]
        country_ratios = [
            (models.Country.objects.get(code=code), ratio)
            for code, ratio in countries.items()
        ]

        # Delete existing data
        for model in [models.CourseEnrollmentDaily, models.CourseEnrollmentByGender,
                      models.CourseEnrollmentByEducation, models.CourseEnrollmentByBirthYear,
                      models.CourseEnrollmentByCountry]:
            model.objects.all().delete()

        # Create new data
        daily_total = 1500
        for i in range(days):
            # Each day's total is the previous day's total plus random noise.
            daily_total = get_count(daily_total)
            date = start_date + datetime.timedelta(days=i)

            models.CourseEnrollmentDaily.objects.create(course=course, date=date, count=daily_total)

            # int() truncates, so the category counts may add up to slightly
            # less than the daily total.
            for gender, ratio in genders.items():
                count = int(ratio * daily_total)
                models.CourseEnrollmentByGender.objects.create(course=course, date=date, count=count,
                                                               gender=gender)

            for education_level, ratio in education_level_ratios:
                count = int(ratio * daily_total)
                models.CourseEnrollmentByEducation.objects.create(course=course, date=date, count=count,
                                                                  education_level=education_level)

            for country, ratio in country_ratios:
                count = int(ratio * daily_total)
                models.CourseEnrollmentByCountry.objects.create(course=course, date=date, count=count,
                                                                country=country)

            for birth_year, ratio in birth_years.items():
                count = int(ratio * daily_total)
                models.CourseEnrollmentByBirthYear.objects.create(course=course, date=date, count=count,
                                                                  birth_year=birth_year)
...@@ -31,7 +31,7 @@ class CourseActivityByWeek(models.Model): ...@@ -31,7 +31,7 @@ class CourseActivityByWeek(models.Model):
class BaseCourseEnrollment(models.Model): class BaseCourseEnrollment(models.Model):
course = models.ForeignKey(Course, null=False) course = models.ForeignKey(Course, null=False)
date = models.DateField(null=False) date = models.DateField(null=False, db_index=True)
count = models.IntegerField(null=False) count = models.IntegerField(null=False)
class Meta(object): class Meta(object):
......
import collections
def flatten(dictionary, parent_key='', sep='.'):
    """
    Flatten a nested dictionary into a single-level dictionary.

    Keys of nested mappings are joined to their parent key with ``sep`` at
    every nesting level, e.g. ``{'a': {'b': 1}}`` becomes ``{'a.b': 1}``.
    A nested mapping that is empty contributes no keys.

    http://stackoverflow.com/a/6027615

    Args:
        dictionary: The mapping to flatten.
        parent_key: Prefix prepended to every key; empty for the top level.
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new dict whose values are the non-mapping values of ``dictionary``.
    """
    # MutableMapping lives in collections.abc on Python 3; the alias on the
    # collections module was removed in Python 3.10.
    try:
        from collections.abc import MutableMapping
    except ImportError:  # Python 2
        from collections import MutableMapping

    items = []
    for key, value in dictionary.items():
        new_key = parent_key + sep + key if parent_key else key
        if isinstance(value, MutableMapping):
            # Pass sep down so a custom separator applies at every depth.
            items.extend(flatten(value, new_key, sep).items())
        else:
            items.append((new_key, value))
    return dict(items)
"""Common settings and globals.""" """Common settings and globals."""
from os.path import abspath, basename, dirname, join, normpath from os.path import abspath, basename, dirname, join, normpath
from sys import stderr from sys import stderr
...@@ -250,6 +249,11 @@ REST_FRAMEWORK = { ...@@ -250,6 +249,11 @@ REST_FRAMEWORK = {
# For the browseable API # For the browseable API
'rest_framework.authentication.SessionAuthentication', 'rest_framework.authentication.SessionAuthentication',
), ),
'DEFAULT_RENDERER_CLASSES': (
'rest_framework.renderers.JSONRenderer',
'rest_framework.renderers.BrowsableAPIRenderer',
'rest_framework_csv.renderers.CSVRenderer',
)
} }
########## END REST FRAMEWORK CONFIGURATION ########## END REST FRAMEWORK CONFIGURATION
......
...@@ -85,5 +85,5 @@ ENABLE_ADMIN_SITE = True ...@@ -85,5 +85,5 @@ ENABLE_ADMIN_SITE = True
TEST_RUNNER = 'django_nose.NoseTestSuiteRunner' TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
SWAGGER_SETTINGS = { SWAGGER_SETTINGS = {
'api_key': 'analytics' 'api_key': 'edx'
} }
from contextlib import contextmanager from contextlib import contextmanager
from functools import partial
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.db.utils import ConnectionHandler, DatabaseError from django.db.utils import ConnectionHandler, DatabaseError
from django.test import TestCase from django.test import TestCase
from django.test.utils import override_settings from django.test.utils import override_settings
from mock import patch, Mock
import mock import mock
from rest_framework.authtoken.models import Token from rest_framework.authtoken.models import Token
...@@ -15,13 +14,16 @@ class TestCaseWithAuthentication(TestCase): ...@@ -15,13 +14,16 @@ class TestCaseWithAuthentication(TestCase):
def setUp(self): def setUp(self):
super(TestCaseWithAuthentication, self).setUp() super(TestCaseWithAuthentication, self).setUp()
test_user = User.objects.create_user('tester', 'test@example.com', 'testpassword') test_user = User.objects.create_user('tester', 'test@example.com', 'testpassword')
token = Token.objects.create(user=test_user) self.token = Token.objects.create(user=test_user)
self.authenticated_get = partial(self.client.get, HTTP_AUTHORIZATION='Token ' + token.key, follow=True)
def authenticated_get(self, path, data=None, follow=True, **extra):
    """Issue a GET through ``self.client`` with a token Authorization header.

    ``data`` falls back to an empty dict when it is falsy. The header value is
    ``'Token '`` followed by ``self.token.key``. Any ``extra`` keyword
    arguments are forwarded to ``self.client.get`` unchanged.
    """
    payload = data if data else {}
    auth_header = 'Token ' + self.token.key
    return self.client.get(path, payload, follow, HTTP_AUTHORIZATION=auth_header, **extra)
@contextmanager @contextmanager
def no_database(): def no_database():
cursor_mock = Mock(side_effect=DatabaseError) cursor_mock = mock.Mock(side_effect=DatabaseError)
with mock.patch('django.db.backends.util.CursorWrapper', cursor_mock): with mock.patch('django.db.backends.util.CursorWrapper', cursor_mock):
yield yield
...@@ -58,7 +60,7 @@ class OperationalEndpointsTest(TestCaseWithAuthentication): ...@@ -58,7 +60,7 @@ class OperationalEndpointsTest(TestCaseWithAuthentication):
@staticmethod @staticmethod
@contextmanager @contextmanager
def override_database_connections(databases): def override_database_connections(databases):
with patch('analyticsdataserver.views.connections', ConnectionHandler(databases)): with mock.patch('analyticsdataserver.views.connections', ConnectionHandler(databases)):
yield yield
@override_settings(ANALYTICS_DATABASE='reporting') @override_settings(ANALYTICS_DATABASE='reporting')
......
...@@ -5,3 +5,4 @@ django-model-utils==1.4.0 ...@@ -5,3 +5,4 @@ django-model-utils==1.4.0
djangorestframework==2.3.5 djangorestframework==2.3.5
ipython==2.1.0 ipython==2.1.0
django-rest-swagger==0.1.14 django-rest-swagger==0.1.14
djangorestframework-csv==1.3.3
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment