Commit 6fc3d10a by Clinton Blackburn

Merge pull request #5 from edx/csv-and-fake-data

Added CSV Support and Fake Data Command
parents acefe9fc 88eb164b
[run]
omit = analyticsdataserver/settings*
*wsgi.py
analytics_data_api/management/commands/generate_fake_enrollment_data.py
[report]
# Regexes for lines to exclude from consideration
......
......@@ -50,7 +50,8 @@ syncdb:
$(foreach db_name,$(DATABASES),./manage.py syncdb --migrate --noinput --database=$(db_name);)
loaddata: syncdb
python manage.py loaddata courses education_levels single_course_activity course_enrollment_birth_year course_enrollment_education course_enrollment_gender problem_response_answer_distribution course_enrollment_daily countries course_enrollment_country --database=analytics
python manage.py loaddata courses education_levels countries single_course_activity problem_response_answer_distribution --database=analytics
python manage.py generate_fake_enrollment_data
demo: clean requirements loaddata
python manage.py set_api_key analytics analytics
python manage.py set_api_key edx edx
......@@ -32,12 +32,12 @@ Getting Started
Loading Data
------------
The fixtures directory contains demo data. This data can be loaded with the following commands:
The fixtures directory contains demo data and the `generate_fake_enrollment_data` management command can generate
enrollment data. Run the command below to load/generate this data in the database.
$ make loaddata
Running Tests
-------------
Run `make validate` to install the requirements, run the tests, and run lint. If you want to get the API server ready for
acceptance testing or demos, `make demo` will install requirements and load the fixture data.
Run `make validate` to install the requirements, run the tests, and run lint.
[
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-01",
"count": 100
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-01",
"count": 240
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-02",
"count": 106
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-02",
"count": 199
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-03",
"count": 200
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-03",
"count": 300
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IS",
"date": "2014-06-03",
"count": 6
}
}
]
[
{
"model": "v0.CourseEnrollmentDaily",
"pk": 1,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-01",
"count": 100
}
},
{
"model": "v0.CourseEnrollmentDaily",
"pk": 2,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-02",
"count": 150
}
},
{
"model": "v0.CourseEnrollmentDaily",
"pk": 3,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-03",
"count": 300
}
}
]
[
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 5,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 6,
"count": 12255,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 7,
"count": 70885,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 7,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 3,
"count": 981,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 8,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 5,
"count": 51591,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 9,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 4,
"count": 6051,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 10,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 8,
"count": 53216,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 11,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 1,
"count": 667,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 12,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 2,
"count": 5722,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 13,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 9,
"count": 9940,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 14,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 6,
"count": 12295,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 7,
"count": 70885,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 7,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 3,
"count": 981,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 8,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 5,
"count": 51591,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 9,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 4,
"count": 6051,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 10,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 8,
"count": 53216,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 11,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 1,
"count": 667,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 12,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 2,
"count": 5722,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 13,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 9,
"count": 9940,
"date": "2014-07-01"
}
}
]
[
{
"model": "v0.CourseEnrollmentByGender",
"pk": 1,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "m",
"count": 133240,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 2,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "f",
"count": 77495,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 3,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "o",
"count": 423,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 4,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "m",
"count": 1332,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 5,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "f",
"count": 77445,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "o",
"count": 34,
"date": "2014-07-02"
}
}
]
import datetime
import random
from django.core.management.base import BaseCommand
from analytics_data_api.v0 import models
# http://stackoverflow.com/a/3590105
def constrained_sum_sample_pos(num_values, total):
    """Return a random list of ``num_values`` positive integers summing to ``total``.

    Each such list is equally likely to occur.

    The list is built by drawing ``num_values - 1`` distinct cut points from
    ``1 .. total - 1`` and returning the gaps between consecutive cut points,
    with 0 and ``total`` acting as the outer boundaries.

    Raises:
        ValueError: propagated from ``random.sample`` when ``num_values - 1`` is
            negative or larger than the ``total - 1`` available cut points.
    """
    # ``range`` (rather than the Python 2-only ``xrange``) works on Python 2 and 3.
    dividers = sorted(random.sample(range(1, total), num_values - 1))
    return [a - b for a, b in zip(dividers + [total], [0] + dividers)]
def get_count(start):
    """Return ``start`` shifted by Gaussian noise (mean 0, standard deviation 25) as an int.

    The conversion uses ``int()``, so the fractional part is truncated toward zero.
    """
    noise = random.gauss(0, 1) * 25
    shifted = start + noise
    return int(shifted)
class Command(BaseCommand):
    """Management command that replaces stored enrollment data with generated data."""

    help = 'Generates fake course enrollment data, replacing any existing enrollment data.'

    def handle(self, *args, **options):
        """Delete all enrollment rows and create 120 days of fake data for the first course.

        Starting on 2014-01-01, a daily total is produced by a random walk
        (``get_count``) beginning at 1500. For every day one
        ``CourseEnrollmentDaily`` row is created, plus one row per gender,
        education level, country and birth year whose count is
        ``int(ratio * daily_total)``. Because of that truncation the
        per-category counts of a day may add up to slightly less than the
        daily total.

        Raises:
            CommandError: if no course exists. This is checked before any
                existing data is deleted.
        """
        # Imported locally so the module-level import block does not need to change.
        from django.core.management.base import CommandError

        days = 120
        start_date = datetime.date(year=2014, month=1, day=1)

        # ``first()`` returns None on an empty table; fail early instead of
        # deleting the existing data and then failing on the first insert.
        course = models.Course.objects.first()
        if course is None:
            raise CommandError('No course found. Load the courses fixture before generating enrollment data.')

        # Each ratio table sums to 1.0.
        genders = {
            'm': 0.6107,
            'f': 0.3870,
            'o': 0.0023
        }

        education_level_ratios = {
            'associates': 0.058,
            'bachelors': 0.3355,
            'primary': 0.0046,
            'secondary': 0.2442,
            'junior_secondary': 0.0286,
            'masters': 0.2518,
            'none': 0.0032,
            'other': 0.0271,
            'doctorate': 0.0470
        }

        country_ratios = {
            'US': 0.34,
            'GH': 0.12,
            'IN': 0.10,
            'CA': 0.14,
            'CN': 0.22,
            'DE': 0.08
        }

        # Random split of 1000 "per mille" shares across the birth years 1960-2004.
        birth_years = range(1960, 2005)
        ratios = [n / 1000.0 for n in constrained_sum_sample_pos(len(birth_years), 1000)]
        birth_years = dict(zip(birth_years, ratios))

        # The related objects do not change from day to day, so look them up
        # once instead of once per generated day.
        education_levels = [
            (models.EducationLevel.objects.get(short_name=short_name), ratio)
            for short_name, ratio in education_level_ratios.items()
        ]
        countries = [
            (models.Country.objects.get(code=code), ratio)
            for code, ratio in country_ratios.items()
        ]

        # Delete existing data
        for model in [models.CourseEnrollmentDaily, models.CourseEnrollmentByGender,
                      models.CourseEnrollmentByEducation, models.CourseEnrollmentByBirthYear,
                      models.CourseEnrollmentByCountry]:
            model.objects.all().delete()

        # Create new data
        daily_total = 1500
        for i in range(days):
            daily_total = get_count(daily_total)
            date = start_date + datetime.timedelta(days=i)
            models.CourseEnrollmentDaily.objects.create(course=course, date=date, count=daily_total)

            for gender, ratio in genders.items():
                count = int(ratio * daily_total)
                models.CourseEnrollmentByGender.objects.create(course=course, date=date, count=count, gender=gender)

            for education_level, ratio in education_levels:
                count = int(ratio * daily_total)
                models.CourseEnrollmentByEducation.objects.create(course=course, date=date, count=count,
                                                                  education_level=education_level)

            for country, ratio in countries:
                count = int(ratio * daily_total)
                models.CourseEnrollmentByCountry.objects.create(course=course, date=date, count=count,
                                                                country=country)

            for birth_year, ratio in birth_years.items():
                count = int(ratio * daily_total)
                models.CourseEnrollmentByBirthYear.objects.create(course=course, date=date, count=count,
                                                                  birth_year=birth_year)
......@@ -31,7 +31,7 @@ class CourseActivityByWeek(models.Model):
class BaseCourseEnrollment(models.Model):
course = models.ForeignKey(Course, null=False)
date = models.DateField(null=False)
date = models.DateField(null=False, db_index=True)
count = models.IntegerField(null=False)
class Meta(object):
......
import collections
def flatten(dictionary, parent_key='', sep='.'):
    """
    Flatten a nested dictionary into a single-level dictionary.

    Keys of nested mappings are joined to their parent key with ``sep``, e.g.
    ``{'a': {'b': 1}}`` becomes ``{'a.b': 1}``.

    http://stackoverflow.com/a/6027615

    Arguments:
        dictionary: mapping to flatten; values that are ``MutableMapping``
            instances are flattened recursively.
        parent_key: prefix prepended (followed by ``sep``) to every key.
        sep: separator placed between a parent key and a child key.

    Returns:
        A new ``dict`` with no nested mappings.
    """
    try:
        # The ABCs live in collections.abc on Python 3 (the old alias is gone in 3.10+).
        from collections.abc import MutableMapping
    except ImportError:
        # Python 2
        MutableMapping = collections.MutableMapping

    items = []
    for key, value in dictionary.items():
        new_key = parent_key + sep + key if parent_key else key
        if isinstance(value, MutableMapping):
            # Pass ``sep`` along so a custom separator applies at every depth.
            items.extend(flatten(value, new_key, sep=sep).items())
        else:
            items.append((new_key, value))
    return dict(items)
"""Common settings and globals."""
from os.path import abspath, basename, dirname, join, normpath
from sys import stderr
......@@ -250,6 +249,11 @@ REST_FRAMEWORK = {
# For the browseable API
'rest_framework.authentication.SessionAuthentication',
),
'DEFAULT_RENDERER_CLASSES': (
'rest_framework.renderers.JSONRenderer',
'rest_framework.renderers.BrowsableAPIRenderer',
'rest_framework_csv.renderers.CSVRenderer',
)
}
########## END REST FRAMEWORK CONFIGURATION
......
......@@ -85,5 +85,5 @@ ENABLE_ADMIN_SITE = True
TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
SWAGGER_SETTINGS = {
'api_key': 'analytics'
'api_key': 'edx'
}
from contextlib import contextmanager
from functools import partial
from django.conf import settings
from django.contrib.auth.models import User
from django.db.utils import ConnectionHandler, DatabaseError
from django.test import TestCase
from django.test.utils import override_settings
from mock import patch, Mock
import mock
from rest_framework.authtoken.models import Token
......@@ -15,13 +14,16 @@ class TestCaseWithAuthentication(TestCase):
def setUp(self):
super(TestCaseWithAuthentication, self).setUp()
test_user = User.objects.create_user('tester', 'test@example.com', 'testpassword')
token = Token.objects.create(user=test_user)
self.authenticated_get = partial(self.client.get, HTTP_AUTHORIZATION='Token ' + token.key, follow=True)
self.token = Token.objects.create(user=test_user)
def authenticated_get(self, path, data=None, follow=True, **extra):
    """Issue a GET through ``self.client`` with a token ``Authorization`` header.

    ``data`` defaults to an empty dict; any additional keyword arguments are
    forwarded unchanged to ``self.client.get``.
    """
    query = data if data else {}
    auth_header = 'Token ' + self.token.key
    return self.client.get(path, query, follow, HTTP_AUTHORIZATION=auth_header, **extra)
@contextmanager
def no_database():
cursor_mock = Mock(side_effect=DatabaseError)
cursor_mock = mock.Mock(side_effect=DatabaseError)
with mock.patch('django.db.backends.util.CursorWrapper', cursor_mock):
yield
......@@ -58,7 +60,7 @@ class OperationalEndpointsTest(TestCaseWithAuthentication):
@staticmethod
@contextmanager
def override_database_connections(databases):
with patch('analyticsdataserver.views.connections', ConnectionHandler(databases)):
with mock.patch('analyticsdataserver.views.connections', ConnectionHandler(databases)):
yield
@override_settings(ANALYTICS_DATABASE='reporting')
......
......@@ -5,3 +5,4 @@ django-model-utils==1.4.0
djangorestframework==2.3.5
ipython==2.1.0
django-rest-swagger==0.1.14
djangorestframework-csv==1.3.3
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment