Commit 88eb164b by Clinton Blackburn

Added command to generate fake data

Change-Id: Iab2c6860daf922c229cc66f4cc3ba632a8bbd623
parent f868623a
[run] [run]
omit = analyticsdataserver/settings* omit = analyticsdataserver/settings*
*wsgi.py *wsgi.py
analytics_data_api/management/commands/generate_fake_enrollment_data.py
[report] [report]
# Regexes for lines to exclude from consideration # Regexes for lines to exclude from consideration
......
...@@ -50,7 +50,8 @@ syncdb: ...@@ -50,7 +50,8 @@ syncdb:
$(foreach db_name,$(DATABASES),./manage.py syncdb --migrate --noinput --database=$(db_name);) $(foreach db_name,$(DATABASES),./manage.py syncdb --migrate --noinput --database=$(db_name);)
loaddata: syncdb loaddata: syncdb
python manage.py loaddata courses education_levels single_course_activity course_enrollment_birth_year course_enrollment_education course_enrollment_gender problem_response_answer_distribution course_enrollment_daily countries course_enrollment_country --database=analytics python manage.py loaddata courses education_levels countries single_course_activity problem_response_answer_distribution --database=analytics
python manage.py generate_fake_enrollment_data
demo: clean requirements loaddata demo: clean requirements loaddata
python manage.py set_api_key edx edx python manage.py set_api_key edx edx
...@@ -32,12 +32,12 @@ Getting Started ...@@ -32,12 +32,12 @@ Getting Started
Loading Data Loading Data
------------ ------------
The fixtures directory contains demo data. This data can be loaded with the following commands: The fixtures directory contains demo data and the `generate_fake_enrollment_data` management command can generate
enrollment data. Run the command below to load/generate this data in the database.
$ make loaddata $ make loaddata
Running Tests Running Tests
------------- -------------
Run `make validate` to install the requirements, run the tests, and run lint. If you want to get the API server ready for Run `make validate` to install the requirements, run the tests, and run lint.
acceptance testing or demos, `make demo` will install requirements and load the fixture data.
[
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-01",
"count": 100
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-01",
"count": 240
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-02",
"count": 106
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-02",
"count": 199
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "US",
"date": "2014-06-03",
"count": 200
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IN",
"date": "2014-06-03",
"count": 300
}
},
{
"model": "v0.CourseEnrollmentByCountry",
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"country": "IS",
"date": "2014-06-03",
"count": 6
}
}
]
[
{
"model": "v0.CourseEnrollmentDaily",
"pk": 1,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-01",
"count": 100
}
},
{
"model": "v0.CourseEnrollmentDaily",
"pk": 2,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-02",
"count": 150
}
},
{
"model": "v0.CourseEnrollmentDaily",
"pk": 3,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"date": "2014-06-03",
"count": 300
}
}
]
[
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 5,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 6,
"count": 12255,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 7,
"count": 70885,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 7,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 3,
"count": 981,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 8,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 5,
"count": 51591,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 9,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 4,
"count": 6051,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 10,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 8,
"count": 53216,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 11,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 1,
"count": 667,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 12,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 2,
"count": 5722,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 13,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 9,
"count": 9940,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 14,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 6,
"count": 12295,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 7,
"count": 70885,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 7,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 3,
"count": 981,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 8,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 5,
"count": 51591,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 9,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 4,
"count": 6051,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 10,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 8,
"count": 53216,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 11,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 1,
"count": 667,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 12,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 2,
"count": 5722,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByEducation",
"pk": 13,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"education_level": 9,
"count": 9940,
"date": "2014-07-01"
}
}
]
[
{
"model": "v0.CourseEnrollmentByGender",
"pk": 1,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "m",
"count": 133240,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 2,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "f",
"count": 77495,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 3,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "o",
"count": 423,
"date": "2014-07-01"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 4,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "m",
"count": 1332,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 5,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "f",
"count": 77445,
"date": "2014-07-02"
}
},
{
"model": "v0.CourseEnrollmentByGender",
"pk": 6,
"fields": {
"course": ["edX/DemoX/Demo_Course"],
"gender": "o",
"count": 34,
"date": "2014-07-02"
}
}
]
import datetime
import random

from django.core.management.base import BaseCommand, CommandError

from analytics_data_api.v0 import models
# http://stackoverflow.com/a/3590105
def constrained_sum_sample_pos(num_values, total):
    """Return a randomly chosen list of positive integers summing to total.

    Each such list is equally likely to occur.

    Args:
        num_values: How many integers to return; must satisfy
            ``1 <= num_values <= total``.
        total: The value the returned integers add up to.

    Returns:
        A list of ``num_values`` positive integers whose sum is ``total``.

    Raises:
        ValueError: If ``total`` cannot be split into ``num_values``
            positive integers.
    """
    if not 1 <= num_values <= total:
        raise ValueError(
            'num_values must be between 1 and total (got num_values=%r, total=%r)' % (num_values, total))

    # Choose num_values - 1 distinct cut points strictly inside (0, total);
    # the gaps between consecutive cut points are the returned values.
    # `range` (not `xrange`) keeps this working on both Python 2 and 3.
    dividers = sorted(random.sample(range(1, total), num_values - 1))
    return [upper - lower for upper, lower in zip(dividers + [total], [0] + dividers)]
def get_count(start):
    """Return ``start`` shifted by random Gaussian noise, truncated to an int.

    The noise is a single standard-normal draw scaled by 25.
    """
    noise = random.gauss(0, 1) * 25
    return int(noise + start)
class Command(BaseCommand):
    """Replace all enrollment data with randomly generated fake data.

    Existing rows of the daily, gender, education, birth-year and country
    enrollment models are deleted, then 120 days of data starting on
    2014-01-01 are created for the first course in the database.
    """

    help = 'Generate fake enrollment data for the first course in the database.'

    def handle(self, *args, **options):
        """Delete existing enrollment data and generate fresh fake data.

        Raises:
            CommandError: If the database contains no course to attach the
                generated data to.
        """
        days = 120
        course = models.Course.objects.first()
        if course is None:
            # Fail before anything is deleted rather than part-way through.
            raise CommandError('No course found. Load the courses fixture before generating enrollment data.')

        start_date = datetime.date(year=2014, month=1, day=1)

        # Each ratio table sums to 1.0 so that the per-category counts for a
        # day add up to (at most, because of truncation) that day's total.
        genders = {
            'm': 0.6107,
            'f': 0.3870,
            'o': 0.0023
        }

        education_levels = {
            'associates': 0.058,
            'bachelors': 0.3355,
            'primary': 0.0046,
            'secondary': 0.2442,
            'junior_secondary': 0.0286,
            'masters': 0.2518,
            'none': 0.0032,
            'other': 0.0271,
            'doctorate': 0.0470
        }

        countries = {
            'US': 0.34,
            'GH': 0.12,
            'IN': 0.10,
            'CA': 0.14,
            'CN': 0.22,
            'DE': 0.08
        }

        # Spread enrollment randomly over the birth years; the ratios are
        # drawn once so the distribution is the same for every generated day.
        birth_years = range(1960, 2005)
        ratios = [n / 1000.0 for n in constrained_sum_sample_pos(len(birth_years), 1000)]
        birth_years = dict(zip(birth_years, ratios))

        # Resolve the related objects once, up front: they do not change from
        # day to day, and a missing row now fails before any data is deleted.
        education_level_ratios = [
            (models.EducationLevel.objects.get(short_name=short_name), ratio)
            for short_name, ratio in education_levels.items()
        ]
        country_ratios = [
            (models.Country.objects.get(code=code), ratio)
            for code, ratio in countries.items()
        ]

        # Delete existing data
        for model in [models.CourseEnrollmentDaily, models.CourseEnrollmentByGender,
                      models.CourseEnrollmentByEducation, models.CourseEnrollmentByBirthYear,
                      models.CourseEnrollmentByCountry]:
            model.objects.all().delete()

        # Create new data
        daily_total = 1500
        for i in range(days):
            # Each day's total is derived from the previous day's total.
            daily_total = get_count(daily_total)
            date = start_date + datetime.timedelta(days=i)
            models.CourseEnrollmentDaily.objects.create(course=course, date=date, count=daily_total)

            for gender, ratio in genders.items():
                count = int(ratio * daily_total)
                models.CourseEnrollmentByGender.objects.create(course=course, date=date, count=count,
                                                               gender=gender)

            for education_level, ratio in education_level_ratios:
                count = int(ratio * daily_total)
                models.CourseEnrollmentByEducation.objects.create(course=course, date=date, count=count,
                                                                  education_level=education_level)

            for country, ratio in country_ratios:
                count = int(ratio * daily_total)
                models.CourseEnrollmentByCountry.objects.create(course=course, date=date, count=count,
                                                                country=country)

            for birth_year, ratio in birth_years.items():
                count = int(ratio * daily_total)
                models.CourseEnrollmentByBirthYear.objects.create(course=course, date=date, count=count,
                                                                  birth_year=birth_year)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment