Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-analytics-data-api
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-analytics-data-api
Commits
82204925
Commit
82204925
authored
Jul 28, 2014
by
Clinton Blackburn
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #8 from edx/better-fake-data
Updated Fake Data Generator
parents
bd4e6fc6
10621b44
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
65 additions
and
20 deletions
+65
-20
.coveragerc
+1
-1
Makefile
+1
-1
analytics_data_api/management/commands/generate_fake_course_data.py
+62
-17
analytics_data_api/v0/models.py
+1
-1
No files found.
.coveragerc
View file @
82204925
[run]
[run]
omit = analyticsdataserver/settings*
omit = analyticsdataserver/settings*
*wsgi.py
*wsgi.py
analytics_data_api/management/commands/generate_fake_
enrollment
_data.py
analytics_data_api/management/commands/generate_fake_
course
_data.py
[report]
[report]
# Regexes for lines to exclude from consideration
# Regexes for lines to exclude from consideration
...
...
Makefile
View file @
82204925
...
@@ -51,7 +51,7 @@ syncdb:
...
@@ -51,7 +51,7 @@ syncdb:
loaddata
:
syncdb
loaddata
:
syncdb
python manage.py loaddata education_levels single_course_activity problem_response_answer_distribution
--database
=
analytics
python manage.py loaddata education_levels single_course_activity problem_response_answer_distribution
--database
=
analytics
python manage.py generate_fake_
enrollment
_data
python manage.py generate_fake_
course
_data
demo
:
clean requirements loaddata
demo
:
clean requirements loaddata
python manage.py set_api_key edx edx
python manage.py set_api_key edx edx
analytics_data_api/management/commands/generate_fake_
enrollment
_data.py
→
analytics_data_api/management/commands/generate_fake_
course
_data.py
View file @
82204925
# pylint: disable=line-too-long
# pylint: disable=line-too-long
,invalid-name
import
datetime
import
datetime
import
logging
import
random
import
random
from
django.core.management.base
import
BaseCommand
from
django.core.management.base
import
BaseCommand
from
django.utils
import
timezone
from
analytics_data_api.v0
import
models
from
analytics_data_api.v0
import
models
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logger
=
logging
.
getLogger
(
__name__
)
# http://stackoverflow.com/a/3590105
# http://stackoverflow.com/a/3590105
def
constrained_sum_sample_pos
(
num_values
,
total
):
def
constrained_sum_sample_pos
(
num_values
,
total
):
"""Return a randomly chosen list of n positive integers summing to total.
"""Return a randomly chosen list of n positive integers summing to total.
...
@@ -22,19 +28,15 @@ def get_count(start):
...
@@ -22,19 +28,15 @@ def get_count(start):
class
Command
(
BaseCommand
):
class
Command
(
BaseCommand
):
def
handle
(
self
,
*
args
,
**
options
):
def
generate_daily_data
(
self
,
course_id
,
start_date
,
end_date
):
# Use the preset ratios below to generate data in the specified demographics
days
=
120
gender_ratios
=
{
course_id
=
'edX/DemoX/Demo_Course'
start_date
=
datetime
.
date
(
year
=
2014
,
month
=
1
,
day
=
1
)
genders
=
{
'm'
:
0.6107
,
'm'
:
0.6107
,
'f'
:
0.3870
,
'f'
:
0.3870
,
'o'
:
0.23
'o'
:
0.23
}
}
education_level_ratios
=
{
education_levels
=
{
'associates'
:
0.058
,
'associates'
:
0.058
,
'bachelors'
:
0.3355
,
'bachelors'
:
0.3355
,
'primary'
:
0.0046
,
'primary'
:
0.0046
,
...
@@ -45,8 +47,7 @@ class Command(BaseCommand):
...
@@ -45,8 +47,7 @@ class Command(BaseCommand):
'other'
:
0.0271
,
'other'
:
0.0271
,
'doctorate'
:
0.0470
'doctorate'
:
0.0470
}
}
country_ratios
=
{
countries
=
{
'US'
:
0.34
,
'US'
:
0.34
,
'GH'
:
0.12
,
'GH'
:
0.12
,
'IN'
:
0.10
,
'IN'
:
0.10
,
...
@@ -55,6 +56,7 @@ class Command(BaseCommand):
...
@@ -55,6 +56,7 @@ class Command(BaseCommand):
'DE'
:
0.08
'DE'
:
0.08
}
}
# Generate birth year ratios
birth_years
=
range
(
1960
,
2005
)
birth_years
=
range
(
1960
,
2005
)
ratios
=
[
n
/
1000.0
for
n
in
constrained_sum_sample_pos
(
len
(
birth_years
),
1000
)]
ratios
=
[
n
/
1000.0
for
n
in
constrained_sum_sample_pos
(
len
(
birth_years
),
1000
)]
birth_years
=
dict
(
zip
(
birth_years
,
ratios
))
birth_years
=
dict
(
zip
(
birth_years
,
ratios
))
...
@@ -67,24 +69,29 @@ class Command(BaseCommand):
...
@@ -67,24 +69,29 @@ class Command(BaseCommand):
models
.
CourseEnrollmentByCountry
]:
models
.
CourseEnrollmentByCountry
]:
model
.
objects
.
all
()
.
delete
()
model
.
objects
.
all
()
.
delete
()
logger
.
info
(
"Deleted all daily course enrollment data."
)
logger
.
info
(
"Generating new daily course enrollment data..."
)
# Create new data
# Create new data
daily_total
=
1500
daily_total
=
1500
for
i
in
range
(
days
):
date
=
start_date
while
date
<=
end_date
:
daily_total
=
get_count
(
daily_total
)
daily_total
=
get_count
(
daily_total
)
date
=
start_date
+
datetime
.
timedelta
(
days
=
i
)
models
.
CourseEnrollmentDaily
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
daily_total
)
models
.
CourseEnrollmentDaily
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
daily_total
)
for
gender
,
ratio
in
genders
.
iteritems
():
for
gender
,
ratio
in
gender
_ratio
s
.
iteritems
():
count
=
int
(
ratio
*
daily_total
)
count
=
int
(
ratio
*
daily_total
)
models
.
CourseEnrollmentByGender
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
count
,
gender
=
gender
)
models
.
CourseEnrollmentByGender
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
count
,
gender
=
gender
)
for
short_name
,
ratio
in
education_levels
.
iteritems
():
for
short_name
,
ratio
in
education_level
_ratio
s
.
iteritems
():
education_level
=
models
.
EducationLevel
.
objects
.
get
(
short_name
=
short_name
)
education_level
=
models
.
EducationLevel
.
objects
.
get
(
short_name
=
short_name
)
count
=
int
(
ratio
*
daily_total
)
count
=
int
(
ratio
*
daily_total
)
models
.
CourseEnrollmentByEducation
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
count
,
models
.
CourseEnrollmentByEducation
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
count
,
education_level
=
education_level
)
education_level
=
education_level
)
for
country_code
,
ratio
in
countr
ie
s
.
iteritems
():
for
country_code
,
ratio
in
countr
y_ratio
s
.
iteritems
():
count
=
int
(
ratio
*
daily_total
)
count
=
int
(
ratio
*
daily_total
)
models
.
CourseEnrollmentByCountry
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
count
,
models
.
CourseEnrollmentByCountry
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
count
,
country_code
=
country_code
)
country_code
=
country_code
)
...
@@ -93,3 +100,41 @@ class Command(BaseCommand):
...
@@ -93,3 +100,41 @@ class Command(BaseCommand):
count
=
int
(
ratio
*
daily_total
)
count
=
int
(
ratio
*
daily_total
)
models
.
CourseEnrollmentByBirthYear
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
count
,
models
.
CourseEnrollmentByBirthYear
.
objects
.
create
(
course_id
=
course_id
,
date
=
date
,
count
=
count
,
birth_year
=
birth_year
)
birth_year
=
birth_year
)
date
=
date
+
datetime
.
timedelta
(
days
=
1
)
logger
.
info
(
"Done!"
)
def
generate_weekly_data
(
self
,
course_id
,
start_date
,
end_date
):
activity_types
=
[
'played_video'
,
'attempted_problem'
,
'posted_forum'
]
start
=
start_date
models
.
CourseActivityByWeek
.
objects
.
all
()
.
delete
()
logger
.
info
(
"Deleted all weekly course activity."
)
logger
.
info
(
"Generating new weekly course activity data..."
)
while
start
<
end_date
:
active_students
=
random
.
randint
(
100
,
4000
)
end
=
min
(
start
+
datetime
.
timedelta
(
weeks
=
1
),
end_date
)
counts
=
constrained_sum_sample_pos
(
len
(
activity_types
),
active_students
)
for
activity_type
,
count
in
zip
(
activity_types
,
counts
):
models
.
CourseActivityByWeek
.
objects
.
create
(
course_id
=
course_id
,
activity_type
=
activity_type
,
count
=
count
,
interval_start
=
start
,
interval_end
=
end
)
models
.
CourseActivityByWeek
.
objects
.
create
(
course_id
=
course_id
,
activity_type
=
'any'
,
count
=
active_students
,
interval_start
=
start
,
interval_end
=
end
)
start
=
end
logger
.
info
(
"Done!"
)
def
handle
(
self
,
*
args
,
**
options
):
course_id
=
'edX/DemoX/Demo_Course'
start_date
=
datetime
.
datetime
(
year
=
2014
,
month
=
1
,
day
=
1
,
tzinfo
=
timezone
.
utc
)
end_date
=
timezone
.
now
()
.
replace
(
microsecond
=
0
)
logger
.
info
(
"Generating data for
%
s..."
,
course_id
)
self
.
generate_weekly_data
(
course_id
,
start_date
,
end_date
)
self
.
generate_daily_data
(
course_id
,
start_date
,
end_date
)
analytics_data_api/v0/models.py
View file @
82204925
...
@@ -96,7 +96,7 @@ class ProblemResponseAnswerDistribution(models.Model):
...
@@ -96,7 +96,7 @@ class ProblemResponseAnswerDistribution(models.Model):
created
=
models
.
DateTimeField
(
auto_now_add
=
True
,
db_column
=
'created'
)
created
=
models
.
DateTimeField
(
auto_now_add
=
True
,
db_column
=
'created'
)
Country
=
namedtuple
(
'Country'
,
'name
,
code'
)
Country
=
namedtuple
(
'Country'
,
'name code'
)
class
CourseEnrollmentByCountry
(
BaseCourseEnrollment
):
class
CourseEnrollmentByCountry
(
BaseCourseEnrollment
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment