Commit 0bf85992 by ichuang

psychometrics djangoapp

parent ab0a58fb
'''
django admin pages for psychometrics model
'''
from psychometrics.models import *
from django.contrib import admin
admin.site.register(PsychometricData)
#!/usr/bin/python
#
# generate psychometrics data from tracking logs and student module data
import os, sys, string
import datetime
import json
from courseware.models import *
from track.models import *
from psychometrics.models import *
from xmodule.modulestore import Location
from django.core.management.base import BaseCommand
#db = "ocwtutor" # for debugging
db = "default"
class Command(BaseCommand):
    """
    Management command which creates or refreshes one PsychometricData row for
    every completed capa problem found among the StudentModule rows.

    Existing PsychometricData rows are updated in place; nothing is deleted.
    """
    help = "initialize PsychometricData tables from StudentModule instances (and tracking data, if in SQL)."
    help += " Note this is done for all courses for which StudentModule instances exist."

    def handle(self, *args, **options):
        """
        Walk all StudentModule rows which have a max_grade, and for each one
        that is a completed problem, store done / attempts / checktimes in
        the matching PsychometricData row.  Progress and problems are
        reported on stdout.  args and options are not used.
        """
        smset = StudentModule.objects.using(db).exclude(max_grade=None)

        for sm in smset:
            url = sm.module_state_key
            location = Location(url)
            if location.category != "problem":
                continue

            try:
                state = json.loads(sm.state)
                done = state['done']
            except Exception:
                # best effort: report the unreadable state and move on
                print("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
                continue

            if not done:    # only keep if problem completed
                continue

            try:
                pmd = PsychometricData.objects.using(db).get(studentmodule=sm)
            except PsychometricData.DoesNotExist:
                pmd = PsychometricData(studentmodule=sm)

            pmd.done = done
            pmd.attempts = state['attempts']

            # get attempt times from tracking log
            uname = sm.student.username
            tset = TrackingLog.objects.using(db).filter(username=uname, event_type__contains='save_problem_check')
            tset = tset.filter(event_source='server')
            tset = tset.filter(event__contains="'%s'" % url)
            checktimes = [x.dtcreated for x in tset]

            # The code which reads checktimes back does eval() on the stored
            # text and subtracts the entries as datetimes, so store the repr
            # of the list.  (json.dumps cannot serialize datetime objects.)
            # NOTE(review): assumes dtcreated is a naive datetime - confirm
            # against track.models.TrackingLog.
            pmd.checktimes = repr(checktimes)
            if len(checktimes) != pmd.attempts:
                print("Oops, mismatch in number of attempts and check times for %s" % pmd)

            pmd.save(using=db)

        print("%d PMD entries" % PsychometricData.objects.using(db).all().count())
#
# db model for psychometrics data
#
# this data is collected in real time
#
from django.db import models
from courseware.models import StudentModule
class PsychometricData(models.Model):
    """
    One row per StudentModule (capa problems only), recording how the student
    performed on that module: whether it is done, how many attempts were
    made, and when each check happened.

    StudentModule.module_state_key is nominally a Location url string of the
    form {tag}://{org}/{course}/{category}/{name}[@{revision}]; for capa
    problems the category is "problem".

    checktimes comes either from the tracking logs or from the capa module,
    via the psychometrics callback.
    """

    # contains student, module_state_key, course_id
    studentmodule = models.ForeignKey(StudentModule, db_index=True, unique=True)

    done = models.BooleanField(default=False)

    # extracted from studentmodule.state
    attempts = models.IntegerField(default=0)

    # text form of a list of datetime objects (the analysis code eval()s it)
    checktimes = models.TextField(null=True, blank=True)

    # Values which are reached through the studentmodule link rather than
    # being duplicated here:
    #   grade     = studentmodule.grade
    #   max_grade = studentmodule.max_grade
    #   student   = studentmodule.student
    #   course_id = studentmodule.course_id
    #   location  = studentmodule.module_state_key

    def __unicode__(self):
        """One-line summary of this row and of the linked StudentModule."""
        module = self.studentmodule
        template = "[PsychometricData] %s url=%s, grade=%s, max=%s, attempts=%s, ct=%s"
        values = (
            module.student,
            module.module_state_key,
            module.grade,
            module.max_grade,
            self.attempts,
            self.checktimes,
        )
        return template % values
#
# File: psychometrics/psychoanalyze.py
#
# generate psychometrics plots from PsychometricData
from __future__ import division
import datetime
import logging
import json
import math
import numpy as np
from scipy.optimize import curve_fit
from django.db.models import Sum, Max
from psychometrics.models import *
from xmodule.modulestore import Location
log = logging.getLogger("mitx.psychometrics")
#db = "ocwtutor" # for debugging
db = "default"
#-----------------------------------------------------------------------------
# fit functions
def func_2pl(x, a, b):
    """
    2-parameter logistic function

        f(x) = 1 / (1 + exp(-D*a*(x-b))),   with D = 1.7

    x may be a number or a numpy array; a and b are the two fit parameters.
    Returns a value (or array of values) between 0 and 1.

    The expression is written with exp(-z) in the denominator, rather than
    as exp(z)/(1+exp(z)), so that a large positive z gives 1.0 and not
    inf/inf = nan (curve_fit may probe such parameter values).
    """
    D = 1.7
    z = D * a * (x - b)
    # for very negative z, exp(-z) overflows to inf and the result is
    # correctly 0.0; silence the harmless overflow warning
    with np.errstate(over='ignore'):
        return 1.0 / (1.0 + np.exp(-z))
#-----------------------------------------------------------------------------
# statistics class
class StatVar(object):
    """
    Simple statistics on floating point numbers: avg, sdv, var, min, max

    Values are accumulated with add() (or with "sv += x").  avg() and var()
    are reported in multiples of `unit`; min and max are kept in the
    original scale of the values.
    """
    def __init__(self, unit=1):
        self.sum = 0
        self.sum2 = 0
        self.cnt = 0
        self.unit = unit
        self.min = None
        self.max = None

    def add(self, x):
        """Include x in the statistics; None is ignored."""
        if x is None:
            return
        if self.min is None or x < self.min:
            self.min = x
        if self.max is None or x > self.max:
            self.max = x
        self.sum += x
        self.sum2 += x**2
        self.cnt += 1

    def avg(self):
        """Mean of the values added so far, divided by unit (0 if there are none)."""
        # cnt starts at 0 (it is never None), so test for "no data" this way
        # to avoid a ZeroDivisionError on an empty accumulator
        if not self.cnt:
            return 0
        return self.sum / 1.0 / self.cnt / self.unit

    def var(self):
        """Population variance of the values, in units of unit**2 (0 if there are none)."""
        if not self.cnt:
            return 0
        return (self.sum2 / 1.0 / self.cnt / (self.unit**2)) - (self.avg()**2)

    def sdv(self):
        """Standard deviation; 0 when the variance is not positive."""
        v = self.var()
        if v > 0:
            return math.sqrt(v)
        return 0

    def __str__(self):
        return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt, self.avg(), self.sdv())

    def __add__(self, x):
        # Note: this updates self in place and returns it, which is what
        # makes "sv += x" work as shorthand for sv.add(x)
        self.add(x)
        return self
#-----------------------------------------------------------------------------
# histogram generator
def make_histogram(ydata, bins=None):
    '''
    Count the values of ydata into bins.

    bins is a sequence of bin edges in increasing order; when it is not
    given, the edges 0, 10, 20, ..., 90 are used.  Each value is counted in
    the bin of the largest edge which is strictly less than the value, so a
    value which is not greater than the first edge is not counted at all.

    returns dict with keys being the bin edges, and values being counts.
    '''
    if bins is None:
        bins = range(0, 100, 10)

    counts = dict((edge, 0) for edge in bins)
    edges_high_to_low = bins[::-1]

    for value in ydata:
        for edge in edges_high_to_low:
            if value > edge:
                counts[edge] += 1
                break

    return counts
#-----------------------------------------------------------------------------
def problems_with_psychometric_data(course_id):
    '''
    For the given course_id, find every problem which has psychometric data.

    Returns a dict mapping each problem (its location url, taken from
    StudentModule.module_state_key) to the number of PsychometricData rows
    which exist for it.  One count query is made per problem.
    '''
    key_field = 'studentmodule__module_state_key'
    course_pmds = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id)

    # first collect the distinct problem urls, then count the rows of each
    urls = [row[key_field] for row in course_pmds.values(key_field).distinct()]

    counts = {}
    for url in urls:
        counts[url] = course_pmds.filter(studentmodule__module_state_key=url).count()
    return counts
#-----------------------------------------------------------------------------
def generate_plots_for_problem(problem):
    """
    Generate the psychometrics plots for one problem.

    problem = location url of the problem (StudentModule.module_state_key)

    Returns (msg, plots).  msg is a status string (with <br/> separators).
    plots is a list of dicts, one per plot, with keys 'title', 'id', 'info',
    'data' (javascript text defining the data variables) and 'cmd'
    (javascript text with the plot arguments).  The plots are:

      - a histogram of grades (only if max_grade > 1)
      - a histogram of the time between successive checks (only if more
        than two time differences are available)
      - for each integer grade which some student received, the cumulative
        fraction of those students vs number of attempts, with a 2PL
        logistic fit when there are more than 5 data points and the fit
        converges.

    When there are fewer than 2 students, no plots are generated.

    Warning: this issues many queries, and does not scale to large numbers
    of students.
    """
    pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem)
    nstudents = pmdset.count()
    msg = ""
    plots = []

    if nstudents < 2:
        msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
        return msg, plots

    max_grade = pmdset[0].studentmodule.max_grade
    # number of integer grades to make IRT curves for; a missing max_grade
    # gives none, instead of a TypeError from int(None)
    ngrades = int(max_grade) if max_grade is not None else 0

    agdat = pmdset.aggregate(Sum('attempts'), Max('attempts'))
    max_attempts = agdat['attempts__max']
    total_attempts = agdat['attempts__sum']    # not used yet
    msg += "max attempts = %d" % max_attempts

    xdat = list(range(1, max_attempts + 1))
    dataset = {'xdat': xdat}

    # generate grade histogram
    axisopts = """{
xaxes: [{
axisLabel: 'Grade'
}],
yaxes: [{
position: 'left',
axisLabel: 'Count'
}]
}"""

    if max_grade is not None and max_grade > 1:
        # make_histogram counts a value in the bin of the largest edge
        # strictly below it.  With edges at 0, 1, 2, ... a grade of 0 was
        # never counted and every other grade landed one bin too low; edges
        # at the half-integers put each integer grade in its own bin.
        grade_bins = np.linspace(-0.5, max_grade + 0.5, int(max_grade) + 2)
        ghist = make_histogram([pmd.studentmodule.grade for pmd in pmdset], grade_bins)
        ghist_json = json.dumps(sorted(ghist.items()))

        plot = {'title': "Grade histogram for %s" % problem,
                'id': 'histogram',
                'info': '',
                'data': "var dhist = %s;\n" % ghist_json,
                'cmd': "[ {data: dhist, bars: { show: true }} ], %s" % axisopts,
                }
        plots.append(plot)
    else:
        msg += "<br/>Not generating histogram: max_grade=%s" % max_grade

    # histogram of time differences between checks
    # Warning: this is inefficient - doesn't scale to large numbers of students
    dtset = []    # time differences in minutes
    dtsv = StatVar()
    for pmd in pmdset:
        try:
            # NOTE(review): eval() of text read from the database.  This is
            # how the list of datetimes is stored; it is only safe as long
            # as nothing but this application writes the checktimes field.
            checktimes = eval(pmd.checktimes)
        except Exception:
            continue    # no (or unreadable) check times for this student
        if len(checktimes) < 2:
            continue
        ct0 = checktimes[0]
        for ct in checktimes[1:]:
            dt = (ct - ct0).total_seconds() / 60.0
            if dt < 20:    # ignore if dt too long
                dtset.append(dt)
                dtsv.add(dt)
            ct0 = ct

    if dtsv.cnt > 2:
        msg += "<br/>time differences between checks: %s" % dtsv
        bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
        dbar = bins[1] - bins[0]
        thist = make_histogram(dtset, bins)
        thist_json = json.dumps(sorted(thist.items(), key=lambda item: item[0]))

        axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""

        plot = {'title': "Histogram of time differences between checks",
                'id': 'thistogram',
                'info': '',
                'data': "var thist = %s;\n" % thist_json,
                'cmd': "[ {data: thist, bars: { show: true, barWidth:%f }} ], %s" % (dbar, axisopts),
                }
        plots.append(plot)

    # one IRT plot curve for each grade received (TODO: this assumes integer grades)
    for grade in range(1, ngrades + 1):
        yset = {}
        gset = pmdset.filter(studentmodule__grade=grade)
        ngset = gset.count()
        if ngset == 0:
            continue

        # cumulative fraction of these students who used at most x attempts
        ydat = []
        ylast = 0
        for x in xdat:
            y = gset.filter(attempts=x).count() / float(ngset)
            ylast = y + ylast
            ydat.append(ylast)
        yset['ydat'] = ydat

        if len(ydat) > 5:    # try to fit to logistic function if enough data points
            try:
                cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
            except RuntimeError:
                # curve_fit raises RuntimeError when the fit does not
                # converge; plot the data points without a fitted curve
                log.warning("2PL fit failed for %s grade=%s", problem, grade)
            else:
                yset['fitparam'] = cfp
                yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
                yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
                fitx = np.linspace(xdat[0], xdat[-1], 100)
                yset['fitx'] = fitx
                yset['fity'] = func_2pl(np.array(fitx), *cfp[0])

        dataset['grade_%d' % grade] = yset

    axisopts = """{
xaxes: [{
axisLabel: 'Number of Attempts'
}],
yaxes: [{
max:1.0,
position: 'left',
axisLabel: 'Probability of correctness'
}]
}"""

    # generate points for flot plot
    for grade in range(1, ngrades + 1):
        gkey = 'grade_%d' % grade
        if gkey not in dataset:
            continue    # no student received this grade
        yset = dataset[gkey]

        jsdata = "var d%d = %s;\n" % (grade, json.dumps(list(zip(xdat, yset['ydat']))))
        jsplots = ['{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade]
        irtinfo = ""
        if 'fitpts' in yset:
            jsdata += 'var fit = %s;\n' % (json.dumps(list(zip(yset['fitx'], yset['fity']))))
            jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }')
            (a, b) = yset['fitparam'][0]
            irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)

        plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
                      'id': "irt%s" % grade,
                      'info': '',
                      'data': jsdata,
                      'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
                      })

    return msg, plots
#-----------------------------------------------------------------------------
def make_psychometrics_data_update_handler(studentmodule):
    """
    Construct and return a procedure which may be called to update
    the PsychometricData instance for the given StudentModule instance.

    The PsychometricData row for the StudentModule is looked up here (a new,
    unsaved instance is made if there is none); the returned procedure keeps
    a reference to it and saves it each time it is called.
    """
    sm = studentmodule
    try:
        pmd = PsychometricData.objects.using(db).get(studentmodule=sm)
    except PsychometricData.DoesNotExist:
        pmd = PsychometricData(studentmodule=sm)

    def psychometrics_data_update_handler(state):
        """
        This function may be called each time a problem is successfully checked
        (eg on save_problem_check events in capa_module).

        state = instance state (a nice, uniform way to interface - for more future psychometric feature extraction)

        Note that the state argument is currently not used: the state is
        read from the StudentModule instance instead.  Errors are logged
        rather than raised; nothing is returned.
        """
        try:
            state = json.loads(sm.state)
            done = state['done']
            # read inside the try, so that a state without 'attempts' is
            # logged like any other unreadable state
            attempts = state['attempts']
        except Exception:
            log.exception("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
            return

        pmd.done = done
        pmd.attempts = attempts

        try:
            # update log of attempt timestamps
            # NOTE(review): eval() of text read from the database.  This is
            # how the list of datetimes is stored; it is only safe as long
            # as nothing but this application writes the checktimes field.
            checktimes = list(eval(pmd.checktimes))
        except Exception:
            checktimes = []    # nothing stored yet, or not readable
        checktimes.append(datetime.datetime.now())

        # Keep the field as text, in memory as well as in the database.
        # If the list itself were left on pmd, then the eval() above would
        # fail on the next call of this handler, and the timestamps recorded
        # so far would be thrown away.
        pmd.checktimes = repr(checktimes)
        try:
            pmd.save(using=db)
        except Exception:
            log.exception("Error in updating psychometrics data for %s" % sm)

    return psychometrics_data_update_handler
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment