Commit bffd9ac3 by ichuang

center histogram bars; pep8

parent 1f07bb04
#
# File: psychometrics/psychoanalyze.py
#
# generate psychometrics plots from PsychometricData
# generate psychometrics plots from PsychometricData
from __future__ import division
......@@ -19,98 +19,108 @@ from xmodule.modulestore import Location
log = logging.getLogger("mitx.psychometrics")
#db = "ocwtutor" # for debugging
#db = "ocwtutor" # for debugging
#db = "default"
db = getattr(settings,'DATABASE_FOR_PSYCHOMETRICS','default')
db = getattr(settings, 'DATABASE_FOR_PSYCHOMETRICS', 'default')
#-----------------------------------------------------------------------------
# fit functions
def func_2pl(x, a, b):
    """
    2-parameter logistic (2PL) function.

    Evaluates exp(D*a*(x-b)) / (1 + exp(D*a*(x-b))) with the scaling
    constant D = 1.7.

    x may be a scalar or a numpy array; a and b are the two curve
    parameters.  The result has the same shape as x, with values in [0, 1].
    """
    D = 1.7
    z = D * a * (x - b)
    # exp(z) / (1 + exp(z)) is algebraically 0.5 * (1 + tanh(z / 2)).  The
    # tanh form saturates to 0 / 1 for large |z|, whereas the exp form
    # overflows to inf / inf == nan for large positive z.
    return 0.5 * (1.0 + np.tanh(0.5 * z))
#-----------------------------------------------------------------------------
# statistics class
class StatVar(object):
    """
    Simple running statistics on floating point numbers: avg, sdv, var,
    min, max.

    Values are accumulated with add() (or with ``sv += x``).  avg(), var()
    and sdv() report results divided by `unit` (var by unit**2); min and
    max hold the raw values as they were added.
    """

    def __init__(self, unit=1):
        self.sum = 0
        self.sum2 = 0
        self.cnt = 0
        self.unit = unit
        self.min = None
        self.max = None

    def add(self, x):
        """Accumulate one value; None is silently ignored."""
        if x is None:
            return
        if self.min is None or x < self.min:
            self.min = x
        if self.max is None or x > self.max:
            self.max = x
        self.sum += x
        self.sum2 += x ** 2
        self.cnt += 1

    def avg(self):
        """Return the mean in units of `unit`, or 0 if nothing was added."""
        # cnt starts at 0 (never None), so test for "no samples" directly
        # to avoid a ZeroDivisionError on an empty accumulator.
        if not self.cnt:
            return 0
        return self.sum / 1.0 / self.cnt / self.unit

    def var(self):
        """Return the population variance (mean of squares minus square of
        mean) in units of `unit`**2, or 0 if nothing was added."""
        if not self.cnt:
            return 0
        return (self.sum2 / 1.0 / self.cnt / (self.unit ** 2)) - (self.avg() ** 2)

    def sdv(self):
        """Return the standard deviation; 0 when the variance is not positive."""
        v = self.var()
        # round-off can make the computed variance slightly negative
        if v > 0:
            return math.sqrt(v)
        return 0

    def __str__(self):
        return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt, self.avg(), self.sdv())

    def __add__(self, x):
        """Support ``sv += x``: accumulates x IN PLACE and returns self."""
        self.add(x)
        return self
#-----------------------------------------------------------------------------
# histogram generator
def make_histogram(ydata, bins=None):
    '''
    Generate histogram of ydata using bins provided, or by default bins
    from 0 to 90 in steps of 10.  bins should be ordered in increasing order.

    Each bin value is the LEFT edge of its bin: a data point y is counted in
    the largest bin b with b <= y.  Points below the first bin are not
    counted; points at or above the last bin are counted in the last bin.

    returns dict with keys being bins, and values being counts.
    '''
    if bins is None:
        bins = range(0, 100, 10)

    edges = list(bins)
    hist = dict((b, 0) for b in edges)
    for y in ydata:
        for b in reversed(edges):  # highest edge first
            # ">=" rather than ">": a value sitting exactly on an edge
            # (e.g. grade 0 with a bin at 0) belongs to that bin and must
            # not be dropped or shifted into the bin below.
            if y >= b:
                hist[b] += 1
                break
    return hist
#-----------------------------------------------------------------------------
def problems_with_psychometric_data(course_id):
'''
Return dict of {problems (location urls): count} for which psychometric data is available.
......@@ -118,36 +128,37 @@ def problems_with_psychometric_data(course_id):
'''
pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id)
plist = [p['studentmodule__module_state_key'] for p in pmdset.values('studentmodule__module_state_key').distinct()]
problems = dict( (p,pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist )
problems = dict( (p, pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist )
return problems
#-----------------------------------------------------------------------------
def generate_plots_for_problem(problem):
pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem)
nstudents = pmdset.count()
msg = ""
plots = []
if nstudents < 2:
msg += "%s nstudents=%d --> skipping, too few" % (problem,nstudents)
msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
return msg, plots
max_grade = pmdset[0].studentmodule.max_grade
agdat = pmdset.aggregate(Sum('attempts'), Max('attempts'))
max_attempts = agdat['attempts__max']
total_attempts = agdat['attempts__sum'] # not used yet
total_attempts = agdat['attempts__sum'] # not used yet
msg += "max attempts = %d" % max_attempts
xdat = range(1,max_attempts+1)
xdat = range(1, max_attempts + 1)
dataset = {'xdat': xdat}
# compute grade statistics
grades = [pmd.studentmodule.grade for pmd in pmdset]
grades = [pmd.studentmodule.grade for pmd in pmdset]
gsv = StatVar()
for g in grades:
gsv += g
......@@ -171,14 +182,14 @@ def generate_plots_for_problem(problem):
max_grade = gsv.max
if max_grade > 1:
ghist = make_histogram(grades, np.linspace(0,max_grade,max_grade+1))
ghist = make_histogram(grades, np.linspace(0, max_grade, max_grade + 1))
ghist_json = json.dumps(ghist.items())
plot = {'title': "Grade histogram for %s" % problem,
'id': 'histogram',
'info': '',
'data': "var dhist = %s;\n" % ghist_json,
'cmd': "[ {data: dhist, bars: { show: true }} ], %s" % axisopts,
'cmd': '[ {data: dhist, bars: { show: true, align: "center" }} ], %s' % axisopts,
}
plots.append(plot)
else:
......@@ -186,27 +197,27 @@ def generate_plots_for_problem(problem):
# histogram of time differences between checks
# Warning: this is inefficient - doesn't scale to large numbers of students
dtset = [] # time differences in minutes
dtset = [] # time differences in minutes
dtsv = StatVar()
for pmd in pmdset:
try:
checktimes = eval(pmd.checktimes) # update log of attempt timestamps
checktimes = eval(pmd.checktimes) # update log of attempt timestamps
except:
continue
if len(checktimes)<2:
if len(checktimes) < 2:
continue
ct0 = checktimes[0]
for ct in checktimes[1:]:
dt = (ct-ct0).total_seconds()/60.0
if dt<20: # ignore if dt too long
dt = (ct - ct0).total_seconds() / 60.0
if dt < 20: # ignore if dt too long
dtset.append(dt)
dtsv += dt
ct0 = ct
if dtsv.cnt > 2:
msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv
bins = np.linspace(0,1.5*dtsv.sdv(),30)
dbar = bins[1]-bins[0]
thist = make_histogram(dtset,bins)
bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
dbar = bins[1] - bins[0]
thist = make_histogram(dtset, bins)
thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0]))
axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""
......@@ -215,33 +226,33 @@ def generate_plots_for_problem(problem):
'id': 'thistogram',
'info': '',
'data': "var thist = %s;\n" % thist_json,
'cmd': "[ {data: thist, bars: { show: true, barWidth:%f }} ], %s" % (dbar, axisopts),
'cmd': '[ {data: thist, bars: { show: true, align: "center", barWidth:%f }} ], %s' % (dbar, axisopts),
}
plots.append(plot)
# one IRT plot curve for each grade received (TODO: this assumes integer grades)
for grade in range(1,int(max_grade)+1):
for grade in range(1, int(max_grade) + 1):
yset = {}
gset = pmdset.filter(studentmodule__grade=grade)
ngset = gset.count()
if ngset==0:
if ngset == 0:
continue
ydat = []
ylast = 0
for x in xdat:
y = gset.filter(attempts=x).count()/ngset
y = gset.filter(attempts=x).count() / ngset
ydat.append( y + ylast )
ylast = y + ylast
yset['ydat'] = ydat
if len(ydat)>3: # try to fit to logistic function if enough data points
cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts/2.0])
if len(ydat) > 3: # try to fit to logistic function if enough data points
cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
yset['fitparam'] = cfp
yset['fitpts'] = func_2pl(np.array(xdat),*cfp[0])
yset['fiterr'] = [yd-yf for (yd,yf) in zip(ydat,yset['fitpts'])]
fitx = np.linspace(xdat[0],xdat[-1],100)
yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
fitx = np.linspace(xdat[0], xdat[-1], 100)
yset['fitx'] = fitx
yset['fity'] = func_2pl(np.array(fitx),*cfp[0])
yset['fity'] = func_2pl(np.array(fitx), *cfp[0])
dataset['grade_%d' % grade] = yset
......@@ -257,27 +268,27 @@ def generate_plots_for_problem(problem):
}"""
# generate points for flot plot
for grade in range(1,int(max_grade)+1):
for grade in range(1, int(max_grade) + 1):
jsdata = ""
jsplots = []
gkey = 'grade_%d' % grade
if gkey in dataset:
yset = dataset[gkey]
jsdata += "var d%d = %s;\n" % (grade,json.dumps(zip(xdat,yset['ydat'])))
jsdata += "var d%d = %s;\n" % (grade, json.dumps(zip(xdat, yset['ydat'])))
jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
if 'fitpts' in yset:
jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'],yset['fity'])))
jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'], yset['fity'])))
jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }')
(a,b) = yset['fitparam'][0]
irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a,b)
(a, b) = yset['fitparam'][0]
irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
else:
irtinfo = ""
plots.append({'title': 'IRT Plot for grade=%s %s' % (grade,irtinfo),
plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
'id': "irt%s" % grade,
'info': '',
'data': jsdata,
'cmd' : '[%s], %s' % (','.join(jsplots), axisopts),
'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
})
#log.debug('plots = %s' % plots)
......@@ -285,6 +296,7 @@ def generate_plots_for_problem(problem):
#-----------------------------------------------------------------------------
def make_psychometrics_data_update_handler(studentmodule):
"""
Construct and return a procedure which may be called to update
......@@ -307,13 +319,13 @@ def make_psychometrics_data_update_handler(studentmodule):
state = json.loads(sm.state)
done = state['done']
except:
log.exception("Oops, failed to eval state for %s (state=%s)" % (sm,sm.state))
log.exception("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
return
pmd.done = done
pmd.attempts = state['attempts']
try:
checktimes = eval(pmd.checktimes) # update log of attempt timestamps
checktimes = eval(pmd.checktimes) # update log of attempt timestamps
except:
checktimes = []
checktimes.append(datetime.datetime.now())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment