Commit bffd9ac3 by ichuang

center histogram bars; pep8

parent 1f07bb04
# #
# File: psychometrics/psychoanalyze.py # File: psychometrics/psychoanalyze.py
# #
# generate psychometrics plots from PsychometricData # generate psychometrics plots from PsychometricData
from __future__ import division from __future__ import division
...@@ -19,98 +19,108 @@ from xmodule.modulestore import Location ...@@ -19,98 +19,108 @@ from xmodule.modulestore import Location
log = logging.getLogger("mitx.psychometrics") log = logging.getLogger("mitx.psychometrics")
#db = "ocwtutor" # for debugging #db = "ocwtutor" # for debugging
#db = "default" #db = "default"
db = getattr(settings,'DATABASE_FOR_PSYCHOMETRICS','default') db = getattr(settings, 'DATABASE_FOR_PSYCHOMETRICS', 'default')
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# fit functions # fit functions
def func_2pl(x, a, b):
    """
    2-parameter logistic function

    Evaluates exp(D*a*(x-b)) / (1 + exp(D*a*(x-b))) with D = 1.7.

    x -- point(s) at which to evaluate; a scalar or a numpy array
    a -- coefficient multiplying (x - b) in the exponent
    b -- value of x at which the function equals 0.5

    Returns a numpy float (scalar x) or numpy array (array x) in [0, 1].
    """
    D = 1.7
    z = D * a * (x - b)
    # Computed as exp(-log(1 + exp(-z))) instead of exp(z) / (1 + exp(z)):
    # the direct form overflows to inf / inf = nan for large z, which
    # poisons any fit that wanders into that region.
    return np.exp(-np.logaddexp(0, -z))
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# statistics class # statistics class
class StatVar(object):
    """
    Simple statistics on floating point numbers: avg, sdv, var, min, max

    Values are accumulated one at a time with add() (or `sv += x`); only
    running sums are stored, not the values themselves.  avg, var and sdv
    are reported in multiples of `unit`; min and max are stored unscaled.
    """

    def __init__(self, unit=1):
        self.sum = 0        # running sum of values
        self.sum2 = 0       # running sum of squared values
        self.cnt = 0        # number of values added (None values excluded)
        self.unit = unit    # divisor applied to avg (and unit**2 to var)
        self.min = None     # smallest value seen, None until first add
        self.max = None     # largest value seen, None until first add

    def add(self, x):
        """Accumulate x into the statistics; None is silently ignored."""
        if x is None:
            return
        if self.min is None or x < self.min:
            self.min = x
        if self.max is None or x > self.max:
            self.max = x
        self.sum += x
        self.sum2 += x ** 2
        self.cnt += 1

    def avg(self):
        """Return the mean in multiples of unit, or 0 if nothing was added."""
        # cnt starts at 0 (never None), so test for zero to avoid dividing by it
        if not self.cnt:
            return 0
        return self.sum / 1.0 / self.cnt / self.unit

    def var(self):
        """Return the population variance E[x^2] - E[x]^2, or 0 if empty."""
        if not self.cnt:
            return 0
        return (self.sum2 / 1.0 / self.cnt / (self.unit ** 2)) - (self.avg() ** 2)

    def sdv(self):
        """Return the standard deviation (0 when the variance is not positive)."""
        v = self.var()
        # rounding can push the computed variance slightly below zero
        if v > 0:
            return math.sqrt(v)
        return 0

    def __str__(self):
        return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt, self.avg(), self.sdv())

    def __add__(self, x):
        """
        Accumulate x and return self, so that `sv += x` works.

        NOTE: unlike a conventional __add__, this mutates self in place;
        `sv + x` therefore also modifies sv.  Kept for existing callers.
        """
        self.add(x)
        return self
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# histogram generator # histogram generator
def make_histogram(ydata, bins=None):
    '''
    Generate histogram of ydata using bins provided, or by default bins
    from 0 to 100 by 10.  bins should be ordered in increasing order.

    Each bin value is the inclusive lower edge of its bin: a value y is
    counted in the largest bin b with y >= b.  Values below the lowest
    bin, and None entries, are not counted.

    ydata -- iterable of numbers (None entries are skipped)
    bins  -- sliceable sequence of lower bin edges, in increasing order

    returns dict with keys being bins, and values being counts.
    '''
    if bins is None:
        bins = range(0, 100, 10)

    nbins = len(bins)
    hist = dict(zip(bins, [0] * nbins))
    descending = bins[::-1]     # scan from the highest edge downwards
    for y in ydata:
        if y is None:
            continue
        for b in descending:
            # >= (not >) so a value sitting exactly on an edge, including
            # the lowest one, lands in the bin that starts at that edge
            if y >= b:
                hist[b] += 1
                break
    return hist
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
def problems_with_psychometric_data(course_id): def problems_with_psychometric_data(course_id):
''' '''
Return dict of {problems (location urls): count} for which psychometric data is available. Return dict of {problems (location urls): count} for which psychometric data is available.
...@@ -118,36 +128,37 @@ def problems_with_psychometric_data(course_id): ...@@ -118,36 +128,37 @@ def problems_with_psychometric_data(course_id):
''' '''
pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id) pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id)
plist = [p['studentmodule__module_state_key'] for p in pmdset.values('studentmodule__module_state_key').distinct()] plist = [p['studentmodule__module_state_key'] for p in pmdset.values('studentmodule__module_state_key').distinct()]
problems = dict( (p,pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist ) problems = dict( (p, pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist )
return problems return problems
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
def generate_plots_for_problem(problem): def generate_plots_for_problem(problem):
pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem) pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem)
nstudents = pmdset.count() nstudents = pmdset.count()
msg = "" msg = ""
plots = [] plots = []
if nstudents < 2: if nstudents < 2:
msg += "%s nstudents=%d --> skipping, too few" % (problem,nstudents) msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
return msg, plots return msg, plots
max_grade = pmdset[0].studentmodule.max_grade max_grade = pmdset[0].studentmodule.max_grade
agdat = pmdset.aggregate(Sum('attempts'), Max('attempts')) agdat = pmdset.aggregate(Sum('attempts'), Max('attempts'))
max_attempts = agdat['attempts__max'] max_attempts = agdat['attempts__max']
total_attempts = agdat['attempts__sum'] # not used yet total_attempts = agdat['attempts__sum'] # not used yet
msg += "max attempts = %d" % max_attempts msg += "max attempts = %d" % max_attempts
xdat = range(1,max_attempts+1) xdat = range(1, max_attempts + 1)
dataset = {'xdat': xdat} dataset = {'xdat': xdat}
# compute grade statistics # compute grade statistics
grades = [pmd.studentmodule.grade for pmd in pmdset] grades = [pmd.studentmodule.grade for pmd in pmdset]
gsv = StatVar() gsv = StatVar()
for g in grades: for g in grades:
gsv += g gsv += g
...@@ -171,14 +182,14 @@ def generate_plots_for_problem(problem): ...@@ -171,14 +182,14 @@ def generate_plots_for_problem(problem):
max_grade = gsv.max max_grade = gsv.max
if max_grade > 1: if max_grade > 1:
ghist = make_histogram(grades, np.linspace(0,max_grade,max_grade+1)) ghist = make_histogram(grades, np.linspace(0, max_grade, max_grade + 1))
ghist_json = json.dumps(ghist.items()) ghist_json = json.dumps(ghist.items())
plot = {'title': "Grade histogram for %s" % problem, plot = {'title': "Grade histogram for %s" % problem,
'id': 'histogram', 'id': 'histogram',
'info': '', 'info': '',
'data': "var dhist = %s;\n" % ghist_json, 'data': "var dhist = %s;\n" % ghist_json,
'cmd': "[ {data: dhist, bars: { show: true }} ], %s" % axisopts, 'cmd': '[ {data: dhist, bars: { show: true, align: "center" }} ], %s' % axisopts,
} }
plots.append(plot) plots.append(plot)
else: else:
...@@ -186,27 +197,27 @@ def generate_plots_for_problem(problem): ...@@ -186,27 +197,27 @@ def generate_plots_for_problem(problem):
# histogram of time differences between checks # histogram of time differences between checks
# Warning: this is inefficient - doesn't scale to large numbers of students # Warning: this is inefficient - doesn't scale to large numbers of students
dtset = [] # time differences in minutes dtset = [] # time differences in minutes
dtsv = StatVar() dtsv = StatVar()
for pmd in pmdset: for pmd in pmdset:
try: try:
checktimes = eval(pmd.checktimes) # update log of attempt timestamps checktimes = eval(pmd.checktimes) # update log of attempt timestamps
except: except:
continue continue
if len(checktimes)<2: if len(checktimes) < 2:
continue continue
ct0 = checktimes[0] ct0 = checktimes[0]
for ct in checktimes[1:]: for ct in checktimes[1:]:
dt = (ct-ct0).total_seconds()/60.0 dt = (ct - ct0).total_seconds() / 60.0
if dt<20: # ignore if dt too long if dt < 20: # ignore if dt too long
dtset.append(dt) dtset.append(dt)
dtsv += dt dtsv += dt
ct0 = ct ct0 = ct
if dtsv.cnt > 2: if dtsv.cnt > 2:
msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv
bins = np.linspace(0,1.5*dtsv.sdv(),30) bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
dbar = bins[1]-bins[0] dbar = bins[1] - bins[0]
thist = make_histogram(dtset,bins) thist = make_histogram(dtset, bins)
thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0])) thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0]))
axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}""" axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""
...@@ -215,33 +226,33 @@ def generate_plots_for_problem(problem): ...@@ -215,33 +226,33 @@ def generate_plots_for_problem(problem):
'id': 'thistogram', 'id': 'thistogram',
'info': '', 'info': '',
'data': "var thist = %s;\n" % thist_json, 'data': "var thist = %s;\n" % thist_json,
'cmd': "[ {data: thist, bars: { show: true, barWidth:%f }} ], %s" % (dbar, axisopts), 'cmd': '[ {data: thist, bars: { show: true, align: "center", barWidth:%f }} ], %s' % (dbar, axisopts),
} }
plots.append(plot) plots.append(plot)
# one IRT plot curve for each grade received (TODO: this assumes integer grades) # one IRT plot curve for each grade received (TODO: this assumes integer grades)
for grade in range(1,int(max_grade)+1): for grade in range(1, int(max_grade) + 1):
yset = {} yset = {}
gset = pmdset.filter(studentmodule__grade=grade) gset = pmdset.filter(studentmodule__grade=grade)
ngset = gset.count() ngset = gset.count()
if ngset==0: if ngset == 0:
continue continue
ydat = [] ydat = []
ylast = 0 ylast = 0
for x in xdat: for x in xdat:
y = gset.filter(attempts=x).count()/ngset y = gset.filter(attempts=x).count() / ngset
ydat.append( y + ylast ) ydat.append( y + ylast )
ylast = y + ylast ylast = y + ylast
yset['ydat'] = ydat yset['ydat'] = ydat
if len(ydat)>3: # try to fit to logistic function if enough data points if len(ydat) > 3: # try to fit to logistic function if enough data points
cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts/2.0]) cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
yset['fitparam'] = cfp yset['fitparam'] = cfp
yset['fitpts'] = func_2pl(np.array(xdat),*cfp[0]) yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
yset['fiterr'] = [yd-yf for (yd,yf) in zip(ydat,yset['fitpts'])] yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
fitx = np.linspace(xdat[0],xdat[-1],100) fitx = np.linspace(xdat[0], xdat[-1], 100)
yset['fitx'] = fitx yset['fitx'] = fitx
yset['fity'] = func_2pl(np.array(fitx),*cfp[0]) yset['fity'] = func_2pl(np.array(fitx), *cfp[0])
dataset['grade_%d' % grade] = yset dataset['grade_%d' % grade] = yset
...@@ -257,27 +268,27 @@ def generate_plots_for_problem(problem): ...@@ -257,27 +268,27 @@ def generate_plots_for_problem(problem):
}""" }"""
# generate points for flot plot # generate points for flot plot
for grade in range(1,int(max_grade)+1): for grade in range(1, int(max_grade) + 1):
jsdata = "" jsdata = ""
jsplots = [] jsplots = []
gkey = 'grade_%d' % grade gkey = 'grade_%d' % grade
if gkey in dataset: if gkey in dataset:
yset = dataset[gkey] yset = dataset[gkey]
jsdata += "var d%d = %s;\n" % (grade,json.dumps(zip(xdat,yset['ydat']))) jsdata += "var d%d = %s;\n" % (grade, json.dumps(zip(xdat, yset['ydat'])))
jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade) jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
if 'fitpts' in yset: if 'fitpts' in yset:
jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'],yset['fity']))) jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'], yset['fity'])))
jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }') jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }')
(a,b) = yset['fitparam'][0] (a, b) = yset['fitparam'][0]
irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a,b) irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
else: else:
irtinfo = "" irtinfo = ""
plots.append({'title': 'IRT Plot for grade=%s %s' % (grade,irtinfo), plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
'id': "irt%s" % grade, 'id': "irt%s" % grade,
'info': '', 'info': '',
'data': jsdata, 'data': jsdata,
'cmd' : '[%s], %s' % (','.join(jsplots), axisopts), 'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
}) })
#log.debug('plots = %s' % plots) #log.debug('plots = %s' % plots)
...@@ -285,6 +296,7 @@ def generate_plots_for_problem(problem): ...@@ -285,6 +296,7 @@ def generate_plots_for_problem(problem):
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
def make_psychometrics_data_update_handler(studentmodule): def make_psychometrics_data_update_handler(studentmodule):
""" """
Construct and return a procedure which may be called to update Construct and return a procedure which may be called to update
...@@ -307,13 +319,13 @@ def make_psychometrics_data_update_handler(studentmodule): ...@@ -307,13 +319,13 @@ def make_psychometrics_data_update_handler(studentmodule):
state = json.loads(sm.state) state = json.loads(sm.state)
done = state['done'] done = state['done']
except: except:
log.exception("Oops, failed to eval state for %s (state=%s)" % (sm,sm.state)) log.exception("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
return return
pmd.done = done pmd.done = done
pmd.attempts = state['attempts'] pmd.attempts = state['attempts']
try: try:
checktimes = eval(pmd.checktimes) # update log of attempt timestamps checktimes = eval(pmd.checktimes) # update log of attempt timestamps
except: except:
checktimes = [] checktimes = []
checktimes.append(datetime.datetime.now()) checktimes.append(datetime.datetime.now())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment