Commit bffd9ac3 by ichuang

center histogram bars; pep8

parent 1f07bb04
...@@ -22,73 +22,82 @@ log = logging.getLogger("mitx.psychometrics") ...@@ -22,73 +22,82 @@ log = logging.getLogger("mitx.psychometrics")
#db = "ocwtutor" # for debugging #db = "ocwtutor" # for debugging
#db = "default" #db = "default"
db = getattr(settings,'DATABASE_FOR_PSYCHOMETRICS','default') db = getattr(settings, 'DATABASE_FOR_PSYCHOMETRICS', 'default')
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# fit functions # fit functions
def func_2pl(x, a, b):
    """
    2-parameter logistic function.

    Evaluates 1 / (1 + exp(-D * a * (x - b))) with the fixed scale
    factor D = 1.7.

    x : scalar or numpy array of points at which to evaluate
    a : slope parameter (larger magnitude gives a steeper curve)
    b : location parameter (the result is exactly 0.5 at x == b)

    Returns a value (or array of values) in the closed interval [0, 1].
    """
    D = 1.7
    z = D * a * (x - b)
    # exp(z) / (1 + exp(z)) is algebraically equal to (1 + tanh(z / 2)) / 2.
    # The tanh form is used because exp(z) overflows to inf for large z,
    # turning the naive ratio into inf / inf = nan; tanh simply saturates
    # at +/-1, so the result saturates cleanly at 1 or 0.
    return 0.5 * (1.0 + np.tanh(0.5 * z))
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# statistics class # statistics class
class StatVar(object):
    """
    Simple running statistics on floating point numbers: avg, sdv, var, min, max

    Samples are accumulated with add() (or with the `+` / `+=` operators).
    Statistics are computed on demand from the running sum, the running sum
    of squares and the sample count.

    `unit` is a scale factor: avg() is divided by it and var() by its
    square.  min and max are stored exactly as the samples were given.
    """

    def __init__(self, unit=1):
        self.sum = 0
        self.sum2 = 0
        self.cnt = 0
        self.unit = unit
        self.min = None
        self.max = None

    def add(self, x):
        """Record one sample; a sample of None is ignored."""
        if x is None:
            return
        if self.min is None or x < self.min:
            self.min = x
        if self.max is None or x > self.max:
            self.max = x
        self.sum += x
        self.sum2 += x ** 2
        self.cnt += 1

    def avg(self):
        """Return the mean of the samples in units of `unit` (0 if empty)."""
        # cnt starts at 0 and is never None, so the emptiness check must
        # test for zero; otherwise an empty accumulator divides by zero.
        if not self.cnt:
            return 0
        return self.sum / 1.0 / self.cnt / self.unit

    def var(self):
        """Return the population variance in units of `unit`**2 (0 if empty)."""
        if not self.cnt:
            return 0
        return (self.sum2 / 1.0 / self.cnt / (self.unit ** 2)) - (self.avg() ** 2)

    def sdv(self):
        """Return the standard deviation, or 0 when the variance is not positive."""
        v = self.var()
        # Rounding can leave var() marginally negative; clamp instead of
        # letting math.sqrt raise.
        if v > 0:
            return math.sqrt(v)
        return 0

    def __str__(self):
        return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt, self.avg(), self.sdv())

    def __add__(self, x):
        """
        Accumulate sample x in place and return self, so that both
        `sv + x` and `sv += x` record a sample on this same object.
        """
        self.add(x)
        return self
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# histogram generator # histogram generator
def make_histogram(ydata,bins=None):
def make_histogram(ydata, bins=None):
''' '''
Generate histogram of ydata using bins provided, or by default bins Generate histogram of ydata using bins provided, or by default bins
from 0 to 100 by 10. bins should be ordered in increasing order. from 0 to 100 by 10. bins should be ordered in increasing order.
...@@ -97,10 +106,10 @@ def make_histogram(ydata,bins=None): ...@@ -97,10 +106,10 @@ def make_histogram(ydata,bins=None):
special: hist['bins'] = bins special: hist['bins'] = bins
''' '''
if bins is None: if bins is None:
bins = range(0,100,10) bins = range(0, 100, 10)
nbins = len(bins) nbins = len(bins)
hist = dict(zip(bins,[0] * nbins)) hist = dict(zip(bins, [0] * nbins))
for y in ydata: for y in ydata:
for b in bins[::-1]: # in reverse order for b in bins[::-1]: # in reverse order
if y>b: if y>b:
...@@ -111,6 +120,7 @@ def make_histogram(ydata,bins=None): ...@@ -111,6 +120,7 @@ def make_histogram(ydata,bins=None):
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
def problems_with_psychometric_data(course_id): def problems_with_psychometric_data(course_id):
''' '''
Return dict of {problems (location urls): count} for which psychometric data is available. Return dict of {problems (location urls): count} for which psychometric data is available.
...@@ -118,12 +128,13 @@ def problems_with_psychometric_data(course_id): ...@@ -118,12 +128,13 @@ def problems_with_psychometric_data(course_id):
''' '''
pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id) pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id)
plist = [p['studentmodule__module_state_key'] for p in pmdset.values('studentmodule__module_state_key').distinct()] plist = [p['studentmodule__module_state_key'] for p in pmdset.values('studentmodule__module_state_key').distinct()]
problems = dict( (p,pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist ) problems = dict( (p, pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist )
return problems return problems
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
def generate_plots_for_problem(problem): def generate_plots_for_problem(problem):
pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem) pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem)
...@@ -132,7 +143,7 @@ def generate_plots_for_problem(problem): ...@@ -132,7 +143,7 @@ def generate_plots_for_problem(problem):
plots = [] plots = []
if nstudents < 2: if nstudents < 2:
msg += "%s nstudents=%d --> skipping, too few" % (problem,nstudents) msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
return msg, plots return msg, plots
max_grade = pmdset[0].studentmodule.max_grade max_grade = pmdset[0].studentmodule.max_grade
...@@ -143,7 +154,7 @@ def generate_plots_for_problem(problem): ...@@ -143,7 +154,7 @@ def generate_plots_for_problem(problem):
msg += "max attempts = %d" % max_attempts msg += "max attempts = %d" % max_attempts
xdat = range(1,max_attempts+1) xdat = range(1, max_attempts + 1)
dataset = {'xdat': xdat} dataset = {'xdat': xdat}
# compute grade statistics # compute grade statistics
...@@ -171,14 +182,14 @@ def generate_plots_for_problem(problem): ...@@ -171,14 +182,14 @@ def generate_plots_for_problem(problem):
max_grade = gsv.max max_grade = gsv.max
if max_grade > 1: if max_grade > 1:
ghist = make_histogram(grades, np.linspace(0,max_grade,max_grade+1)) ghist = make_histogram(grades, np.linspace(0, max_grade, max_grade + 1))
ghist_json = json.dumps(ghist.items()) ghist_json = json.dumps(ghist.items())
plot = {'title': "Grade histogram for %s" % problem, plot = {'title': "Grade histogram for %s" % problem,
'id': 'histogram', 'id': 'histogram',
'info': '', 'info': '',
'data': "var dhist = %s;\n" % ghist_json, 'data': "var dhist = %s;\n" % ghist_json,
'cmd': "[ {data: dhist, bars: { show: true }} ], %s" % axisopts, 'cmd': '[ {data: dhist, bars: { show: true, align: "center" }} ], %s' % axisopts,
} }
plots.append(plot) plots.append(plot)
else: else:
...@@ -193,20 +204,20 @@ def generate_plots_for_problem(problem): ...@@ -193,20 +204,20 @@ def generate_plots_for_problem(problem):
checktimes = eval(pmd.checktimes) # update log of attempt timestamps checktimes = eval(pmd.checktimes) # update log of attempt timestamps
except: except:
continue continue
if len(checktimes)<2: if len(checktimes) < 2:
continue continue
ct0 = checktimes[0] ct0 = checktimes[0]
for ct in checktimes[1:]: for ct in checktimes[1:]:
dt = (ct-ct0).total_seconds()/60.0 dt = (ct - ct0).total_seconds() / 60.0
if dt<20: # ignore if dt too long if dt < 20: # ignore if dt too long
dtset.append(dt) dtset.append(dt)
dtsv += dt dtsv += dt
ct0 = ct ct0 = ct
if dtsv.cnt > 2: if dtsv.cnt > 2:
msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv
bins = np.linspace(0,1.5*dtsv.sdv(),30) bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
dbar = bins[1]-bins[0] dbar = bins[1] - bins[0]
thist = make_histogram(dtset,bins) thist = make_histogram(dtset, bins)
thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0])) thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0]))
axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}""" axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""
...@@ -215,33 +226,33 @@ def generate_plots_for_problem(problem): ...@@ -215,33 +226,33 @@ def generate_plots_for_problem(problem):
'id': 'thistogram', 'id': 'thistogram',
'info': '', 'info': '',
'data': "var thist = %s;\n" % thist_json, 'data': "var thist = %s;\n" % thist_json,
'cmd': "[ {data: thist, bars: { show: true, barWidth:%f }} ], %s" % (dbar, axisopts), 'cmd': '[ {data: thist, bars: { show: true, align: "center", barWidth:%f }} ], %s' % (dbar, axisopts),
} }
plots.append(plot) plots.append(plot)
# one IRT plot curve for each grade received (TODO: this assumes integer grades) # one IRT plot curve for each grade received (TODO: this assumes integer grades)
for grade in range(1,int(max_grade)+1): for grade in range(1, int(max_grade) + 1):
yset = {} yset = {}
gset = pmdset.filter(studentmodule__grade=grade) gset = pmdset.filter(studentmodule__grade=grade)
ngset = gset.count() ngset = gset.count()
if ngset==0: if ngset == 0:
continue continue
ydat = [] ydat = []
ylast = 0 ylast = 0
for x in xdat: for x in xdat:
y = gset.filter(attempts=x).count()/ngset y = gset.filter(attempts=x).count() / ngset
ydat.append( y + ylast ) ydat.append( y + ylast )
ylast = y + ylast ylast = y + ylast
yset['ydat'] = ydat yset['ydat'] = ydat
if len(ydat)>3: # try to fit to logistic function if enough data points if len(ydat) > 3: # try to fit to logistic function if enough data points
cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts/2.0]) cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
yset['fitparam'] = cfp yset['fitparam'] = cfp
yset['fitpts'] = func_2pl(np.array(xdat),*cfp[0]) yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
yset['fiterr'] = [yd-yf for (yd,yf) in zip(ydat,yset['fitpts'])] yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
fitx = np.linspace(xdat[0],xdat[-1],100) fitx = np.linspace(xdat[0], xdat[-1], 100)
yset['fitx'] = fitx yset['fitx'] = fitx
yset['fity'] = func_2pl(np.array(fitx),*cfp[0]) yset['fity'] = func_2pl(np.array(fitx), *cfp[0])
dataset['grade_%d' % grade] = yset dataset['grade_%d' % grade] = yset
...@@ -257,27 +268,27 @@ def generate_plots_for_problem(problem): ...@@ -257,27 +268,27 @@ def generate_plots_for_problem(problem):
}""" }"""
# generate points for flot plot # generate points for flot plot
for grade in range(1,int(max_grade)+1): for grade in range(1, int(max_grade) + 1):
jsdata = "" jsdata = ""
jsplots = [] jsplots = []
gkey = 'grade_%d' % grade gkey = 'grade_%d' % grade
if gkey in dataset: if gkey in dataset:
yset = dataset[gkey] yset = dataset[gkey]
jsdata += "var d%d = %s;\n" % (grade,json.dumps(zip(xdat,yset['ydat']))) jsdata += "var d%d = %s;\n" % (grade, json.dumps(zip(xdat, yset['ydat'])))
jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade) jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
if 'fitpts' in yset: if 'fitpts' in yset:
jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'],yset['fity']))) jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'], yset['fity'])))
jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }') jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }')
(a,b) = yset['fitparam'][0] (a, b) = yset['fitparam'][0]
irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a,b) irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
else: else:
irtinfo = "" irtinfo = ""
plots.append({'title': 'IRT Plot for grade=%s %s' % (grade,irtinfo), plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
'id': "irt%s" % grade, 'id': "irt%s" % grade,
'info': '', 'info': '',
'data': jsdata, 'data': jsdata,
'cmd' : '[%s], %s' % (','.join(jsplots), axisopts), 'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
}) })
#log.debug('plots = %s' % plots) #log.debug('plots = %s' % plots)
...@@ -285,6 +296,7 @@ def generate_plots_for_problem(problem): ...@@ -285,6 +296,7 @@ def generate_plots_for_problem(problem):
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
def make_psychometrics_data_update_handler(studentmodule): def make_psychometrics_data_update_handler(studentmodule):
""" """
Construct and return a procedure which may be called to update Construct and return a procedure which may be called to update
...@@ -307,7 +319,7 @@ def make_psychometrics_data_update_handler(studentmodule): ...@@ -307,7 +319,7 @@ def make_psychometrics_data_update_handler(studentmodule):
state = json.loads(sm.state) state = json.loads(sm.state)
done = state['done'] done = state['done']
except: except:
log.exception("Oops, failed to eval state for %s (state=%s)" % (sm,sm.state)) log.exception("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
return return
pmd.done = done pmd.done = done
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment