Commit d2b3977f by Brian Wilson

Add dogstat logging to background tasks.

parent 9e11a565
......@@ -20,6 +20,7 @@ LOGFIELDS = ['username', 'ip', 'event_source', 'event_type', 'event', 'agent', '
def log_event(event):
"""Write tracking event to log file, and optionally to TrackingLog model."""
event_str = json.dumps(event)
log.info(event_str[:settings.TRACK_MAX_EVENT])
if settings.MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'):
......@@ -32,6 +33,11 @@ def log_event(event):
def user_track(request):
"""
Log when GET call to "event" URL is made by a user.
GET call should provide "event_type", "event", and "page" arguments.
"""
try: # TODO: Do the same for many of the optional META parameters
username = request.user.username
except:
......@@ -48,7 +54,6 @@ def user_track(request):
except:
agent = ''
# TODO: Move a bunch of this into log_event
event = {
"username": username,
"session": scookie,
......@@ -66,6 +71,7 @@ def user_track(request):
def server_track(request, event_type, event, page=None):
"""Log events related to server requests."""
try:
username = request.user.username
except:
......@@ -95,7 +101,7 @@ def server_track(request, event_type, event, page=None):
def task_track(request_info, task_info, event_type, event, page=None):
"""
Outputs tracking information for events occuring within celery tasks.
Logs tracking information for events occurring within celery tasks.
The `event_type` is a string naming the particular event being logged,
while `event` is a dict containing whatever additional contextual information
......@@ -103,9 +109,11 @@ def task_track(request_info, task_info, event_type, event, page=None):
The `request_info` is a dict containing information about the original
task request. Relevant keys are `username`, `ip`, `agent`, and `host`.
While the dict is required, the values in it are not, so that {} can be
passed in.
In addition, a `task_info` dict provides more information to be stored with
the `event` dict.
In addition, a `task_info` dict provides more information about the current
task, to be stored with the `event` dict. This may also be an empty dict.
The `page` parameter is optional, and allows the name of the page to
be provided.
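For orientation, here is a minimal call sketch of the documented signature; the import path and all field values are assumptions, not taken from this commit.

```python
# Hypothetical usage of task_track(); both info dicts may be empty ({}).
from track.views import task_track  # assumed import path

request_info = {'username': 'staff', 'ip': '127.0.0.1',
                'agent': 'celery', 'host': 'worker-1'}
task_info = {'task_id': 'abc-123'}  # merged into the logged `event` dict

task_track(request_info, task_info, 'problem_rescore',
           {'attempted': 10, 'succeeded': 8}, page='x_module')
```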
......@@ -136,6 +144,7 @@ def task_track(request_info, task_info, event_type, event, page=None):
@login_required
@ensure_csrf_cookie
def view_tracking_log(request, args=''):
"""View to output contents of TrackingLog model. For staff use only."""
if not request.user.is_staff:
return redirect('/')
nlen = 100
......
......@@ -15,25 +15,22 @@ This is used by capa_module.
from datetime import datetime
import logging
import math
import numpy
import os.path
import re
import sys
from lxml import etree
from xml.sax.saxutils import unescape
from copy import deepcopy
from .correctmap import CorrectMap
import inputtypes
import customrender
import capa.inputtypes as inputtypes
import capa.customrender as customrender
from .util import contextualize_text, convert_files_to_filenames
import xqueue_interface
import capa.xqueue_interface as xqueue_interface
# to be replaced with auto-registering
import responsetypes
import safe_exec
import capa.responsetypes as responsetypes
from capa.safe_exec import safe_exec
# dict of tagname, Response Class -- this should come from auto-registering
response_tag_dict = dict([(x.response_tag, x) for x in responsetypes.__all__])
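The import changes above replace implicit relative imports with package-absolute ones. A minimal sketch of the distinction, assuming a module living inside the `capa` package:

```python
# Implicit relative import (Python 2 semantics only; ambiguous when a
# top-level module of the same name exists on sys.path):
#   import responsetypes

# Absolute import (unambiguous; compatible with
# `from __future__ import absolute_import` and Python 3):
import capa.responsetypes as responsetypes
```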
......@@ -134,7 +131,6 @@ class LoncapaProblem(object):
self.extracted_tree = self._extract_html(self.tree)
def do_reset(self):
'''
Reset internal state to unfinished, with no answers
......@@ -175,7 +171,7 @@ class LoncapaProblem(object):
Return the maximum score for this problem.
'''
maxscore = 0
for response, responder in self.responders.iteritems():
for responder in self.responders.values():
maxscore += responder.get_max_score()
return maxscore
......@@ -257,7 +253,8 @@ class LoncapaProblem(object):
def grade_answers(self, answers):
'''
Grade student responses. Called by capa_module.check_problem.
answers is a dict of all the entries from request.POST, but with the first part
`answers` is a dict of all the entries from request.POST, but with the first part
of each key removed (the string before the first "_").
Thus, for example, input_ID123 -> ID123, and input_fromjs_ID123 -> fromjs_ID123
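A small standalone sketch of the key-stripping convention the docstring describes (the stripping itself happens in the caller; the names here are hypothetical):

```python
# Strip everything up to and including the first underscore from each key:
post_data = {'input_ID123': '42', 'input_fromjs_ID123': '{"x": 1}'}
answers = {key.split('_', 1)[1]: value for key, value in post_data.items()}
assert answers == {'ID123': '42', 'fromjs_ID123': '{"x": 1}'}
```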
......@@ -286,13 +283,12 @@ class LoncapaProblem(object):
that the responsetypes are synchronous. This is convenient as it
permits rescoring to be complete when the rescoring call returns.
"""
# We check for synchronous grading and no file submissions by
# screening out all problems with a CodeResponse type.
for responder in self.responders.values():
if 'filesubmission' in responder.allowed_inputfields:
return False
return True
return all('filesubmission' not in responder.allowed_inputfields for responder in self.responders.values())
# for responder in self.responders.values():
# if 'filesubmission' in responder.allowed_inputfields:
# return False
#
# return True
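The one-line `all(...)` expression above is behavior-equivalent to the loop it replaces; a standalone sketch with stand-in responder objects:

```python
class FakeResponder(object):
    """Stand-in for a responsetype, carrying only allowed_inputfields."""
    def __init__(self, fields):
        self.allowed_inputfields = fields

responders = [FakeResponder(['choicegroup']), FakeResponder(['textline'])]

# Loop form (removed):
loop_result = True
for responder in responders:
    if 'filesubmission' in responder.allowed_inputfields:
        loop_result = False
        break

# Expression form (added):
expr_result = all('filesubmission' not in r.allowed_inputfields
                  for r in responders)

assert loop_result == expr_result
```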
def rescore_existing_answers(self):
'''
......@@ -300,15 +296,17 @@ class LoncapaProblem(object):
'''
return self._grade_answers(None)
def _grade_answers(self, answers):
def _grade_answers(self, student_answers):
'''
Internal grading call used for checking new student answers and also
rescoring existing student answers.
Internal grading call used for checking new 'student_answers' and also
rescoring existing student_answers.
answers is a dict of all the entries from request.POST, but with the first part
of each key removed (the string before the first "_").
For new student_answers being graded, `student_answers` is a dict of all the
entries from request.POST, but with the first part of each key removed
(the string before the first "_"). Thus, for example,
input_ID123 -> ID123, and input_fromjs_ID123 -> fromjs_ID123.
Thus, for example, input_ID123 -> ID123, and input_fromjs_ID123 -> fromjs_ID123
For rescoring, `student_answers` is None.
Calls the Response for each question in this problem, to do the actual grading.
'''
......@@ -325,18 +323,19 @@ class LoncapaProblem(object):
# student_answers contains a proper answer or the filename of
# an earlier submission, so for now skip these entirely.
# TODO: figure out where to get file submissions when rescoring.
if 'filesubmission' in responder.allowed_inputfields and answers is None:
if 'filesubmission' in responder.allowed_inputfields and student_answers is None:
raise Exception("Cannot rescore problems with possible file submissions")
# use 'answers' if it is provided, otherwise use the saved student_answers.
if answers is not None:
results = responder.evaluate_answers(answers, oldcmap)
# use 'student_answers' only if it is provided, and if it might contain a file
# submission that would not exist in the persisted "student_answers".
if 'filesubmission' in responder.allowed_inputfields and student_answers is not None:
results = responder.evaluate_answers(student_answers, oldcmap)
else:
results = responder.evaluate_answers(self.student_answers, oldcmap)
newcmap.update(results)
self.correct_map = newcmap
# log.debug('%s: in grade_answers, answers=%s, cmap=%s' % (self,answers,newcmap))
# log.debug('%s: in grade_answers, student_answers=%s, cmap=%s' % (self,student_answers,newcmap))
return newcmap
def get_question_answers(self):
......@@ -380,7 +379,6 @@ class LoncapaProblem(object):
html = contextualize_text(etree.tostring(self._extract_html(self.tree)), self.context)
return html
def handle_input_ajax(self, get):
'''
InputTypes can support specialized AJAX calls. Find the correct input and pass along the correct data
......@@ -397,8 +395,6 @@ class LoncapaProblem(object):
log.warning("Could not find matching input for id: %s" % input_id)
return {}
# ======= Private Methods Below ========
def _process_includes(self):
......@@ -408,16 +404,16 @@ class LoncapaProblem(object):
'''
includes = self.tree.findall('.//include')
for inc in includes:
file = inc.get('file')
if file is not None:
filename = inc.get('file')
if filename is not None:
try:
# open using ModuleSystem OSFS filestore
ifp = self.system.filestore.open(file)
ifp = self.system.filestore.open(filename)
except Exception as err:
log.warning('Error %s in problem xml include: %s' % (
err, etree.tostring(inc, pretty_print=True)))
log.warning('Cannot find file %s in %s' % (
file, self.system.filestore))
filename, self.system.filestore))
# if debugging, don't fail - just log error
# TODO (vshnayder): need real error handling, display to users
if not self.system.get('DEBUG'):
......@@ -430,7 +426,7 @@ class LoncapaProblem(object):
except Exception as err:
log.warning('Error %s in problem xml include: %s' % (
err, etree.tostring(inc, pretty_print=True)))
log.warning('Cannot parse XML in %s' % (file))
log.warning('Cannot parse XML in %s' % (filename))
# if debugging, don't fail - just log error
# TODO (vshnayder): same as above
if not self.system.get('DEBUG'):
......@@ -438,11 +434,11 @@ class LoncapaProblem(object):
else:
continue
# insert new XML into tree in place of inlcude
# insert new XML into tree in place of include
parent = inc.getparent()
parent.insert(parent.index(inc), incxml)
parent.remove(inc)
log.debug('Included %s into %s' % (file, self.problem_id))
log.debug('Included %s into %s' % (filename, self.problem_id))
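The `file` → `filename` rename in this hunk avoids shadowing Python 2's built-in `file` type; a minimal illustration of the hazard:

```python
def shadowed(attrs):
    file = attrs.get('file')      # rebinding `file` hides the builtin here
    return file

def renamed(attrs):
    filename = attrs.get('file')  # no shadowing, and clearer intent
    return filename
```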
def _extract_system_path(self, script):
"""
......@@ -512,7 +508,7 @@ class LoncapaProblem(object):
if all_code:
try:
safe_exec.safe_exec(
safe_exec(
all_code,
context,
random_seed=self.seed,
......
......@@ -424,7 +424,7 @@ class CapaModule(CapaFields, XModule):
# If we cannot construct the problem HTML,
# then generate an error message instead.
except Exception, err:
except Exception as err:
html = self.handle_problem_html_error(err)
# The convention is to pass the name of the check button
......@@ -780,7 +780,7 @@ class CapaModule(CapaFields, XModule):
return {'success': msg}
except Exception, err:
except Exception as err:
if self.system.DEBUG:
msg = "Error checking problem: " + str(err)
msg += '\nTraceback:\n' + traceback.format_exc()
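Several hunks in this file swap the Python 2-only comma form of `except` for the `as` form, which works on Python 2.6+ and is the only form Python 3 accepts:

```python
try:
    1 / 0
# Old form (SyntaxError on Python 3):  except Exception, err:
except Exception as err:
    print(err)
```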
......@@ -845,13 +845,10 @@ class CapaModule(CapaFields, XModule):
# get old score, for comparison:
orig_score = self.lcp.get_score()
event_info['orig_score'] = orig_score['score']
event_info['orig_max_score'] = orig_score['total']
event_info['orig_total'] = orig_score['total']
try:
correct_map = self.lcp.rescore_existing_answers()
# rescoring should have no effect on attempts, so don't
# need to increment here, or mark done. Just save.
self.set_state_from_lcp()
except (StudentInputError, ResponseError, LoncapaProblemError) as inst:
log.warning("StudentInputError in capa_module:problem_rescore", exc_info=True)
......@@ -859,7 +856,7 @@ class CapaModule(CapaFields, XModule):
self.system.track_function('problem_rescore_fail', event_info)
return {'success': "Error: {0}".format(inst.message)}
except Exception, err:
except Exception as err:
event_info['failure'] = 'unexpected'
self.system.track_function('problem_rescore_fail', event_info)
if self.system.DEBUG:
......@@ -868,11 +865,15 @@ class CapaModule(CapaFields, XModule):
return {'success': msg}
raise
# rescoring should have no effect on attempts, so don't
# need to increment here, or mark done. Just save.
self.set_state_from_lcp()
self.publish_grade()
new_score = self.lcp.get_score()
event_info['new_score'] = new_score['score']
event_info['new_max_score'] = new_score['total']
event_info['new_total'] = new_score['total']
# success = correct if ALL questions in this problem are correct
success = 'correct'
......
......@@ -618,10 +618,11 @@ class CapaModuleTest(unittest.TestCase):
self.assertEqual(module.attempts, 1)
def test_rescore_problem_incorrect(self):
# make sure it also works when attempts have been reset,
# so add this to the test:
module = CapaFactory.create(attempts=0, done=True)
# Simulate that all answers are marked correct, no matter
# Simulate that all answers are marked incorrect, no matter
# what the input is, by patching LoncapaResponse.evaluate_answers()
with patch('capa.responsetypes.LoncapaResponse.evaluate_answers') as mock_evaluate_answers:
mock_evaluate_answers.return_value = CorrectMap(CapaFactory.answer_key(), 'incorrect')
......@@ -650,13 +651,8 @@ class CapaModuleTest(unittest.TestCase):
with self.assertRaises(NotImplementedError):
module.rescore_problem()
def test_rescore_problem_error(self):
# Try each exception that capa_module should handle
for exception_class in [StudentInputError,
LoncapaProblemError,
ResponseError]:
def _rescore_problem_error_helper(self, exception_class):
"""Helper to allow testing all errors that rescoring might return."""
# Create the module
module = CapaFactory.create(attempts=1, done=True)
......@@ -672,6 +668,15 @@ class CapaModuleTest(unittest.TestCase):
# Expect that the number of attempts is NOT incremented
self.assertEqual(module.attempts, 1)
def test_rescore_problem_student_input_error(self):
self._rescore_problem_error_helper(StudentInputError)
def test_rescore_problem_problem_error(self):
self._rescore_problem_error_helper(LoncapaProblemError)
def test_rescore_problem_response_error(self):
self._rescore_problem_error_helper(ResponseError)
def test_save_problem(self):
module = CapaFactory.create(done=False)
......
......@@ -8,8 +8,8 @@ from django.db import models
class Migration(SchemaMigration):
def forwards(self, orm):
# Adding model 'CourseTaskLog'
db.create_table('courseware_coursetasklog', (
# Adding model 'CourseTask'
db.create_table('courseware_coursetask', (
('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
('task_type', self.gf('django.db.models.fields.CharField')(max_length=50, db_index=True)),
('course_id', self.gf('django.db.models.fields.CharField')(max_length=255, db_index=True)),
......@@ -19,15 +19,15 @@ class Migration(SchemaMigration):
('task_state', self.gf('django.db.models.fields.CharField')(max_length=50, null=True, db_index=True)),
('task_output', self.gf('django.db.models.fields.CharField')(max_length=1024, null=True)),
('requester', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['auth.User'])),
('created', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, null=True, db_index=True, blank=True)),
('updated', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, db_index=True, blank=True)),
('created', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, null=True, blank=True)),
('updated', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, blank=True)),
))
db.send_create_signal('courseware', ['CourseTaskLog'])
db.send_create_signal('courseware', ['CourseTask'])
def backwards(self, orm):
# Deleting model 'CourseTaskLog'
db.delete_table('courseware_coursetasklog')
# Deleting model 'CourseTask'
db.delete_table('courseware_coursetask')
models = {
......@@ -67,10 +67,10 @@ class Migration(SchemaMigration):
'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
},
'courseware.coursetasklog': {
'Meta': {'object_name': 'CourseTaskLog'},
'courseware.coursetask': {
'Meta': {'object_name': 'CourseTask'},
'course_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}),
'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'null': 'True', 'blank': 'True'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'requester': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}),
'task_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}),
......@@ -79,7 +79,7 @@ class Migration(SchemaMigration):
'task_output': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True'}),
'task_state': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'db_index': 'True'}),
'task_type': ('django.db.models.fields.CharField', [], {'max_length': '50', 'db_index': 'True'}),
'updated': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'db_index': 'True', 'blank': 'True'})
'updated': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'})
},
'courseware.offlinecomputedgrade': {
'Meta': {'unique_together': "(('user', 'course_id'),)", 'object_name': 'OfflineComputedGrade'},
......
......@@ -265,7 +265,7 @@ class OfflineComputedGradeLog(models.Model):
return "[OCGLog] %s: %s" % (self.course_id, self.created)
class CourseTaskLog(models.Model):
class CourseTask(models.Model):
"""
Stores information about background tasks that have been submitted to
perform course-specific work.
......@@ -295,11 +295,11 @@ class CourseTaskLog(models.Model):
task_state = models.CharField(max_length=50, null=True, db_index=True) # max_length from celery_taskmeta
task_output = models.CharField(max_length=1024, null=True)
requester = models.ForeignKey(User, db_index=True)
created = models.DateTimeField(auto_now_add=True, null=True, db_index=True)
updated = models.DateTimeField(auto_now=True, db_index=True)
created = models.DateTimeField(auto_now_add=True, null=True)
updated = models.DateTimeField(auto_now=True)
def __repr__(self):
return 'CourseTaskLog<%r>' % ({
return 'CourseTask<%r>' % ({
'task_type': self.task_type,
'course_id': self.course_id,
'task_input': self.task_input,
......
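As a rough usage sketch, a row in the renamed model might be created as below; the field values and the `user` variable are hypothetical, and `PENDING` mirrors the factory change later in this commit.

```python
from celery.states import PENDING
from courseware.models import CourseTask

task = CourseTask.objects.create(
    task_type='rescore_problem',
    course_id='MITx/999/Robot_Super_Course',
    task_input='{}',               # JSON-encoded task arguments
    task_id='3c188a7c-0000-0000',  # hypothetical celery task id
    task_state=PENDING,
    requester=user,                # an existing auth.User instance
)
```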
......@@ -165,15 +165,15 @@ def get_xqueue_callback_url_prefix(request):
"""
Calculates default prefix based on request, but allows override via settings
This is separated so that it can be called by the LMS before submitting
background tasks to run. The xqueue callbacks should go back to the LMS,
not to the worker.
This is separated from get_module_for_descriptor so that it can be called
by the LMS before submitting background tasks to run. The xqueue callbacks
should go back to the LMS, not to the worker.
"""
default_xqueue_callback_url_prefix = '{proto}://{host}'.format(
prefix = '{proto}://{host}'.format(
proto=request.META.get('HTTP_X_FORWARDED_PROTO', 'https' if request.is_secure() else 'http'),
host=request.get_host()
)
return settings.XQUEUE_INTERFACE.get('callback_url', default_xqueue_callback_url_prefix)
return settings.XQUEUE_INTERFACE.get('callback_url', prefix)
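A hedged sketch of the override path shown above: if `XQUEUE_INTERFACE` defines `callback_url`, it wins over the request-derived prefix (the URL below is hypothetical):

```python
# settings.py (sketch)
XQUEUE_INTERFACE = {
    'callback_url': 'https://lms.example.com',
    # ... other xqueue settings elided ...
}

# get_xqueue_callback_url_prefix(request) now returns
# 'https://lms.example.com' regardless of the request's host header.
```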
def get_module_for_descriptor(user, request, descriptor, model_data_cache, course_id,
......@@ -192,9 +192,7 @@ def get_module_for_descriptor(user, request, descriptor, model_data_cache, cours
return get_module_for_descriptor_internal(user, descriptor, model_data_cache, course_id,
track_function, xqueue_callback_url_prefix,
position=position,
wrap_xmodule_display=wrap_xmodule_display,
grade_bucket_type=grade_bucket_type)
position, wrap_xmodule_display, grade_bucket_type)
def get_module_for_descriptor_internal(user, descriptor, model_data_cache, course_id,
......@@ -267,15 +265,15 @@ def get_module_for_descriptor_internal(user, descriptor, model_data_cache, cours
def inner_get_module(descriptor):
"""
Delegate to get_module. It does an access check, so may return None
Delegate to get_module_for_descriptor_internal() with all values except `descriptor` set.
Because it does an access check, it may return None.
"""
# TODO: fix this so that make_xqueue_callback uses the descriptor passed into
# inner_get_module, not the parent's callback. Add it as an argument....
return get_module_for_descriptor_internal(user, descriptor, model_data_cache, course_id,
track_function, make_xqueue_callback,
position=position,
wrap_xmodule_display=wrap_xmodule_display,
grade_bucket_type=grade_bucket_type)
position, wrap_xmodule_display, grade_bucket_type)
def xblock_model_data(descriptor):
return DbModel(
......
......@@ -10,8 +10,8 @@ from student.tests.factories import CourseEnrollmentAllowedFactory as StudentCou
from student.tests.factories import RegistrationFactory as StudentRegistrationFactory
from courseware.models import StudentModule, XModuleContentField, XModuleSettingsField
from courseware.models import XModuleStudentInfoField, XModuleStudentPrefsField
from courseware.models import CourseTaskLog
from courseware.models import CourseTask
from celery.states import PENDING
from xmodule.modulestore import Location
from pytz import UTC
......@@ -88,14 +88,14 @@ class StudentInfoFactory(DjangoModelFactory):
student = SubFactory(UserFactory)
class CourseTaskLogFactory(DjangoModelFactory):
FACTORY_FOR = CourseTaskLog
class CourseTaskFactory(DjangoModelFactory):
FACTORY_FOR = CourseTask
task_type = 'rescore_problem'
course_id = "MITx/999/Robot_Super_Course"
task_input = json.dumps({})
task_key = None
task_id = None
task_state = "QUEUED"
task_state = PENDING
task_output = None
requester = SubFactory(UserFactory)
......@@ -12,6 +12,13 @@
%if course_tasks is not None:
<script type="text/javascript">
// Define a CourseTaskProgress object for updating a table on the instructor
// dashboard that shows the background tasks currently running
// for the instructor's course. Any tasks that were running when the page is
// first displayed are passed in as course_tasks, and populate the "Pending Course
// Task" table. The CourseTaskProgress is bound to this table, and periodically
// polls the LMS to see if any of the tasks has completed. Once a task is complete,
// it is not included in any further polling.
(function() {
......@@ -24,7 +31,7 @@
// then don't set the timeout at all.)
var refresh_interval = 5000;
// Hardcode the initial delay, for the first refresh, to two seconds:
// Hardcode the initial delay before the first refresh to two seconds:
var initial_refresh_delay = 2000;
function CourseTaskProgress(element) {
......@@ -491,10 +498,6 @@ function goto( mode)
##-----------------------------------------------------------------------------
%if modeflag.get('Data'):
<p>
<input type="submit" name="action" value="Test Celery">
<p>
<hr width="40%" style="align:left">
<p>
<input type="submit" name="action" value="Download CSV of all student profile data">
......@@ -700,6 +703,30 @@ function goto( mode)
##-----------------------------------------------------------------------------
%if datatable and modeflag.get('Psychometrics') is None:
<br/>
<br/>
<p>
<hr width="100%">
<h2>${datatable['title'] | h}</h2>
<table class="stat_table">
<tr>
%for hname in datatable['header']:
<th>${hname}</th>
%endfor
</tr>
%for row in datatable['data']:
<tr>
%for value in row:
<td>${value}</td>
%endfor
</tr>
%endfor
</table>
</p>
%endif
## Output tasks in progress
%if course_tasks is not None and len(course_tasks) > 0:
......@@ -723,12 +750,12 @@ function goto( mode)
data-in-progress="true">
<td>${course_task.task_type}</td>
<td>${course_task.task_input}</td>
<td><div class="task-id">${course_task.task_id}</div></td>
<td class="task-id">${course_task.task_id}</td>
<td>${course_task.requester}</td>
<td>${course_task.created}</td>
<td><div class="task-state">${course_task.task_state}</div></td>
<td><div class="task-duration">unknown</div></td>
<td><div class="task-progress">unknown</div></td>
<td class="task-state">${course_task.task_state}</td>
<td class="task-duration">unknown</td>
<td class="task-progress">unknown</td>
</tr>
%endfor
</table>
......@@ -739,20 +766,20 @@ function goto( mode)
##-----------------------------------------------------------------------------
%if datatable and modeflag.get('Psychometrics') is None:
%if course_stats and modeflag.get('Psychometrics') is None:
<br/>
<br/>
<p>
<hr width="100%">
<h2>${datatable['title'] | h}</h2>
<h2>${course_stats['title']}</h2>
<table class="stat_table">
<tr>
%for hname in datatable['header']:
<th>${hname | h}</th>
%for hname in course_stats['header']:
<th>${hname}</th>
%endfor
</tr>
%for row in datatable['data']:
%for row in course_stats['data']:
<tr>
%for value in row:
<td>${value | h}</td>
......
......@@ -58,7 +58,7 @@ urlpatterns = ('', # nopep8
name='auth_password_reset_done'),
url(r'^heartbeat$', include('heartbeat.urls')),
url(r'^course_task_log_status/$', 'courseware.task_submit.course_task_log_status', name='course_task_log_status'),
url(r'^course_task_status/$', 'courseware.task_submit.course_task_status', name='course_task_status'),
)
# University profiles only make sense in the default edX context
......