Commit 5a07f1f0 by Eric Fischer

Remove AI grading

This work was done in the context of a larger PR; I'd be shocked if
this commit could be successfully reverted by itself. If you're trying
to restore AI grading, though, it's a good place to start.
parent 43aa53ea
aspell
g++
gcc
git
gfortran
libblas-dev
liblapack-dev
libatlas-base-dev
libfontconfig1
libmysqlclient-dev
libxml2-dev
libxslt1-dev
nodejs
npm
python2.7
python2.7-dev
python-mysqldb
python-pip
python-software-properties
rubygems
../openassessment/locale/
\ No newline at end of file
Log files:
apps_info.log = INFO level logging for all edx-ora2 apps and OpenAssessmentBlock
apps_debug.log = same as above, except DEBUG level
errors.log = all ERROR and CRITICAL logs, stack traces
events.log = Analytics events from the xblock-sdk workbench runtime's publish()
trace.log = The kitchen sink. Massive because of SQL debug logs from Django.
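For orientation, here is a minimal sketch of how handlers for these files could be wired up in a Django LOGGING dict. The file paths, handler names, and logger names below are illustrative assumptions, not the project's actual settings.
LOGGING = {
    'version': 1,
    'handlers': {
        # One file handler per log file described above (paths are placeholders)
        'apps_info': {'class': 'logging.FileHandler', 'filename': 'logs/apps_info.log', 'level': 'INFO'},
        'apps_debug': {'class': 'logging.FileHandler', 'filename': 'logs/apps_debug.log', 'level': 'DEBUG'},
        'errors': {'class': 'logging.FileHandler', 'filename': 'logs/errors.log', 'level': 'ERROR'},
    },
    'loggers': {
        # Route all openassessment loggers through the handlers above
        'openassessment': {'handlers': ['apps_info', 'apps_debug', 'errors'], 'level': 'DEBUG'},
    },
}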
"""
Public interface for AI training and grading, used by students/course authors.
"""
import logging
from django.db import DatabaseError
from submissions import api as sub_api
from openassessment.assessment.serializers import (
deserialize_training_examples, rubric_from_dict,
InvalidTrainingExample, InvalidRubric, full_assessment_dict
)
from openassessment.assessment.errors import (
AITrainingRequestError, AITrainingInternalError, AIGradingRequestError,
AIGradingInternalError, AIReschedulingRequestError, ANTICIPATED_CELERY_ERRORS
)
from openassessment.assessment.models import (
Assessment, AITrainingWorkflow, AIGradingWorkflow,
InvalidRubricSelection, NoTrainingExamples,
AI_ASSESSMENT_TYPE, AIClassifierSet
)
from openassessment.assessment.worker import training as training_tasks
from openassessment.assessment.worker import grading as grading_tasks
logger = logging.getLogger(__name__)
def submitter_is_finished(submission_uuid, ai_requirements):
"""
Determine if the submitter has finished their requirements for Example
Based Assessment. Always returns True.
Args:
submission_uuid (str): Not used.
ai_requirements (dict): Not used.
Returns:
True
"""
return True
def assessment_is_finished(submission_uuid, ai_requirements):
"""
Determine if the assessment of the given submission is completed. This
checks to see if the AI has completed the assessment.
Args:
submission_uuid (str): The UUID of the submission being graded.
ai_requirements (dict): Not used.
Returns:
True if the assessment has been completed for this submission.
"""
return bool(get_latest_assessment(submission_uuid))
def get_score(submission_uuid, ai_requirements):
"""
Generate a score based on a completed assessment for the given submission.
If no assessment has been completed for this submission, this will return
None.
Args:
submission_uuid (str): The UUID for the submission to get a score for.
ai_requirements (dict): Not used.
Returns:
A dictionary with the points earned, points possible, and
contributing_assessments information, along with a None staff_id.
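Example usage (an illustrative sketch; the submission UUID, assessment ID, and point values are made up):
>>> get_score('74a9d63e8a5fea369fd391d07befbd86ae4dc6e2', {})
{
    'points_earned': 6,
    'points_possible': 12,
    'contributing_assessments': [42],
    'staff_id': None
}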
"""
assessment = get_latest_assessment(submission_uuid)
if not assessment:
return None
return {
"points_earned": assessment["points_earned"],
"points_possible": assessment["points_possible"],
"contributing_assessments": [assessment["id"]],
"staff_id": None,
}
def on_init(submission_uuid, rubric=None, algorithm_id=None):
"""
Submit a response for AI assessment.
This will:
(a) create a workflow (database record) to track the grading task
(b) if classifiers exist for the rubric, schedule an asynchronous grading task.
Args:
submission_uuid (str): The UUID of the submission to assess.
Keyword Arguments:
rubric (dict): Serialized rubric model.
algorithm_id (unicode): Use only classifiers trained with the specified algorithm.
Returns:
grading_workflow_uuid (str): The UUID of the grading workflow.
Usually the caller of `on_init()` won't need this (since the workers
are parameterized by grading workflow UUID), but it's
useful for testing.
Raises:
AIGradingRequestError
AIGradingInternalError
Example Usage:
>>> on_init('74a9d63e8a5fea369fd391d07befbd86ae4dc6e2', rubric, 'ease')
'10df7db776686822e501b05f452dc1e4b9141fe5'
"""
if rubric is None:
raise AIGradingRequestError(u'No rubric provided')
if algorithm_id is None:
raise AIGradingRequestError(u'No algorithm ID provided')
try:
workflow = AIGradingWorkflow.start_workflow(submission_uuid, rubric, algorithm_id)
except (sub_api.SubmissionNotFoundError, sub_api.SubmissionRequestError) as ex:
msg = (
u"An error occurred while retrieving the "
u"submission with UUID {uuid}: {ex}"
).format(uuid=submission_uuid, ex=ex)
raise AIGradingRequestError(msg)
except InvalidRubric as ex:
msg = (
u"An error occurred while parsing the serialized "
u"rubric {rubric}: {ex}"
).format(rubric=rubric, ex=ex)
raise AIGradingRequestError(msg)
except (sub_api.SubmissionInternalError, DatabaseError) as ex:
msg = (
u"An unexpected error occurred while submitting an "
u"essay for AI grading: {ex}"
).format(ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
# If we find classifiers for this rubric/algorithm
# then associate the classifiers with the workflow
# and schedule a grading task.
# Otherwise, the task will need to be scheduled later,
# once the classifiers have been trained.
if workflow.classifier_set is not None:
try:
grading_tasks.grade_essay.apply_async(args=[workflow.uuid])
logger.info((
u"Scheduled grading task for AI grading workflow with UUID {workflow_uuid} "
u"(submission UUID = {sub_uuid}, algorithm ID = {algorithm_id})"
).format(workflow_uuid=workflow.uuid, sub_uuid=submission_uuid, algorithm_id=algorithm_id))
return workflow.uuid
except (DatabaseError,) + ANTICIPATED_CELERY_ERRORS as ex:
msg = (
u"An unexpected error occurred while scheduling the "
u"AI grading task for the submission with UUID {uuid}: {ex}"
).format(uuid=submission_uuid, ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
else:
logger.info((
u"Cannot schedule a grading task for AI grading workflow with UUID {workflow_uuid} "
u"because no classifiers are available for the rubric associated with submission {sub_uuid} "
u"for the algorithm {algorithm_id}"
).format(workflow_uuid=workflow.uuid, sub_uuid=submission_uuid, algorithm_id=algorithm_id))
def get_latest_assessment(submission_uuid):
"""
Retrieve the latest AI assessment for a submission.
Args:
submission_uuid (str): The UUID of the submission being assessed.
Returns:
dict: The serialized assessment model
or None if no assessments are available
Raises:
AIGradingInternalError
Example usage:
>>> get_latest_assessment('10df7db776686822e501b05f452dc1e4b9141fe5')
{
'points_earned': 6,
'points_possible': 12,
'scored_at': datetime.datetime(2014, 1, 29, 17, 14, 52, 649284, tzinfo=<UTC>),
'scorer': u"ease",
'feedback': u''
}
"""
try:
assessments = Assessment.objects.filter(
submission_uuid=submission_uuid,
score_type=AI_ASSESSMENT_TYPE,
)[:1]
except DatabaseError as ex:
msg = (
u"An error occurred while retrieving AI graded assessments "
u"for the submission with UUID {uuid}: {ex}"
).format(uuid=submission_uuid, ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
if len(assessments) > 0:
return full_assessment_dict(assessments[0])
else:
return None
def get_assessment_scores_by_criteria(submission_uuid):
"""Get the score for each rubric criterion
Args:
submission_uuid (str): The submission uuid is used to get the
assessment used to score this submission.
Returns:
(dict): A dictionary of rubric criterion names, with a score of
the example based assessments.
Raises:
AIGradingInternalError: If any error occurs while retrieving
information from the scores, an error is raised.
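Example usage (an illustrative sketch; the UUID, criterion names, and scores are made up):
>>> get_assessment_scores_by_criteria('74a9d63e8a5fea369fd391d07befbd86ae4dc6e2')
{
    u'vocabulary': 1,
    u'grammar': 2
}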
"""
try:
assessments = list(
Assessment.objects.filter(
score_type=AI_ASSESSMENT_TYPE, submission_uuid=submission_uuid
).order_by('-scored_at')[:1]
)
scores = Assessment.scores_by_criterion(assessments)
return Assessment.get_median_score_dict(scores)
except DatabaseError:
error_message = u"Error getting example-based assessment scores for {}".format(submission_uuid)
logger.exception(error_message)
raise AIGradingInternalError(error_message)
def train_classifiers(rubric_dict, examples, course_id, item_id, algorithm_id):
"""
Schedule a task to train classifiers.
All training examples must match the rubric!
After training of the classifiers completes successfully, any incomplete
AIGradingWorkflows will automatically be rescheduled.
Args:
rubric_dict (dict): The serialized rubric that the training examples are based on.
examples (list of dict): Serialized training examples.
course_id (unicode): The ID of the course associated with the training workflow.
item_id (unicode): The ID of the item that the classifiers are being trained to assess.
algorithm_id (unicode): The ID of the algorithm used to train the classifiers.
Returns:
training_workflow_uuid (str): The UUID of the training workflow.
Usually the caller will not need this (since the workers
are parametrized by training workflow UUID), but it's
useful for testing.
Raises:
AITrainingRequestError
AITrainingInternalError
Example usage:
>>> train_classifiers(rubric, examples, course_id, item_id, 'ease')
'10df7db776686822e501b05f452dc1e4b9141fe5'
"""
# Get or create the rubric and training examples
try:
examples = deserialize_training_examples(examples, rubric_dict)
except (InvalidRubric, InvalidTrainingExample, InvalidRubricSelection) as ex:
msg = u"Could not parse rubric and/or training examples: {ex}".format(ex=ex)
raise AITrainingRequestError(msg)
# Create the workflow model
try:
workflow = AITrainingWorkflow.start_workflow(examples, course_id, item_id, algorithm_id)
except NoTrainingExamples as ex:
raise AITrainingRequestError(ex)
except:
msg = (
u"An unexpected error occurred while creating "
u"the AI training workflow"
)
logger.exception(msg)
raise AITrainingInternalError(msg)
# Schedule the task, parametrized by the workflow UUID
try:
training_tasks.train_classifiers.apply_async(args=[workflow.uuid])
except ANTICIPATED_CELERY_ERRORS as ex:
msg = (
u"An unexpected error occurred while scheduling incomplete training workflows with"
u" course_id={cid} and item_id={iid}: {ex}"
).format(cid=course_id, iid=item_id, ex=ex)
logger.exception(msg)
raise AITrainingInternalError(msg)
# Return the workflow UUID
return workflow.uuid
def reschedule_unfinished_tasks(course_id=None, item_id=None, task_type=u"grade"):
"""
Check for unfinished tasks (both grading and training) and reschedule them.
Tasks are restricted to a single course and item, and optionally to a task type.
The default is to reschedule only the unfinished grading tasks; this is how the
button in the staff mixin invokes this function (with no task_type argument).
Keyword Arguments:
course_id (unicode): Restrict to unfinished tasks in a particular course.
item_id (unicode): Restrict to unfinished tasks for a particular item in a course.
NOTE: both the course ID and the item ID are required; if either is missing, AIReschedulingRequestError is raised.
task_type (unicode): Either "grade" or "train". Restrict to unfinished tasks of this type.
if task_type is specified as None, both training and grading will be rescheduled, in that order.
Raises:
AIGradingInternalError
AITrainingInternalError
AIReschedulingRequestError
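Example usage (an illustrative sketch; the course and item IDs are placeholders):
>>> reschedule_unfinished_tasks(course_id=u'test/course/id', item_id=u'test_item', task_type=u"grade")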
"""
if course_id is None or item_id is None:
msg = u"Rescheduling tasks was not possible because the course_id / item_id was not assigned."
logger.error(msg)
raise AIReschedulingRequestError(msg)
# Reschedules all of the training tasks
if task_type == u"train" or task_type is None:
try:
training_tasks.reschedule_training_tasks.apply_async(args=[course_id, item_id])
except ANTICIPATED_CELERY_ERRORS as ex:
msg = (
u"Rescheduling training tasks for course {cid} and item {iid} failed with exception: {ex}"
).format(cid=course_id, iid=item_id, ex=ex)
logger.exception(msg)
raise AITrainingInternalError(msg)
# Reschedules all of the grading tasks
if task_type == u"grade" or task_type is None:
try:
grading_tasks.reschedule_grading_tasks.apply_async(args=[course_id, item_id])
except ANTICIPATED_CELERY_ERRORS as ex:
msg = (
u"Rescheduling grading tasks for course {cid} and item {iid} failed with exception: {ex}"
).format(cid=course_id, iid=item_id, ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
def get_classifier_set_info(rubric_dict, algorithm_id, course_id, item_id):
"""
Get information about the classifier set available for a particular problem.
This is the classifier set that would be selected to grade essays for the problem.
Args:
rubric_dict (dict): The serialized rubric model.
algorithm_id (unicode): The algorithm to use for classification.
course_id (unicode): The course identifier for the current problem.
item_id (unicode): The item identifier for the current problem.
Returns:
dict with keys 'created_at', 'algorithm_id', 'course_id', and 'item_id'
Note that the course ID and item ID in the returned dict might differ from those of
the current problem if a classifier set from a different problem with a similar rubric
is the best available match.
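Example usage (an illustrative sketch; the IDs and timestamp are made up):
>>> get_classifier_set_info(rubric, 'ease', 'test/course/id', 'test_item')
{
    'created_at': datetime.datetime(2014, 1, 29, 17, 14, 52, tzinfo=<UTC>),
    'algorithm_id': u'ease',
    'course_id': u'test/course/id',
    'item_id': u'test_item'
}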
"""
try:
rubric = rubric_from_dict(rubric_dict)
classifier_set = AIClassifierSet.most_recent_classifier_set(
rubric, algorithm_id, course_id, item_id
)
if classifier_set is not None:
return {
'created_at': classifier_set.created_at,
'algorithm_id': classifier_set.algorithm_id,
'course_id': classifier_set.course_id,
'item_id': classifier_set.item_id
}
else:
return None
except InvalidRubric:
msg = u"Could not retrieve classifier set info: the rubric definition was not valid."
logger.exception(msg)
raise AIGradingRequestError(msg)
except DatabaseError as ex:
msg = u"An unexpected error occurred while retrieving classifier set info: {ex}".format(ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
"""
Public interface for AI training and grading, used by workers.
"""
import logging
from httplib import HTTPException
from django.db import DatabaseError
from dogapi import dog_stats_api
from openassessment.assessment.models import (
essay_text_from_submission,
AITrainingWorkflow, AIGradingWorkflow,
ClassifierUploadError, ClassifierSerializeError,
IncompleteClassifierSet, NoTrainingExamples,
InvalidRubricSelection
)
from openassessment.assessment.errors import (
AITrainingRequestError, AITrainingInternalError,
AIGradingRequestError, AIGradingInternalError
)
logger = logging.getLogger(__name__)
@dog_stats_api.timed('openassessment.assessment.ai.get_grading_task_params')
def get_grading_task_params(grading_workflow_uuid):
"""
Retrieve the classifier set and algorithm ID
associated with a particular grading workflow.
Args:
grading_workflow_uuid (str): The UUID of the grading workflow.
Returns:
dict with keys:
* essay_text (unicode): The text of the essay submission.
* classifier_set (dict): Maps criterion names to serialized classifiers.
* valid_scores (dict): Maps criterion names to a list of valid scores for that criterion.
* algorithm_id (unicode): ID of the algorithm used to perform training.
Raises:
AIGradingRequestError
AIGradingInternalError
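Example usage (an illustrative sketch; the workflow UUID, essay text, and classifier payloads are made up,
since the real classifier format is whatever the training algorithm serializes):
>>> get_grading_task_params('10df7db776686822e501b05f452dc1e4b9141fe5')
{
    'essay_text': u'This is my essay.',
    'classifier_set': {
        u'vocabulary': {...},
        u'grammar': {...}
    },
    'valid_scores': {
        u'vocabulary': [0, 1, 2],
        u'grammar': [0, 1, 2]
    },
    'algorithm_id': u'ease'
}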
"""
try:
workflow = AIGradingWorkflow.objects.get(uuid=grading_workflow_uuid)
except AIGradingWorkflow.DoesNotExist:
msg = (
u"Could not retrieve the AI grading workflow with uuid {}"
).format(grading_workflow_uuid)
raise AIGradingRequestError(msg)
except DatabaseError as ex:
msg = (
u"An unexpected error occurred while retrieving the "
u"AI grading workflow with uuid {uuid}: {ex}"
).format(uuid=grading_workflow_uuid, ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
classifier_set = workflow.classifier_set
# Though tasks shouldn't be scheduled until a classifier set exists, off the happy path this is a likely
# occurrence. Our response is to log this broken dependency as an exception, and then raise
# an error in order to kill the celery task running this code.
if classifier_set is None:
msg = (
u"AI grading workflow with UUID {} has no classifier set, but was scheduled for grading"
).format(grading_workflow_uuid)
logger.exception(msg)
raise AIGradingInternalError(msg)
try:
return {
'essay_text': workflow.essay_text,
'classifier_set': workflow.classifier_set.classifier_data_by_criterion,
'algorithm_id': workflow.algorithm_id,
'valid_scores': workflow.classifier_set.valid_scores_by_criterion,
}
except (
DatabaseError, ClassifierSerializeError, IncompleteClassifierSet,
ValueError, IOError, HTTPException
) as ex:
msg = (
u"An unexpected error occurred while retrieving "
u"classifiers for the grading workflow with UUID {uuid}: {ex}"
).format(uuid=grading_workflow_uuid, ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
@dog_stats_api.timed('openassessment.assessment.ai.create_assessment')
def create_assessment(grading_workflow_uuid, criterion_scores):
"""
Create an AI assessment (complete the AI grading task).
Args:
grading_workflow_uuid (str): The UUID of the grading workflow.
criterion_scores (dict): Dictionary mapping criteria names to integer scores.
Returns:
None
Raises:
AIGradingRequestError
AIGradingInternalError
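Example usage (an illustrative sketch; the workflow UUID, criterion names, and scores are made up):
>>> create_assessment('10df7db776686822e501b05f452dc1e4b9141fe5', {
...     u'vocabulary': 1,
...     u'grammar': 2
... })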
"""
try:
workflow = AIGradingWorkflow.objects.get(uuid=grading_workflow_uuid)
except AIGradingWorkflow.DoesNotExist:
msg = (
u"Could not retrieve the AI grading workflow with uuid {}"
).format(grading_workflow_uuid)
raise AIGradingRequestError(msg)
except DatabaseError as ex:
msg = (
u"An unexpected error occurred while retrieving the "
u"AI grading workflow with uuid {uuid}: {ex}"
).format(uuid=grading_workflow_uuid, ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
# Optimization: if the workflow has already been marked complete
# (perhaps the task was picked up by multiple workers),
# then we don't need to do anything.
# Otherwise, create the assessment and mark the workflow complete.
try:
if not workflow.is_complete:
workflow.complete(criterion_scores)
logger.info((
u"Created assessment for AI grading workflow with UUID {workflow_uuid} "
u"(algorithm ID {algorithm_id})"
).format(workflow_uuid=workflow.uuid, algorithm_id=workflow.algorithm_id))
else:
msg = u"Grading workflow with UUID {} is already marked complete".format(workflow.uuid)
logger.info(msg)
except DatabaseError as ex:
msg = (
u"An unexpected error occurred while creating the assessment "
u"for AI grading workflow with uuid {uuid}: {ex}"
).format(uuid=grading_workflow_uuid, ex=ex)
logger.exception(msg)
raise AIGradingInternalError(msg)
# Fire a signal to update the workflow API
# This will allow students to receive a score if they're
# waiting on an AI assessment.
# The signal receiver is responsible for catching and logging
# all exceptions that may occur when updating the workflow.
from openassessment.assessment.signals import assessment_complete_signal
assessment_complete_signal.send(sender=None, submission_uuid=workflow.submission_uuid)
@dog_stats_api.timed('openassessment.assessment.ai.get_training_task_params')
def get_training_task_params(training_workflow_uuid):
"""
Retrieve the training examples and algorithm ID
associated with a training task.
Args:
training_workflow_uuid (str): The UUID of the training workflow.
Returns:
dict with keys:
* training_examples (list of dict): The examples used to train the classifiers.
* course_id (unicode): The course ID that the training task is associated with.
* item_id (unicode): Identifies the item that the AI will be training to grade.
* algorithm_id (unicode): The ID of the algorithm to use for training.
Raises:
AITrainingRequestError
AITrainingInternalError
Example usage:
>>> params = get_training_task_params('abcd1234')
>>> params['algorithm_id']
u'ease'
>>> params['training_examples']
[
{
"text": u"Example answer number one",
"scores": {
"vocabulary": 1,
"grammar": 2
}
},
{
"text": u"Example answer number two",
"scores": {
"vocabulary": 3,
"grammar": 1
}
}
]
"""
try:
workflow = AITrainingWorkflow.objects.get(uuid=training_workflow_uuid)
returned_examples = []
for example in workflow.training_examples.all():
scores = {
option.criterion.name: option.points
for option in example.options_selected.all()
}
returned_examples.append({
'text': essay_text_from_submission({'answer': example.answer}),
'scores': scores
})
return {
'training_examples': returned_examples,
'algorithm_id': workflow.algorithm_id,
'course_id': workflow.course_id,
'item_id': workflow.item_id
}
except AITrainingWorkflow.DoesNotExist:
msg = (
u"Could not retrieve AI training workflow with UUID {}"
).format(training_workflow_uuid)
raise AITrainingRequestError(msg)
except DatabaseError:
msg = (
u"An unexpected error occurred while retrieving "
u"training examples for the AI training workflow with UUID {}"
).format(training_workflow_uuid)
logger.exception(msg)
raise AITrainingInternalError(msg)
@dog_stats_api.timed('openassessment.assessment.ai.create_classifiers')
def create_classifiers(training_workflow_uuid, classifier_set):
"""
Upload trained classifiers and mark the workflow complete.
If grading tasks were submitted before any classifiers were trained,
this call will automatically reschedule those tasks.
Args:
training_workflow_uuid (str): The UUID of the training workflow.
classifier_set (dict): Mapping of criteria names to serialized classifiers.
Returns:
None
Raises:
AITrainingRequestError
AITrainingInternalError
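Example usage (an illustrative sketch; the workflow UUID and classifier payloads are made up,
since the real payload format is whatever the training algorithm serializes):
>>> create_classifiers('10df7db776686822e501b05f452dc1e4b9141fe5', {
...     u'vocabulary': {'classifier': 'serialized classifier data'},
...     u'grammar': {'classifier': 'serialized classifier data'}
... })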
"""
try:
workflow = AITrainingWorkflow.objects.get(uuid=training_workflow_uuid)
# If the task is executed multiple times, the classifier set may already
# have been created. If so, log it, then return immediately.
if workflow.is_complete:
msg = u"AI training workflow with UUID {} already has trained classifiers.".format(workflow.uuid)
logger.info(msg)
else:
workflow.complete(classifier_set)
logger.info((
u"Created trained classifiers for the AI training workflow with UUID {workflow_uuid} "
u"(using algorithm ID {algorithm_id})"
).format(workflow_uuid=workflow.uuid, algorithm_id=workflow.algorithm_id))
except AITrainingWorkflow.DoesNotExist:
msg = (
u"Could not retrieve AI training workflow with UUID {}"
).format(training_workflow_uuid)
raise AITrainingRequestError(msg)
except NoTrainingExamples as ex:
logger.exception(ex)
raise AITrainingInternalError(ex)
except (IncompleteClassifierSet, InvalidRubricSelection) as ex:
msg = (
u"An error occurred while creating the classifier set "
u"for the training workflow with UUID {uuid}: {ex}"
).format(uuid=training_workflow_uuid, ex=ex)
raise AITrainingRequestError(msg)
except (ClassifierSerializeError, ClassifierUploadError, DatabaseError) as ex:
msg = (
u"An unexpected error occurred while creating the classifier "
u"set for training workflow UUID {uuid}: {ex}"
).format(uuid=training_workflow_uuid, ex=ex)
logger.exception(msg)
raise AITrainingInternalError(msg)
def is_training_workflow_complete(workflow_uuid):
"""
Check whether the training workflow is complete.
Args:
workflow_uuid (str): The UUID of the training workflow
Returns:
bool
Raises:
AITrainingRequestError
AITrainingInternalError
"""
try:
return AITrainingWorkflow.is_workflow_complete(workflow_uuid)
except AITrainingWorkflow.DoesNotExist:
msg = (
u"Could not retrieve training workflow "
u"with uuid {uuid} to check whether it's complete."
).format(uuid=workflow_uuid)
raise AITrainingRequestError(msg)
except DatabaseError:
msg = (
u"An unexpected error occurred while checking "
u"the training workflow with uuid {uuid} for completeness"
).format(uuid=workflow_uuid)
raise AITrainingInternalError(msg)
def is_grading_workflow_complete(workflow_uuid):
"""
Check whether the grading workflow is complete.
Args:
workflow_uuid (str): The UUID of the grading workflow
Returns:
bool
Raises:
AIGradingRequestError
AIGradingInternalError
"""
try:
return AIGradingWorkflow.is_workflow_complete(workflow_uuid)
except AIGradingWorkflow.DoesNotExist:
msg = (
u"Could not retrieve grading workflow "
u"with uuid {uuid} to check whether it's complete."
).format(uuid=workflow_uuid)
raise AIGradingRequestError(msg)
except DatabaseError:
msg = (
u"An unexpected error occurred while checking "
u"the grading workflow with uuid {uuid} for completeness"
).format(uuid=workflow_uuid)
raise AIGradingInternalError(msg)
"""
Errors related to AI assessment.
"""
from celery.exceptions import InvalidTaskError, NotConfigured, NotRegistered, QueueNotFound
from socket import error as socket_error
ANTICIPATED_CELERY_ERRORS = (InvalidTaskError, NotConfigured, NotRegistered, QueueNotFound, socket_error)
class AIError(Exception):
"""
A general error occurred while using the AI assessment API.
"""
pass
class AITrainingRequestError(AIError):
"""
There was a problem with the request sent to the AI assessment API.
"""
pass
class AITrainingInternalError(AIError):
"""
An unexpected error occurred while using the AI assessment API.
"""
pass
class AIGradingRequestError(AIError):
"""
There was a problem with the request sent to the AI assessment API.
"""
pass
class AIGradingInternalError(AIError):
"""
An unexpected error occurred while using the AI assessment API.
"""
pass
class AIReschedulingRequestError(AIError):
"""
There was a problem with the request sent to the AI assessment API.
"""
pass
class AIReschedulingInternalError(AIError):
"""
An unexpected error occurred while using the AI assessment API.
"""
pass
...@@ -4,8 +4,6 @@ from __future__ import unicode_literals
from django.db import models, migrations
import django.utils.timezone
import django_extensions.db.fields
import openassessment.assessment.models.ai
class Migration(migrations.Migration):
...@@ -15,54 +13,6 @@ class Migration(migrations.Migration):
operations = [
migrations.CreateModel(
name='AIClassifier',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('classifier_data', models.FileField(upload_to=openassessment.assessment.models.ai.upload_to_path)),
],
),
migrations.CreateModel(
name='AIClassifierSet',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('created_at', models.DateTimeField(default=django.utils.timezone.now, db_index=True)),
('algorithm_id', models.CharField(max_length=128, db_index=True)),
('course_id', models.CharField(max_length=40, db_index=True)),
('item_id', models.CharField(max_length=128, db_index=True)),
],
options={
'ordering': ['-created_at', '-id'],
},
),
migrations.CreateModel(
name='AIGradingWorkflow',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('uuid', django_extensions.db.fields.UUIDField(db_index=True, unique=True, version=1, editable=False, blank=True)),
('course_id', models.CharField(max_length=40, db_index=True)),
('item_id', models.CharField(max_length=128, db_index=True)),
('scheduled_at', models.DateTimeField(default=django.utils.timezone.now, db_index=True)),
('completed_at', models.DateTimeField(null=True, db_index=True)),
('algorithm_id', models.CharField(max_length=128, db_index=True)),
('submission_uuid', models.CharField(max_length=128, db_index=True)),
('essay_text', models.TextField(blank=True)),
('student_id', models.CharField(max_length=40, db_index=True)),
],
),
migrations.CreateModel(
name='AITrainingWorkflow',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('uuid', django_extensions.db.fields.UUIDField(db_index=True, unique=True, version=1, editable=False, blank=True)),
('course_id', models.CharField(max_length=40, db_index=True)),
('item_id', models.CharField(max_length=128, db_index=True)),
('scheduled_at', models.DateTimeField(default=django.utils.timezone.now, db_index=True)),
('completed_at', models.DateTimeField(null=True, db_index=True)),
('algorithm_id', models.CharField(max_length=128, db_index=True)),
('classifier_set', models.ForeignKey(related_name='+', default=None, to='assessment.AIClassifierSet', null=True)),
],
),
migrations.CreateModel(
name='Assessment',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
...@@ -235,41 +185,6 @@ class Migration(migrations.Migration):
name='rubric',
field=models.ForeignKey(to='assessment.Rubric'),
),
migrations.AddField(
model_name='aitrainingworkflow',
name='training_examples',
field=models.ManyToManyField(related_name='+', to='assessment.TrainingExample'),
),
migrations.AddField(
model_name='aigradingworkflow',
name='assessment',
field=models.ForeignKey(related_name='+', default=None, to='assessment.Assessment', null=True),
),
migrations.AddField(
model_name='aigradingworkflow',
name='classifier_set',
field=models.ForeignKey(related_name='+', default=None, to='assessment.AIClassifierSet', null=True),
),
migrations.AddField(
model_name='aigradingworkflow',
name='rubric',
field=models.ForeignKey(related_name='+', to='assessment.Rubric'),
),
migrations.AddField(
model_name='aiclassifierset',
name='rubric',
field=models.ForeignKey(related_name='+', to='assessment.Rubric'),
),
migrations.AddField(
model_name='aiclassifier',
name='classifier_set',
field=models.ForeignKey(related_name='classifiers', to='assessment.AIClassifierSet'),
),
migrations.AddField(
model_name='aiclassifier',
name='criterion',
field=models.ForeignKey(related_name='+', to='assessment.Criterion'),
),
migrations.AlterUniqueTogether(
name='studenttrainingworkflowitem',
unique_together=set([('workflow', 'order_num')]),
...
...@@ -13,21 +13,6 @@ class Migration(migrations.Migration):
operations = [
migrations.AlterField(
model_name='aiclassifierset',
name='course_id',
field=models.CharField(max_length=255, db_index=True),
),
migrations.AlterField(
model_name='aigradingworkflow',
name='course_id',
field=models.CharField(max_length=255, db_index=True),
),
migrations.AlterField(
model_name='aitrainingworkflow',
name='course_id',
field=models.CharField(max_length=255, db_index=True),
),
migrations.AlterField(
model_name='peerworkflow',
name='course_id',
field=models.CharField(max_length=255, db_index=True),
...
""" # This file is empty, but we cannot delete it b/c historical migration records refer to it.
Database models for AI assessment.
"""
from uuid import uuid4
import json
import logging
from django.conf import settings
from django.core import signals
from django.core.files.base import ContentFile
from django.core.cache import cache, _create_cache
from django.db import models, transaction, DatabaseError
from django.utils.timezone import now
from django_extensions.db.fields import UUIDField
from dogapi import dog_stats_api
from submissions import api as sub_api
from .base import Rubric, Criterion, Assessment, AssessmentPart
from .training import TrainingExample
AI_ASSESSMENT_TYPE = "AI"
logger = logging.getLogger(__name__)
def create_cache(backend, **kwargs):
"""
Create a cache backend. We use this custom function to avoid deprecation warnings.
"""
cache = _create_cache(backend, **kwargs)
# Some caches -- python-memcached in particular -- need to do a cleanup at the
# end of a request cycle. If not implemented in a particular backend
# cache.close is a no-op
signals.request_finished.connect(cache.close)
return cache
# Use an in-memory cache to hold classifier data, but allow settings to override this.
# The classifier data will generally be larger than memcached's default max size
CLASSIFIERS_CACHE_IN_MEM = getattr(
settings, 'ORA2_CLASSIFIERS_CACHE_IN_MEM',
create_cache(
'django.core.cache.backends.locmem.LocMemCache',
LOCATION='openassessment.ai.classifiers_dict'
)
)
CLASSIFIERS_CACHE_IN_FILE = getattr(
settings, 'ORA2_CLASSIFIERS_CACHE_IN_FILE',
create_cache(
'django.core.cache.backends.filebased.FileBasedCache',
LOCATION='/tmp/ora2_classifier_cache'
)
)
def essay_text_from_submission(submission):
"""
Retrieve the submission text.
Submissions are arbitrary JSON-blobs, which *should*
contain a single key, "answer", containing the essay
submission text.
If not, though, assume we've been given the essay text
directly (convenient for testing).
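Example usage (an illustrative sketch with made-up essay text, covering the formats handled below):
>>> essay_text_from_submission({'answer': {'text': u'The essay text.'}})
u'The essay text.'
>>> essay_text_from_submission({'answer': {'parts': [{'text': u'Part 1.'}, {'text': u'Part 2.'}]}})
u'Part 1.\nPart 2.'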
"""
if isinstance(submission, dict):
if 'answer' in submission:
# Training examples store the answer as a plain unicode string.
if isinstance(submission['answer'], unicode):
return submission['answer']
# Initially there was one prompt and submission had the structure
# {'answer': {'text': 'The text.'}}
elif 'text' in submission['answer']:
essay_text = submission['answer']['text']
# When multiple prompts were introduced, the structure of the submission became:
# {'answer': {'parts': [{'text': 'The text part 1.'}, {'text': 'The text part 2.'}]}}
# We concatenate these parts and let the AI grader evaluate the combined text.
else:
essay_text = u'\n'.join([part['text'] for part in submission['answer']['parts']])
else:
essay_text = unicode(submission)
return essay_text
class IncompleteClassifierSet(Exception):
"""
The classifier set is missing a classifier for a criterion in the rubric.
"""
def __init__(self, missing_criteria):
"""
Construct an error message that explains which criteria were missing.
Args:
missing_criteria (list): The list of criteria names that were missing.
"""
msg = (
u"Missing classifiers for the following "
u"criteria: {missing}"
).format(missing=missing_criteria)
super(IncompleteClassifierSet, self).__init__(msg)
class ClassifierUploadError(Exception):
"""
An error occurred while uploading classifier data.
"""
pass
class ClassifierSerializeError(Exception):
"""
An error occurred while serializing classifier data.
"""
pass
class NoTrainingExamples(Exception):
"""
No training examples were provided to the workflow.
"""
def __init__(self, workflow_uuid=None):
msg = u"No training examples were provided"
if workflow_uuid is not None:
msg = u"{msg} to the training workflow with UUID {uuid}".format(
msg=msg, uuid=workflow_uuid
)
super(NoTrainingExamples, self).__init__(msg)
class AIClassifierSet(models.Model):
"""
A set of trained classifiers (immutable).
"""
class Meta:
app_label = "assessment"
ordering = ['-created_at', '-id']
# The rubric associated with this set of classifiers
# We should have one classifier for each of the criteria in the rubric.
rubric = models.ForeignKey(Rubric, related_name="+")
# Timestamp for when the classifier set was created.
# This allows us to find the most recently trained set of classifiers.
created_at = models.DateTimeField(default=now, db_index=True)
# The ID of the algorithm that was used to train classifiers in this set.
algorithm_id = models.CharField(max_length=128, db_index=True)
# Course Entity and Item Discriminator
# Though these fields are duplicated in the database tables for the AITrainingWorkflow,
# this is okay because it drastically speeds up assigning classifiers
# to AIGradingWorkflows
course_id = models.CharField(max_length=255, db_index=True)
item_id = models.CharField(max_length=128, db_index=True)
@classmethod
@transaction.atomic
def create_classifier_set(cls, classifiers_dict, rubric, algorithm_id, course_id, item_id):
"""
Create a set of classifiers.
Args:
classifiers_dict (dict): Mapping of criterion names to
JSON-serializable classifiers.
rubric (Rubric): The rubric model.
algorithm_id (unicode): The ID of the algorithm used to train the classifiers.
course_id (unicode): The ID of the course that the classifier is going to be grading
item_id (unicode): The item within the course that the classifier is trained to grade.
Returns:
AIClassifierSet
Raises:
ClassifierSerializeError
ClassifierUploadError
InvalidRubricSelection
DatabaseError
"""
# Create the classifier set
classifier_set = cls.objects.create(
rubric=rubric, algorithm_id=algorithm_id, item_id=item_id, course_id=course_id
)
# Retrieve the criteria for this rubric,
# then organize them by criterion name
try:
rubric_index = rubric.index
except DatabaseError as ex:
msg = (
u"An unexpected error occurred while retrieving rubric criteria with the"
u"rubric hash {rh} and algorithm_id {aid}: {ex}"
).format(rh=rubric.content_hash, aid=algorithm_id, ex=ex)
logger.exception(msg)
raise
# Check that we have classifiers for all criteria in the rubric
# Ignore criteria that have no options: since these have only written feedback,
# we can't assign them a score.
all_criteria = set(classifiers_dict.keys())
all_criteria |= set(
criterion.name for criterion in
rubric_index.find_criteria_without_options()
)
missing_criteria = rubric_index.find_missing_criteria(all_criteria)
if missing_criteria:
raise IncompleteClassifierSet(missing_criteria)
# Create classifiers for each criterion
for criterion_name, classifier_data in classifiers_dict.iteritems():
classifier = AIClassifier.objects.create(
classifier_set=classifier_set,
criterion=rubric_index.find_criterion(criterion_name)
)
# Serialize the classifier data and upload
try:
contents = ContentFile(json.dumps(classifier_data))
except (TypeError, ValueError, UnicodeDecodeError) as ex:
msg = (
u"Could not serialize classifier data as JSON: {ex}"
).format(ex=ex)
raise ClassifierSerializeError(msg)
filename = uuid4().hex
try:
classifier.classifier_data.save(filename, contents)
except Exception as ex:
full_filename = upload_to_path(classifier, filename)
msg = (
u"Could not upload classifier data to {filename}: {ex}"
).format(filename=full_filename, ex=ex)
raise ClassifierUploadError(msg)
return classifier_set
@classmethod
def most_recent_classifier_set(cls, rubric, algorithm_id, course_id, item_id):
"""
Finds the most relevant classifier set based on the following line of succession:
1 -- Classifier sets with the same COURSE, ITEM, RUBRIC *content* hash, and ALGORITHM
- Newest first. If none exist...
2 -- Classifier sets with the same COURSE, ITEM, and RUBRIC *structure* hash, and ALGORITHM.
- Newest first. If none exist...
3 -- The newest classifier set with the same RUBRIC and ALGORITHM
- Newest first. If none exist...
4 -- Do no assignment and return None
Case #1 is ideal: we get a classifier set trained for the rubric as currently defined.
Case #2 handles when a course author makes a cosmetic change to a rubric after training.
We don't want to stop grading students because an author fixed a typo!
Case #3 handles problems that are duplicated, such as the default problem prompt.
If we've already trained classifiers for the identical rubric somewhere else,
then the author can use them to test out the feature immediately.
Case #4: Someone will need to schedule training; however, we will still accept
student submissions and grade them once training completes.
Args:
rubric (Rubric): The rubric associated with the classifier set.
algorithm_id (unicode): The algorithm used to create the classifier set.
course_id (unicode): The course identifier for the current problem.
item_id (unicode): The item identifier for the current problem.
Returns:
ClassifierSet or None
Raises:
DatabaseError
"""
# List of the parameters we will search for, in order of decreasing priority
search_parameters = [
# Case #1: same course / item / rubric (exact) / algorithm
{
'rubric__content_hash': rubric.content_hash,
'algorithm_id': algorithm_id,
'course_id': course_id,
'item_id': item_id
},
# Case #2: same course / item / rubric (structure only) / algorithm
{
'rubric__structure_hash': rubric.structure_hash, # pylint: disable=E1101
'algorithm_id': algorithm_id,
'course_id': course_id,
'item_id': item_id
},
# Case #3: same rubric (exact) / algorithm
{
'rubric__content_hash': rubric.content_hash,
'algorithm_id': algorithm_id
}
]
# Perform each query, starting with the highest priority
for params in search_parameters:
# Retrieve the most recent classifier set that matches our query
# (rely on implicit ordering in the model definition)
classifier_set_candidates = cls.objects.filter(**params)[:1]
# If we find a matching classifier set,
# return the most recent one
if len(classifier_set_candidates) > 0:
return classifier_set_candidates[0]
# If we get to this point, no classifiers exist with this rubric and algorithm.
return None
@property
def classifier_data_by_criterion(self):
"""
Return info for all classifiers in this classifier set in a dictionary
that maps criteria names to classifier data.
Returns:
dict: keys are criteria names, values are JSON-serializable classifier data
Raises:
ValueError
IOError
httplib.HTTPException
"""
# First check the in-memory cache
# We use an in-memory cache because the classifier data will most often
# be several megabytes, which exceeds the default memcached size limit.
# If we find it, we can avoid calls to the database, S3, and json.
cache_key = self._cache_key("classifier_data_by_criterion")
classifiers_dict = CLASSIFIERS_CACHE_IN_MEM.get(cache_key)
# If we can't find the classifier in-memory, check the filesystem cache
# We can't always rely on the in-memory cache because worker processes
# terminate when max retries are exceeded.
if classifiers_dict is None:
msg = (
u"Could not find classifiers dict in the in-memory "
u"cache for key {key}. Falling back to the file-based cache."
).format(key=cache_key)
logger.info(msg)
classifiers_dict = CLASSIFIERS_CACHE_IN_FILE.get(cache_key)
else:
msg = (
u"Found classifiers dict in the in-memory cache "
u"(cache key was {key})"
).format(key=cache_key)
logger.info(msg)
# If we can't find the classifiers dict in the cache,
# we need to look up the classifiers in the database,
# then download the classifier data.
if classifiers_dict is None:
classifiers_dict = {
classifier.criterion.name: classifier.download_classifier_data()
for classifier in self.classifiers.select_related().all() # pylint: disable=E1101
}
CLASSIFIERS_CACHE_IN_MEM.set(cache_key, classifiers_dict)
CLASSIFIERS_CACHE_IN_FILE.set(cache_key, classifiers_dict)
msg = (
u"Could not find classifiers dict in either the in-memory "
u"or file-based cache. Downloaded the data from S3 and cached "
u"it using key {key}"
).format(key=cache_key)
logger.info(msg)
return classifiers_dict
@property
def valid_scores_by_criterion(self):
"""
Return the valid scores for each classifier in this classifier set.
Returns:
dict: maps rubric criterion names to lists of valid scores.
"""
cache_key = self._cache_key("valid_scores_by_criterion")
valid_scores_by_criterion = cache.get(cache_key)
if valid_scores_by_criterion is None:
valid_scores_by_criterion = {
classifier.criterion.name: classifier.valid_scores
for classifier in self.classifiers.select_related().all() # pylint: disable=E1101
}
cache.set(cache_key, valid_scores_by_criterion)
return valid_scores_by_criterion
def _cache_key(self, data_name):
"""
Return a cache key for this classifier set.
Args:
data_name (unicode): Name for the data associated with this key.
Returns:
unicode
"""
return u"openassessment.assessment.ai.classifier_set.{pk}.{data_name}".format(
pk=self.pk, data_name=data_name
)
# Directory in which classifiers will be stored
# For instance, if we're using the default file system storage backend
# for local development, this will be a subdirectory.
# If using an S3 storage backend, this will be a subdirectory in
# an AWS S3 bucket.
AI_CLASSIFIER_STORAGE = "ora2_ai_classifiers"
def upload_to_path(instance, filename): # pylint:disable=W0613
"""
Calculate the file path where classifiers should be uploaded.
Optionally prepends the path with a prefix (determined by Django settings).
This allows us to put classifiers from different environments
(stage / prod) in different directories within the same S3 bucket.
Args:
instance (AIClassifier): Not used.
filename (unicode): The filename provided when saving the file.
Returns:
unicode
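Example usage (an illustrative sketch; assumes ORA2_FILE_PREFIX is set to u'prod' and the filename is made up):
>>> upload_to_path(None, u'abcd1234')
u'prod/ora2_ai_classifiers/abcd1234'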
"""
prefix = getattr(settings, 'ORA2_FILE_PREFIX', None)
if prefix is not None:
return u"{prefix}/{root}/{filename}".format(
prefix=prefix,
root=AI_CLASSIFIER_STORAGE,
filename=filename
)
else:
return u"{root}/{filename}".format(
root=AI_CLASSIFIER_STORAGE,
filename=filename
)
class AIClassifier(models.Model):
"""
A trained classifier (immutable).
"""
class Meta:
app_label = "assessment"
# The set of classifiers this classifier belongs to
classifier_set = models.ForeignKey(AIClassifierSet, related_name="classifiers")
# The criterion (in the rubric) that this classifier evaluates.
criterion = models.ForeignKey(Criterion, related_name="+")
# The serialized classifier
# Because this may be large, we store it using a Django `FileField`,
# which allows us to plug in different storage backends (such as S3)
classifier_data = models.FileField(upload_to=upload_to_path)
def download_classifier_data(self):
"""
Download and deserialize the classifier data.
Returns:
JSON-serializable
Raises:
ValueError
IOError
httplib.HTTPException
"""
return json.loads(self.classifier_data.read()) # pylint:disable=E1101
@property
def valid_scores(self):
"""
Return a list of valid scores for the rubric criterion associated
with this classifier.
Returns:
list of integer scores, in ascending order.
"""
return sorted([option.points for option in self.criterion.options.all()])
class AIWorkflow(models.Model):
"""
Abstract base class for AI workflow database models.
"""
class Meta:
app_label = "assessment"
abstract = True
# Unique identifier used to track this workflow
uuid = UUIDField(version=1, db_index=True, unique=True)
# Course Entity and Item Discriminator
# Though these items are duplicated in the database tables for the submissions app,
# and every workflow has a reference to a submission entry, this is okay because
# submissions are immutable.
course_id = models.CharField(max_length=255, db_index=True)
item_id = models.CharField(max_length=128, db_index=True)
# Timestamps
# The task is *scheduled* as soon as a client asks the API to
# train classifiers.
# The task is *completed* when a worker has successfully created a
# classifier set based on the training examples.
scheduled_at = models.DateTimeField(default=now, db_index=True)
completed_at = models.DateTimeField(null=True, db_index=True)
# The ID of the algorithm used to train the classifiers
# This is a parameter passed to and interpreted by the workers.
# Django settings allow the users to map algorithm ID strings
# to the Python code they should use to perform the training.
algorithm_id = models.CharField(max_length=128, db_index=True)
# The set of trained classifiers.
# In the training task, this field will be set when the task completes successfully.
# In the grading task, this may be set to null if no classifiers are available
# when the student submits an essay for grading.
classifier_set = models.ForeignKey(
AIClassifierSet, related_name='+',
null=True, default=None
)
@property
def is_complete(self):
"""
Check whether the workflow is complete.
Returns:
bool
"""
return self.completed_at is not None
def mark_complete_and_save(self):
"""
Mark the workflow as complete.
Returns:
None
"""
self.completed_at = now()
self.save()
self._log_complete_workflow()
@classmethod
def get_incomplete_workflows(cls, course_id, item_id):
"""
Gets all incomplete workflows of this type for a given course and item.
Args:
course_id (unicode): Uniquely identifies the course
item_id (unicode): The discriminator for the item we are looking for
Yields:
All incomplete workflows for this item, as a delayed "stream"
Raises:
DatabaseError
cls.DoesNotExist
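Example usage (an illustrative sketch; assumes course_id and item_id are already defined):
>>> for workflow in AIGradingWorkflow.get_incomplete_workflows(course_id, item_id):
...     workflow.assign_most_recent_classifier_set()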
"""
# Finds all of the UUIDs for incomplete workflows matching the query
grade_workflow_uuids = [
wflow['uuid'] for wflow in cls.objects.filter(
course_id=course_id, item_id=item_id, completed_at__isnull=True
).values('uuid')
]
# Continues to generate output until all workflows in the queryset have been output
for workflow_uuid in grade_workflow_uuids:
# Fetches the workflow associated with the uuid stored in the initial query
workflow = cls.objects.get(uuid=workflow_uuid)
yield workflow
@classmethod
def is_workflow_complete(cls, workflow_uuid):
"""
Check whether the workflow with a given UUID has been marked complete.
Args:
workflow_uuid (str): The UUID of the workflow to check.
Returns:
bool
Raises:
DatabaseError
cls.DoesNotExist
"""
workflow = cls.objects.get(uuid=workflow_uuid)
return workflow.is_complete
def _log_start_workflow(self):
"""
A logging operation called at the beginning of an AI Workflow's life.
Increments the number of tasks of that kind.
"""
# Identifies the type of task for reporting
class_name = self.__class__.__name__
data_path = 'openassessment.assessment.ai_task.' + class_name
# Sets identity tags which allow sorting by course and item
tags = [
u"course_id:{course_id}".format(course_id=self.course_id),
u"item_id:{item_id}".format(item_id=self.item_id),
]
logger.info(u"{class_name} with uuid {uuid} was started.".format(class_name=class_name, uuid=self.uuid))
dog_stats_api.increment(data_path + '.scheduled_count', tags=tags)
def _log_complete_workflow(self):
"""
A logging operation called at the end of an AI Workflow's life.
Reports the total time the task took.
"""
# Identifies the type of task for reporting
class_name = self.__class__.__name__
data_path = 'openassessment.assessment.ai_task.' + class_name
tags = [
u"course_id:{course_id}".format(course_id=self.course_id),
u"item_id:{item_id}".format(item_id=self.item_id),
]
# Calculates the time taken to complete the task and reports it to datadog
time_delta = self.completed_at - self.scheduled_at
dog_stats_api.histogram(
data_path + '.turnaround_time',
time_delta.total_seconds(),
tags=tags
)
dog_stats_api.increment(data_path + '.completed_count', tags=tags)
logger.info(
(
u"{class_name} with uuid {uuid} completed its workflow successfully "
u"in {seconds} seconds."
).format(class_name=class_name, uuid=self.uuid, seconds=time_delta.total_seconds())
)
class AITrainingWorkflow(AIWorkflow):
"""
Used to track AI training tasks.
Training tasks take as input an algorithm ID and a set of training examples
(which are associated with a rubric).
On successful completion, training tasks output a set of trained classifiers.
"""
class Meta:
app_label = "assessment"
# The training examples (essays + scores) used to train the classifiers.
# This is a many-to-many field because
# (a) we need multiple training examples to train a classifier, and
# (b) we may want to re-use training examples
# (for example, if a training task is executed by Celery workers multiple times)
training_examples = models.ManyToManyField(TrainingExample, related_name="+")
@classmethod
@transaction.atomic
def start_workflow(cls, examples, course_id, item_id, algorithm_id):
"""
Start a workflow to track a training task.
Args:
examples (list of TrainingExample): The training examples used to create the classifiers.
course_id (unicode): The ID for the course that the training workflow is associated with.
item_id (unicode): The ID for the item that the training workflow is training to assess.
algorithm_id (unicode): The ID of the algorithm to use for training.
Returns:
AITrainingWorkflow
Raises:
NoTrainingExamples
"""
if len(examples) == 0:
raise NoTrainingExamples()
workflow = AITrainingWorkflow.objects.create(algorithm_id=algorithm_id, item_id=item_id, course_id=course_id)
workflow.training_examples.add(*examples)
workflow.save()
workflow._log_start_workflow()
return workflow
@property
def rubric(self):
"""
Return the rubric associated with this training workflow's examples.
Returns:
Rubric
Raises:
NoTrainingExamples
"""
# We assume that all the training examples we have been provided are using
# the same rubric (this is enforced by the API call that deserializes
# the training examples).
first_example = list(self.training_examples.all()[:1]) # pylint: disable=E1101
if first_example:
return first_example[0].rubric
else:
raise NoTrainingExamples(workflow_uuid=self.uuid)
def complete(self, classifier_set):
"""
Add a classifier set to the workflow and mark it complete.
Args:
classifier_set (dict): Mapping of criteria names to serialized classifiers.
Returns:
None
Raises:
NoTrainingExamples
IncompleteClassifierSet
ClassifierSerializeError
ClassifierUploadError
InvalidRubricSelection
DatabaseError
"""
self.classifier_set = AIClassifierSet.create_classifier_set(
classifier_set, self.rubric, self.algorithm_id, self.course_id, self.item_id
)
self.mark_complete_and_save()
class AIGradingWorkflow(AIWorkflow):
"""
Used to track AI grading tasks.
Grading tasks take as input an essay submission
and a set of classifiers; the tasks select options
for each criterion in the rubric.
"""
class Meta:
app_label = "assessment"
# The UUID of the submission being graded
submission_uuid = models.CharField(max_length=128, db_index=True)
# The text of the essay submission to grade
# We duplicate this here to avoid having to repeatedly look up
# the submission. Since submissions are immutable, this is safe.
essay_text = models.TextField(blank=True)
# The rubric used to evaluate the submission.
# We store this so we can look for classifiers for the same rubric
# if none are available when the workflow is created.
rubric = models.ForeignKey(Rubric, related_name="+")
# The assessment produced by the AI grading algorithm
# Until the task completes successfully, this will be set to null
assessment = models.ForeignKey(
Assessment, related_name="+", null=True, default=None
)
# Identifier information associated with the student's submission
# Useful for finding workflows for a particular course/item/student
# Since submissions are immutable, and since the workflow is
# associated with one submission, it's safe to duplicate
# this information here from the submissions models.
student_id = models.CharField(max_length=40, db_index=True)
def assign_most_recent_classifier_set(self):
"""
Find the most recent classifier set and assign it to this workflow.
Returns:
(bool): Whether a classifier set was assigned to this AIGradingWorkflow.
Raises:
DatabaseError
"""
classifier_set = AIClassifierSet.most_recent_classifier_set(
self.rubric, self.algorithm_id, self.course_id, self.item_id
)
if classifier_set is not None:
self.classifier_set = classifier_set
self.save()
return classifier_set is not None
@classmethod
@transaction.atomic
def start_workflow(cls, submission_uuid, rubric_dict, algorithm_id):
"""
Start a grading workflow.
Args:
submission_uuid (str): The UUID of the submission to grade.
rubric_dict (dict): The serialized rubric model.
algorithm_id (unicode): The ID of the algorithm to use for grading.
Returns:
AIGradingWorkflow
Raises:
SubmissionNotFoundError
SubmissionRequestError
SubmissionInternalError
InvalidRubric
DatabaseError
"""
# Retrieve info about the submission
submission = sub_api.get_submission_and_student(submission_uuid)
# Get or create the rubric
from openassessment.assessment.serializers import rubric_from_dict
rubric = rubric_from_dict(rubric_dict)
# Create the workflow
workflow = cls.objects.create(
submission_uuid=submission_uuid,
essay_text=essay_text_from_submission(submission),
algorithm_id=algorithm_id,
student_id=submission['student_item']['student_id'],
item_id=submission['student_item']['item_id'],
course_id=submission['student_item']['course_id'],
rubric=rubric
)
# Retrieve and assign classifier set candidates
workflow.assign_most_recent_classifier_set()
workflow._log_start_workflow()
return workflow
@transaction.atomic
def complete(self, criterion_scores):
"""
Create an assessment with scores from the AI classifiers
and mark the workflow complete.
Args:
criterion_scores (dict): Dictionary mapping criteria names to integer scores.
Raises:
InvalidRubricSelection
DatabaseError
"""
self.assessment = Assessment.create(
self.rubric, self.algorithm_id, self.submission_uuid, AI_ASSESSMENT_TYPE
)
AssessmentPart.create_from_option_points(self.assessment, criterion_scores)
self.mark_complete_and_save()
"""
Celery looks for tasks in this module,
so import the tasks we want the workers to implement.
"""
# pylint:disable=W0611
from .worker.training import train_classifiers, reschedule_training_tasks
from .worker.grading import grade_essay, reschedule_grading_tasks
# coding=utf-8
"""
Tests for AI assessment.
"""
import copy
import mock
from nose.tools import raises
from celery.exceptions import NotConfigured
from django.db import DatabaseError
from django.test.utils import override_settings
from openassessment.test_utils import CacheResetTest
from submissions import api as sub_api
from openassessment.assessment.api import ai as ai_api
from openassessment.assessment.models import (
AITrainingWorkflow, AIGradingWorkflow, AIClassifierSet, Assessment
)
from openassessment.assessment.worker.algorithm import AIAlgorithm
from openassessment.assessment.serializers import rubric_from_dict
from openassessment.assessment.errors import (
AITrainingRequestError, AITrainingInternalError, AIGradingRequestError,
AIReschedulingInternalError, AIGradingInternalError, AIError
)
from openassessment.assessment.test.constants import RUBRIC, EXAMPLES, STUDENT_ITEM, ANSWER
class StubAIAlgorithm(AIAlgorithm):
"""
Stub implementation of a supervised ML algorithm.
"""
# The format of the serialized classifier is controlled
# by the AI algorithm implementation, so we can return
# anything here as long as it's JSON-serializable
FAKE_CLASSIFIER = {
'name': u'ƒαкє ¢ℓαѕѕιƒιєя',
'binary_content': "TWFuIGlzIGRpc3Rpbmd1aX"
}
def train_classifier(self, examples):
"""
Stub implementation that returns fake classifier data.
"""
# Include the input essays in the classifier
# so we can test that the correct inputs were used
classifier = copy.copy(self.FAKE_CLASSIFIER)
classifier['examples'] = examples
classifier['score_override'] = 0
return classifier
def score(self, text, classifier, cache):
"""
Stub implementation that returns whatever scores were
provided in the serialized classifier data.
Expect `classifier` to be a dict with a single key,
"score_override", containing the score to return.
"""
return classifier['score_override']
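# Hypothetical illustration (not used by the tests below): the stub's
# train/score round trip. With an empty example list, train_classifier()
# returns the fake classifier data plus the (empty) examples, and score()
# simply echoes the baked-in 'score_override' of 0.
def _example_stub_round_trip():
    """Sketch only; never called."""
    algorithm = StubAIAlgorithm()
    classifier = algorithm.train_classifier([])
    return algorithm.score(u"any text", classifier, {})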
ALGORITHM_ID = "test-stub"
COURSE_ID = STUDENT_ITEM.get('course_id')
ITEM_ID = STUDENT_ITEM.get('item_id')
AI_ALGORITHMS = {
ALGORITHM_ID: '{module}.StubAIAlgorithm'.format(module=__name__),
}
def train_classifiers(rubric_dict, classifier_score_overrides):
"""
Simple utility function to train classifiers.
Args:
rubric_dict (dict): The rubric to train the classifiers on.
classifier_score_overrides (dict): A dictionary of classifier overrides
to set the scores for the given submission.
"""
rubric = rubric_from_dict(rubric_dict)
AIClassifierSet.create_classifier_set(
classifier_score_overrides, rubric, ALGORITHM_ID, COURSE_ID, ITEM_ID
)
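# Hypothetical illustration (not used directly by the tests): calling the
# helper above with the per-criterion override format that
# StubAIAlgorithm.score() expects.
def _example_train_with_overrides():
    """Sketch only; never called."""
    train_classifiers(RUBRIC, {
        u"vøȼȺƀᵾłȺɍɏ": {'score_override': 1},
        u"ﻭɼค๓๓คɼ": {'score_override': 2},
    })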
class AITrainingTest(CacheResetTest):
"""
Tests for AI training tasks.
"""
EXPECTED_INPUT_SCORES = {
u'vøȼȺƀᵾłȺɍɏ': [1, 0],
u'ﻭɼค๓๓คɼ': [0, 2]
}
# Use a stub AI algorithm
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_train_classifiers(self):
# Schedule a training task
# Because Celery is configured in "always eager" mode,
# expect the task to be executed synchronously.
workflow_uuid = ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)
# Retrieve the classifier set from the database
workflow = AITrainingWorkflow.objects.get(uuid=workflow_uuid)
classifier_set = workflow.classifier_set
self.assertIsNot(classifier_set, None)
# Retrieve a dictionary mapping criteria names to deserialized classifiers
classifiers = classifier_set.classifier_data_by_criterion
# Check that we have classifiers for all criteria in the rubric
criteria = set(criterion['name'] for criterion in RUBRIC['criteria'])
self.assertEqual(set(classifiers.keys()), criteria)
# Check that the classifier data matches the data from our stub AI algorithm
# Since the stub data includes the training examples, we also verify
# that the classifier was trained using the correct examples.
for criterion in RUBRIC['criteria']:
classifier = classifiers[criterion['name']]
self.assertEqual(classifier['name'], StubAIAlgorithm.FAKE_CLASSIFIER['name'])
self.assertEqual(classifier['binary_content'], StubAIAlgorithm.FAKE_CLASSIFIER['binary_content'])
# Verify that the correct essays and scores were used to create the classifier
# Our stub AI algorithm provides these for us, but they would not ordinarily
# be included in the trained classifier.
self.assertEqual(len(classifier['examples']), len(EXAMPLES))
expected_scores = self.EXPECTED_INPUT_SCORES[criterion['name']]
for data in zip(EXAMPLES, classifier['examples'], expected_scores):
sent_example, received_example, expected_score = data
received_example = AIAlgorithm.ExampleEssay(*received_example)
self.assertEqual(received_example.text, sent_example['answer'])
self.assertEqual(received_example.score, expected_score)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_train_classifiers_feedback_only_criterion(self):
# Modify the rubric to include a feedback-only criterion
# (a criterion with no options, just written feedback)
rubric = copy.deepcopy(RUBRIC)
rubric['criteria'].append({
'name': 'feedback only',
'prompt': 'feedback',
'options': []
})
# Schedule a training task
# (we use training examples that do NOT include the feedback-only criterion)
workflow_uuid = ai_api.train_classifiers(rubric, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)
# Verify that no classifier was created for the feedback-only criterion
# Since there are no points associated with that criterion,
# there's no way for the AI algorithm to score it anyway.
workflow = AITrainingWorkflow.objects.get(uuid=workflow_uuid)
classifier_data = workflow.classifier_set.classifier_data_by_criterion
self.assertNotIn('feedback only', classifier_data)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_train_classifiers_all_feedback_only_criteria(self):
# Modify the rubric to include only feedback-only criteria
# (a criterion with no options, just written feedback)
rubric = copy.deepcopy(RUBRIC)
for criterion in rubric['criteria']:
criterion['options'] = []
# Modify the training examples to provide no scores
examples = copy.deepcopy(EXAMPLES)
for example in examples:
example['options_selected'] = {}
# Schedule a training task
# Our training examples have no options
workflow_uuid = ai_api.train_classifiers(rubric, examples, COURSE_ID, ITEM_ID, ALGORITHM_ID)
# Verify that no classifier was created for the feedback-only criteria
workflow = AITrainingWorkflow.objects.get(uuid=workflow_uuid)
classifier_data = workflow.classifier_set.classifier_data_by_criterion
self.assertEqual(classifier_data, {})
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_train_classifiers_invalid_examples(self):
# Mutate an example so it does not match the rubric
mutated_examples = copy.deepcopy(EXAMPLES)
mutated_examples[0]['options_selected'] = {'invalid': 'invalid'}
# Expect a request error
with self.assertRaises(AITrainingRequestError):
ai_api.train_classifiers(RUBRIC, mutated_examples, COURSE_ID, ITEM_ID, ALGORITHM_ID)
def test_train_classifiers_no_examples(self):
# Empty list of training examples
with self.assertRaises(AITrainingRequestError):
ai_api.train_classifiers(RUBRIC, [], COURSE_ID, ITEM_ID, ALGORITHM_ID)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@mock.patch.object(AITrainingWorkflow.objects, 'create')
def test_start_workflow_database_error(self, mock_create):
# Simulate a database error when creating the training workflow
mock_create.side_effect = DatabaseError("KABOOM!")
with self.assertRaises(AITrainingInternalError):
ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_train_classifiers_celery_error(self):
with mock.patch('openassessment.assessment.api.ai.training_tasks.train_classifiers.apply_async') as mock_train:
mock_train.side_effect = NotConfigured
with self.assertRaises(AITrainingInternalError):
ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)
class AIGradingTest(CacheResetTest):
"""
Tests for AI grading tasks.
"""
CLASSIFIER_SCORE_OVERRIDES = {
u"vøȼȺƀᵾłȺɍɏ": {'score_override': 1},
u"ﻭɼค๓๓คɼ": {'score_override': 2}
}
def setUp(self):
"""
Create a submission and a fake classifier set.
"""
# Create a submission
submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
self.submission_uuid = submission['uuid']
train_classifiers(RUBRIC, self.CLASSIFIER_SCORE_OVERRIDES)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_grade_essay(self):
# Schedule a grading task
# Because Celery is configured in "always eager" mode, this will
# be executed synchronously.
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
# Verify that we got the scores we provided to the stub AI algorithm
assessment = ai_api.get_latest_assessment(self.submission_uuid)
for part in assessment['parts']:
criterion_name = part['option']['criterion']['name']
expected_score = self.CLASSIFIER_SCORE_OVERRIDES[criterion_name]['score_override']
self.assertEqual(part['option']['points'], expected_score)
score = ai_api.get_score(self.submission_uuid, {})
self.assertEqual(score["points_possible"], 4)
self.assertEqual(score["points_earned"], 3)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_grade_essay_feedback_only_criterion(self):
# Modify the rubric to include a feedback-only criterion
# (a criterion with no options, just written feedback)
rubric = copy.deepcopy(RUBRIC)
rubric['criteria'].append({
'name': 'feedback only',
'prompt': 'feedback',
'options': []
})
# Train classifiers for the rubric
train_classifiers(rubric, self.CLASSIFIER_SCORE_OVERRIDES)
# Schedule a grading task and retrieve the assessment
ai_api.on_init(self.submission_uuid, rubric=rubric, algorithm_id=ALGORITHM_ID)
assessment = ai_api.get_latest_assessment(self.submission_uuid)
# Verify that the criteria with options were given scores
# (from the score override used by our fake classifiers)
self.assertEqual(assessment['parts'][0]['criterion']['name'], u"vøȼȺƀᵾłȺɍɏ")
self.assertEqual(assessment['parts'][0]['option']['points'], 1)
self.assertEqual(assessment['parts'][1]['criterion']['name'], u"ﻭɼค๓๓คɼ")
self.assertEqual(assessment['parts'][1]['option']['points'], 2)
# Verify that the criterion with no options (only feedback)
# has no score and empty feedback
self.assertEqual(assessment['parts'][2]['criterion']['name'], u"feedback only")
self.assertIs(assessment['parts'][2]['option'], None)
self.assertEqual(assessment['parts'][2]['feedback'], u"")
# Check the scores by criterion dict
score_dict = ai_api.get_assessment_scores_by_criteria(self.submission_uuid)
self.assertEqual(score_dict[u"vøȼȺƀᵾłȺɍɏ"], 1)
self.assertEqual(score_dict[u"ﻭɼค๓๓คɼ"], 2)
self.assertEqual(score_dict['feedback only'], 0)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_grade_essay_all_feedback_only_criteria(self):
# Modify the rubric to include only feedback-only criteria
rubric = copy.deepcopy(RUBRIC)
for criterion in rubric['criteria']:
criterion['options'] = []
# Train classifiers for the rubric
train_classifiers(rubric, {})
# Schedule a grading task and retrieve the assessment
ai_api.on_init(self.submission_uuid, rubric=rubric, algorithm_id=ALGORITHM_ID)
assessment = ai_api.get_latest_assessment(self.submission_uuid)
# Verify that all assessment parts have feedback set to an empty string
for part in assessment['parts']:
self.assertEqual(part['feedback'], u"")
# Check the scores by criterion dict
# Since none of the criteria had options, the scores should all default to 0
score_dict = ai_api.get_assessment_scores_by_criteria(self.submission_uuid)
self.assertItemsEqual(score_dict, {
u"vøȼȺƀᵾłȺɍɏ": 0,
u"ﻭɼค๓๓คɼ": 0,
})
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_get_assessment_scores_by_criteria(self):
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
# Verify that we got the scores we provided to the stub AI algorithm
assessment = ai_api.get_latest_assessment(self.submission_uuid)
assessment_score_dict = ai_api.get_assessment_scores_by_criteria(self.submission_uuid)
for part in assessment['parts']:
criterion_name = part['option']['criterion']['name']
expected_score = self.CLASSIFIER_SCORE_OVERRIDES[criterion_name]['score_override']
self.assertEqual(assessment_score_dict[criterion_name], expected_score)
@raises(ai_api.AIGradingInternalError)
@mock.patch.object(Assessment.objects, 'filter')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_error_getting_assessment_scores(self, mock_filter):
mock_filter.side_effect = DatabaseError("Oh no!")
ai_api.get_assessment_scores_by_criteria(self.submission_uuid)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_submit_submission_not_found(self):
with self.assertRaises(AIGradingRequestError):
ai_api.on_init("no_such_submission", rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_submit_invalid_rubric(self):
invalid_rubric = {'not_valid': True}
with self.assertRaises(AIGradingRequestError):
ai_api.on_init(self.submission_uuid, rubric=invalid_rubric, algorithm_id=ALGORITHM_ID)
@mock.patch.object(AIGradingWorkflow.objects, 'create')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_submit_database_error_create(self, mock_call):
mock_call.side_effect = DatabaseError("KABOOM!")
with self.assertRaises(AIGradingInternalError):
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
@mock.patch.object(Assessment.objects, 'filter')
def test_get_latest_assessment_database_error(self, mock_call):
mock_call.side_effect = DatabaseError("KABOOM!")
with self.assertRaises(AIGradingInternalError):
ai_api.get_latest_assessment(self.submission_uuid)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_submit_celery_error(self):
with mock.patch('openassessment.assessment.api.ai.grading_tasks.grade_essay.apply_async') as mock_grade:
mock_grade.side_effect = NotConfigured
with self.assertRaises(AIGradingInternalError):
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
@mock.patch.object(AIClassifierSet.objects, 'filter')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_submit_database_error_filter(self, mock_filter):
mock_filter.side_effect = DatabaseError("rumble... ruMBLE, RUMBLE! BOOM!")
with self.assertRaises(AIGradingInternalError):
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
@mock.patch.object(AIClassifierSet.objects, 'filter')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_submit_no_classifiers(self, mock_call):
mock_call.return_value = []
with mock.patch('openassessment.assessment.api.ai.logger.info') as mock_log:
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
argument = mock_log.call_args[0][0]
self.assertTrue(u"no classifiers are available" in argument)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_submit_submission_db_error(self):
with mock.patch('openassessment.assessment.api.ai.AIGradingWorkflow.start_workflow') as mock_start:
mock_start.side_effect = sub_api.SubmissionInternalError
with self.assertRaises(AIGradingInternalError):
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
class AIUntrainedGradingTest(CacheResetTest):
"""
Tests that do not run the setup to train classifiers.
"""
def setUp(self):
"""
Create a submission.
"""
# Create a submission
submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
self.submission_uuid = submission['uuid']
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_no_score(self):
# Test that no score has been created, and get_score returns None.
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
score = ai_api.get_score(self.submission_uuid, {})
self.assertIsNone(score)
class AIReschedulingTest(CacheResetTest):
"""
Tests AI rescheduling.
Tests rescheduling of both training and grading tasks, in both orders, and all error conditions that can arise from calling the rescheduling API.
"""
def setUp(self):
"""
Sets up each test so that it will have unfinished tasks of both types
"""
# 1) Schedule Grading, have the scheduling succeed but the grading fail because no classifiers exist
for _ in range(0, 10):
submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
self.submission_uuid = submission['uuid']
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
# 2) Schedule Training, have it INTENTIONALLY fail. Now we are at a point where both parts need to be rescheduled
patched_method = 'openassessment.assessment.api.ai.training_tasks.train_classifiers.apply_async'
with mock.patch(patched_method) as mock_train_classifiers:
mock_train_classifiers.side_effect = AITrainingInternalError('Training Classifiers Failed for some Reason.')
with self.assertRaises(AITrainingInternalError):
ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)
self._assert_complete(training_done=False, grading_done=False)
def _assert_complete(self, training_done=None, grading_done=None):
"""
Asserts that training and grading workflows have the given completion status.
Serves as an assertion for a number of unit tests.
Args:
training_done (bool): if True, assert that all training workflows are complete; if False, assert that some remain unfinished
grading_done (bool): if True, assert that all grading workflows are complete; if False, assert that some remain unfinished
"""
incomplete_training_workflows = AITrainingWorkflow.get_incomplete_workflows(course_id=COURSE_ID, item_id=ITEM_ID)
incomplete_grading_workflows = AIGradingWorkflow.get_incomplete_workflows(course_id=COURSE_ID, item_id=ITEM_ID)
if training_done is not None:
self.assertEqual(self._is_empty_generator(incomplete_training_workflows), training_done)
if grading_done is not None:
self.assertEqual(self._is_empty_generator(incomplete_grading_workflows), grading_done)
def _is_empty_generator(self, gen):
"""
Tests whether a given generator has any more output.
Consumes one item of output in the process.
Args:
gen (generator): The generator to test for emptiness
Returns:
(bool): True if the generator had no remaining output, False otherwise
"""
try:
next(gen)
return False
except StopIteration:
return True
def _call_reschedule_safe(self, task_type=u"grade"):
"""
A helper which catches and ignores the exception thrown by the rescheduling API when the rescheduled tasks fail.
This method is necessary because when we set our celery workers to propagate all errors upward
(as we now do in our unit testing suite), a task that fails repeatedly (say, a grading task that fails
because classifiers are not defined) has its exception returned from the call to grade_essay (even though
the call is nominally asynchronous) and percolates up. This method acknowledges the fact that we expect
there to be an error, and allows us to call reschedule_unfinished_tasks without catching that error directly.
Args:
task_type (unicode): describes what tasks we should reschedule
"""
try:
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID, task_type=task_type)
except Exception: # pylint: disable=W0703
# The exception propagated from the failing rescheduled tasks is expected here; ignore it.
pass
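# Illustrative note (not part of the original tests): the behaviour described
# in the docstring above assumes test settings along these lines (Celery
# 3.x-era names; the exact values used by the test suite are an assumption):
#
#     CELERY_ALWAYS_EAGER = True                  # run tasks synchronously
#     CELERY_EAGER_PROPAGATES_EXCEPTIONS = True   # re-raise task failures in the caller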
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_reschedule_grading_success(self):
# Rescheduling grading only, expect no successes
self._call_reschedule_safe(task_type=u"grade")
# Neither training nor grading should be complete.
self._assert_complete(grading_done=False, training_done=False)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_reschedule_training_success(self):
# Reschedule training, expect all successes
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID, task_type=u"train")
# Both training and grading should be complete.
self._assert_complete(grading_done=True, training_done=True)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_reschedule_training_and_grading_success(self):
# Reschedule everything, expect all successes
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID, task_type=None)
# Both training and grading should be complete.
self._assert_complete(grading_done=True, training_done=True)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_reschedule_non_valid_args(self):
with self.assertRaises(AIError):
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, task_type=u"train")
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_reschedule_all_large(self):
"""
Specifically tests the querying mechanisms (Python generator functions), and ensures that our approach
holds up for querysets with 125+ entries
"""
# Creates 125 more grades (for a total of 135)
for _ in range(0, 125):
submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
self.submission_uuid = submission['uuid']
ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
# Both training and grading should not be complete.
self._assert_complete(grading_done=False, training_done=False)
# Reschedule both
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID, task_type=None)
# Check that both training and grading are now complete
self._assert_complete(grading_done=True, training_done=True)
def test_reschedule_grade_celery_error(self):
patched_method = 'openassessment.assessment.api.ai.grading_tasks.reschedule_grading_tasks.apply_async'
with mock.patch(patched_method) as mock_grade:
mock_grade.side_effect = NotConfigured
with self.assertRaises(AIGradingInternalError):
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID)
def test_reschedule_train_celery_error(self):
patched_method = 'openassessment.assessment.api.ai.training_tasks.reschedule_training_tasks.apply_async'
with mock.patch(patched_method) as mock_train:
mock_train.side_effect = NotConfigured
with self.assertRaises(AITrainingInternalError):
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID, task_type=None)
@mock.patch.object(AIGradingWorkflow, 'get_incomplete_workflows')
def test_get_incomplete_workflows_error_grading(self, mock_incomplete):
mock_incomplete.side_effect = DatabaseError
with self.assertRaises(AIReschedulingInternalError):
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID)
def test_get_incomplete_workflows_error_training(self):
patched_method = 'openassessment.assessment.models.ai.AIWorkflow.get_incomplete_workflows'
with mock.patch(patched_method) as mock_incomplete:
mock_incomplete.side_effect = DatabaseError
with self.assertRaises(Exception):
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID, task_type=u"train")
def test_reschedule_train_internal_celery_error(self):
patched_method = 'openassessment.assessment.worker.training.train_classifiers.apply_async'
with mock.patch(patched_method) as mock_train:
mock_train.side_effect = NotConfigured("NotConfigured")
with mock.patch('openassessment.assessment.worker.training.logger.exception') as mock_logger:
with self.assertRaises(Exception):
ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID, task_type=u"train")
last_call = mock_logger.call_args[0][0]
self.assertTrue(u"NotConfigured" in last_call)
class AIAutomaticGradingTest(CacheResetTest):
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_automatic_grade(self):
# Create some submissions which will not succeed. No classifiers yet exist.
for _ in range(0, 10):
submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
ai_api.on_init(submission['uuid'], rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
# Check that there are unresolved grading workflows
self._assert_complete(training_done=True, grading_done=False)
# Create and train a classifier set. This should set off automatic grading.
ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)
# Check to make sure that all work is done.
self._assert_complete(training_done=True, grading_done=True)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_automatic_grade_error(self):
# Create some submissions which will not succeed. No classifiers yet exist.
for _ in range(0, 10):
submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
ai_api.on_init(submission['uuid'], rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
# Check that there are unresolved grading workflows
self._assert_complete(training_done=True, grading_done=False)
patched_method = 'openassessment.assessment.worker.training.reschedule_grading_tasks.apply_async'
with mock.patch(patched_method) as mocked_reschedule_grading:
mocked_reschedule_grading.side_effect = AIGradingInternalError("Kablewey.")
with self.assertRaises(AIGradingInternalError):
ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)
def _assert_complete(self, training_done=None, grading_done=None):
"""
Asserts that training and grading workflows have the given completion status.
Serves as an assertion for a number of unit tests.
Args:
training_done (bool): if True, assert that all training workflows are complete; if False, assert that some remain unfinished
grading_done (bool): if True, assert that all grading workflows are complete; if False, assert that some remain unfinished
"""
incomplete_training_workflows = AITrainingWorkflow.get_incomplete_workflows(course_id=COURSE_ID, item_id=ITEM_ID)
incomplete_grading_workflows = AIGradingWorkflow.get_incomplete_workflows(course_id=COURSE_ID, item_id=ITEM_ID)
if training_done is not None:
self.assertEqual(self._is_empty_generator(incomplete_training_workflows), training_done)
if grading_done is not None:
self.assertEqual(self._is_empty_generator(incomplete_grading_workflows), grading_done)
def _is_empty_generator(self, gen):
"""
Tests whether a given generator has any more output.
Consumes one item of output in the process.
Args:
gen (generator): The generator to test for emptiness
Returns:
(bool): True if the generator had no remaining output, False otherwise
"""
try:
next(gen)
return False
except StopIteration:
return True
class AIClassifierInfoTest(CacheResetTest):
"""
Tests for retrieving info about classifier sets.
"""
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_no_classifier_set(self):
classifier_info = ai_api.get_classifier_set_info(
RUBRIC, ALGORITHM_ID, 'test_course', 'test_item'
)
self.assertIs(classifier_info, None)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_classifier_set_info(self):
workflow_uuid = ai_api.train_classifiers(
RUBRIC, EXAMPLES, 'test_course', 'test_item', ALGORITHM_ID
)
classifier_info = ai_api.get_classifier_set_info(
RUBRIC, ALGORITHM_ID, 'test_course', 'test_item'
)
# Retrieve the classifier set so we can get its actual creation date
workflow = AITrainingWorkflow.objects.get(uuid=workflow_uuid)
classifier_set = workflow.classifier_set
expected_info = {
'created_at': classifier_set.created_at,
'algorithm_id': ALGORITHM_ID,
'course_id': 'test_course',
'item_id': 'test_item'
}
self.assertEqual(classifier_info, expected_info)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_multiple_classifier_sets(self):
# Train multiple classifiers
ai_api.train_classifiers(
RUBRIC, EXAMPLES, 'test_course', 'test_item', ALGORITHM_ID
)
second_uuid = ai_api.train_classifiers(
RUBRIC, EXAMPLES, 'test_course', 'test_item', ALGORITHM_ID
)
# Expect that we get the info for the second classifier
classifier_info = ai_api.get_classifier_set_info(
RUBRIC, ALGORITHM_ID, 'test_course', 'test_item'
)
workflow = AITrainingWorkflow.objects.get(uuid=second_uuid)
classifier_set = workflow.classifier_set
self.assertEqual(classifier_info['created_at'], classifier_set.created_at)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@raises(AIGradingInternalError)
@mock.patch.object(AIClassifierSet, 'most_recent_classifier_set')
def test_database_error(self, mock_call):
mock_call.side_effect = DatabaseError('OH NO!')
ai_api.get_classifier_set_info(
RUBRIC, ALGORITHM_ID, 'test_course', 'test_item'
)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@raises(AIGradingRequestError)
def test_invalid_rubric_error(self):
invalid_rubric = {}
ai_api.get_classifier_set_info(invalid_rubric, ALGORITHM_ID, 'test_course', 'test_item')
# coding=utf-8
"""
Tests for AI algorithm implementations.
"""
import unittest
import json
import mock
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.worker.algorithm import (
AIAlgorithm, FakeAIAlgorithm, EaseAIAlgorithm,
TrainingError, InvalidClassifier
)
EXAMPLES = [
AIAlgorithm.ExampleEssay(u"Mine's a tale that can't be told, my ƒяєє∂σм I hold dear.", 2),
AIAlgorithm.ExampleEssay(u"How years ago in days of old, when 𝒎𝒂𝒈𝒊𝒄 filled th air.", 1),
AIAlgorithm.ExampleEssay(u"Ṫ'ẅäṡ in the darkest depths of Ṁöṛḋöṛ, I met a girl so fair.", 1),
AIAlgorithm.ExampleEssay(u"But goレレuᄊ, and the evil one crept up and slipped away with her", 0),
AIAlgorithm.ExampleEssay(u"", 4),
AIAlgorithm.ExampleEssay(u".!?", 4),
AIAlgorithm.ExampleEssay(u"no punctuation", 4),
AIAlgorithm.ExampleEssay(u"one", 4),
]
INPUT_ESSAYS = [
u"Good times, 𝑩𝒂𝒅 𝑻𝒊𝒎𝒆𝒔, you know I had my share",
u"When my woman left home for a 𝒃𝒓𝒐𝒘𝒏 𝒆𝒚𝒆𝒅 𝒎𝒂𝒏",
u"Well, I still don't seem to 𝒄𝒂𝒓𝒆",
u"",
u".!?",
u"no punctuation",
u"one",
]
class AIAlgorithmTest(CacheResetTest):
"""
Base class for testing AI algorithm implementations.
"""
ALGORITHM_CLASS = None
def setUp(self):
self.algorithm = self.ALGORITHM_CLASS() # pylint:disable=E1102
def _scores(self, classifier, input_essays):
"""
Use the classifier to score multiple input essays.
Args:
classifier (JSON-serializable): The trained classifier data to use for scoring.
input_essays (list of unicode): The essays to score.
Returns:
list of int: The scores
"""
cache = {}
return [
self.algorithm.score(input_essay, classifier, cache)
for input_essay in input_essays
]
class FakeAIAlgorithmTest(AIAlgorithmTest):
"""
Test for the fake AI algorithm implementation.
"""
ALGORITHM_CLASS = FakeAIAlgorithm
def test_train_and_score(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
expected_scores = [2, 0, 0, 0, 4, 2, 4]
scores = self._scores(classifier, INPUT_ESSAYS)
self.assertEqual(scores, expected_scores)
def test_score_classifier_missing_key(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test input", {}, {})
def test_score_classifier_no_scores(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test input", {'scores': []}, {})
# Try to import EASE -- if we can't, then skip the tests that require it
try:
import ease # pylint: disable=F0401,W0611
EASE_INSTALLED = True
except ImportError:
EASE_INSTALLED = False
@unittest.skipUnless(EASE_INSTALLED, "EASE library required")
class EaseAIAlgorithmTest(AIAlgorithmTest):
"""
Test for the EASE AI library wrapper.
"""
ALGORITHM_CLASS = EaseAIAlgorithm
def test_train_and_score(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
scores = self._scores(classifier, INPUT_ESSAYS)
# Check that we got scores in the correct range
valid_scores = set(example.score for example in EXAMPLES)
for score in scores:
self.assertIn(score, valid_scores)
# Check that the scores are consistent when we re-run the algorithm
repeat_scores = self._scores(classifier, INPUT_ESSAYS)
self.assertEqual(scores, repeat_scores)
def test_all_examples_have_same_score(self):
examples = [
AIAlgorithm.ExampleEssay(u"Test ëṡṡäÿ", 1),
AIAlgorithm.ExampleEssay(u"Another test ëṡṡäÿ", 1),
]
# No assertion -- just verifying that this does not raise an exception
classifier = self.algorithm.train_classifier(examples)
self._scores(classifier, INPUT_ESSAYS)
def test_most_examples_have_same_score(self):
# All training examples have the same score except for one
examples = [
AIAlgorithm.ExampleEssay(u"Test ëṡṡäÿ", 1),
AIAlgorithm.ExampleEssay(u"Another test ëṡṡäÿ", 1),
AIAlgorithm.ExampleEssay(u"Different score", 0),
]
classifier = self.algorithm.train_classifier(examples)
scores = self._scores(classifier, INPUT_ESSAYS)
# Check that we got scores back.
# This is not a very rigorous assertion -- we're mainly
# checking that we got this far without an exception.
self.assertEqual(len(scores), len(INPUT_ESSAYS))
def test_no_examples(self):
with self.assertRaises(TrainingError):
self.algorithm.train_classifier([])
def test_json_serializable(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
serialized = json.dumps(classifier)
deserialized = json.loads(serialized)
# This should not raise an exception
scores = self._scores(deserialized, INPUT_ESSAYS)
self.assertEqual(len(scores), len(INPUT_ESSAYS))
@mock.patch('openassessment.assessment.worker.algorithm.pickle')
def test_pickle_serialize_error(self, mock_pickle):
mock_pickle.dumps.side_effect = Exception("Test error!")
with self.assertRaises(TrainingError):
self.algorithm.train_classifier(EXAMPLES)
def test_pickle_deserialize_error(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
with mock.patch('openassessment.assessment.worker.algorithm.pickle.loads') as mock_call:
mock_call.side_effect = Exception("Test error!")
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test ëṡṡäÿ", classifier, {})
def test_serialized_classifier_not_a_dict(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test ëṡṡäÿ", "not a dict", {})
# coding=utf-8
"""
Test AI Django models.
"""
import copy
import ddt
from django.test import TestCase
from django.test.utils import override_settings
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.models import (
AIClassifierSet, AIClassifier, AIGradingWorkflow, AI_CLASSIFIER_STORAGE,
CLASSIFIERS_CACHE_IN_MEM, essay_text_from_submission
)
from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
@ddt.ddt
class DataConversionTest(TestCase):
@ddt.data(
(u'Answer', u'Answer'),
({'answer': {'text': u'Answer'}}, u'Answer'),
({'answer': {'parts': [{'text': u'Answer 1'}, {'text': u'Answer 2'}]}}, u'Answer 1\nAnswer 2')
)
@ddt.unpack
def test_essay_text_from_submission(self, input, output):
self.assertEqual(essay_text_from_submission(input), output)
class AIClassifierTest(CacheResetTest):
"""
Tests for the AIClassifier model.
"""
def test_upload_to_path_default(self):
# No path prefix provided in the settings
classifier = self._create_classifier()
components = classifier.classifier_data.name.split(u'/')
self.assertEqual(len(components), 2)
self.assertEqual(components[0], AI_CLASSIFIER_STORAGE)
self.assertGreater(len(components[1]), 0)
@override_settings(ORA2_FILE_PREFIX=u"ƒιℓє_ρяєƒιχ")
def test_upload_to_path_with_prefix(self):
classifier = self._create_classifier()
components = classifier.classifier_data.name.split(u'/')
self.assertEqual(len(components), 3)
self.assertEqual(components[0], u"ƒιℓє_ρяєƒιχ")
self.assertEqual(components[1], AI_CLASSIFIER_STORAGE)
self.assertGreater(len(components[2]), 0)
def _create_classifier(self):
"""
Create and return an AIClassifier.
"""
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
)
return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIClassifierSetTest(CacheResetTest):
"""
Tests for the AIClassifierSet model.
"""
def setUp(self):
super(AIClassifierSetTest, self).setUp()
rubric = rubric_from_dict(RUBRIC)
self.classifier_set = AIClassifierSet.create_classifier_set(
CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
)
def test_cache_downloads(self):
# Retrieve the classifier dict twice, which should hit the caching code.
# We can check that we're using the cache by asserting that
# the number of database queries decreases.
with self.assertNumQueries(1):
first = self.classifier_set.classifier_data_by_criterion
with self.assertNumQueries(0):
second = self.classifier_set.classifier_data_by_criterion
# Verify that we got the same value both times
self.assertEqual(first, second)
def test_file_cache_downloads(self):
# Retrieve the classifiers dict, which should be cached
# both in memory and on the file system
first = self.classifier_set.classifier_data_by_criterion
# Clear the in-memory cache
# This simulates what happens when a worker process dies
# after exceeding the maximum number of retries.
CLASSIFIERS_CACHE_IN_MEM.clear()
# We should still be able to retrieve the classifiers dict
# from the on-disk cache, even if memory has been cleared
with self.assertNumQueries(0):
second = self.classifier_set.classifier_data_by_criterion
# Verify that we got the correct classifiers dict back
self.assertEqual(first, second)
class AIGradingWorkflowTest(CacheResetTest):
"""
Tests for the AIGradingWorkflow model.
"""
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"test"
ITEM_ID = u"test"
ALGORITHM_ID = "test"
def setUp(self):
"""
Create a new grading workflow.
"""
self.rubric = rubric_from_dict(RUBRIC)
self.workflow = AIGradingWorkflow.objects.create(
submission_uuid='test', essay_text='test',
rubric=self.rubric, algorithm_id=self.ALGORITHM_ID,
item_id=self.ITEM_ID, course_id=self.COURSE_ID
)
# Create a rubric with a similar structure, but different prompt
similar_rubric_dict = copy.deepcopy(RUBRIC)
similar_rubric_dict['prompts'] = [{"description": 'Different prompt!'}]
self.similar_rubric = rubric_from_dict(similar_rubric_dict)
def test_assign_most_recent_classifier_set(self):
# No classifier sets are available
found = self.workflow.assign_most_recent_classifier_set()
self.assertFalse(found)
self.assertIs(self.workflow.classifier_set, None)
# Same rubric (exact), but different course id
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
"different course!", self.ITEM_ID
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric (exact) but different item id
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
self.COURSE_ID, "different item!"
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric (exact), but different algorithm id
# Shouldn't change, since the algorithm ID doesn't match
AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.rubric, "different algorithm!",
self.COURSE_ID, self.ITEM_ID
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric *structure*, but in a different item
# Shouldn't change, since the rubric isn't an exact match.
AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.similar_rubric, self.ALGORITHM_ID,
self.COURSE_ID, "different item!"
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric *structure* AND in the same course/item
# This should replace our current classifier set
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.similar_rubric, self.ALGORITHM_ID,
self.COURSE_ID, self.ITEM_ID
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric and same course/item
# This is the ideal, so we should always prefer it
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
self.COURSE_ID, self.ITEM_ID
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# -*- coding: utf-8 -*-
"""
Tests for AI worker API calls.
"""
import copy
import datetime
from uuid import uuid4
import mock
from django.db import DatabaseError
from django.core.files.base import ContentFile
from submissions import api as sub_api
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.api import ai_worker as ai_worker_api
from openassessment.assessment.models import (
AITrainingWorkflow, AIGradingWorkflow,
AIClassifier, AIClassifierSet, Assessment
)
from openassessment.assessment.serializers import (
rubric_from_dict, deserialize_training_examples
)
from openassessment.assessment.errors import (
AITrainingRequestError, AITrainingInternalError,
AIGradingRequestError, AIGradingInternalError
)
from openassessment.assessment.test.constants import (
EXAMPLES, RUBRIC, STUDENT_ITEM, ANSWER
)
ALGORITHM_ID = "test-algorithm"
# Classifier data
# Since this is controlled by the AI algorithm implementation,
# we could put anything here as long as it's JSON-serializable.
CLASSIFIERS = {
u"vøȼȺƀᵾłȺɍɏ": {
'name': u'𝒕𝒆𝒔𝒕 𝒄𝒍𝒂𝒔𝒔𝒊𝒇𝒊𝒆𝒓',
'data': u'Öḧ ḷëẗ ẗḧë ṡüṅ ḅëäẗ ḋöẅṅ üṗöṅ ṁÿ ḟäċë, ṡẗäṛṡ ẗö ḟïḷḷ ṁÿ ḋṛëäṁ"'
},
u"ﻭɼค๓๓คɼ": {
'name': u'𝒕𝒆𝒔𝒕 𝒄𝒍𝒂𝒔𝒔𝒊𝒇𝒊𝒆𝒓',
'data': u"І ам а тѓаvэlэѓ оf ъотЂ тімэ аиↁ ѕрасэ, то ъэ шЂэѓэ І Ђаvэ ъээи"
}
}
class AIWorkerTrainingTest(CacheResetTest):
"""
Tests for the AI API calls a worker would make when
completing a training task.
"""
COURSE_ID = u"sämplë ċöürsë"
ITEM_ID = u"12231"
ALGORITHM_ID = "test-algorithm"
# Classifier data
# Since this is controlled by the AI algorithm implementation,
# we could put anything here as long as it's JSON-serializable.
CLASSIFIERS = {
u"vøȼȺƀᵾłȺɍɏ": {
'name': u'𝒕𝒆𝒔𝒕 𝒄𝒍𝒂𝒔𝒔𝒊𝒇𝒊𝒆𝒓',
'data': u'Öḧ ḷëẗ ẗḧë ṡüṅ ḅëäẗ ḋöẅṅ üṗöṅ ṁÿ ḟäċë, ṡẗäṛṡ ẗö ḟïḷḷ ṁÿ ḋṛëäṁ"'
},
u"ﻭɼค๓๓คɼ": {
'name': u'𝒕𝒆𝒔𝒕 𝒄𝒍𝒂𝒔𝒔𝒊𝒇𝒊𝒆𝒓',
'data': u"І ам а тѓаvэlэѓ оf ъотЂ тімэ аиↁ ѕрасэ, то ъэ шЂэѓэ І Ђаvэ ъээи"
}
}
def setUp(self):
"""
Create a training workflow in the database.
"""
examples = deserialize_training_examples(EXAMPLES, RUBRIC)
workflow = AITrainingWorkflow.start_workflow(examples, self.COURSE_ID, self.ITEM_ID, self.ALGORITHM_ID)
self.workflow_uuid = workflow.uuid
def test_get_training_task_params(self):
params = ai_worker_api.get_training_task_params(self.workflow_uuid)
expected_examples = [
{
'text': EXAMPLES[0]['answer'],
'scores': {
u"vøȼȺƀᵾłȺɍɏ": 1,
u"ﻭɼค๓๓คɼ": 0
}
},
{
'text': EXAMPLES[1]['answer'],
'scores': {
u"vøȼȺƀᵾłȺɍɏ": 0,
u"ﻭɼค๓๓คɼ": 2
}
},
]
self.assertItemsEqual(params['training_examples'], expected_examples)
self.assertItemsEqual(params['algorithm_id'], ALGORITHM_ID)
def test_get_training_task_params_no_workflow(self):
with self.assertRaises(AITrainingRequestError):
ai_worker_api.get_training_task_params("invalid_uuid")
@mock.patch.object(AITrainingWorkflow.objects, 'get')
def test_get_training_task_params_database_error(self, mock_get):
mock_get.side_effect = DatabaseError("KABOOM!")
with self.assertRaises(AITrainingInternalError):
ai_worker_api.get_training_task_params(self.workflow_uuid)
def test_create_classifiers(self):
ai_worker_api.create_classifiers(self.workflow_uuid, CLASSIFIERS)
# Expect that the workflow was marked complete
workflow = AITrainingWorkflow.objects.get(uuid=self.workflow_uuid)
self.assertIsNot(workflow.completed_at, None)
# Expect that the classifier set was created with the correct data
self.assertIsNot(workflow.classifier_set, None)
saved_classifiers = workflow.classifier_set.classifier_data_by_criterion
self.assertItemsEqual(CLASSIFIERS, saved_classifiers)
def test_create_classifiers_no_workflow(self):
with self.assertRaises(AITrainingRequestError):
ai_worker_api.create_classifiers("invalid_uuid", CLASSIFIERS)
@mock.patch.object(AITrainingWorkflow.objects, 'get')
def test_create_classifiers_database_error(self, mock_get):
mock_get.side_effect = DatabaseError("KABOOM!")
with self.assertRaises(AITrainingInternalError):
ai_worker_api.create_classifiers(self.workflow_uuid, CLASSIFIERS)
def test_create_classifiers_serialize_error(self):
# Mutate the classifier data so it is NOT JSON-serializable
classifiers = copy.deepcopy(CLASSIFIERS)
classifiers[u"vøȼȺƀᵾłȺɍɏ"] = datetime.datetime.now()
# Expect an error when we try to create the classifiers
with self.assertRaises(AITrainingInternalError):
ai_worker_api.create_classifiers(self.workflow_uuid, classifiers)
def test_create_classifiers_missing_criteria(self):
# Remove a criterion from the classifiers dict
classifiers = copy.deepcopy(CLASSIFIERS)
del classifiers[u"vøȼȺƀᵾłȺɍɏ"]
# Expect an error when we try to create the classifiers
with self.assertRaises(AITrainingRequestError):
ai_worker_api.create_classifiers(self.workflow_uuid, classifiers)
def test_create_classifiers_unrecognized_criterion(self):
# Add an extra criterion to the classifiers dict
classifiers = copy.deepcopy(CLASSIFIERS)
classifiers[u"extra_criterion"] = copy.deepcopy(classifiers[u"vøȼȺƀᵾłȺɍɏ"])
# Expect an error when we try to create the classifiers
with self.assertRaises(AITrainingRequestError):
ai_worker_api.create_classifiers(self.workflow_uuid, classifiers)
@mock.patch.object(AIClassifier, 'classifier_data')
def test_create_classifiers_upload_error(self, mock_data):
# Simulate an error occurring when uploading the trained classifier
mock_data.save.side_effect = IOError("OH NO!!!")
with self.assertRaises(AITrainingInternalError):
ai_worker_api.create_classifiers(self.workflow_uuid, CLASSIFIERS)
def test_create_classifiers_twice(self):
# Simulate repeated task execution for the same workflow
# Since these are executed sequentially, the second call should
# have no effect.
ai_worker_api.create_classifiers(self.workflow_uuid, CLASSIFIERS)
ai_worker_api.create_classifiers(self.workflow_uuid, CLASSIFIERS)
# Expect that the workflow was marked complete
workflow = AITrainingWorkflow.objects.get(uuid=self.workflow_uuid)
self.assertIsNot(workflow.completed_at, None)
# Expect that the classifier set was created with the correct data
self.assertIsNot(workflow.classifier_set, None)
saved_classifiers = workflow.classifier_set.classifier_data_by_criterion
self.assertItemsEqual(CLASSIFIERS, saved_classifiers)
def test_create_classifiers_no_training_examples(self):
# Create a workflow with no training examples
workflow = AITrainingWorkflow.objects.create(algorithm_id=ALGORITHM_ID)
# Expect an error when we try to create classifiers
with self.assertRaises(AITrainingInternalError):
ai_worker_api.create_classifiers(workflow.uuid, CLASSIFIERS)
def test_is_workflow_complete(self):
self.assertFalse(ai_worker_api.is_training_workflow_complete(self.workflow_uuid))
workflow = AITrainingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.mark_complete_and_save()
self.assertTrue(ai_worker_api.is_training_workflow_complete(self.workflow_uuid))
def test_is_workflow_complete_no_such_workflow(self):
with self.assertRaises(AITrainingRequestError):
ai_worker_api.is_training_workflow_complete('no such workflow')
@mock.patch.object(AITrainingWorkflow.objects, 'get')
def test_is_workflow_complete_database_error(self, mock_call):
mock_call.side_effect = DatabaseError("Oh no!")
with self.assertRaises(AITrainingInternalError):
ai_worker_api.is_training_workflow_complete(self.workflow_uuid)
class AIWorkerGradingTest(CacheResetTest):
"""
Tests for the AI API calls a worker would make when
completing a grading task.
"""
SCORES = {
u"vøȼȺƀᵾłȺɍɏ": 1,
u"ﻭɼค๓๓คɼ": 0
}
def setUp(self):
"""
Create a grading workflow in the database.
"""
# Create a submission
submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
self.submission_uuid = submission['uuid']
# Create a workflow for the submission
workflow = AIGradingWorkflow.start_workflow(self.submission_uuid, RUBRIC, ALGORITHM_ID)
self.workflow_uuid = workflow.uuid
# Associate the workflow with classifiers
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
CLASSIFIERS, rubric, ALGORITHM_ID, STUDENT_ITEM.get('course_id'), STUDENT_ITEM.get('item_id')
)
workflow.classifier_set = classifier_set
workflow.save()
def test_get_grading_task_params(self):
params = ai_worker_api.get_grading_task_params(self.workflow_uuid)
expected_params = {
'essay_text': ANSWER,
'classifier_set': CLASSIFIERS,
'algorithm_id': ALGORITHM_ID,
'valid_scores': {
u"vøȼȺƀᵾłȺɍɏ": [0, 1, 2],
u"ﻭɼค๓๓คɼ": [0, 1, 2]
}
}
self.assertItemsEqual(params, expected_params)
def test_get_grading_task_params_num_queries(self):
with self.assertNumQueries(6):
ai_worker_api.get_grading_task_params(self.workflow_uuid)
# The second time through, the queries used to determine
# the valid scores for a classifier should be cached
with self.assertNumQueries(2):
ai_worker_api.get_grading_task_params(self.workflow_uuid)
def test_get_grading_task_params_no_workflow(self):
with self.assertRaises(AIGradingRequestError):
ai_worker_api.get_grading_task_params("invalid_uuid")
def test_get_grading_task_params_no_classifiers(self):
# Remove the classifiers from the workflow
workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.classifier_set = None
workflow.save()
# Should get an error when retrieving task params
with self.assertRaises(AIGradingInternalError):
ai_worker_api.get_grading_task_params(self.workflow_uuid)
@mock.patch.object(AIGradingWorkflow.objects, 'get')
def test_get_grading_task_params_database_error(self, mock_call):
mock_call.side_effect = DatabaseError("KABOOM!")
with self.assertRaises(AIGradingInternalError):
ai_worker_api.get_grading_task_params(self.submission_uuid)
def test_invalid_classifier_data(self):
# Modify the classifier data so it is not valid JSON
invalid_json = "{"
for classifier in AIClassifier.objects.all():
classifier.classifier_data.save(uuid4().hex, ContentFile(invalid_json))
# Should get an error when retrieving task params
with self.assertRaises(AIGradingInternalError):
ai_worker_api.get_grading_task_params(self.workflow_uuid)
def test_create_assessment(self):
ai_worker_api.create_assessment(self.workflow_uuid, self.SCORES)
assessment = Assessment.objects.get(submission_uuid=self.submission_uuid)
self.assertEqual(assessment.points_earned, 1)
def test_create_assessment_no_workflow(self):
with self.assertRaises(AIGradingRequestError):
ai_worker_api.create_assessment("invalid_uuid", self.SCORES)
def test_create_assessment_workflow_already_complete(self):
# Try to create assessments for the same workflow multiple times
ai_worker_api.create_assessment(self.workflow_uuid, self.SCORES)
ai_worker_api.create_assessment(self.workflow_uuid, self.SCORES)
# Expect that only one assessment is created for the submission
num_assessments = Assessment.objects.filter(submission_uuid=self.submission_uuid).count()
self.assertEqual(num_assessments, 1)
@mock.patch.object(AIGradingWorkflow.objects, 'get')
def test_create_assessment_database_error_retrieving_workflow(self, mock_call):
mock_call.side_effect = DatabaseError("KABOOM!")
with self.assertRaises(AIGradingInternalError):
ai_worker_api.create_assessment(self.workflow_uuid, self.SCORES)
@mock.patch.object(Assessment.objects, 'create')
def test_create_assessment_database_error_complete_workflow(self, mock_call):
mock_call.side_effect = DatabaseError("KABOOM!")
with self.assertRaises(AIGradingInternalError):
ai_worker_api.create_assessment(self.workflow_uuid, self.SCORES)
def test_is_workflow_complete(self):
self.assertFalse(ai_worker_api.is_grading_workflow_complete(self.workflow_uuid))
workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.mark_complete_and_save()
self.assertTrue(ai_worker_api.is_grading_workflow_complete(self.workflow_uuid))
def test_is_workflow_complete_no_such_workflow(self):
with self.assertRaises(AIGradingRequestError):
ai_worker_api.is_grading_workflow_complete('no such workflow')
@mock.patch.object(AIGradingWorkflow.objects, 'get')
def test_is_workflow_complete_database_error(self, mock_call):
mock_call.side_effect = DatabaseError("Oh no!")
with self.assertRaises(AIGradingInternalError):
ai_worker_api.is_grading_workflow_complete(self.workflow_uuid)
# coding=utf-8
"""
Tests for AI worker tasks.
"""
from contextlib import contextmanager
import itertools
import mock
from django.test.utils import override_settings
from submissions import api as sub_api
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.worker.training import train_classifiers, InvalidExample
from openassessment.assessment.worker.grading import grade_essay
from openassessment.assessment.api import ai_worker as ai_worker_api
from openassessment.assessment.models import AITrainingWorkflow, AIGradingWorkflow, AIClassifierSet
from openassessment.assessment.worker.algorithm import (
AIAlgorithm, UnknownAlgorithm, AlgorithmLoadError, TrainingError, ScoreError
)
from openassessment.assessment.serializers import (
deserialize_training_examples, rubric_from_dict
)
from openassessment.assessment.errors import (
AITrainingRequestError, AIGradingInternalError, AIGradingRequestError
)
from openassessment.assessment.test.constants import (
EXAMPLES, RUBRIC, STUDENT_ITEM, ANSWER
)
class StubAIAlgorithm(AIAlgorithm):
"""
Stub implementation of a supervised ML algorithm.
"""
def train_classifier(self, examples):
return {}
def score(self, text, classifier, cache):
return 0
class ErrorStubAIAlgorithm(AIAlgorithm):
"""
Stub implementation that raises an exception during training.
"""
def train_classifier(self, examples):
raise TrainingError("Test error!")
def score(self, text, classifier, cache):
raise ScoreError("Test error!")
class InvalidScoreAlgorithm(AIAlgorithm):
"""
Stub implementation that returns a score that isn't in the rubric.
"""
SCORE_CYCLE = itertools.cycle([-100, 0.7, 1.2, 100])
def train_classifier(self, examples):
return {}
def score(self, text, classifier, cache):
return self.SCORE_CYCLE.next()
ALGORITHM_ID = u"test-stub"
ERROR_STUB_ALGORITHM_ID = u"error-stub"
UNDEFINED_CLASS_ALGORITHM_ID = u"undefined_class"
UNDEFINED_MODULE_ALGORITHM_ID = u"undefined_module"
INVALID_SCORE_ALGORITHM_ID = u"invalid_score"
AI_ALGORITHMS = {
ALGORITHM_ID: '{module}.StubAIAlgorithm'.format(module=__name__),
ERROR_STUB_ALGORITHM_ID: '{module}.ErrorStubAIAlgorithm'.format(module=__name__),
UNDEFINED_CLASS_ALGORITHM_ID: '{module}.NotDefinedAIAlgorithm'.format(module=__name__),
UNDEFINED_MODULE_ALGORITHM_ID: 'openassessment.not.valid.NotDefinedAIAlgorithm',
INVALID_SCORE_ALGORITHM_ID: '{module}.InvalidScoreAlgorithm'.format(module=__name__),
}
class CeleryTaskTest(CacheResetTest):
"""
Test case for Celery tasks.
"""
@contextmanager
def assert_retry(self, task, final_exception):
"""
Context manager that asserts that the given task was retried.
Args:
task (celery.app.task.Task): The Celery task object.
final_exception (Exception): The error thrown after retrying.
Raises:
AssertionError
"""
original_retry = task.retry
task.retry = mock.MagicMock()
task.retry.side_effect = lambda: original_retry(task)
try:
with self.assertRaises(final_exception):
yield
task.retry.assert_called_once()
finally:
task.retry = original_retry
class AITrainingTaskTest(CeleryTaskTest):
"""
Tests for the training task executed asynchronously by Celery workers.
"""
COURSE_ID = u"10923"
ITEM_ID = u"12231"
ALGORITHM_ID = u"test-stub"
ERROR_STUB_ALGORITHM_ID = u"error-stub"
UNDEFINED_CLASS_ALGORITHM_ID = u"undefined_class"
UNDEFINED_MODULE_ALGORITHM_ID = u"undefined_module"
AI_ALGORITHMS = {
ALGORITHM_ID: '{module}.StubAIAlgorithm'.format(module=__name__),
ERROR_STUB_ALGORITHM_ID: '{module}.ErrorStubAIAlgorithm'.format(module=__name__),
UNDEFINED_CLASS_ALGORITHM_ID: '{module}.NotDefinedAIAlgorithm'.format(module=__name__),
UNDEFINED_MODULE_ALGORITHM_ID: 'openassessment.not.valid.NotDefinedAIAlgorithm'
}
def setUp(self):
"""
Create a training workflow in the database.
"""
examples = deserialize_training_examples(EXAMPLES, RUBRIC)
workflow = AITrainingWorkflow.start_workflow(examples, self.COURSE_ID, self.ITEM_ID, self.ALGORITHM_ID)
self.workflow_uuid = workflow.uuid
def test_unknown_algorithm(self):
# Since we haven't overridden settings to configure the algorithms,
# the worker will not recognize the workflow's algorithm ID.
with self.assert_retry(train_classifiers, UnknownAlgorithm):
train_classifiers(self.workflow_uuid)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_skip_completed_workflow(self):
# Mark the grading workflow as complete
workflow = AITrainingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.mark_complete_and_save()
# The training task should short-circuit immediately, skipping calls
# to get parameters for the task.
actual_call = ai_worker_api.get_training_task_params
patched = 'openassessment.assessment.worker.grading.ai_worker_api.get_training_task_params'
with mock.patch(patched) as mock_call:
mock_call.side_effect = actual_call
train_classifiers(self.workflow_uuid)
self.assertFalse(mock_call.called)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_check_complete_error(self):
with self.assert_retry(train_classifiers, AITrainingRequestError):
train_classifiers("no such workflow uuid")
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_unable_to_load_algorithm_class(self):
# The algorithm is defined in the settings, but the class does not exist.
self._set_algorithm_id(UNDEFINED_CLASS_ALGORITHM_ID)
with self.assert_retry(train_classifiers, AlgorithmLoadError):
train_classifiers(self.workflow_uuid)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_unable_to_find_algorithm_module(self):
# The algorithm is defined in the settings, but the module can't be loaded
self._set_algorithm_id(UNDEFINED_MODULE_ALGORITHM_ID)
with self.assert_retry(train_classifiers, AlgorithmLoadError):
train_classifiers(self.workflow_uuid)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@mock.patch('openassessment.assessment.worker.training.ai_worker_api.get_training_task_params')
def test_get_training_task_params_api_error(self, mock_call):
mock_call.side_effect = AITrainingRequestError("Test error!")
with self.assert_retry(train_classifiers, AITrainingRequestError):
train_classifiers(self.workflow_uuid)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_invalid_training_example_error(self):
def _mutation(examples): # pylint: disable=C0111
del examples[0]['scores'][u"ﻭɼค๓๓คɼ"]
self._assert_mutated_examples(_mutation)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_training_example_missing_key(self):
def _mutation(examples): # pylint: disable=C0111
del examples[0]['scores']
self._assert_mutated_examples(_mutation)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_training_example_non_numeric_score(self):
def _mutation(examples): # pylint: disable=C0111
examples[0]['scores'][u"ﻭɼค๓๓คɼ"] = "not an integer"
self._assert_mutated_examples(_mutation)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_training_algorithm_error(self):
# Use a stub algorithm implementation that raises an exception during training
self._set_algorithm_id(ERROR_STUB_ALGORITHM_ID)
with self.assert_retry(train_classifiers, TrainingError):
train_classifiers(self.workflow_uuid)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@mock.patch('openassessment.assessment.worker.training.ai_worker_api.create_classifiers')
def test_create_classifiers_api_error(self, mock_call):
mock_call.side_effect = AITrainingRequestError("Test error!")
with self.assert_retry(train_classifiers, AITrainingRequestError):
train_classifiers(self.workflow_uuid)
def _set_algorithm_id(self, algorithm_id):
"""
Override the default algorithm ID for the training workflow.
Args:
algorithm_id (unicode): The new algorithm ID
Returns:
None
"""
workflow = AITrainingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.algorithm_id = algorithm_id
workflow.save()
def _assert_mutated_examples(self, mutate_func):
"""
Mutate the training examples returned by the API,
then check that we get the expected error.
This *may* be a little paranoid :)
Args:
mutate_func (callable): Function that accepts a single argument,
the list of example dictionaries.
Raises:
AssertionError
"""
params = ai_worker_api.get_training_task_params(self.workflow_uuid)
mutate_func(params['training_examples'])
call_signature = 'openassessment.assessment.worker.training.ai_worker_api.get_training_task_params'
with mock.patch(call_signature) as mock_call:
mock_call.return_value = params
with self.assert_retry(train_classifiers, InvalidExample):
train_classifiers(self.workflow_uuid)
class AIGradingTaskTest(CeleryTaskTest):
"""
Tests for the grading task executed asynchronously by Celery workers.
"""
# Classifier data
# Since this is controlled by the AI algorithm implementation,
# we could put anything here as long as it's JSON-serializable.
CLASSIFIERS = {
u"vøȼȺƀᵾłȺɍɏ": {
'name': u'𝒕𝒆𝒔𝒕 𝒄𝒍𝒂𝒔𝒔𝒊𝒇𝒊𝒆𝒓',
'data': u'Öḧ ḷëẗ ẗḧë ṡüṅ ḅëäẗ ḋöẅṅ üṗöṅ ṁÿ ḟäċë, ṡẗäṛṡ ẗö ḟïḷḷ ṁÿ ḋṛëäṁ"'
},
u"ﻭɼค๓๓คɼ": {
'name': u'𝒕𝒆𝒔𝒕 𝒄𝒍𝒂𝒔𝒔𝒊𝒇𝒊𝒆𝒓',
'data': u"І ам а тѓаvэlэѓ оf ъотЂ тімэ аиↁ ѕрасэ, то ъэ шЂэѓэ І Ђаvэ ъээи"
}
}
def setUp(self):
"""
Create a submission and grading workflow.
"""
# Create a submission
submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
self.submission_uuid = submission['uuid']
# Create a workflow for the submission
workflow = AIGradingWorkflow.start_workflow(self.submission_uuid, RUBRIC, ALGORITHM_ID)
self.workflow_uuid = workflow.uuid
# Associate the workflow with classifiers
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS, rubric, ALGORITHM_ID, STUDENT_ITEM.get('course_id'), STUDENT_ITEM.get('item_id')
)
workflow.classifier_set = classifier_set
workflow.save()
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_skip_completed_workflow(self):
# Mark the grading workflow as complete
workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.mark_complete_and_save()
# The grading task should short-circuit immediately, skipping calls
# to get parameters for the task.
actual_call = ai_worker_api.get_grading_task_params
patched = 'openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params'
with mock.patch(patched) as mock_call:
mock_call.side_effect = actual_call
grade_essay(self.workflow_uuid)
self.assertFalse(mock_call.called)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_check_complete_error(self):
with self.assert_retry(grade_essay, AIGradingRequestError):
grade_essay("no such workflow uuid")
@mock.patch('openassessment.assessment.api.ai_worker.create_assessment')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_algorithm_gives_invalid_score(self, mock_create_assessment):
# If an algorithm provides a score that isn't in the rubric,
# we should choose the closest valid score.
self._set_algorithm_id(INVALID_SCORE_ALGORITHM_ID)
# The first score given by the algorithm should be below the minimum valid score
# The second score will be between two valid scores (0 and 1), rounding up
grade_essay(self.workflow_uuid)
expected_scores = {
u"vøȼȺƀᵾłȺɍɏ": 0,
u"ﻭɼค๓๓คɼ": 1
}
mock_create_assessment.assert_called_with(self.workflow_uuid, expected_scores)
# The third score will be between two valid scores (1 and 2), rounding down
# The final score will be greater than the maximum score
self._reset_workflow()
grade_essay(self.workflow_uuid)
expected_scores = {
u"vøȼȺƀᵾłȺɍɏ": 1,
u"ﻭɼค๓๓คɼ": 2
}
mock_create_assessment.assert_called_with(self.workflow_uuid, expected_scores)
@mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_retrieve_params_error(self, mock_call):
mock_call.side_effect = AIGradingInternalError("Test error")
with self.assert_retry(grade_essay, AIGradingInternalError):
grade_essay(self.workflow_uuid)
def test_unknown_algorithm_id_error(self):
# Since we're not overriding settings, the algorithm ID won't be recognized
with self.assert_retry(grade_essay, UnknownAlgorithm):
grade_essay(self.workflow_uuid)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_algorithm_score_error(self):
self._set_algorithm_id(ERROR_STUB_ALGORITHM_ID)
with self.assert_retry(grade_essay, ScoreError):
grade_essay(self.workflow_uuid)
@mock.patch('openassessment.assessment.worker.grading.ai_worker_api.create_assessment')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_create_assessment_error(self, mock_call):
mock_call.side_effect = AIGradingInternalError
with self.assert_retry(grade_essay, AIGradingInternalError):
grade_essay(self.workflow_uuid)
@mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_params_missing_criterion_for_valid_scores(self, mock_call):
mock_call.return_value = {
'essay_text': 'test',
'classifier_set': {
u"vøȼȺƀᵾłȺɍɏ": {},
u"ﻭɼค๓๓คɼ": {}
},
'algorithm_id': ALGORITHM_ID,
'valid_scores': {}
}
with self.assert_retry(grade_essay, AIGradingInternalError):
grade_essay(self.workflow_uuid)
@mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_params_valid_scores_empty_list(self, mock_call):
mock_call.return_value = {
'essay_text': 'test',
'classifier_set': {
u"vøȼȺƀᵾłȺɍɏ": {},
u"ﻭɼค๓๓คɼ": {}
},
'algorithm_id': ALGORITHM_ID,
'valid_scores': {
u"vøȼȺƀᵾłȺɍɏ": [],
u"ﻭɼค๓๓คɼ": [0, 1, 2]
}
}
with self.assert_retry(grade_essay, AIGradingInternalError):
grade_essay(self.workflow_uuid)
def _set_algorithm_id(self, algorithm_id):
"""
Override the default algorithm ID for the grading workflow.
Args:
algorithm_id (unicode): The new algorithm ID
Returns:
None
"""
workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.algorithm_id = algorithm_id
workflow.save()
def _reset_workflow(self):
"""
Reset the workflow so we can re-use it.
"""
workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.completed_at = None
workflow.assessment = None
workflow.save()
"""
Define the ML algorithms used to train text classifiers and score essays.
"""
try:
import cPickle as pickle
except ImportError:
import pickle
from abc import ABCMeta, abstractmethod
from collections import namedtuple
import importlib
import traceback
import base64
from django.conf import settings
DEFAULT_AI_ALGORITHMS = {
'fake': 'openassessment.assessment.worker.algorithm.FakeAIAlgorithm',
'ease': 'openassessment.assessment.worker.algorithm.EaseAIAlgorithm'
}
class AIAlgorithmError(Exception):
"""
An error occurred when using an AI algorithm.
Superclass for more specific errors below.
"""
pass
class UnknownAlgorithm(AIAlgorithmError):
"""
Algorithm ID not found in the configuration.
"""
def __init__(self, algorithm_id):
msg = u"Could not find algorithm \"{}\" in the configuration.".format(algorithm_id)
super(UnknownAlgorithm, self).__init__(msg)
class AlgorithmLoadError(AIAlgorithmError):
"""
Unable to load the algorithm class.
"""
def __init__(self, algorithm_id, algorithm_path):
msg = (
u"Could not load algorithm \"{algorithm_id}\" from \"{path}\""
).format(algorithm_id=algorithm_id, path=algorithm_path)
super(AlgorithmLoadError, self).__init__(msg)
class TrainingError(AIAlgorithmError):
"""
An error occurred while training a classifier from example essays.
"""
pass
class ScoreError(AIAlgorithmError):
"""
An error occurred while scoring an essay.
"""
pass
class InvalidClassifier(ScoreError):
"""
The classifier could not be used by this algorithm to score an essay.
"""
pass
class AIAlgorithm(object):
"""
Abstract base class for a supervised ML text classification algorithm.
"""
__metaclass__ = ABCMeta
# Example essay used as input to the training algorithm
# `text` is a unicode string representing a student essay submission.
# `score` is an integer score.
# Note that `score` is used as an arbitrary label, so you could
# have a set of examples with non-adjacent scores.
ExampleEssay = namedtuple('ExampleEssay', ['text', 'score'])
@abstractmethod
def train_classifier(self, examples):
"""
Train a classifier based on example essays and scores.
Args:
examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
Returns:
JSON-serializable: The trained classifier. This MUST be JSON-serializable.
Raises:
TrainingError: The classifier could not be trained successfully.
"""
pass
@abstractmethod
def score(self, text, classifier, cache):
"""
Score an essay using a classifier.
Args:
text (unicode): The text to classify.
classifier (JSON-serializable): A classifier, using the same format
as `train_classifier()`.
cache (dict): An in-memory cache that persists until all criteria
in the rubric have been scored.
Raises:
InvalidClassifier: The provided classifier cannot be used by this algorithm.
ScoreError: An error occurred while scoring.
"""
pass
@classmethod
def algorithm_for_id(cls, algorithm_id):
"""
Load an algorithm based on Django settings configuration.
Args:
algorithm_id (unicode): The identifier for the algorithm,
which should be specified in Django settings.
Returns:
AIAlgorithm
Raises:
UnknownAlgorithm
"""
algorithms = getattr(settings, "ORA2_AI_ALGORITHMS", DEFAULT_AI_ALGORITHMS)
cls_path = algorithms.get(algorithm_id)
if cls_path is None:
raise UnknownAlgorithm(algorithm_id)
else:
module_path, _, name = cls_path.rpartition('.')
try:
algorithm_cls = getattr(importlib.import_module(module_path), name)
return algorithm_cls()
except (ImportError, ValueError, AttributeError):
raise AlgorithmLoadError(algorithm_id, cls_path)
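# Illustrative sketch (not part of the original module): how a worker resolves an
# algorithm class from Django settings via `algorithm_for_id`. The ID "my-algorithm"
# and the dotted path "myapp.algorithms.MyAlgorithm" are hypothetical placeholders.
def _example_algorithm_lookup():
    """Show the success and failure modes of `AIAlgorithm.algorithm_for_id`."""
    from django.test.utils import override_settings
    hypothetical_config = {'my-algorithm': 'myapp.algorithms.MyAlgorithm'}
    with override_settings(ORA2_AI_ALGORITHMS=hypothetical_config):
        try:
            # Resolves the dotted path and returns an instance of the class,
            # or raises AlgorithmLoadError if the module/class cannot be imported.
            return AIAlgorithm.algorithm_for_id('my-algorithm')
        except AlgorithmLoadError:
            pass
    try:
        AIAlgorithm.algorithm_for_id('not-configured')
    except UnknownAlgorithm:
        # Raised when the ID is missing from ORA2_AI_ALGORITHMS.
        pass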
class FakeAIAlgorithm(AIAlgorithm):
"""
Fake AI algorithm implementation that assigns scores randomly.
We use this for testing the pipeline independently of EASE.
"""
def train_classifier(self, examples):
"""
Store the possible score labels, which will allow
us to deterministically choose scores for other essays.
"""
unique_sorted_scores = sorted(list(set(example.score for example in examples)))
return {'scores': unique_sorted_scores}
def score(self, text, classifier, cache):
"""
Choose a score for the essay deterministically based on its length.
"""
if 'scores' not in classifier or len(classifier['scores']) == 0:
raise InvalidClassifier("Classifier must provide score labels")
else:
score_index = len(text) % len(classifier['scores'])
return classifier['scores'][score_index]
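# Illustrative sketch (not part of the original module): training and scoring with
# the fake algorithm. The essay text and score labels below are arbitrary examples.
def _example_fake_algorithm_usage():
    """Train the fake algorithm on two labeled essays, then score a new one."""
    algorithm = FakeAIAlgorithm()
    examples = [
        AIAlgorithm.ExampleEssay(text=u"A short essay.", score=0),
        AIAlgorithm.ExampleEssay(text=u"A somewhat longer essay.", score=2),
    ]
    # The "classifier" is just the sorted set of score labels: {'scores': [0, 2]}
    classifier = algorithm.train_classifier(examples)
    # The score is the essay length modulo the number of labels, so it is
    # deterministic but not meaningful -- useful only for exercising the pipeline.
    return algorithm.score(u"Another essay to grade.", classifier, cache={})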
class EaseAIAlgorithm(AIAlgorithm):
"""
Wrapper for the EASE library.
See https://github.com/edx/ease for more information.
Since EASE has many system dependencies, we don't include it explicitly
in edx-ora2 requirements. When testing locally, we use the fake
algorithm implementation instead.
"""
def train_classifier(self, examples):
"""
Train a text classifier using the EASE library.
The classifier is serialized as a dictionary with keys:
* 'feature_extractor': The pickled feature extractor (transforms text into a numeric feature vector).
* 'score_classifier': The pickled classifier (uses the feature vector to assign scores to essays).
Because we are using `pickle`, the serialized classifiers are unfortunately
tied to the particular version of ease/scikit-learn/numpy/scipy/nltk that we
have installed at the time of training.
Args:
examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
Returns:
dict: The serializable classifier.
Raises:
TrainingError: The classifier could not be trained successfully.
"""
feature_ext, classifier = self._train_classifiers(examples)
return self._serialize_classifiers(feature_ext, classifier)
def score(self, text, classifier, cache):
"""
Score essays using EASE.
Args:
text (unicode): The essay text to score.
classifier (dict): The serialized classifiers created during training.
cache (dict): An in-memory cache that persists until all criteria
in the rubric have been scored.
Returns:
int
Raises:
InvalidClassifier
ScoreError
"""
try:
from ease.essay_set import EssaySet # pylint:disable=F0401
except ImportError:
msg = u"Could not import EASE to grade essays."
raise ScoreError(msg)
feature_extractor, score_classifier = self._deserialize_classifiers(classifier)
# The following is a modified version of `ease.grade.grade()`,
# skipping things we don't use (cross-validation, feedback)
# and caching essay sets across criteria. This allows us to
# avoid some expensive NLTK operations, particularly tagging
# parts of speech.
try:
# Get the essay set from the cache or create it.
# Since all essays to be graded are assigned a dummy
# score of "0", we can safely re-use the essay set
# for each criterion in the rubric.
# EASE can't handle non-ASCII unicode, so we need
# to strip out non-ASCII chars.
essay_set = cache.get('grading_essay_set')
if essay_set is None:
essay_set = EssaySet(essaytype="test")
essay_set.add_essay(text.encode('ascii', 'ignore'), 0)
cache['grading_essay_set'] = essay_set
# Extract features from the text
features = feature_extractor.gen_feats(essay_set)
# Predict a score
return int(score_classifier.predict(features)[0])
except Exception:
msg = (
u"An unexpected error occurred while using "
u"EASE to score an essay: {traceback}"
).format(traceback=traceback.format_exc())
raise ScoreError(msg)
def _train_classifiers(self, examples):
"""
Use EASE to train classifiers.
Args:
examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
Returns:
tuple of `feature_extractor` (an `ease.feature_extractor.FeatureExtractor` object)
and `classifier` (a `sklearn.ensemble.GradientBoostingClassifier` object).
Raises:
TrainingError: Could not load EASE or could not complete training.
"""
try:
from ease.create import create # pylint: disable=F0401
except ImportError:
msg = u"Could not import EASE to perform training."
raise TrainingError(msg)
input_essays = [example.text for example in examples]
input_scores = [example.score for example in examples]
try:
# Train the classifiers
# The third argument is the essay prompt, which EASE uses
# to check if an input essay is too similar to the prompt.
# Since we're not using this feature, we pass in an empty string.
results = create(input_essays, input_scores, "")
except Exception:
msg = (
u"An unexpected error occurred while using "
u"EASE to train classifiers: {traceback}"
).format(traceback=traceback.format_exc())
raise TrainingError(msg)
if not results.get('success', False):
msg = (
u"Errors occurred while training classifiers "
u"using EASE: {errors}"
).format(errors=results.get('errors', []))
raise TrainingError(msg)
return results.get('feature_ext'), results.get('classifier')
def _serialize_classifiers(self, feature_ext, classifier):
"""
Serialize the classifier objects.
Args:
feature_extractor (ease.feature_extractor.FeatureExtractor)
classifier (sklearn.ensemble.GradientBoostingClassifier)
Returns:
dict containing the pickled classifiers
Raises:
TrainingError: Could not serialize the classifiers.
"""
try:
return {
'feature_extractor': base64.b64encode(pickle.dumps(feature_ext)),
'score_classifier': base64.b64encode(pickle.dumps(classifier)),
}
except Exception as ex:
msg = (
u"An error occurred while serializing the classifiers "
u"created by EASE: {ex}"
).format(ex=ex)
raise TrainingError(msg)
def _deserialize_classifiers(self, classifier_data):
"""
Deserialize the classifier objects.
Args:
classifier_data (dict): The serialized classifiers.
Returns:
tuple of `(feature_extractor, score_classifier)`
Raises:
InvalidClassifier
"""
if not isinstance(classifier_data, dict):
raise InvalidClassifier("Classifier must be a dictionary.")
try:
classifier_str = classifier_data.get('feature_extractor').encode('utf-8')
feature_extractor = pickle.loads(base64.b64decode(classifier_str))
except Exception as ex:
msg = (
u"An error occurred while deserializing the "
u"EASE feature extractor: {ex}"
).format(ex=ex)
raise InvalidClassifier(msg)
try:
score_classifier_str = classifier_data.get('score_classifier').encode('utf-8')
score_classifier = pickle.loads(base64.b64decode(score_classifier_str))
except Exception as ex:
msg = (
u"An error occurred while deserializing the "
u"EASE score classifier: {ex}"
).format(ex=ex)
raise InvalidClassifier(msg)
return feature_extractor, score_classifier
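# Illustrative sketch (not part of the original module): the pickle + base64
# round-trip performed by the two helpers above, using plain picklable objects
# in place of real EASE/scikit-learn objects.
def _example_classifier_round_trip():
    """Serialize a dummy classifier pair to text, then restore it."""
    algorithm = EaseAIAlgorithm()
    dummy_extractor = {'vocab': ['food', 'agriculture']}
    dummy_classifier = {'weights': [0.1, 0.9]}
    # Both values in `serialized` are base64-encoded pickle strings,
    # so they can be stored as text (e.g. in a database column).
    serialized = algorithm._serialize_classifiers(dummy_extractor, dummy_classifier)
    restored_extractor, restored_classifier = algorithm._deserialize_classifiers(serialized)
    assert restored_extractor == dummy_extractor
    assert restored_classifier == dummy_classifier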
"""
Asynchronous tasks for grading essays using text classifiers.
"""
import datetime
from celery import task
from django.db import DatabaseError
from django.conf import settings
from celery.utils.log import get_task_logger
from dogapi import dog_stats_api
from openassessment.assessment.api import ai_worker as ai_worker_api
from openassessment.assessment.errors import (
AIError, AIGradingInternalError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
)
from .algorithm import AIAlgorithm, AIAlgorithmError
from openassessment.assessment.models.ai import AIGradingWorkflow
MAX_RETRIES = 2
logger = get_task_logger(__name__)
# If the Django settings define a low-priority queue, use that.
# Otherwise, use the default queue.
RESCHEDULE_TASK_QUEUE = getattr(settings, 'LOW_PRIORITY_QUEUE', None)
@task(max_retries=MAX_RETRIES) # pylint: disable=E1102
@dog_stats_api.timed('openassessment.assessment.ai.grade_essay.time')
def grade_essay(workflow_uuid):
"""
Asynchronous task to grade an essay using a text classifier
(trained using a supervised ML algorithm).
If the task could not be completed successfully,
it will be retried a few times; if it continues to fail,
it is left incomplete. Incomplete tasks can be rescheduled
manually through the AI API.
Args:
workflow_uuid (str): The UUID of the workflow associated
with this grading task.
Returns:
None
Raises:
AIError: An error occurred while making an AI worker API call.
AIAlgorithmError: An error occurred while retrieving or using an AI algorithm.
"""
# Short-circuit if the workflow is already marked complete
# This is an optimization, but grading tasks could still
# execute multiple times depending on when they get picked
# up by workers and marked complete.
try:
if ai_worker_api.is_grading_workflow_complete(workflow_uuid):
return
except AIError:
msg = (
u"An unexpected error occurred while checking the "
u"completion of grading workflow with UUID {uuid}"
).format(uuid=workflow_uuid)
logger.exception(msg)
raise grade_essay.retry()
# Retrieve the task parameters
try:
params = ai_worker_api.get_grading_task_params(workflow_uuid)
essay_text = params['essay_text']
classifier_set = params['classifier_set']
algorithm_id = params['algorithm_id']
valid_scores = params['valid_scores']
except (AIError, KeyError):
msg = (
u"An error occurred while retrieving the AI grading task "
u"parameters for the workflow with UUID {}"
).format(workflow_uuid)
logger.exception(msg)
raise grade_essay.retry()
# Validate that we have valid scores for each criterion
for criterion_name in classifier_set.keys():
msg = None
if criterion_name not in valid_scores:
msg = (
u"Could not find {criterion} in the list of valid scores "
u"for grading workflow with UUID {uuid}"
).format(criterion=criterion_name, uuid=workflow_uuid)
elif len(valid_scores[criterion_name]) == 0:
msg = (
u"Valid scores for {criterion} is empty for "
u"grading workflow with UUID {uuid}"
).format(criterion=criterion_name, uuid=workflow_uuid)
if msg:
logger.error(msg)
raise AIGradingInternalError(msg)
# Retrieve the AI algorithm
try:
algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
except AIAlgorithmError:
msg = (
u"An error occurred while retrieving "
u"the algorithm ID (grading workflow UUID {})"
).format(workflow_uuid)
logger.exception(msg)
raise grade_essay.retry()
# Use the algorithm to evaluate the essay for each criterion
# Provide an in-memory cache so the algorithm can re-use
# results for multiple rubric criteria.
try:
cache = dict()
scores_by_criterion = {
criterion_name: _closest_valid_score(
algorithm.score(essay_text, classifier, cache),
valid_scores[criterion_name]
)
for criterion_name, classifier in classifier_set.iteritems()
}
except AIAlgorithmError:
msg = (
u"An error occurred while scoring essays using "
u"an AI algorithm (worker workflow UUID {})"
).format(workflow_uuid)
logger.exception(msg)
raise grade_essay.retry()
# Create the assessment and mark the workflow complete
try:
ai_worker_api.create_assessment(workflow_uuid, scores_by_criterion)
except AIError:
msg = (
u"An error occurred while creating assessments "
u"for the AI grading workflow with UUID {uuid}. "
u"The assessment scores were: {scores}"
).format(uuid=workflow_uuid, scores=scores_by_criterion)
logger.exception(msg)
raise grade_essay.retry()
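# Illustrative sketch (not part of the original module): how callers queue this
# task. Passing only the workflow UUID keeps the Celery message small and lets
# the worker re-query the current workflow state when it actually runs.
def _example_schedule_grading(workflow_uuid):
    """Queue an asynchronous grading run for an existing grading workflow."""
    # Normal production path: enqueue the task and return immediately.
    grade_essay.apply_async(args=[workflow_uuid])
    # For tests, calling grade_essay(workflow_uuid) directly (without
    # apply_async) runs the task synchronously in-process.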
@task(queue=RESCHEDULE_TASK_QUEUE, max_retries=MAX_RETRIES) # pylint: disable=E1102
@dog_stats_api.timed('openassessment.assessment.ai.reschedule_grading_tasks.time')
def reschedule_grading_tasks(course_id, item_id):
"""
Reschedules all incomplete grading workflows with the specified parameters.
Args:
course_id (unicode): The course whose incomplete grading workflows will be rescheduled.
item_id (unicode): The item within that course whose incomplete grading workflows will be rescheduled.
Raises:
AIReschedulingInternalError
AIGradingInternalError
"""
# Logs the start of the rescheduling process and records the start time so that total time can be calculated later.
_log_start_reschedule_grading(course_id=course_id, item_id=item_id)
start_time = datetime.datetime.now()
# Finds all incomplete grading workflows
try:
grading_workflows = AIGradingWorkflow.get_incomplete_workflows(course_id, item_id)
except (DatabaseError, AIGradingWorkflow.DoesNotExist) as ex:
msg = (
u"An unexpected error occurred while retrieving all incomplete "
u"grading tasks for course_id: {cid} and item_id: {iid}: {ex}"
).format(cid=course_id, iid=item_id, ex=ex)
logger.exception(msg)
raise AIReschedulingInternalError(msg)
# Count how many operations failed. If any did, the rescheduling process will be retried.
failures = 0
# A dictionary mapping tuples of (rubric, course_id, item_id, algorithm_id) to completed classifier sets.
# Used to avoid repeated queries that would return the same value. This loop memoizes the query.
maintained_classifiers = {}
# Try to grade all incomplete grading workflows
for workflow in grading_workflows:
# We will always go through the process of finding the most recent set of classifiers for an
# incomplete grading workflow. The rationale for this is that if we are ever rescheduling
# grading, we likely had classifiers which were not working. This way, we always take the last
# completed set.
# Note that this solution will lead to failure if "Train Classifiers" and "Refinish Grading Tasks"
# are called in rapid succession. This is part of the reason this button is in the admin view.
# Tries to find a set of classifiers that are already defined in our maintained_classifiers based on a
# description of the workflow in the form of a tuple (rubric, course_id, item_id, algorithm_id)
workflow_description = (workflow.rubric, course_id, item_id, workflow.algorithm_id)
found_classifiers = maintained_classifiers.get(workflow_description)
# If no set of classifiers is found, we perform the query to try to find them. We take the most recent
# and add it to our dictionary of maintained classifiers for future reference.
if found_classifiers is None:
try:
found = workflow.assign_most_recent_classifier_set()
if found:
found_classifiers = workflow.classifier_set
maintained_classifiers[workflow_description] = found_classifiers
else:
msg = u"No applicable classifiers yet exist for essay with uuid='{}'".format(workflow.uuid)
logger.info(msg)
except DatabaseError as ex:
msg = (
u"A Database error occurred while trying to assign classifiers to an essay with uuid='{id}'"
).format(id=workflow.uuid)
logger.exception(msg)
# If we found classifiers in our memoized lookup dictionary, we assign them and save.
else:
workflow.classifier_set = found_classifiers
try:
workflow.save()
logger.info(
u"Classifiers were successfully assigned to grading workflow with uuid={}".format(workflow.uuid)
)
except DatabaseError as ex:
msg = (
u"A Database error occurred while trying to save classifiers to an essay with uuid='{id}'"
).format(id=workflow.uuid)
logger.exception(msg)
if found_classifiers is not None:
# Now we should (unless we had an exception above) have a classifier set.
# Try to schedule the grading
try:
grade_essay.apply_async(args=[workflow.uuid])
logger.info(
u"Rescheduling of grading was successful for grading workflow with uuid='{}'".format(workflow.uuid)
)
except ANTICIPATED_CELERY_ERRORS as ex:
msg = (
u"An error occurred while try to grade essay with uuid='{id}': {ex}"
).format(id=workflow.uuid, ex=ex)
logger.exception(msg)
failures += 1
# If we couldn't assign classifiers, we failed.
else:
failures += 1
# Logs the data from our rescheduling attempt
time_delta = datetime.datetime.now() - start_time
_log_complete_reschedule_grading(
course_id=course_id, item_id=item_id, seconds=time_delta.total_seconds(), success=(failures == 0)
)
# If one or more of these failed, we want to retry rescheduling. Note that this retry is executed in such a way
# that if it fails, an AIGradingInternalError will be raised with the number of failures on the last attempt (i.e.
the total number of workflows matching these criteria that still remain to be graded).
if failures > 0:
try:
raise AIGradingInternalError(
u"In an attempt to reschedule grading workflows, there were {} failures.".format(failures)
)
except AIGradingInternalError as ex:
raise reschedule_grading_tasks.retry()
def _closest_valid_score(score, valid_scores):
"""
Return the closest valid score for a given score.
This is necessary, since rubric scores may be non-contiguous.
Args:
score (int or float): The score assigned by the algorithm.
valid_scores (list of int): Valid scores for this criterion,
assumed to be sorted in ascending order.
Returns:
int
"""
# If the score is already valid, return it
if score in valid_scores:
return score
# Otherwise, find the closest score in the list.
closest = valid_scores[0]
delta = abs(score - closest)
for valid in valid_scores[1:]:
new_delta = abs(score - valid)
if new_delta < delta:
closest = valid
delta = new_delta
return closest
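# Illustrative sketch (not part of the original module): the behaviour of
# `_closest_valid_score` on a non-contiguous rubric, using made-up point values.
def _example_closest_valid_score():
    """Snap raw algorithm outputs onto a criterion's valid point values."""
    valid = [0, 1, 3]   # e.g. a criterion whose options are worth 0, 1, and 3 points
    assert _closest_valid_score(1, valid) == 1    # already valid: returned unchanged
    assert _closest_valid_score(2, valid) == 1    # equidistant: the lower score wins
    assert _closest_valid_score(5, valid) == 3    # above the maximum: clamped down
    assert _closest_valid_score(-1, valid) == 0   # below the minimum: clamped up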
def _log_start_reschedule_grading(course_id=None, item_id=None):
"""
Sends data about the rescheduling_grading task to datadog
Args:
course_id (unicode): the course id to associate with the log start
item_id (unicode): the item id to tag with the log start
"""
tags = [
u"course_id:{}".format(course_id),
u"item_id:{}".format(item_id),
]
dog_stats_api.increment('openassessment.assessment.ai_task.AIRescheduleGrading.scheduled_count', tags=tags)
msg = u"Rescheduling of incomplete grading tasks began for course_id={cid} and item_id={iid}"
logger.info(msg.format(cid=course_id, iid=item_id))
def _log_complete_reschedule_grading(course_id=None, item_id=None, seconds=-1, success=False):
"""
Sends the total time the rescheduling of grading tasks took to datadog
(just the time taken to reschedule tasks, not the time necessary to complete them)
Note that this function may be invoked multiple times per call to reschedule_grading_tasks,
because the time is recorded for EACH ATTEMPT (i.e. if we fail to schedule grading due to an error,
we log the time elapsed before trying again).
Args:
course_id (unicode): the course_id to tag the task with
item_id (unicode): the item_id to tag the task with
seconds (int): the number of seconds that elapsed during the rescheduling task.
success (bool): indicates whether or not all attempts to reschedule were successful
"""
tags = [
u"course_id:{}".format(course_id),
u"item_id:{}".format(item_id),
u"success:{}".format(success)
]
dog_stats_api.histogram('openassessment.assessment.ai_task.AIRescheduleGrading.turnaround_time', seconds, tags=tags)
dog_stats_api.increment('openassessment.assessment.ai_task.AIRescheduleGrading.completed_count', tags=tags)
msg = u"Rescheduling of incomplete grading tasks for course_id={cid} and item_id={iid} completed in {s} seconds."
if not success:
msg += u" At least one grading task failed due to internal error."
msg = msg.format(cid=course_id, iid=item_id, s=seconds)
logger.info(msg)
"""
Asynchronous tasks for training classifiers from examples.
"""
import datetime
from collections import defaultdict
from celery import task
from celery.utils.log import get_task_logger
from dogapi import dog_stats_api
from django.conf import settings
from django.db import DatabaseError
from openassessment.assessment.api import ai_worker as ai_worker_api
from openassessment.assessment.errors import AIError, ANTICIPATED_CELERY_ERRORS
from .algorithm import AIAlgorithm, AIAlgorithmError
from .grading import reschedule_grading_tasks
from openassessment.assessment.errors.ai import AIGradingInternalError
from openassessment.assessment.models.ai import AITrainingWorkflow
MAX_RETRIES = 2
logger = get_task_logger(__name__)
# If the Django settings define a low-priority queue, use that.
# Otherwise, use the default queue.
TRAINING_TASK_QUEUE = getattr(settings, 'LOW_PRIORITY_QUEUE', None)
RESCHEDULE_TASK_QUEUE = getattr(settings, 'LOW_PRIORITY_QUEUE', None)
class InvalidExample(Exception):
"""
The example retrieved from the AI API had an invalid format.
"""
def __init__(self, example_dict, msg):
err_msg = u"Training example \"{example}\" is not valid: {msg}".format(
example=example_dict,
msg=msg
)
super(InvalidExample, self).__init__(err_msg)
@task(queue=TRAINING_TASK_QUEUE, max_retries=MAX_RETRIES) # pylint: disable=E1102
@dog_stats_api.timed('openassessment.assessment.ai.train_classifiers.time')
def train_classifiers(workflow_uuid):
"""
Asynchronous task to train classifiers for AI grading.
This task uses the AI API to retrieve task parameters
(algorithm ID and training examples) and upload
the trained classifiers.
If the task could not be completed successfully,
it is retried a few times. If it continues to fail,
it is left incomplete. Since the AI API tracks all
training tasks in the database, incomplete tasks
can always be rescheduled manually later.
Args:
workflow_uuid (str): The UUID of the workflow associated
with this training task.
Returns:
None
Raises:
AIError: An error occurred during a request to the AI API.
AIAlgorithmError: An error occurred while training the AI classifiers.
InvalidExample: The training examples provided by the AI API were not valid.
"""
# Short-circuit if the workflow is already marked complete
# This is an optimization, but training tasks could still
# execute multiple times depending on when they get picked
# up by workers and marked complete.
try:
if ai_worker_api.is_training_workflow_complete(workflow_uuid):
return
except AIError:
msg = (
u"An unexpected error occurred while checking the "
u"completion of training workflow with UUID {uuid}"
).format(uuid=workflow_uuid)
logger.exception(msg)
raise train_classifiers.retry()
# Retrieve task parameters
try:
params = ai_worker_api.get_training_task_params(workflow_uuid)
examples = params['training_examples']
algorithm_id = params['algorithm_id']
course_id = params['course_id']
item_id = params['item_id']
except (AIError, KeyError):
msg = (
u"An error occurred while retrieving AI training "
u"task parameters for the workflow with UUID {}"
).format(workflow_uuid)
logger.exception(msg)
raise train_classifiers.retry()
# Retrieve the ML algorithm to use for training
# (based on task params and worker configuration)
try:
algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
except AIAlgorithmError:
msg = (
u"An error occurred while loading the "
u"AI algorithm (training workflow UUID {})"
).format(workflow_uuid)
logger.exception(msg)
raise train_classifiers.retry()
except AIError:
msg = (
u"An error occurred while retrieving "
u"the algorithm ID (training workflow UUID {})"
).format(workflow_uuid)
logger.exception(msg)
raise train_classifiers.retry()
# Train a classifier for each criterion
# The AIAlgorithm subclass is responsible for ensuring that
# the trained classifiers are JSON-serializable.
try:
classifier_set = {
criterion_name: algorithm.train_classifier(examples_dict)
for criterion_name, examples_dict
in _examples_by_criterion(examples).iteritems()
}
except InvalidExample:
msg = (
u"Training example format was not valid "
u"(training workflow UUID {})"
).format(workflow_uuid)
logger.exception(msg)
raise train_classifiers.retry()
except AIAlgorithmError:
msg = (
u"An error occurred while training AI classifiers "
u"(training workflow UUID {})"
).format(workflow_uuid)
logger.exception(msg)
raise train_classifiers.retry()
# Upload the classifiers
# (implicitly marks the workflow complete)
try:
ai_worker_api.create_classifiers(workflow_uuid, classifier_set)
except AIError:
msg = (
u"An error occurred while uploading trained classifiers "
u"(training workflow UUID {})"
).format(workflow_uuid)
logger.exception(msg)
raise train_classifiers.retry()
# Upon successful completion of the creation of classifiers, we will try to automatically schedule any
# grading tasks for the same item.
try:
reschedule_grading_tasks.apply_async(args=[course_id, item_id])
except AIGradingInternalError as ex:
msg = (
u"An error occured while trying to regrade all ungraded assignments"
u"after classifiers were trained successfully: {}"
).format(ex)
logger.exception(msg)
# We don't retry here, because the rescheduling task already retries on its own.
raise
@task(queue=RESCHEDULE_TASK_QUEUE, max_retries=MAX_RETRIES) #pylint: disable=E1102
@dog_stats_api.timed('openassessment.assessment.ai.reschedule_training_tasks.time')
def reschedule_training_tasks(course_id, item_id):
"""
Reschedule all incomplete training tasks for the given course item.
Args:
course_id (unicode): The course to search for unfinished training workflows.
item_id (unicode): The item within that course whose unfinished training workflows will be rescheduled.
Raises:
AIReschedulingInternalError
DatabaseError
"""
# Starts logging the details of the rescheduling
_log_start_reschedule_training(course_id=course_id, item_id=item_id)
start_time = datetime.datetime.now()
# Run a query to find the incomplete training workflows
try:
training_workflows = AITrainingWorkflow.get_incomplete_workflows(course_id, item_id)
except (DatabaseError, AITrainingWorkflow.DoesNotExist) as ex:
msg = (
u"An unexpected error occurred while retrieving all incomplete "
u"training tasks for course_id: {cid} and item_id: {iid}: {ex}"
).format(cid=course_id, iid=item_id, ex=ex)
logger.exception(msg)
raise reschedule_training_tasks.retry()
# Tries to train every workflow that has not completed.
for target_workflow in training_workflows:
try:
train_classifiers.apply_async(args=[target_workflow.uuid])
logger.info(
u"Rescheduling of training was successful for workflow with uuid{}".format(target_workflow.uuid)
)
except ANTICIPATED_CELERY_ERRORS as ex:
msg = (
u"An unexpected error occurred while scheduling the task for training workflow with UUID {id}: {ex}"
).format(id=target_workflow.uuid, ex=ex)
logger.exception(msg)
time_delta = datetime.datetime.now() - start_time
_log_complete_reschedule_training(
course_id=course_id, item_id=item_id, seconds=time_delta.total_seconds(), success=False
)
raise reschedule_training_tasks.retry()
# Logs the total time to reschedule all training of classifiers if not logged beforehand by exception.
time_delta = datetime.datetime.now() - start_time
_log_complete_reschedule_training(
course_id=course_id, item_id=item_id, seconds=time_delta.total_seconds(), success=True
)
def _examples_by_criterion(examples):
"""
Transform the examples returned by the AI API into our internal format.
Args:
examples (list): Training examples of the form returned by the AI API.
Each element of the list should be a dictionary with keys
'text' (the essay text) and 'scores' (a dictionary mapping
criterion names to numeric scores).
Returns:
dict: keys are the criterion names, and each value is a list of `AIAlgorithm.ExampleEssay` objects
Raises:
InvalidExample: The provided training examples are not in a valid format.
"""
internal_examples = defaultdict(list)
prev_criteria = None
for example_dict in examples:
# Check that the example contains the expected keys
try:
scores_dict = example_dict['scores']
text = unicode(example_dict['text'])
except KeyError:
raise InvalidExample(example_dict, u'Example dict must have keys "scores" and "text"')
# Check that the criteria names are consistent across examples
if prev_criteria is None:
prev_criteria = set(scores_dict.keys())
else:
if prev_criteria != set(scores_dict.keys()):
msg = (
u"Example criteria do not match "
u"the previous example: {criteria}"
).format(criteria=prev_criteria)
raise InvalidExample(example_dict, msg)
for criterion_name, score in scores_dict.iteritems():
try:
score = int(score)
except ValueError:
raise InvalidExample(example_dict, u"Example score is not an integer")
else:
internal_ex = AIAlgorithm.ExampleEssay(text, score)
internal_examples[criterion_name].append(internal_ex)
return internal_examples
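# Illustrative sketch (not part of the original module): the transformation
# performed by `_examples_by_criterion`, using made-up criterion names.
def _example_examples_by_criterion():
    """Group API-style training examples into per-criterion ExampleEssay lists."""
    api_examples = [
        {'text': u'First essay', 'scores': {u'vocabulary': 0, u'grammar': 1}},
        {'text': u'Second essay', 'scores': {u'vocabulary': 2, u'grammar': 0}},
    ]
    grouped = _examples_by_criterion(api_examples)
    # grouped[u'vocabulary'] == [ExampleEssay(u'First essay', 0), ExampleEssay(u'Second essay', 2)]
    # grouped[u'grammar'] == [ExampleEssay(u'First essay', 1), ExampleEssay(u'Second essay', 0)]
    # Missing keys, inconsistent criteria across examples, or non-integer
    # scores raise InvalidExample instead.
    return grouped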
def _log_start_reschedule_training(course_id=None, item_id=None):
"""
Sends data about the rescheduling_training task to datadog
Args:
course_id (unicode): the course id to associate with the log start
item_id (unicode): the item id to tag with the log start
"""
tags = [
u"course_id:{}".format(course_id),
u"item_id:{}".format(item_id),
]
dog_stats_api.increment('openassessment.assessment.ai_task.AIRescheduleTraining.scheduled_count', tags=tags)
msg = u"Rescheduling of incomplete training tasks began for course_id={cid} and item_id={iid}"
logger.info(msg.format(cid=course_id, iid=item_id))
def _log_complete_reschedule_training(course_id=None, item_id=None, seconds=-1, success=False):
"""
Sends the total time the rescheduling of training tasks took to datadog
Note that this function may be invoked multiple times per call to reschedule_training_tasks,
because the time is recorded for EACH ATTEMPT (i.e. if we fail to schedule training due to an error,
we log the time elapsed before trying again).
Args:
course_id (unicode): the course_id to tag the task with
item_id (unicode): the item_id to tag the task with
seconds (int): the number of seconds that elapsed during the rescheduling task.
success (bool): indicates whether or not all attempts to reschedule were successful
"""
tags = [
u"course_id:{}".format(course_id),
u"item_id:{}".format(item_id),
u"success:{}".format(success)
]
dog_stats_api.histogram('openassessment.assessment.ai_task.AIRescheduleTraining.turnaround_time', seconds, tags=tags)
dog_stats_api.increment('openassessment.assessment.ai_task.AIRescheduleTraining.completed_count', tags=tags)
msg = u"Rescheduling of incomplete training tasks for course_id={cid} and item_id={iid} completed in {s} seconds."
if not success:
msg += u" At least one rescheduling task failed due to internal error."
msg = msg.format(cid=course_id, iid=item_id, s=seconds)
logger.info(msg)
"""
Measure the time taken by the
find_active_assessments,
get_submission_for_review, and
get_submission_for_over_grading
methods for a particular set of workflows.
"""
import random
import datetime
from django.core.management.base import BaseCommand
from openassessment.assessment.models import PeerWorkflow
class Command(BaseCommand):
"""
Note the time taken by queries.
"""
help = ("Test the performance for "
"find_active_assessments, "
"get_submission_for_review & "
"get_submission_for_over_grading"
"methods.")
def __init__(self, *args, **kwargs):
super(Command, self).__init__(*args, **kwargs)
def handle(self, *args, **options):
"""
Execute the command.
Args:
None
"""
peer_workflow_count = PeerWorkflow.objects.filter(submission_uuid__isnull=False).count()
peer_workflow_ids = [random.randint(1, peer_workflow_count) for num in range(100)]
peer_workflows = list(PeerWorkflow.objects.filter(id__in=peer_workflow_ids))
pw_dt_before = datetime.datetime.now()
for peer_workflow in peer_workflows:
peer_workflow.find_active_assessments()
pw_dt_after = datetime.datetime.now()
time_taken = pw_dt_after - pw_dt_before
print "Time taken by (find_active_assessments) method Is: %s " % time_taken
#### get_submission_for_review ####
pw_dt_before = datetime.datetime.now()
for peer_workflow in peer_workflows:
peer_workflow.get_submission_for_review(2)
pw_dt_after = datetime.datetime.now()
time_taken = pw_dt_after - pw_dt_before
print "Time taken by (get_submission_for_review) method Is: %s " % time_taken
#### get_submission_for_over_grading ####
pw_dt_before = datetime.datetime.now()
for peer_workflow in peer_workflows:
peer_workflow.get_submission_for_over_grading()
pw_dt_after = datetime.datetime.now()
time_taken = pw_dt_after - pw_dt_before
print "Time taken by (get_submission_for_over_grading) method Is: %s " % time_taken
# -*- coding: utf-8 -*-
"""
Simulate failure of the worker AI grading tasks.
When the workers fail to successfully complete AI grading,
the AI grading workflow in the database will never be marked complete.
To simulate the error condition, therefore, we create incomplete
AI grading workflows without scheduling a grading task.
To recover, a staff member can reschedule incomplete grading tasks.
"""
from django.core.management.base import BaseCommand, CommandError
from submissions import api as sub_api
from openassessment.assessment.models import AIGradingWorkflow, AIClassifierSet
from openassessment.assessment.serializers import rubric_from_dict
from openassessment.assessment.worker.algorithm import AIAlgorithm
class Command(BaseCommand):
"""
Create submissions and AI incomplete grading workflows.
"""
help = (
u"Simulate failure of the worker AI grading tasks "
u"by creating incomplete AI grading workflows in the database."
)
args = '<COURSE_ID> <PROBLEM_ID> <NUM_SUBMISSIONS> <ALGORITHM_ID>'
RUBRIC_OPTIONS = [
{
"order_num": 0,
"name": u"poor",
"explanation": u"Poor job!",
"points": 0,
},
{
"order_num": 1,
"name": u"good",
"explanation": u"Good job!",
"points": 1,
}
]
RUBRIC = {
'prompts': [{"description": u"Test prompt"}],
'criteria': [
{
"order_num": 0,
"name": u"vocabulary",
"prompt": u"Vocabulary",
"options": RUBRIC_OPTIONS
},
{
"order_num": 1,
"name": u"grammar",
"prompt": u"Grammar",
"options": RUBRIC_OPTIONS
}
]
}
EXAMPLES = {
"vocabulary": [
AIAlgorithm.ExampleEssay(
text=u"World Food Day is celebrated every year around the world on 16 October in honor "
u"of the date of the founding of the Food and Agriculture "
u"Organization of the United Nations in 1945.",
score=0
),
AIAlgorithm.ExampleEssay(
text=u"Since 1981, World Food Day has adopted a different theme each year "
u"in order to highlight areas needed for action and provide a common focus.",
score=1
),
],
"grammar": [
AIAlgorithm.ExampleEssay(
text=u"Most of the themes revolve around agriculture because only investment in agriculture ",
score=0
),
AIAlgorithm.ExampleEssay(
text=u"In spite of the importance of agriculture as the driving force "
u"in the economies of many developing countries, this "
u"vital sector is frequently starved of investment.",
score=1
)
]
}
STUDENT_ID = u'test_student'
ANSWER = {"text": 'test answer'}
def handle(self, *args, **options):
"""
Execute the command.
Args:
course_id (unicode): The ID of the course to create submissions/workflows in.
item_id (unicode): The ID of the problem in the course.
num_submissions (int): The number of submissions/workflows to create.
algorithm_id (unicode): The ID of the ML algorithm to use ("fake" or "ease")
Raises:
CommandError
"""
if len(args) < 4:
raise CommandError(u"Usage: simulate_ai_grading_error {}".format(self.args))
# Parse arguments
course_id = args[0].decode('utf-8')
item_id = args[1].decode('utf-8')
num_submissions = int(args[2])
algorithm_id = args[3].decode('utf-8')
# Create the rubric model
rubric = rubric_from_dict(self.RUBRIC)
# Train classifiers
print u"Training classifiers using {algorithm_id}...".format(algorithm_id=algorithm_id)
algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
classifier_data = {
criterion_name: algorithm.train_classifier(example)
for criterion_name, example in self.EXAMPLES.iteritems()
}
print u"Successfully trained classifiers."
# Create the classifier set
classifier_set = AIClassifierSet.create_classifier_set(
classifier_data, rubric, algorithm_id, course_id, item_id
)
print u"Successfully created classifier set with id {}".format(classifier_set.pk)
# Create submissions and grading workflows
for num in range(num_submissions):
student_item = {
'course_id': course_id,
'item_id': item_id,
'item_type': 'openassessment',
'student_id': "{base}_{num}".format(base=self.STUDENT_ID, num=num)
}
submission = sub_api.create_submission(student_item, self.ANSWER)
workflow = AIGradingWorkflow.start_workflow(
submission['uuid'], self.RUBRIC, algorithm_id
)
workflow.classifier_set = classifier_set
workflow.save()
print u"{num}: Created incomplete grading workflow with UUID {uuid}".format(
num=num, uuid=workflow.uuid
)
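# Illustrative sketch (not part of the original module): invoking this command
# in-process, mirroring how the test suite drives it. The course and item IDs
# below are placeholders, and "fake" must be configured in ORA2_AI_ALGORITHMS.
def _example_invoke_command():
    """Create five incomplete grading workflows for a hypothetical course item."""
    cmd = Command()
    cmd.handle('demo-course', 'demo-item', 5, 'fake')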
# -*- coding: utf-8 -*-
"""
Tests for the simulate AI grading error management command.
"""
from django.test.utils import override_settings
from openassessment.test_utils import CacheResetTest
from openassessment.management.commands import simulate_ai_grading_error
from openassessment.assessment.models import AIGradingWorkflow
from openassessment.assessment.worker.grading import grade_essay
class SimulateAIGradingErrorTest(CacheResetTest):
"""
Tests for the simulate AI grading error management command.
"""
COURSE_ID = u"TɘꙅT ↄoUᴙꙅɘ"
ITEM_ID = u"𝖙𝖊𝖘𝖙 𝖎𝖙𝖊𝖒"
NUM_SUBMISSIONS = 20
AI_ALGORITHMS = {
"fake": "openassessment.assessment.worker.algorithm.FakeAIAlgorithm"
}
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_simulate_ai_grading_error(self):
# Run the command
cmd = simulate_ai_grading_error.Command()
cmd.handle(
self.COURSE_ID.encode('utf-8'),
self.ITEM_ID.encode('utf-8'),
self.NUM_SUBMISSIONS,
"fake"
)
# Check that the correct number of incomplete workflows
# were created. These workflows should still have
# a classifier set, though, because otherwise they
# wouldn't have been scheduled for grading
# (that is, the submissions were made before classifier
# training completed).
incomplete_workflows = AIGradingWorkflow.objects.filter(
classifier_set__isnull=False,
completed_at__isnull=True
)
num_errors = incomplete_workflows.count()
self.assertEqual(self.NUM_SUBMISSIONS, num_errors)
# Verify that we can complete the workflows successfully
# (that is, make sure the classifier data is valid)
# We're calling a Celery task method here,
# but we're NOT using `apply_async`, so this will
# execute synchronously.
for workflow in incomplete_workflows:
grade_essay(workflow.uuid)
# Now there should be no incomplete workflows
remaining_incomplete = AIGradingWorkflow.objects.filter(
classifier_set__isnull=False,
completed_at__isnull=True
).count()
self.assertEqual(remaining_incomplete, 0)
{% load i18n %}
{% spaceless %}
<li class="openassessment_assessment_module_settings_editor" id="oa_ai_assessment_editor">
<div class="drag-handle action"></div>
<div class="openassessment_inclusion_wrapper">
<input id="include_ai_assessment" type="checkbox"
{% if assessments.example_based_assessment %} checked="true" {% endif %}>
<label for="include_ai_assessment">{% trans "Step: Example-Based Assessment" %}</label>
</div>
<div class="openassessment_assessment_module_editor">
<p id="ai_assessment_description_closed" class="openassessment_description_closed {% if assessments.example_based_assessment %} is--hidden {% endif %}">
{% trans "An algorithm assesses learners' responses by comparing the responses to pre-assessed sample responses that the instructor provides."%}
</p>
<div id="ai_assessment_settings_editor" class="assessment_settings_wrapper {% if not assessments.example_based_assessment %} is--hidden {% endif %}">
<p class="openassessment_description">
{% trans "Enter one or more sample responses that you've created, and then specify the options that you would choose for each criterion in your rubric. Note that you must add your rubric to the Rubric tab before you can complete this step." %}
</p>
<textarea id="ai_training_examples">{{ assessments.example_based_assessment.examples }}</textarea>
</div>
</div>
</li>
{% endspaceless %}
<openassessment>
<title>Example Based Example</title>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Born in northern New South Wales, Dowling entered the Royal Australian Naval College in 1915. After graduating in 1919 he went to sea aboard various Royal Navy and RAN vessels, and later specialised in gunnery. In 1937, he was given command of the sloop HMAS Swan. Following the outbreak of World War II, he saw action in the Mediterranean theatre as executive officer of the Royal Navy cruiser HMS Naiad, and survived her sinking by a German U-boat in March 1942. Returning to Australia, he served as Director of Plans and later Deputy Chief of Naval Staff before taking command of the light cruiser HMAS Hobart in November 1944. His achievements in the South West Pacific earned him the Distinguished Service Order.
Dowling took command of the RAN's first aircraft carrier, HMAS Sydney, in 1948. He became Chief of Naval Personnel in 1950, and Flag Officer Commanding HM Australian Fleet in 1953. Soon after taking up the position of CNS in February 1955, he was promoted to vice admiral and appointed a Companion of the Order of the Bath. As CNS he had to deal with shortages of money, manpower and equipment, and with the increasing role of the United States in Australia's defence planning, at the expense of traditional ties with Britain. Knighted in 1957, Dowling was Chairman of COSC from March 1959 until May 1961, when he retired from the military. In 1963 he was appointed a Knight Commander of the Royal Victorian Order and became Australian Secretary to HM Queen Elizabeth II, serving until his death in 1969.
</answer>
<select criterion="Ideas" option="Bad" />
<select criterion="Content" option="Bad" />
</example>
<example>
<answer>Roy Russell Dowling was born on 28 May 1901 in Condong, a township on the Tweed River in northern New South Wales. His parents were sugar cane inspector Russell Dowling and his wife Lily. The youth entered the Royal Australian Naval College (RANC) at Jervis Bay, Federal Capital Territory, in 1915. An underachiever academically, he excelled at sports, and became chief cadet captain before graduating in 1918 with the King's Medal, awarded for "gentlemanly bearing, character, good influence among his fellows and officer-like qualities".[1][2] The following year he was posted to Britain as a midshipman, undergoing training with the Royal Navy and seeing service on HMS Ramillies and HMS Venturous.[3] By January 1923 he was back in Australia, serving aboard the cruiser HMAS Adelaide. He was promoted to lieutenant in March.[4] In April 1924, Adelaide joined the Royal Navy's Special Service Squadron on its worldwide cruise, taking in New Zealand, Canada, the United States, Panama, and the West Indies, before docking in September at Portsmouth, England. There Dowling left the ship for his next appointment, training as a gunnery officer and serving in that capacity at HMS Excellent.
</answer>
<select criterion="Ideas" option="Good" />
<select criterion="Content" option="Bad" />
</example>
<example>
<answer>After his return to Australia in December 1926, Dowling spent eighteen months on HMAS Platypus and HMAS Anzac, where he continued to specialise in gunnery. In July 1928, he took on an instructional role at the gunnery school in Flinders Naval Depot on Western Port Bay, Victoria. He married Jessie Blanch in Melbourne on 8 May 1930; the couple had two sons and three daughters.[1][6] Jessie accompanied him on his next posting to Britain commencing in January 1931.</answer>
<select criterion="Ideas" option="Bad" />
<select criterion="Content" option="Good" />
</example>
<example>
<answer>He was promoted to lieutenant commander on 15 March, and was appointed gunnery officer on the light cruiser HMS Colombo in May. Dowling returned to Australia in January 1933, and was appointed squadron gunnery officer aboard the heavy cruiser HMAS Canberra that April.[1][4] The ship operated mainly within Australian waters over the next two years.[7] In July 1935, Dowling took charge of the gunnery school at Flinders Naval Depot. He was promoted to commander on 31 December 1936.[1][4] The following month, he assumed command of the newly commissioned Grimsby-class sloop HMAS Swan, carrying out duties in the South West Pacific.[8] Completing his tenure on Swan in January 1939, he was briefly assigned to the Navy Office, Melbourne, before returning to Britain in March for duty at HMS Pembroke, where he awaited posting aboard the yet-to-be-commissioned anti-aircraft cruiser, HMS Naiad.</answer>
<select criterion="Ideas" option="Good" />
<select criterion="Content" option="Good" />
</example>
</assessment>
</assessments>
<rubric>
<prompt>
Censorship in the Libraries
'All of us can think of a book that we hope none of our children or any other children have taken off the shelf. But if I have the right to remove that book from the shelf -- that work I abhor -- then you also have exactly the same right and so does everyone else. And then we have no books left on the shelf for any of us.' --Katherine Paterson, Author
Write a persuasive essay to a newspaper reflecting your views on censorship in libraries. Do you believe that certain materials, such as books, music, movies, magazines, etc., should be removed from the shelves if they are found offensive? Support your position with convincing arguments from your own experience, observations, and/or reading.
Read for conciseness, clarity of thought, and form.
</prompt>
<criterion feedback="optional">
<name>Ideas</name>
<prompt>Determine if there is a unifying theme or main idea.</prompt>
<option points="0">
<name>Bad</name>
<explanation>Difficult for the reader to discern the main idea. Too brief or too repetitive to establish or maintain a focus.</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Presents a unifying theme or main idea, but may include minor tangents. Stays somewhat focused on topic and task.</explanation>
</option>
</criterion>
<criterion>
<name>Content</name>
<prompt>Assess the content of the submission</prompt>
<option points="0">
<name>Bad</name>
<explanation>Includes little information with few or no details or unrelated details. Unsuccessful in attempts to explore any facets of the topic.</explanation>
</option>
<option points="1">
<name>Good</name>
<explanation>Includes little information and few or no details. Explores only one or two facets of the topic.</explanation>
</option>
</criterion>
<feedbackprompt>
(Optional) What aspects of this response stood out to you? What did it do well? How could it improve?
</feedbackprompt>
<feedback_default_text>
I noticed that this response...
</feedback_default_text>
</rubric>
</openassessment>
<openassessment>
<title>Open Assessment Test</title>
<prompts>
<prompt>
<description>Given the state of the world today, what do you think should be done to combat poverty?</description>
</prompt>
<prompt>
<description>Given the state of the world today, what do you think should be done to combat pollution?</description>
</prompt>
</prompts>
<rubric>
<criterion>
<name>Ideas</name>
<prompt>How good are the ideas?</prompt>
<option points="0">
<name>Poor</name>
<explanation>Poor job!</explanation>
</option>
<option points="1">
<name>Fair</name>
<explanation>Fair job</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Good job</explanation>
</option>
</criterion>
<criterion>
<name>Content</name>
<prompt>How good is the content?</prompt>
<option points="0">
<name>Poor</name>
<explanation>Poor job!</explanation>
</option>
<option points="1">
<name>Fair</name>
<explanation>Fair job</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Good job</explanation>
</option>
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Example Answer One</answer>
<select criterion="Ideas" option="Poor" />
<select criterion="Content" option="Poor" />
</example>
<example>
<answer>Example Answer Two</answer>
<select criterion="Ideas" option="Fair" />
<select criterion="Content" option="Fair" />
</example>
<example>
<answer>Example Answer Three</answer>
<select criterion="Ideas" option="Fair" />
<select criterion="Content" option="Good" />
</example>
<example>
<answer>Example Answer Four</answer>
<select criterion="Ideas" option="Poor" />
<select criterion="Content" option="Good" />
</example>
</assessment>
<assessment name="peer-assessment" must_grade="5" must_be_graded_by="3" />
<assessment name="self-assessment" />
</assessments>
</openassessment>
<openassessment>
<title>Open Assessment Test</title>
<prompts>
<prompt>
<description>Given the state of the world today, what do you think should be done to combat poverty?</description>
</prompt>
<prompt>
<description>Given the state of the world today, what do you think should be done to combat pollution?</description>
</prompt>
</prompts>
<rubric>
<criterion>
<name>Ideas</name>
<prompt>How good are the ideas?</prompt>
<option points="0">
<name>Poor</name>
<explanation>Poor job!</explanation>
</option>
<option points="1">
<name>Fair</name>
<explanation>Fair job</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Good job</explanation>
</option>
</criterion>
<criterion>
<name>Content</name>
<prompt>How good is the content?</prompt>
<option points="0">
<name>Poor</name>
<explanation>Poor job!</explanation>
</option>
<option points="1">
<name>Fair</name>
<explanation>Fair job</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Good job</explanation>
</option>
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Example Answer One</answer>
<select criterion="Ideas" option="Poor" />
<select criterion="Content" option="Poor" />
</example>
<example>
<answer>Example Answer Two</answer>
<select criterion="Ideas" option="Fair" />
<select criterion="Content" option="Fair" />
</example>
<example>
<answer>Example Answer Three</answer>
<select criterion="Ideas" option="Fair" />
<select criterion="Content" option="Good" />
</example>
<example>
<answer>Example Answer Four</answer>
<select criterion="Ideas" option="Poor" />
<select criterion="Content" option="Good" />
</example>
</assessment>
</assessments>
</openassessment>
<openassessment>
<title>Feedback only criterion</title>
<prompts>
<prompt>
<description>Given the state of the world today, what do you think should be done to combat poverty?</description>
</prompt>
<prompt>
<description>Given the state of the world today, what do you think should be done to combat pollution?</description>
</prompt>
</prompts>
<rubric>
<criterion>
<name>vocabulary</name>
<prompt>How good is the vocabulary?</prompt>
<option points="0">
<name>bad</name>
<explanation>bad</explanation>
</option>
<option points="1">
<name>good</name>
<explanation>good</explanation>
</option>
</criterion>
<criterion feedback="required">
<name>𝖋𝖊𝖊𝖉𝖇𝖆𝖈𝖐 𝖔𝖓𝖑𝖞</name>
<prompt>This criterion accepts only written feedback, so it has no options</prompt>
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>This is my answer.</answer>
<select criterion="vocabulary" option="good" />
</example>
<example>
<answer>тєѕт αηѕωєя</answer>
<select criterion="vocabulary" option="bad" />
</example>
</assessment>
</assessments>
</openassessment>
<openassessment>
<title>Open Assessment Test</title>
<prompts>
<prompt>
<description>Given the state of the world today, what do you think should be done to combat poverty?</description>
</prompt>
<prompt>
<description>Given the state of the world today, what do you think should be done to combat pollution?</description>
</prompt>
</prompts>
<rubric>
<criterion>
<name>𝓒𝓸𝓷𝓬𝓲𝓼𝓮</name>
<prompt>How concise is it?</prompt>
<option points="3">
<name>ﻉซƈﻉɭɭﻉกՇ</name>
<explanation>Extremely concise</explanation>
</option>
<option points="2">
<name>Ġööḋ</name>
<explanation>Concise</explanation>
</option>
<option points="1">
<name>ק๏๏г</name>
<explanation>Wordy</explanation>
</option>
</criterion>
<criterion>
<name>Form</name>
<prompt>How well-formed is it?</prompt>
<option points="3">
<name>Good</name>
<explanation>Good</explanation>
</option>
<option points="2">
<name>Fair</name>
<explanation>Fair</explanation>
</option>
<option points="1">
<name>Poor</name>
<explanation>Poor</explanation>
</option>
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Example Answer One</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="Ġööḋ" />
<select criterion="Form" option="Poor" />
</example>
<example>
<answer>Example Answer Two</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="ﻉซƈﻉɭɭﻉกՇ" />
<select criterion="Form" option="Fair" />
</example>
<example>
<answer>Example Answer Three</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="Ġööḋ" />
<select criterion="Form" option="Good" />
</example>
<example>
<answer>Example Answer Four</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="ﻉซƈﻉɭɭﻉกՇ" />
<select criterion="Form" option="Good" />
</example>
</assessment>
</assessments>
</openassessment>
"""
Integration test for example-based assessment (AI).
"""
import json
import mock
from django.test.utils import override_settings
from submissions import api as sub_api
from openassessment.xblock.openassessmentblock import OpenAssessmentBlock
from .base import XBlockHandlerTestCase, scenario
class AIAssessmentIntegrationTest(XBlockHandlerTestCase):
"""
Integration test for example-based assessment (AI).
"""
SUBMISSION = json.dumps({'submission': ('This is submission part 1!', 'This is submission part 2!')})
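# Map the "fake" algorithm_id referenced in the test scenarios to a fake AI
# algorithm implementation, installed via the ORA2_AI_ALGORITHMS override below.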
AI_ALGORITHMS = {
'fake': 'openassessment.assessment.worker.algorithm.FakeAIAlgorithm'
}
@mock.patch.object(OpenAssessmentBlock, 'is_admin', new_callable=mock.PropertyMock)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/example_based_only.xml', user_id='Bob')
def test_asynch_generate_score(self, xblock, mock_is_admin):
# Test that AI grading, which creates assessments asynchronously,
# updates the workflow so students can receive a score.
mock_is_admin.return_value = True
# Train classifiers for the problem
self.request(xblock, 'schedule_training', json.dumps({}), response_format='json')
# Submit a response
self.request(xblock, 'submit', self.SUBMISSION, response_format='json')
# BEFORE viewing the grade page, check that we get a score
score = sub_api.get_score(xblock.get_student_item_dict())
self.assertIsNot(score, None)
self.assertEqual(score['submission_uuid'], xblock.submission_uuid)
@mock.patch.object(OpenAssessmentBlock, 'is_admin', new_callable=mock.PropertyMock)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/feedback_only_criterion_ai.xml', user_id='Bob')
def test_feedback_only_criterion(self, xblock, mock_is_admin):
# Test that a problem whose rubric includes a feedback-only criterion
# can be trained and graded by AI, and that the grade page renders the
# example-based assessment results.
mock_is_admin.return_value = True
# Train classifiers for the problem and submit a response
self.request(xblock, 'schedule_training', json.dumps({}), response_format='json')
self.request(xblock, 'submit', self.SUBMISSION, response_format='json')
# Render the grade page
resp = self.request(xblock, 'render_grade', json.dumps({}))
self.assertIn('example-based', resp.lower())
[
{
"text": "Food is any substance[1] consumed to provide nutritional support for the body. It is usually of plant or animal origin, and contains essential nutrients, such as carbohydrates, fats, proteins, vitamins, or minerals. The substance is ingested by an organism and assimilated by the organism's cells in an effort to produce energy, maintain life, or stimulate growth. Historically, people secured food through two methods: hunting and gathering, and agriculture. Today, most of the food energy consumed by the world population is supplied by the food industry. Food safety and food security are monitored by agencies like the International Association for Food Protection, World Resources Institute, World Food Programme, Food and Agriculture Organization, and International Food Information Council. They address issues such as sustainability, biological diversity, climate change, nutritional economics, population growth, water supply, and access to food.",
"score": 0
},
{
"text": "Most food has its origin in plants. Some food is obtained directly from plants; but even animals that are used as food sources are raised by feeding them food derived from plants. Cereal grain is a staple food that provides more food energy worldwide than any other type of crop. Maize, wheat, and rice – in all of their varieties – account for 87% of all grain production worldwide.[2] Most of the grain that is produced worldwide is fed to livestock. Some foods not from animal or plant sources include various edible fungi, especially mushrooms. Fungi and ambient bacteria are used in the preparation of fermented and pickled foods like leavened bread, alcoholic drinks, cheese, pickles, kombucha, and yogurt. Another example is blue-green algae such as Spirulina.[3] Inorganic substances such as salt, baking soda and cream of tartar are used to preserve or chemically alter an ingredient.",
"score": 1
},
{
"text": "Many plants or plant parts are eaten as food. There are around 2,000 plant species which are cultivated for food, and many have several distinct cultivars.[4] Seeds of plants are a good source of food for animals, including humans, because they contain the nutrients necessary for the plant's initial growth, including many healthful fats, such as Omega fats. In fact, the majority of food consumed by human beings are seed-based foods. Edible seeds include cereals (maize, wheat, rice, et cetera), legumes (beans, peas, lentils, et cetera), and nuts. Oilseeds are often pressed to produce rich oils - sunflower, flaxseed, rapeseed (including canola oil), sesame, et cetera.[5] Seeds are typically high in unsaturated fats and, in moderation, are considered a health food, although not all seeds are edible. Large seeds, such as those from a lemon, pose a choking hazard, while seeds from cherries and apples contain cyanide which could be poisonous only if consumed in large volumes.[6] Fruits are the ripened ovaries of plants, including the seeds within. Many plants and animals have coevolved such that the fruits of the former are an attractive food source to the latter, because animals that eat the fruits may excrete the seeds some distance away. Fruits, therefore, make up a significant part of the diets of most cultures. Some botanical fruits, such as tomatoes, pumpkins, and eggplants, are eaten as vegetables.[7] (For more information, see list of fruits.) Vegetables are a second type of plant matter that is commonly eaten as food. These include root vegetables (potatoes and carrots), bulbs (onion family), leaf vegetables (spinach and lettuce), stem vegetables (bamboo shoots and asparagus), and inflorescence vegetables (globe artichokes and broccoli and other vegetables such as cabbage or cauliflower).[8]",
"score": 0
},
{
"text": "Animals are used as food either directly or indirectly by the products they produce. Meat is an example of a direct product taken from an animal, which comes from muscle systems or from organs. Various raw meats Food products produced by animals include milk produced by mammary glands, which in many cultures is drunk or processed into dairy products (cheese, butter, etc.). In addition, birds and other animals lay eggs, which are often eaten, and bees produce honey, a reduced nectar from flowers, which is a popular sweetener in many cultures. Some cultures consume blood, sometimes in the form of blood sausage, as a thickener for sauces, or in a cured, salted form for times of food scarcity, and others use blood in stews such as jugged hare.[9] Some cultures and people do not consume meat or animal food products for cultural, dietary, health, ethical, or ideological reasons. Vegetarians choose to forgo food from animal sources to varying degrees. Vegans do not consume any foods that are or contain ingredients from an animal source.",
"score": 2
},
{
"text": "Most food has always been obtained through agriculture. With increasing concern over both the methods and products of modern industrial agriculture, there has been a growing trend toward sustainable agricultural practices. This approach, partly fueled by consumer demand, encourages biodiversity, local self-reliance and organic farming methods.[10] Major influences on food production include international organizations (e.g. the World Trade Organization and Common Agricultural Policy), national government policy (or law), and war.[11] In popular culture, the mass production of food, specifically meats such as chicken and beef, has come under fire from various documentaries, most recently Food, Inc, documenting the mass slaughter and poor treatment of animals, often for easier revenues from large corporations. Along with a current trend towards environmentalism, people in Western culture have had an increasing trend towards the use of herbal supplements, foods for a specific group of person (such as dieters, women, or athletes), functional foods (fortified foods, such as omega-3 eggs), and a more ethnically diverse diet.[12] Several organisations have begun calling for a new kind of agriculture in which agroecosystems provide food but also support vital ecosystem services so that soil fertility and biodiversity are maintained rather than compromised. According to the International Water Management Institute and UNEP, well-managed agroecosystems not only provide food, fiber and animal products, they also provide services such as flood mitigation, groundwater recharge, erosion control and habitats for plants, birds fish and other animals.[13]",
"score": 3
},
{
"text": "Generally regarded as the most pleasant taste, sweetness is almost always caused by a type of simple sugar such as glucose or fructose, or disaccharides such as sucrose, a molecule combining glucose and fructose.[16] Complex carbohydrates are long chains and thus do not have the sweet taste. Artificial sweeteners such as sucralose are used to mimic the sugar molecule, creating the sensation of sweet, without the calories. Other types of sugar include raw sugar, which is known for its amber color, as it is unprocessed. As sugar is vital for energy and survival, the taste of sugar is pleasant. The stevia plant contains a compound known as steviol which, when extracted, has 300 times the sweetness of sugar while having minimal impact on blood sugar.[17] Sour Sourness is caused by the taste of acids, such as vinegar in alcoholic beverages. Sour foods include citrus, specifically lemons, limes, and to a lesser degree oranges. Sour is evolutionarily significant as it is a sign for a food that may have gone rancid due to bacteria.[18] Many foods, however, are slightly acidic, and help stimulate the taste buds and enhance flavor.",
"score": 1
},
{
"text": "Saltiness is the taste of alkali metal ions such as sodium and potassium. It is found in almost every food in low to moderate proportions to enhance flavor, although to eat pure salt is regarded as highly unpleasant. There are many different types of salt, with each having a different degree of saltiness, including sea salt, fleur de sel, kosher salt, mined salt, and grey salt. Other than enhancing flavor, its significance is that the body needs and maintains a delicate electrolyte balance, which is the kidney's function. Salt may be iodized, meaning iodine has been added to it, a necessary nutrient that promotes thyroid function. Some canned foods, notably soups or packaged broths, tend to be high in salt as a means of preserving the food longer. Historically speaking, salt has been used as a meat preservative as salt promotes water excretion, thus working as a preservative. Similarly, dried foods also promote food safety.[19] Bitter Bitterness is a sensation often considered unpleasant characterized by having a sharp, pungent taste. Dark, unsweetened chocolate, caffeine, lemon rind, and some types of fruit are known to be bitter. Umami Also named as Savoury. Umami, the Japanese word for delicious, is the least known in Western popular culture but has a long tradition in Asian cuisine. Umami is the taste of glutamates, especially monosodium glutamate (MSG).[16] It is characterized as savory, meaty, and rich in flavor. Salmon and mushrooms are foods high in umami. Meat and other animal byproducts are described as having this taste.[citation needed]",
"score": 2
}
]
#!/usr/bin/env bash
PYTHON=`which python`
$PYTHON -m nltk.downloader stopwords maxent_treebank_pos_tagger wordnet --quiet
Performance Tests
=================
1. Install performance test requirements:
.. code:: bash
cd ora2
pip install -r requirements/perf.txt
2. Import ``course.tar.gz`` into Studio:
* Course ID: 1
* Course Org: ora2
* Course Run: 1
3. Enable ``auto_auth`` in the LMS feature flags:
.. code:: javascript
{
"FEATURES": {
"AUTOMATIC_AUTH_FOR_TESTING": true
}
}
4. Log in as a staff user and schedule a training task in the Course Staff Debug of the example-based assessment problem.
5. **Optional**: Increase open file limit:
.. code:: bash
ulimit -n 2048
6. Start the Locust server, and point it at the test server. **NOTE**: You *must* include the trailing slash in the host URL.
.. code:: bash
cd performance
locust --host=http://example.com/
If your server has basic auth enabled, provide credentials with environment vars:
.. code:: bash
cd performance
BASIC_AUTH_USER=foo BASIC_AUTH_PASSWORD=bar locust --host=http://example.com/
7. Visit the `Locust web UI <http://localhost:8089>`_ to start the test.
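Alternatively, some Locust releases can drive the swarm headlessly from the command line instead of through the web UI. The exact flag names vary between Locust versions; the sketch below assumes a release that supports ``--no-web``, ``-c`` (number of simulated clients), ``-r`` (hatch rate), and ``-n`` (total request count):

.. code:: bash

    cd performance
    # Spawn 20 simulated users at 5 users/second, stopping after 1000 requests
    locust --host=http://example.com/ --no-web -c 20 -r 5 -n 1000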
"""
Performance tests for the OpenAssessment XBlock.
"""
import os
import json
import random
from collections import namedtuple
import gevent
import loremipsum
from locust import HttpLocust, TaskSet, task
class OpenAssessmentPage(object):
"""
Encapsulate interactions with the OpenAssessment XBlock's pages.
"""
# These assume that the course fixture has been installed
ProblemFixture = namedtuple('ProblemFixture', [
'course_id', 'base_url', 'base_handler_url',
'rubric_options', 'render_step_handlers'
])
PROBLEMS = {
'peer_then_self': ProblemFixture(
course_id="ora2/1/1",
base_url= "courses/ora2/1/1/courseware/efa85eb090164a208d772a344df7181d/69f15a02c5af4e95b9c5525771b8f4ee/",
base_handler_url="courses/ora2/1/1/xblock/i4x:;_;_ora2;_1;_openassessment;_0e2bbf6cc89e45d98b028fa4e2d46314/handler/",
rubric_options={
'Ideas': ['Poor', 'Fair', 'Good'],
'Content': ['Poor', 'Fair', 'Good', 'Excellent']
},
render_step_handlers=[
'render_submission', 'render_peer_assessment',
'render_self_assessment', 'render_grade',
]
),
'example_based': ProblemFixture(
course_id="ora2/1/1",
base_url="courses/ora2/1/1/courseware/efa85eb090164a208d772a344df7181d/fb039ef8a34641509190918ada79122a/",
base_handler_url="courses/ora2/1/1/xblock/i4x:;_;_ora2;_1;_openassessment;_8df3fa4de26747e0ad99b4157e45f5e5/handler/",
rubric_options={
'Ideas': ['Bad', 'Good'],
'Content': ['Bad', 'Good']
},
render_step_handlers=['render_submission', 'render_grade']
)
}
def __init__(self, hostname, client, problem_name):
"""
Initialize the page to use specified HTTP client.
Args:
hostname (unicode): The hostname (used for the referer HTTP header)
client (HttpSession): The HTTP client to use.
problem_name (unicode): Name of the problem (one of the keys in `OpenAssessmentPage.PROBLEMS`)
"""
self.hostname = hostname
self.client = client
self.problem_fixture = self.PROBLEMS[problem_name]
self.logged_in = False
# Configure basic auth
if 'BASIC_AUTH_USER' in os.environ and 'BASIC_AUTH_PASSWORD' in os.environ:
self.client.auth = (os.environ['BASIC_AUTH_USER'], os.environ['BASIC_AUTH_PASSWORD'])
def log_in(self):
"""
Log in as a unique user with access to the XBlock(s) under test.
"""
resp = self.client.get(
"auto_auth",
params={'course_id': self.problem_fixture.course_id},
verify=False,
timeout=120
)
self.logged_in = (resp.status_code == 200)
return self
def load_steps(self):
"""
Load all steps in the OpenAssessment flow.
"""
# Load the container page
self.client.get(self.problem_fixture.base_url, verify=False)
# Load each of the steps in parallel
get_unverified = lambda url: self.client.get(url, verify=False)
gevent.joinall([
gevent.spawn(get_unverified, url) for url in [
self.handler_url(handler)
for handler in self.problem_fixture.render_step_handlers
]
], timeout=0.5)
return self
def submit_response(self):
"""
Submit a response.
"""
payload = json.dumps({
'submission': u' '.join(loremipsum.get_paragraphs(random.randint(1, 10))),
})
self.client.post(self.handler_url('submit'), data=payload, headers=self._post_headers, verify=False)
def peer_assess(self, continue_grading=False):
"""
Assess a peer.
Kwargs:
continue_grading (bool): If true, simulate "continued grading"
in which a student asks to assess peers in addition to the required number.
"""
params = {
'options_selected': self._select_random_options(),
'overall_feedback': loremipsum.get_paragraphs(random.randint(1, 3)),
'criterion_feedback': {}
}
if continue_grading:
params['continue_grading'] = True
payload = json.dumps(params)
self.client.post(self.handler_url('peer_assess'), data=payload, headers=self._post_headers, verify=False)
def self_assess(self):
"""
Complete a self-assessment.
"""
payload = json.dumps({
'options_selected': self._select_random_options()
})
self.client.post(self.handler_url('self_assess'), data=payload, headers=self._post_headers, verify=False)
def handler_url(self, handler_name):
"""
Return the full URL for an XBlock handler.
Args:
handler_name (str): The name of the XBlock handler method.
Returns:
str
"""
return "{base}{handler}".format(base=self.problem_fixture.base_handler_url, handler=handler_name)
def _select_random_options(self):
"""
Select random options for each criterion in the rubric.
"""
return {
criterion: random.choice(options)
for criterion, options in self.problem_fixture.rubric_options.iteritems()
}
@property
def _post_headers(self):
"""
Headers for a POST request, including the CSRF token.
"""
return {
'Content-type': 'application/json',
'Accept': 'application/json',
'X-CSRFToken': self.client.cookies.get('csrftoken', ''),
'Referer': self.hostname
}
class OpenAssessmentTasks(TaskSet):
"""
Virtual user interactions with the OpenAssessment XBlock.
"""
def __init__(self, *args, **kwargs): # pylint: disable=W0613
"""
Initialize the task set.
"""
super(OpenAssessmentTasks, self).__init__(*args, **kwargs)
self.hostname = self.locust.host
self.page = None
@task
def peer_and_self(self):
"""
Test the peer-->self workflow.
"""
if self.page is None:
self.page = OpenAssessmentPage(self.hostname, self.client, 'peer_then_self') # pylint: disable=E1101
self.page.log_in()
if not self.page.logged_in:
self.page.log_in()
else:
self._submit_response()
# Randomly peer/self assess or log in as a new user.
# This should be sufficient to get students through
# the entire flow (satisfying the requirements for peer assessment).
action = random.randint(0, 100)
if action <= 80:
continue_grading = random.randint(0, 10) < 4
self.page.peer_assess(continue_grading=continue_grading)
self.page.self_assess()
else:
self.page.log_in()
@task
def example_based(self):
"""
Test example-based assessment only.
"""
if self.page is None:
self.page = OpenAssessmentPage(self.hostname, self.client, 'example_based') # pylint: disable=E1101
self.page.log_in()
if not self.page.logged_in:
self.page.log_in()
else:
self._submit_response()
if random.randint(0, 100) < 50:
self.page.log_in()
def _submit_response(self):
"""
Simulate the user loading the page, submitting a response,
then reloading the steps (usually triggered by AJAX).
If the user has already submitted, the handler will return
an error message in the JSON, but the HTTP status will still be 200.
"""
self.page.load_steps()
self.page.submit_response()
self.page.load_steps()
class OpenAssessmentLocust(HttpLocust):
"""
Performance test definition for the OpenAssessment XBlock.
"""
task_set = OpenAssessmentTasks
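# Each simulated user waits 10-15 seconds between tasks (values are in milliseconds)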
min_wait = 10000
max_wait = 15000