Commit bd5c184f by Will Daly

Merge pull request #452 from edx/ai-grading

Ai grading
parents 226cb629 60e28a6c
......@@ -30,6 +30,7 @@ pip-log.txt
.tox
nosetests.xml
htmlcov
coverage.xml
# Mr Developer
.mr.developer.cfg
......@@ -53,7 +54,11 @@ coverage
# tim-specific
ora2db
storage/*
openassessment/xblock/static/js/fixtures/*.html
# logging
logs/*.log*
logs/*/*.log*
# Vagrant
.vagrant
......@@ -103,7 +103,7 @@ Check for quality violations:
.. code:: bash
pylint apps
pylint openassessment
Disable quality violations on a line or file:
......@@ -112,6 +112,48 @@ Disable quality violations on a line or file:
# pylint: disable=W0123,E4567
Vagrant
=======
This repository includes a Vagrant configuration file, which is useful for testing
ORA2 in an environment that is closer to production:
* Uses `gunicorn <http://gunicorn.org/>`_ to serve the workbench application.
Unlike Django ``runserver``, gunicorn will process requests in parallel.
* Uses `mysql <http://www.mysql.com/>`_ as the database, which (unlike
`sqlite <http://www.sqlite.org/>`_) allows for simultaneous writes.
* Serves static files using `nginx <http://wiki.nginx.org/Main>`_ instead
of Django `staticfiles <https://docs.djangoproject.com/en/dev/ref/contrib/staticfiles/>`_.
* Runs multiple `celery workers <http://celery.readthedocs.org/en/latest/>`_.
* Uses `memcached <http://memcached.org/>`_.
* Installs `EASE <https://github.com/edx/ease>`_ for AI grading, including
its many requirements.
To use the Vagrant VM:
1) `Install Vagrant <https://docs.vagrantup.com/v2/installation/>`_.
2) ``vagrant up`` to start and provision the Vagrant VM.
3) Visit `http://192.168.44.10 <http://192.168.44.10>`_
4) Verify that the workbench index page loads.
After making a change to the code in the ``edx-ora2`` directory,
you must restart the services on the Vagrant VM:
1) ``vagrant ssh`` to ssh into the Vagrant VM.
2) ``./update.sh`` to restart the services, run database migrations, and collect static assets.
3) Visit `http://192.168.44.10 <http://192.168.44.10>`_
By default, the Vagrant VM also includes a monitoring tool for Celery tasks called `Flower <https://github.com/mher/flower>`_.
To use the tool, visit: `http://192.168.44.10:5555 <http://192.168.44.10:5555>`_
The log files from the Vagrant VM are located in ``edx-ora2/logs/vagrant``, which is shared with the host machine.
i18n
====
......
# -*- mode: ruby -*-
# vi: set ft=ruby :
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
$script = <<END
set -e
echo "Updating apt packages..."
apt-get update -y
echo "Installing basic system requirements..."
apt-get install -y curl git vim libxml2-dev libxslt1-dev memcached nginx
echo "Installing mysql server..."
DEBIAN_FRONTEND=noninteractive apt-get install -y mysql-server-5.5
echo "CREATE DATABASE IF NOT EXISTS workbench DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;" | mysql -u root
echo "Installing Python system requirements..."
apt-get install -y python2.7 python2.7-dev python-pip python-software-properties python-mysqldb libmysqlclient-dev
pip install virtualenv
echo "Installing FireFox and xvfb (for JavaScript tests)..."
add-apt-repository "ppa:ubuntu-mozilla-security/ppa"
apt-get install -y firefox dbus-x11 xvfb
cat /home/vagrant/edx-ora2/vagrant/xvfb.conf > /etc/init/xvfb.conf
start xvfb || true
echo "Installing RabbitMQ..."
add-apt-repository "deb http://www.rabbitmq.com/debian/ testing main"
cd /tmp && wget http://www.rabbitmq.com/rabbitmq-signing-key-public.asc && apt-key add rabbitmq-signing-key-public.asc
apt-get update -y
apt-get install -y rabbitmq-server
echo "Installing NodeJS..."
add-apt-repository ppa:chris-lea/node.js
apt-get update -y
apt-get install -y nodejs
# Stop all Python upstart jobs
sudo stop workbench || true
sudo stop celery || true
sudo stop flower || true
su vagrant <<EOF
set -e
echo "Creating a virtualenv..."
mkdir -p /home/vagrant/.virtualenvs
virtualenv /home/vagrant/.virtualenvs/edx-ora2
source /home/vagrant/.virtualenvs/edx-ora2/bin/activate
echo "Configuring login script..."
cat /home/vagrant/edx-ora2/vagrant/bash_profile > /home/vagrant/.bash_profile
echo "Installing EASE..."
if [ ! -d /home/vagrant/ease ]; then
git clone https://github.com/edx/ease.git /home/vagrant/ease
fi
cat /home/vagrant/ease/apt-packages.txt | xargs sudo apt-get -y install
cd /home/vagrant/ease && pip install -r pre-requirements.txt
cd /home/vagrant/ease && python setup.py install
echo "Downloading NLTK corpus..."
cd /home/vagrant/ease && ./download-nltk-corpus.sh
echo "Installing gunicorn..."
pip install gunicorn
echo "Instally Python MySQL library..."
pip install MySQL-python
echo "Installing celery flower..."
pip install flower
echo "Install edx-ora2..."
cd /home/vagrant/edx-ora2 && ./scripts/install.sh
echo "Update the database..."
cd /home/vagrant/edx-ora2 && python manage.py syncdb --migrate --noinput --settings settings.vagrant
echo "Collect static assets..."
mkdir -p /home/vagrant/static
cd /home/vagrant/edx-ora2 && python manage.py collectstatic --noinput --settings settings.vagrant
echo "Creating the update script..."
cp /home/vagrant/edx-ora2/vagrant/update.sh /home/vagrant/update.sh
EOF
echo "Creating upstart script for workbench..."
cat /home/vagrant/edx-ora2/vagrant/workbench_upstart.conf > /etc/init/workbench.conf
start workbench || true
echo "Create upstart script for Celery workers..."
cat /home/vagrant/edx-ora2/vagrant/celery_upstart.conf > /etc/init/celery.conf
start celery || true
echo "Create upstart script for Celery flower..."
cat /home/vagrant/edx-ora2/vagrant/flower_upstart.conf > /etc/init/flower.conf
start flower || true
echo "Configure nginx"
cat /home/vagrant/edx-ora2/vagrant/nginx.conf > /etc/nginx/sites-enabled/workbench.conf
echo "Restart nginx"
sudo service nginx stop || true
sudo service nginx start || true
END
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.box = "precise64"
config.vm.box_url = "http://files.vagrantup.com/precise64.box"
config.vm.network "private_network", ip: "192.168.44.10"
config.vm.synced_folder ".", "/home/vagrant/edx-ora2"
config.vm.provider :virtualbox do |vb|
# Increase memory and CPU
vb.customize ["modifyvm", :id, "--memory", "2048"]
vb.customize ["modifyvm", :id, "--cpus", "2"]
# Allow DNS to work for Ubuntu 12.10 host
# http://askubuntu.com/questions/238040/how-do-i-fix-name-service-for-vagrant-client
vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
end
config.vm.provision "shell", inline: $script
end
......@@ -27,6 +27,12 @@ Self Assessment
.. automodule:: openassessment.assessment.api.self
:members:
Example-Based Assessment (AI)
*****************************
.. automodule:: openassessment.assessment.api.ai
:members:
Student Training
****************
......
......@@ -240,28 +240,23 @@ Data Model
1. **GradingWorkflow**
a. Submission UUID (varchar)
b. Rubric UUID (varchar)
c. ClassifierSet (Foreign Key, Nullable)
d. Assessment (Foreign Key, Nullable)
e. Scheduled at (timestamp): The time the task was placed on the queue.
f. Started at (timestamp): The time the task was picked up by the worker.
b. ClassifierSet (Foreign Key, Nullable)
c. Assessment (Foreign Key, Nullable)
d. Rubric (Foreign Key): Used to search for classifier sets if none are available when the workflow is started.
e. Algorithm ID (varchar): Used to search for classifier sets if none are available when the workflow is started.
f. Scheduled at (timestamp): The time the task was placed on the queue.
g. Completed at (timestamp): The time the task was completed. If set, the task is considered complete.
h. Course ID (varchar): The ID of the course associated with the submission. Useful for rescheduling
failed grading tasks in a particular course.
i. Item ID (varchar): The ID of the item (problem) associated with the submission. Useful for rescheduling
failed grading tasks in a particular item in a course.
j. Worker version (varchar): Identifier for the code running on the worker when the task was started. Useful for error tracking.
h. Course ID (varchar): The ID of the course associated with the submission. Useful for rescheduling failed grading tasks in a particular course.
i. Item ID (varchar): The ID of the item (problem) associated with the submission. Useful for rescheduling failed grading tasks in a particular item in a course.
2. **TrainingWorkflow**
a. Algorithm ID (varchar)
b. Rubric UUID (varchar)
c. Many-to-many relation with **TrainingExample**. We can re-use examples for multiple workflows.
d. ClassifierSet (Foreign Key)
e. Scheduled at (timestamp): The time the task was placed on the queue.
f. Started at (timestamp): The time the task was picked up by the worker.
g. Completed at (timestamp): The time the task was completed. If set, the task is considered complete.
h. Worker version (varchar): Identifier for the code running on the worker when the task was started. Useful for error tracking.
b. Many-to-many relation with **TrainingExample**. We can re-use examples for multiple workflows.
c. ClassifierSet (Foreign Key)
d. Scheduled at (timestamp): The time the task was placed on the queue.
e. Completed at (timestamp): The time the task was completed. If set, the task is considered complete.
3. **TrainingExample**
......@@ -272,12 +267,13 @@ Data Model
a. Rubric (Foreign Key)
b. Created at (timestamp)
c. Algorithm ID (varchar)
5. **Classifier**
a. ClassifierSet (Foreign Key)
b. URL for trained classifier (varchar)
c. Algorithm ID (varchar)
c. Criterion (Foreign Key)
6. **Assessment** (same as current implementation)
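A minimal Django sketch of how the classifier tables described above might be
modeled; the field names and options here are illustrative assumptions, not the
actual implementation:

.. code:: python

    from django.db import models

    class AIClassifierSet(models.Model):
        """Sketch of item 4: a set of classifiers trained for one rubric."""
        rubric = models.ForeignKey('assessment.Rubric')
        created_at = models.DateTimeField(auto_now_add=True, db_index=True)
        algorithm_id = models.CharField(max_length=128, db_index=True)

    class AIClassifier(models.Model):
        """Sketch of item 5: one trained classifier per rubric criterion."""
        classifier_set = models.ForeignKey(AIClassifierSet, related_name='classifiers')
        criterion = models.ForeignKey('assessment.Criterion')
        # Item 5.b calls for a URL pointing to the trained classifier data;
        # a FileField (or URLField) could store that reference.
        classifier_data = models.FileField(upload_to='ora2_ai_classifiers')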
......
......@@ -5,7 +5,8 @@ from django.core.urlresolvers import reverse
from django.utils import html
from openassessment.assessment.models import (
Assessment, AssessmentFeedback, PeerWorkflow, PeerWorkflowItem, Rubric
Assessment, AssessmentFeedback, PeerWorkflow, PeerWorkflowItem, Rubric,
AIGradingWorkflow, AITrainingWorkflow, AIClassifierSet, AIClassifier
)
from openassessment.assessment.serializers import RubricSerializer
......@@ -17,7 +18,8 @@ class RubricAdmin(admin.ModelAdmin):
list_display_links = ('id', 'content_hash')
search_fields = ('id', 'content_hash')
readonly_fields = (
'id', 'content_hash', 'points_possible', 'criteria_summary', 'data'
'id', 'content_hash', 'structure_hash',
'points_possible', 'criteria_summary', 'data'
)
def criteria_summary(self, rubric_obj):
......@@ -119,7 +121,32 @@ class AssessmentFeedbackAdmin(admin.ModelAdmin):
assessments_by.allow_tags = True
class AIGradingWorkflowAdmin(admin.ModelAdmin):
list_display = ('uuid', 'submission_uuid')
search_fields = ('uuid', 'submission_uuid', 'student_id', 'item_id', 'course_id')
readonly_fields = ('uuid', 'submission_uuid', 'student_id', 'item_id', 'course_id')
class AITrainingWorkflowAdmin(admin.ModelAdmin):
list_display = ('uuid',)
search_fields = ('uuid', 'course_id', 'item_id',)
readonly_fields = ('uuid', 'course_id', 'item_id',)
class AIClassifierInline(admin.TabularInline):
model = AIClassifier
class AIClassifierSetAdmin(admin.ModelAdmin):
list_display = ('id',)
search_fields = ('id',)
inlines = [AIClassifierInline]
admin.site.register(Rubric, RubricAdmin)
admin.site.register(PeerWorkflow, PeerWorkflowAdmin)
admin.site.register(Assessment, AssessmentAdmin)
admin.site.register(AssessmentFeedback, AssessmentFeedbackAdmin)
admin.site.register(AIGradingWorkflow, AIGradingWorkflowAdmin)
admin.site.register(AITrainingWorkflow, AITrainingWorkflowAdmin)
admin.site.register(AIClassifierSet, AIClassifierSetAdmin)
......@@ -139,6 +139,8 @@ def get_score(submission_uuid, requirements):
dict with keys "points_earned" and "points_possible".
"""
if requirements is None:
return None
# User hasn't completed their own submission yet
if not submitter_is_finished(submission_uuid, requirements):
......
......@@ -69,9 +69,7 @@ def get_score(submission_uuid, requirements):
Args:
submission_uuid (str): The unique identifier for the submission
requirements (dict): Any attributes of the assessment module required
to determine if this assessment is complete. There are currently
no requirements for a self-assessment.
requirements (dict): Not used.
Returns:
A dict of points earned and points possible for the given submission.
Returns None if no score can be determined yet.
......
......@@ -7,3 +7,4 @@ Export errors from all modules defined in this package.
from .peer import *
from .self import *
from .student_training import *
from .ai import *
"""
Errors related to AI assessment.
"""
from celery.exceptions import InvalidTaskError, NotConfigured, NotRegistered, QueueNotFound
from socket import error as socket_error
ANTICIPATED_CELERY_ERRORS = (InvalidTaskError, NotConfigured, NotRegistered, QueueNotFound, socket_error)
class AIError(Exception):
"""
A general error occurred while using the AI assessment API.
"""
pass
class AITrainingRequestError(AIError):
"""
There was a problem with the request sent to the AI assessment API.
"""
pass
class AITrainingInternalError(AIError):
"""
An unexpected error occurred while using the AI assessment API.
"""
pass
class AIGradingRequestError(AIError):
"""
There was a problem with the request sent to the AI assessment API.
"""
pass
class AIGradingInternalError(AIError):
"""
An unexpected error occurred while using the AI assessment API.
"""
pass
class AIReschedulingRequestError(AIError):
"""
There was a problem with the request sent to the AI assessment API.
"""
pass
class AIReschedulingInternalError(AIError):
"""
An unexpected error occurred while using the AI assessment API.
"""
pass
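A hedged sketch (not part of this commit) illustrating the error hierarchy defined above: every exception derives from AIError, so callers can catch the base class for blanket handling or the subclasses to distinguish bad requests from internal failures.

    from openassessment.assessment.errors import (
        AIError, AIGradingRequestError, AIGradingInternalError
    )

    # Both grading errors share AIError as a base class.
    assert issubclass(AIGradingRequestError, AIError)
    assert issubclass(AIGradingInternalError, AIError)

    try:
        # Simulate a caller passing a bad request to the AI grading API.
        raise AIGradingRequestError(u"Rubric definition was invalid")
    except AIGradingRequestError:
        pass  # Surface a validation error to the caller.
    except AIError:
        pass  # Anything else: log it and allow the task to be retried.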
......@@ -7,3 +7,4 @@ from .base import *
from .peer import *
from .training import *
from .student_training import *
from .ai import *
......@@ -56,9 +56,12 @@ class Rubric(models.Model):
creating a new Rubric instead. This makes it easy to cache and do hash-based
lookups.
"""
# SHA1 hash
# SHA1 hash, including prompts and explanations
content_hash = models.CharField(max_length=40, unique=True, db_index=True)
# SHA1 hash of just the rubric structure (criteria / options / points)
structure_hash = models.CharField(max_length=40, db_index=True)
class Meta:
app_label = "assessment"
......@@ -90,6 +93,38 @@ class Rubric(models.Model):
canonical_form = json.dumps(rubric_dict, sort_keys=True)
return sha1(canonical_form).hexdigest()
@staticmethod
def structure_hash_from_dict(rubric_dict):
"""
Generate a hash of the rubric that includes only structural information:
* Criteria names and order
* Option names / points / order number
We do NOT include prompt text or option explanations.
NOTE: currently, we use the criterion and option names as unique identifiers,
so we include them in the structure. In the future, we plan to assign
criteria/options unique IDs -- when we do that, we will need to update
this method and create a data migration for existing rubrics.
"""
structure = [
{
"criterion_name": criterion.get('name'),
"criterion_order": criterion.get('order_num'),
"options": [
{
"option_name": option.get('name'),
"option_points": option.get('points'),
"option_order": option.get('order_num')
}
for option in criterion.get('options', [])
]
}
for criterion in rubric_dict.get('criteria', [])
]
canonical_form = json.dumps(structure, sort_keys=True)
return sha1(canonical_form).hexdigest()
def options_ids(self, options_selected):
"""Given a mapping of selected options, return the option IDs.
......@@ -170,6 +205,55 @@ class Rubric(models.Model):
return option_id_set
def options_ids_for_points(self, criterion_points):
"""
Given a mapping of selected point values, return the option IDs.
If there are multiple options with the same point value,
this will return the first one (lower order number).
Args:
criterion_points (dict): Mapping of criteria names to point values.
Returns:
list of option IDs
Raises:
InvalidOptionSelection
"""
# Retrieve the mapping of criterion names/points to option IDs
# from the cache, if it's available
cache_key = "assessment.rubric_points_dict.{}".format(self.content_hash)
rubric_points_dict = cache.get(cache_key)
# Otherwise, create the dict by querying the database
if not rubric_points_dict:
rubric_options = CriterionOption.objects.filter(
criterion__rubric=self
).select_related()
rubric_points_dict = defaultdict(dict)
for option in rubric_options:
if option.points not in rubric_points_dict[option.criterion.name]:
rubric_points_dict[option.criterion.name][option.points] = option.id
# Store the dict in the cache
cache.set(cache_key, rubric_points_dict)
# Find the IDs for the options matching the specified point value
option_id_set = set()
for criterion_name, option_points in criterion_points.iteritems():
if (criterion_name in rubric_points_dict and option_points in rubric_points_dict[criterion_name]):
option_id = rubric_points_dict[criterion_name][option_points]
option_id_set.add(option_id)
else:
msg = u"{criterion} option with point value {points} not found in rubric".format(
criterion=criterion_name, points=option_points
)
raise InvalidOptionSelection(msg)
return option_id_set
class Criterion(models.Model):
"""A single aspect of a submission that needs assessment.
......
......@@ -92,7 +92,7 @@ class RubricSerializer(NestedModelSerializer):
class Meta:
model = Rubric
fields = ('id', 'content_hash', 'criteria', 'points_possible')
fields = ('id', 'content_hash', 'structure_hash', 'criteria', 'points_possible')
def validate_criteria(self, attrs, source):
"""Make sure we have at least one Criterion in the Rubric."""
......@@ -283,6 +283,7 @@ def rubric_from_dict(rubric_dict):
rubric = Rubric.objects.get(content_hash=content_hash)
except Rubric.DoesNotExist:
rubric_dict["content_hash"] = content_hash
rubric_dict["structure_hash"] = Rubric.structure_hash_from_dict(rubric_dict)
for crit_idx, criterion in enumerate(rubric_dict.get("criteria", {})):
if "order_num" not in criterion:
criterion["order_num"] = crit_idx
......
"""
Signals for the workflow API.
See https://docs.djangoproject.com/en/1.4/topics/signals
"""
import django.dispatch
# Indicate that an assessment has completed
# You can fire this signal from asynchronous processes (such as AI grading)
# to notify receivers that an assessment is available.
assessment_complete_signal = django.dispatch.Signal(providing_args=['submission_uuid']) # pylint: disable=C0103
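A hedged sketch (not part of this commit) of how a receiver might subscribe to the signal and how an asynchronous worker might fire it; the import path and receiver name are assumptions.

    from django.dispatch import receiver
    # Assumed import path for the signals module shown above.
    from openassessment.workflow.signals import assessment_complete_signal

    @receiver(assessment_complete_signal)
    def on_assessment_complete(sender, submission_uuid=None, **kwargs):
        """Hypothetical receiver: re-check the workflow for this submission."""
        print u"Assessment available for submission {}".format(submission_uuid)

    # Fired from an asynchronous process (such as an AI grading task):
    assessment_complete_signal.send(sender=None, submission_uuid=u"example-uuid")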
"""
Celery looks for tasks in this module,
so import the tasks we want the workers to implement.
"""
# pylint:disable=W0611
from .worker.training import train_classifiers, reschedule_training_tasks
from .worker.grading import grade_essay, reschedule_grading_tasks
\ No newline at end of file
# coding=utf-8
"""
Tests for AI algorithm implementations.
"""
import unittest
import json
import mock
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.worker.algorithm import (
AIAlgorithm, FakeAIAlgorithm, EaseAIAlgorithm,
TrainingError, InvalidClassifier
)
EXAMPLES = [
AIAlgorithm.ExampleEssay(u"Mine's a tale that can't be told, my ƒяєє∂σм I hold dear.", 2),
AIAlgorithm.ExampleEssay(u"How years ago in days of old, when 𝒎𝒂𝒈𝒊𝒄 filled th air.", 1),
AIAlgorithm.ExampleEssay(u"Ṫ'ẅäṡ in the darkest depths of Ṁöṛḋöṛ, I met a girl so fair.", 1),
AIAlgorithm.ExampleEssay(u"But goレレuᄊ, and the evil one crept up and slipped away with her", 0),
AIAlgorithm.ExampleEssay(u"", 4),
AIAlgorithm.ExampleEssay(u".!?", 4),
AIAlgorithm.ExampleEssay(u"no punctuation", 4),
AIAlgorithm.ExampleEssay(u"one", 4),
]
INPUT_ESSAYS = [
u"Good times, 𝑩𝒂𝒅 𝑻𝒊𝒎𝒆𝒔, you know I had my share",
u"When my woman left home for a 𝒃𝒓𝒐𝒘𝒏 𝒆𝒚𝒆𝒅 𝒎𝒂𝒏",
u"Well, I still don't seem to 𝒄𝒂𝒓𝒆",
u"",
u".!?",
u"no punctuation",
u"one",
]
class AIAlgorithmTest(CacheResetTest):
"""
Base class for testing AI algorithm implementations.
"""
ALGORITHM_CLASS = None
def setUp(self):
self.algorithm = self.ALGORITHM_CLASS() # pylint:disable=E1102
def _scores(self, classifier, input_essays):
"""
Use the classifier to score multiple input essays.
Args:
input_essays (list of unicode): The essays to score.
Returns:
list of int: The scores
"""
cache = {}
return [
self.algorithm.score(input_essay, classifier, cache)
for input_essay in input_essays
]
class FakeAIAlgorithmTest(AIAlgorithmTest):
"""
Test for the fake AI algorithm implementation.
"""
ALGORITHM_CLASS = FakeAIAlgorithm
def test_train_and_score(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
expected_scores = [2, 0, 0, 0, 4, 2, 4]
scores = self._scores(classifier, INPUT_ESSAYS)
self.assertEqual(scores, expected_scores)
def test_score_classifier_missing_key(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test input", {}, {})
def test_score_classifier_no_scores(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test input", {'scores': []}, {})
# Try to import EASE -- if we can't, then skip the tests that require it
try:
import ease # pylint: disable=F0401,W0611
EASE_INSTALLED = True
except ImportError:
EASE_INSTALLED = False
@unittest.skipUnless(EASE_INSTALLED, "EASE library required")
class EaseAIAlgorithmTest(AIAlgorithmTest):
"""
Test for the EASE AI library wrapper.
"""
ALGORITHM_CLASS = EaseAIAlgorithm
def test_train_and_score(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
scores = self._scores(classifier, INPUT_ESSAYS)
# Check that we got scores in the correct range
valid_scores = set(example.score for example in EXAMPLES)
for score in scores:
self.assertIn(score, valid_scores)
# Check that the scores are consistent when we re-run the algorithm
repeat_scores = self._scores(classifier, INPUT_ESSAYS)
self.assertEqual(scores, repeat_scores)
def test_all_examples_have_same_score(self):
examples = [
AIAlgorithm.ExampleEssay(u"Test ëṡṡäÿ", 1),
AIAlgorithm.ExampleEssay(u"Another test ëṡṡäÿ", 1),
]
# No assertion -- just verifying that this does not raise an exception
classifier = self.algorithm.train_classifier(examples)
self._scores(classifier, INPUT_ESSAYS)
def test_most_examples_have_same_score(self):
# All training examples have the same score except for one
examples = [
AIAlgorithm.ExampleEssay(u"Test ëṡṡäÿ", 1),
AIAlgorithm.ExampleEssay(u"Another test ëṡṡäÿ", 1),
AIAlgorithm.ExampleEssay(u"Different score", 0),
]
classifier = self.algorithm.train_classifier(examples)
scores = self._scores(classifier, INPUT_ESSAYS)
# Check that we got scores back.
# This is not a very rigorous assertion -- we're mainly
# checking that we got this far without an exception.
self.assertEqual(len(scores), len(INPUT_ESSAYS))
def test_no_examples(self):
with self.assertRaises(TrainingError):
self.algorithm.train_classifier([])
def test_json_serializable(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
serialized = json.dumps(classifier)
deserialized = json.loads(serialized)
# This should not raise an exception
scores = self._scores(deserialized, INPUT_ESSAYS)
self.assertEqual(len(scores), len(INPUT_ESSAYS))
@mock.patch('openassessment.assessment.worker.algorithm.pickle')
def test_pickle_serialize_error(self, mock_pickle):
mock_pickle.dumps.side_effect = Exception("Test error!")
with self.assertRaises(TrainingError):
self.algorithm.train_classifier(EXAMPLES)
def test_pickle_deserialize_error(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
with mock.patch('openassessment.assessment.worker.algorithm.pickle.loads') as mock_call:
mock_call.side_effect = Exception("Test error!")
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test ëṡṡäÿ", classifier, {})
def test_serialized_classifier_not_a_dict(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test ëṡṡäÿ", "not a dict", {})
# coding=utf-8
"""
Test AI Django models.
"""
import copy
from django.test.utils import override_settings
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.models import (
AIClassifierSet, AIClassifier, AIGradingWorkflow, AI_CLASSIFIER_STORAGE
)
from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
class AIClassifierTest(CacheResetTest):
"""
Tests for the AIClassifier model.
"""
def test_upload_to_path_default(self):
# No path prefix provided in the settings
classifier = self._create_classifier()
components = classifier.classifier_data.name.split(u'/')
self.assertEqual(len(components), 2)
self.assertEqual(components[0], AI_CLASSIFIER_STORAGE)
self.assertGreater(len(components[1]), 0)
@override_settings(ORA2_FILE_PREFIX=u"ƒιℓє_ρяєƒιχ")
def test_upload_to_path_with_prefix(self):
classifier = self._create_classifier()
components = classifier.classifier_data.name.split(u'/')
self.assertEqual(len(components), 3)
self.assertEqual(components[0], u"ƒιℓє_ρяєƒιχ")
self.assertEqual(components[1], AI_CLASSIFIER_STORAGE)
self.assertGreater(len(components[2]), 0)
def _create_classifier(self):
"""
Create and return an AIClassifier.
"""
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
)
return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIClassifierSetTest(CacheResetTest):
"""
Tests for the AIClassifierSet model.
"""
def setUp(self):
rubric = rubric_from_dict(RUBRIC)
self.classifier_set = AIClassifierSet.create_classifier_set(
CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
)
def test_cache_downloads(self):
# Retrieve the classifier dict twice, which should hit the caching code.
# We can check that we're using the cache by asserting that
# the number of database queries decreases.
with self.assertNumQueries(3):
first = self.classifier_set.classifiers_dict
with self.assertNumQueries(0):
second = self.classifier_set.classifiers_dict
# Verify that we got the same value both times
self.assertEqual(first, second)
class AIGradingWorkflowTest(CacheResetTest):
"""
Tests for the AIGradingWorkflow model.
"""
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"test"
ITEM_ID = u"test"
ALGORITHM_ID = "test"
def setUp(self):
"""
Create a new grading workflow.
"""
self.rubric = rubric_from_dict(RUBRIC)
self.workflow = AIGradingWorkflow.objects.create(
submission_uuid='test', essay_text='test',
rubric=self.rubric, algorithm_id=self.ALGORITHM_ID,
item_id=self.ITEM_ID, course_id=self.COURSE_ID
)
# Create a rubric with a similar structure, but different prompt
similar_rubric_dict = copy.deepcopy(RUBRIC)
similar_rubric_dict['prompt'] = 'Different prompt!'
self.similar_rubric = rubric_from_dict(similar_rubric_dict)
def test_assign_most_recent_classifier_set(self):
# No classifier sets are available
found = self.workflow.assign_most_recent_classifier_set()
self.assertFalse(found)
self.assertIs(self.workflow.classifier_set, None)
# Same rubric (exact), but different course id
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
"different course!", self.ITEM_ID
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric (exact) but different item id
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
self.COURSE_ID, "different item!"
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric (exact), but different algorithm id
# Shouldn't change, since the algorithm ID doesn't match
AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.rubric, "different algorithm!",
self.COURSE_ID, self.ITEM_ID
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric *structure*, but in a different item
# Shouldn't change, since the rubric isn't an exact match.
AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.similar_rubric, self.ALGORITHM_ID,
self.COURSE_ID, "different item!"
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric *structure* AND in the same course/item
# This should replace our current classifier set
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.similar_rubric, self.ALGORITHM_ID,
self.COURSE_ID, self.ITEM_ID
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
# Same rubric and same course/item
# This is the ideal, so we should always prefer it
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
self.COURSE_ID, self.ITEM_ID
)
found = self.workflow.assign_most_recent_classifier_set()
self.assertTrue(found)
self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
......@@ -11,7 +11,7 @@ from nose.tools import raises
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.api import peer as peer_api
from openassessment.assessment.models import (
Assessment, AssessmentPart, AssessmentFeedback,
Assessment, AssessmentPart, AssessmentFeedback, AssessmentFeedbackOption,
PeerWorkflow, PeerWorkflowItem
)
from openassessment.workflow import api as workflow_api
......@@ -1191,3 +1191,142 @@ class TestPeerApi(CacheResetTest):
peer_api.on_start(submission["uuid"])
workflow_api.create_workflow(submission["uuid"], STEPS)
return submission, new_student_item
class PeerWorkflowTest(CacheResetTest):
"""
Tests for the peer workflow model.
"""
STUDENT_ITEM = {
'student_id': 'test_student',
'course_id': 'test_course',
'item_type': 'openassessment',
'item_id': 'test_item'
}
OTHER_STUDENT = {
'student_id': 'test_student_2',
'course_id': 'test_course',
'item_type': 'openassessment',
'item_id': 'test_item'
}
def test_create_item_multiple_available(self):
# Bugfix TIM-572
submitter_sub = sub_api.create_submission(self.STUDENT_ITEM, 'test answer')
submitter_workflow = PeerWorkflow.objects.create(
student_id=self.STUDENT_ITEM['student_id'],
item_id=self.STUDENT_ITEM['item_id'],
course_id=self.STUDENT_ITEM['course_id'],
submission_uuid=submitter_sub['uuid']
)
scorer_sub = sub_api.create_submission(self.OTHER_STUDENT, 'test answer 2')
scorer_workflow = PeerWorkflow.objects.create(
student_id=self.OTHER_STUDENT['student_id'],
item_id=self.OTHER_STUDENT['item_id'],
course_id=self.OTHER_STUDENT['course_id'],
submission_uuid=scorer_sub['uuid']
)
for _ in range(2):
PeerWorkflowItem.objects.create(
scorer=scorer_workflow,
author=submitter_workflow,
submission_uuid=submitter_sub['uuid']
)
# This used to cause an error when `get_or_create` returned multiple workflow items
PeerWorkflow.create_item(scorer_workflow, submitter_sub['uuid'])
class AssessmentFeedbackTest(CacheResetTest):
"""
Tests for assessment feedback.
This is feedback that students give in response to the peer assessments they receive.
"""
def setUp(self):
self.feedback = AssessmentFeedback.objects.create(
submission_uuid='test_submission',
feedback_text='test feedback',
)
def test_default_options(self):
self.assertEqual(self.feedback.options.count(), 0)
def test_add_options_all_new(self):
# We haven't created any feedback options yet, so these should be created.
self.feedback.add_options(['I liked my assessment', 'I thought my assessment was unfair'])
# Check the feedback options
options = self.feedback.options.all()
self.assertEqual(len(options), 2)
self.assertEqual(options[0].text, 'I liked my assessment')
self.assertEqual(options[1].text, 'I thought my assessment was unfair')
def test_add_options_some_new(self):
# Create one feedback option in the database
AssessmentFeedbackOption.objects.create(text='I liked my assessment')
# Add feedback options. The one that's new should be created.
self.feedback.add_options(['I liked my assessment', 'I thought my assessment was unfair'])
# Check the feedback options
options = self.feedback.options.all()
self.assertEqual(len(options), 2)
self.assertEqual(options[0].text, 'I liked my assessment')
self.assertEqual(options[1].text, 'I thought my assessment was unfair')
def test_add_options_empty(self):
# No options
self.feedback.add_options([])
self.assertEqual(len(self.feedback.options.all()), 0)
# Add an option
self.feedback.add_options(['test'])
self.assertEqual(len(self.feedback.options.all()), 1)
# Add an empty list of options
self.feedback.add_options([])
self.assertEqual(len(self.feedback.options.all()), 1)
def test_add_options_duplicates(self):
# Add some options, which will be created
self.feedback.add_options(['I liked my assessment', 'I thought my assessment was unfair'])
# Add some more options, one of which is a duplicate
self.feedback.add_options(['I liked my assessment', 'I disliked my assessment'])
# There should be three options
options = self.feedback.options.all()
self.assertEqual(len(options), 3)
self.assertEqual(options[0].text, 'I liked my assessment')
self.assertEqual(options[1].text, 'I thought my assessment was unfair')
self.assertEqual(options[2].text, 'I disliked my assessment')
# There should be only three options in the database
self.assertEqual(AssessmentFeedbackOption.objects.count(), 3)
def test_add_options_all_old(self):
# Add some options, which will be created
self.feedback.add_options(['I liked my assessment', 'I thought my assessment was unfair'])
# Add some more options, all of which are duplicates
self.feedback.add_options(['I liked my assessment', 'I thought my assessment was unfair'])
# There should be two options
options = self.feedback.options.all()
self.assertEqual(len(options), 2)
self.assertEqual(options[0].text, 'I liked my assessment')
self.assertEqual(options[1].text, 'I thought my assessment was unfair')
# There should be two options in the database
self.assertEqual(AssessmentFeedbackOption.objects.count(), 2)
def test_unicode(self):
# Create options with unicode
self.feedback.add_options([u'𝓘 𝓵𝓲𝓴𝓮𝓭 𝓶𝔂 𝓪𝓼𝓼𝓮𝓼𝓼𝓶𝓮𝓷𝓽', u'ノ イんougんイ ᄊリ ム丂丂乇丂丂ᄊ乇刀イ wム丂 u刀キムノ尺'])
# There should be two options in the database
self.assertEqual(AssessmentFeedbackOption.objects.count(), 2)
# -*- coding: utf-8 -*-
"""
Tests for assessment models.
"""
import copy
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.models import (
Rubric, Criterion, CriterionOption, InvalidOptionSelection
)
from openassessment.assessment.test.constants import RUBRIC
class TestRubricOptionIds(CacheResetTest):
"""
Test selection of options from a rubric.
"""
NUM_CRITERIA = 4
NUM_OPTIONS = 3
def setUp(self):
"""
Create a rubric in the database.
"""
self.rubric = Rubric.objects.create()
self.criteria = [
Criterion.objects.create(
rubric=self.rubric,
name="test criterion {num}".format(num=num),
order_num=num,
) for num in range(self.NUM_CRITERIA)
]
self.options = dict()
for criterion in self.criteria:
self.options[criterion.name] = [
CriterionOption.objects.create(
criterion=criterion,
name="test option {num}".format(num=num),
order_num=num,
points=num
) for num in range(self.NUM_OPTIONS)
]
def test_option_ids(self):
options_ids = self.rubric.options_ids({
"test criterion 0": "test option 0",
"test criterion 1": "test option 1",
"test criterion 2": "test option 2",
"test criterion 3": "test option 0",
})
self.assertEqual(options_ids, set([
self.options['test criterion 0'][0].id,
self.options['test criterion 1'][1].id,
self.options['test criterion 2'][2].id,
self.options['test criterion 3'][0].id
]))
def test_option_ids_different_order(self):
options_ids = self.rubric.options_ids({
"test criterion 0": "test option 0",
"test criterion 1": "test option 1",
"test criterion 2": "test option 2",
"test criterion 3": "test option 0",
})
self.assertEqual(options_ids, set([
self.options['test criterion 0'][0].id,
self.options['test criterion 1'][1].id,
self.options['test criterion 2'][2].id,
self.options['test criterion 3'][0].id
]))
def test_option_ids_missing_criteria(self):
with self.assertRaises(InvalidOptionSelection):
self.rubric.options_ids({
"test criterion 0": "test option 0",
"test criterion 1": "test option 1",
"test criterion 3": "test option 2",
})
def test_option_ids_extra_criteria(self):
with self.assertRaises(InvalidOptionSelection):
self.rubric.options_ids({
"test criterion 0": "test option 0",
"test criterion 1": "test option 1",
"test criterion 2": "test option 2",
"test criterion 3": "test option 1",
"extra criterion": "test",
})
def test_option_ids_mutated_criterion_name(self):
with self.assertRaises(InvalidOptionSelection):
self.rubric.options_ids({
"test mutated criterion": "test option 1",
"test criterion 1": "test option 1",
"test criterion 2": "test option 2",
"test criterion 3": "test option 1",
})
def test_option_ids_mutated_option_name(self):
with self.assertRaises(InvalidOptionSelection):
self.rubric.options_ids({
"test criterion 0": "test option 1",
"test criterion 1": "test mutated option",
"test criterion 2": "test option 2",
"test criterion 3": "test option 1",
})
def test_options_ids_points(self):
options_ids = self.rubric.options_ids_for_points({
'test criterion 0': 0,
'test criterion 1': 1,
'test criterion 2': 2,
'test criterion 3': 1
})
self.assertEqual(options_ids, set([
self.options['test criterion 0'][0].id,
self.options['test criterion 1'][1].id,
self.options['test criterion 2'][2].id,
self.options['test criterion 3'][1].id
]))
def test_options_ids_points_caching(self):
# First call: the dict is not cached
with self.assertNumQueries(1):
self.rubric.options_ids_for_points({
'test criterion 0': 0,
'test criterion 1': 1,
'test criterion 2': 2,
'test criterion 3': 1
})
# Second call: the dict is cached, so no database queries are needed
with self.assertNumQueries(0):
self.rubric.options_ids_for_points({
'test criterion 0': 1,
'test criterion 1': 2,
'test criterion 2': 1,
'test criterion 3': 0
})
def test_options_ids_first_of_duplicate_points(self):
# Change the first criterion options so that the second and third
# option have the same point value
self.options['test criterion 0'][1].points = 5
self.options['test criterion 0'][1].save()
self.options['test criterion 0'][2].points = 5
self.options['test criterion 0'][2].save()
# Should get the first option back
options_ids = self.rubric.options_ids_for_points({
'test criterion 0': 5,
'test criterion 1': 1,
'test criterion 2': 2,
'test criterion 3': 1
})
self.assertIn(self.options['test criterion 0'][1].id, options_ids)
def test_options_ids_points_invalid_selection(self):
with self.assertRaises(InvalidOptionSelection):
self.rubric.options_ids_for_points({
'test criterion 0': self.NUM_OPTIONS + 1,
'test criterion 1': 2,
'test criterion 2': 1,
'test criterion 3': 0
})
def test_structure_hash_identical(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
# Same structure, but different text should have the same structure hash
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['prompt'] = 'altered!'
for criterion in altered_rubric['criteria']:
criterion['prompt'] = 'altered!'
for option in criterion['options']:
option['explanation'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
# Expect that the two hashes are the same
self.assertEqual(first_hash, second_hash)
def test_structure_hash_extra_keys(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
# Same structure, add some extra keys
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['extra'] = 'extra!'
altered_rubric['criteria'][0]['extra'] = 'extra!'
altered_rubric['criteria'][0]['options'][0]['extra'] = 'extra!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
# Expect that the two hashes are the same
self.assertEqual(first_hash, second_hash)
def test_structure_hash_criterion_order_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['order_num'] = 5
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_criterion_name_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['name'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_order_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['order_num'] = 5
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_name_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['name'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_points_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['points'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
......@@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: 0.1a\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2014-06-09 09:30-0400\n"
"POT-Creation-Date: 2014-06-09 11:02-0400\n"
"PO-Revision-Date: 2014-06-04 15:41-0400\n"
"Last-Translator: \n"
"Language-Team: openedx-translation <openedx-translation@googlegroups.com>\n"
......@@ -87,20 +87,26 @@ msgstr ""
#: openassessment/xblock/static/js/src/oa_server.js:253
#: openassessment/xblock/static/js/src/oa_server.js:293
#: openassessment/xblock/static/js/src/oa_server.js:334
#: openassessment/xblock/static/js/src/oa_server.js:368
msgid "This assessment could not be submitted."
msgstr ""
#: openassessment/xblock/static/js/openassessment.min.js:1
#: openassessment/xblock/static/js/src/oa_server.js:362
#: openassessment/xblock/static/js/src/oa_server.js:392
msgid "One or more rescheduling tasks failed."
msgstr ""
#: openassessment/xblock/static/js/openassessment.min.js:1
#: openassessment/xblock/static/js/src/oa_server.js:420
msgid "This problem could not be loaded."
msgstr ""
#: openassessment/xblock/static/js/openassessment.min.js:1
#: openassessment/xblock/static/js/src/oa_server.js:391
#: openassessment/xblock/static/js/src/oa_server.js:449
msgid "This problem could not be saved."
msgstr ""
#: openassessment/xblock/static/js/openassessment.min.js:1
#: openassessment/xblock/static/js/src/oa_server.js:421
#: openassessment/xblock/static/js/src/oa_server.js:479
msgid "The server could not be contacted."
msgstr ""
......@@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: 0.1a\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2014-06-09 09:30-0400\n"
"POT-Creation-Date: 2014-06-09 11:02-0400\n"
"PO-Revision-Date: 2014-06-04 15:41-0400\n"
"Last-Translator: \n"
"Language-Team: openedx-translation <openedx-translation@googlegroups.com>\n"
......@@ -92,20 +92,26 @@ msgstr "Thïs féédßäçk çöüld nöt ßé süßmïttéd. Ⱡ'σяєм ιρ
#: openassessment/xblock/static/js/src/oa_server.js:253
#: openassessment/xblock/static/js/src/oa_server.js:293
#: openassessment/xblock/static/js/src/oa_server.js:334
#: openassessment/xblock/static/js/src/oa_server.js:368
msgid "This assessment could not be submitted."
msgstr "Thïs ässéssmént çöüld nöt ßé süßmïttéd. Ⱡ'σяєм ιρѕ#"
#: openassessment/xblock/static/js/openassessment.min.js:1
#: openassessment/xblock/static/js/src/oa_server.js:362
#: openassessment/xblock/static/js/src/oa_server.js:392
msgid "One or more rescheduling tasks failed."
msgstr "Öné ör möré résçhédülïng täsks fäïléd. Ⱡ'σяєм ιρѕ#"
#: openassessment/xblock/static/js/openassessment.min.js:1
#: openassessment/xblock/static/js/src/oa_server.js:420
msgid "This problem could not be loaded."
msgstr "Thïs prößlém çöüld nöt ßé löädéd. Ⱡ'σяєм ι#"
#: openassessment/xblock/static/js/openassessment.min.js:1
#: openassessment/xblock/static/js/src/oa_server.js:391
#: openassessment/xblock/static/js/src/oa_server.js:449
msgid "This problem could not be saved."
msgstr "Thïs prößlém çöüld nöt ßé sävéd. Ⱡ'σяєм ι#"
#: openassessment/xblock/static/js/openassessment.min.js:1
#: openassessment/xblock/static/js/src/oa_server.js:421
#: openassessment/xblock/static/js/src/oa_server.js:479
msgid "The server could not be contacted."
msgstr "Thé sérvér çöüld nöt ßé çöntäçtéd. Ⱡ'σяєм ιρ#"
# -*- coding: utf-8 -*-
"""
Simulate failure of the worker AI grading tasks.
When the workers fail to successfully complete AI grading,
the AI grading workflow in the database will never be marked complete.
To simulate the error condition, therefore, we create incomplete
AI grading workflows without scheduling a grading task.
To recover, a staff member can reschedule incomplete grading tasks.
"""
from django.core.management.base import BaseCommand, CommandError
from submissions import api as sub_api
from openassessment.assessment.models import AIGradingWorkflow, AIClassifierSet
from openassessment.assessment.serializers import rubric_from_dict
from openassessment.assessment.worker.algorithm import AIAlgorithm
class Command(BaseCommand):
"""
Create submissions and AI incomplete grading workflows.
"""
help = (
u"Simulate failure of the worker AI grading tasks "
u"by creating incomplete AI grading workflows in the database."
)
args = '<COURSE_ID> <PROBLEM_ID> <NUM_SUBMISSIONS> <ALGORITHM_ID>'
RUBRIC_OPTIONS = [
{
"order_num": 0,
"name": u"poor",
"explanation": u"Poor job!",
"points": 0,
},
{
"order_num": 1,
"name": u"good",
"explanation": u"Good job!",
"points": 1,
}
]
RUBRIC = {
'prompt': u"Test prompt",
'criteria': [
{
"order_num": 0,
"name": u"vocabulary",
"prompt": u"Vocabulary",
"options": RUBRIC_OPTIONS
},
{
"order_num": 1,
"name": u"grammar",
"prompt": u"Grammar",
"options": RUBRIC_OPTIONS
}
]
}
EXAMPLES = {
"vocabulary": [
AIAlgorithm.ExampleEssay(
text=u"World Food Day is celebrated every year around the world on 16 October in honor "
u"of the date of the founding of the Food and Agriculture "
u"Organization of the United Nations in 1945.",
score=0
),
AIAlgorithm.ExampleEssay(
text=u"Since 1981, World Food Day has adopted a different theme each year "
u"in order to highlight areas needed for action and provide a common focus.",
score=1
),
],
"grammar": [
AIAlgorithm.ExampleEssay(
text=u"Most of the themes revolve around agriculture because only investment in agriculture ",
score=0
),
AIAlgorithm.ExampleEssay(
text=u"In spite of the importance of agriculture as the driving force "
u"in the economies of many developing countries, this "
u"vital sector is frequently starved of investment.",
score=1
)
]
}
STUDENT_ID = u'test_student'
ANSWER = {'answer': 'test answer'}
def handle(self, *args, **options):
"""
Execute the command.
Args:
course_id (unicode): The ID of the course to create submissions/workflows in.
item_id (unicode): The ID of the problem in the course.
num_submissions (int): The number of submissions/workflows to create.
algorithm_id (unicode): The ID of the ML algorithm to use ("fake" or "ease")
Raises:
CommandError
"""
if len(args) < 4:
raise CommandError(u"Usage: simulate_ai_grading_error {}".format(self.args))
# Parse arguments
course_id = args[0].decode('utf-8')
item_id = args[1].decode('utf-8')
num_submissions = int(args[2])
algorithm_id = args[3].decode('utf-8')
# Create the rubric model
rubric = rubric_from_dict(self.RUBRIC)
# Train classifiers
print u"Training classifiers using {algorithm_id}...".format(algorithm_id=algorithm_id)
algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
classifier_data = {
criterion_name: algorithm.train_classifier(example)
for criterion_name, example in self.EXAMPLES.iteritems()
}
print u"Successfully trained classifiers."
# Create the classifier set
classifier_set = AIClassifierSet.create_classifier_set(
classifier_data, rubric, algorithm_id, course_id, item_id
)
print u"Successfully created classifier set with id {}".format(classifier_set.pk)
# Create submissions and grading workflows
for num in range(num_submissions):
student_item = {
'course_id': course_id,
'item_id': item_id,
'item_type': 'openassessment',
'student_id': "{base}_{num}".format(base=self.STUDENT_ID, num=num)
}
submission = sub_api.create_submission(student_item, self.ANSWER)
workflow = AIGradingWorkflow.start_workflow(
submission['uuid'], self.RUBRIC, algorithm_id
)
workflow.classifier_set = classifier_set
workflow.save()
print u"{num}: Created incomplete grading workflow with UUID {uuid}".format(
num=num, uuid=workflow.uuid
)
......@@ -52,7 +52,7 @@ class Command(BaseCommand):
Execute the command.
Args:
course_id (unicode): The ID of the course to create submissions for.
course_id (unicode): The ID of the course to use.
s3_bucket_name (unicode): The name of the S3 bucket to upload to.
Raises:
......
# -*- coding: utf-8 -*-
"""
Tests for the simulate AI grading error management command.
"""
from django.test.utils import override_settings
from openassessment.test_utils import CacheResetTest
from openassessment.management.commands import simulate_ai_grading_error
from openassessment.assessment.models import AIGradingWorkflow
from openassessment.assessment.worker.grading import grade_essay
class SimulateAIGradingErrorTest(CacheResetTest):
"""
Tests for the simulate AI grading error management command.
"""
COURSE_ID = u"TɘꙅT ↄoUᴙꙅɘ"
ITEM_ID = u"𝖙𝖊𝖘𝖙 𝖎𝖙𝖊𝖒"
NUM_SUBMISSIONS = 20
AI_ALGORITHMS = {
"fake": "openassessment.assessment.worker.algorithm.FakeAIAlgorithm"
}
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_simulate_ai_grading_error(self):
# Run the command
cmd = simulate_ai_grading_error.Command()
cmd.handle(
self.COURSE_ID.encode('utf-8'),
self.ITEM_ID.encode('utf-8'),
self.NUM_SUBMISSIONS,
"fake"
)
# Check that the correct number of incomplete workflows
# were created. These workflows should still have
# a classifier set, though, because otherwise they
# wouldn't have been scheduled for grading
# (that is, the submissions were made before classifier
# training completed).
incomplete_workflows = AIGradingWorkflow.objects.filter(
classifier_set__isnull=False,
completed_at__isnull=True
)
num_errors = incomplete_workflows.count()
self.assertEqual(self.NUM_SUBMISSIONS, num_errors)
# Verify that we can complete the workflows successfully
# (that is, make sure the classifier data is valid)
# We're calling a Celery task method here,
# but we're NOT using `apply_async`, so this will
# execute synchronously.
for workflow in incomplete_workflows:
grade_essay(workflow.uuid)
# Now there should be no incomplete workflows
remaining_incomplete = AIGradingWorkflow.objects.filter(
classifier_set__isnull=False,
completed_at__isnull=True
).count()
self.assertEqual(remaining_incomplete, 0)
......@@ -4,15 +4,15 @@ Tests for management command that uploads submission/assessment data.
"""
from StringIO import StringIO
import tarfile
from django.test import TestCase
import boto
import moto
from openassessment.test_utils import CacheResetTest
from openassessment.management.commands import upload_oa_data
from openassessment.workflow import api as workflow_api
from submissions import api as sub_api
class UploadDataTest(TestCase):
class UploadDataTest(CacheResetTest):
"""
Test the upload management command. Archiving and upload are in-scope,
but the contents of the generated CSV files are tested elsewhere.
......
......@@ -108,6 +108,30 @@
{% endif %}
{% endfor %}
{% for part in example_based_assessment.parts %}
{% if part.option.criterion.name == criterion.name %}
<li class="answer example-based-assessment"
id="question--{{ criterion_num }}__answer--example-based">
<h5 class="answer__title">
<span class="answer__source">
<span class="answer__source__value">{% trans "Example-Based Assessment" %}</span>
</span>
<span class="answer__value">
<span class="answer__value__label sr">{% trans "Example-Based Assessment" %}: </span>
<span class="answer__value__value">
{{ part.option.name }}
<span class="ui-hint hint--top" data-hint="{{ part.option.explanation }}">
<i class="ico icon-info-sign"
title="{% blocktrans with name=part.option.name %}More information about {{ name }}{% endblocktrans %}"></i>
</span>
</span>
</span>
</h5>
</li>
{% endif %}
{% endfor %}
{% if criterion.feedback %}
<li class="answer--feedback ui-toggle-visibility is--collapsed">
<h5 class="answer--feedback__title ui-toggle-visibility__control">
......
......@@ -6,7 +6,7 @@
<span class="wrapper--copy">
<span class="step__label">{% trans "Your Grade" %}:</span>
<span class="grade__value">
<span class="grade__value__title">{% trans "Waiting for Peer Assessment" %}</span>
<span class="grade__value__title">{% trans "Waiting for Assessments" %}</span>
</span>
</span>
</h2>
......@@ -16,7 +16,13 @@
<div class="wrapper--step__content">
<div class="step__content">
<div class="grade__value__description">
<p>{% trans "Your response is still undergoing peer assessment. After your peers have assessed your response, you'll see their comments and receive your final grade." %}</p>
{% if waiting == 'peer' %}
<p>{% trans "Your response is still undergoing peer assessment. After your peers have assessed your response, you'll see their comments and receive your final grade." %}</p>
{% elif waiting == 'example-based' %}
<p>{% trans "Your response is still undergoing example-based assessment. After your response has been assessed, you'll see the comments and receive your final grade." %}</p>
{% elif waiting == 'all' %}
<p>{% trans "Your response is still undergoing peer assessment and example-based assessment. After your peers have assessed your response and example-based assessment is complete, you'll see your peers' comments and receive your final grade." %}</p>
{% endif %}
</div>
</div>
</div>
......
......@@ -4,8 +4,12 @@
<h3 class="message__title">{% trans "You Have Completed This Assignment" %} </h3>
<div class="message__content">
<p>
{% if waiting %}
{% trans "Your grade will be available when your peers have completed their assessments of your response." %}
{% if waiting == 'peer' %}
<p>{% trans "Your grade will be available when your peers have completed their assessments of your response." %}</p>
{% elif waiting == 'example-based' %}
<p>{% trans "Your grade will be available when the example-based assessment of your response is complete." %}</p>
{% elif waiting == 'all' %}
<p>{% trans "Your grade will be available when your peers have completed their assessments of your response and the example-based assessment of your response is complete." %}</p>
{% else %}
<a data-behavior="ui-scroll" href="#openassessment__grade">{% trans "Review your grade and your assessment details." %}</a>
{% endif %}
......
......@@ -78,6 +78,52 @@
</table>
</div>
{% if display_schedule_training %}
<div class="staff-info__classifierset ui-staff__content__section">
{% if classifierset %}
<table class="staff-info__classifierset__table" summary="{% trans "Classifier set" %}">
<caption class="title">{% trans "Classifier set" %}</caption>
<thead>
<th abbr="Field" scope="col">{% trans "Field" %}</th>
<th abbr="Value" scope="col">{% trans "Value" %}</th>
</thead>
<tbody>
<tr>
<td class="value">{% trans "Created at" %}</td>
<td class="value">{{ classifierset.created_at }}</td>
</tr>
<tr>
<td class="value">{% trans "Algorithm ID" %}</td>
<td class="value">{{ classifierset.algorithm_id }}</td>
</tr>
<tr>
<td class="value">{% trans "Course ID" %}</td>
<td class="value">{{ classifierset.course_id }}</td>
</tr>
<tr>
<td class="value">{% trans "Item ID" %}</td>
<td class="value">{{ classifierset.item_id }}</td>
</tr>
</tbody>
</table>
{% else %}
{% trans "No classifiers are available for this problem" %}
{% endif %}
</div>
<div class="staff-info__status ui-staff__content__section">
<a aria-role="button" href="" id="schedule_training" class="action--submit"><span class="copy">{% trans "Schedule Example-Based Assessment Training" %}</span></a>
<div id="schedule_training_message"></div>
</div>
{% endif %}
{% if display_reschedule_unfinished_tasks %}
<div class="staff-info__status ui-staff__content__section">
<a aria-role="button" href="" id="reschedule_unfinished_tasks" class="action--submit"><span class="copy">{% trans "Reschedule All Unfinished Example-Based Assessment Grading Tasks" %}</span></a>
<div id="reschedule_unfinished_tasks_message"></div>
</div>
{% endif %}
<div class="staff-info__student ui-staff__content__section">
<div class="wrapper--input" class="staff-info__student__form">
<form id="openassessment_student_info_form">
......
......@@ -16,6 +16,7 @@
</div>
</div>
{% if peer_assessments %}
<div class="staff-info__status ui-staff__content__section">
<h3 class="title">{% trans "Peer Assessments for This Student" %}</h3>
{% for assessment in peer_assessments %}
......@@ -55,7 +56,9 @@
{% endwith %}
{% endfor %}
</div>
{% endif %}
{% if submitted_assessments %}
<div class="staff-info__status ui-staff__content__section">
<h3 class="title">{% trans "Peer Assessments Completed by This Student" %}</h3>
{% for assessment in submitted_assessments %}
......@@ -95,7 +98,9 @@
{% endwith %}
{% endfor %}
</div>
{% endif %}
{% if self_assessment %}
<div class="staff-info__status ui-staff__content__section">
<h3 class="title">{% trans "Student's Self Assessment" %}</h3>
<table class="staff-info__status__table" summary="{% trans "Self Assessment" %}">
......@@ -124,6 +129,38 @@
</tbody>
</table>
</div>
{% endif %}
{% if example_based_assessment %}
<div class="staff-info__status ui-staff__content__section">
<h3 class="title">{% trans "Example-Based Assessment" %}</h3>
<table class="staff-info__status__table" summary="{% trans "Example Based Assessment" %}">
<thead>
<tr>
<th abbr="Criterion" scope="col">{% trans "Criterion" %}</th>
<th abbr="Selected Option" scope="col">{% trans "Selected Option" %}</th>
<th abbr="Points" scope="col">{% trans "Points" %}</th>
<th abbr="Points Possible" scope="col">{% trans "Points Possible" %}</th>
</tr>
</thead>
<tbody>
{% for criterion in rubric_criteria %}
{% for part in example_based_assessment.parts %}
{% if part.option.criterion.name == criterion.name %}
<tr>
<td class="label">{{ criterion.name }}</td>
<td class="value">{{ part.option.name }}</td>
<td class="value">{{ part.option.points }}</td>
<td class="value">{{ criterion.total_value }}</td>
</tr>
{% endif %}
{% endfor %}
{% endfor %}
</tbody>
</table>
</div>
{% endif %}
</div>
{% else %}
{% trans "Couldn't find a response for this student." %}
......
"""
Test utilities
"""
from django.core.cache import cache
from django.core.cache import cache, get_cache
from django.test import TestCase
......@@ -11,8 +11,18 @@ class CacheResetTest(TestCase):
"""
def setUp(self):
super(CacheResetTest, self).setUp()
cache.clear()
self._clear_all_caches()
def tearDown(self):
super(CacheResetTest, self).tearDown()
self._clear_all_caches()
def _clear_all_caches(self):
"""
Clear the default cache and any custom caches.
"""
cache.clear()
get_cache(
'django.core.cache.backends.locmem.LocMemCache',
LOCATION='openassessment.ai.classifiers_dict'
).clear()
......@@ -7,9 +7,10 @@ import logging
from django.db import DatabaseError
from openassessment.assessment.api import peer as peer_api
from openassessment.assessment.api import ai as ai_api
from openassessment.assessment.api import student_training as training_api
from openassessment.assessment.errors import (
PeerAssessmentError, StudentTrainingInternalError
PeerAssessmentError, StudentTrainingInternalError, AIError
)
from submissions import api as sub_api
from .models import AssessmentWorkflow, AssessmentWorkflowStep
......@@ -22,7 +23,7 @@ from .errors import (
logger = logging.getLogger(__name__)
def create_workflow(submission_uuid, steps):
def create_workflow(submission_uuid, steps, on_init_params=None):
"""Begins a new assessment workflow.
Create a new workflow that other assessments will record themselves against.
......@@ -33,6 +34,10 @@ def create_workflow(submission_uuid, steps):
steps (list): List of steps that are part of the workflow, in the order
that the user must complete them. Example: `["peer", "self"]`
Kwargs:
on_init_params (dict): The parameters to pass to each assessment module
on init. Keys are the assessment step names.
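        Example (illustrative only; the inner dict holds whatever keyword
        arguments that step's on_init hook accepts, so the "ai" kwargs shown
        here are hypothetical):
            create_workflow(
                submission_uuid, ["ai", "peer"],
                on_init_params={"ai": {"rubric": rubric_dict, "algorithm_id": "ease"}}
            )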
Returns:
dict: Assessment workflow information with the following
`uuid` = UUID of this `AssessmentWorkflow`
......@@ -62,8 +67,11 @@ def create_workflow(submission_uuid, steps):
.format(submission_uuid, specific_err_msg)
)
if on_init_params is None:
on_init_params = dict()
try:
workflow = AssessmentWorkflow.start_workflow(submission_uuid, steps)
workflow = AssessmentWorkflow.start_workflow(submission_uuid, steps, on_init_params)
logger.info((
u"Started assessment workflow for "
u"submission UUID {uuid} with steps {steps}"
......@@ -289,6 +297,10 @@ def get_status_counts(course_id, item_id, steps):
]
"""
# The AI status exists for workflow logic, but no student will ever be in
# the AI status, so we should never return it.
statuses = steps + AssessmentWorkflow.STATUSES
    if 'ai' in statuses:
        statuses.remove('ai')
return [
{
"status": status,
......@@ -298,7 +310,7 @@ def get_status_counts(course_id, item_id, steps):
item_id=item_id,
).count()
}
for status in steps + AssessmentWorkflow.STATUSES
for status in statuses
]
......
......@@ -12,12 +12,14 @@ need to then generate a matching migration for it using:
import logging
import importlib
from django.conf import settings
from django.db import models, transaction
from django.db import models, transaction, DatabaseError
from django.dispatch import receiver
from django_extensions.db.fields import UUIDField
from django.utils.timezone import now
from model_utils import Choices
from model_utils.models import StatusModel, TimeStampedModel
from submissions import api as sub_api
from openassessment.assessment.signals import assessment_complete_signal
from .errors import AssessmentApiLoadError
......@@ -33,6 +35,7 @@ DEFAULT_ASSESSMENT_API_DICT = {
'peer': 'openassessment.assessment.api.peer',
'self': 'openassessment.assessment.api.self',
'training': 'openassessment.assessment.api.student_training',
'ai': 'openassessment.assessment.api.ai',
}
ASSESSMENT_API_DICT = getattr(
settings, 'ORA2_ASSESSMENTS',
......@@ -46,7 +49,7 @@ ASSESSMENT_API_DICT = getattr(
# We then use that score as the student's overall score.
# This Django setting is a list of assessment steps (defined in `settings.ORA2_ASSESSMENTS`)
# in descending priority order.
DEFAULT_ASSESSMENT_SCORE_PRIORITY = ['peer', 'self']
DEFAULT_ASSESSMENT_SCORE_PRIORITY = ['peer', 'self', 'ai']
ASSESSMENT_SCORE_PRIORITY = getattr(
settings, 'ORA2_ASSESSMENT_SCORE_PRIORITY',
DEFAULT_ASSESSMENT_SCORE_PRIORITY
......@@ -95,13 +98,15 @@ class AssessmentWorkflow(TimeStampedModel, StatusModel):
@classmethod
@transaction.commit_on_success
def start_workflow(cls, submission_uuid, step_names):
def start_workflow(cls, submission_uuid, step_names, on_init_params):
"""
Start a new workflow.
Args:
submission_uuid (str): The UUID of the submission associated with this workflow.
step_names (list): The names of the assessment steps in the workflow.
on_init_params (dict): The parameters to pass to each assessment module
on init. Keys are the assessment step names.
Returns:
AssessmentWorkflow
......@@ -140,8 +145,8 @@ class AssessmentWorkflow(TimeStampedModel, StatusModel):
if api is not None:
# Initialize the assessment module
# We do this for every assessment module
on_init_func = getattr(api, 'on_init', lambda submission_uuid: None)
on_init_func(submission_uuid)
on_init_func = getattr(api, 'on_init', lambda submission_uuid, **params: None)
on_init_func(submission_uuid, **on_init_params.get(step.name, {}))
# For the first valid step, update the workflow status
# and notify the assessment module that it's being started
......@@ -178,12 +183,26 @@ class AssessmentWorkflow(TimeStampedModel, StatusModel):
status_dict = {}
steps = self._get_steps()
for step in steps:
status_dict[step.name] = {
"complete": step.api().submitter_is_finished(
self.submission_uuid,
assessment_requirements.get(step.name, {})
)
}
api = step.api()
if api is not None:
# If an assessment module does not define these functions,
# default to True -- that is, automatically assume that the user has
# met the requirements. This prevents students from getting "stuck"
# in the workflow in the event of a rollback that removes a step
# from the problem definition.
submitter_finished_func = getattr(api, 'submitter_is_finished', lambda submission_uuid, reqs: True)
assessment_finished_func = getattr(api, 'assessment_is_finished', lambda submission_uuid, reqs: True)
status_dict[step.name] = {
"complete": submitter_finished_func(
self.submission_uuid,
assessment_requirements.get(step.name, {})
),
"graded": assessment_finished_func(
self.submission_uuid,
assessment_requirements.get(step.name, {})
),
}
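        # The resulting status_dict maps each step name to its completion and
        # grading flags, for example (illustrative values):
        #   {"peer": {"complete": True, "graded": False},
        #    "ai": {"complete": True, "graded": True}}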
return status_dict
def update_from_assessments(self, assessment_requirements):
......@@ -268,7 +287,10 @@ class AssessmentWorkflow(TimeStampedModel, StatusModel):
# Check if the assessment API defines a score function at all
get_score_func = getattr(assessment_step.api(), 'get_score', None)
if get_score_func is not None:
requirements = assessment_requirements.get(assessment_step_name, {})
if assessment_requirements is None:
requirements = None
else:
requirements = assessment_requirements.get(assessment_step_name, {})
score = get_score_func(self.submission_uuid, requirements)
break
......@@ -408,3 +430,45 @@ class AssessmentWorkflowStep(models.Model):
if step_changed:
self.save()
@receiver(assessment_complete_signal)
def update_workflow_async(sender, **kwargs):
"""
Register a receiver for the update workflow signal
This allows asynchronous processes to update the workflow
Args:
sender (object): Not used
Kwargs:
submission_uuid (str): The UUID of the submission associated
with the workflow being updated.
Returns:
None
"""
submission_uuid = kwargs.get('submission_uuid')
if submission_uuid is None:
logger.error("Update workflow signal called without a submission UUID")
return
try:
workflow = AssessmentWorkflow.objects.get(submission_uuid=submission_uuid)
workflow.update_from_assessments(None)
except AssessmentWorkflow.DoesNotExist:
msg = u"Could not retrieve workflow for submission with UUID {}".format(submission_uuid)
logger.exception(msg)
except DatabaseError:
msg = (
u"Database error occurred while updating "
u"the workflow for submission UUID {}"
).format(submission_uuid)
logger.exception(msg)
    except Exception:
msg = (
u"Unexpected error occurred while updating the workflow "
u"for submission UUID {}"
).format(submission_uuid)
logger.exception(msg)
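# A producer (for example, an asynchronous grading task) fires this receiver with:
#   assessment_complete_signal.send(sender=None, submission_uuid=submission_uuid)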
......@@ -57,6 +57,66 @@
},
"self": {}
}
},
"ai": {
"steps": ["ai"],
"requirements": {
"ai": {}
}
},
"ai_peer": {
"steps": ["ai", "peer"],
"requirements": {
"ai": {},
"peer": {
"must_grade": 5,
"must_be_graded_by": 3
}
}
},
"ai_training_peer": {
"steps": ["ai", "training", "peer"],
"requirements": {
"ai": {},
"training": {
"num_required": 2
},
"peer": {
"must_grade": 5,
"must_be_graded_by": 3
}
}
},
"ai_self": {
"steps": ["ai", "self"],
"requirements": {
"ai": {},
"self": {}
}
},
"ai_peer_self": {
"steps": ["ai", "peer", "self"],
"requirements": {
"ai": {},
"peer": {
"must_grade": 5,
"must_be_graded_by": 3
},
"self": {}
}
},
"ai_training_peer_self": {
"steps": ["ai", "training", "peer", "self"],
"requirements": {
"ai": {},
"training": {
"num_required": 2
},
"peer": {
"must_grade": 5,
"must_be_graded_by": 3
},
"self": {}
}
}
}
\ No newline at end of file
"""
Tests for Django signals and receivers defined by the workflow API.
"""
import mock
from django.db import DatabaseError
import ddt
from submissions import api as sub_api
from openassessment.test_utils import CacheResetTest
from openassessment.workflow import api as workflow_api
from openassessment.workflow.models import AssessmentWorkflow
from openassessment.assessment.signals import assessment_complete_signal
@ddt.ddt
class UpdateWorkflowSignalTest(CacheResetTest):
"""
Test for the update workflow signal.
"""
STUDENT_ITEM = {
"student_id": "test student",
"item_id": "test item",
"course_id": "test course",
"item_type": "openassessment",
}
def setUp(self):
"""
Create a submission.
"""
submission = sub_api.create_submission(self.STUDENT_ITEM, "test answer")
self.submission_uuid = submission['uuid']
def test_update_signal_no_workflow(self):
# Without defining a workflow, send the signal
# The receiver should catch and log the exception
assessment_complete_signal.send(sender=None, submission_uuid=self.submission_uuid)
def test_update_signal_no_submission_uuid(self):
# Try to send the signal without specifying a submission UUID
# The receiver should catch and log the exception
assessment_complete_signal.send(sender=None)
def test_update_signal_updates_workflow(self):
# Start a workflow for the submission
workflow_api.create_workflow(self.submission_uuid, ['self'])
# Spy on the workflow update call
with mock.patch.object(AssessmentWorkflow, 'update_from_assessments') as mock_update:
# Send a signal to update the workflow
assessment_complete_signal.send(sender=None, submission_uuid=self.submission_uuid)
# Verify that the workflow model update was called
mock_update.assert_called_once_with(None)
@ddt.data(DatabaseError, IOError)
@mock.patch.object(AssessmentWorkflow.objects, 'get')
def test_errors(self, error, mock_call):
# Start a workflow for the submission
workflow_api.create_workflow(self.submission_uuid, ['self'])
# The receiver should catch and log the error
mock_call.side_effect = error("OH NO!")
assessment_complete_signal.send(sender=None, submission_uuid=self.submission_uuid)
......@@ -53,4 +53,23 @@ def convert_training_examples_list_to_dict(examples_list):
}
}
for ex in examples_list
]
\ No newline at end of file
]
def create_rubric_dict(prompt, criteria):
"""
Construct a serialized rubric model in the format expected
by the assessments app.
Args:
prompt (unicode): The rubric prompt.
criteria (list of dict): The serialized rubric criteria.
Returns:
dict
"""
return {
"prompt": prompt,
"criteria": criteria
}
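# Example (illustrative), mirroring how the XBlock tests call it:
#   create_rubric_dict(xblock.prompt, xblock.rubric_criteria)
#   # => {"prompt": xblock.prompt, "criteria": xblock.rubric_criteria}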
......@@ -9,6 +9,7 @@ from xblock.core import XBlock
from openassessment.assessment.api import peer as peer_api
from openassessment.assessment.api import self as self_api
from openassessment.assessment.api import ai as ai_api
from openassessment.assessment.errors import SelfAssessmentError, PeerAssessmentError
from submissions import api as sub_api
......@@ -51,7 +52,7 @@ class GradeMixin(object):
if status == "done":
path, context = self.render_grade_complete(workflow)
elif status == "waiting":
path = 'openassessmentblock/grade/oa_grade_waiting.html'
path, context = self.render_grade_waiting(workflow)
elif status is None:
path = 'openassessmentblock/grade/oa_grade_not_started.html'
else: # status is 'self' or 'peer', which implies that the workflow is incomplete
......@@ -61,6 +62,22 @@ class GradeMixin(object):
else:
return self.render_assessment(path, context)
def render_grade_waiting(self, workflow):
"""
Render the grade waiting state.
Args:
workflow (dict): The serialized Workflow model.
Returns:
            tuple of template path (string) and context (dict)
"""
context = {
"waiting": self.get_waiting_details(workflow["status_details"])
}
return 'openassessmentblock/grade/oa_grade_waiting.html', context
def render_grade_complete(self, workflow):
"""
Render the grade complete state.
......@@ -75,19 +92,22 @@ class GradeMixin(object):
assessment_steps = self.assessment_steps
submission_uuid = workflow['submission_uuid']
example_based_assessment = None
self_assessment = None
feedback = None
peer_assessments = []
has_submitted_feedback = False
if "peer-assessment" in assessment_steps:
feedback = peer_api.get_assessment_feedback(submission_uuid)
peer_assessments = peer_api.get_assessments(submission_uuid)
has_submitted_feedback = feedback is not None
else:
feedback = None
peer_assessments = []
has_submitted_feedback = False
if "self-assessment" in assessment_steps:
self_assessment = self_api.get_assessment(submission_uuid)
else:
self_assessment = None
if "example-based-assessment" in assessment_steps:
example_based_assessment = ai_api.get_latest_assessment(submission_uuid)
feedback_text = feedback.get('feedback', '') if feedback else ''
student_submission = sub_api.get_submission(submission_uuid)
......@@ -104,6 +124,7 @@ class GradeMixin(object):
'student_submission': student_submission,
'peer_assessments': peer_assessments,
'self_assessment': self_assessment,
'example_based_assessment': example_based_assessment,
'rubric_criteria': self._rubric_criteria_with_feedback(peer_assessments),
'has_submitted_feedback': has_submitted_feedback,
}
......@@ -112,10 +133,13 @@ class GradeMixin(object):
# Note that we are updating a *copy* of the rubric criteria stored in
# the XBlock field
max_scores = peer_api.get_rubric_max_scores(submission_uuid)
median_scores = None
if "peer-assessment" in assessment_steps:
median_scores = peer_api.get_assessment_median_scores(submission_uuid)
elif "self-assessment" in assessment_steps:
median_scores = self_api.get_assessment_scores_by_criteria(submission_uuid)
elif "example-based-assessment" in assessment_steps:
median_scores = ai_api.get_assessment_scores_by_criteria(submission_uuid)
if median_scores is not None and max_scores is not None:
for criterion in context["rubric_criteria"]:
......
......@@ -38,6 +38,8 @@ class MessageMixin(object):
        # Finds the canonical status of the workflow and the is_closed status of the problem
status = workflow.get('status')
status_details = workflow.get('status_details', {})
is_closed = deadline_info.get('general').get('is_closed')
# Finds the status_information which describes the closed status of the current step (defaults to submission)
......@@ -53,7 +55,7 @@ class MessageMixin(object):
# Render the instruction message based on the status of the workflow
# and the closed status.
if status == "done" or status == "waiting":
path, context = self.render_message_complete(status)
path, context = self.render_message_complete(status_details)
elif is_closed or status_is_closed:
path, context = self.render_message_closed(status_info)
elif status == "self":
......@@ -66,7 +68,7 @@ class MessageMixin(object):
path, context = self.render_message_open(deadline_info)
return self.render_assessment(path, context)
def render_message_complete(self, status):
def render_message_complete(self, status_details):
"""
Renders the "Complete" message state (Either Waiting or Done)
......@@ -76,10 +78,10 @@ class MessageMixin(object):
Returns:
The path (String) and context (dict) to render the "Complete" message template
"""
context = {
"waiting": (status == "waiting")
"waiting": self.get_waiting_details(status_details),
}
return 'openassessmentblock/message/oa_message_complete.html', context
def render_message_training(self, deadline_info):
......
......@@ -70,6 +70,7 @@ UI_MODELS = {
VALID_ASSESSMENT_TYPES = [
"student-training",
"example-based-assessment",
"peer-assessment",
"self-assessment",
]
......@@ -240,6 +241,19 @@ class OpenAssessmentBlock(
return frag
@property
def is_admin(self):
"""
Check whether the user has global staff permissions.
Returns:
bool
"""
if hasattr(self, 'xmodule_runtime'):
return getattr(self.xmodule_runtime, 'user_is_admin', False)
else:
return False
@property
def is_course_staff(self):
"""
Check whether the user has course staff permissions for this XBlock.
......@@ -252,6 +266,8 @@ class OpenAssessmentBlock(
else:
return False
@property
def in_studio_preview(self):
"""
......@@ -277,8 +293,9 @@ class OpenAssessmentBlock(
"""
ui_models = [UI_MODELS["submission"]]
for assessment in self.valid_assessments:
ui_model = UI_MODELS[assessment["name"]]
ui_models.append(dict(assessment, **ui_model))
ui_model = UI_MODELS.get(assessment["name"])
if ui_model:
ui_models.append(dict(assessment, **ui_model))
ui_models.append(UI_MODELS["grade"])
return ui_models
......@@ -296,6 +313,10 @@ class OpenAssessmentBlock(
load('static/xml/unicode.xml')
),
(
"OpenAssessmentBlock Example Based Rubric",
load('static/xml/example_based_example.xml')
),
(
"OpenAssessmentBlock Poverty Rubric",
load('static/xml/poverty_rubric_example.xml')
),
......@@ -465,6 +486,51 @@ class OpenAssessmentBlock(
else:
return False, None, open_range[0], open_range[1]
def get_waiting_details(self, status_details):
"""
Returns the specific waiting status based on the given status_details.
This status can currently be peer, example-based, or both. This is
        determined by checking the status details to see whether all assessment
modules have been graded.
Args:
status_details (dict): A dictionary containing the details of each
assessment module status. This will contain keys such as
"peer" and "ai", referring to dictionaries, which in turn will
have the key "graded". If this key has a value set, these
assessment modules have been graded.
Returns:
A string of "peer", "exampled-based", or "all" to indicate which
assessment modules in the workflow are waiting on assessments.
Returns None if no module is waiting on an assessment.
Examples:
>>> now = dt.datetime.utcnow().replace(tzinfo=pytz.utc)
>>> status_details = {
>>> 'peer': {
>>> 'completed': None,
>>> 'graded': now
>>> },
>>> 'ai': {
>>> 'completed': now,
>>> 'graded': None
>>> }
>>> }
>>> self.get_waiting_details(status_details)
"peer"
"""
waiting = None
peer_waiting = "peer" in status_details and not status_details["peer"]["graded"]
ai_waiting = "ai" in status_details and not status_details["ai"]["graded"]
if peer_waiting and ai_waiting:
waiting = "all"
elif peer_waiting:
waiting = "peer"
elif ai_waiting:
waiting = "example-based"
return waiting
def is_released(self, step=None):
"""
Check if a question has been released.
......
/**
Tests for staff info.
**/
describe("OpenAssessment.StaffInfoView", function() {
// Stub server that returns dummy data for the staff info view
......@@ -18,6 +19,23 @@ describe("OpenAssessment.StaffInfoView", function() {
defer.resolveWith(this, [fragment]);
});
};
this.scheduleTraining = function() {
var server = this;
return $.Deferred(function(defer) {
defer.resolveWith(server, [server.data]);
}).promise();
};
this.rescheduleUnfinishedTasks = function() {
var server = this;
return $.Deferred(function(defer) {
defer.resolveWith(server, [server.data]);
}).promise();
};
this.data = {};
};
// Stub base view
......@@ -61,6 +79,30 @@ describe("OpenAssessment.StaffInfoView", function() {
baseView = new StubBaseView();
});
it("schedules training of AI classifiers", function() {
server.data = {
"success": true,
"workflow_uuid": "abc123",
"msg": "Great success."
};
spyOn(server, 'scheduleTraining').andCallThrough();
// Load the fixture
loadFixtures('oa_base.html');
// Load the view
var el = $("#openassessment-base").get(0);
var view = new OpenAssessment.StaffInfoView(el, server, baseView);
view.load();
        // Schedule classifier training
view.scheduleTraining();
        // Expect that the training request was sent to the server
expect(server.scheduleTraining).toHaveBeenCalled();
});
it("Loads staff info if the page contains a course staff section", function() {
// Load the fixture for the container page that DOES include a course staff section
loadFixtures('oa_base_course_staff.html');
......@@ -73,4 +115,45 @@ describe("OpenAssessment.StaffInfoView", function() {
loadFixtures('oa_base.html');
assertStaffInfoAjaxCall(false);
});
it("reschedules training of AI tasks", function() {
server.data = {
"success": true,
"workflow_uuid": "abc123",
"msg": "Great success."
};
var el = $("#openassessment-base").get(0);
var view = new OpenAssessment.StaffInfoView(el, server, baseView);
view.load();
spyOn(server, 'rescheduleUnfinishedTasks').andCallThrough();
// Test the Rescheduling
view.rescheduleUnfinishedTasks();
        // Expect that the server was instructed to reschedule unfinished tasks
expect(server.rescheduleUnfinishedTasks).toHaveBeenCalled();
});
it("reschedules training of AI tasks", function() {
server.data = {
"success": false,
"workflow_uuid": "abc123",
"errMsg": "Stupendous Failure."
};
var el = $("#openassessment-base").get(0);
var view = new OpenAssessment.StaffInfoView(el, server, baseView);
view.load();
spyOn(server, 'rescheduleUnfinishedTasks').andCallThrough();
// Test the Rescheduling
view.rescheduleUnfinishedTasks();
        // Expect that the server was instructed to reschedule unfinished tasks
expect(server.rescheduleUnfinishedTasks).toHaveBeenCalled();
});
});
......@@ -337,6 +337,64 @@ OpenAssessment.Server.prototype = {
},
/**
    Schedules classifier training for the example-based assessment at this
    location.
Returns:
A JQuery promise, which resolves with a message indicating the results
of the scheduling request.
Example:
        server.scheduleTraining().done(
            function(msg) { console.log("Success!"); alert(msg); }
        ).fail(
            function(errorMsg) { console.log(errorMsg); }
        );
**/
scheduleTraining: function() {
var url = this.url('schedule_training');
return $.Deferred(function(defer) {
$.ajax({ type: "POST", url: url, data: "\"\""}).done(
function(data) {
if (data.success) {
defer.resolveWith(this, [data.msg]);
}
else {
defer.rejectWith(this, [data.msg]);
}
}
).fail(function(data) {
                defer.rejectWith(this, [gettext('The classifier training could not be scheduled.')]);
});
});
},
/**
    Reschedules grading tasks for example-based assessments.

    Returns:
        A jQuery promise, which resolves with a message indicating the success
        or failure of the scheduling request.
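    Example (illustrative):
        server.rescheduleUnfinishedTasks().done(
            function(msg) { console.log(msg); }
        ).fail(
            function(errorMsg) { console.log(errorMsg); }
        );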
**/
rescheduleUnfinishedTasks: function() {
var url = this.url('reschedule_unfinished_tasks');
return $.Deferred(function(defer) {
$.ajax({ type: "POST", url: url, data: "\"\""}).done(
function(data) {
if (data.success) {
defer.resolveWith(this, [data.msg]);
}
else {
defer.rejectWith(this, [data.msg]);
}
}
).fail(function(data) {
defer.rejectWith(this, [gettext('One or more rescheduling tasks failed.')]);
});
});
},
/**
Load the XBlock's XML definition from the server.
Returns:
......
......@@ -87,5 +87,54 @@ OpenAssessment.StaffInfoView.prototype = {
view.loadStudentInfo();
}
);
// Install a click handler for scheduling AI classifier training
sel.find('#schedule_training').click(
function(eventObject) {
eventObject.preventDefault();
view.scheduleTraining();
}
);
// Install a click handler for rescheduling unfinished AI tasks for this problem
sel.find('#reschedule_unfinished_tasks').click(
function(eventObject) {
eventObject.preventDefault();
view.rescheduleUnfinishedTasks();
}
);
},
/**
Sends a request to the server to schedule the training of classifiers for
this problem's Example Based Assessments.
**/
scheduleTraining: function() {
var view = this;
this.server.scheduleTraining().done(
function(msg) {
                $('#schedule_training_message', view.element).text(msg);
}
).fail(function(errMsg) {
            $('#schedule_training_message', view.element).text(errMsg);
});
},
/**
    Begins the process of rescheduling all unfinished grading tasks. This includes
checking if the classifiers have been created, and grading any unfinished
student submissions.
**/
rescheduleUnfinishedTasks: function() {
var view = this;
this.server.rescheduleUnfinishedTasks().done(
function(msg) {
                $('#reschedule_unfinished_tasks_message', view.element).text(msg);
}
).fail(function(errMsg) {
            $('#reschedule_unfinished_tasks_message', view.element).text(errMsg);
});
}
};
......@@ -84,11 +84,11 @@
}
// UI - status (table)
.staff-info__status {
.staff-info__status, .staff-info__classifierset {
}
.staff-info__status__table {
.staff-info__status__table, .staff-info__classifierset__table {
@extend %copy-3;
border-radius: ($baseline-v/10);
color: $copy-staff-color;
......@@ -132,4 +132,5 @@
}
}
}
......@@ -26,6 +26,13 @@
// --------------------
// Developer styles for Staff Section
// --------------------
.staff-info__status {
.action--submit {
@extend %btn--secondary;
@extend %action-2;
margin: ($baseline-v/2) ($baseline-v/2) ($baseline-v/2) ($baseline-v/2);
}
}
.staff-info__student {
.label {
color: $heading-staff-color;
......
<openassessment>
<title>Example Based Example</title>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Born in northern New South Wales, Dowling entered the Royal Australian Naval College in 1915. After graduating in 1919 he went to sea aboard various Royal Navy and RAN vessels, and later specialised in gunnery. In 1937, he was given command of the sloop HMAS Swan. Following the outbreak of World War II, he saw action in the Mediterranean theatre as executive officer of the Royal Navy cruiser HMS Naiad, and survived her sinking by a German U-boat in March 1942. Returning to Australia, he served as Director of Plans and later Deputy Chief of Naval Staff before taking command of the light cruiser HMAS Hobart in November 1944. His achievements in the South West Pacific earned him the Distinguished Service Order.
Dowling took command of the RAN's first aircraft carrier, HMAS Sydney, in 1948. He became Chief of Naval Personnel in 1950, and Flag Officer Commanding HM Australian Fleet in 1953. Soon after taking up the position of CNS in February 1955, he was promoted to vice admiral and appointed a Companion of the Order of the Bath. As CNS he had to deal with shortages of money, manpower and equipment, and with the increasing role of the United States in Australia's defence planning, at the expense of traditional ties with Britain. Knighted in 1957, Dowling was Chairman of COSC from March 1959 until May 1961, when he retired from the military. In 1963 he was appointed a Knight Commander of the Royal Victorian Order and became Australian Secretary to HM Queen Elizabeth II, serving until his death in 1969.
</answer>
<select criterion="Ideas" option="Bad" />
<select criterion="Content" option="Bad" />
</example>
<example>
<answer>Roy Russell Dowling was born on 28 May 1901 in Condong, a township on the Tweed River in northern New South Wales. His parents were sugar cane inspector Russell Dowling and his wife Lily. The youth entered the Royal Australian Naval College (RANC) at Jervis Bay, Federal Capital Territory, in 1915. An underachiever academically, he excelled at sports, and became chief cadet captain before graduating in 1918 with the King's Medal, awarded for "gentlemanly bearing, character, good influence among his fellows and officer-like qualities".[1][2] The following year he was posted to Britain as a midshipman, undergoing training with the Royal Navy and seeing service on HMS Ramillies and HMS Venturous.[3] By January 1923 he was back in Australia, serving aboard the cruiser HMAS Adelaide. He was promoted to lieutenant in March.[4] In April 1924, Adelaide joined the Royal Navy's Special Service Squadron on its worldwide cruise, taking in New Zealand, Canada, the United States, Panama, and the West Indies, before docking in September at Portsmouth, England. There Dowling left the ship for his next appointment, training as a gunnery officer and serving in that capacity at HMS Excellent.
</answer>
<select criterion="Ideas" option="Good" />
<select criterion="Content" option="Bad" />
</example>
<example>
<answer>After his return to Australia in December 1926, Dowling spent eighteen months on HMAS Platypus and HMAS Anzac, where he continued to specialise in gunnery. In July 1928, he took on an instructional role at the gunnery school in Flinders Naval Depot on Western Port Bay, Victoria. He married Jessie Blanch in Melbourne on 8 May 1930; the couple had two sons and three daughters.[1][6] Jessie accompanied him on his next posting to Britain commencing in January 1931.</answer>
<select criterion="Ideas" option="Bad" />
<select criterion="Content" option="Good" />
</example>
<example>
<answer>He was promoted to lieutenant commander on 15 March, and was appointed gunnery officer on the light cruiser HMS Colombo in May. Dowling returned to Australia in January 1933, and was appointed squadron gunnery officer aboard the heavy cruiser HMAS Canberra that April.[1][4] The ship operated mainly within Australian waters over the next two years.[7] In July 1935, Dowling took charge of the gunnery school at Flinders Naval Depot. He was promoted to commander on 31 December 1936.[1][4] The following month, he assumed command of the newly commissioned Grimsby-class sloop HMAS Swan, carrying out duties in the South West Pacific.[8] Completing his tenure on Swan in January 1939, he was briefly assigned to the Navy Office, Melbourne, before returning to Britain in March for duty at HMS Pembroke, where he awaited posting aboard the yet-to-be-commissioned anti-aircraft cruiser, HMS Naiad.</answer>
<select criterion="Ideas" option="Good" />
<select criterion="Content" option="Good" />
</example>
</assessment>
</assessments>
<rubric>
<prompt>
Censorship in the Libraries
'All of us can think of a book that we hope none of our children or any other children have taken off the shelf. But if I have the right to remove that book from the shelf -- that work I abhor -- then you also have exactly the same right and so does everyone else. And then we have no books left on the shelf for any of us.' --Katherine Paterson, Author
Write a persuasive essay to a newspaper reflecting your views on censorship in libraries. Do you believe that certain materials, such as books, music, movies, magazines, etc., should be removed from the shelves if they are found offensive? Support your position with convincing arguments from your own experience, observations, and/or reading.
Read for conciseness, clarity of thought, and form.
</prompt>
<criterion feedback="optional">
<name>Ideas</name>
<prompt>Determine if there is a unifying theme or main idea.</prompt>
<option points="0">
<name>Bad</name>
<explanation>Difficult for the reader to discern the main idea. Too brief or too repetitive to establish or maintain a focus.</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Presents a unifying theme or main idea, but may include minor tangents. Stays somewhat focused on topic and task.</explanation>
</option>
</criterion>
<criterion>
<name>Content</name>
<prompt>Assess the content of the submission</prompt>
<option points="0">
<name>Bad</name>
<explanation>Includes little information with few or no details or unrelated details. Unsuccessful in attempts to explore any facets of the topic.</explanation>
</option>
<option points="1">
<name>Good</name>
<explanation>Includes little information and few or no details. Explores only one or two facets of the topic.</explanation>
</option>
</criterion>
<feedbackprompt>
(Optional) What aspects of this response stood out to you? What did it do well? How could it improve?
</feedbackprompt>
</rubric>
</openassessment>
\ No newline at end of file
<openassessment>
<title>Open Assessment Test</title>
<prompt>Example-based assessment</prompt>
<rubric>
<prompt>Read for conciseness, clarity of thought, and form.</prompt>
<criterion>
<name>Ideas</name>
<prompt>How good are the ideas?</prompt>
<option points="0">
<name>Poor</name>
<explanation>Poor job!</explanation>
</option>
<option points="1">
<name>Fair</name>
<explanation>Fair job</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Good job</explanation>
</option>
</criterion>
<criterion>
<name>Content</name>
<prompt>How good is the content?</prompt>
<option points="0">
<name>Poor</name>
<explanation>Poor job!</explanation>
</option>
<option points="1">
<name>Fair</name>
<explanation>Fair job</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Good job</explanation>
</option>
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Example Answer One</answer>
<select criterion="Ideas" option="Poor" />
<select criterion="Content" option="Poor" />
</example>
<example>
<answer>Example Answer Two</answer>
<select criterion="Ideas" option="Fair" />
<select criterion="Content" option="Fair" />
</example>
<example>
<answer>Example Answer Three</answer>
<select criterion="Ideas" option="Fair" />
<select criterion="Content" option="Good" />
</example>
<example>
<answer>Example Answer Four</answer>
<select criterion="Ideas" option="Poor" />
<select criterion="Content" option="Good" />
</example>
</assessment>
<assessment name="peer-assessment" must_grade="5" must_be_graded_by="3" />
<assessment name="self-assessment" />
</assessments>
</openassessment>
<openassessment>
<title>Open Assessment Test</title>
<prompt>Example-based assessment</prompt>
<rubric>
<prompt>Read for conciseness, clarity of thought, and form.</prompt>
<criterion>
<name>Ideas</name>
<prompt>How good are the ideas?</prompt>
<option points="0">
<name>Poor</name>
<explanation>Poor job!</explanation>
</option>
<option points="1">
<name>Fair</name>
<explanation>Fair job</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Good job</explanation>
</option>
</criterion>
<criterion>
<name>Content</name>
<prompt>How good is the content?</prompt>
<option points="0">
<name>Poor</name>
<explanation>Poor job!</explanation>
</option>
<option points="1">
<name>Fair</name>
<explanation>Fair job</explanation>
</option>
<option points="3">
<name>Good</name>
<explanation>Good job</explanation>
</option>
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Example Answer One</answer>
<select criterion="Ideas" option="Poor" />
<select criterion="Content" option="Poor" />
</example>
<example>
<answer>Example Answer Two</answer>
<select criterion="Ideas" option="Fair" />
<select criterion="Content" option="Fair" />
</example>
<example>
<answer>Example Answer Three</answer>
<select criterion="Ideas" option="Fair" />
<select criterion="Content" option="Good" />
</example>
<example>
<answer>Example Answer Four</answer>
<select criterion="Ideas" option="Poor" />
<select criterion="Content" option="Good" />
</example>
</assessment>
</assessments>
</openassessment>
......@@ -40,6 +40,28 @@
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Example Answer One</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="Ġööḋ" />
<select criterion="Form" option="Poor" />
</example>
<example>
<answer>Example Answer Two</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="ﻉซƈﻉɭɭﻉกՇ" />
<select criterion="Form" option="Fair" />
</example>
<example>
<answer>Example Answer Three</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="Ġööḋ" />
<select criterion="Form" option="Good" />
</example>
<example>
<answer>Example Answer Four</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="ﻉซƈﻉɭɭﻉกՇ" />
<select criterion="Form" option="Good" />
</example>
</assessment>
<assessment name="peer-assessment" must_grade="2" must_be_graded_by="2" />
<assessment name="self-assessment" />
</assessments>
......
<openassessment>
<title>Open Assessment Test</title>
<prompt>
Given the state of the world today, what do you think should be done to
combat poverty? Please answer in a short essay of 200-300 words.
</prompt>
<rubric>
<prompt>Read for conciseness, clarity of thought, and form.</prompt>
<criterion>
<name>𝓒𝓸𝓷𝓬𝓲𝓼𝓮</name>
<prompt>How concise is it?</prompt>
<option points="3">
<name>ﻉซƈﻉɭɭﻉกՇ</name>
<explanation>Extremely concise</explanation>
</option>
<option points="2">
<name>Ġööḋ</name>
<explanation>Concise</explanation>
</option>
<option points="1">
<name>ק๏๏г</name>
<explanation>Wordy</explanation>
</option>
</criterion>
<criterion>
<name>Form</name>
<prompt>How well-formed is it?</prompt>
<option points="3">
<name>Good</name>
<explanation>Good</explanation>
</option>
<option points="2">
<name>Fair</name>
<explanation>Fair</explanation>
</option>
<option points="1">
<name>Poor</name>
<explanation>Poor</explanation>
</option>
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Example Answer One</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="Ġööḋ" />
<select criterion="Form" option="Poor" />
</example>
<example>
<answer>Example Answer Two</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="ﻉซƈﻉɭɭﻉกՇ" />
<select criterion="Form" option="Fair" />
</example>
<example>
<answer>Example Answer Three</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="Ġööḋ" />
<select criterion="Form" option="Good" />
</example>
<example>
<answer>Example Answer Four</answer>
<select criterion="𝓒𝓸𝓷𝓬𝓲𝓼𝓮" option="ﻉซƈﻉɭɭﻉกՇ" />
<select criterion="Form" option="Good" />
</example>
</assessment>
</assessments>
</openassessment>
<openassessment>
<title>Open Assessment Test</title>
<prompt>
Given the state of the world today, what do you think should be done to
combat poverty? Please answer in a short essay of 200-300 words.
</prompt>
<rubric>
<prompt>Read for conciseness, clarity of thought, and form.</prompt>
<criterion>
<name>𝓒𝓸𝓷𝓬𝓲𝓼𝓮</name>
<prompt>How concise is it?</prompt>
<option points="3">
<name>ﻉซƈﻉɭɭﻉกՇ</name>
<explanation>Extremely concise</explanation>
</option>
<option points="2">
<name>Ġööḋ</name>
<explanation>Concise</explanation>
</option>
<option points="1">
<name>ק๏๏г</name>
<explanation>Wordy</explanation>
</option>
</criterion>
<criterion>
<name>Form</name>
<prompt>How well-formed is it?</prompt>
<option points="3">
<name>Good</name>
<explanation>Good</explanation>
</option>
<option points="2">
<name>Fair</name>
<explanation>Fair</explanation>
</option>
<option points="1">
<name>Poor</name>
<explanation>Poor</explanation>
</option>
</criterion>
</rubric>
<assessments>
<assessment name="self-assessment" />
</assessments>
</openassessment>
{
"student_training_examples_invalid_criterion": {
"rubric": {
"criteria": [
{
"order_num": 0,
"name": "vocabulary",
"prompt": "How good is the vocabulary?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "Poor",
"explanation": "Poor job!"
},
{
"order_num": 1,
"points": 1,
"name": "Good",
"explanation": "Good job!"
}
]
},
{
"order_num": 1,
"name": "grammar",
"prompt": "How good is the grammar?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "Poor",
"explanation": "Poor job!"
},
{
"order_num": 1,
"points": 1,
"name": "Good",
"explanation": "Good job!"
}
]
}
]
},
"assessments": [
{
"name": "student-training",
"start": null,
"due": null,
"examples": [
{
"answer": "ẗëṡẗ äṅṡẅëṛ",
"options_selected": [
{
"criterion": "Invalid criterion!",
"option": "Good"
},
{
"criterion": "grammar",
"option": "Poor"
}
]
}
]
},
{
"name": "peer-assessment",
"start": null,
"due": null,
"must_grade": 5,
"must_be_graded_by": 3
}
]
},
"example_based_no_training_examples": {
"rubric": {
"criteria": [
{
"order_num": 0,
"name": "vocabulary",
"prompt": "how good is the vocabulary?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "poor",
"explanation": "poor job!"
},
{
"order_num": 1,
"points": 1,
"name": "good",
"explanation": "good job!"
}
]
},
{
"order_num": 1,
"name": "grammar",
"prompt": "how good is the grammar?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "poor",
"explanation": "poor job!"
},
{
"order_num": 1,
"points": 1,
"name": "good",
"explanation": "good job!"
}
]
}
]
},
"assessments": [
{
"name": "example-based-assessment",
"start": null,
"due": null,
"algorithm_id": "ease",
"examples": []
}
]
}
}
......@@ -158,5 +158,38 @@
}
],
"is_released": true
},
"example_based_algorithm_id_is_not_ease": {
"assessments": [
{
"name": "example-based-assessment",
"start": null,
"due": null,
"algorithm_id": "NOT_EASE",
"examples": [
{
"answer": "тєѕт αηѕωєя",
"options_selected": [
{
"criterion": "Test criterion",
"option": "No"
}
]
},
{
"answer": "тєѕт αηѕωєя TWO",
"options_selected": [
{
"criterion": "Test criterion",
"option": "Yes"
}
]
}
]
}
],
"current_assessments": null,
"is_released": false
}
}
......@@ -3,9 +3,7 @@
"rubric": {
"prompt": "Test Prompt",
"criteria": []
},
"current_rubric": null,
"is_released": false
}
},
"zero_options": {
......@@ -19,9 +17,7 @@
"options": []
}
]
},
"current_rubric": null,
"is_released": false
}
},
"negative_points": {
......@@ -42,9 +38,7 @@
]
}
]
},
"current_rubric": null,
"is_released": false
}
},
"duplicate_criteria_names": {
......@@ -78,9 +72,7 @@
]
}
]
},
"current_rubric": null,
"is_released": false
}
},
"duplicate_option_names": {
......@@ -107,9 +99,7 @@
]
}
]
},
"current_rubric": null,
"is_released": false
}
},
"change_points_after_release": {
......@@ -398,7 +388,7 @@
"is_released": true
},
"rename_criterion_name_after_release": {
"rename_criterion_name_after_release": {
"rubric": {
"prompt": "Test Prompt",
"criteria": [
......@@ -526,5 +516,33 @@
]
},
"is_released": true
},
"example_based_duplicate_option_points": {
"is_example_based": true,
"rubric": {
"prompt": "Test Prompt",
"criteria": [
{
"order_num": 0,
"name": "Test criterion",
"prompt": "Test criterion prompt",
"options": [
{
"order_num": 0,
"points": 2,
"name": "No",
"explanation": "No explanation"
},
{
"order_num": 1,
"points": 2,
"name": "Yes",
"explanation": "Yes explanation"
}
]
}
]
}
}
}
......@@ -983,5 +983,103 @@
"</rubric>",
"</openassessment>"
]
},
"ai_peer_self": {
"title": "Foo",
"prompt": "Test prompt",
"rubric_feedback_prompt": "Test Feedback Prompt",
"start": null,
"due": null,
"submission_start": null,
"submission_due": null,
"criteria": [
{
"order_num": 0,
"name": "Test criterion",
"prompt": "Test criterion prompt",
"options": [
{
"order_num": 0,
"points": 0,
"name": "No",
"explanation": "No explanation"
},
{
"order_num": 1,
"points": 2,
"name": "Yes",
"explanation": "Yes explanation"
}
]
}
],
"assessments": [
{
"name": "example-based-assessment",
"algorithm_id": "sample-algorithm-id",
"examples": [
{
"answer": "тєѕт αηѕωєя",
"options_selected": [
{
"criterion": "Test criterion",
"option": "No"
}
]
},
{
"answer": "тєѕт αηѕωєя TWO",
"options_selected": [
{
"criterion": "Test criterion",
"option": "Yes"
}
]
}
]
},
{
"name": "peer-assessment",
"start": "2014-02-27T09:46:28",
"due": "2014-03-01T00:00:00",
"must_grade": 5,
"must_be_graded_by": 3
},
{
"name": "self-assessment",
"start": "2014-04-01T00:00:00",
"due": "2014-06-01T00:00:00"
}
],
"expected_xml": [
"<openassessment>",
"<title>Foo</title>",
"<assessments>",
"<assessment name=\"example-based-assessment\" algorithm_id=\"sample-algorithm-id\">",
"<example>",
"<answer>тєѕт αηѕωєя</answer>",
"<select criterion=\"Test criterion\" option=\"No\" />",
"</example>",
"<example>",
"<answer>тєѕт αηѕωєя TWO</answer>",
"<select criterion=\"Test criterion\" option=\"Yes\" />",
"</example>",
"</assessment>",
"<assessment name=\"peer-assessment\" start=\"2014-02-27T09:46:28\" due=\"2014-03-01T00:00:00\" must_grade=\"5\" must_be_graded_by=\"3\" />",
"<assessment name=\"self-assessment\" start=\"2014-04-01T00:00:00\" due=\"2014-06-01T00:00:00\" />",
"</assessments>",
"<rubric>",
"<prompt>Test prompt</prompt>",
"<criterion>",
"<name>Test criterion</name>",
"<prompt>Test criterion prompt</prompt>",
"<option points=\"0\"><name>No</name><explanation>No explanation</explanation></option>",
"<option points=\"2\"><name>Yes</name><explanation>Yes explanation</explanation></option>",
"</criterion>",
"<feedbackprompt>Test Feedback Prompt</feedbackprompt>",
"</rubric>",
"</openassessment>"
]
}
}
......@@ -84,6 +84,14 @@
</criterion>
</rubric>
<assessments>
<assessment name="example-based-assessment" algorithm_id="fake">
<example>
<answer>Example Answer One</answer>
<select criterion="Form" option="Reddit" />
<select criterion="Clear-headed" option="Yogi Berra" />
<select criterion="Concise" option="HP Lovecraft" />
</example>
</assessment>
<assessment name="peer-assessment" must_grade="5" must_be_graded_by="3" start="2015-01-02" due="2015-04-01"/>
<assessment name="self-assessment" start="2016-01-02" due="2016-04-01"/>
</assessments>
......
......@@ -9,6 +9,7 @@
"training_rubric": {
"id": 2,
"content_hash": "de2bb2b7e2c6e3df014e53b8c65f37d511cc4344",
"structure_hash": "a513b20d93487d6d80e31e1d974bf22519332567",
"criteria": [
{
"order_num": 0,
......@@ -67,4 +68,4 @@
}
}
}
}
\ No newline at end of file
}
......@@ -796,5 +796,175 @@
]
}
]
},
"example_based_assessment": {
"xml": [
"<openassessment>",
"<title>foo</title>",
"<assessments>",
"<assessment name=\"example-based-assessment\" algorithm_id=\"ease\">",
"<example>",
"<answer>тєѕт αηѕωєя</answer>",
"<select criterion=\"Test criterion\" option=\"No\" />",
"</example>",
"<example>",
"<answer>тєѕт αηѕωєя TWO</answer>",
"<select criterion=\"Test criterion\" option=\"Yes\" />",
"</example>",
"</assessment>",
"</assessments>",
"<rubric>",
"<prompt>test prompt</prompt>",
"<criterion>",
"<name>test criterion</name>",
"<prompt>test criterion prompt</prompt>",
"<option points=\"0\"><name>no</name><explanation>no explanation</explanation></option>",
"<option points=\"2\"><name>yes</name><explanation>yes explanation</explanation></option>",
"</criterion>",
"</rubric>",
"</openassessment>"
],
"title": "foo",
"prompt": "test prompt",
"start": "2000-01-01t00:00:00",
"due": "3000-01-01t00:00:00",
"submission_start": null,
"submission_due": null,
"criteria": [
{
"order_num": 0,
"name": "test criterion",
"prompt": "test criterion prompt",
"feedback": "disabled",
"options": [
{
"order_num": 0,
"points": 0,
"name": "no",
"explanation": "no explanation"
},
{
"order_num": 1,
"points": 2,
"name": "yes",
"explanation": "yes explanation"
}
]
}
],
"assessments": [
{
"name": "example-based-assessment",
"start": null,
"due": null,
"algorithm_id": "ease",
"examples": [
{
"answer": "тєѕт αηѕωєя",
"options_selected": [
{
"criterion": "Test criterion",
"option": "No"
}
]
},
{
"answer": "тєѕт αηѕωєя TWO",
"options_selected": [
{
"criterion": "Test criterion",
"option": "Yes"
}
]
}
]
}
]
},
"example_based_default_algorithm_id": {
"xml": [
"<openassessment>",
"<title>foo</title>",
"<assessments>",
"<assessment name=\"example-based-assessment\">",
"<example>",
"<answer>тєѕт αηѕωєя</answer>",
"<select criterion=\"Test criterion\" option=\"No\" />",
"</example>",
"<example>",
"<answer>тєѕт αηѕωєя TWO</answer>",
"<select criterion=\"Test criterion\" option=\"Yes\" />",
"</example>",
"</assessment>",
"</assessments>",
"<rubric>",
"<prompt>test prompt</prompt>",
"<criterion>",
"<name>test criterion</name>",
"<prompt>test criterion prompt</prompt>",
"<option points=\"0\"><name>no</name><explanation>no explanation</explanation></option>",
"<option points=\"2\"><name>yes</name><explanation>yes explanation</explanation></option>",
"</criterion>",
"</rubric>",
"</openassessment>"
],
"title": "foo",
"prompt": "test prompt",
"start": "2000-01-01t00:00:00",
"due": "3000-01-01t00:00:00",
"submission_start": null,
"submission_due": null,
"criteria": [
{
"order_num": 0,
"name": "test criterion",
"prompt": "test criterion prompt",
"feedback": "disabled",
"options": [
{
"order_num": 0,
"points": 0,
"name": "no",
"explanation": "no explanation"
},
{
"order_num": 1,
"points": 2,
"name": "yes",
"explanation": "yes explanation"
}
]
}
],
"assessments": [
{
"name": "example-based-assessment",
"start": null,
"due": null,
"algorithm_id": "ease",
"examples": [
{
"answer": "тєѕт αηѕωєя",
"options_selected": [
{
"criterion": "Test criterion",
"option": "No"
}
]
},
{
"answer": "тєѕт αηѕωєя TWO",
"options_selected": [
{
"criterion": "Test criterion",
"option": "Yes"
}
]
}
]
}
]
}
}
......@@ -392,5 +392,63 @@
"</rubric>",
"</openassessment>"
]
},
"example_based_start_date": {
"xml": [
"<openassessment>",
"<title>foo</title>",
"<assessments>",
"<assessment name=\"example-based-assessment\" start=\"2020-01-01\">",
"<example>",
"<answer>тєѕт αηѕωєя</answer>",
"<select criterion=\"Test criterion\" option=\"No\" />",
"</example>",
"<example>",
"<answer>тєѕт αηѕωєя TWO</answer>",
"<select criterion=\"Test criterion\" option=\"Yes\" />",
"</example>",
"</assessment>",
"</assessments>",
"<rubric>",
"<prompt>test prompt</prompt>",
"<criterion>",
"<name>test criterion</name>",
"<prompt>test criterion prompt</prompt>",
"<option points=\"0\"><name>no</name><explanation>no explanation</explanation></option>",
"<option points=\"2\"><name>yes</name><explanation>yes explanation</explanation></option>",
"</criterion>",
"</rubric>",
"</openassessment>"
]
},
"example_based_due_date": {
"xml": [
"<openassessment>",
"<title>foo</title>",
"<assessments>",
"<assessment name=\"example-based-assessment\" due=\"2020-01-01\">",
"<example>",
"<answer>тєѕт αηѕωєя</answer>",
"<select criterion=\"Test criterion\" option=\"No\" />",
"</example>",
"<example>",
"<answer>тєѕт αηѕωєя TWO</answer>",
"<select criterion=\"Test criterion\" option=\"Yes\" />",
"</example>",
"</assessment>",
"</assessments>",
"<rubric>",
"<prompt>test prompt</prompt>",
"<criterion>",
"<name>test criterion</name>",
"<prompt>test criterion prompt</prompt>",
"<option points=\"0\"><name>no</name><explanation>no explanation</explanation></option>",
"<option points=\"2\"><name>yes</name><explanation>yes explanation</explanation></option>",
"</criterion>",
"</rubric>",
"</openassessment>"
]
}
}
{
"student_training_examples_match_rubric": {
"rubric": {
"criteria": [
{
"order_num": 0,
"name": "vocabulary",
"prompt": "How good is the vocabulary?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "Poor",
"explanation": "Poor job!"
},
{
"order_num": 1,
"points": 1,
"name": "Good",
"explanation": "Good job!"
}
]
},
{
"order_num": 1,
"name": "grammar",
"prompt": "How good is the grammar?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "Poor",
"explanation": "Poor job!"
},
{
"order_num": 1,
"points": 1,
"name": "Good",
"explanation": "Good job!"
}
]
}
]
},
"assessments": [
{
"name": "student-training",
"start": null,
"due": null,
"examples": [
{
"answer": "ẗëṡẗ äṅṡẅëṛ",
"options_selected": [
{
"criterion": "vocabulary",
"option": "Good"
},
{
"criterion": "grammar",
"option": "Poor"
}
]
}
]
},
{
"name": "peer-assessment",
"start": null,
"due": null,
"must_grade": 5,
"must_be_graded_by": 3
}
]
},
"example_based_assessment_matches_rubric": {
"rubric": {
"criteria": [
{
"order_num": 0,
"name": "vocabulary",
"prompt": "How good is the vocabulary?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "Poor",
"explanation": "Poor job!"
},
{
"order_num": 1,
"points": 1,
"name": "Good",
"explanation": "Good job!"
}
]
},
{
"order_num": 1,
"name": "grammar",
"prompt": "How good is the grammar?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "Poor",
"explanation": "Poor job!"
},
{
"order_num": 1,
"points": 1,
"name": "Good",
"explanation": "Good job!"
}
]
}
]
},
"assessments": [
{
"name": "example-based-assessment",
"start": null,
"due": null,
"algorithm_id": "ease",
"examples": [
{
"answer": "ẗëṡẗ äṅṡẅëṛ",
"options_selected": [
{
"criterion": "vocabulary",
"option": "Good"
},
{
"criterion": "grammar",
"option": "Poor"
}
]
}
]
}
]
}
}
......@@ -13,6 +13,7 @@
"current_assessments": null,
"is_released": false
},
"self_only": {
"assessments": [
{
......@@ -22,6 +23,7 @@
"current_assessments": null,
"is_released": false
},
"must_be_graded_by_equals_must_grade": {
"assessments": [
{
......@@ -35,5 +37,38 @@
],
"current_assessments": null,
"is_released": false
},
"example_based_algorithm_id_is_ease": {
"assessments": [
{
"name": "example-based-assessment",
"start": null,
"due": null,
"algorithm_id": "ease",
"examples": [
{
"answer": "тєѕт αηѕωєя",
"options_selected": [
{
"criterion": "Test criterion",
"option": "No"
}
]
},
{
"answer": "тєѕт αηѕωєя TWO",
"options_selected": [
{
"criterion": "Test criterion",
"option": "Yes"
}
]
}
]
}
],
"current_assessments": null,
"is_released": false
}
}
......@@ -23,9 +23,7 @@
]
}
]
},
"current_rubric": null,
"is_released": false
}
},
"unicode": {
......@@ -52,9 +50,7 @@
]
}
]
},
"current_rubric": null,
"is_released": false
}
},
"change_points_before_release": {
......@@ -293,8 +289,7 @@
]
}
]
},
"is_released": false
}
},
"remove_options_before_release": {
......
{
"waiting_for_peer": {
"waiting_for_peer": true,
"waiting_for_ai": false,
"expected_response": "peer assessment"
},
"waiting_for_ai": {
"waiting_for_peer": false,
"waiting_for_ai": true,
"expected_response": "example-based assessment"
},
"waiting_for_both": {
"waiting_for_peer": true,
"waiting_for_ai": true,
"expected_response": "peer assessment and example-based assessment"
},
"not_waiting": {
"waiting_for_peer": false,
"waiting_for_ai": false,
"expected_response": "your grade:"
}
}
\ No newline at end of file
"""
Integration test for example-based assessment (AI).
"""
import json
import mock
from django.test.utils import override_settings
from submissions import api as sub_api
from openassessment.xblock.openassessmentblock import OpenAssessmentBlock
from .base import XBlockHandlerTestCase, scenario
class AIAssessmentIntegrationTest(XBlockHandlerTestCase):
"""
Integration test for example-based assessment (AI).
"""
SUBMISSION = json.dumps({'submission': 'This is a submission!'})
AI_ALGORITHMS = {
'fake': 'openassessment.assessment.worker.algorithm.FakeAIAlgorithm'
}
@mock.patch.object(OpenAssessmentBlock, 'is_admin', new_callable=mock.PropertyMock)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/example_based_only.xml', user_id='Bob')
def test_asynch_generate_score(self, xblock, mock_is_admin):
# Test that AI grading, which creates assessments asynchronously,
# updates the workflow so students can receive a score.
mock_is_admin.return_value = True
# Train classifiers for the problem
self.request(xblock, 'schedule_training', json.dumps({}), response_format='json')
# Submit a response
self.request(xblock, 'submit', self.SUBMISSION, response_format='json')
# BEFORE viewing the grade page, check that we get a score
score = sub_api.get_score(xblock.get_student_item_dict())
self.assertIsNot(score, None)
self.assertEqual(score['submission_uuid'], xblock.submission_uuid)
......@@ -3,14 +3,26 @@
Tests for grade handlers in Open Assessment XBlock.
"""
import copy
import ddt
import json
from django.test.utils import override_settings
from submissions import api as sub_api
from openassessment.workflow import api as workflow_api
from openassessment.assessment.api import peer as peer_api
from openassessment.assessment.api import self as self_api
from openassessment.xblock.data_conversion import create_rubric_dict
from .base import XBlockHandlerTestCase, scenario
# Test dependency on Stub AI Algorithm configuration
from openassessment.assessment.test.test_ai import (
ALGORITHM_ID, AI_ALGORITHMS, train_classifiers
)
CLASSIFIER_SCORE_OVERRIDES = {
u"𝓒𝓸𝓷𝓬𝓲𝓼𝓮": {'score_override': 1},
u"Form": {'score_override': 2}
}
@ddt.ddt
class TestGrade(XBlockHandlerTestCase):
"""
View-level tests for the XBlock grade handlers.
......@@ -40,8 +52,12 @@ class TestGrade(XBlockHandlerTestCase):
STEPS = ['peer', 'self']
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/grade_scenario.xml', user_id='Greggs')
def test_render_grade(self, xblock):
rubric = create_rubric_dict(xblock.prompt, xblock.rubric_criteria)
train_classifiers(rubric, CLASSIFIER_SCORE_OVERRIDES)
# Submit, assess, and render the grade view
self._create_submission_and_assessments(
xblock, self.SUBMISSION, self.PEERS, self.ASSESSMENTS, self.ASSESSMENTS[0]
......@@ -68,6 +84,70 @@ class TestGrade(XBlockHandlerTestCase):
self.assertIn('self', resp.lower())
self.assertIn('complete', resp.lower())
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/grade_scenario_self_only.xml', user_id='Greggs')
def test_render_grade_self_only(self, xblock):
rubric = create_rubric_dict(xblock.prompt, xblock.rubric_criteria)
train_classifiers(rubric, CLASSIFIER_SCORE_OVERRIDES)
# Submit, assess, and render the grade view
self._create_submission_and_assessments(
xblock, self.SUBMISSION, [], [], self.ASSESSMENTS[0],
waiting_for_peer=True, waiting_for_ai=True
)
resp = self.request(xblock, 'render_grade', json.dumps(dict()))
# Verify that feedback from each scorer appears in the view
self.assertIn(u'ﻉซƈﻉɭɭﻉกՇ', resp.decode('utf-8'))
self.assertIn(u'Fair', resp.decode('utf-8'))
# Verify that the submission and peer steps show that we're graded
# This isn't strictly speaking part of the grade step rendering,
# but we've already done all the setup to get to this point in the flow,
# so we might as well verify it here.
resp = self.request(xblock, 'render_submission', json.dumps(dict()))
self.assertIn('response', resp.lower())
self.assertIn('complete', resp.lower())
resp = self.request(xblock, 'render_peer_assessment', json.dumps(dict()))
self.assertNotIn('peer', resp.lower())
self.assertNotIn('complete', resp.lower())
resp = self.request(xblock, 'render_self_assessment', json.dumps(dict()))
self.assertIn('self', resp.lower())
self.assertIn('complete', resp.lower())
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/grade_scenario_ai_only.xml', user_id='Greggs')
def test_render_grade_ai_only(self, xblock):
rubric = create_rubric_dict(xblock.prompt, xblock.rubric_criteria)
train_classifiers(rubric, CLASSIFIER_SCORE_OVERRIDES)
# Submit, assess, and render the grade view
self._create_submission_and_assessments(
xblock, self.SUBMISSION, [], [], None, waiting_for_peer=True
)
resp = self.request(xblock, 'render_grade', json.dumps(dict()))
# Verify that the example-based (AI) feedback appears in the view, and that no peer feedback does
self.assertNotIn(u'єאςєɭɭєภՇ', resp.decode('utf-8'))
self.assertIn(u'Fair', resp.decode('utf-8'))
# Verify that the submission and peer steps show that we're graded
# This isn't strictly speaking part of the grade step rendering,
# but we've already done all the setup to get to this point in the flow,
# so we might as well verify it here.
resp = self.request(xblock, 'render_submission', json.dumps(dict()))
self.assertIn('response', resp.lower())
self.assertIn('complete', resp.lower())
resp = self.request(xblock, 'render_peer_assessment', json.dumps(dict()))
self.assertNotIn('peer', resp.lower())
self.assertNotIn('complete', resp.lower())
resp = self.request(xblock, 'render_self_assessment', json.dumps(dict()))
self.assertNotIn('self', resp.lower())
self.assertNotIn('complete', resp.lower())
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/feedback_per_criterion.xml', user_id='Bernard')
def test_render_grade_feedback_per_criterion(self, xblock):
# Submit, assess, and render the grade view
......@@ -99,18 +179,22 @@ class TestGrade(XBlockHandlerTestCase):
self.assertIn(u'Peer 2: ฝﻉɭɭ ɗѻกﻉ!', resp.decode('utf-8'))
self.assertIn(u'Peer 2: ƒαιя נσв', resp.decode('utf-8'))
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@ddt.file_data('data/waiting_scenarios.json')
@scenario('data/grade_scenario.xml', user_id='Omar')
def test_grade_waiting(self, xblock):
def test_grade_waiting(self, xblock, data):
train_classifiers({'criteria': xblock.rubric_criteria}, CLASSIFIER_SCORE_OVERRIDES)
# Waiting to be assessed by a peer
self._create_submission_and_assessments(
xblock, self.SUBMISSION, self.PEERS, self.ASSESSMENTS, self.ASSESSMENTS[0],
waiting_for_peer=True
waiting_for_peer=data["waiting_for_peer"], waiting_for_ai=data["waiting_for_ai"]
)
resp = self.request(xblock, 'render_grade', json.dumps(dict()))
# Verify that we're on the waiting template
self.assertIn(u'waiting for peer assessment', resp.decode('utf-8').lower())
self.assertIn(data["expected_response"], resp.decode('utf-8').lower())
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/grade_incomplete_scenario.xml', user_id='Bunk')
def test_grade_incomplete_missing_self(self, xblock):
# Graded peers, but haven't completed self assessment
......@@ -122,6 +206,7 @@ class TestGrade(XBlockHandlerTestCase):
# Verify that we're on the right template
self.assertIn(u'not completed', resp.decode('utf-8').lower())
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/grade_incomplete_scenario.xml', user_id='Daniels')
def test_grade_incomplete_missing_peer(self, xblock):
# Have not yet completed peer assessment
......@@ -133,6 +218,7 @@ class TestGrade(XBlockHandlerTestCase):
# Verify that we're on the right template
self.assertIn(u'not completed', resp.decode('utf-8').lower())
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/grade_scenario.xml', user_id='Greggs')
def test_submit_feedback(self, xblock):
# Create submissions and assessments
......@@ -156,6 +242,7 @@ class TestGrade(XBlockHandlerTestCase):
feedback['options'], [{'text': u'Option 1'}, {'text': u'Option 2'}]
)
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/grade_scenario.xml', user_id='Bob')
def test_submit_feedback_no_options(self, xblock):
# Create submissions and assessments
......@@ -176,6 +263,7 @@ class TestGrade(XBlockHandlerTestCase):
self.assertIsNot(feedback, None)
self.assertItemsEqual(feedback['options'], [])
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
@scenario('data/grade_scenario.xml', user_id='Bob')
def test_submit_feedback_invalid_options(self, xblock):
# Create submissions and assessments
......@@ -194,7 +282,7 @@ class TestGrade(XBlockHandlerTestCase):
def _create_submission_and_assessments(
self, xblock, submission_text, peers, peer_assessments, self_assessment,
waiting_for_peer=False
waiting_for_peer=False, waiting_for_ai=False
):
"""
Create a submission and peer/self assessments, so that the user can receive a grade.
......@@ -208,6 +296,7 @@ class TestGrade(XBlockHandlerTestCase):
Kwargs:
waiting_for_peer (bool): If true, skip creation of peer assessments for the user's submission.
waiting_for_ai (bool): If True, skip creation of the AI assessment.
Returns:
None
......@@ -216,6 +305,10 @@ class TestGrade(XBlockHandlerTestCase):
# Create a submission from the user
student_item = xblock.get_student_item_dict()
student_id = student_item['student_id']
example_based_assessment = xblock.get_assessment_module('example-based-assessment')
if not waiting_for_ai and example_based_assessment:
train_classifiers({'criteria': xblock.rubric_criteria}, CLASSIFIER_SCORE_OVERRIDES)
example_based_assessment['algorithm_id'] = ALGORITHM_ID
submission = xblock.create_submission(student_item, submission_text)
# Create submissions and assessments from other users
......@@ -263,4 +356,4 @@ class TestGrade(XBlockHandlerTestCase):
self_api.create_assessment(
submission['uuid'], student_id, self_assessment['options_selected'],
{'criteria': xblock.rubric_criteria}
)
)
\ No newline at end of file
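`create_rubric_dict(xblock.prompt, xblock.rubric_criteria)` is used above to build the rubric handed to `train_classifiers`. Judging by the serialized rubrics elsewhere in this PR (for example the `current_rubric` dict built in validation.py), it plausibly amounts to the following; treat this as an assumption about its shape rather than the actual implementation:

def create_rubric_dict(prompt, criteria):
    """Assumed shape only: combine a prompt and serialized criteria into one rubric dict."""
    return {
        'prompt': prompt,
        'criteria': criteria,
    }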
......@@ -3,6 +3,7 @@
"""
Tests for message handlers in Open Assessment XBlock.
"""
import copy
import mock
import pytz
......@@ -28,9 +29,21 @@ class TestMessageRender(XBlockHandlerTestCase):
PAST = TODAY - dt.timedelta(days=10)
FAR_PAST = TODAY - dt.timedelta(days=100)
DEFAULT_STATUS_DETAILS = {
'peer': {
'completed': TODAY,
'graded': TODAY,
},
'ai': {
'completed': TODAY,
'graded': TODAY,
},
}
def _assert_path_and_context(
self, xblock, expected_path, expected_context,
workflow_status, deadline_information, has_peers_to_grade
workflow_status, deadline_information, has_peers_to_grade,
workflow_status_details=DEFAULT_STATUS_DETAILS
):
"""
Complete all of the logic behind rendering the message and verify
......@@ -48,12 +61,14 @@ class TestMessageRender(XBlockHandlerTestCase):
- deadline_information.get("self-assessment") has the same properties as is_closed("self-assessment")
- (WILL BE DEFAULT) deadline_information.get("over-all") has the same properties as is_closed()
has_peers_to_grade (bool): A boolean which indicates whether the queue of peer responses is empty
workflow_status_details (dict): A dictionary of status details
"""
# Simulate the response from the workflow API
workflow_info = {
'status': workflow_status
'status': workflow_status,
'status_details': workflow_status_details,
}
xblock.get_workflow_info = mock.Mock(return_value=workflow_info)
......@@ -687,6 +702,8 @@ class TestMessageRender(XBlockHandlerTestCase):
def test_waiting_due(self, xblock):
status = 'waiting'
status_details = copy.deepcopy(self.DEFAULT_STATUS_DETAILS)
status_details["peer"]["graded"] = None
deadline_information = {
'submission': (True, 'due', self.FAR_PAST, self.YESTERDAY),
......@@ -700,18 +717,20 @@ class TestMessageRender(XBlockHandlerTestCase):
expected_path = 'openassessmentblock/message/oa_message_complete.html'
expected_context = {
"waiting": True
"waiting": "peer"
}
self._assert_path_and_context(
xblock, expected_path, expected_context,
status, deadline_information, has_peers_to_grade
status, deadline_information, has_peers_to_grade, status_details
)
@scenario('data/message_scenario.xml', user_id = "Linda")
def test_waiting_not_due(self, xblock):
status = 'waiting'
status_details = copy.deepcopy(self.DEFAULT_STATUS_DETAILS)
status_details["peer"]["graded"] = None
deadline_information = {
'submission': (True, 'due', self.FAR_PAST, self.YESTERDAY),
......@@ -725,12 +744,67 @@ class TestMessageRender(XBlockHandlerTestCase):
expected_path = 'openassessmentblock/message/oa_message_complete.html'
expected_context = {
"waiting": True
"waiting": "peer"
}
self._assert_path_and_context(
xblock, expected_path, expected_context,
status, deadline_information, has_peers_to_grade
status, deadline_information, has_peers_to_grade, status_details
)
@scenario('data/message_scenario.xml', user_id="Linda")
def test_waiting_on_ai(self, xblock):
status = 'waiting'
status_details = copy.deepcopy(self.DEFAULT_STATUS_DETAILS)
status_details["ai"]["graded"] = None
deadline_information = {
'submission': (True, 'due', self.FAR_PAST, self.YESTERDAY),
'peer-assessment': (True, 'due', self.YESTERDAY, self.TODAY),
'self-assessment': (True, 'due', self.YESTERDAY, self.TODAY),
'over-all': (True, 'due', self.FAR_PAST, self.TODAY)
}
has_peers_to_grade = False
expected_path = 'openassessmentblock/message/oa_message_complete.html'
expected_context = {
"waiting": "example-based"
}
self._assert_path_and_context(
xblock, expected_path, expected_context,
status, deadline_information, has_peers_to_grade, status_details
)
@scenario('data/message_scenario.xml', user_id="Linda")
def test_waiting_on_all(self, xblock):
status = 'waiting'
status_details = copy.deepcopy(self.DEFAULT_STATUS_DETAILS)
status_details["ai"]["graded"] = None
status_details["peer"]["graded"] = None
deadline_information = {
'submission': (True, 'due', self.FAR_PAST, self.YESTERDAY),
'peer-assessment': (True, 'due', self.YESTERDAY, self.TODAY),
'self-assessment': (True, 'due', self.YESTERDAY, self.TODAY),
'over-all': (True, 'due', self.FAR_PAST, self.TODAY)
}
has_peers_to_grade = False
expected_path = 'openassessmentblock/message/oa_message_complete.html'
expected_context = {
"waiting": "all"
}
self._assert_path_and_context(
xblock, expected_path, expected_context,
status, deadline_information, has_peers_to_grade, status_details
)
@scenario('data/message_scenario.xml', user_id = "Linda")
......@@ -750,7 +824,7 @@ class TestMessageRender(XBlockHandlerTestCase):
expected_path = 'openassessmentblock/message/oa_message_complete.html'
expected_context = {
"waiting": False
"waiting": None
}
self._assert_path_and_context(
......@@ -775,7 +849,7 @@ class TestMessageRender(XBlockHandlerTestCase):
expected_path = 'openassessmentblock/message/oa_message_complete.html'
expected_context = {
"waiting": False
"waiting": None
}
self._assert_path_and_context(
......
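The waiting tests above pin down how `status_details` is turned into the complete-message context's `waiting` value ('peer', 'example-based', 'all', or None). A standalone sketch of that mapping, inferred from the expected contexts rather than copied from the XBlock:

def waiting_on(status_details):
    """Map workflow status details to the 'waiting' value the tests above expect."""
    peer_pending = status_details.get('peer', {}).get('graded') is None
    ai_pending = status_details.get('ai', {}).get('graded') is None
    if peer_pending and ai_pending:
        return 'all'
    if peer_pending:
        return 'peer'
    if ai_pending:
        return 'example-based'
    return None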
......@@ -10,7 +10,10 @@ import pytz
import ddt
from django.test import TestCase
from openassessment.xblock.openassessmentblock import OpenAssessmentBlock
from openassessment.xblock.validation import validator, validate_assessments, validate_rubric, validate_dates
from openassessment.xblock.validation import (
validator, validate_assessments, validate_rubric,
validate_dates, validate_assessment_examples
)
@ddt.ddt
......@@ -78,13 +81,39 @@ class RubricValidationTest(TestCase):
@ddt.file_data('data/valid_rubrics.json')
def test_valid_assessment(self, data):
success, msg = validate_rubric(data['rubric'], data['current_rubric'], data['is_released'])
current_rubric = data.get('current_rubric')
is_released = data.get('is_released', False)
is_example_based = data.get('is_example_based', False)
success, msg = validate_rubric(
data['rubric'], current_rubric, is_released, is_example_based
)
self.assertTrue(success)
self.assertEqual(msg, u'')
@ddt.file_data('data/invalid_rubrics.json')
def test_invalid_assessment(self, data):
success, msg = validate_rubric(data['rubric'], data['current_rubric'], data['is_released'])
current_rubric = data.get('current_rubric')
is_released = data.get('is_released', False)
is_example_based = data.get('is_example_based', False)
success, msg = validate_rubric(
data['rubric'], current_rubric, is_released, is_example_based
)
self.assertFalse(success)
self.assertGreater(len(msg), 0)
@ddt.ddt
class AssessmentExamplesValidationTest(TestCase):
@ddt.file_data('data/valid_assessment_examples.json')
def test_valid_assessment_examples(self, data):
success, msg = validate_assessment_examples(data['rubric'], data['assessments'])
self.assertTrue(success)
self.assertEqual(msg, u'')
@ddt.file_data('data/invalid_assessment_examples.json')
def test_invalid_assessment_examples(self, data):
success, msg = validate_assessment_examples(data['rubric'], data['assessments'])
self.assertFalse(success)
self.assertGreater(len(msg), 0)
......@@ -210,26 +239,35 @@ class ValidationIntegrationTest(TestCase):
"due": None
}
EXAMPLES = [
{
"answer": "ẗëṡẗ äṅṡẅëṛ",
"options_selected": [
{
"criterion": "vocabulary",
"option": "Good"
},
{
"criterion": "grammar",
"option": "Poor"
}
]
}
]
ASSESSMENTS = [
{
"name": "example-based-assessment",
"start": None,
"due": None,
"examples": EXAMPLES,
"algorithm_id": "ease"
},
{
"name": "student-training",
"start": None,
"due": None,
"examples": [
{
"answer": "ẗëṡẗ äṅṡẅëṛ",
"options_selected": [
{
"criterion": "vocabulary",
"option": "Good"
},
{
"criterion": "grammar",
"option": "Poor"
}
]
}
]
"examples": EXAMPLES,
},
{
"name": "peer-assessment",
......@@ -253,7 +291,7 @@ class ValidationIntegrationTest(TestCase):
self.oa_block.due = None
self.validator = validator(self.oa_block)
def test_student_training_examples_match_rubric(self):
def test_validates_successfully(self):
is_valid, msg = self.validator(self.RUBRIC, self.SUBMISSION, self.ASSESSMENTS)
self.assertTrue(is_valid, msg=msg)
self.assertEqual(msg, "")
......@@ -277,3 +315,22 @@ class ValidationIntegrationTest(TestCase):
is_valid, msg = self.validator(self.RUBRIC, self.SUBMISSION, mutated_assessments)
self.assertFalse(is_valid)
self.assertEqual(msg, u'Example 1 has an invalid option for "vocabulary": "Invalid option!"')
def test_example_based_assessment_duplicate_point_values(self):
# Mutate the rubric so that two options have the same point value
# for a particular criterion.
# This should cause a validation error with example-based assessment.
mutated_rubric = copy.deepcopy(self.RUBRIC)
for option in mutated_rubric['criteria'][0]['options']:
option['points'] = 1
# Expect a validation error
is_valid, msg = self.validator(mutated_rubric, self.SUBMISSION, self.ASSESSMENTS)
self.assertFalse(is_valid)
self.assertEqual(msg, u'Example-based assessments cannot have duplicate point values.')
# But it should be okay if we don't have example-based assessment
no_example_based = copy.deepcopy(self.ASSESSMENTS)[1:]
is_valid, msg = self.validator(mutated_rubric, self.SUBMISSION, no_example_based)
self.assertTrue(is_valid)
self.assertEqual(msg, u'')
......@@ -66,6 +66,14 @@ def _is_valid_assessment_sequence(assessments):
['student-training', 'peer-assessment'],
['student-training', 'peer-assessment', 'self-assessment'],
['student-training', 'self-assessment', 'peer-assessment'],
['example-based-assessment'],
['example-based-assessment', 'self-assessment'],
['example-based-assessment', 'peer-assessment'],
['example-based-assessment', 'peer-assessment', 'self-assessment'],
['example-based-assessment', 'self-assessment', 'peer-assessment'],
['example-based-assessment', 'student-training', 'peer-assessment'],
['example-based-assessment', 'student-training', 'peer-assessment', 'self-assessment'],
['example-based-assessment', 'student-training', 'self-assessment', 'peer-assessment'],
]
sequence = [asmnt.get('name') for asmnt in assessments]
......@@ -123,6 +131,12 @@ def validate_assessments(assessments, current_assessments, is_released):
if must_grade < must_be_graded_by:
return (False, _('The "must_grade" value must be greater than or equal to the "must_be_graded_by" value.'))
# Example-based assessment MUST specify 'ease' as the algorithm ID,
# at least for now. Later, we may make this more flexible.
if assessment_dict.get('name') == 'example-based-assessment':
if assessment_dict.get('algorithm_id') not in ['ease', 'fake']:
return (False, _('The "algorithm_id" value must be set to "ease" or "fake"'))
if is_released:
if len(assessments) != len(current_assessments):
return (False, _("The number of assessments cannot be changed after the problem has been released."))
......@@ -135,7 +149,7 @@ def validate_assessments(assessments, current_assessments, is_released):
return (True, u'')
def validate_rubric(rubric_dict, current_rubric, is_released):
def validate_rubric(rubric_dict, current_rubric, is_released, is_example_based):
"""
Check that the rubric is semantically valid.
......@@ -143,6 +157,7 @@ def validate_rubric(rubric_dict, current_rubric, is_released):
rubric_dict (dict): Serialized Rubric model representing the updated state of the rubric.
current_rubric (dict): Serialized Rubric model representing the current state of the rubric.
is_released (bool): True if and only if the problem has been released.
is_example_based (bool): True if and only if this is an example-based assessment.
Returns:
tuple (is_valid, msg) where
......@@ -157,7 +172,7 @@ def validate_rubric(rubric_dict, current_rubric, is_released):
# No duplicate criteria names
duplicates = _duplicates([criterion['name'] for criterion in rubric_dict['criteria']])
if len(duplicates) > 0:
msg = u"Criteria duplicate name(s): {duplicates}".format(
msg = _(u"Criteria duplicate name(s): {duplicates}").format(
duplicates=", ".join(duplicates)
)
return (False, msg)
......@@ -166,18 +181,27 @@ def validate_rubric(rubric_dict, current_rubric, is_released):
for criterion in rubric_dict['criteria']:
duplicates = _duplicates([option['name'] for option in criterion['options']])
if len(duplicates) > 0:
msg = u"Options in '{criterion}' have duplicate name(s): {duplicates}".format(
msg = _(u"Options in '{criterion}' have duplicate name(s): {duplicates}").format(
criterion=criterion['name'], duplicates=", ".join(duplicates)
)
return (False, msg)
# Example-based assessments impose the additional restriction
# that the point values for options must be unique within
# a particular rubric criterion.
if is_example_based:
duplicates = _duplicates([option['points'] for option in criterion['options']])
if len(duplicates) > 0:
msg = _(u"Example-based assessments cannot have duplicate point values.")
return (False, msg)
# After a problem is released, authors are allowed to change text,
# but nothing that would change the point value of a rubric.
if is_released:
# Number of criteria must be the same
if len(rubric_dict['criteria']) != len(current_rubric['criteria']):
return (False, u'The number of criteria cannot be changed after a problem is released.')
return (False, _(u'The number of criteria cannot be changed after a problem is released.'))
# Criteria names must be the same
# We use criteria names as unique identifiers (unfortunately)
......@@ -195,12 +219,12 @@ def validate_rubric(rubric_dict, current_rubric, is_released):
# Number of options for each criterion must be the same
for new_criterion, old_criterion in _match_by_order(rubric_dict['criteria'], current_rubric['criteria']):
if len(new_criterion['options']) != len(old_criterion['options']):
return (False, u'The number of options cannot be changed after a problem is released.')
return (False, _(u'The number of options cannot be changed after a problem is released.'))
else:
for new_option, old_option in _match_by_order(new_criterion['options'], old_criterion['options']):
if new_option['points'] != old_option['points']:
return (False, u'Point values cannot be changed after a problem is released.')
return (False, _(u'Point values cannot be changed after a problem is released.'))
return (True, u'')
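`_duplicates`, called throughout `validate_rubric`, is elided from this diff; a plausible implementation, assumed from how it is used on lists of names and point values:

from collections import Counter


def _duplicates(values):
    """Return the set of values that occur more than once in `values`."""
    return set(value for value, count in Counter(values).items() if count > 1)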
......@@ -227,7 +251,7 @@ def validate_dates(start, end, date_ranges):
return (True, u'')
def _validate_assessment_examples(rubric_dict, assessments):
def validate_assessment_examples(rubric_dict, assessments):
"""
Validate assessment training examples.
......@@ -242,10 +266,14 @@ def _validate_assessment_examples(rubric_dict, assessments):
"""
for asmnt in assessments:
if asmnt['name'] == 'student-training':
if asmnt['name'] == 'student-training' or asmnt['name'] == 'example-based-assessment':
examples = convert_training_examples_list_to_dict(asmnt['examples'])
# Must have at least one training example
if len(examples) == 0:
return False, _(u"Student training and example-based assessments must have at least one training example")
# Delegate to the student training API to validate the
# examples against the rubric.
errors = validate_training_examples(rubric_dict, examples)
......@@ -282,16 +310,17 @@ def validator(oa_block, strict_post_release=True):
return (False, msg)
# Rubric
is_example_based = 'example-based-assessment' in [asmnt.get('name') for asmnt in assessments]
current_rubric = {
'prompt': oa_block.prompt,
'criteria': oa_block.rubric_criteria
}
success, msg = validate_rubric(rubric_dict, current_rubric, is_released)
success, msg = validate_rubric(rubric_dict, current_rubric, is_released, is_example_based)
if not success:
return (False, msg)
# Training examples
success, msg = _validate_assessment_examples(rubric_dict, assessments)
success, msg = validate_assessment_examples(rubric_dict, assessments)
if not success:
return (False, msg)
......
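For completeness, the sequence whitelist added near the top of this file reduces to a membership check; a hedged standalone sketch (the tail of `_is_valid_assessment_sequence` is elided from the diff, so the return statement here is an assumption):

VALID_ASSESSMENT_SEQUENCES = [
    ['example-based-assessment'],
    ['example-based-assessment', 'self-assessment'],
    ['example-based-assessment', 'peer-assessment'],
    ['example-based-assessment', 'peer-assessment', 'self-assessment'],
    # ... plus the pre-existing peer/self/student-training sequences (partly elided from the diff)
]


def is_valid_sequence(assessments):
    """Check the ordered assessment names against the whitelist."""
    return [asmnt.get('name') for asmnt in assessments] in VALID_ASSESSMENT_SEQUENCES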