Commit 2b224a74 by Will Daly

Merge pull request #333 from edx/will/student-training-in-flight

Student training in-flight changes
parents 64b3df76 5295df1d
"""
Django models specific to the student training assessment type.
"""
from django.db import models, transaction
from django.db import models
from django.utils import timezone
from submissions import api as sub_api
from .training import TrainingExample
......@@ -27,14 +27,12 @@ class StudentTrainingWorkflow(models.Model):
app_label = "assessment"
@classmethod
@transaction.commit_on_success
def create_workflow(cls, submission_uuid, examples):
def get_or_create_workflow(cls, submission_uuid):
"""
Create a student training workflow.
Args:
submission_uuid (str): The UUID of the submission from the student being trained.
examples (list of TrainingExamples): The training examples to show the student.
Returns:
StudentTrainingWorkflow
......@@ -43,70 +41,105 @@ class StudentTrainingWorkflow(models.Model):
SubmissionError: There was an error retrieving the submission.
"""
# Try to retrieve an existing workflow
# If we find one, return it immediately
try:
return cls.objects.get(submission_uuid=submission_uuid) # pylint:disable=E1101
except cls.DoesNotExist:
pass
# Retrieve the student item info
submission = sub_api.get_submission_and_student(submission_uuid)
student_item = submission['student_item']
# Create the workflow
workflow = cls.objects.create(
return cls.objects.create(
submission_uuid=submission_uuid,
student_id=student_item['student_id'],
item_id=student_item['item_id'],
course_id=student_item['course_id']
)
# Create workflow items for each example
for order_num, example in enumerate(examples):
StudentTrainingWorkflowItem.objects.create(
workflow=workflow,
order_num=order_num,
training_example=example,
)
return workflow
@property
def status(self):
def num_completed(self):
"""
The student's status within the workflow (num steps completed / num steps available).
Return the number of training examples that the
student successfully assessed.
Returns:
tuple of `(num_completed, num_total)`, both integers
int
"""
items = self.items.all() # pylint:disable=E1101
num_complete = sum([1 if item.is_complete else 0 for item in items])
num_total = len(items)
return num_complete, num_total
return self.items.filter(completed_at__isnull=False).count() # pylint:disable=E1101
@property
def is_complete(self):
def next_training_example(self, examples):
"""
Check whether all items in the workflow are complete.
Return the next training example for the student to assess.
If the student is already working on an example, return that.
Otherwise, choose an example the student hasn't seen
from the list of available examples.
Args:
examples (list of TrainingExample): Training examples to choose from.
Returns:
bool
TrainingExample or None
"""
num_incomplete = self.items.filter(completed_at__isnull=True).count() # pylint:disable=E1101
return num_incomplete == 0
# Fetch all the items for this workflow from the database
# Since Django's `select_related` does not follow reverse keys
# we perform the filter ourselves.
items = StudentTrainingWorkflowItem.objects.select_related(
'training_example'
).filter(workflow=self)
# If we're already working on an item, then return that item
incomplete_items = [item for item in items if not item.is_complete]
if len(incomplete_items) > 0:
return incomplete_items[0].training_example
# Otherwise, pick an item that we have not completed
# from the list of examples.
completed_examples = [
item.training_example for item in items
]
available_examples = [
available for available in examples
if available not in completed_examples
]
# If there are no more items available, return None
if len(available_examples) == 0:
return None
# Otherwise, create a new workflow item for the example
# and add it to the workflow
else:
order_num = len(items) + 1
next_example = available_examples[0]
StudentTrainingWorkflowItem.objects.create(
workflow=self,
order_num=order_num,
training_example=next_example
)
return next_example
@property
def next_incomplete_item(self):
def current_item(self):
"""
Find the next incomplete item in the workflow.
Return the item the student is currently working on,
or None.
Returns:
StudentTrainingWorkflowItem or None
"""
next_incomplete = self.items.filter( # pylint:disable=E1101
next_incomplete = self.items.select_related(
'training_example'
).filter( # pylint:disable=E1101
completed_at__isnull=True
).order_by('order_num')[:1]
if len(next_incomplete) > 0:
return next_incomplete[0]
else:
return None
return None if len(next_incomplete) == 0 else next_incomplete[0]
class StudentTrainingWorkflowItem(models.Model):
......
......@@ -3,6 +3,7 @@ Django models for training (both student and AI).
"""
import json
from hashlib import sha1
from django.core.cache import cache
from django.db import models
from .base import Rubric, CriterionOption
......@@ -22,29 +23,34 @@ class TrainingExample(models.Model):
# SHA1 hash
content_hash = models.CharField(max_length=40, unique=True, db_index=True)
# Version for models serialized to the cache
# Increment this number whenever you update this model!
CACHE_KEY_VERSION = 1
class Meta:
app_label = "assessment"
@classmethod
def create_example(cls, answer, options_ids, rubric):
def create_example(cls, answer, options_selected, rubric):
"""
Create a new training example.
Args:
answer (JSON-serializable): The answer associated with the training example.
option_ids (iterable of int): Selected option IDs for the training example.
options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
rubric (Rubric): The rubric associated with the training example.
Returns:
TrainingExample
"""
content_hash = cls.calculate_hash(answer, options_ids, rubric)
content_hash = cls.calculate_hash(answer, options_selected, rubric)
example = TrainingExample.objects.create(
content_hash=content_hash,
raw_answer=json.dumps(answer),
rubric=rubric
)
options_ids = rubric.options_ids(options_selected)
for option in CriterionOption.objects.filter(pk__in=list(options_ids)):
example.options_selected.add(option)
......@@ -71,19 +77,50 @@ class TrainingExample(models.Model):
dict: maps criterion names to selected option names
"""
return {
option.criterion.name: option.name
for option in self.options_selected.all() # pylint:disable=E1101
}
# Since training examples are immutable, we can safely cache this
cache_key = self.cache_key_serialized(attribute="options_selected_dict")
options_selected = cache.get(cache_key)
if options_selected is None:
options_selected = {
option.criterion.name: option.name
for option in self.options_selected.all() # pylint:disable=E1101
}
cache.set(cache_key, options_selected)
return options_selected
def cache_key_serialized(self, attribute=None):
    """
    Build the cache key for a serialized representation of this model.

    The key combines `CACHE_KEY_VERSION` with this example's `content_hash`.

    Kwargs:
        attribute: Name of the single attribute being serialized.
            When left as None, the key refers to the serialized model as a whole.

    Returns:
        The cache key
    """
    template = (
        u"TrainingExample.json.v{version}.{content_hash}"
        if attribute is None
        else u"TrainingExample.{attribute}.json.v{version}.{content_hash}"
    )
    # The whole-model template has no `{attribute}` field, so the extra
    # keyword argument is simply ignored by `format` in that case.
    return template.format(
        version=self.CACHE_KEY_VERSION,
        content_hash=self.content_hash,
        attribute=attribute,
    )
@staticmethod
def calculate_hash(answer, option_ids, rubric):
def calculate_hash(answer, options_selected, rubric):
"""
Calculate a hash for the contents of training example.
Args:
answer (JSON-serializable): The answer associated with the training example.
option_ids (iterable of int): Selected option IDs for the training example.
options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
rubric (Rubric): The rubric associated with the training example.
Returns:
......@@ -92,10 +129,28 @@ class TrainingExample(models.Model):
"""
contents = json.dumps({
'answer': answer,
'option_ids': list(option_ids),
'options_selected': options_selected,
'rubric': rubric.id
})
return sha1(contents).hexdigest()
class Meta:
app_label = "assessment"
@classmethod
def cache_key(cls, answer, options_selected, rubric):
    """
    Derive the cache key for a training example from its content hash.

    Args:
        answer (JSON-serializable): The answer associated with the training example.
        options_selected (dict): The options selected from the rubric
            (mapping of criterion names to option names)
        rubric (Rubric): The rubric associated with the training example.

    Returns:
        tuple of `(cache_key, content_hash)`
    """
    # The hash is returned alongside the key so the caller
    # does not need to compute it a second time.
    digest = cls.calculate_hash(answer, options_selected, rubric)
    key = u"TrainingExample.model.v{version}.{content_hash}".format(
        content_hash=digest,
        version=cls.CACHE_KEY_VERSION,
    )
    return key, digest
"""
Serializers for the training assessment type.
"""
import json
from django.core.cache import cache
from django.db import transaction, IntegrityError
from openassessment.assessment.models import TrainingExample
from .base import rubric_from_dict, RubricSerializer
......@@ -53,11 +53,17 @@ def serialize_training_example(example):
dict
"""
return {
'answer': example.answer,
'options_selected': example.options_selected_dict,
'rubric': RubricSerializer.serialized_from_cache(example.rubric),
}
# Since training examples are immutable, we can safely cache them
cache_key = example.cache_key_serialized()
example_dict = cache.get(cache_key)
if example_dict is None:
example_dict = {
'answer': example.answer,
'options_selected': example.options_selected_dict,
'rubric': RubricSerializer.serialized_from_cache(example.rubric),
}
cache.set(cache_key, example_dict)
return example_dict
@transaction.commit_on_success
......@@ -144,24 +150,31 @@ def deserialize_training_examples(examples, rubric_dict):
# Parse each example
created_examples = []
for example_dict in examples:
is_valid, errors = validate_training_example_format(example_dict)
if not is_valid:
raise InvalidTrainingExample("; ".join(errors))
options_ids = rubric.options_ids(example_dict['options_selected'])
# Try to retrieve the example from the cache
cache_key, content_hash = TrainingExample.cache_key(example_dict['answer'], example_dict['options_selected'], rubric)
example = cache.get(cache_key)
# Calculate the content hash to look up the example
content_hash = TrainingExample.calculate_hash(example_dict['answer'], options_ids, rubric)
# If we couldn't retrieve the example from the cache, create it
if example is None:
# Validate the training example
is_valid, errors = validate_training_example_format(example_dict)
if not is_valid:
raise InvalidTrainingExample("; ".join(errors))
try:
example = TrainingExample.objects.get(content_hash=content_hash)
except TrainingExample.DoesNotExist:
# Get or create the training example
try:
example = TrainingExample.create_example(
example_dict['answer'], options_ids, rubric
)
except IntegrityError:
example = TrainingExample.objects.get(content_hash=content_hash)
except TrainingExample.DoesNotExist:
try:
example = TrainingExample.create_example(
example_dict['answer'], example_dict['options_selected'], rubric
)
except IntegrityError:
example = TrainingExample.objects.get(content_hash=content_hash)
# Add the example to the cache
cache.set(cache_key, example)
created_examples.append(example)
......
......@@ -3,7 +3,7 @@ Test-specific Django settings.
"""
# Inherit from base settings
from .base import *
from .base import * # pylint:disable=W0614,W0401
TEST_APPS = (
'openassessment',
......@@ -44,3 +44,10 @@ EDX_ORA2["EVENT_LOGGER"] = "openassessment.workflow.test.events.fake_event_logge
# We run Celery in "always eager" mode in the test suite,
# which executes tasks synchronously instead of using the task queue.
CELERY_ALWAYS_EAGER = True
# Silence Django's CacheKeyWarning for the whole test run
# (the filter is installed as soon as this settings module is imported).
# NOTE(review): presumably the cache backend used by the tests does not
# share memcached's key restrictions, so these warnings are noise -- confirm.
# https://docs.djangoproject.com/en/1.4/topics/cache/#cache-key-warnings
import warnings
from django.core.cache import CacheKeyWarning
warnings.simplefilter("ignore", CacheKeyWarning)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment