Merge pull request #333 from edx/will/student-training-in-flight

Student training in-flight changes

Merge pull request #333 from edx/will/student-training-in-flight
Student training in-flight changes
2b224a74 · Will Daly · 64b3df76 · 5295df1d · 2b224a74 · 2b224a74
Commit 2b224a74 authored May 15, 2014 by Will Daly
6 changed files
--- a/apps/openassessment/assessment/api/student_training.py
+++ b/apps/openassessment/assessment/api/student_training.py
--- a/apps/openassessment/assessment/models/student_training.py
+++ b/apps/openassessment/assessment/models/student_training.py
 """
 Django models specific to the student training assessment type.
 """
-from django.db import models, transaction
+from django.db import models
 from django.utils import timezone
 from submissions import api as sub_api
 from .training import TrainingExample
@@ -27,14 +27,12 @@ class StudentTrainingWorkflow(models.Model):
        app_label = "assessment"
    @classmethod
-    @transaction.commit_on_success
+    def get_or_create_workflow(cls, submission_uuid):
-    def create_workflow(cls, submission_uuid, examples):
        """
        Create a student training workflow.
        Args:
            submission_uuid (str): The UUID of the submission from the student being trained.
-            examples (list of TrainingExamples): The training examples to show the student.
        Returns:
            StudentTrainingWorkflow
@@ -43,70 +41,105 @@ class StudentTrainingWorkflow(models.Model):
            SubmissionError: There was an error retrieving the submission.
        """
+        # Try to retrieve an existing workflow
+        # If we find one, return it immediately
+        try:
+            return cls.objects.get(submission_uuid=submission_uuid)   # pylint:disable=E1101
+        except cls.DoesNotExist:
+            pass
        # Retrieve the student item info
        submission = sub_api.get_submission_and_student(submission_uuid)
        student_item = submission['student_item']
        # Create the workflow
-        workflow = cls.objects.create(
+        return cls.objects.create(
            submission_uuid=submission_uuid,
            student_id=student_item['student_id'],
            item_id=student_item['item_id'],
            course_id=student_item['course_id']
        )
-        # Create workflow items for each example
-        for order_num, example in enumerate(examples):
-            StudentTrainingWorkflowItem.objects.create(
-                workflow=workflow,
-                order_num=order_num,
-                training_example=example,
-            )
-        return workflow
    @property
-    def status(self):
+    def num_completed(self):
        """
-        The student's status within the workflow (num steps completed / num steps available).
+        Return the number of training examples that the
+        student successfully assessed.
        Returns:
-            tuple of `(num_completed, num_total)`, both integers
+            int
        """
-        items = self.items.all()    # pylint:disable=E1101
+        return self.items.filter(completed_at__isnull=False).count()  # pylint:disable=E1101
-        num_complete = sum([1 if item.is_complete else 0 for item in items])
-        num_total = len(items)
-        return num_complete, num_total
-    @property
+    def next_training_example(self, examples):
-    def is_complete(self):
        """
-        Check whether all items in the workflow are complete.
+        Return the next training example for the student to assess.
+        If the student is already working on an example, return that.
+        Otherwise, choose an example the student hasn't seen
+        from the list of available examples.
+        Args:
+            examples (list of TrainingExample): Training examples to choose from.
        Returns:
-            bool
+            TrainingExample or None
        """
-        num_incomplete = self.items.filter(completed_at__isnull=True).count()  # pylint:disable=E1101
+        # Fetch all the items for this workflow from the database
-        return num_incomplete == 0
+        # Since Django's `select_related` does not follow reverse keys
+        # we perform the filter ourselves.
+        items = StudentTrainingWorkflowItem.objects.select_related(
+            'training_example'
+        ).filter(workflow=self)
+        # If we're already working on an item, then return that item
+        incomplete_items = [item for item in items if not item.is_complete]
+        if len(incomplete_items) > 0:
+            return incomplete_items[0].training_example
+        # Otherwise, pick an example that the student has not yet been given
+        # from the list of available examples.
+        completed_examples = [
+            item.training_example for item in items
+        ]
+        available_examples = [
+            available for available in examples
+            if available not in completed_examples
+        ]
+        # If there are no more items available, return None
+        if len(available_examples) == 0:
+            return None
+        # Otherwise, create a new workflow item for the example
+        # and add it to the workflow
+        else:
+            order_num = len(items) + 1
+            next_example = available_examples[0]
+            StudentTrainingWorkflowItem.objects.create(
+                workflow=self,
+                order_num=order_num,
+                training_example=next_example
+            )
+            return next_example
    @property
-    def next_incomplete_item(self):
+    def current_item(self):
        """
-        Find the next incomplete item in the workflow.
+        Return the item the student is currently working on,
+        or None.
        Returns:
            StudentTrainingWorkflowItem or None
        """
-        next_incomplete = self.items.filter(  # pylint:disable=E1101
+        next_incomplete = self.items.select_related(
+            'training_example'
+        ).filter(  # pylint:disable=E1101
            completed_at__isnull=True
        ).order_by('order_num')[:1]
-        if len(next_incomplete) > 0:
+        return None if len(next_incomplete) == 0 else next_incomplete[0]
-            return next_incomplete[0]
-        else:
-            return None
 class StudentTrainingWorkflowItem(models.Model):

--- a/apps/openassessment/assessment/models/training.py
+++ b/apps/openassessment/assessment/models/training.py
@@ -3,6 +3,7 @@ Django models for training (both student and AI).
 """
 import json
 from hashlib import sha1
+from django.core.cache import cache
 from django.db import models
 from .base import Rubric, CriterionOption
@@ -22,29 +23,34 @@ class TrainingExample(models.Model):
    # SHA1 hash
    content_hash = models.CharField(max_length=40, unique=True, db_index=True)
+    # Version for models serialized to the cache
+    # Increment this number whenever you update this model!
+    CACHE_KEY_VERSION = 1
    class Meta:
        app_label = "assessment"
    @classmethod
-    def create_example(cls, answer, options_ids, rubric):
+    def create_example(cls, answer, options_selected, rubric):
        """
        Create a new training example.
        Args:
            answer (JSON-serializable): The answer associated with the training example.
-            option_ids (iterable of int): Selected option IDs for the training example.
+            options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
            rubric (Rubric): The rubric associated with the training example.
        Returns:
            TrainingExample
        """
-        content_hash = cls.calculate_hash(answer, options_ids, rubric)
+        content_hash = cls.calculate_hash(answer, options_selected, rubric)
        example = TrainingExample.objects.create(
            content_hash=content_hash,
            raw_answer=json.dumps(answer),
            rubric=rubric
        )
+        options_ids = rubric.options_ids(options_selected)
        for option in CriterionOption.objects.filter(pk__in=list(options_ids)):
            example.options_selected.add(option)
@@ -71,19 +77,50 @@ class TrainingExample(models.Model):
            dict: maps criterion names to selected option names
        """
-        return {
+        # Since training examples are immutable, we can safely cache this
-            option.criterion.name: option.name
+        cache_key = self.cache_key_serialized(attribute="options_selected_dict")
-            for option in self.options_selected.all()  # pylint:disable=E1101
+        options_selected = cache.get(cache_key)
-        }
+        if options_selected is None:
+            options_selected = {
+                option.criterion.name: option.name
+                for option in self.options_selected.all()  # pylint:disable=E1101
+            }
+            cache.set(cache_key, options_selected)
+        return options_selected
+    def cache_key_serialized(self, attribute=None):
+        """
+        Create a cache key based on the content hash
+        for serialized versions of this model.
+        Kwargs:
+            attribute: The name of the attribute being serialized.
+                If not specified, assume that we are serializing the entire model.
+        Returns:
+            str: The cache key
+        """
+        if attribute is None:
+            key_template = u"TrainingExample.json.v{version}.{content_hash}"
+        else:
+            key_template = u"TrainingExample.{attribute}.json.v{version}.{content_hash}"
+        cache_key = key_template.format(
+            version=self.CACHE_KEY_VERSION,
+            content_hash=self.content_hash,
+            attribute=attribute
+        )
+        return cache_key
    @staticmethod
-    def calculate_hash(answer, option_ids, rubric):
+    def calculate_hash(answer, options_selected, rubric):
        """
        Calculate a hash for the contents of training example.
        Args:
            answer (JSON-serializable): The answer associated with the training example.
-            option_ids (iterable of int): Selected option IDs for the training example.
+            options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
            rubric (Rubric): The rubric associated with the training example.
        Returns:
@@ -92,10 +129,28 @@ class TrainingExample(models.Model):
        """
        contents = json.dumps({
            'answer': answer,
-            'option_ids': list(option_ids),
+            'options_selected': options_selected,
            'rubric': rubric.id
        })
        return sha1(contents).hexdigest()
-    class Meta:
+    @classmethod
-        app_label = "assessment"
+    def cache_key(cls, answer, options_selected, rubric):
+        """
+        Calculate a cache key based on the content hash.
+        Args:
+            answer (JSON-serializable): The answer associated with the training example.
+            options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
+            rubric (Rubric): The rubric associated with the training example.
+        Returns:
+        tuple of `(cache_key, content_hash)`: the cache key is a unicode string, the content hash is a SHA1 hex digest
+        """
+        content_hash = cls.calculate_hash(answer, options_selected, rubric)
+        cache_key = u"TrainingExample.model.v{version}.{content_hash}".format(
+            version=cls.CACHE_KEY_VERSION,
+            content_hash=content_hash
+        )
+        return cache_key, content_hash
--- a/apps/openassessment/assessment/serializers/training.py
+++ b/apps/openassessment/assessment/serializers/training.py
 """
 Serializers for the training assessment type.
 """
-import json
+from django.core.cache import cache
 from django.db import transaction, IntegrityError
 from openassessment.assessment.models import TrainingExample
 from .base import rubric_from_dict, RubricSerializer
@@ -53,11 +53,17 @@ def serialize_training_example(example):
        dict
    """
-    return {
+    # Since training examples are immutable, we can safely cache them
-        'answer': example.answer,
+    cache_key = example.cache_key_serialized()
-        'options_selected': example.options_selected_dict,
+    example_dict = cache.get(cache_key)
-        'rubric': RubricSerializer.serialized_from_cache(example.rubric),
+    if example_dict is None:
-    }
+        example_dict = {
+            'answer': example.answer,
+            'options_selected': example.options_selected_dict,
+            'rubric': RubricSerializer.serialized_from_cache(example.rubric),
+        }
+        cache.set(cache_key, example_dict)
+    return example_dict
 @transaction.commit_on_success
@@ -144,24 +150,31 @@ def deserialize_training_examples(examples, rubric_dict):
    # Parse each example
    created_examples = []
    for example_dict in examples:
-        is_valid, errors = validate_training_example_format(example_dict)
-        if not is_valid:
-            raise InvalidTrainingExample("; ".join(errors))
-        options_ids = rubric.options_ids(example_dict['options_selected'])
+        # Try to retrieve the example from the cache
+        cache_key, content_hash = TrainingExample.cache_key(example_dict['answer'], example_dict['options_selected'], rubric)
+        example = cache.get(cache_key)
-        # Calculate the content hash to look up the example
+        # If we couldn't retrieve the example from the cache, create it
-        content_hash = TrainingExample.calculate_hash(example_dict['answer'], options_ids, rubric)
+        if example is None:
+            # Validate the training example
+            is_valid, errors = validate_training_example_format(example_dict)
+            if not is_valid:
+                raise InvalidTrainingExample("; ".join(errors))
-        try:
+            # Get or create the training example
-            example = TrainingExample.objects.get(content_hash=content_hash)
-        except TrainingExample.DoesNotExist:
            try:
-                example = TrainingExample.create_example(
-                    example_dict['answer'], options_ids, rubric
-                )
-            except IntegrityError:
                example = TrainingExample.objects.get(content_hash=content_hash)
+            except TrainingExample.DoesNotExist:
+                try:
+                    example = TrainingExample.create_example(
+                        example_dict['answer'], example_dict['options_selected'], rubric
+                    )
+                except IntegrityError:
+                    example = TrainingExample.objects.get(content_hash=content_hash)
+            # Add the example to the cache
+            cache.set(cache_key, example)
        created_examples.append(example)

--- a/apps/openassessment/assessment/test/test_student_training.py
+++ b/apps/openassessment/assessment/test/test_student_training.py
--- a/settings/test.py
+++ b/settings/test.py
@@ -3,7 +3,7 @@ Test-specific Django settings.
 """
 # Inherit from base settings
-from .base import *
+from .base import *     # pylint:disable=W0614,W0401
 TEST_APPS = (
    'openassessment',
@@ -44,3 +44,10 @@ EDX_ORA2["EVENT_LOGGER"] = "openassessment.workflow.test.events.fake_event_logge
 # We run Celery in "always eager" mode in the test suite,
 # which executes tasks synchronously instead of using the task queue.
 CELERY_ALWAYS_EAGER = True
+# Silence cache key warnings
+# https://docs.djangoproject.com/en/1.4/topics/cache/#cache-key-warnings
+import warnings
+from django.core.cache import CacheKeyWarning
+warnings.simplefilter("ignore", CacheKeyWarning)