Commit 2b224a74 by Will Daly

Merge pull request #333 from edx/will/student-training-in-flight

Student training in-flight changes
parents 64b3df76 5295df1d
......@@ -31,18 +31,29 @@ def submitter_is_finished(submission_uuid, requirements): # pylint:disable=W06
Args:
submission_uuid (str): The UUID of the student's submission.
requirements (dict): Not used.
requirements (dict): Must contain "num_required" indicating
the number of examples the student must assess.
Returns:
bool
Raises:
StudentTrainingRequestError
"""
try:
num_required = int(requirements['num_required'])
except KeyError:
raise StudentTrainingRequestError(u'Requirements dict must contain "num_required" key')
except ValueError:
raise StudentTrainingRequestError(u'The value of "num_required" must be an integer')
try:
workflow = StudentTrainingWorkflow.objects.get(submission_uuid=submission_uuid)
except StudentTrainingWorkflow.DoesNotExist:
return False
else:
return workflow.is_complete
return workflow.num_completed >= num_required
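A minimal caller sketch for the updated `submitter_is_finished` contract, assuming the requirements dict is built from the problem settings (the surrounding variable names are illustrative, not part of this change):

# Hypothetical caller: the requirements dict must carry "num_required".
requirements = {'num_required': 2}   # e.g. taken from the problem definition
try:
    done = submitter_is_finished(submission_uuid, requirements)
except StudentTrainingRequestError:
    # Raised when "num_required" is missing or not an integer
    done = False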
def assessment_is_finished(submission_uuid, requirements): # pylint:disable=W0613
......@@ -147,8 +158,9 @@ def validate_training_examples(rubric, examples):
]
for criterion in rubric['criteria']
}
except (ValueError, KeyError):
except (ValueError, KeyError) as ex:
msg = _(u"Could not parse serialized rubric")
logger.warning("{}: {}".format(msg, ex))
return [msg]
# Check each example
......@@ -189,161 +201,31 @@ def validate_training_examples(rubric, examples):
return errors
def create_training_workflow(submission_uuid, rubric, examples):
"""
Start the training workflow.
Args:
submission_uuid (str): The UUID of the student's submission.
rubric (dict): Serialized rubric model.
examples (list): The serialized training examples the student will need to assess.
Returns:
None
Raises:
StudentTrainingRequestError
StudentTrainingInternalError
Example usage:
>>> options = [
>>> {
>>> "order_num": 0,
>>> "name": "poor",
>>> "explanation": "Poor job!",
>>> "points": 0,
>>> },
>>> {
>>> "order_num": 1,
>>> "name": "good",
>>> "explanation": "Good job!",
>>> "points": 1,
>>> },
>>> {
>>> "order_num": 2,
>>> "name": "excellent",
>>> "explanation": "Excellent job!",
>>> "points": 2,
>>> },
>>> ]
>>>
>>> rubric = {
>>> "prompt": "Write an essay!",
>>> "criteria": [
>>> {
>>> "order_num": 0,
>>> "name": "vocabulary",
>>> "prompt": "How varied is the vocabulary?",
>>> "options": options
>>> },
>>> {
>>> "order_num": 1,
>>> "name": "grammar",
>>> "prompt": "How correct is the grammar?",
>>> "options": options
>>> }
>>> ]
>>> }
>>>
>>> examples = [
>>> {
>>> 'answer': u'Lorem ipsum',
>>> 'options_selected': {
>>> 'vocabulary': 'good',
>>> 'grammar': 'excellent'
>>> }
>>> },
>>> {
>>> 'answer': u'Doler',
>>> 'options_selected': {
>>> 'vocabulary': 'good',
>>> 'grammar': 'poor'
>>> }
>>> }
>>> ]
>>>
>>> create_training_workflow("5443ebbbe2297b30f503736e26be84f6c7303c57", rubric, examples)
"""
try:
# Check that examples were provided
if len(examples) == 0:
msg = (
u"No examples provided for student training workflow "
u"(attempted to create workflow for student with submission UUID {})"
).format(submission_uuid)
raise StudentTrainingRequestError(msg)
# Ensure that a workflow doesn't already exist for this submission
already_exists = StudentTrainingWorkflow.objects.filter(
submission_uuid=submission_uuid
).exists()
if already_exists:
msg = (
u"Student training workflow already exists for the student "
u"associated with submission UUID {}"
).format(submission_uuid)
raise StudentTrainingRequestError(msg)
# Create the training examples
try:
examples = deserialize_training_examples(examples, rubric)
except (InvalidRubric, InvalidTrainingExample) as ex:
logger.exception(
"Could not deserialize training examples for submission UUID {}".format(submission_uuid)
)
raise StudentTrainingRequestError(ex.message)
# Create the workflow
try:
StudentTrainingWorkflow.create_workflow(submission_uuid, examples)
except sub_api.SubmissionNotFoundError as ex:
raise StudentTrainingRequestError(ex.message)
except DatabaseError:
msg = (
u"Could not create student training workflow "
u"with submission UUID {}"
).format(submission_uuid)
logger.exception(msg)
raise StudentTrainingInternalError(msg)
def get_workflow_status(submission_uuid):
def get_num_completed(submission_uuid):
"""
Get the student's position in the training workflow.
Get the number of training examples the student has assessed successfully.
Args:
submission_uuid (str): The UUID of the student's submission.
Returns:
dict: Serialized TrainingStatus
int: The number of completed training examples
Raises:
StudentTrainingRequestError
StudentTrainingInternalError
Example usage:
>>> get_workflow_status("5443ebbbe2297b30f503736e26be84f6c7303c57")
{
'num_items_completed': 1,
'num_items_available': 3
}
>>> get_num_completed("5443ebbbe2297b30f503736e26be84f6c7303c57")
2
"""
try:
try:
workflow = StudentTrainingWorkflow.objects.get(submission_uuid=submission_uuid)
except StudentTrainingWorkflow.DoesNotExist:
msg = u"Student training workflow does not exist for submission UUID {}".format(submission_uuid)
raise StudentTrainingRequestError(msg)
num_completed, num_total = workflow.status
return {
"num_completed": num_completed,
"num_total": num_total
}
return 0
else:
return workflow.num_completed
except DatabaseError:
msg = (
u"An unexpected error occurred while "
......@@ -353,12 +235,22 @@ def get_workflow_status(submission_uuid):
raise StudentTrainingInternalError(msg)
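For illustration only, a caller (for example, the student training step in an XBlock) might combine `get_num_completed` with the same `num_required` value passed to `submitter_is_finished` to render progress; the names below are assumptions, not part of this commit:

# Hypothetical progress display, assuming `requirements` mirrors the dict
# passed to `submitter_is_finished`.
num_completed = get_num_completed(submission_uuid)   # 0 if no workflow exists yet
num_required = int(requirements['num_required'])
progress_msg = u"Assessed {completed} of {required} training examples".format(
    completed=min(num_completed, num_required),
    required=num_required,
)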
def get_training_example(submission_uuid):
def get_training_example(submission_uuid, rubric, examples):
"""
Retrieve a training example for the student to assess.
This will implicitly create a workflow for the student if one does not yet exist.
NOTE: We include the rubric in the returned dictionary to handle
the case in which the instructor changes the rubric definition
while the student is assessing the training example. Once a student
starts on a training example, the student should see the same training
example consistently. However, the next training example the student
retrieves will use the updated rubric.
Args:
submission_uuid (str): The UUID of the student's submission.
rubric (dict): Serialized rubric model.
examples (list): List of serialized training examples.
Returns:
dict: The training example with keys "answer", "rubric", and "options_selected".
......@@ -380,7 +272,7 @@ def get_training_example(submission_uuid):
>>> }
>>> ]
>>>
>>> get_training_example("5443ebbbe2297b30f503736e26be84f6c7303c57")
>>> get_training_example("5443ebbbe2297b30f503736e26be84f6c7303c57", rubric, examples)
{
'answer': u'Lorem ipsum',
'rubric': {
......@@ -407,26 +299,38 @@ def get_training_example(submission_uuid):
}
"""
# Find a workflow for the student
try:
workflow = StudentTrainingWorkflow.objects.get(submission_uuid=submission_uuid)
# Validate the training examples
errors = validate_training_examples(rubric, examples)
if len(errors) > 0:
msg = _(u"Training examples do not match the rubric: {errors}").format(
errors="\n".join(errors)
)
raise StudentTrainingRequestError(msg)
# Find the next incomplete item in the workflow
item = workflow.next_incomplete_item
if item is None:
return None
else:
return serialize_training_example(item.training_example)
except StudentTrainingWorkflow.DoesNotExist:
msg = (
u"No student training workflow exists for the student "
u"associated with submission UUID {}"
).format(submission_uuid)
# Get or create the workflow
workflow = StudentTrainingWorkflow.get_or_create_workflow(submission_uuid=submission_uuid)
# Get or create the training examples
examples = deserialize_training_examples(examples, rubric)
# Pick a training example that the student has not yet completed
# If the student already started a training example, then return that instead.
next_example = workflow.next_training_example(examples)
return None if next_example is None else serialize_training_example(next_example)
except (InvalidRubric, InvalidTrainingExample) as ex:
logger.exception(
"Could not deserialize training examples for submission UUID {}".format(submission_uuid)
)
raise StudentTrainingRequestError(ex.message)
except sub_api.SubmissionNotFoundError as ex:
msg = _(u"Could not retrieve the submission with UUID {}").format(submission_uuid)
logger.exception(msg)
raise StudentTrainingRequestError(msg)
except DatabaseError:
msg = (
u"Could not retrieve next item in"
u" student training workflow with submission UUID {}"
msg = _(
u"Could not retrieve a training example "
u"for the student with submission UUID {}"
).format(submission_uuid)
logger.exception(msg)
raise StudentTrainingInternalError(msg)
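As a hedged sketch of the intended call sequence (only `get_training_example` and `assess_training_example` come from this API; the loop structure and the use of the example's own answer key are illustrative):

# Hypothetical training loop: retrieve an example (implicitly creating the
# workflow on the first call), collect the student's selections, then assess.
example = get_training_example(submission_uuid, rubric, examples)
while example is not None:
    # Stand-in for the student's real selections; reusing the example's own
    # answer key guarantees a correct assessment in this sketch.
    corrections = assess_training_example(submission_uuid, example['options_selected'])
    assert corrections == {}   # empty corrections mark the current item complete
    # The next call returns a new example, or None once all examples are done.
    example = get_training_example(submission_uuid, rubric, examples)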
......@@ -436,6 +340,8 @@ def assess_training_example(submission_uuid, options_selected, update_workflow=T
"""
Assess a training example and update the workflow.
This must be called *after* `get_training_example()`.
Args:
submission_uuid (str): The UUID of the student's submission.
options_selected (dict): The options the student selected.
......@@ -466,8 +372,8 @@ def assess_training_example(submission_uuid, options_selected, update_workflow=T
try:
workflow = StudentTrainingWorkflow.objects.get(submission_uuid=submission_uuid)
# Find the next incomplete item in the workflow
item = workflow.next_incomplete_item
# Find the item the student is currently working on
item = workflow.current_item
if item is None:
msg = (
u"No items are available in the student training workflow associated with "
......
"""
Django models specific to the student training assessment type.
"""
from django.db import models, transaction
from django.db import models
from django.utils import timezone
from submissions import api as sub_api
from .training import TrainingExample
......@@ -27,14 +27,12 @@ class StudentTrainingWorkflow(models.Model):
app_label = "assessment"
@classmethod
@transaction.commit_on_success
def create_workflow(cls, submission_uuid, examples):
def get_or_create_workflow(cls, submission_uuid):
"""
Create a student training workflow.
Args:
submission_uuid (str): The UUID of the submission from the student being trained.
examples (list of TrainingExamples): The training examples to show the student.
Returns:
StudentTrainingWorkflow
......@@ -43,70 +41,105 @@ class StudentTrainingWorkflow(models.Model):
SubmissionError: There was an error retrieving the submission.
"""
# Try to retrieve an existing workflow
# If we find one, return it immediately
try:
return cls.objects.get(submission_uuid=submission_uuid) # pylint:disable=E1101
except cls.DoesNotExist:
pass
# Retrieve the student item info
submission = sub_api.get_submission_and_student(submission_uuid)
student_item = submission['student_item']
# Create the workflow
workflow = cls.objects.create(
return cls.objects.create(
submission_uuid=submission_uuid,
student_id=student_item['student_id'],
item_id=student_item['item_id'],
course_id=student_item['course_id']
)
# Create workflow items for each example
for order_num, example in enumerate(examples):
StudentTrainingWorkflowItem.objects.create(
workflow=workflow,
order_num=order_num,
training_example=example,
)
return workflow
@property
def status(self):
def num_completed(self):
"""
The student's status within the workflow (num steps completed / num steps available).
Return the number of training examples that the
student successfully assessed.
Returns:
tuple of `(num_completed, num_total)`, both integers
int
"""
items = self.items.all() # pylint:disable=E1101
num_complete = sum([1 if item.is_complete else 0 for item in items])
num_total = len(items)
return num_complete, num_total
return self.items.filter(completed_at__isnull=False).count() # pylint:disable=E1101
@property
def is_complete(self):
def next_training_example(self, examples):
"""
Check whether all items in the workflow are complete.
Return the next training example for the student to assess.
If the student is already working on an example, return that.
Otherwise, choose an example the student hasn't seen
from the list of available examples.
Args:
examples (list of TrainingExample): Training examples to choose from.
Returns:
bool
TrainingExample or None
"""
num_incomplete = self.items.filter(completed_at__isnull=True).count() # pylint:disable=E1101
return num_incomplete == 0
# Fetch all the items for this workflow from the database
# Since Django's `select_related` does not follow reverse foreign keys,
# we perform the filter ourselves.
items = StudentTrainingWorkflowItem.objects.select_related(
'training_example'
).filter(workflow=self)
# If we're already working on an item, then return that item
incomplete_items = [item for item in items if not item.is_complete]
if len(incomplete_items) > 0:
return incomplete_items[0].training_example
# Otherwise, pick an item that we have not completed
# from the list of examples.
completed_examples = [
item.training_example for item in items
]
available_examples = [
available for available in examples
if available not in completed_examples
]
# If there are no more items available, return None
if len(available_examples) == 0:
return None
# Otherwise, create a new workflow item for the example
# and add it to the workflow
else:
order_num = len(items) + 1
next_example = available_examples[0]
StudentTrainingWorkflowItem.objects.create(
workflow=self,
order_num=order_num,
training_example=next_example
)
return next_example
@property
def next_incomplete_item(self):
def current_item(self):
"""
Find the next incomplete item in the workflow.
Return the item the student is currently working on,
or None.
Returns:
StudentTrainingWorkflowItem or None
"""
next_incomplete = self.items.filter( # pylint:disable=E1101
next_incomplete = self.items.select_related(
'training_example'
).filter( # pylint:disable=E1101
completed_at__isnull=True
).order_by('order_num')[:1]
if len(next_incomplete) > 0:
return next_incomplete[0]
else:
return None
return None if len(next_incomplete) == 0 else next_incomplete[0]
class StudentTrainingWorkflowItem(models.Model):
......
......@@ -3,6 +3,7 @@ Django models for training (both student and AI).
"""
import json
from hashlib import sha1
from django.core.cache import cache
from django.db import models
from .base import Rubric, CriterionOption
......@@ -22,29 +23,34 @@ class TrainingExample(models.Model):
# SHA1 hash
content_hash = models.CharField(max_length=40, unique=True, db_index=True)
# Version for models serialized to the cache
# Increment this number whenever you update this model!
CACHE_KEY_VERSION = 1
class Meta:
app_label = "assessment"
@classmethod
def create_example(cls, answer, options_ids, rubric):
def create_example(cls, answer, options_selected, rubric):
"""
Create a new training example.
Args:
answer (JSON-serializable): The answer associated with the training example.
option_ids (iterable of int): Selected option IDs for the training example.
options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
rubric (Rubric): The rubric associated with the training example.
Returns:
TrainingExample
"""
content_hash = cls.calculate_hash(answer, options_ids, rubric)
content_hash = cls.calculate_hash(answer, options_selected, rubric)
example = TrainingExample.objects.create(
content_hash=content_hash,
raw_answer=json.dumps(answer),
rubric=rubric
)
options_ids = rubric.options_ids(options_selected)
for option in CriterionOption.objects.filter(pk__in=list(options_ids)):
example.options_selected.add(option)
......@@ -71,19 +77,50 @@ class TrainingExample(models.Model):
dict: maps criterion names to selected option names
"""
return {
option.criterion.name: option.name
for option in self.options_selected.all() # pylint:disable=E1101
}
# Since training examples are immutable, we can safely cache this
cache_key = self.cache_key_serialized(attribute="options_selected_dict")
options_selected = cache.get(cache_key)
if options_selected is None:
options_selected = {
option.criterion.name: option.name
for option in self.options_selected.all() # pylint:disable=E1101
}
cache.set(cache_key, options_selected)
return options_selected
def cache_key_serialized(self, attribute=None):
"""
Create a cache key based on the content hash
for serialized versions of this model.
Kwargs:
attribute: The name of the attribute being serialized.
If not specified, assume that we are serializing the entire model.
Returns:
str: The cache key
"""
if attribute is None:
key_template = u"TrainingExample.json.v{version}.{content_hash}"
else:
key_template = u"TrainingExample.{attribute}.json.v{version}.{content_hash}"
cache_key = key_template.format(
version=self.CACHE_KEY_VERSION,
content_hash=self.content_hash,
attribute=attribute
)
return cache_key
@staticmethod
def calculate_hash(answer, option_ids, rubric):
def calculate_hash(answer, options_selected, rubric):
"""
Calculate a hash for the contents of a training example.
Args:
answer (JSON-serializable): The answer associated with the training example.
option_ids (iterable of int): Selected option IDs for the training example.
options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
rubric (Rubric): The rubric associated with the training example.
Returns:
......@@ -92,10 +129,28 @@ class TrainingExample(models.Model):
"""
contents = json.dumps({
'answer': answer,
'option_ids': list(option_ids),
'options_selected': options_selected,
'rubric': rubric.id
})
return sha1(contents).hexdigest()
class Meta:
app_label = "assessment"
@classmethod
def cache_key(cls, answer, options_selected, rubric):
"""
Calculate a cache key based on the content hash.
Args:
answer (JSON-serializable): The answer associated with the training example.
options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
rubric (Rubric): The rubric associated with the training example.
Returns:
tuple of `(cache_key, content_hash)`, both bytestrings
"""
content_hash = cls.calculate_hash(answer, options_selected, rubric)
cache_key = u"TrainingExample.model.v{version}.{content_hash}".format(
version=cls.CACHE_KEY_VERSION,
content_hash=content_hash
)
return cache_key, content_hash
"""
Serializers for the training assessment type.
"""
import json
from django.core.cache import cache
from django.db import transaction, IntegrityError
from openassessment.assessment.models import TrainingExample
from .base import rubric_from_dict, RubricSerializer
......@@ -53,11 +53,17 @@ def serialize_training_example(example):
dict
"""
return {
'answer': example.answer,
'options_selected': example.options_selected_dict,
'rubric': RubricSerializer.serialized_from_cache(example.rubric),
}
# Since training examples are immutable, we can safely cache them
cache_key = example.cache_key_serialized()
example_dict = cache.get(cache_key)
if example_dict is None:
example_dict = {
'answer': example.answer,
'options_selected': example.options_selected_dict,
'rubric': RubricSerializer.serialized_from_cache(example.rubric),
}
cache.set(cache_key, example_dict)
return example_dict
@transaction.commit_on_success
......@@ -144,24 +150,31 @@ def deserialize_training_examples(examples, rubric_dict):
# Parse each example
created_examples = []
for example_dict in examples:
is_valid, errors = validate_training_example_format(example_dict)
if not is_valid:
raise InvalidTrainingExample("; ".join(errors))
options_ids = rubric.options_ids(example_dict['options_selected'])
# Try to retrieve the example from the cache
cache_key, content_hash = TrainingExample.cache_key(example_dict['answer'], example_dict['options_selected'], rubric)
example = cache.get(cache_key)
# Calculate the content hash to look up the example
content_hash = TrainingExample.calculate_hash(example_dict['answer'], options_ids, rubric)
# If we couldn't retrieve the example from the cache, create it
if example is None:
# Validate the training example
is_valid, errors = validate_training_example_format(example_dict)
if not is_valid:
raise InvalidTrainingExample("; ".join(errors))
try:
example = TrainingExample.objects.get(content_hash=content_hash)
except TrainingExample.DoesNotExist:
# Get or create the training example
try:
example = TrainingExample.create_example(
example_dict['answer'], options_ids, rubric
)
except IntegrityError:
example = TrainingExample.objects.get(content_hash=content_hash)
except TrainingExample.DoesNotExist:
try:
example = TrainingExample.create_example(
example_dict['answer'], example_dict['options_selected'], rubric
)
except IntegrityError:
example = TrainingExample.objects.get(content_hash=content_hash)
# Add the example to the cache
cache.set(cache_key, example)
created_examples.append(example)
......
......@@ -44,8 +44,8 @@ class StudentTrainingAssessmentTest(CacheResetTest):
},
{
"order_num": 2,
"name": "єχ¢єℓℓєηт",
"explanation": "乇メc乇レレ乇刀イ フo乃!",
"name": u"єχ¢єℓℓєηт",
"explanation": u"乇メc乇レレ乇刀イ フo乃!",
"points": 2,
},
]
......@@ -97,10 +97,6 @@ class StudentTrainingAssessmentTest(CacheResetTest):
self.submission_uuid = submission['uuid']
def test_training_workflow(self):
# Start a workflow
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
# Initially, we should be on the first step
self._assert_workflow_status(self.submission_uuid, 0, 2)
......@@ -141,12 +137,9 @@ class StudentTrainingAssessmentTest(CacheResetTest):
self._assert_workflow_status(self.submission_uuid, 2, 2)
def test_assess_without_update(self):
# Start a workflow
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
# Assess the first training example the same way the instructor did
# but do NOT update the workflow
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
corrections = training_api.assess_training_example(
self.submission_uuid,
self.EXAMPLES[0]['options_selected'],
......@@ -157,6 +150,69 @@ class StudentTrainingAssessmentTest(CacheResetTest):
self.assertEqual(corrections, dict())
self._assert_workflow_status(self.submission_uuid, 0, 2)
def test_get_same_example(self):
# Retrieve a training example
retrieved = training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
# If we retrieve an example without completing the current example,
# we should get the same one.
next_retrieved = training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
self.assertEqual(retrieved, next_retrieved)
def test_get_training_example_num_queries(self):
# Run through the training example once using a different submission
# Training examples and rubrics will be cached and shared for other
# students working on the same problem.
self._warm_cache(self.RUBRIC, self.EXAMPLES)
# First training example
# This will need to create the student training workflow and the first item
# NOTE: we *could* cache the rubric model to reduce the number of queries here,
# but we're selecting it by content hash, which is indexed and should be plenty fast.
with self.assertNumQueries(6):
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
# Without assessing the first training example, try to retrieve a training example.
# This should return the same example as before, so we won't need to create
# any workflows or workflow items.
with self.assertNumQueries(3):
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
# Assess the current training example
training_api.assess_training_example(self.submission_uuid, self.EXAMPLES[0]['options_selected'])
# Retrieve the next training example, which requires us to create
# a new workflow item (but not a new workflow).
with self.assertNumQueries(4):
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
def test_submitter_is_finished_num_queries(self):
# Complete the first training example
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
training_api.assess_training_example(self.submission_uuid, self.EXAMPLES[0]['options_selected'])
# Check whether we've completed the requirements
requirements = {'num_required': 2}
with self.assertNumQueries(2):
training_api.submitter_is_finished(self.submission_uuid, requirements)
def test_get_num_completed_num_queries(self):
# Complete the first training example
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
training_api.assess_training_example(self.submission_uuid, self.EXAMPLES[0]['options_selected'])
# Check the number completed
with self.assertNumQueries(2):
training_api.get_num_completed(self.submission_uuid)
def test_assess_training_example_num_queries(self):
# Populate the cache with training examples and rubrics
self._warm_cache(self.RUBRIC, self.EXAMPLES)
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
with self.assertNumQueries(4):
training_api.assess_training_example(self.submission_uuid, self.EXAMPLES[0]['options_selected'])
@ddt.file_data('data/validate_training_examples.json')
def test_validate_training_examples(self, data):
errors = training_api.validate_training_examples(
......@@ -167,17 +223,15 @@ class StudentTrainingAssessmentTest(CacheResetTest):
def test_is_finished_no_workflow(self):
# Without creating a workflow, we should not be finished
self.assertFalse(training_api.submitter_is_finished(self.submission_uuid, dict()))
requirements = {'num_required': 1}
self.assertFalse(training_api.submitter_is_finished(self.submission_uuid, requirements))
# But since we're not being assessed by others, the "assessment" should be finished.
self.assertTrue(training_api.assessment_is_finished(self.submission_uuid, dict()))
self.assertTrue(training_api.assessment_is_finished(self.submission_uuid, requirements))
def test_get_training_example_none_available(self):
# Start a workflow and assess all training examples
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
self._assert_workflow_status(self.submission_uuid, 0, 2)
for example in self.EXAMPLES:
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
training_api.assess_training_example(self.submission_uuid, example['options_selected'])
# Now we should be complete
......@@ -185,40 +239,13 @@ class StudentTrainingAssessmentTest(CacheResetTest):
# ... and if we try to get another example, we should get None
self.assertIs(
training_api.get_training_example(self.submission_uuid), None
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES),
None
)
def test_get_training_example_no_workflow(self):
# With no workflow defined, we should get an error
with self.assertRaises(StudentTrainingRequestError):
training_api.get_training_example(self.submission_uuid)
def test_create_training_workflow_already_started(self):
# Create a workflow for training
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
# Try to create a second workflow for the same submission,
# expecting an error.
with self.assertRaises(StudentTrainingRequestError):
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
def test_create_training_workflow_no_examples(self):
# Try to create a training workflow with no examples
# and expect an error.
with self.assertRaises(StudentTrainingRequestError):
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, [])
def test_create_training_workflow_no_submission(self):
# Try to create a training workflow with an invalid submission UUID
with self.assertRaises(StudentTrainingRequestError):
training_api.create_training_workflow("not a submission!", self.RUBRIC, self.EXAMPLES)
def test_assess_training_example_completed_workflow(self):
# Start a workflow and assess all training examples
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
self._assert_workflow_status(self.submission_uuid, 0, 2)
for example in self.EXAMPLES:
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
training_api.assess_training_example(self.submission_uuid, example['options_selected'])
# Try to assess again, and expect an error
......@@ -228,66 +255,62 @@ class StudentTrainingAssessmentTest(CacheResetTest):
)
def test_assess_training_example_no_workflow(self):
# With no workflow defined, we should get an error
# If we try to assess without first retrieving an example
# (which implicitly creates a workflow)
# then we should get a request error.
with self.assertRaises(StudentTrainingRequestError):
training_api.assess_training_example(
self.submission_uuid, self.EXAMPLES[0]['options_selected']
)
def test_get_workflow_status_no_workflow(self):
# With no workflow defined, we should get an error
# when we try to request the status.
with self.assertRaises(StudentTrainingRequestError):
training_api.get_workflow_status(self.submission_uuid)
def test_get_num_completed_no_workflow(self):
num_completed = training_api.get_num_completed(self.submission_uuid)
self.assertEqual(num_completed, 0)
def test_create_workflow_invalid_rubric(self):
def test_get_training_example_invalid_rubric(self):
# Rubric is missing a very important key!
invalid_rubric = copy.deepcopy(self.RUBRIC)
del invalid_rubric['criteria']
with self.assertRaises(StudentTrainingRequestError):
training_api.create_training_workflow(self.submission_uuid, invalid_rubric, self.EXAMPLES)
training_api.get_training_example(self.submission_uuid, invalid_rubric, self.EXAMPLES)
def test_create_workflow_invalid_examples(self):
# Training example is not a dictionary!
def test_get_training_example_no_submission(self):
with self.assertRaises(StudentTrainingRequestError):
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, ["not a dict!"])
@patch.object(StudentTrainingWorkflow, 'create_workflow')
def test_create_workflow_database_error(self, mock_db):
mock_db.side_effect = DatabaseError("Kaboom!")
with self.assertRaises(StudentTrainingInternalError):
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
training_api.get_training_example("no_such_submission", self.RUBRIC, self.EXAMPLES)
@patch.object(StudentTrainingWorkflow.objects, 'get')
def test_get_workflow_status_database_error(self, mock_db):
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
def test_get_num_completed_database_error(self, mock_db):
mock_db.side_effect = DatabaseError("Kaboom!")
with self.assertRaises(StudentTrainingInternalError):
training_api.get_workflow_status(self.submission_uuid)
training_api.get_num_completed(self.submission_uuid)
@patch.object(StudentTrainingWorkflow.objects, 'get')
def test_get_training_example_database_error(self, mock_db):
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
mock_db.side_effect = DatabaseError("Kaboom!")
with self.assertRaises(StudentTrainingInternalError):
training_api.get_training_example(self.submission_uuid)
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
@patch.object(StudentTrainingWorkflow.objects, 'get')
def test_assess_training_example_database_error(self, mock_db):
training_api.create_training_workflow(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
training_api.get_training_example(self.submission_uuid, self.RUBRIC, self.EXAMPLES)
mock_db.side_effect = DatabaseError("Kaboom!")
with self.assertRaises(StudentTrainingInternalError):
training_api.assess_training_example(self.submission_uuid, self.EXAMPLES[0]['options_selected'])
def _assert_workflow_status(self, submission_uuid, num_completed, num_total):
@ddt.data({}, {'num_required': 'not an integer!'})
def test_submitter_is_finished_invalid_requirements(self, requirements):
with self.assertRaises(StudentTrainingRequestError):
training_api.submitter_is_finished(self.submission_uuid, requirements)
def _assert_workflow_status(self, submission_uuid, num_completed, num_required):
"""
Check that the training workflow is on the expected step.
Args:
submission_uuid (str): Submission UUID of the student being trained.
num_completed (int): The expected number of examples assessed correctly.
num_total (int): The expected number of available examples.
num_required (int): The number of examples the student is required to assess.
Returns:
None
......@@ -296,27 +319,22 @@ class StudentTrainingAssessmentTest(CacheResetTest):
AssertionError
"""
# Check the workflow status (what step are we on?)
status = training_api.get_workflow_status(submission_uuid)
self.assertEqual(status['num_completed'], num_completed)
self.assertEqual(status['num_total'], num_total)
# Check the number of steps we've completed
actual_num_completed = training_api.get_num_completed(submission_uuid)
self.assertEqual(actual_num_completed, num_completed)
# Check whether the assessment step is completed
# (used by the workflow API)
is_finished = bool(num_completed == num_total)
self.assertEqual(
training_api.submitter_is_finished(submission_uuid, dict()),
is_finished
)
requirements = {'num_required': num_required}
is_finished = training_api.submitter_is_finished(submission_uuid, requirements)
self.assertEqual(is_finished, bool(num_completed >= num_required))
# Assessment is finished should always be true,
# since we're not being assessed by others.
self.assertTrue(
training_api.assessment_is_finished(submission_uuid, dict()),
)
self.assertTrue(training_api.assessment_is_finished(submission_uuid, requirements))
# At no point should we receive a score!
self.assertIs(training_api.get_score(submission_uuid, dict()), None)
self.assertIs(training_api.get_score(submission_uuid, requirements), None)
def _expected_example(self, input_example, rubric):
"""
......@@ -352,6 +370,25 @@ class StudentTrainingAssessmentTest(CacheResetTest):
AssertionError
"""
example = training_api.get_training_example(submission_uuid)
example = training_api.get_training_example(submission_uuid, input_rubric, input_examples)
expected_example = self._expected_example(input_examples[order_num], input_rubric)
self.assertItemsEqual(example, expected_example)
def _warm_cache(self, rubric, examples):
"""
Create a submission and complete student training.
This will populate the cache with training examples and rubrics,
which are immutable and shared for all students training on a particular problem.
Args:
rubric (dict): Serialized rubric model.
examples (list of dict): Serialized training examples
Returns:
None
"""
pre_submission = sub_api.create_submission(self.STUDENT_ITEM, self.ANSWER)
for example in examples:
training_api.get_training_example(pre_submission['uuid'], rubric, examples)
training_api.assess_training_example(pre_submission['uuid'], example['options_selected'])
......@@ -3,7 +3,7 @@ Test-specific Django settings.
"""
# Inherit from base settings
from .base import *
from .base import * # pylint:disable=W0614,W0401
TEST_APPS = (
'openassessment',
......@@ -44,3 +44,10 @@ EDX_ORA2["EVENT_LOGGER"] = "openassessment.workflow.test.events.fake_event_logge
# We run Celery in "always eager" mode in the test suite,
# which executes tasks synchronously instead of using the task queue.
CELERY_ALWAYS_EAGER = True
# Silence cache key warnings
# https://docs.djangoproject.com/en/1.4/topics/cache/#cache-key-warnings
import warnings
from django.core.cache import CacheKeyWarning
warnings.simplefilter("ignore", CacheKeyWarning)