Commit c83d5948 by David Ormsbee

First cut at serializing Rubrics

parent d580f825
......@@ -42,3 +42,6 @@ nosetests.xml
# some mac thing
.DS_Store
# PyCharm
.idea
......@@ -12,6 +12,9 @@ import math
from openassessment.peer.models import PeerEvaluation
from openassessment.peer.serializers import PeerAssessmentSerializer
from openassessment.peer.serializers import (
PeerEvaluationSerializer, content_hash_for_rubric_dict
)
from submissions import api as submission_api
from submissions.models import Submission, StudentItem, Score
from submissions.serializers import SubmissionSerializer, StudentItemSerializer
......@@ -70,6 +73,7 @@ def create_assessment(
required_assessments_for_student,
required_assessments_for_submission,
assessment_dict,
rubric_dict,
scored_at=None):
"""Creates an assessment on the given submission.
......
"""
This would hold models related to the peer response workflow. There's going to
be a lot here, like rubrics and such.
These Models have to capture not only the state of evaluations made for certain
submissions, but also the state of the specific rubrics at the time those
evaluations were made. This means we have a number of little models, and that
much of this data is immutable once created, so that we don't lose historical
information. This also means that if you change the Rubric in a problem and
this system is seeing that new Rubric for the first time, we're going to be
writing a whole little tree of objects into the database. Fortunately, we only
need to write this when we see a changed problem (rare). Performance concerns
when reading this data is mitigated by the fact that it's easy to cache the
entire tree of objects (since everything is immutable).
"""
from hashlib import sha1
import json
from django.db import models
from django.utils.timezone import now
......@@ -31,3 +41,71 @@ class PeerEvaluation(models.Model):
class Meta:
ordering = ["-scored_at"]
class Rubric(models.Model):
    """
    A Rubric: the top-level container for the criteria a peer evaluation
    is scored against.

    Per the module docstring, rubric content is treated as immutable once
    written; a changed rubric results in a new Rubric tree being stored,
    keyed by a hash of its content.
    """
    # SHA1 hex digest (40 chars) of the rubric's serialized content; used
    # to find an existing identical Rubric instead of creating a new one
    # (see serializers.rubric_id_for).
    content_hash = models.CharField(max_length=40)

    # This is actually the prompt for the whole question, which may be a
    # complex, nested XML structure.
    prompt = models.TextField(max_length=10000)

    def points_possible(self):
        """Return the maximum total score: sum of each criterion's max points."""
        return sum(crit.points_possible() for crit in self.criteria.all())
class Criterion(models.Model):
    """
    One dimension a reviewer evaluates a submission on (e.g. "Is the
    deadline realistic?"). Belongs to exactly one Rubric.
    """
    # All Rubrics have at least one Criterion
    rubric = models.ForeignKey(Rubric, related_name="criteria")

    # 0-based order in the Rubric
    order_num = models.PositiveIntegerField()

    # What are we asking the reviewer to evaluate in this Criterion?
    prompt = models.TextField(max_length=10000)

    class Meta:
        ordering = ["rubric", "order_num"]

    def points_possible(self):
        """Return the highest point value among this criterion's options.

        NOTE(review): this raises ValueError if the criterion has no
        options -- the model comments state "All Criteria must have at
        least one CriterionOption", so that invariant is relied upon here.
        """
        return max(option.points for option in self.options.all())
class CriterionOption(models.Model):
    """
    One selectable rating for a Criterion, carrying a point value and
    user-visible name/explanation text.
    """
    # All Criteria must have at least one CriterionOption.
    criterion = models.ForeignKey(Criterion, related_name="options")

    # 0-based order in Criterion
    order_num = models.PositiveIntegerField()

    # How many points this option is worth. 0 is allowed.
    points = models.PositiveIntegerField()

    # Short name of the option. This is visible to the user.
    # Examples: "Excellent", "Good", "Fair", "Poor"
    name = models.CharField(max_length=100)

    # Longer text describing this option and why you should choose it.
    # Example: "The response makes 3-5 Monty Python references and at least one
    #           original Star Wars trilogy reference. Do not select this option
    #           if the author made any references to the second trilogy."
    explanation = models.TextField(max_length=10000, blank=True)

    class Meta:
        ordering = ["criterion", "order_num"]

    def __repr__(self):
        # Include every content field so debug output is self-describing.
        return (
            "CriterionOption(order_num={0.order_num}, points={0.points}, "
            "name={0.name!r}, explanation={0.explanation!r})"
        ).format(self)

    def __unicode__(self):
        return repr(self)
......@@ -2,8 +2,14 @@
Serializers are created to ensure models do not have to be accessed outside the
scope of the Tim APIs.
"""
from copy import deepcopy
from hashlib import sha1
import json
from rest_framework import serializers
from openassessment.peer.models import PeerEvaluation
from openassessment.peer.models import (
Criterion, CriterionOption, PeerEvaluation, Rubric
)
class PeerAssessmentSerializer(serializers.ModelSerializer):
......@@ -18,3 +24,62 @@ class PeerAssessmentSerializer(serializers.ModelSerializer):
'score_type',
'feedback',
)
class CriterionOptionSerializer(serializers.ModelSerializer):
    """Serializer for a single CriterionOption (one selectable rating)."""
    class Meta:
        model = CriterionOption
        fields = ('order_num', 'points', 'name', 'explanation')
class CriterionSerializer(serializers.ModelSerializer):
    """Serializer for a Criterion, with its options serialized inline."""
    # Nested serialization of the criterion's CriterionOptions.
    options = CriterionOptionSerializer(many=True)

    class Meta:
        model = Criterion
        fields = ('order_num', 'prompt', 'options')
class RubricSerializer(serializers.ModelSerializer):
    """Serializer for a full Rubric tree (criteria and their options)."""
    # Nested serialization of the rubric's Criteria (each of which nests
    # its own options -- see CriterionSerializer).
    criteria = CriterionSerializer(many=True)

    class Meta:
        model = Rubric
        fields = ('id', 'content_hash', 'prompt', 'criteria')
def content_hash_for_rubric_dict(rubric_dict):
    """Return the SHA1 hex digest that identifies this rubric's content.

    Args:
        rubric_dict (dict): Serialized rubric content, e.g. the output of a
            RubricSerializer. Not mutated -- we work on a deep copy.

    Returns:
        str: 40-character SHA1 hex digest of the canonical JSON form.
    """
    rubric_dict = deepcopy(rubric_dict)

    # Neither "id" nor "content_hash" count towards calculating the
    # content_hash -- a rubric that has already been stored (and so has an
    # id) must hash identically to an unstored one with the same content.
    rubric_dict.pop("id", None)
    rubric_dict.pop("content_hash", None)

    # sort_keys gives a canonical serialization regardless of dict ordering.
    # Encode explicitly: sha1 requires bytes on Python 3, and json.dumps
    # emits ASCII-only output by default, so the digest is unchanged.
    canonical_form = json.dumps(rubric_dict, sort_keys=True)
    return sha1(canonical_form.encode("utf-8")).hexdigest()
def rubric_id_for(rubric_dict):
    """Given a rubric_dict, return the rubric ID we're going to submit against.

    This will create the Rubric and its children if it does not exist already.

    Args:
        rubric_dict (dict): Serialized rubric content. Any "id" or
            "content_hash" keys are ignored for hashing purposes.

    Returns:
        int: Primary key of the matching (possibly newly created) Rubric.

    Raises:
        ValueError: If the rubric data fails serializer validation.
    """
    rubric_dict = deepcopy(rubric_dict)

    # Calculate the hash based on the rubric content...
    content_hash = content_hash_for_rubric_dict(rubric_dict)

    try:
        rubric = Rubric.objects.get(content_hash=content_hash)
    except Rubric.DoesNotExist:
        rubric_dict["content_hash"] = content_hash
        rubric_serializer = RubricSerializer(data=rubric_dict)
        if not rubric_serializer.is_valid():
            # Surface the serializer's field-level errors so callers can
            # tell what was wrong with the rubric definition.
            raise ValueError(
                "Invalid rubric definition: {}".format(rubric_serializer.errors)
            )
        rubric = rubric_serializer.save()

    return rubric.id
{
"prompt": "Create a plan to deliver edx-tim!",
"criteria": [
{
"order_num": 0,
"prompt": "Is the deadline realistic?",
"options": [
{
"order_num": 0,
"points": 0,
"name": "No",
"explanation": ""
},
{
"order_num": 1,
"points": 2,
"name": "Maybe",
"explanation": ""
},
{
"order_num": 2,
"points": 4,
"name": "Yes",
"explanation": ""
}
]
},
{
"order_num": 1,
"prompt": "Describe the architecture.",
"options": [
{
"order_num": 0,
"points": 0,
"name": "Crazy",
"explanation": ""
},
{
"order_num": 1,
"points": 1,
"name": "Plausible",
"explanation": ""
},
{
"order_num": 2,
"points": 2,
"name": "Solid",
"explanation": ""
}
]
}
]
}
\ No newline at end of file
# -*- coding: utf-8 -*-
from itertools import izip, permutations, product, tee
import json
from django.test import TestCase
from openassessment.peer.models import Criterion, CriterionOption, Rubric
from openassessment.peer.serializers import RubricSerializer
#class TestHashing(TestCase):
#
# def test_option_hashing_unicode_vs_bytes(self):
# unicode_option = CriterionOption(
# order_num=0, points=1, name="Bad", explanation=u"Can't understand it."
# )
# bytes_option = CriterionOption(
# order_num=0, points=1, name=u"Bad", explanation=u"Can't understand it."
# )
# self.assertEqual(unicode_option.summary_hash(), bytes_option.summary_hash())
#
# def test_non_latin_chars_hash(self):
# # We're just making sure these don't throw exceptions
# option = CriterionOption(
# order_num=0, points=1, name=u"Áẃéśőḿé", explanation=u"キuレレ oキ wノ刀"
# )
# option.summary_hash()
#
# criterion = Criterion(
# order_num=0, prompt=u"Ẅäṡ ïẗ äẅëṡöṁë?"
# )
# criterion.summary_hash()
#
# def test_minor_option_differences(self):
# combinations = {
# 'order_num': [0, 1],
# 'points': [0, 2],
# 'name': ["Sad", "sad", "Happy!"],
# 'explanation': ["Ewok death scene", "Ewoks dancing", "Ewoks Dancing"]
# }
# for a, b in model_combinations_by_pairs(CriterionOption, combinations):
# self.assertNotEqual(
# a.summary_hash(),
# b.summary_hash(),
# "{} and {} hash the same ({}), but shouldn't".format(
# a, b, a.summary_hash()
# )
# )
#
# def test_child_objects_affect_hash(self):
# pass
#class TestCriterion(TestCase):
#
# @classmethod
# def setUpClass(cls):
# # This only runs once because Rubrics should never mutate. A great deal
# # of our design depends on this assumption.
# cls.project_rubric = cls._create_project_rubric()
#
# @classmethod
# def tearDownClass(cls):
# cls.project_rubric.delete()
#
# @classmethod
# def _create_project_rubric(cls):
# rubric = Rubric.objects.create(
# prompt="Create a plan to deliver edx-tim!"
# )
#
# criteria = [
# # Intentionally created out of order
# Criterion(
# rubric=rubric, order_num=1, prompt=u"Describe the architecture."
# ),
# Criterion(
# rubric=rubric, order_num=0, prompt=u"Is the deadline realistic?"
# ),
# ]
# rubric.criteria.add(*criteria)
#
# arch_options = [
# CriterionOption(
# criterion=criteria[0], order_num=0, points=0, name="Crazy"
# ),
# CriterionOption(
# criterion=criteria[0], order_num=1, points=1, name="Plausible"
# ),
# CriterionOption(
# criterion=criteria[0], order_num=2, points=2, name="Solid"
# ),
# ]
# deadline_options = [
# CriterionOption(
# criterion=criteria[1], order_num=0, points=0, name="No"
# ),
# CriterionOption(
# criterion=criteria[1], order_num=1, points=2, name="Maybe"
# ),
# CriterionOption(
# criterion=criteria[1], order_num=2, points=4, name="Yes"
# ),
# ]
# # We're assigning it this way, but because of order_num, it should
# # spit back out with the deadline criterion first.
# criteria[0].options.add(*arch_options)
# criteria[1].options.add(*deadline_options)
#
# return rubric
#
# def test_points_possible_calculation(self):
# rubric = self.project_rubric
# deadline_crit, arch_crit = rubric.criteria.all()
#
# print json.dumps(RubricSerializer(rubric).data, indent=2)
# 1/0
#
# self.assertEqual(deadline_crit.points_possible(), 4)
# self.assertEqual(arch_crit.points_possible(), 2)
# self.assertEqual(rubric.points_possible(), 5)
#
# def test_hashing(self):
# pass
def model_combinations_by_pairs(model_cls, template_dict):
    """Yield (model_a, model_b) pairs of `model_cls` instances, one pair per
    (dict_a, dict_b) produced by `dict_product_by_pairs(template_dict)`.
    """
    for kwargs_a, kwargs_b in dict_product_by_pairs(template_dict):
        yield model_cls(**kwargs_a), model_cls(**kwargs_b)
def dict_product_by_pairs(template_dict, all_permutations=False):
    """Returns iterable of (dict_a, dict_b) permutations based on template_dict.

    We often want to test things that differ only slightly from each other.
    For instance, to check that varying any one field changes a generated
    hash, it helps to compare pairs drawn from every combination of a set of
    field arguments -- essentially each pair of entries produced by the
    innermost body of a giant nested for-loop.

    Args:
        template_dict (dict): Keys must be the keys you want in each generated
            dictionary. Values should be lists that will be cycled through when
            generating dicts in the output.
        all_permutations (bool): If True, will return every possible
            combination of produced pairs (n^2). False by default, so it will
            only return adjacent pairs.
    """
    generated_dicts = dict_product(template_dict)

    if all_permutations:
        return permutations(generated_dicts, 2)

    # Adjacent pairs only: s -> (s0, s1), (s1, s2), (s2, s3), ...
    first_iter, second_iter = tee(generated_dicts)
    next(second_iter, None)
    return izip(first_iter, second_iter)
def dict_product(template_dict):
    """Yield one dict per combination of the value lists in template_dict.

    Each produced dict has the same keys as template_dict, with each value
    drawn from the corresponding list (Cartesian product of the lists).
    """
    keys = list(template_dict.keys())
    for combination in product(*(template_dict[key] for key in keys)):
        yield dict(zip(keys, combination))
import json
import os.path
from ddt import ddt, file_data
from django.test import TestCase
from openassessment.peer.models import Criterion, CriterionOption, Rubric
from openassessment.peer.serializers import rubric_id_for
def json_data(filename):
    """Parse and return the JSON fixture at `filename`, resolved relative to
    this test module's directory.
    """
    fixture_path = os.path.join(os.path.dirname(__file__), filename)
    with open(fixture_path, "rb") as fixture_file:
        return json.load(fixture_file)
class TestPeerSerializers(TestCase):
    """Round-trip tests for rubric creation/lookup via `rubric_id_for`."""

    def test_repeat_data(self):
        # Submitting the same rubric content twice must resolve to the same
        # Rubric row (content-hash lookup), not create a duplicate tree.
        rubric_data = json_data('rubric_data/project_plan_rubric.json')
        rubric_id1 = rubric_id_for(rubric_data)
        rubric_id2 = rubric_id_for(rubric_data)
        self.assertEqual(rubric_id1, rubric_id2)
        Rubric.objects.get(id=rubric_id1).delete()

    def test_db_access(self):
        rubric_data = json_data('rubric_data/project_plan_rubric.json')
        # First call misses the content-hash lookup and must write the
        # Rubric tree (4 queries observed here).
        with self.assertNumQueries(4):
            rubric_id1 = rubric_id_for(rubric_data)
        # Second call should be satisfied by the single hash-lookup query.
        with self.assertNumQueries(1):
            rubric_id2 = rubric_id_for(rubric_data)
        Rubric.objects.get(id=rubric_id1).delete()
\ No newline at end of file
......@@ -37,7 +37,7 @@ class StudentItem(models.Model):
))
def __unicode__(self):
return "({0.student_id}, {0.course_id}, {0.item_type}, {0.item_id})".format(self)
return u"({0.student_id}, {0.course_id}, {0.item_type}, {0.item_id})".format(self)
class Meta:
unique_together = (
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment