Commit cfbd98bd by Will Daly

Merge pull request #402 from edx/will/TIM-624

Rubric structure hash
parents 4fdfcc02 afe39aa0
...@@ -18,7 +18,8 @@ class RubricAdmin(admin.ModelAdmin): ...@@ -18,7 +18,8 @@ class RubricAdmin(admin.ModelAdmin):
list_display_links = ('id', 'content_hash') list_display_links = ('id', 'content_hash')
search_fields = ('id', 'content_hash') search_fields = ('id', 'content_hash')
readonly_fields = ( readonly_fields = (
'id', 'content_hash', 'points_possible', 'criteria_summary', 'data' 'id', 'content_hash', 'structure_hash',
'points_possible', 'criteria_summary', 'data'
) )
def criteria_summary(self, rubric_obj): def criteria_summary(self, rubric_obj):
......
...@@ -57,9 +57,12 @@ class Rubric(models.Model): ...@@ -57,9 +57,12 @@ class Rubric(models.Model):
creating a new Rubric instead. This makes it easy to cache and do hash-based creating a new Rubric instead. This makes it easy to cache and do hash-based
lookups. lookups.
""" """
# SHA1 hash # SHA1 hash, including prompts and explanations
content_hash = models.CharField(max_length=40, unique=True, db_index=True) content_hash = models.CharField(max_length=40, unique=True, db_index=True)
# SHA1 hash of just the rubric structure (criteria / options / points)
structure_hash = models.CharField(max_length=40, db_index=True)
class Meta: class Meta:
app_label = "assessment" app_label = "assessment"
...@@ -91,6 +94,38 @@ class Rubric(models.Model): ...@@ -91,6 +94,38 @@ class Rubric(models.Model):
canonical_form = json.dumps(rubric_dict, sort_keys=True) canonical_form = json.dumps(rubric_dict, sort_keys=True)
return sha1(canonical_form).hexdigest() return sha1(canonical_form).hexdigest()
@staticmethod
def structure_hash_from_dict(rubric_dict):
"""
Generate a hash of the rubric that includes only structural information:
* Criteria names and order
* Option names / points / order number
We do NOT include prompt text or option explanations.
NOTE: currently, we use the criterion and option names as unique identifiers,
so we include them in the structure. In the future, we plan to assign
criteria/options unique IDs -- when we do that, we will need to update
this method and create a data migration for existing rubrics.
"""
structure = [
{
"criterion_name": criterion.get('name'),
"criterion_order": criterion.get('order_num'),
"options": [
{
"option_name": option.get('name'),
"option_points": option.get('points'),
"option_order": option.get('order_num')
}
for option in criterion.get('options', [])
]
}
for criterion in rubric_dict.get('criteria', [])
]
canonical_form = json.dumps(structure, sort_keys=True)
return sha1(canonical_form).hexdigest()
def options_ids(self, options_selected): def options_ids(self, options_selected):
"""Given a mapping of selected options, return the option IDs. """Given a mapping of selected options, return the option IDs.
......
...@@ -92,7 +92,7 @@ class RubricSerializer(NestedModelSerializer): ...@@ -92,7 +92,7 @@ class RubricSerializer(NestedModelSerializer):
class Meta: class Meta:
model = Rubric model = Rubric
fields = ('id', 'content_hash', 'criteria', 'points_possible') fields = ('id', 'content_hash', 'structure_hash', 'criteria', 'points_possible')
def validate_criteria(self, attrs, source): def validate_criteria(self, attrs, source):
"""Make sure we have at least one Criterion in the Rubric.""" """Make sure we have at least one Criterion in the Rubric."""
...@@ -283,6 +283,7 @@ def rubric_from_dict(rubric_dict): ...@@ -283,6 +283,7 @@ def rubric_from_dict(rubric_dict):
rubric = Rubric.objects.get(content_hash=content_hash) rubric = Rubric.objects.get(content_hash=content_hash)
except Rubric.DoesNotExist: except Rubric.DoesNotExist:
rubric_dict["content_hash"] = content_hash rubric_dict["content_hash"] = content_hash
rubric_dict["structure_hash"] = Rubric.structure_hash_from_dict(rubric_dict)
for crit_idx, criterion in enumerate(rubric_dict.get("criteria", {})): for crit_idx, criterion in enumerate(rubric_dict.get("criteria", {})):
if "order_num" not in criterion: if "order_num" not in criterion:
criterion["order_num"] = crit_idx criterion["order_num"] = crit_idx
......
...@@ -3,10 +3,12 @@ ...@@ -3,10 +3,12 @@
Tests for assessment models. Tests for assessment models.
""" """
import copy
from openassessment.test_utils import CacheResetTest from openassessment.test_utils import CacheResetTest
from openassessment.assessment.models import ( from openassessment.assessment.models import (
Rubric, Criterion, CriterionOption, InvalidOptionSelection Rubric, Criterion, CriterionOption, InvalidOptionSelection
) )
from openassessment.assessment.test.constants import RUBRIC
class TestRubricOptionIds(CacheResetTest): class TestRubricOptionIds(CacheResetTest):
...@@ -122,7 +124,7 @@ class TestRubricOptionIds(CacheResetTest): ...@@ -122,7 +124,7 @@ class TestRubricOptionIds(CacheResetTest):
def test_options_ids_points_caching(self): def test_options_ids_points_caching(self):
# First call: the dict is not cached # First call: the dict is not cached
with self.assertNumQueries(1): with self.assertNumQueries(1):
options_ids = self.rubric.options_ids_for_points({ self.rubric.options_ids_for_points({
'test criterion 0': 0, 'test criterion 0': 0,
'test criterion 1': 1, 'test criterion 1': 1,
'test criterion 2': 2, 'test criterion 2': 2,
...@@ -131,7 +133,7 @@ class TestRubricOptionIds(CacheResetTest): ...@@ -131,7 +133,7 @@ class TestRubricOptionIds(CacheResetTest):
# Second call: the dict is not cached # Second call: the dict is not cached
with self.assertNumQueries(0): with self.assertNumQueries(0):
options_ids = self.rubric.options_ids_for_points({ self.rubric.options_ids_for_points({
'test criterion 0': 1, 'test criterion 0': 1,
'test criterion 1': 2, 'test criterion 1': 2,
'test criterion 2': 1, 'test criterion 2': 1,
...@@ -163,3 +165,66 @@ class TestRubricOptionIds(CacheResetTest): ...@@ -163,3 +165,66 @@ class TestRubricOptionIds(CacheResetTest):
'test criterion 2': 1, 'test criterion 2': 1,
'test criterion 3': 0 'test criterion 3': 0
}) })
def test_structure_hash_identical(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
# Same structure, but different text should have the same structure hash
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['prompt'] = 'altered!'
for criterion in altered_rubric['criteria']:
criterion['prompt'] = 'altered!'
for option in criterion['options']:
option['explanation'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
# Expect that the two hashes are the same
self.assertEqual(first_hash, second_hash)
def test_structure_hash_extra_keys(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
# Same structure, add some extra keys
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['extra'] = 'extra!'
altered_rubric['criteria'][0]['extra'] = 'extra!'
altered_rubric['criteria'][0]['options'][0]['extra'] = 'extra!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
# Expect that the two hashes are the same
self.assertEqual(first_hash, second_hash)
def test_structure_hash_criterion_order_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['order_num'] = 5
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_criterion_name_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['name'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_order_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['order_num'] = 5
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_name_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['name'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_points_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['points'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
"training_rubric": { "training_rubric": {
"id": 2, "id": 2,
"content_hash": "de2bb2b7e2c6e3df014e53b8c65f37d511cc4344", "content_hash": "de2bb2b7e2c6e3df014e53b8c65f37d511cc4344",
"structure_hash": "a513b20d93487d6d80e31e1d974bf22519332567",
"criteria": [ "criteria": [
{ {
"order_num": 0, "order_num": 0,
...@@ -67,4 +68,4 @@ ...@@ -67,4 +68,4 @@
} }
} }
} }
} }
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment