Commit cfbd98bd by Will Daly

Merge pull request #402 from edx/will/TIM-624

Rubric structure hash
parents 4fdfcc02 afe39aa0
......@@ -18,7 +18,8 @@ class RubricAdmin(admin.ModelAdmin):
list_display_links = ('id', 'content_hash')
search_fields = ('id', 'content_hash')
readonly_fields = (
'id', 'content_hash', 'points_possible', 'criteria_summary', 'data'
'id', 'content_hash', 'structure_hash',
'points_possible', 'criteria_summary', 'data'
)
def criteria_summary(self, rubric_obj):
......
......@@ -57,9 +57,12 @@ class Rubric(models.Model):
creating a new Rubric instead. This makes it easy to cache and do hash-based
lookups.
"""
# SHA1 hash
# SHA1 hash, including prompts and explanations
content_hash = models.CharField(max_length=40, unique=True, db_index=True)
# SHA1 hash of just the rubric structure (criteria / options / points)
structure_hash = models.CharField(max_length=40, db_index=True)
class Meta:
app_label = "assessment"
......@@ -91,6 +94,38 @@ class Rubric(models.Model):
canonical_form = json.dumps(rubric_dict, sort_keys=True)
return sha1(canonical_form).hexdigest()
@staticmethod
def structure_hash_from_dict(rubric_dict):
"""
Generate a hash of the rubric that includes only structural information:
* Criteria names and order
* Option names / points / order number
We do NOT include prompt text or option explanations.
NOTE: currently, we use the criterion and option names as unique identifiers,
so we include them in the structure. In the future, we plan to assign
criteria/options unique IDs -- when we do that, we will need to update
this method and create a data migration for existing rubrics.
"""
structure = [
{
"criterion_name": criterion.get('name'),
"criterion_order": criterion.get('order_num'),
"options": [
{
"option_name": option.get('name'),
"option_points": option.get('points'),
"option_order": option.get('order_num')
}
for option in criterion.get('options', [])
]
}
for criterion in rubric_dict.get('criteria', [])
]
canonical_form = json.dumps(structure, sort_keys=True)
return sha1(canonical_form).hexdigest()
def options_ids(self, options_selected):
"""Given a mapping of selected options, return the option IDs.
......
......@@ -92,7 +92,7 @@ class RubricSerializer(NestedModelSerializer):
class Meta:
model = Rubric
fields = ('id', 'content_hash', 'criteria', 'points_possible')
fields = ('id', 'content_hash', 'structure_hash', 'criteria', 'points_possible')
def validate_criteria(self, attrs, source):
"""Make sure we have at least one Criterion in the Rubric."""
......@@ -283,6 +283,7 @@ def rubric_from_dict(rubric_dict):
rubric = Rubric.objects.get(content_hash=content_hash)
except Rubric.DoesNotExist:
rubric_dict["content_hash"] = content_hash
rubric_dict["structure_hash"] = Rubric.structure_hash_from_dict(rubric_dict)
for crit_idx, criterion in enumerate(rubric_dict.get("criteria", {})):
if "order_num" not in criterion:
criterion["order_num"] = crit_idx
......
......@@ -3,10 +3,12 @@
Tests for assessment models.
"""
import copy
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.models import (
Rubric, Criterion, CriterionOption, InvalidOptionSelection
)
from openassessment.assessment.test.constants import RUBRIC
class TestRubricOptionIds(CacheResetTest):
......@@ -122,7 +124,7 @@ class TestRubricOptionIds(CacheResetTest):
def test_options_ids_points_caching(self):
# First call: the dict is not cached
with self.assertNumQueries(1):
options_ids = self.rubric.options_ids_for_points({
self.rubric.options_ids_for_points({
'test criterion 0': 0,
'test criterion 1': 1,
'test criterion 2': 2,
......@@ -131,7 +133,7 @@ class TestRubricOptionIds(CacheResetTest):
# Second call: the dict is not cached
with self.assertNumQueries(0):
options_ids = self.rubric.options_ids_for_points({
self.rubric.options_ids_for_points({
'test criterion 0': 1,
'test criterion 1': 2,
'test criterion 2': 1,
......@@ -163,3 +165,66 @@ class TestRubricOptionIds(CacheResetTest):
'test criterion 2': 1,
'test criterion 3': 0
})
def test_structure_hash_identical(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
# Same structure, but different text should have the same structure hash
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['prompt'] = 'altered!'
for criterion in altered_rubric['criteria']:
criterion['prompt'] = 'altered!'
for option in criterion['options']:
option['explanation'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
# Expect that the two hashes are the same
self.assertEqual(first_hash, second_hash)
def test_structure_hash_extra_keys(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
# Same structure, add some extra keys
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['extra'] = 'extra!'
altered_rubric['criteria'][0]['extra'] = 'extra!'
altered_rubric['criteria'][0]['options'][0]['extra'] = 'extra!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
# Expect that the two hashes are the same
self.assertEqual(first_hash, second_hash)
def test_structure_hash_criterion_order_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['order_num'] = 5
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_criterion_name_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['name'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_order_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['order_num'] = 5
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_name_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['name'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
def test_structure_hash_option_points_changed(self):
first_hash = Rubric.structure_hash_from_dict(RUBRIC)
altered_rubric = copy.deepcopy(RUBRIC)
altered_rubric['criteria'][0]['options'][0]['points'] = 'altered!'
second_hash = Rubric.structure_hash_from_dict(altered_rubric)
self.assertNotEqual(first_hash, second_hash)
......@@ -9,6 +9,7 @@
"training_rubric": {
"id": 2,
"content_hash": "de2bb2b7e2c6e3df014e53b8c65f37d511cc4344",
"structure_hash": "a513b20d93487d6d80e31e1d974bf22519332567",
"criteria": [
{
"order_num": 0,
......@@ -67,4 +68,4 @@
}
}
}
}
\ No newline at end of file
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment