Commit bf6d6929 by Will Daly

Merge pull request #433 from edx/will/ai-cache-classifiers

Cache AI classifier data
parents 5f687ffc ed32822f
......@@ -6,7 +6,7 @@ import json
import logging
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.cache import cache
from django.core.cache import cache, get_cache
from django.db import models, transaction, DatabaseError
from django.utils.timezone import now
from django_extensions.db.fields import UUIDField
......@@ -22,6 +22,17 @@ AI_ASSESSMENT_TYPE = "AI"
logger = logging.getLogger(__name__)
# Use an in-memory cache to hold classifier data, but allow settings to override this.
# The classifier data will generally be larger than memcached's default max size
class IncompleteClassifierSet(Exception):
The classifier set is missing a classifier for a criterion in the rubric.
......@@ -252,6 +263,9 @@ class AIClassifierSet(models.Model):
# If we get to this point, no classifiers exist with this rubric and algorithm.
return None
# Number of seconds to store downloaded classifiers in the in-memory cache.
def classifiers_dict(self):
......@@ -263,14 +277,26 @@ class AIClassifierSet(models.Model):
If there are no classifiers in the set, returns None
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
if len(classifiers) == 0:
return None
return {
# First check the in-memory cache
# We use an in-memory cache because the classifier data will most often
# be several megabytes, which exceeds the default memcached size limit.
# If we find it, we can avoid calls to the database, S3, and json.
cache_key = unicode(
classifiers_dict = CLASSIFIERS_CACHE.get(cache_key)
# If we can't find the classifiers dict in the cache,
# we need to look up the classifiers in the database,
# then download the classifier data.
if classifiers_dict is None:
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
classifiers_dict = { classifier.download_classifier_data()
for classifier in classifiers
CLASSIFIERS_CACHE.set(cache_key, classifiers_dict, timeout)
return classifiers_dict if classifiers_dict else None
# Directory in which classifiers will be stored
......@@ -12,16 +12,18 @@ from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
class AIClassifierTest(CacheResetTest):
Tests for the AIClassifier model.
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
def test_upload_to_path_default(self):
# No path prefix provided in the settings
......@@ -46,11 +48,35 @@ class AIClassifierTest(CacheResetTest):
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIClassifierSetTest(CacheResetTest):
Tests for the AIClassifierSet model.
def setUp(self):
rubric = rubric_from_dict(RUBRIC)
self.classifier_set = AIClassifierSet.create_classifier_set(
CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
def test_cache_downloads(self):
# Retrieve the classifier dict twice, which should hit the caching code.
# We can check that we're using the cache by asserting that
# the number of database queries decreases.
with self.assertNumQueries(3):
first = self.classifier_set.classifiers_dict
with self.assertNumQueries(0):
second = self.classifier_set.classifiers_dict
# Verify that we got the same value both times
self.assertEqual(first, second)
class AIGradingWorkflowTest(CacheResetTest):
Tests for the AIGradingWorkflow model.
Test utilities
from django.core.cache import cache
from django.core.cache import cache, get_cache
from django.test import TestCase
......@@ -11,8 +11,18 @@ class CacheResetTest(TestCase):
def setUp(self):
super(CacheResetTest, self).setUp()
def tearDown(self):
super(CacheResetTest, self).tearDown()
def _clear_all_caches(self):
Clear the default cache and any custom caches.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment