Commit ed32822f by Will Daly

Cache classifier data

parent 04b682e5
......@@ -6,7 +6,7 @@ import json
import logging
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.cache import cache
from django.core.cache import cache, get_cache
from django.db import models, transaction, DatabaseError
from django.utils.timezone import now
from django_extensions.db.fields import UUIDField
......@@ -22,6 +22,17 @@ AI_ASSESSMENT_TYPE = "AI"
logger = logging.getLogger(__name__)
# Cache used to hold downloaded classifier data.
# By default this is an in-memory (LocMemCache) cache, because the classifier
# data will generally be larger than memcached's default max size.
# Deployments can override it with the ORA2_CLASSIFIERS_CACHE setting; the value
# of that setting is used directly as the cache object.
# NOTE: the default cache below is constructed even when the setting is present,
# because it is evaluated as an argument to getattr().
CLASSIFIERS_CACHE = getattr(
    settings, 'ORA2_CLASSIFIERS_CACHE',
    get_cache(
        'django.core.cache.backends.locmem.LocMemCache',
        LOCATION='openassessment.ai.classifiers_dict'
    )
)
class IncompleteClassifierSet(Exception):
"""
The classifier set is missing a classifier for a criterion in the rubric.
......@@ -252,6 +263,9 @@ class AIClassifierSet(models.Model):
# If we get to this point, no classifiers exist with this rubric and algorithm.
return None
# Number of seconds to store downloaded classifiers in the in-memory cache.
DEFAULT_CLASSIFIER_CACHE_TIMEOUT = 300
@property
def classifiers_dict(self):
"""
......@@ -263,14 +277,26 @@ class AIClassifierSet(models.Model):
If there are no classifiers in the set, returns None
"""
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
if len(classifiers) == 0:
return None
else:
return {
# First check the in-memory cache
# We use an in-memory cache because the classifier data will most often
# be several megabytes, which exceeds the default memcached size limit.
# If we find it, we can avoid calls to the database, S3, and json.
cache_key = unicode(self.id)
classifiers_dict = CLASSIFIERS_CACHE.get(cache_key)
# If we can't find the classifiers dict in the cache,
# we need to look up the classifiers in the database,
# then download the classifier data.
if classifiers_dict is None:
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
classifiers_dict = {
classifier.criterion.name: classifier.download_classifier_data()
for classifier in classifiers
}
timeout = getattr(settings, 'ORA2_CLASSIFIER_CACHE_TIMEOUT', self.DEFAULT_CLASSIFIER_CACHE_TIMEOUT)
CLASSIFIERS_CACHE.set(cache_key, classifiers_dict, timeout)
return classifiers_dict if classifiers_dict else None
# Directory in which classifiers will be stored
......
......@@ -12,16 +12,18 @@ from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC
# Fixture data shared by the test cases in this module.
# These values are passed to AIClassifierSet.create_classifier_set().
# The keys are presumably criterion names from RUBRIC, each mapped to that
# criterion's classifier data -- TODO confirm against .constants.
CLASSIFIERS_DICT = {
    u"vøȼȺƀᵾłȺɍɏ": "test data",
    u"ﻭɼค๓๓คɼ": "more test data"
}
# Non-ASCII course identifier used when creating classifier sets.
COURSE_ID = u"†3߆ çøU®ß3"
# Item identifier used when creating classifier sets.
ITEM_ID = u"fake_item_id"
class AIClassifierTest(CacheResetTest):
"""
Tests for the AIClassifier model.
"""
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
def test_upload_to_path_default(self):
# No path prefix provided in the settings
......@@ -46,11 +48,35 @@ class AIClassifierTest(CacheResetTest):
"""
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
)
return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIClassifierSetTest(CacheResetTest):
    """
    Tests for the AIClassifierSet model.
    """

    def setUp(self):
        """
        Reset the caches, then create the classifier set used by the tests.
        """
        # Run the parent setUp first: it clears the caches, and the query
        # counts asserted in the tests below assume a cold cache.
        super(AIClassifierSetTest, self).setUp()
        rubric = rubric_from_dict(RUBRIC)
        self.classifier_set = AIClassifierSet.create_classifier_set(
            CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
        )

    def test_cache_downloads(self):
        """
        Check that a second read of `classifiers_dict` is served from the cache.
        """
        # Retrieve the classifier dict twice, which should hit the caching code.
        # We can check that we're using the cache by asserting that
        # the number of database queries decreases.
        with self.assertNumQueries(3):
            first = self.classifier_set.classifiers_dict

        with self.assertNumQueries(0):
            second = self.classifier_set.classifiers_dict

        # Verify that we got the same value both times
        self.assertEqual(first, second)
class AIGradingWorkflowTest(CacheResetTest):
"""
Tests for the AIGradingWorkflow model.
......
"""
Test utilities
"""
from django.core.cache import cache
from django.core.cache import cache, get_cache
from django.test import TestCase
......@@ -11,8 +11,18 @@ class CacheResetTest(TestCase):
"""
def setUp(self):
super(CacheResetTest, self).setUp()
cache.clear()
self._clear_all_caches()
def tearDown(self):
    """
    Run the parent class teardown, then empty every cache again.
    """
    super(CacheResetTest, self).tearDown()
    self._clear_all_caches()
def _clear_all_caches(self):
    """
    Empty the default Django cache, followed by the in-memory
    cache located at 'openassessment.ai.classifiers_dict'.
    """
    cache.clear()

    # NOTE(review): this backend/location pair is hard-coded here; if the
    # application is configured to use a different classifier cache,
    # that cache will not be cleared by this helper -- confirm.
    classifiers_cache = get_cache(
        'django.core.cache.backends.locmem.LocMemCache',
        LOCATION='openassessment.ai.classifiers_dict'
    )
    classifiers_cache.clear()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment