Commit bf6d6929 by Will Daly

Merge pull request #433 from edx/will/ai-cache-classifiers

Cache AI classifier data
parents 5f687ffc ed32822f
...@@ -6,7 +6,7 @@ import json ...@@ -6,7 +6,7 @@ import json
import logging import logging
from django.conf import settings from django.conf import settings
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.core.cache import cache from django.core.cache import cache, get_cache
from django.db import models, transaction, DatabaseError from django.db import models, transaction, DatabaseError
from django.utils.timezone import now from django.utils.timezone import now
from django_extensions.db.fields import UUIDField from django_extensions.db.fields import UUIDField
...@@ -22,6 +22,17 @@ AI_ASSESSMENT_TYPE = "AI" ...@@ -22,6 +22,17 @@ AI_ASSESSMENT_TYPE = "AI"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Classifier data lives in a dedicated cache instead of the default one,
# because it will generally be larger than memcached's default max size.
# The fallback is an in-memory (local-memory) cache; a deployment can supply
# its own cache object by defining `ORA2_CLASSIFIERS_CACHE` in settings.
_DEFAULT_CLASSIFIERS_CACHE = get_cache(
    'django.core.cache.backends.locmem.LocMemCache',
    LOCATION='openassessment.ai.classifiers_dict'
)
CLASSIFIERS_CACHE = getattr(settings, 'ORA2_CLASSIFIERS_CACHE', _DEFAULT_CLASSIFIERS_CACHE)
class IncompleteClassifierSet(Exception): class IncompleteClassifierSet(Exception):
""" """
The classifier set is missing a classifier for a criterion in the rubric. The classifier set is missing a classifier for a criterion in the rubric.
...@@ -252,6 +263,9 @@ class AIClassifierSet(models.Model): ...@@ -252,6 +263,9 @@ class AIClassifierSet(models.Model):
# If we get to this point, no classifiers exist with this rubric and algorithm. # If we get to this point, no classifiers exist with this rubric and algorithm.
return None return None
# Number of seconds to store downloaded classifiers in the in-memory cache.
DEFAULT_CLASSIFIER_CACHE_TIMEOUT = 300
@property @property
def classifiers_dict(self): def classifiers_dict(self):
""" """
...@@ -263,14 +277,26 @@ class AIClassifierSet(models.Model): ...@@ -263,14 +277,26 @@ class AIClassifierSet(models.Model):
If there are no classifiers in the set, returns None If there are no classifiers in the set, returns None
""" """
classifiers = list(self.classifiers.all()) # pylint: disable=E1101 # First check the in-memory cache
if len(classifiers) == 0: # We use an in-memory cache because the classifier data will most often
return None # be several megabytes, which exceeds the default memcached size limit.
else: # If we find it, we can avoid calls to the database, S3, and json.
return { cache_key = unicode(self.id)
classifiers_dict = CLASSIFIERS_CACHE.get(cache_key)
# If we can't find the classifiers dict in the cache,
# we need to look up the classifiers in the database,
# then download the classifier data.
if classifiers_dict is None:
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
classifiers_dict = {
classifier.criterion.name: classifier.download_classifier_data() classifier.criterion.name: classifier.download_classifier_data()
for classifier in classifiers for classifier in classifiers
} }
timeout = getattr(settings, 'ORA2_CLASSIFIER_CACHE_TIMEOUT', self.DEFAULT_CLASSIFIER_CACHE_TIMEOUT)
CLASSIFIERS_CACHE.set(cache_key, classifiers_dict, timeout)
return classifiers_dict if classifiers_dict else None
# Directory in which classifiers will be stored # Directory in which classifiers will be stored
......
...@@ -12,16 +12,18 @@ from openassessment.assessment.serializers import rubric_from_dict ...@@ -12,16 +12,18 @@ from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC from .constants import RUBRIC
# Module-level fixtures shared by the test cases below.

# Classifier data handed to `AIClassifierSet.create_classifier_set`.
# NOTE(review): the keys are presumably criterion names from RUBRIC, and the
# non-ASCII text presumably exercises unicode handling -- confirm.
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
# Course and item identifiers passed to `create_classifier_set`.
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
class AIClassifierTest(CacheResetTest): class AIClassifierTest(CacheResetTest):
""" """
Tests for the AIClassifier model. Tests for the AIClassifier model.
""" """
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
def test_upload_to_path_default(self): def test_upload_to_path_default(self):
# No path prefix provided in the settings # No path prefix provided in the settings
...@@ -46,11 +48,35 @@ class AIClassifierTest(CacheResetTest): ...@@ -46,11 +48,35 @@ class AIClassifierTest(CacheResetTest):
""" """
rubric = rubric_from_dict(RUBRIC) rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set( classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
) )
return AIClassifier.objects.filter(classifier_set=classifier_set)[0] return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIClassifierSetTest(CacheResetTest):
    """
    Tests for the AIClassifierSet model.
    """

    def setUp(self):
        """
        Reset the caches, then create the classifier set used by each test.
        """
        # Call the base-class setUp explicitly: overriding setUp without
        # doing so would skip the cache reset that CacheResetTest performs
        # before each test, and a stale cache entry would make the
        # query-count assertions below unreliable.
        super(AIClassifierSetTest, self).setUp()
        rubric = rubric_from_dict(RUBRIC)
        self.classifier_set = AIClassifierSet.create_classifier_set(
            CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
        )

    def test_cache_downloads(self):
        """
        Reading `classifiers_dict` a second time should be served from the cache.
        """
        # Retrieve the classifier dict twice, which should hit the caching code.
        # We can check that we're using the cache by asserting that
        # the number of database queries decreases.
        with self.assertNumQueries(3):
            first = self.classifier_set.classifiers_dict

        # The second access must not touch the database at all.
        with self.assertNumQueries(0):
            second = self.classifier_set.classifiers_dict

        # Verify that we got the same value both times
        self.assertEqual(first, second)
class AIGradingWorkflowTest(CacheResetTest): class AIGradingWorkflowTest(CacheResetTest):
""" """
Tests for the AIGradingWorkflow model. Tests for the AIGradingWorkflow model.
......
""" """
Test utilities Test utilities
""" """
from django.core.cache import cache from django.core.cache import cache, get_cache
from django.test import TestCase from django.test import TestCase
...@@ -11,8 +11,18 @@ class CacheResetTest(TestCase): ...@@ -11,8 +11,18 @@ class CacheResetTest(TestCase):
""" """
def setUp(self): def setUp(self):
super(CacheResetTest, self).setUp() super(CacheResetTest, self).setUp()
cache.clear() self._clear_all_caches()
def tearDown(self): def tearDown(self):
super(CacheResetTest, self).tearDown() super(CacheResetTest, self).tearDown()
self._clear_all_caches()
def _clear_all_caches(self):
"""
Clear the default cache and any custom caches.
"""
cache.clear() cache.clear()
get_cache(
'django.core.cache.backends.locmem.LocMemCache',
LOCATION='openassessment.ai.classifiers_dict'
).clear()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment