Commit b6cdc2a5 by Will Daly

Cache classifier data

parent 231f4c7c
...@@ -4,17 +4,15 @@ Database models for AI assessment. ...@@ -4,17 +4,15 @@ Database models for AI assessment.
from uuid import uuid4 from uuid import uuid4
import json import json
import logging import logging
import itertools
from django.conf import settings from django.conf import settings
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.core.cache import get_cache
from django.db import models, transaction, DatabaseError from django.db import models, transaction, DatabaseError
from django.utils.timezone import now from django.utils.timezone import now
from django.core.exceptions import ObjectDoesNotExist
from django_extensions.db.fields import UUIDField from django_extensions.db.fields import UUIDField
from dogapi import dog_stats_api from dogapi import dog_stats_api
from submissions import api as sub_api from submissions import api as sub_api
from openassessment.assessment.serializers import rubric_from_dict from openassessment.assessment.serializers import rubric_from_dict
from openassessment.assessment.errors.ai import AIReschedulingInternalError
from .base import Rubric, Criterion, Assessment, AssessmentPart from .base import Rubric, Criterion, Assessment, AssessmentPart
from .training import TrainingExample from .training import TrainingExample
...@@ -176,6 +174,9 @@ class AIClassifierSet(models.Model): ...@@ -176,6 +174,9 @@ class AIClassifierSet(models.Model):
return classifier_set return classifier_set
# Number of seconds to store downloaded classifiers in the in-memory cache.
DEFAULT_CLASSIFIER_CACHE_TIMEOUT = 300
@property @property
def classifiers_dict(self): def classifiers_dict(self):
""" """
...@@ -187,14 +188,30 @@ class AIClassifierSet(models.Model): ...@@ -187,14 +188,30 @@ class AIClassifierSet(models.Model):
If there are no classifiers in the set, returns None If there are no classifiers in the set, returns None
""" """
classifiers = list(self.classifiers.all()) # pylint: disable=E1101 # First check the in-memory cache
if len(classifiers) == 0: # We use an in-memory cache because the classifier data will most often
return None # be several megabytes, which exceeds the default memcached size limit.
else: # If we find it, we can avoid calls to the database, S3, and json.
return { cache_key = unicode(self.id)
cache = get_cache(
'django.core.cache.backends.locmem.LocMemCache',
LOCATION='openassessment.ai.classifiers_dict'
)
classifiers_dict = cache.get(cache_key)
# If we can't find the classifiers dict in the cache,
# we need to look up the classifiers in the database,
# then download the classifier data.
if classifiers_dict is None:
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
classifiers_dict = {
classifier.criterion.name: classifier.download_classifier_data() classifier.criterion.name: classifier.download_classifier_data()
for classifier in classifiers for classifier in classifiers
} }
timeout = getattr(settings, 'ORA2_CLASSIFIER_CACHE_TIMEOUT', self.DEFAULT_CLASSIFIER_CACHE_TIMEOUT)
cache.set(cache_key, classifiers_dict, timeout)
return classifiers_dict if classifiers_dict else None
# Directory in which classifiers will be stored # Directory in which classifiers will be stored
......
...@@ -12,16 +12,18 @@ from openassessment.assessment.serializers import rubric_from_dict ...@@ -12,16 +12,18 @@ from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC from .constants import RUBRIC
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
class AIClassifierTest(CacheResetTest): class AIClassifierTest(CacheResetTest):
""" """
Tests for the AIClassifier model. Tests for the AIClassifier model.
""" """
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
def test_upload_to_path_default(self): def test_upload_to_path_default(self):
# No path prefix provided in the settings # No path prefix provided in the settings
...@@ -46,11 +48,35 @@ class AIClassifierTest(CacheResetTest): ...@@ -46,11 +48,35 @@ class AIClassifierTest(CacheResetTest):
""" """
rubric = rubric_from_dict(RUBRIC) rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set( classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
) )
return AIClassifier.objects.filter(classifier_set=classifier_set)[0] return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIClassifierSetTest(CacheResetTest):
    """
    Tests for the AIClassifierSet model.
    """

    def setUp(self):
        """
        Create a classifier set to exercise in each test.
        """
        # Run the parent setUp first so its per-test cache reset still
        # happens; overriding setUp without calling super would skip it.
        super(AIClassifierSetTest, self).setUp()
        rubric = rubric_from_dict(RUBRIC)
        self.classifier_set = AIClassifierSet.create_classifier_set(
            CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
        )

    def test_cache_downloads(self):
        """
        Accessing `classifiers_dict` a second time should be served
        from the cache rather than the database.
        """
        # Retrieve the classifier dict twice, which should hit the caching code.
        # We can check that we're using the cache by asserting that
        # the number of database queries decreases.
        with self.assertNumQueries(3):
            first = self.classifier_set.classifiers_dict

        # The second access should not touch the database at all.
        with self.assertNumQueries(0):
            second = self.classifier_set.classifiers_dict

        # Verify that we got the same value both times
        self.assertEqual(first, second)
class AIGradingWorkflowTest(CacheResetTest): class AIGradingWorkflowTest(CacheResetTest):
""" """
Tests for the AIGradingWorkflow model. Tests for the AIGradingWorkflow model.
......
""" """
Test utilities Test utilities
""" """
from django.core.cache import cache from django.core.cache import cache, get_cache
from django.test import TestCase from django.test import TestCase
...@@ -11,8 +11,18 @@ class CacheResetTest(TestCase): ...@@ -11,8 +11,18 @@ class CacheResetTest(TestCase):
""" """
def setUp(self): def setUp(self):
super(CacheResetTest, self).setUp() super(CacheResetTest, self).setUp()
cache.clear() self._clear_all_caches()
def tearDown(self): def tearDown(self):
super(CacheResetTest, self).tearDown() super(CacheResetTest, self).tearDown()
self._clear_all_caches()
def _clear_all_caches(self):
"""
Clear the default cache and any custom caches.
"""
cache.clear() cache.clear()
get_cache(
'django.core.cache.backends.locmem.LocMemCache',
LOCATION='openassessment.ai.classifiers_dict'
).clear()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment