Commit b6cdc2a5 by Will Daly

Cache classifier data

parent 231f4c7c
......@@ -4,17 +4,15 @@ Database models for AI assessment.
from uuid import uuid4
import json
import logging
import itertools
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.cache import get_cache
from django.db import models, transaction, DatabaseError
from django.utils.timezone import now
from django.core.exceptions import ObjectDoesNotExist
from django_extensions.db.fields import UUIDField
from dogapi import dog_stats_api
from submissions import api as sub_api
from openassessment.assessment.serializers import rubric_from_dict
from openassessment.assessment.errors.ai import AIReschedulingInternalError
from .base import Rubric, Criterion, Assessment, AssessmentPart
from .training import TrainingExample
......@@ -176,6 +174,9 @@ class AIClassifierSet(models.Model):
return classifier_set
# Number of seconds to store downloaded classifiers in the in-memory cache.
DEFAULT_CLASSIFIER_CACHE_TIMEOUT = 300
@property
def classifiers_dict(self):
"""
......@@ -187,14 +188,30 @@ class AIClassifierSet(models.Model):
If there are no classifiers in the set, returns None
"""
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
if len(classifiers) == 0:
return None
else:
return {
# First check the in-memory cache
# We use an in-memory cache because the classifier data will most often
# be several megabytes, which exceeds the default memcached size limit.
# If we find it, we can avoid calls to the database, S3, and json.
cache_key = unicode(self.id)
cache = get_cache(
'django.core.cache.backends.locmem.LocMemCache',
LOCATION='openassessment.ai.classifiers_dict'
)
classifiers_dict = cache.get(cache_key)
# If we can't find the classifiers dict in the cache,
# we need to look up the classifiers in the database,
# then download the classifier data.
if classifiers_dict is None:
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
classifiers_dict = {
classifier.criterion.name: classifier.download_classifier_data()
for classifier in classifiers
}
timeout = getattr(settings, 'ORA2_CLASSIFIER_CACHE_TIMEOUT', self.DEFAULT_CLASSIFIER_CACHE_TIMEOUT)
cache.set(cache_key, classifiers_dict, timeout)
return classifiers_dict if classifiers_dict else None
# Directory in which classifiers will be stored
......
......@@ -12,16 +12,18 @@ from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
class AIClassifierTest(CacheResetTest):
"""
Tests for the AIClassifier model.
"""
CLASSIFIERS_DICT = {
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
def test_upload_to_path_default(self):
# No path prefix provided in the settings
......@@ -46,11 +48,35 @@ class AIClassifierTest(CacheResetTest):
"""
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
)
return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIClassifierSetTest(CacheResetTest):
    """
    Tests for the AIClassifierSet model.
    """

    def setUp(self):
        """
        Reset the caches, then create the classifier set shared by the tests.
        """
        # Chain to CacheResetTest.setUp so that the caches are cleared
        # *before* the fixture is created.  Without this call the override
        # silently skips the cache reset, and the query-count assertions
        # below could be satisfied (or broken) by stale cache entries.
        super(AIClassifierSetTest, self).setUp()
        rubric = rubric_from_dict(RUBRIC)
        self.classifier_set = AIClassifierSet.create_classifier_set(
            CLASSIFIERS_DICT, rubric, "test_algorithm", COURSE_ID, ITEM_ID
        )

    def test_cache_downloads(self):
        """
        Reading `classifiers_dict` a second time should be served from
        the cache and return the same value as the first read.
        """
        # Retrieve the classifier dict twice, which should hit the caching code.
        # We can check that we're using the cache by asserting that
        # the number of database queries decreases.
        with self.assertNumQueries(3):
            first = self.classifier_set.classifiers_dict

        # The second access must not touch the database at all.
        with self.assertNumQueries(0):
            second = self.classifier_set.classifiers_dict

        # Verify that we got the same value both times
        self.assertEqual(first, second)
class AIGradingWorkflowTest(CacheResetTest):
"""
Tests for the AIGradingWorkflow model.
......
"""
Test utilities
"""
from django.core.cache import cache
from django.core.cache import cache, get_cache
from django.test import TestCase
......@@ -11,8 +11,18 @@ class CacheResetTest(TestCase):
"""
def setUp(self):
    """
    Run the parent set-up, then start the test with empty caches.
    """
    super(CacheResetTest, self).setUp()
    # `_clear_all_caches` already clears the default cache, so a separate
    # `cache.clear()` call here would only repeat that work.
    self._clear_all_caches()
def tearDown(self):
    """
    Run the parent tear-down, then clear the caches again.
    """
    super(CacheResetTest, self).tearDown()
    # Clear whatever the test just cached.
    self._clear_all_caches()
def _clear_all_caches(self):
    """
    Clear the default cache and any custom caches.

    The custom cache is the local-memory cache registered under the
    location 'openassessment.ai.classifiers_dict'.
    """
    # Default Django cache first ...
    cache.clear()

    # ... then the local-memory cache, looked up by backend and location.
    classifier_cache = get_cache(
        'django.core.cache.backends.locmem.LocMemCache',
        LOCATION='openassessment.ai.classifiers_dict'
    )
    classifier_cache.clear()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment