Commit 689e452d by Will Daly

Add classy AI algorithm

Improve the AI benchmark script
Provide a Django cache to AI algorithms and use it to cache deserialized classifier data.
Add AI grading requirements, including EASE
Use wheels to install AI grading requirements
Silence warnings
Use in-memory cache without pickling
Add version info to serialized classifiers (both EASE and classy)
Default AI settings and better error messages
Strip chars before processing text
Add stemming
Use cPickle if it's available (both EASE and classy)
Generate list of grammar pos for most common words in treebank.
Move EASE algorithm into its own module
parent 86826ef5
...@@ -66,18 +66,17 @@ def get_grading_task_params(grading_workflow_uuid): ...@@ -66,18 +66,17 @@ def get_grading_task_params(grading_workflow_uuid):
raise AIGradingInternalError(msg) raise AIGradingInternalError(msg)
try: try:
classifiers = list(classifier_set.classifiers.select_related().all()) classifiers_dict = classifier_set.classifiers_dict
return { return {
'essay_text': workflow.essay_text, 'essay_text': workflow.essay_text,
'classifier_set': { 'classifier_set': {
classifier.criterion.name: classifier.download_classifier_data() criterion: classifier['data']
for classifier in classifiers for criterion, classifier in classifiers_dict.iteritems()
}, },
'algorithm_id': workflow.algorithm_id, 'algorithm_id': workflow.algorithm_id,
'valid_scores': { 'valid_scores': {
classifier.criterion.name: classifier.valid_scores criterion: classifier['valid_scores']
for classifier in classifiers for criterion, classifier in classifiers_dict.iteritems()
} }
} }
except (DatabaseError, ClassifierSerializeError, IncompleteClassifierSet, ValueError) as ex: except (DatabaseError, ClassifierSerializeError, IncompleteClassifierSet, ValueError) as ex:
......
...@@ -6,13 +6,13 @@ import json ...@@ -6,13 +6,13 @@ import json
import logging import logging
from django.conf import settings from django.conf import settings
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.core.cache import cache, get_cache from django.core.cache import cache
from django.db import models, transaction, DatabaseError from django.db import models, transaction, DatabaseError
from django.utils.timezone import now from django.utils.timezone import now
from django_extensions.db.fields import UUIDField from django_extensions.db.fields import UUIDField
from dogapi import dog_stats_api from dogapi import dog_stats_api
from submissions import api as sub_api from submissions import api as sub_api
from openassessment.assessment.serializers import rubric_from_dict from openassessment.cache import FastCache
from .base import Rubric, Criterion, Assessment, AssessmentPart from .base import Rubric, Criterion, Assessment, AssessmentPart
from .training import TrainingExample from .training import TrainingExample
...@@ -22,17 +22,6 @@ AI_ASSESSMENT_TYPE = "AI" ...@@ -22,17 +22,6 @@ AI_ASSESSMENT_TYPE = "AI"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Use an in-memory cache to hold classifier data, but allow settings to override this.
# The classifier data will generally be larger than memcached's default max size
CLASSIFIERS_CACHE = getattr(
settings, 'ORA2_CLASSIFIERS_CACHE',
get_cache(
'django.core.cache.backends.locmem.LocMemCache',
LOCATION='openassessment.ai.classifiers_dict'
)
)
class IncompleteClassifierSet(Exception): class IncompleteClassifierSet(Exception):
""" """
The classifier set is missing a classifier for a criterion in the rubric. The classifier set is missing a classifier for a criterion in the rubric.
...@@ -263,17 +252,17 @@ class AIClassifierSet(models.Model): ...@@ -263,17 +252,17 @@ class AIClassifierSet(models.Model):
# If we get to this point, no classifiers exist with this rubric and algorithm. # If we get to this point, no classifiers exist with this rubric and algorithm.
return None return None
# Number of seconds to store downloaded classifiers in the in-memory cache.
DEFAULT_CLASSIFIER_CACHE_TIMEOUT = 300
@property @property
def classifiers_dict(self): def classifiers_dict(self):
""" """
Return all classifiers in this classifier set in a dictionary Return all classifiers in this classifier set in a dictionary
that maps criteria names to classifier data. that maps criteria names to classifier data and valid scores.
Returns: Returns:
dict: keys are criteria names, values are JSON-serializable classifier data dict: keys are criteria names, values are dictionaries with keys
'data' (the serialized classifier data)
'valid_scores' (list of integers)
If there are no classifiers in the set, returns None If there are no classifiers in the set, returns None
""" """
...@@ -281,20 +270,22 @@ class AIClassifierSet(models.Model): ...@@ -281,20 +270,22 @@ class AIClassifierSet(models.Model):
# We use an in-memory cache because the classifier data will most often # We use an in-memory cache because the classifier data will most often
# be several megabytes, which exceeds the default memcached size limit. # be several megabytes, which exceeds the default memcached size limit.
# If we find it, we can avoid calls to the database, S3, and json. # If we find it, we can avoid calls to the database, S3, and json.
cache_key = unicode(self.id) cache_key = u"ora2.ai.classifier_set.classifiers_dict.{pk}".format(pk=self.pk)
classifiers_dict = CLASSIFIERS_CACHE.get(cache_key) fast_cache = FastCache()
classifiers_dict = fast_cache.get(cache_key)
# If we can't find the classifiers dict in the cache, # If we can't find the classifiers dict in the cache,
# we need to look up the classifiers in the database, # we need to look up the classifiers in the database,
# then download the classifier data. # then download the classifier data.
if classifiers_dict is None: if classifiers_dict is None:
classifiers = list(self.classifiers.all()) # pylint: disable=E1101
classifiers_dict = { classifiers_dict = {
classifier.criterion.name: classifier.download_classifier_data() classifier.criterion.name: {
for classifier in classifiers 'data': classifier.download_classifier_data(),
'valid_scores': classifier.valid_scores,
}
for classifier in self.classifiers.select_related().all() # pylint: disable=E1101
} }
timeout = getattr(settings, 'ORA2_CLASSIFIER_CACHE_TIMEOUT', self.DEFAULT_CLASSIFIER_CACHE_TIMEOUT) fast_cache.set(cache_key, classifiers_dict)
CLASSIFIERS_CACHE.set(cache_key, classifiers_dict, timeout)
return classifiers_dict if classifiers_dict else None return classifiers_dict if classifiers_dict else None
...@@ -378,12 +369,7 @@ class AIClassifier(models.Model): ...@@ -378,12 +369,7 @@ class AIClassifier(models.Model):
list of integer scores, in ascending order. list of integer scores, in ascending order.
""" """
cache_key = u"openassessment.assessment.ai.classifier.{pk}.valid_scores".format(pk=self.pk) return sorted([option.points for option in self.criterion.options.all()])
valid_scores = cache.get(cache_key)
if valid_scores is None:
valid_scores = sorted([option.points for option in self.criterion.options.all()])
cache.set(cache_key, valid_scores)
return valid_scores
class AIWorkflow(models.Model): class AIWorkflow(models.Model):
...@@ -707,6 +693,8 @@ class AIGradingWorkflow(AIWorkflow): ...@@ -707,6 +693,8 @@ class AIGradingWorkflow(AIWorkflow):
submission = sub_api.get_submission_and_student(submission_uuid) submission = sub_api.get_submission_and_student(submission_uuid)
# Get or create the rubric # Get or create the rubric
# (import the function here to avoid a circular dependency)
from openassessment.assessment.serializers import rubric_from_dict
rubric = rubric_from_dict(rubric_dict) rubric = rubric_from_dict(rubric_dict)
# Retrieve the submission text # Retrieve the submission text
......
...@@ -14,9 +14,8 @@ from openassessment.assessment.api import ai as ai_api ...@@ -14,9 +14,8 @@ from openassessment.assessment.api import ai as ai_api
from openassessment.assessment.models import ( from openassessment.assessment.models import (
AITrainingWorkflow, AIGradingWorkflow, AIClassifierSet, Assessment AITrainingWorkflow, AIGradingWorkflow, AIClassifierSet, Assessment
) )
from openassessment.assessment.models import AITrainingWorkflow, AIGradingWorkflow, AIClassifierSet
from openassessment.assessment.worker.algorithm import AIAlgorithm, AIAlgorithmError
from openassessment.assessment.serializers import rubric_from_dict from openassessment.assessment.serializers import rubric_from_dict
from openassessment.assessment.worker.algorithm import AIAlgorithm
from openassessment.assessment.errors import ( from openassessment.assessment.errors import (
AITrainingRequestError, AITrainingInternalError, AIGradingRequestError, AITrainingRequestError, AITrainingInternalError, AIGradingRequestError,
AIReschedulingInternalError, AIGradingInternalError, AIError AIReschedulingInternalError, AIGradingInternalError, AIError
...@@ -47,7 +46,7 @@ class StubAIAlgorithm(AIAlgorithm): ...@@ -47,7 +46,7 @@ class StubAIAlgorithm(AIAlgorithm):
classifier['score_override'] = 0 classifier['score_override'] = 0
return classifier return classifier
def score(self, text, classifier, cache): def score(self, text, classifier, cache, temp_cache):
""" """
Stub implementation that returns whatever scores were Stub implementation that returns whatever scores were
provided in the serialized classifier data. provided in the serialized classifier data.
...@@ -117,7 +116,7 @@ class AITrainingTest(CacheResetTest): ...@@ -117,7 +116,7 @@ class AITrainingTest(CacheResetTest):
# Since the stub data includes the training examples, we also verify # Since the stub data includes the training examples, we also verify
# that the classifier was trained using the correct examples. # that the classifier was trained using the correct examples.
for criterion in RUBRIC['criteria']: for criterion in RUBRIC['criteria']:
classifier = classifiers[criterion['name']] classifier = classifiers[criterion['name']]['data']
self.assertEqual(classifier['name'], StubAIAlgorithm.FAKE_CLASSIFIER['name']) self.assertEqual(classifier['name'], StubAIAlgorithm.FAKE_CLASSIFIER['name'])
self.assertEqual(classifier['binary_content'], StubAIAlgorithm.FAKE_CLASSIFIER['binary_content']) self.assertEqual(classifier['binary_content'], StubAIAlgorithm.FAKE_CLASSIFIER['binary_content'])
......
...@@ -239,7 +239,7 @@ class AIWorkerGradingTest(CacheResetTest): ...@@ -239,7 +239,7 @@ class AIWorkerGradingTest(CacheResetTest):
# The second time through we should be caching the queries # The second time through we should be caching the queries
# to determine the valid scores for a classifier # to determine the valid scores for a classifier
with self.assertNumQueries(3): with self.assertNumQueries(2):
ai_worker_api.get_grading_task_params(self.workflow_uuid) ai_worker_api.get_grading_task_params(self.workflow_uuid)
def test_get_grading_task_params_no_workflow(self): def test_get_grading_task_params_no_workflow(self):
......
...@@ -31,7 +31,7 @@ class StubAIAlgorithm(AIAlgorithm): ...@@ -31,7 +31,7 @@ class StubAIAlgorithm(AIAlgorithm):
def train_classifier(self, examples): def train_classifier(self, examples):
return {} return {}
def score(self, text, classifier, cache): def score(self, text, classifier, cache, temp_cache):
return 0 return 0
...@@ -42,7 +42,7 @@ class ErrorStubAIAlgorithm(AIAlgorithm): ...@@ -42,7 +42,7 @@ class ErrorStubAIAlgorithm(AIAlgorithm):
def train_classifier(self, examples): def train_classifier(self, examples):
raise TrainingError("Test error!") raise TrainingError("Test error!")
def score(self, text, classifier, cache): def score(self, text, classifier, cache, temp_cache):
raise ScoreError("Test error!") raise ScoreError("Test error!")
...@@ -55,7 +55,7 @@ class InvalidScoreAlgorithm(AIAlgorithm): ...@@ -55,7 +55,7 @@ class InvalidScoreAlgorithm(AIAlgorithm):
def train_classifier(self, examples): def train_classifier(self, examples):
return {} return {}
def score(self, text, classifier, cache): def score(self, text, classifier, cache, temp_cache):
return self.SCORE_CYCLE.next() return self.SCORE_CYCLE.next()
......
This source diff could not be displayed because it is too large. You can view the blob instead.
"""
Wrapper for the EASE library.
See https://github.com/edx/ease for more information.
"""
try:
import cPickle as pickle
except ImportError:
import pickle
import traceback
import hashlib
import base64
import numpy
import nltk
import scipy
import sklearn
from ease.essay_set import EssaySet
from ease.create import create
from .algorithm import AIAlgorithm, TrainingError, ScoreError, InvalidClassifier
class EaseAIAlgorithm(AIAlgorithm):
    """
    Wrapper for the EASE library.
    See https://github.com/edx/ease for more information.

    Since EASE has many system dependencies, we don't include it explicitly
    in edx-ora2 requirements.  When testing locally, we use the fake
    algorithm implementation instead.
    """

    # Version stamp stored with serialized classifiers, so we can detect
    # classifiers trained by an incompatible version of this wrapper.
    VERSION = "0.0.1"

    def train_classifier(self, examples):
        """
        Train a text classifier using the EASE library.
        The classifier is serialized as a dictionary with keys:
            * 'feature_extractor': The pickled feature extractor (transforms text into a numeric feature vector).
            * 'score_classifier': The pickled classifier (uses the feature vector to assign scores to essays).

        Because we are using `pickle`, the serialized classifiers are unfortunately
        tied to the particular version of ease/scikit-learn/numpy/scipy/nltk that we
        have installed at the time of training.

        Args:
            examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.

        Returns:
            dict: The serializable classifier.

        Raises:
            TrainingError: The classifier could not be trained successfully.
        """
        # Check that we have at least two unique scores:
        # a classifier cannot learn a decision boundary from one class.
        if len(set(example.score for example in examples)) < 2:
            raise TrainingError("You must provide at least one positive and one negative training example")

        feature_ext, classifier = self._train_classifiers(examples)
        return self._serialize_classifiers(feature_ext, classifier)

    def score(self, text, classifier, cache, temp_cache):
        """
        Score essays using EASE.

        Args:
            text (unicode): The essay text to score.
            classifier (dict): The serialized classifiers created during training.
            cache (openassessment.cache.FastCache): An in-memory cache that persists between tasks.
            temp_cache (openassessment.cache.TempCache): An in-memory cache that persists
                for the duration of the current task.

        Returns:
            int

        Raises:
            InvalidClassifier
            ScoreError
        """
        feature_extractor, score_classifier = self._deserialize_classifiers(classifier, cache)

        # The following is a modified version of `ease.grade.grade()`,
        # skipping things we don't use (cross-validation, feedback)
        # and caching essay sets across criteria.  This allows us to
        # avoid some expensive NLTK operations, particularly tagging
        # parts of speech.
        try:
            # Get the essay set from the cache or create it.
            # Since all essays to be graded are assigned a dummy
            # score of "0", we can safely re-use the essay set
            # for each criterion in the rubric.
            # EASE can't handle non-ASCII unicode, so we need
            # to strip out non-ASCII chars.
            essay_set = temp_cache.get(text)
            if essay_set is None:
                essay_set = EssaySet(essaytype="test")
                essay_set.add_essay(text.encode('ascii', 'ignore'), 0)
                temp_cache.set(text, essay_set)

            # Extract features from the text
            features = feature_extractor.gen_feats(essay_set)

            # Predict a score
            return int(score_classifier.predict(features)[0])
        except Exception:
            # Wrap any error from EASE/sklearn in a ScoreError.
            # Previously this was a bare `except:`, which would also have
            # swallowed SystemExit and KeyboardInterrupt.
            msg = (
                u"An unexpected error occurred while using "
                u"EASE to score an essay: {traceback}"
            ).format(traceback=traceback.format_exc())
            raise ScoreError(msg)

    def _train_classifiers(self, examples):
        """
        Use EASE to train classifiers.

        Args:
            examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.

        Returns:
            tuple of `feature_extractor` (an `ease.feature_extractor.FeatureExtractor` object)
            and `classifier` (a `sklearn.ensemble.GradientBoostingClassifier` object).

        Raises:
            TrainingError: Could not load EASE or could not complete training.
        """
        input_essays = [example.text for example in examples]
        input_scores = [example.score for example in examples]

        try:
            # Train the classifiers
            # The third argument is the essay prompt, which EASE uses
            # to check if an input essay is too similar to the prompt.
            # Since we're not using this feature, we pass in an empty string.
            results = create(input_essays, input_scores, "")
        except Exception:
            # Wrap any error from EASE in a TrainingError.
            # Previously this was a bare `except:`, which would also have
            # swallowed SystemExit and KeyboardInterrupt.
            msg = (
                u"An unexpected error occurred while using "
                u"EASE to train classifiers: {traceback}"
            ).format(traceback=traceback.format_exc())
            raise TrainingError(msg)

        if not results.get('success', False):
            msg = (
                u"Errors occurred while training classifiers "
                u"using EASE: {errors}"
            ).format(errors=results.get('errors', []))
            raise TrainingError(msg)

        return results.get('feature_ext'), results.get('classifier')

    def _serialize_classifiers(self, feature_ext, classifier):
        """
        Serialize the classifier objects.

        Args:
            feature_ext (ease.feature_extractor.FeatureExtractor)
            classifier (sklearn.ensemble.GradientBoostingClassifier)

        Returns:
            dict containing the pickled classifiers, plus the version of this
            wrapper and of each library whose pickle format we depend on.

        Raises:
            TrainingError: Could not serialize the classifiers.
        """
        try:
            return {
                'feature_extractor': base64.b64encode(pickle.dumps(feature_ext)),
                'score_classifier': base64.b64encode(pickle.dumps(classifier)),
                'algorithm-version': self.VERSION,
                'sklearn-version': sklearn.__version__,
                'nltk-version': nltk.__version__,
                'numpy-version': numpy.__version__,
                'scipy-version': scipy.__version__,
            }
        except (pickle.PickleError, ValueError, TypeError) as ex:
            msg = (
                u"An error occurred while serializing the classifiers "
                u"created by EASE: {ex}"
            ).format(ex=ex)
            raise TrainingError(msg)

    def _deserialize_classifiers(self, classifier_data, cache):
        """
        Deserialize the classifier objects.

        Args:
            classifier_data (dict): The serialized classifiers.
            cache (Django cache): An in-memory cache.

        Returns:
            tuple of `(feature_extractor, score_classifier)`

        Raises:
            InvalidClassifier
        """
        if not isinstance(classifier_data, dict):
            raise InvalidClassifier("Classifier must be a dictionary.")

        # Fail with a clear error if either required key is missing,
        # instead of propagating a bare KeyError to the caller.
        try:
            serialized_extractor = classifier_data['feature_extractor']
            serialized_classifier = classifier_data['score_classifier']
        except KeyError as ex:
            msg = u"Classifier data is missing a required key: {key}".format(key=ex)
            raise InvalidClassifier(msg)

        feature_extractor = self._deserialize_pickled_object(
            'feature-extractor', serialized_extractor, cache, u"feature extractor"
        )
        score_classifier = self._deserialize_pickled_object(
            'score-classifier', serialized_classifier, cache, u"score classifier"
        )
        return feature_extractor, score_classifier

    def _deserialize_pickled_object(self, name, serialized, cache, description):
        """
        Unpickle a base-64 encoded object, using `cache` to avoid
        repeatedly deserializing the same (potentially large) data.

        Args:
            name (str): Short name used to build the cache key.
            serialized (str): Base-64 encoded, pickled object.
            cache (Django cache): An in-memory cache.
            description (unicode): Human-readable description of the object,
                used in error messages.

        Returns:
            The deserialized object.

        Raises:
            InvalidClassifier: The object could not be deserialized.
        """
        cache_key = self._cache_key(name, serialized)
        obj = cache.get(cache_key)
        if obj is None:
            try:
                obj = pickle.loads(base64.b64decode(serialized))
            except (pickle.PickleError, ValueError, TypeError) as ex:
                msg = (
                    u"An error occurred while deserializing the "
                    u"EASE {description}: {ex}"
                ).format(description=description, ex=ex)
                raise InvalidClassifier(msg)
            else:
                cache.set(cache_key, obj)
        return obj

    def _cache_key(self, name, contents):
        """
        Return a cache key using an MD5 hash digest of `contents`.

        Args:
            name (str): The name of the key.
            contents (str): Used to create a hash digest for the key.

        Returns:
            unicode: The cache key.
        """
        hasher = hashlib.md5()
        if isinstance(contents, unicode):
            contents = contents.encode('utf-8')
        hasher.update(contents)
        return u"ora2.ai.algorithm.ease.{name}.{hash}".format(name=name, hash=hasher.hexdigest())
...@@ -8,12 +8,13 @@ from django.db import DatabaseError ...@@ -8,12 +8,13 @@ from django.db import DatabaseError
from django.conf import settings from django.conf import settings
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from dogapi import dog_stats_api from dogapi import dog_stats_api
from openassessment.cache import FastCache, TempCache
from openassessment.assessment.api import ai_worker as ai_worker_api from openassessment.assessment.api import ai_worker as ai_worker_api
from openassessment.assessment.errors import ( from openassessment.assessment.errors import (
AIError, AIGradingInternalError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS AIError, AIGradingInternalError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
) )
from .algorithm import AIAlgorithm, AIAlgorithmError
from openassessment.assessment.models.ai import AIGradingWorkflow from openassessment.assessment.models.ai import AIGradingWorkflow
from .algorithm import AIAlgorithm, AIAlgorithmError
MAX_RETRIES = 2 MAX_RETRIES = 2
...@@ -92,13 +93,12 @@ def grade_essay(workflow_uuid): ...@@ -92,13 +93,12 @@ def grade_essay(workflow_uuid):
raise grade_essay.retry() raise grade_essay.retry()
# Use the algorithm to evaluate the essay for each criterion # Use the algorithm to evaluate the essay for each criterion
# Provide an in-memory cache so the algorithm can re-use
# results for multiple rubric criteria.
try: try:
cache = dict() cache = FastCache()
temp_cache = TempCache()
scores_by_criterion = { scores_by_criterion = {
criterion_name: _closest_valid_score( criterion_name: _closest_valid_score(
algorithm.score(essay_text, classifier, cache), algorithm.score(essay_text, classifier, cache, temp_cache),
valid_scores[criterion_name] valid_scores[criterion_name]
) )
for criterion_name, classifier in classifier_set.iteritems() for criterion_name, classifier in classifier_set.iteritems()
......
"""
In-memory cache implementations.
"""
import time
from django.core.cache import BaseCache
from django.utils.synch import RWLock
# Global variables shared by all `FastCache` instances.
# Because these live at module level, they persist for the lifetime of the
# Python process: every `FastCache` created in this process reads and writes
# the same underlying dictionaries and lock.
_CACHE = {}
_EXPIRE_INFO = {}
_LOCK = RWLock()
class FastCache(BaseCache):
    """
    A thread-safe, in-memory cache. Django's in-memory cache implementation
    unfortunately pickles the objects it stores -- since we want
    to cache un-pickled objects, this doesn't help us!
    This uses a very simple cache invalidation scheme:
    clear the entire cache after a time limit is reached.

    All instances share the same module-level storage (`_CACHE`,
    `_EXPIRE_INFO`, `_LOCK`), so creating a new `FastCache` is cheap and
    sees the same data as every other instance in the process.
    """
    def __init__(self, params=None):
        """
        Create a new cache instance. This shares state
        with all other `FastCache` instances.

        Kwargs:
            params (dict): Configuration passed to `BaseCache` (which reads
                keys such as 'timeout' to set `self.default_timeout`).
                Defaults to an empty dict.
        """
        if params is None:
            params = dict()
        super(FastCache, self).__init__(params)
        global _CACHE, _EXPIRE_INFO, _LOCK # pylint: disable=W0602
        # Store local references to the global variables
        self._cache = _CACHE
        self._expire_info = _EXPIRE_INFO
        self._lock = _LOCK
        # We store a reference to a dictionary instead of to a timestamp
        # so we can modify the timestamp while referring to the same dictionary
        # shared with the other cache instances.
        # Only the first instance created sets the expiration time;
        # later instances leave the shared value untouched.
        if 'time' not in self._expire_info:
            self._expire_info['time'] = time.time() + self.default_timeout
    def get(self, key, default=None, version=None):
        """Retrieve a value from the cache.

        Args:
            key: The cache key.

        Kwargs:
            default: Value returned when the key is missing (default None).
            version: Optional key version, handled by `BaseCache.make_key`.

        Returns:
            The cached value, or `default` if not found (or if the whole
            cache was just invalidated by `_clear_if_expired`).
        """
        key = self._make_and_validate_key(key, version)
        self._clear_if_expired()
        with self._lock.reader():
            return self._cache.get(key, default)
    def set(self, key, value, timeout=None, version=None):
        """Set a value in the cache.

        Note: the per-key `timeout` argument is accepted for API
        compatibility with `BaseCache` but is ignored -- invalidation
        happens for the whole cache at once (see `_clear_if_expired`).
        """
        key = self._make_and_validate_key(key, version)
        self._clear_if_expired()
        with self._lock.writer():
            self._cache[key] = value
    def clear(self):
        """Clear all values in the cache and reset the shared expiration time."""
        with self._lock.writer():
            self._cache.clear()
            self._expire_info['time'] = time.time() + self.default_timeout
    def add(self, key, value, timeout=None, version=None):
        # Part of the `BaseCache` interface, but deliberately unsupported here.
        raise NotImplementedError
    def _clear_if_expired(self):
        """Invalidate the cache if its expiration time has passed."""
        # NOTE(review): the timestamp is read without holding the lock;
        # `clear()` itself takes the writer lock, so concurrent callers may
        # each trigger a clear -- presumably acceptable since clearing twice
        # is harmless.  Confirm this is intentional.
        if time.time() > self._expire_info['time']:
            self.clear()
    def _make_and_validate_key(self, key, version):
        """Create a versioned key and validate it (via `BaseCache` helpers)."""
        key = self.make_key(key, version=version)
        self.validate_key(key)
        return key
class TempCache(BaseCache):
    """
    An in-memory cache designed for temporary use (within a request).

    Unlike `FastCache`, this keeps no global state: once the instance is
    garbage-collected, its contents become collectable too (assuming no
    other hard references to the values).

    This cache is NOT thread-safe.
    """
    def __init__(self):
        """Create a new, empty cache instance."""
        super(TempCache, self).__init__({})
        self._cache = dict()
    def get(self, key, default=None, version=None):
        """Return the cached value for `key`, or `default` if absent."""
        if key in self._cache:
            return self._cache[key]
        return default
    def set(self, key, value, timeout=None, version=None):
        """Store `value` under `key` (the `timeout` argument is ignored)."""
        self._cache[key] = value
    def clear(self):
        """Remove every entry from the cache."""
        self._cache.clear()
    def add(self, key, value, timeout=None, version=None):
        # Part of the `BaseCache` interface, but not supported here.
        raise NotImplementedError
""" """
Test utilities Test utilities
""" """
from django.core.cache import cache, get_cache from django.core.cache import cache
from django.test import TestCase from django.test import TestCase
from openassessment.cache import FastCache
class CacheResetTest(TestCase): class CacheResetTest(TestCase):
...@@ -22,7 +23,4 @@ class CacheResetTest(TestCase): ...@@ -22,7 +23,4 @@ class CacheResetTest(TestCase):
Clear the default cache and any custom caches. Clear the default cache and any custom caches.
""" """
cache.clear() cache.clear()
get_cache( FastCache().clear()
'django.core.cache.backends.locmem.LocMemCache',
LOCATION='openassessment.ai.classifiers_dict'
).clear()
# coding=utf-8
"""
Tests for in-memory cache implementations.
"""
import time
import itertools
from threading import Thread
from openassessment.test_utils import CacheResetTest
from openassessment.cache import FastCache, TempCache
class FastCacheTest(CacheResetTest):
    """
    Tests for the fast cache implementation.
    """

    # Number of threads to spawn and number of keys each thread writes.
    # The verification step must use the same values as the thread-spawning
    # step, so define them once here.  (Previously the verification used
    # range(5) while 20 threads were started, so most writes went unchecked.)
    NUM_THREADS = 20
    KEYS_PER_THREAD = 100

    def setUp(self):
        super(FastCacheTest, self).setUp()
        self.cache = FastCache()
    def test_single_thread(self):
        self.assertIs(self.cache.get(u'𝓽𝓮𝓼𝓽'), None)
        self.cache.set(u'𝓽𝓮𝓼𝓽', u'ѕυρєяƒℓу')
        self.assertEqual(self.cache.get(u'𝓽𝓮𝓼𝓽'), u'ѕυρєяƒℓу')
        self.cache.clear()
        self.assertIs(self.cache.get(u'𝓽𝓮𝓼𝓽'), None)
    def test_multiple_threads(self):
        def _thread(thread_num):
            """
            Set and get keys in the cache.
            """
            for count in range(self.KEYS_PER_THREAD):
                time.sleep(0.05)
                key = u"thread {thread_num}, key {key_num}".format(
                    thread_num=thread_num, key_num=count
                )
                self.cache.set(key, count)
                self.cache.get(key)
        # Start threads that set/get keys from the cache
        threads = [
            Thread(target=_thread, args=(thread_num,))
            for thread_num in range(self.NUM_THREADS)
        ]
        for thread in threads:
            thread.start()
        # Wait for all the threads to finish
        for thread in threads:
            thread.join(300)
        # Verify that all the keys were set correctly -- every key written
        # by every thread, not just a subset.
        expected_values = {
            u"thread {thread_num}, key {key_num}".format(
                thread_num=thread_num, key_num=key_num
            ): key_num
            for thread_num, key_num in itertools.product(
                range(self.NUM_THREADS), range(self.KEYS_PER_THREAD)
            )
        }
        for key, value in expected_values.iteritems():
            self.assertEqual(self.cache.get(key), value)
    def test_multiple_threads_with_clear(self):
        def _thread():
            """
            Set and clear the cache.
            """
            for count in range(self.KEYS_PER_THREAD):
                time.sleep(0.05)
                self.cache.set('test', count)
                self.cache.clear()
        # Start threads that set/clear the cache
        threads = [Thread(target=_thread) for _ in range(self.NUM_THREADS)]
        for thread in threads:
            thread.start()
        # Wait for all threads to finish
        for thread in threads:
            thread.join(300)
        # Expect that the cache is empty
        self.assertIs(self.cache.get('test'), None)
    def test_expiration(self):
        # Artificially set the timeout to 0, so the cache
        # should be invalidated immediately
        self.cache.default_timeout = 0
        self.cache.clear()
        self.cache.set(u'𝕵𝖆𝖒𝖊𝖘 𝕭𝖗𝖔𝖜𝖓', u'ᴡɪᴛʜ ʜɪꜱ ᴏᴡɴ ʙᴀᴅ ꜱᴇʟꜰ')
        self.assertIs(self.cache.get(u'𝕵𝖆𝖒𝖊𝖘 𝕭𝖗𝖔𝖜𝖓'), None)
class TempCacheTest(CacheResetTest):
    """
    Tests for the temp cache implementation.
    """
    def setUp(self):
        """Create a fresh `TempCache` for each test."""
        super(TempCacheTest, self).setUp()
        self.cache = TempCache()
    def test_single_thread(self):
        """Set, retrieve, and clear a single value."""
        key = u'𝓽𝓮𝓼𝓽'
        value = u'ѕυρєяƒℓу'
        # A missing key yields None
        self.assertIs(self.cache.get(key), None)
        # After set, the value is retrievable
        self.cache.set(key, value)
        self.assertEqual(self.cache.get(key), value)
        # After clear, the key is gone again
        self.cache.clear()
        self.assertIs(self.cache.get(key), None)
<openassessment> <openassessment>
<title>Example Based Example</title> <title>Example Based Example</title>
<assessments> <assessments>
<assessment name="example-based-assessment" algorithm_id="fake"> <assessment name="example-based-assessment" algorithm_id="classy">
<example> <example>
<answer>Born in northern New South Wales, Dowling entered the Royal Australian Naval College in 1915. After graduating in 1919 he went to sea aboard various Royal Navy and RAN vessels, and later specialised in gunnery. In 1937, he was given command of the sloop HMAS Swan. Following the outbreak of World War II, he saw action in the Mediterranean theatre as executive officer of the Royal Navy cruiser HMS Naiad, and survived her sinking by a German U-boat in March 1942. Returning to Australia, he served as Director of Plans and later Deputy Chief of Naval Staff before taking command of the light cruiser HMAS Hobart in November 1944. His achievements in the South West Pacific earned him the Distinguished Service Order. <answer>Born in northern New South Wales, Dowling entered the Royal Australian Naval College in 1915. After graduating in 1919 he went to sea aboard various Royal Navy and RAN vessels, and later specialised in gunnery. In 1937, he was given command of the sloop HMAS Swan. Following the outbreak of World War II, he saw action in the Mediterranean theatre as executive officer of the Royal Navy cruiser HMS Naiad, and survived her sinking by a German U-boat in March 1942. Returning to Australia, he served as Director of Plans and later Deputy Chief of Naval Staff before taking command of the light cruiser HMAS Hobart in November 1944. His achievements in the South West Pacific earned him the Distinguished Service Order.
...@@ -66,4 +66,4 @@ ...@@ -66,4 +66,4 @@
(Optional) What aspects of this response stood out to you? What did it do well? How could it improve? (Optional) What aspects of this response stood out to you? What did it do well? How could it improve?
</feedbackprompt> </feedbackprompt>
</rubric> </rubric>
</openassessment> </openassessment>
\ No newline at end of file
...@@ -943,7 +943,7 @@ ...@@ -943,7 +943,7 @@
"name": "example-based-assessment", "name": "example-based-assessment",
"start": null, "start": null,
"due": null, "due": null,
"algorithm_id": "ease", "algorithm_id": "classy",
"examples": [ "examples": [
{ {
"answer": "тєѕт αηѕωєя", "answer": "тєѕт αηѕωєя",
......
...@@ -134,8 +134,8 @@ def validate_assessments(assessments, current_assessments, is_released): ...@@ -134,8 +134,8 @@ def validate_assessments(assessments, current_assessments, is_released):
# Example-based assessment MUST specify 'ease' as the algorithm ID, # Example-based assessment MUST specify 'ease' as the algorithm ID,
# at least for now. Later, we may make this more flexible. # at least for now. Later, we may make this more flexible.
if assessment_dict.get('name') == 'example-based-assessment': if assessment_dict.get('name') == 'example-based-assessment':
if assessment_dict.get('algorithm_id') not in ['ease', 'fake']: if assessment_dict.get('algorithm_id') not in ['ease', 'classy', 'fake']:
return (False, _('The "algorithm_id" value must be set to "ease" or "fake"')) return (False, _('The "algorithm_id" value must be set to "ease", "classy", or "fake"'))
if is_released: if is_released:
if len(assessments) != len(current_assessments): if len(assessments) != len(current_assessments):
......
...@@ -444,7 +444,7 @@ def _parse_assessments_xml(assessments_root): ...@@ -444,7 +444,7 @@ def _parse_assessments_xml(assessments_root):
if assessment_dict['name'] == 'example-based-assessment': if assessment_dict['name'] == 'example-based-assessment':
assessment_dict['examples'] = _parse_examples_xml(examples) assessment_dict['examples'] = _parse_examples_xml(examples)
assessment_dict['algorithm_id'] = unicode(assessment.get('algorithm_id', 'ease')) assessment_dict['algorithm_id'] = unicode(assessment.get('algorithm_id', 'classy'))
# Update the list of assessments # Update the list of assessments
assessments_list.append(assessment_dict) assessments_list.append(assessment_dict)
......
...@@ -24,4 +24,4 @@ pytz==2012h ...@@ -24,4 +24,4 @@ pytz==2012h
South==0.7.6 South==0.7.6
# AI grading # AI grading
git+https://github.com/edx/ease.git@a990b25ed4238acb1b15ee6f027465db3a10960e#egg=ease git+https://github.com/edx/ease.git@f11737af630196ad1153a1cbad403627a748d363#egg=ease
...@@ -5,5 +5,5 @@ ...@@ -5,5 +5,5 @@
lxml==3.0.1 lxml==3.0.1
nltk==2.0.3 nltk==2.0.3
numpy==1.6.2 numpy==1.6.2
scikit-learn==0.12.1 scikit-learn==0.14.1
scipy==0.11.0 scipy==0.11.0
#!/usr/bin/env python #!/usr/bin/env python
""" """
Benchmark the execution time of the EASE algorithm for scoring essays. Benchmark the execution time of the algorithms for scoring essays.
""" """
import os import sys
import json import json
import time import time
import math
import contextlib import contextlib
from openassessment.assessment.worker.algorithm import AIAlgorithm, EaseAIAlgorithm import random
from collections import defaultdict
import csv
# Configure Django settings so we can import openassessment modules that uses Django
from django.conf import settings
settings.configure()
from openassessment.cache import FastCache, TempCache
from openassessment.assessment.worker.algorithm import AIAlgorithm, EaseAIAlgorithm, FakeAIAlgorithm
from openassessment.assessment.worker.classy import ClassyAIAlgorithm
NUM_TRIALS = 10
NUM_TEST_SET = 10
#ALGORITHM = EaseAIAlgorithm
#ALGORITHM = FakeAIAlgorithm
ALGORITHM = ClassyAIAlgorithm
NUM_TRIALS = 3
NUM_CRITERIA = 10
DATA_FILE_PATH = os.path.abspath(
os.path.join(
os.path.dirname(__file__),
'data/ai-test-data.json'
)
)
@contextlib.contextmanager @contextlib.contextmanager
def benchmark(name): def benchmark(name, store=None):
""" """
Print the duration in seconds for a block of code. Print the duration in seconds for a block of code.
Args: Args:
name (unicode): A descriptive name for the benchmark name (unicode): A descriptive name for the benchmark
Kwargs:
store (list): If provided, append the time in seconds to this list.
Returns: Returns:
None None
...@@ -39,6 +51,8 @@ def benchmark(name): ...@@ -39,6 +51,8 @@ def benchmark(name):
end = time.clock() end = time.clock()
duration = end - start duration = end - start
print u"{name} took {duration} seconds".format(name=name, duration=duration) print u"{name} took {duration} seconds".format(name=name, duration=duration)
if store is not None:
store.append(duration)
def load_training_data(data_path): def load_training_data(data_path):
...@@ -51,43 +65,140 @@ def load_training_data(data_path): ...@@ -51,43 +65,140 @@ def load_training_data(data_path):
with keys 'text' (unicode) and 'score' (int). with keys 'text' (unicode) and 'score' (int).
Returns: Returns:
list of `AIAlgorithm.ExampleEssay`s dictionary of with keys for each criterion
and values that are lists of `AIAlgorithm.ExampleEssay`s
Also returns the number of examples loaded.
""" """
print "Loading training data..." print u"Loading training data from {path}...".format(path=data_path)
with open(data_path) as data_file: with open(data_path) as data_file:
input_examples = json.load(data_file) input_examples = json.load(data_file)
print "Done." print "Done (loaded {num} examples)".format(num=len(input_examples))
# Shuffle the input examples
random.shuffle(input_examples)
# Separate by criterion
examples_by_criterion = defaultdict(list)
for example in input_examples:
for criterion, score in example['criteria'].iteritems():
examples_by_criterion[criterion].append(
AIAlgorithm.ExampleEssay(
text=example['text'],
score=int(score)
)
)
return examples_by_criterion, len(input_examples)
return [
def avg(nums):
    """
    Return the arithmetic mean of `nums` as a float.

    Args:
        nums (iterable of numbers): Values to average.  Must be non-empty.

    Returns:
        float
    """
    # Materialize once so any iterable (not just a list) works.
    values = [float(num) for num in nums]
    return sum(values) / len(values)


def stdev(nums):
    """
    Return the population standard deviation of `nums`.

    Args:
        nums (iterable of numbers): Values to measure.  Must be non-empty.

    Returns:
        float
    """
    values = [float(num) for num in nums]
    average = avg(values)
    # Population variance (divide by N, not N - 1)
    variance = sum((average - num) ** 2 for num in values) / len(values)
    return math.sqrt(variance)
def write_output(
    output_file, num_examples, scoring_times,
    point_deltas_by_criterion, score_matrix,
    scores, classifier_sizes
):
    """
    Write the benchmark results to a CSV file.

    Args:
        output_file (str): Path of the CSV file to create (overwritten).
        num_examples (int): Total number of examples loaded (training + test).
        scoring_times (list of float): Seconds spent scoring each test essay.
        point_deltas_by_criterion (dict): Maps criterion name to a list of
            (expected score - actual score) deltas.
        score_matrix (dict): criterion -> actual score -> expected score -> count.
        scores (dict): criterion -> actual score -> number of essays assigned
            that score.
        classifier_sizes (list of int): Serialized classifier sizes in bytes.

    Returns:
        None
    """
    # Open in binary mode: the Python 2 csv module writes spurious
    # blank rows on Windows when given a text-mode file.
    with open(output_file, 'wb') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['Num trials', NUM_TRIALS])
        csv_writer.writerow(['Training set size', num_examples - NUM_TEST_SET])
        csv_writer.writerow(['Test set size', NUM_TEST_SET])
        csv_writer.writerow(['Avg time per essay (seconds)', avg(scoring_times)])
        csv_writer.writerow(['Stdev time per essay', stdev(scoring_times)])
        csv_writer.writerow(['Avg classifier file size (bytes)', avg(classifier_sizes)])
        csv_writer.writerow(['Stdev classifier file size', stdev(classifier_sizes)])

        # Aggregate error across all criteria
        point_deltas = []
        for deltas in point_deltas_by_criterion.values():
            point_deltas.extend([abs(delta) for delta in deltas])
        csv_writer.writerow(['Avg error (points off per score)', avg(point_deltas)])
        csv_writer.writerow(['Stdev error', stdev(point_deltas)])

        # Per-criterion error
        for criterion, point_deltas in point_deltas_by_criterion.iteritems():
            abs_point_deltas = [abs(delta) for delta in point_deltas]
            csv_writer.writerow([u'{criterion} error'.format(criterion=criterion), avg(abs_point_deltas)])
            csv_writer.writerow([u'{criterion} stdev error'.format(criterion=criterion), stdev(abs_point_deltas)])

        # Confusion matrix: for each (criterion, actual score) pair, the
        # percentage of essays with each expected score.
        for criterion, counts in score_matrix.iteritems():
            for actual_score, expected_score_counts in counts.iteritems():
                for expected_score, count in expected_score_counts.iteritems():
                    percent = (float(count) / float(scores[criterion][actual_score])) * 100
                    csv_writer.writerow([
                        criterion, unicode(expected_score), unicode(actual_score),
                        "correct" if actual_score == expected_score else "incorrect",
                        u"{percent}%".format(percent=percent)
                    ])
def main(): def main():
""" """
Time training/scoring using EASE. Time training/scoring using EASE.
""" """
examples = load_training_data(DATA_FILE_PATH) if len(sys.argv) < 3:
algorithm = EaseAIAlgorithm() print "Usage: <INPUT EXAMPLES> <OUTPUT CSV>"
sys.exit(1)
print "Training classifier..."
with benchmark('Training'): # For repeatability between test runs
classifier = algorithm.train_classifier(examples[:-1]) # Because we reload and shuffle the examples before each trial,
print "Done." # each trial should have different test/training sets.
random.seed(1)
print u"Scoring essays ({num} criteria)...".format(num=NUM_CRITERIA)
for num in range(NUM_TRIALS): point_deltas_by_criterion = defaultdict(list)
cache = {} score_matrix = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0)))
with benchmark('Scoring (rubric)'): scores = defaultdict(lambda: defaultdict(lambda: 0))
for _ in range(NUM_CRITERIA): scoring_times = []
with benchmark('Scoring (criteria)'): classifier_sizes = []
algorithm.score(examples[-1].text, classifier, cache)
print "Finished scoring essay #{num}".format(num=num) for trial_num in range(NUM_TRIALS):
print "Trial #{trial}".format(trial=trial_num)
examples_by_criteria, num_examples = load_training_data(sys.argv[1])
algorithm = ALGORITHM()
print "Training classifiers..."
with benchmark('Training'):
classifiers = {}
for criterion, examples in examples_by_criteria.iteritems():
classifiers[criterion] = algorithm.train_classifier(examples[NUM_TEST_SET:])
classifier_sizes.append(len(json.dumps(classifiers)))
print "Done."
print "Scoring essays..."
cache = FastCache()
for essay_num in range(NUM_TEST_SET):
temp_cache = TempCache()
with benchmark('Scoring essay #{num}'.format(num=essay_num), store=scoring_times):
for criterion, examples in examples_by_criteria.iteritems():
example = examples[essay_num]
score = algorithm.score(example.text, classifiers[criterion], cache, temp_cache)
delta = float(example.score) - float(score)
point_deltas_by_criterion[criterion].append(delta)
score_matrix[criterion][score][example.score] += 1
scores[criterion][score] += 1
print u"Writing output to {output}".format(output=sys.argv[2])
output_path = sys.argv[2]
write_output(
output_path,
num_examples,
scoring_times,
point_deltas_by_criterion,
score_matrix, scores,
classifier_sizes
)
if __name__ == "__main__": if __name__ == "__main__":
......
#!/usr/bin/env python
import sys
import json
import nltk
from nltk.corpus import treebank
DISCARD_TAGS = ['-NONE-', 'CD']
def main():
"""
Generate a list of most common words and parts of speech.
"""
if len(sys.argv) < 3:
print "USAGE: <NUM WORDS> <OUTPUT FILE>"
sys.exit(1)
num_words = int(sys.argv[1])
output_path = sys.argv[2]
# Retrieve the most frequent words and determine
# their most common parts of speech
print "Building the dictionary..."
freq_dist = nltk.FreqDist(treebank.words())
cond_freq_dist = nltk.ConditionalFreqDist(treebank.tagged_words())
most_freq_words = freq_dist.keys()[:num_words]
likely_tags = {
word: cond_freq_dist[word].max()
for word in most_freq_words
}
# Filter out parts of speech we don't need
likely_tags = {
word: tag for word, tag in likely_tags.iteritems()
if tag not in DISCARD_TAGS
}
# Dump the data to a file
with open(output_path, 'w') as output_file:
output_str = json.dumps(likely_tags, indent=4)
output_file.write(output_str)
print u"Wrote output to {path}".format(path=output_path)
if __name__ == "__main__":
main()
[
{
"text": "Food is any substance[1] consumed to provide nutritional support for the body. It is usually of plant or animal origin, and contains essential nutrients, such as carbohydrates, fats, proteins, vitamins, or minerals. The substance is ingested by an organism and assimilated by the organism's cells in an effort to produce energy, maintain life, or stimulate growth. Historically, people secured food through two methods: hunting and gathering, and agriculture. Today, most of the food energy consumed by the world population is supplied by the food industry. Food safety and food security are monitored by agencies like the International Association for Food Protection, World Resources Institute, World Food Programme, Food and Agriculture Organization, and International Food Information Council. They address issues such as sustainability, biological diversity, climate change, nutritional economics, population growth, water supply, and access to food.",
"score": 0
},
{
"text": "Most food has its origin in plants. Some food is obtained directly from plants; but even animals that are used as food sources are raised by feeding them food derived from plants. Cereal grain is a staple food that provides more food energy worldwide than any other type of crop. Maize, wheat, and rice – in all of their varieties – account for 87% of all grain production worldwide.[2] Most of the grain that is produced worldwide is fed to livestock. Some foods not from animal or plant sources include various edible fungi, especially mushrooms. Fungi and ambient bacteria are used in the preparation of fermented and pickled foods like leavened bread, alcoholic drinks, cheese, pickles, kombucha, and yogurt. Another example is blue-green algae such as Spirulina.[3] Inorganic substances such as salt, baking soda and cream of tartar are used to preserve or chemically alter an ingredient.",
"score": 1
},
{
"text": "Many plants or plant parts are eaten as food. There are around 2,000 plant species which are cultivated for food, and many have several distinct cultivars.[4] Seeds of plants are a good source of food for animals, including humans, because they contain the nutrients necessary for the plant's initial growth, including many healthful fats, such as Omega fats. In fact, the majority of food consumed by human beings are seed-based foods. Edible seeds include cereals (maize, wheat, rice, et cetera), legumes (beans, peas, lentils, et cetera), and nuts. Oilseeds are often pressed to produce rich oils - sunflower, flaxseed, rapeseed (including canola oil), sesame, et cetera.[5] Seeds are typically high in unsaturated fats and, in moderation, are considered a health food, although not all seeds are edible. Large seeds, such as those from a lemon, pose a choking hazard, while seeds from cherries and apples contain cyanide which could be poisonous only if consumed in large volumes.[6] Fruits are the ripened ovaries of plants, including the seeds within. Many plants and animals have coevolved such that the fruits of the former are an attractive food source to the latter, because animals that eat the fruits may excrete the seeds some distance away. Fruits, therefore, make up a significant part of the diets of most cultures. Some botanical fruits, such as tomatoes, pumpkins, and eggplants, are eaten as vegetables.[7] (For more information, see list of fruits.) Vegetables are a second type of plant matter that is commonly eaten as food. These include root vegetables (potatoes and carrots), bulbs (onion family), leaf vegetables (spinach and lettuce), stem vegetables (bamboo shoots and asparagus), and inflorescence vegetables (globe artichokes and broccoli and other vegetables such as cabbage or cauliflower).[8]",
"score": 0
},
{
"text": "Animals are used as food either directly or indirectly by the products they produce. Meat is an example of a direct product taken from an animal, which comes from muscle systems or from organs. Various raw meats Food products produced by animals include milk produced by mammary glands, which in many cultures is drunk or processed into dairy products (cheese, butter, etc.). In addition, birds and other animals lay eggs, which are often eaten, and bees produce honey, a reduced nectar from flowers, which is a popular sweetener in many cultures. Some cultures consume blood, sometimes in the form of blood sausage, as a thickener for sauces, or in a cured, salted form for times of food scarcity, and others use blood in stews such as jugged hare.[9] Some cultures and people do not consume meat or animal food products for cultural, dietary, health, ethical, or ideological reasons. Vegetarians choose to forgo food from animal sources to varying degrees. Vegans do not consume any foods that are or contain ingredients from an animal source.",
"score": 2
},
{
"text": "Most food has always been obtained through agriculture. With increasing concern over both the methods and products of modern industrial agriculture, there has been a growing trend toward sustainable agricultural practices. This approach, partly fueled by consumer demand, encourages biodiversity, local self-reliance and organic farming methods.[10] Major influences on food production include international organizations (e.g. the World Trade Organization and Common Agricultural Policy), national government policy (or law), and war.[11] In popular culture, the mass production of food, specifically meats such as chicken and beef, has come under fire from various documentaries, most recently Food, Inc, documenting the mass slaughter and poor treatment of animals, often for easier revenues from large corporations. Along with a current trend towards environmentalism, people in Western culture have had an increasing trend towards the use of herbal supplements, foods for a specific group of person (such as dieters, women, or athletes), functional foods (fortified foods, such as omega-3 eggs), and a more ethnically diverse diet.[12] Several organisations have begun calling for a new kind of agriculture in which agroecosystems provide food but also support vital ecosystem services so that soil fertility and biodiversity are maintained rather than compromised. According to the International Water Management Institute and UNEP, well-managed agroecosystems not only provide food, fiber and animal products, they also provide services such as flood mitigation, groundwater recharge, erosion control and habitats for plants, birds fish and other animals.[13]",
"score": 3
},
{
"text": "Generally regarded as the most pleasant taste, sweetness is almost always caused by a type of simple sugar such as glucose or fructose, or disaccharides such as sucrose, a molecule combining glucose and fructose.[16] Complex carbohydrates are long chains and thus do not have the sweet taste. Artificial sweeteners such as sucralose are used to mimic the sugar molecule, creating the sensation of sweet, without the calories. Other types of sugar include raw sugar, which is known for its amber color, as it is unprocessed. As sugar is vital for energy and survival, the taste of sugar is pleasant. The stevia plant contains a compound known as steviol which, when extracted, has 300 times the sweetness of sugar while having minimal impact on blood sugar.[17] Sour Sourness is caused by the taste of acids, such as vinegar in alcoholic beverages. Sour foods include citrus, specifically lemons, limes, and to a lesser degree oranges. Sour is evolutionarily significant as it is a sign for a food that may have gone rancid due to bacteria.[18] Many foods, however, are slightly acidic, and help stimulate the taste buds and enhance flavor.",
"score": 1
},
{
"text": "Saltiness is the taste of alkali metal ions such as sodium and potassium. It is found in almost every food in low to moderate proportions to enhance flavor, although to eat pure salt is regarded as highly unpleasant. There are many different types of salt, with each having a different degree of saltiness, including sea salt, fleur de sel, kosher salt, mined salt, and grey salt. Other than enhancing flavor, its significance is that the body needs and maintains a delicate electrolyte balance, which is the kidney's function. Salt may be iodized, meaning iodine has been added to it, a necessary nutrient that promotes thyroid function. Some canned foods, notably soups or packaged broths, tend to be high in salt as a means of preserving the food longer. Historically speaking, salt has been used as a meat preservative as salt promotes water excretion, thus working as a preservative. Similarly, dried foods also promote food safety.[19] Bitter Bitterness is a sensation often considered unpleasant characterized by having a sharp, pungent taste. Dark, unsweetened chocolate, caffeine, lemon rind, and some types of fruit are known to be bitter. Umami Also named as Savoury. Umami, the Japanese word for delicious, is the least known in Western popular culture but has a long tradition in Asian cuisine. Umami is the taste of glutamates, especially monosodium glutamate (MSG).[16] It is characterized as savory, meaty, and rich in flavor. Salmon and mushrooms are foods high in umami. Meat and other animal byproducts are described as having this taste.[citation needed]",
"score": 2
}
]
...@@ -101,12 +101,6 @@ LOGGING = { ...@@ -101,12 +101,6 @@ LOGGING = {
# Store uploaded files in a dev-specific directory # Store uploaded files in a dev-specific directory
MEDIA_ROOT = os.path.join(BASE_DIR, 'storage/dev') MEDIA_ROOT = os.path.join(BASE_DIR, 'storage/dev')
# AI algorithm configuration
ORA2_AI_ALGORITHMS = {
'fake': 'openassessment.assessment.worker.algorithm.FakeAIAlgorithm',
'ease': 'openassessment.assessment.worker.algorithm.EaseAIAlgorithm'
}
# Celery Broker # Celery Broker
CELERY_BROKER_TRANSPORT = "amqp" CELERY_BROKER_TRANSPORT = "amqp"
CELERY_BROKER_HOSTNAME = "localhost:5672//" CELERY_BROKER_HOSTNAME = "localhost:5672//"
......
...@@ -113,12 +113,6 @@ LOGGING = { ...@@ -113,12 +113,6 @@ LOGGING = {
# Store uploaded files in a dev-specific directory # Store uploaded files in a dev-specific directory
MEDIA_ROOT = os.path.join(BASE_DIR, 'storage/dev') MEDIA_ROOT = os.path.join(BASE_DIR, 'storage/dev')
# AI algorithm configuration
ORA2_AI_ALGORITHMS = {
'fake': 'openassessment.assessment.worker.algorithm.FakeAIAlgorithm',
'ease': 'openassessment.assessment.worker.algorithm.EaseAIAlgorithm'
}
# Celery Broker # Celery Broker
CELERY_BROKER_TRANSPORT = "amqp" CELERY_BROKER_TRANSPORT = "amqp"
CELERY_BROKER_HOSTNAME = "localhost:5672//" CELERY_BROKER_HOSTNAME = "localhost:5672//"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment