Commit 0749ce1d by Will Daly

Added fake AI Algorithm; add EASE wrapper for AI training

parent 6883bfc0
......@@ -69,7 +69,7 @@ LOGIN_SCRIPT
echo "Downloading NLTK corpus..."
mkdir -p /home/vagrant/data
curl -o /home/vagrant/data/nltk.tmp.tar.tz http://edx-static.s3.amazonaws.com/nltk/nltk-data-20131113.tar.gz
tar zxf /home/vagrant/data/nltk.tmp.tar.tz
cd /home/vagrant/data && tar zxf /home/vagrant/data/nltk.tmp.tar.tz
echo "Install edx-ora2..."
cd /home/vagrant/edx-ora2 && ./scripts/install.sh
......
"""
Celery looks for tasks in this module,
so import the tasks we want the workers to implement.
"""
# pylint:disable=W0611
from .worker.training import train_classifiers
# coding=utf-8
"""
Tests for AI algorithm implementations.
"""
import unittest
import mock
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.worker.algorithm import (
AIAlgorithm, FakeAIAlgorithm, EaseAIAlgorithm,
TrainingError, InvalidClassifier
)
EXAMPLES = [
AIAlgorithm.ExampleEssay(u"Mine's a tale that can't be told, my ƒяєє∂σм I hold dear.", 2),
AIAlgorithm.ExampleEssay(u"How years ago in days of old, when 𝒎𝒂𝒈𝒊𝒄 filled th air.", 1),
AIAlgorithm.ExampleEssay(u"Ṫ'ẅäṡ in the darkest depths of Ṁöṛḋöṛ, I met a girl so fair.", 1),
AIAlgorithm.ExampleEssay(u"But goレレuᄊ, and the evil one crept up and slipped away with her", 0),
AIAlgorithm.ExampleEssay(u"", 4)
]
INPUT_ESSAYS = [
u"Good times, 𝑩𝒂𝒅 𝑻𝒊𝒎𝒆𝒔, you know I had my share",
u"When my woman left home for a 𝒃𝒓𝒐𝒘𝒏 𝒆𝒚𝒆𝒅 𝒎𝒂𝒏",
u"Well, I still don't seem to 𝒄𝒂𝒓𝒆",
u""
]
class AIAlgorithmTest(CacheResetTest):
"""
Base class for testing AI algorithm implementations.
"""
ALGORITHM_CLASS = None
def setUp(self):
self.algorithm = self.ALGORITHM_CLASS() # pylint:disable=E1102
def _scores(self, classifier, input_essays):
"""
Use the classifier to score multiple input essays.
Args:
input_essays (list of unicode): The essays to score.
Returns:
list of int: The scores
"""
return [
self.algorithm.score(input_essay, classifier)
for input_essay in input_essays
]
class FakeAIAlgorithmTest(AIAlgorithmTest):
"""
Test for the fake AI algorithm implementation.
"""
ALGORITHM_CLASS = FakeAIAlgorithm
def test_train_and_score(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
expected_scores = [2, 0, 0, 0]
scores = self._scores(classifier, INPUT_ESSAYS)
self.assertEqual(scores, expected_scores)
def test_score_classifier_missing_key(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test input", dict())
def test_score_classifier_no_scores(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test input", {'scores': []})
# Try to import EASE -- if we can't, then skip the tests that require it
try:
import ease # pylint: disable=F0401,W0611
EASE_INSTALLED = True
except ImportError:
EASE_INSTALLED = False
@unittest.skipUnless(EASE_INSTALLED, "EASE library required")
class EaseAIAlgorithmTest(AIAlgorithmTest):
"""
Test for the EASE AI library wrapper.
"""
ALGORITHM_CLASS = EaseAIAlgorithm
def test_train_and_score(self):
classifier = self.algorithm.train_classifier(EXAMPLES)
scores = self._scores(classifier, INPUT_ESSAYS)
# Check that we got scores in the correct range
valid_scores = set(example.score for example in EXAMPLES)
for score in scores:
self.assertIn(score, valid_scores)
# Check that the scores are consistent when we re-run the algorithm
repeat_scores = self._scores(classifier, INPUT_ESSAYS)
self.assertEqual(scores, repeat_scores)
def test_all_examples_have_same_score(self):
examples = [
AIAlgorithm.ExampleEssay(u"Test ëṡṡäÿ", 1),
AIAlgorithm.ExampleEssay(u"Another test ëṡṡäÿ", 1),
]
# No assertion -- just verifying that this does not raise an exception
classifier = self.algorithm.train_classifier(examples)
self._scores(classifier, INPUT_ESSAYS)
def test_no_examples(self):
with self.assertRaises(TrainingError):
self.algorithm.train_classifier([])
@mock.patch('openassessment.assessment.worker.algorithm.pickle')
def test_pickle_serialize_error(self, mock_pickle):
mock_pickle.dumps.side_effect = Exception("Test error!")
with self.assertRaises(TrainingError):
self.algorithm.train_classifier(EXAMPLES)
@mock.patch('openassessment.assessment.worker.algorithm.pickle')
def test_pickle_deserialize_error(self, mock_pickle):
mock_pickle.loads.side_effect = Exception("Test error!")
classifier = self.algorithm.train_classifier(EXAMPLES)
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test ëṡṡäÿ", classifier)
def test_serialized_classifier_not_a_dict(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test ëṡṡäÿ", "not a dict")
......@@ -4,6 +4,8 @@ Define the ML algorithms used to train text classifiers.
from abc import ABCMeta, abstractmethod
from collections import namedtuple
import importlib
import traceback
import pickle
from django.conf import settings
......@@ -20,7 +22,7 @@ class UnknownAlgorithm(AIAlgorithmError):
Algorithm ID not found in the configuration.
"""
def __init__(self, algorithm_id):
msg = u"Could not find algorithm \"u{}\" in the configuration.".format(algorithm_id)
msg = u"Could not find algorithm \"{}\" in the configuration.".format(algorithm_id)
super(UnknownAlgorithm, self).__init__(msg)
......@@ -56,7 +58,6 @@ class InvalidClassifier(ScoreError):
pass
class AIAlgorithm(object):
"""
Abstract base class for a supervised ML text classification algorithm.
......@@ -79,8 +80,7 @@ class AIAlgorithm(object):
examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
Returns:
JSON-serializable: The trained classifier. This MUST be JSON-serializable;
if any of the classifier data is binary, it should be base-64 encoded.
JSON-serializable: The trained classifier. This MUST be JSON-serializable.
Raises:
TrainingError: The classifier could not be trained successfully.
......@@ -133,3 +133,231 @@ class AIAlgorithm(object):
return algorithm_cls()
except (ImportError, AttributeError):
raise AlgorithmLoadError(algorithm_id, cls_path)
class FakeAIAlgorithm(AIAlgorithm):
"""
Fake AI algorithm implementation that assigns scores randomly.
We use this for testing the pipeline independently of EASE.
"""
def train_classifier(self, examples):
"""
Store the possible score labels, which will allow
us to deterministically choose scores for other essays.
"""
unique_sorted_scores = sorted(list(set(example.score for example in examples)))
return {'scores': unique_sorted_scores}
def score(self, text, classifier):
"""
Choose a score for the essay deterministically based on its length.
"""
if 'scores' not in classifier or len(classifier['scores']) == 0:
raise InvalidClassifier("Classifier must provide score labels")
else:
score_index = len(text) % len(classifier['scores'])
return classifier['scores'][score_index]
class EaseAIAlgorithm(AIAlgorithm):
"""
Wrapper for the EASE library.
See https://github.com/edx/ease for more information.
Since EASE has many system dependencies, we don't include it explicitly
in edx-ora2 requirements. When testing locally, we use the fake
algorithm implementation instead.
"""
def train_classifier(self, examples):
"""
Train a text classifier using the EASE library.
The classifier is serialized as a dictionary with keys:
* 'feature_extractor': The pickled feature extractor (transforms text into a numeric feature vector).
* 'score_classifier': The pickled classifier (uses the feature vector to assign scores to essays).
Because we are using `pickle`, the serialized classifiers are unfortunately
tied to the particular version of ease/scikit-learn/numpy/scipy/nltk that we
have installed at the time of training.
Args:
examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
Returns:
dict: The serializable classifier.
Raises:
TrainingError: The classifier could not be trained successfully.
"""
feature_ext, classifier = self._train_classifiers(examples)
return self._serialize_classifiers(feature_ext, classifier)
def score(self, text, classifier):
"""
Score essays using EASE.
Args:
text (unicode): The essay text to score.
classifier (dict): The serialized classifiers created during training.
Returns:
int
Raises:
InvalidClassifier
ScoreError
"""
try:
from ease.grade import grade # pylint:disable=F0401
except ImportError:
msg = u"Could not import EASE to grade essays."
raise ScoreError(msg)
feature_extractor, score_classifier = self._deserialize_classifiers(classifier)
grader_input = {
'model': score_classifier,
'extractor': feature_extractor,
'prompt': ''
}
# EASE apparently can't handle non-ASCII unicode in the submission text
# (although, oddly, training runs without error)
# So we need to sanitize the input.
sanitized_text = text.encode('ascii', 'ignore')
try:
results = grade(grader_input, sanitized_text)
except:
msg = (
u"An unexpected error occurred while using "
u"EASE to score an essay: {traceback}"
).format(traceback=traceback.format_exc())
raise ScoreError(msg)
if not results.get('success', False):
msg = (
u"Errors occurred while scoring an essay "
u"using EASE: {errors}"
).format(errors=results.get('errors', []))
raise ScoreError(msg)
score = results.get('score')
if score is None:
msg = u"Error retrieving the score from EASE"
raise ScoreError(msg)
return score
def _train_classifiers(self, examples):
"""
Use EASE to train classifiers.
Args:
examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
Returns:
tuple of `feature_extractor` (an `ease.feature_extractor.FeatureExtractor` object)
and `classifier` (a `sklearn.ensemble.GradientBoostingClassifier` object).
Raises:
TrainingError: Could not load EASE or could not complete training.
"""
try:
from ease.create import create # pylint: disable=F0401
except ImportError:
msg = u"Could not import EASE to perform training."
raise TrainingError(msg)
input_essays = [example.text for example in examples]
input_scores = [example.score for example in examples]
try:
# Train the classifiers
# The third argument is the essay prompt, which EASE uses
# to check if an input essay is too similar to the prompt.
# Since we're not using this feature, we pass in an empty string.
results = create(input_essays, input_scores, "")
except:
msg = (
u"An unexpected error occurred while using "
u"EASE to train classifiers: {traceback}"
).format(traceback=traceback.format_exc())
raise TrainingError(msg)
if not results.get('success', False):
msg = (
u"Errors occurred while training classifiers "
u"using EASE: {errors}"
).format(errors=results.get('errors', []))
raise TrainingError(msg)
return results.get('feature_ext'), results.get('classifier')
def _serialize_classifiers(self, feature_ext, classifier):
"""
Serialize the classifier objects.
Args:
feature_extractor (ease.feature_extractor.FeatureExtractor)
classifier (sklearn.ensemble.GradientBoostingClassifier)
Returns:
dict containing the pickled classifiers
Raises:
TrainingError: Could not serialize the classifiers.
"""
try:
return {
'feature_extractor': pickle.dumps(feature_ext),
'score_classifier': pickle.dumps(classifier),
}
except Exception as ex:
msg = (
u"An error occurred while serializing the classifiers "
u"created by EASE: {ex}"
).format(ex=ex)
raise TrainingError(msg)
def _deserialize_classifiers(self, classifier_data):
"""
Deserialize the classifier objects.
Args:
classifier_data (dict): The serialized classifiers.
Returns:
tuple of `(feature_extractor, score_classifier)`
Raises:
InvalidClassifier
"""
if not isinstance(classifier_data, dict):
raise InvalidClassifier("Classifier must be a dictionary.")
try:
feature_extractor = pickle.loads(classifier_data.get('feature_extractor'))
except Exception as ex:
msg = (
u"An error occurred while deserializing the "
u"EASE feature extractor: {ex}"
).format(ex=ex)
raise InvalidClassifier(msg)
try:
score_classifier = pickle.loads(classifier_data.get('score_classifier'))
except Exception as ex:
msg = (
u"An error occurred while deserializing the "
u"EASE score classifier: {ex}"
).format(ex=ex)
raise InvalidClassifier(msg)
return feature_extractor, score_classifier
......@@ -100,3 +100,8 @@ LOGGING = {
# Store uploaded files in a dev-specific directory
MEDIA_ROOT = os.path.join(BASE_DIR, 'storage/dev')
# AI algorithm configuration
ORA2_AI_ALGORITHMS = {
'fake': 'openassessment.assessment.worker.algorithm.FakeAIAlgorithm'
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment