Added fake AI Algorithm; add EASE wrapper for AI training

0749ce1d · Will Daly · 6883bfc0 · 0749ce1d · 0749ce1d · 0749ce1d
Commit 0749ce1d authored May 21, 2014 by Will Daly
Showing with 377 additions and 5 deletions

Vagrantfile
+1 -1

apps/openassessment/assessment/tasks.py
+6 -0

apps/openassessment/assessment/test/test_ai_algorithm.py
+133 -0

apps/openassessment/assessment/worker/algorithm.py
+232 -4

settings/dev.py
+5 -0

No files found.
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -69,7 +69,7 @@ LOGIN_SCRIPT
    echo "Downloading NLTK corpus..."
    mkdir -p /home/vagrant/data
    curl -o /home/vagrant/data/nltk.tmp.tar.tz http://edx-static.s3.amazonaws.com/nltk/nltk-data-20131113.tar.gz
-    tar zxf /home/vagrant/data/nltk.tmp.tar.tz
+    cd /home/vagrant/data && tar zxf /home/vagrant/data/nltk.tmp.tar.tz
    echo "Install edx-ora2..."
    cd /home/vagrant/edx-ora2 && ./scripts/install.sh

--- a/apps/openassessment/assessment/tasks.py
+++ b/apps/openassessment/assessment/tasks.py
+"""
+Celery looks for tasks in this module,
+so import the tasks we want the workers to implement.
+"""
+# pylint:disable=W0611
+from .worker.training import train_classifiers
--- a/apps/openassessment/assessment/test/test_ai_algorithm.py
+++ b/apps/openassessment/assessment/test/test_ai_algorithm.py
+# coding=utf-8
+"""
+Tests for AI algorithm implementations.
+"""
+import unittest
+import mock
+from openassessment.test_utils import CacheResetTest
+from openassessment.assessment.worker.algorithm import (
+    AIAlgorithm, FakeAIAlgorithm, EaseAIAlgorithm,
+    TrainingError, InvalidClassifier
+)
+EXAMPLES = [
+    AIAlgorithm.ExampleEssay(u"Mine's a tale that can't be told, my ƒяєє∂σм I hold dear.", 2),
+    AIAlgorithm.ExampleEssay(u"How years ago in days of old, when 𝒎𝒂𝒈𝒊𝒄 filled th air.", 1),
+    AIAlgorithm.ExampleEssay(u"Ṫ'ẅäṡ in the darkest depths of Ṁöṛḋöṛ, I met a girl so fair.", 1),
+    AIAlgorithm.ExampleEssay(u"But goﾚﾚuﾶ, and the evil one crept up and slipped away with her", 0),
+    AIAlgorithm.ExampleEssay(u"", 4)
+]
+INPUT_ESSAYS = [
+    u"Good times, 𝑩𝒂𝒅 𝑻𝒊𝒎𝒆𝒔, you know I had my share",
+    u"When my woman left home for a 𝒃𝒓𝒐𝒘𝒏 𝒆𝒚𝒆𝒅 𝒎𝒂𝒏",
+    u"Well, I still don't seem to 𝒄𝒂𝒓𝒆",
+    u""
+]
+class AIAlgorithmTest(CacheResetTest):
+    """
+    Base class for testing AI algorithm implementations.
+    """
+    ALGORITHM_CLASS = None
+    def setUp(self):
+        self.algorithm = self.ALGORITHM_CLASS()   # pylint:disable=E1102
+    def _scores(self, classifier, input_essays):
+        """
+        Use the classifier to score multiple input essays.
+        Args:
+            input_essays (list of unicode): The essays to score.
+        Returns:
+            list of int: The scores
+        """
+        return [
+            self.algorithm.score(input_essay, classifier)
+            for input_essay in input_essays
+        ]
+class FakeAIAlgorithmTest(AIAlgorithmTest):
+    """
+    Test for the fake AI algorithm implementation.
+    """
+    ALGORITHM_CLASS = FakeAIAlgorithm
+    def test_train_and_score(self):
+        classifier = self.algorithm.train_classifier(EXAMPLES)
+        expected_scores = [2, 0, 0, 0]
+        scores = self._scores(classifier, INPUT_ESSAYS)
+        self.assertEqual(scores, expected_scores)
+    def test_score_classifier_missing_key(self):
+        with self.assertRaises(InvalidClassifier):
+            self.algorithm.score(u"Test input", dict())
+    def test_score_classifier_no_scores(self):
+        with self.assertRaises(InvalidClassifier):
+            self.algorithm.score(u"Test input", {'scores': []})
+# Try to import EASE -- if we can't, then skip the tests that require it
+try:
+    import ease # pylint: disable=F0401,W0611
+    EASE_INSTALLED = True
+except ImportError:
+    EASE_INSTALLED = False
+@unittest.skipUnless(EASE_INSTALLED, "EASE library required")
+class EaseAIAlgorithmTest(AIAlgorithmTest):
+    """
+    Test for the EASE AI library wrapper.
+    """
+    ALGORITHM_CLASS = EaseAIAlgorithm
+    def test_train_and_score(self):
+        classifier = self.algorithm.train_classifier(EXAMPLES)
+        scores = self._scores(classifier, INPUT_ESSAYS)
+        # Check that we got scores in the correct range
+        valid_scores = set(example.score for example in EXAMPLES)
+        for score in scores:
+            self.assertIn(score, valid_scores)
+        # Check that the scores are consistent when we re-run the algorithm
+        repeat_scores = self._scores(classifier, INPUT_ESSAYS)
+        self.assertEqual(scores, repeat_scores)
+    def test_all_examples_have_same_score(self):
+        examples = [
+            AIAlgorithm.ExampleEssay(u"Test ëṡṡäÿ", 1),
+            AIAlgorithm.ExampleEssay(u"Another test ëṡṡäÿ", 1),
+        ]
+        # No assertion -- just verifying that this does not raise an exception
+        classifier = self.algorithm.train_classifier(examples)
+        self._scores(classifier, INPUT_ESSAYS)
+    def test_no_examples(self):
+        with self.assertRaises(TrainingError):
+            self.algorithm.train_classifier([])
+    @mock.patch('openassessment.assessment.worker.algorithm.pickle')
+    def test_pickle_serialize_error(self, mock_pickle):
+        mock_pickle.dumps.side_effect = Exception("Test error!")
+        with self.assertRaises(TrainingError):
+            self.algorithm.train_classifier(EXAMPLES)
+    @mock.patch('openassessment.assessment.worker.algorithm.pickle')
+    def test_pickle_deserialize_error(self, mock_pickle):
+        mock_pickle.loads.side_effect = Exception("Test error!")
+        classifier = self.algorithm.train_classifier(EXAMPLES)
+        with self.assertRaises(InvalidClassifier):
+            self.algorithm.score(u"Test ëṡṡäÿ", classifier)
+    def test_serialized_classifier_not_a_dict(self):
+        with self.assertRaises(InvalidClassifier):
+            self.algorithm.score(u"Test ëṡṡäÿ", "not a dict")
--- a/apps/openassessment/assessment/worker/algorithm.py
+++ b/apps/openassessment/assessment/worker/algorithm.py
@@ -4,6 +4,8 @@ Define the ML algorithms used to train text classifiers.
 from abc import ABCMeta, abstractmethod
 from collections import namedtuple
 import importlib
+import traceback
+import pickle
 from django.conf import settings
@@ -20,7 +22,7 @@ class UnknownAlgorithm(AIAlgorithmError):
    Algorithm ID not found in the configuration.
    """
    def __init__(self, algorithm_id):
-        msg = u"Could not find algorithm \"u{}\" in the configuration.".format(algorithm_id)
+        msg = u"Could not find algorithm \"{}\" in the configuration.".format(algorithm_id)
        super(UnknownAlgorithm, self).__init__(msg)
@@ -56,7 +58,6 @@ class InvalidClassifier(ScoreError):
    pass
 class AIAlgorithm(object):
    """
    Abstract base class for a supervised ML text classification algorithm.
@@ -79,8 +80,7 @@ class AIAlgorithm(object):
            examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
        Returns:
-            JSON-serializable: The trained classifier.  This MUST be JSON-serializable;
+            JSON-serializable: The trained classifier.  This MUST be JSON-serializable.
-                if any of the classifier data is binary, it should be base-64 encoded.
        Raises:
            TrainingError: The classifier could not be trained successfully.
@@ -133,3 +133,231 @@ class AIAlgorithm(object):
                return algorithm_cls()
            except (ImportError, AttributeError):
                raise AlgorithmLoadError(algorithm_id, cls_path)
+class FakeAIAlgorithm(AIAlgorithm):
+    """
+    Fake AI algorithm implementation that assigns scores randomly.
+    We use this for testing the pipeline independently of EASE.
+    """
+    def train_classifier(self, examples):
+        """
+        Store the possible score labels, which will allow
+        us to deterministically choose scores for other essays.
+        """
+        unique_sorted_scores = sorted(list(set(example.score for example in examples)))
+        return {'scores': unique_sorted_scores}
+    def score(self, text, classifier):
+        """
+        Choose a score for the essay deterministically based on its length.
+        """
+        if 'scores' not in classifier or len(classifier['scores']) == 0:
+            raise InvalidClassifier("Classifier must provide score labels")
+        else:
+            score_index = len(text) % len(classifier['scores'])
+            return classifier['scores'][score_index]
+class EaseAIAlgorithm(AIAlgorithm):
+    """
+    Wrapper for the EASE library.
+    See https://github.com/edx/ease for more information.
+    Since EASE has many system dependencies, we don't include it explicitly
+    in edx-ora2 requirements.  When testing locally, we use the fake
+    algorithm implementation instead.
+    """
+    def train_classifier(self, examples):
+        """
+        Train a text classifier using the EASE library.
+        The classifier is serialized as a dictionary with keys:
+            * 'feature_extractor': The pickled feature extractor (transforms text into a numeric feature vector).
+            * 'score_classifier': The pickled classifier (uses the feature vector to assign scores to essays).
+        Because we are using `pickle`, the serialized classifiers are unfortunately
+        tied to the particular version of ease/scikit-learn/numpy/scipy/nltk that we
+        have installed at the time of training.
+        Args:
+            examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
+        Returns:
+            dict: The serializable classifier.
+        Raises:
+            TrainingError: The classifier could not be trained successfully.
+        """
+        feature_ext, classifier = self._train_classifiers(examples)
+        return self._serialize_classifiers(feature_ext, classifier)
+    def score(self, text, classifier):
+        """
+        Score essays using EASE.
+        Args:
+            text (unicode): The essay text to score.
+            classifier (dict): The serialized classifiers created during training.
+        Returns:
+            int
+        Raises:
+            InvalidClassifier
+            ScoreError
+        """
+        try:
+            from ease.grade import grade    # pylint:disable=F0401
+        except ImportError:
+            msg = u"Could not import EASE to grade essays."
+            raise ScoreError(msg)
+        feature_extractor, score_classifier = self._deserialize_classifiers(classifier)
+        grader_input = {
+            'model': score_classifier,
+            'extractor': feature_extractor,
+            'prompt': ''
+        }
+        # EASE apparently can't handle non-ASCII unicode in the submission text
+        # (although, oddly, training runs without error)
+        # So we need to sanitize the input.
+        sanitized_text = text.encode('ascii', 'ignore')
+        try:
+            results = grade(grader_input, sanitized_text)
+        except:
+            msg = (
+                u"An unexpected error occurred while using "
+                u"EASE to score an essay: {traceback}"
+            ).format(traceback=traceback.format_exc())
+            raise ScoreError(msg)
+        if not results.get('success', False):
+            msg = (
+                u"Errors occurred while scoring an essay "
+                u"using EASE: {errors}"
+            ).format(errors=results.get('errors', []))
+            raise ScoreError(msg)
+        score = results.get('score')
+        if score is None:
+            msg = u"Error retrieving the score from EASE"
+            raise ScoreError(msg)
+        return score
+    def _train_classifiers(self, examples):
+        """
+        Use EASE to train classifiers.
+        Args:
+            examples (list of AIAlgorithm.ExampleEssay): Example essays and scores.
+        Returns:
+            tuple of `feature_extractor` (an `ease.feature_extractor.FeatureExtractor` object)
+            and `classifier` (a `sklearn.ensemble.GradientBoostingClassifier` object).
+        Raises:
+            TrainingError: Could not load EASE or could not complete training.
+        """
+        try:
+            from ease.create import create  # pylint: disable=F0401
+        except ImportError:
+            msg = u"Could not import EASE to perform training."
+            raise TrainingError(msg)
+        input_essays = [example.text for example in examples]
+        input_scores = [example.score for example in examples]
+        try:
+            # Train the classifiers
+            # The third argument is the essay prompt, which EASE uses
+            # to check if an input essay is too similar to the prompt.
+            # Since we're not using this feature, we pass in an empty string.
+            results = create(input_essays, input_scores, "")
+        except:
+            msg = (
+                u"An unexpected error occurred while using "
+                u"EASE to train classifiers: {traceback}"
+            ).format(traceback=traceback.format_exc())
+            raise TrainingError(msg)
+        if not results.get('success', False):
+            msg = (
+                u"Errors occurred while training classifiers "
+                u"using EASE: {errors}"
+            ).format(errors=results.get('errors', []))
+            raise TrainingError(msg)
+        return results.get('feature_ext'), results.get('classifier')
+    def _serialize_classifiers(self, feature_ext, classifier):
+        """
+        Serialize the classifier objects.
+        Args:
+            feature_extractor (ease.feature_extractor.FeatureExtractor)
+            classifier (sklearn.ensemble.GradientBoostingClassifier)
+        Returns:
+            dict containing the pickled classifiers
+        Raises:
+            TrainingError: Could not serialize the classifiers.
+        """
+        try:
+            return {
+                'feature_extractor': pickle.dumps(feature_ext),
+                'score_classifier': pickle.dumps(classifier),
+            }
+        except Exception as ex:
+            msg = (
+                u"An error occurred while serializing the classifiers "
+                u"created by EASE: {ex}"
+            ).format(ex=ex)
+            raise TrainingError(msg)
+    def _deserialize_classifiers(self, classifier_data):
+        """
+        Deserialize the classifier objects.
+        Args:
+            classifier_data (dict): The serialized classifiers.
+        Returns:
+            tuple of `(feature_extractor, score_classifier)`
+        Raises:
+            InvalidClassifier
+        """
+        if not isinstance(classifier_data, dict):
+            raise InvalidClassifier("Classifier must be a dictionary.")
+        try:
+            feature_extractor = pickle.loads(classifier_data.get('feature_extractor'))
+        except Exception as ex:
+            msg = (
+                u"An error occurred while deserializing the "
+                u"EASE feature extractor: {ex}"
+            ).format(ex=ex)
+            raise InvalidClassifier(msg)
+        try:
+            score_classifier = pickle.loads(classifier_data.get('score_classifier'))
+        except Exception as ex:
+            msg = (
+                u"An error occurred while deserializing the "
+                u"EASE score classifier: {ex}"
+            ).format(ex=ex)
+            raise InvalidClassifier(msg)
+        return feature_extractor, score_classifier
--- a/settings/dev.py
+++ b/settings/dev.py
@@ -100,3 +100,8 @@ LOGGING = {
 # Store uploaded files in a dev-specific directory
 MEDIA_ROOT = os.path.join(BASE_DIR, 'storage/dev')
+# AI algorithm configuration
+ORA2_AI_ALGORITHMS = {
+    'fake': 'openassessment.assessment.worker.algorithm.FakeAIAlgorithm'
+}