Commit 68b9325c by Will Daly

Avoid duplicate work when grading with EASE

Add script to benchmark EASE.
parent 3cbc8fd8
......@@ -49,7 +49,7 @@ class StubAIAlgorithm(AIAlgorithm):
classifier['score_override'] = 0
return classifier
def score(self, text, classifier):
def score(self, text, classifier, cache):
"""
Stub implementation that returns whatever scores were
provided in the serialized classifier data.
......
......@@ -47,8 +47,9 @@ class AIAlgorithmTest(CacheResetTest):
list of int: The scores
"""
cache = {}
return [
self.algorithm.score(input_essay, classifier)
self.algorithm.score(input_essay, classifier, cache)
for input_essay in input_essays
]
......@@ -67,11 +68,11 @@ class FakeAIAlgorithmTest(AIAlgorithmTest):
def test_score_classifier_missing_key(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test input", dict())
self.algorithm.score(u"Test input", {}, {})
def test_score_classifier_no_scores(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test input", {'scores': []})
self.algorithm.score(u"Test input", {'scores': []}, {})
# Try to import EASE -- if we can't, then skip the tests that require it
......@@ -126,8 +127,8 @@ class EaseAIAlgorithmTest(AIAlgorithmTest):
mock_pickle.loads.side_effect = Exception("Test error!")
classifier = self.algorithm.train_classifier(EXAMPLES)
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test ëṡṡäÿ", classifier)
self.algorithm.score(u"Test ëṡṡäÿ", classifier, {})
def test_serialized_classifier_not_a_dict(self):
with self.assertRaises(InvalidClassifier):
self.algorithm.score(u"Test ëṡṡäÿ", "not a dict")
self.algorithm.score(u"Test ëṡṡäÿ", "not a dict", {})
......@@ -31,7 +31,7 @@ class StubAIAlgorithm(AIAlgorithm):
def train_classifier(self, examples):
return {}
def score(self, text, classifier):
def score(self, text, classifier, cache):
return 0
......@@ -42,7 +42,7 @@ class ErrorStubAIAlgorithm(AIAlgorithm):
def train_classifier(self, examples):
raise TrainingError("Test error!")
def score(self, text, classifier):
def score(self, text, classifier, cache):
raise ScoreError("Test error!")
......@@ -55,7 +55,7 @@ class InvalidScoreAlgorithm(AIAlgorithm):
def train_classifier(self, examples):
return {}
def score(self, text, classifier):
def score(self, text, classifier, cache):
return self.SCORE_CYCLE.next()
......
......@@ -89,7 +89,7 @@ class AIAlgorithm(object):
pass
@abstractmethod
def score(self, text, classifier):
def score(self, text, classifier, cache):
"""
Score an essay using a classifier.
......@@ -97,6 +97,8 @@ class AIAlgorithm(object):
text (unicode): The text to classify.
classifier (JSON-serializable): A classifier, using the same format
as `train_classifier()`.
cache (dict): An in-memory cache that persists until all criteria
in the rubric have been scored.
Raises:
InvalidClassifier: The provided classifier cannot be used by this algorithm.
......@@ -149,7 +151,7 @@ class FakeAIAlgorithm(AIAlgorithm):
unique_sorted_scores = sorted(list(set(example.score for example in examples)))
return {'scores': unique_sorted_scores}
def score(self, text, classifier):
def score(self, text, classifier, cache):
"""
Choose a score for the essay deterministically based on its length.
"""
......@@ -194,13 +196,15 @@ class EaseAIAlgorithm(AIAlgorithm):
feature_ext, classifier = self._train_classifiers(examples)
return self._serialize_classifiers(feature_ext, classifier)
def score(self, text, classifier):
def score(self, text, classifier, cache):
"""
Score essays using EASE.
Args:
text (unicode): The essay text to score.
classifier (dict): The serialized classifiers created during training.
cache (dict): An in-memory cache that persists until all criteria
in the rubric have been scored.
Returns:
int
......@@ -211,26 +215,36 @@ class EaseAIAlgorithm(AIAlgorithm):
"""
try:
from ease.grade import grade # pylint:disable=F0401
from ease.essay_set import EssaySet # pylint:disable=F0401
except ImportError:
msg = u"Could not import EASE to grade essays."
raise ScoreError(msg)
feature_extractor, score_classifier = self._deserialize_classifiers(classifier)
grader_input = {
'model': score_classifier,
'extractor': feature_extractor,
'prompt': ''
}
# EASE apparently can't handle non-ASCII unicode in the submission text
# (although, oddly, training runs without error)
# So we need to sanitize the input.
sanitized_text = text.encode('ascii', 'ignore')
# The following is a modified version of `ease.grade.grade()`,
# skipping things we don't use (cross-validation, feedback)
# and caching essay sets across criteria. This allows us to
# avoid some expensive NLTK operations, particularly tagging
# parts of speech.
try:
results = grade(grader_input, sanitized_text)
# Get the essay set from the cache or create it.
# Since all essays to be graded are assigned a dummy
# score of "0", we can safely re-use the essay set
# for each criterion in the rubric.
# EASE can't handle non-ASCII unicode, so we need
# to strip out non-ASCII chars.
essay_set = cache.get('grading_essay_set')
if essay_set is None:
essay_set = EssaySet(essaytype="test")
essay_set.add_essay(text.encode('ascii', 'ignore'), 0)
cache['grading_essay_set'] = essay_set
# Extract features from the text
features = feature_extractor.gen_feats(essay_set)
# Predict a score
return int(score_classifier.predict(features)[0])
except:
msg = (
u"An unexpected error occurred while using "
......@@ -238,20 +252,6 @@ class EaseAIAlgorithm(AIAlgorithm):
).format(traceback=traceback.format_exc())
raise ScoreError(msg)
if not results.get('success', False):
msg = (
u"Errors occurred while scoring an essay "
u"using EASE: {errors}"
).format(errors=results.get('errors', []))
raise ScoreError(msg)
score = results.get('score')
if score is None:
msg = u"Error retrieving the score from EASE"
raise ScoreError(msg)
return score
def _train_classifiers(self, examples):
"""
Use EASE to train classifiers.
......
......@@ -13,7 +13,7 @@ from openassessment.assessment.errors import (
AIError, AIGradingInternalError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
)
from .algorithm import AIAlgorithm, AIAlgorithmError
from openassessment.assessment.models.ai import AIClassifierSet, AIGradingWorkflow
from openassessment.assessment.models.ai import AIGradingWorkflow
MAX_RETRIES = 2
......@@ -92,10 +92,13 @@ def grade_essay(workflow_uuid):
raise grade_essay.retry()
# Use the algorithm to evaluate the essay for each criterion
# Provide an in-memory cache so the algorithm can re-use
# results for multiple rubric criteria.
try:
cache = dict()
scores_by_criterion = {
criterion_name: _closest_valid_score(
algorithm.score(essay_text, classifier),
algorithm.score(essay_text, classifier, cache),
valid_scores[criterion_name]
)
for criterion_name, classifier in classifier_set.iteritems()
......
#!/usr/bin/env python
"""
Benchmark the execution time of the EASE algorithm for scoring essays.
"""
import os
import json
import time
import contextlib
from openassessment.assessment.worker.algorithm import AIAlgorithm, EaseAIAlgorithm
# Number of times main() repeats the whole scoring run; each repetition
# starts with a fresh, empty cache.
NUM_TRIALS = 3
# Number of `score()` calls made per trial. All calls within one trial
# share the same cache dict.
NUM_CRITERIA = 10
# Absolute path to the JSON example data, resolved relative to the
# directory containing this script.
DATA_FILE_PATH = os.path.abspath(
os.path.join(
os.path.dirname(__file__),
'data/ai-test-data.json'
)
)
@contextlib.contextmanager
def benchmark(name):
    """
    Print the elapsed time in seconds for a block of code.

    The time is measured with `timeit.default_timer`, i.e. whichever
    timer the standard library's `timeit` module selects for the current
    platform and interpreter. This replaces `time.clock()`, which on Unix
    reports low-resolution CPU time and no longer exists on recent
    Python 3 releases.

    Nothing is printed if the timed block raises; the exception simply
    propagates to the caller.

    Args:
        name (unicode): A descriptive name for the benchmark

    Returns:
        None

    Yields:
        None
    """
    # Imported here rather than at module level so this function does not
    # depend on changes to the file's import block.
    from timeit import default_timer

    start = default_timer()
    yield
    duration = default_timer() - start

    # A single parenthesised argument prints identically whether `print`
    # is a statement (Python 2) or a function (Python 3).
    print(u"{name} took {duration} seconds".format(name=name, duration=duration))
def load_training_data(data_path):
    """
    Read the example essays and their scores from a JSON file.

    Args:
        data_path (unicode): Location of the JSON data file. The file
            should contain a serialized list of dictionaries, each
            with the keys 'text' (unicode) and 'score' (int).

    Returns:
        list of `AIAlgorithm.ExampleEssay`s
    """
    print("Loading training data...")
    with open(data_path) as json_file:
        raw_examples = json.load(json_file)
    print("Done.")

    # Wrap each raw record in the example type the algorithm trains on,
    # coercing the score to an int along the way.
    essays = []
    for raw in raw_examples:
        essay = AIAlgorithm.ExampleEssay(
            text=raw['text'],
            score=int(raw['score'])
        )
        essays.append(essay)
    return essays
def main():
    """
    Train an EASE classifier, then time repeated scoring of one essay.

    Every example except the last is used for training; the last one is
    the essay that gets scored. Each trial scores that essay
    NUM_CRITERIA times against the same classifier, sharing one cache
    dict across the calls of that trial.
    """
    examples = load_training_data(DATA_FILE_PATH)
    algorithm = EaseAIAlgorithm()

    print("Training classifier...")
    training_examples = examples[:-1]
    with benchmark('Training'):
        classifier = algorithm.train_classifier(training_examples)
    print("Done.")

    print(u"Scoring essays ({num} criteria)...".format(num=NUM_CRITERIA))
    for trial in range(NUM_TRIALS):
        # A fresh cache per trial, so each trial times a cold first call
        # followed by calls that can re-use what the first one stored.
        cache = {}
        with benchmark('Scoring (rubric)'):
            for _criterion in range(NUM_CRITERIA):
                with benchmark('Scoring (criteria)'):
                    algorithm.score(examples[-1].text, classifier, cache)
        print("Finished scoring essay #{num}".format(num=trial))
if __name__ == "__main__":
main()
[
{
"text": "Food is any substance[1] consumed to provide nutritional support for the body. It is usually of plant or animal origin, and contains essential nutrients, such as carbohydrates, fats, proteins, vitamins, or minerals. The substance is ingested by an organism and assimilated by the organism's cells in an effort to produce energy, maintain life, or stimulate growth. Historically, people secured food through two methods: hunting and gathering, and agriculture. Today, most of the food energy consumed by the world population is supplied by the food industry. Food safety and food security are monitored by agencies like the International Association for Food Protection, World Resources Institute, World Food Programme, Food and Agriculture Organization, and International Food Information Council. They address issues such as sustainability, biological diversity, climate change, nutritional economics, population growth, water supply, and access to food.",
"score": 0
},
{
"text": "Most food has its origin in plants. Some food is obtained directly from plants; but even animals that are used as food sources are raised by feeding them food derived from plants. Cereal grain is a staple food that provides more food energy worldwide than any other type of crop. Maize, wheat, and rice – in all of their varieties – account for 87% of all grain production worldwide.[2] Most of the grain that is produced worldwide is fed to livestock. Some foods not from animal or plant sources include various edible fungi, especially mushrooms. Fungi and ambient bacteria are used in the preparation of fermented and pickled foods like leavened bread, alcoholic drinks, cheese, pickles, kombucha, and yogurt. Another example is blue-green algae such as Spirulina.[3] Inorganic substances such as salt, baking soda and cream of tartar are used to preserve or chemically alter an ingredient.",
"score": 1
},
{
"text": "Many plants or plant parts are eaten as food. There are around 2,000 plant species which are cultivated for food, and many have several distinct cultivars.[4] Seeds of plants are a good source of food for animals, including humans, because they contain the nutrients necessary for the plant's initial growth, including many healthful fats, such as Omega fats. In fact, the majority of food consumed by human beings are seed-based foods. Edible seeds include cereals (maize, wheat, rice, et cetera), legumes (beans, peas, lentils, et cetera), and nuts. Oilseeds are often pressed to produce rich oils - sunflower, flaxseed, rapeseed (including canola oil), sesame, et cetera.[5] Seeds are typically high in unsaturated fats and, in moderation, are considered a health food, although not all seeds are edible. Large seeds, such as those from a lemon, pose a choking hazard, while seeds from cherries and apples contain cyanide which could be poisonous only if consumed in large volumes.[6] Fruits are the ripened ovaries of plants, including the seeds within. Many plants and animals have coevolved such that the fruits of the former are an attractive food source to the latter, because animals that eat the fruits may excrete the seeds some distance away. Fruits, therefore, make up a significant part of the diets of most cultures. Some botanical fruits, such as tomatoes, pumpkins, and eggplants, are eaten as vegetables.[7] (For more information, see list of fruits.) Vegetables are a second type of plant matter that is commonly eaten as food. These include root vegetables (potatoes and carrots), bulbs (onion family), leaf vegetables (spinach and lettuce), stem vegetables (bamboo shoots and asparagus), and inflorescence vegetables (globe artichokes and broccoli and other vegetables such as cabbage or cauliflower).[8]",
"score": 0
},
{
"text": "Animals are used as food either directly or indirectly by the products they produce. Meat is an example of a direct product taken from an animal, which comes from muscle systems or from organs. Various raw meats Food products produced by animals include milk produced by mammary glands, which in many cultures is drunk or processed into dairy products (cheese, butter, etc.). In addition, birds and other animals lay eggs, which are often eaten, and bees produce honey, a reduced nectar from flowers, which is a popular sweetener in many cultures. Some cultures consume blood, sometimes in the form of blood sausage, as a thickener for sauces, or in a cured, salted form for times of food scarcity, and others use blood in stews such as jugged hare.[9] Some cultures and people do not consume meat or animal food products for cultural, dietary, health, ethical, or ideological reasons. Vegetarians choose to forgo food from animal sources to varying degrees. Vegans do not consume any foods that are or contain ingredients from an animal source.",
"score": 2
},
{
"text": "Most food has always been obtained through agriculture. With increasing concern over both the methods and products of modern industrial agriculture, there has been a growing trend toward sustainable agricultural practices. This approach, partly fueled by consumer demand, encourages biodiversity, local self-reliance and organic farming methods.[10] Major influences on food production include international organizations (e.g. the World Trade Organization and Common Agricultural Policy), national government policy (or law), and war.[11] In popular culture, the mass production of food, specifically meats such as chicken and beef, has come under fire from various documentaries, most recently Food, Inc, documenting the mass slaughter and poor treatment of animals, often for easier revenues from large corporations. Along with a current trend towards environmentalism, people in Western culture have had an increasing trend towards the use of herbal supplements, foods for a specific group of person (such as dieters, women, or athletes), functional foods (fortified foods, such as omega-3 eggs), and a more ethnically diverse diet.[12] Several organisations have begun calling for a new kind of agriculture in which agroecosystems provide food but also support vital ecosystem services so that soil fertility and biodiversity are maintained rather than compromised. According to the International Water Management Institute and UNEP, well-managed agroecosystems not only provide food, fiber and animal products, they also provide services such as flood mitigation, groundwater recharge, erosion control and habitats for plants, birds fish and other animals.[13]",
"score": 3
},
{
"text": "Generally regarded as the most pleasant taste, sweetness is almost always caused by a type of simple sugar such as glucose or fructose, or disaccharides such as sucrose, a molecule combining glucose and fructose.[16] Complex carbohydrates are long chains and thus do not have the sweet taste. Artificial sweeteners such as sucralose are used to mimic the sugar molecule, creating the sensation of sweet, without the calories. Other types of sugar include raw sugar, which is known for its amber color, as it is unprocessed. As sugar is vital for energy and survival, the taste of sugar is pleasant. The stevia plant contains a compound known as steviol which, when extracted, has 300 times the sweetness of sugar while having minimal impact on blood sugar.[17] Sour Sourness is caused by the taste of acids, such as vinegar in alcoholic beverages. Sour foods include citrus, specifically lemons, limes, and to a lesser degree oranges. Sour is evolutionarily significant as it is a sign for a food that may have gone rancid due to bacteria.[18] Many foods, however, are slightly acidic, and help stimulate the taste buds and enhance flavor.",
"score": 1
},
{
"text": "Saltiness is the taste of alkali metal ions such as sodium and potassium. It is found in almost every food in low to moderate proportions to enhance flavor, although to eat pure salt is regarded as highly unpleasant. There are many different types of salt, with each having a different degree of saltiness, including sea salt, fleur de sel, kosher salt, mined salt, and grey salt. Other than enhancing flavor, its significance is that the body needs and maintains a delicate electrolyte balance, which is the kidney's function. Salt may be iodized, meaning iodine has been added to it, a necessary nutrient that promotes thyroid function. Some canned foods, notably soups or packaged broths, tend to be high in salt as a means of preserving the food longer. Historically speaking, salt has been used as a meat preservative as salt promotes water excretion, thus working as a preservative. Similarly, dried foods also promote food safety.[19] Bitter Bitterness is a sensation often considered unpleasant characterized by having a sharp, pungent taste. Dark, unsweetened chocolate, caffeine, lemon rind, and some types of fruit are known to be bitter. Umami Also named as Savoury. Umami, the Japanese word for delicious, is the least known in Western popular culture but has a long tradition in Asian cuisine. Umami is the taste of glutamates, especially monosodium glutamate (MSG).[16] It is characterized as savory, meaty, and rich in flavor. Salmon and mushrooms are foods high in umami. Meat and other animal byproducts are described as having this taste.[citation needed]",
"score": 2
}
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment