Commit e1be348a by gradyward

Changed the grade.py file stylistically, and streamlined the code.

Made insignificant changes to create.py, and added an error to errors.py.
parent 07080d1e
create.py
@@ -44,7 +44,7 @@ def dump_input_data(text, score):
 def create(examples, scores, prompt_string, dump_data=False):
     """
-    Creates a machine learning model from basic inputs (essays, associated scores and a prompt)
+    Creates a machine learning model from basic inputs (essays, associated scores and a prompt) and trains the model.
     The previous version of this function took an additional argument which specified the path to the model.
...
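For context, a minimal sketch of how the updated `create()` might be called. Only the signature comes from the hunk above; the import path, the example essays, and the shape of the returned object are assumptions for illustration.

```python
# Hypothetical usage of the create() signature shown above; the import path and
# the structure of the returned object are assumptions, not part of this commit.
from create import create

examples = [
    "Cells make most of their ATP in the mitochondria ...",
    "Energy comes from the sun ...",
]
scores = [3, 1]                                   # one score per training essay
prompt_string = "Explain how cells produce energy."

# dump_data=True would presumably also write the inputs out (see dump_input_data above).
model_data = create(examples, scores, prompt_string, dump_data=False)
```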
errors.py
@@ -10,3 +10,6 @@ class ExampleCreationInternalError(Exception):
 class EaseError(Exception):
     pass
+
+class GradingRequestError(Exception):
+    pass
\ No newline at end of file
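Illustrative only: one way a caller might handle the new `GradingRequestError` once grade.py starts raising it (see `get_classifier_and_extractor` below). The `safe_grade` wrapper and its arguments are hypothetical.

```python
# Hypothetical wrapper showing how GradingRequestError could be caught by callers;
# grade_fn stands in for the grade() function defined in grade.py.
from errors import GradingRequestError

def safe_grade(grade_fn, grader_data, submission):
    try:
        return grade_fn(grader_data, submission)
    except GradingRequestError as exc:
        # Malformed grader_data (no usable model or extractor): report, don't crash.
        return {'success': False, 'errors': [str(exc)], 'score': 0}
```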
grade.py
@@ -17,6 +17,7 @@ from essay_set import EssaySet
 import predictor_extractor
 import predictor_set
 import util_functions
+from errors import *
 #Imports needed to unpickle grader data
 import feature_extractor
@@ -28,88 +29,89 @@ log = logging.getLogger(__name__)
 def grade(grader_data, submission):
     """
-    Grades a specified submission using specified models
-    grader_data - A dictionary:
-    {
+    Grades a submission given all of the feature extractor and classifier set.
+    Args:
+        grader_data (dict): Has the following keys
             'model' : trained model,
             'extractor' : trained feature extractor,
             'prompt' : prompt for the question,
             'algorithm' : algorithm for the question,
-    }
-    submission - The student submission (string)
+        submission (str): The student submission
+    Returns:
+        (dict) with the following keys:
+            'errors': All of the errors that arose during the grading process.
+            'tests':
+            'score': The score the input essay was assigned by the classifier set
+            'feedback': The feedback given by the classifier set
+            'success': Whether or not the grading operation was a success
+            'confidence': A metric of the classifier's confidence in its result
     """
-    #Initialize result dictionary
+    # Initialize result dictionary
     results = {'errors': [], 'tests': [], 'score': 0, 'feedback': "", 'success': False, 'confidence': 0}
-    has_error = False
+    # Instantiates the Essay set which will carry our essay while it is being classified and graded.
     grader_set = EssaySet(essay_type="test")
     feedback = {}
-    model, extractor = get_classifier_and_ext(grader_data)
-    #This is to preserve legacy functionality
-    if 'algorithm' not in grader_data:
-        grader_data['algorithm'] = util_functions.AlgorithmTypes.classification
+    # Retrieves the model and extractor we will be using
+    model, extractor = get_classifier_and_extractor(grader_data)
+    # Attempts to add the essay (text) to the essay set.
     try:
-        #Try to add essay to essay set object
         grader_set.add_essay(str(submission), 0)
         grader_set.update_prompt(str(grader_data['prompt']))
-    except Exception:
+    except:
         error_message = "Essay could not be added to essay set:{0}".format(submission)
         log.exception(error_message)
         results['errors'].append(error_message)
-        has_error = True
-    #Try to extract features from submission and assign score via the model
+    # Tries to extract features from submission and assign score via the model
+    grader_features = None
     try:
-        grader_feats = extractor.gen_feats(grader_set)
-        feedback = extractor.gen_feedback(grader_set, grader_feats)[0]
-        results['score'] = int(model.predict(grader_feats)[0])
-    except Exception:
+        grader_features = extractor.gen_feats(grader_set)
+        feedback = extractor.gen_feedback(grader_set, grader_features)[0]
+        results['score'] = int(model.predict(grader_features)[0])
+    except:
         error_message = "Could not extract features and score essay."
         log.exception(error_message)
         results['errors'].append(error_message)
-        has_error = True
     #Try to determine confidence level
     try:
-        results['confidence'] = get_confidence_value(grader_data['algorithm'], model, grader_feats, results['score'],
-                                                     grader_data['score'])
-    except Exception:
-        #If there is an error getting confidence, it is not a show-stopper, so just log
+        results['confidence'] = get_confidence_value(
+            grader_data['algorithm'], model, grader_features, results['score'], grader_data['score'])
+    except:
+        # If there is an error getting confidence, it is not a show-stopper/big deal, so just log the error
         log.exception("Problem generating confidence value")
-    if not has_error:
-        #If the essay is just a copy of the prompt, return a 0 as the score
+    # If we have errors above, we do not continue here, but return the dictionary of failure
+    if len(results['errors']) == 0:
+        # We have gotten through without an error, so we have been successful
+        results['success'] = True
+        # If the essay is just a copy of the prompt (or too similar), return a 0 as the score
         if 'too_similar_to_prompt' in feedback and feedback['too_similar_to_prompt']:
             results['score'] = 0
-            results['correct'] = False
-        results['success'] = True
-        #Generate short form output--number of problem areas identified in feedback
-        #Add feedback to results if available
-        results['feedback'] = {}
+        # Generate feedback, identifying a number of explicable problem areas
+        results['feedback'] = {
+            'spelling': feedback['spelling'],
+            'grammar': feedback['grammar'],
+            'markup-text': feedback['markup_text'],
+        }
         if 'topicality' in feedback and 'prompt_overlap' in feedback:
             results['feedback'].update({
                 'topicality': feedback['topicality'],
                 'prompt-overlap': feedback['prompt_overlap'],
             })
-        results['feedback'].update(
-            {
-                'spelling': feedback['spelling'],
-                'grammar': feedback['grammar'],
-                'markup-text': feedback['markup_text'],
-            }
-        )
+    # If we get here, that means there was 1+ error above. Set success to false and return
     else:
-        #If error, success is False.
         results['success'] = False
     return results
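As a reading aid, a hedged sketch of how the refactored `grade()` is typically called. The `trained_model` and `trained_extractor` objects are placeholders for whatever training produced; note that the code also reads `grader_data['score']` (the list of training scores) for the confidence estimate, even though the docstring does not list that key.

```python
# Illustrative call to grade(); trained_model and trained_extractor are placeholders
# for objects produced by training (e.g. unpickled from a stored model file).
grader_data = {
    'model': trained_model,
    'extractor': trained_extractor,
    'prompt': "Explain how cells produce energy.",
    'algorithm': util_functions.AlgorithmTypes.classification,
    'score': [0, 1, 2, 3],   # scores observed in training; used by get_confidence_value()
}

results = grade(grader_data, "Cells produce most of their energy in the mitochondria ...")
if results['success']:
    print(results['score'], results['confidence'], results['feedback'])
else:
    print(results['errors'])
```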
@@ -117,96 +119,125 @@ def grade(grader_data, submission):
 def grade_generic(grader_data, numeric_features, textual_features):
     """
-    Grades a set of numeric and textual features using a generic model
-    grader_data -- dictionary containing:
-    {
-        'algorithm' - Type of algorithm to use to score
-    }
-    numeric_features - list of numeric features to predict on
-    textual_features - list of textual feature to predict on
-    """
-    results = {'errors': [], 'tests': [], 'score': 0, 'success': False, 'confidence': 0}
-    has_error = False
-    #Try to find and load the model file
+    Grades the generic case of numeric and textual features using a generic prediction model.
+        grader_data (dict): contains the key (among others)
+            'algorithm': Type of algorithm used to score
+        numeric_features (list of float or int or long): A list of numeric features of the essay we are grading
+        textual_features (list of string): A list of textual features of the essay we are grading
+    Returns:
+        (dict) with the following keys:
+            'errors': All of the errors that arose during the grading process.
+            'tests':
+            'score': The score the input essay was assigned by the classifier set
+            'success': Whether or not the grading operation was a success
+            'confidence': A metric of the classifier's confidence in its result
+    """
+    results = {'errors': [], 'tests': [], 'score': 0, 'success': False, 'confidence': 0}
+    # Create a predictor set which will carry the information as we grade it.
     grader_set = predictor_set.PredictorSet(essay_type="test")
-    model, extractor = get_classifier_and_ext(grader_data)
-    #Try to add essays to essay set object
+    # Finds the appropriate predictor and model to use
+    model, extractor = get_classifier_and_extractor(grader_data)
+    # Try to add data to predictor set that we are going to be grading
     try:
         grader_set.add_row(numeric_features, textual_features, 0)
-    except Exception:
+    except:
         error_msg = "Row could not be added to predictor set:{0} {1}".format(numeric_features, textual_features)
         log.exception(error_msg)
         results['errors'].append(error_msg)
-        has_error = True
-    #Try to extract features from submission and assign score via the model
+    # Try to extract features from submission and assign score via the model
    try:
         grader_feats = extractor.gen_feats(grader_set)
         results['score'] = model.predict(grader_feats)[0]
-    except Exception:
+    except:
         error_msg = "Could not extract features and score essay."
         log.exception(error_msg)
         results['errors'].append(error_msg)
-        has_error = True
-    #Try to determine confidence level
+    # Try to determine confidence level
     try:
         results['confidence'] = get_confidence_value(grader_data['algorithm'], model, grader_feats, results['score'])
-    except Exception:
         #If there is an error getting confidence, it is not a show-stopper, so just log
+    except:
         log.exception("Problem generating confidence value")
-    if not has_error:
+    # If we didn't run into an error, we were successful
+    if len(results['errors']) == 0:
         results['success'] = True
     return results
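A similarly hedged sketch for `grade_generic()`. The feature values, and the assumption that `util_functions.AlgorithmTypes` also exposes a `regression` member, are illustrative; only the call shape comes from the diff above.

```python
# Illustrative call to grade_generic(); trained_model and trained_extractor are
# again placeholders for the objects produced by the generic training path.
grader_data = {
    'model': trained_model,
    'extractor': trained_extractor,
    'algorithm': util_functions.AlgorithmTypes.regression,   # assumed member name
}

numeric_features = [452, 3, 0.97]                  # e.g. length, paragraph count, spelling ratio
textual_features = ["The full response text ..."]

results = grade_generic(grader_data, numeric_features, textual_features)
print(results['score'] if results['success'] else results['errors'])
```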
-def get_confidence_value(algorithm, model, grader_feats, score, scores):
+def get_confidence_value(algorithm, model, grader_features, score, scores):
     """
-    Determines a confidence in a certain score, given proper input parameters
-    algorithm- from util_functions.AlgorithmTypes
-    model - a trained model
-    grader_feats - a row of features used by the model for classification/regression
-    score - The score assigned to the submission by a prior model
+    Determines the confidence level for a specific grade given to a specific essay.
+    Args:
+        algorithm: one of the two from util_functions.AlgorithmTypes
+        model: A trained model for classification
+        grader_features: A dictionary describing the grading task
+        score: The score assigned to this problem
+        scores: All scores assigned to this problem for all submissions (not just this one)
+    NOTE: For our current intents and purposes, this value is not utile, and will be removed later on.
+    Returns:
+        Ideally: A value between 0 and 1 reflecting the normalized probability confidence in the grade assigned.
+        Actually: A numerical value with no weight reflecting an arbitrary degree of confidence.
     """
     min_score = min(numpy.asarray(scores))
-    max_score = max(numpy.asarray(scores))
+    # If our algorithm is classification:
     if algorithm == util_functions.AlgorithmTypes.classification and hasattr(model, "predict_proba"):
-        #If classification, predict with probability, which gives you a matrix of confidences per score point
-        raw_confidence = model.predict_proba(grader_feats)[0, (float(score) - float(min_score))]
-        #TODO: Normalize confidence somehow here
+        # If classification, predict with probability, which gives you a matrix of confidences per score point
+        raw_confidence = model.predict_proba(grader_features)[0, (float(score) - float(min_score))]
+        # The intent was to normalize confidence here, but it was never done, so it remains as such.
         confidence = raw_confidence
+    # Otherwise, if our algorithm is prediction
     elif hasattr(model, "predict"):
-        raw_confidence = model.predict(grader_feats)[0]
+        raw_confidence = model.predict(grader_features)[0]
         confidence = max(float(raw_confidence) - math.floor(float(raw_confidence)),
                          math.ceil(float(raw_confidence)) - float(raw_confidence))
+    # Otherwise, we have no confidence, because we have no grading mechanism
     else:
         confidence = 0
     return confidence
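The `elif` branch's expression is easier to read with a few concrete values. The helper name below is hypothetical; it simply restates the `max(...)` expression from the code above.

```python
import math

def regression_confidence(raw_prediction):
    # Same expression as the elif branch above: the distance from the raw
    # prediction to whichever neighbouring integer is *farther* away.
    value = float(raw_prediction)
    return max(value - math.floor(value), math.ceil(value) - value)

print(regression_confidence(2.8))   # ~0.8: the prediction rounds to 3 and sits far from 2
print(regression_confidence(2.5))   # 0.5: exactly between two whole scores
print(regression_confidence(3.0))   # 0.0: an exactly integral prediction yields zero,
                                    # one reason the docstring calls the value arbitrary
```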
-def get_classifier_and_ext(grader_data):
+def get_classifier_and_extractor(grader_data):
+    """
+    Finds the classifier and extractor from a completed training operation in order to perform the grading operation.
+    Args:
+        grader_data (dict): has the following keys, all self evident.
+            'classifier', 'model', 'feature_ext', 'extractor'
+    Returns:
+        A tuple of the form (model, extractor) which has those elements
+    """
     if 'classifier' in grader_data:
         model = grader_data['classifier']
     elif 'model' in grader_data:
         model = grader_data['model']
     else:
-        raise Exception("Cannot find a valid model.")
+        raise GradingRequestError("Cannot find a valid model.")
     if 'feature_ext' in grader_data:
         extractor = grader_data['feature_ext']
     elif 'extractor' in grader_data:
         extractor = grader_data['extractor']
     else:
-        raise Exception("Cannot find the extractor")
+        raise GradingRequestError("Cannot find the extractor")
     return model, extractor
...
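Finally, a small sketch of the key fallback that `get_classifier_and_extractor()` implements: either pair of key spellings resolves to the same tuple, and a payload with neither now raises the new `GradingRequestError`. `trained_model` and `trained_extractor` are placeholders.

```python
# Both key spellings accepted by get_classifier_and_extractor() yield the same tuple;
# trained_model / trained_extractor are placeholders for real training artifacts.
payload_a = {'classifier': trained_model, 'feature_ext': trained_extractor}
payload_b = {'model': trained_model, 'extractor': trained_extractor}

assert get_classifier_and_extractor(payload_a) == (trained_model, trained_extractor)
assert get_classifier_and_extractor(payload_b) == (trained_model, trained_extractor)

try:
    get_classifier_and_extractor({})   # no usable keys at all
except GradingRequestError as exc:
    print(exc)                         # "Cannot find a valid model."
```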