Commit 1708ba59 by Vik Paruchuri

Fix issues with generic stuff

parent f238e7db
@@ -8,17 +8,17 @@ import os
 import numpy
 import logging
 
-# Append sys to base path to import the following modules
+#Append sys to base path to import the following modules
 base_path = os.path.dirname(__file__)
 sys.path.append(base_path)
 
-# Depend on base path to be imported
+#Depend on base path to be imported
 from essay_set import EssaySet
 import predictor_extractor
 import predictor_set
 import util_functions
 
-# Imports needed to unpickle grader data
+#Imports needed to unpickle grader data
 import feature_extractor
 import sklearn.ensemble
 import math
@@ -26,7 +26,7 @@ import math
 
 log = logging.getLogger(__name__)
 
-def grade(grader_data, submission):
+def grade(grader_data,submission):
    """
    Grades a specified submission using specified models
    grader_data - A dictionary:
@@ -39,75 +39,80 @@ def grade(grader_data, submission):
    submission - The student submission (string)
    """
-    # Initialize result dictionary
-    results = {'errors': [], 'tests': [], 'score': 0, 'feedback': "", 'success': False, 'confidence': 0}
-    has_error = False
-    grader_set = EssaySet(essaytype="test")
+    #Initialize result dictionary
+    results = {'errors': [],'tests': [],'score': 0, 'feedback' : "", 'success' : False, 'confidence' : 0}
+    has_error=False
+
+    grader_set=EssaySet(essaytype="test")
     feedback = {}
-    # This is to preserve legacy functionality
+
+    model, extractor = get_classifier_and_ext(grader_data)
+
+    #This is to preserve legacy functionality
     if 'algorithm' not in grader_data:
         grader_data['algorithm'] = util_functions.AlgorithmTypes.classification
 
     try:
-        # Try to add essay to essay set object
-        grader_set.add_essay(str(submission), 0)
+        #Try to add essay to essay set object
+        grader_set.add_essay(str(submission),0)
         grader_set.update_prompt(str(grader_data['prompt']))
-    except:
-        results['errors'].append("Essay could not be added to essay set:{0}".format(submission))
-        has_error = True
+    except Exception:
+        error_message = "Essay could not be added to essay set:{0}".format(submission)
+        log.exception(error_message)
+        results['errors'].append(error_message)
+        has_error=True
 
-    # Try to extract features from submission and assign score via the model
+    #Try to extract features from submission and assign score via the model
     try:
-        grader_feats = grader_data['extractor'].gen_feats(grader_set)
-        feedback = grader_data['extractor'].gen_feedback(grader_set, grader_feats)[0]
-        results['score'] = int(grader_data['model'].predict(grader_feats)[0])
-    except:
-        results['errors'].append("Could not extract features and score essay.")
-        has_error = True
+        grader_feats=extractor.gen_feats(grader_set)
+        feedback=extractor.gen_feedback(grader_set,grader_feats)[0]
+        results['score']=int(model.predict(grader_feats)[0])
+    except Exception:
+        error_message = "Could not extract features and score essay."
+        log.exception(error_message)
+        results['errors'].append(error_message)
+        has_error=True
 
-    # Try to determine confidence level
+    #Try to determine confidence level
     try:
-        results['confidence'] = get_confidence_value(grader_data['algorithm'], grader_data['model'], grader_feats, results['score'], grader_data['score'])
-    except:
-        # If there is an error getting confidence, it is not a show-stopper, so just log
+        results['confidence'] = get_confidence_value(grader_data['algorithm'], model, grader_feats, results['score'], grader_data['score'])
+    except Exception:
+        #If there is an error getting confidence, it is not a show-stopper, so just log
         log.exception("Problem generating confidence value")
 
     if not has_error:
-        # If the essay is just a copy of the prompt, return a 0 as the score
-        if('too_similar_to_prompt' in feedback and feedback['too_similar_to_prompt']):
-            results['score'] = 0
-            results['correct'] = False
+        #If the essay is just a copy of the prompt, return a 0 as the score
+        if( 'too_similar_to_prompt' in feedback and feedback['too_similar_to_prompt']):
+            results['score']=0
+            results['correct']=False
 
-        results['success'] = True
-        # Generate short form output--number of problem areas identified in feedback
-        # Add feedback to results if available
+        results['success']=True
+
+        #Generate short form output--number of problem areas identified in feedback
+        #Add feedback to results if available
         results['feedback'] = {}
         if 'topicality' in feedback and 'prompt_overlap' in feedback:
             results['feedback'].update({
-                'topicality': feedback['topicality'],
-                'prompt-overlap': feedback['prompt_overlap'],
+                'topicality' : feedback['topicality'],
+                'prompt-overlap' : feedback['prompt_overlap'],
             })
 
         results['feedback'].update(
             {
-                'spelling': feedback['spelling'],
-                'grammar': feedback['grammar'],
-                'markup-text': feedback['markup_text'],
+                'spelling' : feedback['spelling'],
+                'grammar' : feedback['grammar'],
+                'markup-text' : feedback['markup_text'],
             }
         )
 
     else:
-        # If error, success is False.
-        results['success'] = False
+        #If error, success is False.
+        results['success']=False
 
     return results
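
For context on the call site: `grade` takes the unpickled grader data plus a raw submission string and returns the `results` dictionary assembled above. A minimal sketch of a caller, assuming this file is importable as the `grade` module and that the training pipeline pickled `grader_data` somewhere; the path and submission text are illustrative, not part of this commit:

```python
import pickle

from grade import grade  # assumption: this diffed file is the `grade` module

with open("grader_data.pkl", "rb") as f:  # illustrative path
    grader_data = pickle.load(f)

results = grade(grader_data, "Full text of one student submission.")
if results['success']:
    print(results['score'], results['confidence'])
else:
    print(results['errors'])
```
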
 
 def grade_generic(grader_data, numeric_features, textual_features):
    """
    Grades a set of numeric and textual features using a generic model
@@ -119,34 +124,40 @@ def grade_generic(grader_data, numeric_features, textual_features):
    textual_features - list of textual feature to predict on
    """
-    results = {'errors': [], 'tests': [], 'score': 0, 'success': False, 'confidence': 0}
-    has_error = False
-    # Try to find and load the model file
-    grader_set = predictor_set.PredictorSet(essaytype="test")
-    # Try to add essays to essay set object
+    results = {'errors': [],'tests': [],'score': 0, 'success' : False, 'confidence' : 0}
+    has_error=False
+
+    #Try to find and load the model file
+    grader_set=predictor_set.PredictorSet(essaytype="test")
+
+    model, extractor = get_classifier_and_ext(grader_data)
+
+    #Try to add essays to essay set object
     try:
-        grader_set.add_row(numeric_features, textual_features, 0)
-    except:
-        results['errors'].append("Row could not be added to predictor set:{0} {1}".format(numeric_features, textual_features))
-        has_error = True
+        grader_set.add_row(numeric_features, textual_features,0)
+    except Exception:
+        error_msg = "Row could not be added to predictor set:{0} {1}".format(numeric_features, textual_features)
+        log.exception(error_msg)
+        results['errors'].append(error_msg)
+        has_error=True
 
-    # Try to extract features from submission and assign score via the model
+    #Try to extract features from submission and assign score via the model
     try:
-        grader_feats = grader_data['extractor'].gen_feats(grader_set)
-        results['score'] = grader_data['model'].predict(grader_feats)[0]
-    except:
-        results['errors'].append("Could not extract features and score essay.")
-        has_error = True
+        grader_feats=extractor.gen_feats(grader_set)
+        results['score']=model.predict(grader_feats)[0]
+    except Exception:
+        error_msg = "Could not extract features and score essay."
+        log.exception(error_msg)
+        results['errors'].append(error_msg)
+        has_error=True
 
-    # Try to determine confidence level
+    #Try to determine confidence level
     try:
-        results['confidence'] = get_confidence_value(grader_data['algorithm'], grader_data['model'], grader_feats, results['score'])
-    except:
-        # If there is an error getting confidence, it is not a show-stopper, so just log
+        results['confidence'] = get_confidence_value(grader_data['algorithm'],model, grader_feats, results['score'])
+    except Exception:
+        #If there is an error getting confidence, it is not a show-stopper, so just log
        log.exception("Problem generating confidence value")
 
    if not has_error:
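
`grade_generic` follows the same pattern with one `PredictorSet` row in place of an essay. A sketch under the same assumptions as the earlier example (`grader_data` loaded from a pickle; feature values invented for illustration), mirroring the flat per-row lists that `add_row` receives and that the test fix below also enforces:

```python
from grade import grade_generic  # same module assumption as above

numeric_features = [3, 250]             # hypothetical numeric predictors for one row
textual_features = ["a free response"]  # hypothetical textual predictor for the row

results = grade_generic(grader_data, numeric_features, textual_features)
print(results['score'], results['confidence'])
```
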
@@ -154,8 +165,7 @@ def grade_generic(grader_data, numeric_features, textual_features):
    return results
 
-
-def get_confidence_value(algorithm, model, grader_feats, score, scores):
+def get_confidence_value(algorithm,model,grader_feats,score, scores):
    """
    Determines a confidence in a certain score, given proper input parameters
    algorithm- from util_functions.AlgorithmTypes
@@ -165,15 +175,32 @@ def get_confidence_value(algorithm, model, grader_feats, score, scores):
    """
    min_score=min(numpy.asarray(scores))
    max_score=max(numpy.asarray(scores))
-    if algorithm == util_functions.AlgorithmTypes.classification and hasattr(model, "predict_proba"):
+    if algorithm == util_functions.AlgorithmTypes.classification:
        #If classification, predict with probability, which gives you a matrix of confidences per score point
-        raw_confidence = model.predict_proba(grader_feats)[0, (float(score) -float(min_score))]
+        raw_confidence=model.predict_proba(grader_feats)[0,(float(score)-float(min_score))]
        #TODO: Normalize confidence somehow here
        confidence=raw_confidence
-    elif hasattr(model, "predict"):
+    else:
        raw_confidence = model.predict(grader_feats)[0]
        confidence = max(float(raw_confidence) - math.floor(float(raw_confidence)), math.ceil(float(raw_confidence)) - float(raw_confidence))
-    else:
-        confidence = 0
 
    return confidence
 
+def get_classifier_and_ext(grader_data):
+    if 'classifier' in grader_data:
+        model = grader_data['classifier']
+    elif 'model' in grader_data:
+        model = grader_data['model']
+    else:
+        raise Exception("Cannot find a valid model.")
+
+    if 'feature_ext' in grader_data:
+        extractor = grader_data['feature_ext']
+    elif 'extractor' in grader_data:
+        extractor = grader_data['extractor']
+    else:
+        raise Exception("Cannot find the extractor")
+
+    return model, extractor
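
The new `get_classifier_and_ext` helper is what both grading functions now route through: it prefers the newer `classifier`/`feature_ext` keys and falls back to the legacy `model`/`extractor` keys, raising if neither is present. A quick sketch of that fallback using stand-in objects:

```python
model_obj, ext_obj = object(), object()  # stand-ins for a trained model and extractor

# Newer key names take priority...
assert get_classifier_and_ext({'classifier': model_obj, 'feature_ext': ext_obj}) == (model_obj, ext_obj)
# ...and legacy pickles still resolve.
assert get_classifier_and_ext({'model': model_obj, 'extractor': ext_obj}) == (model_obj, ext_obj)
# With neither key present, it raises Exception("Cannot find a valid model.")
```

Note that with the `hasattr` guards removed, `get_confidence_value` now assumes a classification model exposes `predict_proba` and any other model exposes `predict`; the old `confidence = 0` fallback branch is gone.
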
@@ -108,7 +108,7 @@ class GenericTest(object):
        self.assertTrue(results['success'])
 
        grader = Grader(results)
-        grader.grade(self.text[0])
+        results = grader.grade(self.text[0])
        self.assertTrue(results['success'])
 
    def test_scoring_accuracy(self):
@@ -134,10 +134,10 @@ class GenericTest(object):
        grader = Grader(results)
        test_text = {
-            'textual_values' : [[self.text[0]]],
-            'numeric_values' : [[1]]
+            'textual_values' : [self.text[0]],
+            'numeric_values' : [1]
        }
-        grader.grade(test_text)
+        results = grader.grade(test_text)
        self.assertTrue(results['success'])
 
 class PolarityTest(unittest.TestCase,GenericTest):
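
On the test side, assigning `results = grader.grade(...)` makes the assertion check the fresh grading output instead of re-checking the `results` captured before grading, and the flattened `test_text` lists match the per-row shape that `grade_generic` hands directly to `PredictorSet.add_row`. The expected payload shape is now (values illustrative):

```python
test_text = {
    'textual_values': ["one textual feature for this row"],  # flat list, was [[...]]
    'numeric_values': [1],                                   # flat list, was [[1]]
}
```
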