Commit c9596b82 by Vik Paruchuri

Remove prompt overlap and topicality, fix cv error

parent 1fb02a6d
...@@ -220,9 +220,9 @@ class FeatureExtractor(object): ...@@ -220,9 +220,9 @@ class FeatureExtractor(object):
all_feedback=[] all_feedback=[]
for m in xrange(0,len(e_set._text)): for m in xrange(0,len(e_set._text)):
#Be very careful about changing these messages! #Be very careful about changing these messages!
individual_feedback={'grammar' : "Grammar: Ok.", 'spelling' : "Spelling: Ok.", individual_feedback={'grammar' : "Grammar: Ok.",
'topicality' : "Topicality: Ok.", 'markup_text' : "", 'spelling' : "Spelling: Ok.",
'prompt_overlap' : "Prompt Overlap: Ok.", 'markup_text' : "",
'grammar_per_char' : set_grammar_per_character[m], 'grammar_per_char' : set_grammar_per_character[m],
'spelling_per_char' : set_spell_errors_per_character[m], 'spelling_per_char' : set_spell_errors_per_character[m],
'too_similar_to_prompt' : False, 'too_similar_to_prompt' : False,
......
...@@ -81,10 +81,12 @@ def grade(grader_data,grader_config,submission): ...@@ -81,10 +81,12 @@ def grade(grader_data,grader_config,submission):
problem_areas+=len(feedback[tag])>5 problem_areas+=len(feedback[tag])>5
#Add feedback to results #Add feedback to results
results['feedback']={ results['feedback'] = {}
if 'topicality' in feedback and 'prompt_overlap' in feedback:
results['feedback'].update({
'topicality' : feedback['topicality'], 'topicality' : feedback['topicality'],
'prompt-overlap' : feedback['prompt_overlap'], 'prompt-overlap' : feedback['prompt_overlap'],
} })
if results['score']/float(max_score)<.33: if results['score']/float(max_score)<.33:
results['feedback'].update( results['feedback'].update(
......
...@@ -36,8 +36,7 @@ def run_single_worker(args): ...@@ -36,8 +36,7 @@ def run_single_worker(args):
texts=[] texts=[]
lines=sa_val.readlines() lines=sa_val.readlines()
eset=essay_set.EssaySet(type="train") eset=essay_set.EssaySet(type="train")
#len(lines) for i in xrange(1,len(lines)):
for i in xrange(1,10):
id_val,essay_set_num,score1,score2,text=lines[i].split("\t") id_val,essay_set_num,score1,score2,text=lines[i].split("\t")
score1s.append(int(score1)) score1s.append(int(score1))
score2s.append(int(score2)) score2s.append(int(score2))
...@@ -57,7 +56,7 @@ def run_single_worker(args): ...@@ -57,7 +56,7 @@ def run_single_worker(args):
clf=GradientBoostingRegressor(n_estimators=100, learn_rate=.05, max_depth=4, random_state=1, min_samples_leaf=3) clf=GradientBoostingRegressor(n_estimators=100, learn_rate=.05, max_depth=4, random_state=1, min_samples_leaf=3)
try: try:
cv_preds=util_functions.gen_cv_preds(clf,train_feats,score1s, num_chunks = 3) # int(math.floor(len(texts)/2) cv_preds=util_functions.gen_cv_preds(clf,train_feats,score1s, num_chunks = 10) # int(math.floor(len(texts)/2)
except: except:
cv_preds = score1s cv_preds = score1s
...@@ -79,7 +78,7 @@ def run_single_worker(args): ...@@ -79,7 +78,7 @@ def run_single_worker(args):
return err, kappa,percent_error,human_err,human_kappa,human_percent_error return err, kappa,percent_error,human_err,human_kappa,human_percent_error
length = len(filenames) length = len(filenames)
np=12 np=8
p = Pool(processes=np) p = Pool(processes=np)
errs, kappas,percent_errors,human_errs,human_kappas,human_percent_errors = zip(*p.map(run_single_worker,[(filenames[i],data_path) for i in xrange(0,length)])) errs, kappas,percent_errors,human_errs,human_kappas,human_percent_errors = zip(*p.map(run_single_worker,[(filenames[i],data_path) for i in xrange(0,length)]))
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment