Commit cc8f91a6 by Vik Paruchuri

Fix cv accuracy

parent 886190a8
...@@ -18,42 +18,42 @@ if not base_path.endswith("/"): ...@@ -18,42 +18,42 @@ if not base_path.endswith("/"):
FILENAME="sa_data.tsv" FILENAME="sa_data.tsv"
sa_val = file(FILENAME)
scores=[] all_err=[]
texts=[] all_kappa=[]
lines=sa_val.readlines()
for t_len in [0,50,100,200,300]:
eset=essay_set.EssaySet(type="train") sa_val = file(FILENAME)
scores=[]
for i in xrange(1,len(lines)): texts=[]
score,text=lines[i].split("\t\"") lines=sa_val.readlines()
scores.append(int(score)) eset=essay_set.EssaySet(type="train")
texts.append(text) for i in xrange(1,len(lines)):
eset.add_essay(text,int(score)) score,text=lines[i].split("\t\"")
#if int(score)==0: if len(text)>t_len:
# eset.generate_additional_essays(text,int(score)) scores.append(int(score))
texts.append(text)
extractor=feature_extractor.FeatureExtractor() eset.add_essay(text,int(score))
extractor.initialize_dictionaries(eset) #if int(score)==0:
train_feats=extractor.gen_feats(eset) # eset.generate_additional_essays(text,int(score))
extractor=feature_extractor.FeatureExtractor()
clf=GradientBoostingClassifier(n_estimators=100, learn_rate=.05, extractor.initialize_dictionaries(eset)
max_depth=4, random_state=1, train_feats=extractor.gen_feats(eset)
min_samples_leaf=3) clf=GradientBoostingClassifier(n_estimators=100, learn_rate=.05,max_depth=4, random_state=1,min_samples_leaf=3)
cv_preds=util_functions.gen_cv_preds(clf,train_feats,scores)
cv_preds=util_functions.gen_cv_preds(clf,train_feats,scores) err=numpy.mean(numpy.abs(cv_preds-scores))
print err
err=numpy.mean(numpy.abs(cv_preds-scores)) kappa=util_functions.quadratic_weighted_kappa(list(cv_preds),scores)
print err print kappa
all_err.append(err)
kappa=util_functions.quadratic_weighted_kappa(list(cv_preds),scores) all_kappa.append(kappa)
print kappa
"""
outfile=open("full_cvout.tsv",'w+') outfile=open("full_cvout.tsv",'w+')
outfile.write("cv_pred" + "\t" + "actual") outfile.write("cv_pred" + "\t" + "actual")
for i in xrange(0,len(cv_preds)): for i in xrange(0,len(cv_preds)):
outfile.write("{0}\t{1}".format(cv_preds[i],scores[i])) outfile.write("{0}\t{1}".format(cv_preds[i],scores[i]))
"""
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment