Commit cc8f91a6 by Vik Paruchuri

Fix cv accuracy

parent 886190a8
......@@ -18,42 +18,42 @@ if not base_path.endswith("/"):
FILENAME="sa_data.tsv"
sa_val = file(FILENAME)
scores=[]
texts=[]
lines=sa_val.readlines()
all_err=[]
all_kappa=[]
eset=essay_set.EssaySet(type="train")
for i in xrange(1,len(lines)):
for t_len in [0,50,100,200,300]:
sa_val = file(FILENAME)
scores=[]
texts=[]
lines=sa_val.readlines()
eset=essay_set.EssaySet(type="train")
for i in xrange(1,len(lines)):
score,text=lines[i].split("\t\"")
if len(text)>t_len:
scores.append(int(score))
texts.append(text)
eset.add_essay(text,int(score))
#if int(score)==0:
# eset.generate_additional_essays(text,int(score))
extractor=feature_extractor.FeatureExtractor()
extractor.initialize_dictionaries(eset)
train_feats=extractor.gen_feats(eset)
clf=GradientBoostingClassifier(n_estimators=100, learn_rate=.05,
max_depth=4, random_state=1,
min_samples_leaf=3)
cv_preds=util_functions.gen_cv_preds(clf,train_feats,scores)
err=numpy.mean(numpy.abs(cv_preds-scores))
print err
kappa=util_functions.quadratic_weighted_kappa(list(cv_preds),scores)
print kappa
outfile=open("full_cvout.tsv",'w+')
outfile.write("cv_pred" + "\t" + "actual")
for i in xrange(0,len(cv_preds)):
extractor=feature_extractor.FeatureExtractor()
extractor.initialize_dictionaries(eset)
train_feats=extractor.gen_feats(eset)
clf=GradientBoostingClassifier(n_estimators=100, learn_rate=.05,max_depth=4, random_state=1,min_samples_leaf=3)
cv_preds=util_functions.gen_cv_preds(clf,train_feats,scores)
err=numpy.mean(numpy.abs(cv_preds-scores))
print err
kappa=util_functions.quadratic_weighted_kappa(list(cv_preds),scores)
print kappa
all_err.append(err)
all_kappa.append(kappa)
"""
outfile=open("full_cvout.tsv",'w+')
outfile.write("cv_pred" + "\t" + "actual")
for i in xrange(0,len(cv_preds)):
outfile.write("{0}\t{1}".format(cv_preds[i],scores[i]))
"""
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment