Commit 13deffc6 by Vik Paruchuri

Fix some cv prediction stuff

parent bb995e3b
......@@ -10,6 +10,7 @@ import util_functions
import essay_set
import feature_extractor
import numpy
import math
from sklearn.ensemble import GradientBoostingClassifier
......@@ -40,7 +41,7 @@ for filename in filenames:
extractor.initialize_dictionaries(eset)
train_feats=extractor.gen_feats(eset)
clf=GradientBoostingClassifier(n_estimators=100, learn_rate=.05,max_depth=4, random_state=1,min_samples_leaf=3)
cv_preds=util_functions.gen_cv_preds(clf,train_feats,scores)
cv_preds=util_functions.gen_cv_preds(clf,train_feats,scores, num_chunks = int(math.floor(len(texts)/2)))
err=numpy.mean(numpy.abs(cv_preds-scores))
print err
kappa=util_functions.quadratic_weighted_kappa(list(cv_preds),scores)
......
......@@ -269,12 +269,12 @@ def gen_cv_preds(clf, arr, sel_score, num_chunks=3):
preds = []
set_score = numpy.asarray(sel_score, dtype=numpy.int)
chunk_vec = numpy.asarray(range(0, len(chunks)))
for i in range(0, len(chunks)):
for i in xrange(0, len(chunks)):
loop_inds = list(
chain.from_iterable([chunks[int(z)] for z, m in enumerate(range(0, len(chunks))) if int(z) != i]))
sim_fit = clf.fit(arr[loop_inds], set_score[loop_inds])
preds.append(sim_fit.predict(arr[chunks[i]]))
all_preds = numpy.concatenate((preds[0], preds[1], preds[2]), axis=0)
preds.append(list(sim_fit.predict(arr[chunks[i]])))
all_preds = list(chain(*preds))
return(all_preds)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment