Commit d7f57c57 by Vik Paruchuri

Implement flexible switching between classification and regression

parent 4286f7e9
"""
Functions that create a machine learning model from training data
"""
import os
import sys
import logging
......
......@@ -103,15 +103,7 @@ def get_cv_error(clf,feats,scores):
return results
def extract_features_and_generate_model_predictors(predictor_set, type=util_functions.AlgorithmTypes.regression):
if(algorithm not in [util_functions.AlgorithmTypes.regression, util_functions.AlgorithmTypes.classification]):
algorithm = util_functions.AlgorithmTypes.regression
f = predictor_extractor.PredictorExtractor()
f.initialize_dictionaries(predictor_set)
train_feats = f.gen_feats(predictor_set)
def get_algorithms(type):
if type == util_functions.AlgorithmTypes.classification:
clf = sklearn.ensemble.GradientBoostingClassifier(n_estimators=100, learn_rate=.05,
max_depth=4, random_state=1,min_samples_leaf=3)
......@@ -122,7 +114,19 @@ def extract_features_and_generate_model_predictors(predictor_set, type=util_func
max_depth=4, random_state=1,min_samples_leaf=3)
clf2=sklearn.ensemble.GradientBoostingRegressor(n_estimators=100, learn_rate=.05,
max_depth=4, random_state=1,min_samples_leaf=3)
return clf, clf2
def extract_features_and_generate_model_predictors(predictor_set, type=util_functions.AlgorithmTypes.regression):
if(algorithm not in [util_functions.AlgorithmTypes.regression, util_functions.AlgorithmTypes.classification]):
algorithm = util_functions.AlgorithmTypes.regression
f = predictor_extractor.PredictorExtractor()
f.initialize_dictionaries(predictor_set)
train_feats = f.gen_feats(predictor_set)
clf,clf2 = get_algorithms(type)
cv_error_results=get_cv_error(clf2,train_feats,predictor_set._target)
try:
......@@ -153,16 +157,17 @@ def extract_features_and_generate_model(essays,additional_array=None):
if(additional_array.shape[0]==train_feats.shape[0]):
train_feats=numpy.concatenate((train_feats,additional_array),axis=1)
clf = sklearn.ensemble.GradientBoostingClassifier(n_estimators=100, learn_rate=.05,
max_depth=4, random_state=1,min_samples_leaf=3)
set_score = numpy.asarray(essays._score, dtype=numpy.int)
if len(util_functions.f7(list(set_score)))>5:
type = util_functions.AlgorithmTypes.regression
else:
type = util_functions.AlgorithmTypes.classification
clf2=sklearn.ensemble.GradientBoostingClassifier(n_estimators=100, learn_rate=.05,
max_depth=4, random_state=1,min_samples_leaf=3)
clf,clf2 = get_algorithms(type)
cv_error_results=get_cv_error(clf2,train_feats,essays._score)
try:
set_score = numpy.asarray(essays._score, dtype=numpy.int)
clf.fit(train_feats, set_score)
except ValueError:
log.exception("Not enough classes (0,1,etc) in sample.")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment