Add some docs to create

b70221db · Vik Paruchuri · 006f9342 · b70221db
Commit b70221db authored Feb 26, 2013 by Vik Paruchuri
Hide whitespace changes
Inline Side-by-side

Showing with 31 additions and 6 deletions

create.py
+31 -6

No files found.
--- a/create.py
+++ b/create.py
@@ -5,24 +5,35 @@ Functions that create a machine learning model from training data
 import os
 import sys
 import logging
-log = logging.getLogger(__name__)
+from statsd import statsd

+#Define base path and add to sys path
 base_path = os.path.dirname(__file__)
 sys.path.append(base_path)
-
 one_up_path = os.path.abspath(os.path.join(os.path.dirname(__file__),'..'))
 sys.path.append(one_up_path)

+#Import modules that are dependent on the base path
 import model_creator
 import util_functions
 import predictor_set
 import predictor_extractor

-from statsd import statsd
+#Make a log
+log = logging.getLogger(__name__)

 @statsd.timed('open_ended_assessment.machine_learning.creator.time')
-def create(text,score,prompt_string,model_path):
-
+def create(text,score,prompt_string,model_path = None):
+    """
+    Creates a machine learning model from input text, associated scores, a prompt, and a path to the model
+    TODO: Remove model path argument, it is needed for now to support legacy code
+    text - A list of strings containing the text of the essays
+    score - a list of integers containing score values
+    prompt_string - the common prompt for the set of essays
+    model_path - Deprecated, not needed
+    """
+
+    #Initialize a results dictionary to return
    results = {'errors': [],'success' : False, 'cv_kappa' : 0, 'cv_mean_absolute_error': 0,
               'feature_ext' : "", 'classifier' : ""}

@@ -33,12 +44,14 @@ def create(text,score,prompt_string,model_path):
        return results

    try:
+        #Create an essay set object that encapsulates all the essays and alternate representations (tokens, etc)
        e_set = model_creator.create_essay_set(text, score, prompt_string)
    except:
        msg = "essay set creation failed."
        results['errors'].append(msg)
        log.exception(msg)
    try:
+        #Gets features from the essay set and computes error
        feature_ext, classifier, cv_error_results = model_creator.extract_features_and_generate_model(e_set)
        results['cv_kappa']=cv_error_results['kappa']
        results['cv_mean_absolute_error']=cv_error_results['mae']
@@ -57,7 +70,17 @@ def create(text,score,prompt_string,model_path):
    return results


-def create_generic(numeric_values, textual_values, target, model_path, algorithm = util_functions.AlgorithmTypes.regression):
+def create_generic(numeric_values, textual_values, target, model_path = None, algorithm = util_functions.AlgorithmTypes.regression):
+    """
+    Creates a model from a generic list of numeric values and text values
+    numeric_values - A list of lists that are the numeric predictors
+    textual_values - A list of lists that are the textual predictors
+    (each item in textual_values corresponds to the similarly indexed counterpart in numeric_values)
+    target - The variable that we are trying to predict.  A list of integers.
+    model_path - deprecated, kept for legacy code.  Do not use.
+    """
+
+    #Initialize a result dictionary to return.
    results = {'errors': [],'success' : False, 'cv_kappa' : 0, 'cv_mean_absolute_error': 0,
               'feature_ext' : "", 'classifier' : "", 'algorithm' : algorithm}

@@ -68,6 +91,7 @@ def create_generic(numeric_values, textual_values, target, model_path, algorithm
        return results

    try:
+        #Initialize a predictor set object that encapsulates all of the text and numeric predictors
        pset = predictor_set.PredictorSet(type="train")
        for i in xrange(0, len(numeric_values)):
            pset.add_row(numeric_values[i], textual_values[i], target[i])
@@ -77,6 +101,7 @@ def create_generic(numeric_values, textual_values, target, model_path, algorithm
        log.exception(msg)

    try:
+        #Extract all features and then train a classifier with the features
        feature_ext, classifier, cv_error_results = model_creator.extract_features_and_generate_model_predictors(pset, algorithm)
        results['cv_kappa']=cv_error_results['kappa']
        results['cv_mean_absolute_error']=cv_error_results['mae']