Small bug fix: `_good_pos_ngrams` in feature_extractor was being used before it was initialized.

e6dddf72 · gradyward · 4047b1a5 · e6dddf72 · e6dddf72
Commit e6dddf72 authored Jun 13, 2014 by gradyward
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 6 deletions

ease/create.py
+2 -2

ease/feature_extractor.py
+5 -4

No files found.
--- a/ease/create.py
+++ b/ease/create.py
@@ -168,7 +168,7 @@ def _extract_features_and_generate_model(essay_set):

    predict_classifier, cv_error_classifier = _instantiate_algorithms(algorithm)

-    cv_error_results = get_cv_error(cv_error_classifier, features, essay_set._scores)
+    cv_error_results = _get_cv_error(cv_error_classifier, features, essay_set._scores)

    try:
        predict_classifier.fit(features, set_scores)
@@ -213,7 +213,7 @@ def _instantiate_algorithms(algorithm):
    return clf, clf2


-def get_cv_error(classifier, features, scores):
+def _get_cv_error(classifier, features, scores):
    """
    Gets cross validated error for a given classifier, set of features, and scores


--- a/ease/feature_extractor.py
+++ b/ease/feature_extractor.py
@@ -47,6 +47,11 @@ class FeatureExtractor(object):
            max_features_pass_2: The maximum number of features we consider on the second pass of vocabulary grooming

        """
+
+        self._good_pos_ngrams = self._get_good_pos_ngrams()
+        self._spell_errors_per_character = 0
+        self._grammar_errors_per_character = 0
+
        if hasattr(essay_set, '_type'):
            if essay_set._type == "train":
                # Finds vocabulary which differentiates good/high scoring essays from bad/low scoring essays.
@@ -99,10 +104,6 @@ class FeatureExtractor(object):
        else:
            raise util_functions.InputError(essay_set, "wrong input. need an essay set object.")

-        self._good_pos_ngrams = self._get_good_pos_ngrams()
-        self._spell_errors_per_character = 0
-        self._grammar_errors_per_character = 0
-
    def generate_features(self, essay_set):
        """
        Generates bag of words, length, and prompt features from an essay set object