Document feedback function

91013c9f · Vik Paruchuri · 152e23d6 · 91013c9f
Commit 91013c9f authored Nov 07, 2012 by Vik Paruchuri
Hide whitespace changes
Inline Side-by-side

Showing with 15 additions and 0 deletions

feature_extractor.py
+15 -0

No files found.
--- a/feature_extractor.py
+++ b/feature_extractor.py
@@ -201,14 +201,23 @@ class FeatureExtractor(object):
        in order to get off topic feedback.
        Returns a list of lists (one list per essay in e_set)
        """
+
+        #Set ratio to modify thresholds for grammar/spelling errors
        modifier_ratio=1.05
+
+        #Calculate the number of grammar and spelling errors per character for each essay
        set_grammar,bad_pos_positions=self._get_grammar_errors(e_set._pos,e_set._text,e_set._tokens)
        set_grammar_per_character=[set_grammar[m]/float(len(e_set._text[m])) for m in xrange(0,len(e_set._text))]
        set_spell_errors_per_character=[e_set._spelling_errors[m]/float(len(e_set._text[m])) for m in xrange(0,len(e_set._text))]
+
+        #Iterate through essays and create a feedback dict for each
        all_feedback=[]
        for m in xrange(0,len(e_set._text)):
            individual_feedback={'grammar' : "Ok.", 'spelling' : "Ok.", 'topicality' : "Ok.", 'markup_text' : ""}
            markup_tokens=e_set._markup_text[m].split(" ")
+
+            #This loop ensures that sequences of bad grammar get put together into one sequence instead of staying
+            #disjointed
            bad_pos_starts=[z[0] for z in bad_pos_positions[m]]
            bad_pos_ends=[z[1]-1 for z in bad_pos_positions[m]]
            for z in xrange(0,len(markup_tokens)):
@@ -220,15 +229,21 @@ class FeatureExtractor(object):
                if max(bad_pos_ends)>(len(markup_tokens)-1) and max(bad_pos_starts)<(len(markup_tokens)-1):
                    markup_tokens[len(markup_tokens)-1]+="</bg>"

+            #Flag grammar/spelling if errors per character exceed the training set average scaled by modifier_ratio
            if set_grammar_per_character[m]>(self._grammar_errors_per_character*modifier_ratio):
                individual_feedback['grammar']="Potential grammar errors."
            if set_spell_errors_per_character[m]>(self._spell_errors_per_character*modifier_ratio):
                individual_feedback['spelling']="Potential spelling errors."
+
+            #Test topicality: compare on topic words per character against the training set mean divided by 1.5;
+            #essays shorter than 20 characters are also flagged.  Requires features to be passed in
            if features is not None:
                f_row_sum=numpy.sum(features[m,12:])
                f_row_prop=f_row_sum/len(e_set._text[m])
                if f_row_prop<(self._mean_f_prop/1.5) or len(e_set._text[m])<20:
                    individual_feedback['topicality']="Essay may be off topic."
+
+            #Create string representation of markup text
            markup_string=" ".join(markup_tokens)
            individual_feedback['markup_text']=markup_string
            all_feedback.append(individual_feedback)