grammar markup

a276d16c · Vik Paruchuri · 4652659c · a276d16c · a276d16c
Commit a276d16c authored Nov 05, 2012 by Vik Paruchuri
Hide whitespace changes
Inline Side-by-side

Showing with 15 additions and 5 deletions

feature_extractor.py
+14 -4

util_functions.py
+1 -1

No files found.
--- a/feature_extractor.py
+++ b/feature_extractor.py
@@ -204,16 +204,26 @@ class FeatureExtractor(object):
        set_spell_errors_per_character=[e_set._spelling_errors[m]/float(len(e_set._text[m])) for m in xrange(0,len(e_set._text))]
        all_feedback=[]
        for m in xrange(0,len(e_set._text)):
-            individual_feedback=[]
+            individual_feedback={'grammar' : "Ok.", 'spelling' : "Ok.", 'topicality' : "Ok.", 'markup_text' : ""}
+            markup_tokens=nltk.word_tokenize(e_set._markup_text)
+            bad_pos_starts=[z[0] for z in bad_pos_positions[m]]
+            bad_pos_ends=[z[1] for z in bad_pos_positions[m]]
+            # Wrap flagged token spans in [[ ... ]]: assign (not compare) so the opening marker is actually stored.
+            for z in xrange(0,len(markup_tokens)):
+                if z in bad_pos_starts:
+                    markup_tokens[z]="[[" + markup_tokens[z]
+                elif z in bad_pos_ends:
+                    markup_tokens[z]=markup_tokens[z] + "]]"
            if set_grammar_per_character[m]>(self._grammar_errors_per_character*modifier_ratio):
-                individual_feedback.append("Potential grammar errors.")
+                individual_feedback['grammar']="Potential grammar errors."
            if set_spell_errors_per_character[m]>(self._spell_errors_per_character*modifier_ratio):
-                individual_feedback.append("Potential spelling errors.")
+                individual_feedback['spelling']="Potential spelling errors."
            if features is not None:
                f_row_sum=numpy.sum(features[m,12:])
                f_row_prop=f_row_sum/len(e_set._text[m])
                if f_row_prop<(self._mean_f_prop):
-                    individual_feedback.append("Essay may be off topic.")
+                    individual_feedback['topicality']="Essay may be off topic."
+            individual_feedback['markup_text']=" ".join(markup_tokens)
            all_feedback.append(individual_feedback)
        return all_feedback
--- a/util_functions.py
+++ b/util_functions.py
@@ -88,7 +88,7 @@ def spell_correct(string):
        sub_pat = r"\b" + incorrect_words[i] + r"\b"
        sub_comp = re.compile(sub_pat)
        newstring = re.sub(sub_comp, correct_spelling[i], newstring)
-        markup_string=re.sub(sub_comp,"[[" + correct_spelling[i] + "]]", markup_string)
+        markup_string=re.sub(sub_comp,".." + correct_spelling[i] + "..", markup_string)
    return newstring,len(incorrect_words),markup_string