Add a way for the spell corrector to mark up text

e2a4b13c · Vik Paruchuri · 709c160e · e2a4b13c · e2a4b13c · e2a4b13c
Commit e2a4b13c authored Nov 05, 2012 by Vik Paruchuri
Hide whitespace changes
Inline Side-by-side

Showing with 10 additions and 6 deletions

essay_set.py
+3 -1

tests/tmpfile
+2 -2

tmpfile
+2 -2

util_functions.py
+3 -1

No files found.
--- a/essay_set.py
+++ b/essay_set.py
@@ -37,6 +37,7 @@ class EssaySet(object):
        self._generated = []
        self._prompt = ""
        self._spelling_errors=[]
+        self._markup_text=[]
    def add_essay(self, essay_text, essay_score, essay_generated=0):
        """
@@ -62,9 +63,10 @@ class EssaySet(object):
                cleaned_essay=cleaned_essay[0:MAXIMUM_ESSAY_LENGTH]
            self._text.append(cleaned_essay)
            # Spell correct text using aspell
-            cleaned_text,spell_errors=util_functions.spell_correct(self._text[len(self._text) - 1])
+            cleaned_text,spell_errors,markup_text=util_functions.spell_correct(self._text[len(self._text) - 1])
            self._clean_text.append(cleaned_text)
            self._spelling_errors.append(spell_errors)
+            self._markup_text.append(markup_text)
            # Tokenize text
            self._tokens.append(nltk.word_tokenize(self._clean_text[len(self._clean_text) - 1]))
            # Part of speech tag text

--- a/tests/tmpfile
+++ b/tests/tmpfile
-to replicate this experiment i would need to know how much vinegar to pour into the containers to insure each sample has to same amount to react to . i would need to also know which type of container to use .
+ in order to replicate this experiment , we would need to know the temperature of the vinegar as well as how much vinegar to put in . both of these could vary and therefore change the result of the experiment . 
\ No newline at end of file
--- a/tmpfile
+++ b/tmpfile
-This is a well written string and everything is spelled correctly.
+In order to replicate this experiment, I would need to know additional information such as the four different samples that they used (because I could have choosen metal, carbboard and many other sample materials that they didn't use and would get different results. Also I would also need to know the amount of vinegar to pour because this can caute a major change. Lastly, they might want to tell where to sit the samples while they dry for 30 minutes because if they are sitting in room temp. or by a light source makes a difference too.
\ No newline at end of file
--- a/util_functions.py
+++ b/util_functions.py
@@ -83,11 +83,13 @@ def spell_correct(string):
                    incorrect_words.append(begword)
                    correct_spelling.append(sug)
    newstring = string
+    markup_string = string
    for i in range(0, len(incorrect_words)):
        sub_pat = r"\b" + incorrect_words[i] + r"\b"
        sub_comp = re.compile(sub_pat)
        newstring = re.sub(sub_comp, correct_spelling[i], newstring)
-    return newstring,len(incorrect_words)
+        markup_string=re.sub(sub_comp,"[[" + correct_spelling[i] + "]]", markup_string)
+    return newstring,len(incorrect_words),markup_string
 def ngrams(tokens, min_n, max_n):