Commit e2a4b13c by Vik Paruchuri

Add a way for the spell corrector to mark up text

parent 709c160e
...@@ -37,6 +37,7 @@ class EssaySet(object): ...@@ -37,6 +37,7 @@ class EssaySet(object):
self._generated = [] self._generated = []
self._prompt = "" self._prompt = ""
self._spelling_errors=[] self._spelling_errors=[]
self._markup_text=[]
def add_essay(self, essay_text, essay_score, essay_generated=0): def add_essay(self, essay_text, essay_score, essay_generated=0):
""" """
...@@ -62,9 +63,10 @@ class EssaySet(object): ...@@ -62,9 +63,10 @@ class EssaySet(object):
cleaned_essay=cleaned_essay[0:MAXIMUM_ESSAY_LENGTH] cleaned_essay=cleaned_essay[0:MAXIMUM_ESSAY_LENGTH]
self._text.append(cleaned_essay) self._text.append(cleaned_essay)
# Spell correct text using aspell # Spell correct text using aspell
cleaned_text,spell_errors=util_functions.spell_correct(self._text[len(self._text) - 1]) cleaned_text,spell_errors,markup_text=util_functions.spell_correct(self._text[len(self._text) - 1])
self._clean_text.append(cleaned_text) self._clean_text.append(cleaned_text)
self._spelling_errors.append(spell_errors) self._spelling_errors.append(spell_errors)
self._markup_text.append(markup_text)
# Tokenize text # Tokenize text
self._tokens.append(nltk.word_tokenize(self._clean_text[len(self._clean_text) - 1])) self._tokens.append(nltk.word_tokenize(self._clean_text[len(self._clean_text) - 1]))
# Part of speech tag text # Part of speech tag text
......
to replicate this experiment i would need to know how much vinegar to pour into the containers to insure each sample has to same amount to react to . i would need to also know which type of container to use . in order to replicate this experiment , we would need to know the temperature of the vinegar as well as how much vinegar to put in . both of these could vary and therefore change the result of the experiment .
\ No newline at end of file \ No newline at end of file
This is a well written string and everything is spelled correctly. In order to replicate this experiment, I would need to know additional information such as the four different samples that they used (because I could have choosen metal, carbboard and many other sample materials that they didn't use and would get different results. Also I would also need to know the amount of vinegar to pour because this can caute a major change. Lastly, they might want to tell where to sit the samples while they dry for 30 minutes because if they are sitting in room temp. or by a light source makes a difference too.
\ No newline at end of file \ No newline at end of file
...@@ -83,11 +83,13 @@ def spell_correct(string): ...@@ -83,11 +83,13 @@ def spell_correct(string):
incorrect_words.append(begword) incorrect_words.append(begword)
correct_spelling.append(sug) correct_spelling.append(sug)
newstring = string newstring = string
markup_string = string
for i in range(0, len(incorrect_words)): for i in range(0, len(incorrect_words)):
sub_pat = r"\b" + incorrect_words[i] + r"\b" sub_pat = r"\b" + incorrect_words[i] + r"\b"
sub_comp = re.compile(sub_pat) sub_comp = re.compile(sub_pat)
newstring = re.sub(sub_comp, correct_spelling[i], newstring) newstring = re.sub(sub_comp, correct_spelling[i], newstring)
return newstring,len(incorrect_words) markup_string=re.sub(sub_comp,"[[" + correct_spelling[i] + "]]", markup_string)
return newstring,len(incorrect_words),markup_string
def ngrams(tokens, min_n, max_n): def ngrams(tokens, min_n, max_n):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment