Commit c36d48d4 by Vik Paruchuri

move part of speech tagger away from nltk word splitting

parent b3bbeec3
......@@ -70,7 +70,7 @@ class EssaySet(object):
# Tokenize text
self._tokens.append(nltk.word_tokenize(self._clean_text[len(self._clean_text) - 1]))
# Part of speech tag text
self._pos.append(nltk.pos_tag(self._tokens[len(self._tokens) - 1]))
self._pos.append(nltk.pos_tag(self._clean_text[len(self._clean_text) - 1].split(" ")))
self._generated.append(essay_generated)
# Stem spell corrected text
porter = nltk.PorterStemmer()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment