Commit 5487bad6 by Vik Paruchuri

Fix ASCII decoding issue

parent fc598c06
...@@ -77,7 +77,10 @@ class EssaySet(object): ...@@ -77,7 +77,10 @@ class EssaySet(object):
self._id.append(max_id + 1) self._id.append(max_id + 1)
self._score.append(essay_score) self._score.append(essay_score)
# Clean text by removing non digit/word/punctuation characters # Clean text by removing non digit/word/punctuation characters
essay_text=str(essay_text.encode('ascii', 'ignore')) try:
essay_text=str(essay_text.encode('ascii', 'ignore'))
except:
essay_text = (essay_text.decode('utf-8','replace')).encode('ascii','ignore')
cleaned_essay=util_functions.sub_chars(essay_text).lower() cleaned_essay=util_functions.sub_chars(essay_text).lower()
if(len(cleaned_essay)>MAXIMUM_ESSAY_LENGTH): if(len(cleaned_essay)>MAXIMUM_ESSAY_LENGTH):
cleaned_essay=cleaned_essay[0:MAXIMUM_ESSAY_LENGTH] cleaned_essay=cleaned_essay[0:MAXIMUM_ESSAY_LENGTH]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment