Commit 1865c54d by Vik Paruchuri

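Fix encoding issues: strip non-ASCII characters from essay text before validation, accept any basestring in the essay-text type check, and avoid division by zero on empty essays when computing per-character grammar and spelling error rates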

parent aa57934c
...@@ -58,6 +58,13 @@ class EssaySet(object): ...@@ -58,6 +58,13 @@ class EssaySet(object):
# Verify that essay_score is an int, essay_text is a string, and essay_generated equals 0 or 1 # Verify that essay_score is an int, essay_text is a string, and essay_generated equals 0 or 1
try: try:
essay_text=essay_text.encode('ascii', 'ignore')
if len(essay_text)<5:
essay_text="Invalid essay."
except:
log.exception("Could not parse essay into ascii.")
try:
#Try conversion of types #Try conversion of types
essay_score=int(essay_score) essay_score=int(essay_score)
essay_text=str(essay_text) essay_text=str(essay_text)
...@@ -65,7 +72,7 @@ class EssaySet(object): ...@@ -65,7 +72,7 @@ class EssaySet(object):
#Nothing needed here, will return error in any case. #Nothing needed here, will return error in any case.
log.exception("Invalid type for essay score : {0} or essay text : {1}".format(type(essay_score),type(essay_text))) log.exception("Invalid type for essay score : {0} or essay text : {1}".format(type(essay_score),type(essay_text)))
if type(essay_score) == type(0) and type(essay_text) == type("text")\ if isinstance(essay_score,int) and isinstance(essay_text, basestring)\
and (essay_generated == 0 or essay_generated == 1): and (essay_generated == 0 or essay_generated == 1):
self._id.append(max_id + 1) self._id.append(max_id + 1)
self._score.append(essay_score) self._score.append(essay_score)
......
...@@ -213,8 +213,8 @@ class FeatureExtractor(object): ...@@ -213,8 +213,8 @@ class FeatureExtractor(object):
#Calc number of grammar and spelling errors per character #Calc number of grammar and spelling errors per character
set_grammar,bad_pos_positions=self._get_grammar_errors(e_set._pos,e_set._text,e_set._tokens) set_grammar,bad_pos_positions=self._get_grammar_errors(e_set._pos,e_set._text,e_set._tokens)
set_grammar_per_character=[set_grammar[m]/float(len(e_set._text[m])) for m in xrange(0,len(e_set._text))] set_grammar_per_character=[set_grammar[m]/float(len(e_set._text[m])+.1) for m in xrange(0,len(e_set._text))]
set_spell_errors_per_character=[e_set._spelling_errors[m]/float(len(e_set._text[m])) for m in xrange(0,len(e_set._text))] set_spell_errors_per_character=[e_set._spelling_errors[m]/float(len(e_set._text[m])+.1) for m in xrange(0,len(e_set._text))]
#Iterate through essays and create a feedback dict for each #Iterate through essays and create a feedback dict for each
all_feedback=[] all_feedback=[]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment