Commit d7f6245d by Vik Paruchuri

Add better off-topic detection

parent 5edf82ef
...@@ -44,6 +44,9 @@ class FeatureExtractor(object): ...@@ -44,6 +44,9 @@ class FeatureExtractor(object):
self._spell_errors_per_character=sum(e_set._spelling_errors)/float(sum([len(t) for t in e_set._text])) self._spell_errors_per_character=sum(e_set._spelling_errors)/float(sum([len(t) for t in e_set._text]))
self._grammar_errors_per_character=1-(sum(self._get_grammar_errors self._grammar_errors_per_character=1-(sum(self._get_grammar_errors
(e_set._pos,e_set._text,e_set._tokens))/float(sum([len(t) for t in e_set._text]))) (e_set._pos,e_set._text,e_set._tokens))/float(sum([len(t) for t in e_set._text])))
bag_feats=self.gen_bag_feats(e_set)
f_row_sum=numpy.sum(bag_feats[:,:])/bag_feats.shape[0]
self._mean_f_prop=f_row_sum/float(sum([len(t) for t in e_set._text]))
ret = "ok" ret = "ok"
else: else:
raise util_functions.InputError(e_set, "needs to be an essay set of the train type.") raise util_functions.InputError(e_set, "needs to be an essay set of the train type.")
...@@ -165,14 +168,14 @@ class FeatureExtractor(object): ...@@ -165,14 +168,14 @@ class FeatureExtractor(object):
all_feedback=[] all_feedback=[]
for m in xrange(0,len(e_set._text)): for m in xrange(0,len(e_set._text)):
individual_feedback=[] individual_feedback=[]
if set_grammar_per_character[m]>self._grammar_errors_per_character: if set_grammar_per_character[m]>(self._grammar_errors_per_character):
individual_feedback.append("Potential grammatical errors.") individual_feedback.append("Potential grammar errors.")
if set_spell_errors_per_character[m]>self._spell_errors_per_character: if set_spell_errors_per_character[m]>(self._spell_errors_per_character):
individual_feedback.append("Potential spelling errors.") individual_feedback.append("Potential spelling errors.")
if features is not None: if features is not None:
f_row_sum=numpy.sum(features[m,12:]) f_row_sum=numpy.sum(features[m,12:])
f_row_prop=f_row_sum/len(e_set._text[m]) f_row_prop=f_row_sum/len(e_set._text[m])
if f_row_prop<.05: if f_row_prop<(self._mean_f_prop):
individual_feedback.append("Essay may be off topic.") individual_feedback.append("Essay may be off topic.")
all_feedback.append(individual_feedback) all_feedback.append(individual_feedback)
......
in order to replicate this experiment , we would need to know the temperature of the vinegar as well as how much vinegar to put in . both of these could vary and therefore change the result of the experiment . the self employed person variable be the marble , limestone natalie wood , charge plate .the marble bug out designate wa . deoxyguanosine monophosphate . the limestone be . thousand and sir henry joseph wood come out full point . .the stop of the try out comprise that you give birth to limit the hoi polloi of to each one sample distribution .
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment