Commit 8a5dbd1e by Vik Paruchuri

Created a way to get the positions of bad grammar

parent 760bcddc
...@@ -11,6 +11,7 @@ import pickle ...@@ -11,6 +11,7 @@ import pickle
import os import os
from itertools import chain from itertools import chain
import copy import copy
import operator
base_path = os.path.dirname(__file__) base_path = os.path.dirname(__file__)
sys.path.append(base_path) sys.path.append(base_path)
...@@ -88,7 +89,19 @@ class FeatureExtractor(object): ...@@ -88,7 +89,19 @@ class FeatureExtractor(object):
pos_seq = [tag[1] for tag in pos[i]] pos_seq = [tag[1] for tag in pos[i]]
pos_ngrams = util_functions.ngrams(pos_seq, min_pos_seq, max_pos_seq) pos_ngrams = util_functions.ngrams(pos_seq, min_pos_seq, max_pos_seq)
long_pos_ngrams=[z for z in pos_ngrams if z.count(' ')==(max_pos_seq-1)] long_pos_ngrams=[z for z in pos_ngrams if z.count(' ')==(max_pos_seq-1)]
bad_pos_positions=[z for z in xrange(0,len(long_pos_ngrams)) if long_pos_ngrams[z] not in self._good_pos_ngrams] bad_pos_tuples=[[z,z+max_pos_seq] for z in xrange(0,len(long_pos_ngrams)) if long_pos_ngrams[z] not in self._good_pos_ngrams]
bad_pos_tuples.sort(key=operator.itemgetter(1))
to_delete=[]
for m in reversed(xrange(len(bad_pos_tuples)-1)):
start, end = bad_pos_tuples[m]
for j in xrange(m+1, len(bad_pos_tuples)):
lstart, lend = bad_pos_tuples[j]
if lstart >= start and lstart <= end:
bad_pos_tuples[m][1]=bad_pos_tuples[j][1]
to_delete.append(j)
fixed_bad_pos_tuples=[bad_pos_tuples[z] for z in xrange(0,len(bad_pos_tuples)) if z not in to_delete]
overlap_ngrams = [z for z in pos_ngrams if z in self._good_pos_ngrams] overlap_ngrams = [z for z in pos_ngrams if z in self._good_pos_ngrams]
good_pos_tags.append(len(overlap_ngrams)) good_pos_tags.append(len(overlap_ngrams))
return good_pos_tags return good_pos_tags
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment