Commit 94014b71 by Will Daly

Use a set instead of a list for good ngram lookup

parent a990b25e
...@@ -74,8 +74,8 @@ class FeatureExtractor(object): ...@@ -74,8 +74,8 @@ class FeatureExtractor(object):
def get_good_pos_ngrams(self): def get_good_pos_ngrams(self):
""" """
Gets a list of grammatically correct part of speech sequences from an input file called essaycorpus.txt Gets a set of grammatically correct part of speech sequences from an input file called essaycorpus.txt
Returns the list and caches the file Returns the set and caches the file
""" """
if(os.path.isfile(NGRAM_PATH)): if(os.path.isfile(NGRAM_PATH)):
good_pos_ngrams = pickle.load(open(NGRAM_PATH, 'rb')) good_pos_ngrams = pickle.load(open(NGRAM_PATH, 'rb'))
...@@ -92,7 +92,7 @@ class FeatureExtractor(object): ...@@ -92,7 +92,7 @@ class FeatureExtractor(object):
'NNP .', 'NNP . TO', 'NNP . TO NNP', '. TO', '. TO NNP', '. TO NNP NNP', 'NNP .', 'NNP . TO', 'NNP . TO NNP', '. TO', '. TO NNP', '. TO NNP NNP',
'TO NNP', 'TO NNP NNP'] 'TO NNP', 'TO NNP NNP']
return good_pos_ngrams return set(good_pos_ngrams)
def _get_grammar_errors(self,pos,text,tokens): def _get_grammar_errors(self,pos,text,tokens):
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment