Commit 97de6844 by Will Daly

Merge pull request #59 from edx/will/optimize-good-ngram-lookup

Use a set instead of a list for good ngram lookup
parents a990b25e 94014b71
...@@ -74,8 +74,8 @@ class FeatureExtractor(object): ...@@ -74,8 +74,8 @@ class FeatureExtractor(object):
def get_good_pos_ngrams(self): def get_good_pos_ngrams(self):
""" """
Gets a list of grammatically correct part of speech sequences from an input file called essaycorpus.txt Gets a set of grammatically correct part of speech sequences from an input file called essaycorpus.txt
Returns the list and caches the file Returns the set and caches the file
""" """
if(os.path.isfile(NGRAM_PATH)): if(os.path.isfile(NGRAM_PATH)):
good_pos_ngrams = pickle.load(open(NGRAM_PATH, 'rb')) good_pos_ngrams = pickle.load(open(NGRAM_PATH, 'rb'))
...@@ -92,7 +92,7 @@ class FeatureExtractor(object): ...@@ -92,7 +92,7 @@ class FeatureExtractor(object):
'NNP .', 'NNP . TO', 'NNP . TO NNP', '. TO', '. TO NNP', '. TO NNP NNP', 'NNP .', 'NNP . TO', 'NNP . TO NNP', '. TO', '. TO NNP', '. TO NNP NNP',
'TO NNP', 'TO NNP NNP'] 'TO NNP', 'TO NNP NNP']
return good_pos_ngrams return set(good_pos_ngrams)
def _get_grammar_errors(self,pos,text,tokens): def _get_grammar_errors(self,pos,text,tokens):
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment