Merge pull request #59 from edx/will/optimize-good-ngram-lookup

Use a set instead of a list for good ngram lookup

Merge pull request #59 from edx/will/optimize-good-ngram-lookup
Use a set instead of a list for good ngram lookup
97de6844 · Will Daly · a990b25e · 94014b71 · 97de6844
Commit 97de6844 authored Jun 23, 2014 by Will Daly
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

ease/feature_extractor.py
+3 -3

No files found.
--- a/ease/feature_extractor.py
+++ b/ease/feature_extractor.py
@@ -74,8 +74,8 @@ class FeatureExtractor(object):
    def get_good_pos_ngrams(self):
        """
-        Gets a list of gramatically correct part of speech sequences from an input file called essaycorpus.txt
+        Gets a set of grammatically correct part of speech sequences from an input file called essaycorpus.txt
-        Returns the list and caches the file
+        Returns the set and caches the file
        """
        if(os.path.isfile(NGRAM_PATH)):
            good_pos_ngrams = pickle.load(open(NGRAM_PATH, 'rb'))
@@ -92,7 +92,7 @@ class FeatureExtractor(object):
             'NNP .', 'NNP . TO', 'NNP . TO NNP', '. TO', '. TO NNP', '. TO NNP NNP',
             'TO NNP', 'TO NNP NNP']
-        return good_pos_ngrams
+        return set(good_pos_ngrams)
    def _get_grammar_errors(self,pos,text,tokens):
        """