Commit 4047b1a5 by gradyward

Many small changes. Imports, uniqueness, etc.

parent a6da2bc9
...@@ -3,7 +3,6 @@ Defines an essay set object, which encapsulates essays from training and test se ...@@ -3,7 +3,6 @@ Defines an essay set object, which encapsulates essays from training and test se
Performs spell and grammar checking, tokenization, and stemming. Performs spell and grammar checking, tokenization, and stemming.
""" """
import numpy
import nltk import nltk
import sys import sys
import random import random
......
...@@ -3,14 +3,12 @@ Extracts features from training set and test set essays ...@@ -3,14 +3,12 @@ Extracts features from training set and test set essays
""" """
import numpy import numpy
import re
import nltk import nltk
import sys import sys
from sklearn.feature_extraction.text import CountVectorizer from sklearn.feature_extraction.text import CountVectorizer
import pickle import pickle
import os import os
from itertools import chain from itertools import chain
import copy
import operator import operator
import logging import logging
......
...@@ -4,7 +4,6 @@ Functions to score specified data using specified ML models ...@@ -4,7 +4,6 @@ Functions to score specified data using specified ML models
import sys import sys
import os import os
import numpy
import logging import logging
# Append sys to base path to import the following modules # Append sys to base path to import the following modules
...@@ -13,12 +12,8 @@ sys.path.append(base_path) ...@@ -13,12 +12,8 @@ sys.path.append(base_path)
#Depend on base path to be imported #Depend on base path to be imported
from essay_set import EssaySet from essay_set import EssaySet
import util_functions
from errors import * from errors import *
#Imports needed to unpickle grader data
import math
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -51,7 +46,7 @@ def grade(grader_data, submission): ...@@ -51,7 +46,7 @@ def grade(grader_data, submission):
feedback = {} feedback = {}
# Retrieves the model and extractor we will be using # Retrieves the model and extractor we will be using
model, extractor = get_classifier_and_extractor(grader_data) model, extractor = _get_classifier_and_extractor(grader_data)
# Attempts to add the essay (text) to the essay set. # Attempts to add the essay (text) to the essay set.
try: try:
...@@ -82,7 +77,7 @@ def grade(grader_data, submission): ...@@ -82,7 +77,7 @@ def grade(grader_data, submission):
return results return results
def get_classifier_and_extractor(grader_data): def _get_classifier_and_extractor(grader_data):
""" """
Finds the classifier and extractor from a completed training operation in order to perform the grading operation. Finds the classifier and extractor from a completed training operation in order to perform the grading operation.
......
...@@ -147,19 +147,6 @@ def ngrams(tokens, min_n, max_n): ...@@ -147,19 +147,6 @@ def ngrams(tokens, min_n, max_n):
return all_ngrams return all_ngrams
def make_unique(sequence):
"""
Makes a list of elements unique
Args:
sequence (list of any comparable): A sequence to make unique
Return:
the list without any duplicates. May be out of order.
"""
return list(set(sequence))
def get_vocab(essays, scores, max_features_pass_1=750, max_features_pass_2=200): def get_vocab(essays, scores, max_features_pass_1=750, max_features_pass_2=200):
""" """
Uses a fisher test to find words that are significant in that they separate Uses a fisher test to find words that are significant in that they separate
...@@ -337,8 +324,12 @@ def histogram(ratings, min_rating=None, max_rating=None): ...@@ -337,8 +324,12 @@ def histogram(ratings, min_rating=None, max_rating=None):
def get_wordnet_syns(word): def get_wordnet_syns(word):
""" """
Utilize wordnet (installed with nltk) to get synonyms for words Utilize wordnet (installed with nltk) to get synonyms for words
word is the input word
returns a list of unique synonyms Args:
word (str): the word to generate synonyms for
Returns:
(list of str): Unique synonyms for the word
""" """
synonyms = [] synonyms = []
regex = r"_" regex = r"_"
...@@ -347,5 +338,6 @@ def get_wordnet_syns(word): ...@@ -347,5 +338,6 @@ def get_wordnet_syns(word):
for ss in synset: for ss in synset:
for swords in ss.lemma_names: for swords in ss.lemma_names:
synonyms.append(pat.sub(" ", swords.lower())) synonyms.append(pat.sub(" ", swords.lower()))
synonyms = make_unique(synonyms) # Makes the synonym list unique
synonyms = list(set(synonyms))
return synonyms return synonyms
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment