Commit 4047b1a5 by gradyward

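Many small changes: removed unused imports, inlined the uniqueness helper (make_unique), made get_classifier_and_extractor private, and updated docstrings.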

parent a6da2bc9
......@@ -3,7 +3,6 @@ Defines an essay set object, which encapsulates essays from training and test se
Performs spell and grammar checking, tokenization, and stemming.
"""
import numpy
import nltk
import sys
import random
......
......@@ -3,14 +3,12 @@ Extracts features from training set and test set essays
"""
import numpy
import re
import nltk
import sys
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import os
from itertools import chain
import copy
import operator
import logging
......
......@@ -4,7 +4,6 @@ Functions to score specified data using specified ML models
import sys
import os
import numpy
import logging
# Append sys to base path to import the following modules
......@@ -13,12 +12,8 @@ sys.path.append(base_path)
#Depend on base path to be imported
from essay_set import EssaySet
import util_functions
from errors import *
#Imports needed to unpickle grader data
import math
log = logging.getLogger(__name__)
......@@ -51,7 +46,7 @@ def grade(grader_data, submission):
feedback = {}
# Retrieves the model and extractor we will be using
model, extractor = get_classifier_and_extractor(grader_data)
model, extractor = _get_classifier_and_extractor(grader_data)
# Attempts to add the essay (text) to the essay set.
try:
......@@ -82,7 +77,7 @@ def grade(grader_data, submission):
return results
def get_classifier_and_extractor(grader_data):
def _get_classifier_and_extractor(grader_data):
"""
Finds the classifier and extractor from a completed training operation in order to perform the grading operation.
......
......@@ -147,19 +147,6 @@ def ngrams(tokens, min_n, max_n):
return all_ngrams
def make_unique(sequence):
    """
    Removes duplicate elements from a sequence.

    The first occurrence of every element is kept and the relative order of
    the surviving elements is preserved, so the result is deterministic
    (a plain list(set(...)) returns strings in an order that can change
    between interpreter runs).

    Args:
        sequence (iterable of any hashable): A sequence to make unique

    Returns:
        (list): the elements of the sequence without any duplicates, in order
            of first appearance.
    """
    seen = set()
    unique = []
    for item in sequence:
        # Membership is checked against a set so the whole pass stays O(n).
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
def get_vocab(essays, scores, max_features_pass_1=750, max_features_pass_2=200):
"""
Uses a fisher test to find words that are significant in that they separate
......@@ -337,8 +324,12 @@ def histogram(ratings, min_rating=None, max_rating=None):
def get_wordnet_syns(word):
"""
Utilize wordnet (installed with nltk) to get synonyms for words
word is the input word
returns a list of unique synonyms
Args:
word (str): the word to generate synonyms for
Returns:
(list of str): Unique synonyms for the word
"""
synonyms = []
regex = r"_"
......@@ -347,5 +338,6 @@ def get_wordnet_syns(word):
for ss in synset:
for swords in ss.lemma_names:
synonyms.append(pat.sub(" ", swords.lower()))
synonyms = make_unique(synonyms)
# Makes the synonym list unique
synonyms = list(set(synonyms))
return synonyms
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment