Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
ease
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
ease
Commits
90bde0cd
Commit
90bde0cd
authored
Jun 13, 2014
by
gradyward
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Gutted the model_creator.py module, removing all unused calls.
parent
515420a9
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
125 deletions
+0
-125
ease/model_creator.py
+0
-125
No files found.
ease/model_creator.py
View file @
90bde0cd
...
@@ -22,74 +22,6 @@ import create
...
@@ -22,74 +22,6 @@ import create
log
=
logging
.
getLogger
()
log
=
logging
.
getLogger
()
def read_in_test_data(filename):
    """
    Reads in tab delimited test data file found at filename for training purposes.

    filename must be a tab delimited file with columns id, dummy number column, score,
    dummy score, text. The first line of the file is treated as a header and skipped.

    Args:
        filename (str): The path to the data

    Return:
        Tuple of the form (score, text), where:
            The former is the list of scores assigned to the essays in the file (int)
            The latter is the list of essays in the file

    Raises:
        ValueError: if a data row does not split into exactly five tab separated fields,
            or if any of the four numeric columns cannot be parsed as an integer.
    """
    # The context manager guarantees the handle is closed, even if reading fails.
    with open(filename) as data_file:
        raw_lines = data_file.read().splitlines()

    score, text = [], []
    # raw_lines[0] is the header row, so data starts at index 1.
    for raw_line in raw_lines[1:]:
        essay_id, essay_set, first_score, second_score, essay_text = raw_line.strip().split("\t")
        # Only the score and the text are returned, but the other numeric columns are
        # still parsed so that a malformed row keeps failing loudly.
        int(essay_id)
        int(essay_set)
        parsed_score = int(first_score)
        int(second_score)
        score.append(parsed_score)
        text.append(essay_text)
    return score, text
def read_in_test_prompt(filename):
    """
    Reads in the prompt from a file.

    Args:
        filename (str): the name of the file

    Returns:
        (str): the prompt as a string (the entire, unmodified contents of the file).
    """
    # The context manager closes the handle instead of leaving it to the garbage collector.
    with open(filename) as prompt_file:
        return prompt_file.read()
def read_in_test_data_twocolumn(filename, sep=","):
    """
    Reads in a two column version of the test data.

    In filename, the first column should be integer score data.
    The second column should be string text data.
    The first line of the file is treated as a header and skipped.

    Args:
        filename (str): The path to the data
        sep (str): The separator between the two fields. Each row is split on the first
            occurrence only, so the text column may itself contain the separator.

    Return:
        Tuple of the form (score, text), where:
            The former is the list of scores assigned to the essays in the file (int)
            The latter is the list of essays in the file

    Raises:
        ValueError: if a data row does not contain sep, or its score is not an integer.
    """
    # The context manager guarantees the handle is closed, even if reading fails.
    with open(filename) as data_file:
        raw_lines = data_file.read().splitlines()

    score, text = [], []
    # raw_lines[0] is the header row, so data starts at index 1.
    for raw_line in raw_lines[1:]:
        # Honour the caller's separator (it used to be ignored in favour of a
        # hard-coded tab) and split once so separators inside the essay survive.
        raw_score, essay_text = raw_line.strip().split(sep, 1)
        parsed_score = int(raw_score)
        text.append(essay_text)
        score.append(parsed_score)
    return score, text
def
create_essay_set
(
text
,
score
,
prompt_string
,
generate_additional
=
True
):
def
create_essay_set
(
text
,
score
,
prompt_string
,
generate_additional
=
True
):
"""
"""
Creates an essay set from given data.
Creates an essay set from given data.
...
@@ -151,34 +83,6 @@ def get_algorithms(algorithm):
...
@@ -151,34 +83,6 @@ def get_algorithms(algorithm):
max_depth
=
4
,
random_state
=
1
,
min_samples_leaf
=
3
)
max_depth
=
4
,
random_state
=
1
,
min_samples_leaf
=
3
)
return
clf
,
clf2
return
clf
,
clf2
def extract_features_and_generate_model_from_predictors(predictor_set, algorithm=util_functions.AlgorithmTypes.regression):
    """
    Extracts features and generates predictors based on a given predictor set.

    Args:
        predictor_set: a PredictorSet object that has been initialized with data
        algorithm: one of util_functions.AlgorithmTypes. Any value other than regression
            or classification falls back to regression.

    Return:
        Tuple of the form (f, clf, cv_error_results), where:
            f is the PredictorExtractor built from predictor_set
            clf is the first model returned by get_algorithms, fitted on the generated features
            cv_error_results is whatever get_cv_error returns for the second model
    """
    supported_algorithms = [
        util_functions.AlgorithmTypes.regression,
        util_functions.AlgorithmTypes.classification,
    ]
    if algorithm not in supported_algorithms:
        algorithm = util_functions.AlgorithmTypes.regression

    f = predictor_extractor.PredictorExtractor(predictor_set)
    train_feats = f.generate_features(predictor_set)

    clf, clf2 = get_algorithms(algorithm)
    cv_error_results = get_cv_error(clf2, train_feats, predictor_set._target)

    try:
        # numpy.int was only an alias for the builtin int and has been removed from
        # NumPy; passing int requests the same dtype on every NumPy version.
        set_score = numpy.asarray(predictor_set._target, dtype=int)
        clf.fit(train_feats, set_score)
    except ValueError:
        log.exception("Not enough classes (0,1,etc) in sample.")
        # Force two distinct target values and fit again.
        # NOTE(review): set_score is the predictor set's own _target object, so these
        # assignments modify predictor_set in place -- confirm this is intended.
        set_score = predictor_set._target
        set_score[0] = 1
        set_score[1] = 0
        clf.fit(train_feats, set_score)

    return f, clf, cv_error_results
def
extract_features_and_generate_model
(
essay_set
):
def
extract_features_and_generate_model
(
essay_set
):
"""
"""
...
@@ -213,32 +117,3 @@ def extract_features_and_generate_model(essay_set):
...
@@ -213,32 +117,3 @@ def extract_features_and_generate_model(essay_set):
predict_classifier
.
fit
(
features
,
set_score
)
predict_classifier
.
fit
(
features
,
set_score
)
return
feat_extractor
,
predict_classifier
,
cv_error_results
return
feat_extractor
,
predict_classifier
,
cv_error_results
def dump_model_to_file(prompt_string, feature_ext, classifier, text, score, model_path):
    """
    Writes out a model to a file.

    The model is pickled as a dictionary with the keys 'prompt', 'extractor', 'model',
    'text' and 'score'.

    Args:
        prompt_string (str): The prompt for the set of essays
        feature_ext (FeatureExtractor): a trained FeatureExtractor Object
        classifier: a trained Classifier Object
        text: stored unchanged under the 'text' key
        score: stored unchanged under the 'score' key
        model_path (str): the path to write the model file out to
    """
    model_file = {
        'prompt': prompt_string,
        'extractor': feature_ext,
        'model': classifier,
        'text': text,
        'score': score,
    }
    # Pickle data is bytes, so the file is opened in binary mode; the context manager
    # flushes and closes it so the model is fully on disk when this function returns.
    with open(model_path, "wb") as model_handle:
        pickle.dump(model_file, model_handle)
def create_essay_set_and_dump_model(text, score, prompt, model_path, additional_array=None):
    """
    Function that creates essay set, extracts features, and writes out model.

    Args:
        text: the essays, passed to create_essay_set and stored in the model file
        score: the scores for the essays, passed to create_essay_set and stored in the model file
        prompt (str): the prompt for the set of essays
        model_path (str): the path to write the model file out to
        additional_array: unused. Kept only so existing callers that pass it keep working;
            extract_features_and_generate_model accepts the essay set alone.
    """
    essay_set = create_essay_set(text, score, prompt)
    # extract_features_and_generate_model returns a third value, the cross-validation
    # error results, which is not written to the model file.
    feature_ext, clf, _cv_error_results = extract_features_and_generate_model(essay_set)
    # NOTE(review): the text and score given by the caller are what gets stored;
    # confirm that is preferred over whatever the essay set holds after creation.
    dump_model_to_file(prompt, feature_ext, clf, text, score, model_path)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment