Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
ease
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
ease
Commits
b70221db
Commit
b70221db
authored
Feb 26, 2013
by
Vik Paruchuri
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add some docs to create
parent
006f9342
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
6 deletions
+31
-6
create.py
+31
-6
No files found.
create.py
View file @
b70221db
...
...
@@ -5,24 +5,35 @@ Functions that create a machine learning model from training data
import
os
import
sys
import
logging
log
=
logging
.
getLogger
(
__name__
)
from
statsd
import
statsd
#Define base path and add to sys path
base_path
=
os
.
path
.
dirname
(
__file__
)
sys
.
path
.
append
(
base_path
)
one_up_path
=
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'..'
))
sys
.
path
.
append
(
one_up_path
)
#Import modules that are dependent on the base path
import
model_creator
import
util_functions
import
predictor_set
import
predictor_extractor
from
statsd
import
statsd
#Make a log
log
=
logging
.
getLogger
(
__name__
)
@statsd.timed
(
'open_ended_assessment.machine_learning.creator.time'
)
def
create
(
text
,
score
,
prompt_string
,
model_path
):
def
create
(
text
,
score
,
prompt_string
,
model_path
=
None
):
"""
Creates a machine learning model from input text, associated scores, a prompt, and a path to the model
TODO: Remove model path argument, it is needed for now to support legacy code
text - A list of strings containing the text of the essays
score - a list of integers containing score values
prompt_string - the common prompt for the set of essays
model_path - Deprecated, not needed
"""
#Initialize a results dictionary to return
results
=
{
'errors'
:
[],
'success'
:
False
,
'cv_kappa'
:
0
,
'cv_mean_absolute_error'
:
0
,
'feature_ext'
:
""
,
'classifier'
:
""
}
...
...
@@ -33,12 +44,14 @@ def create(text,score,prompt_string,model_path):
return
results
try
:
#Create an essay set object that encapsulates all the essays and alternate representations (tokens, etc)
e_set
=
model_creator
.
create_essay_set
(
text
,
score
,
prompt_string
)
except
:
msg
=
"essay set creation failed."
results
[
'errors'
]
.
append
(
msg
)
log
.
exception
(
msg
)
try
:
#Gets features from the essay set and computes error
feature_ext
,
classifier
,
cv_error_results
=
model_creator
.
extract_features_and_generate_model
(
e_set
)
results
[
'cv_kappa'
]
=
cv_error_results
[
'kappa'
]
results
[
'cv_mean_absolute_error'
]
=
cv_error_results
[
'mae'
]
...
...
@@ -57,7 +70,17 @@ def create(text,score,prompt_string,model_path):
return
results
def
create_generic
(
numeric_values
,
textual_values
,
target
,
model_path
,
algorithm
=
util_functions
.
AlgorithmTypes
.
regression
):
def
create_generic
(
numeric_values
,
textual_values
,
target
,
model_path
=
None
,
algorithm
=
util_functions
.
AlgorithmTypes
.
regression
):
"""
Creates a model from generic lists of numeric values and text values
numeric_values - A list of lists that are the numeric predictors
textual_values - A list of lists that are the textual predictors
(each item in textual_values corresponds to the similarly indexed counterpart in numeric_values)
target - The variable that we are trying to predict. A list of integers.
model_path - deprecated, kept for legacy code. Do not use.
"""
#Initialize a result dictionary to return.
results
=
{
'errors'
:
[],
'success'
:
False
,
'cv_kappa'
:
0
,
'cv_mean_absolute_error'
:
0
,
'feature_ext'
:
""
,
'classifier'
:
""
,
'algorithm'
:
algorithm
}
...
...
@@ -68,6 +91,7 @@ def create_generic(numeric_values, textual_values, target, model_path, algorithm
return
results
try
:
#Initialize a predictor set object that encapsulates all of the text and numeric predictors
pset
=
predictor_set
.
PredictorSet
(
type
=
"train"
)
for
i
in
xrange
(
0
,
len
(
numeric_values
)):
pset
.
add_row
(
numeric_values
[
i
],
textual_values
[
i
],
target
[
i
])
...
...
@@ -77,6 +101,7 @@ def create_generic(numeric_values, textual_values, target, model_path, algorithm
log
.
exception
(
msg
)
try
:
#Extract all features and then train a classifier with the features
feature_ext
,
classifier
,
cv_error_results
=
model_creator
.
extract_features_and_generate_model_predictors
(
pset
,
algorithm
)
results
[
'cv_kappa'
]
=
cv_error_results
[
'kappa'
]
results
[
'cv_mean_absolute_error'
]
=
cv_error_results
[
'mae'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment