edx / ease · Commit d62d57a7

Incorporate confidence value into generic and regular

Authored Feb 26, 2013 by Vik Paruchuri
Parent: 5378393b

Showing 3 changed files with 47 additions and 21 deletions:

    create.py         +14  -2
    grade.py          +32  -15
    model_creator.py  +1   -4
create.py

@@ -6,6 +6,7 @@ import os
 import sys
 import logging
 from statsd import statsd
+import numpy

 #Define base path and add to sys path
 base_path = os.path.dirname(__file__)
@@ -35,7 +36,7 @@ def create(text,score,prompt_string,model_path = None):
     #Initialize a results dictionary to return
     results = {'errors': [],'success' : False, 'cv_kappa' : 0, 'cv_mean_absolute_error': 0,
-               'feature_ext' : "", 'classifier' : ""}
+               'feature_ext' : "", 'classifier' : "", 'algorithm' : util_functions.AlgorithmTypes.classification}

     if len(text) != len(score):
         msg = "Target and text lists must be same length."
@@ -43,6 +44,15 @@ def create(text,score,prompt_string,model_path = None):
         log.exception(msg)
         return results

+    #Decide what algorithm to use (regression or classification)
+    try:
+        if len(util_functions.f7(list(score)))>5:
+            type = util_functions.AlgorithmTypes.regression
+        else:
+            type = util_functions.AlgorithmTypes.classification
+    except:
+        type = util_functions.AlgorithmTypes.regression
+
     try:
         #Create an essay set object that encapsulates all the essays and alternate representations (tokens, etc)
         e_set = model_creator.create_essay_set(text, score, prompt_string)
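The new branch keys the choice off the number of distinct score points: util_functions.f7 de-duplicates the score list while preserving order, and more than five unique scores tips the decision toward regression. A minimal sketch of the idea, with f7's body assumed to be the standard order-preserving dedupe rather than copied from the repo:

    def f7(seq):
        #Order-preserving de-duplication (assumed equivalent to util_functions.f7)
        seen = set()
        return [x for x in seq if not (x in seen or seen.add(x))]

    print(len(f7([0, 1, 1, 2, 3, 2])) > 5)        # False -> classification
    print(len(f7([0, 1, 2, 3, 4, 5, 6, 6])) > 5)  # True  -> regression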
@@ -52,11 +62,12 @@ def create(text,score,prompt_string,model_path = None):
         log.exception(msg)

     try:
         #Gets features from the essay set and computes error
-        feature_ext, classifier, cv_error_results = model_creator.extract_features_and_generate_model(e_set)
+        feature_ext, classifier, cv_error_results = model_creator.extract_features_and_generate_model(e_set, type=type)
         results['cv_kappa'] = cv_error_results['kappa']
         results['cv_mean_absolute_error'] = cv_error_results['mae']
         results['feature_ext'] = feature_ext
         results['classifier'] = classifier
+        results['algorithm'] = type
         results['success'] = True
     except:
         msg = "feature extraction and model creation failed."
@@ -78,6 +89,7 @@ def create_generic(numeric_values, textual_values, target, model_path = None, al
     (each item in textual_values corresponds to the similarly indexed counterpart in numeric_values)
     target - The variable that we are trying to predict. A list of integers.
     model_path - deprecated, kept for legacy code. Do not use.
+    algorithm - the type of algorithm that will be used
     """

     #Initialize a result dictionary to return.
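Taken together, the create.py changes mean the training payload now records which algorithm produced the model, so grading can later select the matching confidence computation. A hedged sketch of the round trip, assuming the ease modules are importable; the corpus, scores, and prompt below are placeholders, not from the commit:

    from create import create   # assumes ease's create.py/grade.py are on the path
    from grade import grade

    text = ["first essay text", "second essay text"] * 10   # placeholder corpus
    score = [1, 2] * 10                                     # placeholder scores
    prompt_string = "Explain photosynthesis."

    results = create(text, score, prompt_string)
    grader_data = {
        'model': results['classifier'],
        'extractor': results['feature_ext'],
        'prompt': prompt_string,
        'algorithm': results['algorithm'],   # new in this commit
    }
    grade(grader_data, None, "a student essay")   # confidence branch matches training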
grade.py

@@ -35,6 +35,7 @@ def grade(grader_data,grader_config,submission):
         'model' : trained model,
         'extractor' : trained feature extractor,
         'prompt' : prompt for the question,
+        'algorithm' : algorithm for the question,
         }
     grader_config - Legacy, kept for compatibility with old code. Need to remove.
     submission - The student submission (string)
@@ -46,6 +47,10 @@ def grade(grader_data,grader_config,submission):
     grader_set=EssaySet(type="test")

+    #This is to preserve legacy functionality
+    if 'algorithm' not in grader_data:
+        grader_data['algorithm'] = util_functions.AlgorithmTypes.classification
+
     try:
         #Try to add essay to essay set object
         grader_set.add_essay(str(submission),0)
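The two-line backfill is equivalent to a dict.setdefault call; a minimal sketch with a stand-in constant for util_functions.AlgorithmTypes.classification (the payload contents here are hypothetical):

    CLASSIFICATION = 0   # stand-in for util_functions.AlgorithmTypes.classification

    grader_data = {'model': None, 'extractor': None}   # hypothetical legacy payload
    grader_data.setdefault('algorithm', CLASSIFICATION)
    print(grader_data['algorithm'])   # 0 -> old payloads quietly default to classification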
@@ -65,11 +70,7 @@ def grade(grader_data,grader_config,submission):
     #Try to determine confidence level
     try:
-        min_score=min(numpy.asarray(grader_data['score']))
-        max_score=max(numpy.asarray(grader_data['score']))
-        raw_confidence=grader_data['model'].predict_proba(grader_feats)[0,(results['score']-min_score)]
-        #TODO: Normalize confidence somehow here
-        results['confidence']=raw_confidence
+        results['confidence'] = get_confidence_value(grader_data['algorithm'], grader_data['model'], grader_feats, results['score'])
     except:
         #If there is an error getting confidence, it is not a show-stopper, so just log
         log.exception("Problem generating confidence value")
@@ -112,6 +113,17 @@ def grade(grader_data,grader_config,submission):
     return results

 def grade_generic(grader_data, grader_config, numeric_features, textual_features):
+    """
+    Grades a set of numeric and textual features using a generic model
+    grader_data -- dictionary containing:
+    {
+        'algorithm' - Type of algorithm to use to score
+    }
+    grader_config - legacy, kept for compatibility with old code. Need to remove.
+    numeric_features - list of numeric features to predict on
+    textual_features - list of textual features to predict on
+    """
     results = {'errors': [],'tests': [],'score': 0, 'success' : False, 'confidence' : 0}
     has_error=False
@@ -137,16 +149,7 @@ def grade_generic(grader_data, grader_config, numeric_features, textual_features
     #Try to determine confidence level
     try:
-        min_score=min(numpy.asarray(grader_data['score']))
-        max_score=max(numpy.asarray(grader_data['score']))
-        if grader_data['algorithm'] == util_functions.AlgorithmTypes.classification:
-            raw_confidence=grader_data['model'].predict_proba(grader_feats)[0,(results['score']-min_score)]
-            #TODO: Normalize confidence somehow here
-            results['confidence']=raw_confidence
-        else:
-            raw_confidence = grader_data['model'].predict(grader_feats)[0]
-            confidence = max(raw_confidence - math.floor(raw_confidence), math.ceil(raw_confidence) - raw_confidence)
-            results['confidence'] = confidence
+        results['confidence'] = get_confidence_value(grader_data['algorithm'], grader_data['model'], grader_feats, results['score'])
     except:
         #If there is an error getting confidence, it is not a show-stopper, so just log
         log.exception("Problem generating confidence value")
@@ -159,3 +162,17 @@ def grade_generic(grader_data, grader_config, numeric_features, textual_features
     results['success'] = True

     return results
+
+def get_confidence_value(algorithm,model,grader_feats,score):
+    min_score=min(numpy.asarray(score))
+    max_score=max(numpy.asarray(score))
+    if algorithm == util_functions.AlgorithmTypes.classification:
+        raw_confidence=model.predict_proba(grader_feats)[0,(score-min_score)]
+        #TODO: Normalize confidence somehow here
+        confidence=raw_confidence
+    else:
+        raw_confidence = model.predict(grader_feats)[0]
+        confidence = max(raw_confidence - math.floor(raw_confidence), math.ceil(raw_confidence) - raw_confidence)
+    return confidence
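In the classification branch, confidence is the probability predict_proba assigned to the predicted class, indexed on the assumption that classes run contiguously from min_score upward; in the regression branch it is the distance from the raw prediction to the farther of its two neighboring integers, so near-integer predictions score close to 1.0. Worth noting from the diff itself: the helper computes min_score from the score argument it receives, whereas the pre-refactor code took it from the full grader_data['score'] list. A minimal sketch with stand-in numbers (the probs array, scores, and helper name below are illustrative, not from the commit):

    import math
    import numpy

    #Classification: read the probability of the predicted class directly
    probs = numpy.array([[0.1, 0.7, 0.2]])   # stand-in for model.predict_proba(grader_feats)
    score, min_score = 1, 0
    print(probs[0, score - min_score])        # 0.7

    #Regression: distance to the farther neighboring integer
    def regression_confidence(raw):
        return max(raw - math.floor(raw), math.ceil(raw) - raw)

    print(regression_confidence(2.95))  # ~0.95 -> close to a score point
    print(regression_confidence(2.5))   # 0.5   -> maximally ambiguous
    #Note: an exactly integral prediction yields 0 here (floor == ceil == raw).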
model_creator.py

@@ -141,7 +141,7 @@ def extract_features_and_generate_model_predictors(predictor_set, type=util_func
     return f, clf, cv_error_results

-def extract_features_and_generate_model(essays, additional_array=None):
+def extract_features_and_generate_model(essays, type=util_functions.AlgorithmTypes.regression):
     """
     Feed in an essay set to get feature vector and classifier
     essays must be an essay set object
@@ -153,9 +153,6 @@ def extract_features_and_generate_model(essays,additional_array=None):
     f.initialize_dictionaries(essays)

     train_feats = f.gen_feats(essays)
-    if(additional_array!=None and type(additional_array)==type(numpy.array([1]))):
-        if(additional_array.shape[0]==train_feats.shape[0]):
-            train_feats=numpy.concatenate((train_feats,additional_array),axis=1)

     set_score = numpy.asarray(essays._score, dtype=numpy.int)
     if len(util_functions.f7(list(set_score)))>5:
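A plausible reason the additional_array branch was removed outright rather than adapted: the new keyword parameter is named type, which shadows the builtin type() that the old check relied on. A tiny illustration of the collision (the function and argument names here are made up):

    import numpy

    def demo(essays, type="regression"):
        #Within this body, `type` is the string parameter, not the builtin,
        #so a check like type(additional_array)==type(numpy.array([1])) would break:
        try:
            type(numpy.array([1]))
        except TypeError as err:
            print(err)   # 'str' object is not callable

    demo(None)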