Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
ease
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
ease
Commits
92d51e67
Commit
92d51e67
authored
Feb 26, 2013
by
Vik Paruchuri
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Docs, remove old variables from create and grade interfaces
parent
3e7457ae
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
25 additions
and
8 deletions
+25
-8
create.py
+2
-4
grade.py
+2
-4
predictor_extractor.py
+14
-0
util_functions.py
+7
-0
No files found.
create.py
View file @
92d51e67
...
...
@@ -24,14 +24,13 @@ import predictor_extractor
log
=
logging
.
getLogger
(
__name__
)
@statsd.timed
(
'open_ended_assessment.machine_learning.creator.time'
)
def
create
(
text
,
score
,
prompt_string
,
model_path
=
None
):
def
create
(
text
,
score
,
prompt_string
):
"""
Creates a machine learning model from input text, associated scores, a prompt, and a path to the model
TODO: Remove model path argument, it is needed for now to support legacy code
text - A list of strings containing the text of the essays
score - a list of integers containing score values
prompt_string - the common prompt for the set of essays
model_path - Deprecated, not needed
"""
#Initialize a results dictionary to return
...
...
@@ -81,14 +80,13 @@ def create(text,score,prompt_string,model_path = None):
return
results
def
create_generic
(
numeric_values
,
textual_values
,
target
,
model_path
=
None
,
algorithm
=
util_functions
.
AlgorithmTypes
.
regression
):
def
create_generic
(
numeric_values
,
textual_values
,
target
,
algorithm
=
util_functions
.
AlgorithmTypes
.
regression
):
"""
Creates a model from a generic list of numeric values and text values
numeric_values - A list of lists that are the predictors
textual_values - A list of lists that are the predictors
(each item in textual_values corresponds to the similarly indexed counterpart in numeric_values)
target - The variable that we are trying to predict. A list of integers.
model_path - deprecated, kept for legacy code. Do not use.
algorithm - the type of algorithm that will be used
"""
...
...
grade.py
View file @
92d51e67
...
...
@@ -27,7 +27,7 @@ import math
log = logging.getLogger(__name__)
@statsd.timed('open_ended_assessment.machine_learning.grader.time')
def grade(grader_data,
grader_config,
submission):
def grade(grader_data,submission):
"""
Grades
a
specified
submission
using
specified
models
grader_data
-
A
dictionary
:
...
...
@@ -37,7 +37,6 @@ def grade(grader_data,grader_config,submission):
'prompt'
:
prompt
for
the
question
,
'algorithm'
:
algorithm
for
the
question
,
}
grader_config
-
Legacy
,
kept
for
compatibility
with
old
code
.
Need
to
remove
.
submission
-
The
student
submission
(
string
)
"""
...
...
@@ -112,14 +111,13 @@ def grade(grader_data,grader_config,submission):
return results
def grade_generic(grader_data,
grader_config,
numeric_features, textual_features):
def grade_generic(grader_data, numeric_features, textual_features):
"""
Grades
a
set
of
numeric
and
textual
features
using
a
generic
model
grader_data
--
dictionary
containing
:
{
'algorithm'
-
Type
of
algorithm
to
use
to
score
}
grader_config
-
legacy
,
kept
for
compatibility
with
old
code
.
Need
to
remove
.
numeric_features
-
list
of
numeric
features
to
predict
on
textual_features
-
list
of
textual
features
to
predict
on
...
...
predictor_extractor.py
View file @
92d51e67
"""
Extracts features for an arbitrary set of textual and numeric inputs
"""
import
numpy
import
re
import
nltk
...
...
@@ -12,6 +16,7 @@ import logging
import
math
from
feature_extractor
import
FeatureExtractor
#Append to path and then import things that depend on path
base_path
=
os
.
path
.
dirname
(
__file__
)
sys
.
path
.
append
(
base_path
)
from
essay_set
import
EssaySet
...
...
@@ -28,6 +33,10 @@ class PredictorExtractor(object):
self
.
_initialized
=
False
def
initialize_dictionaries
(
self
,
p_set
):
"""
Initialize dictionaries with the textual inputs in the PredictorSet object
p_set - PredictorSet object that has had data fed in
"""
success
=
False
if
not
(
hasattr
(
p_set
,
'_type'
)):
error_message
=
"needs to be an essay set of the train type."
...
...
@@ -43,6 +52,7 @@ class PredictorExtractor(object):
if
div_length
==
0
:
div_length
=
1
#Ensures that even with a large amount of input textual features, training time stays reasonable
max_feats2
=
int
(
math
.
floor
(
200
/
div_length
))
for
i
in
xrange
(
0
,
len
(
p_set
.
_essay_sets
)):
self
.
_extractors
.
append
(
FeatureExtractor
())
...
...
@@ -52,6 +62,10 @@ class PredictorExtractor(object):
return
success
def
gen_feats
(
self
,
p_set
):
"""
Generates features based on an input p_set
p_set - PredictorSet
"""
if
self
.
_initialized
!=
True
:
error_message
=
"Dictionaries have not been initialized."
log
.
exception
(
error_message
)
...
...
util_functions.py
View file @
92d51e67
...
...
@@ -83,6 +83,8 @@ def spell_correct(string):
Returns the spell corrected string if aspell is found, original string if not.
string - string
"""
#Create a temp file so that aspell could be used
f
=
open
(
'tmpfile'
,
'w'
)
f
.
write
(
string
)
f_path
=
os
.
path
.
abspath
(
f
.
name
)
...
...
@@ -91,13 +93,16 @@ def spell_correct(string):
p
=
os
.
popen
(
aspell_path
+
" -a < "
+
f_path
+
" --sug-mode=ultra"
)
except
:
log
.
exception
(
"Could not find aspell, so could not spell correct!"
)
#Return original string if aspell fails
return
string
,
0
,
string
#Aspell returns a list of incorrect words with the above flags
incorrect
=
p
.
readlines
()
p
.
close
()
incorrect_words
=
list
()
correct_spelling
=
list
()
for
i
in
range
(
1
,
len
(
incorrect
)):
if
(
len
(
incorrect
[
i
])
>
10
):
#Reformat aspell output to make sense
match
=
re
.
search
(
":"
,
incorrect
[
i
])
if
hasattr
(
match
,
"start"
):
begstring
=
incorrect
[
i
][
2
:
match
.
start
()]
...
...
@@ -111,6 +116,8 @@ def spell_correct(string):
incorrect_words
.
append
(
begword
)
correct_spelling
.
append
(
sug
)
#Create markup based on spelling errors
newstring
=
string
markup_string
=
string
already_subbed
=
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment