Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
ease
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
ease
Commits
3e7a4342
Commit
3e7a4342
authored
Jan 24, 2013
by
Vik Paruchuri
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add flexible maximum for feature counts
parent
2e7f9e6a
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
29 additions
and
7 deletions
+29
-7
feature_extractor.py
+3
-3
predictor_extractor.py
+23
-1
predictor_set.py
+3
-3
No files found.
feature_extractor.py
View file @
3e7a4342
...
@@ -32,7 +32,7 @@ class FeatureExtractor(object):
...
@@ -32,7 +32,7 @@ class FeatureExtractor(object):
self
.
_spell_errors_per_character
=
0
self
.
_spell_errors_per_character
=
0
self
.
_grammar_errors_per_character
=
0
self
.
_grammar_errors_per_character
=
0
def
initialize_dictionaries
(
self
,
e_set
):
def
initialize_dictionaries
(
self
,
e_set
,
max_feats2
=
200
):
"""
"""
Initializes dictionaries from an essay set object
Initializes dictionaries from an essay set object
Dictionaries must be initialized prior to using this to extract features
Dictionaries must be initialized prior to using this to extract features
...
@@ -41,8 +41,8 @@ class FeatureExtractor(object):
...
@@ -41,8 +41,8 @@ class FeatureExtractor(object):
"""
"""
if
(
hasattr
(
e_set
,
'_type'
)):
if
(
hasattr
(
e_set
,
'_type'
)):
if
(
e_set
.
_type
==
"train"
):
if
(
e_set
.
_type
==
"train"
):
nvocab
=
util_functions
.
get_vocab
(
e_set
.
_text
,
e_set
.
_score
)
nvocab
=
util_functions
.
get_vocab
(
e_set
.
_text
,
e_set
.
_score
,
max_feats2
=
max_feats2
)
svocab
=
util_functions
.
get_vocab
(
e_set
.
_clean_stem_text
,
e_set
.
_score
)
svocab
=
util_functions
.
get_vocab
(
e_set
.
_clean_stem_text
,
e_set
.
_score
,
max_feats2
=
max_feats2
)
self
.
_normal_dict
=
CountVectorizer
(
ngram_range
=
(
1
,
2
),
vocabulary
=
nvocab
)
self
.
_normal_dict
=
CountVectorizer
(
ngram_range
=
(
1
,
2
),
vocabulary
=
nvocab
)
self
.
_stem_dict
=
CountVectorizer
(
ngram_range
=
(
1
,
2
),
vocabulary
=
svocab
)
self
.
_stem_dict
=
CountVectorizer
(
ngram_range
=
(
1
,
2
),
vocabulary
=
svocab
)
self
.
dict_initialized
=
True
self
.
dict_initialized
=
True
...
...
predictor_extractor.py
View file @
3e7a4342
...
@@ -9,6 +9,8 @@ from itertools import chain
...
@@ -9,6 +9,8 @@ from itertools import chain
import
copy
import
copy
import
operator
import
operator
import
logging
import
logging
import
math
from
feature_extractor
import
FeatureExtractor
base_path
=
os
.
path
.
dirname
(
__file__
)
base_path
=
os
.
path
.
dirname
(
__file__
)
sys
.
path
.
append
(
base_path
)
sys
.
path
.
append
(
base_path
)
...
@@ -23,6 +25,26 @@ log = logging.getLogger(__name__)
...
@@ -23,6 +25,26 @@ log = logging.getLogger(__name__)
class
PredictorExtractor
(
object
):
class
PredictorExtractor
(
object
):
def
__init__
(
self
):
def
__init__
(
self
):
pass
self
.
_extractors
=
[]
def
initialize_dictionaries
(
self
,
p_set
):
success
=
False
if
not
(
hasattr
(
p_set
,
'_type'
)):
error_message
=
"needs to be an essay set of the train type."
log
.
exception
(
error_message
)
raise
util_functions
.
InputError
(
p_set
,
error_message
)
if
not
(
p_set
.
_type
==
"train"
):
error_message
=
"needs to be an essay set of the train type."
log
.
exception
(
error_message
)
raise
util_functions
.
InputError
(
p_set
,
error_message
)
max_feats2
=
math
.
floor
(
200
/
len
(
p_set
.
_essay_sets
))
for
i
in
xrange
(
0
,
len
(
p_set
.
_essay_sets
)):
self
.
_extractors
.
append
(
FeatureExtractor
())
self
.
_extractors
[
i
]
.
initialize_dictionaries
(
p_set
.
_essay_sets
[
i
],
max_feats2
=
max_feats2
)
return
success
def
gen_feats
(
self
,
p_set
):
predictor_set.py
View file @
3e7a4342
...
@@ -36,7 +36,7 @@ class PredictorSet(object):
...
@@ -36,7 +36,7 @@ class PredictorSet(object):
self
.
_target
=
[]
self
.
_target
=
[]
self
.
_textual_features
=
[]
self
.
_textual_features
=
[]
self
.
_numeric_features
=
[]
self
.
_numeric_features
=
[]
self
.
essay_sets
=
[]
self
.
_
essay_sets
=
[]
def
add_row
(
self
,
numeric_features
,
textual_features
,
target
):
def
add_row
(
self
,
numeric_features
,
textual_features
,
target
):
...
@@ -95,7 +95,7 @@ class PredictorSet(object):
...
@@ -95,7 +95,7 @@ class PredictorSet(object):
#Create essay sets for textual features if needed
#Create essay sets for textual features if needed
if
len
(
self
.
_textual_features
)
==
0
:
if
len
(
self
.
_textual_features
)
==
0
:
for
i
in
xrange
(
0
,
len
(
textual_features
)):
for
i
in
xrange
(
0
,
len
(
textual_features
)):
self
.
essay_sets
.
append
(
EssaySet
(
type
=
self
.
_type
))
self
.
_
essay_sets
.
append
(
EssaySet
(
type
=
self
.
_type
))
#Add numeric and textual features
#Add numeric and textual features
self
.
_numeric_features
.
append
(
numeric_features
)
self
.
_numeric_features
.
append
(
numeric_features
)
...
@@ -106,5 +106,5 @@ class PredictorSet(object):
...
@@ -106,5 +106,5 @@ class PredictorSet(object):
#Add textual features to essay sets
#Add textual features to essay sets
for
i
in
xrange
(
0
,
len
(
textual_features
)):
for
i
in
xrange
(
0
,
len
(
textual_features
)):
self
.
essay_sets
[
i
]
.
add_essay
(
textual_features
[
i
],
target
[
i
])
self
.
_
essay_sets
[
i
]
.
add_essay
(
textual_features
[
i
],
target
[
i
])
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment