Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
ease
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
ease
Commits
6f7ae1c2
Commit
6f7ae1c2
authored
Oct 24, 2012
by
Vik Paruchuri
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added documentation strings and reformatted code
parent
88b1585b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
29 additions
and
0 deletions
+29
-0
feature_extractor.py
+29
-0
No files found.
feature_extractor.py
View file @
6f7ae1c2
...
@@ -21,6 +21,12 @@ class FeatureExtractor:
...
@@ -21,6 +21,12 @@ class FeatureExtractor:
self
.
dict_initialized
=
False
self
.
dict_initialized
=
False
def
initialize_dictionaries
(
self
,
e_set
):
def
initialize_dictionaries
(
self
,
e_set
):
"""
Initializes dictionaries from an essay set object
Dictionaries must be initialized prior to using this to extract features
e_set is an input essay set
returns a confirmation of initialization
"""
if
(
hasattr
(
e_set
,
'_type'
)):
if
(
hasattr
(
e_set
,
'_type'
)):
if
(
e_set
.
_type
==
"train"
):
if
(
e_set
.
_type
==
"train"
):
nvocab
=
util_functions
.
get_vocab
(
e_set
.
_text
,
e_set
.
_score
)
nvocab
=
util_functions
.
get_vocab
(
e_set
.
_text
,
e_set
.
_score
)
...
@@ -36,6 +42,10 @@ class FeatureExtractor:
...
@@ -36,6 +42,10 @@ class FeatureExtractor:
return
ret
return
ret
def
get_good_pos_ngrams
(
self
):
def
get_good_pos_ngrams
(
self
):
"""
Gets a list of grammatically correct part-of-speech sequences from an input file called essaycorpus.txt
Returns the list and caches the file
"""
if
(
os
.
path
.
isfile
(
"good_pos_ngrams.p"
)):
if
(
os
.
path
.
isfile
(
"good_pos_ngrams.p"
)):
good_pos_ngrams
=
pickle
.
load
(
open
(
'good_pos_ngrams.p'
,
'rb'
))
good_pos_ngrams
=
pickle
.
load
(
open
(
'good_pos_ngrams.p'
,
'rb'
))
else
:
else
:
...
@@ -46,6 +56,11 @@ class FeatureExtractor:
...
@@ -46,6 +56,11 @@ class FeatureExtractor:
return
good_pos_ngrams
return
good_pos_ngrams
def
gen_length_feats
(
self
,
e_set
):
def
gen_length_feats
(
self
,
e_set
):
"""
Generates length based features from an essay set
Generally an internal function called by gen_feats
Returns an array of length features
"""
text
=
e_set
.
_text
text
=
e_set
.
_text
lengths
=
[
len
(
e
)
for
e
in
text
]
lengths
=
[
len
(
e
)
for
e
in
text
]
word_counts
=
[
len
(
t
)
for
t
in
e_set
.
_tokens
]
word_counts
=
[
len
(
t
)
for
t
in
e_set
.
_tokens
]
...
@@ -68,6 +83,11 @@ class FeatureExtractor:
...
@@ -68,6 +83,11 @@ class FeatureExtractor:
return
length_arr
.
copy
()
return
length_arr
.
copy
()
def
gen_bag_feats
(
self
,
e_set
):
def
gen_bag_feats
(
self
,
e_set
):
"""
Generates bag of words features from an input essay set and trained FeatureExtractor
Generally called by gen_feats
Returns an array of features
"""
if
(
hasattr
(
self
,
'_stem_dict'
)):
if
(
hasattr
(
self
,
'_stem_dict'
)):
sfeats
=
self
.
_stem_dict
.
transform
(
e_set
.
_clean_stem_text
)
sfeats
=
self
.
_stem_dict
.
transform
(
e_set
.
_clean_stem_text
)
nfeats
=
self
.
_normal_dict
.
transform
(
e_set
.
_text
)
nfeats
=
self
.
_normal_dict
.
transform
(
e_set
.
_text
)
...
@@ -77,6 +97,10 @@ class FeatureExtractor:
...
@@ -77,6 +97,10 @@ class FeatureExtractor:
return
bag_feats
.
copy
()
return
bag_feats
.
copy
()
def
gen_feats
(
self
,
e_set
):
def
gen_feats
(
self
,
e_set
):
"""
Generates bag of words, length, and prompt features from an essay set object
returns an array of features
"""
bag_feats
=
self
.
gen_bag_feats
(
e_set
)
bag_feats
=
self
.
gen_bag_feats
(
e_set
)
length_feats
=
self
.
gen_length_feats
(
e_set
)
length_feats
=
self
.
gen_length_feats
(
e_set
)
prompt_feats
=
self
.
gen_prompt_feats
(
e_set
)
prompt_feats
=
self
.
gen_prompt_feats
(
e_set
)
...
@@ -86,6 +110,11 @@ class FeatureExtractor:
...
@@ -86,6 +110,11 @@ class FeatureExtractor:
return
overall_feats
return
overall_feats
def
gen_prompt_feats
(
self
,
e_set
):
def
gen_prompt_feats
(
self
,
e_set
):
"""
Generates prompt based features from an essay set object and internal prompt variable.
Generally called internally by gen_feats
Returns an array of prompt features
"""
prompt_toks
=
nltk
.
word_tokenize
(
e_set
.
_prompt
)
prompt_toks
=
nltk
.
word_tokenize
(
e_set
.
_prompt
)
expand_syns
=
[]
expand_syns
=
[]
for
word
in
prompt_toks
:
for
word
in
prompt_toks
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment