Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
ease
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
ease
Commits
c3b419c8
Commit
c3b419c8
authored
Dec 06, 2012
by
Vik Paruchuri
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix grammar feedback
parent
9ebb8e47
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
4 deletions
+15
-4
feature_extractor.py
+12
-4
util_functions.py
+3
-0
No files found.
feature_extractor.py
View file @
c3b419c8
...
@@ -29,6 +29,8 @@ class FeatureExtractor(object):
...
@@ -29,6 +29,8 @@ class FeatureExtractor(object):
def
__init__
(
self
):
def
__init__
(
self
):
self
.
_good_pos_ngrams
=
self
.
get_good_pos_ngrams
()
self
.
_good_pos_ngrams
=
self
.
get_good_pos_ngrams
()
self
.
dict_initialized
=
False
self
.
dict_initialized
=
False
self
.
_spell_errors_per_character
=
0
self
.
_grammar_errors_per_character
=
0
def
initialize_dictionaries
(
self
,
e_set
):
def
initialize_dictionaries
(
self
,
e_set
):
"""
"""
...
@@ -46,8 +48,8 @@ class FeatureExtractor(object):
...
@@ -46,8 +48,8 @@ class FeatureExtractor(object):
self
.
dict_initialized
=
True
self
.
dict_initialized
=
True
self
.
_mean_spelling_errors
=
sum
(
e_set
.
_spelling_errors
)
/
float
(
len
(
e_set
.
_spelling_errors
))
self
.
_mean_spelling_errors
=
sum
(
e_set
.
_spelling_errors
)
/
float
(
len
(
e_set
.
_spelling_errors
))
self
.
_spell_errors_per_character
=
sum
(
e_set
.
_spelling_errors
)
/
float
(
sum
([
len
(
t
)
for
t
in
e_set
.
_text
]))
self
.
_spell_errors_per_character
=
sum
(
e_set
.
_spelling_errors
)
/
float
(
sum
([
len
(
t
)
for
t
in
e_set
.
_text
]))
self
.
_grammar_errors_per_character
=
1
-
(
sum
(
self
.
_get_grammar_errors
self
.
_grammar_errors_per_character
=
(
sum
(
self
.
_get_grammar_errors
(
e_set
.
_pos
,
e_set
.
_text
,
e_set
.
_tokens
)
[
0
]
)
/
float
(
sum
([
len
(
t
)
for
t
in
e_set
.
_text
])))
(
e_set
.
_pos
,
e_set
.
_text
,
e_set
.
_tokens
))
/
float
(
sum
([
len
(
t
)
for
t
in
e_set
.
_text
])))
bag_feats
=
self
.
gen_bag_feats
(
e_set
)
bag_feats
=
self
.
gen_bag_feats
(
e_set
)
f_row_sum
=
numpy
.
sum
(
bag_feats
[:,:])
f_row_sum
=
numpy
.
sum
(
bag_feats
[:,:])
self
.
_mean_f_prop
=
f_row_sum
/
float
(
sum
([
len
(
t
)
for
t
in
e_set
.
_text
]))
self
.
_mean_f_prop
=
f_row_sum
/
float
(
sum
([
len
(
t
)
for
t
in
e_set
.
_text
]))
...
@@ -107,7 +109,11 @@ class FeatureExtractor(object):
...
@@ -107,7 +109,11 @@ class FeatureExtractor(object):
fixed_bad_pos_tuples
=
[
bad_pos_tuples
[
z
]
for
z
in
xrange
(
0
,
len
(
bad_pos_tuples
))
if
z
not
in
to_delete
]
fixed_bad_pos_tuples
=
[
bad_pos_tuples
[
z
]
for
z
in
xrange
(
0
,
len
(
bad_pos_tuples
))
if
z
not
in
to_delete
]
bad_pos_positions
.
append
(
fixed_bad_pos_tuples
)
bad_pos_positions
.
append
(
fixed_bad_pos_tuples
)
overlap_ngrams
=
[
z
for
z
in
pos_ngrams
if
z
in
self
.
_good_pos_ngrams
]
overlap_ngrams
=
[
z
for
z
in
pos_ngrams
if
z
in
self
.
_good_pos_ngrams
]
good_pos_tags
.
append
(
len
(
overlap_ngrams
))
if
(
len
(
pos_ngrams
)
-
len
(
overlap_ngrams
))
>
0
:
divisor
=
len
(
pos_ngrams
)
/
len
(
pos_seq
)
else
:
divisor
=
1
good_pos_tags
.
append
((
len
(
pos_ngrams
)
-
len
(
overlap_ngrams
))
/
divisor
)
return
good_pos_tags
,
bad_pos_positions
return
good_pos_tags
,
bad_pos_positions
def
gen_length_feats
(
self
,
e_set
):
def
gen_length_feats
(
self
,
e_set
):
...
@@ -215,7 +221,9 @@ class FeatureExtractor(object):
...
@@ -215,7 +221,9 @@ class FeatureExtractor(object):
for
m
in
xrange
(
0
,
len
(
e_set
.
_text
)):
for
m
in
xrange
(
0
,
len
(
e_set
.
_text
)):
individual_feedback
=
{
'grammar'
:
"Grammar: Ok."
,
'spelling'
:
"Spelling: Ok."
,
individual_feedback
=
{
'grammar'
:
"Grammar: Ok."
,
'spelling'
:
"Spelling: Ok."
,
'topicality'
:
"Topicality: Ok."
,
'markup_text'
:
""
,
'topicality'
:
"Topicality: Ok."
,
'markup_text'
:
""
,
'prompt_overlap'
:
"Prompt Overlap: Ok."
}
'prompt_overlap'
:
"Prompt Overlap: Ok."
,
'grammar_per_char'
:
set_grammar_per_character
[
m
],
'spelling_per_char'
:
set_spell_errors_per_character
[
m
]}
markup_tokens
=
e_set
.
_markup_text
[
m
]
.
split
(
" "
)
markup_tokens
=
e_set
.
_markup_text
[
m
]
.
split
(
" "
)
#This loop ensures that sequences of bad grammar get put together into one sequence instead of staying
#This loop ensures that sequences of bad grammar get put together into one sequence instead of staying
...
...
util_functions.py
View file @
c3b419c8
...
@@ -13,6 +13,9 @@ import nltk
...
@@ -13,6 +13,9 @@ import nltk
import
random
import
random
import
pickle
import
pickle
from
path
import
path
from
path
import
path
import
logging
log
=
logging
.
getLogger
(
__name__
)
def
create_model_path
(
model_path
):
def
create_model_path
(
model_path
):
if
not
model_path
.
startswith
(
"/"
)
and
not
model_path
.
startswith
(
"models/"
):
if
not
model_path
.
startswith
(
"/"
)
and
not
model_path
.
startswith
(
"models/"
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment