Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
ease
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
ease
Commits
cf5e827c
Commit
cf5e827c
authored
Jun 06, 2013
by
Vik Paruchuri
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add in test case creation, ability to run those test cases
parent
d8662d92
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
119 additions
and
14 deletions
+119
-14
.gitignore
+2
-0
ease/create.py
+21
-1
ease/grade.py
+0
-1
ease/tests/__init__.py
+1
-0
ease/tests/data/json_data/.vc
+0
-0
ease/tests/test_model_accuracy.py
+95
-12
No files found.
.gitignore
View file @
cf5e827c
...
...
@@ -11,4 +11,6 @@ ease.egg-info/
*.egg
.coverage
*.orig
!.vc
data/json_data/*
ease/create.py
View file @
cf5e827c
...
...
@@ -18,11 +18,28 @@ import model_creator
import
util_functions
import
predictor_set
import
predictor_extractor
from
datetime
import
datetime
import
json
#Make a log
log
=
logging
.
getLogger
(
__name__
)
def
create
(
text
,
score
,
prompt_string
):
def dump_input_data(text, score):
    """
    Saves the essays and scores passed to create() as a JSON file, so that the
    same input can later be replayed as a test case.

    text - indexable sequence of essay texts
    score - indexable sequence of scores, parallel to text (score[i] goes with text[i])

    The file is written to base_path + "/tests/data/json_data/" and is named
    "test-case-<HHMMSSddmmYYYY>.json". Its content is a JSON list of
    {'text': ..., 'score': ...} dictionaries, one per essay.

    Returns nothing. Any failure is logged and swallowed, so dumping never
    interrupts the caller.
    """
    try:
        file_path = base_path + "/tests/data/json_data/"
        time_suffix = datetime.now().strftime("%H%M%S%d%m%Y")
        prefix = "test-case-"
        filename = prefix + time_suffix + ".json"

        # Index into score (rather than zip) so a score list that is too short
        # still surfaces as a logged error instead of a silently truncated dump.
        json_data = [{'text': essay, 'score': score[i]} for i, essay in enumerate(text)]

        with open(file_path + filename, 'w+') as outfile:
            json.dump(json_data, outfile)
    except Exception:
        # Best effort only. "except Exception" (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        error = "Could not dump data to file."
        log.exception(error)
def
create
(
text
,
score
,
prompt_string
,
dump_data
=
False
):
"""
Creates a machine learning model from input text, associated scores, a prompt, and a path to the model
TODO: Remove model path argument, it is needed for now to support legacy code
...
...
@@ -31,6 +48,9 @@ def create(text,score,prompt_string):
prompt_string - the common prompt for the set of essays
"""
if
dump_data
:
dump_input_data
(
text
,
score
)
algorithm
=
select_algorithm
(
score
)
#Initialize a results dictionary to return
results
=
{
'errors'
:
[],
'success'
:
False
,
'cv_kappa'
:
0
,
'cv_mean_absolute_error'
:
0
,
...
...
ease/grade.py
View file @
cf5e827c
...
...
@@ -25,7 +25,6 @@ import math
log
=
logging
.
getLogger
(
__name__
)
def
grade
(
grader_data
,
submission
):
"""
Grades a specified submission using specified models
...
...
ease/tests/__init__.py
0 → 100644
View file @
cf5e827c
__author__
=
'vik'
ease/tests/data/json_data/.vc
0 → 100644
View file @
cf5e827c
ease/tests/test_model_accuracy.py
View file @
cf5e827c
...
...
@@ -3,6 +3,7 @@ import os
from
ease
import
create
,
grade
import
random
import
logging
import
json
log
=
logging
.
getLogger
(
__name__
)
...
...
@@ -22,6 +23,11 @@ class DataLoader():
text
.
append
(
data
[:
CHARACTER_LIMIT
])
return
text
def load_json_file(self, filename):
    """
    Reads a JSON file and returns the decoded data.

    filename - path to the JSON file

    Raises whatever open() or json.load() raise for a missing or malformed file.
    """
    # The context manager closes the handle even if parsing fails.
    with open(filename) as datafile:
        return json.load(datafile)
def
load_data
(
self
):
"""
Override when inheriting
...
...
@@ -50,6 +56,47 @@ class PolarityLoader(DataLoader):
return
scores
,
text
class JSONLoader(DataLoader):
    """
    Loads test cases from the ".json" files found directly inside a directory.

    Each file is expected to hold a list of dictionaries with 'text' and
    'score' keys.
    """

    def __init__(self, pathname):
        """
        pathname - directory to scan for ".json" test case files
        """
        self.pathname = pathname

    def load_data(self):
        """
        Reads every ".json" file in self.pathname, in sorted path order.

        Returns (all_scores, all_text): two parallel lists with one entry per
        file, each entry being that file's list of scores / list of texts.
        Returns ([], []) when the directory holds no ".json" files.

        When a file stores a list of scores per essay, the essay text is
        repeated once for each of its scores so both lists stay flat and aligned.
        """
        files = []
        for name in os.listdir(self.pathname):
            candidate = os.path.join(self.pathname, name)
            if os.path.isfile(candidate) and name.endswith(".json"):
                files.append(os.path.abspath(candidate))
        files.sort()

        # Nothing to load.
        if not files:
            return [], []

        all_scores = []
        all_text = []
        for path in files:
            records = self.load_json_file(path)
            scores = [record['score'] for record in records]
            text = [record['text'] for record in records]

            # The "scores and" guard keeps an empty case file from raising
            # IndexError on scores[0].
            if scores and isinstance(scores[0], list):
                flat_text = []
                flat_scores = []
                # Pair each essay with each of its own scores. The earlier code
                # appended the score list itself in place of the essay text.
                for essay, essay_scores in zip(text, scores):
                    for single_score in essay_scores:
                        flat_text.append(essay)
                        flat_scores.append(single_score)
                text = flat_text
                scores = flat_scores

            all_scores.append(scores)
            all_text.append(text)

        return all_scores, all_text
class
ModelCreator
():
def
__init__
(
self
,
scores
,
text
):
self
.
scores
=
scores
...
...
@@ -83,10 +130,13 @@ class GenericTest(object):
expected_kappa_min
=
0
expected_mae_max
=
0
def
generic_setup
(
self
):
def load_data(self):
    """
    Builds this test's loader for TEST_PATH/self.data_path and returns the
    (scores, text) pair it loads.
    """
    data_dir = os.path.join(TEST_PATH, self.data_path)
    loader_instance = self.loader(data_dir)
    scores, text = loader_instance.load_data()
    return scores, text
def
generic_setup
(
self
,
scores
,
text
):
#Shuffle to mix up the classes, set seed to make it repeatable
random
.
seed
(
1
)
shuffled_scores
=
[]
...
...
@@ -100,28 +150,30 @@ class GenericTest(object):
self
.
text
=
shuffled_text
[:
TRAINING_LIMIT
]
self
.
scores
=
shuffled_scores
[:
TRAINING_LIMIT
]
def
test_
model_creation_and_grading
(
self
):
def
model_creation_and_grading
(
self
):
score_subset
=
self
.
scores
[:
QUICK_TEST_LIMIT
]
text_subset
=
self
.
text
[:
QUICK_TEST_LIMIT
]
model_creator
=
ModelCreator
(
score_subset
,
text_subset
)
results
=
model_creator
.
create_model
()
self
.
assertTrue
(
results
[
'success'
])
assert
results
[
'success'
]
==
True
grader
=
Grader
(
results
)
results
=
grader
.
grade
(
self
.
text
[
0
])
self
.
assertTrue
(
results
[
'success'
])
assert
results
[
'success'
]
==
True
def
test_
scoring_accuracy
(
self
):
def
scoring_accuracy
(
self
):
random
.
seed
(
1
)
model_creator
=
ModelCreator
(
self
.
scores
,
self
.
text
)
results
=
model_creator
.
create_model
()
self
.
assertTrue
(
results
[
'success'
])
assert
results
[
'success'
]
==
True
cv_kappa
=
results
[
'cv_kappa'
]
cv_mae
=
results
[
'cv_mean_absolute_error'
]
self
.
assertGreaterEqual
(
cv_kappa
,
self
.
expected_kappa_min
)
self
.
assertLessEqual
(
cv_mae
,
self
.
expected_mae_max
)
assert
cv_kappa
>=
self
.
expected_kappa_min
assert
cv_mae
<=
self
.
expected_mae_max
def
test_generic_model_creation_and_grading
(
self
):
def
generic_model_creation_and_grading
(
self
):
log
.
info
(
self
.
scores
)
log
.
info
(
self
.
text
)
score_subset
=
[
random
.
randint
(
0
,
100
)
for
i
in
xrange
(
0
,
min
([
QUICK_TEST_LIMIT
,
len
(
self
.
scores
)]))]
text_subset
=
self
.
text
[:
QUICK_TEST_LIMIT
]
text_subset
=
{
...
...
@@ -130,7 +182,7 @@ class GenericTest(object):
}
model_creator
=
ModelCreator
(
score_subset
,
text_subset
)
results
=
model_creator
.
create_model
()
self
.
assertTrue
(
results
[
'success'
])
assert
results
[
'success'
]
==
True
grader
=
Grader
(
results
)
test_text
=
{
...
...
@@ -138,7 +190,7 @@ class GenericTest(object):
'numeric_values'
:
[
1
]
}
results
=
grader
.
grade
(
test_text
)
self
.
assertTrue
(
results
[
'success'
])
assert
results
[
'success'
]
==
True
class
PolarityTest
(
unittest
.
TestCase
,
GenericTest
):
loader
=
PolarityLoader
...
...
@@ -150,6 +202,37 @@ class PolarityTest(unittest.TestCase,GenericTest):
expected_mae_max
=
1
def
setUp
(
self
):
self
.
generic_setup
()
scores
,
text
=
self
.
load_data
()
self
.
generic_setup
(
scores
,
text
)
def
test_model_creation_and_grading
(
self
):
self
.
model_creation_and_grading
()
def
test_scoring_accuracy
(
self
):
self
.
scoring_accuracy
()
def
test_generic_model_creation_and_grading
(
self
):
self
.
generic_model_creation_and_grading
()
class JSONTest(GenericTest):
    """
    Test settings for the JSON test cases stored under data/json_data.

    setUp loads every test case and also returns the loaded data, so a caller
    can invoke it directly and iterate over the cases.
    """
    loader = JSONLoader
    data_path = "data/json_data"

    #These will increase if we allow more data in.
    #I am setting the amount of data low to allow tests to finish quickly (40 training essays, 1000 character max for each)
    expected_kappa_min = -.2
    expected_mae_max = 1

    def setUp(self):
        """
        Loads all test cases into self.scores / self.text and returns them as
        a (scores, text) tuple.
        """
        loaded_scores, loaded_text = self.load_data()
        self.scores = loaded_scores
        self.text = loaded_text
        return self.scores, self.text
def test_loop():
    """
    Generator test: for each loaded JSON test case, prepares a shared JSONTest
    instance with that case's data and yields the three checks to run on it.
    """
    json_test = JSONTest()
    all_scores, all_text = json_test.setUp()
    for case_index, case_scores in enumerate(all_scores):
        # Re-prime the shared instance before handing out checks for this case.
        json_test.generic_setup(case_scores, all_text[case_index])
        checks = (
            json_test.model_creation_and_grading,
            json_test.scoring_accuracy,
            json_test.generic_model_creation_and_grading,
        )
        for check in checks:
            yield check
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment