edx / edx-ora2 / Commits

Commit 9f407d25, authored Jun 16, 2014 by Will Daly
Merge pull request #437 from edx/will/ai-invalid-scores
Will/ai invalid scores
Parents: 52d38881, 69aaec0f
Showing 6 changed files with 180 additions and 15 deletions

openassessment/assessment/api/ai_worker.py         +11  -5
openassessment/assessment/models/ai.py             +18  -0
openassessment/assessment/models/base.py           +1   -3
openassessment/assessment/test/test_ai_worker.py   +13  -2
openassessment/assessment/test/test_worker.py      +85  -3
openassessment/assessment/worker/grading.py        +52  -2
openassessment/assessment/api/ai_worker.py

@@ -31,8 +31,7 @@ def get_grading_task_params(grading_workflow_uuid):
         dict with keys:
             * essay_text (unicode): The text of the essay submission.
             * classifier_set (dict): Maps criterion names to serialized classifiers.
-            * course_id (unicode): The course ID that the training task is associated with.
-            * item_id (unicode): Identifies the item that the AI will be training to grade.
+            * valid_scores (dict): Maps criterion names to a list of valid scores for that criterion.
             * algorithm_id (unicode): ID of the algorithm used to perform training.

     Raises:

@@ -67,12 +66,19 @@ def get_grading_task_params(grading_workflow_uuid):
         raise AIGradingInternalError(msg)

     try:
+        classifiers = list(classifier_set.classifiers.select_related().all())
         return {
             'essay_text': workflow.essay_text,
-            'classifier_set': classifier_set.classifiers_dict,
-            'course_id': workflow.course_id,
-            'item_id': workflow.item_id,
+            'classifier_set': {
+                classifier.criterion.name: classifier.download_classifier_data()
+                for classifier in classifiers
+            },
             'algorithm_id': workflow.algorithm_id,
+            'valid_scores': {
+                classifier.criterion.name: classifier.valid_scores
+                for classifier in classifiers
+            }
         }
     except (DatabaseError, ClassifierSerializeError, IncompleteClassifierSet, ValueError) as ex:
         msg = (
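For orientation, this is roughly the shape of the dict the grading worker now receives from get_grading_task_params, using the two-criterion rubric from the test fixtures below (values are illustrative and the serialized classifier payloads are elided):

# Illustrative only: the structure of the grading task params after this change,
# based on the expected_params asserted in test_ai_worker.py below.
params = {
    'essay_text': u"...",                 # the learner's essay submission
    'classifier_set': {
        u"vøȼȺƀᵾłȺɍɏ": {},                # criterion name -> serialized classifier data
        u"ﻭɼค๓๓คɼ": {},
    },
    'algorithm_id': u"test-stub",
    'valid_scores': {
        u"vøȼȺƀᵾłȺɍɏ": [0, 1, 2],         # criterion name -> valid point values, ascending
        u"ﻭɼค๓๓คɼ": [0, 1, 2],
    },
}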
openassessment/assessment/models/ai.py

@@ -7,6 +7,7 @@ import logging
 import itertools
 from django.conf import settings
 from django.core.files.base import ContentFile
+from django.core.cache import cache
 from django.db import models, transaction, DatabaseError
 from django.utils.timezone import now
 from django.core.exceptions import ObjectDoesNotExist

@@ -266,6 +267,23 @@ class AIClassifier(models.Model):
         """
         return json.loads(self.classifier_data.read())  # pylint:disable=E1101

+    @property
+    def valid_scores(self):
+        """
+        Return a list of valid scores for the rubric criterion associated
+        with this classifier.
+
+        Returns:
+            list of integer scores, in ascending order.
+
+        """
+        cache_key = u"openassessment.assessment.ai.classifier.{pk}.valid_scores".format(pk=self.pk)
+        valid_scores = cache.get(cache_key)
+        if valid_scores is None:
+            valid_scores = sorted([option.points for option in self.criterion.options.all()])
+            cache.set(cache_key, valid_scores)
+        return valid_scores
+

 class AIWorkflow(models.Model):
     """
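The property is a simple cache-aside lookup keyed on the classifier's primary key, which is why the second call in the query-count test below gets by with fewer queries. A hypothetical usage sketch, assuming a classifier whose criterion has options worth 0, 1, and 2 points (as in the test rubric):

# `classifier` is assumed to be an AIClassifier for a criterion with 0/1/2-point options.
classifier.valid_scores   # -> [0, 1, 2]; queries the options table, then caches the result
classifier.valid_scores   # -> [0, 1, 2]; served from the cache entry
                          #    u"openassessment.assessment.ai.classifier.<pk>.valid_scores"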
openassessment/assessment/models/base.py

@@ -243,9 +243,7 @@ class Rubric(models.Model):
         # Find the IDs for the options matching the specified point value
         option_id_set = set()
         for criterion_name, option_points in criterion_points.iteritems():
-            if (criterion_name in rubric_points_dict and
-                option_points in rubric_points_dict[criterion_name]
-            ):
+            if (criterion_name in rubric_points_dict and option_points in rubric_points_dict[criterion_name]):
                 option_id = rubric_points_dict[criterion_name][option_points]
                 option_id_set.add(option_id)
             else:
openassessment/assessment/test/test_ai_worker.py

@@ -226,11 +226,22 @@ class AIWorkerGradingTest(CacheResetTest):
             'essay_text': ANSWER,
             'classifier_set': CLASSIFIERS,
             'algorithm_id': ALGORITHM_ID,
-            'course_id': STUDENT_ITEM.get('course_id'),
-            'item_id': STUDENT_ITEM.get('item_id')
+            'valid_scores': {
+                u"vøȼȺƀᵾłȺɍɏ": [0, 1, 2],
+                u"ﻭɼค๓๓คɼ": [0, 1, 2]
+            }
         }
         self.assertItemsEqual(params, expected_params)

+    def test_get_grading_task_params_num_queries(self):
+        with self.assertNumQueries(5):
+            ai_worker_api.get_grading_task_params(self.workflow_uuid)
+
+        # The second time through we should be caching the queries
+        # to determine the valid scores for a classifier
+        with self.assertNumQueries(3):
+            ai_worker_api.get_grading_task_params(self.workflow_uuid)
+
     def test_get_grading_task_params_no_workflow(self):
         with self.assertRaises(AIGradingRequestError):
             ai_worker_api.get_grading_task_params("invalid_uuid")
openassessment/assessment/test/test_worker.py

@@ -3,6 +3,7 @@
 Tests for AI worker tasks.
 """
 from contextlib import contextmanager
+import itertools
 import mock
 from django.test.utils import override_settings
 from submissions import api as sub_api

@@ -45,15 +46,30 @@ class ErrorStubAIAlgorithm(AIAlgorithm):
         raise ScoreError("Test error!")


+class InvalidScoreAlgorithm(AIAlgorithm):
+    """
+    Stub implementation that returns a score that isn't in the rubric.
+    """
+    SCORE_CYCLE = itertools.cycle([-100, 0.7, 1.2, 100])
+
+    def train_classifier(self, examples):
+        return {}
+
+    def score(self, text, classifier):
+        return self.SCORE_CYCLE.next()
+
+
 ALGORITHM_ID = u"test-stub"
 ERROR_STUB_ALGORITHM_ID = u"error-stub"
 UNDEFINED_CLASS_ALGORITHM_ID = u"undefined_class"
 UNDEFINED_MODULE_ALGORITHM_ID = u"undefined_module"
+INVALID_SCORE_ALGORITHM_ID = u"invalid_score"

 AI_ALGORITHMS = {
     ALGORITHM_ID: '{module}.StubAIAlgorithm'.format(module=__name__),
     ERROR_STUB_ALGORITHM_ID: '{module}.ErrorStubAIAlgorithm'.format(module=__name__),
     UNDEFINED_CLASS_ALGORITHM_ID: '{module}.NotDefinedAIAlgorithm'.format(module=__name__),
-    UNDEFINED_MODULE_ALGORITHM_ID: 'openassessment.not.valid.NotDefinedAIAlgorithm'
+    UNDEFINED_MODULE_ALGORITHM_ID: 'openassessment.not.valid.NotDefinedAIAlgorithm',
+    INVALID_SCORE_ALGORITHM_ID: '{module}.InvalidScoreAlgorithm'.format(module=__name__),
 }

@@ -109,9 +125,7 @@ class AITrainingTaskTest(CeleryTaskTest):
         Create a training workflow in the database.
         """
         examples = deserialize_training_examples(EXAMPLES, RUBRIC)
         workflow = AITrainingWorkflow.start_workflow(examples, self.COURSE_ID, self.ITEM_ID, self.ALGORITHM_ID)
         self.workflow_uuid = workflow.uuid

     def test_unknown_algorithm(self):

@@ -252,6 +266,32 @@ class AIGradingTaskTest(CeleryTaskTest):
         workflow.classifier_set = classifier_set
         workflow.save()

+    @mock.patch('openassessment.assessment.api.ai_worker.create_assessment')
+    @override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
+    def test_algorithm_gives_invalid_score(self, mock_create_assessment):
+        # If an algorithm provides a score that isn't in the rubric,
+        # we should choose the closest valid score.
+        self._set_algorithm_id(INVALID_SCORE_ALGORITHM_ID)
+
+        # The first score given by the algorithm should be below the minimum valid score
+        # The second score will be between two valid scores (0 and 1), rounding up
+        grade_essay(self.workflow_uuid)
+        expected_scores = {
+            u"vøȼȺƀᵾłȺɍɏ": 0,
+            u"ﻭɼค๓๓คɼ": 1
+        }
+        mock_create_assessment.assert_called_with(self.workflow_uuid, expected_scores)
+
+        # The third score will be between two valid scores (1 and 2), rounding down
+        # The final score will be greater than the maximum score
+        self._reset_workflow()
+        grade_essay(self.workflow_uuid)
+        expected_scores = {
+            u"vøȼȺƀᵾłȺɍɏ": 1,
+            u"ﻭɼค๓๓คɼ": 2
+        }
+        mock_create_assessment.assert_called_with(self.workflow_uuid, expected_scores)
+
     @mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
     @override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
     def test_retrieve_params_error(self, mock_call):

@@ -277,6 +317,39 @@ class AIGradingTaskTest(CeleryTaskTest):
         with self.assert_retry(grade_essay, AIGradingInternalError):
             grade_essay(self.workflow_uuid)

+    @mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
+    @override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
+    def test_params_missing_criterion_for_valid_scores(self, mock_call):
+        mock_call.return_value = {
+            'essay_text': 'test',
+            'classifier_set': {
+                u"vøȼȺƀᵾłȺɍɏ": {},
+                u"ﻭɼค๓๓คɼ": {}
+            },
+            'algorithm_id': ALGORITHM_ID,
+            'valid_scores': {}
+        }
+        with self.assert_retry(grade_essay, AIGradingInternalError):
+            grade_essay(self.workflow_uuid)
+
+    @mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
+    @override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
+    def test_params_valid_scores_empty_list(self, mock_call):
+        mock_call.return_value = {
+            'essay_text': 'test',
+            'classifier_set': {
+                u"vøȼȺƀᵾłȺɍɏ": {},
+                u"ﻭɼค๓๓คɼ": {}
+            },
+            'algorithm_id': ALGORITHM_ID,
+            'valid_scores': {
+                u"vøȼȺƀᵾłȺɍɏ": [],
+                u"ﻭɼค๓๓คɼ": [0, 1, 2]
+            }
+        }
+        with self.assert_retry(grade_essay, AIGradingInternalError):
+            grade_essay(self.workflow_uuid)
+
     def _set_algorithm_id(self, algorithm_id):
         """
         Override the default algorithm ID for the grading workflow.

@@ -291,3 +364,12 @@ class AIGradingTaskTest(CeleryTaskTest):
         workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
         workflow.algorithm_id = algorithm_id
         workflow.save()
+
+    def _reset_workflow(self):
+        """
+        Reset the workflow so we can re-use it.
+        """
+        workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
+        workflow.completed_at = None
+        workflow.assessment = None
+        workflow.save()
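As the stub algorithms above illustrate, the worker resolves an algorithm ID to an implementation through the ORA2_AI_ALGORITHMS setting, a dict mapping algorithm IDs to dotted import paths that AIAlgorithm.algorithm_for_id() loads at grading time. A rough sketch of wiring in a custom implementation follows; the module and class names (myapp.ai.MyAlgorithm) are hypothetical:

# settings.py (sketch)
ORA2_AI_ALGORITHMS = {
    u"my-algorithm": 'myapp.ai.MyAlgorithm',   # hypothetical module and class
}

# myapp/ai.py (sketch)
from openassessment.assessment.worker.algorithm import AIAlgorithm

class MyAlgorithm(AIAlgorithm):
    """Minimal example of the AIAlgorithm interface exercised by the stubs above."""

    def train_classifier(self, examples):
        # Return whatever serialized classifier data the scoring side needs.
        return {}

    def score(self, text, classifier):
        # After this commit, an out-of-range score is snapped to the closest valid rubric score.
        return 0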
openassessment/assessment/worker/grading.py

@@ -10,7 +10,7 @@ from celery.utils.log import get_task_logger
 from dogapi import dog_stats_api
 from openassessment.assessment.api import ai_worker as ai_worker_api
 from openassessment.assessment.errors import (
-    AIError, AIGradingInternalError, AIGradingRequestError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
+    AIError, AIGradingInternalError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
 )
 from .algorithm import AIAlgorithm, AIAlgorithmError
 from openassessment.assessment.models.ai import AIClassifierSet, AIGradingWorkflow

@@ -54,6 +54,7 @@ def grade_essay(workflow_uuid):
         essay_text = params['essay_text']
         classifier_set = params['classifier_set']
         algorithm_id = params['algorithm_id']
+        valid_scores = params['valid_scores']
     except (AIError, KeyError):
         msg = (
             u"An error occurred while retrieving the AI grading task "

@@ -62,6 +63,23 @@ def grade_essay(workflow_uuid):
         logger.exception(msg)
         raise grade_essay.retry()

+    # Validate that the we have valid scores for each criterion
+    for criterion_name in classifier_set.keys():
+        msg = None
+        if criterion_name not in valid_scores:
+            msg = (
+                u"Could not find {criterion} in the list of valid scores "
+                u"for grading workflow with UUID {uuid}"
+            ).format(criterion=criterion_name, uuid=workflow_uuid)
+        elif len(valid_scores[criterion_name]) == 0:
+            msg = (
+                u"Valid scores for {criterion} is empty for "
+                u"grading workflow with UUID {uuid}"
+            ).format(criterion=criterion_name, uuid=workflow_uuid)
+        if msg:
+            logger.exception(msg)
+            raise AIGradingInternalError(msg)
+
     # Retrieve the AI algorithm
     try:
         algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)

@@ -76,7 +94,10 @@ def grade_essay(workflow_uuid):
     # Use the algorithm to evaluate the essay for each criterion
     try:
         scores_by_criterion = {
-            criterion_name: algorithm.score(essay_text, classifier)
+            criterion_name: _closest_valid_score(
+                algorithm.score(essay_text, classifier),
+                valid_scores[criterion_name]
+            )
             for criterion_name, classifier in classifier_set.iteritems()
         }
     except AIAlgorithmError:

@@ -222,6 +243,35 @@ def reschedule_grading_tasks(course_id, item_id):
         raise reschedule_grading_tasks.retry()


+def _closest_valid_score(score, valid_scores):
+    """
+    Return the closest valid score for a given score.
+    This is necessary, since rubric scores may be non-contiguous.
+
+    Args:
+        score (int or float): The score assigned by the algorithm.
+        valid_scores (list of int): Valid scores for this criterion,
+            assumed to be sorted in ascending order.
+
+    Returns:
+        int
+
+    """
+    # If the score is already valid, return it
+    if score in valid_scores:
+        return score
+
+    # Otherwise, find the closest score in the list.
+    closest = valid_scores[0]
+    delta = abs(score - closest)
+    for valid in valid_scores[1:]:
+        new_delta = abs(score - valid)
+        if new_delta < delta:
+            closest = valid
+            delta = new_delta
+    return closest
+
+
 def _log_start_reschedule_grading(course_id=None, item_id=None):
     """
     Sends data about the rescheduling_grading task to datadog
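A quick worked example of the new helper against the [0, 1, 2] criterion used in the tests, following the same score sequence the InvalidScoreAlgorithm stub cycles through:

# Matches the behaviour exercised by test_algorithm_gives_invalid_score:
_closest_valid_score(-100, [0, 1, 2])   # -> 0 (below the minimum: clamps to the lowest valid score)
_closest_valid_score(0.7, [0, 1, 2])    # -> 1 (closer to 1 than to 0, so it "rounds up")
_closest_valid_score(1.2, [0, 1, 2])    # -> 1 (closer to 1 than to 2, so it "rounds down")
_closest_valid_score(100, [0, 1, 2])    # -> 2 (above the maximum: clamps to the highest valid score)
_closest_valid_score(1, [0, 1, 2])      # -> 1 (already valid, returned unchanged)

Because the loop only replaces the candidate on a strictly smaller delta, an exact midpoint (for example 0.5 between 0 and 1) stays with the lower of the two neighbouring scores.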