Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-ora2
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-ora2
Commits
5295df1d
Commit
5295df1d
authored
May 15, 2014
by
Will Daly
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Optimize database queries for student training
parent
d71c867f
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
204 additions
and
85 deletions
+204
-85
apps/openassessment/assessment/api/student_training.py
+2
-2
apps/openassessment/assessment/models/student_training.py
+30
-52
apps/openassessment/assessment/models/training.py
+67
-12
apps/openassessment/assessment/serializers/training.py
+32
-19
apps/openassessment/assessment/test/test_student_training.py
+73
-0
No files found.
apps/openassessment/assessment/api/student_training.py
View file @
5295df1d
...
...
@@ -316,8 +316,8 @@ def get_training_example(submission_uuid, rubric, examples):
# Pick a training example that the student has not yet completed
# If the student already started a training example, then return that instead.
item
=
workflow
.
next_incomplete_item
(
examples
)
return
None
if
item
is
None
else
serialize_training_example
(
item
.
training
_example
)
next_example
=
workflow
.
next_training_example
(
examples
)
return
None
if
next_example
is
None
else
serialize_training_example
(
next
_example
)
except
(
InvalidRubric
,
InvalidTrainingExample
)
as
ex
:
logger
.
exception
(
"Could not deserialize training examples for submission UUID {}"
.
format
(
submission_uuid
)
...
...
apps/openassessment/assessment/models/student_training.py
View file @
5295df1d
"""
Django models specific to the student training assessment type.
"""
from
django.db
import
models
,
transaction
from
django.db
import
models
from
django.utils
import
timezone
from
submissions
import
api
as
sub_api
from
.training
import
TrainingExample
...
...
@@ -60,44 +60,6 @@ class StudentTrainingWorkflow(models.Model):
course_id
=
student_item
[
'course_id'
]
)
@transaction.commit_on_success
def
create_workflow_item
(
self
,
training_example
):
"""
Create a workflow item for a training example
and add it to the workflow.
Args:
training_example (TrainingExample): The training example model
associated with the next workflow item.
Returns:
StudentTrainingWorkflowItem
"""
order_num
=
self
.
items
.
count
()
+
1
# pylint:disable=E1101
item
=
StudentTrainingWorkflowItem
.
objects
.
create
(
workflow
=
self
,
order_num
=
order_num
,
training_example
=
training_example
)
self
.
items
.
add
(
item
)
# pylint:disable=E1101
self
.
save
()
return
item
@property
def
status
(
self
):
"""
The student's status within the workflow (num steps completed / num steps available).
Returns:
tuple of `(num_completed, num_total)`, both integers
"""
items
=
self
.
items
.
all
()
# pylint:disable=E1101
num_complete
=
sum
([
1
if
item
.
is_complete
else
0
for
item
in
items
])
num_total
=
len
(
items
)
return
num_complete
,
num_total
@property
def
num_completed
(
self
):
"""
...
...
@@ -110,26 +72,36 @@ class StudentTrainingWorkflow(models.Model):
"""
return
self
.
items
.
filter
(
completed_at__isnull
=
False
)
.
count
()
# pylint:disable=E1101
def
next_
incomplete_item
(
self
,
examples
):
def
next_
training_example
(
self
,
examples
):
"""
Find the next incomplete item in the workflow.
Return the next training example for the student to assess.
If the student is already working on an example, return that.
Otherwise, choose an example the student hasn't seen
from the list of available examples.
Args:
examples (list of TrainingExample): Training examples to choose from.
Returns:
StudentTrainingWorkflowItem
or None
TrainingExample
or None
"""
# Fetch all the items for this workflow from the database
# Since Django's `select_related` does not follow reverse keys
# we perform the filter ourselves.
items
=
StudentTrainingWorkflowItem
.
objects
.
select_related
(
'training_example'
)
.
filter
(
workflow
=
self
)
# If we're already working on an item, then return that item
current_item
=
self
.
current_item
if
current_item
is
not
None
:
return
current_item
incomplete_items
=
[
item
for
item
in
items
if
not
item
.
is_complete
]
if
len
(
incomplete_items
)
>
0
:
return
incomplete_items
[
0
]
.
training_example
# Otherwise, pick an item that we have not completed
# from the list of examples.
completed_examples
=
[
item
.
training_example
for
item
in
self
.
items
.
all
()
# pylint:disable=E1101
item
.
training_example
for
item
in
items
]
available_examples
=
[
available
for
available
in
examples
...
...
@@ -142,7 +114,14 @@ class StudentTrainingWorkflow(models.Model):
# Otherwise, create a new workflow item for the example
# and add it to the workflow
else
:
return
self
.
create_workflow_item
(
available_examples
[
0
])
order_num
=
len
(
items
)
+
1
next_example
=
available_examples
[
0
]
StudentTrainingWorkflowItem
.
objects
.
create
(
workflow
=
self
,
order_num
=
order_num
,
training_example
=
next_example
)
return
next_example
@property
def
current_item
(
self
):
...
...
@@ -154,14 +133,13 @@ class StudentTrainingWorkflow(models.Model):
StudentTrainingWorkflowItem or None
"""
next_incomplete
=
self
.
items
.
filter
(
# pylint:disable=E1101
next_incomplete
=
self
.
items
.
select_related
(
'training_example'
)
.
filter
(
# pylint:disable=E1101
completed_at__isnull
=
True
)
.
order_by
(
'order_num'
)[:
1
]
if
len
(
next_incomplete
)
>
0
:
return
next_incomplete
[
0
]
else
:
return
None
return
None
if
len
(
next_incomplete
)
==
0
else
next_incomplete
[
0
]
class
StudentTrainingWorkflowItem
(
models
.
Model
):
...
...
apps/openassessment/assessment/models/training.py
View file @
5295df1d
...
...
@@ -3,6 +3,7 @@ Django models for training (both student and AI).
"""
import
json
from
hashlib
import
sha1
from
django.core.cache
import
cache
from
django.db
import
models
from
.base
import
Rubric
,
CriterionOption
...
...
@@ -22,29 +23,34 @@ class TrainingExample(models.Model):
# SHA1 hash
content_hash
=
models
.
CharField
(
max_length
=
40
,
unique
=
True
,
db_index
=
True
)
# Version for models serialized to the cache
# Increment this number whenever you update this model!
CACHE_KEY_VERSION
=
1
class
Meta
:
app_label
=
"assessment"
@classmethod
def
create_example
(
cls
,
answer
,
options_
ids
,
rubric
):
def
create_example
(
cls
,
answer
,
options_
selected
,
rubric
):
"""
Create a new training example.
Args:
answer (JSON-serializable): The answer associated with the training example.
option
_ids (iterable of int): Selected option IDs for the training example.
option
s_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
rubric (Rubric): The rubric associated with the training example.
Returns:
TrainingExample
"""
content_hash
=
cls
.
calculate_hash
(
answer
,
options_
ids
,
rubric
)
content_hash
=
cls
.
calculate_hash
(
answer
,
options_
selected
,
rubric
)
example
=
TrainingExample
.
objects
.
create
(
content_hash
=
content_hash
,
raw_answer
=
json
.
dumps
(
answer
),
rubric
=
rubric
)
options_ids
=
rubric
.
options_ids
(
options_selected
)
for
option
in
CriterionOption
.
objects
.
filter
(
pk__in
=
list
(
options_ids
)):
example
.
options_selected
.
add
(
option
)
...
...
@@ -71,19 +77,50 @@ class TrainingExample(models.Model):
dict: maps criterion names to selected option names
"""
return
{
option
.
criterion
.
name
:
option
.
name
for
option
in
self
.
options_selected
.
all
()
# pylint:disable=E1101
}
# Since training examples are immutable, we can safely cache this
cache_key
=
self
.
cache_key_serialized
(
attribute
=
"options_selected_dict"
)
options_selected
=
cache
.
get
(
cache_key
)
if
options_selected
is
None
:
options_selected
=
{
option
.
criterion
.
name
:
option
.
name
for
option
in
self
.
options_selected
.
all
()
# pylint:disable=E1101
}
cache
.
set
(
cache_key
,
options_selected
)
return
options_selected
def
cache_key_serialized
(
self
,
attribute
=
None
):
"""
Create a cache key based on the content hash
for serialized versions of this model.
Kwargs:
attribute: The name of the attribute being serialized.
If not specified, assume that we are serializing the entire model.
Returns:
str: The cache key
"""
if
attribute
is
None
:
key_template
=
u"TrainingExample.json.v{version}.{content_hash}"
else
:
key_template
=
u"TrainingExample.{attribute}.json.v{version}.{content_hash}"
cache_key
=
key_template
.
format
(
version
=
self
.
CACHE_KEY_VERSION
,
content_hash
=
self
.
content_hash
,
attribute
=
attribute
)
return
cache_key
@staticmethod
def
calculate_hash
(
answer
,
option
_ids
,
rubric
):
def
calculate_hash
(
answer
,
option
s_selected
,
rubric
):
"""
Calculate a hash for the contents of training example.
Args:
answer (JSON-serializable): The answer associated with the training example.
option
_ids (iterable of int): Selected option IDs for the training example.
option
s_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
rubric (Rubric): The rubric associated with the training example.
Returns:
...
...
@@ -92,10 +129,28 @@ class TrainingExample(models.Model):
"""
contents
=
json
.
dumps
({
'answer'
:
answer
,
'option
_ids'
:
list
(
option_ids
)
,
'option
s_selected'
:
options_selected
,
'rubric'
:
rubric
.
id
})
return
sha1
(
contents
)
.
hexdigest
()
class
Meta
:
app_label
=
"assessment"
@classmethod
def
cache_key
(
cls
,
answer
,
options_selected
,
rubric
):
"""
Calculate a cache key based on the content hash.
Args:
answer (JSON-serializable): The answer associated with the training example.
options_selected (dict): The options selected from the rubric (mapping of criterion names to option names)
rubric (Rubric): The rubric associated with the training example.
Returns:
tuple of `(cache_key, content_hash)`, both bytestrings
"""
content_hash
=
cls
.
calculate_hash
(
answer
,
options_selected
,
rubric
)
cache_key
=
u"TrainingExample.model.v{version}.{content_hash}"
.
format
(
version
=
cls
.
CACHE_KEY_VERSION
,
content_hash
=
content_hash
)
return
cache_key
,
content_hash
apps/openassessment/assessment/serializers/training.py
View file @
5295df1d
"""
Serializers for the training assessment type.
"""
import
json
from
django.core.cache
import
cache
from
django.db
import
transaction
,
IntegrityError
from
openassessment.assessment.models
import
TrainingExample
from
.base
import
rubric_from_dict
,
RubricSerializer
...
...
@@ -53,11 +53,17 @@ def serialize_training_example(example):
dict
"""
return
{
'answer'
:
example
.
answer
,
'options_selected'
:
example
.
options_selected_dict
,
'rubric'
:
RubricSerializer
.
serialized_from_cache
(
example
.
rubric
),
}
# Since training examples are immutable, we can safely cache them
cache_key
=
example
.
cache_key_serialized
()
example_dict
=
cache
.
get
(
cache_key
)
if
example_dict
is
None
:
example_dict
=
{
'answer'
:
example
.
answer
,
'options_selected'
:
example
.
options_selected_dict
,
'rubric'
:
RubricSerializer
.
serialized_from_cache
(
example
.
rubric
),
}
cache
.
set
(
cache_key
,
example_dict
)
return
example_dict
@transaction.commit_on_success
...
...
@@ -144,24 +150,31 @@ def deserialize_training_examples(examples, rubric_dict):
# Parse each example
created_examples
=
[]
for
example_dict
in
examples
:
is_valid
,
errors
=
validate_training_example_format
(
example_dict
)
if
not
is_valid
:
raise
InvalidTrainingExample
(
"; "
.
join
(
errors
))
options_ids
=
rubric
.
options_ids
(
example_dict
[
'options_selected'
])
# Try to retrieve the example from the cache
cache_key
,
content_hash
=
TrainingExample
.
cache_key
(
example_dict
[
'answer'
],
example_dict
[
'options_selected'
],
rubric
)
example
=
cache
.
get
(
cache_key
)
# Calculate the content hash to look up the example
content_hash
=
TrainingExample
.
calculate_hash
(
example_dict
[
'answer'
],
options_ids
,
rubric
)
# If we couldn't retrieve the example from the cache, create it
if
example
is
None
:
# Validate the training example
is_valid
,
errors
=
validate_training_example_format
(
example_dict
)
if
not
is_valid
:
raise
InvalidTrainingExample
(
"; "
.
join
(
errors
))
try
:
example
=
TrainingExample
.
objects
.
get
(
content_hash
=
content_hash
)
except
TrainingExample
.
DoesNotExist
:
# Get or create the training example
try
:
example
=
TrainingExample
.
create_example
(
example_dict
[
'answer'
],
options_ids
,
rubric
)
except
IntegrityError
:
example
=
TrainingExample
.
objects
.
get
(
content_hash
=
content_hash
)
except
TrainingExample
.
DoesNotExist
:
try
:
example
=
TrainingExample
.
create_example
(
example_dict
[
'answer'
],
example_dict
[
'options_selected'
],
rubric
)
except
IntegrityError
:
example
=
TrainingExample
.
objects
.
get
(
content_hash
=
content_hash
)
# Add the example to the cache
cache
.
set
(
cache_key
,
example
)
created_examples
.
append
(
example
)
...
...
apps/openassessment/assessment/test/test_student_training.py
View file @
5295df1d
...
...
@@ -159,6 +159,60 @@ class StudentTrainingAssessmentTest(CacheResetTest):
next_retrieved
=
training_api
.
get_training_example
(
self
.
submission_uuid
,
self
.
RUBRIC
,
self
.
EXAMPLES
)
self
.
assertEqual
(
retrieved
,
next_retrieved
)
def
test_get_training_example_num_queries
(
self
):
# Run through the training example once using a different submission
# Training examples and rubrics will be cached and shared for other
# students working on the same problem.
self
.
_warm_cache
(
self
.
RUBRIC
,
self
.
EXAMPLES
)
# First training example
# This will need to create the student training workflow and the first item
# NOTE: we *could* cache the rubric model to reduce the number of queries here,
# but we're selecting it by content hash, which is indexed and should be plenty fast.
with
self
.
assertNumQueries
(
6
):
training_api
.
get_training_example
(
self
.
submission_uuid
,
self
.
RUBRIC
,
self
.
EXAMPLES
)
# Without assessing the first training example, try to retrieve a training example.
# This should return the same example as before, so we won't need to create
# any workflows or workflow items.
with
self
.
assertNumQueries
(
3
):
training_api
.
get_training_example
(
self
.
submission_uuid
,
self
.
RUBRIC
,
self
.
EXAMPLES
)
# Assess the current training example
training_api
.
assess_training_example
(
self
.
submission_uuid
,
self
.
EXAMPLES
[
0
][
'options_selected'
])
# Retrieve the next training example, which requires us to create
# a new workflow item (but not a new workflow).
with
self
.
assertNumQueries
(
4
):
training_api
.
get_training_example
(
self
.
submission_uuid
,
self
.
RUBRIC
,
self
.
EXAMPLES
)
def
test_submitter_is_finished_num_queries
(
self
):
# Complete the first training example
training_api
.
get_training_example
(
self
.
submission_uuid
,
self
.
RUBRIC
,
self
.
EXAMPLES
)
training_api
.
assess_training_example
(
self
.
submission_uuid
,
self
.
EXAMPLES
[
0
][
'options_selected'
])
# Check whether we've completed the requirements
requirements
=
{
'num_required'
:
2
}
with
self
.
assertNumQueries
(
2
):
training_api
.
submitter_is_finished
(
self
.
submission_uuid
,
requirements
)
def
test_get_num_completed_num_queries
(
self
):
# Complete the first training example
training_api
.
get_training_example
(
self
.
submission_uuid
,
self
.
RUBRIC
,
self
.
EXAMPLES
)
training_api
.
assess_training_example
(
self
.
submission_uuid
,
self
.
EXAMPLES
[
0
][
'options_selected'
])
# Check the number completed
with
self
.
assertNumQueries
(
2
):
training_api
.
get_num_completed
(
self
.
submission_uuid
)
def
test_assess_training_example_num_queries
(
self
):
# Populate the cache with training examples and rubrics
self
.
_warm_cache
(
self
.
RUBRIC
,
self
.
EXAMPLES
)
training_api
.
get_training_example
(
self
.
submission_uuid
,
self
.
RUBRIC
,
self
.
EXAMPLES
)
with
self
.
assertNumQueries
(
4
):
training_api
.
assess_training_example
(
self
.
submission_uuid
,
self
.
EXAMPLES
[
0
][
'options_selected'
])
@ddt.file_data
(
'data/validate_training_examples.json'
)
def
test_validate_training_examples
(
self
,
data
):
errors
=
training_api
.
validate_training_examples
(
...
...
@@ -319,3 +373,22 @@ class StudentTrainingAssessmentTest(CacheResetTest):
example
=
training_api
.
get_training_example
(
submission_uuid
,
input_rubric
,
input_examples
)
expected_example
=
self
.
_expected_example
(
input_examples
[
order_num
],
input_rubric
)
self
.
assertItemsEqual
(
example
,
expected_example
)
def
_warm_cache
(
self
,
rubric
,
examples
):
"""
Create a submission and complete student training.
This will populate the cache with training examples and rubrics,
which are immutable and shared for all students training on a particular problem.
Args:
rubric (dict): Serialized rubric model.
examples (list of dict): Serialized training examples
Returns:
None
"""
pre_submission
=
sub_api
.
create_submission
(
self
.
STUDENT_ITEM
,
self
.
ANSWER
)
for
example
in
examples
:
training_api
.
get_training_example
(
pre_submission
[
'uuid'
],
rubric
,
examples
)
training_api
.
assess_training_example
(
pre_submission
[
'uuid'
],
example
[
'options_selected'
])
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment