Commit ed58c648 by Eric Fischer

Merge pull request #875 from edx/diana/ora-data-download

Move ORA data download logic into ora2 repo.
parents 16a41c64 cd1a9bfd
@@ -3,10 +3,11 @@ Aggregate data for openassessment.
"""
import csv
import json
from django.conf import settings
from submissions import api as sub_api
from openassessment.workflow.models import AssessmentWorkflow
-from openassessment.assessment.models import AssessmentPart, AssessmentFeedback
+from openassessment.assessment.models import Assessment, AssessmentPart, AssessmentFeedback
class CsvWriter(object):
@@ -114,15 +115,15 @@ class CsvWriter(object):
# so we select AssessmentPart and follow the foreign key to the Assessment.
        parts = self._use_read_replica(
            AssessmentPart.objects.select_related('assessment', 'option', 'option__criterion')
            .filter(assessment__submission_uuid=submission_uuid)
            .order_by('assessment__pk')
        )
self._write_assessment_to_csv(parts, rubric_points_cache)
        feedback_query = self._use_read_replica(
            AssessmentFeedback.objects
            .filter(submission_uuid=submission_uuid)
            .prefetch_related('options')
        )
for assessment_feedback in feedback_query:
self._write_assessment_feedback_to_csv(assessment_feedback)
@@ -164,8 +165,8 @@ class CsvWriter(object):
end = start + self.QUERY_INTERVAL
            query = self._use_read_replica(
                AssessmentWorkflow.objects
                .filter(course_id=course_id)
                .order_by('created')
            ).values('submission_uuid')[start:end]
for workflow_dict in query:
@@ -333,4 +334,164 @@ class CsvWriter(object):
queryset.using("read_replica")
if "read_replica" in settings.DATABASES
else queryset
        )
class OraAggregateData(object):
"""
Aggregate all the ORA data into a single table-like data structure.
"""
@classmethod
def _use_read_replica(cls, queryset):
"""
        If there's a read replica that can be used, return a queryset that
        reads from it. Otherwise, return a queryset that uses the default
        database.
Args:
queryset (QuerySet): The queryset that we would like to use the read replica for.
Returns:
QuerySet
"""
return (
queryset.using("read_replica")
if "read_replica" in settings.DATABASES
else queryset
)
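    # Note: the "read_replica" routing above only takes effect when an alias
    # with that exact name exists in Django's DATABASES setting. A minimal
    # sketch of such a configuration (the engine/host/name values here are
    # hypothetical, not this repo's actual settings):
    #
    #     DATABASES = {
    #         'default': {...},
    #         'read_replica': {
    #             'ENGINE': 'django.db.backends.mysql',
    #             'HOST': 'replica.db.example.com',
    #             'NAME': 'edxapp',
    #         },
    #     }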
@classmethod
def _build_assessments_cell(cls, assessments):
"""
Args:
assessments (QuerySet) - assessments that we would like to collate into one column.
Returns:
            A string to include in the 'assessments' column for this submission's row.
"""
returned_string = u""
for assessment in assessments:
returned_string += u"Assessment #{}\n".format(assessment.id)
returned_string += u"-- scored_at: {}\n".format(assessment.scored_at)
returned_string += u"-- type: {}\n".format(assessment.score_type)
returned_string += u"-- scorer_id: {}\n".format(assessment.scorer_id)
if assessment.feedback != u"":
returned_string += u"-- overall_feedback: {}\n".format(assessment.feedback)
return returned_string
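    # Illustrative cell contents for a single assessment (all values are
    # hypothetical):
    #
    #     Assessment #1
    #     -- scored_at: 2014-10-07 20:37:54
    #     -- type: PE
    #     -- scorer_id: 5ab1
    #     -- overall_feedback: Good work!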
@classmethod
def _build_assessments_parts_cell(cls, assessments):
"""
Args:
assessments (QuerySet) - assessments containing the parts that we would like to collate into one column.
Returns:
            A string to include in the 'assessments_parts' column for this submission's row.
"""
returned_string = u""
for assessment in assessments:
returned_string += u"Assessment #{}\n".format(assessment.id)
for part in assessment.parts.all():
returned_string += u"-- {}".format(part.criterion.label)
if part.option is not None and part.option.label is not None:
option_label = part.option.label
returned_string += u": {option_label} ({option_points})\n".format(
option_label=option_label, option_points=part.option.points
)
if part.feedback != u"":
returned_string += u"-- feedback: {}\n".format(part.feedback)
return returned_string
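    # Illustrative cell contents for one assessment with a single scored part
    # (all values are hypothetical):
    #
    #     Assessment #1
    #     -- Content: Excellent (5)
    #     -- feedback: Clear and well sourced.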
@classmethod
def _build_feedback_options_cell(cls, assessments):
"""
Args:
            assessments (QuerySet) - assessments whose feedback options we would like to fetch and read.
Returns:
            A string to include in the 'feedback_options' column for this submission's row.
"""
returned_string = u""
for assessment in assessments:
for feedback in assessment.assessment_feedback.all():
for option in feedback.options.all():
returned_string += option.text + u"\n"
return returned_string
@classmethod
def _build_feedback_cell(cls, submission_uuid):
"""
Args:
submission_uuid (string) - the submission_uuid associated with this particular assessment feedback
Returns:
            A string to include in the 'feedback' column for this submission's row.
"""
try:
feedback = AssessmentFeedback.objects.get(submission_uuid=submission_uuid)
except AssessmentFeedback.DoesNotExist:
return u""
return feedback.feedback_text
@classmethod
def collect_ora2_data(cls, course_id):
"""
Query database for aggregated ora2 response data.
Args:
course_id (string) - the course id of the course whose data we would like to return
Returns:
A tuple containing two lists: headers and data.
headers is a list containing strings corresponding to the column headers of the data.
data is a list of lists, where each sub-list corresponds to a row in the table of all the data
for this course.
"""
all_submission_information = sub_api.get_all_course_submission_information(course_id, 'openassessment')
rows = []
for student_item, submission, score in all_submission_information:
            assessments = cls._use_read_replica(
                Assessment.objects
                .prefetch_related('parts')
                .prefetch_related('rubric')
                .filter(submission_uuid=submission['uuid'])
            )
assessments_cell = cls._build_assessments_cell(assessments)
assessments_parts_cell = cls._build_assessments_parts_cell(assessments)
feedback_options_cell = cls._build_feedback_options_cell(assessments)
feedback_cell = cls._build_feedback_cell(submission['uuid'])
row = [
submission['uuid'],
submission['student_item'],
student_item['student_id'],
submission['submitted_at'],
submission['answer'],
assessments_cell,
assessments_parts_cell,
score.get('created_at', ''),
score.get('points_earned', ''),
score.get('points_possible', ''),
feedback_options_cell,
feedback_cell
]
rows.append(row)
header = [
'Submission ID',
'Item ID',
'Anonymized Student ID',
'Date/Time Response Submitted',
'Response',
'Assessment Details',
'Assessment Scores',
'Date/Time Final Score Given',
'Final Score Points Earned',
'Final Score Points Possible',
'Feedback Statements Selected',
'Feedback on Peer Assessments'
]
return header, rows
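The management command below is the intended consumer of collect_ora2_data, but the method can also be driven directly. A minimal sketch, assuming a configured Django context with this app installed (the course id and output path are hypothetical):

import csv

from openassessment.data import OraAggregateData

header, rows = OraAggregateData.collect_ora2_data('edX/DemoX/Demo_Course')
with open('/tmp/demo-ora2.csv', 'wb') as csv_file:  # 'wb': Python 2 csv writes bytes
    writer = csv.writer(csv_file, dialect='excel', quoting=csv.QUOTE_ALL)
    writer.writerow(header)
    for row in rows:
        # Encode each cell as UTF-8, mirroring _encode_row in the command below.
        writer.writerow([unicode(cell).encode('utf-8') for cell in row])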
"""
Command to retrieve all ORA2 data for a course in a .csv.
This command differs from upload_oa_data in that it places all the data into one file.
Generates the same format as the instructor dashboard downloads.
"""
import csv
from optparse import make_option
import os
from django.core.management.base import BaseCommand, CommandError
from openassessment.data import OraAggregateData
class Command(BaseCommand):
"""
Query aggregated open assessment data, write to .csv
"""
help = ("Usage: collect_ora2_data <course_id> --output-dir=<output_dir>")
args = "<course_id>"
option_list = BaseCommand.option_list + (
make_option('-o', '--output-dir',
action='store', dest='output_dir', default=None,
help="Write output to a directory rather than stdout"),
make_option('-n', '--file-name',
action='store', dest='file_name', default=None,
help="Write CSV file to the given name"),
)
def handle(self, *args, **options):
"""
Run the command.
"""
if not args:
raise CommandError("Course ID must be specified to fetch data")
course_id = args[0]
if options['file_name']:
file_name = options['file_name']
else:
file_name = ("%s-ora2.csv" % course_id).replace("/", "-")
if options['output_dir']:
csv_file = open(os.path.join(options['output_dir'], file_name), 'wb')
else:
csv_file = self.stdout
writer = csv.writer(csv_file, dialect='excel', quotechar='"', quoting=csv.QUOTE_ALL)
header, rows = OraAggregateData.collect_ora2_data(course_id)
writer.writerow(header)
for row in rows:
writer.writerow(_encode_row(row))
def _encode_row(data_list):
"""
Properly encode ora2 responses for transcription into a .csv
"""
processed_row = []
for item in data_list:
new_item = unicode(item).encode('utf-8')
processed_row.append(new_item)
return processed_row
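For reference, a typical invocation of the command above, with a hypothetical course id and output directory:

python manage.py collect_ora2_data edX/DemoX/Demo_Course --output-dir=/tmp --file-name=demo-ora2.csv

Omitting --output-dir writes the CSV to stdout instead, which makes the command easy to pipe into other tools.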
# -*- coding: utf-8 -*-
""" Test the collect_ora2_data management command """
from mock import patch
from django.core.management import call_command
from openassessment.test_utils import CacheResetTest
class CollectOra2DataTest(CacheResetTest):
""" Test collect_ora2_data output and error conditions """
COURSE_ID = u"TɘꙅT ↄoUᴙꙅɘ"
def setUp(self):
super(CollectOra2DataTest, self).setUp()
self.test_header = [
"submission_uuid",
"item_id",
"anonymized_student_id",
"submitted_at",
"raw_answer",
"assessments",
"assessments_parts",
"final_score_given_at",
"final_score_points_earned",
"final_score_points_possible",
"feedback_options",
"feedback",
]
self.test_rows = [
[
"33a639de-4e61-11e4-82ab-hash_value",
"i4x://edX/DemoX/openassessment/hash_value",
"e31b4beb3d191cd47b07e17735728d53",
"2014-10-07 20:33:31+00:00",
'{""text"": ""This is a response to a question. #dylan""}',
"Assessment #1 -- scored_at: 2014-10-07 20:37:54 -- type: T -- scorer_id: hash -- feedback: Test",
"Assessment #1 -- Content: Unclear recommendation (5)",
"2014-10-07 21:35:47+00:00",
"10",
"20",
"Completed test assessments.",
"They were useful.",
],
[
"row-two-submission-value",
"i4x://edX/DemoX/openassessment/hash_value",
"e31b4beb3d191cd47b07e17735728d53",
"2014-10-07 20:33:31+00:00",
'{""text"": ""This is a response to a question. #dylan""}',
"Assessment #1 -- scored_at: 2014-10-07 20:37:54 -- type: T -- scorer_id: hash -- feedback: Test",
"Assessment #1 -- Content: Unclear recommendation (5)",
"2014-10-07 21:35:47+00:00",
"10",
"20",
"Completed test assessments.",
u"𝓨𝓸𝓾",
]
]
self.unicode_encoded_row = [
"row-two-submission-value",
"i4x://edX/DemoX/openassessment/hash_value",
"e31b4beb3d191cd47b07e17735728d53",
"2014-10-07 20:33:31+00:00",
'{""text"": ""This is a response to a question. #dylan""}',
"Assessment #1 -- scored_at: 2014-10-07 20:37:54 -- type: T -- scorer_id: hash -- feedback: Test",
"Assessment #1 -- Content: Unclear recommendation (5)",
"2014-10-07 21:35:47+00:00",
"10",
"20",
"Completed test assessments.",
"\xf0\x9d\x93\xa8\xf0\x9d\x93\xb8\xf0\x9d\x93\xbe",
]
@patch('openassessment.management.commands.collect_ora2_data.OraAggregateData.collect_ora2_data')
def test_valid_data_output_to_file(self, mock_data):
""" Verify that management command writes valid ORA2 data to file. """
mock_data.return_value = (self.test_header, self.test_rows)
with patch('openassessment.management.commands.collect_ora2_data.csv') as mock_write:
call_command('collect_ora2_data', self.COURSE_ID)
mock_writerow = mock_write.writer.return_value.writerow
mock_writerow.assert_any_call(self.test_header)
mock_writerow.assert_any_call(self.test_rows[0])
mock_writerow.assert_any_call(self.unicode_encoded_row)
"""
Create factories for assessments and all of their related models.
"""
import factory
from factory.django import DjangoModelFactory
from openassessment.assessment.models import (
Assessment, AssessmentPart, Rubric, Criterion, CriterionOption, AssessmentFeedbackOption, AssessmentFeedback
)
class RubricFactory(DjangoModelFactory):
""" Create mock Rubric models. """
class Meta:
model = Rubric
content_hash = factory.Faker('sha1')
structure_hash = factory.Faker('sha1')
class CriterionFactory(DjangoModelFactory):
"""
Create mock Criterion models.
Currently assumes there is only one Rubric that these are attached to.
"""
class Meta:
model = Criterion
rubric = factory.SubFactory(RubricFactory)
name = factory.Sequence(lambda n: 'criterion_{}'.format(n)) # pylint: disable=unnecessary-lambda
label = factory.Sequence(lambda n: 'label_{}'.format(n)) # pylint: disable=unnecessary-lambda
order_num = 0
prompt = 'This is a fake prompt.'
class CriterionOptionFactory(DjangoModelFactory):
""" Create mock CriterionOption models. """
class Meta:
model = CriterionOption
criterion = factory.SubFactory(CriterionFactory)
order_num = 0
points = 4
name = factory.Sequence(lambda n: 'option_{}'.format(n)) # pylint: disable=unnecessary-lambda
label = factory.Sequence(lambda n: 'option__label_{}'.format(n)) # pylint: disable=unnecessary-lambda
explanation = """The response makes 3-5 Monty Python references and at least one
original Star Wars trilogy reference. Do not select this option
if the author made any references to the second trilogy."""
class AssessmentFactory(DjangoModelFactory):
""" Create mock Assessment models. """
class Meta:
model = Assessment
submission_uuid = factory.Faker('sha1')
rubric = factory.SubFactory(RubricFactory)
scorer_id = 'test_scorer'
score_type = 'PE'
class AssessmentPartFactory(DjangoModelFactory):
""" Create mock AssessmentPart models. """
class Meta:
model = AssessmentPart
assessment = factory.SubFactory(AssessmentFactory)
criterion = factory.SubFactory(CriterionFactory)
option = None
feedback = 'This is my helpful feedback.'
class AssessmentFeedbackOptionFactory(DjangoModelFactory):
""" Create mock AssessmentFeedbackOption models. """
class Meta:
model = AssessmentFeedbackOption
text = factory.Sequence(lambda n: 'feedback_option_{}'.format(n)) # pylint: disable=unnecessary-lambda
class AssessmentFeedbackFactory(DjangoModelFactory):
""" Create mock AssessmentFeedback models. """
class Meta:
model = AssessmentFeedback
submission_uuid = factory.Faker('sha1')
feedback_text = "Feedback Text!"
@factory.post_generation
def assessments(self, create, extracted, **kwargs): # pylint: disable=unused-argument
""" Handle the many-to-many relationship between AssessmentFeedback and Assessment. """
if not create:
return
if extracted:
for assessment in extracted:
self.assessments.add(assessment)
@factory.post_generation
def options(self, create, extracted, **kwargs): # pylint: disable=unused-argument
""" Handle the many-to-many relationship between AssessmentFeedback and AssessmentFeedbackOption. """
if not create:
return
if extracted:
for option in extracted:
self.options.add(option)
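A short sketch of how these factories compose in a test, including the two post_generation hooks on AssessmentFeedbackFactory (all literal values are hypothetical):

rubric = RubricFactory()
criterion = CriterionFactory(rubric=rubric)
option = CriterionOptionFactory(criterion=criterion, points=3)
assessment = AssessmentFactory(rubric=rubric)
AssessmentPartFactory(assessment=assessment, criterion=criterion, option=option)

# Keyword arguments named after the post_generation hooks are passed through
# to them as 'extracted', which populates the many-to-many relations.
feedback = AssessmentFeedbackFactory(
    assessments=[assessment],
    options=[AssessmentFeedbackOptionFactory()],
)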
@@ -6,7 +6,7 @@
git+https://github.com/edx/XBlock.git@xblock-0.4.1#egg=XBlock==0.4.1
# edx-submissions
-git+https://github.com/edx/edx-submissions.git@1.0.0#egg=edx-submissions==1.0.0
+git+https://github.com/edx/edx-submissions.git@1.1.0#egg=edx-submissions==1.1.0
# Third Party Requirements
boto>=2.32.1,<3.0.0
@@ -6,6 +6,7 @@ django-nose==1.4.1
mock==1.0.1
moto==0.3.1
pep8==1.7.0
+factory_boy==2.6.1
git+https://github.com/edx/django-pyfs.git@1.0.3#egg=django-pyfs==1.0.3
git+https://github.com/edx/i18n-tools.git@56f048af9b6868613c14aeae760548834c495011#egg=i18n_tools