Commit ed58c648 by Eric Fischer

Merge pull request #875 from edx/diana/ora-data-download

Move ORA data download logic into ora2 repo.
parents 16a41c64 cd1a9bfd
@@ -3,10 +3,11 @@ Aggregate data for openassessment.
"""
import csv
import json
from django.conf import settings
from submissions import api as sub_api
from openassessment.workflow.models import AssessmentWorkflow
-from openassessment.assessment.models import AssessmentPart, AssessmentFeedback
+from openassessment.assessment.models import Assessment, AssessmentPart, AssessmentFeedback
class CsvWriter(object):
@@ -114,15 +115,15 @@ class CsvWriter(object):
# so we select AssessmentPart and follow the foreign key to the Assessment.
        parts = self._use_read_replica(
            AssessmentPart.objects.select_related('assessment', 'option', 'option__criterion')
            .filter(assessment__submission_uuid=submission_uuid)
            .order_by('assessment__pk')
        )
self._write_assessment_to_csv(parts, rubric_points_cache)
        feedback_query = self._use_read_replica(
            AssessmentFeedback.objects
            .filter(submission_uuid=submission_uuid)
            .prefetch_related('options')
        )
for assessment_feedback in feedback_query:
self._write_assessment_feedback_to_csv(assessment_feedback)
@@ -164,8 +165,8 @@ class CsvWriter(object):
end = start + self.QUERY_INTERVAL
            query = self._use_read_replica(
                AssessmentWorkflow.objects
                .filter(course_id=course_id)
                .order_by('created')
            ).values('submission_uuid')[start:end]
for workflow_dict in query:
@@ -333,4 +334,164 @@ class CsvWriter(object):
queryset.using("read_replica")
if "read_replica" in settings.DATABASES
else queryset
        )
class OraAggregateData(object):
"""
Aggregate all the ORA data into a single table-like data structure.
"""
@classmethod
def _use_read_replica(cls, queryset):
"""
        If there's a read replica that can be used, return a queryset that
        reads from it. Otherwise, return a queryset that uses the default
        database.
Args:
queryset (QuerySet): The queryset that we would like to use the read replica for.
Returns:
QuerySet
"""
return (
queryset.using("read_replica")
if "read_replica" in settings.DATABASES
else queryset
)
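    # Note: the "read_replica" routing above only takes effect when an alias
    # with that exact name exists in Django's DATABASES setting. A minimal
    # sketch of such a configuration (the engine/host/name values here are
    # hypothetical, not this repo's actual settings):
    #
    #     DATABASES = {
    #         'default': {...},
    #         'read_replica': {
    #             'ENGINE': 'django.db.backends.mysql',
    #             'HOST': 'replica.db.example.com',
    #             'NAME': 'edxapp',
    #         },
    #     }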
@classmethod
def _build_assessments_cell(cls, assessments):
"""
Args:
assessments (QuerySet) - assessments that we would like to collate into one column.
Returns:
            A string to include in the 'assessments' column for this submission's row.
"""
returned_string = u""
for assessment in assessments:
returned_string += u"Assessment #{}\n".format(assessment.id)
returned_string += u"-- scored_at: {}\n".format(assessment.scored_at)
returned_string += u"-- type: {}\n".format(assessment.score_type)
returned_string += u"-- scorer_id: {}\n".format(assessment.scorer_id)
if assessment.feedback != u"":
returned_string += u"-- overall_feedback: {}\n".format(assessment.feedback)
return returned_string
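    # Illustrative cell contents for a single assessment (all values are
    # hypothetical):
    #
    #     Assessment #1
    #     -- scored_at: 2014-10-07 20:37:54
    #     -- type: PE
    #     -- scorer_id: 5ab1
    #     -- overall_feedback: Good work!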
@classmethod
def _build_assessments_parts_cell(cls, assessments):
"""
Args:
assessments (QuerySet) - assessments containing the parts that we would like to collate into one column.
Returns:
            A string to include in the 'assessments_parts' column for this submission's row.
"""
returned_string = u""
for assessment in assessments:
returned_string += u"Assessment #{}\n".format(assessment.id)
for part in assessment.parts.all():
returned_string += u"-- {}".format(part.criterion.label)
if part.option is not None and part.option.label is not None:
option_label = part.option.label
returned_string += u": {option_label} ({option_points})\n".format(
option_label=option_label, option_points=part.option.points
)
if part.feedback != u"":
returned_string += u"-- feedback: {}\n".format(part.feedback)
return returned_string
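    # Illustrative cell contents for one assessment with a single scored part
    # (all values are hypothetical):
    #
    #     Assessment #1
    #     -- Content: Excellent (5)
    #     -- feedback: Clear and well sourced.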
@classmethod
def _build_feedback_options_cell(cls, assessments):
"""
Args:
            assessments (QuerySet) - assessments whose feedback options we would like to fetch and read.
Returns:
            A string to include in the 'feedback_options' column for this submission's row.
"""
returned_string = u""
for assessment in assessments:
for feedback in assessment.assessment_feedback.all():
for option in feedback.options.all():
returned_string += option.text + u"\n"
return returned_string
@classmethod
def _build_feedback_cell(cls, submission_uuid):
"""
Args:
submission_uuid (string) - the submission_uuid associated with this particular assessment feedback
Returns:
            A string to include in the 'feedback' column for this submission's row.
"""
try:
feedback = AssessmentFeedback.objects.get(submission_uuid=submission_uuid)
except AssessmentFeedback.DoesNotExist:
return u""
return feedback.feedback_text
@classmethod
def collect_ora2_data(cls, course_id):
"""
Query database for aggregated ora2 response data.
Args:
course_id (string) - the course id of the course whose data we would like to return
Returns:
A tuple containing two lists: headers and data.
headers is a list containing strings corresponding to the column headers of the data.
data is a list of lists, where each sub-list corresponds to a row in the table of all the data
for this course.
"""
all_submission_information = sub_api.get_all_course_submission_information(course_id, 'openassessment')
rows = []
for student_item, submission, score in all_submission_information:
            assessments = cls._use_read_replica(
                Assessment.objects
                .prefetch_related('parts')
                .prefetch_related('rubric')
                .filter(submission_uuid=submission['uuid'])
            )
assessments_cell = cls._build_assessments_cell(assessments)
assessments_parts_cell = cls._build_assessments_parts_cell(assessments)
feedback_options_cell = cls._build_feedback_options_cell(assessments)
feedback_cell = cls._build_feedback_cell(submission['uuid'])
row = [
submission['uuid'],
submission['student_item'],
student_item['student_id'],
submission['submitted_at'],
submission['answer'],
assessments_cell,
assessments_parts_cell,
score.get('created_at', ''),
score.get('points_earned', ''),
score.get('points_possible', ''),
feedback_options_cell,
feedback_cell
]
rows.append(row)
header = [
'Submission ID',
'Item ID',
'Anonymized Student ID',
'Date/Time Response Submitted',
'Response',
'Assessment Details',
'Assessment Scores',
'Date/Time Final Score Given',
'Final Score Points Earned',
'Final Score Points Possible',
'Feedback Statements Selected',
'Feedback on Peer Assessments'
]
return header, rows
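The management command below is the intended consumer of collect_ora2_data, but the method can also be driven directly. A minimal sketch, assuming a configured Django context with this app installed (the course id and output path are hypothetical):

import csv

from openassessment.data import OraAggregateData

header, rows = OraAggregateData.collect_ora2_data('edX/DemoX/Demo_Course')
with open('/tmp/demo-ora2.csv', 'wb') as csv_file:  # 'wb': Python 2 csv writes bytes
    writer = csv.writer(csv_file, dialect='excel', quoting=csv.QUOTE_ALL)
    writer.writerow(header)
    for row in rows:
        # Encode each cell as UTF-8, mirroring _encode_row in the command below.
        writer.writerow([unicode(cell).encode('utf-8') for cell in row])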
"""
Command to retrieve all ORA2 data for a course in a .csv.
This command differs from upload_oa_data in that it places all the data into one file.
Generates the same format as the instructor dashboard downloads.
"""
import csv
from optparse import make_option
import os
from django.core.management.base import BaseCommand, CommandError
from openassessment.data import OraAggregateData
class Command(BaseCommand):
"""
Query aggregated open assessment data, write to .csv
"""
help = ("Usage: collect_ora2_data <course_id> --output-dir=<output_dir>")
args = "<course_id>"
option_list = BaseCommand.option_list + (
make_option('-o', '--output-dir',
action='store', dest='output_dir', default=None,
help="Write output to a directory rather than stdout"),
make_option('-n', '--file-name',
action='store', dest='file_name', default=None,
help="Write CSV file to the given name"),
)
def handle(self, *args, **options):
"""
Run the command.
"""
if not args:
raise CommandError("Course ID must be specified to fetch data")
course_id = args[0]
if options['file_name']:
file_name = options['file_name']
else:
file_name = ("%s-ora2.csv" % course_id).replace("/", "-")
if options['output_dir']:
csv_file = open(os.path.join(options['output_dir'], file_name), 'wb')
else:
csv_file = self.stdout
writer = csv.writer(csv_file, dialect='excel', quotechar='"', quoting=csv.QUOTE_ALL)
header, rows = OraAggregateData.collect_ora2_data(course_id)
writer.writerow(header)
for row in rows:
writer.writerow(_encode_row(row))
def _encode_row(data_list):
"""
Properly encode ora2 responses for transcription into a .csv
"""
processed_row = []
for item in data_list:
new_item = unicode(item).encode('utf-8')
processed_row.append(new_item)
return processed_row
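For reference, a typical invocation of the command above, with a hypothetical course id and output directory:

python manage.py collect_ora2_data edX/DemoX/Demo_Course --output-dir=/tmp --file-name=demo-ora2.csv

Omitting --output-dir writes the CSV to stdout instead, which makes the command easy to pipe into other tools.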
# -*- coding: utf-8 -*-
""" Test the collect_ora2_data management command """
from mock import patch
from django.core.management import call_command
from openassessment.test_utils import CacheResetTest
class CollectOra2DataTest(CacheResetTest):
""" Test collect_ora2_data output and error conditions """
COURSE_ID = u"TɘꙅT ↄoUᴙꙅɘ"
def setUp(self):
super(CollectOra2DataTest, self).setUp()
self.test_header = [
"submission_uuid",
"item_id",
"anonymized_student_id",
"submitted_at",
"raw_answer",
"assessments",
"assessments_parts",
"final_score_given_at",
"final_score_points_earned",
"final_score_points_possible",
"feedback_options",
"feedback",
]
self.test_rows = [
[
"33a639de-4e61-11e4-82ab-hash_value",
"i4x://edX/DemoX/openassessment/hash_value",
"e31b4beb3d191cd47b07e17735728d53",
"2014-10-07 20:33:31+00:00",
'{""text"": ""This is a response to a question. #dylan""}',
"Assessment #1 -- scored_at: 2014-10-07 20:37:54 -- type: T -- scorer_id: hash -- feedback: Test",
"Assessment #1 -- Content: Unclear recommendation (5)",
"2014-10-07 21:35:47+00:00",
"10",
"20",
"Completed test assessments.",
"They were useful.",
],
[
"row-two-submission-value",
"i4x://edX/DemoX/openassessment/hash_value",
"e31b4beb3d191cd47b07e17735728d53",
"2014-10-07 20:33:31+00:00",
'{""text"": ""This is a response to a question. #dylan""}',
"Assessment #1 -- scored_at: 2014-10-07 20:37:54 -- type: T -- scorer_id: hash -- feedback: Test",
"Assessment #1 -- Content: Unclear recommendation (5)",
"2014-10-07 21:35:47+00:00",
"10",
"20",
"Completed test assessments.",
u"𝓨𝓸𝓾",
]
]
self.unicode_encoded_row = [
"row-two-submission-value",
"i4x://edX/DemoX/openassessment/hash_value",
"e31b4beb3d191cd47b07e17735728d53",
"2014-10-07 20:33:31+00:00",
'{""text"": ""This is a response to a question. #dylan""}',
"Assessment #1 -- scored_at: 2014-10-07 20:37:54 -- type: T -- scorer_id: hash -- feedback: Test",
"Assessment #1 -- Content: Unclear recommendation (5)",
"2014-10-07 21:35:47+00:00",
"10",
"20",
"Completed test assessments.",
"\xf0\x9d\x93\xa8\xf0\x9d\x93\xb8\xf0\x9d\x93\xbe",
]
@patch('openassessment.management.commands.collect_ora2_data.OraAggregateData.collect_ora2_data')
def test_valid_data_output_to_file(self, mock_data):
""" Verify that management command writes valid ORA2 data to file. """
mock_data.return_value = (self.test_header, self.test_rows)
with patch('openassessment.management.commands.collect_ora2_data.csv') as mock_write:
call_command('collect_ora2_data', self.COURSE_ID)
mock_writerow = mock_write.writer.return_value.writerow
mock_writerow.assert_any_call(self.test_header)
mock_writerow.assert_any_call(self.test_rows[0])
mock_writerow.assert_any_call(self.unicode_encoded_row)
"""
Create factories for assessments and all of their related models.
"""
import factory
from factory.django import DjangoModelFactory
from openassessment.assessment.models import (
Assessment, AssessmentPart, Rubric, Criterion, CriterionOption, AssessmentFeedbackOption, AssessmentFeedback
)
class RubricFactory(DjangoModelFactory):
""" Create mock Rubric models. """
class Meta:
model = Rubric
content_hash = factory.Faker('sha1')
structure_hash = factory.Faker('sha1')
class CriterionFactory(DjangoModelFactory):
"""
Create mock Criterion models.
Currently assumes there is only one Rubric that these are attached to.
"""
class Meta:
model = Criterion
rubric = factory.SubFactory(RubricFactory)
name = factory.Sequence(lambda n: 'criterion_{}'.format(n)) # pylint: disable=unnecessary-lambda
label = factory.Sequence(lambda n: 'label_{}'.format(n)) # pylint: disable=unnecessary-lambda
order_num = 0
prompt = 'This is a fake prompt.'
class CriterionOptionFactory(DjangoModelFactory):
""" Create mock CriterionOption models. """
class Meta:
model = CriterionOption
criterion = factory.SubFactory(CriterionFactory)
order_num = 0
points = 4
name = factory.Sequence(lambda n: 'option_{}'.format(n)) # pylint: disable=unnecessary-lambda
label = factory.Sequence(lambda n: 'option__label_{}'.format(n)) # pylint: disable=unnecessary-lambda
explanation = """The response makes 3-5 Monty Python references and at least one
original Star Wars trilogy reference. Do not select this option
if the author made any references to the second trilogy."""
class AssessmentFactory(DjangoModelFactory):
""" Create mock Assessment models. """
class Meta:
model = Assessment
submission_uuid = factory.Faker('sha1')
rubric = factory.SubFactory(RubricFactory)
scorer_id = 'test_scorer'
score_type = 'PE'
class AssessmentPartFactory(DjangoModelFactory):
""" Create mock AssessmentPart models. """
class Meta:
model = AssessmentPart
assessment = factory.SubFactory(AssessmentFactory)
criterion = factory.SubFactory(CriterionFactory)
option = None
feedback = 'This is my helpful feedback.'
class AssessmentFeedbackOptionFactory(DjangoModelFactory):
""" Create mock AssessmentFeedbackOption models. """
class Meta:
model = AssessmentFeedbackOption
text = factory.Sequence(lambda n: 'feedback_option_{}'.format(n)) # pylint: disable=unnecessary-lambda
class AssessmentFeedbackFactory(DjangoModelFactory):
""" Create mock AssessmentFeedback models. """
class Meta:
model = AssessmentFeedback
submission_uuid = factory.Faker('sha1')
feedback_text = "Feedback Text!"
@factory.post_generation
def assessments(self, create, extracted, **kwargs): # pylint: disable=unused-argument
""" Handle the many-to-many relationship between AssessmentFeedback and Assessment. """
if not create:
return
if extracted:
for assessment in extracted:
self.assessments.add(assessment)
@factory.post_generation
def options(self, create, extracted, **kwargs): # pylint: disable=unused-argument
""" Handle the many-to-many relationship between AssessmentFeedback and AssessmentFeedbackOption. """
if not create:
return
if extracted:
for option in extracted:
self.options.add(option)
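A short sketch of how these factories compose in a test, including the two post_generation hooks on AssessmentFeedbackFactory (all literal values are hypothetical):

rubric = RubricFactory()
criterion = CriterionFactory(rubric=rubric)
option = CriterionOptionFactory(criterion=criterion, points=3)
assessment = AssessmentFactory(rubric=rubric)
AssessmentPartFactory(assessment=assessment, criterion=criterion, option=option)

# Keyword arguments named after the post_generation hooks are passed through
# to them as 'extracted', which populates the many-to-many relations.
feedback = AssessmentFeedbackFactory(
    assessments=[assessment],
    options=[AssessmentFeedbackOptionFactory()],
)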
@@ -6,7 +6,7 @@
git+https://github.com/edx/XBlock.git@xblock-0.4.1#egg=XBlock==0.4.1
# edx-submissions
-git+https://github.com/edx/edx-submissions.git@1.0.0#egg=edx-submissions==1.0.0
+git+https://github.com/edx/edx-submissions.git@1.1.0#egg=edx-submissions==1.1.0
# Third Party Requirements
boto>=2.32.1,<3.0.0
@@ -6,6 +6,7 @@ django-nose==1.4.1
mock==1.0.1
moto==0.3.1
pep8==1.7.0
+factory_boy==2.6.1
git+https://github.com/edx/django-pyfs.git@1.0.3#egg=django-pyfs==1.0.3
git+https://github.com/edx/i18n-tools.git@56f048af9b6868613c14aeae760548834c495011#egg=i18n_tools