Merge pull request #488 from edx/will/data-download-uses-read-replica

Use the read replica in the download data management command.

Merge pull request #488 from edx/will/data-download-uses-read-replica
Use the read replica in the download data management command.
c3b36f0a · Will Daly · c4b88102 · 899ea6e0 · c3b36f0a · c3b36f0a
Commit c3b36f0a authored Jul 11, 2014 by Will Daly
Showing with 79 additions and 33 deletions

.gitignore
+2 -0

manage.py
+6 -0

openassessment/data.py
+38 -13

openassessment/test_utils.py
+23 -11

openassessment/tests/test_data.py
+2 -2

requirements/base.txt
+1 -1

settings/test.py
+7 -6

No files found.
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,8 @@ pip-log.txt
 nosetests.xml
 htmlcov
 coverage.xml
+test_ora2db
+test_ora2db-journal

 # Mr Developer
 .mr.developer.cfg

--- a/manage.py
+++ b/manage.py
@@ -8,5 +8,11 @@ if __name__ == "__main__":
    if os.environ.get('DJANGO_SETTINGS_MODULE') is None:
        os.environ['DJANGO_SETTINGS_MODULE'] = 'settings.dev'

+    # When the test suite uses an on-disk database, Django prompts
+    # for confirmation before deleting an existing test database.
+    # Append --noinput so the old database is deleted without asking.
+    if 'test' in sys.argv[0:3]:
+        sys.argv.append('--noinput')
+
    from django.core.management import execute_from_command_line
    execute_from_command_line(sys.argv)
--- a/openassessment/data.py
+++ b/openassessment/data.py
@@ -3,6 +3,7 @@ Aggregate data for openassessment.
 """
 import csv
 import json
+from django.conf import settings
 from submissions import api as sub_api
 from openassessment.workflow.models import AssessmentWorkflow
 from openassessment.assessment.models import AssessmentPart, AssessmentFeedback
@@ -110,14 +111,18 @@ class CsvWriter(object):

            # Django 1.4 doesn't follow reverse relations when using select_related,
            # so we select AssessmentPart and follow the foreign key to the Assessment.
-            parts = AssessmentPart.objects.select_related(
-                'assessment', 'option', 'option__criterion'
-            ).filter(assessment__submission_uuid=submission_uuid).order_by('assessment__pk')
+            parts = self._use_read_replica(
+                AssessmentPart.objects.select_related('assessment', 'option', 'option__criterion')
+                    .filter(assessment__submission_uuid=submission_uuid)
+                    .order_by('assessment__pk')
+            )
            self._write_assessment_to_csv(parts, rubric_points_cache)

-            feedback_query = AssessmentFeedback.objects.filter(
-                submission_uuid=submission_uuid
-            ).prefetch_related('options')
+            feedback_query = self._use_read_replica(
+                AssessmentFeedback.objects
+                    .filter(submission_uuid=submission_uuid)
+                    .prefetch_related('options')
+            )
            for assessment_feedback in feedback_query:
                self._write_assessment_feedback_to_csv(assessment_feedback)
                feedback_option_set.update(set(
@@ -146,8 +151,8 @@ class CsvWriter(object):
        """
        num_results = 0
        start = 0
-        total_results = AssessmentWorkflow.objects.filter(
-            course_id=course_id
+        total_results = self._use_read_replica(
+            AssessmentWorkflow.objects.filter(course_id=course_id)
        ).count()

        while num_results < total_results:
@@ -156,9 +161,11 @@ class CsvWriter(object):
            # so if we counted N at the start of the loop,
            # there should be >= N for us to process.
            end = start + self.QUERY_INTERVAL
-            query = AssessmentWorkflow.objects.filter(
-                course_id=course_id
-            ).order_by('created').values('submission_uuid')[start:end]
+            query = self._use_read_replica(
+                AssessmentWorkflow.objects
+                    .filter(course_id=course_id)
+                    .order_by('created')
+            ).values('submission_uuid')[start:end]

            for workflow_dict in query:
                num_results += 1
@@ -184,7 +191,7 @@ class CsvWriter(object):
            None

        """
-        submission = sub_api.get_submission_and_student(submission_uuid)
+        submission = sub_api.get_submission_and_student(submission_uuid, read_replica=True)
        self._write_unicode('submission', [
            submission['uuid'],
            submission['student_item']['student_id'],
@@ -194,7 +201,7 @@ class CsvWriter(object):
            json.dumps(submission['answer'])
        ])

-        score = sub_api.get_latest_score_for_submission(submission_uuid)
+        score = sub_api.get_latest_score_for_submission(submission_uuid, read_replica=True)
        if score is not None:
            self._write_unicode('score', [
                score['submission_uuid'],
@@ -307,3 +314,20 @@ class CsvWriter(object):
        if writer is not None:
            encoded_row = [unicode(field).encode('utf-8') for field in row]
            writer.writerow(encoded_row)
+
+    def _use_read_replica(self, queryset):
+        """
+        Route the queryset to the "read_replica" database if one is configured.
+
+        Args:
+            queryset (QuerySet)
+
+        Returns:
+            QuerySet
+
+        """
+        return (
+            queryset.using("read_replica")
+            if "read_replica" in settings.DATABASES
+            else queryset
+        )
\ No newline at end of file
--- a/openassessment/test_utils.py
+++ b/openassessment/test_utils.py
@@ -2,28 +2,40 @@
 Test utilities
 """
 from django.core.cache import cache
-from django.test import TestCase
+from django.test import TestCase, TransactionTestCase
 from openassessment.assessment.models.ai import (
    CLASSIFIERS_CACHE_IN_MEM, CLASSIFIERS_CACHE_IN_FILE
 )


+def _clear_all_caches():
+    """Clear the default cache and any custom caches."""
+    cache.clear()
+    CLASSIFIERS_CACHE_IN_MEM.clear()
+    CLASSIFIERS_CACHE_IN_FILE.clear()
+
+
 class CacheResetTest(TestCase):
    """
    Test case that resets the cache before and after each test.
    """
    def setUp(self):
        super(CacheResetTest, self).setUp()
-        self._clear_all_caches()
+        _clear_all_caches()

    def tearDown(self):
        super(CacheResetTest, self).tearDown()
-        self._clear_all_caches()
-
-    def _clear_all_caches(self):
-        """
-        Clear the default cache and any custom caches.
-        """
-        cache.clear()
-        CLASSIFIERS_CACHE_IN_MEM.clear()
-        CLASSIFIERS_CACHE_IN_FILE.clear()
+        _clear_all_caches()
+
+
+class TransactionCacheResetTest(TransactionTestCase):
+    """
+    Transaction test case that resets the cache before and after each test.
+    """
+    def setUp(self):
+        super(TransactionCacheResetTest, self).setUp()
+        _clear_all_caches()
+
+    def tearDown(self):
+        super(TransactionCacheResetTest, self).tearDown()
+        _clear_all_caches()
--- a/openassessment/tests/test_data.py
+++ b/openassessment/tests/test_data.py
@@ -8,14 +8,14 @@ from StringIO import StringIO
 import csv
 from django.core.management import call_command
 import ddt
-from openassessment.test_utils import CacheResetTest
 from submissions import api as sub_api
+from openassessment.test_utils import TransactionCacheResetTest
 from openassessment.workflow import api as workflow_api
 from openassessment.data import CsvWriter


 @ddt.ddt
-class CsvWriterTest(CacheResetTest):
+class CsvWriterTest(TransactionCacheResetTest):
    """
    Test for writing openassessment data to CSV.
    """

--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -6,7 +6,7 @@
 git+https://github.com/edx/XBlock.git@fc5fea25c973ec66d8db63cf69a817ce624f5ef5#egg=XBlock
 git+https://github.com/edx/xblock-sdk.git@643900aadcb18aaeb7fe67271ca9dbf36e463ee6#egg=xblock-sdk

-edx-submissions==0.0.2
+edx-submissions==0.0.3

 # Third Party Requirements
 boto==2.13.3

--- a/settings/test.py
+++ b/settings/test.py
@@ -23,12 +23,13 @@ NOSE_ARGS = [
 DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
-        'NAME': '',
-        'USER': '',
-        'PASSWORD': '',
-        'HOST': '',
-        'PORT': '',
-    }
+        'NAME': 'test_ora2db',
+        'TEST_NAME': 'test_ora2db',
+    },
+    'read_replica': {
+        'ENGINE': 'django.db.backends.sqlite3',
+        'TEST_MIRROR': 'default',
+    },
 }

 TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'