Commit a7d638de by Adam Palay

clear request cache after celery task finishes (TNL-2705)

set max_num_courses_in_cache on modulestore
parent 3e747c37
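
For context, a minimal standalone sketch of the pattern this commit introduces (not the edx-platform code itself): connect a receiver to celery's task_postrun signal so that per-request cached data cannot survive from one task to the next inside a long-lived worker process. The REQUEST_CACHE dict below is a hypothetical stand-in for request_cache.middleware.RequestCache.

from celery.signals import task_postrun

REQUEST_CACHE = {}  # hypothetical stand-in for the real request-scoped cache


@task_postrun.connect
def clear_cache_after_task(**kwargs):  # receives sender/task metadata; unused here
    """Drop everything cached during the task that just completed."""
    REQUEST_CACHE.clear()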
@@ -88,7 +88,7 @@ class HelperMethods(object):
         self.save_course()
         return (vertical, split_test)
 
-    def _create_problem_with_content_group(self, cid, group_id, name_suffix='', special_characters=''):
+    def _create_problem_with_content_group(self, cid, group_id, name_suffix='', special_characters='', orphan=False):
         """
         Create a problem
         Assign content group to the problem.
@@ -112,7 +112,8 @@ class HelperMethods(object):
             data={'metadata': group_access_content}
         )
 
-        self.course.children.append(vertical.location)
+        if not orphan:
+            self.course.children.append(vertical.location)
         self.save_course()
         return vertical, problem
@@ -682,24 +683,16 @@ class GroupConfigurationsUsageInfoTestCase(CourseTestCase, HelperMethods):
     @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
     def test_can_get_correct_usage_info_with_orphan(self, module_store_type):
         """
-        Test if content group json updated successfully with usage information even if there is
-        an orphan in content group.
+        Test if content group json updated successfully with usage information
+        even if there is an orphan in content group.
         """
         self.course = CourseFactory.create(default_store=module_store_type)
         self._add_user_partitions(count=1, scheme_id='cohort')
-        vertical, problem = self._create_problem_with_content_group(cid=0, group_id=1, name_suffix='0')
+        vertical, __ = self._create_problem_with_content_group(cid=0, group_id=1, name_suffix='0', orphan=True)
 
-        # Assert that there is no orphan in the course yet.
-        self.assertEqual(len(self.store.get_orphans(self.course.id)), 0)
-
-        # Update problem(created earlier) to an orphan.
-        with self.store.branch_setting(ModuleStoreEnum.Branch.published_only):
-            vertical = self.store.get_item(vertical.location)
-            vertical.children.remove(problem.location)
-            self.store.update_item(vertical, self.user.id)
-
-        # Assert that the problem is orphan now.
-        self.assertIn(problem.location, self.store.get_orphans(self.course.id))
+        # Assert that there is an orphan in the course, and that it's the vertical
+        self.assertEqual(len(self.store.get_orphans(self.course.id)), 1)
+        self.assertIn(vertical.location, self.store.get_orphans(self.course.id))
 
         # Get the expected content group information based on module store.
         if module_store_type == ModuleStoreEnum.Type.mongo:
...
@@ -8,6 +8,7 @@ is installed in order to clear the cache after each request.
 import logging
 from urlparse import urlparse
 
+from celery.signals import task_postrun
 from django.conf import settings
 from django.test.client import RequestFactory
@@ -17,6 +18,15 @@ from request_cache import middleware
 log = logging.getLogger(__name__)
 
 
+@task_postrun.connect
+def clear_request_cache(**kwargs):  # pylint: disable=unused-argument
+    """
+    Once a celery task completes, clear the request cache to
+    prevent memory leaks.
+    """
+    middleware.RequestCache.clear_request_cache()
+
+
 def get_cache(name):
     """
     Return the request cache named ``name``.
@@ -50,8 +60,8 @@ def get_request_or_stub():
     if request is None:
         log.warning(
             "Could not retrieve the current request. "
             "A stub request will be created instead using settings.SITE_NAME. "
             "This should be used *only* in test cases, never in production!"
         )
...
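
Roughly how this cache gets populated in the first place, as a hedged usage sketch: get_cache is the helper visible in the hunk above, and the sketch assumes it returns a per-request dict-like store (its docstring only says it returns "the request cache named ``name``"); load_course_structure is a hypothetical loader used for illustration.

from request_cache import get_cache


def expensive_lookup(course_key):
    # Cached per request/task; without the task_postrun receiver above,
    # this data would persist from one celery task to the next in a worker.
    cache = get_cache('course_cache')
    if course_key not in cache:
        cache[course_key] = load_course_structure(course_key)  # hypothetical loader
    return cache[course_key]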
""" """
Tests for the request cache. Tests for the request cache.
""" """
from celery.task import task
from django.conf import settings from django.conf import settings
from django.test import TestCase from django.test import TestCase
from request_cache import get_request_or_stub from request_cache import get_request_or_stub
from xmodule.modulestore.django import modulestore
class TestRequestCache(TestCase): class TestRequestCache(TestCase):
...@@ -13,8 +15,21 @@ class TestRequestCache(TestCase): ...@@ -13,8 +15,21 @@ class TestRequestCache(TestCase):
""" """
def test_get_request_or_stub(self): def test_get_request_or_stub(self):
# Outside the context of the request, we should still get a request """
# that allows us to build an absolute URI. Outside the context of the request, we should still get a request
that allows us to build an absolute URI.
"""
stub = get_request_or_stub() stub = get_request_or_stub()
expected_url = "http://{site_name}/foobar".format(site_name=settings.SITE_NAME) expected_url = "http://{site_name}/foobar".format(site_name=settings.SITE_NAME)
self.assertEqual(stub.build_absolute_uri("foobar"), expected_url) self.assertEqual(stub.build_absolute_uri("foobar"), expected_url)
@task
def _dummy_task(self):
""" Create a task that adds stuff to the request cache. """
cache = {"course_cache": "blah blah blah"}
modulestore().request_cache.data.update(cache)
def test_clear_cache_celery(self):
""" Test that the request cache is cleared after a task is run. """
self._dummy_task.apply(args=(self,)).get()
self.assertEqual(modulestore().request_cache.data, {})
@@ -62,6 +62,7 @@ from importlib import import_module
 from mongodb_proxy import autoretry_read
 from path import Path as path
 from pytz import UTC
+import traceback
 from bson.objectid import ObjectId
 
 from xblock.core import XBlock
@@ -687,6 +688,10 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         if self.request_cache is not None:
             self.services["request_cache"] = self.request_cache
 
+        # set the maximum number of courses we expect
+        # to see in the request cache
+        self.max_num_courses_in_cache = 10
+
         self.signal_handler = signal_handler
 
     def close_connections(self):
@@ -795,6 +800,26 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         """
         if self.request_cache is not None:
             self.request_cache.data.setdefault('course_cache', {})[course_version_guid] = system
+            number_courses_in_cache = len(self.request_cache.data['course_cache'])
+            if number_courses_in_cache > self.max_num_courses_in_cache:
+                # We shouldn't have any scenarios where there are many
+                # courses in the request cache. If there are, it's probably
+                # indicative of a leak. In this case, we should log that here
+                # with a stacktrace.
+                course_structure_ids = self.request_cache.data['course_cache'].keys()[:10]
+                log.warning(
+                    (
+                        "There are more than %d (%d) "
+                        "courses in the request cache. This may be indicative "
+                        "of a memory leak.\n\nHere are some of the course "
+                        "structures' mongo ids that are now in the cache: %s\n\n"
+                        "And here's the current stack:\n%s"
+                    ),
+                    self.max_num_courses_in_cache,
+                    number_courses_in_cache,
+                    ", ".join(unicode(structure_id) for structure_id in course_structure_ids),
+                    "".join(traceback.format_stack()[-10:])
+                )
         return system
 
     def _clear_cache(self, course_version_guid=None):
...
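
The leak check added above, reduced to a self-contained sketch (the names here are illustrative, not the modulestore's API): keep a ceiling on how many entries a per-request cache is expected to hold, and when it is exceeded, log a sample of the keys plus a truncated stack trace so the code path that over-filled the cache can be traced.

import logging
import traceback

log = logging.getLogger(__name__)
MAX_EXPECTED_ENTRIES = 10  # mirrors max_num_courses_in_cache above


def add_to_cache(cache, key, value, max_entries=MAX_EXPECTED_ENTRIES):
    """Store ``value`` and warn loudly if the cache looks like it is leaking."""
    cache[key] = value
    if len(cache) > max_entries:
        sample_keys = list(cache.keys())[:10]
        log.warning(
            "More than %d (%d) entries in the cache; possible leak.\n"
            "Sample keys: %s\nCurrent stack:\n%s",
            max_entries,
            len(cache),
            ", ".join(str(key) for key in sample_keys),
            "".join(traceback.format_stack()[-10:]),
        )
    return value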
""" """
Test split modulestore w/o using any django stuff. Test split modulestore w/o using any django stuff.
""" """
from mock import patch from mock import patch, Mock
import datetime import datetime
from importlib import import_module from importlib import import_module
from path import Path as path from path import Path as path
...@@ -627,11 +627,27 @@ class SplitModuleCourseTests(SplitModuleTest): ...@@ -627,11 +627,27 @@ class SplitModuleCourseTests(SplitModuleTest):
self.assertEqual(course.edited_by, "testassist@edx.org") self.assertEqual(course.edited_by, "testassist@edx.org")
self.assertDictEqual(course.grade_cutoffs, {"Pass": 0.45}) self.assertDictEqual(course.grade_cutoffs, {"Pass": 0.45})
@ddt.data((10, 0), (2, 1))
@ddt.unpack
@patch("xmodule.modulestore.split_mongo.split.log.warning")
def test_request_cache_max_courses(self, max_courses, expected_warnings, mock_log):
"""
Test that we warn if there are too many courses in the request cache
at once.
"""
mock_request_cache = Mock()
mock_request_cache.data = {}
modulestore().max_num_courses_in_cache = max_courses
modulestore().request_cache = mock_request_cache
modulestore().get_courses(branch=BRANCH_NAME_DRAFT)
self.assertEqual(mock_log.call_count, expected_warnings)
@patch('xmodule.tabs.CourseTab.from_json', side_effect=mock_tab_from_json) @patch('xmodule.tabs.CourseTab.from_json', side_effect=mock_tab_from_json)
def test_get_courses_with_same_course_index(self, _from_json): def test_get_courses_with_same_course_index(self, _from_json):
""" """
Test that if two courses pointing to same course index, Test that if two courses point to same course index,
get_courses should return both. `get_courses` should return both courses.
""" """
courses = modulestore().get_courses(branch=BRANCH_NAME_DRAFT) courses = modulestore().get_courses(branch=BRANCH_NAME_DRAFT)
# Should have gotten 3 draft courses. # Should have gotten 3 draft courses.
......
@@ -255,10 +255,10 @@ class BookmarkModelTests(BookmarksTestsBase):
     @ddt.data(
         (ModuleStoreEnum.Type.mongo, 'course', [], 3),
-        (ModuleStoreEnum.Type.mongo, 'chapter_1', [], 3),
-        (ModuleStoreEnum.Type.mongo, 'sequential_1', ['chapter_1'], 4),
-        (ModuleStoreEnum.Type.mongo, 'vertical_1', ['chapter_1', 'sequential_1'], 5),
-        (ModuleStoreEnum.Type.mongo, 'html_1', ['chapter_1', 'sequential_2', 'vertical_2'], 6),
+        (ModuleStoreEnum.Type.mongo, 'chapter_1', [], 4),
+        (ModuleStoreEnum.Type.mongo, 'sequential_1', ['chapter_1'], 6),
+        (ModuleStoreEnum.Type.mongo, 'vertical_1', ['chapter_1', 'sequential_1'], 8),
+        (ModuleStoreEnum.Type.mongo, 'html_1', ['chapter_1', 'sequential_2', 'vertical_2'], 10),
         (ModuleStoreEnum.Type.split, 'course', [], 3),
         (ModuleStoreEnum.Type.split, 'chapter_1', [], 2),
         (ModuleStoreEnum.Type.split, 'sequential_1', ['chapter_1'], 2),
@@ -268,8 +268,9 @@ class BookmarkModelTests(BookmarksTestsBase):
     @ddt.unpack
     def test_path_and_queries_on_create(self, store_type, block_to_bookmark, ancestors_attrs, expected_mongo_calls):
         """
-        In case of mongo, 1 query is used to fetch the block, and 2 by path_to_location(), and then
-        1 query per parent in path is needed to fetch the parent blocks.
+        In case of mongo, 1 query is used to fetch the block, and 2
+        by path_to_location(), and then 1 query per parent in path
+        is needed to fetch the parent blocks.
         """
         self.setup_test_data(store_type)
...