Commit a7d638de by Adam Palay

clear request cache after celery task finishes (TNL-2705)

set max_num_courses_in_cache on modulestore
parent 3e747c37
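
For context, a minimal standalone sketch of the pattern this commit introduces (not the edx-platform code itself): connect a receiver to celery's task_postrun signal so that per-request cached data cannot survive from one task to the next inside a long-lived worker process. The REQUEST_CACHE dict below is a hypothetical stand-in for request_cache.middleware.RequestCache.

from celery.signals import task_postrun

REQUEST_CACHE = {}  # hypothetical stand-in for the real request-scoped cache


@task_postrun.connect
def clear_cache_after_task(**kwargs):  # receives sender/task metadata; unused here
    """Drop everything cached during the task that just completed."""
    REQUEST_CACHE.clear()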
@@ -88,7 +88,7 @@ class HelperMethods(object):
         self.save_course()
         return (vertical, split_test)
 
-    def _create_problem_with_content_group(self, cid, group_id, name_suffix='', special_characters=''):
+    def _create_problem_with_content_group(self, cid, group_id, name_suffix='', special_characters='', orphan=False):
         """
         Create a problem
         Assign content group to the problem.
@@ -112,7 +112,8 @@ class HelperMethods(object):
             data={'metadata': group_access_content}
         )
 
-        self.course.children.append(vertical.location)
+        if not orphan:
+            self.course.children.append(vertical.location)
         self.save_course()
         return vertical, problem
@@ -682,24 +683,16 @@ class GroupConfigurationsUsageInfoTestCase(CourseTestCase, HelperMethods):
     @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
     def test_can_get_correct_usage_info_with_orphan(self, module_store_type):
         """
-        Test if content group json updated successfully with usage information even if there is
-        an orphan in content group.
+        Test if content group json updated successfully with usage information
+        even if there is an orphan in content group.
         """
         self.course = CourseFactory.create(default_store=module_store_type)
         self._add_user_partitions(count=1, scheme_id='cohort')
-        vertical, problem = self._create_problem_with_content_group(cid=0, group_id=1, name_suffix='0')
+        vertical, __ = self._create_problem_with_content_group(cid=0, group_id=1, name_suffix='0', orphan=True)
 
-        # Assert that there is no orphan in the course yet.
-        self.assertEqual(len(self.store.get_orphans(self.course.id)), 0)
-
-        # Update problem(created earlier) to an orphan.
-        with self.store.branch_setting(ModuleStoreEnum.Branch.published_only):
-            vertical = self.store.get_item(vertical.location)
-            vertical.children.remove(problem.location)
-            self.store.update_item(vertical, self.user.id)
-
-        # Assert that the problem is orphan now.
-        self.assertIn(problem.location, self.store.get_orphans(self.course.id))
+        # Assert that there is an orphan in the course, and that it's the vertical
+        self.assertEqual(len(self.store.get_orphans(self.course.id)), 1)
+        self.assertIn(vertical.location, self.store.get_orphans(self.course.id))
 
         # Get the expected content group information based on module store.
         if module_store_type == ModuleStoreEnum.Type.mongo:
...
@@ -8,6 +8,7 @@ is installed in order to clear the cache after each request.
 import logging
 from urlparse import urlparse
 
+from celery.signals import task_postrun
 from django.conf import settings
 from django.test.client import RequestFactory
@@ -17,6 +18,15 @@ from request_cache import middleware
 log = logging.getLogger(__name__)
 
 
+@task_postrun.connect
+def clear_request_cache(**kwargs):  # pylint: disable=unused-argument
+    """
+    Once a celery task completes, clear the request cache to
+    prevent memory leaks.
+    """
+    middleware.RequestCache.clear_request_cache()
+
+
 def get_cache(name):
     """
     Return the request cache named ``name``.
@@ -50,8 +60,8 @@ def get_request_or_stub():
     if request is None:
         log.warning(
             "Could not retrieve the current request. "
             "A stub request will be created instead using settings.SITE_NAME. "
             "This should be used *only* in test cases, never in production!"
         )
...
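
Roughly how this cache gets populated in the first place, as a hedged usage sketch: get_cache is the helper visible in the hunk above, and the sketch assumes it returns a per-request dict-like store (its docstring only says it returns "the request cache named ``name``"); load_course_structure is a hypothetical loader used for illustration.

from request_cache import get_cache


def expensive_lookup(course_key):
    # Cached per request/task; without the task_postrun receiver above,
    # this data would persist from one celery task to the next in a worker.
    cache = get_cache('course_cache')
    if course_key not in cache:
        cache[course_key] = load_course_structure(course_key)  # hypothetical loader
    return cache[course_key]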
""" """
Tests for the request cache. Tests for the request cache.
""" """
from celery.task import task
from django.conf import settings from django.conf import settings
from django.test import TestCase from django.test import TestCase
from request_cache import get_request_or_stub from request_cache import get_request_or_stub
from xmodule.modulestore.django import modulestore
class TestRequestCache(TestCase): class TestRequestCache(TestCase):
...@@ -13,8 +15,21 @@ class TestRequestCache(TestCase): ...@@ -13,8 +15,21 @@ class TestRequestCache(TestCase):
""" """
def test_get_request_or_stub(self): def test_get_request_or_stub(self):
# Outside the context of the request, we should still get a request """
# that allows us to build an absolute URI. Outside the context of the request, we should still get a request
that allows us to build an absolute URI.
"""
stub = get_request_or_stub() stub = get_request_or_stub()
expected_url = "http://{site_name}/foobar".format(site_name=settings.SITE_NAME) expected_url = "http://{site_name}/foobar".format(site_name=settings.SITE_NAME)
self.assertEqual(stub.build_absolute_uri("foobar"), expected_url) self.assertEqual(stub.build_absolute_uri("foobar"), expected_url)
@task
def _dummy_task(self):
""" Create a task that adds stuff to the request cache. """
cache = {"course_cache": "blah blah blah"}
modulestore().request_cache.data.update(cache)
def test_clear_cache_celery(self):
""" Test that the request cache is cleared after a task is run. """
self._dummy_task.apply(args=(self,)).get()
self.assertEqual(modulestore().request_cache.data, {})
@@ -62,6 +62,7 @@ from importlib import import_module
 from mongodb_proxy import autoretry_read
 from path import Path as path
 from pytz import UTC
+import traceback
 from bson.objectid import ObjectId
 
 from xblock.core import XBlock
@@ -687,6 +688,10 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         if self.request_cache is not None:
             self.services["request_cache"] = self.request_cache
 
+        # set the maximum number of courses we expect
+        # to see in the request cache
+        self.max_num_courses_in_cache = 10
+
         self.signal_handler = signal_handler
 
     def close_connections(self):
@@ -795,6 +800,26 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         """
         if self.request_cache is not None:
             self.request_cache.data.setdefault('course_cache', {})[course_version_guid] = system
+            number_courses_in_cache = len(self.request_cache.data['course_cache'])
+            if number_courses_in_cache > self.max_num_courses_in_cache:
+                # We shouldn't have any scenarios where there are many
+                # courses in the request cache. If there are, it's probably
+                # indicative of a leak. In this case, we should log that here
+                # with a stacktrace.
+                course_structure_ids = self.request_cache.data['course_cache'].keys()[:10]
+                log.warning(
+                    (
+                        "There are more than %d (%d) "
+                        "courses in the request cache. This may be indicative "
+                        "of a memory leak.\n\nHere are some of the course "
+                        "structures' mongo ids that are now in the cache: %s\n\n"
+                        "And here's the current stack:\n%s"
+                    ),
+                    self.max_num_courses_in_cache,
+                    number_courses_in_cache,
+                    ", ".join(unicode(structure_id) for structure_id in course_structure_ids),
+                    "".join(traceback.format_stack()[-10:])
+                )
         return system
 
     def _clear_cache(self, course_version_guid=None):
...
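
The leak check added above, reduced to a self-contained sketch (the names here are illustrative, not the modulestore's API): keep a ceiling on how many entries a per-request cache is expected to hold, and when it is exceeded, log a sample of the keys plus a truncated stack trace so the code path that over-filled the cache can be traced.

import logging
import traceback

log = logging.getLogger(__name__)
MAX_EXPECTED_ENTRIES = 10  # mirrors max_num_courses_in_cache above


def add_to_cache(cache, key, value, max_entries=MAX_EXPECTED_ENTRIES):
    """Store ``value`` and warn loudly if the cache looks like it is leaking."""
    cache[key] = value
    if len(cache) > max_entries:
        sample_keys = list(cache.keys())[:10]
        log.warning(
            "More than %d (%d) entries in the cache; possible leak.\n"
            "Sample keys: %s\nCurrent stack:\n%s",
            max_entries,
            len(cache),
            ", ".join(str(key) for key in sample_keys),
            "".join(traceback.format_stack()[-10:]),
        )
    return value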
""" """
Test split modulestore w/o using any django stuff. Test split modulestore w/o using any django stuff.
""" """
from mock import patch from mock import patch, Mock
import datetime import datetime
from importlib import import_module from importlib import import_module
from path import Path as path from path import Path as path
...@@ -627,11 +627,27 @@ class SplitModuleCourseTests(SplitModuleTest): ...@@ -627,11 +627,27 @@ class SplitModuleCourseTests(SplitModuleTest):
self.assertEqual(course.edited_by, "testassist@edx.org") self.assertEqual(course.edited_by, "testassist@edx.org")
self.assertDictEqual(course.grade_cutoffs, {"Pass": 0.45}) self.assertDictEqual(course.grade_cutoffs, {"Pass": 0.45})
@ddt.data((10, 0), (2, 1))
@ddt.unpack
@patch("xmodule.modulestore.split_mongo.split.log.warning")
def test_request_cache_max_courses(self, max_courses, expected_warnings, mock_log):
"""
Test that we warn if there are too many courses in the request cache
at once.
"""
mock_request_cache = Mock()
mock_request_cache.data = {}
modulestore().max_num_courses_in_cache = max_courses
modulestore().request_cache = mock_request_cache
modulestore().get_courses(branch=BRANCH_NAME_DRAFT)
self.assertEqual(mock_log.call_count, expected_warnings)
@patch('xmodule.tabs.CourseTab.from_json', side_effect=mock_tab_from_json) @patch('xmodule.tabs.CourseTab.from_json', side_effect=mock_tab_from_json)
def test_get_courses_with_same_course_index(self, _from_json): def test_get_courses_with_same_course_index(self, _from_json):
""" """
Test that if two courses pointing to same course index, Test that if two courses point to same course index,
get_courses should return both. `get_courses` should return both courses.
""" """
courses = modulestore().get_courses(branch=BRANCH_NAME_DRAFT) courses = modulestore().get_courses(branch=BRANCH_NAME_DRAFT)
# Should have gotten 3 draft courses. # Should have gotten 3 draft courses.
......
@@ -255,10 +255,10 @@ class BookmarkModelTests(BookmarksTestsBase):
     @ddt.data(
         (ModuleStoreEnum.Type.mongo, 'course', [], 3),
-        (ModuleStoreEnum.Type.mongo, 'chapter_1', [], 3),
-        (ModuleStoreEnum.Type.mongo, 'sequential_1', ['chapter_1'], 4),
-        (ModuleStoreEnum.Type.mongo, 'vertical_1', ['chapter_1', 'sequential_1'], 5),
-        (ModuleStoreEnum.Type.mongo, 'html_1', ['chapter_1', 'sequential_2', 'vertical_2'], 6),
+        (ModuleStoreEnum.Type.mongo, 'chapter_1', [], 4),
+        (ModuleStoreEnum.Type.mongo, 'sequential_1', ['chapter_1'], 6),
+        (ModuleStoreEnum.Type.mongo, 'vertical_1', ['chapter_1', 'sequential_1'], 8),
+        (ModuleStoreEnum.Type.mongo, 'html_1', ['chapter_1', 'sequential_2', 'vertical_2'], 10),
         (ModuleStoreEnum.Type.split, 'course', [], 3),
         (ModuleStoreEnum.Type.split, 'chapter_1', [], 2),
         (ModuleStoreEnum.Type.split, 'sequential_1', ['chapter_1'], 2),
@@ -268,8 +268,9 @@ class BookmarkModelTests(BookmarksTestsBase):
     @ddt.unpack
     def test_path_and_queries_on_create(self, store_type, block_to_bookmark, ancestors_attrs, expected_mongo_calls):
         """
-        In case of mongo, 1 query is used to fetch the block, and 2 by path_to_location(), and then
-        1 query per parent in path is needed to fetch the parent blocks.
+        In case of mongo, 1 query is used to fetch the block, and 2
+        by path_to_location(), and then 1 query per parent in path
+        is needed to fetch the parent blocks.
         """
         self.setup_test_data(store_type)
...