Commit 7fc356b5 by Renzo Lucioni Committed by GitHub

Merge pull request #15035 from edx/renzo/improved-program-caching

Add management command for caching program data
parents 49f20a32 6cf2503f
# Template for the cache key of a single program's data. Fill it in with the
# program's UUID, e.g. PROGRAM_CACHE_KEY_TPL.format(uuid=uuid).
PROGRAM_CACHE_KEY_TPL = 'program-{uuid}'
# Cache key for the single entry that holds the list of all program UUIDs.
PROGRAM_UUIDS_CACHE_KEY = 'program-uuids'
import logging
import sys
from django.contrib.auth import get_user_model
from django.core.cache import cache
from django.core.management import BaseCommand
from openedx.core.djangoapps.catalog.cache import PROGRAM_CACHE_KEY_TPL, PROGRAM_UUIDS_CACHE_KEY
from openedx.core.djangoapps.catalog.models import CatalogIntegration
from openedx.core.djangoapps.catalog.utils import create_catalog_api_client
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# The project's active user model; the command uses it to look up the
# catalog service user by username.
User = get_user_model() # pylint: disable=invalid-name
class Command(BaseCommand):
    """Management command used to cache program data.

    This command requests every available program from the discovery
    service, writing each to its own cache entry with an indefinite expiration.
    It is meant to be run on a scheduled basis and should be the only code
    updating these cache entries.
    """
    help = "Rebuild the LMS' cache of program data."

    def handle(self, *args, **options):
        """Fetch program UUIDs and details, then write them to the cache.

        The list of UUIDs is cached under ``PROGRAM_UUIDS_CACHE_KEY`` and each
        program's details under ``PROGRAM_CACHE_KEY_TPL`` formatted with its
        UUID. Both are stored with a timeout of ``None``.

        Raises:
            User.DoesNotExist: If the catalog integration's service user is
                missing. Nothing is cached in that case.
            Exception: Whatever the API client raises when the list of UUIDs
                cannot be retrieved. Nothing is cached in that case.
            SystemExit: With exit code 1 when details for at least one program
                could not be retrieved. Programs that were retrieved
                successfully are still cached before exiting.
        """
        catalog_integration = CatalogIntegration.current()
        username = catalog_integration.service_username

        try:
            user = User.objects.get(username=username)
            client = create_catalog_api_client(user, catalog_integration)
        except User.DoesNotExist:
            # The placeholder is named, so the value must be passed by keyword;
            # a positional argument would raise KeyError here and hide the
            # DoesNotExist error that is re-raised below.
            logger.error(
                'Failed to create API client. Service user {username} does not exist.'.format(username=username)
            )
            raise

        try:
            querystring = {
                'exclude_utm': 1,
                'status': ('active', 'retired'),
                'uuids_only': 1,
            }

            logger.info('Requesting program UUIDs.')
            uuids = client.programs.get(**querystring)
        except Exception:  # pylint: disable=broad-except
            logger.error('Failed to retrieve program UUIDs.')
            raise

        total = len(uuids)
        logger.info('Caching UUIDs for {total} programs.'.format(total=total))
        cache.set(PROGRAM_UUIDS_CACHE_KEY, uuids, None)

        programs = {}
        failure = False
        for uuid in uuids:
            try:
                logger.info('Requesting details for program {uuid}.'.format(uuid=uuid))
                program = client.programs(uuid).get()

                cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)
                programs[cache_key] = program
            # Catch Exception rather than everything: a single bad program must
            # not stop the run, but KeyboardInterrupt and SystemExit should
            # still be able to abort the command.
            except Exception:  # pylint: disable=broad-except
                logger.exception('Failed to retrieve details for program {uuid}.'.format(uuid=uuid))
                failure = True
                continue

        successful = len(programs)
        logger.info('Caching details for {successful} programs.'.format(successful=successful))
        cache.set_many(programs, None)

        logger.info(
            'Program caching complete. Successfully cached {successful} of {total} programs.'.format(
                successful=successful,
                total=total
            )
        )

        if failure:
            # This will fail a Jenkins job running this command, letting site
            # operators know that there was a problem.
            sys.exit(1)
import json
import httpretty
from django.core.cache import cache
from django.core.management import call_command
from openedx.core.djangoapps.catalog.cache import PROGRAM_CACHE_KEY_TPL, PROGRAM_UUIDS_CACHE_KEY
from openedx.core.djangoapps.catalog.tests.factories import ProgramFactory
from openedx.core.djangoapps.catalog.tests.mixins import CatalogIntegrationMixin
from openedx.core.djangolib.testing.utils import CacheIsolationTestCase, skip_unless_lms
from student.tests.factories import UserFactory
@skip_unless_lms
@httpretty.activate
class TestCachePrograms(CatalogIntegrationMixin, CacheIsolationTestCase):
    """Tests for the ``cache_programs`` management command.

    The catalog API's program list and detail endpoints are stubbed with
    httpretty, and the 'default' cache is enabled so that the command's
    writes can be inspected.
    """
    ENABLED_CACHES = ['default']

    def setUp(self):
        super(TestCachePrograms, self).setUp()

        self.catalog_integration = self.create_catalog_integration()

        api_root = self.catalog_integration.internal_api_url.rstrip('/')
        self.list_url = api_root + '/programs/'
        self.detail_tpl = self.list_url.rstrip('/') + '/{uuid}/'

        self.programs = ProgramFactory.create_batch(3)
        self.uuids = [item['uuid'] for item in self.programs]

    def mock_list(self):
        """Stub the program list endpoint, checking the querystring it is sent."""
        def respond_with_uuids(request, uri, headers):
            self.assertEqual(
                request.querystring,
                {
                    'exclude_utm': ['1'],
                    'status': ['active', 'retired'],
                    'uuids_only': ['1']
                }
            )
            return (200, headers, json.dumps(self.uuids))

        httpretty.register_uri(
            httpretty.GET,
            self.list_url,
            body=respond_with_uuids,
            content_type='application/json'
        )

    def mock_detail(self, uuid, program):
        """Stub the detail endpoint for ``uuid`` so that it returns ``program``."""
        detail_url = self.detail_tpl.format(uuid=uuid)
        httpretty.register_uri(
            httpretty.GET,
            detail_url,
            body=json.dumps(program),
            content_type='application/json'
        )

    def _keyed_by_cache_key(self, programs):
        """Map each program in ``programs`` to the cache key built from its UUID."""
        return {
            PROGRAM_CACHE_KEY_TPL.format(uuid=item['uuid']): item for item in programs
        }

    def _mock_details(self, uuids, keyed_programs):
        """Stub the detail endpoint for each of ``uuids`` using ``keyed_programs``."""
        for uuid in uuids:
            self.mock_detail(uuid, keyed_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    def _assert_all_uuids_cached(self):
        """Check that the cached UUID list matches the UUIDs served by the list stub."""
        self.assertEqual(
            set(cache.get(PROGRAM_UUIDS_CACHE_KEY)),
            set(self.uuids)
        )

    def _assert_cached_programs(self, requested, expected):
        """
        Look up every key of ``requested`` in the cache and check that exactly
        the keys of ``expected`` were hits, each holding the expected program.
        """
        hits = cache.get_many(list(requested.keys()))

        self.assertEqual(
            set(hits),
            set(expected)
        )

        # Programs are dictionaries and therefore unhashable, so they can't be
        # compared as sets. The key check above already established which
        # entries came back; only the stored data remains to be verified.
        for key, cached_program in hits.items():
            self.assertEqual(cached_program, expected[key])

    def _assert_command_fails_without_caching_uuids(self):
        """Check that the command raises and leaves the UUID cache entry unset."""
        with self.assertRaises(Exception):
            call_command('cache_programs')

        self.assertEqual(cache.get(PROGRAM_UUIDS_CACHE_KEY), None)

    def test_handle(self):
        """
        The command should request program UUIDs and details and cache both.
        """
        # The service user is created per test rather than in setUp. Creating
        # it in setUp and deleting it in the one test that needs it missing
        # triggers "OperationalError: no such table: wiki_attachmentrevision"
        # when run on Jenkins.
        UserFactory(username=self.catalog_integration.service_username)

        expected_programs = self._keyed_by_cache_key(self.programs)

        self.mock_list()
        self._mock_details(self.uuids, expected_programs)

        call_command('cache_programs')

        self._assert_all_uuids_cached()
        # Every key is expected to be a cache hit.
        self._assert_cached_programs(expected_programs, expected_programs)

    def test_handle_missing_service_user(self):
        """
        Without a service user, the command should raise an exception and
        should not cache program UUIDs.
        """
        self._assert_command_fails_without_caching_uuids()

    def test_handle_missing_uuids(self):
        """
        The command should raise an exception when it cannot retrieve
        program UUIDs.
        """
        UserFactory(username=self.catalog_integration.service_username)

        self._assert_command_fails_without_caching_uuids()

    def test_handle_missing_programs(self):
        """
        A failure to retrieve one program should not stop the command from
        retrieving and caching the others, but should make it exit with a
        non-zero exit code.
        """
        UserFactory(username=self.catalog_integration.service_username)

        every_program = self._keyed_by_cache_key(self.programs)
        retrievable = self._keyed_by_cache_key(self.programs[:2])

        self.mock_list()
        # Only the first two programs get a detail stub.
        self._mock_details(self.uuids[:2], retrievable)

        with self.assertRaises(SystemExit) as context:
            call_command('cache_programs')

        self.assertEqual(context.exception.code, 1)

        self._assert_all_uuids_cached()
        # One of the cache keys should result in a cache miss.
        self._assert_cached_programs(every_program, retrievable)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment