Commit 8718dc13 by David Ormsbee Committed by Feanil Patel

Cache SplitMongo course structures in memcached.

This is primarily to reduce load on MongoDB, where we've lately
had performance problems that we suspect are caused by very
large course structures being evicted from MongoDB's cache. This
may potentially give us a path to better performance as well,
but that's not the goal of this commit.

Surprisingly, LZ4 seemed to actually run more slowly than zlib
for this. Possibly because of some overhead in the Python
bindings? GZip was also surprisingly slow given that it uses
zlib underneath (something like 5x slower).

Use separate cache backend for caching structures.

Abstract out course structure cache.

add datadog metrics for compressed course structure sizes

Since we're using a different cache backend, we don't need to have a cache prefix

Use dummy cache backend for tests.

Fall back to the default cache if course_structure_cache doesn't exist.
parent 18f69f3b
...@@ -115,7 +115,10 @@ CACHES = { ...@@ -115,7 +115,10 @@ CACHES = {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
'LOCATION': 'edx_location_mem_cache', 'LOCATION': 'edx_location_mem_cache',
}, },
'course_structure_cache': {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
'LOCATION': 'edx_course_structure_mem_cache',
},
} }
# Make the keyedcache startup warnings go away # Make the keyedcache startup warnings go away
......
...@@ -166,7 +166,9 @@ CACHES = { ...@@ -166,7 +166,9 @@ CACHES = {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
'LOCATION': 'edx_location_mem_cache', 'LOCATION': 'edx_location_mem_cache',
}, },
'course_structure_cache': {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
},
} }
# Add external_auth to Installed apps for testing # Add external_auth to Installed apps for testing
......
...@@ -2,7 +2,11 @@ ...@@ -2,7 +2,11 @@
Segregation of pymongo functions from the data modeling mechanisms for split modulestore. Segregation of pymongo functions from the data modeling mechanisms for split modulestore.
""" """
import datetime import datetime
import cPickle as pickle
import math import math
import re
import zlib
from mongodb_proxy import autoretry_read, MongoProxy
import pymongo import pymongo
import pytz import pytz
import re import re
...@@ -11,6 +15,8 @@ from time import time ...@@ -11,6 +15,8 @@ from time import time
# Import this just to export it # Import this just to export it
from pymongo.errors import DuplicateKeyError # pylint: disable=unused-import from pymongo.errors import DuplicateKeyError # pylint: disable=unused-import
from django.core.cache import get_cache, InvalidCacheBackendError
import dogstats_wrapper as dog_stats_api
from contracts import check, new_contract from contracts import check, new_contract
from mongodb_proxy import autoretry_read, MongoProxy from mongodb_proxy import autoretry_read, MongoProxy
...@@ -203,6 +209,40 @@ def structure_to_mongo(structure, course_context=None): ...@@ -203,6 +209,40 @@ def structure_to_mongo(structure, course_context=None):
return new_structure return new_structure
class CourseStructureCache(object):
    """
    Thin wrapper around a Django cache backend for course structure documents.

    Structures are pickled and zlib-compressed on the way in, and
    decompressed/unpickled on the way out.
    """
    def __init__(self):
        # Prefer the dedicated structure cache; fall back to the default
        # backend when it isn't configured.
        try:
            self.cache = get_cache('course_structure_cache')
        except InvalidCacheBackendError:
            self.cache = get_cache('default')

    def get(self, key):
        """Return the deserialized structure stored under `key`, or None on a miss."""
        raw = self.cache.get(key)
        if raw is None:
            return None
        return pickle.loads(zlib.decompress(raw))

    def set(self, key, structure):
        """Serialize `structure`, compress it, and write it to the cache under `key`."""
        serialized = pickle.dumps(structure, pickle.HIGHEST_PROTOCOL)
        # Compression level 1 = fastest (slightly larger results)
        payload = zlib.compress(serialized, 1)

        # record compressed course structure sizes
        dog_stats_api.histogram(
            'compressed_course_structure.size',
            len(payload),
            tags=[key]
        )

        # Structures are immutable, so cache with a timeout of "never".
        self.cache.set(key, payload, None)
class MongoConnection(object): class MongoConnection(object):
""" """
Segregation of pymongo functions from the data modeling mechanisms for split modulestore. Segregation of pymongo functions from the data modeling mechanisms for split modulestore.
...@@ -256,15 +296,23 @@ class MongoConnection(object): ...@@ -256,15 +296,23 @@ class MongoConnection(object):
def get_structure(self, key, course_context=None): def get_structure(self, key, course_context=None):
""" """
Get the structure from the persistence mechanism whose id is the given key Get the structure from the persistence mechanism whose id is the given key.
This method will use a cached version of the structure if it is availble.
""" """
with TIMER.timer("get_structure", course_context) as tagger_get_structure: with TIMER.timer("get_structure", course_context) as tagger_get_structure:
with TIMER.timer("get_structure.find_one", course_context) as tagger_find_one: cache = CourseStructureCache()
doc = self.structures.find_one({'_id': key})
tagger_find_one.measure("blocks", len(doc['blocks'])) structure = cache.get(key)
tagger_get_structure.measure("blocks", len(doc['blocks'])) tagger_get_structure.tag(from_cache='true' if structure else 'false')
if not structure:
return structure_from_mongo(doc, course_context) with TIMER.timer("get_structure.find_one", course_context) as tagger_find_one:
doc = self.structures.find_one({'_id': key})
tagger_find_one.measure("blocks", len(doc['blocks']))
structure = structure_from_mongo(doc, course_context)
cache.set(key, structure)
return structure
@autoretry_read() @autoretry_read()
def find_structures_by_id(self, ids, course_context=None): def find_structures_by_id(self, ids, course_context=None):
......
...@@ -794,7 +794,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup): ...@@ -794,7 +794,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup):
# find: find parent (definition.children) 2x, find draft item, get inheritance items # find: find parent (definition.children) 2x, find draft item, get inheritance items
# send: one delete query for specific item # send: one delete query for specific item
# Split: # Split:
# find: active_version & structure # find: active_version & structure (cached)
# send: update structure and active_versions # send: update structure and active_versions
@ddt.data(('draft', 4, 1), ('split', 2, 2)) @ddt.data(('draft', 4, 1), ('split', 2, 2))
@ddt.unpack @ddt.unpack
......
...@@ -94,6 +94,10 @@ CACHES = { ...@@ -94,6 +94,10 @@ CACHES = {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
'LOCATION': 'edx_location_mem_cache', 'LOCATION': 'edx_location_mem_cache',
}, },
'course_structure_cache': {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
'LOCATION': 'edx_course_structure_mem_cache',
},
} }
......
...@@ -207,7 +207,9 @@ CACHES = { ...@@ -207,7 +207,9 @@ CACHES = {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
'LOCATION': 'edx_location_mem_cache', 'LOCATION': 'edx_location_mem_cache',
}, },
'course_structure_cache': {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
},
} }
# Dummy secret key for dev # Dummy secret key for dev
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment