Commit cc4482b4 by Don Mitchell

Merge pull request #1245 from MITx/feature/cdodge/cache-toc

add some simple in-mem caching for course TOC. This is a problem when ru...
parents 8760e766 9b72d114
...@@ -6,6 +6,7 @@ from xmodule.modulestore import Location ...@@ -6,6 +6,7 @@ from xmodule.modulestore import Location
from xmodule.seq_module import SequenceDescriptor, SequenceModule from xmodule.seq_module import SequenceDescriptor, SequenceModule
from xmodule.timeparse import parse_time, stringify_time from xmodule.timeparse import parse_time, stringify_time
from xmodule.util.decorators import lazyproperty from xmodule.util.decorators import lazyproperty
from datetime import datetime
import json import json
import logging import logging
import requests import requests
...@@ -18,6 +19,8 @@ log = logging.getLogger(__name__) ...@@ -18,6 +19,8 @@ log = logging.getLogger(__name__)
edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False,
remove_comments=True, remove_blank_text=True) remove_comments=True, remove_blank_text=True)
_cached_toc = {}
class CourseDescriptor(SequenceDescriptor): class CourseDescriptor(SequenceDescriptor):
module_class = SequenceModule module_class = SequenceModule
...@@ -50,6 +53,24 @@ class CourseDescriptor(SequenceDescriptor): ...@@ -50,6 +53,24 @@ class CourseDescriptor(SequenceDescriptor):
""" """
toc_url = self.book_url + 'toc.xml' toc_url = self.book_url + 'toc.xml'
# cdodge: I've added this caching of TOC because in Mongo-backed instances (but not Filesystem stores)
# course modules have a very short lifespan and are constantly being created and torn down.
# Since this module in the __init__() method does a synchronous call to AWS to get the TOC
# this is causing a big performance problem. So let's be a bit smarter about this and cache
# each fetch and store in-mem for 10 minutes.
# NOTE: I have to get this onto sandbox ASAP as we're having runtime failures. I'd like to swing back and
# rewrite to use the traditional Django in-memory cache.
try:
# see if we already fetched this
if toc_url in _cached_toc:
(table_of_contents, timestamp) = _cached_toc[toc_url]
age = datetime.now() - timestamp
# expire every 10 minutes
if age.seconds < 600:
return table_of_contents
except Exception as err:
pass
# Get the table of contents from S3 # Get the table of contents from S3
log.info("Retrieving textbook table of contents from %s" % toc_url) log.info("Retrieving textbook table of contents from %s" % toc_url)
try: try:
...@@ -62,6 +83,7 @@ class CourseDescriptor(SequenceDescriptor): ...@@ -62,6 +83,7 @@ class CourseDescriptor(SequenceDescriptor):
# TOC is XML. Parse it # TOC is XML. Parse it
try: try:
table_of_contents = etree.fromstring(r.text) table_of_contents = etree.fromstring(r.text)
_cached_toc[toc_url] = (table_of_contents, datetime.now())
except Exception as err: except Exception as err:
msg = 'Error %s: Unable to parse XML for textbook table of contents at %s' % (err, toc_url) msg = 'Error %s: Unable to parse XML for textbook table of contents at %s' % (err, toc_url)
log.error(msg) log.error(msg)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment