Commit 3eaa178f by Steve Komarov

added force_memoize with tests and comments

parent 6d9c0ecb
@@ -97,20 +97,30 @@ def query(category = None, name = None, description = None, args = None):
    return query_factory

def mq_force_memoize(func):
    """
    Forces memoization for a function func that has been decorated by
    @memoize_query. This means that it will always redo the computation
    and store the results in cache, regardless of whether a cached result
    already exists.
    """
    if hasattr(func, 'force_memoize'):
        print "FORCING MEMOIZE"
        return func.force_memoize
    else:
        print "not forcing memoize"
        return func

def mq_force_retrieve(func):
    """
    Forces retrieval from cache for a function func that has been decorated by
    @memoize_query. This means that it will try to get the result from cache.
    If the result is not available in cache, it will raise an exception instead
    of computing the result.
    """
    if hasattr(func, 'force_retrieve'):
        print "FORCING RETRIEVE"
        return func.force_retrieve
    else:
        print "not forcing retrieve"
        return func
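
# Usage sketch (count_events is a hypothetical function; assumes it has been
# decorated with @memoize_query below):
#
#     fresh = mq_force_memoize(count_events)(course_id)      # recompute and cache
#     try:
#         cached = mq_force_retrieve(count_events)(course_id)  # cache only
#     except KeyError:
#         cached = None   # nothing in cache yet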
@@ -120,9 +130,8 @@ def memoize_query(cache_time = 60*4, timeout = 60*15, ignores = ["<class 'pymong
        will be different per call, but function identically.
        key_override: use this as a cache key instead of computing a key from the
          function signature.
    '''
    # Helper functions
    def isuseful(a, ignores):
@@ -131,11 +140,16 @@ def memoize_query(cache_time = 60*4, timeout = 60*15, ignores = ["<class 'pymong
            return True

    def make_cache_key(f, args, kwargs):
        """
        Makes a cache key out of the function name and passed arguments.
        Assumptions: the dict gets dumped in the same order, and the
        arguments are serializable. This is okay since this is just for
        SOA queries, but it may break down if this were to be used as a
        generic memoization framework.
        """
        m = hashlib.new("md4")
        s = str({'uniquifier': 'anevt.memoize',
                 'name': f.__name__,
@@ -150,6 +164,10 @@ def memoize_query(cache_time = 60*4, timeout = 60*15, ignores = ["<class 'pymong
        return key

    def compute_and_cache(f, key, args, kwargs):
        """
        Runs f and stores the results in cache
        """
        # HACK: There's a slight race condition here, where we
        # might recompute twice.
        cache.set(key, 'Processing', timeout)
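        # A tighter guard is possible where the cache backend supports atomic
        # adds (a sketch, not part of this change): Django's cache.add()
        # returns False if the key already exists, so only one caller would
        # recompute and the rest would fall through to the wait loop:
        #
        #     if not cache.add(key, 'Processing', timeout):
        #         return get_from_cache_if_possible(f, key)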
@@ -166,6 +184,9 @@ def memoize_query(cache_time = 60*4, timeout = 60*15, ignores = ["<class 'pymong
    def get_from_cache_if_possible(f, key):
        """
        Tries to retrieve the result from cache, otherwise returns None
        """
        cached = cache.get(key)
        # If we're already computing it, wait for the
        # computation to finish
@@ -179,16 +200,17 @@ def memoize_query(cache_time = 60*4, timeout = 60*15, ignores = ["<class 'pymong
        return results

    def factory(f):
        def opmode_default(f, *args, **kwargs):
            # Get the result from cache if possible, otherwise recompute
            # and store in cache
            key = make_cache_key(f, args, kwargs)
            results = get_from_cache_if_possible(f, key)
            if results:
                #print "Cache hit %s %s" % (f.__name__, key)
                pass
            else:
                #print "Cache miss %s %s" % (f.__name__, key)
                results = compute_and_cache(f, key, args, kwargs)
            return results
@@ -196,22 +218,22 @@ def memoize_query(cache_time = 60*4, timeout = 60*15, ignores = ["<class 'pymong
            # Recompute and store in cache, regardless of whether key
            # is in cache.
            key = make_cache_key(f, args, kwargs)
            # print "Forcing memoize %s %s " % (f.__name__, key)
            results = compute_and_cache(f, key, args, kwargs)
            return results

        def opmode_forceretrieve(*args, **kwargs):
            # Retrieve from cache if possible, otherwise raise an exception
            key = make_cache_key(f, args, kwargs)
            # print "Forcing retrieve %s %s " % (f.__name__, key)
            results = get_from_cache_if_possible(f, key)
            if not results:
                raise KeyError('key %s not found in cache' % key)  # TODO better exception class?
            return results

        decfun = decorator(opmode_default, f)
        decfun.force_memoize = opmode_forcememoize    # activated by mq_force_memoize
        decfun.force_retrieve = opmode_forceretrieve  # activated by mq_force_retrieve
        return decfun
    return factory
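
# Putting it together (a sketch; count_events is a hypothetical SOA query):
#
#     @memoize_query(cache_time=60*4, timeout=60*15)
#     def count_events(course_id):
#         ...                                    # expensive query
#
#     count_events('x')                     # cache miss: compute and store
#     count_events('x')                     # cache hit: served from cache
#     mq_force_memoize(count_events)('x')   # recompute and overwrite cache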
@@ -227,28 +249,22 @@ def cron(run_every, force_memoize=False, params={}):
            python manage.py celery worker -B --loglevel=INFO
        Celery beat will automatically add tasks from files named 'tasks.py'.
    '''
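    # Typical use (a sketch; nightly_rollup is hypothetical): refresh the
    # memoized result once a day so callers always hit a warm cache.
    #
    #     @cron(run_every=timedelta(days=1), force_memoize=True)
    #     @memoize_query(cache_time=60*60*25)
    #     def nightly_rollup():
    #         ...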
print "in cron"
def factory(f):
print "in factory"
@periodic_task(run_every=run_every, name=f.__name__)
def run(func=None, *args, **kw):
# if the call originated from the periodic_task decorator
# func will be None. If the call originated from the rest of
# the code, func will be the same as f
called_as_periodic = True if func is None else False
if called_as_periodic:
print "called as periodic"
#print "called as periodic"
if force_memoize:
func = mq_force_memoize(f)
else:
func = f
else:
#called from code
print "called from code"
#print "called from code"
func = f
result = optional_parameter_call(func, default_optional_kwargs, params)
......
@@ -104,7 +104,7 @@ def get_query(f):
    return get_embed('query', config = embed_config)

def optional_parameter_call(function, optional_kwargs, passed_kwargs, arglist = None):
    ''' Calls a function with parameters:
        passed_kwargs are input parameters the function must take.
            Format: Dictionary mapping keywords to arguments.
......
import tempfile
import time
from django.core.cache import cache
from edinsights.core.decorators import memoize_query, cron
from django.utils.timezone import timedelta
@@ -10,14 +8,16 @@ def timestamp_to_tempfile(filename):
    with open(tempfile.gettempdir() + '/' + filename, 'a') as temp_file:
        temp_file.write(str(time.time()) + '\n')  # write a timestamp for each call

# Test tasks are defined in tasks.py files. Other files could also be
# included using CELERY_IMPORTS. Avoid using @cron with nested functions and
# methods (the support of @periodic_task for these is experimental).
# The @cron decorator should precede all other decorators.
@cron(run_every=timedelta(seconds=1))
def test_cron_task():
    """ Simple task that gets executed by the scheduler (celery beat).
        The test case test_cron verifies that the execution
        has taken place.
        tested by: tests.SimpleTest.test_cron
    """
    timestamp_to_tempfile('test_cron_task_counter')
@@ -25,14 +25,13 @@ def test_cron_task(params={}):
@cron(run_every=timedelta(seconds=1), force_memoize=False)  # cron decorators should go on top
@memoize_query(60, key_override='test_cron_memoize_unique_cache_key')
def test_cron_memoize_task():
    """
    Simple task that gets executed by the scheduler (celery beat).
    Combines periodic tasks and memoization, with force_memoize=False.
    This means that the periodic task will return cached results if possible.
    This scenario is probably not what you want.
    The cron decorator should precede all other decorators.
    tested by: tests.SimpleTest.test_cron_and_memoize
    """
    timestamp_to_tempfile('test_cron_memoize_task')
    return 42
@@ -42,9 +41,13 @@ def test_cron_memoize_task():
@memoize_query(cache_time=60, key_override='big_computation_key')
def big_computation():
    """
    Simple task that gets executed by the scheduler (celery beat) and also by @view.
    Combines periodic tasks and memoization, with force_memoize=False.
    This means that the periodic task will return cached results if possible.
    This scenario is probably not what you want.
    tested by: tests.SimpleTest.test_cron_and_memoize_and_view
    """
    timestamp_to_tempfile('big_computation_counter')
    return "FAKERESULT"
@@ -54,11 +57,17 @@ def big_computation():
@memoize_query(cache_time=60, key_override='big_computation_key_withfm')
def big_computation_withfm():
    """
    Simple task that gets executed by the scheduler (celery beat) and also by @view.
    Combines periodic tasks and memoization, with force_memoize=True.
    This means that the task will redo the computation regardless of
    whether the result was already in the cache when it is called from the
    task scheduler. If the task is called from code, it will return the cached
    result. This scenario is probably what you want.
    tested by: tests.SimpleTest.test_cron_and_memoize_and_view_with_forcememoize
    """
    timestamp_to_tempfile('big_computation_withfm_counter')
    return "FAKERESULTFM"

# TODO: put every task in its own file, and use CELERY_IMPORTS to run
# individual tasks instead of all tasks at the same time for each test
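# A sketch of what that could look like in the test settings (module names
# below are hypothetical; CELERY_IMPORTS takes a tuple of module paths):
#
#     CELERY_IMPORTS = ('testapp.tasks_cron', 'testapp.tasks_memoize')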
@@ -14,8 +14,12 @@ def count_timestamps(tempfilename):
    last_call = float(timestamps[-1].rstrip())
    return ncalls, last_call

def truncate_tempfile(tempfilename):
    """
    Truncates the file used to share state between the test process
    and the scheduler process (celery beat).
    """
    with open(tempfile.gettempdir() + '/' + tempfilename, 'w') as temp_file:
        pass
def run_celery_beat(seconds=3, verbose=False):
@@ -38,6 +42,7 @@ def run_celery_beat(seconds=3, verbose=False):
    sleep(seconds)
    celery_beat_process.terminate()


class SimpleTest(TestCase):
    def __init__(self, arg):
@@ -46,35 +51,30 @@ class SimpleTest(TestCase):
    def test_cron(self):
        """ Test that periodic tasks are scheduled and run
            tests: tasks.test_cron_task
        """
        # truncate the file used as a counter of test_cron_task calls
        truncate_tempfile('test_cron_task_counter')
        run_celery_beat(seconds=3, verbose=True)

        # verify number of calls and time of last call
        ncalls, last_call = count_timestamps('test_cron_task_counter')
        self.assertGreaterEqual(ncalls, 2)
        self.assertAlmostEqual(last_call, time.time(), delta=100)

    def test_cron_and_memoize(self):
        """ Test that periodic tasks are scheduled and run, and the results
            are cached.
            tests: tasks.test_cron_memoize_task
        """
        truncate_tempfile('test_cron_memoize_task')
        # clear the cache from any previous executions of this test
        cache.delete('test_cron_memoize_unique_cache_key')
        run_celery_beat(seconds=3, verbose=True)

        ncalls, last_call = count_timestamps('test_cron_memoize_task')
        self.assertEqual(ncalls, 1)  # after the first call all subsequent calls should be cached
        self.assertAlmostEqual(last_call, time.time(), delta=100)
@@ -82,11 +82,10 @@ class SimpleTest(TestCase):
    def test_cron_and_memoize_and_view(self):
        """ Test that periodic tasks are scheduled, run, cached, and the
            cached results are available to @view
            tests: tasks.big_computation
        """
        truncate_tempfile('big_computation_counter')
        # delete cache from previous executions of this unit test
@@ -112,9 +111,9 @@ class SimpleTest(TestCase):
    def test_cron_and_memoize_and_view_with_forcememoize(self):
        """ Test that periodic tasks are scheduled, run, and cached, and the
            cached results are available to @view. If the task is executed from
            the scheduler (as a periodic task) the computation should be redone and
            the new result should be stored in cache. If the task is executed from code
            (e.g. from a @view or @query handler) the result from cache should be returned.
            Tests task: tasks.big_computation_withfm
        """
......