Commit bbd1d8d0 by Ned Batchelder


From code review: the hash was shallow, so nested objects could have hashed differently when they didn't need to.
parent f05b25d1
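
In a nutshell, the problem from code review: the old cache key sorted only the top level of the globals dict and hashed its repr(), so two equal structures whose nested dicts stored their keys in different orders could produce different digests and miss the cache. A minimal Python 2 sketch of the effect (a standalone illustration, not part of the commit; json_safe is omitted and the names are made up):

import hashlib

def old_style_key(globals_dict):
    # Mirrors the replaced code path: only the outermost dict is canonicalized.
    canonical_globals = sorted(globals_dict.iteritems())
    return hashlib.md5(repr(canonical_globals)).hexdigest()

# Two equal dicts whose internal key order differs (the same grow-then-shrink
# trick the new tests use).
inner1 = {k: 1 for k in "abcdefghijklmnopqrstuvwxyz"}
inner2 = dict(inner1)
for i in xrange(10000):
    inner2[i] = 1
for i in xrange(10000):
    del inner2[i]

assert inner1 == inner2
# Equal inputs, but repr() of the nested dict follows storage order, so the
# shallow key derivation need not agree:
print old_style_key({'options': inner1})
print old_style_key({'options': inner2})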
"""Capa's specialized use of codejail.safe_exec."""
from .safe_exec import safe_exec
from .safe_exec import safe_exec, update_hash
......@@ -47,6 +47,29 @@ for name, modname in ASSUMED_IMPORTS:
LAZY_IMPORTS = "".join(LAZY_IMPORTS)


def update_hash(hasher, obj):
    """
    Update a `hashlib` hasher with a nested object.

    To properly cache nested structures, we need to compute a hash from the
    entire structure, canonicalizing at every level.

    `hasher`'s `.update()` method is called a number of times, touching all of
    `obj` in the process. Only primitive JSON-safe types are supported.

    """
    hasher.update(str(type(obj)))
    if isinstance(obj, (tuple, list)):
        for e in obj:
            update_hash(hasher, e)
    elif isinstance(obj, dict):
        for k in sorted(obj):
            update_hash(hasher, k)
            update_hash(hasher, obj[k])
    else:
        hasher.update(repr(obj))


@statsd.timed('capa.safe_exec.time')
def safe_exec(code, globals_dict, random_seed=None, python_path=None, cache=None):
    """
......@@ -67,10 +90,10 @@ def safe_exec(code, globals_dict, random_seed=None, python_path=None, cache=None
    """
    # Check the cache for a previous result.
    if cache:
        canonical_globals = sorted(json_safe(globals_dict).iteritems())
        safe_globals = json_safe(globals_dict)
        md5er = hashlib.md5()
        md5er.update(repr(code))
        md5er.update(repr(canonical_globals))
        update_hash(md5er, safe_globals)
        key = "safe_exec.%r.%s" % (random_seed, md5er.hexdigest())
        cached = cache.get(key)
        if cached is not None:
......
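
For contrast, a hedged sketch of how the cache key behaves after this change: update_hash walks the whole structure and sorts dict keys at every level, so the digest depends only on contents. The helper name new_style_key is made up and json_safe is again omitted; the import path matches the tests below.

import hashlib
from capa.safe_exec import update_hash

def new_style_key(code, globals_dict, random_seed=None):
    # Same shape as the key built inside safe_exec, minus json_safe.
    md5er = hashlib.md5()
    md5er.update(repr(code))
    update_hash(md5er, globals_dict)
    return "safe_exec.%r.%s" % (random_seed, md5er.hexdigest())

# Equal dicts built in different key orders now yield identical keys, so
# equivalent problems hit the cache:
assert new_style_key("print x", {'a': {'p': 1, 'q': 2}}) == \
       new_style_key("print x", {'a': {'q': 2, 'p': 1}})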
"""Test safe_exec.py"""
import hashlib
import os.path
import random
import textwrap
import unittest
from capa.safe_exec import safe_exec
from capa.safe_exec import safe_exec, update_hash
from codejail.safe_exec import SafeExecException
......@@ -145,18 +146,73 @@ class TestSafeExecCaching(unittest.TestCase):
    def test_unicode_submission(self):
        # Check that using non-ASCII unicode does not raise an encoding error.
        # Try several non-ASCII unicode characters
        for code in [129, 500, 2**8 - 1, 2**16 - 1]:
            code_with_unichr = unicode("# ") + unichr(code)
            try:
                safe_exec(code_with_unichr, {}, cache=DictCache({}))
            except UnicodeEncodeError:
                self.fail("Tried executing code with non-ASCII unicode: {0}".format(code))

class TestUpdateHash(unittest.TestCase):
    """Test the safe_exec.update_hash function to be sure it canonicalizes properly."""

    def hash_obj(self, obj):
        """Return the md5 hash that `update_hash` makes for us."""
        md5er = hashlib.md5()
        update_hash(md5er, obj)
        return md5er.hexdigest()

    def equal_but_different_dicts(self):
        """
        Make two equal dicts with different key order.

        Simple literals won't do it. Filling one and then shrinking it will
        make them different.

        """
        d1 = {k: 1 for k in "abcdefghijklmnopqrstuvwxyz"}
        d2 = dict(d1)
        for i in xrange(10000):
            d2[i] = 1
        for i in xrange(10000):
            del d2[i]
        # Check that our dicts are equal, but with different key order.
        self.assertEqual(d1, d2)
        self.assertNotEqual(d1.keys(), d2.keys())
        return d1, d2

    def test_simple_cases(self):
        h1 = self.hash_obj(1)
        h10 = self.hash_obj(10)
        hs1 = self.hash_obj("1")
        self.assertNotEqual(h1, h10)
        self.assertNotEqual(h1, hs1)

    def test_list_ordering(self):
        h1 = self.hash_obj({'a': [1, 2, 3]})
        h2 = self.hash_obj({'a': [3, 2, 1]})
        self.assertNotEqual(h1, h2)

    def test_dict_ordering(self):
        d1, d2 = self.equal_but_different_dicts()
        h1 = self.hash_obj(d1)
        h2 = self.hash_obj(d2)
        self.assertEqual(h1, h2)

    def test_deep_ordering(self):
        d1, d2 = self.equal_but_different_dicts()
        o1 = {'a': [1, 2, [d1], 3, 4]}
        o2 = {'a': [1, 2, [d2], 3, 4]}
        h1 = self.hash_obj(o1)
        h2 = self.hash_obj(o2)
        self.assertEqual(h1, h2)

class TestRealProblems(unittest.TestCase):
    def test_802x(self):
        code = textwrap.dedent("""\
......