events.py 9.57 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
"""Assertions related to event validation"""

import json
import pprint


def assert_event_matches(expected, actual, tolerate=None):
    """
    Assert that the `actual` event matches the `expected` event, listing every discrepancy on failure.

    The goal is a clearer failure message than "{ some massive dict } != { some other massive dict }": each key that
    differs is reported on its own line. Keys are identified by period separated "paths", so "context.foo" refers to
    the marked entry in the following structure:

        {
            'context': {
                'foo': 'bar'  # this key, value pair
            }
        }

    Unlike `assertEquals`, this comparison supports differing levels of tolerance for discrepancies. Exact match
    tests are brittle: whenever a field is added to all events, every test that hardcodes a complete event structure
    has to be updated. Instead, partial assertions can be made about the structure and content of the event. Given
    this expected event:

        {
            'event_type': 'foo.bar',
            'event': {
                'user_id': 10
            }
        }

    an assertion error is raised if the actual event does not contain the above fields in their exact locations in
    the hierarchy, or if it contains them with different values. It will *not* necessarily raise an assertion error
    if the actual event contains other fields that are not listed in the expected event. For example, the following
    actual event is accepted with the default tolerance:

        {
            'event_type': 'foo.bar',
            'referer': 'http://example.com',
            'event': {
                'user_id': 10
            }
        }

    The extra "referer" field is not considered an error by default.

    Arguments:
        expected: The event structure the test expects.
        actual: The event that was really produced.
        tolerate: Optional collection of `EventMatchTolerates` flags selecting which common eventing related
            variations are acceptable. When None, `EventMatchTolerates.default()` is used. See the
            `EventMatchTolerates` class for the supported flags.

    Raises:
        AssertionError: if any differences are found. The message has the following layout:

            Unexpected differences found in structs:

            * <path>: not found in actual
            * <path>: only defined in actual
            * <path>: <expected_value> != <actual_value> (expected != actual)

            Expected:
                { <expected event> }
            Actual:
                { <actual event> }
            Tolerating:
                { <tolerated flags> }

    "<path>" is a "." separated string indicating the key that differed. In the examples above "event.user_id" would
    refer to the value of the "user_id" field contained within the dictionary referred to by the "event" field in the
    root dictionary.
    """
    differences = get_event_differences(expected, actual, tolerate=tolerate)
    if not differences:
        return

    # One bullet per discrepancy, followed by the full structures so the failure can be diagnosed from the message.
    message_lines = ['* ' + difference for difference in differences]
    message_lines += [
        '',
        'Expected:',
        block_indent(expected),
        'Actual:',
        block_indent(actual),
        'Tolerating:',
        block_indent(EventMatchTolerates.default_if_not_defined(tolerate)),
    ]
    raise AssertionError('Unexpected differences found in structs:\n\n' + '\n'.join(message_lines))


class EventMatchTolerates(object):
    """
    Flags, and ready-made groups of flags, describing how much an actual event may deviate from an expected one.

    They cover common event specific deviations so that individual tests do not need their own special case logic.
    """

    # The "event" field may be a string rather than a dict; currently this is the case for all browser events.
    STRING_PAYLOAD = 'string_payload'

    # Fields missing from the expectation may appear in the top level event dictionary.
    ROOT_EXTRA_FIELDS = 'root_extra_fields'

    # Fields missing from the expectation may appear in the "context" dictionary. New fields shared by multiple
    # events are most commonly added there, so variation in it is frequently tolerated.
    CONTEXT_EXTRA_FIELDS = 'context_extra_fields'

    # Fields missing from the expectation may appear in the "event" dictionary. Unit tests typically do not want
    # this, since there are usually only a small number of tests for a particular event type.
    PAYLOAD_EXTRA_FIELDS = 'payload_extra_fields'

    @classmethod
    def default(cls):
        """Return a new set holding a reasonable selection of tolerated variations."""
        # NOTE: "payload_extra_fields" is intentionally left out so that fields erroneously added to the payload are
        # detected by default.
        return {
            cls.CONTEXT_EXTRA_FIELDS,
            cls.ROOT_EXTRA_FIELDS,
            cls.STRING_PAYLOAD,
        }

    @classmethod
    def lenient(cls):
        """Return every known variation: the defaults plus extra payload fields."""
        return cls.default().union([cls.PAYLOAD_EXTRA_FIELDS])

    @classmethod
    def strict(cls):
        """Return an empty frozenset, i.e. tolerate nothing."""
        return frozenset()

    @classmethod
    def default_if_not_defined(cls, tolerates=None):
        """Return `tolerates` untouched, unless it is None, in which case return the default tolerance."""
        return cls.default() if tolerates is None else tolerates


def assert_events_equal(expected, actual):
    """
    Compare two events with no tolerance for variation.

    Delegates to `assert_event_matches` using the strict (empty) tolerance set, so any field that is missing, extra
    or different in the real event is reported as a failure.
    """
    no_tolerance = EventMatchTolerates.strict()
    assert_event_matches(expected, actual, tolerate=no_tolerance)


def get_event_differences(expected, actual, tolerate=None):
    """
    Collect the differences between two events, ignoring the variations listed in `tolerate`.

    Returns the list of difference descriptions produced by `compare_structs`; it is empty when the events match.
    When `tolerate` is None the default tolerance is applied.
    """
    tolerate = EventMatchTolerates.default_if_not_defined(tolerate)

    # A payload stored as a JSON string cannot be compared reliably as text since the keys may be in different
    # orders, so both payloads are parsed up front when string payloads are tolerated.
    if EventMatchTolerates.STRING_PAYLOAD in tolerate:
        expected, actual = parse_event_payload(expected), parse_event_payload(actual)

    # The dictionaries whose extra fields can be tolerated, keyed by their path, with the flag that relaxes each.
    relaxing_flag_by_path = {
        (): EventMatchTolerates.ROOT_EXTRA_FIELDS,
        ('event',): EventMatchTolerates.PAYLOAD_EXTRA_FIELDS,
        ('context',): EventMatchTolerates.CONTEXT_EXTRA_FIELDS,
    }

    def should_strict_compare(path):
        """
        Decide whether extra fields in the dictionary found at `path` count as differences.

        The strictness depends on the testing context: some tests require an exact match, others accept variance in
        the context or root fields but not in the payload (for example).
        """
        relaxing_flag = relaxing_flag_by_path.get(tuple(path))
        return relaxing_flag is None or relaxing_flag not in tolerate

    return compare_structs(expected, actual, should_strict_compare=should_strict_compare)


def block_indent(text, spaces=4):
    """
    Indent every line of `text` by the given number of spaces.

    A string is indented as it stands. Any other value is first rendered with `pprint.pformat` and the resulting
    lines are indented.

    Arguments:
        text: A (possibly multi-line) string, or any object to pretty-print.
        spaces: Number of space characters to put in front of each line.

    Returns:
        The indented lines joined with newlines. An empty string yields an empty string.
    """
    # (str, type(u'')) names the text types on both Python 2 (str, unicode) and Python 3 (str). Strings must skip
    # pformat, otherwise they would be rendered as a quoted repr instead of being indented line by line.
    if not isinstance(text, (str, type(u''))):
        text = pprint.pformat(text)
    prefix = ' ' * spaces
    return '\n'.join(prefix + line for line in text.splitlines())


def parse_event_payload(event):
    """
    Given an event, parse the "event" field as JSON if it is a string.

    The event is never modified in place. It is returned unchanged when it has no "event" field, when that field is
    not a string, or when the string is not valid JSON; otherwise a shallow copy with the parsed payload is returned.

    Arguments:
        event: A dictionary that may contain an "event" field.

    Returns:
        Either `event` itself or a shallow copy of it whose "event" field holds the decoded JSON value.
    """
    # `basestring` only exists on Python 2. (str, type(u'')) matches the same text types there (str and unicode)
    # and plain str on Python 3, so the check works on both.
    string_types = (str, type(u''))
    if 'event' not in event or not isinstance(event['event'], string_types):
        return event

    try:
        payload = json.loads(event['event'])
    except ValueError:
        # Deliberately best-effort: a payload that is not JSON stays a plain string and is compared as such.
        return event

    parsed_event = event.copy()
    parsed_event['event'] = payload
    return parsed_event


def compare_structs(expected, actual, should_strict_compare=None, path=None):
    """
    Traverse two structures to ensure that the `actual` structure contains all of the elements within the `expected`
    one.

    This is a "deep" comparison for dictionaries: when both values are dicts their keys are compared and the values
    of shared keys are compared recursively. Any other pair of values (including lists and other collections) is
    simply compared using the "!=" operator.

    Keys are visited in a sorted order so that the reported differences come out in the same order on every run.

    Arguments:
        expected: The structure that is expected.
        actual: The structure that was really produced.
        should_strict_compare: Optional callable taking the path (a list of keys) of a dictionary and returning
            True if keys that only exist in `actual` must be reported for that dictionary. When None, extra keys
            are never reported.
        path: List of keys leading to the values being compared; None means the root.

    Returns:
        A list of strings, one per difference found. Empty when `actual` satisfies `expected`.
    """
    if path is None:
        path = []
    differences = []

    if isinstance(expected, dict) and isinstance(actual, dict):
        expected_keys = frozenset(expected)
        actual_keys = frozenset(actual)

        # Sets iterate in hash order, which can change between runs. Sorting by repr gives a stable order and also
        # works when the keys are of types that cannot be ordered against each other.
        for key in sorted(expected_keys - actual_keys, key=repr):
            differences.append('{0}: not found in actual'.format(_path_to_string(path + [key])))

        if should_strict_compare is not None and should_strict_compare(path):
            for key in sorted(actual_keys - expected_keys, key=repr):
                differences.append('{0}: only defined in actual'.format(_path_to_string(path + [key])))

        for key in sorted(expected_keys & actual_keys, key=repr):
            differences.extend(compare_structs(expected[key], actual[key], should_strict_compare, path + [key]))

    elif expected != actual:
        differences.append('{path}: {a} != {b} (expected != actual)'.format(
            path=_path_to_string(path),
            a=repr(expected),
            b=repr(actual)
        ))

    return differences


def is_matching_event(expected_event, actual_event, tolerate=None):
    """Return True iff no differences are found between the events under the given tolerances."""
    differences = get_event_differences(expected_event, actual_event, tolerate=tolerate)
    return not differences


def _path_to_string(path):
    """Convert a list of path elements into a single path string."""
    return '.'.join(path)