user_state_client.py 14.4 KB
Newer Older
1 2 3 4 5
"""
An implementation of :class:`XBlockUserStateClient`, which stores XBlock Scope.user_state
data in a Django ORM model.
"""

6 7
import itertools
from operator import attrgetter
8
from time import time
9

10 11 12 13
try:
    import simplejson as json
except ImportError:
    import json
14

15
import dogstats_wrapper as dog_stats_api
16
from django.contrib.auth.models import User
Ned Batchelder committed
17
from xblock.fields import Scope
18
from courseware.models import StudentModule, BaseStudentModuleHistory
19
from edx_user_state_client.interface import XBlockUserStateClient, XBlockUserState
20 21 22


class DjangoXBlockUserStateClient(XBlockUserStateClient):
23 24
    """
    An interface that uses the Django ORM StudentModule as a backend.

    A note on the format of state storage:
        The state for an xblock is stored as a serialized JSON dictionary. The model
        field that it is stored in can also take on a value of ``None``. To preserve
        existing analytic uses, we will preserve the following semantics:

        A state of ``None`` means that the user hasn't ever looked at the xblock.
        A state of ``"{}"`` means that the XBlock has at some point stored state for
           the current user, but that this state has since been deleted.
        Otherwise, the dictionary contains all data stored for the user.

        None of these conditions should violate the semantics imposed by
        XBlockUserStateClient (for instance, once all fields have been deleted from
        an XBlock for a user, the state will be listed as ``None`` by :meth:`get_history`,
        even though the actual stored state in the database will be ``"{}"``).
    """
41

42
    # Use this sample rate for DataDog events.
43
    API_DATADOG_SAMPLE_RATE = 0.1
44

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
    class ServiceUnavailable(XBlockUserStateClient.ServiceUnavailable):
        """
        Raised when the service backing this client is currently unavailable.
        """

    class PermissionDenied(XBlockUserStateClient.PermissionDenied):
        """
        Raised when the caller is not allowed to access the requested data.
        """

    class DoesNotExist(XBlockUserStateClient.DoesNotExist):
        """
        Raised when the caller has requested data that does not exist.
        """

63 64 65 66 67 68 69
    def __init__(self, user=None):
        """
        Remember an optional, already-loaded Django user on the client.

        Arguments:
            user (:class:`~User`): A pre-fetched django user, or ``None``. When its
                username equals the one handed to `set_many`, that method reuses
                this object rather than querying for the user again.
        """
        self.user = user

72
    def _get_student_modules(self, username, block_keys):
        """
        Generate the :class:`~StudentModule` rows stored for ``username`` and ``block_keys``.

        The keys are bucketed by their ``course_key`` and ``chunked_filter`` is
        called once per course.

        Arguments:
            username (str): The name of the user to load `StudentModule`s for.
            block_keys (list of :class:`~UsageKey`): The set of XBlocks to load data for.

        Yields:
            ``(student_module, usage_key)`` tuples, where ``usage_key`` is the row's
            ``module_state_key`` mapped into the row's ``course_id``.
        """
        get_course_key = attrgetter('course_key')
        # groupby only merges adjacent items, hence the sort on the same key first.
        ordered_keys = sorted(block_keys, key=get_course_key)

        for course_key, keys_in_course in itertools.groupby(ordered_keys, get_course_key):
            matching_rows = StudentModule.objects.chunked_filter(
                'module_state_key__in',
                keys_in_course,
                student__username=username,
                course_id=course_key,
            )

            for row in matching_rows:
                yield (row, row.module_state_key.map_into_course(row.course_id))

98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
    def _ddog_increment(self, evt_time, evt_name):
        """
        Bump the DataDog counter for ``evt_name``.

        The metric is namespaced under ``DjangoXBlockUserStateClient.``, stamped
        with ``evt_time`` and sent at ``API_DATADOG_SAMPLE_RATE``.
        """
        metric_name = 'DjangoXBlockUserStateClient.{}'.format(evt_name)
        dog_stats_api.increment(
            metric_name,
            timestamp=evt_time,
            sample_rate=self.API_DATADOG_SAMPLE_RATE,
        )

    def _ddog_histogram(self, evt_time, evt_name, value):
        """
        Record ``value`` in the DataDog histogram for ``evt_name``.

        The metric is namespaced under ``DjangoXBlockUserStateClient.``, stamped
        with ``evt_time`` and sent at ``API_DATADOG_SAMPLE_RATE``.
        """
        metric_name = 'DjangoXBlockUserStateClient.{}'.format(evt_name)
        dog_stats_api.histogram(
            metric_name,
            value,
            timestamp=evt_time,
            sample_rate=self.API_DATADOG_SAMPLE_RATE,
        )

119
    def get_many(self, username, block_keys, scope=Scope.user_state, fields=None):
        """
        Retrieve the stored XBlock state for the specified XBlock usages.

        This method is a generator: nothing in it (including the ``scope``
        check) runs until the result is iterated, and the closing DataDog
        events are only submitted once the caller has consumed every item.

        Arguments:
            username: The name of the user whose state should be retrieved
            block_keys ([UsageKey]): A list of UsageKeys identifying which xblock states to load.
                ``len()`` is taken of it, so it must be a sized collection.
            scope (Scope): The scope to load data from
            fields: A list of field values to retrieve. If None, retrieve all stored fields.

        Yields:
            XBlockUserState tuples for each UsageKey in block_keys that has stored,
            non-empty state. Blocks whose stored state is ``None`` or ``{}`` are skipped.
            field_state is a dict mapping field names to values.

        Raises:
            ValueError: If ``scope`` is anything other than ``Scope.user_state``.
        """
        if scope != Scope.user_state:
            raise ValueError("Only Scope.user_state is supported, not {}".format(scope))

        block_count = 0
        evt_time = time()

        self._ddog_histogram(evt_time, 'get_many.blks_requested', len(block_keys))

        modules = self._get_student_modules(username, block_keys)
        for module, usage_key in modules:
            if module.state is None:
                # The user has a row for this block but never stored any state.
                self._ddog_increment(evt_time, 'get_many.empty_state')
                continue

            state = json.loads(module.state)

            # The size recorded is that of the serialized JSON, not the field count.
            self._ddog_histogram(evt_time, 'get_many.block_size', len(module.state))

            # If the state is the empty dict, then it has been deleted, and so
            # conformant UserStateClients should treat it as if it doesn't exist.
            if state == {}:
                continue

            if fields is not None:
                # Requested fields that are not stored are silently left out.
                state = {
                    field: state[field]
                    for field in fields
                    if field in state
                }
            block_count += 1
            yield XBlockUserState(username, usage_key, state, module.modified, scope)

        # The rest of this method exists only to submit DataDog events.
        # Remove it once we're no longer interested in the data.
        finish_time = time()
        self._ddog_histogram(evt_time, 'get_many.blks_out', block_count)
        self._ddog_histogram(evt_time, 'get_many.response_time', (finish_time - evt_time) * 1000)
171

172
    def set_many(self, username, block_keys_to_state, scope=Scope.user_state):
        """
        Set fields for one or more XBlocks.

        Arguments:
            username: The name of the user whose state should be set
            block_keys_to_state (dict): A dict mapping UsageKeys to state dicts.
                Each state dict maps field names to values. These state dicts
                are overlaid over the stored state. To delete fields, use
                :meth:`delete` or :meth:`delete_many`.
            scope (Scope): The scope to store data in

        Raises:
            ValueError: If ``scope`` is anything other than ``Scope.user_state``.

        The user is looked up with ``User.objects.get`` unless the user given to
        the constructor has a matching username, so a lookup failure propagates
        to the caller.
        """
        if scope != Scope.user_state:
            raise ValueError("Only Scope.user_state is supported")

        # We do a find_or_create for every block (rather than re-using field objects
        # that were queried in get_many) so that if the score has
        # been changed by some other piece of the code, we don't overwrite
        # that score.
        if self.user is not None and self.user.username == username:
            user = self.user
        else:
            user = User.objects.get(username=username)

        if user.is_anonymous():
            # Anonymous users cannot be persisted to the database, so let's just use
            # what we have.
            return

        evt_time = time()

        for usage_key, state in block_keys_to_state.items():
            student_module, created = StudentModule.objects.get_or_create(
                student=user,
                course_id=usage_key.course_key,
                module_state_key=usage_key,
                defaults={
                    'state': json.dumps(state),
                    'module_type': usage_key.block_type,
                },
            )

            if created:
                # A brand-new row held no fields before this call, so every
                # supplied field counts as newly set and none as updated.
                num_fields_before = 0
                num_fields_after = len(state)
            else:
                if student_module.state is None:
                    current_state = {}
                else:
                    current_state = json.loads(student_module.state)
                num_fields_before = len(current_state)
                current_state.update(state)
                num_fields_after = len(current_state)
                student_module.state = json.dumps(current_state)
                # We just read this object, so we know that we can do an update
                student_module.save(force_update=True)

            # The rest of this method exists only to submit DataDog events.
            # Remove it once we're no longer interested in the data.
            #
            # Record whether a state row has been created or updated.
            if created:
                self._ddog_increment(evt_time, 'set_many.state_created')
            else:
                self._ddog_increment(evt_time, 'set_many.state_updated')

            # Event to record number of fields sent in to set/set_many.
            self._ddog_histogram(evt_time, 'set_many.fields_in', len(state))

            # Event to record number of new fields set in set/set_many.
            num_new_fields_set = num_fields_after - num_fields_before
            self._ddog_histogram(evt_time, 'set_many.fields_set', num_new_fields_set)

            # Event to record number of existing fields updated in set/set_many.
            num_fields_updated = max(0, len(state) - num_new_fields_set)
            self._ddog_histogram(evt_time, 'set_many.fields_updated', num_fields_updated)

        # Events for the entire set_many call.
        finish_time = time()
        self._ddog_histogram(evt_time, 'set_many.blks_updated', len(block_keys_to_state))
        self._ddog_histogram(evt_time, 'set_many.response_time', (finish_time - evt_time) * 1000)
252

253 254 255 256 257 258
    def delete_many(self, username, block_keys, scope=Scope.user_state, fields=None):
        """
        Delete the stored XBlock state for many xblock usages.

        Rows are never removed from the database: deleting all fields stores the
        serialized empty dict ``"{}"``, and deleting selected fields rewrites the
        stored JSON without them.

        Arguments:
            username: The name of the user whose state should be deleted
            block_keys (list): The UsageKeys identifying which xblock states to delete.
                ``len()`` is taken of it, so it must be a sized collection.
            scope (Scope): The scope to delete data from
            fields: A list of fields to delete. If None, delete all stored fields.

        Raises:
            ValueError: If ``scope`` is anything other than ``Scope.user_state``.
        """
        if scope != Scope.user_state:
            raise ValueError("Only Scope.user_state is supported")

        evt_time = time()
        if fields is None:
            self._ddog_increment(evt_time, 'delete_many.empty_state')
        else:
            self._ddog_histogram(evt_time, 'delete_many.field_count', len(fields))

        self._ddog_histogram(evt_time, 'delete_many.block_count', len(block_keys))

        student_modules = self._get_student_modules(username, block_keys)
        for student_module, _ in student_modules:
            if fields is None:
                student_module.state = "{}"
            else:
                if student_module.state is None:
                    # Nothing has ever been stored for this block, so there are
                    # no fields to remove. Leave the ``None`` marker untouched
                    # instead of passing it to json.loads, which would raise.
                    continue

                current_state = json.loads(student_module.state)
                for field in fields:
                    current_state.pop(field, None)

                student_module.state = json.dumps(current_state)

            # We just read this object, so we know that we can do an update
            student_module.save(force_update=True)

        # Event for the entire delete_many call.
        finish_time = time()
        self._ddog_histogram(evt_time, 'delete_many.response_time', (finish_time - evt_time) * 1000)

293
    def get_history(self, username, block_key, scope=Scope.user_state):
        """
        Retrieve history of state changes for a given block for a given
        student.  We don't guarantee that history for many blocks will be fast.

        If the specified block doesn't exist, or it has no history records,
        raise :class:`~DoesNotExist`. Because this method is a generator, the
        checks below only run once the result is iterated.

        Arguments:
            username: The name of the user whose history should be retrieved.
            block_key: The key identifying which xblock history to retrieve.
            scope (Scope): The scope to load data from.

        Yields:
            XBlockUserState entries for each modification to the specified XBlock,
            in the order produced by ``BaseStudentModuleHistory.get_history``
            (documented here as latest to earliest).
        """
        if scope != Scope.user_state:
            raise ValueError("Only Scope.user_state is supported")

        matching_modules = [
            module
            for module, _ in self._get_student_modules(username, [block_key])
        ]
        if not matching_modules:
            raise self.DoesNotExist()

        history_entries = BaseStudentModuleHistory.get_history(matching_modules)

        # If no history records exist, raise an error
        if not history_entries:
            raise self.DoesNotExist()

        for entry in history_entries:
            stored = entry.state

            # A non-None state is serialized json and has to be decoded first.
            decoded = None if stored is None else json.loads(stored)

            # An empty dict means the state was deleted; for the purposes of
            # `get_history` such an entry is listed as `None`.
            if decoded == {}:
                decoded = None

            entry_key = entry.csm.module_state_key.map_into_course(entry.csm.course_id)

            yield XBlockUserState(username, entry_key, decoded, entry.created, scope)
344

345
    def iter_all_for_block(self, block_key, scope=Scope.user_state, batch_size=None):
        """
        Not implemented by this client: after checking ``scope`` it always raises.

        The intended contract is that you get no ordering guarantees, that
        fetching happens in batch_size increments, and that callers of this
        method should be running in an async task.

        Raises:
            ValueError: If ``scope`` is anything other than ``Scope.user_state``.
            NotImplementedError: In every other case.
        """
        if scope != Scope.user_state:
            raise ValueError("Only Scope.user_state is supported")

        raise NotImplementedError()

355
    def iter_all_for_course(self, course_key, block_type=None, scope=Scope.user_state, batch_size=None):
        """
        Not implemented by this client: after checking ``scope`` it always raises.

        The intended contract is that you get no ordering guarantees, that
        fetching happens in batch_size increments, and that callers of this
        method should be running in an async task.

        Raises:
            ValueError: If ``scope`` is anything other than ``Scope.user_state``.
            NotImplementedError: In every other case.
        """
        if scope != Scope.user_state:
            raise ValueError("Only Scope.user_state is supported")

        raise NotImplementedError()