grades.py 21.7 KB
Newer Older
1 2
# Compute grades using real division, with no integer truncation
from __future__ import division
3
from collections import defaultdict
4
import json
5
import random
6
import logging
7

8
from contextlib import contextmanager
9
from django.conf import settings
10 11 12
from django.db import transaction
from django.test.client import RequestFactory

13
import dogstats_wrapper as dog_stats_api
14

15
from courseware import courses
16
from courseware.model_data import FieldDataCache
Will Daly committed
17
from student.models import anonymous_id_for_user
18
from util.module_utils import yield_dynamic_descriptor_descendents
19 20
from xmodule import graders
from xmodule.graders import Score
21 22
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError
23
from xmodule.util.duedate import get_extended_due_date
24
from .models import StudentModule
25
from .module_render import get_module_for_descriptor
26
from submissions import api as sub_api  # installed from the edx-submissions repository
27
from opaque_keys import InvalidKeyError
28

Calen Pennington committed
29

30
log = logging.getLogger("edx.courseware")
Calen Pennington committed
31

32

33
def answer_distributions(course_key):
    """
    Given a course_key, return answer distributions in the form of a dictionary
    mapping:

      (problem url_name, problem display_name, problem_id) -> {dict: answer -> count}

    Answer distributions are found by iterating through all StudentModule
    entries for a given course with type="problem" and a grade that is not null.
    This means that we only count LoncapaProblems that people have submitted.
    Other types of items like ORA or sequences will not be collected. Empty
    Loncapa problem state that gets created from running the progress page is
    also not counted.

    This method accesses the StudentModule table directly instead of using the
    CapaModule abstraction. The main reason for this is so that we can generate
    the report without any side-effects -- we don't have to worry about answer
    distribution potentially causing re-evaluation of the student answer. This
    also allows us to use the read-replica database, which reduces risk of bad
    locking behavior. And quite frankly, it makes this a lot less confusing.

    Also, we're pulling all available records from the database for this course
    rather than crawling through a student's course-tree -- the latter could
    potentially cause us trouble with A/B testing. The distribution report may
    not be aware of problems that are not visible to the user being used to
    generate the report.

    This method will try to use a read-replica database if one is available.
    """
    # dict: { module.module_state_key : (url_name, display_name) }
    state_keys_to_problem_info = {}  # For caching, used by url_and_display_name

    def url_and_display_name(usage_key):
        """
        For a given usage_key, return the problem's url and display_name.
        Handle modulestore access and caching. This method ignores permissions.

        Raises:
            InvalidKeyError: if the usage_key does not parse
            ItemNotFoundError: if there is no content that corresponds
                to this usage_key.
        """
        problem_store = modulestore()
        if usage_key not in state_keys_to_problem_info:
            problem = problem_store.get_item(usage_key)
            problem_info = (problem.url_name, problem.display_name_with_default)
            state_keys_to_problem_info[usage_key] = problem_info

        return state_keys_to_problem_info[usage_key]

    # Iterate through all problems submitted for this course in no particular
    # order, and build up our answer_counts dict that we will eventually return
    answer_counts = defaultdict(lambda: defaultdict(int))
    for module in StudentModule.all_submitted_problems_read_only(course_key):
        try:
            # module.state is a JSON blob; an empty/None state means no answers.
            state_dict = json.loads(module.state) if module.state else {}
            raw_answers = state_dict.get("student_answers", {})
        except ValueError:
            # Corrupt state for one module shouldn't kill the whole report.
            log.error(
                u"Answer Distribution: Could not parse module state for StudentModule id=%s, course=%s",
                module.id,
                course_key,
            )
            continue

        try:
            # map_into_course re-keys the stored usage key into this course run
            # before the modulestore lookup.
            url, display_name = url_and_display_name(module.module_state_key.map_into_course(course_key))
            # Each problem part has an ID that is derived from the
            # module.module_state_key (with some suffix appended)
            for problem_part_id, raw_answer in raw_answers.items():
                # Convert whatever raw answers we have (numbers, unicode, None, etc.)
                # to be unicode values. Note that if we get a string, it's always
                # unicode and not str -- state comes from the json decoder, and that
                # always returns unicode for strings.
                answer = unicode(raw_answer)
                answer_counts[(url, display_name, problem_part_id)][answer] += 1

        except (ItemNotFoundError, InvalidKeyError):
            msg = "Answer Distribution: Item {} referenced in StudentModule {} " + \
                  "for user {} in course {} not found; " + \
                  "This can happen if a student answered a question that " + \
                  "was later deleted from the course. This answer will be " + \
                  "omitted from the answer distribution CSV."
            log.warning(
                msg.format(module.module_state_key, module.id, module.student_id, course_key)
            )
            continue

    return answer_counts
122

123

124 125
@transaction.commit_manually
def grade(student, request, course, keep_raw_scores=False):
    """
    Compute a student's grade for a course.

    Thin public entry point: all of the real work happens in "_grade".
    The manual_transaction wrapper exists only so that unanticipated
    errors roll the transaction back cleanly instead of leaving it open.
    """
    with manual_transaction():
        gradeset = _grade(student, request, course, keep_raw_scores)
    return gradeset


def _grade(student, request, course, keep_raw_scores):
    """
    Unwrapped version of "grade"

    This grades a student as quickly as possible. It returns the
    output from the course grader, augmented with the final letter
    grade. The keys in the output are:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
      up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
      make up the final grade. (For display)
    - raw_scores : present only when keep_raw_scores is True; contains
      scores for every graded module.

    Arguments:
        student: a User object for the student being graded
        request: the current request, needed to instantiate XModules
        course: a CourseDescriptor for the course being graded
        keep_raw_scores: if True, per-module scores are kept under 'raw_scores'

    More information on the format is in the docstring for CourseGrader.
    """
    grading_context = course.grading_context
    raw_scores = []

    # Dict of item_ids -> (earned, possible) point tuples. This *only* grabs
    # scores that were registered with the submissions API, which for the moment
    # means only openassessment (edx-ora2)
    submissions_scores = sub_api.get_scores(
        course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id)
    )

    totaled_scores = {}
    # This next complicated loop is just to collect the totaled_scores, which is
    # passed to the grader
    for section_format, sections in grading_context['graded_sections'].iteritems():
        format_scores = []
        for section in sections:
            section_descriptor = section['section_descriptor']
            section_name = section_descriptor.display_name_with_default

            # some problems have state that is updated independently of interaction
            # with the LMS, so they need to always be scored. (E.g. foldit.,
            # combinedopenended)
            should_grade_section = any(
                descriptor.always_recalculate_grades for descriptor in section['xmoduledescriptors']
            )

            # If there are no problems that always have to be regraded, check to
            # see if any of our locations are in the scores from the submissions
            # API. If scores exist, we have to calculate grades for this section.
            if not should_grade_section:
                should_grade_section = any(
                    descriptor.location.to_deprecated_string() in submissions_scores
                    for descriptor in section['xmoduledescriptors']
                )

            # Last resort: hit the database to see if the student has any saved
            # state at all for problems in this section.
            if not should_grade_section:
                with manual_transaction():
                    should_grade_section = StudentModule.objects.filter(
                        student=student,
                        module_state_key__in=[
                            descriptor.location for descriptor in section['xmoduledescriptors']
                        ]
                    ).exists()

            # If we haven't seen a single problem in the section, we don't have
            # to grade it at all! We can assume 0%
            if should_grade_section:
                scores = []

                def create_module(descriptor):
                    '''creates an XModule instance given a descriptor'''
                    # TODO: We need the request to pass into here. If we could forego that, our arguments
                    # would be simpler
                    with manual_transaction():
                        field_data_cache = FieldDataCache([descriptor], course.id, student)
                    return get_module_for_descriptor(student, request, descriptor, field_data_cache, course.id)

                for module_descriptor in yield_dynamic_descriptor_descendents(section_descriptor, create_module):

                    (correct, total) = get_score(
                        course.id, student, module_descriptor, create_module, scores_cache=submissions_scores
                    )
                    # (None, None) means "not a scorable problem"; skip it.
                    if correct is None and total is None:
                        continue

                    if settings.GENERATE_PROFILE_SCORES:  # for debugging!
                        if total > 1:
                            correct = random.randrange(max(total - 2, 1), total + 1)
                        else:
                            correct = total

                    graded = module_descriptor.graded
                    if not total > 0:
                        # We simply cannot grade a problem that is 12/0, because we might need it as a percentage
                        graded = False

                    scores.append(Score(correct, total, graded, module_descriptor.display_name_with_default))

                _, graded_total = graders.aggregate_scores(scores, section_name)
                if keep_raw_scores:
                    raw_scores += scores
            else:
                # Never-attempted section: counts as 0 out of 1, graded.
                graded_total = Score(0.0, 1.0, True, section_name)

            # Add the graded total to totaled_scores
            if graded_total.possible > 0:
                format_scores.append(graded_total)
            else:
                log.info(
                    "Unable to grade a section with a total possible score of zero. " +
                    str(section_descriptor.location)
                )

        totaled_scores[section_format] = format_scores

    grade_summary = course.grader.grade(totaled_scores, generate_random_scores=settings.GENERATE_PROFILE_SCORES)

    # We round the grade here, to make sure that the grade is a whole percentage and
    # doesn't get displayed differently than it gets graded
    grade_summary['percent'] = round(grade_summary['percent'] * 100 + 0.05) / 100

    letter_grade = grade_for_percentage(course.grade_cutoffs, grade_summary['percent'])
    grade_summary['grade'] = letter_grade
    grade_summary['totaled_scores'] = totaled_scores  # make this available, eg for instructor download & debugging
    if keep_raw_scores:
        # way to get all RAW scores out to instructor
        # so grader can be double-checked
        grade_summary['raw_scores'] = raw_scores
    return grade_summary
264

Calen Pennington committed
265

266 267
def grade_for_percentage(grade_cutoffs, percentage):
    """
    Map a final course percentage to a letter grade.

    Returns a letter grade as defined in grading_policy (e.g. 'A' 'B' 'C'
    for 6.002x), or None if the percentage is below every cutoff.

    Arguments
    - grade_cutoffs is a dictionary mapping a grade to the lowest
        possible percentage to earn that grade.
    - percentage is the final percent across all problems in a course
    """
    # Walk the possible grades from the highest cutoff down; the first
    # cutoff the percentage meets is the best grade earned.
    for candidate in sorted(grade_cutoffs, key=grade_cutoffs.get, reverse=True):
        if percentage >= grade_cutoffs[candidate]:
            return candidate

    # Below every cutoff: no letter grade earned.
    return None
286

287

288 289 290 291 292 293 294 295 296 297
@transaction.commit_manually
def progress_summary(student, request, course):
    """
    Public entry point for building a student's progress summary.

    Defers to "_progress_summary" inside manual_transaction so that any
    unanticipated error rolls the transaction back instead of leaving it
    dangling.
    """
    with manual_transaction():
        summary = _progress_summary(student, request, course)
    return summary


298 299 300
# TODO: This method is not very good. It was written in the old course style and
# then converted over and performance is not good. Once the progress page is redesigned
# to not have the progress summary this method should be deleted (so it won't be copied).
def _progress_summary(student, request, course):
    """
    Unwrapped version of "progress_summary".

    This pulls a summary of all problems in the course.

    Returns
    - courseware_summary is a summary of all sections with problems in the course.
    It is organized as an array of chapters, each containing an array of sections,
    each containing an array of scores. This contains information for graded and
    ungraded problems, and is good for displaying a course summary with due dates,
    etc.

    Arguments:
        student: A User object for the student to grade
        request: the current request, needed to instantiate XModules
        course: A Descriptor containing the course to grade

    If the student does not have access to load the course module, this function
    will return None.

    """
    with manual_transaction():
        field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
            course.id, student, course, depth=None
        )
        # TODO: We need the request to pass into here. If we could
        # forego that, our arguments would be simpler
        course_module = get_module_for_descriptor(student, request, course, field_data_cache, course.id)
        if not course_module:
            # This student must not have access to the course.
            return None

    # Scores registered through the submissions API (currently edx-ora2 only);
    # these take precedence inside get_score.
    submissions_scores = sub_api.get_scores(course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id))

    chapters = []
    # Don't include chapters that aren't displayable (e.g. due to error)
    for chapter_module in course_module.get_display_items():
        # Skip if the chapter is hidden
        if chapter_module.hide_from_toc:
            continue

        sections = []

        for section_module in chapter_module.get_display_items():
            # Skip if the section is hidden
            with manual_transaction():
                if section_module.hide_from_toc:
                    continue

                graded = section_module.graded
                scores = []

                module_creator = section_module.xmodule_runtime.get_module

                for module_descriptor in yield_dynamic_descriptor_descendents(section_module, module_creator):
                    course_id = course.id
                    (correct, total) = get_score(
                        course_id, student, module_descriptor, module_creator, scores_cache=submissions_scores
                    )
                    # (None, None) means "not a scorable problem"; skip it.
                    if correct is None and total is None:
                        continue

                    scores.append(Score(correct, total, graded, module_descriptor.display_name_with_default))

                scores.reverse()
                section_total, _ = graders.aggregate_scores(
                    scores, section_module.display_name_with_default)

                module_format = section_module.format if section_module.format is not None else ''
                sections.append({
                    'display_name': section_module.display_name_with_default,
                    'url_name': section_module.url_name,
                    'scores': scores,
                    'section_total': section_total,
                    'format': module_format,
                    'due': get_extended_due_date(section_module),
                    'graded': graded,
                })

        chapters.append({
            'course': course.display_name_with_default,
            'display_name': chapter_module.display_name_with_default,
            'url_name': chapter_module.url_name,
            'sections': sections
        })

    return chapters
388

Will Daly committed
389 390

def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=None):
    """
    Return the score for a user on a problem, as a tuple (correct, total).
    e.g. (5,7) if you got 5 out of 7 points.

    If this problem doesn't have a score, or we couldn't load it, returns (None,
    None).

    user: a Student object
    problem_descriptor: an XModuleDescriptor
    module_creator: a function that takes a descriptor, and returns the corresponding XModule for this user.
           Can return None if user doesn't have access, or if something else went wrong.
    scores_cache: A dict of location names to (earned, possible) point tuples.
           If an entry is found in this cache, it takes precedence.
    """
    scores_cache = scores_cache or {}

    # Anonymous users never have scores.
    if not user.is_authenticated():
        return (None, None)

    location_url = problem_descriptor.location.to_deprecated_string()
    if location_url in scores_cache:
        return scores_cache[location_url]

    # some problems have state that is updated independently of interaction
    # with the LMS, so they need to always be scored. (E.g. foldit.)
    if problem_descriptor.always_recalculate_grades:
        problem = module_creator(problem_descriptor)
        if problem is None:
            return (None, None)
        score = problem.get_score()
        if score is not None:
            return (score['score'], score['total'])
        else:
            return (None, None)

    if not problem_descriptor.has_score:
        # These are not problems, and do not have a score
        return (None, None)

    try:
        student_module = StudentModule.objects.get(
            student=user,
            course_id=course_id,
            module_state_key=problem_descriptor.location
        )
    except StudentModule.DoesNotExist:
        student_module = None

    if student_module is not None and student_module.max_grade is not None:
        correct = student_module.grade if student_module.grade is not None else 0
        total = student_module.max_grade
    else:
        # If the problem was not in the cache, or hasn't been graded yet,
        # we need to instantiate the problem.
        # Otherwise, the max score (cached in student_module) won't be available
        problem = module_creator(problem_descriptor)
        if problem is None:
            return (None, None)

        correct = 0.0
        total = problem.max_score()

        # Problem may be an error module (if something in the problem builder failed)
        # In which case total might be None
        if total is None:
            return (None, None)

    # Now we re-weight the problem, if specified
    weight = problem_descriptor.weight
    if weight is not None:
        if total == 0:
            # Use log.error with lazy args rather than log.exception: we are not
            # inside an exception handler here, so there is no traceback to
            # attach (log.exception would emit a bogus "NoneType: None").
            log.error("Cannot reweight a problem with zero total points. Problem: %s", student_module)
            return (correct, total)
        correct = correct * weight / total
        total = weight

    return (correct, total)
468 469 470 471 472 473 474 475 476 477 478 479 480 481


@contextmanager
def manual_transaction():
    """
    A context manager for managing manual transactions.

    Commits the current transaction when the wrapped block completes
    normally. On any exception it rolls back, logs the traceback, and
    re-raises so the caller still sees the original error.
    """
    try:
        yield
    except Exception:  # deliberately broad: any failure must roll back
        transaction.rollback()
        log.exception('Due to an error, this transaction has been rolled back')
        raise
    else:
        transaction.commit()

482

483 484 485 486 487
def iterate_grades_for(course_id, students):
    """Given a course_id and an iterable of students (User), yield a tuple of:

    (student, gradeset, err_msg) for every student enrolled in the course.

    If an error occurred, gradeset will be an empty dict and err_msg will be an
    exception message. If there was no error, err_msg is an empty string.

    The gradeset is a dictionary with the following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
        up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
        make up the final grade. (For display)
    - raw_scores: contains scores for every graded module
    """
    course = courses.get_course_by_id(course_id)

    # The grading code expects to be able to inspect a request, so fabricate
    # one here and swap the correct user onto it before grading each student.
    fake_request = RequestFactory().get('/')

    for student in students:
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=[u'action:{}'.format(course_id)]):
            try:
                fake_request.user = student
                # Grading calls problem rendering, which calls masquerading,
                # which checks session vars -- thus the empty session dict below.
                # It's not pretty, but untangling that is currently beyond the
                # scope of this feature.
                fake_request.session = {}
                gradeset = grade(student, fake_request, course)
                yield student, gradeset, ""
            except Exception as exc:  # pylint: disable=broad-except
                # One ungradable student shouldn't stop the whole iteration;
                # log the failure and keep marching on.
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course_id,
                    exc.message
                )
                yield student, {}, exc.message