grades.py 22.7 KB
Newer Older
1 2
# Compute grades using real division, with no integer truncation
from __future__ import division
3
from collections import defaultdict
4
import json
5
import random
6
import logging
7

8
from contextlib import contextmanager
9
from django.conf import settings
10 11 12 13
from django.db import transaction
from django.test.client import RequestFactory

from dogapi import dog_stats_api
14

15
from courseware import courses
16
from courseware.model_data import FieldDataCache
Will Daly committed
17 18
from student.models import anonymous_id_for_user
from submissions import api as sub_api
19 20
from xmodule import graders
from xmodule.graders import Score
21 22
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError
23
from xmodule.util.duedate import get_extended_due_date
24
from .models import StudentModule
25
from .module_render import get_module_for_descriptor
26

27
log = logging.getLogger("edx.courseware")
28

Calen Pennington committed
29

30 31 32 33 34 35 36 37 38
def yield_dynamic_descriptor_descendents(descriptor, module_creator):
    """
    Yield the given descriptor and every descendant descriptor beneath it.

    If a descriptor reports dynamic children, an XModule is instantiated for
    it via ``module_creator`` so the real children can be resolved; otherwise
    the static children from the descriptor itself are used. If the module
    cannot be created (``module_creator`` returns None), the descriptor is
    treated as childless.
    """
    def _children_of(desc):
        """Return the child descriptors of desc, resolving dynamic children."""
        if not desc.has_dynamic_children():
            return desc.get_children()
        module = module_creator(desc)
        return module.get_child_descriptors() if module is not None else []

    # Depth-first traversal via an explicit stack; each node is yielded
    # before its children are visited.
    to_visit = [descriptor]
    while to_visit:
        current = to_visit.pop()
        to_visit.extend(_children_of(current))
        yield current
Calen Pennington committed
51

52

53
def answer_distributions(course_id):
    """
    Given a course_id, return answer distributions in the form of a dictionary
    mapping:

      (problem url_name, problem display_name, problem_id) -> {dict: answer -> count}

    Answer distributions are found by iterating through all StudentModule
    entries for a given course with type="problem" and a grade that is not null.
    This means that we only count LoncapaProblems that people have submitted.
    Other types of items like ORA or sequences will not be collected. Empty
    Loncapa problem state that gets created from running the progress page is
    also not counted.

    This method accesses the StudentModule table directly instead of using the
    CapaModule abstraction. The main reason for this is so that we can generate
    the report without any side-effects -- we don't have to worry about answer
    distribution potentially causing re-evaluation of the student answer. This
    also allows us to use the read-replica database, which reduces risk of bad
    locking behavior. And quite frankly, it makes this a lot less confusing.

    Also, we're pulling all available records from the database for this course
    rather than crawling through a student's course-tree -- the latter could
    potentially cause us trouble with A/B testing. The distribution report may
    not be aware of problems that are not visible to the user being used to
    generate the report.

    This method will try to use a read-replica database if one is available.
    """
    # dict: { module.module_state_key : (url_name, display_name) }
    state_keys_to_problem_info = {}  # For caching, used by url_and_display_name

    def url_and_display_name(module_state_key):
        """
        For a given module_state_key, return the problem's url and display_name.
        Handle modulestore access and caching. This method ignores permissions.
        May throw an ItemNotFoundError if there is no content that corresponds
        to this module_state_key.
        """
        problem_store = modulestore()
        # Only hit the modulestore on a cache miss; subsequent lookups for the
        # same problem are served from state_keys_to_problem_info.
        if module_state_key not in state_keys_to_problem_info:
            problems = problem_store.get_items(module_state_key, course_id=course_id, depth=1)
            if not problems:
                # Likely means that the problem was deleted from the course
                # after the student had answered. We log this suspicion where
                # this exception is caught.
                raise ItemNotFoundError(
                    "Answer Distribution: Module {} not found for course {}"
                    .format(module_state_key, course_id)
                )
            problem = problems[0]
            problem_info = (problem.url_name, problem.display_name_with_default)
            state_keys_to_problem_info[module_state_key] = problem_info

        return state_keys_to_problem_info[module_state_key]

    # Iterate through all problems submitted for this course in no particular
    # order, and build up our answer_counts dict that we will eventually return
    answer_counts = defaultdict(lambda: defaultdict(int))
    for module in StudentModule.all_submitted_problems_read_only(course_id):
        try:
            # module.state is a JSON blob; an empty/None state counts as no answers.
            state_dict = json.loads(module.state) if module.state else {}
            raw_answers = state_dict.get("student_answers", {})
        except ValueError:
            # Corrupt state row -- log it and skip rather than abort the report.
            log.error(
                "Answer Distribution: Could not parse module state for " +
                "StudentModule id={}, course={}".format(module.id, course_id)
            )
            continue

        # Each problem part has an ID that is derived from the
        # module.module_state_key (with some suffix appended)
        for problem_part_id, raw_answer in raw_answers.items():
            # Convert whatever raw answers we have (numbers, unicode, None, etc.)
            # to be unicode values. Note that if we get a string, it's always
            # unicode and not str -- state comes from the json decoder, and that
            # always returns unicode for strings. (Python 2 `unicode` builtin.)
            answer = unicode(raw_answer)

            try:
                url, display_name = url_and_display_name(module.module_state_key)
            except ItemNotFoundError:
                msg = "Answer Distribution: Item {} referenced in StudentModule {} " + \
                      "for user {} in course {} not found; " + \
                      "This can happen if a student answered a question that " + \
                      "was later deleted from the course. This answer will be " + \
                      "omitted from the answer distribution CSV."
                log.warning(
                    msg.format(module.module_state_key, module.id, module.student_id, course_id)
                )
                continue

            answer_counts[(url, display_name, problem_part_id)][answer] += 1

    return answer_counts
148

149 150
@transaction.commit_manually
def grade(student, request, course, keep_raw_scores=False):
    """
    Compute the grade summary for ``student`` in ``course``.

    Wraps "_grade" with the manual_transaction context manager just in case
    there are unanticipated errors, so a failure rolls back any pending
    database work instead of leaving a dangling manual transaction.

    Arguments:
        student: a User object for the student being graded
        request: the current request (grading code instantiates modules,
            which need it)
        course: a CourseDescriptor for the course being graded
        keep_raw_scores: if True, the returned summary includes a
            'raw_scores' key with every graded module's Score

    Returns the grade-summary dict produced by "_grade".
    """
    with manual_transaction():
        return _grade(student, request, course, keep_raw_scores)


def _grade(student, request, course, keep_raw_scores):
    """
    Unwrapped version of "grade"

    This grades a student as quickly as possible. It returns the
    output from the course grader, augmented with the final letter
    grade. The keys in the output are:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
      up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
      make up the final grade. (For display)
    - keep_raw_scores : if True, then value for key 'raw_scores' contains scores
      for every graded module

    Arguments:
        student: a User object for the student being graded
        request: the current request; needed to instantiate XModules
        course: a CourseDescriptor for the course being graded
        keep_raw_scores: if True, include per-module Scores under 'raw_scores'

    More information on the format is in the docstring for CourseGrader.
    """
    grading_context = course.grading_context
    raw_scores = []

    # Dict of item_ids -> (earned, possible) point tuples. This *only* grabs
    # scores that were registered with the submissions API, which for the moment
    # means only openassessment (edx-ora2)
    submissions_scores = sub_api.get_scores(course.id, anonymous_id_for_user(student, course.id))

    totaled_scores = {}
    # This next complicated loop is just to collect the totaled_scores, which is
    # passed to the grader
    for section_format, sections in grading_context['graded_sections'].iteritems():
        format_scores = []
        for section in sections:
            section_descriptor = section['section_descriptor']
            section_name = section_descriptor.display_name_with_default

            # some problems have state that is updated independently of interaction
            # with the LMS, so they need to always be scored. (E.g. foldit.,
            # combinedopenended)
            should_grade_section = any(
                descriptor.always_recalculate_grades for descriptor in section['xmoduledescriptors']
            )

            # If there are no problems that always have to be regraded, check to
            # see if any of our locations are in the scores from the submissions
            # API. If scores exist, we have to calculate grades for this section.
            if not should_grade_section:
                should_grade_section = any(
                    descriptor.location.url() in submissions_scores
                    for descriptor in section['xmoduledescriptors']
                )

            # Last resort: a single StudentModule row for any problem in the
            # section means the student has interacted with it, so grade it.
            if not should_grade_section:
                with manual_transaction():
                    should_grade_section = StudentModule.objects.filter(
                        student=student,
                        module_state_key__in=[
                            descriptor.location for descriptor in section['xmoduledescriptors']
                        ]
                    ).exists()

            # If we haven't seen a single problem in the section, we don't have
            # to grade it at all! We can assume 0%
            if should_grade_section:
                scores = []

                def create_module(descriptor):
                    '''creates an XModule instance given a descriptor'''
                    # TODO: We need the request to pass into here. If we could forego that, our arguments
                    # would be simpler
                    with manual_transaction():
                        field_data_cache = FieldDataCache([descriptor], course.id, student)
                    return get_module_for_descriptor(student, request, descriptor, field_data_cache, course.id)

                for module_descriptor in yield_dynamic_descriptor_descendents(section_descriptor, create_module):

                    (correct, total) = get_score(
                        course.id, student, module_descriptor, create_module, scores_cache=submissions_scores
                    )
                    # (None, None) means "no score for this module" -- skip it.
                    if correct is None and total is None:
                        continue

                    # Debug/profiling mode: replace real scores with random ones.
                    if settings.GENERATE_PROFILE_SCORES:  # for debugging!
                        if total > 1:
                            correct = random.randrange(max(total - 2, 1), total + 1)
                        else:
                            correct = total

                    graded = module_descriptor.graded
                    if not total > 0:
                        # We simply cannot grade a problem that is 12/0, because we might need it as a percentage
                        graded = False

                    scores.append(Score(correct, total, graded, module_descriptor.display_name_with_default))

                _, graded_total = graders.aggregate_scores(scores, section_name)
                if keep_raw_scores:
                    raw_scores += scores
            else:
                # No activity in this section: assume 0 of 1 possible points.
                graded_total = Score(0.0, 1.0, True, section_name)

            # Add the graded total to totaled_scores
            if graded_total.possible > 0:
                format_scores.append(graded_total)
            else:
                # NOTE(review): log.exception outside an except block attaches
                # no useful traceback; log.error may be what was intended.
                log.exception("Unable to grade a section with a total possible score of zero. " +
                              str(section_descriptor.location))

        totaled_scores[section_format] = format_scores

    grade_summary = course.grader.grade(totaled_scores, generate_random_scores=settings.GENERATE_PROFILE_SCORES)

    # Snap 'percent' to a whole percentage (scale to percent, nudge up by
    # 0.05 so borderline values round upward, round, scale back) so the
    # stored value matches what gets displayed.
    grade_summary['percent'] = round(grade_summary['percent'] * 100 + 0.05) / 100

    letter_grade = grade_for_percentage(course.grade_cutoffs, grade_summary['percent'])
    grade_summary['grade'] = letter_grade
    grade_summary['totaled_scores'] = totaled_scores  # make this available, eg for instructor download & debugging
    if keep_raw_scores:
        grade_summary['raw_scores'] = raw_scores        # way to get all RAW scores out to instructor
                                                        # so grader can be double-checked
    return grade_summary
284

Calen Pennington committed
285

286 287
def grade_for_percentage(grade_cutoffs, percentage):
    """
    Return the letter grade earned for ``percentage``, or None if the
    percentage is below every cutoff.

    Arguments:
    - grade_cutoffs: a dictionary mapping a grade (e.g. 'A', 'B', 'C' for
      6.002x) to the lowest possible percentage that earns that grade.
    - percentage: the final percent across all problems in a course.
    """
    # Walk the grades from the highest cutoff down; the first cutoff the
    # student meets is the best grade they earned.
    for candidate in sorted(grade_cutoffs, key=grade_cutoffs.get, reverse=True):
        if percentage >= grade_cutoffs[candidate]:
            return candidate
    return None
306

307

308 309 310 311 312 313 314 315 316 317
@transaction.commit_manually
def progress_summary(student, request, course):
    """
    Wraps "_progress_summary" with the manual_transaction context manager just
    in case there are unanticipated errors.

    Arguments:
        student: a User object for the student whose progress is summarized
        request: the current request (needed to instantiate modules)
        course: a CourseDescriptor for the course

    Returns the chapter list from "_progress_summary", or None if the
    student cannot load the course module.
    """
    with manual_transaction():
        return _progress_summary(student, request, course)


318 319 320
# TODO: This method is not very good. It was written in the old course style and
# then converted over and performance is not good. Once the progress page is redesigned
# to not have the progress summary this method should be deleted (so it won't be copied).
321
def _progress_summary(student, request, course):
    """
    Unwrapped version of "progress_summary".

    This pulls a summary of all problems in the course.

    Returns
    - courseware_summary is a summary of all sections with problems in the course.
    It is organized as an array of chapters, each containing an array of sections,
    each containing an array of scores. This contains information for graded and
    ungraded problems, and is good for displaying a course summary with due dates,
    etc.

    Arguments:
        student: A User object for the student to grade
        course: A Descriptor containing the course to grade

    If the student does not have access to load the course module, this function
    will return None.

    """
    with manual_transaction():
        # Pre-fetch all field data for the whole course tree in one pass.
        field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
            course.id, student, course, depth=None
        )
        # TODO: We need the request to pass into here. If we could
        # forego that, our arguments would be simpler
        course_module = get_module_for_descriptor(student, request, course, field_data_cache, course.id)
        if not course_module:
            # This student must not have access to the course.
            return None

    # Scores registered through the submissions API (e.g. openassessment).
    submissions_scores = sub_api.get_scores(course.id, anonymous_id_for_user(student, course.id))

    chapters = []
    # Don't include chapters that aren't displayable (e.g. due to error)
    for chapter_module in course_module.get_display_items():
        # Skip if the chapter is hidden
        if chapter_module.hide_from_toc:
            continue

        sections = []

        for section_module in chapter_module.get_display_items():
            # Skip if the section is hidden
            with manual_transaction():
                if section_module.hide_from_toc:
                    continue

                graded = section_module.graded
                scores = []

                module_creator = section_module.xmodule_runtime.get_module

                for module_descriptor in yield_dynamic_descriptor_descendents(section_module, module_creator):
                    course_id = course.id
                    (correct, total) = get_score(
                        course_id, student, module_descriptor, module_creator, scores_cache=submissions_scores
                    )
                    # (None, None) means this module has no score -- skip it.
                    if correct is None and total is None:
                        continue

                    scores.append(Score(correct, total, graded, module_descriptor.display_name_with_default))

                scores.reverse()
                section_total, _ = graders.aggregate_scores(
                    scores, section_module.display_name_with_default)

                module_format = section_module.format if section_module.format is not None else ''
                sections.append({
                    'display_name': section_module.display_name_with_default,
                    'url_name': section_module.url_name,
                    'scores': scores,
                    'section_total': section_total,
                    'format': module_format,
                    'due': get_extended_due_date(section_module),
                    'graded': graded,
                })

        chapters.append({
            'course': course.display_name_with_default,
            'display_name': chapter_module.display_name_with_default,
            'url_name': chapter_module.url_name,
            'sections': sections
        })

    return chapters
408

Will Daly committed
409 410

def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=None):
    """
    Return the score for a user on a problem, as a tuple (correct, total).
    e.g. (5, 7) if you got 5 out of 7 points.

    If this problem doesn't have a score, or we couldn't load it, returns
    (None, None).

    Arguments:
        course_id: id of the course the problem belongs to
        user: a Student object
        problem_descriptor: an XModuleDescriptor
        module_creator: a function that takes a descriptor, and returns the
            corresponding XModule for this user. Can return None if the user
            doesn't have access, or if something else went wrong.
        scores_cache: a dict of location names to (earned, possible) point
            tuples. If an entry is found in this cache, it takes precedence.
    """
    cache = scores_cache if scores_cache else {}

    # Anonymous users never have scores.
    if not user.is_authenticated():
        return (None, None)

    # A cached (earned, possible) entry from the submissions API wins outright.
    location_url = problem_descriptor.location.url()
    if location_url in cache:
        return cache[location_url]

    # Some problems (e.g. foldit) update their state outside of LMS
    # interaction, so their score must always be recomputed from a live module.
    if problem_descriptor.always_recalculate_grades:
        live_module = module_creator(problem_descriptor)
        if live_module is None:
            return (None, None)
        fresh = live_module.get_score()
        if fresh is None:
            return (None, None)
        return (fresh['score'], fresh['total'])

    # These are not problems, and do not have a score.
    if not problem_descriptor.has_score:
        return (None, None)

    try:
        state_row = StudentModule.objects.get(
            student=user,
            course_id=course_id,
            module_state_key=problem_descriptor.location
        )
    except StudentModule.DoesNotExist:
        state_row = None

    if state_row is not None and state_row.max_grade is not None:
        # A graded attempt exists -- read the cached grade from the DB row.
        earned = state_row.grade if state_row.grade is not None else 0
        possible = state_row.max_grade
    else:
        # No cached grade yet (or problem never attempted): instantiate the
        # problem, since the max score is only available from a live module.
        fresh_module = module_creator(problem_descriptor)
        if fresh_module is None:
            return (None, None)

        earned = 0.0
        possible = fresh_module.max_score()

        # Problem may be an error module (if something in the problem builder
        # failed), in which case the max score is None.
        if possible is None:
            return (None, None)

    # Now we re-weight the problem, if specified.
    weight = problem_descriptor.weight
    if weight is None:
        return (earned, possible)
    if possible == 0:
        log.exception("Cannot reweight a problem with zero total points. Problem: " + str(state_row))
        return (earned, possible)
    return (earned * weight / possible, weight)
488 489 490 491 492 493 494 495 496 497 498 499 500 501


@contextmanager
def manual_transaction():
    """
    A context manager for managing manual transactions.

    Commits the current transaction if the wrapped block succeeds; if the
    block raises any exception, rolls the transaction back, logs it, and
    re-raises.
    """
    try:
        yield
    except Exception:
        transaction.rollback()
        # log.exception attaches the traceback of the error that caused the rollback.
        log.exception('Due to an error, this transaction has been rolled back')
        raise
    else:
        transaction.commit()

502

503 504 505 506 507
def iterate_grades_for(course_id, students):
    """Given a course_id and an iterable of students (User), yield a tuple of:

    (student, gradeset, err_msg) for every student enrolled in the course.

    If an error occurred, gradeset will be an empty dict and err_msg will be an
    exception message. If there was no error, err_msg is an empty string.

    The gradeset is a dictionary with the following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
        up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
        make up the final grade. (For display)
    - raw_scores: contains scores for every graded module
    """
    course = courses.get_course_by_id(course_id)

    # We make a fake request because grading code expects to be able to look at
    # the request. We have to attach the correct user to the request before
    # grading that student.
    request = RequestFactory().get('/')

    for student in students:
        # Time each student's grading pass in datadog, tagged by course.
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=['action:{}'.format(course_id)]):
            try:
                request.user = student
                # Grading calls problem rendering, which calls masquerading,
                # which checks session vars -- thus the empty session dict below.
                # It's not pretty, but untangling that is currently beyond the
                # scope of this feature.
                request.session = {}

                gradeset = grade(student, request, course)
                yield student, gradeset, ""
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # NOTE(review): exc.message is Python 2 only; str(exc) would be
                # needed for Python 3.
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course_id,
                    exc.message
                )
                yield student, {}, exc.message