# Compute grades using real division, with no integer truncation
from __future__ import division

from collections import defaultdict
import json
import random
import logging

from contextlib import contextmanager

from django.conf import settings
from django.db import transaction
from django.test.client import RequestFactory

import dogstats_wrapper as dog_stats_api

from courseware import courses
from courseware.model_data import FieldDataCache
from student.models import anonymous_id_for_user
from xmodule import graders
from xmodule.graders import Score
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.util.duedate import get_extended_due_date
from .models import StudentModule
from .module_render import get_module_for_descriptor
from submissions import api as sub_api  # installed from the edx-submissions repository
from opaque_keys import InvalidKeyError

log = logging.getLogger("edx.courseware")

def yield_dynamic_descriptor_descendents(descriptor, module_creator):
    """
    Yield ``descriptor`` and every descendant descriptor beneath it.

    If a descriptor reports dynamic children, an XModule is instantiated for
    it via ``module_creator`` and the children are taken from the live module;
    otherwise the statically declared children are used.
    """
    def _children_of(desc):
        """Return the child descriptors of ``desc``, resolving dynamic ones."""
        if not desc.has_dynamic_children():
            return desc.get_children()
        module = module_creator(desc)
        # No module (e.g. no access) means we cannot resolve dynamic children.
        if module is None:
            return []
        return module.get_child_descriptors()

    to_visit = [descriptor]
    while to_visit:
        current = to_visit.pop()
        to_visit.extend(_children_of(current))
        yield current
Calen Pennington committed
52

53

54
def answer_distributions(course_key):
    """
    Given a course_key, return answer distributions in the form of a dictionary
    mapping:

      (problem url_name, problem display_name, problem_id) -> {dict: answer -> count}

    Distributions are found by iterating through every StudentModule entry for
    the course with type="problem" and a non-null grade, so only
    LoncapaProblems that students actually submitted are counted. Other item
    types (ORA, sequences) are skipped, as is the empty Loncapa state created
    merely by rendering the progress page.

    StudentModule rows are read directly rather than through the CapaModule
    abstraction so the report has no side effects -- no student answer risks
    being re-evaluated -- and so the read-replica database can be used,
    reducing the risk of bad locking behavior.

    All available records for the course are pulled rather than crawling a
    student's course tree; the latter could miss problems not visible to the
    user generating the report (e.g. due to A/B testing).

    This method will try to use a read-replica database if one is available.
    """
    # Cache of module_state_key -> (url_name, display_name), shared by the
    # closure below so each problem hits the modulestore only once.
    problem_info_cache = {}

    def lookup_problem_info(usage_key):
        """
        Return (url_name, display_name) for ``usage_key``, consulting the
        modulestore only on a cache miss. Ignores permissions.

        Raises:
            InvalidKeyError: if the usage_key does not parse
            ItemNotFoundError: if there is no content that corresponds
                to this usage_key.
        """
        if usage_key not in problem_info_cache:
            item = modulestore().get_item(usage_key)
            problem_info_cache[usage_key] = (item.url_name, item.display_name_with_default)
        return problem_info_cache[usage_key]

    # (url_name, display_name, problem_part_id) -> {answer -> count}, built up
    # in no particular order as we scan the submitted problems.
    answer_counts = defaultdict(lambda: defaultdict(int))

    for module in StudentModule.all_submitted_problems_read_only(course_key):
        try:
            state_dict = json.loads(module.state) if module.state else {}
            raw_answers = state_dict.get("student_answers", {})
        except ValueError:
            log.error(
                "Answer Distribution: Could not parse module state for " +
                "StudentModule id={}, course={}".format(module.id, course_key)
            )
            continue

        try:
            url, display_name = lookup_problem_info(module.module_state_key.map_into_course(course_key))
            # Each problem part has an ID derived from module.module_state_key
            # (with some suffix appended).
            for problem_part_id, raw_answer in raw_answers.items():
                # Normalize whatever raw answer we have (numbers, unicode,
                # None, etc.) to unicode. JSON-decoded strings are always
                # unicode, never str.
                answer_counts[(url, display_name, problem_part_id)][unicode(raw_answer)] += 1

        except (ItemNotFoundError, InvalidKeyError):
            msg = "Answer Distribution: Item {} referenced in StudentModule {} " + \
                  "for user {} in course {} not found; " + \
                  "This can happen if a student answered a question that " + \
                  "was later deleted from the course. This answer will be " + \
                  "omitted from the answer distribution CSV."
            log.warning(
                msg.format(module.module_state_key, module.id, module.student_id, course_key)
            )
            continue

    return answer_counts
142

143 144
@transaction.commit_manually
def grade(student, request, course, keep_raw_scores=False):
    """
    Wraps "_grade" with the manual_transaction context manager just in case
    there are unanticipated errors, so a failure rolls the transaction back
    instead of leaving it dangling.

    Returns the gradeset dict produced by "_grade" (see its docstring for
    the format).
    """
    with manual_transaction():
        return _grade(student, request, course, keep_raw_scores)


def _grade(student, request, course, keep_raw_scores):
    """
    Unwrapped version of "grade"

    This grades a student as quickly as possible. It returns the
    output from the course grader, augmented with the final letter
    grade. The keys in the output are:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
      up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
      make up the final grade. (For display)
    - keep_raw_scores : if True, then value for key 'raw_scores' contains scores
      for every graded module

    Arguments:
        student: a User object for the student to grade
        request: the current request (needed to instantiate modules)
        course: a CourseDescriptor
        keep_raw_scores: if True, include per-module raw scores in the result

    More information on the format is in the docstring for CourseGrader.
    """
    grading_context = course.grading_context
    raw_scores = []

    # Dict of item_ids -> (earned, possible) point tuples. This *only* grabs
    # scores that were registered with the submissions API, which for the moment
    # means only openassessment (edx-ora2)
    submissions_scores = sub_api.get_scores(
        course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id)
    )

    totaled_scores = {}
    # This next complicated loop is just to collect the totaled_scores, which is
    # passed to the grader
    for section_format, sections in grading_context['graded_sections'].iteritems():
        format_scores = []
        for section in sections:
            section_descriptor = section['section_descriptor']
            section_name = section_descriptor.display_name_with_default

            # some problems have state that is updated independently of interaction
            # with the LMS, so they need to always be scored. (E.g. foldit.,
            # combinedopenended)
            should_grade_section = any(
                descriptor.always_recalculate_grades for descriptor in section['xmoduledescriptors']
            )

            # If there are no problems that always have to be regraded, check to
            # see if any of our locations are in the scores from the submissions
            # API. If scores exist, we have to calculate grades for this section.
            if not should_grade_section:
                should_grade_section = any(
                    descriptor.location.to_deprecated_string() in submissions_scores
                    for descriptor in section['xmoduledescriptors']
                )

            # Last resort: hit the database to see whether the student has any
            # state at all for this section's modules.
            if not should_grade_section:
                with manual_transaction():
                    should_grade_section = StudentModule.objects.filter(
                        student=student,
                        module_state_key__in=[
                            descriptor.location for descriptor in section['xmoduledescriptors']
                        ]
                    ).exists()

            # If we haven't seen a single problem in the section, we don't have
            # to grade it at all! We can assume 0%
            if should_grade_section:
                scores = []

                def create_module(descriptor):
                    '''creates an XModule instance given a descriptor'''
                    # TODO: We need the request to pass into here. If we could forego that, our arguments
                    # would be simpler
                    with manual_transaction():
                        field_data_cache = FieldDataCache([descriptor], course.id, student)
                    return get_module_for_descriptor(student, request, descriptor, field_data_cache, course.id)

                for module_descriptor in yield_dynamic_descriptor_descendents(section_descriptor, create_module):

                    (correct, total) = get_score(
                        course.id, student, module_descriptor, create_module, scores_cache=submissions_scores
                    )
                    # (None, None) means "no score for this module" -- skip it.
                    if correct is None and total is None:
                        continue

                    # for debugging: substitute randomized scores
                    if settings.GENERATE_PROFILE_SCORES:
                        if total > 1:
                            correct = random.randrange(max(total - 2, 1), total + 1)
                        else:
                            correct = total

                    graded = module_descriptor.graded
                    if not total > 0:
                        # We simply cannot grade a problem that is 12/0, because
                        # we might need it as a percentage
                        graded = False

                    scores.append(Score(correct, total, graded, module_descriptor.display_name_with_default))

                _, graded_total = graders.aggregate_scores(scores, section_name)
                if keep_raw_scores:
                    raw_scores += scores
            else:
                # Never attempted: assume 0% for the whole section.
                graded_total = Score(0.0, 1.0, True, section_name)

            # Add the graded total to totaled_scores
            if graded_total.possible > 0:
                format_scores.append(graded_total)
            else:
                log.info("Unable to grade a section with a total possible score of zero. " +
                              str(section_descriptor.location))

        totaled_scores[section_format] = format_scores

    grade_summary = course.grader.grade(totaled_scores, generate_random_scores=settings.GENERATE_PROFILE_SCORES)

    # We round the grade here, to make sure that the grade is a whole percentage
    # and doesn't get displayed differently than it gets graded
    grade_summary['percent'] = round(grade_summary['percent'] * 100 + 0.05) / 100

    letter_grade = grade_for_percentage(course.grade_cutoffs, grade_summary['percent'])
    grade_summary['grade'] = letter_grade
    # make this available, eg for instructor download & debugging
    grade_summary['totaled_scores'] = totaled_scores
    if keep_raw_scores:
        grade_summary['raw_scores'] = raw_scores        # way to get all RAW scores out to instructor
                                                        # so grader can be double-checked
    return grade_summary
280

Calen Pennington committed
281

282 283
def grade_for_percentage(grade_cutoffs, percentage):
    """
    Return the letter grade earned for ``percentage`` (e.g. 'A' 'B' 'C' for
    6.002x, as defined in grading_policy), or None if the score is below
    every cutoff.

    Arguments
    - grade_cutoffs is a dictionary mapping a grade to the lowest
        possible percentage to earn that grade.
    - percentage is the final percent across all problems in a course
    """
    # Walk the grades from highest cutoff to lowest; the first cutoff the
    # student meets is the best grade they earned.
    for candidate in sorted(grade_cutoffs, key=grade_cutoffs.get, reverse=True):
        if percentage >= grade_cutoffs[candidate]:
            return candidate
    return None
302

303

304 305 306 307 308 309 310 311 312 313
@transaction.commit_manually
def progress_summary(student, request, course):
    """
    Wraps "_progress_summary" with the manual_transaction context manager just
    in case there are unanticipated errors, so a failure rolls the transaction
    back instead of leaving it dangling.

    Returns the chapter/section summary list from "_progress_summary" (or
    None if the student cannot access the course).
    """
    with manual_transaction():
        return _progress_summary(student, request, course)


314 315 316
# TODO: This method is not very good. It was written in the old course style and
# then converted over and performance is not good. Once the progress page is redesigned
# to not have the progress summary this method should be deleted (so it won't be copied).
def _progress_summary(student, request, course):
    """
    Unwrapped version of "progress_summary".

    This pulls a summary of all problems in the course.

    Returns
    - courseware_summary is a summary of all sections with problems in the course.
    It is organized as an array of chapters, each containing an array of sections,
    each containing an array of scores. This contains information for graded and
    ungraded problems, and is good for displaying a course summary with due dates,
    etc.

    Arguments:
        student: A User object for the student to grade
        request: the current request (needed to instantiate course modules)
        course: A Descriptor containing the course to grade

    If the student does not have access to load the course module, this function
    will return None.
    """
    with manual_transaction():
        field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
            course.id, student, course, depth=None
        )
        # TODO: We need the request to pass into here. If we could
        # forego that, our arguments would be simpler
        course_module = get_module_for_descriptor(student, request, course, field_data_cache, course.id)
        if not course_module:
            # This student must not have access to the course.
            return None

    # Scores registered through the submissions API (e.g. ORA2); these take
    # precedence in get_score below.
    submissions_scores = sub_api.get_scores(course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id))

    chapters = []
    # Don't include chapters that aren't displayable (e.g. due to error)
    for chapter_module in course_module.get_display_items():
        # Skip if the chapter is hidden
        if chapter_module.hide_from_toc:
            continue

        sections = []

        for section_module in chapter_module.get_display_items():
            # Skip if the section is hidden
            with manual_transaction():
                if section_module.hide_from_toc:
                    continue

                graded = section_module.graded
                scores = []

                module_creator = section_module.xmodule_runtime.get_module

                for module_descriptor in yield_dynamic_descriptor_descendents(section_module, module_creator):
                    course_id = course.id
                    (correct, total) = get_score(
                        course_id, student, module_descriptor, module_creator, scores_cache=submissions_scores
                    )
                    # (None, None) means "no score for this module" -- skip it.
                    if correct is None and total is None:
                        continue

                    scores.append(Score(correct, total, graded, module_descriptor.display_name_with_default))

                scores.reverse()
                section_total, _ = graders.aggregate_scores(
                    scores, section_module.display_name_with_default)

                module_format = section_module.format if section_module.format is not None else ''
                sections.append({
                    'display_name': section_module.display_name_with_default,
                    'url_name': section_module.url_name,
                    'scores': scores,
                    'section_total': section_total,
                    'format': module_format,
                    'due': get_extended_due_date(section_module),
                    'graded': graded,
                })

        chapters.append({
            'course': course.display_name_with_default,
            'display_name': chapter_module.display_name_with_default,
            'url_name': chapter_module.url_name,
            'sections': sections
        })

    return chapters
404

Will Daly committed
405 406

def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=None):
    """
    Return the score for a user on a problem, as a tuple (correct, total).
    e.g. (5,7) if you got 5 out of 7 points.

    If this problem doesn't have a score, or we couldn't load it, returns
    (None, None).

    Arguments:
        course_id: the id of the course the problem belongs to
        user: a Student object
        problem_descriptor: an XModuleDescriptor
        module_creator: a function that takes a descriptor, and returns the
            corresponding XModule for this user. Can return None if user
            doesn't have access, or if something else went wrong.
        scores_cache: A dict of location names to (earned, possible) point
            tuples. If an entry is found in this cache, it takes precedence.
    """
    scores_cache = scores_cache or {}

    if not user.is_authenticated():
        return (None, None)

    # Scores registered with the submissions API (passed in via scores_cache)
    # take precedence over anything stored in StudentModule.
    location_url = problem_descriptor.location.to_deprecated_string()
    if location_url in scores_cache:
        return scores_cache[location_url]

    # some problems have state that is updated independently of interaction
    # with the LMS, so they need to always be scored. (E.g. foldit.)
    if problem_descriptor.always_recalculate_grades:
        problem = module_creator(problem_descriptor)
        if problem is None:
            return (None, None)
        score = problem.get_score()
        if score is not None:
            return (score['score'], score['total'])
        else:
            return (None, None)

    if not problem_descriptor.has_score:
        # These are not problems, and do not have a score
        return (None, None)

    try:
        student_module = StudentModule.objects.get(
            student=user,
            course_id=course_id,
            module_state_key=problem_descriptor.location
        )
    except StudentModule.DoesNotExist:
        student_module = None

    if student_module is not None and student_module.max_grade is not None:
        correct = student_module.grade if student_module.grade is not None else 0
        total = student_module.max_grade
    else:
        # If the problem was not in the cache, or hasn't been graded yet,
        # we need to instantiate the problem.
        # Otherwise, the max score (cached in student_module) won't be available
        problem = module_creator(problem_descriptor)
        if problem is None:
            return (None, None)

        correct = 0.0
        total = problem.max_score()

        # Problem may be an error module (if something in the problem builder failed)
        # In which case total might be None
        if total is None:
            return (None, None)

    # Now we re-weight the problem, if specified
    weight = problem_descriptor.weight
    if weight is not None:
        if total == 0:
            # Fix: this is not an exception handler, so log.error is correct
            # here (log.exception would attach a bogus "NoneType: None"
            # traceback). Lazy %-formatting avoids building the message
            # unless the record is actually emitted.
            log.error("Cannot reweight a problem with zero total points. Problem: %s", student_module)
            return (correct, total)
        correct = correct * weight / total
        total = weight

    return (correct, total)
484 485 486 487 488 489 490 491 492 493 494 495 496 497


@contextmanager
def manual_transaction():
    """
    A context manager for managing manual transactions.

    Commits the current transaction when the wrapped block completes
    successfully; on any exception, rolls the transaction back, logs the
    failure (with traceback), and re-raises.
    """
    try:
        yield
    except Exception:
        transaction.rollback()
        log.exception('Due to an error, this transaction has been rolled back')
        raise
    else:
        transaction.commit()

498

499 500 501 502 503
def iterate_grades_for(course_id, students):
    """Given a course_id and an iterable of students (User), yield a tuple of:

    (student, gradeset, err_msg) for every student enrolled in the course.

    If an error occurred, gradeset will be an empty dict and err_msg will be an
    exception message. If there was no error, err_msg is an empty string.

    The gradeset is a dictionary with the following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
        up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
        make up the final grade. (For display)
    - raw_scores: contains scores for every graded module
    """
    course = courses.get_course_by_id(course_id)

    # We make a fake request because grading code expects to be able to look at
    # the request. We have to attach the correct user to the request before
    # grading that student.
    request = RequestFactory().get('/')

    # The metric tag is invariant across students, so build it once.
    timer_tags = [u'action:{}'.format(course_id)]

    for student in students:
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=timer_tags):
            try:
                request.user = student
                # Grading calls problem rendering, which calls masquerading,
                # which checks session vars -- thus the empty session dict below.
                # It's not pretty, but untangling that is currently beyond the
                # scope of this feature.
                request.session = {}
                student_gradeset = grade(student, request, course)
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course_id,
                    exc.message
                )
                yield student, {}, exc.message
            else:
                yield student, student_gradeset, ""