Commit a99fd080 authored by David Ormsbee, committed by Sarina Canelake

Fix error case where we have items in our grading CSV output that are not present in a given student's gradeset.

General code cleanup and addition of comments.

Instructor dashboard API unit tests.

LMS-58
parent e2423386
......@@ -769,7 +769,7 @@ class CourseEnrollment(models.Model):
activation_changed = True
mode_changed = False
# if mode is None, the call to update_enrollment didn't specify a new
# mode, so leave as-is
if self.mode != mode and mode is not None:
self.mode = mode
......@@ -967,6 +967,14 @@ class CourseEnrollment(models.Model):
def enrollments_for_user(cls, user):
return CourseEnrollment.objects.filter(user=user, is_active=1)
@classmethod
def users_enrolled_in(cls, course_id):
"""Return a queryset of User for every user enrolled in the course."""
return User.objects.filter(
courseenrollment__course_id=course_id,
courseenrollment__is_active=True
)
def activate(self):
"""Makes this `CourseEnrollment` record active. Saves immediately."""
self.update_enrollment(is_active=True)
......
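The new `users_enrolled_in` classmethod above centralizes the "active enrollments for a course" query. A minimal usage sketch, assuming a configured Django environment with the `student` app installed; the course id is purely illustrative:

    # Illustrative use of the new helper; "edX/Demo/2013" is a placeholder course id.
    from student.models import CourseEnrollment

    enrolled_students = CourseEnrollment.users_enrolled_in("edX/Demo/2013")
    print(enrolled_students.count())           # number of active enrollments
    for user in enrolled_students.iterator():  # stream rows instead of caching them all
        print(user.username, user.email)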
......@@ -8,13 +8,12 @@ from collections import defaultdict
from django.conf import settings
from django.contrib.auth.models import User
from django.db import transaction
from django.core.handlers.base import BaseHandler
from django.test.client import RequestFactory
from dogapi import dog_stats_api
from courseware import courses
from courseware.model_data import FieldDataCache, DjangoKeyValueStore
from courseware.model_data import FieldDataCache
from xblock.fields import Scope
from xmodule import graders
from xmodule.capa_module import CapaModule
......@@ -431,6 +430,7 @@ def manual_transaction():
else:
transaction.commit()
def iterate_grades_for(course_id, students):
"""Given a course_id and an iterable of students (User), yield a tuple of:
......@@ -447,7 +447,7 @@ def iterate_grades_for(course_id, students):
up the grade. (For display)
- grade_breakdown : A breakdown of the major components that
make up the final grade. (For display)
- raw_scores contains scores for every graded module
- raw_scores: contains scores for every graded module
"""
course = courses.get_course_by_id(course_id)
......@@ -460,11 +460,16 @@ def iterate_grades_for(course_id, students):
with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=['action:{}'.format(course_id)]):
try:
request.user = student
# Grading calls problem rendering, which calls masquerading,
# which checks session vars -- thus the empty session dict below.
# It's not pretty, but untangling that is currently beyond the
# scope of this feature.
request.session = {}
gradeset = grade(student, request, course)
yield student, gradeset, ""
except Exception as exc:
except Exception as exc: # pylint: disable=broad-except
# Keep marching on even if this student couldn't be graded for
# some reason.
# some reason, but log it for future reference.
log.exception(
'Cannot grade student %s (%s) in course %s because of exception: %s',
student.username,
......
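A rough sketch of a caller consuming `iterate_grades_for`, following the yield shape documented above (course id and student set are placeholders; an empty gradeset signals that grading failed for that student):

    # Hedged consumer sketch; course_id is illustrative, and gradeset keys
    # follow the docstring and usage in this commit.
    from courseware.grades import iterate_grades_for
    from student.models import CourseEnrollment

    course_id = "edX/Demo/2013"  # placeholder course id
    students = CourseEnrollment.users_enrolled_in(course_id)

    for student, gradeset, err_msg in iterate_grades_for(course_id, students):
        if not gradeset:
            # Empty gradeset: grading failed for this student; err_msg explains why.
            print("Could not grade {}: {}".format(student.username, err_msg))
            continue
        print("{}: {:.1%}".format(student.username, gradeset['percent']))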
......@@ -29,7 +29,6 @@ from courseware.models import StudentModule
# modules which are mocked in test cases.
import instructor_task.api
from instructor.access import allow_access
import instructor.views.api
from instructor.views.api import _split_input_list, _msk_from_problem_urlname, common_exceptions_400
......@@ -128,6 +127,8 @@ class TestInstructorAPIDenyLevels(ModuleStoreTestCase, LoginEnrollmentTestCase):
('send_email', {'send_to': 'staff', 'subject': 'test', 'message': 'asdf'}),
('list_instructor_tasks', {}),
('list_background_email_tasks', {}),
('list_grade_downloads', {}),
('calculate_grades_csv', {}),
]
# Endpoints that only Instructors can access
self.instructor_level_endpoints = [
......@@ -790,6 +791,50 @@ class TestInstructorAPILevelsDataDump(ModuleStoreTestCase, LoginEnrollmentTestCa
self.assertTrue(body.startswith('"User ID","Anonymized user ID"\n"2","42"\n'))
self.assertTrue(body.endswith('"7","42"\n'))
def test_list_grade_downloads(self):
url = reverse('list_grade_downloads', kwargs={'course_id': self.course.id})
with patch('instructor_task.models.LocalFSGradesStore.links_for') as mock_links_for:
mock_links_for.return_value = [
('mock_file_name_1', 'https://1.mock.url'),
('mock_file_name_2', 'https://2.mock.url'),
]
response = self.client.get(url, {})
expected_response = {
"downloads": [
{
"url": "https://1.mock.url",
"link": "<a href=\"https://1.mock.url\">mock_file_name_1</a>",
"name": "mock_file_name_1"
},
{
"url": "https://2.mock.url",
"link": "<a href=\"https://2.mock.url\">mock_file_name_2</a>",
"name": "mock_file_name_2"
}
]
}
res_json = json.loads(response.content)
self.assertEqual(res_json, expected_response)
def test_calculate_grades_csv_success(self):
url = reverse('calculate_grades_csv', kwargs={'course_id': self.course.id})
with patch('instructor_task.api.submit_calculate_grades_csv') as mock_cal_grades:
mock_cal_grades.return_value = True
response = self.client.get(url, {})
success_status = "Your grade report is being generated! You can view the status of the generation task in the 'Pending Instructor Tasks' section."
self.assertIn(success_status, response.content)
def test_calculate_grades_csv_already_running(self):
url = reverse('calculate_grades_csv', kwargs={'course_id': self.course.id})
with patch('instructor_task.api.submit_calculate_grades_csv') as mock_cal_grades:
mock_cal_grades.side_effect = AlreadyRunningError()
response = self.client.get(url, {})
already_running_status = "A grade report generation task is already in progress. Check the 'Pending Instructor Tasks' table for the status of the task. When completed, the report will be available for download in the table below."
self.assertIn(already_running_status, response.content)
def test_get_students_features_csv(self):
"""
Test that some minimum of information is formatted
......@@ -802,7 +847,7 @@ class TestInstructorAPILevelsDataDump(ModuleStoreTestCase, LoginEnrollmentTestCa
def test_get_distribution_no_feature(self):
"""
Test that get_distribution lists available features
when supplied no feature quparameter.
when supplied no feature parameter.
"""
url = reverse('get_distribution', kwargs={'course_id': self.course.id})
response = self.client.get(url)
......
......@@ -761,7 +761,7 @@ def list_grade_downloads(_request, course_id):
grades_store = GradesStore.from_config()
response_payload = {
'downloads' : [
'downloads': [
dict(name=name, url=url, link='<a href="{}">{}</a>'.format(url, name))
for name, url in grades_store.links_for(course_id)
]
......
......@@ -171,8 +171,8 @@ def _section_data_download(course_id, access):
'get_students_features_url': reverse('get_students_features', kwargs={'course_id': course_id}),
'get_anon_ids_url': reverse('get_anon_ids', kwargs={'course_id': course_id}),
'list_instructor_tasks_url': reverse('list_instructor_tasks', kwargs={'course_id': course_id}),
'list_grade_downloads_url' : reverse('list_grade_downloads', kwargs={'course_id' : course_id}),
'calculate_grades_csv_url' : reverse('calculate_grades_csv', kwargs={'course_id' : course_id}),
'list_grade_downloads_url': reverse('list_grade_downloads', kwargs={'course_id': course_id}),
'calculate_grades_csv_url': reverse('calculate_grades_csv', kwargs={'course_id': course_id}),
}
return section_data
......
......@@ -208,6 +208,7 @@ def submit_bulk_course_email(request, course_id, email_id):
task_key = hashlib.md5(task_key_stub).hexdigest()
return submit_task(request, task_type, task_class, course_id, task_input, task_key)
def submit_calculate_grades_csv(request, course_id):
"""
AlreadyRunningError is raised if the course's grades are already being updated.
......
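Since `submit_calculate_grades_csv` raises `AlreadyRunningError` when a report is already being generated, callers are expected to catch it. A hedged sketch of that pattern; the import path for `AlreadyRunningError` is an assumption, and the messages echo the tests above:

    from instructor_task.api import submit_calculate_grades_csv
    from instructor_task.api_helper import AlreadyRunningError  # assumed location

    def start_grade_report(request, course_id):
        """Kick off CSV grade generation, reporting if a task is already running."""
        try:
            submit_calculate_grades_csv(request, course_id)
            return "Your grade report is being generated!"
        except AlreadyRunningError:
            return "A grade report generation task is already in progress."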
......@@ -211,7 +211,15 @@ class GradesStore(object):
class S3GradesStore(GradesStore):
"""
Grades store backed by S3. The directory structure we use to store things
is::
`{bucket}/{root_path}/{sha1 hash of course_id}/filename`
We might later use subdirectories or metadata to do more intelligent
grouping and querying, but right now it simply relies on its own
conventions for where files are stored to know what to display. Clients using
this class can name the final file whatever they want.
"""
def __init__(self, bucket_name, root_path):
self.root_path = root_path
......@@ -224,13 +232,26 @@ class S3GradesStore(GradesStore):
@classmethod
def from_config(cls):
"""
The expected configuration for an `S3GradesStore` is to have a
`GRADES_DOWNLOAD` dict in settings with the following fields::
STORAGE_TYPE : "s3"
BUCKET : Your bucket name, e.g. "grades-bucket"
ROOT_PATH : The path you want to store all course files under. Do not
use a leading or trailing slash. e.g. "staging" or
"staging/2013", not "/staging", or "/staging/"
Since S3 access relies on boto, you must also define `AWS_ACCESS_KEY_ID`
and `AWS_SECRET_ACCESS_KEY` in settings.
"""
return cls(
settings.GRADES_DOWNLOAD['BUCKET'],
settings.GRADES_DOWNLOAD['ROOT_PATH']
)
def key_for(self, course_id, filename):
"""Return the key we would use to store and retrive the data for the
"""Return the S3 key we would use to store and retrive the data for the
given filename."""
hashed_course_id = hashlib.sha1(course_id)
......@@ -244,6 +265,16 @@ class S3GradesStore(GradesStore):
return key
def store(self, course_id, filename, buff):
"""
Store the contents of `buff` in a directory determined by hashing
`course_id`, and name the file `filename`. `buff` is typically a
`StringIO`, but can be anything that implements `.getvalue()`.
This method assumes that the contents of `buff` are gzip-encoded (it
will add the appropriate headers to S3 to make the decompression
transparent via the browser). Filenames should end in whatever
suffix makes sense for the original file, so `.txt` instead of `.gz`
"""
key = self.key_for(course_id, filename)
data = buff.getvalue()
......@@ -251,19 +282,26 @@ class S3GradesStore(GradesStore):
key.content_encoding = "gzip"
key.content_type = "text/csv"
# Just setting the content encoding and type above should work
# according to the docs, but when experimenting, this was necessary for
# it to actually take.
key.set_contents_from_string(
data,
headers={
"Content-Encoding" : "gzip",
"Content-Length" : len(data),
"Content-Type" : "text/csv",
"Content-Encoding": "gzip",
"Content-Length": len(data),
"Content-Type": "text/csv",
}
)
def store_rows(self, course_id, filename, rows):
"""
Given a course_id, filename, and rows (each row is an iterable of strings),
write this data out.
Given a `course_id`, `filename`, and `rows` (each row is an iterable of
strings), create a buffer that is a gzip'd csv file, and then `store()`
that buffer.
Even though we store it in gzip format, browsers will transparently
download and decompress it. Filenames should end in `.csv`, not `.gz`.
"""
output_buffer = StringIO()
gzip_file = GzipFile(fileobj=output_buffer, mode="wb")
......@@ -291,7 +329,11 @@ class LocalFSGradesStore(GradesStore):
"""
LocalFS implementation of a GradesStore. This is meant for debugging
purposes and is *absolutely not for production use*. Use S3GradesStore for
that.
that. We use this in tests and for local development. When it generates
links, it will make file:/// style links. That means you actually have to
copy them and open them in a separate browser window, for security reasons.
This lets us do the cheap thing locally for debugging without having to open
up a separate URL that would only be used to send files in dev.
"""
def __init__(self, root_path):
"""
......@@ -309,7 +351,10 @@ class LocalFSGradesStore(GradesStore):
that there is a dict in settings named GRADES_DOWNLOAD and that it has
a ROOT_PATH that maps to an absolute file path that the web app has
write permissions to. `LocalFSGradesStore` will create any intermediate
directories as needed.
directories as needed. Example::
STORAGE_TYPE : "localfs"
ROOT_PATH : /tmp/edx/grade-downloads/
"""
return cls(settings.GRADES_DOWNLOAD['ROOT_PATH'])
......@@ -344,7 +389,10 @@ class LocalFSGradesStore(GradesStore):
def links_for(self, course_id):
"""
For a given `course_id`, return a list of `(filename, url)` tuples. `url`
can be plugged straight into an href
can be plugged straight into an href. Note that `LocalFSGradesStore`
will generate `file://` type URLs, so you'll need to copy the URL and
open it in a new browser window. Again, this class is only meant for
local development.
"""
course_dir = self.path_to(course_id, '')
if not os.path.exists(course_dir):
......@@ -355,4 +403,4 @@ class LocalFSGradesStore(GradesStore):
for filename in os.listdir(course_dir)
],
reverse=True
)
\ No newline at end of file
)
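Putting these pieces together, a local-development sketch of the `GradesStore` flow described in the docstrings above; the course id, filename, and rows are placeholders, and `from_config()` reads the `GRADES_DOWNLOAD` setting shown further below:

    # Local-development sketch only; not taken verbatim from this commit.
    from instructor_task.models import GradesStore

    store = GradesStore.from_config()  # localfs or S3, depending on GRADES_DOWNLOAD
    store.store_rows(
        "edX/Demo/2013",                                    # placeholder course id
        "edX_Demo_2013_grade_report_2013-08-01-1200.csv",   # arbitrary .csv filename
        [
            ["id", "email", "username", "grade"],
            ["2", "student@example.com", "alice", "0.9"],
        ],
    )
    for name, url in store.links_for("edX/Demo/2013"):
        print(name, url)  # LocalFSGradesStore yields file:// URLs for local debugging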
......@@ -138,4 +138,4 @@ def calculate_grades_csv(entry_id, xmodule_instance_args):
"""
action_name = ugettext_noop('graded')
task_fn = partial(push_grades_to_s3, xmodule_instance_args)
return run_main_task(entry_id, task_fn, action_name)
\ No newline at end of file
return run_main_task(entry_id, task_fn, action_name)
......@@ -24,6 +24,7 @@ from courseware.models import StudentModule
from courseware.model_data import FieldDataCache
from courseware.module_render import get_module_for_descriptor_internal
from instructor_task.models import GradesStore, InstructorTask, PROGRESS
from student.models import CourseEnrollment
# define different loggers for use within tasks and on client side
TASK_LOG = get_task_logger(__name__)
......@@ -477,27 +478,19 @@ def push_grades_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input,
`GradesStore.from_config()`) and calling `links_for()` on it. Writes are
buffered, so we'll never write part of a CSV file to S3 -- i.e. any files
that are visible in GradesStore will be complete ones.
As we start to add more CSV downloads, it will probably be worthwhile to
make a more general CSVDoc class instead of building out the rows like we
do here.
"""
# Get start time for task:
start_time = datetime.now(UTC)
status_interval = 100
# The pre-fetching of groups is done to make auth checks not require an
# additional DB lookup (this kills the Progress page in particular).
# But when doing grading at this scale, the memory required to store the resulting
# enrolled_students is too large to fit comfortably in memory, and subsequent
# course grading requests lead to memory fragmentation. So we will err here on the
# side of smaller memory allocations at the cost of additional lookups.
enrolled_students = User.objects.filter(
courseenrollment__course_id=course_id,
courseenrollment__is_active=True
)
# perform the main loop
enrolled_students = CourseEnrollment.users_enrolled_in(course_id)
num_total = enrolled_students.count()
num_attempted = 0
num_succeeded = 0
num_failed = 0
num_total = enrolled_students.count()
curr_step = "Calculating Grades"
def update_task_progress():
......@@ -507,26 +500,27 @@ def push_grades_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input,
'action_name': action_name,
'attempted': num_attempted,
'succeeded': num_succeeded,
'failed' : num_failed,
'total' : num_total,
'failed': num_failed,
'total': num_total,
'duration_ms': int((current_time - start_time).total_seconds() * 1000),
'step' : curr_step,
'step': curr_step,
}
_get_current_task().update_state(state=PROGRESS, meta=progress)
return progress
# Loop over all our students and build a
# Loop over all our students and build our CSV lists in memory
header = None
rows = []
err_rows = [["id", "username", "error_msg"]]
for student, gradeset, err_msg in iterate_grades_for(course_id, enrolled_students):
# Periodically update task status (this is a db write)
# Periodically update task status (this is a cache write)
if num_attempted % status_interval == 0:
update_task_progress()
num_attempted += 1
if gradeset:
# We were able to successfully grade this student for this course.
num_succeeded += 1
if not header:
header = [section['label'] for section in gradeset[u'section_breakdown']]
......@@ -537,28 +531,37 @@ def push_grades_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input,
for section in gradeset[u'section_breakdown']
if 'label' in section
}
row_percents = [percents[label] for label in header]
# Not everybody has the same gradable items. If the item is not
# found in the user's gradeset, just assume it's a 0. The aggregated
# grades for their sections and overall course will be calculated
# without regard for the item they didn't have access to, so it's
# possible for a student to have a 0.0 show up in their row but
# still have 100% for the course.
row_percents = [percents.get(label, 0.0) for label in header]
rows.append([student.id, student.email, student.username, gradeset['percent']] + row_percents)
else:
# An empty gradeset means we failed to grade a student.
num_failed += 1
err_rows.append([student.id, student.username, err_msg])
# By this point, we've got the rows we're going to stuff into our CSV files.
curr_step = "Uploading CSVs"
update_task_progress()
grades_store = GradesStore.from_config()
# Generate parts of the file name
timestamp_str = start_time.strftime("%Y-%m-%d-%H%M")
TASK_LOG.debug("Uploading CSV files for course {}".format(course_id))
course_id_prefix = urllib.quote(course_id.replace("/", "_"))
# Perform the actual upload
grades_store = GradesStore.from_config()
grades_store.store_rows(
course_id,
"{}_grade_report_{}.csv".format(course_id_prefix, timestamp_str),
rows
)
# If there are any error rows (don't count the header), write that out as well
# If there are any error rows (don't count the header), write them out as well
if len(err_rows) > 1:
grades_store.store_rows(
course_id,
......
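The core fix in this hunk is `percents.get(label, 0.0)`: a label that appears in the CSV header but not in a particular student's gradeset now defaults to 0.0 instead of raising `KeyError`. A toy illustration with made-up labels and scores:

    # Toy data only; labels and scores are invented for illustration.
    header = ["HW 01", "HW 02", "Midterm"]
    percents = {"HW 01": 1.0, "Midterm": 0.8}  # this student never saw "HW 02"

    # Old behavior: percents[label] raises KeyError on "HW 02".
    row_percents = [percents.get(label, 0.0) for label in header]
    assert row_percents == [1.0, 0.0, 0.8]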
......@@ -195,10 +195,10 @@ MITX_FEATURES = {
# Grade calculation started from the new instructor dashboard will write
# grades CSV files to S3 and give links for downloads.
'ENABLE_S3_GRADE_DOWNLOADS' : True,
'ENABLE_S3_GRADE_DOWNLOADS': True,
# Give course staff unrestricted access to grade downloads (if set to False,
# only edX superusers can perform the downloads)
'ALLOW_COURSE_STAFF_GRADE_DOWNLOADS' : False,
'ALLOW_COURSE_STAFF_GRADE_DOWNLOADS': False,
}
# Used for A/B testing
......@@ -1071,11 +1071,11 @@ REGISTRATION_OPTIONAL_FIELDS = set([
'goals',
])
# Grades download
###################### Grade Downloads ######################
GRADES_DOWNLOAD_ROUTING_KEY = HIGH_MEM_QUEUE
GRADES_DOWNLOAD = {
'STORAGE_TYPE' : 'localfs',
'BUCKET' : 'edx-grades',
'ROOT_PATH' : '/tmp/edx-s3/grades',
'STORAGE_TYPE': 'localfs',
'BUCKET': 'edx-grades',
'ROOT_PATH': '/tmp/edx-s3/grades',
}
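For an S3-backed deployment, the `S3GradesStore.from_config` docstring above implies a settings shape along these lines; bucket name, path, and credentials here are placeholders:

    # Hypothetical S3 variant of GRADES_DOWNLOAD; all values are placeholders.
    GRADES_DOWNLOAD = {
        'STORAGE_TYPE': 's3',
        'BUCKET': 'grades-bucket',
        'ROOT_PATH': 'staging/2013',  # no leading or trailing slash
    }
    # boto also needs AWS credentials defined in settings:
    AWS_ACCESS_KEY_ID = 'replace-me'
    AWS_SECRET_ACCESS_KEY = 'replace-me'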
......@@ -283,13 +283,6 @@ EDX_API_KEY = None
####################### Shoppingcart ###########################
MITX_FEATURES['ENABLE_SHOPPING_CART'] = True
###################### Grade Downloads ######################
GRADES_DOWNLOAD = {
'STORAGE_TYPE' : 'localfs',
'BUCKET' : 'edx-grades',
'ROOT_PATH' : '/tmp/edx-s3/grades',
}
#####################################################################
# Lastly, see if the developer has any local overrides.
try:
......
......@@ -247,13 +247,6 @@ PASSWORD_HASHERS = (
# 'django.contrib.auth.hashers.CryptPasswordHasher',
)
###################### Grade Downloads ######################
GRADES_DOWNLOAD = {
'STORAGE_TYPE' : 'localfs',
'BUCKET' : 'edx-grades',
'ROOT_PATH' : '/tmp/edx-s3/grades',
}
################### Make tests quieter
# OpenID spews messages like this to stderr, we don't need to see them:
......