Commit 5e72cd6e by Ibrahim Awwal

Brain dump before jumping on the plane to Bangladesh. Broken but includes sort…

Brain dump before jumping on the plane to Bangladesh. Broken but includes sort of an integration test in demo.py so you can see how it works.
parent ae80ae0f
"""
TODO: Make this its own python egg
"""
__author__ = 'Ibrahim Awwal'
#from django.conf import settings
import requests
import slumber
import simplejson as json
# HOST = getattr(settings, 'GRADING_SERVICE_HOST', 'http://localhost:3000/')
# The client library *should* be django independent. Django client should override this value somehow.
# Base URL of the grading service; the slumber API object below is the shared entry point.
HOST = 'http://localhost:3000/'
API = slumber.API(HOST)
class APIModel(object):
    """
    A base class for making classes interact easily with a web API.

    Subclasses declare ``__attributes__`` (the JSON fields exchanged with the
    service), ``__slots__`` (usually the same list; ``_id`` and ``errors`` are
    inherited from here), and ``__base_url__`` (the resource path segment).
    """
    # This is kind of a premature optimization, but it restricts objects to only have fields specified explicitly. Saves
    # a lot of memory because the object doesn't need to have a dict, and I think that might be an issue with some pages
    # BUG FIX: 'errors' must be a slot -- save() assigns self.errors on a failed
    # request, which previously raised AttributeError on slotted instances.
    __slots__ = ['_id', 'errors']
    __attributes__ = []   # JSON fields serialized to/from the service
    __base_url__ = ""     # resource path segment, e.g. 'users'
    __parent__ = None     # optional parent resource for nested URLs

    def __init__(self, **kwargs):
        self.update_attributes(**kwargs)

    def get_parents(self):
        """Return the chain of parent resources, outermost first."""
        p = self.__parent__
        parents = []
        while p:
            parents.append(p)
            p = p.__parent__
        parents.reverse()
        return parents

    def url(self):
        """Relative URL: the collection URL, plus the id once persisted."""
        # I think the smart thing to do is to ask your parent for its base URL
        if self.id:
            return slumber.url_join(self.__base_url__, self.id)
        else:
            return self.__base_url__

    def update_attributes(self, **kwargs):
        """Copy the recognized fields from kwargs onto this instance."""
        if 'id' in kwargs:
            self._id = int(kwargs['id'])
        for attribute in self.__attributes__:
            if attribute in kwargs:
                setattr(self, attribute, kwargs[attribute])

    def save(self):
        """
        Persist this object: PUT when it already has an id, POST otherwise.
        On success the instance is refreshed from the response JSON; on
        failure the service's error payload is stored in ``self.errors``.
        Returns self so calls can be chained.
        """
        # TODO: Think of a better way to handle nested resources, currently you have to manually set __base_url__
        attributes = dict((key, getattr(self, key))
                          for key in self.__attributes__ if hasattr(self, key))
        params = json.dumps({self.__class__.__name__.lower(): attributes})
        headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
        post_url = slumber.url_join(HOST, self.url())
        if self.id:  # This object was retrieved from the service or otherwise persisted
            response = requests.put(post_url, data=params, headers=headers)
        else:
            response = requests.post(post_url, data=params, headers=headers)
        if response.status_code == 200:
            self.update_attributes(**response.json)
        else:
            # TODO: handle errors more thoroughly than just recording them
            self.errors = response.json['errors']
        return self

    def delete(self):
        """Delete the persisted object; clears the local id on success."""
        url = slumber.url_join(HOST, self.url())
        if self.id:
            response = requests.delete(url)
            if response.status_code == 200:
                self._id = None

    @property
    def id(self):
        """
        Returns the object's primary key, or None if it hasn't been persisted
        """
        try:
            return self._id
        except AttributeError:
            return None
class User(APIModel):
    """A grading-service user, optionally keyed by an external (LMS) id."""
    __attributes__ = ['name', 'external_id']
    __slots__ = ['name', 'external_id', 'tasks']
    __base_url__ = 'users'

    def __init__(self, **kwargs):
        self.update_attributes(**kwargs)
        # Grading tasks assigned to this user; empty until fetched.
        self.tasks = []

    @staticmethod
    def get_by_id(id):
        """Fetch a user from the service by primary key."""
        data = API.users(id).get()
        return User(**data)

    @staticmethod
    def get_by_external_id(id):
        """Fetch a user by its external identifier (e.g. 'edx:123')."""
        data = API.users.get_by_external_id(id).get()
        return User(**data)

    # def submissions(self):
    #     API.users(self.id).submissions.get()
class Question(APIModel):
    """A gradable question; submissions and grading configuration hang off it."""
    __attributes__ = ['external_id', 'rubric_id', 'total_points']
    # BUG FIX: '_grading_configuration' needs a slot -- the property below
    # caches into it, which previously raised AttributeError.
    __slots__ = __attributes__ + ['_grading_configuration']
    __base_url__ = 'questions'

    def submissions(self):
        """Fetch all submissions for this question."""
        # BUG FIX: was API.questions(id) -- the builtin, not this question's id.
        return [Submission(**data) for data in API.questions(self.id).submissions.get()]

    @property
    def grading_queue(self):
        """Queue used to hand out grading work for this question."""
        return GradingQueue(self, question_id=self.id)

    @property
    def grading_configuration(self):
        """Lazily fetch (or locally create) this question's grading configuration."""
        # getattr: the slot exists but is unset until the first access.
        if getattr(self, '_grading_configuration', None) is None:
            # Try to query the service for the grading configuration
            # BUG FIX: the request URL must include HOST (as save()/delete() do).
            response = requests.get(
                slumber.url_join(HOST, 'question', self.id, 'grading_configuration'))
            if response.status_code == 200:
                self._grading_configuration = GradingConfiguration(**response.json)
            else:
                # Not configured on the service yet: start a fresh local one.
                self._grading_configuration = GradingConfiguration(question_id=self.id)
        return self._grading_configuration

    @staticmethod
    def get_by_id(id):
        """Fetch a question by primary key."""
        # BUG FIX: the response dict must be splatted into the constructor
        # (matches User.get_by_id).
        return Question(**API.questions(id).get())
class Submission(APIModel):
    """One user's submission to a question (nested resource under questions)."""
    __attributes__ = ['question_id', 'user_id', 'external_id']
    __slots__ = __attributes__
    __base_url__ = 'submissions'

    def evaluations(self, start=None, end=None):
        """
        Fetch this submission's evaluations from the service.

        Returns a list of Evaluation objects, or an empty list when the
        request fails.
        """
        # TODO: Support pagination (start/end are currently unused).
        # BUG FIX: previously assigned to self.evaluations, which both
        # shadowed this method and raised AttributeError (there is no
        # 'evaluations' slot). Return the list instead.
        evaluations_url = slumber.url_join(self.url(), 'evaluations')
        response = requests.get(evaluations_url)
        if response.status_code == 200:
            return [Evaluation(**data) for data in response.json]
        return []

    def url(self):
        """Nested URL under the owning question."""
        if self.id:
            return slumber.url_join('questions', self.question_id, 'submissions', self.id)
        else:
            return slumber.url_join('questions', self.question_id, 'submissions')

    @staticmethod
    def get_by_question_id_and_id(question_id, id):
        """Fetch one submission by its question's id and its own id."""
        return Submission(**API.questions(question_id).submissions(id).get())
class Rubric(APIModel):
    """A grading rubric; owns an ordered list of RubricEntry items."""
    __attributes__ = ['rubric_type', 'title', 'total_points', 'published']
    __slots__ = ['rubric_type', 'title', 'total_points', 'published', 'entries']
    __base_url__ = 'rubrics'

    def __init__(self, **kwargs):
        entries = kwargs.pop('entries', None)
        self.update_attributes(**kwargs)
        if entries:
            # BUG FIX: each entry dict must be splatted into the constructor
            # (matches Group's handling of memberships).
            self.entries = [RubricEntry(**data) for data in entries]
        else:
            self.entries = []

    @staticmethod
    def get_by_id(id):
        """Fetch a rubric by primary key."""
        # BUG FIX: the response dict must be splatted into the constructor
        # (it was being passed positionally, which __init__ cannot accept).
        return Rubric(**API.rubrics(id).get())

    def save(self):
        """Persist the rubric, then each of its entries. Returns self."""
        APIModel.save(self)
        for entry in self.entries:
            entry.save()
        return self

    def add_entry(self, weight, description, explanation=''):
        """
        Adds an entry to this rubric but does not save it. Rubric must already exist
        """
        entry = RubricEntry(rubric_id=self.id, weight=weight,
                            description=description, explanation=explanation)
        self.entries.append(entry)
        return entry
class RubricEntry(APIModel):
    """A single weighted line item (criterion/mistake) on a rubric."""
    __attributes__ = ['rubric_id', 'description', 'explanation', 'weight']
    __slots__ = __attributes__
    __base_url__ = 'entries'

    def url(self):
        """Nested URL under the owning rubric."""
        parts = ['rubrics', self.rubric_id, 'entries']
        if self.id:
            parts.append(self.id)
        return slumber.url_join(*parts)
class GradingConfiguration(APIModel):
    """Per-question settings that control how grading work is distributed."""
    __attributes__ = ['due_date', 'evaluations_per_submission', 'evaluations_per_grader',
                      'open_date', 'priority_weights', 'question_id', 'training_exercises_required']
    __slots__ = __attributes__
    __base_url__ = 'grading_configuration'

    def url(self):
        """Singleton resource nested under its question (note: singular 'question')."""
        question_segment = 'question'
        return slumber.url_join(question_segment, self.question_id, 'grading_configuration')
class Group(APIModel):
    """A named collection of users (e.g. a pool of graders)."""
    __attributes__ = ['title']
    # BUG FIX: 'memberships' needs a slot -- __init__ always assigns it, which
    # previously raised AttributeError (__slots__ only had 'title').
    __slots__ = ['title', 'memberships']
    __base_url__ = 'groups'

    def __init__(self, **kwargs):
        memberships = kwargs.pop('memberships', None)
        self.update_attributes(**kwargs)
        if memberships:
            # (removed a stray debug `print memberships` here)
            self.memberships = [GroupMembership(**data) for data in memberships]
        else:
            self.memberships = []

    def get_memberships(self):
        """Refresh this group's memberships from the service."""
        memberships = API.groups(self.id).memberships.get()
        self.memberships = [GroupMembership(**data) for data in memberships]

    def add_user(self, user):
        """Add a user to this group; returns the created membership."""
        return GroupMembership(**API.groups(self.id).memberships.post(user_id=user.id))

    def remove_user(self, user):
        """
        The proper way to remove a user from a group would be to have the membership_id ahead of time by eg. clicking on
        a user from a list. This operation could be done much more easily on the service side but it would make the
        controller less RESTful.
        """
        membership_id = next((x.id for x in self.memberships if x.user_id == user.id), None)
        if membership_id:
            API.groups(self.id).memberships(membership_id).delete()

    @staticmethod
    def get_by_id(id, include_members=False):
        """Fetch a group, optionally asking the service to include members."""
        g = Group(**API.groups(id).get(include_members=('1' if include_members else '0')))
        return g
class GroupMembership(APIModel):
    """Join record linking a user to a group."""
    # 'name' is presumably the member's display name as returned by the
    # service -- verify against the service's memberships endpoint.
    __attributes__ = ['user_id', 'group_id', 'name']
    __slots__ = __attributes__
    __base_url__ = 'memberships'
class GroupRole(APIModel):
    """Assigns a group a role within a grading configuration."""
    # 'role' appears to be an integer code -- semantics are defined service-side;
    # TODO confirm the meaning of each value.
    __attributes__ = ['grading_configuration_id', 'group_id', 'role']
    __slots__ = __attributes__
    __base_url__ = 'group_roles'
class Example(APIModel):
    """Marks a submission as a training example within a grading configuration."""
    __attributes__ = ['grading_configuration_id', 'submission_id', 'user_id']
    __slots__ = __attributes__
    __base_url__ = 'examples'
class Evaluation(APIModel):
    """One grader's evaluation of a submission against a rubric."""
    # NOTE(review): no __base_url__ is set, so url() falls back to APIModel's
    # empty default -- confirm how evaluations are meant to be persisted.
    __attributes__ = ['rubric_id', 'user_id', 'submission_id', 'comments', 'offset']
    __slots__ = __attributes__
class RubricEntryValue(APIModel):
    """Whether a particular rubric entry applies ('present') in one evaluation."""
    # NOTE(review): no __base_url__ is set, so url() returns an empty base --
    # confirm the intended endpoint for these records.
    __attributes__ = ['rubric_entry_id', 'evaluation_id', 'present']
    __slots__ = __attributes__
class Task(APIModel):
    """A unit of grading work: one grader assigned to one submission."""
    # NOTE(review): no __base_url__ is set; tasks are only ever constructed
    # from GradingQueue responses in this file -- confirm they are read-only here.
    __attributes__ = ['grader_id', 'submission_id', 'question_id', 'completed']
    __slots__ = __attributes__
class GradingQueue(APIModel):
    """Hands out grading work (Tasks) for a question's submissions."""
    __attributes__ = ['question_id']
    __slots__ = ['question_id', 'question', 'grading_configuration']

    def __init__(self, question, grading_configuration=None, **kwargs):
        # BUG FIX: grading_configuration is now optional -- the only caller
        # (Question.grading_queue) constructs the queue without one -- and
        # kwargs such as question_id are actually applied (they were
        # previously dropped, leaving request_work_for_user to fail on a
        # missing question_id attribute).
        self.question = question
        self.grading_configuration = grading_configuration
        self.update_attributes(**kwargs)

    def request_work_for_user(self, user):
        """
        Ask the service for grading tasks assigned to `user`.

        Returns a list of Task objects; empty when there is no work or the
        request failed (previously returned None, which broke iteration).
        """
        # BUG FIX: the URL now includes HOST, matching save()/delete().
        url = slumber.url_join(HOST, 'questions', self.question_id,
                               'grading_queue', 'request_work')
        params = {'user_id': user.id}
        response = requests.post(url, params)
        if response.status_code == 200 and len(response.json) > 0:
            return [Task(**data) for data in response.json]
        return []
#!/usr/bin/env python
import numpy as np
from random import random
import datetime
from grading_client.api import *
# Integration-style demo: sets up users, questions, submissions, a rubric and
# grading groups against a locally running grading service, then simulates
# instructors and graders evaluating submissions.
# NOTE(review): the commit message says this demo is known broken; reviewer
# notes below flag the specific breakages. Indentation here is reconstructed.

# Create 30 local students, 100 remote students, 2 instructors, and 5 graders.
num_local, num_remote, num_instructors, num_graders = (30, 100, 2, 5)
local_students = [User(name="Student %d"%x, external_id="calx:%d"%(x+2000)).save() for x in xrange(num_local)]
remote_students = [User(name="Student %d"%x, external_id="edx:%d"%(x+1000)).save() for x in xrange(num_remote)]
instructors = [User(name="Instructor %d"%x, external_id="edx:%d"%x).save() for x in xrange(num_instructors)]
graders = [User(name="Grader %d"%x, external_id="edx:%d"%(x+100)).save() for x in xrange(num_graders)]
# Create 5 questions
num_questions = 5
questions = {}
group_names = ['local', 'remote1']
for variant in group_names:
    # NOTE(review): due_date is not in Question.__attributes__ (it is silently
    # dropped), and the same 'calx_q:%d' external ids repeat for both variants
    # -- confirm whether either is intended.
    questions[variant] = [Question(external_id="calx_q:%d"%x, total_points=2, due_date=datetime.datetime.now()).save() for x in xrange(num_questions)]
# Submit submissions for all users
# Keep track of a "ground-truth" value for the scoring somehow
# Each question has 3 rubric items, worth 0, 1, and 1 (the 1s are independent of each other, the 0 is obviously not)
local_submissions = {}
# local_submissions_true_scores = np.ndarray((num_local, num_questions, 3), dtype=np.bool)
local_true_scores = {}
for question in questions['local']:
    local_submissions[question] = [(Submission(question_id=question.id, user_id=user.id, external_id="calx_s:%d"%(user.id+1000*question.id))) for user in local_students]
    for submission in local_submissions[question]:
        submission.save()
        # Randomly chosen "true" rubric outcome for this submission: two
        # independent mistakes, and "perfect" iff neither occurred.
        m1 = (random() > 0.8)
        m2 = (random() > 0.7)
        correct = not (m1 or m2)
        local_true_scores[submission.id] = (m1, m2, correct)
# for user_index in xrange(num_local):
#     for question_index in xrange(num_questions):
#         # Randomly assign true evaluations
#         m1 = (random() > 0.8)
#         m2 = (random() > 0.7)
#         correct = not (m1 or m2)
#         local_submissions_true_scores[user_index][question_index] = (m1, m2, correct)
remote_submissions = {}
#remote_submissions_true_scores = np.ndarray((num_remote, num_questions, 3), dtype=np.bool)
for question in questions['remote1']:
    remote_submissions[question] = [Submission(question_id=question.id, user_id=user.id, external_id="edx_s:%d"%(user.id+1000*question.id)) for user in remote_students]
    for submission in remote_submissions[question]:
        submission.save()
# Instructor creates rubric
rubric = Rubric(rubric_type=1, title='My Rubric', total_points = 2).save()
rubric.add_entry(1, 'Mistake 1')
rubric.add_entry(2, 'Mistake 2')
rubric.add_entry(0, 'Perfect')
rubric.save() # Saves all the entries
# Instructor 1 evaluated some local submissions in the process of creating rubric
# This doesn't quite get the interleaving of rubric creation and evaluation, but
# it shouldn't matter in practice
inst1 = instructors[0]
instructor_evals = []
for question in questions['local']:
    for submission in local_submissions[question][:5]:
        entries_dict = { entry.id:value for entry, value in zip(rubric.entries, local_true_scores[submission.id]) }
        # NOTE(review): Rubric.build_evaluation is not defined anywhere in the
        # client API -- this call is part of why the demo is broken.
        evaluation = rubric.build_evaluation(user_id=inst1.id, submission_id=submission.id, entry_values=entries_dict)
        evaluation.save()
        instructor_evals.append(evaluation)
local_configurations = [question.grading_configuration for question in questions['local']]
# Create group for graders
grader_group = Group(title='Local Graders').save()
for user in graders:
    grader_group.add_user(user)
# Configure grading for readers
for config in local_configurations:
    config.evaluations_per_submission = 1
    config.evaluations_per_grader = num_local / num_graders
    config.training_exercises_required = 0
    config.open_date = datetime.datetime.now()
    config.due_date = datetime.datetime.now() # TODO FIX
    config.save()
    role = GroupRole(group_id=grader_group.id,grading_configuration_id=config.id,role=1)
    role.save()
# Now readers sign in and get work. Readers are also accurate in grading.
# NOTE(review): 'question' here is the stale loop variable from the rubric
# loop above, so this queue belongs only to the *last* local question while
# the loop below iterates all of them -- presumably the queue should be
# obtained per question inside the loop.
queue = question.grading_queue
for user in graders:
    for question, config in zip(questions['local'], local_configurations):
        tasks = queue.request_work_for_user(user)
        for task in tasks:
            submission = Submission.get_by_question_id_and_id(question.id, task.submission_id)
            entries_dict = { entry.id:value for entry, value in zip(rubric.entries, local_true_scores[submission.id]) }
            evaluation = rubric.build_evaluation(user_id=user.id, submission_id=submission.id, entry_values=entries_dict)
            evaluation.save()
Notes on refactoring grading queue
* Routes:
resources :users do
get 'get_by_external_id', :on => :collection
end
resources :rubrics do
resources :entries, :controller => :rubric_entries
end
resources :groups do
resources :memberships, :controller => :group_memberships, :only=>[:create, :destroy, :index] do
delete 'by_user_id', :on=> :collection
end
end
resources :questions do
get 'get_by_external_id', :on => :collection
resources :submissions do
resources :evaluations
get 'last', :on => :collection
end
# This is an evil bit of trickery to require a user_id in the url. This might be a Bad Thing
# TODO: move into params[]
# TODO Document not obvious things here
resources :users, :only=>[] do
resources :tasks do
post 'request_work', :on => :collection
end
end
resource :grading_configuration, :only=>[:update, :show] do
resources :examples
resources :group_roles
end
end
* Potential use case/workflow/story:
Flipped classroom model, CS188 being taught online
Local students are graded by readers, rubric is created by instructors by flipping through
Readers are assigned local students' submissions to grade - each student gets graded by 1 reader
Online students then peer grade each other, grouped by language (let's say we have English speakers and Mandarin speakers, for instance)
* Instructors - Before Submission deadline
** Create Users (just used as a reference, don't need to authenticate)
** Create Questions (possibly multiple questions for one external question)
** Configure groups of submitters and graders for each group
*** TODO Come up with a better name than GroupRole
** Configure parameters such as weights and number of gradings needed
* Local Students - Before deadline
** Accept submissions via some sort of hook
*** Can submit via external ID and have them sorted into the proper question for the user
**** Putting the same user in submitter groups for multiple questions is an undefined, but allowed operation (probably not hard to disallow it completely though)
* Instructors - Post submission deadline
** Grade any submissions they want, creating a rubric as they go along
** TODO Browse submissions easily, by user - edX side
*** Important at this phase that multiple instructors can grade independently and the system tracks all evaluations
*** TODO Notify instructors of other instructors' rubric additions (?) (maybe we just assume that instructors create a rubric together)
*** TODO Notify instructors of other instructors' evaluations, potentially even track in real time ("Another instructor is grading this submission")
*** TODO Need service calls for "Begin Evaluation" and "End Evaluation"
**** Could use some nice in-memory storage option for this (perhaps redis?) [Perhaps a redis-set of current graders]
Model: CurrentGraders=>(submission:references, graderset:redis_set)
Graders Set stores user_ids of current graders
API: /questions/1/submissions/12/current_graders/{index,create,destroy}
**** Some sort of websocket or other ongoing connection, or just poll periodically for current graders
* Once a rubric is created and finalized, instructors open grading for readers
** Readers could potentially come up with the rubric themselves too.
Rubric permissions: We can either restrict rubric editing to admins, or allow graders to edit as well
The other model is to just allow anyone to edit the rubric but have a lock toggle so that instructors can lock it once it's finalized
** Graders open the grading page, see that they have X things left to grade, and can immediately start grading by question
*** TODO Grader todo pages
** Instructors can monitor progress of graders and send them some sort of message to hurry up
*** TODO Things left to grade view
*** TODO Graders view
The table I had before would show you both what submissions need grading and
what students need to finish but in the case of asymmetric grading this
doesn't make sense
* After grading period is closed, grades are finalized
Could run inference algorithm here
Instructor can see results, export data, etc.
** Instructor manually approves grades to be shown (edX)
* Local students can view grades, view stats, submit regrade requests
** TODO Figure out where regrade requests go
* Instructor - for remote class
** Sets up grading configurations per group (using some tool)
** Selects training examples from the pool of locally graded submissions
* Remote students submit
** Service sorts their submission into the appropriate bins
** New questions have the same rubric as the local question
* After remote submission deadline
** Can't submit anymore
* Peer grading for remote students begins
** Students have presumably provided some information by which we have grouped them
** Students go to a page to see what they have to do
** If not finished training, give training examples
*** TODO Structure training in such a way that you don't have to persist the evaluation, just keep track of how many they finished
** Once training completed, students can ask for work
Do we know what service Question they belong to? Does it matter? What does this call look like from the client?
They get the same UI that local graders got
Pop submissions off of the right priority queue
* TODO Benchmark redis performance (for eg. thousands of insertions, priority updates, etc)
* Pseudo-code using a hypothetical API (basically the API I would want to have)
** See demo.py
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment