Commit 5e72cd6e by Ibrahim Awwal

Brain dump before jumping on the plane to Bangladesh. Broken but includes sort…

Brain dump before jumping on the plane to Bangladesh. Broken but includes sort of an integration test in demo.py so you can see how it works.
parent ae80ae0f
"""
TODO: Make this its own python egg
"""
__author__ = 'Ibrahim Awwal'
#from django.conf import settings
import requests
import slumber
import simplejson as json
# HOST = getattr(settings, 'GRADING_SERVICE_HOST', 'http://localhost:3000/')
# The client library *should* be django independent. Django client should override this value somehow.
# Base URL of the grading service; the slumber API object below is the shared entry point.
HOST = 'http://localhost:3000/'
API = slumber.API(HOST)
class APIModel(object):
    """
    A base class for making classes interact easily with a web API.

    Subclasses declare ``__attributes__`` (the JSON fields exchanged with the
    service), ``__slots__`` (usually the same list; ``_id`` and ``errors`` are
    inherited from here), and ``__base_url__`` (the resource path segment).
    """
    # This is kind of a premature optimization, but it restricts objects to only have fields specified explicitly. Saves
    # a lot of memory because the object doesn't need to have a dict, and I think that might be an issue with some pages
    # BUG FIX: 'errors' must be a slot -- save() assigns self.errors on a failed
    # request, which previously raised AttributeError on slotted instances.
    __slots__ = ['_id', 'errors']
    __attributes__ = []   # JSON fields serialized to/from the service
    __base_url__ = ""     # resource path segment, e.g. 'users'
    __parent__ = None     # optional parent resource for nested URLs

    def __init__(self, **kwargs):
        self.update_attributes(**kwargs)

    def get_parents(self):
        """Return the chain of parent resources, outermost first."""
        p = self.__parent__
        parents = []
        while p:
            parents.append(p)
            p = p.__parent__
        parents.reverse()
        return parents

    def url(self):
        """Relative URL: the collection URL, plus the id once persisted."""
        # I think the smart thing to do is to ask your parent for its base URL
        if self.id:
            return slumber.url_join(self.__base_url__, self.id)
        else:
            return self.__base_url__

    def update_attributes(self, **kwargs):
        """Copy the recognized fields from kwargs onto this instance."""
        if 'id' in kwargs:
            self._id = int(kwargs['id'])
        for attribute in self.__attributes__:
            if attribute in kwargs:
                setattr(self, attribute, kwargs[attribute])

    def save(self):
        """
        Persist this object: PUT when it already has an id, POST otherwise.
        On success the instance is refreshed from the response JSON; on
        failure the service's error payload is stored in ``self.errors``.
        Returns self so calls can be chained.
        """
        # TODO: Think of a better way to handle nested resources, currently you have to manually set __base_url__
        attributes = dict((key, getattr(self, key))
                          for key in self.__attributes__ if hasattr(self, key))
        params = json.dumps({self.__class__.__name__.lower(): attributes})
        headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
        post_url = slumber.url_join(HOST, self.url())
        if self.id:  # This object was retrieved from the service or otherwise persisted
            response = requests.put(post_url, data=params, headers=headers)
        else:
            response = requests.post(post_url, data=params, headers=headers)
        if response.status_code == 200:
            self.update_attributes(**response.json)
        else:
            # TODO: handle errors more thoroughly than just recording them
            self.errors = response.json['errors']
        return self

    def delete(self):
        """Delete the persisted object; clears the local id on success."""
        url = slumber.url_join(HOST, self.url())
        if self.id:
            response = requests.delete(url)
            if response.status_code == 200:
                self._id = None

    @property
    def id(self):
        """
        Returns the object's primary key, or None if it hasn't been persisted
        """
        try:
            return self._id
        except AttributeError:
            return None
class User(APIModel):
    """A grading-service user, optionally keyed by an external (LMS) id."""
    __attributes__ = ['name', 'external_id']
    __slots__ = ['name', 'external_id', 'tasks']
    __base_url__ = 'users'

    def __init__(self, **kwargs):
        self.update_attributes(**kwargs)
        # Grading tasks assigned to this user; empty until fetched.
        self.tasks = []

    @staticmethod
    def get_by_id(id):
        """Fetch a user from the service by primary key."""
        data = API.users(id).get()
        return User(**data)

    @staticmethod
    def get_by_external_id(id):
        """Fetch a user by its external identifier (e.g. 'edx:123')."""
        data = API.users.get_by_external_id(id).get()
        return User(**data)

    # def submissions(self):
    #     API.users(self.id).submissions.get()
class Question(APIModel):
    """A gradable question; submissions and grading configuration hang off it."""
    __attributes__ = ['external_id', 'rubric_id', 'total_points']
    # BUG FIX: '_grading_configuration' needs a slot -- the property below
    # caches into it, which previously raised AttributeError.
    __slots__ = __attributes__ + ['_grading_configuration']
    __base_url__ = 'questions'

    def submissions(self):
        """Fetch all submissions for this question."""
        # BUG FIX: was API.questions(id) -- the builtin, not this question's id.
        return [Submission(**data) for data in API.questions(self.id).submissions.get()]

    @property
    def grading_queue(self):
        """Queue used to hand out grading work for this question."""
        return GradingQueue(self, question_id=self.id)

    @property
    def grading_configuration(self):
        """Lazily fetch (or locally create) this question's grading configuration."""
        # getattr: the slot exists but is unset until the first access.
        if getattr(self, '_grading_configuration', None) is None:
            # Try to query the service for the grading configuration
            # BUG FIX: the request URL must include HOST (as save()/delete() do).
            response = requests.get(
                slumber.url_join(HOST, 'question', self.id, 'grading_configuration'))
            if response.status_code == 200:
                self._grading_configuration = GradingConfiguration(**response.json)
            else:
                # Not configured on the service yet: start a fresh local one.
                self._grading_configuration = GradingConfiguration(question_id=self.id)
        return self._grading_configuration

    @staticmethod
    def get_by_id(id):
        """Fetch a question by primary key."""
        # BUG FIX: the response dict must be splatted into the constructor
        # (matches User.get_by_id).
        return Question(**API.questions(id).get())
class Submission(APIModel):
    """One user's submission to a question (nested resource under questions)."""
    __attributes__ = ['question_id', 'user_id', 'external_id']
    __slots__ = __attributes__
    __base_url__ = 'submissions'

    def evaluations(self, start=None, end=None):
        """
        Fetch this submission's evaluations from the service.

        Returns a list of Evaluation objects, or an empty list when the
        request fails.
        """
        # TODO: Support pagination (start/end are currently unused).
        # BUG FIX: previously assigned to self.evaluations, which both
        # shadowed this method and raised AttributeError (there is no
        # 'evaluations' slot). Return the list instead.
        evaluations_url = slumber.url_join(self.url(), 'evaluations')
        response = requests.get(evaluations_url)
        if response.status_code == 200:
            return [Evaluation(**data) for data in response.json]
        return []

    def url(self):
        """Nested URL under the owning question."""
        if self.id:
            return slumber.url_join('questions', self.question_id, 'submissions', self.id)
        else:
            return slumber.url_join('questions', self.question_id, 'submissions')

    @staticmethod
    def get_by_question_id_and_id(question_id, id):
        """Fetch one submission by its question's id and its own id."""
        return Submission(**API.questions(question_id).submissions(id).get())
class Rubric(APIModel):
    """A grading rubric; owns an ordered list of RubricEntry items."""
    __attributes__ = ['rubric_type', 'title', 'total_points', 'published']
    __slots__ = ['rubric_type', 'title', 'total_points', 'published', 'entries']
    __base_url__ = 'rubrics'

    def __init__(self, **kwargs):
        entries = kwargs.pop('entries', None)
        self.update_attributes(**kwargs)
        if entries:
            # BUG FIX: each entry dict must be splatted into the constructor
            # (matches Group's handling of memberships).
            self.entries = [RubricEntry(**data) for data in entries]
        else:
            self.entries = []

    @staticmethod
    def get_by_id(id):
        """Fetch a rubric by primary key."""
        # BUG FIX: the response dict must be splatted into the constructor
        # (it was being passed positionally, which __init__ cannot accept).
        return Rubric(**API.rubrics(id).get())

    def save(self):
        """Persist the rubric, then each of its entries. Returns self."""
        APIModel.save(self)
        for entry in self.entries:
            entry.save()
        return self

    def add_entry(self, weight, description, explanation=''):
        """
        Adds an entry to this rubric but does not save it. Rubric must already exist
        """
        entry = RubricEntry(rubric_id=self.id, weight=weight,
                            description=description, explanation=explanation)
        self.entries.append(entry)
        return entry
class RubricEntry(APIModel):
    """A single weighted line item (criterion/mistake) on a rubric."""
    __attributes__ = ['rubric_id', 'description', 'explanation', 'weight']
    __slots__ = __attributes__
    __base_url__ = 'entries'

    def url(self):
        """Nested URL under the owning rubric."""
        parts = ['rubrics', self.rubric_id, 'entries']
        if self.id:
            parts.append(self.id)
        return slumber.url_join(*parts)
class GradingConfiguration(APIModel):
    """Per-question settings that control how grading work is distributed."""
    __attributes__ = ['due_date', 'evaluations_per_submission', 'evaluations_per_grader',
                      'open_date', 'priority_weights', 'question_id', 'training_exercises_required']
    __slots__ = __attributes__
    __base_url__ = 'grading_configuration'

    def url(self):
        """Singleton resource nested under its question (note: singular 'question')."""
        question_segment = 'question'
        return slumber.url_join(question_segment, self.question_id, 'grading_configuration')
class Group(APIModel):
    """A named collection of users (e.g. a pool of graders)."""
    __attributes__ = ['title']
    # BUG FIX: 'memberships' needs a slot -- __init__ always assigns it, which
    # previously raised AttributeError (__slots__ only had 'title').
    __slots__ = ['title', 'memberships']
    __base_url__ = 'groups'

    def __init__(self, **kwargs):
        memberships = kwargs.pop('memberships', None)
        self.update_attributes(**kwargs)
        if memberships:
            # (removed a stray debug `print memberships` here)
            self.memberships = [GroupMembership(**data) for data in memberships]
        else:
            self.memberships = []

    def get_memberships(self):
        """Refresh this group's memberships from the service."""
        memberships = API.groups(self.id).memberships.get()
        self.memberships = [GroupMembership(**data) for data in memberships]

    def add_user(self, user):
        """Add a user to this group; returns the created membership."""
        return GroupMembership(**API.groups(self.id).memberships.post(user_id=user.id))

    def remove_user(self, user):
        """
        The proper way to remove a user from a group would be to have the membership_id ahead of time by eg. clicking on
        a user from a list. This operation could be done much more easily on the service side but it would make the
        controller less RESTful.
        """
        membership_id = next((x.id for x in self.memberships if x.user_id == user.id), None)
        if membership_id:
            API.groups(self.id).memberships(membership_id).delete()

    @staticmethod
    def get_by_id(id, include_members=False):
        """Fetch a group, optionally asking the service to include members."""
        g = Group(**API.groups(id).get(include_members=('1' if include_members else '0')))
        return g
class GroupMembership(APIModel):
    """Join record linking a user to a group."""
    # 'name' is presumably the member's display name as returned by the
    # service -- verify against the service's memberships endpoint.
    __attributes__ = ['user_id', 'group_id', 'name']
    __slots__ = __attributes__
    __base_url__ = 'memberships'
class GroupRole(APIModel):
    """Assigns a group a role within a grading configuration."""
    # 'role' appears to be an integer code -- semantics are defined service-side;
    # TODO confirm the meaning of each value.
    __attributes__ = ['grading_configuration_id', 'group_id', 'role']
    __slots__ = __attributes__
    __base_url__ = 'group_roles'
class Example(APIModel):
    """Marks a submission as a training example within a grading configuration."""
    __attributes__ = ['grading_configuration_id', 'submission_id', 'user_id']
    __slots__ = __attributes__
    __base_url__ = 'examples'
class Evaluation(APIModel):
    """One grader's evaluation of a submission against a rubric."""
    # NOTE(review): no __base_url__ is set, so url() falls back to APIModel's
    # empty default -- confirm how evaluations are meant to be persisted.
    __attributes__ = ['rubric_id', 'user_id', 'submission_id', 'comments', 'offset']
    __slots__ = __attributes__
class RubricEntryValue(APIModel):
    """Whether a particular rubric entry applies ('present') in one evaluation."""
    # NOTE(review): no __base_url__ is set, so url() returns an empty base --
    # confirm the intended endpoint for these records.
    __attributes__ = ['rubric_entry_id', 'evaluation_id', 'present']
    __slots__ = __attributes__
class Task(APIModel):
    """A unit of grading work: one grader assigned to one submission."""
    # NOTE(review): no __base_url__ is set; tasks are only ever constructed
    # from GradingQueue responses in this file -- confirm they are read-only here.
    __attributes__ = ['grader_id', 'submission_id', 'question_id', 'completed']
    __slots__ = __attributes__
class GradingQueue(APIModel):
    """Hands out grading work (Tasks) for a question's submissions."""
    __attributes__ = ['question_id']
    __slots__ = ['question_id', 'question', 'grading_configuration']

    def __init__(self, question, grading_configuration=None, **kwargs):
        # BUG FIX: grading_configuration is now optional -- the only caller
        # (Question.grading_queue) constructs the queue without one -- and
        # kwargs such as question_id are actually applied (they were
        # previously dropped, leaving request_work_for_user to fail on a
        # missing question_id attribute).
        self.question = question
        self.grading_configuration = grading_configuration
        self.update_attributes(**kwargs)

    def request_work_for_user(self, user):
        """
        Ask the service for grading tasks assigned to `user`.

        Returns a list of Task objects; empty when there is no work or the
        request failed (previously returned None, which broke iteration).
        """
        # BUG FIX: the URL now includes HOST, matching save()/delete().
        url = slumber.url_join(HOST, 'questions', self.question_id,
                               'grading_queue', 'request_work')
        params = {'user_id': user.id}
        response = requests.post(url, params)
        if response.status_code == 200 and len(response.json) > 0:
            return [Task(**data) for data in response.json]
        return []
#!/usr/bin/env python
import numpy as np
from random import random
import datetime
from grading_client.api import *
# Integration-style demo: sets up users, questions, submissions, a rubric and
# grading groups against a locally running grading service, then simulates
# instructors and graders evaluating submissions.
# NOTE(review): the commit message says this demo is known broken; reviewer
# notes below flag the specific breakages. Indentation here is reconstructed.

# Create 30 local students, 100 remote students, 2 instructors, and 5 graders.
num_local, num_remote, num_instructors, num_graders = (30, 100, 2, 5)
local_students = [User(name="Student %d"%x, external_id="calx:%d"%(x+2000)).save() for x in xrange(num_local)]
remote_students = [User(name="Student %d"%x, external_id="edx:%d"%(x+1000)).save() for x in xrange(num_remote)]
instructors = [User(name="Instructor %d"%x, external_id="edx:%d"%x).save() for x in xrange(num_instructors)]
graders = [User(name="Grader %d"%x, external_id="edx:%d"%(x+100)).save() for x in xrange(num_graders)]
# Create 5 questions
num_questions = 5
questions = {}
group_names = ['local', 'remote1']
for variant in group_names:
    # NOTE(review): due_date is not in Question.__attributes__ (it is silently
    # dropped), and the same 'calx_q:%d' external ids repeat for both variants
    # -- confirm whether either is intended.
    questions[variant] = [Question(external_id="calx_q:%d"%x, total_points=2, due_date=datetime.datetime.now()).save() for x in xrange(num_questions)]
# Submit submissions for all users
# Keep track of a "ground-truth" value for the scoring somehow
# Each question has 3 rubric items, worth 0, 1, and 1 (the 1s are independent of each other, the 0 is obviously not)
local_submissions = {}
# local_submissions_true_scores = np.ndarray((num_local, num_questions, 3), dtype=np.bool)
local_true_scores = {}
for question in questions['local']:
    local_submissions[question] = [(Submission(question_id=question.id, user_id=user.id, external_id="calx_s:%d"%(user.id+1000*question.id))) for user in local_students]
    for submission in local_submissions[question]:
        submission.save()
        # Randomly chosen "true" rubric outcome for this submission: two
        # independent mistakes, and "perfect" iff neither occurred.
        m1 = (random() > 0.8)
        m2 = (random() > 0.7)
        correct = not (m1 or m2)
        local_true_scores[submission.id] = (m1, m2, correct)
# for user_index in xrange(num_local):
#     for question_index in xrange(num_questions):
#         # Randomly assign true evaluations
#         m1 = (random() > 0.8)
#         m2 = (random() > 0.7)
#         correct = not (m1 or m2)
#         local_submissions_true_scores[user_index][question_index] = (m1, m2, correct)
remote_submissions = {}
#remote_submissions_true_scores = np.ndarray((num_remote, num_questions, 3), dtype=np.bool)
for question in questions['remote1']:
    remote_submissions[question] = [Submission(question_id=question.id, user_id=user.id, external_id="edx_s:%d"%(user.id+1000*question.id)) for user in remote_students]
    for submission in remote_submissions[question]:
        submission.save()
# Instructor creates rubric
rubric = Rubric(rubric_type=1, title='My Rubric', total_points = 2).save()
rubric.add_entry(1, 'Mistake 1')
rubric.add_entry(2, 'Mistake 2')
rubric.add_entry(0, 'Perfect')
rubric.save() # Saves all the entries
# Instructor 1 evaluated some local submissions in the process of creating rubric
# This doesn't quite get the interleaving of rubric creation and evaluation, but
# it shouldn't matter in practice
inst1 = instructors[0]
instructor_evals = []
for question in questions['local']:
    for submission in local_submissions[question][:5]:
        entries_dict = { entry.id:value for entry, value in zip(rubric.entries, local_true_scores[submission.id]) }
        # NOTE(review): Rubric.build_evaluation is not defined anywhere in the
        # client API -- this call is part of why the demo is broken.
        evaluation = rubric.build_evaluation(user_id=inst1.id, submission_id=submission.id, entry_values=entries_dict)
        evaluation.save()
        instructor_evals.append(evaluation)
local_configurations = [question.grading_configuration for question in questions['local']]
# Create group for graders
grader_group = Group(title='Local Graders').save()
for user in graders:
    grader_group.add_user(user)
# Configure grading for readers
for config in local_configurations:
    config.evaluations_per_submission = 1
    config.evaluations_per_grader = num_local / num_graders
    config.training_exercises_required = 0
    config.open_date = datetime.datetime.now()
    config.due_date = datetime.datetime.now() # TODO FIX
    config.save()
    role = GroupRole(group_id=grader_group.id,grading_configuration_id=config.id,role=1)
    role.save()
# Now readers sign in and get work. Readers are also accurate in grading.
# NOTE(review): 'question' here is the stale loop variable from the rubric
# loop above, so this queue belongs only to the *last* local question while
# the loop below iterates all of them -- presumably the queue should be
# obtained per question inside the loop.
queue = question.grading_queue
for user in graders:
    for question, config in zip(questions['local'], local_configurations):
        tasks = queue.request_work_for_user(user)
        for task in tasks:
            submission = Submission.get_by_question_id_and_id(question.id, task.submission_id)
            entries_dict = { entry.id:value for entry, value in zip(rubric.entries, local_true_scores[submission.id]) }
            evaluation = rubric.build_evaluation(user_id=user.id, submission_id=submission.id, entry_values=entries_dict)
            evaluation.save()
Notes on refactoring grading queue
* Routes:
resources :users do
get 'get_by_external_id', :on => :collection
end
resources :rubrics do
resources :entries, :controller => :rubric_entries
end
resources :groups do
resources :memberships, :controller => :group_memberships, :only=>[:create, :destroy, :index] do
delete 'by_user_id', :on=> :collection
end
end
resources :questions do
get 'get_by_external_id', :on => :collection
resources :submissions do
resources :evaluations
get 'last', :on => :collection
end
# This is an evil bit of trickery to require a user_id in the url. This might be a Bad Thing
# TODO: move into params[]
# TODO Document not obvious things here
resources :users, :only=>[] do
resources :tasks do
post 'request_work', :on => :collection
end
end
resource :grading_configuration, :only=>[:update, :show] do
resources :examples
resources :group_roles
end
end
* Potential use case/workflow/story:
Flipped classroom model, CS188 being taught online
Local students are graded by readers, rubric is created by instructors by flipping through
Readers are assigned local students' submissions to grade - each student gets graded by 1 reader
Online students then peer grade each other, grouped by language (let's say we have English speakers and Mandarin speakers, for instance)
* Instructors - Before Submission deadline
** Create Users (just used as a reference, don't need to authenticate)
** Create Questions (possibly multiple questions for one external question)
** Configure groups of submitters and graders for each group
*** TODO Come up with a better name than GroupRole
** Configure parameters such as weights and number of gradings needed
* Local Students - Before deadline
** Accept submissions via some sort of hook
*** Can submit via external ID and have them sorted into the proper question for the user
**** Putting the same user in submitter groups for multiple questions is an undefined, but allowed operation (probably not hard to disallow it completely though)
* Instructors - Post submission deadline
** Grade any submissions they want, creating a rubric as they go along
** TODO Browse submissions easily, by user - edX side
*** Important at this phase that multiple instructors can grade independently and the system tracks all evaluations
*** TODO Notify instructors of other instructors' rubric additions (?) (maybe we just assume that instructors create a rubric together)
*** TODO Notify instructors of other instructors' evaluations, potentially even track in real time ("Another instructor is grading this submission")
*** TODO Need service calls for "Begin Evaluation" and "End Evaluation"
**** Could use some nice in-memory storage option for this (perhaps redis?) [Perhaps a redis-set of current graders]
Model: CurrentGraders=>(submission:references, graderset:redis_set)
Graders Set stores user_ids of current graders
API: /questions/1/submissions/12/current_graders/{index,create,destroy}
**** Some sort of websocket or other ongoing connection, or just poll periodically for current graders
* Once a rubric is created and finalized, instructors open grading for readers
** Readers could potentially come up with the rubric themselves too.
Rubric permissions: We can either restrict rubric editing to admins, or allow graders to edit as well
The other model is to just allow anyone to edit the rubric but have a lock toggle so that instructors can lock it once it's finalized
** Graders open the grading page, see that they have X things left to grade, and can immediately start grading by question
*** TODO Grader todo pages
** Instructors can monitor progress of graders and send them some sort of message to hurry up
*** TODO Things left to grade view
*** TODO Graders view
The table I had before would show you both what submissions need grading and
what students need to finish but in the case of asymmetric grading this
doesn't make sense
* After grading period is closed, grades are finalized
Could run inference algorithm here
Instructor can see results, export data, etc.
** Instructor manually approves grades to be shown (edX)
* Local students can view grades, view stats, submit regrade requests
** TODO Figure out where regrade requests go
* Instructor - for remote class
** Sets up grading configurations per group (using some tool)
** Selects training examples from the pool of locally graded submissions
* Remote students submit
** Service sorts their submission into the appropriate bins
** New questions have the same rubric as the local question
* After remote submission deadline
** Can't submit anymore
* Peer grading for remote students begins
** Students have presumably provided some information by which we have grouped them
** Students go to a page to see what they have to do
** If not finished training, give training examples
*** TODO Structure training in such a way that you don't have to persist the evaluation, just keep track of how many they finished
** Once training completed, students can ask for work
Do we know what service Question they belong to? Does it matter? What does this call look like from the client?
They get the same UI that local graders got
Pop submissions off of the right priority queue
* TODO Benchmark redis performance (for eg. thousands of insertions, priority updates, etc)
* Pseudo-code using a hypothetical API (basically the API I would want to have)
** See demo.py
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment