"""
Profile Distributions

Aggregate sums for values of fields in students profiles.

For example:
The distribution in a course for gender might look like:
'gender': {
    'type': 'EASY_CHOICE',
    'data': {
        'no_data': 1234,
        'm': 5678,
        'o': 2134,
        'f': 5678
    },
    'display_names': {
        'no_data': 'No Data',
        'm': 'Male',
        'o': 'Other',
        'f': 'Female'
    }
}
"""

from django.db.models import Count

from student.models import CourseEnrollment, UserProfile

# choices with a restricted domain, e.g. level_of_education
_EASY_CHOICE_FEATURES = ('gender', 'level_of_education')
# choices with a larger domain e.g. year_of_birth
_OPEN_CHOICE_FEATURES = ('year_of_birth',)

AVAILABLE_PROFILE_FEATURES = _EASY_CHOICE_FEATURES + _OPEN_CHOICE_FEATURES
DISPLAY_NAMES = {
    'gender': 'Gender',
    'level_of_education': 'Level of Education',
    'year_of_birth': 'Year Of Birth',
}


class ProfileDistribution(object):
    """
    Container for profile distribution data

    `feature` is the name of the distribution feature
    `feature_display_name` is the display name of feature
    `data` is a dictionary of the distribution
    `type` is either 'EASY_CHOICE' or 'OPEN_CHOICE'
    `choices_display_names` is a dict if the distribution is an 'EASY_CHOICE'
    """

    class ValidationError(ValueError):
        """ Error thrown if validation fails. """
        pass

    def __init__(self, feature):
        self.feature = feature
        # Fall back to the raw feature name when no display name is registered.
        self.feature_display_name = DISPLAY_NAMES.get(feature, feature)

        # to be set later
        self.type = None
        self.data = None
        self.choices_display_names = None

    def validate(self):
        """
        Validate this profile distribution.

        Checks the types of `feature`, `feature_display_name` and `data`,
        that `feature` is a key of DISPLAY_NAMES, that `type` is one of the
        two known kinds, and (for 'EASY_CHOICE' only) that
        `choices_display_names` is a dict.

        Throws ProfileDistribution.ValidationError
        """

        def validation_assert(predicate, message):
            """ Throw a ValidationError carrying `message` if false. """
            if not predicate:
                raise ProfileDistribution.ValidationError(message)

        validation_assert(
            isinstance(self.feature, str), "feature must be a string")
        validation_assert(
            self.feature in DISPLAY_NAMES,
            "feature has no entry in DISPLAY_NAMES")
        validation_assert(
            isinstance(self.feature_display_name, str),
            "feature_display_name must be a string")
        validation_assert(
            self.type in ['EASY_CHOICE', 'OPEN_CHOICE'],
            "type must be 'EASY_CHOICE' or 'OPEN_CHOICE'")
        validation_assert(
            isinstance(self.data, dict), "data must be a dict")
        if self.type == 'EASY_CHOICE':
            validation_assert(
                isinstance(self.choices_display_names, dict),
                "choices_display_names must be a dict for EASY_CHOICE")


def _easy_choice_distribution(course_id, feature):
    """
    Count active enrollments in `course_id` for each choice of `feature`.

    `feature` must be one of _EASY_CHOICE_FEATURES.

    Returns a `(data, display_names)` pair of dicts, both keyed by the short
    choice name plus a trailing 'no_data' entry. The 'no_data' count is the
    sum of enrollments whose profile value is None and those whose value is
    the empty string.
    """
    raw_choices = {
        'gender': UserProfile.GENDER_CHOICES,
        'level_of_education': UserProfile.LEVEL_OF_EDUCATION_CHOICES,
    }[feature]

    # short name and display name (full) of the choices.
    choices = [(short, full) for (short, full) in raw_choices]
    choices.append(('no_data', 'No Data'))

    # orm lookup spanning enrollment -> user -> profile -> feature column
    lookup = 'user__profile__' + feature

    def get_count(value):
        """ Get the count of enrolled students matching the feature value. """
        return CourseEnrollment.objects.filter(
            course_id=course_id,
            is_active=True,
            **{lookup: value}
        ).count()

    distribution = {}
    for (short, _full) in choices:
        if short == 'no_data':
            # handle no data case: missing values are stored either as
            # NULL or as an empty string, so both are counted.
            distribution['no_data'] = get_count(None) + get_count('')
        else:
            distribution[short] = get_count(short)

    return distribution, dict(choices)


def _open_choice_distribution(course_id, feature):
    """
    Count profiles of actively enrolled users in `course_id` per `feature` value.

    `feature` must be one of _OPEN_CHOICE_FEATURES.

    Returns a dict of the form {'value1': 4, 'value2': 2, ...}. Profiles with
    a NULL value are reported under the 'no_data' key; the result never
    contains a None key.
    """
    profiles = UserProfile.objects.filter(
        user__courseenrollment__course_id=course_id,
        user__courseenrollment__is_active=True
    )
    # The empty order_by() clears any ordering on the queryset before the
    # values()/annotate() grouping is evaluated.
    query_distribution = profiles.values(
        feature).annotate(Count(feature)).order_by()
    # query_distribution is of the form
    # [{'featureval': 'value1', 'featureval__count': 4},
    #  {'featureval': 'value2', 'featureval__count': 2}, ...]

    count_key = feature + '__count'
    distribution = {
        vald[feature]: vald[count_key] for vald in query_distribution
    }
    # distribution is of the form {'value1': 4, 'value2': 2, ...}

    # change none to no_data for valid json key
    if None in distribution:
        # django does not properly count NULL values when using annotate
        # Count, so the count stored under None would always be 0.
        # Drop that bucket and count the null values with a direct query.
        del distribution[None]
        distribution['no_data'] = profiles.filter(
            **{feature: None}
        ).count()

    return distribution


def profile_distribution(course_id, feature):
    """
    Retrieve distribution of students over a given feature.
    feature is one of AVAILABLE_PROFILE_FEATURES.

    Returns a ProfileDistribution instance.

    Raises ValueError if `feature` is not in AVAILABLE_PROFILE_FEATURES, and
    ProfileDistribution.ValidationError (a ValueError subclass) if the
    assembled distribution fails validation.

    NOTE: no_data will appear as a key instead of None/null to adhere to the json spec.
    data types are EASY_CHOICE or OPEN_CHOICE
    """

    if feature not in AVAILABLE_PROFILE_FEATURES:
        raise ValueError(
            "unsupported feature requested for distribution '{}'".format(
                feature)
        )

    prd = ProfileDistribution(feature)

    if feature in _EASY_CHOICE_FEATURES:
        prd.type = 'EASY_CHOICE'
        prd.data, prd.choices_display_names = _easy_choice_distribution(
            course_id, feature)
    elif feature in _OPEN_CHOICE_FEATURES:
        prd.type = 'OPEN_CHOICE'
        prd.data = _open_choice_distribution(course_id, feature)

    prd.validate()
    return prd