distributions.py 5.54 KB
Newer Older
1 2
"""
Profile Distributions
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21

Aggregate sums for values of fields in students profiles.

For example:
The distribution in a course for gender might look like:
'gender': {
    'type': 'EASY_CHOICE',
    'data': {
        'no_data': 1234,
        'm': 5678,
        'o': 2134,
        'f': 5678
    },
    'display_names': {
        'no_data': 'No Data',
        'm': 'Male',
        'o': 'Other',
        'f': 'Female'
}
22 23 24 25 26
"""

from django.db.models import Count
from student.models import CourseEnrollment, UserProfile

27
# Features whose values come from a restricted domain of choices,
# e.g. level_of_education.
_EASY_CHOICE_FEATURES = ('gender', 'level_of_education')
# Features whose values come from a larger domain, e.g. year_of_birth.
_OPEN_CHOICE_FEATURES = ('year_of_birth',)

# Every feature a distribution can be requested for.
AVAILABLE_PROFILE_FEATURES = _EASY_CHOICE_FEATURES + _OPEN_CHOICE_FEATURES

# Human-readable label for each available feature.
DISPLAY_NAMES = {
    'gender': 'Gender',
    'level_of_education': 'Level of Education',
    'year_of_birth': 'Year Of Birth',
}


class ProfileDistribution(object):
    """
    Container for profile distribution data

    `feature` is the name of the distribution feature
    `feature_display_name` is the display name of feature
    `data` is a dictionary of the distribution
    `type` is either 'EASY_CHOICE' or 'OPEN_CHOICE'
    `choices_display_names` is a dict if the distribution is an 'EASY_CHOICE'
    """

    class ValidationError(ValueError):
        """ Error thrown if validation fails. """

    def __init__(self, feature):
        """
        Create an empty distribution for `feature`.

        The display name is looked up in DISPLAY_NAMES and falls back to the
        feature name itself when there is no entry.
        """
        self.feature = feature
        self.feature_display_name = DISPLAY_NAMES.get(feature, feature)

        # to be set later
        self.type = None
        self.data = None
        self.choices_display_names = None

    def validate(self):
        """
        Validate this profile distribution.

        Throws ProfileDistribution.ValidationError (a ValueError) describing
        the first check that failed.
        """
        def validation_assert(predicate, message):
            """ Throw a ValidationError carrying `message` if false. """
            if not predicate:
                raise ProfileDistribution.ValidationError(message)

        # The checks run in order and stop at the first failure, so later
        # checks may rely on earlier ones having passed.
        validation_assert(
            isinstance(self.feature, str),
            "feature must be a string, got {!r}".format(self.feature)
        )
        validation_assert(
            self.feature in DISPLAY_NAMES,
            "feature {!r} has no display name".format(self.feature)
        )
        validation_assert(
            isinstance(self.feature_display_name, str),
            "feature_display_name must be a string, got {!r}".format(
                self.feature_display_name)
        )
        validation_assert(
            self.type in ['EASY_CHOICE', 'OPEN_CHOICE'],
            "type must be 'EASY_CHOICE' or 'OPEN_CHOICE', got {!r}".format(
                self.type)
        )
        validation_assert(
            isinstance(self.data, dict),
            "data must be a dict, got {!r}".format(self.data)
        )
        if self.type == 'EASY_CHOICE':
            validation_assert(
                isinstance(self.choices_display_names, dict),
                "choices_display_names must be a dict for EASY_CHOICE, "
                "got {!r}".format(self.choices_display_names)
            )
82 83 84 85 86 87 88


def profile_distribution(course_id, feature):
    """
    Retrieve distribution of students over a given feature.
    feature is one of AVAILABLE_PROFILE_FEATURES.

    Returns a ProfileDistribution instance.

    Raises ValueError if feature is not supported.

    NOTE: no_data will appear as a key instead of None/null to adhere to the json spec.
    data types are EASY_CHOICE or OPEN_CHOICE
    """

    if feature not in AVAILABLE_PROFILE_FEATURES:
        raise ValueError(
            "unsupported feature requested for distribution '{}'".format(
                feature)
        )

    prd = ProfileDistribution(feature)

    if feature in _EASY_CHOICE_FEATURES:
        prd.type = 'EASY_CHOICE'

        if feature == 'gender':
            raw_choices = UserProfile.GENDER_CHOICES
        elif feature == 'level_of_education':
            raw_choices = UserProfile.LEVEL_OF_EDUCATION_CHOICES
        else:
            # An easy-choice feature was declared without a choices mapping;
            # fail clearly instead of hitting an unbound local below.
            raise ValueError(
                "no choices defined for easy choice feature '{}'".format(
                    feature)
            )

        # short name and display name (full) of the choices.
        choices = [(short, full)
                   for (short, full) in raw_choices] + [('no_data', 'No Data')]

        def get_count(value):
            """ Get the count of enrolled students matching the feature value. """
            return CourseEnrollment.objects.filter(
                course_id=course_id,
                **{'user__profile__' + feature: value}
            ).count()

        distribution = {}
        for (short, _full) in choices:
            if short == 'no_data':
                # "no data" covers both a missing value and an empty string.
                distribution['no_data'] = get_count(None) + get_count('')
            else:
                distribution[short] = get_count(short)

        prd.data = distribution
        prd.choices_display_names = dict(choices)
    elif feature in _OPEN_CHOICE_FEATURES:
        prd.type = 'OPEN_CHOICE'
        profiles = UserProfile.objects.filter(
            user__courseenrollment__course_id=course_id
        )
        query_distribution = profiles.values(
            feature).annotate(Count(feature)).order_by()
        # query_distribution is of the form [{'featureval': 'value1', 'featureval__count': 4},
        #    {'featureval': 'value2', 'featureval__count': 2}, ...]

        distribution = dict((vald[feature], vald[feature + '__count'])
                            for vald in query_distribution)
        # distribution is of the form {'value1': 4, 'value2': 2, ...}

        # change none to no_data for valid json key
        if None in distribution:
            # django does not properly count NULL values when using annotate Count
            # so
            #     distribution['no_data'] = distribution.pop(None)
            # would always be 0.

            # Correctly count null values
            distribution['no_data'] = profiles.filter(
                **{feature: None}
            ).count()
            # Drop the None key so that only 'no_data' represents missing
            # values, as the docstring promises.
            del distribution[None]

        prd.data = distribution

    prd.validate()
    return prd