# xblock_counts.py
import argparse
import calendar
import csv
import json
import os
import sys
from datetime import datetime

import requests

# Keys for the CSV and JSON interpretation

# Keys read from the (paginated) course list response
PAGINATION_KEY = 'pagination'
NUM_PAGES_KEY = 'num_pages'
# Key, inside the pagination object, of the url of the next page of courses
NEXT_PAGE_URL_KEY = 'next'
RESULTS_KEY = 'results'
# Key of a course entry holding the url of that course's blocks data; also
# copied into the per-course summary dictionary
BLOCKS_URL_KEY = 'blocks_url'
# Keys read from a course's blocks response: the id of the root block, the
# mapping of block ids to block data, and the per-type counts on a block
BLOCK_ROOT_KEY = 'root'
BLOCKS_KEY = 'blocks'
BLOCK_COUNTS_KEY = 'block_counts'
# Keys read from a course entry and reused in the per-course summary dictionary
COURSE_NAME_KEY = 'name'
COURSE_ID_KEY = 'course_id'
COURSE_START_KEY = 'start'
COURSE_END_KEY = 'end'


def monthdelta(date, delta):
    """
    Method to get a delta of Months from a provided datetime

    Adapted from this StackOverflow response:
    http://stackoverflow.com/questions/3424899/whats-the-simplest-way-to-subtract-a-month-from-a-date-in-python

    The day of the month is clamped to the last valid day of the target month,
    e.g. March 31st minus one month is February 28th (or 29th in a leap year).

    Arguments:
        date datetime: Date to be modified
        delta int: Number of months to add; negative values go back in time

    Returns:
        datetime: The datetime with the month delta applied
    """
    # Use a zero-based month index so floor division and modulo handle both
    # positive and negative deltas, including the roll-over between years.
    month_index = date.month - 1 + delta
    year = date.year + month_index // 12
    month = month_index % 12 + 1
    # monthrange applies the complete Gregorian leap year rule (divisible by 4,
    # except centuries not divisible by 400).
    last_day = calendar.monthrange(year, month)[1]
    return date.replace(year=year, month=month, day=min(date.day, last_day))


def _get_course_data_summary(auth_token, months_restriction, xblock_type_set, api_root, course_count=None):
    """
    Gets the course list from the Courses API and returns a list of data objects
    summarizing each courses xBlock usage

    Arguments
        auth_token (str): Authentication token for the API
        months_restriction (int): Restriction on the number of months to go back
        xblock_type_set (set): A set of Strings containing the xBlocks types to be counted
        api_root (str): Root url of the API the course list is requested from
        course_count (int): Maximum number of courses to examine. Courses that are
            dropped by the months restriction still count towards this limit.
            None (the default) examines every course.

    Returns:
        list: a list of data objects summarizing each courses xBlock usage
    """
    # Get the first page of the Course list
    json_result = requests.get(api_root + '/api/courses/v1/courses/').json()
    pagination = json_result.get(PAGINATION_KEY) or {}
    num_pages = pagination.get(NUM_PAGES_KEY, 1)

    course_summary_data = []
    block_type_url = _get_block_count_url_string(xblock_type_set)
    unlimited = course_count is None
    total_courses = 0

    # Look through all pages and courses
    while num_pages > 0:
        for course in json_result.get(RESULTS_KEY, []):
            # Check the limit *before* touching the course so that exactly
            # course_count courses are examined
            if not unlimited and total_courses >= course_count:
                break
            course_data = _get_course_data(auth_token, course, block_type_url,
                                           months_restriction=months_restriction)
            if course_data is not None:
                course_summary_data.append(course_data)
            total_courses += 1
        num_pages -= 1

        # Do not request another page once the limit has been reached
        if not unlimited and total_courses >= course_count:
            break

        # Get the url for the next "page" in the paginated course data and update
        # the json_result. Without a next page there is nothing new to process, so
        # stop instead of walking over the same results again.
        next_page = (json_result.get(PAGINATION_KEY) or {}).get(NEXT_PAGE_URL_KEY, '')
        if not next_page:
            break
        json_result = requests.get(next_page).json()

        # print to update the screen for status
        sys.stdout.write('.')
        sys.stdout.flush()
    print('Processed %d courses' % total_courses)
    return course_summary_data


def _get_course_data(auth_token, course, block_type_url, months_restriction=None):
    """
    Collects the course data for the provided course data

    Arguments:
        auth_token (str): Authentication token for the API
        course (dict): Dictionary containing the JSON data for the given course
        block_type_url (str): Query string fragment appended to the course's blocks
            url to request the block counts
        months_restriction (int): When not None, courses that started more than
            this many months ago are skipped

    Returns:
        dict: Dictionary containing the general Course information or None if date restriction is applied and course is
        older than restriction
            {
                name: 'Name of course',
                course_id: 'Course ID',
                start: 'Start date of course' (only present when the course has one),
                end: 'End date of course',
                block_counts: Dictionary containing block counts,
                blocks_url: Url to retrieve the Blocks data,
            }
        block_counts and blocks_url are only present when the course provides a
        blocks url.
    """
    start_time_str = course.get(COURSE_START_KEY, '')
    if start_time_str and months_restriction is not None:
        start_time = datetime.strptime(start_time_str, '%Y-%m-%dT%H:%M:%SZ')
        # The trailing 'Z' marks the start time as UTC, so the cut-off has to be
        # derived from the current UTC time rather than the local time
        date_restriction = monthdelta(datetime.utcnow(), -months_restriction)
        if start_time < date_restriction:
            return None

    course_data = {
        COURSE_NAME_KEY: course.get(COURSE_NAME_KEY, ''),
        COURSE_ID_KEY: course.get(COURSE_ID_KEY, ''),
        COURSE_END_KEY: course.get(COURSE_END_KEY, ''),
    }
    if start_time_str:
        course_data[COURSE_START_KEY] = start_time_str
    if BLOCKS_URL_KEY in course:
        blocks_url = course.get(BLOCKS_URL_KEY, '')
        course_data[BLOCK_COUNTS_KEY] = _get_course_block_counts(auth_token, blocks_url + block_type_url)
        course_data[BLOCKS_URL_KEY] = blocks_url
    return course_data


def _get_block_types_from_json_file(xblock_json_file):
    """
    Retrieves the block types from the provided xBlock configuration JSON file

    Arguments:
        xblock_json_file (str): The name of the xBlock configuration file

    :return:
        set: A set of strings for all the types that are available in the configuration file
    """
    if not os.path.isfile(xblock_json_file):
        print 'xBlock configuration file does not exist: %s' % xblock_json_file
        sys.exit(2)
    with open(xblock_json_file, 'r') as json_file:
        type_set = set()
        try:
            json_data = json.loads(json_file.read())
        except ValueError, e:
            print 'xBlock configuration file does not match the expected layout and is ' \
                  'missing "data" list: %s' % xblock_json_file
            sys.exit(e.message)
        if 'data' in json_data:
            xblock_type_list = json_data['data']
            for xblock in xblock_type_list:
                type_set.add(xblock['name'])
            return type_set
        else:
            print 'xBlock configuration file does not match the expected layout and is ' \
                  'missing "data" list: %s' % xblock_json_file
            sys.exit(2)


def _get_block_count_url_string(xblock_type_set):
    """
    Build the string from the xBlock type set to append to the Block url for block_count types

    Arguments:
        xblock_type_set (set): A set of strings for all the block types

    Returns:
        str: The portion to append to the block url
    """
    block_url = ''
    if len(xblock_type_set) > 0:
        block_url += '&all_blocks=true&block_counts='
        for index, block_type in enumerate(xblock_type_set):
            block_url += block_type
            if index < len(xblock_type_set) - 1:
                block_url += ','
    return block_url


def _get_course_block_counts(auth_token, block_url):
    """
    Get the block counts for a given block_url

    Arguments:
        auth_token (str): The Authentication token to access the API
        block_url (str): The respective url for a Courses xBlock data

    Returns:
        dict: A dictionary containing the Block counts of the root block. An empty
        dictionary is returned when the request does not answer with status 200 or
        when the response carries no counts for the root block.
    """
    headers = {'Authorization': 'Bearer {}'.format(auth_token)}

    response = requests.get(block_url, headers=headers)
    if response.status_code != 200:
        print("url {} returned status code {}".format(block_url, response.status_code))
        return {}
    response_json = response.json()

    if BLOCK_ROOT_KEY not in response_json or BLOCKS_KEY not in response_json:
        return {}
    # The root block or its counts can be absent from the response; fall back to
    # "no counts", like the other failure paths, instead of raising KeyError
    root_block = response_json[BLOCKS_KEY].get(response_json[BLOCK_ROOT_KEY], {})
    return root_block.get(BLOCK_COUNTS_KEY, {})


def _get_block_summary_totals(course_data):
    """
    Totals the xBlock types included in the course data and returns those counts by type

    Arguments:
        course_data (list of dicts): a list of course_data objects

    Returns:
        dict: containing the total number of blocks by type
            {
                <block_type>: <count>,
                ...
            }
        dict: containing, by type, the number of courses that use the block type
        at least once
            {
                <block_type>: <number of courses>,
                ...
            }
    """
    block_summary_counts = {}
    unique_course_counts = {}

    for course in course_data:
        # A course may come without any block counts; treat that as "no blocks"
        # rather than failing on a missing entry
        block_counts = course.get(BLOCK_COUNTS_KEY) or {}
        for count_label, value in block_counts.items():
            # A course only counts towards a type when it really contains it
            unique = 1 if value > 0 else 0
            block_summary_counts[count_label] = block_summary_counts.get(count_label, 0) + value
            unique_course_counts[count_label] = unique_course_counts.get(count_label, 0) + unique

    return block_summary_counts, unique_course_counts


def write_block_summary_report(course_data):
    """
    Write the xBlock usage summary to the CSV file 'xblock_summary_counts.csv'

    After the header row, the file holds one row per block type, sorted by type
    name, with the number of courses using the type and its total instances.

    Arguments:
        course_data (list of dicts): a list of course_data objects

    Returns:
        Nothing
    """
    totals_by_type, courses_by_type = _get_block_summary_totals(course_data)

    # The with statement closes the file once all rows are written
    with open('xblock_summary_counts.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['XBLOCK_NAME', 'UNIQUE_COURSES', 'NUM_TOTAL_INSTANCES'])
        for type_name in sorted(totals_by_type):
            row = [
                type_name,
                str(courses_by_type[type_name]),
                str(totals_by_type[type_name]),
            ]
            writer.writerow(row)


def write_course_block_detail_report(course_data):
    """
    Generate a CSV file containing the detailed information about the xBlocks available per course

    The report is written to 'xblock_course_detail.csv' and holds one row for
    every block type a course uses at least once.

    Arguments:
        course_data (list of dicts): a list of course_data objects

    Returns:
        Nothing
    """
    with open('xblock_course_detail.csv', 'wb') as csvfile:
        detail_writer = csv.writer(
            csvfile,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_ALL
        )
        detail_writer.writerow([
            'XBLOCK_TYPE_NAME',
            'COURSE_NAME',
            'COURSE_ID',
            'COURSE_START',
            'COURSE_END',
            'NUM_XBLOCK_INSTANCES',
        ])
        for course in course_data:
            # A course may come without block counts; the fallback has to be a
            # dictionary because the counts are iterated with items()
            block_counts = course.get(BLOCK_COUNTS_KEY) or {}
            for block_type, count in block_counts.items():
                if count > 0:
                    detail_writer.writerow([
                        block_type,
                        course.get(COURSE_NAME_KEY, '').encode('utf-8'),
                        course.get(COURSE_ID_KEY, ''),
                        course.get(COURSE_START_KEY, ''),
                        course.get(COURSE_END_KEY, ''),
                        str(count)
                    ])


def get_access_token(username, password, oauth2_client_id, api_root):
    """
    Get the Access token using the provided credentials

    Arguments:
        username (str): a string containing the username to log in
        password (str): a string containing the password for the username
        oauth2_client_id (str): the OAuth2 client id sent along with the credentials
        api_root (str): root url of the site the token is requested from

    Returns:
        str: Authentication token, or None when the response does not contain one
    """
    response = requests.post(
        api_root + '/oauth2/access_token/',
        data={
            'client_id': oauth2_client_id,
            'grant_type': 'password',
            'username': username,
            'password': password
        },
    )
    try:
        payload = json.loads(response.text)
    except ValueError:
        # The body is not JSON (for instance an error page). Report "no token"
        # so the caller can handle it like any other failed login.
        return None
    if not isinstance(payload, dict):
        return None
    return payload.get('access_token', None)

if __name__ == "__main__":
    # Script entry point: read the credentials and options from the command line,
    # request an access token, collect the xBlock usage of the courses and write
    # the two CSV reports.

    # Defaults used when the matching command line option is not provided
    months_restriction = 12
    xblock_json_file = 'xblock_studio_configuration.json'
    api_root = 'https://courses.edx.org'
    course_count_limit = None

    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--username', required=True, help='User name for destination')
    parser.add_argument('-p', '--password', required=True, help='Password for the provided username')
    parser.add_argument('-c', '--clientid', required=True, help='OAuth2 Client ID for the destination')
    parser.add_argument('-a', '--api_root', help='The root of the api that the script is being run against',
                        default=api_root)
    parser.add_argument('-m', '--month', type=int, help='The months to go back when collecting course data '
                                                        '(Default 12 months)')
    parser.add_argument('-x', '--xblock_config', type=str, help='The xBlock configuration JSON file containing all '
                                                                'the xBlock types', default=xblock_json_file)
    parser.add_argument('-n', '--course_count', type=int, help='The number of courses that will be retrieved')
    args = parser.parse_args()
    username = args.username
    password = args.password
    oauth2_client_id = args.clientid
    # Falsy option values (option omitted, empty string or 0) keep the defaults
    xblock_json_file = args.xblock_config or xblock_json_file
    months_restriction = args.month or months_restriction
    api_root = args.api_root or api_root
    course_count_limit = args.course_count or course_count_limit

    start_time = datetime.now()
    # Get User access token
    token = get_access_token(username, password, oauth2_client_id, api_root)
    if token is None:
        print('Failed to retrieve user token for user: %s ' % username)
        sys.exit(2)

    # Collect course data and write CSV reports
    xblock_type_set = _get_block_types_from_json_file(xblock_json_file)
    course_data = _get_course_data_summary(token, months_restriction, xblock_type_set, api_root,
                                           course_count=course_count_limit)
    # No report files are written when no course data was collected
    if course_data:
        write_block_summary_report(course_data)
        write_course_block_detail_report(course_data)
    print('Start time: %s Total run time: %s' % (str(start_time), str(datetime.now() - start_time)))