Merge pull request #150 from edx/thallada/local-video-data

AN-7775 Generate local video data

Merge pull request #150 from edx/thallada/local-video-data
AN-7775 Generate local video data
b38ecd3c · Tyler Hallada · GitHub · d90a368e · 66d2d3ae · b38ecd3c
Commit b38ecd3c authored Jan 04, 2017 by Tyler Hallada Committed by GitHub Jan 04, 2017
8 changed files
--- a/README.rst
+++ b/README.rst
@@ -71,6 +71,83 @@ database.

        $ make loaddata

+Loading Video Data
+~~~~~~~~~~~~~~~~~~
+
+The above command should work fine on its own, but you may see warnings about
+video ids:
+
+::
+
+        WARNING:analyticsdataserver.clients:Course Blocks API failed to return
+        video ids (401). See README for instructions on how to authenticate the
+        API with your local LMS.
+
+In order to generate video data, the API has to be authenticated with
+your local LMS so that it can access the video ids for each course. Instead of
+adding a whole OAuth client to the API for this one procedure, we will piggyback
+off of the Insights OAuth client by taking the OAuth token it generates and
+using it here.
+
+1. Start your local LMS server. (e.g. in devstack, run `paver devstack --fast lms`).
+
+2. If your local LMS server is running on any address other than the default of
+   `http://localhost:8000/`, make sure to add this setting to
+   `analyticsdataserver/settings/local.py` with the correct URL (you will
+   likely not need to do this):
+
+   ::
+
+      # Don't forget to add the trailing forward slash
+      LMS_BASE_URL = 'http://example.com:8000/'
+
+3. Sign into your local Insights server, making sure to use your local LMS for
+   authentication. This will generate a new OAuth access token if you do not
+   already have an unexpired one.
+
+   The user you sign in with must have staff access to the courses for which you
+   want generated video data.
+
+4. Visit your local LMS server's admin site (by default, this is at
+   `http://localhost:8000/admin`).
+
+5. Sign in with a superuser account. Don't have one? Make one with this command
+   in your devstack as the `edxapp` user:
+
+   ::
+   
+      edxapp@precise64:~/edx-platform$ ./manage.py lms createsuperuser
+   
+   Enter a username and password that you will remember.
+
+6. On the admin site, find the "Oauth2" section and click the link "Access
+   tokens". The breadcrumbs should show "Home > Oauth2 > Access tokens".
+
+   Copy the string in the "Token" column for the first row in the table. Also,
+   make sure the "User" of the first row is the same user that you signed in
+   with in step 3.
+
+7. Paste the string as a new setting in `analyticsdataserver/settings/local.py`:
+
+   ::
+
+      COURSE_BLOCK_API_AUTH_TOKEN = '<paste access token here>'
+
+8. Run `make loaddata` again and ensure that you see the following log message
+   in the output:
+
+   ::
+
+      INFO:analyticsdataserver.clients:Successfully authenticated with the
+      Course Blocks API.
+
+9. Check that you now have video data in the API, either by querying the API in
+   the swagger docs at `/docs/#!/api/Videos_List_GET` or by visiting the Insights
+   `engagement/videos/` page for a course.
+   
+Note: the access tokens expire in one year so you should only have to follow the
+above steps once a year.
+
 Running Tests
 -------------


--- a/analytics_data_api/management/commands/generate_fake_course_data.py
+++ b/analytics_data_api/management/commands/generate_fake_course_data.py
@@ -4,12 +4,16 @@ import datetime
 import logging
 import math
 import random
+
 from tqdm import tqdm

+from django.conf import settings
 from django.core.management.base import BaseCommand
 from django.utils import timezone
-from analytics_data_api.v0 import models
+
 from analytics_data_api.constants import engagement_events
+from analytics_data_api.v0 import models
+from analyticsdataserver.clients import CourseBlocksApiClient

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -44,7 +48,7 @@ class Command(BaseCommand):
            '--course_id',
            action='store',
            dest='course_id',
-            default='edX/DemoX/Demo_Course',
+            default='course-v1:edX+DemoX+Demo_Course',
            help='Course ID for which to generate fake data',
        )
        parser.add_argument(
@@ -175,7 +179,7 @@ class Command(BaseCommand):

        logger.info("Generating new weekly course activity data...")

-        progress = tqdm(total=math.ceil((end_date - start).days / 7.0) + 1)
+        progress = tqdm(total=math.ceil((end_date - start).days / 7.0))
        while start < end_date:
            active_students = random.randint(100, 4000)
            # End date should occur on Saturday at 23:59:59
@@ -198,23 +202,13 @@ class Command(BaseCommand):
        logger.info("Done!")

    def generate_video_timeline_data(self, video_id):
-        logger.info("Deleting video timeline data...")
-        models.VideoTimeline.objects.all().delete()
-
-        logger.info("Generating new video timeline...")
        for segment in range(100):
            active_students = random.randint(100, 4000)
            counts = constrained_sum_sample_pos(2, active_students)
            models.VideoTimeline.objects.create(pipeline_video_id=video_id, segment=segment,
                                                num_users=counts[0], num_views=counts[1])

-        logger.info("Done!")
-
    def generate_video_data(self, course_id, video_id, module_id):
-        logger.info("Deleting course video data...")
-        models.Video.objects.all().delete()
-
-        logger.info("Generating new course videos...")
        users_at_start = 1234
        models.Video.objects.create(course_id=course_id, pipeline_video_id=video_id,
                                    encoded_module_id=module_id, duration=500, segment_length=5,
@@ -288,11 +282,44 @@ class Command(BaseCommand):
                    total_submissions=total_submissions, correct_submissions=correct_submissions
                )

+    def fetch_videos_from_course_blocks(self, course_id):
+        """
+        Ask the Course Blocks API of the configured LMS for the videos in a course.
+
+        Reads LMS_BASE_URL and COURSE_BLOCK_API_AUTH_TOKEN from the Django settings. If either
+        one is missing or empty, a warning is logged and None is returned so that the caller
+        can fall back to fake video ids instead of crashing.
+
+        Returns whatever CourseBlocksApiClient.all_videos returns for course_id.
+        """
+        logger.info("Fetching video ids from Course Blocks API...")
+
+        lms_base_url = getattr(settings, 'LMS_BASE_URL', None)
+        if not lms_base_url:
+            logger.warning("LMS_BASE_URL is not configured! Cannot get video ids.")
+            return None
+
+        # Treat a missing token the same way as a missing base URL rather than raising AttributeError.
+        auth_token = getattr(settings, 'COURSE_BLOCK_API_AUTH_TOKEN', None)
+        if not auth_token:
+            logger.warning("COURSE_BLOCK_API_AUTH_TOKEN is not configured! Cannot get video ids.")
+            return None
+
+        # Tolerate a base URL that was configured without its trailing forward slash.
+        api_base_url = lms_base_url.rstrip('/') + '/api/courses/v1/'
+        logger.info("Assuming the Course Blocks API is hosted at: %s", api_base_url)
+
+        blocks_api = CourseBlocksApiClient(api_base_url, auth_token, timeout=5)
+        return blocks_api.all_videos(course_id)
+
+    def generate_all_video_data(self, course_id, videos):
+        """
+        Replace all stored video data with freshly generated fake data.
+
+        Every existing Video and VideoTimeline row is deleted first. Then, for each dict in
+        videos (keys 'video_id' and 'video_module_id'), one set of video data and one video
+        timeline are generated.
+        """
+        stale_tables = (
+            ("Deleting course video data...", models.Video),
+            ("Deleting video timeline data...", models.VideoTimeline),
+        )
+        for message, model in stale_tables:
+            logger.info(message)
+            model.objects.all().delete()
+
+        logger.info("Generating new course videos and video timeline data...")
+        for entry in tqdm(videos):
+            video_id = entry['video_id']
+            self.generate_video_data(course_id, video_id, entry['video_module_id'])
+            self.generate_video_timeline_data(video_id)
+
+        logger.info("Done!")
+
    def handle(self, *args, **options):
        course_id = options['course_id']
        username = options['username']
-        video_id = '0fac49ba'
-        video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
+        video_ids = self.fetch_videos_from_course_blocks(course_id)
+        if not video_ids:
+            logger.warning("Falling back to fake video id due to Course Blocks API failure...")
+            video_ids = [
+                {
+                    'video_id': '0fac49ba',
+                    'video_module_id': 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
+                }
+            ]
        start_date = timezone.now() - datetime.timedelta(weeks=10)

        num_weeks = options['num_weeks']
@@ -304,8 +331,7 @@ class Command(BaseCommand):
        logger.info("Generating data for %s...", course_id)
        self.generate_weekly_data(course_id, start_date, end_date)
        self.generate_daily_data(course_id, start_date, end_date)
-        self.generate_video_data(course_id, video_id, video_module_id)
-        self.generate_video_timeline_data(video_id)
+        self.generate_all_video_data(course_id, video_ids)
        self.generate_learner_engagement_data(course_id, username, start_date, end_date)
        self.generate_learner_engagement_range_data(course_id, start_date.date(), end_date.date())
        self.generate_tags_distribution_data(course_id)
--- a/analyticsdataserver/clients.py
+++ b/analyticsdataserver/clients.py
+import logging
+
+from edx_rest_api_client.client import EdxRestApiClient
+from edx_rest_api_client.exceptions import HttpClientError
+from opaque_keys.edx.keys import UsageKey
+from opaque_keys import InvalidKeyError
+
+from analyticsdataserver.utils import temp_log_level
+
+logger = logging.getLogger(__name__)
+
+
+class CourseBlocksApiClient(EdxRestApiClient):
+    """
+    Client for the LMS Course Blocks API, built as a sub-class of the edX Rest API Client
+    (https://github.com/edx/edx-rest-api-client).
+
+    Details about the API itself can be found at
+    https://openedx.atlassian.net/wiki/display/AN/Course+Structure+API.
+
+    Currently, this client is only used for a local-only developer script (generate_fake_course_data).
+    """
+    def __init__(self, url, access_token, timeout):
+        """Pass the API base URL, OAuth access token and request timeout on to the parent client."""
+        super(CourseBlocksApiClient, self).__init__(url, oauth_access_token=access_token, timeout=timeout)
+
+    def all_videos(self, course_id):
+        """
+        Return one dict per video block of the course, with keys 'video_id' and 'video_module_id'.
+
+        When the request raises HttpClientError, a warning that includes the response status code
+        is logged and None is returned.
+
+        NOTE(review): only HttpClientError is handled here; other failures (server errors,
+        connection problems) presumably propagate to the caller -- confirm.
+        """
+        try:
+            logger.debug('Retrieving course video blocks for course_id: %s', course_id)
+            response = self.blocks.get(course_id=course_id, all_blocks=True, depth='all', block_types_filter='video')
+            logger.info("Successfully authenticated with the Course Blocks API.")
+        except HttpClientError as e:
+            status_code = e.response.status_code
+            # Start from the generic message and append a hint for the statuses we can explain.
+            message = "Course Blocks API failed to return video ids (%s)."
+            if status_code == 401:
+                message += " See README for instructions on how to authenticate the API with your local LMS."
+            elif status_code == 404:
+                message += " Does the course exist in the LMS?"
+            logger.warning(message, status_code)
+            return None
+
+        # HACK: silence the mysterious flood of ImportErrors that stevedore logs from inside edx-opaque-keys.
+        # (The UsageKey utility still works despite the import errors, so the errors do not seem important.)
+        with temp_log_level('stevedore', log_level=logging.CRITICAL):
+            videos = []
+            for block in response['blocks'].values():
+                block_id = block['id']
+                try:
+                    encoded_id = UsageKey.from_string(block_id).html_id()
+                except InvalidKeyError:
+                    # Ids that cannot be parsed as usage keys are passed through unchanged.
+                    encoded_id = block_id
+                videos.append({'video_id': course_id + '|' + encoded_id,
+                               'video_module_id': encoded_id})
+
+        return videos
--- a/analyticsdataserver/settings/local.py
+++ b/analyticsdataserver/settings/local.py
@@ -56,10 +56,15 @@ CACHES = {
 ANALYTICS_DATABASE = 'analytics'
 ENABLE_ADMIN_SITE = True

-########## END ANALYTICS DATA API CONFIGURATION
-
 TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'

 SWAGGER_SETTINGS = {
    'api_key': 'edx'
 }
+
+# These two settings are used in generate_fake_course_data.py.
+# Replace with correct values to generate local fake video data.
+LMS_BASE_URL = 'http://localhost:8000/'  # the base URL for your running local LMS instance
+COURSE_BLOCK_API_AUTH_TOKEN = 'paste auth token here'  # see README for instructions on how to configure this value
+
+########## END ANALYTICS DATA API CONFIGURATION
--- a/analyticsdataserver/tests.py
+++ b/analyticsdataserver/tests.py
+import json
+import logging
 from contextlib import contextmanager

+import mock
+import responses
+
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.db.utils import ConnectionHandler, DatabaseError
 from django.test import TestCase
 from django.test.utils import override_settings
-
-import mock
 from rest_framework.authtoken.models import Token
+
 from analytics_data_api.v0.models import CourseEnrollmentDaily, CourseEnrollmentByBirthYear
+from analyticsdataserver.clients import CourseBlocksApiClient
 from analyticsdataserver.router import AnalyticsApiRouter
+from analyticsdataserver.utils import temp_log_level


 class TestCaseWithAuthentication(TestCase):
@@ -97,3 +103,98 @@ class AnalyticsApiRouterTests(TestCase):
        """
        self.assertFalse(self.router.allow_relation(CourseEnrollmentDaily, User))
        self.assertTrue(self.router.allow_relation(CourseEnrollmentDaily, CourseEnrollmentByBirthYear))
+
+
+class UtilsTests(TestCase):
+    """Tests for the helpers in analyticsdataserver.utils."""
+
+    def setUp(self):
+        self.logger = logging.getLogger('test_logger')
+
+    def test_temp_log_level(self):
+        """The log level changes inside the context manager and is back to its old value on exit."""
+        level_before = self.logger.getEffectiveLevel()
+        cases = (
+            ({}, logging.CRITICAL),  # NOTE: log_level defaults to logging.CRITICAL
+            ({'log_level': logging.DEBUG}, logging.DEBUG),  # log_level option used
+        )
+        for kwargs, expected_level in cases:
+            with temp_log_level('test_logger', **kwargs):
+                self.assertEqual(self.logger.getEffectiveLevel(), expected_level)
+            self.assertEqual(self.logger.getEffectiveLevel(), level_before)
+
+
+class ClientTests(TestCase):
+    """Tests for CourseBlocksApiClient.all_videos against a mocked blocks endpoint."""
+
+    @mock.patch('analyticsdataserver.clients.EdxRestApiClient')
+    def setUp(self, *args, **kwargs):  # pylint: disable=unused-argument
+        self.client = CourseBlocksApiClient('http://example.com/', 'token', 5)
+
+    @responses.activate
+    def test_all_videos(self):
+        """Every block in a successful response is turned into a video_id/video_module_id pair."""
+        responses.add(responses.GET, 'http://example.com/blocks/', body=json.dumps({'blocks': {
+            'block-v1:edX+DemoX+Demo_Course+type@video+block@5c90cffecd9b48b188cbfea176bf7fe9': {
+                'id': 'block-v1:edX+DemoX+Demo_Course+type@video+block@5c90cffecd9b48b188cbfea176bf7fe9'
+            },
+            'block-v1:edX+DemoX+Demo_Course+type@video+block@7e9b434e6de3435ab99bd3fb25bde807': {
+                'id': 'block-v1:edX+DemoX+Demo_Course+type@video+block@7e9b434e6de3435ab99bd3fb25bde807'
+            }
+        }}), status=200, content_type='application/json')
+        videos = self.client.all_videos('course_id')
+        # The 'blocks' mapping has no guaranteed iteration order, so sort before comparing.
+        self.assertListEqual(sorted(videos, key=lambda video: video['video_id']), [
+            {
+                'video_id': 'course_id|5c90cffecd9b48b188cbfea176bf7fe9',
+                'video_module_id': '5c90cffecd9b48b188cbfea176bf7fe9'
+            },
+            {
+                'video_id': 'course_id|7e9b434e6de3435ab99bd3fb25bde807',
+                'video_module_id': '7e9b434e6de3435ab99bd3fb25bde807'
+            }
+        ])
+
+    @responses.activate
+    @mock.patch('analyticsdataserver.clients.logger')
+    def test_all_videos_401(self, logger):
+        """A 401 response logs the authentication hint and yields None."""
+        responses.add(responses.GET, 'http://example.com/blocks/', status=401, content_type='application/json')
+        videos = self.client.all_videos('course_id')
+        logger.warning.assert_called_with(
+            'Course Blocks API failed to return video ids (%s). ' +
+            'See README for instructions on how to authenticate the API with your local LMS.', 401)
+        self.assertIsNone(videos)
+
+    @responses.activate
+    @mock.patch('analyticsdataserver.clients.logger')
+    def test_all_videos_404(self, logger):
+        """A 404 response logs the missing-course hint and yields None."""
+        responses.add(responses.GET, 'http://example.com/blocks/', status=404, content_type='application/json')
+        videos = self.client.all_videos('course_id')
+        logger.warning.assert_called_with('Course Blocks API failed to return video ids (%s). ' +
+                                          'Does the course exist in the LMS?', 404)
+        self.assertIsNone(videos)
+
+    @responses.activate
+    @mock.patch('analyticsdataserver.clients.logger')
+    def test_all_videos_500(self, logger):
+        """
+        Any other error status handled by the client logs the generic warning and yields None.
+
+        NOTE: despite the test name, the mocked status is 418 (a client error), not a 500.
+        """
+        responses.add(responses.GET, 'http://example.com/blocks/', status=418, content_type='application/json')
+        videos = self.client.all_videos('course_id')
+        logger.warning.assert_called_with('Course Blocks API failed to return video ids (%s).', 418)
+        self.assertIsNone(videos)
+
+    @responses.activate
+    def test_all_videos_pass_through_bad_id(self):
+        """Block ids that cannot be parsed as usage keys are passed through unchanged."""
+        responses.add(responses.GET, 'http://example.com/blocks/', body=json.dumps({'blocks': {
+            'block-v1:edX+DemoX+Demo_Course+type@video+block@5c90cffecd9b48b188cbfea176bf7fe9': {
+                'id': 'bad_key'
+            },
+            'block-v1:edX+DemoX+Demo_Course+type@video+block@7e9b434e6de3435ab99bd3fb25bde807': {
+                'id': 'bad_key'
+            }
+        }}), status=200, content_type='application/json')
+        videos = self.client.all_videos('course_id')
+        self.assertListEqual(videos, [
+            {
+                'video_id': 'course_id|bad_key',
+                'video_module_id': 'bad_key'
+            },
+            {
+                'video_id': 'course_id|bad_key',
+                'video_module_id': 'bad_key'
+            }
+        ])
--- a/analyticsdataserver/utils.py
+++ b/analyticsdataserver/utils.py
+# Put utilities that are used in managing the server or local environment here.
+# Utilities critical to application functionality should go under analytics_data_api.
+import logging
+from contextlib import contextmanager
+
+
+@contextmanager
+def temp_log_level(logger_name, log_level=logging.CRITICAL):
+    """
+    A context manager that temporarily adjusts a logger's log level.
+
+    By default, log_level is logging.CRITICAL, which will effectively silence the logger while the context
+    manager is active.
+
+    The logger's previous level is restored when the context exits, even if the body raises.
+    """
+    logger = logging.getLogger(logger_name)
+    # Remember the logger's own level (possibly NOTSET), not its effective level. Restoring the
+    # effective level would pin a logger that used to inherit its level from an ancestor.
+    original_log_level = logger.level
+    logger.setLevel(log_level)  # silences all logs up to but not including this level
+    try:
+        yield
+    finally:
+        # Return log level back to what it was, whether or not the body raised.
+        logger.setLevel(original_log_level)
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -15,3 +15,4 @@ Markdown==2.6.6    					# BSD
 edx-ccx-keys==0.2.1
 edx-django-release-util==0.2.0
 edx-opaque-keys==0.4.0
+edx-rest-api-client==1.4.0          # Apache 2.0
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -13,3 +13,4 @@ pep257==0.7.0
 pep8==1.7.0
 pylint==1.6.4
 pytz==2016.6.1
+responses==0.5.1