Commit b38ecd3c by Tyler Hallada Committed by GitHub

Merge pull request #150 from edx/thallada/local-video-data

AN-7775 Generate local video data
parents d90a368e 66d2d3ae
......@@ -71,6 +71,83 @@ database.
$ make loaddata
Loading Video Data
~~~~~~~~~~~~~~~~~~
The above command should work fine on its own, but you may see warnings about
video ids:
::
WARNING:analyticsdataserver.clients:Course Blocks API failed to return
video ids (401). See README for instructions on how to authenticate the
API with your local LMS.
In order to generate video data, the API has to be authenticated with
your local LMS so that it can access the video ids for each course. Instead of
adding a whole OAuth client to the API for this one procedure, we will piggyback
off of the Insights OAuth client by taking the OAuth token it generates and
using it here.
1. Start your local LMS server. (e.g. in devstack, run `paver devstack --fast lms`).
2. If your local LMS server is running on any address other than the default of
`http://localhost:8000/`, make sure to add this setting to
`analyticsdataserver/settings/local.py` with the correct URL. (you will
likely not need to do this):
::
# Don't forget to add the trailing forward slash
LMS_BASE_URL = 'http://example.com:8000/'
3. Sign in to your local Insights server, making sure to use your local LMS for
authentication. This will generate a new OAuth access token unless you already
have an unexpired one.
The user you sign in with must have staff access to the courses for which you
want generated video data.
4. Visit your local LMS server's admin site (by default, this is at
`http://localhost:8000/admin`).
5. Sign in with a superuser account. Don't have one? Make one with this command
in your devstack as the `edxapp` user:
::
edxapp@precise64:~/edx-platform$ ./manage.py lms createsuperuser
Enter a username and password that you will remember.
6. On the admin site, find the "Oauth2" section and click the link "Access
tokens". The breadcrumbs should show "Home > Oauth2 > Access tokens".
Copy the string in the "Token" column for the first row in the table. Also,
make sure the "User" of the first row is the same user that you signed in
with in step 3.
7. Paste the string as a new setting in `analyticsdataserver/settings/local.py`:
::
COURSE_BLOCK_API_AUTH_TOKEN = '<paste access token here>'
8. Run `make loaddata` again and ensure that you see the following log message
in the output:
::
INFO:analyticsdataserver.clients:Successfully authenticated with the
Course Blocks API.
9. Check that you now have video data in the API, either by querying the API in
the swagger docs at `/docs/#!/api/Videos_List_GET`, or by visiting the Insights
`engagement/videos/` page for a course.
Note: the access tokens expire in one year so you should only have to follow the
above steps once a year.
Running Tests
-------------
......
......@@ -4,12 +4,16 @@ import datetime
import logging
import math
import random
from tqdm import tqdm
from django.conf import settings
from django.core.management.base import BaseCommand
from django.utils import timezone
from analytics_data_api.v0 import models
from analytics_data_api.constants import engagement_events
from analytics_data_api.v0 import models
from analyticsdataserver.clients import CourseBlocksApiClient
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
......@@ -44,7 +48,7 @@ class Command(BaseCommand):
'--course_id',
action='store',
dest='course_id',
default='edX/DemoX/Demo_Course',
default='course-v1:edX+DemoX+Demo_Courset',
help='Course ID for which to generate fake data',
)
parser.add_argument(
......@@ -175,7 +179,7 @@ class Command(BaseCommand):
logger.info("Generating new weekly course activity data...")
progress = tqdm(total=math.ceil((end_date - start).days / 7.0) + 1)
progress = tqdm(total=math.ceil((end_date - start).days / 7.0))
while start < end_date:
active_students = random.randint(100, 4000)
# End date should occur on Saturday at 23:59:59
......@@ -198,23 +202,13 @@ class Command(BaseCommand):
logger.info("Done!")
def generate_video_timeline_data(self, video_id):
    """
    Create fake timeline rows (segments 0 through 99) for a single video.

    This method only ever adds rows. It must not clear the VideoTimeline
    table itself: generate_all_video_data() clears the table once and then
    calls this method for every video, so deleting here would wipe out the
    timelines generated for all previously processed videos. Per-call
    progress logging is likewise left to the caller, which wraps the loop
    in a tqdm progress bar.

    Arguments:
        video_id: value stored in ``pipeline_video_id`` on every created row.
    """
    for segment in range(100):
        active_students = random.randint(100, 4000)
        # Split the random total into two positive counts: users and views.
        counts = constrained_sum_sample_pos(2, active_students)
        models.VideoTimeline.objects.create(pipeline_video_id=video_id, segment=segment,
                                            num_users=counts[0], num_views=counts[1])
def generate_video_data(self, course_id, video_id, module_id):
logger.info("Deleting course video data...")
models.Video.objects.all().delete()
logger.info("Generating new course videos...")
users_at_start = 1234
models.Video.objects.create(course_id=course_id, pipeline_video_id=video_id,
encoded_module_id=module_id, duration=500, segment_length=5,
......@@ -288,11 +282,44 @@ class Command(BaseCommand):
total_submissions=total_submissions, correct_submissions=correct_submissions
)
def fetch_videos_from_course_blocks(self, course_id):
    """
    Ask the LMS Course Blocks API for the videos that belong to a course.

    Arguments:
        course_id: the course whose video blocks should be listed.

    Returns:
        Whatever ``CourseBlocksApiClient.all_videos`` returns (a list of
        dicts with ``video_id`` and ``video_module_id`` keys, or None when
        the API call fails), or None when either ``LMS_BASE_URL`` or
        ``COURSE_BLOCK_API_AUTH_TOKEN`` is missing from the Django settings.
        A falsy result tells the caller to fall back to fake video ids.
    """
    logger.info("Fetching video ids from Course Blocks API...")

    lms_base_url = getattr(settings, 'LMS_BASE_URL', None)
    if not lms_base_url:
        logger.warning("LMS_BASE_URL is not configured! Cannot get video ids.")
        return None

    # The token is optional configuration too; a missing value must lead to the
    # same graceful fallback as a missing base URL rather than an AttributeError.
    auth_token = getattr(settings, 'COURSE_BLOCK_API_AUTH_TOKEN', None)
    if not auth_token:
        logger.warning("COURSE_BLOCK_API_AUTH_TOKEN is not configured! Cannot get video ids.")
        return None

    # Tolerate a base URL that was configured without its trailing slash.
    api_base_url = lms_base_url.rstrip('/') + '/api/courses/v1/'
    logger.info("Assuming the Course Blocks API is hosted at: %s", api_base_url)

    blocks_api = CourseBlocksApiClient(api_base_url, auth_token, timeout=5)
    return blocks_api.all_videos(course_id)
def generate_all_video_data(self, course_id, videos):
    """
    Rebuild the fake video and video timeline data for a course.

    Both tables are emptied first, then data is generated for each entry in
    ``videos`` while a tqdm progress bar tracks the loop.

    Arguments:
        course_id: course the generated videos are attached to.
        videos: iterable of dicts with ``video_id`` and ``video_module_id`` keys.
    """
    purge_steps = (
        ("Deleting course video data...", models.Video),
        ("Deleting video timeline data...", models.VideoTimeline),
    )
    for message, model in purge_steps:
        logger.info(message)
        model.objects.all().delete()

    logger.info("Generating new course videos and video timeline data...")
    for entry in tqdm(videos):
        pipeline_id = entry['video_id']
        self.generate_video_data(course_id, pipeline_id, entry['video_module_id'])
        self.generate_video_timeline_data(pipeline_id)
    logger.info("Done!")
def handle(self, *args, **options):
course_id = options['course_id']
username = options['username']
video_id = '0fac49ba'
video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
video_ids = self.fetch_videos_from_course_blocks(course_id)
if not video_ids:
logger.warning("Falling back to fake video id due to Course Blocks API failure...")
video_ids = [
{
'video_id': '0fac49ba',
'video_module_id': 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
}
]
start_date = timezone.now() - datetime.timedelta(weeks=10)
num_weeks = options['num_weeks']
......@@ -304,8 +331,7 @@ class Command(BaseCommand):
logger.info("Generating data for %s...", course_id)
self.generate_weekly_data(course_id, start_date, end_date)
self.generate_daily_data(course_id, start_date, end_date)
self.generate_video_data(course_id, video_id, video_module_id)
self.generate_video_timeline_data(video_id)
self.generate_all_video_data(course_id, video_ids)
self.generate_learner_engagement_data(course_id, username, start_date, end_date)
self.generate_learner_engagement_range_data(course_id, start_date.date(), end_date.date())
self.generate_tags_distribution_data(course_id)
import logging
from edx_rest_api_client.client import EdxRestApiClient
from edx_rest_api_client.exceptions import HttpClientError
from opaque_keys.edx.keys import UsageKey
from opaque_keys import InvalidKeyError
from analyticsdataserver.utils import temp_log_level
logger = logging.getLogger(__name__)
class CourseBlocksApiClient(EdxRestApiClient):
    """
    Client for listing a course's video blocks, built on the edX Rest API Client
    (https://github.com/edx/edx-rest-api-client).

    Background on the API itself is available at
    https://openedx.atlassian.net/wiki/display/AN/Course+Structure+API.

    At the moment the only consumer is a local-only developer script (generate_fake_course_data).
    """

    def __init__(self, url, access_token, timeout):
        super(CourseBlocksApiClient, self).__init__(url, oauth_access_token=access_token, timeout=timeout)

    def all_videos(self, course_id):
        """
        Return the video blocks of ``course_id`` as a list of dicts.

        Each dict carries ``video_id`` (``course_id`` joined to the encoded block id with
        a ``|``) and ``video_module_id`` (the encoded block id alone). When the API answers
        with an HTTP client error, a warning is logged and None is returned instead.
        """
        failure_message = "Course Blocks API failed to return video ids (%s)."
        # Extra guidance appended to the warning for the status codes we can explain.
        hints = {
            401: "See README for instructions on how to authenticate the API with your local LMS.",
            404: "Does the course exist in the LMS?",
        }

        logger.debug('Retrieving course video blocks for course_id: %s', course_id)
        try:
            response = self.blocks.get(course_id=course_id, all_blocks=True, depth='all', block_types_filter='video')
        except HttpClientError as e:
            status = e.response.status_code
            hint = hints.get(status)
            if hint is None:
                logger.warning(failure_message, status)
            else:
                logger.warning(failure_message + " " + hint, status)
            return None
        logger.info("Successfully authenticated with the Course Blocks API.")

        # Setup a terrible hack to silence mysterious flood of ImportErrors from stevedore inside edx-opaque-keys.
        # (The UsageKey utility still works despite the import errors, so I think the errors are not important).
        with temp_log_level('stevedore', log_level=logging.CRITICAL):
            videos = []
            for block in response['blocks'].values():
                raw_id = block['id']
                try:
                    encoded_id = UsageKey.from_string(raw_id).html_id()
                except InvalidKeyError:
                    # Ids we cannot parse are passed through untouched.
                    encoded_id = raw_id
                videos.append({
                    'video_id': course_id + '|' + encoded_id,
                    'video_module_id': encoded_id,
                })

        return videos
......@@ -56,10 +56,15 @@ CACHES = {
ANALYTICS_DATABASE = 'analytics'
ENABLE_ADMIN_SITE = True
########## END ANALYTICS DATA API CONFIGURATION
TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
SWAGGER_SETTINGS = {
'api_key': 'edx'
}
# These two settings are used in generate_fake_course_data.py.
# Replace with correct values to generate local fake video data.
LMS_BASE_URL = 'http://localhost:8000/' # the base URL for your running local LMS instance
COURSE_BLOCK_API_AUTH_TOKEN = 'paste auth token here' # see README for instructions on how to configure this value
########## END ANALYTICS DATA API CONFIGURATION
import json
import logging
from contextlib import contextmanager
import mock
import responses
from django.conf import settings
from django.contrib.auth.models import User
from django.db.utils import ConnectionHandler, DatabaseError
from django.test import TestCase
from django.test.utils import override_settings
import mock
from rest_framework.authtoken.models import Token
from analytics_data_api.v0.models import CourseEnrollmentDaily, CourseEnrollmentByBirthYear
from analyticsdataserver.clients import CourseBlocksApiClient
from analyticsdataserver.router import AnalyticsApiRouter
from analyticsdataserver.utils import temp_log_level
class TestCaseWithAuthentication(TestCase):
......@@ -97,3 +103,98 @@ class AnalyticsApiRouterTests(TestCase):
"""
self.assertFalse(self.router.allow_relation(CourseEnrollmentDaily, User))
self.assertTrue(self.router.allow_relation(CourseEnrollmentDaily, CourseEnrollmentByBirthYear))
class UtilsTests(TestCase):
    """Tests for the helpers in analyticsdataserver.utils."""

    def setUp(self):
        self.logger = logging.getLogger('test_logger')

    def test_temp_log_level(self):
        """The log level changes inside the context manager and is back to its original value afterwards."""
        level_before = self.logger.getEffectiveLevel()

        cases = (
            ({}, logging.CRITICAL),  # NOTE: log_level defaults to logging.CRITICAL
            ({'log_level': logging.DEBUG}, logging.DEBUG),  # explicit log_level option
        )
        for options, expected_level in cases:
            with temp_log_level('test_logger', **options):
                self.assertEqual(self.logger.getEffectiveLevel(), expected_level)
            self.assertEqual(self.logger.getEffectiveLevel(), level_before)
class ClientTests(TestCase):
    """Tests for CourseBlocksApiClient.all_videos against a stubbed blocks endpoint."""

    BLOCKS_URL = 'http://example.com/blocks/'
    FAILURE_MESSAGE = 'Course Blocks API failed to return video ids (%s).'
    VIDEO_HASHES = ('5c90cffecd9b48b188cbfea176bf7fe9', '7e9b434e6de3435ab99bd3fb25bde807')

    @mock.patch('analyticsdataserver.clients.EdxRestApiClient')
    def setUp(self, *args, **kwargs):  # pylint: disable=unused-argument
        self.client = CourseBlocksApiClient('http://example.com/', 'token', 5)

    @staticmethod
    def _usage_id(video_hash):
        """Build the block-v1 usage id of a Demo_Course video block."""
        return 'block-v1:edX+DemoX+Demo_Course+type@video+block@' + video_hash

    def _stub_blocks_endpoint(self, status, payload=None):
        """Register a GET response for the blocks endpoint, with a JSON body when a payload is given."""
        if payload is None:
            responses.add(responses.GET, self.BLOCKS_URL, status=status, content_type='application/json')
        else:
            responses.add(responses.GET, self.BLOCKS_URL, body=json.dumps(payload), status=status,
                          content_type='application/json')

    @responses.activate
    def test_all_videos(self):
        first, second = self.VIDEO_HASHES
        self._stub_blocks_endpoint(200, {'blocks': {
            self._usage_id(first): {'id': self._usage_id(first)},
            self._usage_id(second): {'id': self._usage_id(second)},
        }})

        videos = self.client.all_videos('course_id')

        expected = [
            {'video_id': 'course_id|' + first, 'video_module_id': first},
            {'video_id': 'course_id|' + second, 'video_module_id': second},
        ]
        self.assertListEqual(videos, expected)

    @responses.activate
    @mock.patch('analyticsdataserver.clients.logger')
    def test_all_videos_401(self, logger):
        self._stub_blocks_endpoint(401)

        videos = self.client.all_videos('course_id')

        logger.warning.assert_called_with(
            self.FAILURE_MESSAGE + ' ' +
            'See README for instructions on how to authenticate the API with your local LMS.',
            401)
        self.assertEqual(videos, None)

    @responses.activate
    @mock.patch('analyticsdataserver.clients.logger')
    def test_all_videos_404(self, logger):
        self._stub_blocks_endpoint(404)

        videos = self.client.all_videos('course_id')

        logger.warning.assert_called_with(self.FAILURE_MESSAGE + ' ' + 'Does the course exist in the LMS?', 404)
        self.assertEqual(videos, None)

    @responses.activate
    @mock.patch('analyticsdataserver.clients.logger')
    def test_all_videos_500(self, logger):
        # NOTE(review): despite the test name, the stubbed status is 418 -- presumably because
        # all_videos only handles HttpClientError; confirm before switching this to a 5xx status.
        self._stub_blocks_endpoint(418)

        videos = self.client.all_videos('course_id')

        logger.warning.assert_called_with(self.FAILURE_MESSAGE, 418)
        self.assertEqual(videos, None)

    @responses.activate
    def test_all_videos_pass_through_bad_id(self):
        first, second = self.VIDEO_HASHES
        self._stub_blocks_endpoint(200, {'blocks': {
            self._usage_id(first): {'id': 'bad_key'},
            self._usage_id(second): {'id': 'bad_key'},
        }})
        # A second, body-less registration for the same URL.
        self._stub_blocks_endpoint(200)

        videos = self.client.all_videos('course_id')

        unparsed = {'video_id': 'course_id|bad_key', 'video_module_id': 'bad_key'}
        self.assertListEqual(videos, [dict(unparsed), dict(unparsed)])
# Put utilities that are used in managing the server or local environment here.
# Utilities critical to application functionality should go under analytics_data_api.
import logging
from contextlib import contextmanager
@contextmanager
def temp_log_level(logger_name, log_level=logging.CRITICAL):
    """
    A context manager that temporarily adjusts a logger's log level.

    By default, log_level is logging.CRITICAL, which will effectively silence the logger while the context
    manager is active.

    Arguments:
        logger_name: name of the logger to adjust, as passed to ``logging.getLogger``.
        log_level: level to apply while the context is active.

    The previous level is restored when the context exits, including when the body raises.
    """
    logger = logging.getLogger(logger_name)
    # Remember the logger's own level (which may be NOTSET) rather than its effective level.
    # Writing the effective level back would pin an inheriting logger to an explicit level,
    # so it would stop following later changes made to its ancestors.
    original_log_level = logger.level
    logger.setLevel(log_level)  # silences all logs up to but not including this level
    try:
        yield
    finally:
        # Return log level back to what it was, even if the managed block raised.
        logger.setLevel(original_log_level)
......@@ -15,3 +15,4 @@ Markdown==2.6.6 # BSD
edx-ccx-keys==0.2.1
edx-django-release-util==0.2.0
edx-opaque-keys==0.4.0
edx-rest-api-client==1.4.0 # Apache 2.0
......@@ -13,3 +13,4 @@ pep257==0.7.0
pep8==1.7.0
pylint==1.6.4
pytz==2016.6.1
responses==0.5.1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment