Commit 0cd1f00c by Qubad786 Committed by muzaffaryousaf

3Play Media API Integration

This implements 3PlayMedia API integration in pipeline.
Other than that, it also includes:
-- rename 'Transcript Preferences' to 'Transcript Credentials'.
-- Add migrations.
-- Rename 'Transcription Ready' to 'Transcript Ready'.
parent fdfb34cf
...@@ -39,4 +39,10 @@ urlpatterns = [ ...@@ -39,4 +39,10 @@ urlpatterns = [
view=transcripts.Cielo24CallbackHandlerView.as_view(), view=transcripts.Cielo24CallbackHandlerView.as_view(),
name='cielo24_transcript_completed' name='cielo24_transcript_completed'
), ),
# 3PlayMedia callback handler view
url(
regex=r'^3playmedia/transcripts/handle/(?P<token>[\w]+)$',
view=transcripts.ThreePlayMediaCallbackHandlerView.as_view(),
name='3play_media_callback'
)
] ]
...@@ -2,7 +2,7 @@ from django.contrib import admin ...@@ -2,7 +2,7 @@ from django.contrib import admin
from VEDA_OS01.models import ( from VEDA_OS01.models import (
Course, Video, Encode, URL, Destination, Institution, VedaUpload, Course, Video, Encode, URL, Destination, Institution, VedaUpload,
TranscriptPreferences, TranscriptProcessMetadata TranscriptCredentials, TranscriptProcessMetadata
) )
...@@ -111,8 +111,8 @@ class VideoUploadAdmin(admin.ModelAdmin): ...@@ -111,8 +111,8 @@ class VideoUploadAdmin(admin.ModelAdmin):
] ]
class TranscriptPreferencesAdmin(admin.ModelAdmin): class TranscriptCredentialsAdmin(admin.ModelAdmin):
model = TranscriptPreferences model = TranscriptCredentials
class TranscriptProcessMetadataAdmin(admin.ModelAdmin): class TranscriptProcessMetadataAdmin(admin.ModelAdmin):
...@@ -126,5 +126,5 @@ admin.site.register(URL, URLAdmin) ...@@ -126,5 +126,5 @@ admin.site.register(URL, URLAdmin)
admin.site.register(Destination, DestinationAdmin) admin.site.register(Destination, DestinationAdmin)
admin.site.register(Institution, InstitutionAdmin) admin.site.register(Institution, InstitutionAdmin)
admin.site.register(VedaUpload, VideoUploadAdmin) admin.site.register(VedaUpload, VideoUploadAdmin)
admin.site.register(TranscriptPreferences, TranscriptPreferencesAdmin) admin.site.register(TranscriptCredentials, TranscriptCredentialsAdmin)
admin.site.register(TranscriptProcessMetadata, TranscriptProcessMetadataAdmin) admin.site.register(TranscriptProcessMetadata, TranscriptProcessMetadataAdmin)
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2017-09-11 11:13
from __future__ import unicode_literals
import VEDA_OS01.models
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
import model_utils.fields
class Migration(migrations.Migration):
    """
    Schema migration for third-party transcript support.

    Creates the `TranscriptCredentials` and `TranscriptProcessMetadata` models,
    adds the transcription-related columns to `video`, and redefines
    `video_trans_status` with a choices list that includes the transcription
    states.
    """

    # Applies on top of the app's initial migration.
    dependencies = [
        ('VEDA_OS01', '0001_initial'),
    ]

    operations = [
        # API key/secret for a transcript provider, stored per organization.
        migrations.CreateModel(
            name='TranscriptCredentials',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
                ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
                ('org', models.CharField(help_text=b'This value must match the value of organization in studio/edx-platform.', max_length=50, verbose_name=b'Organization')),
                ('provider', models.CharField(choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=50, verbose_name=b'Transcript provider')),
                ('api_key', models.CharField(max_length=255, verbose_name=b'API key')),
                ('api_secret', models.CharField(blank=True, max_length=255, null=True, verbose_name=b'API secret')),
            ],
            options={
                'verbose_name_plural': 'Transcript Credentials',
            },
        ),
        # One row per transcription/translation process started with a provider.
        migrations.CreateModel(
            name='TranscriptProcessMetadata',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
                ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
                ('provider', models.CharField(choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=50, verbose_name=b'Transcript provider')),
                ('process_id', models.CharField(max_length=255, verbose_name=b'Process id')),
                ('translation_id', models.CharField(blank=True, max_length=255, null=True, verbose_name=b'Translation id')),
                ('lang_code', models.CharField(max_length=8, verbose_name=b'Language code')),
                ('status', models.CharField(choices=[(b'PENDING', b'PENDING'), (b'IN PROGRESS', b'IN PROGRESS'), (b'FAILED', b'FAILED'), (b'READY', b'READY')], default=b'PENDING', max_length=50, verbose_name=b'Transcript status')),
            ],
            options={
                'get_latest_by': 'modified',
                'verbose_name_plural': 'Transcript process metadata',
            },
        ),
        # Transcription settings stored on each video.
        migrations.AddField(
            model_name='video',
            name='cielo24_fidelity',
            field=models.CharField(blank=True, choices=[(b'MECHANICAL', b'Mechanical, 75% Accuracy'), (b'PREMIUM', b'Premium, 95% Accuracy'), (b'PROFESSIONAL', b'Professional, 99% Accuracy')], max_length=20, null=True, verbose_name=b'Cielo24 Fidelity'),
        ),
        migrations.AddField(
            model_name='video',
            name='cielo24_turnaround',
            field=models.CharField(blank=True, choices=[(b'STANDARD', b'Standard, 48h'), (b'PRIORITY', b'Priority, 24h')], max_length=20, null=True, verbose_name=b'Cielo24 Turnaround'),
        ),
        migrations.AddField(
            model_name='video',
            name='preferred_languages',
            field=VEDA_OS01.models.ListField(blank=True, default=[]),
        ),
        migrations.AddField(
            model_name='video',
            name='process_transcription',
            field=models.BooleanField(default=False, verbose_name=b'Process transcripts from Cielo24/3PlayMedia'),
        ),
        migrations.AddField(
            model_name='video',
            name='provider',
            field=models.CharField(blank=True, choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=20, null=True, verbose_name=b'Transcription provider'),
        ),
        migrations.AddField(
            model_name='video',
            name='three_play_turnaround',
            field=models.CharField(blank=True, choices=[(b'extended_service', b'10-Day/Extended'), (b'default', b'4-Day/Default'), (b'expedited_service', b'2-Day/Expedited'), (b'rush_service', b'24 hour/Rush'), (b'same_day_service', b'Same Day')], max_length=20, null=True, verbose_name=b'3PlayMedia Turnaround'),
        ),
        # The status choices below include 'transcription_in_progress' and 'transcript_ready'.
        migrations.AlterField(
            model_name='video',
            name='video_trans_status',
            field=models.CharField(choices=[(b'Ingest', b'System Ingest'), (b'Transcode Queue', b'Transcode Queue'), (b'Active Transcode', b'Active Transcode'), (b'Transcode Retry', b'Transcode Retry'), (b'Transcode Complete', b'Transcode Complete'), (b'Deliverable Upload', b'Deliverable Upload'), (b'File Complete', b'File Complete'), (b'Transcode Error', b'Transcode Error'), (b'Corrupt File', b'Corrupt File on Ingest'), (b'Review Hold', b'Review Hold'), (b'Review Reject', b'Review Rejected'), (b'Final Publish', b'Review to Final Publish'), (b'Youtube Duplicate', b'Youtube Duplicate'), (b'In Encode Queue', b'In Encode Queue'), (b'Progress', b'In Progress'), (b'Complete', b'Complete'), (b'transcription_in_progress', b'Transcription In Progress'), (b'transcript_ready', b'Transcript Ready')], default=b'Ingest', max_length=100, verbose_name=b'Transcode Status'),
        ),
        # Link each process row to its video; rows are deleted with the video (CASCADE).
        migrations.AddField(
            model_name='transcriptprocessmetadata',
            name='video',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='VEDA_OS01.Video'),
        ),
        # At most one credentials row per (org, provider) pair.
        migrations.AlterUniqueTogether(
            name='transcriptcredentials',
            unique_together=set([('org', 'provider')]),
        ),
    ]
...@@ -127,7 +127,7 @@ class VideoStatus(object): ...@@ -127,7 +127,7 @@ class VideoStatus(object):
PROGRESS = 'Progress' PROGRESS = 'Progress'
COMPLETE = 'Complete' COMPLETE = 'Complete'
TRANSCRIPTION_IN_PROGRESS = 'transcription_in_progress' TRANSCRIPTION_IN_PROGRESS = 'transcription_in_progress'
TRANSCRIPTION_READY = 'transcription_ready' TRANSCRIPT_READY = 'transcript_ready'
CHOICES = ( CHOICES = (
(SI, 'System Ingest'), (SI, 'System Ingest'),
...@@ -147,7 +147,7 @@ class VideoStatus(object): ...@@ -147,7 +147,7 @@ class VideoStatus(object):
(PROGRESS, 'In Progress'), (PROGRESS, 'In Progress'),
(COMPLETE, 'Complete'), (COMPLETE, 'Complete'),
(TRANSCRIPTION_IN_PROGRESS, 'Transcription In Progress'), (TRANSCRIPTION_IN_PROGRESS, 'Transcription In Progress'),
(TRANSCRIPTION_READY, 'Transcription Ready'), (TRANSCRIPT_READY, 'Transcript Ready'),
) )
...@@ -619,9 +619,9 @@ class VedaUpload (models.Model): ...@@ -619,9 +619,9 @@ class VedaUpload (models.Model):
) )
class TranscriptPreferences(TimeStampedModel): class TranscriptCredentials(TimeStampedModel):
""" """
Model to contain third party transcription service provider preferances. Model to contain third party transcription service provider preferences.
""" """
org = models.CharField( org = models.CharField(
'Organization', 'Organization',
...@@ -634,7 +634,7 @@ class TranscriptPreferences(TimeStampedModel): ...@@ -634,7 +634,7 @@ class TranscriptPreferences(TimeStampedModel):
class Meta: class Meta:
unique_together = ('org', 'provider') unique_together = ('org', 'provider')
verbose_name_plural = 'Transcript preferences' verbose_name_plural = 'Transcript Credentials'
def __unicode__(self): def __unicode__(self):
return u'{org} - {provider}'.format(org=self.org, provider=self.provider) return u'{org} - {provider}'.format(org=self.org, provider=self.provider)
...@@ -647,7 +647,9 @@ class TranscriptProcessMetadata(TimeStampedModel): ...@@ -647,7 +647,9 @@ class TranscriptProcessMetadata(TimeStampedModel):
video = models.ForeignKey(Video) video = models.ForeignKey(Video)
provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES) provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES)
process_id = models.CharField('Process id', max_length=255) process_id = models.CharField('Process id', max_length=255)
lang_code = models.CharField('Language code', max_length=3) # To keep track of 3Play Translations.
translation_id = models.CharField('Translation id', max_length=255, null=True, blank=True)
lang_code = models.CharField('Language code', max_length=8)
status = models.CharField( status = models.CharField(
'Transcript status', 'Transcript status',
max_length=50, max_length=50,
...@@ -659,6 +661,17 @@ class TranscriptProcessMetadata(TimeStampedModel): ...@@ -659,6 +661,17 @@ class TranscriptProcessMetadata(TimeStampedModel):
verbose_name_plural = 'Transcript process metadata' verbose_name_plural = 'Transcript process metadata'
get_latest_by = 'modified' get_latest_by = 'modified'
def update(self, **fields):
    """
    Set the given attributes on this process and persist it.

    Keyword Arguments:
        fields(dict): attribute name -> new value pairs to assign on the instance.
    """
    # `items()` is available on both Python 2 and Python 3 dicts, whereas
    # `iteritems()` only exists on Python 2.
    for attr, value in fields.items():
        setattr(self, attr, value)

    # A single save persists every attribute assigned above.
    self.save()
def __unicode__(self): def __unicode__(self):
return u'{video} - {provider} - {lang}'.format( return u'{video} - {provider} - {lang}'.format(
video=self.video.edx_id, video=self.video.edx_id,
......
from __future__ import absolute_import from __future__ import absolute_import
import os
import sys
from celery import Celery from celery import Celery
import yaml import yaml
from VEDA_OS01.transcripts import retrieve_three_play_translations
""" """
Start Celery Worker Start Celery Worker
...@@ -51,6 +51,18 @@ app.conf.update( ...@@ -51,6 +51,18 @@ app.conf.update(
CELERY_ACCEPT_CONTENT=['pickle', 'json', 'msgpack', 'yaml'] CELERY_ACCEPT_CONTENT=['pickle', 'json', 'msgpack', 'yaml']
) )
# Periodic (celery beat) schedule: run the 3Play translations fetch task every 30 seconds.
# The 'task' value must be the exact name the task is registered under, i.e. the
# `name=` given to `@app.task(...)`; otherwise beat dispatches a task name that
# no worker has registered.
app.conf.beat_schedule = {
    'check-3play-translations-every-30-seconds': {
        'task': 'fetch_three_play_translations',
        'schedule': 30.0,
    },
}
@app.task(name='fetch_three_play_translations')
def fetch_three_play_translations():
    """
    Celery task that delegates to `retrieve_three_play_translations`.
    """
    retrieve_three_play_translations()
@app.task(name='worker_encode') @app.task(name='worker_encode')
def worker_task_fire(veda_id, encode_profile, jobid): def worker_task_fire(veda_id, encode_profile, jobid):
......
"""
3PlayMedia transcription unit tests
"""
import json
import urllib
import responses
from ddt import ddt, data, unpack
from mock import patch
from django.test import TestCase
from control.veda_deliver_3play import (
ThreePlayMediaClient,
ThreePlayMediaUrlError,
ThreePlayMediaPerformTranscriptionError,
)
from VEDA_OS01.models import (
Course,
TranscriptProcessMetadata,
Video,
ThreePlayTurnaround,
)
# Keyword arguments used to create the `Video` fixture in the tests' setUp;
# 'studio_id' is also the value expected in the asserted log messages.
VIDEO_DATA = {
    'studio_id': '12345'
}
@ddt
class ThreePlayMediaClientTests(TestCase):
    """
    3PlayMedia transcription tests
    """

    def setUp(self):
        """
        Create the course/video fixtures and the client constructor arguments.
        """
        self.course = Course.objects.create(
            course_name=u'Intro to VEDA',
            institution=u'MAx',
            edx_classid=u'123'
        )
        self.video = Video.objects.create(
            inst_class=self.course,
            **VIDEO_DATA
        )

        # Keyword arguments accepted by `ThreePlayMediaClient.__init__`.
        self.video_transcript_preferences = {
            'org': u'MAx',
            'video': self.video,
            'media_url': u'https://s3.amazonaws.com/bkt/video.mp4',
            'api_key': u'insecure_api_key',
            'api_secret': u'insecure_api_secret',
            'turnaround_level': ThreePlayTurnaround.DEFAULT,
            'callback_url': 'https://veda.edx.org/3playmedia/transcripts/handle/123123?org=MAx&edx_video_id=12345'
        }

    def assert_request(self, received_request, expected_request):
        """
        Verify that `received_request` matches `expected_request`

        Arguments:
            received_request: request object recorded by `responses`.
            expected_request(dict): attribute name -> expected value. For the
                'headers' key only the listed headers are compared, so extra
                headers on the received request are ignored.
        """
        # `items()` works on both Python 2 and Python 3 (`iteritems()` is Python 2 only).
        for request_attr, expected_value in expected_request.items():
            actual_value = getattr(received_request, request_attr)
            if request_attr == 'headers':
                for header, expected_header in expected_value.items():
                    self.assertEqual(actual_value[header], expected_header)
            else:
                self.assertEqual(actual_value, expected_value)

    @responses.activate
    @patch('control.veda_deliver_3play.LOGGER')
    def test_transcription_flow(self, mock_logger):
        """
        Verify 3PlayMedia happy transcription flow
        """
        three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)

        responses.add(
            responses.HEAD,
            u'https://s3.amazonaws.com/bkt/video.mp4',
            headers={'Content-Type': u'video/mp4'},
            status=200,
        )
        responses.add(
            responses.POST,
            u'https://api.3playmedia.com/files',
            body=u'111222',
            status=200
        )

        three_play_client.generate_transcripts()

        # Total of 2 HTTP requests are made
        self.assertEqual(len(responses.calls), 2)

        body = dict(
            # Mandatory attributes required for transcription
            link=self.video_transcript_preferences['media_url'],
            apikey=self.video_transcript_preferences['api_key'],
            api_secret_key=self.video_transcript_preferences['api_secret'],
            turnaround_level=self.video_transcript_preferences['turnaround_level'],
            callback_url=self.video_transcript_preferences['callback_url'],
        )

        expected_requests = [
            {
                'url': u'https://s3.amazonaws.com/bkt/video.mp4',
                'body': None,
                'method': 'HEAD',
            },
            {
                'url': u'https://api.3playmedia.com/files',
                'body': json.dumps(body),
                'method': 'POST',
                'headers': {'Content-Type': 'application/json'}
            },
        ]

        for position, expected_request in enumerate(expected_requests):
            self.assert_request(responses.calls[position].request, expected_request)

        # A process record is created to track the started transcription.
        self.assertEqual(TranscriptProcessMetadata.objects.count(), 1)

        mock_logger.info.assert_called_with(
            '[3PlayMedia] Transcription process has been started for video=%s, language=en.',
            VIDEO_DATA['studio_id'],
        )

    @data(
        {
            'headers': {'Content-Type': u'video/mp4'},
            'status': 400,
        },
        {
            'headers': {'Content-Type': u'application/json'},
            'status': 200,
        }
    )
    @responses.activate
    def test_validate_media_url(self, response):
        """
        Tests media url validations.
        """
        responses.add(responses.HEAD, u'https://s3.amazonaws.com/bkt/video.mp4', **response)
        three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)
        with self.assertRaises(ThreePlayMediaUrlError):
            three_play_client.validate_media_url()

    @data(
        {
            'body': None,
            'status': 400,
        },
        {
            'body': json.dumps({'iserror': True, 'error': 'Submission has failed'}),
            'status': 200,
        }
    )
    @responses.activate
    def test_submit_media_exceptions(self, response):
        """
        Tests media submission exceptions
        """
        # The media URL itself is valid; only the submission response varies.
        responses.add(
            responses.HEAD,
            u'https://s3.amazonaws.com/bkt/video.mp4',
            headers={'Content-Type': u'video/mp4'},
            status=200,
        )
        responses.add(responses.POST, u'https://api.3playmedia.com/files', **response)

        three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)
        with self.assertRaises(ThreePlayMediaPerformTranscriptionError):
            three_play_client.submit_media()

    @data(
        (
            {
                'body': None,
                'status': 400,
            },
            {
                'body': '11111',
                'status': 200,
            },
        ),
        (
            {
                'headers': {'Content-Type': u'video/mp4'},
                'status': 200,
            },
            {
                'body': None,
                'status': 400,
            },
        )
    )
    @unpack
    @responses.activate
    @patch('control.veda_deliver_3play.LOGGER')
    def test_generate_transcripts_exceptions(self, first_response, second_response, mock_log):
        """
        Tests the proper exceptions during transcript generation.
        """
        responses.add(responses.HEAD, u'https://s3.amazonaws.com/bkt/video.mp4', **first_response)
        responses.add(responses.POST, u'https://api.3playmedia.com/files', **second_response)

        three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)
        three_play_client.generate_transcripts()

        # The failure is logged rather than raised, and nothing is recorded.
        self.assertFalse(mock_log.info.called)
        mock_log.exception.assert_called_with(
            u'[3PlayMedia] Could not process transcripts for video=%s language=en.',
            VIDEO_DATA['studio_id'],
        )
        self.assertEqual(TranscriptProcessMetadata.objects.count(), 0)
import datetime import datetime
import ftplib
import logging import logging
import os
import shutil import shutil
import sys
from os.path import expanduser from os.path import expanduser
import boto import boto
...@@ -16,11 +13,12 @@ from boto.s3.key import Key ...@@ -16,11 +13,12 @@ from boto.s3.key import Key
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
import veda_deliver_xuetang import veda_deliver_xuetang
from control.veda_deliver_3play import ThreePlayMediaClient
from control_env import * from control_env import *
from veda_deliver_cielo import Cielo24Transcript from veda_deliver_cielo import Cielo24Transcript
from veda_deliver_youtube import DeliverYoutube from veda_deliver_youtube import DeliverYoutube
from VEDA_OS01 import utils from VEDA_OS01 import utils
from VEDA_OS01.models import (TranscriptPreferences, TranscriptProvider, from VEDA_OS01.models import (TranscriptCredentials, TranscriptProvider,
VideoStatus) VideoStatus)
from VEDA_OS01.utils import build_url from VEDA_OS01.utils import build_url
from veda_utils import ErrorObject, Metadata, Output, VideoProto from veda_utils import ErrorObject, Metadata, Output, VideoProto
...@@ -190,7 +188,6 @@ class VedaDelivery: ...@@ -190,7 +188,6 @@ class VedaDelivery:
self._CLEANUP() self._CLEANUP()
self._THREEPLAY_UPLOAD()
# Transcription Process # Transcription Process
# We only want to generate transcripts for `desktop_mp4` profile. # We only want to generate transcripts for `desktop_mp4` profile.
if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription: if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription:
...@@ -203,7 +200,6 @@ class VedaDelivery: ...@@ -203,7 +200,6 @@ class VedaDelivery:
if self.video_query.provider == TranscriptProvider.CIELO24: if self.video_query.provider == TranscriptProvider.CIELO24:
self.cielo24_transcription_flow() self.cielo24_transcription_flow()
def _INFORM_INTAKE(self): def _INFORM_INTAKE(self):
""" """
Collect all salient metadata and Collect all salient metadata and
...@@ -538,8 +534,8 @@ class VedaDelivery: ...@@ -538,8 +534,8 @@ class VedaDelivery:
org = utils.extract_course_org(self.video_proto.platform_course_url[0]) org = utils.extract_course_org(self.video_proto.platform_course_url[0])
try: try:
api_key = TranscriptPreferences.objects.get(org=org, provider=self.video_query.provider).api_key api_key = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider).api_key
except TranscriptPreferences.DoesNotExist: except TranscriptCredentials.DoesNotExist:
LOGGER.warn('[cielo24] Unable to find api_key for org=%s', org) LOGGER.warn('[cielo24] Unable to find api_key for org=%s', org)
return None return None
...@@ -574,47 +570,54 @@ class VedaDelivery: ...@@ -574,47 +570,54 @@ class VedaDelivery:
) )
cielo24.start_transcription_flow() cielo24.start_transcription_flow()
def _THREEPLAY_UPLOAD(self): def start_3play_transcription_process(self):
"""
if self.video_query.inst_class.tp_proc is False: 3PlayMedia Transcription Flow
return None """
if self.video_query.inst_class.mobile_override is False:
if self.encode_profile != 'desktop_mp4':
return None
ftp1 = ftplib.FTP(
self.auth_dict['threeplay_ftphost']
)
user = self.video_query.inst_class.tp_username.strip()
passwd = self.video_query.inst_class.tp_password.strip()
try:
ftp1.login(user, passwd)
except:
ErrorObject.print_error(
message='3Play Authentication Failure'
)
try: try:
ftp1.cwd( # Picks the first course from the list as there may be multiple
self.video_query.inst_class.tp_speed # course runs in that list (i.e. all having the same org).
) org = utils.extract_course_org(self.video_proto.platform_course_url[0])
except: transcript_secrets = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider)
ftp1.mkd(
self.video_query.inst_class.tp_speed # update transcript status for video in edx-val
) VALAPICall(video_proto=None, val_status=None).update_video_status(
ftp1.cwd( self.video_query.studio_id, VideoStatus.TRANSCRIPTION_IN_PROGRESS
self.video_query.inst_class.tp_speed
) )
os.chdir(self.node_work_directory)
ftp1.storbinary( # Initialize 3playMedia client and start transcription process
'STOR ' + self.encoded_file, s3_video_url = build_url(
open(os.path.join( self.auth_dict['s3_base_url'],
self.node_work_directory, self.auth_dict['edx_s3_endpoint_bucket'],
self.encoded_file self.encoded_file
), 'rb')
) )
callback_url = build_url(
os.chdir(homedir) self.auth_dict['veda_base_url'],
reverse(
'3play_media_callback',
args=[self.auth_dict['transcript_provider_request_token']]
),
# Additional attributes that'll come back with the callback
org=org,
edx_video_id=self.video_query.studio_id,
)
three_play_media = ThreePlayMediaClient(
org=org,
video=self.video_query,
media_url=s3_video_url,
api_key=transcript_secrets.api_key,
api_secret=transcript_secrets.api_secret,
callback_url=callback_url,
turnaround_level=self.video_query.three_play_turnaround,
)
three_play_media.generate_transcripts()
except TranscriptCredentials.DoesNotExist:
LOGGER.warning(
'Transcript preference is not found for provider=%s, video=%s',
self.video_query.provider,
self.video_query.studio_id,
)
def _XUETANG_ROUTE(self): def _XUETANG_ROUTE(self):
if self.video_query.inst_class.xuetang_proc is False: if self.video_query.inst_class.xuetang_proc is False:
......
"""
3PlayMedia Transcription Client
"""
import json
import logging
import requests
import sys
from requests.packages.urllib3.exceptions import InsecurePlatformWarning
from VEDA_OS01.models import TranscriptProcessMetadata, TranscriptProvider, TranscriptStatus
from VEDA_OS01.utils import build_url
# Silence urllib3's InsecurePlatformWarning.
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
class ThreePlayMediaError(Exception):
    """
    Base class for errors raised while performing 3PlayMedia actions.
    """
class ThreePlayMediaLanguageNotFoundError(ThreePlayMediaError):
    """
    Raised when a language is not among the available 3PlayMedia languages.
    """
class ThreePlayMediaPerformTranscriptionError(ThreePlayMediaError):
    """
    Raised when adding media for transcription fails.
    """
class ThreePlayMediaUrlError(ThreePlayMediaError):
    """
    Raised when the media url is inaccessible or has an invalid content type.
    """
class ThreePlayMediaClient(object):
    """
    Client that submits a media file to 3PlayMedia for transcription and
    records the started process in `TranscriptProcessMetadata`.
    """

    def __init__(self, org, video, media_url, api_key, api_secret, callback_url, turnaround_level):
        """
        Initialize 3play media client

        Arguments:
            org: organization the video belongs to (stored on the client).
            video: video object; its `studio_id` is logged and the object is
                linked to the created `TranscriptProcessMetadata`.
            media_url: URL of the media file, sent to 3PlayMedia as `link`.
            api_key: 3PlayMedia API key, sent as `apikey`.
            api_secret: 3PlayMedia API secret, sent as `api_secret_key`.
            callback_url: URL sent to 3PlayMedia as `callback_url`.
            turnaround_level: value sent to 3PlayMedia as `turnaround_level`.
        """
        self.org = org
        self.video = video
        self.media_url = media_url
        self.api_key = api_key
        self.api_secret = api_secret
        self.callback_url = callback_url
        self.turnaround_level = turnaround_level

        # default attributes
        self.base_url = u'https://api.3playmedia.com/'
        self.upload_media_file_url = u'files/'
        self.available_languages_url = u'caption_imports/available_languages/'
        self.allowed_content_type = u'video/mp4'

    def validate_media_url(self):
        """
        Validates the media URL

        Raises:
            ThreePlayMediaUrlError: on a missing or inaccessible media url, or
                when its content type is missing or not the allowed one.
        """
        if not self.media_url:
            raise ThreePlayMediaUrlError('Invalid media URL "{media_url}".'.format(media_url=self.media_url))

        response = requests.head(url=self.media_url)
        if not response.ok:
            raise ThreePlayMediaUrlError('The URL "{media_url}" is not Accessible.'.format(media_url=self.media_url))

        # Use `.get` so that a response without a Content-Type header is reported
        # as a media URL error instead of escaping as a KeyError.
        content_type = response.headers.get('Content-Type')
        if content_type != self.allowed_content_type:
            raise ThreePlayMediaUrlError(
                'Media content-type should be "{allowed_type}". URL was "{media_url}", content-type was "{type}"'.format(
                    allowed_type=self.allowed_content_type,
                    media_url=self.media_url,
                    type=content_type,
                )
            )

    def submit_media(self):
        """
        Submits the media to perform transcription.

        Returns:
            The response text of the upload request, i.e. the 3PlayMedia file id.

        Raises:
            ThreePlayMediaUrlError: when the media url does not validate.
            ThreePlayMediaPerformTranscriptionError: error while transcription process
        """
        self.validate_media_url()

        # Prepare requests payload
        payload = dict(
            # Mandatory attributes required for transcription
            link=self.media_url,
            apikey=self.api_key,
            api_secret_key=self.api_secret,
            turnaround_level=self.turnaround_level,
            callback_url=self.callback_url,
        )
        upload_url = build_url(self.base_url, self.upload_media_file_url)
        response = requests.post(url=upload_url, json=payload)

        if not response.ok:
            raise ThreePlayMediaPerformTranscriptionError(
                'Upload file request failed with: {response} -- {status}'.format(
                    response=response.text, status=response.status_code
                )
            )

        # A normal response should be a text containing file id and if we're getting a deserializable dict, there
        # must be an error: http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        try:
            parsed_response = json.loads(response.text)
        except ValueError:
            # A body that is not even valid JSON (e.g. empty or HTML) is not a file id
            # either; report it through the client's own error type.
            raise ThreePlayMediaPerformTranscriptionError(
                'Expected file id but got: {response}'.format(response=response.text)
            )

        if isinstance(parsed_response, dict):
            raise ThreePlayMediaPerformTranscriptionError(
                'Expected file id but got: {response}'.format(response=response.text)
            )

        return response.text

    def generate_transcripts(self):
        """
        Kicks off transcription process for default language.

        3PlayMedia errors are logged and swallowed; any other exception is
        logged and re-raised.
        """
        try:
            file_id = self.submit_media()
            # Track progress of transcription process
            TranscriptProcessMetadata.objects.create(
                video=self.video,
                process_id=file_id,
                lang_code=u'en',
                provider=TranscriptProvider.THREE_PLAY,
                status=TranscriptStatus.IN_PROGRESS,
            )
            # Successfully kicked off transcription process for a video with the given language.
            LOGGER.info(
                '[3PlayMedia] Transcription process has been started for video=%s, language=en.',
                self.video.studio_id,
            )
        except ThreePlayMediaError:
            LOGGER.exception(
                '[3PlayMedia] Could not process transcripts for video=%s language=en.',
                self.video.studio_id,
            )
        except Exception:
            LOGGER.exception(
                '[3PlayMedia] Unexpected error while transcription for video=%s language=en .',
                self.video.studio_id,
            )
            raise
def main():
    """
    Script entry point; currently a no-op that returns None.
    """
    return None


if __name__ == '__main__':
    # Exit with main()'s return value.
    sys.exit(main())
...@@ -189,8 +189,7 @@ class Cielo24Transcript(object): ...@@ -189,8 +189,7 @@ class Cielo24Transcript(object):
Returns: Returns:
cielo24 job id cielo24 job id
""" """
response = requests.get( create_job_url = build_url(
build_url(
self.cielo24_site, self.cielo24_site,
self.cielo24_new_job, self.cielo24_new_job,
v=1, v=1,
...@@ -198,13 +197,14 @@ class Cielo24Transcript(object): ...@@ -198,13 +197,14 @@ class Cielo24Transcript(object):
api_token=self.api_key, api_token=self.api_key,
job_name=self.video.studio_id job_name=self.video.studio_id
) )
) response = requests.get(create_job_url)
if not response.ok: if not response.ok:
raise Cielo24CreateJobError( raise Cielo24CreateJobError(
'[CREATE JOB ERROR] status={} -- text={}'.format( '[CREATE JOB ERROR] url={} -- status={} -- text={}'.format(
create_job_url,
response.status_code, response.status_code,
response.text response.text,
) )
) )
......
...@@ -7,7 +7,7 @@ import boto.s3 ...@@ -7,7 +7,7 @@ import boto.s3
from boto.exception import S3ResponseError, S3DataError from boto.exception import S3ResponseError, S3DataError
import yaml import yaml
from VEDA_OS01.models import TranscriptPreferences from VEDA_OS01.models import TranscriptCredentials
from VEDA_OS01.utils import extract_course_org from VEDA_OS01.utils import extract_course_org
try: try:
...@@ -237,12 +237,12 @@ class FileDiscovery(object): ...@@ -237,12 +237,12 @@ class FileDiscovery(object):
# Make decision if this video needs the transcription as well. # Make decision if this video needs the transcription as well.
try: try:
transcript_preferences = json.loads(transcript_preferences) transcript_preferences = json.loads(transcript_preferences)
TranscriptPreferences.objects.get( TranscriptCredentials.objects.get(
org=extract_course_org(course_url), org=extract_course_org(course_url),
provider=transcript_preferences.get('provider') provider=transcript_preferences.get('provider')
) )
process_transcription = True process_transcription = True
except (TypeError, TranscriptPreferences.DoesNotExist): except (TypeError, TranscriptCredentials.DoesNotExist):
# when the preferences are not set OR these are set to some data in invalid format OR these don't # when the preferences are not set OR these are set to some data in invalid format OR these don't
# have associated 3rd party transcription provider API keys. # have associated 3rd party transcription provider API keys.
process_transcription = False process_transcription = False
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment