Commit 6c87fa83 by Qubad786

Integrate 3PlayMedia Transcription API

parent 1b69155e
......@@ -39,4 +39,10 @@ urlpatterns = [
view=transcripts.Cielo24CallbackHandlerView.as_view(),
name='cielo24_transcript_completed'
),
# 3PlayMedia callback handler view
url(
regex=r'^3playmedia/transcripts/handle/(?P<token>[\w]+)$',
view=transcripts.ThreePlayMediaCallbackHandlerView.as_view(),
name='3play_media_callback'
)
]
......@@ -647,6 +647,8 @@ class TranscriptProcessMetadata(TimeStampedModel):
video = models.ForeignKey(Video)
provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES)
process_id = models.CharField('Process id', max_length=255)
# To keep track of 3Play Translations.
translation_id = models.CharField('Translation id', max_length=255, null=True, blank=True)
lang_code = models.CharField('Language code', max_length=3)
status = models.CharField(
'Transcript status',
......
from __future__ import absolute_import
import os
import sys
from celery import Celery
import yaml
from VEDA_OS01.transcripts import retrieve_three_play_translations
"""
Start Celery Worker
......@@ -51,6 +51,18 @@ app.conf.update(
CELERY_ACCEPT_CONTENT=['pickle', 'json', 'msgpack', 'yaml']
)
app.conf.beat_schedule = {
'check-3play-translations-every-30-seconds': {
'task': 'tasks.fetch_three_play_translations',
'schedule': 30.0,
},
}
@app.task(name='fetch_three_play_translations')
def fetch_three_play_translations():
retrieve_three_play_translations()
@app.task(name='worker_encode')
def worker_task_fire(veda_id, encode_profile, jobid):
......
import datetime
import ftplib
import logging
import os
import shutil
import sys
from os.path import expanduser
import boto
......@@ -16,6 +13,7 @@ from boto.s3.key import Key
from django.core.urlresolvers import reverse
import veda_deliver_xuetang
from control.veda_deliver_3play import ThreePLayMediaClient
from control_env import *
from veda_deliver_cielo import Cielo24Transcript
from veda_deliver_youtube import DeliverYoutube
......@@ -190,7 +188,6 @@ class VedaDelivery:
self._CLEANUP()
self._THREEPLAY_UPLOAD()
# Transcription Process
# We only want to generate transcripts for `desktop_mp4` profile.
if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription:
......@@ -203,7 +200,6 @@ class VedaDelivery:
if self.video_query.provider == TranscriptProvider.CIELO24:
self.cielo24_transcription_flow()
def _INFORM_INTAKE(self):
"""
Collect all salient metadata and
......@@ -574,48 +570,55 @@ class VedaDelivery:
)
cielo24.start_transcription_flow()
def _THREEPLAY_UPLOAD(self):
if self.video_query.inst_class.tp_proc is False:
return None
if self.video_query.inst_class.mobile_override is False:
if self.encode_profile != 'desktop_mp4':
return None
ftp1 = ftplib.FTP(
self.auth_dict['threeplay_ftphost']
)
user = self.video_query.inst_class.tp_username.strip()
passwd = self.video_query.inst_class.tp_password.strip()
def start_3play_transcription_process(self):
"""
3PlayMedia Transcription Flow
"""
try:
ftp1.login(user, passwd)
except:
ErrorObject.print_error(
message='3Play Authentication Failure'
# Picks the first course from the list as there may be multiple
# course runs in that list (i.e. all having the same org).
org = utils.extract_course_org(self.video_proto.platform_course_url[0])
transcript_secrets = TranscriptPreferences.objects.get(org=org, provider=self.video_query.provider)
# update transcript status for video in edx-val
VALAPICall(video_proto=None, val_status=None).update_video_status(
self.video_query.studio_id, VideoStatus.TRANSCRIPTION_IN_PROGRESS
)
try:
ftp1.cwd(
self.video_query.inst_class.tp_speed
# Initialize 3playMedia client and start transcription process
s3_video_url = build_url(
self.auth_dict['s3_base_url'],
self.auth_dict['edx_s3_endpoint_bucket'],
self.encoded_file
)
except:
ftp1.mkd(
self.video_query.inst_class.tp_speed
callback_url = build_url(
self.auth_dict['veda_base_url'],
reverse(
'3play_media_callback',
args=[self.auth_dict['transcript_provider_request_token']]
),
# Additional attributes that'll come back with the callback
org=org,
edx_video_id=self.video_query.studio_id,
)
ftp1.cwd(
self.video_query.inst_class.tp_speed
three_play_media = ThreePLayMediaClient(
org=org,
video=self.video_query,
media_url=s3_video_url,
api_key=transcript_secrets.api_key,
api_secret=transcript_secrets.api_secret,
callback_url=callback_url,
turnaround_level=self.video_query.three_play_turnaround,
)
os.chdir(self.node_work_directory)
three_play_media.generate_transcripts()
ftp1.storbinary(
'STOR ' + self.encoded_file,
open(os.path.join(
self.node_work_directory,
self.encoded_file
), 'rb')
except TranscriptPreferences.DoesNotExist:
LOGGER.warning(
'Transcript preference is not found for provider=%s, video=%s',
self.video_query.provider,
self.video_query.studio_id,
)
os.chdir(homedir)
def _XUETANG_ROUTE(self):
if self.video_query.inst_class.xuetang_proc is False:
return None
......
"""
3PlayMedia Transcription Client
"""
import json
import logging
import requests
import sys
from requests.packages.urllib3.exceptions import InsecurePlatformWarning
from VEDA_OS01.models import TranscriptProcessMetadata, TranscriptProvider, TranscriptStatus
from VEDA_OS01.utils import build_url
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
LOGGER = logging.getLogger(__name__)
class ThreePlayMediaError(Exception):
"""
An error that occurs during 3PlayMedia actions.
"""
pass
class ThreePlayMediaLanguageNotFoundError(ThreePlayMediaError):
"""
An error when language is not found in available 3playMedia languages.
"""
pass
class ThreePlayMediaPerformTranscriptionError(ThreePlayMediaError):
"""
An error occurred while adding media for transcription.
"""
pass
class ThreePlayMediaUrlError(ThreePlayMediaError):
"""
Occurs when the media url is either inaccessible or of invalid content type.
"""
pass
class ThreePLayMediaClient(object):
def __init__(self, org, video, media_url, api_key, api_secret, callback_url, turnaround_level):
"""
Initialize 3play media client
"""
self.org = org
self.video = video
self.media_url = media_url
self.api_key = api_key
self.api_secret = api_secret
self.callback_url = callback_url
self.turnaround_level = turnaround_level
# default attributes
self.base_url = u'https://api.3playmedia.com/'
self.upload_media_file_url = u'files/'
self.available_languages_url = u'caption_imports/available_languages/'
self.allowed_content_type = u'video/mp4'
def validate_media_url(self):
"""
Validates the media URL
Raises:
3PlayMediaUrlError: on invalid media url or content type
"""
if not self.media_url:
raise ThreePlayMediaUrlError('Invalid media URL "{media_url}".'.format(media_url=self.media_url))
response = requests.head(url=self.media_url)
if not response.ok:
raise ThreePlayMediaUrlError('The URL "{media_url}" is not Accessible.'.format(media_url=self.media_url))
elif response.headers['Content-Type'] != self.allowed_content_type:
raise ThreePlayMediaUrlError(
'Media content-type should be "{allowed_type}". URL was "{media_url}", content-type was "{type}"'.format(
allowed_type=self.allowed_content_type,
media_url=self.media_url,
type=response.headers['Content-Type'],
)
)
def submit_media(self):
"""
Submits the media to perform transcription.
Raises:
ThreePlayMediaPerformTranscriptionError: error while transcription process
"""
self.validate_media_url()
# Prepare requests payload
payload = dict(
# Mandatory attributes required for transcription
link=self.media_url,
apikey=self.api_key,
api_secret_key=self.api_secret,
turnaround_level=self.turnaround_level,
callback_url=self.callback_url,
)
upload_url = build_url(self.base_url, self.upload_media_file_url)
response = requests.post(
url=upload_url,
data=json.dumps(payload),
headers={'Content-Type': 'application/json'}
)
if not response.ok:
raise ThreePlayMediaPerformTranscriptionError(
'Upload file request failed with: {response} -- {status}'.format(
response=response.text, status=response.status_code
)
)
try:
# A normal response should be a text containing file id and if we're getting a deserializable dict, there
# must be an error: http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
if isinstance(json.loads(response.text), dict):
raise ThreePlayMediaPerformTranscriptionError(
'Expected file id but got: {response}'.format(response=response.text)
)
except ValueError:
return response.text
def generate_transcripts(self):
"""
Kicks off transcription process for default language.
"""
try:
file_id = self.submit_media()
# Track progress of transcription process
TranscriptProcessMetadata.objects.create(
video=self.video,
process_id=file_id,
lang_code=u'en',
provider=TranscriptProvider.THREE_PLAY,
status=TranscriptStatus.IN_PROGRESS,
)
# Successfully kicked off transcription process for a video with the given language.
LOGGER.info(
'[3PlayMedia] Transcription process has been started for video=%s, language=en.',
self.video.studio_id,
)
except ThreePlayMediaError:
LOGGER.exception(
'[3PlayMedia] Could not process transcripts for video=%s language=en.',
self.video.studio_id,
)
except Exception:
LOGGER.exception(
'[3PlayMedia] Unexpected error while transcription for video=%s language=en .',
self.video.studio_id,
)
raise
def main():
pass
if __name__ == '__main__':
sys.exit(main())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment