Commit 6c87fa83 by Qubad786

Integrate 3PlayMedia Transcription API

parent 1b69155e
...@@ -39,4 +39,10 @@ urlpatterns = [ ...@@ -39,4 +39,10 @@ urlpatterns = [
view=transcripts.Cielo24CallbackHandlerView.as_view(), view=transcripts.Cielo24CallbackHandlerView.as_view(),
name='cielo24_transcript_completed' name='cielo24_transcript_completed'
), ),
# 3PlayMedia callback handler view
url(
regex=r'^3playmedia/transcripts/handle/(?P<token>[\w]+)$',
view=transcripts.ThreePlayMediaCallbackHandlerView.as_view(),
name='3play_media_callback'
)
] ]
...@@ -647,6 +647,8 @@ class TranscriptProcessMetadata(TimeStampedModel): ...@@ -647,6 +647,8 @@ class TranscriptProcessMetadata(TimeStampedModel):
video = models.ForeignKey(Video) video = models.ForeignKey(Video)
provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES) provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES)
process_id = models.CharField('Process id', max_length=255) process_id = models.CharField('Process id', max_length=255)
# To keep track of 3Play Translations.
translation_id = models.CharField('Translation id', max_length=255, null=True, blank=True)
lang_code = models.CharField('Language code', max_length=3) lang_code = models.CharField('Language code', max_length=3)
status = models.CharField( status = models.CharField(
'Transcript status', 'Transcript status',
......
from __future__ import absolute_import from __future__ import absolute_import
import os
import sys
from celery import Celery from celery import Celery
import yaml import yaml
from VEDA_OS01.transcripts import retrieve_three_play_translations
""" """
Start Celery Worker Start Celery Worker
...@@ -51,6 +51,18 @@ app.conf.update( ...@@ -51,6 +51,18 @@ app.conf.update(
CELERY_ACCEPT_CONTENT=['pickle', 'json', 'msgpack', 'yaml'] CELERY_ACCEPT_CONTENT=['pickle', 'json', 'msgpack', 'yaml']
) )
# Celery beat dispatches a periodic task by its *registered* name, so the
# 'task' value below must be identical to the `name` passed to `@app.task`
# for `fetch_three_play_translations`. (It previously carried a `tasks.`
# prefix that no registered task has.)
app.conf.beat_schedule = {
    'check-3play-translations-every-30-seconds': {
        'task': 'fetch_three_play_translations',
        # Interval between runs, in seconds.
        'schedule': 30.0,
    },
}


@app.task(name='fetch_three_play_translations')
def fetch_three_play_translations():
    """
    Periodic Celery task; delegates to `retrieve_three_play_translations`.
    """
    retrieve_three_play_translations()
@app.task(name='worker_encode') @app.task(name='worker_encode')
def worker_task_fire(veda_id, encode_profile, jobid): def worker_task_fire(veda_id, encode_profile, jobid):
......
import datetime import datetime
import ftplib
import logging import logging
import os
import shutil import shutil
import sys
from os.path import expanduser from os.path import expanduser
import boto import boto
...@@ -16,6 +13,7 @@ from boto.s3.key import Key ...@@ -16,6 +13,7 @@ from boto.s3.key import Key
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
import veda_deliver_xuetang import veda_deliver_xuetang
from control.veda_deliver_3play import ThreePLayMediaClient
from control_env import * from control_env import *
from veda_deliver_cielo import Cielo24Transcript from veda_deliver_cielo import Cielo24Transcript
from veda_deliver_youtube import DeliverYoutube from veda_deliver_youtube import DeliverYoutube
...@@ -190,7 +188,6 @@ class VedaDelivery: ...@@ -190,7 +188,6 @@ class VedaDelivery:
self._CLEANUP() self._CLEANUP()
self._THREEPLAY_UPLOAD()
# Transcription Process # Transcription Process
# We only want to generate transcripts for `desktop_mp4` profile. # We only want to generate transcripts for `desktop_mp4` profile.
if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription: if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription:
...@@ -203,7 +200,6 @@ class VedaDelivery: ...@@ -203,7 +200,6 @@ class VedaDelivery:
if self.video_query.provider == TranscriptProvider.CIELO24: if self.video_query.provider == TranscriptProvider.CIELO24:
self.cielo24_transcription_flow() self.cielo24_transcription_flow()
def _INFORM_INTAKE(self): def _INFORM_INTAKE(self):
""" """
Collect all salient metadata and Collect all salient metadata and
...@@ -574,47 +570,54 @@ class VedaDelivery: ...@@ -574,47 +570,54 @@ class VedaDelivery:
) )
cielo24.start_transcription_flow() cielo24.start_transcription_flow()
def _THREEPLAY_UPLOAD(self): def start_3play_transcription_process(self):
"""
if self.video_query.inst_class.tp_proc is False: 3PlayMedia Transcription Flow
return None """
if self.video_query.inst_class.mobile_override is False:
if self.encode_profile != 'desktop_mp4':
return None
ftp1 = ftplib.FTP(
self.auth_dict['threeplay_ftphost']
)
user = self.video_query.inst_class.tp_username.strip()
passwd = self.video_query.inst_class.tp_password.strip()
try: try:
ftp1.login(user, passwd) # Picks the first course from the list as there may be multiple
except: # course runs in that list (i.e. all having the same org).
ErrorObject.print_error( org = utils.extract_course_org(self.video_proto.platform_course_url[0])
message='3Play Authentication Failure' transcript_secrets = TranscriptPreferences.objects.get(org=org, provider=self.video_query.provider)
# update transcript status for video in edx-val
VALAPICall(video_proto=None, val_status=None).update_video_status(
self.video_query.studio_id, VideoStatus.TRANSCRIPTION_IN_PROGRESS
) )
try:
ftp1.cwd( # Initialize 3playMedia client and start transcription process
self.video_query.inst_class.tp_speed s3_video_url = build_url(
self.auth_dict['s3_base_url'],
self.auth_dict['edx_s3_endpoint_bucket'],
self.encoded_file
) )
except: callback_url = build_url(
ftp1.mkd( self.auth_dict['veda_base_url'],
self.video_query.inst_class.tp_speed reverse(
'3play_media_callback',
args=[self.auth_dict['transcript_provider_request_token']]
),
# Additional attributes that'll come back with the callback
org=org,
edx_video_id=self.video_query.studio_id,
) )
ftp1.cwd( three_play_media = ThreePLayMediaClient(
self.video_query.inst_class.tp_speed org=org,
video=self.video_query,
media_url=s3_video_url,
api_key=transcript_secrets.api_key,
api_secret=transcript_secrets.api_secret,
callback_url=callback_url,
turnaround_level=self.video_query.three_play_turnaround,
) )
os.chdir(self.node_work_directory) three_play_media.generate_transcripts()
ftp1.storbinary(
'STOR ' + self.encoded_file,
open(os.path.join(
self.node_work_directory,
self.encoded_file
), 'rb')
)
os.chdir(homedir) except TranscriptPreferences.DoesNotExist:
LOGGER.warning(
'Transcript preference is not found for provider=%s, video=%s',
self.video_query.provider,
self.video_query.studio_id,
)
def _XUETANG_ROUTE(self): def _XUETANG_ROUTE(self):
if self.video_query.inst_class.xuetang_proc is False: if self.video_query.inst_class.xuetang_proc is False:
......
"""
3PlayMedia Transcription Client
"""
import json
import logging
import requests
import sys
from requests.packages.urllib3.exceptions import InsecurePlatformWarning
from VEDA_OS01.models import TranscriptProcessMetadata, TranscriptProvider, TranscriptStatus
from VEDA_OS01.utils import build_url
# Turn off urllib3's InsecurePlatformWarning (via the copy bundled with requests).
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
class ThreePlayMediaError(Exception):
    """
    Base class for errors raised during 3PlayMedia actions.
    """
class ThreePlayMediaLanguageNotFoundError(ThreePlayMediaError):
    """
    Signals that a language is not among the available 3PlayMedia languages.
    """
class ThreePlayMediaPerformTranscriptionError(ThreePlayMediaError):
    """
    Signals a failure while adding media for transcription.
    """
class ThreePlayMediaUrlError(ThreePlayMediaError):
    """
    Signals that the media url is inaccessible or has an invalid content type.
    """
class ThreePLayMediaClient(object):
    """
    Client that submits a media URL to 3PlayMedia for transcription and records
    the started process in `TranscriptProcessMetadata`.
    """

    def __init__(self, org, video, media_url, api_key, api_secret, callback_url, turnaround_level):
        """
        Initialize 3play media client

        Arguments:
            org: organization the video belongs to (stored on the client).
            video: video object; its `studio_id` is used in log messages and
                the object itself is attached to the tracking record.
            media_url: URL of the media file; sent to 3PlayMedia as `link`.
            api_key: sent to 3PlayMedia as `apikey`.
            api_secret: sent to 3PlayMedia as `api_secret_key`.
            callback_url: sent to 3PlayMedia as `callback_url`.
            turnaround_level: sent to 3PlayMedia as `turnaround_level`.
        """
        self.org = org
        self.video = video
        self.media_url = media_url
        self.api_key = api_key
        self.api_secret = api_secret
        self.callback_url = callback_url
        self.turnaround_level = turnaround_level

        # default attributes
        self.base_url = u'https://api.3playmedia.com/'
        self.upload_media_file_url = u'files/'
        self.available_languages_url = u'caption_imports/available_languages/'
        self.allowed_content_type = u'video/mp4'

    def validate_media_url(self):
        """
        Validates the media URL

        Issues a HEAD request against the media URL and checks the reported
        content type against `allowed_content_type`.

        Raises:
            ThreePlayMediaUrlError: on an empty or inaccessible media url, or
                when the content type is missing or not the allowed one
        """
        if not self.media_url:
            raise ThreePlayMediaUrlError('Invalid media URL "{media_url}".'.format(media_url=self.media_url))

        response = requests.head(url=self.media_url)
        if not response.ok:
            raise ThreePlayMediaUrlError('The URL "{media_url}" is not Accessible.'.format(media_url=self.media_url))

        # Use `.get` so that a response without a Content-Type header is
        # reported as a ThreePlayMediaUrlError rather than leaking a KeyError.
        content_type = response.headers.get('Content-Type')
        if content_type != self.allowed_content_type:
            raise ThreePlayMediaUrlError(
                'Media content-type should be "{allowed_type}". URL was "{media_url}", content-type was "{type}"'.format(
                    allowed_type=self.allowed_content_type,
                    media_url=self.media_url,
                    type=content_type,
                )
            )

    def submit_media(self):
        """
        Submits the media to perform transcription.

        Returns:
            The response body of the upload request, which on success is the
            3PlayMedia file id as text.

        Raises:
            ThreePlayMediaUrlError: when the media url fails validation
            ThreePlayMediaPerformTranscriptionError: error while transcription process
        """
        self.validate_media_url()

        # Prepare requests payload
        payload = dict(
            # Mandatory attributes required for transcription
            link=self.media_url,
            apikey=self.api_key,
            api_secret_key=self.api_secret,
            turnaround_level=self.turnaround_level,
            callback_url=self.callback_url,
        )
        upload_url = build_url(self.base_url, self.upload_media_file_url)
        response = requests.post(
            url=upload_url,
            data=json.dumps(payload),
            headers={'Content-Type': 'application/json'}
        )

        if not response.ok:
            raise ThreePlayMediaPerformTranscriptionError(
                'Upload file request failed with: {response} -- {status}'.format(
                    response=response.text, status=response.status_code
                )
            )

        return self._extract_file_id(response.text)

    @staticmethod
    def _extract_file_id(response_text):
        """
        Returns the file id carried by an upload response body.

        A normal response is a text containing the file id; if the body
        deserializes to a dict there must be an error:
        http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods

        Raises:
            ThreePlayMediaPerformTranscriptionError: when the body is a JSON object
        """
        try:
            parsed = json.loads(response_text)
        except ValueError:
            # Not JSON at all, i.e. a plain-text file id.
            return response_text

        if isinstance(parsed, dict):
            raise ThreePlayMediaPerformTranscriptionError(
                'Expected file id but got: {response}'.format(response=response_text)
            )

        # The body parsed as JSON but is not an error object -- e.g. an
        # all-digit file id parses as a JSON number. It is still the file id,
        # so return it instead of falling through and returning None.
        return response_text

    def generate_transcripts(self):
        """
        Kicks off transcription process for default language.

        3PlayMedia-specific failures are logged and swallowed; any other
        exception is logged and re-raised.
        """
        try:
            file_id = self.submit_media()
            # Track progress of transcription process
            TranscriptProcessMetadata.objects.create(
                video=self.video,
                process_id=file_id,
                lang_code=u'en',
                provider=TranscriptProvider.THREE_PLAY,
                status=TranscriptStatus.IN_PROGRESS,
            )
            # Successfully kicked off transcription process for a video with the given language.
            LOGGER.info(
                '[3PlayMedia] Transcription process has been started for video=%s, language=en.',
                self.video.studio_id,
            )
        except ThreePlayMediaError:
            LOGGER.exception(
                '[3PlayMedia] Could not process transcripts for video=%s language=en.',
                self.video.studio_id,
            )
        except Exception:
            LOGGER.exception(
                '[3PlayMedia] Unexpected error while transcription for video=%s language=en .',
                self.video.studio_id,
            )
            raise
def main():
    """
    Placeholder entry point; does nothing and returns None.
    """
    return None


if __name__ == '__main__':
    # When run as a script, exit with main()'s return value.
    sys.exit(main())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment