Integrate 3PlayMedia Transcription API

6c87fa83 · Qubad786 · 1b69155e · 6c87fa83 · 6c87fa83 · 6c87fa83
Commit 6c87fa83 authored Aug 28, 2017 by Qubad786
Showing with 229 additions and 41 deletions

VEDA/urls.py
+6 -0

VEDA_OS01/models.py
+2 -0

VEDA_OS01/transcripts.py
+0 -0

control/celeryapp.py
+14 -2

control/veda_deliver.py
+42 -39

control/veda_deliver_3play.py
+165 -0

No files found.
--- a/VEDA/urls.py
+++ b/VEDA/urls.py
@@ -39,4 +39,10 @@ urlpatterns = [
        view=transcripts.Cielo24CallbackHandlerView.as_view(),
        name='cielo24_transcript_completed'
    ),
+    # 3PlayMedia callback handler view
+    url(
+        regex=r'^3playmedia/transcripts/handle/(?P<token>[\w]+)$',
+        view=transcripts.ThreePlayMediaCallbackHandlerView.as_view(),
+        name='3play_media_callback'
+    )
 ]
--- a/VEDA_OS01/models.py
+++ b/VEDA_OS01/models.py
@@ -647,6 +647,8 @@ class TranscriptProcessMetadata(TimeStampedModel):
    video = models.ForeignKey(Video)
    provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES)
    process_id = models.CharField('Process id', max_length=255)
+    # To keep track of 3Play Translations.
+    translation_id = models.CharField('Translation id', max_length=255, null=True, blank=True)
    lang_code = models.CharField('Language code', max_length=3)
    status = models.CharField(
        'Transcript status',

--- a/VEDA_OS01/transcripts.py
+++ b/VEDA_OS01/transcripts.py
--- a/control/celeryapp.py
+++ b/control/celeryapp.py

 from __future__ import absolute_import
-import os
-import sys
 from celery import Celery
 import yaml

+from VEDA_OS01.transcripts import retrieve_three_play_translations
+
 """
 Start Celery Worker

@@ -51,6 +51,18 @@ app.conf.update(
    CELERY_ACCEPT_CONTENT=['pickle', 'json', 'msgpack', 'yaml']
 )

+app.conf.beat_schedule = {
+    # Runs the 3Play translations check every 30 seconds. The 'task' value
+    # must be exactly the name the task is registered under via
+    # `@app.task(name=...)`; otherwise beat enqueues a task no worker knows.
+    'check-3play-translations-every-30-seconds': {
+        'task': 'fetch_three_play_translations',
+        'schedule': 30.0,
+    },
+}
+
+
+@app.task(name='fetch_three_play_translations')
+def fetch_three_play_translations():
+    """
+    Periodic celery task that delegates to `retrieve_three_play_translations`.
+    """
+    retrieve_three_play_translations()
+

 @app.task(name='worker_encode')
 def worker_task_fire(veda_id, encode_profile, jobid):

--- a/control/veda_deliver.py
+++ b/control/veda_deliver.py

 import datetime
-import ftplib
 import logging
-import os
 import shutil
-import sys
 from os.path import expanduser

 import boto
@@ -16,6 +13,7 @@ from boto.s3.key import Key
 from django.core.urlresolvers import reverse

 import veda_deliver_xuetang
+from control.veda_deliver_3play import ThreePLayMediaClient
 from control_env import *
 from veda_deliver_cielo import Cielo24Transcript
 from veda_deliver_youtube import DeliverYoutube
@@ -190,7 +188,6 @@ class VedaDelivery:

        self._CLEANUP()

-        self._THREEPLAY_UPLOAD()
        # Transcription Process
        # We only want to generate transcripts for `desktop_mp4` profile.
        if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription:
@@ -203,7 +200,6 @@ class VedaDelivery:
            if self.video_query.provider == TranscriptProvider.CIELO24:
                self.cielo24_transcription_flow()

-
    def _INFORM_INTAKE(self):
        """
        Collect all salient metadata and
@@ -574,48 +570,55 @@ class VedaDelivery:
        )
        cielo24.start_transcription_flow()

-    def _THREEPLAY_UPLOAD(self):
-
-        if self.video_query.inst_class.tp_proc is False:
-            return None
-        if self.video_query.inst_class.mobile_override is False:
-            if self.encode_profile != 'desktop_mp4':
-                return None
-
-        ftp1 = ftplib.FTP(
-            self.auth_dict['threeplay_ftphost']
-        )
-        user = self.video_query.inst_class.tp_username.strip()
-        passwd = self.video_query.inst_class.tp_password.strip()
+    def start_3play_transcription_process(self):
+        """
+        3PlayMedia Transcription Flow
+        """
        try:
-            ftp1.login(user, passwd)
-        except:
-            ErrorObject.print_error(
-                message='3Play Authentication Failure'
+            # Picks the first course from the list as there may be multiple
+            # course runs in that list (i.e. all having the same org).
+            org = utils.extract_course_org(self.video_proto.platform_course_url[0])
+            transcript_secrets = TranscriptPreferences.objects.get(org=org, provider=self.video_query.provider)
+
+            # update transcript status for video in edx-val
+            VALAPICall(video_proto=None, val_status=None).update_video_status(
+                self.video_query.studio_id, VideoStatus.TRANSCRIPTION_IN_PROGRESS
            )
-        try:
-            ftp1.cwd(
-                self.video_query.inst_class.tp_speed
+
+            # Initialize 3playMedia client and start transcription process
+            s3_video_url = build_url(
+                self.auth_dict['s3_base_url'],
+                self.auth_dict['edx_s3_endpoint_bucket'],
+                self.encoded_file
            )
-        except:
-            ftp1.mkd(
-                self.video_query.inst_class.tp_speed
+            callback_url = build_url(
+                self.auth_dict['veda_base_url'],
+                reverse(
+                    '3play_media_callback',
+                    args=[self.auth_dict['transcript_provider_request_token']]
+                ),
+                # Additional attributes that'll come back with the callback
+                org=org,
+                edx_video_id=self.video_query.studio_id,
            )
-            ftp1.cwd(
-                self.video_query.inst_class.tp_speed
+            three_play_media = ThreePLayMediaClient(
+                org=org,
+                video=self.video_query,
+                media_url=s3_video_url,
+                api_key=transcript_secrets.api_key,
+                api_secret=transcript_secrets.api_secret,
+                callback_url=callback_url,
+                turnaround_level=self.video_query.three_play_turnaround,
            )
-            os.chdir(self.node_work_directory)
+            three_play_media.generate_transcripts()

-        ftp1.storbinary(
-            'STOR ' + self.encoded_file,
-            open(os.path.join(
-                self.node_work_directory,
-                self.encoded_file
-            ), 'rb')
+        except TranscriptPreferences.DoesNotExist:
+            LOGGER.warning(
+                'Transcript preference is not found for provider=%s, video=%s',
+                self.video_query.provider,
+                self.video_query.studio_id,
            )

-        os.chdir(homedir)
-
    def _XUETANG_ROUTE(self):
        if self.video_query.inst_class.xuetang_proc is False:
            return None

--- a/control/veda_deliver_3play.py
+++ b/control/veda_deliver_3play.py
+"""
+3PlayMedia Transcription Client
+"""
+import json
+import logging
+import requests
+import sys
+
+from requests.packages.urllib3.exceptions import InsecurePlatformWarning
+from VEDA_OS01.models import TranscriptProcessMetadata, TranscriptProvider, TranscriptStatus
+from VEDA_OS01.utils import build_url
+
+# Suppress urllib3's InsecurePlatformWarning (as vendored under requests).
+requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
+
+# Module-level logger, named after this module.
+LOGGER = logging.getLogger(__name__)
+
+
+class ThreePlayMediaError(Exception):
+    """
+    Base error for failures that occur during 3PlayMedia actions.
+    """
+
+
+class ThreePlayMediaLanguageNotFoundError(ThreePlayMediaError):
+    """
+    Raised when a language is not among the available 3PlayMedia languages.
+    """
+
+
+class ThreePlayMediaPerformTranscriptionError(ThreePlayMediaError):
+    """
+    Raised when adding media for transcription fails.
+    """
+
+
+class ThreePlayMediaUrlError(ThreePlayMediaError):
+    """
+    Raised when the media url is inaccessible or has an invalid content type.
+    """
+
+
+class ThreePLayMediaClient(object):
+    """
+    Client that submits a media URL to the 3PlayMedia files API for
+    transcription and records the started process in
+    `TranscriptProcessMetadata`.
+    """
+
+    def __init__(self, org, video, media_url, api_key, api_secret, callback_url, turnaround_level):
+        """
+        Initialize 3play media client
+
+        Arguments:
+            org: organization identifier; stored on the instance.
+            video: video object; its `studio_id` is logged and it is saved as
+                the `video` of the created `TranscriptProcessMetadata`.
+            media_url: URL of the media to be transcribed.
+            api_key: 3PlayMedia API key, sent in the upload payload.
+            api_secret: 3PlayMedia API secret, sent in the upload payload.
+            callback_url: sent as `callback_url` in the upload payload.
+            turnaround_level: sent as `turnaround_level` in the upload payload.
+        """
+        self.org = org
+        self.video = video
+        self.media_url = media_url
+        self.api_key = api_key
+        self.api_secret = api_secret
+        self.callback_url = callback_url
+        self.turnaround_level = turnaround_level
+        # default attributes
+        self.base_url = u'https://api.3playmedia.com/'
+        self.upload_media_file_url = u'files/'
+        self.available_languages_url = u'caption_imports/available_languages/'
+        self.allowed_content_type = u'video/mp4'
+
+    def validate_media_url(self):
+        """
+        Validates the media URL
+
+        Issues a HEAD request against the media URL and checks that it is
+        reachable and served with the allowed content type.
+
+        Raises:
+            ThreePlayMediaUrlError: on invalid media url or content type
+        """
+        if not self.media_url:
+            raise ThreePlayMediaUrlError('Invalid media URL "{media_url}".'.format(media_url=self.media_url))
+
+        response = requests.head(url=self.media_url)
+        if not response.ok:
+            raise ThreePlayMediaUrlError('The URL "{media_url}" is not Accessible.'.format(media_url=self.media_url))
+
+        # Use `.get` so that a response without a Content-Type header is
+        # reported as ThreePlayMediaUrlError rather than escaping as KeyError.
+        content_type = response.headers.get('Content-Type')
+        if content_type != self.allowed_content_type:
+            raise ThreePlayMediaUrlError(
+                'Media content-type should be "{allowed_type}". URL was "{media_url}", content-type was "{type}"'.format(
+                    allowed_type=self.allowed_content_type,
+                    media_url=self.media_url,
+                    type=content_type,
+                )
+            )
+
+    def submit_media(self):
+        """
+        Submits the media to perform transcription.
+
+        Returns:
+            The response text of the upload request, which is expected to be
+            the 3PlayMedia file id.
+
+        Raises:
+            ThreePlayMediaUrlError: the media url failed validation
+            ThreePlayMediaPerformTranscriptionError: error while transcription process
+        """
+        self.validate_media_url()
+        # Prepare requests payload
+        payload = dict(
+            # Mandatory attributes required for transcription
+            link=self.media_url,
+            apikey=self.api_key,
+            api_secret_key=self.api_secret,
+            turnaround_level=self.turnaround_level,
+            callback_url=self.callback_url,
+        )
+        upload_url = build_url(self.base_url, self.upload_media_file_url)
+        response = requests.post(
+            url=upload_url,
+            data=json.dumps(payload),
+            headers={'Content-Type': 'application/json'}
+        )
+
+        if not response.ok:
+            raise ThreePlayMediaPerformTranscriptionError(
+                'Upload file request failed with: {response} -- {status}'.format(
+                    response=response.text, status=response.status_code
+                )
+            )
+
+        # A normal response should be a text containing file id and if we're getting a deserializable dict, there
+        # must be an error: http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
+        try:
+            is_error_payload = isinstance(json.loads(response.text), dict)
+        except ValueError:
+            # Body is not JSON at all, so it is treated as a plain-text file id.
+            is_error_payload = False
+
+        if is_error_payload:
+            raise ThreePlayMediaPerformTranscriptionError(
+                'Expected file id but got: {response}'.format(response=response.text)
+            )
+
+        # A purely numeric file id is itself valid JSON (it parses to a number),
+        # so the id must be returned here as well and not only when parsing fails.
+        return response.text
+
+    def generate_transcripts(self):
+        """
+        Kicks off transcription process for default language.
+
+        Submits the media and, on success, creates a
+        `TranscriptProcessMetadata` record (language `en`, provider
+        `THREE_PLAY`, status `IN_PROGRESS`) keyed by the returned file id.
+
+        `ThreePlayMediaError` failures are logged and suppressed; any other
+        exception is logged and re-raised.
+        """
+        try:
+            file_id = self.submit_media()
+            # Track progress of transcription process
+            TranscriptProcessMetadata.objects.create(
+                video=self.video,
+                process_id=file_id,
+                lang_code=u'en',
+                provider=TranscriptProvider.THREE_PLAY,
+                status=TranscriptStatus.IN_PROGRESS,
+            )
+            # Successfully kicked off transcription process for a video with the given language.
+            LOGGER.info(
+                '[3PlayMedia] Transcription process has been started for video=%s, language=en.',
+                self.video.studio_id,
+            )
+        except ThreePlayMediaError:
+            LOGGER.exception(
+                '[3PlayMedia] Could not process transcripts for video=%s language=en.',
+                self.video.studio_id,
+            )
+        except Exception:
+            LOGGER.exception(
+                '[3PlayMedia] Unexpected error while transcription for video=%s language=en .',
+                self.video.studio_id,
+            )
+            raise
+
+
+def main():
+    """
+    Script entry point; currently a no-op that returns None.
+    """
+
+
+# When run as a script, exit with main()'s return value as the exit status.
+if __name__ == '__main__':
+    sys.exit(main())