Add transcript preferences in Video and accommodate those preferences during file discovery

49a33707 · Qubad786 · muzaffaryousaf · f62e3515 · 49a33707 · 49a33707
Commit 49a33707 authored Aug 03, 2017 by Qubad786 Committed by muzaffaryousaf Aug 09, 2017
Showing with 339 additions and 85 deletions

.gitignore
+3 -0

VEDA_OS01/admin.py
+7 -1

VEDA_OS01/models.py
+226 -68

VEDA_OS01/serializers.py
+31 -1

control/veda_file_discovery.py
+42 -10

control/veda_file_ingest.py
+30 -5

No files found.
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,6 @@ sandbox.db
 .coverage
 coverage/
+# TODO remove this once migrations are shipped with the code
+*/migrations/*
--- a/VEDA_OS01/admin.py
+++ b/VEDA_OS01/admin.py
@@ -34,7 +34,13 @@ class VideoAdmin(admin.ModelAdmin):
        'studio_id',
        'video_trans_start',
        'video_trans_status',
-        'video_active'
+        'video_active',
+        'process_transcription',
+        'provider',
+        'three_play_turnaround',
+        'cielo24_turnaround',
+        'cielo24_fidelity',
+        'preferred_languages',
    ]
    list_filter = ['inst_class__institution']
    search_fields = ['edx_id', 'client_title', 'studio_id']

--- a/VEDA_OS01/models.py
+++ b/VEDA_OS01/models.py
--- a/VEDA_OS01/serializers.py
+++ b/VEDA_OS01/serializers.py
@@ -70,7 +70,13 @@ class VideoSerializer(serializers.ModelSerializer):
            'video_trans_start',
            'video_trans_end',
            'video_trans_status',
-            'video_glacierid'
+            'video_glacierid',
+            'process_transcription',
+            'provider',
+            'three_play_turnaround',
+            'cielo24_turnaround',
+            'cielo24_fidelity',
+            'preferred_languages',
        )
    def create(self, validated_data):
@@ -122,6 +128,30 @@ class VideoSerializer(serializers.ModelSerializer):
            'video_glacierid',
            instance.video_glacierid
        )
+        instance.process_transcription = validated_data.get(
+            'process_transcription',
+            instance.process_transcription
+        )
+        instance.provider = validated_data.get(
+            'provider',
+            instance.provider
+        )
+        instance.three_play_turnaround = validated_data.get(
+            'three_play_turnaround',
+            instance.three_play_turnaround
+        )
+        instance.cielo24_turnaround = validated_data.get(
+            'cielo24_turnaround',
+            instance.cielo24_turnaround
+        )
+        instance.cielo24_fidelity = validated_data.get(
+            'cielo24_fidelity',
+            instance.cielo24_fidelity
+        )
+        instance.preferred_languages = validated_data.get(
+            'preferred_languages',
+            instance.preferred_languages
+        )
        instance.save()
        return instance

--- a/control/veda_file_discovery.py
+++ b/control/veda_file_discovery.py
+import json
+import logging
 import os.path
 import boto
 import yaml
 from boto.s3.connection import S3Connection
 import newrelic.agent
+from VEDA_OS01.models import TranscriptPreferences
 try:
    boto.config.add_section('Boto')
 except:
@@ -32,6 +36,8 @@ from veda_utils import ErrorObject
 from veda_file_ingest import VideoProto, VedaIngest
 from veda_val import VALAPICall
+LOGGER = logging.getLogger(__name__)
 class FileDiscovery():
@@ -182,6 +188,7 @@ class FileDiscovery():
        client_title = meta.get_metadata('client_video_id')
        course_hex = meta.get_metadata('course_video_upload_token')
        course_url = meta.get_metadata('course_key')
+        transcript_preferences = meta.get_metadata('transcript_preferences')
        edx_filename = key.name[::-1].split('/')[0][::-1]
        if len(course_hex) == 0:
@@ -252,24 +259,49 @@ class FileDiscovery():
            key.delete()
            return None
-        """
+        # Make decision if this video needs the transcription as well.
-        Trigger Ingest Process
+        try:
-        """
+            transcript_preferences = json.loads(transcript_preferences)
-        V = VideoProto(
+            TranscriptPreferences.objects.get(
+                # TODO: Once ammar is done with cielo24.
+                # org=extract_course_org(course_url),
+                org=transcript_preferences.get('org'),
+                provider=transcript_preferences.get('provider')
+            )
+            process_transcription = True
+        except (TypeError, TranscriptPreferences.DoesNotExist):
+            # The preferences are not set, are set to data in an invalid format, or do not
+            # have associated 3rd-party transcription provider API keys.
+            process_transcription = False
+        except ValueError:
+            LOGGER.error('[VIDEO-PIPELINE] File Discovery - Invalid transcripts preferences=%s', transcript_preferences)
+            process_transcription = False
+        # Trigger Ingest Process
+        video_metadata = dict(
            s3_filename=edx_filename,
            client_title=client_title,
            file_extension=file_extension,
-            platform_course_url=course_url
+            platform_course_url=course_url,
        )
+        if process_transcription:
-        I = VedaIngest(
+            video_metadata.update({
+                'process_transcription': process_transcription,
+                'provider': transcript_preferences.get('provider'),
+                'three_play_turnaround': transcript_preferences.get('three_play_turnaround'),
+                'cielo24_turnaround': transcript_preferences.get('cielo24_turnaround'),
+                'cielo24_fidelity': transcript_preferences.get('cielo24_fidelity'),
+                'preferred_languages': transcript_preferences.get('preferred_languages'),
+            })
+        ingest = VedaIngest(
            course_object=course_query[0],
-            video_proto=V,
+            video_proto=VideoProto(**video_metadata),
            node_work_directory=self.node_work_directory
        )
-        I.insert()
+        ingest.insert()
-        if I.complete is False:
+        if ingest.complete is False:
            return None
        """

--- a/control/veda_file_ingest.py
+++ b/control/veda_file_ingest.py
+import logging
 import os
 import sys
 import subprocess
@@ -7,6 +7,7 @@ from datetime import timedelta
 import time
 import fnmatch
 import django
+from django.db.utils import DatabaseError
 from django.utils.timezone import utc
 from django.db import reset_queries
 import uuid
@@ -39,6 +40,8 @@ from veda_val import VALAPICall
 from veda_encode import VedaEncode
 import celeryapp
+LOGGER = logging.getLogger(__name__)
 '''
 V = VideoProto(
    s3_filename=edx_filename,
@@ -66,9 +69,16 @@ class VideoProto():
        self.file_extension = kwargs.get('file_extension', None)
        self.platform_course_url = kwargs.get('platform_course_url', None)
        self.abvid_serial = kwargs.get('abvid_serial', None)
-        """
-        Determined Attrib
+        # Transcription Process related Attributes
-        """
+        self.process_transcription = kwargs.get('process_transcription', False)
+        self.provider = kwargs.get('provider', None)
+        self.three_play_turnaround = kwargs.get('three_play_turnaround', None)
+        self.cielo24_turnaround = kwargs.get('cielo24_turnaround', None)
+        self.cielo24_fidelity = kwargs.get('cielo24_fidelity', None)
+        self.preferred_languages = kwargs.get('preferred_languages', [])
+        # Determined Attributes
        self.valid = False
        self.filesize = 0
        self.duration = 0
@@ -342,6 +352,15 @@ class VedaIngest:
            self.complete = True
            return None
+        # Update transcription preferences for the Video
+        if self.video_proto.process_transcription:
+            v1.process_transcription = self.video_proto.process_transcription
+            v1.provider = self.video_proto.provider
+            v1.three_play_turnaround = self.video_proto.three_play_turnaround
+            v1.cielo24_turnaround = self.video_proto.cielo24_turnaround
+            v1.cielo24_fidelity = self.video_proto.cielo24_fidelity
+            v1.preferred_languages = self.video_proto.preferred_languages
        """
        Files Below are all valid
        """
@@ -362,7 +381,8 @@ class VedaIngest:
        """
        try:
            v1.save()
-        except:
+        except DatabaseError:
+            # in case the client title is too long
            char_string = self.video_proto.client_title
            string_len = len(char_string)
            s1 = 0
@@ -377,6 +397,11 @@ class VedaIngest:
            v1.client_title = final_string
            v1.save()
+        except Exception:
+            # Log the exception and raise.
+            LOGGER.exception('[VIDEO-PIPELINE] File Ingest - Cataloging of video=%s failed.', self.video_proto.veda_id)
+            raise
    def val_insert(self):
        if self.video_proto.abvid_serial is not None:
            return None