Merge pull request #80 from edx/ammar/improve-val-status-update-logic

Start transcription once video encodings(except for "review" and "youtube") get ready

Merge pull request #80 from edx/ammar/improve-val-status-update-logic
Start transcription once video encodings(except for "review" and "youtube") get ready
192f378e · M. Rehan · GitHub · 9bb16501 · 1b821369 · 192f378e
Unverified Commit 192f378e authored Feb 08, 2018 by M. Rehan Committed by GitHub Feb 08, 2018
Showing with 253 additions and 18 deletions

VEDA_OS01/tests/factories.py
+88 -0

VEDA_OS01/tests/test_utils.py
+68 -2

VEDA_OS01/utils.py
+52 -2

control/tests/test_deliver.py
+1 -0

control/veda_deliver.py
+38 -9

test_requirements.txt
+6 -5

No files found.
--- a/VEDA_OS01/tests/factories.py
+++ b/VEDA_OS01/tests/factories.py
+"""
+VEDA model factories.
+"""
+from factory import LazyFunction, Sequence, SubFactory
+from factory.django import DjangoModelFactory
+
+from VEDA_OS01.models import Course, Destination, Encode, TranscriptStatus, URL, Video, VideoStatus
+
+
+class CourseFactory(DjangoModelFactory):
+    """
+    Course data model factory.
+
+    Name and ID fields are generated from the factory's sequence counter, and
+    every processing flag defaults to False.
+    """
+    class Meta(object):
+        model = Course
+
+    # `Sequence` calls the bound `str.format` with its counter, so each instance gets distinct values.
+    course_name = Sequence('Test Course {0}'.format)
+    institution = Sequence('INST-{0}'.format)
+    edx_classid = Sequence('CLASS-{0}'.format)
+    semesterid = Sequence('2018-{0}'.format)
+    # Processing flags are off by default; pass them as keyword arguments to turn them on.
+    proc_loc = False
+    review_proc = False
+    yt_proc = False
+    s3_proc = False
+    local_storedir = None
+
+
+class VideoFactory(DjangoModelFactory):
+    """
+    Video data model factory.
+
+    A course is created through `CourseFactory` unless `inst_class` is passed in.
+    Title and ID fields are generated from the factory's sequence counter, and
+    transcription is disabled by default.
+    """
+    class Meta(object):
+        model = Video
+
+    inst_class = SubFactory(CourseFactory)
+    # `Sequence` calls the bound `str.format` with its counter, so each instance gets distinct values.
+    client_title = Sequence('Video {0}'.format)
+    edx_id = Sequence('ABC-CDE-EFG-{0}'.format)
+    studio_id = Sequence('61bd0526{0}'.format)
+    video_trans_status = VideoStatus.SI
+    # Transcription defaults: nothing requested and no provider configured.
+    transcript_status = TranscriptStatus.NOT_APPLICABLE
+    process_transcription = False
+    provider = None
+    three_play_turnaround = None
+    cielo24_turnaround = None
+    cielo24_fidelity = None
+    source_language = None
+    # Build a fresh list for every instance; a plain `[]` here would be one shared
+    # object, so an in-place change made by one test would leak into later instances.
+    preferred_languages = LazyFunction(list)
+
+
+class DestinationFactory(DjangoModelFactory):
+    """
+    Destination data model factory.
+
+    Generated destinations are inactive unless `destination_active=True` is passed.
+    """
+    class Meta(object):
+        model = Destination
+
+    # `Sequence` calls the bound `str.format` with its counter, so each instance gets distinct values.
+    destination_name = Sequence('Dest-{0}'.format)
+    destination_active = False
+    destination_nick = Sequence('D{0}'.format)
+
+
+class EncodeFactory(DjangoModelFactory):
+    """
+    Encode data model factory.
+
+    A destination is created through `DestinationFactory` unless
+    `encode_destination` is passed in. Generated profiles are inactive and
+    carry no `product_spec` unless the caller supplies them.
+    """
+    class Meta(object):
+        model = Encode
+
+    encode_destination = SubFactory(DestinationFactory)
+    # `Sequence` calls the bound `str.format` with its counter, so each instance gets a distinct name.
+    encode_name = Sequence('Encode-{0}'.format)
+    profile_active = False
+    encode_suffix = ''
+    encode_filetype = 'mp4'
+    encode_bitdepth = None
+    encode_resolution = None
+    # Callers set this to the profile name (e.g. 'desktop_mp4') they want the encode to represent.
+    product_spec = None
+
+
+class UrlFactory(DjangoModelFactory):
+    """
+    URL data model factory.
+
+    The related encode profile and video are created through their own
+    factories unless `encode_profile` / `videoID` are passed in.
+    """
+    class Meta(object):
+        model = URL
+
+    encode_profile = SubFactory(EncodeFactory)
+    videoID = SubFactory(VideoFactory)
+    # `Sequence` calls the bound `str.format` with its counter, so each instance gets a distinct URL.
+    encode_url = Sequence('https://www.querty.com/{0}'.format)
--- a/VEDA_OS01/tests/test_utils.py
+++ b/VEDA_OS01/tests/test_utils.py
@@ -5,12 +5,13 @@ from unittest import TestCase

 from ddt import data, ddt, unpack
 from django.conf import settings
-from django.test import override_settings
+from django.test import override_settings, TransactionTestCase
 from mock import MagicMock, Mock

 from VEDA_OS01 import utils
 from VEDA_OS01.models import TranscriptCredentials
-
+from VEDA_OS01.tests.factories import CourseFactory, DestinationFactory, EncodeFactory, VideoFactory, UrlFactory
+from VEDA_OS01.utils import get_incomplete_encodes, is_video_ready

 OLD_FERNET_KEYS_LIST = ['test-ferent-key']

@@ -61,3 +62,68 @@ class UtilTests(TestCase):
        with override_settings(FERNET_KEYS=new_keys_set):
            self.assertEqual(settings.FERNET_KEYS, new_keys_set)
            verify_model_field_keys(TranscriptCredentials, 'api_key', new_keys_set)
+
+
+class EncodeUtilsTest(TransactionTestCase):
+    """
+    Tests for the video encode utils `get_incomplete_encodes` and `is_video_ready`.
+
+    NOTE(review): the expected results depend on the `encode_dict` setting read by
+    `get_incomplete_encodes`, which is not defined in this module -- presumably it maps
+    `review_proc` to 'review', `yt_proc` to 'youtube' and `s3_proc` to the remaining
+    product specs used below; confirm against the test configuration.
+    """
+
+    def setUp(self):
+        # Set up test courses, each with a different mix of processing flags.
+        course1 = CourseFactory(review_proc=True, yt_proc=True, s3_proc=True)
+        course2 = CourseFactory(review_proc=False, yt_proc=True, s3_proc=True)
+        course3 = CourseFactory(review_proc=True, yt_proc=True, s3_proc=False)
+
+        # Set up test encode profiles; `hls` (encode5) is the only inactive one.
+        destination = DestinationFactory(destination_active=True)
+        encode1 = EncodeFactory(encode_destination=destination, product_spec='desktop_mp4', profile_active=True)
+        encode2 = EncodeFactory(encode_destination=destination, product_spec='review', profile_active=True)
+        encode3 = EncodeFactory(encode_destination=destination, product_spec='mobile_low', profile_active=True)
+        encode4 = EncodeFactory(encode_destination=destination, product_spec='audio_mp3', profile_active=True)
+        encode5 = EncodeFactory(encode_destination=destination, product_spec='hls', profile_active=False)
+        encode6 = EncodeFactory(encode_destination=destination, product_spec='youtube', profile_active=True)
+
+        # Set up one video per course.
+        self.video1 = VideoFactory(inst_class=course1)
+        self.video2 = VideoFactory(inst_class=course2)
+        self.video3 = VideoFactory(inst_class=course3)
+
+        # video1 has a URL (i.e. a finished encode) for every profile.
+        UrlFactory(encode_profile=encode1, videoID=self.video1)
+        UrlFactory(encode_profile=encode2, videoID=self.video1)
+        UrlFactory(encode_profile=encode3, videoID=self.video1)
+        UrlFactory(encode_profile=encode4, videoID=self.video1)
+        UrlFactory(encode_profile=encode5, videoID=self.video1)
+        UrlFactory(encode_profile=encode6, videoID=self.video1)
+
+        # video2 has URLs for `desktop_mp4`, `mobile_low` and `youtube` only.
+        UrlFactory(encode_profile=encode1, videoID=self.video2)
+        UrlFactory(encode_profile=encode3, videoID=self.video2)
+        UrlFactory(encode_profile=encode6, videoID=self.video2)
+
+        # video3 has a URL for `youtube` only.
+        UrlFactory(encode_profile=encode6, videoID=self.video3)
+
+    def test_get_incomplete_encodes_invalid_video(self):
+        """
+        Tests that `get_incomplete_encodes` returns an empty list for a non-existent video id.
+        """
+        self.assertEqual(get_incomplete_encodes(u'non-existent-id'), [])
+
+    def test_get_incomplete_encodes(self):
+        """
+        Tests that `get_incomplete_encodes` works as expected.
+        """
+        # video1: every profile has a URL, so nothing is pending.
+        self.assertEqual(get_incomplete_encodes(self.video1.edx_id), [])
+        # video2: `audio_mp3` has no URL; `review` and `hls` are not reported
+        # (the course has `review_proc=False` and the `hls` profile is inactive).
+        self.assertEqual(get_incomplete_encodes(self.video2.edx_id), ['audio_mp3'])
+        # video3: the course has `s3_proc=False` and `youtube` already has a URL, leaving `review`.
+        self.assertEqual(get_incomplete_encodes(self.video3.edx_id), ['review'])
+
+    def test_is_video_ready(self):
+        """
+        Tests that `is_video_ready` works as expected.
+        """
+        self.assertTrue(is_video_ready(self.video1.edx_id))
+        self.assertFalse(is_video_ready(self.video2.edx_id))
+        # Ignoring the only pending encode makes the video ready.
+        self.assertTrue(is_video_ready(self.video2.edx_id, ignore_encodes=['audio_mp3']))
+        # 'abc_encode' matches none of the profiles created in setUp; extra ignored names are harmless.
+        self.assertTrue(is_video_ready(self.video3.edx_id, ignore_encodes=['review', 'abc_encode']))
--- a/VEDA_OS01/utils.py
+++ b/VEDA_OS01/utils.py
 """
 Common utils.
 """
-
-from VEDA_OS01.models import TranscriptStatus
+from VEDA.utils import get_config
+from VEDA_OS01.models import Encode, TranscriptStatus, URL, Video


 class ValTranscriptStatus(object):
@@ -57,3 +57,53 @@ def invalidate_fernet_cached_properties(model, fields):
            del field.fernet
        except AttributeError:
            pass
+
+
+def get_incomplete_encodes(edx_id):
+    """
+    Get incomplete encodes for the given video.
+
+    An encode is reported as incomplete when it is enabled for the video's
+    course, its encode profile is active, and no URL exists for it yet.
+
+    Arguments:
+        edx_id(unicode): an ID identifying the VEDA video.
+
+    Returns:
+        list: product specs of the encodes that are still pending; empty when
+            the video does not exist or nothing is pending.
+    """
+    try:
+        # NOTE(review): `latest()` is called without a field name, so it presumably
+        # relies on `get_latest_by` in the Video model's Meta -- confirm.
+        video = Video.objects.filter(edx_id=edx_id).latest()
+    except Video.DoesNotExist:
+        return []
+
+    course = video.inst_class
+    # The settings map course attribute names to the encodes each one enables.
+    encodes_map = get_config().get('encode_dict', {})
+    course_encodes = []
+    for attr, encodes in encodes_map.items():
+        if getattr(course, attr, False):
+            course_encodes.extend(encode.strip() for encode in encodes)
+
+    incomplete_encodes = []
+    for encode in course_encodes:
+        # Skip encodes whose profile is missing or deactivated.
+        encode_profile = Encode.objects.filter(product_spec=encode).first()
+        if encode_profile is None or not encode_profile.profile_active:
+            continue
+
+        # Skip encodes that already have a URL recorded for this video.
+        if URL.objects.filter(videoID=video, encode_profile__product_spec=encode).exists():
+            continue
+
+        incomplete_encodes.append(encode)
+
+    return incomplete_encodes
+
+
+def is_video_ready(edx_id, ignore_encodes=None):
+    """
+    Check whether a video should be considered ready.
+
+    The video is ready when every encode reported by `get_incomplete_encodes`
+    is listed in `ignore_encodes`. A video with no incomplete encodes is
+    therefore always ready.
+
+    Arguments:
+        edx_id(unicode): An ID identifying the VEDA video.
+        ignore_encodes(list): A list containing the profiles that should not be considered.
+            Defaults to ignoring nothing.
+
+    Returns:
+        bool: True if no encode outside `ignore_encodes` is still incomplete.
+    """
+    # `None` replaces the former `list()` default so no mutable object is shared between calls.
+    return set(get_incomplete_encodes(edx_id)).issubset(ignore_encodes or ())
--- a/control/tests/test_deliver.py
+++ b/control/tests/test_deliver.py
@@ -141,6 +141,7 @@ class VedaDeliverRunTest(TestCase):
            videoID=self.video,
            encode_url='Test_URL'
        )
+
        self.assertEqual(self.deliver_instance._DETERMINE_STATUS(), 'Complete')

    def test_validate_url(self):

--- a/control/veda_deliver.py
+++ b/control/veda_deliver.py
@@ -100,8 +100,14 @@ class VedaDelivery:
        """
        if self.encode_profile == 'youtube':
            self._CLEANUP()
+            # We only want to generate transcripts when all the encodings (except for YT and Review) are done.
+            if utils.is_video_ready(self.video_query.edx_id, ignore_encodes=['review', 'youtube']):
+                self.start_transcription()
            return None
+
        if self.encode_profile == 'review':
+            # No need to start transcription here separately, as the `self.encode_profile == 'youtube'`
+            # branch above takes care of this encode profile as well.
            return None

        if self.auth_dict['edx_cloudfront_prefix'] is not None:
@@ -126,17 +132,34 @@ class VedaDelivery:
        self._UPDATE_DATA()
        self._CLEANUP()

-        # Transcription Process
-        # We only want to generate transcripts for `desktop_mp4` profile.
-        if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription:
+        # We only want to generate transcripts when all the encodings (except for YT and Review) are done.
+        if utils.is_video_ready(self.video_query.edx_id, ignore_encodes=['review', 'youtube']):
+            self.start_transcription()
+
+    def start_transcription(self):
+        """
+        Kick off the transcription process.
+
+        NOTE: Transcription should be started without waiting for YT/Review encodings.
+        """
+        if self.video_query.process_transcription:
+            encode_query = Encode.objects.get(
+                product_spec='desktop_mp4'
+            )
+
+            encoded_file = u'{video_id}_{suffix}.{ext}'.format(
+                video_id=self.veda_id,
+                suffix=encode_query.encode_suffix,
+                ext=encode_query.encode_filetype
+            )

            # 3PlayMedia
            if self.video_query.provider == TranscriptProvider.THREE_PLAY:
-                self.start_3play_transcription_process()
+                self.start_3play_transcription_process(encoded_file)

            # Cielo24
            if self.video_query.provider == TranscriptProvider.CIELO24:
-                self.cielo24_transcription_flow()
+                self.cielo24_transcription_flow(encoded_file)

    def hls_run(self):
        """
@@ -499,9 +522,12 @@ class VedaDelivery:
        os.chdir(homedir)
        return True

-    def cielo24_transcription_flow(self):
+    def cielo24_transcription_flow(self, encoded_file):
        """
        Cielo24 transcription flow.
+
+        Arguments:
+            encoded_file (str): name of encoded file to construct video url
        """
        org = extract_course_org(self.video_proto.platform_course_url[0])

@@ -514,7 +540,7 @@ class VedaDelivery:
        s3_video_url = build_url(
            self.auth_dict['s3_base_url'],
            self.auth_dict['edx_s3_endpoint_bucket'],
-            self.encoded_file
+            encoded_file
        )

        callback_base_url = build_url(
@@ -546,9 +572,12 @@ class VedaDelivery:
        )
        cielo24.start_transcription_flow()

-    def start_3play_transcription_process(self):
+    def start_3play_transcription_process(self, encoded_file):
        """
        3PlayMedia Transcription Flow
+
+        Arguments:
+            encoded_file (str): name of encoded file to construct video url
        """
        try:
            # Picks the first course from the list as there may be multiple
@@ -568,7 +597,7 @@ class VedaDelivery:
            s3_video_url = build_url(
                self.auth_dict['s3_base_url'],
                self.auth_dict['edx_s3_endpoint_bucket'],
-                self.encoded_file
+                encoded_file
            )
            callback_url = build_url(
                self.auth_dict['veda_base_url'],

--- a/test_requirements.txt
+++ b/test_requirements.txt
 ## NOTE:  Test requirements.
 codecov==2.0.9
-pep8==1.7.0
 coverage==4.2
-isort==4.2.15
 ddt==1.1.1
+edx-lint==0.5.4
+factory_boy==2.10.0
+isort==4.2.15
 moto==1.0.1
-responses==0.6.1
+Paver==1.2.4
+pep8==1.7.0
 pytest==3.0.6
 pytest-django==3.1.2
 pytest-django-ordering==1.0.1
-Paver==1.2.4
 pylint==1.7.1
 pylint-celery==0.3
 pylint-django==0.7.2
-edx-lint==0.5.4
+responses==0.6.1