Commit 79a90655 by Muzaffar yousaf Committed by GitHub

Merge pull request #22 from edx/transcripts-3rd-party-integration

Transcripts 3rd party integration
parents 5f53d133 a3a193b9
# .coveragerc for edx-video-pipeline
[run]
data_file = reports/.coverage
data_file = .coverage
source =
VEDA
VEDA_OS01
control
frontend
youtube_callback
scripts
# do not calculate coverage for these for now
# youtube_callback
# scripts
omit =
templates/*
frontend/tests/*
dependencies/*
control/*
control/tests/*
VEDA/tests/*
VEDA_OS01/tests/*
VEDA_OS01/migrations/*
VEDA_OS01/admin.py
concurrency=multiprocessing
......@@ -28,5 +35,3 @@ exclude_lines =
title = edx-video-worker Python Test Coverage Report
directory = reports/cover
[xml]
output = reports/coverage.xml
......@@ -3,10 +3,16 @@
*.pyc
static/admin/
static/
sandbox.db
.coverage
coverage/
reports/
.cache/
VEDA/private.py
......@@ -5,13 +5,14 @@ python:
sudo: required
before_install:
- export BOTO_CONFIG=/dev/null
install:
- pip install -r requirements.txt
- pip install -r test_requirements.txt
- make requirements
# build tests
script:
- make validate
- make validate
after_success:
- pip install -U codecov
......
Mushtaq Ali <mushtaak@gmail.com>
Muhammad Ammar <mammar@gmail.com>
# Python packages that nose both runs and measures coverage for.
PACKAGES = VEDA VEDA_OS01 control frontend youtube_callback scripts

# These targets are commands, not files; declaring them phony keeps a stray
# file named e.g. "test" or "clean" from making them look up to date.
.PHONY: requirements validate test clean

requirements: ## Install runtime and test dependencies
	pip install -r requirements.txt
	pip install -r test_requirements.txt

validate: test ## Run tests and quality checks

test: clean ## Run the test suites with coverage
	nosetests --with-coverage --cover-inclusive --cover-branches \
		--cover-html --cover-html-dir=build/coverage/html/ \
		--cover-xml --cover-xml-file=build/coverage/coverage.xml --verbosity=2 \
		$(foreach package,$(PACKAGES),--cover-package=$(package)) \
		$(PACKAGES)
	coverage run -m pytest --durations=10
	coverage combine
	coverage report

clean: ## Erase previously collected coverage data
	coverage erase
......
......@@ -21,5 +21,5 @@ with open(read_yaml, 'r') as stream:
DJANGO_SECRET_KEY = return_dict['django_secret_key'] or 'test_secret_key'
DJANGO_ADMIN = ('', '')
DJANGO_DEBUG = return_dict['debug']
DJANGO_DEBUG = return_dict['debug'] if 'debug' in return_dict else False
DATABASES = return_dict['DATABASES']
......@@ -2,6 +2,8 @@
Settings
"""
from os.path import join, dirname, abspath
DATABASES = None
import os
......@@ -138,6 +140,7 @@ INSTALLED_APPS = (
'rest_framework.authtoken',
'oauth2_provider',
'rest_framework',
'django_filters',
'corsheaders',
'frontend',
'VEDA_OS01',
......@@ -175,3 +178,7 @@ LOGGING = {
},
}
}
# See if the developer has any local overrides.
if os.path.isfile(join(dirname(abspath(__file__)), 'private.py')):
from .private import * # pylint: disable=import-error, wildcard-import
"""
Test Settings
"""
from settings import *
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': 'pipeline.db',
}
}
......@@ -10,7 +10,7 @@ from rest_framework import routers
from django.conf.urls import patterns, include, url
from django.contrib import admin
from VEDA_OS01 import views
from VEDA_OS01 import views, transcripts
router = routers.DefaultRouter()
admin.autodiscover()
......@@ -33,5 +33,16 @@ urlpatterns = [
url(r'^api/', include(router.urls)),
url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')),
# Cheap auth server
url(r'^veda_auth/', views.token_auth)
url(r'^veda_auth/', views.token_auth),
url(
regex=r'^cielo24/transcript_completed/(?P<token>[\w]+)$',
view=transcripts.Cielo24CallbackHandlerView.as_view(),
name='cielo24_transcript_completed'
),
# 3PlayMedia callback handler view
url(
regex=r'^3playmedia/transcripts/handle/(?P<token>[\w]+)$',
view=transcripts.ThreePlayMediaCallbackHandlerView.as_view(),
name='3play_media_callback'
)
]
from django.contrib import admin
from VEDA_OS01.models import Course, Video, Encode, URL, Destination, Institution, VedaUpload
from VEDA_OS01.models import (
Course, Video, Encode, URL, Destination, Institution, VedaUpload,
TranscriptCredentials, TranscriptProcessMetadata
)
class CourseAdmin(admin.ModelAdmin):
......@@ -30,7 +34,15 @@ class VideoAdmin(admin.ModelAdmin):
'studio_id',
'video_trans_start',
'video_trans_status',
'video_active'
'transcript_status',
'video_active',
'process_transcription',
'source_language',
'provider',
'three_play_turnaround',
'cielo24_turnaround',
'cielo24_fidelity',
'preferred_languages',
]
list_filter = ['inst_class__institution']
search_fields = ['edx_id', 'client_title', 'studio_id']
......@@ -101,6 +113,14 @@ class VideoUploadAdmin(admin.ModelAdmin):
]
class TranscriptCredentialsAdmin(admin.ModelAdmin):
    """
    Admin options for `TranscriptCredentials`.

    Nothing beyond the `model` attribute is customised here.
    """
    model = TranscriptCredentials
class TranscriptProcessMetadataAdmin(admin.ModelAdmin):
    """
    Admin options for `TranscriptProcessMetadata`.

    Nothing beyond the `model` attribute is customised here.
    """
    model = TranscriptProcessMetadata
admin.site.register(Course, CourseAdmin)
admin.site.register(Video, VideoAdmin)
admin.site.register(Encode, EncodeAdmin)
......@@ -108,3 +128,5 @@ admin.site.register(URL, URLAdmin)
admin.site.register(Destination, DestinationAdmin)
admin.site.register(Institution, InstitutionAdmin)
admin.site.register(VedaUpload, VideoUploadAdmin)
admin.site.register(TranscriptCredentials, TranscriptCredentialsAdmin)
admin.site.register(TranscriptProcessMetadata, TranscriptProcessMetadataAdmin)
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2017-10-16 12:11
from __future__ import unicode_literals
import VEDA_OS01.models
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
import model_utils.fields
class Migration(migrations.Migration):
dependencies = [
('VEDA_OS01', '0001_initial'),
]
operations = [
migrations.CreateModel(
name='TranscriptCredentials',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
('org', models.CharField(help_text=b'This value must match the value of organization in studio/edx-platform.', max_length=50, verbose_name=b'Organization')),
('provider', models.CharField(choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=50, verbose_name=b'Transcript provider')),
('api_key', models.CharField(max_length=255, verbose_name=b'API key')),
('api_secret', models.CharField(blank=True, max_length=255, null=True, verbose_name=b'API secret')),
],
options={
'verbose_name_plural': 'Transcript Credentials',
},
),
migrations.CreateModel(
name='TranscriptProcessMetadata',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
('provider', models.CharField(choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=50, verbose_name=b'Transcript provider')),
('process_id', models.CharField(max_length=255, verbose_name=b'Process id')),
('translation_id', models.CharField(blank=True, help_text=b'Keeps track of 3Play Translations', max_length=255, null=True, verbose_name=b'Translation id')),
('lang_code', models.CharField(max_length=50, verbose_name=b'Language code')),
('status', models.CharField(choices=[(b'N/A', b'N/A'), (b'PENDING', b'PENDING'), (b'IN PROGRESS', b'IN PROGRESS'), (b'FAILED', b'FAILED'), (b'READY', b'READY')], default=b'PENDING', max_length=50, verbose_name=b'Transcript status')),
],
options={
'get_latest_by': 'modified',
'verbose_name_plural': 'Transcript process metadata',
},
),
migrations.AddField(
model_name='video',
name='cielo24_fidelity',
field=models.CharField(blank=True, choices=[(b'MECHANICAL', b'Mechanical, 75% Accuracy'), (b'PREMIUM', b'Premium, 95% Accuracy'), (b'PROFESSIONAL', b'Professional, 99% Accuracy')], max_length=20, null=True, verbose_name=b'Cielo24 Fidelity'),
),
migrations.AddField(
model_name='video',
name='cielo24_turnaround',
field=models.CharField(blank=True, choices=[(b'STANDARD', b'Standard, 48h'), (b'PRIORITY', b'Priority, 24h')], max_length=20, null=True, verbose_name=b'Cielo24 Turnaround'),
),
migrations.AddField(
model_name='video',
name='preferred_languages',
field=VEDA_OS01.models.ListField(blank=True, default=[]),
),
migrations.AddField(
model_name='video',
name='process_transcription',
field=models.BooleanField(default=False, verbose_name=b'Process transcripts from Cielo24/3PlayMedia'),
),
migrations.AddField(
model_name='video',
name='provider',
field=models.CharField(blank=True, choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=20, null=True, verbose_name=b'Transcription provider'),
),
migrations.AddField(
model_name='video',
name='source_language',
field=models.CharField(blank=True, help_text=b'This is video speech language.', max_length=50, null=True, verbose_name=b'video source language'),
),
migrations.AddField(
model_name='video',
name='three_play_turnaround',
field=models.CharField(blank=True, choices=[(b'extended_service', b'10-Day/Extended'), (b'default', b'4-Day/Default'), (b'expedited_service', b'2-Day/Expedited'), (b'rush_service', b'24 hour/Rush'), (b'same_day_service', b'Same Day')], max_length=20, null=True, verbose_name=b'3PlayMedia Turnaround'),
),
migrations.AddField(
model_name='video',
name='transcript_status',
field=models.CharField(choices=[(b'N/A', b'N/A'), (b'PENDING', b'PENDING'), (b'IN PROGRESS', b'IN PROGRESS'), (b'FAILED', b'FAILED'), (b'READY', b'READY')], default=b'N/A', max_length=100, verbose_name=b'Transcription Status'),
),
migrations.AddField(
model_name='transcriptprocessmetadata',
name='video',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='transcript_processes', to='VEDA_OS01.Video'),
),
migrations.AlterUniqueTogether(
name='transcriptcredentials',
unique_together=set([('org', 'provider')]),
),
]
"""
Models for Video Pipeline
"""
import json
import uuid
from django.db import models
from model_utils.models import TimeStampedModel
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
def _createHex():
    """
    Return the hexadecimal string form of a newly generated version 1 UUID.
    """
    generated = uuid.uuid1()
    return generated.hex
class TranscriptProvider(object):
    """
    Names of the supported 3rd party transcript providers.
    """
    THREE_PLAY = '3PlayMedia'
    CIELO24 = 'Cielo24'

    # Each provider name doubles as its own display label.
    CHOICES = tuple((provider, provider) for provider in (THREE_PLAY, CIELO24))
class TranscriptStatus(object):
    """
    Statuses a transcript can be in.

    Note:
        The meaning of 'PENDING' depends on where it is stored:
        1. On a Video: transcription is applicable to the video but it has not started yet.
        2. On a TranscriptProcessMetadata: the transcript/translation process has been created
           but it has not actually been sent to the 3rd party transcription service.
    """
    NOT_APPLICABLE = 'N/A'
    PENDING = 'PENDING'
    IN_PROGRESS = 'IN PROGRESS'
    FAILED = 'FAILED'
    READY = 'READY'

    # Every status value is used verbatim as its display label.
    CHOICES = tuple(
        (status, status)
        for status in (NOT_APPLICABLE, PENDING, IN_PROGRESS, FAILED, READY)
    )
class Cielo24Turnaround(object):
    """
    Cielo24 turnaround options, i.e. how long the Cielo24 transcription process takes.
    """
    STANDARD = 'STANDARD'
    PRIORITY = 'PRIORITY'

    # (stored value, display label)
    CHOICES = (
        (STANDARD, 'Standard, 48h'),
        (PRIORITY, 'Priority, 24h'),
    )
class Cielo24Fidelity(object):
    """
    Cielo24 fidelity options.

    The fidelity decides a transcript's accuracy and the supported languages.
    """
    MECHANICAL = 'MECHANICAL'
    PREMIUM = 'PREMIUM'
    PROFESSIONAL = 'PROFESSIONAL'

    # (stored value, display label)
    CHOICES = (
        (MECHANICAL, 'Mechanical, 75% Accuracy'),
        (PREMIUM, 'Premium, 95% Accuracy'),
        (PROFESSIONAL, 'Professional, 99% Accuracy'),
    )
class ThreePlayTurnaround(object):
    """
    3PlayMedia turnaround options, i.e. how long the 3PlayMedia transcription process takes.
    """
    EXTENDED_SERVICE = 'extended_service'
    DEFAULT = 'default'
    EXPEDITED_SERVICE = 'expedited_service'
    RUSH_SERVICE = 'rush_service'
    SAME_DAY_SERVICE = 'same_day_service'

    # (stored value, display label)
    CHOICES = (
        (EXTENDED_SERVICE, '10-Day/Extended'),
        (DEFAULT, '4-Day/Default'),
        (EXPEDITED_SERVICE, '2-Day/Expedited'),
        (RUSH_SERVICE, '24 hour/Rush'),
        (SAME_DAY_SERVICE, 'Same Day'),
    )
class VideoStatus(object):
    """
    Video status values, paired with their display labels in `CHOICES`.

    TODO: STATUS REMODEL -- change the statuses to:
        'Ingest', 'Queued', 'In Progress', 'Corrupt', 'Complete',
        'Error', 'Duplicate', 'Review', 'Reject'

    Possible additions:
        'Invalid' (for ingest detected)
        'Retry'
        'Delivery' (for celery states?)
    """
    # Ingest and transcode
    SI = 'Ingest'
    TQ = 'Transcode Queue'
    AT = 'Active Transcode'
    TR = 'Transcode Retry'
    TC = 'Transcode Complete'
    DU = 'Deliverable Upload'
    FC = 'File Complete'
    TE = 'Transcode Error'
    CF = 'Corrupt File'

    # Review
    RH = 'Review Hold'
    RR = 'Review Reject'
    RP = 'Final Publish'

    YD = 'Youtube Duplicate'

    QUEUE = 'Queue'
    PROGRESS = 'Progress'
    COMPLETE = 'Complete'

    # (stored value, display label)
    CHOICES = (
        (SI, 'System Ingest'),
        (TQ, 'Transcode Queue'),
        (AT, 'Active Transcode'),
        (TR, 'Transcode Retry'),
        (TC, 'Transcode Complete'),
        (DU, 'Deliverable Upload'),
        (FC, 'File Complete'),
        (TE, 'Transcode Error'),
        (CF, 'Corrupt File on Ingest'),
        (RH, 'Review Hold'),
        (RR, 'Review Rejected'),
        (RP, 'Review to Final Publish'),
        (YD, 'Youtube Duplicate'),
        (QUEUE, 'In Encode Queue'),
        (PROGRESS, 'In Progress'),
        (COMPLETE, 'Complete'),
    )
class ListField(models.TextField):
    """
    A text-backed model field that stores and retrieves a python list of strings.

    The list is serialized with `json.dumps` on its way into the database and
    de-serialized with `json.loads` on its way out.
    """

    def get_prep_value(self, value):
        """
        Converts a list to its json representation to store in database as text.

        Arguments:
            value: the list to store. Any empty value (`None`, `[]`, `''`, ...)
                is stored as an empty json list.

        Returns:
            The json text for the list.

        Raises:
            ValueError: if a non-empty `value` is not a list, or if the list
                contains anything other than strings.
        """
        if not value:
            # Answer before validating: `validate_list` iterates its argument,
            # so letting `None` through would raise a TypeError.
            return json.dumps([])

        if not isinstance(value, list):
            raise ValueError(u'The given value {} is not a list.'.format(value))

        return json.dumps(self.validate_list(value))

    def from_db_value(self, value, expression, connection, context):
        """
        Converts a json list representation in a database to a python object.
        """
        return self.to_python(value)

    def to_python(self, value):
        """
        Converts the value into a list.

        Arguments:
            value: either a list of strings, or the json text of one. Empty
                values are treated as an empty list.

        Returns:
            A list of strings.

        Raises:
            ValueError: if `value` is neither a list of strings nor json text
                that de-serializes to one.
        """
        if not value:
            value = []

        # A list that has been set directly only needs its items validated.
        if isinstance(value, list):
            return self.validate_list(value)

        # Otherwise de-serialize the value, expect a list and validate that.
        try:
            py_list = json.loads(value)
            if not isinstance(py_list, list):
                raise TypeError
            self.validate_list(py_list)
        except (ValueError, TypeError):
            raise ValueError(u'Must be a valid list of strings.')

        return py_list

    def validate_list(self, value):
        """
        Validate the data before saving into the database.

        Arguments:
            value(list): list to be validated

        Returns:
            The same list if validation is successful

        Raises:
            ValueError: if any item of the list is not a string
        """
        # `basestring` covers both `str` and `unicode` (this module targets Python 2).
        if not all(isinstance(item, basestring) for item in value):
            raise ValueError(u'list must only contain strings.')

        return value
class Institution(models.Model):
    """
    An institution, identified by a short code and a name.
    """
    institution_code = models.CharField(max_length=4)
    institution_name = models.CharField(max_length=50)

    def __unicode__(self):
        """
        Render the institution as "<institution_name> <institution_code>".
        """
        # Named `{}` placeholders are required here: `str.format` ignores
        # `%s`, so a `%s` template would be returned unformatted.
        return u'{institution_name} {institution_code}'.format(
            institution_name=self.institution_name,
            institution_code=self.institution_code,
        )
class Course (models.Model):
......@@ -172,12 +382,29 @@ class Course (models.Model):
unique=True
)
@property
def org(self):
    """
    Organization of the course.

    It is taken from the first comma-separated entry of `local_storedir`,
    parsed as a course key. `None` is returned when `local_storedir` is
    empty or when that entry is not a valid course key.
    """
    course_runs = self.local_storedir
    if not course_runs:
        return None

    # Only the first entry is consulted.
    first_course_id = course_runs.split(',')[0]
    try:
        return CourseKey.from_string(first_course_id).org
    except InvalidKeyError:
        return None
def __unicode__(self):
    """
    Render the course as "<institution> <edx_classid> <course_name>".
    """
    # Named `{}` placeholders are required here: `str.format` ignores `%s`,
    # so a `%s` template would be returned unformatted.
    return u'{institution} {edx_class_id} {course_name}'.format(
        institution=self.institution,
        edx_class_id=self.edx_classid,
        course_name=self.course_name,
    )
class Video (models.Model):
......@@ -224,73 +451,66 @@ class Video (models.Model):
video_trans_start = models.DateTimeField('Process Start', null=True, blank=True)
video_trans_end = models.DateTimeField('Process Complete', null=True, blank=True)
"""
TODO: STATUS REMODEL:
Change to
'Ingest',
'Queued',
'In Progress',
'Corrupt',
'Complete',
'Error',
'Duplicate',
'Review',
'Reject'
Possile:
'Invalid' (for ingest detected)
'Retry'
'Delivery' (for celery states?)
"""
SI = 'Ingest'
TQ = 'Transcode Queue'
AT = 'Active Transcode'
TR = 'Transcode Retry'
TC = 'Transcode Complete'
DU = 'Deliverable Upload'
FC = 'File Complete'
CF = 'Corrupt File'
RH = 'Review Hold'
RR = 'Review Reject'
RP = 'Final Publish'
YD = 'Youtube Duplicate'
TRANS_STATUS_OPTIONS = (
(SI, "System Ingest"),
(TQ, "Transcode Queue"),
(AT, "Active Transcode"),
(TR, "Transcode Retry"),
(TC, "Transcode Complete"),
(DU, "Deliverable Upload"),
(FC, "File Complete"),
('Transcode Error', "Transcode Error"),
(CF, "Corrupt File on Ingest"),
(RH, "Review Hold"),
(RR, "Review Rejected"),
(RP, "Review to Final Publish"),
(YD, "Youtube Duplicate"),
('Queue', "In Encode Queue"),
('Progress', "In Progress"),
('Complete', "Complete")
)
video_trans_status = models.CharField(
'Transcode Status',
max_length=100,
choices=TRANS_STATUS_OPTIONS,
default=SI
choices=VideoStatus.CHOICES,
default=VideoStatus.SI
)
transcript_status = models.CharField(
'Transcription Status',
max_length=100,
choices=TranscriptStatus.CHOICES,
default=TranscriptStatus.NOT_APPLICABLE
)
video_glacierid = models.CharField('Glacier Archive ID String', max_length=200, null=True, blank=True)
abvid_serial = models.CharField('VEDA Upload Process Serial', max_length=20, null=True, blank=True)
stat_queuetime = models.FloatField('Video Avg. Queuetime (sec)', default=0)
# 3rd Party Transcription
process_transcription = models.BooleanField('Process transcripts from Cielo24/3PlayMedia', default=False)
provider = models.CharField(
'Transcription provider',
max_length=20,
choices=TranscriptProvider.CHOICES,
null=True,
blank=True,
)
three_play_turnaround = models.CharField(
'3PlayMedia Turnaround',
max_length=20,
choices=ThreePlayTurnaround.CHOICES,
null=True,
blank=True,
)
cielo24_turnaround = models.CharField(
'Cielo24 Turnaround', max_length=20,
choices=Cielo24Turnaround.CHOICES,
null=True,
blank=True,
)
cielo24_fidelity = models.CharField(
'Cielo24 Fidelity',
max_length=20,
choices=Cielo24Fidelity.CHOICES,
null=True,
blank=True,
)
source_language = models.CharField(
'video source language',
help_text="This is video speech language.",
max_length=50,
null=True,
blank=True,
)
preferred_languages = ListField(blank=True, default=[])
class Meta:
get_latest_by = 'video_trans_start'
def __unicode__(self):
    """
    Render the video as its `edx_id`.
    """
    # A `{}` placeholder is required here: `str.format` ignores `%s`, so a
    # `%s` template would be returned unformatted.
    return u'{edx_id}'.format(edx_id=self.edx_id)
class Destination (models.Model):
......@@ -347,7 +567,7 @@ class Encode (models.Model):
xuetang_proc = models.BooleanField('Submit to XuetangX', default=False)
def __unicode__(self):
    """
    Render the encode profile as its `encode_name`.
    """
    # A `{}` placeholder is required here: `str.format` ignores `%s`, so a
    # `%s` template would be returned unformatted.
    return u'{encode_profile}'.format(encode_profile=self.encode_name)
class URL (models.Model):
......@@ -370,7 +590,11 @@ class URL (models.Model):
get_latest_by = 'url_date'
def __unicode__(self):
    """
    Render the URL as "<video edx_id> : <encode profile name> : <url_date>".
    """
    # Named `{}` placeholders are required here: `str.format` ignores `%s`,
    # so a `%s` template would be returned unformatted.
    return u'{video_id} : {encode_profile} : {date}'.format(
        video_id=self.videoID.edx_id,
        encode_profile=self.encode_profile.encode_name,
        date=self.url_date,
    )
class VedaUpload (models.Model):
......@@ -422,9 +646,72 @@ class VedaUpload (models.Model):
get_latest_by = 'upload_date'
def __unicode__(self):
    """
    Render the upload as
    "<client_information> <upload_filename> <status_email> <file_complete>".
    """
    # Named `{}` placeholders are required here: `str.format` ignores `%s`,
    # so a `%s` template would be returned unformatted.
    return u'{client_information} {upload_filename} {status_email} {file_complete}'.format(
        client_information=self.client_information,
        upload_filename=self.upload_filename,
        status_email=self.status_email,
        file_complete=self.file_complete
    )
class TranscriptCredentials(TimeStampedModel):
    """
    Credentials of a third party transcription service provider for an organization.

    There can be only one record per (org, provider) pair.
    """
    org = models.CharField(
        verbose_name='Organization',
        help_text='This value must match the value of organization in studio/edx-platform.',
        max_length=50,
    )
    provider = models.CharField(
        verbose_name='Transcript provider',
        choices=TranscriptProvider.CHOICES,
        max_length=50,
    )
    api_key = models.CharField(verbose_name='API key', max_length=255)
    api_secret = models.CharField(verbose_name='API secret', max_length=255, null=True, blank=True)

    class Meta:
        verbose_name_plural = 'Transcript Credentials'
        unique_together = ('org', 'provider')

    def __unicode__(self):
        """
        Render the credentials as "<org> - <provider>".
        """
        return u'{org} - {provider}'.format(
            org=self.org,
            provider=self.provider,
        )
class TranscriptProcessMetadata(TimeStampedModel):
    """
    Metadata of a third party transcript process started for a video.
    """
    video = models.ForeignKey(Video, related_name='transcript_processes')
    provider = models.CharField(
        verbose_name='Transcript provider',
        choices=TranscriptProvider.CHOICES,
        max_length=50,
    )
    process_id = models.CharField(verbose_name='Process id', max_length=255)
    translation_id = models.CharField(
        verbose_name='Translation id',
        help_text='Keeps track of 3Play Translations',
        max_length=255,
        null=True,
        blank=True,
    )
    lang_code = models.CharField(verbose_name='Language code', max_length=50)
    status = models.CharField(
        verbose_name='Transcript status',
        choices=TranscriptStatus.CHOICES,
        default=TranscriptStatus.PENDING,
        max_length=50,
    )

    class Meta:
        get_latest_by = 'modified'
        verbose_name_plural = 'Transcript process metadata'

    def update(self, **fields):
        """
        Set the given attributes on this process and save it.

        Keyword Arguments:
            fields(dict): attribute names mapped to the values to be set.
        """
        for field_name in fields:
            setattr(self, field_name, fields[field_name])

        self.save()

    def __unicode__(self):
        """
        Render the process as "<video edx_id> - <provider> - <lang_code> - <status>".
        """
        template = u'{video} - {provider} - {lang} - {status}'
        return template.format(
            video=self.video.edx_id,
            provider=self.provider,
            lang=self.lang_code,
            status=self.status,
        )
......@@ -72,8 +72,16 @@ class VideoSerializer(serializers.ModelSerializer):
'video_trans_start',
'video_trans_end',
'video_trans_status',
'transcript_status',
'video_glacierid',
'course_ids'
'course_ids',
'process_transcription',
'source_language',
'provider',
'three_play_turnaround',
'cielo24_turnaround',
'cielo24_fidelity',
'preferred_languages',
)
def get_course_ids(self, video):
......@@ -124,10 +132,42 @@ class VideoSerializer(serializers.ModelSerializer):
'video_trans_status',
instance.video_trans_status
)
instance.transcript_status = validated_data.get(
'transcript_status',
instance.transcript_status
)
instance.video_glacierid = validated_data.get(
'video_glacierid',
instance.video_glacierid
)
instance.process_transcription = validated_data.get(
'process_transcription',
instance.process_transcription
)
instance.source_language = validated_data.get(
'source_language',
instance.source_language
)
instance.provider = validated_data.get(
'provider',
instance.provider
)
instance.three_play_turnaround = validated_data.get(
'three_play_turnaround',
instance.three_play_turnaround
)
instance.cielo24_turnaround = validated_data.get(
'cielo24_turnaround',
instance.cielo24_turnaround
)
instance.cielo24_fidelity = validated_data.get(
'cielo24_fidelity',
instance.cielo24_fidelity
)
instance.preferred_languages = validated_data.get(
'preferred_languages',
instance.preferred_languages
)
instance.save()
return instance
......
from django.test import TestCase
from VEDA_OS01.models import Course, Destination, Encode, URL, Video
from VEDA_OS01.serializers import CourseSerializer, EncodeSerializer, URLSerializer, VideoSerializer
class TestCourseSerializer(TestCase):
    """
    Create/update round-trip tests for `CourseSerializer`.
    """

    def setUp(self):
        self.course_props = {
            'course_name': u'Intro to VEDA',
            'institution': u'MAx',
            'edx_classid': u'123',
            'semesterid': u'2017',
        }

    def test_create_course(self):
        """
        A course saved through `CourseSerializer` serializes the same when read back.
        """
        serializer = CourseSerializer(data=self.course_props)
        serializer.is_valid(raise_exception=True)
        serializer.save()

        # Read the created course record back and serialize it again.
        stored_course = Course.objects.get(**self.course_props)
        self.assertDictEqual(CourseSerializer(instance=stored_course).data, serializer.data)

    def test_update_course(self):
        """
        A course updated through `CourseSerializer` serializes the same when read back.
        """
        existing_course = Course.objects.create(**self.course_props)

        # Rename the course via a partial update.
        new_props = dict(self.course_props, course_name=u'Intro to edx-video-pipeline')
        serializer = CourseSerializer(instance=existing_course, data=new_props, partial=True)
        serializer.is_valid(raise_exception=True)
        serializer.save()

        # The stored record must match what the serializer reports.
        stored_course = Course.objects.first()
        self.assertDictEqual(CourseSerializer(instance=stored_course).data, serializer.data)
class TestVideoSerializer(TestCase):
    """
    Create/update round-trip tests for `VideoSerializer`.
    """

    def setUp(self):
        self.course = Course.objects.create(
            course_name=u'Intro to VEDA',
            institution=u'MAx',
            edx_classid=u'123',
            semesterid=u'2017',
            local_storedir='course_id1, course_id2',
        )
        # The serializer is given the course by primary key.
        self.video_props = {
            'inst_class': self.course.pk,
            'client_title': u'Intro to video',
            'edx_id': u'12345678',
            'studio_id': u'43211234',
            'video_active': True,
            'process_transcription': True,
            'source_language': u'fr',
        }

    def test_create_video(self):
        """
        A video saved through `VideoSerializer` serializes the same when read back.
        """
        serializer = VideoSerializer(data=self.video_props)
        serializer.is_valid(raise_exception=True)
        serializer.save()

        # Read the created video record back and serialize it again.
        stored_video = Video.objects.get(**self.video_props)
        self.assertDictEqual(VideoSerializer(instance=stored_video).data, serializer.data)

    def test_update_video(self):
        """
        A video updated through `VideoSerializer` serializes the same when read back.
        """
        # The model itself needs the course instance rather than its primary key.
        model_props = dict(self.video_props, inst_class=self.course)
        existing_video = Video.objects.create(**model_props)

        # Retitle the video via a partial update.
        new_props = dict(self.video_props, client_title=u'Intro to new Video')
        serializer = VideoSerializer(instance=existing_video, data=new_props, partial=True)
        serializer.is_valid(raise_exception=True)
        serializer.save()

        # The stored record must match what the serializer reports.
        stored_video = Video.objects.first()
        self.assertDictEqual(VideoSerializer(instance=stored_video).data, serializer.data)
class TestURLSerializer(TestCase):
    """
    Create round-trip test for `URLSerializer`.
    """

    def setUp(self):
        # An encode profile and the destination it belongs to.
        encode_destination = Destination.objects.create(
            destination_name='test_destination',
            destination_nick='des',
            destination_active=True
        )
        encode_profile = Encode.objects.create(
            encode_destination=encode_destination,
            encode_name='desktop_mp4',
            profile_active=True,
        )

        # A video and the course it belongs to.
        video_course = Course.objects.create(
            course_name=u'Intro to VEDA',
            institution=u'MAx',
            edx_classid=u'123',
            semesterid=u'2017',
            local_storedir='course_id1, course_id2',
        )
        encoded_video = Video.objects.create(
            inst_class=video_course,
            client_title=u'Intro to video',
            edx_id=u'12345678',
            studio_id=u'43211234'
        )

        # The serializer is given both relations by primary key.
        self.url_props = {
            'encode_profile': encode_profile.pk,
            'videoID': encoded_video.pk,
            'encode_url': 'https://www.s3.amazon.com/123.mp4',
        }

    def test_create_url(self):
        """
        A URL saved through `URLSerializer` serializes the same when read back.
        """
        serializer = URLSerializer(data=self.url_props)
        serializer.is_valid(raise_exception=True)
        serializer.save()

        # Read the created URL record back and serialize it again.
        stored_url = URL.objects.first()
        self.assertDictEqual(URLSerializer(instance=stored_url).data, serializer.data)
class TestEncodeSerializer(TestCase):
    """
    Serialization test for `EncodeSerializer`.
    """

    def test_serialized_encode(self):
        """
        Every serialized 'Encode' field equals the same-named attribute of the model instance.
        """
        encode_destination = Destination.objects.create(
            destination_name='test_destination',
            destination_nick='des',
            destination_active=True
        )
        encode = Encode.objects.create(
            encode_destination=encode_destination,
            encode_name='desktop_mp4',
            profile_active=True,
        )
        self.assertEqual(Encode.objects.count(), 1)

        serialized_encode = EncodeSerializer(encode).data
        for field_name in serialized_encode:
            self.assertEqual(serialized_encode[field_name], getattr(encode, field_name))
# -*- encoding: utf-8 -*-
"""
Transcript tests
"""
import json
import responses
import urllib
import urlparse
from boto.exception import S3ResponseError
from boto.s3.connection import S3Connection
from boto.s3.key import Key
from ddt import data, ddt, unpack
from django.core.urlresolvers import reverse
from mock import Mock, PropertyMock, patch
from moto import mock_s3_deprecated
from rest_framework import status
from rest_framework.test import APITestCase
from VEDA_OS01 import transcripts, utils
from VEDA_OS01.models import (Course, TranscriptCredentials,
TranscriptProcessMetadata, TranscriptProvider,
TranscriptStatus, Video)
CONFIG_DATA = utils.get_config('test_config.yaml')
VIDEO_DATA = {
'studio_id': '12345',
'preferred_languages': ['en']
}
TRANSCRIPT_PROCESS_METADATA = {
'process_id': 100,
'lang_code': 'en',
'provider': TranscriptProvider.CIELO24,
'status': TranscriptStatus.IN_PROGRESS
}
TRANSCRIPT_PREFERENCES = {
'org': 'MAx',
'provider': TranscriptProvider.CIELO24,
'api_key': 'i_am_key',
'api_secret': 'i_am_secret',
}
REQUEST_PARAMS = {'job_id': 100, 'iwp_name': 'FINAL', 'lang_code': 'en', 'org': 'MAx', 'video_id': '111'}
TRANSCRIPT_SRT_DATA = """
1
00:00:07,180 --> 00:00:08,460
This is subtitle line 1.
2
00:00:08,460 --> 00:00:10,510
This is subtitle line 2.
3
00:00:10,510 --> 00:00:13,560
This is subtitle line 3.
4
00:00:13,560 --> 00:00:14,360
This is subtitle line 4.
5
00:00:14,370 --> 00:00:16,530
This is subtitle line 5.
6
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
"""
TRANSCRIPT_SJSON_DATA = {
u'start': [7180, 8460, 10510, 13560, 14370, 16500],
u'end': [8460, 10510, 13560, 14360, 16530, 18600],
u'text': [
u'This is subtitle line 1.',
u'This is subtitle line 2.',
u'This is subtitle line 3.',
u'This is subtitle line 4.',
u'This is subtitle line 5.',
u'可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.'
]
}
@ddt
@patch.dict('VEDA_OS01.transcripts.CONFIG', CONFIG_DATA)
@patch('VEDA_OS01.utils.get_config', Mock(return_value=CONFIG_DATA))
class Cielo24TranscriptTests(APITestCase):
"""
Cielo24 Transcript Tests
"""
def setUp(self):
    """
    Create the course, video, credentials and transcript process used by the tests,
    and pin the generated uuid hex to a known value.
    """
    super(Cielo24TranscriptTests, self).setUp()

    request_token = CONFIG_DATA['transcript_provider_request_token']
    self.url = reverse('cielo24_transcript_completed', args=[request_token])
    self.uuid_hex = '01234567890123456789'

    self.course = Course.objects.create(
        course_name='Intro to VEDA',
        institution='MAx',
        edx_classid='123'
    )
    self.video = Video.objects.create(inst_class=self.course, **VIDEO_DATA)
    self.transcript_prefs = TranscriptCredentials.objects.create(**TRANSCRIPT_PREFERENCES)
    self.transcript_process_metadata = TranscriptProcessMetadata.objects.create(
        video=self.video,
        **TRANSCRIPT_PROCESS_METADATA
    )

    transcript_name = '{directory}{uuid}.sjson'.format(
        directory=CONFIG_DATA['aws_video_transcripts_prefix'],
        uuid=self.uuid_hex
    )
    self.transcript_create_data = {
        'file_format': transcripts.TRANSCRIPT_SJSON,
        'video_id': self.video.studio_id,
        'name': transcript_name,
        'language_code': 'en',
        'provider': TranscriptProvider.CIELO24
    }
    self.video_transcript_ready_status_data = {
        'status': utils.ValTranscriptStatus.TRANSCRIPT_READY,
        'edx_video_id': self.video.studio_id
    }

    # Make every UUID report the known hex for the duration of the test.
    hex_patcher = patch.object(
        transcripts.uuid.UUID,
        'hex',
        new_callable=PropertyMock(return_value=self.uuid_hex)
    )
    hex_patcher.start()
    self.addCleanup(hex_patcher.stop)

    # The shared module-level request params must point at this test's video.
    REQUEST_PARAMS['video_id'] = self.video.studio_id
@data(
    ('cielo24/transcript_completed', 404),
    (None, 200),
)
@unpack
@patch('VEDA_OS01.transcripts.CIELO24_TRANSCRIPT_COMPLETED.send_robust', Mock(return_value=None))
def test_provider(self, url, status_code):
    """
    Verify that only valid provider requests are allowed.

    A `url` of `None` stands for the callback url built in `setUp`.
    """
    request_url = url or self.url
    request_params = {'job_id': 3, 'iwp_name': 'FINAL', 'lang_code': 'ar', 'org': 'edx', 'video_id': 12345}

    response = self.client.get(request_url, request_params)

    self.assertEqual(response.status_code, status_code)
@data(
    ({}, ['job_id', 'iwp_name', 'lang_code', 'org', 'video_id']),
    ({'job_id': 1}, ['iwp_name', 'lang_code', 'org', 'video_id']),
    ({'job_id': 2, 'lang_code': 'en'}, ['iwp_name', 'org', 'video_id']),
    ({'job_id': 3, 'lang_code': 'ar', 'org': 'edx'}, ['iwp_name', 'video_id']),
)
@unpack
@patch('VEDA_OS01.transcripts.LOGGER')
def test_missing_required_params(self, params, logger_params, mock_logger):
    """
    Verify that a 400 response is received, and the missing params are logged,
    if any required param is missing.
    """
    response = self.client.get(self.url, params)

    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    expected_log_message = '[CIELO24 HANDLER] Required params are missing %s'
    mock_logger.warning.assert_called_with(expected_log_message, logger_params)
@responses.activate
@patch('VEDA_OS01.transcripts.CIELO24_TRANSCRIPT_COMPLETED.send_robust', Mock(return_value=None))
def test_transcript_callback_get_request(self):
    """
    Verify that a GET request to the transcript callback is answered with 200.
    """
    def signal_handler(**kwargs):
        """
        Receiver used for testing: every request param must be present in the signal kwargs.
        """
        for param_name in REQUEST_PARAMS:
            self.assertEqual(kwargs[param_name], REQUEST_PARAMS[param_name])

    # NOTE(review): `send_robust` is replaced by a Mock for this test, so if the view
    # dispatches the signal through `send_robust` this receiver is never invoked and its
    # assertions do not run -- confirm against the view.
    transcripts.CIELO24_TRANSCRIPT_COMPLETED.connect(signal_handler)

    response = self.client.get(self.url, REQUEST_PARAMS)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
@patch('VEDA_OS01.transcripts.VALAPICall._AUTH', PropertyMock(return_value=lambda: CONFIG_DATA))
@patch('VEDA_OS01.transcripts.LOGGER')
@responses.activate
@mock_s3_deprecated
def test_cielo24_callback(self, mock_logger):
    """
    Verify that `cielo24_transcript_callback` method works as expected.

    The test checks, in order: the info log, the number of HTTP requests made, the
    target of the caption fetch, the payloads sent to the VAL endpoints, the video's
    transcript status and the sjson content stored on (mocked) S3.

    Arguments:
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    # register urls to be listened to by responses
    responses.add(
        responses.GET,
        transcripts.CIELO24_GET_CAPTION_URL,
        body=TRANSCRIPT_SRT_DATA,
        adding_headers={'Content-Type': 'text/plain; charset=utf-8'},
        content_type='text/plain',
        status=200
    )
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)
    responses.add(responses.PATCH, CONFIG_DATA['val_video_transcript_status_url'], status=200)

    # create s3 bucket -- all this is happening in moto's virtual environment
    conn = S3Connection()
    conn.create_bucket(CONFIG_DATA['aws_video_transcripts_bucket'])

    transcripts.cielo24_transcript_callback(None, **REQUEST_PARAMS)

    # Assert the logs.
    mock_logger.info.assert_called_with(
        '[CIELO24 TRANSCRIPTS] Transcript complete request received for '
        'video=%s -- org=%s -- lang=%s -- job_id=%s -- iwp_name=%s',
        REQUEST_PARAMS['video_id'],
        REQUEST_PARAMS['org'],
        REQUEST_PARAMS['lang_code'],
        REQUEST_PARAMS['job_id'],
        REQUEST_PARAMS['iwp_name']
    )

    # Total of 4 HTTP requests are made as registered above
    self.assertEqual(len(responses.calls), 4)

    # verify requests
    # The first request must be the caption fetch. `assertTrue(url, expected)` would treat
    # `expected` as the failure message and always pass, so compare explicitly. Only the
    # endpoint is pinned; the query string is built by the code under test.
    caption_request_url = responses.calls[0].request.url
    self.assertTrue(
        caption_request_url.startswith(transcripts.CIELO24_GET_CAPTION_URL),
        'Unexpected caption fetch url: {}'.format(caption_request_url)
    )

    self.assertEqual(responses.calls[2].request.url, CONFIG_DATA['val_transcript_create_url'])
    transcript_create_request_data = json.loads(responses.calls[2].request.body)
    self.assertEqual(transcript_create_request_data, self.transcript_create_data)

    self.assertEqual(responses.calls[3].request.url, CONFIG_DATA['val_video_transcript_status_url'])
    self.assertEqual(json.loads(responses.calls[3].request.body), self.video_transcript_ready_status_data)

    # Assert edx-video-pipeline's video status
    video = Video.objects.get(studio_id=self.video.studio_id)
    self.assertEqual(video.transcript_status, TranscriptStatus.READY)

    # verify sjson data uploaded to s3
    bucket = conn.get_bucket(CONFIG_DATA['aws_video_transcripts_bucket'])
    key = Key(bucket)
    key.key = transcript_create_request_data['name']
    sjson = json.loads(key.get_contents_as_string())
    self.assertEqual(sjson, TRANSCRIPT_SJSON_DATA)
@patch('VEDA_OS01.transcripts.LOGGER')
@responses.activate
def test_fetch_exception_log(self, mock_logger):
    """
    Verify the exception log created when the `fetch_srt_data` step fails.

    Arguments:
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    expected_log_args = (
        '[CIELO24 TRANSCRIPTS] Fetch request failed for video=%s -- lang=%s -- job_id=%s',
        REQUEST_PARAMS['video_id'],
        REQUEST_PARAMS['lang_code'],
        REQUEST_PARAMS['job_id'],
    )
    # The caption endpoint answers with a 400.
    responses.add(responses.GET, transcripts.CIELO24_GET_CAPTION_URL, status=400)

    transcripts.cielo24_transcript_callback(None, **REQUEST_PARAMS)

    mock_logger.exception.assert_called_with(*expected_log_args)
@patch('VEDA_OS01.transcripts.LOGGER')
@responses.activate
def test_conversion_exception_log(self, mock_logger):
    """
    Verify that the correct exception log is created when `convert_srt_to_sjson` fails.

    Arguments:
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    conversion_exception_message = 'conversion failed'
    responses.add(responses.GET, transcripts.CIELO24_GET_CAPTION_URL, body='aaa', status=200)

    with patch('VEDA_OS01.transcripts.convert_srt_to_sjson') as mock_convert_srt_to_sjson:
        mock_convert_srt_to_sjson.side_effect = transcripts.TranscriptConversionError(conversion_exception_message)
        # Only the raising call belongs inside `assertRaises`: statements placed after it
        # in the same block are skipped once the exception propagates.
        with self.assertRaises(transcripts.TranscriptConversionError) as conversion_exception:
            transcripts.cielo24_transcript_callback(None, **REQUEST_PARAMS)

    mock_logger.exception.assert_called_with(
        '[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s.',
        REQUEST_PARAMS['video_id'],
        REQUEST_PARAMS['lang_code'],
        REQUEST_PARAMS['job_id']
    )
    self.assertEqual(
        conversion_exception.exception.message,
        conversion_exception_message
    )
@patch('VEDA_OS01.transcripts.LOGGER')
@responses.activate
def test_s3_exception_log(self, mock_logger):
    """
    Verify that the correct exception log is created when `upload_sjson_to_s3` fails.

    Arguments:
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    s3_message = 'upload failed'
    responses.add(responses.GET, transcripts.CIELO24_GET_CAPTION_URL, body='aaa', status=200)

    with patch('VEDA_OS01.transcripts.convert_srt_to_sjson') as mock_convert_srt_to_sjson:
        with patch('VEDA_OS01.transcripts.upload_sjson_to_s3') as mock_upload_sjson_to_s3:
            mock_convert_srt_to_sjson.return_value = {'a': 1}
            mock_upload_sjson_to_s3.side_effect = transcripts.TranscriptConversionError(s3_message)
            # Only the raising call belongs inside `assertRaises`: statements placed after
            # it in the same block are skipped once the exception propagates.
            with self.assertRaises(transcripts.TranscriptConversionError) as s3_exception:
                transcripts.cielo24_transcript_callback(None, **REQUEST_PARAMS)

    # NOTE(review): the previous expectation had four `%s` placeholders for three arguments.
    # The format below mirrors `test_conversion_exception_log`, which exercises the same
    # callback with the same exception type -- confirm against the handler's log call.
    mock_logger.exception.assert_called_with(
        '[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s.',
        REQUEST_PARAMS['video_id'],
        REQUEST_PARAMS['lang_code'],
        REQUEST_PARAMS['job_id']
    )
    self.assertEqual(
        s3_exception.exception.message,
        s3_message
    )
@ddt
@patch.dict('VEDA_OS01.transcripts.CONFIG', CONFIG_DATA)
@patch('VEDA_OS01.transcripts.VALAPICall._AUTH', PropertyMock(return_value=lambda: CONFIG_DATA))
class ThreePlayTranscriptionCallbackTest(APITestCase):
"""
3Play Media callback tests
"""
def setUp(self):
    """
    Create the fixtures shared by the 3Play callback tests.

    A course, a video with an in-progress 3Play transcript, 3Play credentials for the
    organization and the matching transcript process are created, and `uuid.UUID.hex`
    is patched so generated names are predictable.
    """
    super(ThreePlayTranscriptionCallbackTest, self).setUp()

    self.org = u'MAx'
    self.file_id = u'112233'
    self.video_source_language = u'en'
    self.edx_video_id = VIDEO_DATA['studio_id']

    callback_token = CONFIG_DATA['transcript_provider_request_token']
    self.url = reverse('3play_media_callback', args=[callback_token])

    self.course = Course.objects.create(
        course_name='Intro to VEDA',
        institution=self.org,
        edx_classid='123',
        local_storedir='course-v1:MAx+123+test_run',
    )
    self.video = Video.objects.create(
        inst_class=self.course,
        source_language=self.video_source_language,
        provider=TranscriptProvider.THREE_PLAY,
        transcript_status=TranscriptStatus.IN_PROGRESS,
        **VIDEO_DATA
    )
    self.transcript_prefs = TranscriptCredentials.objects.create(
        org=self.org,
        provider=TranscriptProvider.THREE_PLAY,
        api_key='insecure_api_key',
        api_secret='insecure_api_secret'
    )

    # Process record for the speech transcript of the video created above.
    TranscriptProcessMetadata.objects.create(
        video=self.video,
        process_id=self.file_id,
        lang_code='en',
        provider=TranscriptProvider.THREE_PLAY,
        status=TranscriptStatus.IN_PROGRESS,
    )

    # Pin `UUID.hex`; the patch is undone automatically when the test finishes.
    self.uuid_hex = '01234567890123456789'
    hex_property = PropertyMock(return_value=self.uuid_hex)
    uuid_patcher = patch.object(transcripts.uuid.UUID, 'hex', new_callable=hex_property)
    uuid_patcher.start()
    self.addCleanup(uuid_patcher.stop)
def setup_s3_bucket(self):
    """
    Create the video transcripts bucket; this happens in moto's virtual environment.

    Returns:
        the S3 connection that was used to create the bucket
    """
    bucket_name = CONFIG_DATA['aws_video_transcripts_bucket']
    s3_connection = S3Connection()
    s3_connection.create_bucket(bucket_name)
    return s3_connection
def invoke_3play_callback(self, state='complete'):
    """
    POST to the 3PlayMedia callback handler with all the necessary parameters.

    Arguments:
        state(str): state of the callback, sent as the `status` form field

    Returns:
        the response produced by the test client
    """
    query_url = utils.build_url(
        self.url,
        edx_video_id=self.video.studio_id,
        org=self.org,
        lang_code=self.video_source_language,
    )
    form_payload = urllib.urlencode(dict(file_id=self.file_id, status=state))

    return self.client.post(
        # `build_url` strips `/`, so the leading one is put back here.
        '/{}'.format(query_url),
        content_type='application/x-www-form-urlencoded',
        data=form_payload,
    )
def setup_translations_prereqs(self, file_id, translation_lang_map, preferred_languages):
    """
    Sets up pre-requisites for the 3Play Media translations retrieval process.

    Arguments:
        file_id: process id stored on every translation process that is created
        translation_lang_map (dict): target language code -> translation id
        preferred_languages (list): languages saved as the video's preferred languages
    """
    # Update preferred languages.
    self.video.preferred_languages = preferred_languages
    self.video.save()

    # Assume the speech transcript is ready. Note that this lookup uses `self.file_id`,
    # not the `file_id` argument.
    speech_processes = TranscriptProcessMetadata.objects.filter(
        process_id=self.file_id,
        lang_code=self.video_source_language,
    )
    speech_processes.update(status=TranscriptStatus.READY)

    # Create one 'IN PROGRESS' translation process per target language.
    for target_language in translation_lang_map:
        TranscriptProcessMetadata.objects.create(
            video=self.video,
            provider=TranscriptProvider.THREE_PLAY,
            process_id=file_id,
            translation_id=translation_lang_map[target_language],
            lang_code=target_language,
            status=TranscriptStatus.IN_PROGRESS,
        )
def assert_request(self, received_request, expected_request, decode_func):
    """
    Verify that `received_request` matches `expected_request`.

    Arguments:
        received_request: request object whose attributes are inspected
        expected_request (dict): attribute name -> expected value. For `headers`, and for
            `body` when `decode_func` is given, only the listed keys are compared.
        decode_func (callable|None): turns the raw request body into a mapping
    """
    for request_attr, expected_value in expected_request.items():
        actual_value = getattr(received_request, request_attr)

        if request_attr == 'headers':
            for header_name, header_value in expected_value.items():
                self.assertEqual(actual_value[header_name], header_value)
        elif request_attr == 'body' and decode_func:
            decoded_body = decode_func(actual_value)
            for field_name, field_value in expected_value.items():
                self.assertEqual(decoded_body[field_name], field_value)
        else:
            # Anything else is compared as a whole.
            self.assertEqual(actual_value, expected_value)
def assert_uploaded_transcript_on_s3(self, connection):
    """
    Verify the sjson transcript stored on s3 under the expected (patched uuid) name.

    Arguments:
        connection: S3 connection used to look up the transcripts bucket
    """
    transcript_name = '{directory}{uuid}.sjson'.format(
        directory=CONFIG_DATA['aws_video_transcripts_prefix'], uuid=self.uuid_hex
    )
    bucket = connection.get_bucket(CONFIG_DATA['aws_video_transcripts_bucket'])
    s3_key = Key(bucket)
    s3_key.key = transcript_name

    self.assertEqual(json.loads(s3_key.get_contents_as_string()), TRANSCRIPT_SJSON_DATA)
def test_unauthorized_access_to_3play_callback(self):
    """
    Tests that an invalid token leads to a 401 Unauthorized response.
    """
    invalid_token = '123invalidtoken456'
    self.url = reverse('3play_media_callback', args=[invalid_token])

    response = self.client.post(self.url, content_type='application/x-www-form-urlencoded')

    self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
@data(
    {'data': {}, 'query_params': {}},
    {'data': {'file_id': '1122'}, 'query_params': {'edx_video_id': '1234'}}
)
@patch('VEDA_OS01.transcripts.LOGGER')
def test_missing_required_params(self, request_data, mock_logger):
    """
    Test the callback when required attributes are missing from the request.

    Arguments:
        request_data (dict): `data` holds the form fields, `query_params` the url params
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    form_fields = request_data['data']
    query_params = request_data['query_params']

    callback_url = '/{}'.format(utils.build_url(self.url, **query_params))
    response = self.client.post(
        callback_url,
        content_type='application/x-www-form-urlencoded',
        data=urllib.urlencode(form_fields),
    )
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Assert the logs
    required_attrs = ['file_id', 'lang_code', 'status', 'org', 'edx_video_id']
    received_attrs = form_fields.keys() + query_params.keys()
    missing_attrs = [name for name in required_attrs if name not in received_attrs]
    mock_logger.warning.assert_called_with(
        u'[3PlayMedia Callback] process_id=%s Received Attributes=%s Missing Attributes=%s',
        form_fields.get('file_id', None),
        received_attrs,
        missing_attrs,
    )
@data(
    (
        u'error',
        u'[3PlayMedia Callback] Error while transcription - error=%s, org=%s, edx_video_id=%s, file_id=%s.',
        TranscriptStatus.FAILED
    ),
    (
        u'invalid_status',
        u'[3PlayMedia Callback] Got invalid status - status=%s, org=%s, edx_video_id=%s, file_id=%s.',
        TranscriptStatus.IN_PROGRESS
    )
)
@unpack
@responses.activate
@patch('VEDA_OS01.transcripts.LOGGER')
def test_callback_for_non_success_statuses(self, state, message, expected_status, mock_logger):
    """
    Tests the callback for all the non-success statuses.

    Arguments:
        state (unicode): status sent to the callback
        message (unicode): log format expected in the error log
        expected_status: status the transcript process should have afterwards
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    self.url = '/{}'.format(utils.build_url(
        self.url, edx_video_id='12345', org='MAx', lang_code=self.video_source_language
    ))
    form_payload = urllib.urlencode({
        'file_id': self.file_id,
        'status': state,
        'error_description': state  # this will be logged.
    })

    self.client.post(self.url, content_type='application/x-www-form-urlencoded', data=form_payload)

    latest_process = TranscriptProcessMetadata.objects.filter(process_id=self.file_id).latest()
    self.assertEqual(latest_process.status, expected_status)

    expected_log_args = (message, state, self.org, self.video.studio_id, self.file_id)
    mock_logger.error.assert_called_with(*expected_log_args)
@responses.activate
@mock_s3_deprecated
@patch('VEDA_OS01.transcripts.LOGGER')
def test_single_lang_callback_flow(self, mock_logger):
    """
    Tests that the 3Play Media callback works as expected for a single language.

    Arguments:
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    # Setup an s3 bucket
    s3_connection = self.setup_s3_bucket()

    # Values shared by the mocked responses and the expected requests below.
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    transcript_name = '{directory}{uuid}.sjson'.format(
        directory=CONFIG_DATA['aws_video_transcripts_prefix'], uuid=self.uuid_hex
    )
    val_headers = {
        'Authorization': 'Bearer 1234567890',
        'content-type': 'application/json'
    }

    # 3Play mocked response
    responses.add(
        responses.GET,
        transcript_url,
        body=TRANSCRIPT_SRT_DATA,
        content_type='text/plain; charset=utf-8',
        status=200
    )
    # edx-val mocked responses
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)
    responses.add(responses.PATCH, CONFIG_DATA['val_video_transcript_status_url'], status=200)

    # Make request to callback
    callback_response = self.invoke_3play_callback()

    # Assert the response and the process
    self.assertEqual(callback_response.status_code, 200)
    latest_process = TranscriptProcessMetadata.objects.filter(process_id=self.file_id).latest()
    self.assertEqual(latest_process.status, TranscriptStatus.READY)

    # Total of 4 HTTP requests are made as registered above
    self.assertEqual(len(responses.calls), 4)

    expected_requests = [
        # request - 1
        {
            'url': utils.build_url(transcript_url, apikey=self.transcript_prefs.api_key)
        },
        # request - 2
        {
            'url': CONFIG_DATA['val_token_url'],
            'body': {
                'grant_type': ['password'],
                'client_id': [CONFIG_DATA['val_client_id']],
                'client_secret': [CONFIG_DATA['val_secret_key']],
                'username': [CONFIG_DATA['val_username']],
                'password': [CONFIG_DATA['val_password']],
            },
            'decode_func': urlparse.parse_qs,
        },
        # request - 3
        {
            'url': CONFIG_DATA['val_transcript_create_url'],
            'body': {
                'file_format': transcripts.TRANSCRIPT_SJSON,
                'video_id': self.video.studio_id,
                'language_code': 'en',
                'name': transcript_name,
                'provider': TranscriptProvider.THREE_PLAY
            },
            'headers': val_headers,
            'decode_func': json.loads,
        },
        # request - 4
        {
            'url': CONFIG_DATA['val_video_transcript_status_url'],
            'body': {
                'status': utils.ValTranscriptStatus.TRANSCRIPT_READY,
                'edx_video_id': self.video.studio_id
            },
            'headers': val_headers,
            'decode_func': json.loads,
        }
    ]
    for position in range(len(expected_requests)):
        expected_request = expected_requests[position]
        # `decode_func` is not a request attribute; take it out before comparing.
        decode_func = expected_request.pop('decode_func', None)
        self.assert_request(responses.calls[position].request, expected_request, decode_func)

    # Assert edx-video-pipeline's video status
    video = Video.objects.get(studio_id=self.video.studio_id)
    self.assertEqual(video.transcript_status, TranscriptStatus.READY)

    # verify transcript sjson data uploaded to s3
    self.assert_uploaded_transcript_on_s3(connection=s3_connection)

    mock_logger.info.assert_called_with(
        u'[3PlayMedia Callback] Video speech transcription was successful for video=%s -- lang_code=%s -- '
        u'process_id=%s',
        self.video.studio_id,
        'en',
        self.file_id,
    )
@responses.activate
@mock_s3_deprecated
@patch('VEDA_OS01.transcripts.LOGGER')
def test_multi_lang_callback_flow(self, mock_logger):
    """
    Tests that the 3Play Media callback works as expected when the video needs
    transcripts in more than one language.

    Arguments:
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    s3_connection = self.setup_s3_bucket()

    # Video needs transcripts in multiple languages
    self.video.preferred_languages = ['en', 'ro']
    self.video.save()

    # 3Play mock translation id
    translation_id = '007-abc'

    # Values shared by the mocked responses and the expected requests below.
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    order_translation_url = transcripts.THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=self.file_id)
    available_translation_services = [
        {
            'id': 30,
            'source_language_name': 'English',
            'source_language_iso_639_1_code': 'en',
            'target_language_name': 'Romanian',
            'target_language_iso_639_1_code': 'ro',
            'service_level': 'standard',
            'per_word_rate': 0.16
        },
        {
            'id': 31,
            'source_language_name': 'English',
            'source_language_iso_639_1_code': 'en',
            'target_language_name': 'German',
            'target_language_iso_639_1_code': 'da',
            'service_level': 'standard',
            'per_word_rate': 0.19
        }
    ]

    # 3Play mocked responses
    responses.add(
        responses.GET,
        transcript_url,
        body=TRANSCRIPT_SRT_DATA,
        content_type='text/plain; charset=utf-8',
        status=200
    )
    responses.add(
        responses.GET,
        transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
        json.dumps(available_translation_services),
        status=200,
    )
    responses.add(
        responses.POST,
        order_translation_url,
        json.dumps({
            'success': True,
            'translation_id': translation_id
        }),
        status=200,
    )
    # edx-val mocked responses
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)

    # Make request to callback
    callback_response = self.invoke_3play_callback()

    # Assert the response and the speech lang process
    self.assertEqual(callback_response.status_code, 200)
    speech_process = TranscriptProcessMetadata.objects.get(
        process_id=self.file_id,
        provider=TranscriptProvider.THREE_PLAY,
        lang_code='en'
    )
    self.assertEqual(speech_process.status, TranscriptStatus.READY)

    # Assert the transcript translation process
    translation_process = TranscriptProcessMetadata.objects.get(
        process_id=self.file_id,
        provider=TranscriptProvider.THREE_PLAY,
        lang_code='ro'
    )
    self.assertEqual(translation_process.status, TranscriptStatus.IN_PROGRESS)

    # Total of 5 HTTP requests are made as registered above
    self.assertEqual(len(responses.calls), 5)

    expected_requests = [
        # request - 1
        {
            'url': utils.build_url(transcript_url, apikey=self.transcript_prefs.api_key)
        },
        # request - 2
        {
            'url': CONFIG_DATA['val_token_url'],
            'body': {
                'grant_type': ['password'],
                'client_id': [CONFIG_DATA['val_client_id']],
                'client_secret': [CONFIG_DATA['val_secret_key']],
                'username': [CONFIG_DATA['val_username']],
                'password': [CONFIG_DATA['val_password']],
            },
            'decode_func': urlparse.parse_qs,
        },
        # request - 3
        {
            'url': CONFIG_DATA['val_transcript_create_url'],
            'body': {
                'file_format': transcripts.TRANSCRIPT_SJSON,
                'video_id': self.video.studio_id,
                'language_code': 'en',
                'name': '{directory}{uuid}.sjson'.format(
                    directory=CONFIG_DATA['aws_video_transcripts_prefix'], uuid=self.uuid_hex
                ),
                'provider': TranscriptProvider.THREE_PLAY
            },
            'headers': {
                'Authorization': 'Bearer 1234567890',
                'content-type': 'application/json'
            },
            'decode_func': json.loads,
        },
        # request - 4
        {
            'url': utils.build_url(
                transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
                apikey=self.transcript_prefs.api_key
            )
        },
        # request - 5
        {
            'url': order_translation_url,
            'body': {
                'apikey': self.transcript_prefs.api_key,
                'api_secret_key': self.transcript_prefs.api_secret,
                'translation_service_id': 30,
            },
            'decode_func': json.loads,
        },
    ]
    for position in range(len(expected_requests)):
        expected_request = expected_requests[position]
        # `decode_func` is not a request attribute; take it out before comparing.
        decode_func = expected_request.pop('decode_func', None)
        self.assert_request(responses.calls[position].request, expected_request, decode_func)

    # verify sjson data uploaded to s3
    self.assert_uploaded_transcript_on_s3(connection=s3_connection)

    mock_logger.info.assert_called_with(
        u'[3PlayMedia Callback] Video speech transcription was successful for video=%s -- lang_code=%s -- '
        u'process_id=%s',
        self.video.studio_id,
        'en',
        self.file_id,
    )
@data(
    (
        {'body': json.dumps({'iserror': True}), 'content_type': 'application/json', 'status': 200},
        'error',
        (
            u'[%s] Transcript fetch error for video=%s -- lang_code=%s -- process=%s -- response=%s',
            u'3PlayMedia Callback',
            u'12345',
            u'en',
            u'112233',
            json.dumps({'iserror': True}),
        ),
    ),
    (
        {'body': None, 'status': 400},
        'exception',
        (
            u'[3PlayMedia Callback] Fetch request failed for video=%s -- lang_code=%s -- process_id=%s',
            u'12345',
            u'en',
            u'112233',
        ),
    )
)
@unpack
@responses.activate
@patch('VEDA_OS01.transcripts.LOGGER')
def test_fetch_transcript_exceptions(self, response, log_method, log_args, mock_logger):
    """
    Verify the logs if there is an error during transcript fetch.

    Arguments:
        response (dict): keyword arguments for the mocked 3Play transcript response
        log_method (str): name of the logger method expected to be called
        log_args (tuple): arguments the logger method is expected to receive
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    # 3Play mocked response
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    responses.add(responses.GET, transcript_url, **response)

    # Make request to the callback
    callback_response = self.invoke_3play_callback()

    # Assert the response, process and the logs.
    self.assertEqual(callback_response.status_code, 200)
    latest_process = TranscriptProcessMetadata.objects.filter(process_id=self.file_id).latest()
    self.assertEqual(latest_process.status, TranscriptStatus.FAILED)

    expected_log_call = getattr(mock_logger, log_method)
    expected_log_call.assert_called_with(*log_args)
@patch('VEDA_OS01.transcripts.LOGGER')
@responses.activate
def test_srt_to_sjson_conversion_exceptions(self, mock_logger):
    """
    Tests that the correct exception is logged on conversion error.

    Arguments:
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    expected_log_args = (
        u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
        self.video.studio_id,
        'en',
        self.file_id,
    )

    # 3Play mocked response
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    responses.add(
        responses.GET,
        transcript_url,
        body=TRANSCRIPT_SRT_DATA,
        content_type=u'text/plain; charset=utf-8',
        status=200
    )

    # make `convert_srt_to_sjson` fail with ValueError
    with patch('VEDA_OS01.transcripts.convert_srt_to_sjson') as mock_convert_srt_to_sjson:
        mock_convert_srt_to_sjson.side_effect = ValueError
        # Make request to the callback
        self.invoke_3play_callback()

    mock_logger.exception.assert_called_with(*expected_log_args)
@patch('VEDA_OS01.transcripts.LOGGER')
@responses.activate
def test_upload_to_s3_exceptions(self, mock_logger):
    """
    Tests that the correct exception is logged on error while uploading to s3.

    Arguments:
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    expected_log_args = (
        u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
        self.video.studio_id,
        'en',
        self.file_id,
    )

    # 3Play mocked response
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    responses.add(
        responses.GET,
        transcript_url,
        body=TRANSCRIPT_SRT_DATA,
        content_type=u'text/plain; charset=utf-8',
        status=200
    )

    # make `upload_sjson_to_s3` fail with an S3 error
    with patch('VEDA_OS01.transcripts.upload_sjson_to_s3') as mock_upload_sjson_to_s3:
        mock_upload_sjson_to_s3.side_effect = S3ResponseError(status=401, reason='invalid secrets')
        # Make request to the callback
        self.invoke_3play_callback()

    mock_logger.exception.assert_called_with(*expected_log_args)
@data(
    # not-an-ok response on translation services fetch request.
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
                'body': 'Your request was invalid.',
                'status': 400,
            }
        ],
        {
            'method': 'exception',
            'args': (
                '[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
                '12345',
                'en',
                '112233'
            )
        }
    ),
    # Error on 3Play while fetching translation services.
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
                'body': json.dumps({
                    'success': False
                }),
                'status': 200,
            }
        ],
        {
            'method': 'exception',
            'args': (
                '[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
                '12345',
                'en',
                '112233'
            )
        }
    ),
    # not-an-ok response on translation order request.
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
                'body': json.dumps(
                    [{
                        'id': 30,
                        'source_language_name': 'English',
                        'source_language_iso_639_1_code': 'en',
                        'target_language_name': 'Romanian',
                        'target_language_iso_639_1_code': 'ro',
                        'service_level': 'standard',
                        'per_word_rate': 0.16
                    }]
                ),
                'status': 200,
            },
            {
                'method': responses.POST,
                'url': transcripts.THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=u'112233'),
                'body': '1s2d3f4',
                'status': 400
            }
        ],
        {
            'method': 'error',
            'args': (
                '[3PlayMedia Callback] An error occurred during translation, target language=%s, file_id=%s, '
                'status=%s',
                'ro',
                '112233',
                400,
            )
        }
    ),
    # Error on 3Play during placing order for a translation.
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
                'body': json.dumps(
                    [{
                        'id': 30,
                        'source_language_name': 'English',
                        'source_language_iso_639_1_code': 'en',
                        'target_language_name': 'Romanian',
                        'target_language_iso_639_1_code': 'ro',
                        'service_level': 'standard',
                        'per_word_rate': 0.16
                    }]
                ),
                'status': 200,
            },
            {
                'method': responses.POST,
                'url': transcripts.THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=u'112233'),
                'body': json.dumps({'success': False}),
                'status': 200
            }
        ],
        {
            'method': 'error',
            'args': (
                '[3PlayMedia Callback] Translation failed fot target language=%s, file_id=%s, response=%s',
                'ro',
                '112233',
                json.dumps({'success': False}),
            )
        }
    ),
    # When translation service is not found for our language
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
                'body': json.dumps(
                    [{
                        'id': 30,
                        'source_language_name': 'English',
                        'source_language_iso_639_1_code': 'en',
                        'target_language_name': 'German',
                        'target_language_iso_639_1_code': 'de',
                        'service_level': 'standard',
                        'per_word_rate': 0.16
                    }]
                ),
                'status': 200,
            }
        ],
        {
            'method': 'error',
            'args': (
                '[3PlayMedia Callback] No translation service found for '
                'source language "%s" target language "%s" -- process id %s',
                'en',
                'ro',
                '112233',
            )
        }
    )
)
@unpack
@responses.activate
@mock_s3_deprecated
@patch('VEDA_OS01.transcripts.LOGGER')
def test_order_translations_exception_cases(self, mock_responses, expected_logging, mock_logger):
    """
    Tests all the error scenarios while ordering translation for a transcript in various languages.

    Arguments:
        mock_responses (list): dicts describing extra mocked responses; `method` and `url`
            are positional for `responses.add`, the remaining keys are passed as keywords
        expected_logging (dict): `method` names the logger method, `args` its expected arguments
        mock_logger (Mock): stand-in for `VEDA_OS01.transcripts.LOGGER`.
    """
    # Setup an s3 bucket
    self.setup_s3_bucket()

    # for multi-language translations
    self.video.preferred_languages = ['en', 'ro']
    self.video.save()

    # Mocked responses
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    responses.add(
        responses.GET,
        transcript_url,
        body=TRANSCRIPT_SRT_DATA,
        content_type='text/plain; charset=utf-8',
        status=200
    )
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)

    for mocked_response in mock_responses:
        # `method` and `url` must be removed before the rest is passed as keyword arguments.
        http_method = mocked_response.pop('method')
        target_url = mocked_response.pop('url')
        responses.add(http_method, target_url, **mocked_response)

    # Make request to callback
    callback_response = self.invoke_3play_callback()

    # Assert the response and the logs
    self.assertEqual(callback_response.status_code, 200)
    expected_log_call = getattr(mock_logger, expected_logging['method'])
    expected_log_call.assert_called_with(*expected_logging['args'])

    # Assert the transcript translation process
    translation_process = TranscriptProcessMetadata.objects.get(
        process_id=self.file_id,
        provider=TranscriptProvider.THREE_PLAY,
        lang_code='ro'
    )
    self.assertEqual(translation_process.status, TranscriptStatus.FAILED)
@responses.activate
@mock_s3_deprecated
def test_translations_retrieval(self):
    """
    Tests translations retrieval from 3PlayMedia

    Two in-progress translations are set up, `retrieve_three_play_translations` is
    invoked, and the test then checks the recorded HTTP requests, the sjson stored on
    (mocked) S3, the status of each translation process and the video's transcript status.
    """
    # Setup an S3 bucket
    connection = self.setup_s3_bucket()

    # Setup translations
    translations_lang_map = {
        'ro': '1z2x3c',
        'da': '1q2w3e',
    }
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map=translations_lang_map,
        preferred_languages=['en', 'ro', 'da']
    )

    # Setup mock responses
    # For every translation: one entry in the metadata response and one mocked
    # download endpoint serving the SRT data.
    translation_status_mock_response = []
    for target_language, translation_id in translations_lang_map.iteritems():
        translation_status_mock_response.append({
            'id': translation_id,
            'source_language_iso_639_1_code': 'en',
            'target_language_iso_639_1_code': target_language,
            'state': 'complete'
        })

        responses.add(
            responses.GET,
            transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
                file_id=self.file_id, translation_id=translation_id
            ),
            body=TRANSCRIPT_SRT_DATA,
            content_type='text/plain; charset=utf-8',
            status=200,
        )

    # edx-val mocked responses
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)
    responses.add(responses.PATCH, CONFIG_DATA['val_video_transcript_status_url'], status=200)

    # 3Play translations metadata mocked response
    responses.add(
        responses.GET,
        transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id=self.file_id),
        json.dumps(translation_status_mock_response),
        status=200
    )

    # Call to retrieve translations
    transcripts.retrieve_three_play_translations()

    # Total HTTP requests, 1 for retrieving translations metadata, 3 for first translation and
    # 3 for second translation and 1 for updating video status.
    self.assertEqual(len(responses.calls), 8)

    # Assert that the first request was made for getting translations metadata from 3Play Media.
    # NOTE(review): despite its name, this first `expected_video_status_update_request`
    # describes the translations metadata request; only its `url` is compared, so the
    # `decode_func` passed below is not used for it.
    expected_video_status_update_request = {
        'url': utils.build_url(
            transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id=self.file_id),
            apikey=self.transcript_prefs.api_key
        )
    }
    self.assert_request(
        responses.calls[0].request,
        expected_video_status_update_request,
        decode_func=json.loads,
    )

    # `position` walks through the recorded calls; call 0 was checked above.
    # NOTE(review): this loop assumes the requests were issued in the iteration order
    # of `translations_lang_map` -- confirm that ordering is guaranteed.
    position = 1
    for lang_code, translation_id in translations_lang_map.iteritems():
        expected_requests = [
            # request - 1
            {
                'url': utils.build_url(transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
                    file_id=self.file_id, translation_id=translation_id
                ), apikey=self.transcript_prefs.api_key)
            },
            # request - 2
            {
                'url': CONFIG_DATA['val_token_url'],
                'body': {
                    'grant_type': ['password'],
                    'client_id': [CONFIG_DATA['val_client_id']],
                    'client_secret': [CONFIG_DATA['val_secret_key']],
                    'username': [CONFIG_DATA['val_username']],
                    'password': [CONFIG_DATA['val_password']],
                },
                'decode_func': urlparse.parse_qs,
            },
            # request - 3
            {
                'url': CONFIG_DATA['val_transcript_create_url'],
                'body': {
                    'file_format': transcripts.TRANSCRIPT_SJSON,
                    'video_id': self.video.studio_id,
                    'language_code': lang_code,
                    'name': '{directory}{uuid}.sjson'.format(
                        directory=CONFIG_DATA['aws_video_transcripts_prefix'], uuid=self.uuid_hex
                    ),
                    'provider': TranscriptProvider.THREE_PLAY
                },
                'headers': {
                    'Authorization': 'Bearer 1234567890',
                    'content-type': 'application/json'
                },
                'decode_func': json.loads,
            }
        ]
        for expected_request in expected_requests:
            # `decode_func` is popped so that only real request attributes are compared.
            self.assert_request(
                responses.calls[position].request,
                expected_request,
                expected_request.pop('decode_func', None),
            )
            position += 1

        # Asserts the transcript sjson data uploaded to s3
        self.assert_uploaded_transcript_on_s3(connection=connection)

        # Asserts the Process metadata
        self.assertEqual(
            TranscriptProcessMetadata.objects.get(
                provider=TranscriptProvider.THREE_PLAY,
                process_id=self.file_id,
                lang_code=lang_code,
                translation_id=translation_id,
            ).status,
            TranscriptStatus.READY,
        )

    # Assert that the final request was made for updating video status to `ready`
    # upon receiving all the translations
    expected_video_status_update_request = {
        'url': CONFIG_DATA['val_video_transcript_status_url'],
        'body': {
            'status': utils.ValTranscriptStatus.TRANSCRIPT_READY,
            'edx_video_id': self.video.studio_id
        },
        'headers': {
            'Authorization': 'Bearer 1234567890',
            'content-type': 'application/json'
        }
    }
    self.assert_request(
        responses.calls[position].request,
        expected_video_status_update_request,
        decode_func=json.loads,
    )

    # Asserts edx-video-pipeline's video status
    video = Video.objects.get(studio_id=self.video.studio_id)
    self.assertEqual(video.transcript_status, TranscriptStatus.READY)
@data(
    # not-an-ok response on translation status fetch request.
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
                'body': 'Your request was invalid.',
                'status': 400,
            }
        ],
        {
            'method': 'error',
            'args': (
                '[3PlayMedia Task] Translations metadata request failed for video=%s -- process_id=%s -- status=%s',
                VIDEO_DATA['studio_id'],
                '112233',
                400,
            )
        },
        TranscriptStatus.FAILED
    ),
    # 3Play Error response on fetching translations status.
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
                'body': json.dumps({'iserror': True}),
                'status': 200,
            }
        ],
        {
            'method': 'error',
            'args': (
                '[3PlayMedia Task] unable to get translations metadata for video=%s -- '
                'process_id=%s -- response=%s',
                VIDEO_DATA['studio_id'],
                '112233',
                json.dumps({'iserror': True}),
            )
        },
        TranscriptStatus.FAILED,
    ),
    # not-an-ok response on translation fetch request.
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
                'body': json.dumps([{
                    'id': '1q2w3e',
                    'source_language_iso_639_1_code': 'en',
                    'target_language_iso_639_1_code': 'ro',
                    'state': 'complete'
                }]),
                'status': 200,
            },
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
                    file_id='112233', translation_id='1q2w3e'
                ),
                'body': 'invalid blah blah',
                'status': 400
            }
        ],
        {
            'method': 'exception',
            'args': (
                '[3PlayMedia Task] Translation download failed for video=%s -- lang_code=%s -- process_id=%s.',
                VIDEO_DATA['studio_id'],
                'ro',
                '112233'
            )
        },
        TranscriptStatus.IN_PROGRESS
    ),
    # 3Play Error response on translation fetch request.
    (
        [
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
                'body': json.dumps([{
                    'id': '1q2w3e',
                    'source_language_iso_639_1_code': 'en',
                    'target_language_iso_639_1_code': 'ro',
                    'state': 'complete'
                }]),
                'status': 200,
            },
            {
                'method': responses.GET,
                'url': transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
                    file_id='112233', translation_id='1q2w3e'
                ),
                'body': json.dumps({'iserror': True}),
                'status': 200
            }
        ],
        {
            'method': 'error',
            'args': (
                '[%s] Transcript fetch error for video=%s -- lang_code=%s -- process=%s -- response=%s',
                '3PlayMedia Task',
                VIDEO_DATA['studio_id'],
                'ro',
                '112233',
                json.dumps({'iserror': True}),
            )
        },
        TranscriptStatus.FAILED
    ),
)
@unpack
@responses.activate
@mock_s3_deprecated
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_exceptions(self, mock_responses, expected_logging, transcript_status, mock_logger):
    """
    Tests possible error cases during translation fetch process from 3PlayMedia.

    Arguments:
        mock_responses(list): keyword arguments (`method`, `url`, `body`, `status`) for each
            HTTP response to register with `responses`.
        expected_logging(dict): the LOGGER method name expected to be called last and its args.
        transcript_status: status the 'ro' translation process is expected to end up with.
        mock_logger: patched `VEDA_OS01.transcripts.LOGGER`.
    """
    # Setup translation processes
    translation_id = '1q2w3e'
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': translation_id},
        preferred_languages=['en', 'ro']
    )

    for response in mock_responses:
        # Register the mock via keyword arguments rather than popping keys, so the
        # ddt-supplied fixture dictionaries are not mutated by running the test.
        responses.add(**response)

    # Fetch translations
    transcripts.retrieve_three_play_translations()

    # Assert the logs
    getattr(mock_logger, expected_logging['method']).assert_called_with(*expected_logging['args'])

    # Assert the transcript translation process
    self.assertEqual(
        TranscriptProcessMetadata.objects.get(
            provider=TranscriptProvider.THREE_PLAY,
            process_id=self.file_id,
            translation_id=translation_id,
            lang_code='ro'
        ).status,
        transcript_status,
    )
@patch('VEDA_OS01.transcripts.LOGGER')
@patch('VEDA_OS01.transcripts.convert_srt_to_sjson', Mock(side_effect=ValueError))
def test_translations_retrieval_uncaught_exceptions(self, mock_logger):
    """
    Verifies that `convert_to_sjson_and_upload_to_s3` logs an exception and re-raises when the
    (patched) SRT-to-SJSON conversion blows up with an unexpected error.
    """
    conversion_kwargs = dict(
        srt_transcript='invalid SRT content}',
        edx_video_id=self.video.studio_id,
        file_id=self.file_id,
        target_language='es',
    )
    with self.assertRaises(ValueError):
        transcripts.convert_to_sjson_and_upload_to_s3(**conversion_kwargs)

    # NOTE(review): the format string reads video / lang_code / process_id, yet the expected
    # arguments are video / file_id / 'es' -- confirm against the implementation's log call.
    expected_log_args = (
        u'[3PlayMedia Task] translation failed for video=%s -- lang_code=%s -- process_id=%s',
        self.video.studio_id,
        self.file_id,
        'es',
    )
    mock_logger.exception.assert_called_with(*expected_log_args)
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_with_zero_translation_process(self, mock_logger):
    """
    Verifies the informational log emitted when a video has no 'in progress' translation processes.
    """
    # No translation prerequisites are set up, so there is nothing in progress to fetch.
    transcripts.retrieve_three_play_translations()

    expected_message = '[3PlayMedia Task] video=%s does not have any translation process who is in progress.'
    mock_logger.info.assert_called_with(expected_message, self.video.studio_id)
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_no_credentials(self, mock_logger):
    """
    Verifies translations retrieval when the 3Play Media credentials have been deleted
    from the data model: an exception is logged and the translation process fails.
    """
    translation_id = '1q2w3e'
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': translation_id},
        preferred_languages=['en', 'ro']
    )

    # Wipe every stored transcript credential before attempting the retrieval.
    TranscriptCredentials.objects.all().delete()
    transcripts.retrieve_three_play_translations()

    # The missing credentials must be reported through LOGGER.exception ...
    expected_log_args = (
        '[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
        '3PlayMedia Task',
        self.org,
        self.video.studio_id,
        self.file_id,
    )
    mock_logger.exception.assert_called_with(*expected_log_args)

    # ... and the tracked translation process must be marked as failed.
    translation_process = TranscriptProcessMetadata.objects.get(
        provider=TranscriptProvider.THREE_PLAY,
        process_id=self.file_id,
        translation_id=translation_id,
        lang_code='ro'
    )
    self.assertEqual(translation_process.status, TranscriptStatus.FAILED)
@responses.activate
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_with_removed_translation_process(self, mock_logger):
    """
    Verifies the warning logged when 3PlayMedia reports a translation for which no
    tracking translation process exists (or it has been deleted).
    """
    translation_id = '1q2w3e'
    non_existent_target_language = 'es'
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': translation_id},
        preferred_languages=['en', 'ro']
    )

    # The metadata refers to a target language ('es') that has no tracking process in the pipeline.
    translations_metadata = [{
        'id': translation_id,
        'source_language_iso_639_1_code': 'en',
        'target_language_iso_639_1_code': non_existent_target_language,
        'state': 'complete'
    }]
    responses.add(
        method=responses.GET,
        url=transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
        body=json.dumps(translations_metadata),
        status=200
    )

    transcripts.retrieve_three_play_translations()

    expected_message = (
        u'[3PlayMedia Task] Tracking process is either not found or already complete '
        u'-- process_id=%s -- target_language=%s -- translation_id=%s.'
    )
    mock_logger.warning.assert_called_with(
        expected_message,
        '112233',
        non_existent_target_language,
        translation_id,
    )
@data(None, 'invalid_course_id_1, invalid_course_id_2')
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translation_retrieval_with_invalid_course_id(self, course_runs, mock_logger):
    """
    Verifies translations retrieval when the associated course has no course ids or
    only invalid ones.

    Note:
        A course without a course id is unexpected, but it has to be handled because
        `Course.local_storedir` is null=True, blank=True.
    """
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': '1q2w3e'},
        preferred_languages=['en', 'ro']
    )

    # Overwrite the course runs with the (missing or invalid) value under test.
    course = self.course
    course.local_storedir = course_runs
    course.save()

    transcripts.retrieve_three_play_translations()

    # The credentials lookup is expected to be logged with org=None.
    expected_log_args = (
        u'[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
        '3PlayMedia Task',
        None,
        self.edx_video_id,
        self.file_id,
    )
    mock_logger.exception.assert_called_with(*expected_log_args)
"""
Tests common utils
"""
from ddt import data, ddt, unpack
from mock import MagicMock, Mock
from unittest import TestCase
from VEDA_OS01 import utils
@ddt
class UtilTests(TestCase):
    """
    Unit tests for the helpers in `VEDA_OS01.utils`.
    """

    @data(
        dict(
            urls=('http://api.cielo24/', '/add/job'),
            params={},
            expected_url='http://api.cielo24/add/job',
        ),
        dict(
            urls=('http://api.cielo24', '/add/job'),
            params={'a': 1, 'b': 2},
            expected_url='http://api.cielo24/add/job?a=1&b=2',
        ),
        dict(
            urls=('http://api.cielo24/', 'add/job'),
            params={'c': 3, 'd': 4},
            expected_url='http://api.cielo24/add/job?c=3&d=4',
        ),
        dict(
            urls=('http://api.cielo24', 'add/job'),
            params={'p': 100},
            expected_url='http://api.cielo24/add/job?p=100',
        ),
        dict(
            urls=('http://api.cielo24', 'add/job', 'media'),
            params={'p': 100},
            expected_url='http://api.cielo24/add/job/media?p=100',
        ),
    )
    @unpack
    def test_build_url(self, urls, params, expected_url):
        """
        Checks that `utils.build_url` joins the url parts and appends the query parameters.
        """
        self.assertEqual(utils.build_url(*urls, **params), expected_url)

    @data(
        dict(course_id='course-v1:MITx+4.605x+3T2017', expected_org='MITx'),
        dict(course_id='WestonHS/PFLC1x/3T2015', expected_org='WestonHS'),
        dict(course_id='', expected_org=None),
    )
    @unpack
    def test_extract_course_org(self, course_id, expected_org):
        """
        Checks that `utils.extract_course_org` returns the org for each course id form,
        and None for an empty course id.
        """
        self.assertEqual(utils.extract_course_org(course_id), expected_org)

    def test_get_config(self):
        """
        Checks that `utils.get_config` returns a non-empty configuration.
        """
        self.assertNotEqual(utils.get_config(), {})

    @data(
        ('IN PROGRESS', True),
        ('FAILED', False)
    )
    @unpack
    def test_video_status_update(self, status, update_val_status):
        """
        Checks that `utils.update_video_status` sets the video's transcript status and
        calls the edx-val client only for the statuses that require it.
        """
        api_client = MagicMock()
        mocked_video = Mock(studio_id='1234', transcript_status='earlier status')

        utils.update_video_status(val_api_client=api_client, video=mocked_video, status=status)

        # The edx-val update happens only when expected; the local status always changes.
        self.assertEqual(api_client.update_video_status.called, update_val_status)
        self.assertEqual(mocked_video.transcript_status, status)
"""
Transcript handlers.
"""
import json
import logging
import uuid
import boto
import django.dispatch
import requests
from boto.s3.key import Key
from django.db.models import Q
from pysrt import SubRipFile
from requests.packages.urllib3.exceptions import InsecurePlatformWarning
from rest_framework import status
from rest_framework.parsers import FormParser
from rest_framework.permissions import AllowAny
from rest_framework.response import Response
from rest_framework.views import APIView
from control.veda_val import VALAPICall
from VEDA_OS01 import utils
from VEDA_OS01.models import (TranscriptCredentials, TranscriptProcessMetadata,
TranscriptProvider, TranscriptStatus, Video)
# Suppress urllib3's InsecurePlatformWarning for requests issued by this process.
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)

# Module logger; `basicConfig()` runs at import time.
logging.basicConfig()
LOGGER = logging.getLogger(__name__)

# 3PlayMedia possible send-along statuses for a transcription callback.
COMPLETE = 'complete'
ERROR = 'error'

# Transcript format
TRANSCRIPT_SJSON = 'sjson'

# Cielo24 callback signal, sent by `Cielo24CallbackHandlerView` with the callback's query params.
CIELO24_TRANSCRIPT_COMPLETED = django.dispatch.Signal(providing_args=[
    'job_id', 'iwp_name', 'lang_code', 'org', 'video_id'
])

# Pipeline configuration, loaded once at import time.
CONFIG = utils.get_config()

# Cielo24 API URLs
CIELO24_GET_CAPTION_URL = utils.build_url(
    CONFIG['cielo24_api_base_url'],
    'job/get_caption'
)

# 3PlayMedia callback signal
THREE_PLAY_TRANSCRIPTION_DONE = django.dispatch.Signal(
    providing_args=['org', 'lang_code', 'edx_video_id', 'file_id', 'status', 'error_description']
)

# 3PlayMedia API URLs.
# The `{file_id}` / `{translation_id}` placeholders are filled in with `str.format` by the callers.
THREE_PLAY_TRANSCRIPT_URL = utils.build_url(
    CONFIG['three_play_api_transcript_url'],
    'files/{file_id}/transcript.srt'
)
THREE_PLAY_TRANSLATION_SERVICES_URL = utils.build_url(
    CONFIG['three_play_api_transcript_url'],
    'translation_services'
)
# NOTE(review): this is the only 3PlayMedia URL built from `three_play_api_base_url`; the others
# use `three_play_api_transcript_url` -- confirm the difference is intentional.
THREE_PLAY_ORDER_TRANSLATION_URL = utils.build_url(
    CONFIG['three_play_api_base_url'],
    'files/{file_id}/translations/order'
)
THREE_PLAY_TRANSLATIONS_METADATA_URL = utils.build_url(
    CONFIG['three_play_api_transcript_url'],
    'files/{file_id}/translations'
)
THREE_PLAY_TRANSLATION_DOWNLOAD_URL = utils.build_url(
    CONFIG['three_play_api_transcript_url'],
    'files/{file_id}/translations/{translation_id}/captions.srt'
)
class TranscriptError(Exception):
    """
    Base class for the transcript related errors raised by this module.
    """
class TranscriptFetchError(TranscriptError):
    """
    Raised when fetching a transcript from a transcript provider fails.
    """
class TranscriptTranslationError(TranscriptError):
    """
    Raised when a translation attempt on 3PlayMedia fails.
    """
class TranscriptConversionError(TranscriptError):
    """
    Raised when converting an SRT transcript to SJSON fails.
    """
class TranscriptUploadError(TranscriptError):
    """
    Raised when uploading an SJSON transcript to S3 fails.
    """
class AllowValidTranscriptProvider(AllowAny):
    """
    Permission class that only lets requests from a valid transcript provider through.
    """

    def has_permission(self, request, view):
        """
        Grant access only when the `token` url kwarg equals the configured provider token.

        Returns False when either the configured token or the url kwarg is missing.
        """
        try:
            expected_token = CONFIG['transcript_provider_request_token']
            received_token = view.kwargs['token']
        except KeyError:
            return False

        return expected_token == received_token
class Cielo24CallbackHandlerView(APIView):
    """
    View handling the Cielo24 callback requests.
    """
    permission_classes = (AllowValidTranscriptProvider,)

    def get(self, request, **kwargs):
        """
        Validate a Cielo24 callback request and dispatch the transcript-completed signal.

        Responds with 400 when any required query parameter is missing, otherwise sends
        `CIELO24_TRANSCRIPT_COMPLETED` with the required parameters and responds with 200.
        """
        query_params = request.query_params
        required_attrs = ('job_id', 'iwp_name', 'lang_code', 'org', 'video_id')

        missing = [attr for attr in required_attrs if attr not in query_params]
        if missing:
            LOGGER.warning(
                '[CIELO24 HANDLER] Required params are missing %s',
                missing,
            )
            return Response({}, status=status.HTTP_400_BAD_REQUEST)

        # Every required attribute is forwarded to the signal receivers under its own name.
        signal_kwargs = dict((attr, query_params[attr]) for attr in required_attrs)
        CIELO24_TRANSCRIPT_COMPLETED.send_robust(sender=self, **signal_kwargs)
        return Response()
@django.dispatch.receiver(CIELO24_TRANSCRIPT_COMPLETED, dispatch_uid="cielo24_transcript_completed")
def cielo24_transcript_callback(sender, **kwargs):
    """
    Receiver for the Cielo24 transcript-completed signal.

    Process:
        * download transcript(SRT) from Cielo24
        * convert SRT to SJSON
        * upload SJSON to AWS S3
        * update transcript status in VAL

    Arguments:
        sender: sender of the signal
        kwargs(dict): must carry `org`, `job_id`, `video_id`, `iwp_name` and `lang_code`
    """
    org = kwargs['org']
    job_id = kwargs['job_id']
    video_id = kwargs['video_id']
    iwp_name = kwargs['iwp_name']
    lang_code = kwargs['lang_code']

    LOGGER.info(
        '[CIELO24 TRANSCRIPTS] Transcript complete request received for '
        'video=%s -- org=%s -- lang=%s -- job_id=%s -- iwp_name=%s',
        video_id,
        org,
        lang_code,
        job_id,
        iwp_name
    )

    # Credentials of the organization for Cielo24.
    try:
        credentials = TranscriptCredentials.objects.get(
            org=org,
            provider=TranscriptProvider.CIELO24,
        )
    except TranscriptCredentials.DoesNotExist:
        credentials = None
        LOGGER.exception('[CIELO24 TRANSCRIPTS] Unable to get transcript credentials for job_id=%s', job_id)

    # Latest tracking process for this job and language.
    try:
        tracking_process = TranscriptProcessMetadata.objects.filter(
            provider=TranscriptProvider.CIELO24,
            process_id=job_id,
            lang_code=lang_code
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        tracking_process = None
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Unable to get transcript process metadata for job_id=%s',
            job_id
        )

    # Without a tracking process there is nothing to update.
    if not tracking_process:
        return

    # Without credentials the transcript cannot be fetched, so the process fails.
    if not credentials:
        tracking_process.status = TranscriptStatus.FAILED
        tracking_process.save()
        return

    try:
        srt_data = fetch_srt_data(
            CIELO24_GET_CAPTION_URL,
            v=1,
            job_id=job_id,
            api_token=credentials.api_key,
            caption_format='SRT'
        )
    except TranscriptFetchError:
        tracking_process.status = TranscriptStatus.FAILED
        tracking_process.save()
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Fetch request failed for video=%s -- lang=%s -- job_id=%s',
            video_id,
            lang_code,
            job_id
        )
        return

    # NOTE(review): the process is marked READY before conversion/upload, so a failure below
    # leaves it READY -- confirm this is intended.
    tracking_process.status = TranscriptStatus.READY
    tracking_process.save()

    try:
        sjson = convert_srt_to_sjson(srt_data)
        sjson_file_name = upload_sjson_to_s3(CONFIG, sjson)
    except Exception:
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s.',
            video_id,
            lang_code,
            job_id
        )
        raise

    # Hand the completed transcript information over to edx-val.
    video = tracking_process.video
    val_api = VALAPICall(video, val_status=None)
    val_api.update_val_transcript(
        video_id=tracking_process.video.studio_id,
        lang_code=lang_code,
        name=sjson_file_name,
        transcript_format=TRANSCRIPT_SJSON,
        provider=TranscriptProvider.CIELO24
    )

    # The video status is only updated once every transcript process of the video is ready.
    video_jobs = TranscriptProcessMetadata.objects.filter(video__studio_id=video_id)
    everything_ready = all(video_job.status == TranscriptStatus.READY for video_job in video_jobs)
    if everything_ready:
        utils.update_video_status(
            val_api_client=val_api,
            video=tracking_process.video,
            status=TranscriptStatus.READY
        )
def fetch_srt_data(url, **request_params):
    """
    Fetch srt data from transcript provider.

    Arguments:
        url(unicode): provider endpoint to fetch the transcript from
        request_params(dict): query parameters appended to `url`

    Returns:
        The response body text.

    Raises:
        TranscriptFetchError: when the provider answers with a not-ok status.
    """
    response = requests.get(
        utils.build_url(url, **request_params)
    )

    if not response.ok:
        # The template is a unicode literal on purpose: `response.text` is unicode, and
        # formatting it into a byte string raises UnicodeEncodeError on Python 2 for any
        # non-ASCII body, which would mask the TranscriptFetchError callers handle.
        raise TranscriptFetchError(
            u'[TRANSCRIPT FETCH ERROR] status={} -- text={}'.format(
                response.status_code,
                response.text
            )
        )

    return response.text
def convert_srt_to_sjson(srt_data):
    """
    Convert SRT to SJSON

    Arguments:
        srt_data: unicode, content of source subs.

    Returns:
        dict: SJSON data with parallel `start`, `end` and `text` lists, one entry per subtitle.
    """
    subtitles = list(SubRipFile.from_string(srt_data))

    return {
        'start': [subtitle.start.ordinal for subtitle in subtitles],
        'end': [subtitle.end.ordinal for subtitle in subtitles],
        # Line breaks inside a subtitle are replaced with spaces.
        'text': [subtitle.text.replace('\n', ' ') for subtitle in subtitles],
    }
def upload_sjson_to_s3(config, sjson_data):
    """
    Upload sjson data to s3.

    Arguments:
        config(dict): must provide `aws_video_transcripts_bucket` and `aws_video_transcripts_prefix`
        sjson_data: JSON-serializable sjson content

    Returns:
        The key name the sjson content was stored under.
    """
    transcripts_bucket = boto.connect_s3().get_bucket(config['aws_video_transcripts_bucket'])

    s3_key = Key(transcripts_bucket)
    s3_key.content_type = 'application/json'
    # Each upload gets a freshly generated uuid based name under the configured prefix.
    s3_key.key = '{directory}{uuid}.sjson'.format(
        directory=config['aws_video_transcripts_prefix'],
        uuid=uuid.uuid4().hex
    )

    s3_key.set_contents_from_string(json.dumps(sjson_data))
    s3_key.set_acl('public-read')

    return s3_key.key
class ThreePlayMediaCallbackHandlerView(APIView):
    """
    View to handle 3PlayMedia callback requests.
    """
    parser_classes = (FormParser,)
    permission_classes = (AllowValidTranscriptProvider,)

    @staticmethod
    def _get_attribute(name, primary, fallback):
        """
        Read `name` from `primary` when present there, otherwise from `fallback`.
        """
        return primary[name] if name in primary else fallback[name]

    def post(self, request, **kwargs):
        """
        Handle 3PlayMedia callback request.

        The required attributes may arrive either in the request body or in the query string.
        When any of them is missing, a warning is logged and the signal is not dispatched;
        the response is 200 in both cases.
        """
        required_attrs = ['file_id', 'lang_code', 'status', 'org', 'edx_video_id']
        # `list()` keeps the concatenation working when `keys()` returns a view instead of a list.
        received_attributes = list(request.data.keys()) + list(request.query_params.keys())
        missing = [attr for attr in required_attrs if attr not in received_attributes]
        if missing:
            LOGGER.warning(
                u'[3PlayMedia Callback] process_id=%s Received Attributes=%s Missing Attributes=%s',
                request.data.get('file_id'),
                received_attributes,
                missing,
            )
            return Response(status=status.HTTP_200_OK)

        # The validation above accepts an attribute from either source, so each value is read
        # from its usual source first and from the other one as a fallback. Reading from a
        # single fixed source would raise a key error for a request that passed validation.
        query_params, data = request.query_params, request.data

        # Dispatch 3playMedia transcription signal
        THREE_PLAY_TRANSCRIPTION_DONE.send_robust(
            sender=self,
            org=self._get_attribute('org', query_params, data),
            edx_video_id=self._get_attribute('edx_video_id', query_params, data),
            lang_code=self._get_attribute('lang_code', query_params, data),
            file_id=self._get_attribute('file_id', data, query_params),
            status=self._get_attribute('status', data, query_params),
            # Following is going to be an error description if an error occurs during
            # 3playMedia transcription process
            error_description=request.data.get('error_description'),
        )
        return Response(status=status.HTTP_200_OK)
def get_translation_services(api_key):
    """
    GET available 3Play Media Translation services

    Arguments:
        api_key(unicode): api key which is required to make an authentic call to 3Play Media

    Returns:
        Available 3Play Media Translation services.

    Raises:
        TranscriptTranslationError: when the request fails, or when the response body is not
            a JSON list of services.
    """
    response = requests.get(utils.build_url(THREE_PLAY_TRANSLATION_SERVICES_URL, apikey=api_key))
    if not response.ok:
        raise TranscriptTranslationError(
            u'[3PlayMedia Callback] Error while fetching the translation services -- {status}, {response}'.format(
                status=response.status_code,
                response=response.text,
            )
        )

    # Response should be a list containing services, details:
    # http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    try:
        available_services = json.loads(response.text)
    except ValueError:
        # A body that is not JSON at all is reported like any other unexpected payload, so
        # that callers only ever have to handle TranscriptTranslationError.
        available_services = None

    if not isinstance(available_services, list):
        raise TranscriptTranslationError(
            u'[3PlayMedia Callback] Expected list but got: -- {response}.'.format(
                response=response.text,
            )
        )

    return available_services
def get_standard_translation_service(translation_services, source_language, target_language):
    """
    Get standard translation service

    Arguments:
        translation_services(list): List of available 3play media translation services.
        source_language(unicode): A language code for video source/speech language.
        target_language(unicode): A language code whose standard translation service is needed.

    Returns:
        The id of the first 'standard' service translating `source_language` into
        `target_language`, or None when there is no such service.
    """
    matching_service_ids = (
        service['id']
        for service in translation_services
        if service['source_language_iso_639_1_code'] == source_language
        and service['target_language_iso_639_1_code'] == target_language
        and service['service_level'] == 'standard'
    )
    # The generator is lazy, so the scan stops at the first match.
    return next(matching_service_ids, None)
def place_translation_order(api_key, api_secret, translation_service_id, target_language, file_id):
    """
    Places a translation order on 3play media.

    Arguments:
        api_key(unicode): api key
        api_secret(unicode): api secret
        translation_service_id(unicode): translation service id got from 3Play Media
        target_language(unicode): A language code translation is being ordered
        file_id(unicode): 3play media file id / process id

    Returns:
        The decoded order response (dict) when the order has been placed successfully,
        None otherwise. Failures are logged rather than raised.
    """
    order_response = requests.post(THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=file_id), json={
        'apikey': api_key,
        'api_secret_key': api_secret,
        'translation_service_id': translation_service_id,
    })

    if not order_response.ok:
        LOGGER.error(
            '[3PlayMedia Callback] An error occurred during translation, target language=%s, file_id=%s, status=%s',
            target_language,
            file_id,
            order_response.status_code,
        )
        return

    # Translation Order API returns `success` attribute specifying whether the order has been placed
    # successfully: http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    try:
        translation_order = json.loads(order_response.text)
    except ValueError:
        # A body that is not JSON cannot carry the `success` flag; handle it as a failed order
        # instead of letting the decoding error escape.
        translation_order = None

    if not isinstance(translation_order, dict) or not translation_order.get('success'):
        LOGGER.error(
            '[3PlayMedia Callback] Translation failed fot target language=%s, file_id=%s, response=%s',
            target_language,
            file_id,
            order_response.text,
        )
        return

    return translation_order
def order_translations(file_id, api_key, api_secret, source_language, target_languages):
    """
    Order translations on 3PlayMedia for all the target languages.

    Process:
        * Fetch all the pending translation processes for a file
        * Fetch all the translation services from 3PlayMedia
        * For each target language,
            - Find its pending process and a suitable translation service
            - Order translation from that service
            - Move the process to `in progress` and update it with the
              translation id received from 3Play, or fail it.

    Arguments:
        file_id(unicode): File identifier
        api_key(unicode): API key
        api_secret(unicode): API Secret
        source_language(unicode): video source/speech language code
        target_languages(list): List of language codes

    Raises:
        TranscriptTranslationError: when an error occurred while fetching the translation services.
    """
    if not target_languages:
        return

    pending_processes = TranscriptProcessMetadata.objects.filter(
        process_id=file_id,
        provider=TranscriptProvider.THREE_PLAY,
        status=TranscriptStatus.PENDING,
        lang_code__in=target_languages,
    )

    try:
        services = get_translation_services(api_key)
    except TranscriptTranslationError:
        # No services, no translations: fail every pending process of this file id.
        pending_processes.update(status=TranscriptStatus.FAILED)
        raise

    for language in target_languages:
        # 1 - Pick the pending tracking process of this language.
        try:
            process = pending_processes.filter(lang_code=language).latest()
        except TranscriptProcessMetadata.DoesNotExist:
            LOGGER.warning(
                u'[3PlayMedia Callback] process not found for target language %s -- process id %s',
                language,
                file_id,
            )
            continue

        # 2 - Look up the standard translation service for the language pair.
        service_id = get_standard_translation_service(services, source_language, language)
        if service_id is None:
            process.update(status=TranscriptStatus.FAILED)
            LOGGER.error(
                u'[3PlayMedia Callback] No translation service found for source language "%s" '
                u'target language "%s" -- process id %s',
                source_language,
                language,
                file_id,
            )
            continue

        # 3 - Place the order with the service found above.
        order = place_translation_order(
            api_key=api_key,
            api_secret=api_secret,
            translation_service_id=service_id,
            target_language=language,
            file_id=file_id,
        )
        if not order:
            process.update(status=TranscriptStatus.FAILED)
            continue

        process.update(
            translation_id=order['translation_id'],
            status=TranscriptStatus.IN_PROGRESS
        )
def validate_transcript_response(edx_video_id, file_id, transcript, lang_code, log_prefix):
    """
    Validate a transcript response received from 3Play Media.

    Arguments:
        edx_video_id(unicode): studio video identifier
        file_id(unicode): file identifier
        transcript(unicode): SRT transcript content ideally
        lang_code(unicode): language code
        log_prefix(unicode): A prefix for the emitted logs

    Returns:
        True when `transcript` does not decode as JSON (it is taken to be SRT content).
        False when it does decode as JSON; the response is then logged as a fetch error.
    """
    try:
        json.loads(transcript)
    except ValueError:
        # Not JSON, hence not an error payload.
        return True

    LOGGER.error(
        u'[%s] Transcript fetch error for video=%s -- lang_code=%s -- process=%s -- response=%s',
        log_prefix,
        edx_video_id,
        lang_code,
        file_id,
        transcript,
    )
    return False
def get_transcript_credentials(provider, org, edx_video_id, file_id, log_prefix):
    """
    Get org-specific transcript credentials.

    Arguments:
        provider(TranscriptProvider): transcript provider
        org(unicode): organization extracted from course id
        edx_video_id(unicode): studio video identifier
        file_id(unicode): file identifier or process identifier
        log_prefix(unicode): A prefix for the emitted logs

    Returns:
        The matching `TranscriptCredentials`, or None (after logging) when there are none.
    """
    try:
        return TranscriptCredentials.objects.get(org=org, provider=provider)
    except TranscriptCredentials.DoesNotExist:
        LOGGER.exception(
            u'[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
            log_prefix,
            org,
            edx_video_id,
            file_id,
        )

    return None
@django.dispatch.receiver(THREE_PLAY_TRANSCRIPTION_DONE, dispatch_uid="three_play_transcription_done")
def three_play_transcription_callback(sender, **kwargs):
    """
    This is a receiver for 3Play Media callback signal.

    Arguments:
        sender: sender of the signal
        kwargs(dict): video transcription metadata; must carry `org`, `edx_video_id`,
            `lang_code`, `file_id` and `status`, and may carry `error_description`.

    Process:
        * download transcript(SRT) from 3PlayMedia
        * validate it; an invalid transcript fails the process and stops the handling
        * convert SRT to SJSON
        * upload SJSON to AWS S3
        * update edx-val with the transcript
        * order translations for all the preferred languages
        * update transcript status in VAL

    Raises:
        Any exception raised while converting/uploading the transcript, and any exception other
        than TranscriptTranslationError raised while ordering translations (logged, then re-raised).
    """
    log_prefix = u'3PlayMedia Callback'

    # Extract all the must have attributes
    org = kwargs['org']
    edx_video_id = kwargs['edx_video_id']
    lang_code = kwargs['lang_code']
    file_id = kwargs['file_id']
    state = kwargs['status']

    try:
        process = TranscriptProcessMetadata.objects.filter(
            provider=TranscriptProvider.THREE_PLAY,
            process_id=file_id,
            lang_code=lang_code,
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        LOGGER.exception(
            u'[3PlayMedia Callback] Unable to get transcript process for org=%s, edx_video_id=%s, file_id=%s.',
            org,
            edx_video_id,
            file_id,
        )
        return

    # On completion of a transcript
    # Indicates that the default video speech transcription has been done successfully.
    if state == COMPLETE:
        log_args = (edx_video_id, lang_code, file_id)

        # 1 - Retrieve transcript credentials
        transcript_secrets = get_transcript_credentials(
            provider=TranscriptProvider.THREE_PLAY,
            org=org,
            edx_video_id=edx_video_id,
            file_id=file_id,
            log_prefix=log_prefix,
        )
        if not transcript_secrets:
            process.update(status=TranscriptStatus.FAILED)
            return

        # 2 - Fetch the transcript from 3Play Media.
        try:
            srt_transcript = fetch_srt_data(
                THREE_PLAY_TRANSCRIPT_URL.format(file_id=file_id),
                apikey=transcript_secrets.api_key,
            )
        except TranscriptFetchError:
            LOGGER.exception(
                u'[3PlayMedia Callback] Fetch request failed for video=%s -- lang_code=%s -- process_id=%s',
                *log_args
            )
            process.update(status=TranscriptStatus.FAILED)
            return

        # 3 - Validate transcript content received from 3Play Media and mark the transcription process.
        is_valid_transcript = validate_transcript_response(
            edx_video_id=edx_video_id,
            file_id=file_id,
            transcript=srt_transcript,
            lang_code=lang_code,
            log_prefix=log_prefix,
        )
        if not is_valid_transcript:
            # The response is an error payload rather than SRT content. Stop here so that it is
            # not converted, uploaded to S3, pushed to edx-val or sent for translation.
            process.update(status=TranscriptStatus.FAILED)
            return

        process.update(status=TranscriptStatus.READY)

        # 4 - Convert SRT transcript to SJson format and upload it to S3.
        try:
            sjson_transcript = convert_srt_to_sjson(srt_transcript)
            sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
        except Exception:
            # in case of any exception, log and raise.
            LOGGER.exception(
                u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
                *log_args
            )
            raise

        # 5 - Update edx-val with completed transcript information.
        val_api = VALAPICall(video_proto=None, val_status=None)
        val_api.update_val_transcript(
            video_id=process.video.studio_id,
            lang_code=lang_code,
            name=sjson_file,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        # 6 - Translation Phase
        # That's the phase for kicking off translation processes for all the
        # preferred languages except the video's speech language.
        # Filtering (rather than `list.remove`) does not raise when the speech language is
        # absent from the preferred languages.
        target_languages = [
            language for language in process.video.preferred_languages if language != lang_code
        ]

        # Create the translation tracking processes for all the target languages.
        for target_language in target_languages:
            TranscriptProcessMetadata.objects.create(
                video=process.video,
                provider=TranscriptProvider.THREE_PLAY,
                process_id=file_id,
                lang_code=target_language,
                status=TranscriptStatus.PENDING,
            )

        # Order translations for target languages
        try:
            order_translations(
                file_id,
                transcript_secrets.api_key,
                transcript_secrets.api_secret,
                source_language=lang_code,
                target_languages=target_languages
            )
        except TranscriptTranslationError:
            LOGGER.exception(
                u'[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
                *log_args
            )
        except Exception:
            LOGGER.exception(
                u'[3PlayMedia Callback] Error while translating the transcripts - video=%s, lang_code=%s, file_id=%s',
                *log_args
            )
            raise

        # 7 - Update transcript status.
        # It will be for edx-val as well as edx-video-pipeline and this will be the case when
        # there is only one transcript language for a video(that is, already been processed).
        if not target_languages:
            utils.update_video_status(
                val_api_client=val_api,
                video=process.video,
                status=TranscriptStatus.READY
            )

        # On success, a happy farewell log.
        LOGGER.info(
            (u'[3PlayMedia Callback] Video speech transcription was successful for'
             u' video=%s -- lang_code=%s -- process_id=%s'),
            *log_args
        )

    elif state == ERROR:
        # Fail the process
        process.status = TranscriptStatus.FAILED
        process.save()
        # Log the error information; the description is optional, so a sender that omits it
        # must not turn the error handling itself into a KeyError.
        LOGGER.error(
            u'[3PlayMedia Callback] Error while transcription - error=%s, org=%s, edx_video_id=%s, file_id=%s.',
            kwargs.get('error_description'),
            org,
            edx_video_id,
            file_id,
        )
    else:
        # Status must be either 'complete' or 'error'
        # more details on http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        LOGGER.error(
            u'[3PlayMedia Callback] Got invalid status - status=%s, org=%s, edx_video_id=%s, file_id=%s.',
            state,
            org,
            edx_video_id,
            file_id,
        )
def get_translations_metadata(api_key, file_id, edx_video_id):
    """
    Get translations metadata from 3Play Media for a given file id.

    Arguments:
        api_key(unicode): api key
        file_id(unicode): file identifier or process identifier
        edx_video_id(unicode): video studio identifier (only used for logging)

    Returns:
        A List containing the translations metadata for a file id or None
        in case of a faulty response (non-2xx status, a body that is not
        valid JSON, or a JSON body that is not a list).

    Example:
        [
            {
                "id": 1234,
                "translation_service_id": 12,
                "source_language_name": "English",
                "source_language_iso_639_1_code": "en",
                "target_language_name": "French (Canada)",
                "target_language_iso_639_1_code": "fr",
                "state": "complete"
            },
            {
                "id": 1345,
                "translation_service_id": 32,
                "source_language_name": "English",
                "source_language_iso_639_1_code": "en",
                "target_language_name": "German",
                "target_language_iso_639_1_code": "de",
                "state": "in_progress"
            }
        ]
    """
    translations_metadata_url = utils.build_url(
        THREE_PLAY_TRANSLATIONS_METADATA_URL.format(
            file_id=file_id,
        ),
        apikey=api_key
    )
    translations_metadata_response = requests.get(translations_metadata_url)
    if not translations_metadata_response.ok:
        LOGGER.error(
            u'[3PlayMedia Task] Translations metadata request failed for video=%s -- process_id=%s -- status=%s',
            edx_video_id,
            file_id,
            translations_metadata_response.status_code,
        )
        return None

    try:
        translations = json.loads(translations_metadata_response.text)
    except ValueError:
        # The body is not valid JSON. Treat it like any other faulty response
        # so that it is logged below and reported to the caller as None rather
        # than aborting the caller with an unhandled exception.
        translations = None

    if not isinstance(translations, list):
        LOGGER.error(
            u'[3PlayMedia Task] unable to get translations metadata for video=%s -- process_id=%s -- response=%s',
            edx_video_id,
            file_id,
            translations_metadata_response.text,
        )
        return None

    return translations
def get_in_progress_translation_processes(video):
    """
    Fetch the 3Play Media tracking processes of a Video that are still 'IN PROGRESS'.

    Processes whose `translation_id` is NULL or an empty string are left out.
    """
    in_progress_processes = video.transcript_processes.filter(
        provider=TranscriptProvider.THREE_PLAY,
        status=TranscriptStatus.IN_PROGRESS,
    )
    without_translation_id = Q(translation_id__isnull=True) | Q(translation_id__exact='')
    return in_progress_processes.exclude(without_translation_id)
def get_in_progress_translation_process(processes, file_id, translation_id, target_language):
    """
    Look up one translation process among the given `processes`.

    The process must match `translation_id`, `target_language` and `file_id`;
    the latest match is returned. When nothing matches, a warning is logged
    and None is returned.
    """
    try:
        return processes.filter(
            translation_id=translation_id,
            lang_code=target_language,
            process_id=file_id
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        LOGGER.warning(
            (u'[3PlayMedia Task] Tracking process is either not found or already complete -- process_id=%s -- '
             u'target_language=%s -- translation_id=%s.'),
            file_id,
            target_language,
            translation_id
        )

    return None
def get_transcript_content_from_3play_media(api_key, edx_video_id, file_id, translation_id, target_language):
    """
    Download a translated transcript from 3Play Media in SRT format.

    Returns the SRT content, or None when the download raises
    `TranscriptFetchError` (the failure is logged).
    """
    try:
        download_url = THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
            file_id=file_id,
            translation_id=translation_id,
        )
        return fetch_srt_data(url=download_url, apikey=api_key)
    except TranscriptFetchError:
        LOGGER.exception(
            u'[3PlayMedia Task] Translation download failed for video=%s -- lang_code=%s -- process_id=%s.',
            edx_video_id,
            target_language,
            file_id,
        )

    return None
def convert_to_sjson_and_upload_to_s3(srt_transcript, edx_video_id, file_id, target_language):
    """
    Converts SRT content to sjson format, upload it to S3 and returns an S3 file path of the uploaded file.

    Arguments:
        srt_transcript: transcript content in SRT format
        edx_video_id: video studio identifier (only used for logging)
        file_id: file/process identifier (only used for logging)
        target_language: language code of the transcript (only used for logging)

    Returns:
        Whatever `upload_sjson_to_s3` returns for the uploaded sjson transcript.

    Raises:
        Logs and raises any unexpected Exception.
    """
    try:
        sjson_transcript = convert_srt_to_sjson(srt_transcript)
        sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
    except Exception:
        # in case of any exception, log and raise.
        # The arguments follow the placeholder order of the message:
        # video, lang_code, process_id.
        LOGGER.exception(
            u'[3PlayMedia Task] translation failed for video=%s -- lang_code=%s -- process_id=%s',
            edx_video_id,
            target_language,
            file_id,
        )
        raise

    return sjson_file
def handle_video_translations(video, translations, file_id, api_key, log_prefix):
    """
    Handle all the completed translations of a single video.

    This is a helper of `retrieve_three_play_translations`.

    Arguments:
        video: Video data object whose translations need to be handled here.
        translations: A list containing translations metadata information received from 3play Media.
        file_id: It is file identifier that is assigned to a Video by 3Play Media.
        api_key: An api key to communicate to the 3Play Media.
        log_prefix: A logging prefix used by the main process.

    For every translation reported as complete:
        - Fetch translated transcript content from 3Play Media.
        - Validate the content of received translated transcript.
        - Convert translated SRT transcript to SJson format and upload it to S3.
        - Update edx-val for a completed transcript.
        - Update transcript status for video in edx-val as well as edx-video-pipeline
          once all of the video's processes are ready.
    """
    tracking_processes = get_in_progress_translation_processes(video)
    for metadata in translations:
        translation_id = metadata['id']
        state = metadata['state']
        target_language = metadata['target_language_iso_639_1_code']

        # Only completed translations are of interest.
        if state != COMPLETE:
            continue

        # Find the tracking process that belongs to this translation.
        tracking_process = get_in_progress_translation_process(
            tracking_processes,
            file_id=file_id,
            translation_id=translation_id,
            target_language=target_language
        )
        if tracking_process is None:
            continue

        # 1 - Fetch translated transcript content from 3Play Media.
        srt_content = get_transcript_content_from_3play_media(
            api_key=api_key,
            edx_video_id=video.studio_id,
            file_id=file_id,
            translation_id=translation_id,
            target_language=target_language,
        )
        if srt_content is None:
            continue

        # 2 - Validate the content of received translated transcript.
        transcript_ok = validate_transcript_response(
            edx_video_id=video.studio_id,
            file_id=file_id,
            transcript=srt_content,
            lang_code=target_language,
            log_prefix=log_prefix
        )
        if not transcript_ok:
            tracking_process.update(status=TranscriptStatus.FAILED)
            continue

        tracking_process.update(status=TranscriptStatus.READY)

        # 3 - Convert SRT translation to SJson format and upload it to S3.
        sjson_path = convert_to_sjson_and_upload_to_s3(
            srt_transcript=srt_content,
            target_language=target_language,
            edx_video_id=video.studio_id,
            file_id=file_id,
        )

        # 4 - Update edx-val with completed transcript information.
        val_client = VALAPICall(video_proto=None, val_status=None)
        val_client.update_val_transcript(
            video_id=video.studio_id,
            lang_code=target_language,
            name=sjson_path,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        # 5 - Once every process of this video is ready, update the transcript status
        # of the video in edx-val as well as edx-video-pipeline.
        all_jobs = TranscriptProcessMetadata.objects.filter(video=video)
        if all(job.status == TranscriptStatus.READY for job in all_jobs):
            utils.update_video_status(
                val_api_client=val_client,
                video=video,
                status=TranscriptStatus.READY
            )
def retrieve_three_play_translations():
    """
    Check the 3PlayMedia translation status of all the progressing processes and fetch the completed ones.

    Retrieval flow:
        1. Fetch the 3PlayMedia videos whose transcript status is `in progress`,
           along with their in progress translation processes.
        2. For each video, retrieve the org-wide api keys.
        3. Check the translation status through 3PlayMedia.
        4. Hand the translations metadata over to `handle_video_translations`.

    When the credentials or the translations metadata cannot be obtained, the
    in progress translation processes of the video are marked as failed.
    """
    log_prefix = u'3PlayMedia Task'
    videos_in_progress = Video.objects.filter(
        provider=TranscriptProvider.THREE_PLAY,
        transcript_status=TranscriptStatus.IN_PROGRESS,
    )

    for video in videos_in_progress:
        pending_processes = get_in_progress_translation_processes(video)
        if not pending_processes.exists():
            LOGGER.info(
                '[3PlayMedia Task] video=%s does not have any translation process who is in progress.',
                video.studio_id,
            )
            continue

        # All the processes of a video share the same process id, also known as `file_id`.
        file_id = pending_processes.first().process_id

        # Credentials are looked up for the organization of the video's course.
        credentials = get_transcript_credentials(
            provider=TranscriptProvider.THREE_PLAY,
            org=video.inst_class.org,
            edx_video_id=video.studio_id,
            file_id=file_id,
            log_prefix=log_prefix
        )
        if not credentials:
            pending_processes.update(status=TranscriptStatus.FAILED)
            continue

        # The metadata carries the state of each translation.
        translations = get_translations_metadata(
            api_key=credentials.api_key,
            file_id=file_id,
            edx_video_id=video.studio_id,
        )
        if translations is None:
            pending_processes.update(status=TranscriptStatus.FAILED)
            continue

        handle_video_translations(
            video=video,
            translations=translations,
            file_id=file_id,
            api_key=credentials.api_key,
            log_prefix=log_prefix,
        )
"""
Common utils.
"""
import os
import urllib
import yaml
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from VEDA_OS01.models import TranscriptStatus
class ValTranscriptStatus(object):
    """
    Video transcript status values as understood by VAL.
    """
    # Transcription has been requested but is not finished yet.
    TRANSCRIPTION_IN_PROGRESS = 'transcription_in_progress'
    # Transcript is available.
    TRANSCRIPT_READY = 'transcript_ready'
# Maps the edx-video-pipeline video transcript statuses to their edx-val counterparts.
# Only the statuses listed here have an edx-val equivalent in this mapping.
VAL_TRANSCRIPT_STATUS_MAP = {
    TranscriptStatus.IN_PROGRESS: ValTranscriptStatus.TRANSCRIPTION_IN_PROGRESS,
    TranscriptStatus.READY: ValTranscriptStatus.TRANSCRIPT_READY
}
def get_config(yaml_config_file='instance_config.yaml'):
    """
    Read yaml config file.

    The file name is resolved against the parent of the directory that
    contains this module.

    Arguments:
        yaml_config_file (str): yaml config file name

    Returns:
        dict: yaml config; an empty dict when the file is empty or is not
        valid YAML.
    """
    config_dict = {}

    yaml_config_file = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        yaml_config_file
    )

    with open(yaml_config_file, 'r') as config:
        try:
            # NOTE(review): `yaml.load` without an explicit Loader can construct
            # arbitrary Python objects; consider `yaml.safe_load` if the config
            # never relies on Python-specific tags.
            loaded_config = yaml.load(config)
        except yaml.YAMLError:
            # Best effort: an unparsable file yields the empty config.
            loaded_config = None

    # An empty YAML document loads as None; keep the documented dict return.
    if loaded_config is not None:
        config_dict = loaded_config

    return config_dict
def extract_course_org(course_id):
    """
    Return the organization part of a course id, or None when `course_id`
    is not a valid course key.
    """
    try:
        return CourseKey.from_string(course_id).org
    except InvalidKeyError:
        return None
def build_url(*urls, **query_params):
    """
    Build a url from the given parts and query params.

    Arguments:
        *urls (str): url parts (e.g. a base url followed by endpoints); each part
            is stripped of its leading/trailing slashes and the parts are joined
            with a single '/'
        **query_params: query params, url-encoded and appended after a '?'

    Returns:
        the resulting url
    """
    joined_url = '/'.join(part.strip('/') for part in urls)
    if not query_params:
        return joined_url

    return '{}?{}'.format(joined_url, urllib.urlencode(query_params))
def update_video_status(val_api_client, video, status):
    """
    Update the transcript status of a video in edx-val and in edx-video-pipeline.

    Arguments:
        val_api_client: client whose `update_video_status` is used to update edx-val
        video(Video): Video data model object
        status(Str): Video status to be updated
    """
    # edx-val is only updated for statuses that have an edx-val counterpart.
    # NOTE(review): the handler below also swallows a KeyError raised from inside
    # the client call itself -- confirm that this is intended.
    try:
        mapped_status = VAL_TRANSCRIPT_STATUS_MAP[status]
        val_api_client.update_video_status(video.studio_id, mapped_status)
    except KeyError:
        # No edx-val update for this status.
        pass

    # edx-video-pipeline's own video record is always updated.
    video.transcript_status = status
    video.save()
......@@ -12,7 +12,8 @@ if project_path not in sys.path:
from control.celeryapp import maintainer_healer
from control.veda_heal import VedaHeal
from VEDA_OS01.models import Video
from VEDA_OS01.models import Course, Video
from VEDA_OS01.transcripts import retrieve_three_play_translations
"""
Deliver
......@@ -79,6 +80,11 @@ def main():
VH = VedaHeal()
VH.discovery()
VH.purge()
# Kicks off a round of retrieving successful
# translations from 3Play Media
retrieve_three_play_translations()
HC = HealCli()
HC.schedule()
return None
......
......@@ -15,6 +15,10 @@ if project_path not in sys.path:
This is a cheapo way to get a pager (using SES)
"""
import django
django.setup()
from control.veda_file_discovery import FileDiscovery
from youtube_callback.daemon import generate_course_list
from youtube_callback.sftp_id_retrieve import callfunction
......
"""
Start Celery Worker
"""
from __future__ import absolute_import
import os
import sys
from celery import Celery
import yaml
"""
Start Celery Worker
"""
try:
from control.control_env import *
except:
from control_env import *
try:
from control.veda_deliver import VedaDelivery
except:
except ImportError:
from veda_deliver import VedaDelivery
auth_yaml = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
'instance_config.yaml'
......@@ -59,12 +54,14 @@ def worker_task_fire(veda_id, encode_profile, jobid):
@app.task(name='supervisor_deliver')
def deliverable_route(veda_id, encode_profile):
VD = VedaDelivery(
"""
Task for deliverable route.
"""
veda_deliver = VedaDelivery(
veda_id=veda_id,
encode_profile=encode_profile
)
VD.run()
veda_deliver.run()
@app.task
......
"""
3PlayMedia transcription unit tests
"""
import json
import responses
from ddt import ddt, data, unpack
from mock import Mock, patch
from django.test import TestCase
from control.veda_deliver_3play import (
ThreePlayMediaClient,
ThreePlayMediaUrlError,
ThreePlayMediaPerformTranscriptionError,
)
from VEDA_OS01.models import (
Course,
TranscriptProcessMetadata,
Video,
ThreePlayTurnaround,
)
from VEDA_OS01.utils import build_url
# Field values for the Video fixture created in the tests' setUp.
VIDEO_DATA = {
    'studio_id': '12345',
    'source_language': 'en'
}
@ddt
class ThreePlayMediaClientTests(TestCase):
    """
    Unit tests for the 3PlayMedia transcription client.
    """

    def setUp(self):
        """
        Create the course/video fixtures and the client preferences used by the tests.
        """
        self.course = Course.objects.create(
            course_name=u'Intro to VEDA',
            institution=u'MAx',
            edx_classid=u'123'
        )
        self.video = Video.objects.create(inst_class=self.course, **VIDEO_DATA)

        callback_url = build_url(
            u'https://veda.edx.org/3playmedia/transcripts/handle/123123',
            org=u'MAx',
            edx_video_id=VIDEO_DATA['studio_id'],
            lang_code=VIDEO_DATA['source_language'],
        )
        self.video_transcript_preferences = {
            'org': u'MAx',
            'video': self.video,
            'media_url': u'https://s3.amazonaws.com/bkt/video.mp4',
            'api_key': u'insecure_api_key',
            'api_secret': u'insecure_api_secret',
            'turnaround_level': ThreePlayTurnaround.DEFAULT,
            'callback_url': callback_url,
            'three_play_api_base_url': 'https://api.3playmedia.com/',
        }

    def assert_request(self, received_request, expected_request, decode_func):
        """
        Check every attribute listed in `expected_request` against `received_request`.

        Headers are compared one by one; the body is decoded with `decode_func`
        (when given) before being compared.
        """
        for attr_name, expected_value in expected_request.items():
            if attr_name == 'headers':
                received_headers = getattr(received_request, attr_name)
                for header_name, header_value in expected_value.items():
                    self.assertEqual(received_headers[header_name], header_value)
            elif attr_name == 'body' and decode_func:
                self.assertDictEqual(decode_func(received_request.body), expected_value)
            else:
                self.assertEqual(getattr(received_request, attr_name), expected_value)

    @responses.activate
    @patch('control.veda_deliver_3play.LOGGER')
    def test_transcription_flow(self, mock_logger):
        """
        Verify 3PlayMedia happy transcription flow
        """
        preferences = self.video_transcript_preferences
        three_play_client = ThreePlayMediaClient(**preferences)

        responses.add(
            responses.HEAD,
            u'https://s3.amazonaws.com/bkt/video.mp4',
            headers={'Content-Type': u'video/mp4'},
            status=200,
        )
        responses.add(
            responses.GET,
            u'https://api.3playmedia.com/caption_imports/available_languages',
            body=json.dumps([{
                "iso_639_1_code": "en",
                "language_id": 1,
            }]),
            status=200,
        )
        responses.add(
            responses.POST,
            u'https://api.3playmedia.com/files',
            body=u'111222',
            status=200
        )

        three_play_client.generate_transcripts()

        # One request per registered response: 3 HTTP requests in total.
        self.assertEqual(len(responses.calls), 3)

        # Attributes that must be submitted for a transcription.
        submitted_body = {
            'link': preferences['media_url'],
            'apikey': preferences['api_key'],
            'api_secret_key': preferences['api_secret'],
            'turnaround_level': preferences['turnaround_level'],
            'callback_url': preferences['callback_url'],
            'language_id': 1,
        }
        expected_requests = [
            {
                'url': u'https://s3.amazonaws.com/bkt/video.mp4',
                'body': None,
                'method': 'HEAD',
            },
            {
                'url': u'https://api.3playmedia.com/caption_imports/available_languages?apikey=insecure_api_key',
                'body': None,
                'method': 'GET',
            },
            {
                'url': u'https://api.3playmedia.com/files',
                'body': submitted_body,
                'method': 'POST',
                'headers': {'Content-Type': 'application/json'},
                'decode_func': json.loads
            },
        ]
        for position, expected_request in enumerate(expected_requests):
            # `decode_func` is a helper, not a request attribute; take it out before comparing.
            decode_func = expected_request.pop('decode_func', None)
            self.assert_request(
                received_request=responses.calls[position].request,
                expected_request=expected_request,
                decode_func=decode_func
            )

        self.assertEqual(TranscriptProcessMetadata.objects.count(), 1)
        mock_logger.info.assert_called_with(
            '[3PlayMedia] Transcription process has been started for video=%s, source_language=%s.',
            VIDEO_DATA['studio_id'],
            VIDEO_DATA['source_language'],
        )

    @data(
        {
            'headers': {'Content-Type': u'video/mp4'},
            'status': 400,
        },
        {
            'headers': {'Content-Type': u'application/json'},
            'status': 200,
        }
    )
    @responses.activate
    def test_validate_media_url(self, response):
        """
        Tests media url validations.
        """
        responses.add(responses.HEAD, u'https://s3.amazonaws.com/bkt/video.mp4', **response)
        client = ThreePlayMediaClient(**self.video_transcript_preferences)
        with self.assertRaises(ThreePlayMediaUrlError):
            client.validate_media_url()

    @data(
        {
            'body': None,
            'status': 400,
        },
        {
            'body': json.dumps({'iserror': True, 'error': 'Submission has failed'}),
            'status': 200,
        }
    )
    @responses.activate
    def test_submit_media_exceptions(self, response):
        """
        Tests media submission exceptions
        """
        responses.add(
            responses.HEAD,
            u'https://s3.amazonaws.com/bkt/video.mp4',
            headers={'Content-Type': u'video/mp4'},
            status=200,
        )
        responses.add(
            responses.GET,
            u'https://api.3playmedia.com/caption_imports/available_languages',
            status=200,
            body=json.dumps([{
                "iso_639_1_code": "en",
                "language_id": 1,
            }]),
        )
        responses.add(responses.POST, u'https://api.3playmedia.com/files', **response)

        client = ThreePlayMediaClient(**self.video_transcript_preferences)
        with self.assertRaises(ThreePlayMediaPerformTranscriptionError):
            client.submit_media()

    @data(
        (
            # media url check: error
            {
                'body': None,
                'status': 400,
            },
            # available languages: success
            {
                'body': '[{"iso_639_1_code": "en", "language_id": 1}]',
                'status': 200,
            },
            # media submission: success
            {
                'body': '11111',
                'status': 200,
            },
        ),
        (
            # media url check: success
            {
                'headers': {'Content-Type': u'video/mp4'},
                'status': 200,
            },
            # available languages: error
            {
                'body': None,
                'status': 400,
            },
            # media submission: success
            {
                'body': '11111',
                'status': 200,
            },
        ),
        (
            # media url check: success
            {
                'headers': {'Content-Type': u'video/mp4'},
                'status': 200,
            },
            # available languages: error
            {
                'body': '{"error": "unauthorized"}',
                'status': 200,
            },
            # media submission: success
            {
                'body': '11111',
                'status': 200,
            },
        ),
        (
            # media url check: success
            {
                'headers': {'Content-Type': u'video/mp4'},
                'status': 200,
            },
            # available languages: success
            {
                'body': '[{"iso_639_1_code": "en", "language_id": 1}]',
                'status': 200,
            },
            # media submission: error
            {
                'body': None,
                'status': 400,
            },
        ),
        (
            # media url check: success
            {
                'headers': {'Content-Type': u'video/mp4'},
                'status': 200,
            },
            # available languages: success
            {
                'body': '[{"iso_639_1_code": "en", "language_id": 1}]',
                'status': 200,
            },
            # media submission: error
            {
                'body': '{"error": "unauthorized"}',
                'status': 200,
            },
        )
    )
    @unpack
    @responses.activate
    @patch('control.veda_deliver_3play.LOGGER')
    def test_generate_transcripts_exceptions(self, first_response, second_response, third_response, mock_log):
        """
        Tests the proper exceptions during transcript generation.
        """
        mocked_endpoints = (
            (responses.HEAD, u'https://s3.amazonaws.com/bkt/video.mp4', first_response),
            (responses.GET, u'https://api.3playmedia.com/caption_imports/available_languages', second_response),
            (responses.POST, u'https://api.3playmedia.com/files', third_response),
        )
        for method, url, response in mocked_endpoints:
            responses.add(method, url, **response)

        client = ThreePlayMediaClient(**self.video_transcript_preferences)
        client.generate_transcripts()

        self.assertFalse(mock_log.info.called)
        mock_log.exception.assert_called_with(
            u'[3PlayMedia] Could not process transcripts for video=%s source_language=%s.',
            VIDEO_DATA['studio_id'],
            VIDEO_DATA['source_language'],
        )
        self.assertEqual(TranscriptProcessMetadata.objects.count(), 0)

    @patch('control.veda_deliver_3play.LOGGER')
    @patch('control.veda_deliver_3play.ThreePlayMediaClient.submit_media', Mock(side_effect=ValueError))
    def test_generate_transcripts_unknown_exceptions(self, mock_log):
        """
        Verify that the unknown exceptions are logged during transcript generation.
        """
        client = ThreePlayMediaClient(**self.video_transcript_preferences)

        with self.assertRaises(ValueError):
            client.generate_transcripts()

        self.assertFalse(mock_log.info.called)
        mock_log.exception.assert_called_with(
            u'[3PlayMedia] Unexpected error while transcription for video=%s source_language=%s.',
            VIDEO_DATA['studio_id'],
            VIDEO_DATA['source_language'],
        )
        self.assertEqual(TranscriptProcessMetadata.objects.count(), 0)
"""
Cielo24 transcription testing
"""
from django.test import TestCase
import responses
from ddt import ddt
from mock import patch
from control.veda_deliver_cielo import Cielo24Transcript
from VEDA_OS01.models import (Cielo24Fidelity, Cielo24Turnaround, Course,
TranscriptProcessMetadata, TranscriptStatus,
Video)
from VEDA_OS01.utils import build_url
# Field values for the Video fixture created in the tests' setUp.
VIDEO_DATA = {
    'studio_id': '12345',
    'source_language': 'en'
}
@ddt
class Cielo24TranscriptTests(TestCase):
    """
    Unit tests for the Cielo24 transcription flow.
    """

    def setUp(self):
        """
        Create the course/video fixtures and the transcript preferences used by the tests.
        """
        self.course = Course.objects.create(
            course_name='Intro to VEDA',
            institution='MAx',
            edx_classid='123'
        )
        self.video = Video.objects.create(inst_class=self.course, **VIDEO_DATA)

        self.video_transcript_preferences = {
            'org': 'MAx',
            'api_key': 'cielo24_api_key',
            'turnaround': Cielo24Turnaround.PRIORITY,
            'fidelity': Cielo24Fidelity.PROFESSIONAL,
            'preferred_languages': ['en', 'ur'],
            's3_video_url': 'https://s3.amazonaws.com/bkt/video.mp4',
            'callback_base_url': 'https://veda.edx.org/cielo24/transcript_completed/1234567890',
            'cielo24_api_base_url': 'https://sandbox.cielo24.com/api',
        }

    def tearDown(self):
        """
        Remove the transcript processes created by a test.
        """
        TranscriptProcessMetadata.objects.all().delete()

    def cielo24_url(self, cielo24, endpoint):
        """
        Return absolute url

        Arguments:
            cielo24 (Cielo24Transcript), object
            endpoint (str): url endpoint

        Returns:
            absolute url
        """
        base_url = cielo24.cielo24_api_base_url
        return build_url(base_url, endpoint)

    def assert_request(self, received_request, expected_request):
        """
        Check method, url and body of `received_request` against `expected_request`.
        """
        for attr_name in ('method', 'url', 'body'):
            self.assertEqual(getattr(received_request, attr_name), expected_request[attr_name])

    @responses.activate
    def test_transcript_flow(self):
        """
        Verify cielo24 transcription flow
        """
        job_id = '000-111-222'

        cielo24 = Cielo24Transcript(
            video=self.video,
            **self.video_transcript_preferences
        )

        mocked_endpoints = (
            (cielo24.cielo24_new_job, {'JobId': job_id}),
            (cielo24.cielo24_add_media, {'TaskId': '000-000-111'}),
            (cielo24.cielo24_perform_transcription, {'TaskId': '000-000-000'}),
        )
        for endpoint, response_body in mocked_endpoints:
            responses.add(
                responses.GET,
                self.cielo24_url(cielo24, endpoint),
                body=response_body,
                status=200
            )

        cielo24.start_transcription_flow()

        # 6 HTTP requests in total: 3 cielo24 requests
        # for each of the two languages (en, ur).
        self.assertEqual(len(responses.calls), 6)

        # pylint: disable=line-too-long
        callback_url = build_url(
            'https://veda.edx.org/cielo24/transcript_completed/1234567890',
            lang_code='TARGET_LANG',
            video_id='12345',
            job_id='000-111-222',
            iwp_name='{iwp_name}',
            org='MAx',
        )
        expected_data = [
            {
                'url': build_url(
                    'https://sandbox.cielo24.com/api/job/new',
                    v=1,
                    job_name='12345',
                    language='en',  # A job's language.
                    api_token='cielo24_api_key',
                ),
                'body': None,
                'method': 'GET'
            },
            {
                'url': build_url(
                    'https://sandbox.cielo24.com/api/job/add_media',
                    v=1,
                    job_id='000-111-222',
                    api_token='cielo24_api_key',
                    media_url='https://s3.amazonaws.com/bkt/video.mp4',
                ),
                'body': None,
                'method': 'GET',
            },
            {
                'url': build_url(
                    'https://sandbox.cielo24.com/api/job/perform_transcription',
                    v=1,
                    job_id='000-111-222',
                    target_language='TARGET_LANG',
                    callback_url=callback_url,
                    api_token='cielo24_api_key',
                    priority='PRIORITY',
                    transcription_fidelity='PROFESSIONAL',
                    options='{"return_iwp": ["FINAL"]}'
                ),
                'body': None,
                'method': 'GET'
            }
        ]

        # Expand the templates into the full request sequence, language by language.
        expected_requests = []
        for preferred_language in self.video_transcript_preferences['preferred_languages']:
            for template in expected_data:
                expected = template
                # Only the transcription request carries the target language placeholder.
                if 'api/job/perform_transcription' in template['url']:
                    expected = dict(template)
                    expected['url'] = expected['url'].replace('TARGET_LANG', preferred_language)
                expected_requests.append(expected)

        for position, expected in enumerate(expected_requests):
            self.assert_request(responses.calls[position].request, expected)

    @patch('control.veda_deliver_cielo.LOGGER')
    @responses.activate
    def test_transcript_flow_exceptions(self, mock_logger):
        """
        Verify that cielo24 transcription flow works as expected in case of bad response from cielo24
        """
        job_id = '010-010-010'
        bad_request_message = 'Bad request data'

        # Restrict the flow to a single language.
        preferences = dict(self.video_transcript_preferences, preferred_languages=['en'])

        cielo24 = Cielo24Transcript(
            video=self.video,
            **preferences
        )

        responses.add(
            responses.GET,
            self.cielo24_url(cielo24, cielo24.cielo24_new_job),
            body={'JobId': job_id},
            status=200
        )
        responses.add(
            responses.GET,
            self.cielo24_url(cielo24, cielo24.cielo24_add_media),
            body=bad_request_message,
            status=400
        )

        cielo24.start_transcription_flow()

        mock_logger.exception.assert_called_with(
            '[CIELO24] Request failed for video=%s -- lang=%s -- job_id=%s',
            self.video.studio_id,
            preferences['preferred_languages'][0],
            job_id
        )

        # Total of 2 HTTP requests are made for cielo24
        self.assertEqual(len(responses.calls), 2)

        tracked_processes = TranscriptProcessMetadata.objects.all()
        self.assertEqual(tracked_processes.count(), 1)
        self.assertEqual(tracked_processes.first().status, TranscriptStatus.FAILED)
from ..veda_deliver_cielo import Cielo24Transcript
'''
TEST
list_of_ids = [
'XXXC93BC2016-V000100'
]
for l in list_of_ids:
x = Cielo24Transcript(
veda_id = l
)
output = x.perform_transcription()
print output
'''
......@@ -2,6 +2,10 @@
import os
import sys
import unittest
from django.test import TestCase
from control.veda_encode import VedaEncode
from VEDA_OS01.models import URL, Course, Destination, Encode, Video
"""
Test encode profiler
......@@ -12,18 +16,39 @@ sys.path.append(os.path.dirname(os.path.dirname(
os.path.abspath(__file__)
)))
from control.veda_encode import VedaEncode
from VEDA_OS01.models import Course, URL, Video, Encode
VIDEO_DATA = {
'studio_id': '12345'
}
class TestEncode(unittest.TestCase):
class TestEncode(TestCase):
def setUp(self):
self.course_object = Course.objects.get(
self.veda_id = 'XXXXXXXX2016-V00TEST'
self.course_object = Course.objects.create(
institution='XXX',
edx_classid='XXXXX'
)
self.veda_id = 'XXXXXXXX2016-V00TEST'
self.video = Video.objects.create(
inst_class=self.course_object,
studio_id='12345',
edx_id=self.veda_id,
)
Encode.objects.create(
product_spec='mobile_low',
encode_destination=Destination.objects.create(destination_name='destination_name'),
profile_active=True
)
Encode.objects.create(
product_spec='desktop_mp4',
encode_destination=Destination.objects.create(destination_name='destination_name'),
profile_active=True
)
self.E = VedaEncode(
course_object=self.course_object,
veda_id=self.veda_id
......@@ -32,27 +57,25 @@ class TestEncode(unittest.TestCase):
def test_encode_url(self):
"""
gen baseline, gen a url, test against baseline
"""
URL.objects.filter(
videoID=Video.objects.filter(edx_id=self.veda_id).latest()
).delete()
encode_list = self.E.determine_encodes()
baseline = len(encode_list)
self.assertTrue(isinstance(encode_list, list))
self.assertTrue(isinstance(encode_list, set))
self.E.encode_list = []
U = URL(
self.E.encode_list = set()
url = URL(
videoID=Video.objects.filter(edx_id=self.veda_id).latest(),
encode_profile=Encode.objects.get(product_spec='mobile_low'),
encode_url='THIS Is A TEST'
)
U.save()
url.save()
encode_list = self.E.determine_encodes()
self.assertTrue(len(encode_list) == baseline - 1)
self.E.encode_list = []
self.E.encode_list = set()
URL.objects.filter(
videoID=Video.objects.filter(edx_id=self.veda_id).latest(),
).delete()
......
import os
import sys
import unittest
from django.test import TestCase
"""
Test VEDA API
......@@ -12,7 +13,7 @@ sys.path.append(os.path.dirname(os.path.dirname(
from control.veda_file_discovery import FileDiscovery
class TestValidation(unittest.TestCase):
class TestValidation(TestCase):
def setUp(self):
......
......@@ -4,6 +4,7 @@ import os
import sys
import unittest
from django.test import TestCase
import requests
import yaml
......@@ -19,7 +20,7 @@ sys.path.append(os.path.dirname(os.path.dirname(__file__)))
requests.packages.urllib3.disable_warnings()
class TestIngest(unittest.TestCase):
class TestIngest(TestCase):
def setUp(self):
self.VP = VideoProto(
......
"""
Tests HEAL process
Test heal processor
"""
import datetime
import json
import os
import sys
from django.test import TestCase
from datetime import timedelta
from unittest import TestCase, skip
import yaml
from ddt import data, ddt, unpack
import responses
from django.utils.timezone import utc
from mock import PropertyMock, patch
from control.veda_heal import VedaHeal
from VEDA_OS01.models import Course, Video
from VEDA_OS01.models import URL, Course, Destination, Encode, Video, TranscriptStatus
from VEDA_OS01.utils import build_url, get_config, ValTranscriptStatus
sys.path.append(os.path.dirname(os.path.dirname(
os.path.abspath(__file__)
)))
CONFIG_DATA = get_config('test_config.yaml')
@ddt
......@@ -22,15 +31,78 @@ class HealTests(TestCase):
def setUp(self):
self.heal_instance = VedaHeal()
self.auth_yaml = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
'instance_config.yaml'
)
self.encode_list = set()
with open(self.auth_yaml, 'r') as stream:
for key, entry in yaml.load(stream)['encode_dict'].items():
for e in entry:
self.encode_list.add(e)
for key, entry in CONFIG_DATA['encode_dict'].items():
for e in entry:
self.encode_list.add(e)
self.video_id = '12345'
self.course_object = Course.objects.create(
institution='XXX',
edx_classid='XXXXX',
local_storedir='WestonHS/PFLC1x/3T2015'
)
self.video = Video.objects.create(
inst_class=self.course_object,
studio_id=self.video_id,
edx_id='XXXXXXXX2014-V00TES1',
video_trans_start=datetime.datetime.utcnow().replace(tzinfo=utc) - timedelta(
hours=CONFIG_DATA['heal_start']
),
video_trans_end=datetime.datetime.utcnow().replace(tzinfo=utc),
)
self.encode = Encode.objects.create(
product_spec='mobile_low',
encode_destination=Destination.objects.create(destination_name='destination_name')
)
self.hls_encode = Encode.objects.create(
product_spec='hls',
encode_destination=Destination.objects.create(destination_name='destination_name')
)
url = URL(
videoID=self.video,
encode_profile=self.encode,
encode_bitdepth='22',
encode_url='http://veda.edx.org/encode')
url.save()
@patch('control.veda_heal.VALAPICall._AUTH', PropertyMock(return_value=lambda: CONFIG_DATA))
@responses.activate
def test_heal(self):
val_response = {
'courses': [{u'WestonHS/PFLC1x/3T2015': None}],
'encoded_videos': [{
'url': 'https://testurl.mp4',
'file_size': 8499040,
'bitrate': 131,
'profile': 'mobile_low',
}]
}
responses.add(
responses.POST,
CONFIG_DATA['val_token_url'],
'{"access_token": "1234567890"}',
status=200
)
responses.add(
responses.GET,
build_url(CONFIG_DATA['val_api_url'], self.video_id),
body=json.dumps(val_response),
content_type='application/json',
status=200
)
responses.add(
responses.PUT,
build_url(CONFIG_DATA['val_api_url'], self.video_id),
status=200
)
heal = VedaHeal()
heal.discovery()
@data(
{
......@@ -71,7 +143,6 @@ class HealTests(TestCase):
},
)
@unpack
@skip("Failing from day 1 https://github.com/edx/edx-video-pipeline/pull/26")
def test_determine_fault(self, edx_id, video_trans_status, video_trans_start, video_active):
"""
Tests that determine_fault works in various video states.
......@@ -81,8 +152,10 @@ class HealTests(TestCase):
video_trans_status=video_trans_status,
video_trans_start=video_trans_start,
video_active=video_active,
inst_class=Course()
inst_class=self.course_object
)
video_instance.save()
encode_list = self.heal_instance.determine_fault(video_instance)
if video_instance.edx_id == '1':
......@@ -95,48 +168,103 @@ class HealTests(TestCase):
{
'uncompleted_encodes': [],
'expected_encodes': ['test_obj'],
'video_object': {
'video_props': {
'edx_id': '1',
'video_trans_status': 'Complete',
'video_trans_start': datetime.datetime.utcnow().replace(tzinfo=utc),
'video_active': True,
}
},
'result': []
},
{
'uncompleted_encodes': ['test_obj'],
'expected_encodes': ['test_obj'],
'video_object': {
'video_props': {
'edx_id': '2',
'video_trans_status': 'Ingest',
'video_trans_start': datetime.datetime.utcnow().replace(tzinfo=utc),
'video_active': True,
}
},
'result': ['test_obj']
}
)
@unpack
def test_differentiate_encodes(self, uncompleted_encodes, expected_encodes, video_object):
def test_differentiate_encodes(self, uncompleted_encodes, expected_encodes, video_props, result):
"""
Tests that differentiate_encodes list comparison works as expected. This doesn't test video states,
just the list comparison function.
"""
video_instance = Video(
edx_id=video_object['edx_id'],
video_trans_status=video_object['video_trans_status'],
video_trans_start=video_object['video_trans_start'],
video_active=video_object['video_active'],
inst_class=Course()
)
video_instance = Video.objects.create(inst_class=self.course_object, **video_props)
encode_list = self.heal_instance.differentiate_encodes(
uncompleted_encodes,
expected_encodes,
video_instance
)
self.assertEqual(encode_list, result)
if video_instance.edx_id == '1':
self.assertEqual(encode_list, [])
elif video_instance.edx_id == '2':
self.assertEqual(encode_list, ['test_obj'])
@data(
    {
        'uncompleted_encodes': [],
        'expected_encodes': ['test_obj'],
        'video_props': {
            'edx_id': '1',
            'video_trans_status': 'Complete',
            'video_trans_start': datetime.datetime.utcnow().replace(tzinfo=utc),
            'video_active': True,
            'transcript_status': TranscriptStatus.PENDING
        },
        'expected_val_status': 'file_complete'
    },
    {
        'uncompleted_encodes': [],
        'expected_encodes': ['test_obj'],
        'video_props': {
            'edx_id': '1',
            'video_trans_status': 'Complete',
            'video_trans_start': datetime.datetime.utcnow().replace(tzinfo=utc),
            'video_active': True,
            'transcript_status': TranscriptStatus.IN_PROGRESS
        },
        'expected_val_status': ValTranscriptStatus.TRANSCRIPTION_IN_PROGRESS
    },
    {
        'uncompleted_encodes': [],
        'expected_encodes': ['test_obj'],
        'video_props': {
            'edx_id': '1',
            'video_trans_status': 'Complete',
            'video_trans_start': datetime.datetime.utcnow().replace(tzinfo=utc),
            'video_active': True,
            'transcript_status': TranscriptStatus.READY
        },
        'expected_val_status': ValTranscriptStatus.TRANSCRIPT_READY
    },
    {
        'uncompleted_encodes': ['test_obj'],
        'expected_encodes': ['test_obj'],
        'video_props': {
            'edx_id': '2',
            'video_trans_status': 'Ingest',
            'video_trans_start': datetime.datetime.utcnow().replace(tzinfo=utc),
            'video_active': True,
            'transcript_status': TranscriptStatus.READY
        },
        'expected_val_status': 'transcode_queue'
    }
)
@unpack
def test_differentiate_encodes_val_status(self, uncompleted_encodes,
                                          expected_encodes, video_props, expected_val_status):
    """
    Check the val status left on the heal instance after differentiate_encodes
    runs for a video created from `video_props`.
    """
    healer = self.heal_instance
    video = Video.objects.create(inst_class=self.course_object, **video_props)

    # Only the side effect on `val_status` is checked here; the return value is ignored.
    healer.differentiate_encodes(uncompleted_encodes, expected_encodes, video)

    self.assertEqual(healer.val_status, expected_val_status)
@data(
{
......@@ -171,16 +299,17 @@ class HealTests(TestCase):
}
)
@unpack
@skip("Failing from day 1 https://github.com/edx/edx-video-pipeline/pull/26")
def test_determine_longterm_corrupt(self, uncompleted_encodes, expected_encodes, video_object):
video_instance = Video(
edx_id=video_object['edx_id'],
video_trans_status=video_object['video_trans_status'],
video_trans_start=video_object['video_trans_start'],
video_active=video_object['video_active'],
inst_class=Course()
inst_class=self.course_object
)
video_instance.save()
longterm_corrupt = self.heal_instance.determine_longterm_corrupt(
uncompleted_encodes,
expected_encodes,
......
"""
Test upload processes
"""
import os
import sys
import unittest
from django.test import TestCase
"""
Test upload processes
from boto.s3.connection import S3Connection
from mock import PropertyMock, patch
from moto import mock_s3_deprecated
from VEDA_OS01 import utils
from control.veda_file_ingest import VideoProto
from control.veda_hotstore import Hotstore
"""
sys.path.append(os.path.dirname(os.path.dirname(
os.path.abspath(__file__)
)))
from control.veda_hotstore import Hotstore
from control.veda_file_ingest import VideoProto
CONFIG_DATA = utils.get_config('test_config.yaml')
class TestHotstore(unittest.TestCase):
class TestHotstore(TestCase):
def setUp(self):
VP = VideoProto()
VP.veda_id = 'XXXXXXXX2014-V00TEST'
video_proto = VideoProto()
video_proto.veda_id = 'XXXXXXXX2014-V00TEST'
self.upload_filepath = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
'test_files',
'OVTESTFILE_01.mp4'
)
self.H1 = Hotstore(
video_object=VP,
upload_filepath=self.upload_filepath,
video_proto=VP
)
with patch.object(Hotstore, '_READ_AUTH', PropertyMock(return_value=lambda: CONFIG_DATA)):
self.hotstore = Hotstore(
video_object=video_proto,
upload_filepath=self.upload_filepath,
video_proto=video_proto
)
# do s3 mocking
mock = mock_s3_deprecated()
mock.start()
conn = S3Connection()
conn.create_bucket(CONFIG_DATA['veda_s3_hotstore_bucket'])
self.addCleanup(mock.stop)
def test_single_upload(self):
if self.H1.auth_dict is None:
self.assertTrue(self.H1.upload() is False)
"""
Verify S3 single part upload.
"""
if self.hotstore.auth_dict is None:
self.assertTrue(self.hotstore.upload() is False)
return None
self.assertTrue(self.H1.upload())
self.hotstore.auth_dict['multi_upload_barrier'] = os.stat(self.upload_filepath).st_size + 1
self.assertTrue(self.hotstore.upload())
def test_multi_upload(self):
if self.H1.auth_dict is None:
self.assertTrue(self.H1.upload() is None)
"""
Verify S3 multi-part upload.
"""
if self.hotstore.auth_dict is None:
self.assertTrue(self.hotstore.upload() is None)
return None
self.H1.auth_dict['multi_upload_barrier'] = 0
self.assertTrue(self.H1.upload())
def main():
unittest.main()
if __name__ == '__main__':
sys.exit(main())
self.hotstore.auth_dict['multi_upload_barrier'] = 0
self.assertTrue(self.hotstore.upload())
......@@ -2,6 +2,7 @@
import os
import sys
import unittest
from django.test import TestCase
"""
A basic unittest for the "Course Addition Tool"
......@@ -14,7 +15,7 @@ sys.path.append(
from veda_utils import Report
class TestReporting(unittest.TestCase):
class TestReporting(TestCase):
def setUp(self):
self.R = Report(
......
......@@ -2,25 +2,30 @@
import ast
import os
import sys
import unittest
from django.test import TestCase
from mock import PropertyMock, patch
import requests
import yaml
import responses
from control.veda_val import VALAPICall
from veda_file_ingest import VideoProto
from veda_val import VALAPICall
from VEDA_OS01 import utils
requests.packages.urllib3.disable_warnings()
"""
This is an API connection test
set to pass if instance_config.yaml is missing
"""
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
CONFIG_DATA = utils.get_config('test_config.yaml')
class TestVALAPI(unittest.TestCase):
class TestVALAPI(TestCase):
def setUp(self):
self.VP = VideoProto(
......@@ -28,20 +33,17 @@ class TestVALAPI(unittest.TestCase):
veda_id='TESTID'
)
self.VAC = VALAPICall(
video_proto=self.VP,
val_status='complete'
)
with patch.object(VALAPICall, '_AUTH', PropertyMock(return_value=lambda: CONFIG_DATA)):
self.VAC = VALAPICall(
video_proto=self.VP,
val_status='complete'
)
self.auth_yaml = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
'instance_config.yaml'
)
self.auth_yaml = CONFIG_DATA
def test_val_setup(self):
if not os.path.exists(self.auth_yaml):
self.assertTrue(self.VAC.auth_dict is None)
return None
# register val url to send api response
responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
salient_variables = [
'val_api_url',
......@@ -51,30 +53,23 @@ class TestVALAPI(unittest.TestCase):
'val_username',
'val_token_url',
]
for s in salient_variables:
self.assertTrue(len(self.VAC.auth_dict[s]) > 0)
for salient_variable in salient_variables:
self.assertTrue(len(self.VAC.auth_dict[salient_variable]) > 0)
@responses.activate
def test_val_connection(self):
if not os.path.exists(self.auth_yaml):
self.assertTrue(self.VAC.auth_dict is None)
return None
# register val url to send api response
responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
responses.add(responses.GET, CONFIG_DATA['val_api_url'], status=200)
self.VAC.val_tokengen()
self.assertFalse(self.VAC.val_token is None)
s = requests.get(
response = requests.get(
self.VAC.auth_dict['val_api_url'],
headers=self.VAC.headers,
timeout=20
)
self.assertFalse(s.status_code == 404)
self.assertFalse(s.status_code > 299)
def main():
unittest.main()
if __name__ == '__main__':
sys.exit(main())
self.assertFalse(response.status_code == 404)
self.assertFalse(response.status_code > 299)
import os
import sys
import unittest
from django.test import TestCase
"""
Test VEDA API
......@@ -12,7 +13,7 @@ sys.path.append(os.path.dirname(os.path.dirname(
from control.veda_video_validation import Validation
class TestValidation(unittest.TestCase):
class TestValidation(TestCase):
"""
Test class for Validation
"""
......@@ -28,6 +29,7 @@ class TestValidation(unittest.TestCase):
videofile=self.videofile
)
@unittest.skip('Skipping this test due to unavailability of required ffprobe version.')
def test_validation(self):
"""
Check a known file for validity
......
import os
import sys
import yaml
import datetime
import logging
import shutil
from os.path import expanduser
import boto
import boto.s3
from boto.s3.key import Key
from boto.exception import S3ResponseError
from os.path import expanduser
import requests
import datetime
import ftplib
import shutil
import yaml
from boto.exception import S3ResponseError
from boto.s3.key import Key
from django.core.urlresolvers import reverse
import veda_deliver_xuetang
from control_env import *
from veda_deliver_cielo import Cielo24Transcript
from veda_deliver_youtube import DeliverYoutube
from VEDA_OS01 import utils
from VEDA_OS01.models import (TranscriptCredentials, TranscriptProvider,
TranscriptStatus)
from VEDA_OS01.utils import build_url
from veda_utils import ErrorObject, Metadata, Output, VideoProto
from veda_val import VALAPICall
from veda_video_validation import Validation
from watchdog import Watchdog
try:
from control.veda_deliver_3play import ThreePlayMediaClient
except ImportError:
from veda_deliver_3play import ThreePlayMediaClient
LOGGER = logging.getLogger(__name__)
try:
......@@ -28,14 +50,6 @@ and upload to the appropriate endpoint via the approp. methods
"""
homedir = expanduser("~")
from control_env import *
from veda_utils import ErrorObject, Output, Metadata, VideoProto
from veda_video_validation import Validation
from veda_val import VALAPICall
from veda_deliver_cielo import Cielo24Transcript
import veda_deliver_xuetang
from veda_deliver_youtube import DeliverYoutube
from watchdog import Watchdog
watchdog_time = 10.0
......@@ -170,8 +184,7 @@ class VedaDelivery:
"""
Transcript, Xuetang
"""
self._THREEPLAY_UPLOAD()
self._CIELO24_UPLOAD()
self._XUETANG_ROUTE()
self.status = self._DETERMINE_STATUS()
......@@ -180,6 +193,18 @@ class VedaDelivery:
self._CLEANUP()
# Transcription Process
# We only want to generate transcripts for `desktop_mp4` profile.
if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription:
# 3PlayMedia
if self.video_query.provider == TranscriptProvider.THREE_PLAY:
self.start_3play_transcription_process()
# Cielo24
if self.video_query.provider == TranscriptProvider.CIELO24:
self.cielo24_transcription_flow()
def _INFORM_INTAKE(self):
"""
Collect all salient metadata and
......@@ -507,63 +532,106 @@ class VedaDelivery:
os.chdir(homedir)
return True
def _CIELO24_UPLOAD(self):
if self.video_query.inst_class.c24_proc is False:
return None
def cielo24_transcription_flow(self):
"""
Cielo24 transcription flow.
"""
org = utils.extract_course_org(self.video_proto.platform_course_url[0])
if self.video_query.inst_class.mobile_override is False:
if self.encode_profile != 'desktop_mp4':
return None
try:
api_key = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider).api_key
except TranscriptCredentials.DoesNotExist:
LOGGER.warn('[cielo24] Unable to find api_key for org=%s', org)
return None
C24 = Cielo24Transcript(
veda_id=self.video_query.edx_id
)
output = C24.perform_transcription()
print '[ %s ] : %s' % (
'Cielo24 JOB', self.video_query.edx_id
s3_video_url = build_url(
self.auth_dict['s3_base_url'],
self.auth_dict['edx_s3_endpoint_bucket'],
self.encoded_file
)
def _THREEPLAY_UPLOAD(self):
callback_base_url = build_url(
self.auth_dict['veda_base_url'],
reverse(
'cielo24_transcript_completed',
args=[self.auth_dict['transcript_provider_request_token']]
)
)
if self.video_query.inst_class.tp_proc is False:
return None
if self.video_query.inst_class.mobile_override is False:
if self.encode_profile != 'desktop_mp4':
return None
# update transcript status for video.
val_api_client = VALAPICall(video_proto=None, val_status=None)
utils.update_video_status(
val_api_client=val_api_client,
video=self.video_query,
status=TranscriptStatus.IN_PROGRESS
)
ftp1 = ftplib.FTP(
self.auth_dict['threeplay_ftphost']
cielo24 = Cielo24Transcript(
self.video_query,
org,
api_key,
self.video_query.cielo24_turnaround,
self.video_query.cielo24_fidelity,
self.video_query.preferred_languages,
s3_video_url,
callback_base_url,
self.auth_dict['cielo24_api_base_url'],
)
user = self.video_query.inst_class.tp_username.strip()
passwd = self.video_query.inst_class.tp_password.strip()
cielo24.start_transcription_flow()
def start_3play_transcription_process(self):
"""
3PlayMedia Transcription Flow
"""
try:
ftp1.login(user, passwd)
except:
ErrorObject.print_error(
message='3Play Authentication Failure'
# Picks the first course from the list as there may be multiple
# course runs in that list (i.e. all having the same org).
org = utils.extract_course_org(self.video_proto.platform_course_url[0])
transcript_secrets = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider)
# update transcript status for video.
val_api_client = VALAPICall(video_proto=None, val_status=None)
utils.update_video_status(
val_api_client=val_api_client,
video=self.video_query,
status=TranscriptStatus.IN_PROGRESS
)
try:
ftp1.cwd(
self.video_query.inst_class.tp_speed
# Initialize 3playMedia client and start transcription process
s3_video_url = build_url(
self.auth_dict['s3_base_url'],
self.auth_dict['edx_s3_endpoint_bucket'],
self.encoded_file
)
except:
ftp1.mkd(
self.video_query.inst_class.tp_speed
callback_url = build_url(
self.auth_dict['veda_base_url'],
reverse(
'3play_media_callback',
args=[self.auth_dict['transcript_provider_request_token']]
),
# Additional attributes that'll come back with the callback
org=org,
edx_video_id=self.video_query.studio_id,
lang_code=self.video_query.source_language,
)
ftp1.cwd(
self.video_query.inst_class.tp_speed
three_play_media = ThreePlayMediaClient(
org=org,
video=self.video_query,
media_url=s3_video_url,
api_key=transcript_secrets.api_key,
api_secret=transcript_secrets.api_secret,
callback_url=callback_url,
turnaround_level=self.video_query.three_play_turnaround,
three_play_api_base_url=self.auth_dict['three_play_api_base_url'],
)
os.chdir(self.node_work_directory)
ftp1.storbinary(
'STOR ' + self.encoded_file,
open(os.path.join(
self.node_work_directory,
self.encoded_file
), 'rb')
)
three_play_media.generate_transcripts()
os.chdir(homedir)
except TranscriptCredentials.DoesNotExist:
LOGGER.warning(
'Transcript preference is not found for provider=%s, video=%s',
self.video_query.provider,
self.video_query.studio_id,
)
def _XUETANG_ROUTE(self):
if self.video_query.inst_class.xuetang_proc is False:
......
"""
3PlayMedia Transcription Client
"""
import json
import logging
import requests
import sys
from requests.packages.urllib3.exceptions import InsecurePlatformWarning
from VEDA_OS01.models import TranscriptProcessMetadata, TranscriptProvider, TranscriptStatus
from VEDA_OS01.utils import build_url
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
LOGGER = logging.getLogger(__name__)
class ThreePlayMediaError(Exception):
    """
    Base class for the errors raised during 3PlayMedia actions.
    """
class ThreePlayMediaLanguageNotFoundError(ThreePlayMediaError):
    """
    Raised when a language is missing from the available 3PlayMedia languages.
    """
class ThreePlayMediaPerformTranscriptionError(ThreePlayMediaError):
    """
    Raised when adding media for transcription fails.
    """
class ThreePlayMediaUrlError(ThreePlayMediaError):
    """
    Raised when the media url is inaccessible or has an invalid content type.
    """
class ThreePlayMediaLanguagesRetrievalError(ThreePlayMediaError):
    """
    Raised when the available 3PlayMedia languages cannot be retrieved.
    """
class ThreePlayMediaClient(object):
    """
    Client that submits a video to 3Play Media for transcription and records
    the started process in `TranscriptProcessMetadata`.
    """

    def __init__(
        self,
        org,
        video,
        media_url,
        api_key,
        api_secret,
        callback_url,
        turnaround_level,
        three_play_api_base_url
    ):
        """
        Initialize 3play media client

        Arguments:
            org: organization the video belongs to.
            video: video object; `source_language` and `studio_id` are read from it.
            media_url: URL of the media file handed to 3Play Media.
            api_key: 3Play Media API key.
            api_secret: 3Play Media API secret.
            callback_url: URL sent to 3Play Media as `callback_url`.
            turnaround_level: value sent to 3Play Media as `turnaround_level`.
            three_play_api_base_url: base URL of the 3Play Media API.
        """
        self.org = org
        self.video = video
        self.media_url = media_url
        self.api_key = api_key
        self.api_secret = api_secret
        self.callback_url = callback_url
        self.turnaround_level = turnaround_level

        # default attributes
        self.base_url = three_play_api_base_url
        self.upload_media_file_url = u'files/'
        self.available_languages_url = u'caption_imports/available_languages/'
        self.allowed_content_type = u'video/mp4'

    def validate_media_url(self):
        """
        Validates the media URL

        Raises:
            ThreePlayMediaUrlError: on invalid media url or content type
        """
        if not self.media_url:
            raise ThreePlayMediaUrlError('Invalid media URL "{media_url}".'.format(media_url=self.media_url))

        response = requests.head(url=self.media_url)
        if not response.ok:
            raise ThreePlayMediaUrlError('The URL "{media_url}" is not Accessible.'.format(media_url=self.media_url))

        # Use `.get` so that a response without a Content-Type header is reported
        # as a media url error instead of surfacing as a KeyError.
        content_type = response.headers.get('Content-Type')
        if content_type != self.allowed_content_type:
            raise ThreePlayMediaUrlError(
                'Media content-type should be "{allowed_type}". URL was "{media_url}", content-type was "{type}"'.format(
                    allowed_type=self.allowed_content_type,
                    media_url=self.media_url,
                    type=content_type,
                )
            )

    def get_available_languages(self):
        """
        Gets all the 3Play Media supported languages

        Returns:
            The deserialized response (expected to be a list of languages).

        Raises:
            ThreePlayMediaLanguagesRetrievalError: on a failed request, or when the
                response body is not JSON or deserializes to a dict.
        """
        response = requests.get(url=build_url(self.base_url, self.available_languages_url, apikey=self.api_key))
        if not response.ok:
            raise ThreePlayMediaLanguagesRetrievalError(
                'Error while retrieving available languages: {response} -- {status}'.format(
                    response=response.text, status=response.status_code
                )
            )

        # A normal response should be a list containing 3Play Media supported languages and if we're getting a dict,
        # there must be an error: https://support.3playmedia.com/hc/en-us/articles/227729968-Captions-Imports-API
        try:
            available_languages = json.loads(response.text)
        except ValueError:
            # A body that is not JSON at all is just as unusable as an error dict.
            raise ThreePlayMediaLanguagesRetrievalError(
                'Expected 3Play Media Supported languages but got: {response}'.format(response=response.text)
            )

        if isinstance(available_languages, dict):
            raise ThreePlayMediaLanguagesRetrievalError(
                'Expected 3Play Media Supported languages but got: {response}'.format(response=response.text)
            )

        return available_languages

    def get_source_language_id(self, languages, source_language_code):
        """
        Extracts language id for a language that matches `source_language_code`
        from the given 3Play Media languages.

        Arguments:
            languages(list): 3PlayMedia supported languages.
            source_language_code(unicode): A video source language code whose 3Play language id is required.

        Returns:
            The `language_id` of the first matching language, or None when nothing matches.
        """
        for language in languages:
            if language['iso_639_1_code'] == source_language_code:
                return language['language_id']
        return None

    def submit_media(self):
        """
        Submits the media to perform transcription.

        Returns:
            The response text of the upload request (the 3Play Media file id).

        Raises:
            ThreePlayMediaUrlError: when the media url fails validation
            ThreePlayMediaLanguagesRetrievalError: when supported languages cannot be retrieved
            ThreePlayMediaPerformTranscriptionError: error while transcription process
        """
        self.validate_media_url()

        # Prepare requests payload
        payload = dict(
            # Mandatory attributes required for transcription
            link=self.media_url,
            apikey=self.api_key,
            api_secret_key=self.api_secret,
            turnaround_level=self.turnaround_level,
            callback_url=self.callback_url,
        )

        # The language id is only sent when the video's source language is known to 3Play Media.
        available_languages = self.get_available_languages()
        source_language_id = self.get_source_language_id(available_languages, self.video.source_language)
        if source_language_id:
            payload['language_id'] = source_language_id

        upload_url = build_url(self.base_url, self.upload_media_file_url)
        response = requests.post(url=upload_url, json=payload)

        if not response.ok:
            raise ThreePlayMediaPerformTranscriptionError(
                'Upload file request failed with: {response} -- {status}'.format(
                    response=response.text, status=response.status_code
                )
            )

        # A normal response should be a text containing file id and if we're getting a deserializable dict, there
        # must be an error: http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        try:
            deserialized_response = json.loads(response.text)
        except ValueError:
            # Plain text that is not JSON cannot be an error dict, so it is treated as the file id.
            deserialized_response = None

        if isinstance(deserialized_response, dict):
            raise ThreePlayMediaPerformTranscriptionError(
                'Expected file id but got: {response}'.format(response=response.text)
            )

        return response.text

    def generate_transcripts(self):
        """
        Kicks off transcription process for default language.

        3PlayMedia errors are logged and swallowed; any other exception is logged
        and re-raised.
        """
        try:
            file_id = self.submit_media()
            # Track progress of transcription process
            TranscriptProcessMetadata.objects.create(
                video=self.video,
                process_id=file_id,
                lang_code=self.video.source_language,
                provider=TranscriptProvider.THREE_PLAY,
                status=TranscriptStatus.IN_PROGRESS,
            )
            # Successfully kicked off transcription process for a video with the given language.
            LOGGER.info(
                '[3PlayMedia] Transcription process has been started for video=%s, source_language=%s.',
                self.video.studio_id,
                self.video.source_language,
            )
        except ThreePlayMediaError:
            LOGGER.exception(
                '[3PlayMedia] Could not process transcripts for video=%s source_language=%s.',
                self.video.studio_id,
                self.video.source_language,
            )
        except Exception:
            LOGGER.exception(
                '[3PlayMedia] Unexpected error while transcription for video=%s source_language=%s.',
                self.video.studio_id,
                self.video.source_language,
            )
            raise
"""
Cielo24 Integration
"""
import ast
import logging
import json
import os
import sys
import requests
from requests.auth import HTTPBasicAuth
import ast
import urllib
from requests.packages.urllib3.exceptions import InsecurePlatformWarning
"""
Cielo24 API Job Start and Download
from VEDA_OS01.models import (TranscriptProcessMetadata, TranscriptProvider,
TranscriptStatus)
from VEDA_OS01.utils import build_url
Options (reflected in Course.models):
transcription_fidelity =
Mechanical (75%),
Premium (95%)(3-72h),
Professional (99+%)(3-72h)
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
priority =
standard (24h),
priority (48h)
LOGGER = logging.getLogger(__name__)
turnaround_hours = number, overrides 'priority' call, will change a standard to a priority silently
"""
from control_env import *
from veda_utils import ErrorObject, Output
requests.packages.urllib3.disable_warnings()
class Cielo24Transcript():
def __init__(self, veda_id):
self.veda_id = veda_id
'''Defaults'''
self.c24_site = 'https://api.cielo24.com/api'
self.c24_login = '/account/login'
self.c24_joblist = '/job/list'
self.c24_newjob = '/job/new'
self.add_media = '/job/add_media'
self.transcribe = '/job/perform_transcription'
'''Retreive C24 Course-based defaults'''
self.c24_defaults = self.retrieve_defaults()
def perform_transcription(self):
if self.c24_defaults['c24_user'] is None:
return None
'''
GET /api/job/perform_transcription?v=1 HTTP/1.1
&api_token=xxxx
&job_id=xxxx
&transcription_fidelity=PREMIUM&priority=STANDARD
Host: api.cielo24.com
'''
api_token = self.tokengenerator()
if api_token is None:
return None
job_id = self.generate_jobs(api_token)
task_id = self.embed_url(api_token, job_id)
r5 = requests.get(
''.join((
self.c24_site,
self.transcribe,
'?v=1&api_token=',
api_token,
'&job_id=',
job_id,
'&transcription_fidelity=',
self.c24_defaults['c24_fidelity'],
'&priority=',
self.c24_defaults['c24_speed']
))
class Cielo24Error(Exception):
    """
    Base class for the errors raised during cielo24 actions.
    """
class Cielo24CreateJobError(Cielo24Error):
    """
    Raised when creating a new cielo24 job fails.
    """
class Cielo24AddMediaError(Cielo24Error):
    """
    Raised when the add media request fails.
    """
class Cielo24PerformTranscriptError(Cielo24Error):
    """
    Raised when the perform transcript request fails.
    """
class Cielo24Transcript(object):
"""
Cielo24 Integration
"""
def __init__(
self,
video,
org,
api_key,
turnaround,
fidelity,
preferred_languages,
s3_video_url,
callback_base_url,
cielo24_api_base_url
):
self.org = org
self.video = video
self.api_key = api_key
self.fidelity = fidelity
self.turnaround = turnaround
self.preferred_languages = preferred_languages
self.s3_video_url = s3_video_url
self.callback_base_url = callback_base_url
# Defaults
self.cielo24_api_base_url = cielo24_api_base_url
self.cielo24_new_job = '/job/new'
self.cielo24_add_media = '/job/add_media'
self.cielo24_perform_transcription = '/job/perform_transcription'
def start_transcription_flow(self):
"""
Start cielo24 transcription flow.
This will do the following steps:
For each preferred language:
1. create a new job
2. add media url
3. perform transcript
"""
job_id = None
for preferred_lang in self.preferred_languages:
try:
job_id = self.create_job()
transcript_process_metadata = TranscriptProcessMetadata.objects.create(
video=self.video,
process_id=job_id,
lang_code=preferred_lang,
provider=TranscriptProvider.CIELO24,
status=TranscriptStatus.IN_PROGRESS
)
self.embed_media_url(job_id)
self.perform_transcript(job_id, preferred_lang)
except Cielo24Error as ex:
if job_id:
transcript_process_metadata.status = TranscriptStatus.FAILED
transcript_process_metadata.save()
LOGGER.exception(
'[CIELO24] Request failed for video=%s -- lang=%s -- job_id=%s',
self.video.studio_id,
preferred_lang,
job_id
)
def perform_transcript(self, job_id, lang_code):
"""
Request cielo24 to generate transcripts for a video.
"""
callback_url = build_url(
self.callback_base_url,
job_id=job_id,
iwp_name='{iwp_name}',
lang_code=lang_code,
org=self.org,
video_id=self.video.studio_id
)
return ast.literal_eval(r5.text)['TaskId']
def retrieve_defaults(self):
video_query = Video.objects.filter(
edx_id=self.veda_id
).latest()
if video_query.inst_class.mobile_override is True:
url_query = URL.objects.filter(
videoID=video_query,
encode_url__icontains='_LBO.mp4',
).latest()
else:
url_query = URL.objects.filter(
videoID=video_query,
encode_url__icontains='_DTH.mp4',
).latest()
if video_query.inst_class.c24_username is None:
ErrorObject.print_error(
message='Cielo24 Record Incomplete',
)
return None
c24_defaults = {
'c24_user': video_query.inst_class.c24_username,
'c24_pass': video_query.inst_class.c24_password,
'c24_speed': video_query.inst_class.c24_speed,
'c24_fidelity': video_query.inst_class.c24_fidelity,
'edx_id': self.veda_id,
'url': url_query.encode_url
}
return c24_defaults
def tokengenerator(self):
token_url = self.c24_site + self.c24_login + \
'?v=1&username=' + self.c24_defaults['c24_user'] + \
'&password=' + self.c24_defaults['c24_pass']
# Generate Token
r1 = requests.get(token_url)
if r1.status_code > 299:
ErrorObject.print_error(
message='Cielo24 API Access Error',
response = requests.get(
build_url(
self.cielo24_api_base_url,
self.cielo24_perform_transcription,
v=1,
job_id=job_id,
target_language=lang_code,
callback_url=callback_url,
api_token=self.api_key,
priority=self.turnaround,
transcription_fidelity=self.fidelity,
options=json.dumps({"return_iwp": ["FINAL"]})
)
return None
api_token = ast.literal_eval(r1.text)["ApiToken"]
return api_token
def listjobs(self):
"""List Jobs"""
api_token = self.tokengenerator()
r2 = requests.get(
''.join((
self.c24_site,
self.c24_joblist,
'?v=1&api_token=',
api_token
))
)
job_list = r2.text
return job_list
def generate_jobs(self, api_token):
"""
'https://api.cielo24.com/job/new?v=1&\
api_token=xxx&job_name=xxx&language=en'
"""
r3 = requests.get(
''.join((
self.c24_site,
self.c24_newjob,
'?v=1&api_token=',
api_token,
'&job_name=',
self.c24_defaults['edx_id'],
'&language=en'
))
if not response.ok:
raise Cielo24PerformTranscriptError(
'[PERFORM TRANSCRIPT ERROR] status={} -- text={}'.format(
response.status_code,
response.text
)
)
task_id = ast.literal_eval(response.text)['TaskId']
LOGGER.info(
'[CIELO24] Perform transcript request successful for video=%s with job_id=%s and task_id=%s',
self.video.studio_id,
job_id,
task_id
)
job_id = ast.literal_eval(r3.text)['JobId']
return job_id
def embed_url(self, api_token, job_id):
def embed_media_url(self, job_id):
"""
GET /api/job/add_media?v=1&api_token=xxxx
&job_id=xxxxx
&media_url=http%3A%2F%2Fwww.domain.com%2Fvideo.mp4 HTTP/1.1
Host: api.cielo24.com
Create cielo24 add media url.
Arguments:
job_id (str): cielo24 job id
Returns:
cielo24 task id
"""
r4 = requests.get(
''.join((
self.c24_site,
self.add_media,
'?v=1&api_token=',
api_token,
'&job_id=',
job_id,
'&media_url=',
urllib.quote_plus(self.c24_defaults['url'])
))
response = requests.get(
build_url(
self.cielo24_api_base_url,
self.cielo24_add_media,
v=1,
job_id=job_id,
api_token=self.api_key,
media_url=self.s3_video_url
)
)
print str(r4.status_code) + ' : Cielo24 Status Code'
return ast.literal_eval(r4.text)['TaskId']
if not response.ok:
raise Cielo24AddMediaError(
'[ADD MEDIA ERROR] status={} -- text={}'.format(
response.status_code,
response.text
)
)
def main():
pass
task_id = ast.literal_eval(response.text)['TaskId']
LOGGER.info(
'[CIELO24] Media url created for video=%s with job_id=%s and task_id=%s',
self.video.studio_id,
job_id,
task_id
)
return task_id
def create_job(self):
"""
Create new job for transcription.
Returns:
cielo24 job id
"""
create_job_url = build_url(
self.cielo24_api_base_url,
self.cielo24_new_job,
v=1,
language=self.video.source_language,
api_token=self.api_key,
job_name=self.video.studio_id
)
response = requests.get(create_job_url)
if not response.ok:
raise Cielo24CreateJobError(
'[CREATE JOB ERROR] url={} -- status={} -- text={}'.format(
create_job_url,
response.status_code,
response.text,
)
)
if __name__ == "__main__":
sys.exit(main())
job_id = ast.literal_eval(response.text)['JobId']
LOGGER.info(
'[CIELO24] New job created for video=%s with job_id=%s',
self.video.studio_id,
job_id
)
return job_id
import json
import logging
import os.path
import boto
import boto.s3
from boto.exception import S3ResponseError, S3DataError
import yaml
from VEDA_OS01.models import TranscriptCredentials
from VEDA_OS01.utils import extract_course_org
try:
boto.config.add_section('Boto')
except:
......@@ -26,6 +31,8 @@ from veda_utils import ErrorObject
from veda_file_ingest import VideoProto, VedaIngest
from veda_val import VALAPICall
LOGGER = logging.getLogger(__name__)
class FileDiscovery(object):
......@@ -159,6 +166,7 @@ class FileDiscovery(object):
client_title = meta.get_metadata('client_video_id')
course_hex = meta.get_metadata('course_video_upload_token')
course_url = meta.get_metadata('course_key')
transcript_preferences = meta.get_metadata('transcript_preferences')
edx_filename = key.name[::-1].split('/')[0][::-1]
if len(course_hex) == 0:
......@@ -226,24 +234,48 @@ class FileDiscovery(object):
key.delete()
return
"""
Trigger Ingest Process
"""
V = VideoProto(
# Make decision if this video needs the transcription as well.
try:
transcript_preferences = json.loads(transcript_preferences)
TranscriptCredentials.objects.get(
org=extract_course_org(course_url),
provider=transcript_preferences.get('provider')
)
process_transcription = True
except (TypeError, TranscriptCredentials.DoesNotExist):
# when the preferences are not set OR these are set to some data in invalid format OR these don't
# have associated 3rd party transcription provider API keys.
process_transcription = False
except ValueError:
LOGGER.error('[VIDEO-PIPELINE] File Discovery - Invalid transcripts preferences=%s', transcript_preferences)
process_transcription = False
# Trigger Ingest Process
video_metadata = dict(
s3_filename=edx_filename,
client_title=client_title,
file_extension=file_extension,
platform_course_url=course_url
platform_course_url=course_url,
)
I = VedaIngest(
if process_transcription:
video_metadata.update({
'process_transcription': process_transcription,
'provider': transcript_preferences.get('provider'),
'three_play_turnaround': transcript_preferences.get('three_play_turnaround'),
'cielo24_turnaround': transcript_preferences.get('cielo24_turnaround'),
'cielo24_fidelity': transcript_preferences.get('cielo24_fidelity'),
'preferred_languages': transcript_preferences.get('preferred_languages'),
'source_language': transcript_preferences.get('video_source_language'),
})
ingest = VedaIngest(
course_object=course_query[0],
video_proto=V,
video_proto=VideoProto(**video_metadata),
node_work_directory=self.node_work_directory
)
I.insert()
ingest.insert()
if I.complete is False:
if ingest.complete is False:
return
"""
......
import logging
import os
import sys
import subprocess
......@@ -7,6 +7,7 @@ from datetime import timedelta
import time
import fnmatch
import django
from django.db.utils import DatabaseError
from django.utils.timezone import utc
from django.db import reset_queries
import uuid
......@@ -32,6 +33,10 @@ from veda_val import VALAPICall
from veda_encode import VedaEncode
import celeryapp
from VEDA_OS01.models import TranscriptStatus
LOGGER = logging.getLogger(__name__)
'''
V = VideoProto(
s3_filename=edx_filename,
......@@ -59,9 +64,17 @@ class VideoProto():
self.file_extension = kwargs.get('file_extension', None)
self.platform_course_url = kwargs.get('platform_course_url', None)
self.abvid_serial = kwargs.get('abvid_serial', None)
"""
Determined Attrib
"""
# Transcription Process related Attributes
self.process_transcription = kwargs.get('process_transcription', False)
self.provider = kwargs.get('provider', None)
self.three_play_turnaround = kwargs.get('three_play_turnaround', None)
self.cielo24_turnaround = kwargs.get('cielo24_turnaround', None)
self.cielo24_fidelity = kwargs.get('cielo24_fidelity', None)
self.preferred_languages = kwargs.get('preferred_languages', [])
self.source_language = kwargs.get('source_language', None)
# Determined Attributes
self.valid = False
self.filesize = 0
self.duration = 0
......@@ -325,6 +338,17 @@ class VedaIngest:
self.complete = True
return None
# Update transcription preferences for the Video
if self.video_proto.process_transcription:
v1.process_transcription = self.video_proto.process_transcription
v1.transcript_status = TranscriptStatus.PENDING
v1.provider = self.video_proto.provider
v1.three_play_turnaround = self.video_proto.three_play_turnaround
v1.cielo24_turnaround = self.video_proto.cielo24_turnaround
v1.cielo24_fidelity = self.video_proto.cielo24_fidelity
v1.preferred_languages = self.video_proto.preferred_languages
v1.source_language = self.video_proto.source_language
"""
Files Below are all valid
"""
......@@ -345,7 +369,8 @@ class VedaIngest:
"""
try:
v1.save()
except:
except DatabaseError:
# in case the client title is too long
char_string = self.video_proto.client_title
string_len = len(char_string)
s1 = 0
......@@ -360,6 +385,11 @@ class VedaIngest:
v1.client_title = final_string
v1.save()
except Exception:
# Log the exception and raise.
LOGGER.exception('[VIDEO-PIPELINE] File Ingest - Cataloging of video=%s failed.', self.video_proto.veda_id)
raise
def val_insert(self):
if self.video_proto.abvid_serial is not None:
return None
......
......@@ -8,14 +8,19 @@ Roll through videos, check for completion
"""
import datetime
import uuid
from datetime import timedelta
import os
import sys
import uuid
import yaml
from django.utils.timezone import utc
from VEDA_OS01.models import Encode, URL, Video
from VEDA_OS01.utils import VAL_TRANSCRIPT_STATUS_MAP
import celeryapp
from control_env import *
from control_env import WORK_DIRECTORY
from veda_encode import VedaEncode
from veda_val import VALAPICall
......@@ -73,6 +78,11 @@ class VedaHeal(object):
encode_list = self.determine_fault(video_object=v)
# Using the 'Video Proto' Model
if self.val_status is not None:
# VAL is also updated for videos that are already marked complete.
# These retries keep the data in parity between VAL and VEDA, because calls to the VAL API are
# unreliable and can time out. For a completed Video, VEDA heal keeps doing this until
# the Video is old enough to fall outside the time span that HEAL picks up.
# cc Greg Martin
VAC = VALAPICall(
video_proto=None,
video_object=v,
......@@ -82,14 +92,15 @@ class VedaHeal(object):
self.val_status = None
# Enqueue
for e in encode_list:
veda_id = v.edx_id
encode_profile = e
jobid = uuid.uuid1().hex[0:10]
celeryapp.worker_task_fire.apply_async(
(veda_id, encode_profile, jobid),
queue=self.auth_dict['celery_worker_queue']
)
if self.auth_dict['rabbitmq_broker'] is not None:
for e in encode_list:
veda_id = v.edx_id
encode_profile = e
jobid = uuid.uuid1().hex[0:10]
celeryapp.worker_task_fire.apply_async(
(veda_id, encode_profile, jobid),
queue=self.auth_dict['celery_worker_queue']
)
def determine_fault(self, video_object):
"""
......@@ -119,8 +130,9 @@ class VedaHeal(object):
course_object=video_object.inst_class,
).determine_encodes()
try:
uncompleted_encodes.remove('review')
except ValueError:
if uncompleted_encodes:
uncompleted_encodes.remove('review')
except KeyError:
pass
# list comparison
......@@ -137,8 +149,19 @@ class VedaHeal(object):
# These encodes don't count towards 'file_complete'
if e != 'mobile_high' and e != 'audio_mp3' and e != 'review':
check_list.append(e)
# Check whether VEDA's Video model already has a transcript status that corresponds
# to one of VAL's Video transcript statuses. If it does, set `val_status` to that status
# instead of `file_complete`: the transcription phase comes after the encoding phase of a Video,
# and `file_complete` only shows that a Video's encodes are complete, while the Video may
# also have gone through the transcription phase after the encodes were ready.
val_transcription_status = VAL_TRANSCRIPT_STATUS_MAP.get(video_object.transcript_status, None)
if check_list is None or len(check_list) == 0:
self.val_status = 'file_complete'
if val_transcription_status:
self.val_status = val_transcription_status
else:
self.val_status = 'file_complete'
# File is complete!
# Check for data parity, and call done
if video_object.video_trans_status != 'Complete':
......@@ -155,7 +178,11 @@ class VedaHeal(object):
if self.determine_longterm_corrupt(uncompleted_encodes, expected_encodes, video_object):
return []
if self.val_status != 'file_complete':
complete_statuses = ['file_complete']
if val_transcription_status:
complete_statuses.append(val_transcription_status)
if self.val_status not in complete_statuses:
self.val_status = 'transcode_queue'
return uncompleted_encodes
......
import logging
import os
import sys
import requests
......@@ -7,6 +8,8 @@ import json
import datetime
import yaml
LOGGER = logging.getLogger(__name__)
requests.packages.urllib3.disable_warnings()
......@@ -386,6 +389,64 @@ class VALAPICall():
)
)
def update_val_transcript(self, video_id, lang_code, name, transcript_format, provider):
    """
    Update status for a completed transcript.

    POSTs the transcript metadata to the URL stored under
    `val_transcript_create_url` in `self.auth_dict`. A non-OK response is
    logged as an error; it is not raised and nothing is returned.

    Arguments:
        video_id: sent as `video_id`
        lang_code: sent as `language_code`
        name: sent as `name`
        transcript_format: sent as `file_format`
        provider: sent as `provider`
    """
    # Generate a token first when none has been set yet.
    if self.val_token is None:
        self.val_tokengen()

    post_data = {
        'video_id': video_id,
        'name': name,
        'provider': provider,
        'language_code': lang_code,
        'file_format': transcript_format,
    }

    response = requests.post(
        self.auth_dict['val_transcript_create_url'],
        json=post_data,
        headers=self.headers,
        timeout=20
    )

    if not response.ok:
        # Every placeholder needs a conversion type: a bare `%` would swallow the
        # following ` -- s` as flags + type and garble the logged message.
        LOGGER.error(
            'update_val_transcript failed -- video_id=%s -- provider=%s -- status=%s -- content=%s',
            video_id,
            provider,
            response.status_code,
            response.content,
        )
def update_video_status(self, video_id, status):
    """
    Send a video's transcript status to VAL.

    PATCHes `edx_video_id` and `status` to the URL stored under
    `val_video_transcript_status_url` in `self.auth_dict`. A non-OK
    response is logged as an error; nothing is raised or returned.
    """
    # Generate a token first when none has been set yet.
    if self.val_token is None:
        self.val_tokengen()

    payload = {'edx_video_id': video_id, 'status': status}
    patch_result = requests.patch(
        self.auth_dict['val_video_transcript_status_url'],
        json=payload,
        headers=self.headers,
        timeout=20
    )

    if patch_result.ok:
        return

    # The logged "status" is the HTTP status code of the failed request.
    LOGGER.error(
        'update_video_status failed -- video_id=%s -- status=%s -- text=%s',
        video_id,
        patch_result.status_code,
        patch_result.text
    )
# Placeholder entry point: the body is a bare `pass`, so calling it does nothing.
def main():
pass
......
......@@ -5,6 +5,9 @@ import subprocess
import fnmatch
import django
from control.control_env import FFPROBE
from VEDA_OS01.models import Video
"""
VEDA Intake/Product Final Testing Suite
......@@ -15,10 +18,9 @@ image files (which read as 0:00 duration or N/A)
Mismatched Durations (within 5 sec)
"""
from control_env import *
class Validation():
class Validation(object):
"""
Expects a full filepath
"""
......@@ -43,7 +45,6 @@ class Validation():
FFPROBE,
"\"" + self.videofile + "\""
))
"""
Test if size is zero
"""
......@@ -61,6 +62,10 @@ class Validation():
if "multiple edit list entries, a/v desync might occur, patch welcome" in line:
return False
if "command not found" in line:
print line
return False
if "Duration: " in line:
if "Duration: 00:00:00.0" in line:
return False
......
......@@ -2,6 +2,7 @@
import os
import sys
import unittest
from django.test import TestCase
"""
A basic unittest for the "Course Addition Tool"
......@@ -14,7 +15,7 @@ sys.path.append(
import abvid_reporting
from frontend.course_validate import VEDACat
class TestVariables(unittest.TestCase):
class TestVariables(TestCase):
def setUp(self):
self.VCT = VEDACat()
......
......@@ -2,6 +2,7 @@
import os
import sys
import unittest
from django.test import TestCase
"""
A basic unittest for the "Course Addition Tool"
......@@ -14,7 +15,7 @@ sys.path.append(
from course_validate import VEDACat
class TestVariables(unittest.TestCase):
class TestVariables(TestCase):
def setUp(self):
self.VCT = VEDACat()
......
......@@ -32,17 +32,38 @@ debug: false
edx_s3_ingest_prefix:
edx_s3_ingest_bucket:
edx_s3_endpoint_bucket:
# CF
edx_cloudfront_prefix:
# Images
aws_video_images_bucket:
aws_video_images_prefix: "video-images/"
# VEDA Internal
veda_s3_upload_bucket:
veda_s3_hotstore_bucket:
veda_deliverable_bucket:
# Settings
multi_upload_barrier: 2000000000
veda_base_url:
s3_base_url: https://s3.amazonaws.com
# Transcripts
aws_video_transcripts_bucket:
aws_video_transcripts_prefix: video-transcripts/
# cielo24 api urls
cielo24_api_base_url: https://sandbox.cielo24.com/api
# 3playmedia api urls
three_play_api_base_url: https://api.3playmedia.com/
three_play_api_transcript_url: https://static.3playmedia.com/
# a token identifying a valid request from transcript provider
transcript_provider_request_token: testtoken
# Ingest Secret
# TODO: Eliminate access key after AWS Support ticket 08/20/17 regarding cross-account IAM role access.
......@@ -76,6 +97,8 @@ val_client_id:
val_secret_key:
val_password:
val_username:
val_transcript_create_url:
val_video_transcript_status_url:
# ---
# Celery Info
......
[pytest]
DJANGO_SETTINGS_MODULE = VEDA.test_settings
## NOTE: This is not a working req file -- merely a collection for notes.
##
django==1.9
djangorestframework==3.6.4
django-cors-headers
django-oauth-toolkit==0.11.0
django-model-utils==3.0.0
django-filter==1.0.4
newrelic
uwsgi
......@@ -11,3 +12,5 @@ boto
pyyaml
requests==2.18.1
celery==3.1.18
pysrt==1.1.1
edx-opaque-keys==0.4
\ No newline at end of file
---
veda_s3_hotstore_bucket: s3_hotstore_bucket
multi_upload_barrier: 2000000000
veda_base_url: https://veda.edx.org
s3_base_url: https://s3.amazonaws.com
# transcript bucket config
aws_video_transcripts_bucket: bucket_name
aws_video_transcripts_prefix: video-transcripts/
# cielo24 api urls
cielo24_api_base_url: https://sandbox.cielo24.com/api
# 3playmedia api urls
three_play_api_base_url: https://api.3playmedia.com/
three_play_api_transcript_url: https://static.3playmedia.com/
# a token identifying a valid request from transcript provider
transcript_provider_request_token: 1234a5a67cr890
# ---
# VAL
# ---
val_api_url: http://val.edx.org/api
val_token_url: http://val.edx.org/token
val_video_images_url:
# Credentials
val_client_id: client
val_secret_key: secret
val_password: password
val_username: username
val_transcript_create_url: http://val.edx.org/transcript/create
val_video_transcript_status_url: http://val.edx.org/video/status
celery_worker_queue: encode_worker
celery_deliver_queue: deliver_worker
# ----------
##---
# This is a list of encodes and their respective course
# boolean matches
encode_dict:
review_proc:
- review
mobile_override:
- override
s3_proc:
- mobile_high
- mobile_low
- audio_mp3
- desktop_webm
- desktop_mp4
- hls
yt_proc:
- youtube
# This is a list of encode profiles and their val profile matches
# boolean matches
val_profile_dict:
mobile_low:
- mobile_low
desktop_mp4:
- desktop_mp4
override:
- desktop_mp4
- mobile_low
- mobile_high
mobile_high:
- mobile_high
audio_mp3:
- audio_mp3
desktop_webm:
- desktop_webm
youtube:
- youtube
review:
hls:
- hls
# Heal settings
heal_start: 2
heal_end: 50
global_timeout: 40
## NOTE: Test requirements.
codecov==2.0.9
pep8==1.7.0
coverage==3.7.1
coverage==4.2
isort==4.2.15
ddt==1.1.1
moto==1.0.1
responses==0.6.1
pytest==3.0.6
pytest-django==3.1.2
pytest-django-ordering==1.0.1
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment