Commit 0cd1f00c by Qubad786 Committed by muzaffaryousaf

3Play Media API Integration

This implements 3PlayMedia API integration in pipeline.
Other than that, it also includes:
-- rename 'Transcript Preferences' to 'Transcript Credentials'.
-- Add migrations.
-- Rename 'Transcription Ready' to 'Transcript Ready'.
parent fdfb34cf
......@@ -39,4 +39,10 @@ urlpatterns = [
view=transcripts.Cielo24CallbackHandlerView.as_view(),
name='cielo24_transcript_completed'
),
# 3PlayMedia callback handler view
url(
regex=r'^3playmedia/transcripts/handle/(?P<token>[\w]+)$',
view=transcripts.ThreePlayMediaCallbackHandlerView.as_view(),
name='3play_media_callback'
)
]
......@@ -2,7 +2,7 @@ from django.contrib import admin
from VEDA_OS01.models import (
Course, Video, Encode, URL, Destination, Institution, VedaUpload,
TranscriptPreferences, TranscriptProcessMetadata
TranscriptCredentials, TranscriptProcessMetadata
)
......@@ -111,8 +111,8 @@ class VideoUploadAdmin(admin.ModelAdmin):
]
class TranscriptPreferencesAdmin(admin.ModelAdmin):
model = TranscriptPreferences
class TranscriptCredentialsAdmin(admin.ModelAdmin):
model = TranscriptCredentials
class TranscriptProcessMetadataAdmin(admin.ModelAdmin):
......@@ -126,5 +126,5 @@ admin.site.register(URL, URLAdmin)
admin.site.register(Destination, DestinationAdmin)
admin.site.register(Institution, InstitutionAdmin)
admin.site.register(VedaUpload, VideoUploadAdmin)
admin.site.register(TranscriptPreferences, TranscriptPreferencesAdmin)
admin.site.register(TranscriptCredentials, TranscriptCredentialsAdmin)
admin.site.register(TranscriptProcessMetadata, TranscriptProcessMetadataAdmin)
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2017-09-11 11:13
from __future__ import unicode_literals
import VEDA_OS01.models
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
import model_utils.fields
# Schema migration for the transcript integration: creates the
# TranscriptCredentials and TranscriptProcessMetadata models, adds the
# transcription related fields to Video and redeclares the Video status choices.
class Migration(migrations.Migration):
# Applied on top of the app's initial migration.
dependencies = [
('VEDA_OS01', '0001_initial'),
]
operations = [
# Per-organization credentials for a transcript provider.
migrations.CreateModel(
name='TranscriptCredentials',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
('org', models.CharField(help_text=b'This value must match the value of organization in studio/edx-platform.', max_length=50, verbose_name=b'Organization')),
('provider', models.CharField(choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=50, verbose_name=b'Transcript provider')),
('api_key', models.CharField(max_length=255, verbose_name=b'API key')),
('api_secret', models.CharField(blank=True, max_length=255, null=True, verbose_name=b'API secret')),
],
options={
'verbose_name_plural': 'Transcript Credentials',
},
),
# One row per transcript/translation process of a video; `translation_id`
# is optional (nullable and blank).
migrations.CreateModel(
name='TranscriptProcessMetadata',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
('provider', models.CharField(choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=50, verbose_name=b'Transcript provider')),
('process_id', models.CharField(max_length=255, verbose_name=b'Process id')),
('translation_id', models.CharField(blank=True, max_length=255, null=True, verbose_name=b'Translation id')),
('lang_code', models.CharField(max_length=8, verbose_name=b'Language code')),
('status', models.CharField(choices=[(b'PENDING', b'PENDING'), (b'IN PROGRESS', b'IN PROGRESS'), (b'FAILED', b'FAILED'), (b'READY', b'READY')], default=b'PENDING', max_length=50, verbose_name=b'Transcript status')),
],
options={
'get_latest_by': 'modified',
'verbose_name_plural': 'Transcript process metadata',
},
),
# Transcription settings stored on the Video model.
migrations.AddField(
model_name='video',
name='cielo24_fidelity',
field=models.CharField(blank=True, choices=[(b'MECHANICAL', b'Mechanical, 75% Accuracy'), (b'PREMIUM', b'Premium, 95% Accuracy'), (b'PROFESSIONAL', b'Professional, 99% Accuracy')], max_length=20, null=True, verbose_name=b'Cielo24 Fidelity'),
),
migrations.AddField(
model_name='video',
name='cielo24_turnaround',
field=models.CharField(blank=True, choices=[(b'STANDARD', b'Standard, 48h'), (b'PRIORITY', b'Priority, 24h')], max_length=20, null=True, verbose_name=b'Cielo24 Turnaround'),
),
# NOTE(review): the default is a literal list (`[]`); whether ListField
# copies it per instance is not visible from this file -- confirm that it
# is not shared between rows.
migrations.AddField(
model_name='video',
name='preferred_languages',
field=VEDA_OS01.models.ListField(blank=True, default=[]),
),
migrations.AddField(
model_name='video',
name='process_transcription',
field=models.BooleanField(default=False, verbose_name=b'Process transcripts from Cielo24/3PlayMedia'),
),
migrations.AddField(
model_name='video',
name='provider',
field=models.CharField(blank=True, choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=20, null=True, verbose_name=b'Transcription provider'),
),
migrations.AddField(
model_name='video',
name='three_play_turnaround',
field=models.CharField(blank=True, choices=[(b'extended_service', b'10-Day/Extended'), (b'default', b'4-Day/Default'), (b'expedited_service', b'2-Day/Expedited'), (b'rush_service', b'24 hour/Rush'), (b'same_day_service', b'Same Day')], max_length=20, null=True, verbose_name=b'3PlayMedia Turnaround'),
),
# Redeclares the complete status choice list of Video, which includes the
# 'transcription_in_progress' and 'transcript_ready' statuses.
migrations.AlterField(
model_name='video',
name='video_trans_status',
field=models.CharField(choices=[(b'Ingest', b'System Ingest'), (b'Transcode Queue', b'Transcode Queue'), (b'Active Transcode', b'Active Transcode'), (b'Transcode Retry', b'Transcode Retry'), (b'Transcode Complete', b'Transcode Complete'), (b'Deliverable Upload', b'Deliverable Upload'), (b'File Complete', b'File Complete'), (b'Transcode Error', b'Transcode Error'), (b'Corrupt File', b'Corrupt File on Ingest'), (b'Review Hold', b'Review Hold'), (b'Review Reject', b'Review Rejected'), (b'Final Publish', b'Review to Final Publish'), (b'Youtube Duplicate', b'Youtube Duplicate'), (b'In Encode Queue', b'In Encode Queue'), (b'Progress', b'In Progress'), (b'Complete', b'Complete'), (b'transcription_in_progress', b'Transcription In Progress'), (b'transcript_ready', b'Transcript Ready')], default=b'Ingest', max_length=100, verbose_name=b'Transcode Status'),
),
# Links every transcript process to its video; deleting the video cascades.
migrations.AddField(
model_name='transcriptprocessmetadata',
name='video',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='VEDA_OS01.Video'),
),
# At most one credentials row per (org, provider) pair.
migrations.AlterUniqueTogether(
name='transcriptcredentials',
unique_together=set([('org', 'provider')]),
),
]
......@@ -127,7 +127,7 @@ class VideoStatus(object):
PROGRESS = 'Progress'
COMPLETE = 'Complete'
TRANSCRIPTION_IN_PROGRESS = 'transcription_in_progress'
TRANSCRIPTION_READY = 'transcription_ready'
TRANSCRIPT_READY = 'transcript_ready'
CHOICES = (
(SI, 'System Ingest'),
......@@ -147,7 +147,7 @@ class VideoStatus(object):
(PROGRESS, 'In Progress'),
(COMPLETE, 'Complete'),
(TRANSCRIPTION_IN_PROGRESS, 'Transcription In Progress'),
(TRANSCRIPTION_READY, 'Transcription Ready'),
(TRANSCRIPT_READY, 'Transcript Ready'),
)
......@@ -619,9 +619,9 @@ class VedaUpload (models.Model):
)
class TranscriptPreferences(TimeStampedModel):
class TranscriptCredentials(TimeStampedModel):
"""
Model to contain third party transcription service provider preferances.
Model to contain third party transcription service provider preferences.
"""
org = models.CharField(
'Organization',
......@@ -634,7 +634,7 @@ class TranscriptPreferences(TimeStampedModel):
class Meta:
unique_together = ('org', 'provider')
verbose_name_plural = 'Transcript preferences'
verbose_name_plural = 'Transcript Credentials'
def __unicode__(self):
return u'{org} - {provider}'.format(org=self.org, provider=self.provider)
......@@ -647,7 +647,9 @@ class TranscriptProcessMetadata(TimeStampedModel):
video = models.ForeignKey(Video)
provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES)
process_id = models.CharField('Process id', max_length=255)
lang_code = models.CharField('Language code', max_length=3)
# To keep track of 3Play Translations.
translation_id = models.CharField('Translation id', max_length=255, null=True, blank=True)
lang_code = models.CharField('Language code', max_length=8)
status = models.CharField(
'Transcript status',
max_length=50,
......@@ -659,6 +661,17 @@ class TranscriptProcessMetadata(TimeStampedModel):
verbose_name_plural = 'Transcript process metadata'
get_latest_by = 'modified'
def update(self, **fields):
    """
    Updates a process.

    Every keyword argument is set as an attribute on this instance and the
    instance is saved once afterwards (also when no field is given).

    Keyword Arguments:
        fields(dict): dict containing all the fields to be updated.
    """
    # `items()` exists on both Python 2 and Python 3 dicts, unlike `iteritems()`.
    for attr, value in fields.items():
        setattr(self, attr, value)

    self.save()
def __unicode__(self):
return u'{video} - {provider} - {lang}'.format(
video=self.video.edx_id,
......
......@@ -3,26 +3,30 @@
Transcript tests
"""
import json
import responses
import urllib
import urlparse
from boto.exception import S3ResponseError
from boto.s3.connection import S3Connection
from boto.s3.key import Key
from ddt import data, ddt, unpack
from django.core.urlresolvers import reverse
from mock import Mock, PropertyMock, patch
from mock import Mock, PropertyMock, patch, ANY
from moto import mock_s3_deprecated
from rest_framework import status
from rest_framework.test import APITestCase
from VEDA_OS01 import transcripts, utils
from VEDA_OS01.models import (Course, TranscriptPreferences,
from VEDA_OS01.models import (Course, TranscriptCredentials,
TranscriptProcessMetadata, TranscriptProvider,
TranscriptStatus, Video)
CONFIG_DATA = utils.get_config('test_config.yaml')
VIDEO_DATA = {
'studio_id': '12345'
'studio_id': '12345',
'preferred_languages': ['en']
}
TRANSCRIPT_PROCESS_METADATA = {
......@@ -105,7 +109,7 @@ class Cielo24TranscriptTests(APITestCase):
**VIDEO_DATA
)
self.transcript_prefs = TranscriptPreferences.objects.create(
self.transcript_prefs = TranscriptCredentials.objects.create(
**TRANSCRIPT_PREFERENCES
)
......@@ -124,7 +128,7 @@ class Cielo24TranscriptTests(APITestCase):
}
self.video_transcript_ready_status_data = {
'status': transcripts.VideoStatus.TRANSCRIPTION_READY,
'status': transcripts.VideoStatus.TRANSCRIPT_READY,
'edx_video_id': self.video.studio_id
}
......@@ -139,10 +143,11 @@ class Cielo24TranscriptTests(APITestCase):
REQUEST_PARAMS['video_id'] = self.video.studio_id
@data(
{'url': 'cielo24/transcript_completed', 'status_code': 404},
{'url': None, 'status_code': 200},
('cielo24/transcript_completed', 404),
(None, 200),
)
@unpack
@patch('VEDA_OS01.transcripts.CIELO24_TRANSCRIPT_COMPLETED.send_robust', Mock(return_value=None))
def test_provider(self, url, status_code):
"""
Verify that only valid provider requests are allowed.
......@@ -154,19 +159,26 @@ class Cielo24TranscriptTests(APITestCase):
self.assertEqual(response.status_code, status_code)
@data(
{'params': {}},
{'params': {'job_id': 1}},
{'params': {'job_id': 2, 'lang_code': 'en'}},
{'params': {'job_id': 3, 'lang_code': 'ar', 'org': 'edx'}}
({}, ['job_id', 'lang_code', 'org', 'video_id']),
({'job_id': 1}, ['lang_code', 'org', 'video_id']),
({'job_id': 2, 'lang_code': 'en'}, ['org', 'video_id']),
({'job_id': 3, 'lang_code': 'ar', 'org': 'edx'}, ['video_id']),
)
@unpack
def test_missing_required_params(self, params):
@patch('VEDA_OS01.transcripts.LOGGER')
def test_missing_required_params(self, params, logger_params, mock_logger):
"""
Verify that a 400 response is received if any required param is missing.
"""
response = self.client.get(self.url, params)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
mock_logger.warning.assert_called_with(
'[CIELO24 HANDLER] Required params are missing %s',
logger_params,
)
@responses.activate
@patch('VEDA_OS01.transcripts.CIELO24_TRANSCRIPT_COMPLETED.send_robust', Mock(return_value=None))
def test_transcript_callback_get_request(self):
"""
Verify that transcript callback get request is working as expected.
......@@ -260,7 +272,7 @@ class Cielo24TranscriptTests(APITestCase):
with self.assertRaises(transcripts.TranscriptConversionError) as conversion_exception:
transcripts.cielo24_transcript_callback(None, **REQUEST_PARAMS)
mock_logger.exception.assert_called_with(
'[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s -- message=%s',
'[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s.',
REQUEST_PARAMS['video_id'],
REQUEST_PARAMS['lang_code'],
REQUEST_PARAMS['job_id']
......@@ -296,3 +308,1060 @@ class Cielo24TranscriptTests(APITestCase):
s3_exception.exception.message,
s3_message
)
@ddt
@patch.dict('VEDA_OS01.transcripts.CONFIG', CONFIG_DATA)
@patch('VEDA_OS01.transcripts.VALAPICall._AUTH', PropertyMock(return_value=lambda: CONFIG_DATA))
class ThreePlayTranscriptionCallbackTest(APITestCase):
"""
3Play Media callback tests
"""
def setUp(self):
    """
    Build the fixtures shared by the 3Play Media callback tests.

    Creates a course, a video, 3Play credentials for the org and an
    in-progress transcript process, then patches `transcripts.uuid.UUID.hex`
    to a fixed value for the duration of the test.
    """
    super(ThreePlayTranscriptionCallbackTest, self).setUp()

    # Plain identifiers reused by the individual tests.
    self.org = u'MAx'
    self.file_id = u'112233'
    self.edx_video_id = VIDEO_DATA['studio_id']
    callback_token = CONFIG_DATA['transcript_provider_request_token']
    self.url = reverse('3play_media_callback', args=[callback_token])

    # Database fixtures.
    course_fields = dict(
        course_name='Intro to VEDA',
        institution=self.org,
        edx_classid='123',
        local_storedir='course-v1:MAx+123+test_run',
    )
    self.course = Course.objects.create(**course_fields)
    self.video = Video.objects.create(inst_class=self.course, **VIDEO_DATA)
    self.transcript_prefs = TranscriptCredentials.objects.create(
        org=self.org,
        provider=TranscriptProvider.THREE_PLAY,
        api_key='insecure_api_key',
        api_secret='insecure_api_secret',
    )
    TranscriptProcessMetadata.objects.create(
        video=self.video,
        process_id=self.file_id,
        lang_code='en',
        provider=TranscriptProvider.THREE_PLAY,
        status=TranscriptStatus.IN_PROGRESS,
    )

    # Fixed uuid hex; the patch is undone again on test cleanup.
    self.uuid_hex = '01234567890123456789'
    hex_patcher = patch.object(
        transcripts.uuid.UUID,
        'hex',
        new_callable=PropertyMock(return_value=self.uuid_hex),
    )
    hex_patcher.start()
    self.addCleanup(hex_patcher.stop)
def setup_s3_bucket(self):
    """
    Create the transcripts bucket named in the test config and return the
    s3 connection that was used to create it.

    Meant to be called under moto's mocked s3.
    """
    s3_connection = S3Connection()
    bucket_name = CONFIG_DATA['aws_video_transcripts_bucket']
    s3_connection.create_bucket(bucket_name)
    return s3_connection
def invoke_3play_callback(self, state='complete'):
    """
    Post to the 3PlayMedia callback handler with all the necessary
    parameters and return the response.

    Arguments:
        state(str): state of the callback
    """
    # `build_url` strips the leading `/`, so it is put back in front of the
    # url that now carries the required query params.
    callback_url = '/{}'.format(
        utils.build_url(self.url, edx_video_id=self.video.studio_id, org=self.org)
    )
    form_payload = urllib.urlencode(dict(file_id=self.file_id, status=state))
    return self.client.post(
        callback_url,
        content_type='application/x-www-form-urlencoded',
        data=form_payload,
    )
def assert_request(self, received_request, expected_request, decode_func):
    """
    Verify that `received_request` matches `expected_request`.

    Arguments:
        received_request: request object whose attributes are inspected.
        expected_request(dict): maps request attribute names to their expected values.
        decode_func(callable): converts the raw request body into a mapping; when it
            is falsy the body is compared as is.

    For `headers` and for a decoded `body` only the expected entries are
    compared, additional entries on the received request are ignored. Any
    other attribute has to be equal to the expected value.
    """
    # `items()` works on Python 2 and Python 3, `iteritems()` only on Python 2.
    for request_attr, expected_value in expected_request.items():
        actual_value = getattr(received_request, request_attr)
        if request_attr == 'headers':
            for header, expected_header in expected_value.items():
                self.assertEqual(actual_value[header], expected_header)
        elif request_attr == 'body' and decode_func:
            decoded_body = decode_func(actual_value)
            for field, expected_field in expected_value.items():
                self.assertEqual(decoded_body[field], expected_field)
        else:
            self.assertEqual(actual_value, expected_value)
def assert_uploaded_transcript_on_s3(self, connection):
    """
    Check that the sjson transcript stored on s3 equals the expected sjson data.

    Arguments:
        connection: s3 connection used to look up the transcripts bucket.
    """
    bucket = connection.get_bucket(CONFIG_DATA['aws_video_transcripts_bucket'])
    transcript_key = Key(bucket)
    transcript_key.key = '{directory}{uuid}.sjson'.format(
        directory=CONFIG_DATA['aws_video_transcripts_prefix'],
        uuid=self.uuid_hex,
    )
    uploaded_sjson = json.loads(transcript_key.get_contents_as_string())
    self.assertEqual(uploaded_sjson, TRANSCRIPT_SJSON_DATA)
def test_unauthorized_access_to_3play_callback(self):
    """
    A callback request that carries an invalid token gets a 401 Unauthorized response.
    """
    self.url = reverse('3play_media_callback', args=['123invalidtoken456'])
    unauthorized_response = self.client.post(
        self.url,
        content_type='application/x-www-form-urlencoded',
    )
    self.assertEqual(unauthorized_response.status_code, status.HTTP_401_UNAUTHORIZED)
@data(
{'data': {}, 'query_params': {}},
{'data': {'file_id': '1122'}, 'query_params': {'edx_video_id': '1234'}}
)
@patch('VEDA_OS01.transcripts.LOGGER')
def test_missing_required_params(self, request_data, mock_logger):
    """
    The callback answers with 200 and logs a warning that lists the
    received and the missing attributes when required ones are absent.
    """
    form_data = request_data['data']
    query_params = request_data['query_params']

    response = self.client.post(
        '/{}'.format(utils.build_url(self.url, **query_params)),
        content_type='application/x-www-form-urlencoded',
        data=urllib.urlencode(form_data),
    )
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Work out what the handler is expected to report in its warning.
    required_attrs = ['file_id', 'status', 'org', 'edx_video_id']
    received_attrs = form_data.keys() + query_params.keys()
    missing_attrs = [attr for attr in required_attrs if attr not in received_attrs]

    mock_logger.warning.assert_called_with(
        u'[3PlayMedia Callback] process_id=%s Received Attributes=%s Missing Attributes=%s',
        form_data.get('file_id', None),
        received_attrs,
        missing_attrs,
    )
@data(
(
u'error',
u'[3PlayMedia Callback] Error while transcription - error=%s, org=%s, edx_video_id=%s, file_id=%s.',
TranscriptStatus.FAILED
),
(
u'invalid_status',
u'[3PlayMedia Callback] Got invalid status - status=%s, org=%s, edx_video_id=%s, file_id=%s.',
TranscriptStatus.IN_PROGRESS
)
)
@unpack
@responses.activate
@patch('VEDA_OS01.transcripts.LOGGER')
def test_callback_for_non_success_statuses(self, state, message, expected_status, mock_logger):
    """
    For a non-success callback state the process ends up in
    `expected_status` and `message` is logged as an error.
    """
    self.url = '/{}'.format(utils.build_url(self.url, edx_video_id='12345', org='MAx'))
    callback_payload = {
        'file_id': self.file_id,
        'status': state,
        'error_description': state,  # this will be logged.
    }
    self.client.post(
        self.url,
        content_type='application/x-www-form-urlencoded',
        data=urllib.urlencode(callback_payload),
    )

    latest_process = TranscriptProcessMetadata.objects.filter(process_id=self.file_id).latest()
    self.assertEqual(latest_process.status, expected_status)

    mock_logger.error.assert_called_with(message, state, self.org, self.video.studio_id, self.file_id)
@responses.activate
@mock_s3_deprecated
@patch('VEDA_OS01.transcripts.LOGGER')
def test_single_lang_callback_flow(self, mock_logger):
    """
    Verify the callback flow for a video with a single preferred language:
    the expected HTTP requests are made in order, the process becomes
    READY and the sjson transcript is stored on s3.
    """
    s3_connection = self.setup_s3_bucket()
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)

    # Mocked 3Play transcript response.
    responses.add(
        responses.GET,
        transcript_url,
        body=TRANSCRIPT_SRT_DATA,
        content_type='text/plain; charset=utf-8',
        status=200
    )
    # Mocked edx-val responses.
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)
    responses.add(responses.PATCH, CONFIG_DATA['val_video_transcript_status_url'], status=200)

    response = self.invoke_3play_callback()

    # The callback succeeded and the process is marked as ready.
    self.assertEqual(response.status_code, 200)
    latest_process = TranscriptProcessMetadata.objects.filter(process_id=self.file_id).latest()
    self.assertEqual(latest_process.status, TranscriptStatus.READY)

    # One request for each of the 4 mocked responses registered above.
    self.assertEqual(len(responses.calls), 4)

    val_headers = {
        'Authorization': 'Bearer 1234567890',
        'content-type': 'application/json'
    }
    transcript_name = '{directory}{uuid}.sjson'.format(
        directory=CONFIG_DATA['aws_video_transcripts_prefix'], uuid=self.uuid_hex
    )

    fetch_transcript_request = {
        'url': utils.build_url(transcript_url, apikey=self.transcript_prefs.api_key)
    }
    val_token_request = {
        'url': CONFIG_DATA['val_token_url'],
        'body': {
            'grant_type': ['password'],
            'client_id': [CONFIG_DATA['val_client_id']],
            'client_secret': [CONFIG_DATA['val_secret_key']],
            'username': [CONFIG_DATA['val_username']],
            'password': [CONFIG_DATA['val_password']],
        },
        'decode_func': urlparse.parse_qs,
    }
    create_transcript_request = {
        'url': CONFIG_DATA['val_transcript_create_url'],
        'body': {
            'file_format': transcripts.TRANSCRIPT_SJSON,
            'video_id': self.video.studio_id,
            'language_code': 'en',
            'name': transcript_name,
            'provider': TranscriptProvider.THREE_PLAY
        },
        'headers': val_headers,
        'decode_func': json.loads,
    }
    update_status_request = {
        'url': CONFIG_DATA['val_video_transcript_status_url'],
        'body': {
            'status': transcripts.VideoStatus.TRANSCRIPT_READY,
            'edx_video_id': self.video.studio_id
        },
        'headers': val_headers,
        'decode_func': json.loads,
    }
    expected_requests = [
        fetch_transcript_request,
        val_token_request,
        create_transcript_request,
        update_status_request,
    ]

    for position, expected_request in enumerate(expected_requests):
        # `decode_func` is only a hint for the comparison, so it is taken
        # out of the dict before the request attributes are compared.
        decode_func = expected_request.pop('decode_func', None)
        self.assert_request(responses.calls[position].request, expected_request, decode_func)

    # The converted transcript must have been uploaded to s3.
    self.assert_uploaded_transcript_on_s3(connection=s3_connection)

    mock_logger.info.assert_called_with(
        u'[3PlayMedia Callback] Video speech transcription was successful for video=%s -- lang_code=%s -- '
        u'process_id=%s',
        self.video.studio_id,
        'en',
        self.file_id,
    )
@responses.activate
@mock_s3_deprecated
@patch('VEDA_OS01.transcripts.LOGGER')
def test_multi_lang_callback_flow(self, mock_logger):
    """
    Tests the 3Play Media callback for a video that needs transcripts in
    more than one language.

    The speech transcript ('en') is expected to become READY, while a
    translation is ordered for the other preferred language ('ro') whose
    process is expected to be IN_PROGRESS afterwards.
    """
    conn = self.setup_s3_bucket()

    # Video needs transcripts in multiple languages
    self.video.preferred_languages = ['en', 'ro']
    self.video.save()

    # 3Play mock translation id
    translation_id = '007-abc'

    # 3Play mocked response
    responses.add(
        responses.GET,
        transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id),
        body=TRANSCRIPT_SRT_DATA,
        content_type='text/plain; charset=utf-8',
        status=200
    )
    responses.add(
        responses.GET,
        transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
        json.dumps([
            {
                'id': 30,
                'source_language_name': 'English',
                'source_language_iso_639_1_code': 'en',
                'target_language_name': 'Romanian',
                'target_language_iso_639_1_code': 'ro',
                'service_level': 'standard',
                'per_word_rate': 0.16
            },
            {
                'id': 31,
                'source_language_name': 'English',
                'source_language_iso_639_1_code': 'en',
                # ISO 639-1 code `da` stands for Danish.
                'target_language_name': 'Danish',
                'target_language_iso_639_1_code': 'da',
                'service_level': 'standard',
                'per_word_rate': 0.19
            }
        ]),
        status=200,
    )
    responses.add(
        responses.POST,
        transcripts.THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=self.file_id),
        json.dumps({
            'success': True,
            'translation_id': translation_id
        }),
        status=200,
    )

    # edx-val mocked responses
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)

    # Make request to callback
    response = self.invoke_3play_callback()

    # Assert the response and the speech lang process
    self.assertEqual(response.status_code, 200)
    self.assertEqual(
        TranscriptProcessMetadata.objects.get(
            process_id=self.file_id,
            provider=TranscriptProvider.THREE_PLAY,
            lang_code='en'
        ).status,
        TranscriptStatus.READY
    )

    # Assert the transcript translation process
    self.assertEqual(
        TranscriptProcessMetadata.objects.get(
            process_id=self.file_id,
            provider=TranscriptProvider.THREE_PLAY,
            lang_code='ro'
        ).status,
        TranscriptStatus.IN_PROGRESS,
    )

    # Total of 5 HTTP requests are made as registered above
    self.assertEqual(len(responses.calls), 5)

    expected_requests = [
        # request - 1
        {
            'url': utils.build_url(
                transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id),
                apikey=self.transcript_prefs.api_key
            )
        },
        # request - 2
        {
            'url': CONFIG_DATA['val_token_url'],
            'body': {
                'grant_type': ['password'],
                'client_id': [CONFIG_DATA['val_client_id']],
                'client_secret': [CONFIG_DATA['val_secret_key']],
                'username': [CONFIG_DATA['val_username']],
                'password': [CONFIG_DATA['val_password']],
            },
            'decode_func': urlparse.parse_qs,
        },
        # request - 3
        {
            'url': CONFIG_DATA['val_transcript_create_url'],
            'body': {
                'file_format': transcripts.TRANSCRIPT_SJSON,
                'video_id': self.video.studio_id,
                'language_code': 'en',
                'name': '{directory}{uuid}.sjson'.format(
                    directory=CONFIG_DATA['aws_video_transcripts_prefix'], uuid=self.uuid_hex
                ),
                'provider': TranscriptProvider.THREE_PLAY
            },
            'headers': {
                'Authorization': 'Bearer 1234567890',
                'content-type': 'application/json'
            },
            'decode_func': json.loads,
        },
        # request - 4
        {
            'url': utils.build_url(
                transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
                apikey=self.transcript_prefs.api_key
            )
        },
        # request - 5
        {
            'url': transcripts.THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=self.file_id),
            'body': {
                'apikey': self.transcript_prefs.api_key,
                'api_secret_key': self.transcript_prefs.api_secret,
                'translation_service_id': 30,
            },
            'decode_func': json.loads,
        },
    ]

    for position, expected_request in enumerate(expected_requests):
        self.assert_request(
            responses.calls[position].request,
            expected_request,
            expected_request.pop('decode_func', None),
        )

    # verify sjson data uploaded to s3
    self.assert_uploaded_transcript_on_s3(connection=conn)

    mock_logger.info.assert_called_with(
        u'[3PlayMedia Callback] Video speech transcription was successful for video=%s -- lang_code=%s -- '
        u'process_id=%s',
        self.video.studio_id,
        'en',
        self.file_id,
    )
@data(
(
{'body': json.dumps({'iserror': True}), 'content_type': 'application/json', 'status': 200},
'error',
(
u'[%s] Transcript fetch error for video=%s -- lang_code=%s -- process=%s -- response=%s',
u'3PlayMedia Callback',
u'12345',
u'en',
u'112233',
json.dumps({'iserror': True}),
),
),
(
{'body': None, 'status': 400},
'exception',
(
u'[3PlayMedia Callback] Fetch request failed for video=%s -- lang_code=%s -- process_id=%s',
u'12345',
u'en',
u'112233',
),
)
)
@unpack
@responses.activate
@patch('VEDA_OS01.transcripts.LOGGER')
def test_fetch_transcript_exceptions(self, response, log_method, log_args, mock_logger):
    """
    When fetching the transcript goes wrong, the process is marked as
    FAILED and the expected message is logged.
    """
    # Mocked 3Play transcript response, built from the test data.
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    responses.add(responses.GET, transcript_url, **response)

    callback_response = self.invoke_3play_callback()

    # The callback itself still answers with 200.
    self.assertEqual(callback_response.status_code, 200)
    latest_process = TranscriptProcessMetadata.objects.filter(process_id=self.file_id).latest()
    self.assertEqual(latest_process.status, TranscriptStatus.FAILED)

    expected_log_call = getattr(mock_logger, log_method)
    expected_log_call.assert_called_with(*log_args)
@patch('VEDA_OS01.transcripts.LOGGER')
@responses.activate
def test_srt_to_sjson_conversion_exceptions(self, mock_logger):
    """
    A failing srt to sjson conversion is reported through `LOGGER.exception`.
    """
    # Mocked 3Play transcript response.
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    responses.add(
        responses.GET,
        transcript_url,
        body=TRANSCRIPT_SRT_DATA,
        content_type=u'text/plain; charset=utf-8',
        status=200
    )

    # Let `convert_srt_to_sjson` fail with a ValueError while the callback runs.
    with patch('VEDA_OS01.transcripts.convert_srt_to_sjson', side_effect=ValueError):
        self.invoke_3play_callback()
        mock_logger.exception.assert_called_with(
            u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
            self.video.studio_id,
            'en',
            self.file_id,
        )
@patch('VEDA_OS01.transcripts.LOGGER')
@responses.activate
def test_upload_to_s3_exceptions(self, mock_logger):
    """
    An s3 error during the transcript upload is reported through `LOGGER.exception`.
    """
    # Mocked 3Play transcript response.
    transcript_url = transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id)
    responses.add(
        responses.GET,
        transcript_url,
        body=TRANSCRIPT_SRT_DATA,
        content_type=u'text/plain; charset=utf-8',
        status=200
    )

    # Let `upload_sjson_to_s3` fail with an s3 error while the callback runs.
    s3_error = S3ResponseError(status=401, reason='invalid secrets')
    with patch('VEDA_OS01.transcripts.upload_sjson_to_s3', side_effect=s3_error):
        self.invoke_3play_callback()
        mock_logger.exception.assert_called_with(
            u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
            self.video.studio_id,
            'en',
            self.file_id,
        )
@data(
# not-an-ok response on translation services fetch request.
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
'body': 'Your request was invalid.',
'status': 400,
}
],
{
'method': 'exception',
'args': (
'[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
'12345',
'en',
'112233'
)
}
),
# Error on 3Play while fetching translation services.
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
'body': json.dumps({
'success': False
}),
'status': 200,
}
],
{
'method': 'exception',
'args': (
'[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
'12345',
'en',
'112233'
)
}
),
# not-an-ok response on translation order request.
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
'body': json.dumps(
[{
'id': 30,
'source_language_name': 'English',
'source_language_iso_639_1_code': 'en',
'target_language_name': 'Romanian',
'target_language_iso_639_1_code': 'ro',
'service_level': 'standard',
'per_word_rate': 0.16
}]
),
'status': 200,
},
{
'method': responses.POST,
'url': transcripts.THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=u'112233'),
'body': '1s2d3f4',
'status': 400
}
],
{
'method': 'error',
'args': (
'[3PlayMedia Callback] An error occurred during translation, target language=%s, file_id=%s, '
'status=%s',
'ro',
'112233',
400,
)
}
),
# Error on 3Play during placing order for a translation.
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
'body': json.dumps(
[{
'id': 30,
'source_language_name': 'English',
'source_language_iso_639_1_code': 'en',
'target_language_name': 'Romanian',
'target_language_iso_639_1_code': 'ro',
'service_level': 'standard',
'per_word_rate': 0.16
}]
),
'status': 200,
},
{
'method': responses.POST,
'url': transcripts.THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=u'112233'),
'body': json.dumps({'success': False}),
'status': 200
}
],
{
'method': 'error',
'args': (
'[3PlayMedia Callback] Translation failed fot target language=%s, file_id=%s, response=%s',
'ro',
'112233',
json.dumps({'success': False}),
)
}
),
# When translation service is not found for our language
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_SERVICES_URL,
'body': json.dumps(
[{
'id': 30,
'source_language_name': 'English',
'source_language_iso_639_1_code': 'en',
'target_language_name': 'Romanian',
'target_language_iso_639_1_code': 'da',
'service_level': 'standard',
'per_word_rate': 0.16
}]
),
'status': 200,
}
],
{
'method': 'error',
'args': (
'[3PlayMedia Callback] No translation service found for target language %s -- process id %s',
'ro',
'112233',
)
}
)
)
@unpack
@responses.activate
@mock_s3_deprecated
@patch('VEDA_OS01.transcripts.LOGGER')
def test_order_translations_exception_cases(self, mock_responses, expected_logging, mock_logger):
    """
    Tests all the error scenarios while ordering translation for a transcript in various languages.

    Arguments:
        mock_responses(list): keyword dicts for `responses.add`, each one carrying `method` and `url`.
        expected_logging(dict): logger `method` name and the `args` it must have been called with.
        mock_logger: patched `VEDA_OS01.transcripts.LOGGER`.
    """
    # Setup an s3 bucket
    self.setup_s3_bucket()

    # for multi-language translations
    self.video.preferred_languages = ['en', 'ro']
    self.video.save()

    # Mocked responses
    responses.add(
        responses.GET,
        transcripts.THREE_PLAY_TRANSCRIPT_URL.format(file_id=self.file_id),
        body=TRANSCRIPT_SRT_DATA,
        content_type='text/plain; charset=utf-8',
        status=200
    )
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)

    for mock_response in mock_responses:
        # Pop from a copy: the dicts are owned by the test data decorator and
        # would otherwise lose `method` and `url`, which breaks any re-run of
        # this test in the same process.
        response_kwargs = dict(mock_response)
        responses.add(response_kwargs.pop('method'), response_kwargs.pop('url'), **response_kwargs)

    # Make request to callback
    response = self.invoke_3play_callback()

    # Assert the response and the logs
    self.assertEqual(response.status_code, 200)
    getattr(mock_logger, expected_logging['method']).assert_called_with(*expected_logging['args'])

    # Assert the transcript translation process
    self.assertEqual(
        TranscriptProcessMetadata.objects.get(
            process_id=self.file_id,
            provider=TranscriptProvider.THREE_PLAY,
            lang_code='ro'
        ).status,
        TranscriptStatus.FAILED,
    )
@responses.activate
@mock_s3_deprecated
def test_translations_retrieval(self):
    """
    Tests translations retrieval from 3PlayMedia

    Two `in progress` translation processes ('ro' and 'da') are created for the video and
    `transcripts.retrieve_three_play_translations()` is run against mocked 3PlayMedia and
    edx-val endpoints. The test then asserts the exact sequence of outgoing HTTP requests,
    the transcript uploaded to S3 and that each translation process ends up `READY`.
    """
    # Setup an S3 bucket
    connection = self.setup_s3_bucket()

    # Setup translation processes
    # Maps a target language code to the translation id of its process.
    mock_translations = {
        'ro': '1z2x3c',
        'da': '1q2w3e',
    }
    self.video.preferred_languages = ['en', 'ro', 'da']
    self.video.save()

    # Assume the speech transcript is ready.
    TranscriptProcessMetadata.objects.filter(
        process_id=self.file_id,
        lang_code='en'
    ).update(status=TranscriptStatus.READY)

    # in progress translation processes (which will normally be done by the callback)
    for lang_code, translation_id in mock_translations.iteritems():
        TranscriptProcessMetadata.objects.create(
            video=self.video,
            provider=TranscriptProvider.THREE_PLAY,
            process_id=self.file_id,
            translation_id=translation_id,
            lang_code=lang_code,
            status=TranscriptStatus.IN_PROGRESS,
        )

    # Setup mock responses
    # For every translation: a `complete` status response and the SRT download.
    for __, translation_id in mock_translations.iteritems():
        responses.add(
            responses.GET,
            transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format(
                file_id=self.file_id, translation_id=translation_id
            ),
            json.dumps({'state': 'complete'}),
            status=200
        )
        responses.add(
            responses.GET,
            transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
                file_id=self.file_id, translation_id=translation_id
            ),
            body=TRANSCRIPT_SRT_DATA,
            content_type='text/plain; charset=utf-8',
            status=200,
        )

    # edx-val mocked responses
    responses.add(responses.POST, CONFIG_DATA['val_token_url'], '{"access_token": "1234567890"}', status=200)
    responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)
    responses.add(responses.PATCH, CONFIG_DATA['val_video_transcript_status_url'], status=200)

    # Call to retrieve translations
    transcripts.retrieve_three_play_translations()

    # Total HTTP requests, 4 for first translation and 4 for second translation and 1 for updating video status.
    self.assertEqual(len(responses.calls), 9)

    # `position` walks through `responses.calls` in the order the requests were made.
    position = 0
    for lang_code, translation_id in mock_translations.iteritems():
        expected_requests = [
            # request - 1
            {
                'url': utils.build_url(transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format(
                    file_id=self.file_id, translation_id=translation_id
                ), apikey=self.transcript_prefs.api_key)
            },
            # request - 2
            {
                'url': utils.build_url(transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
                    file_id=self.file_id, translation_id=translation_id
                ), apikey=self.transcript_prefs.api_key)
            },
            # request - 3
            {
                'url': CONFIG_DATA['val_token_url'],
                'body': {
                    'grant_type': ['password'],
                    'client_id': [CONFIG_DATA['val_client_id']],
                    'client_secret': [CONFIG_DATA['val_secret_key']],
                    'username': [CONFIG_DATA['val_username']],
                    'password': [CONFIG_DATA['val_password']],
                },
                'decode_func': urlparse.parse_qs,
            },
            # request - 4
            {
                'url': CONFIG_DATA['val_transcript_create_url'],
                'body': {
                    'file_format': transcripts.TRANSCRIPT_SJSON,
                    'video_id': self.video.studio_id,
                    'language_code': lang_code,
                    'name': '{directory}{uuid}.sjson'.format(
                        directory=CONFIG_DATA['aws_video_transcripts_prefix'], uuid=self.uuid_hex
                    ),
                    'provider': TranscriptProvider.THREE_PLAY
                },
                'headers': {
                    'Authorization': 'Bearer 1234567890',
                    'content-type': 'application/json'
                },
                'decode_func': json.loads,
            }
        ]
        for expected_request in expected_requests:
            # `pop` is evaluated before the call, so `decode_func` is no longer part of the
            # dict by the time `assert_request` receives it.
            self.assert_request(
                responses.calls[position].request,
                expected_request,
                expected_request.pop('decode_func', None),
            )
            position += 1

        # Asserts the transcript sjson data uploaded to s3
        self.assert_uploaded_transcript_on_s3(connection=connection)

        # Asserts the Process metadata
        self.assertEqual(
            TranscriptProcessMetadata.objects.get(
                provider=TranscriptProvider.THREE_PLAY,
                process_id=self.file_id,
                lang_code=lang_code,
                translation_id=translation_id,
            ).status,
            TranscriptStatus.READY,
        )

    # Assert that the final request was made for updating video status to `ready`
    # upon receiving all the translations
    expected_video_status_update_request = {
        'url': CONFIG_DATA['val_video_transcript_status_url'],
        'body': {
            'status': transcripts.VideoStatus.TRANSCRIPT_READY,
            'edx_video_id': self.video.studio_id
        },
        'headers': {
            'Authorization': 'Bearer 1234567890',
            'content-type': 'application/json'
        }
    }
    self.assert_request(
        responses.calls[position].request,
        expected_video_status_update_request,
        decode_func=json.loads,
    )
@data(
# not-an-ok response on translation status fetch request.
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format(
file_id='112233', translation_id='1q2w3e'
),
'body': 'Your request was invalid.',
'status': 400,
}
],
{
'method': 'error',
'args': (
'[3PlayMedia Task] Translation status request failed for video=%s -- lang_code=%s -- '
'process_id=%s -- status=%s',
VIDEO_DATA['studio_id'],
'ro',
'112233',
400,
)
},
TranscriptStatus.FAILED
),
# 3Play Error response on fetching translations status.
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format(
file_id='112233', translation_id='1q2w3e'
),
'body': json.dumps({'iserror': True}),
'status': 200,
}
],
{
'method': 'error',
'args': (
'[3PlayMedia Task] unable to get translation status for '
'video=%s -- lang_code=%s -- process_id=%s -- response=%s',
VIDEO_DATA['studio_id'],
'ro',
'112233',
json.dumps({'iserror': True}),
)
},
TranscriptStatus.FAILED,
),
# not-an-ok response on translation fetch request.
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format(
file_id='112233', translation_id='1q2w3e'
),
'body': json.dumps({
'state': 'complete'
}),
'status': 200,
},
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
file_id='112233', translation_id='1q2w3e'
),
'body': 'invalid blah blah',
'status': 400
}
],
{
'method': 'exception',
'args': (
'[3PlayMedia Task] Translation download failed for video=%s -- lang_code=%s -- process_id=%s.',
VIDEO_DATA['studio_id'],
'ro',
'112233'
)
},
TranscriptStatus.IN_PROGRESS
),
# 3Play Error response on translation fetch request.
(
[
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format(
file_id='112233', translation_id='1q2w3e'
),
'body': json.dumps({
'state': 'complete'
}),
'status': 200,
},
{
'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
file_id='112233', translation_id='1q2w3e'
),
'body': json.dumps({'iserror': True}),
'status': 200
}
],
{
'method': 'error',
'args': (
'[%s] Transcript fetch error for video=%s -- lang_code=%s -- process=%s -- response=%s',
'3PlayMedia Task',
VIDEO_DATA['studio_id'],
'ro',
'112233',
json.dumps({'iserror': True}),
)
},
TranscriptStatus.FAILED
),
)
@unpack
@responses.activate
@mock_s3_deprecated
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_exceptions(self, mock_responses, expected_logging, transcript_status, mock_logger):
    """
    Tests possible error cases during translation fetch process from 3PlayMedia.

    Arguments:
        mock_responses(list): HTTP mocks; each dict carries `method`, `url` and the remaining
            `responses.add` keyword arguments
        expected_logging(dict): LOGGER method name (`method`) and the positional `args`
            it is expected to have been called with
        transcript_status: status the 'ro' translation process must end up with
        mock_logger: patched `VEDA_OS01.transcripts.LOGGER`
    """
    self.video.preferred_languages = ['en', 'ro']
    self.video.save()

    # An in-progress 'ro' translation, as the callback would have left it.
    TranscriptProcessMetadata.objects.create(
        video=self.video,
        provider=TranscriptProvider.THREE_PLAY,
        process_id=self.file_id,
        translation_id='1q2w3e',
        lang_code='ro',
        status=TranscriptStatus.IN_PROGRESS,
    )

    # Scenario specific mocks.
    for mocked in mock_responses:
        method, url = mocked.pop('method'), mocked.pop('url')
        responses.add(method, url, **mocked)

    # Fetch translations
    transcripts.retrieve_three_play_translations()

    # The failure must have been reported through the expected LOGGER method.
    log_method = getattr(mock_logger, expected_logging['method'])
    log_method.assert_called_with(*expected_logging['args'])

    # The translation process must be in the state expected for this scenario.
    translation_process = TranscriptProcessMetadata.objects.get(
        process_id=self.file_id,
        provider=TranscriptProvider.THREE_PLAY,
        lang_code='ro'
    )
    self.assertEqual(translation_process.status, transcript_status)
......@@ -9,16 +9,18 @@ import boto
import django.dispatch
import requests
from boto.s3.key import Key
from django.db.models import Q
from pysrt import SubRipFile
from requests.packages.urllib3.exceptions import InsecurePlatformWarning
from rest_framework import status
from rest_framework.parsers import FormParser
from rest_framework.permissions import AllowAny
from rest_framework.response import Response
from rest_framework.views import APIView
from control.veda_val import VALAPICall
from VEDA_OS01 import utils
from VEDA_OS01.models import (TranscriptPreferences, TranscriptProcessMetadata,
from VEDA_OS01.models import (TranscriptCredentials, TranscriptProcessMetadata,
TranscriptProvider, TranscriptStatus,
VideoStatus)
......@@ -26,11 +28,29 @@ requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
logging.basicConfig()
LOGGER = logging.getLogger(__name__)
# 3PlayMedia possible send-along statuses for a transcription callback.
# The same `COMPLETE` value is also compared against the `state` of a translation status response.
COMPLETE = 'complete'
ERROR = 'error'

# Transcript format
TRANSCRIPT_SJSON = 'sjson'

# Cielo24 callback signal and API URL
CIELO24_TRANSCRIPT_COMPLETED = django.dispatch.Signal(providing_args=['job_id', 'lang_code', 'org', 'video_id'])
CIELO24_GET_CAPTION_URL = 'https://api.cielo24.com/api/job/get_caption'

# Configuration as returned by `utils.get_config()`
CONFIG = utils.get_config()

# 3PlayMedia callback signal
THREE_PLAY_TRANSCRIPTION_DONE = django.dispatch.Signal(
    providing_args=['org', 'lang_code', 'edx_video_id', 'file_id', 'status', 'error_description']
)

# 3PlayMedia API URLs.
# `{file_id}` and `{translation_id}` are placeholders filled in by the callers through `str.format`.
THREE_PLAY_TRANSCRIPT_URL = u'https://static.3playmedia.com/files/{file_id}/transcript.srt'
THREE_PLAY_TRANSLATION_SERVICES_URL = u'https://static.3playmedia.com/translation_services'
THREE_PLAY_ORDER_TRANSLATION_URL = u'https://api.3playmedia.com/files/{file_id}/translations/order'
THREE_PLAY_TRANSLATION_STATUS_URL = u'https://static.3playmedia.com/files/{file_id}/translations/{translation_id}'
THREE_PLAY_TRANSLATION_DOWNLOAD_URL = (u'https://static.3playmedia.com/files/{file_id}/translations/{translation_id}/'
                                       u'captions.srt')
class TranscriptError(Exception):
"""
......@@ -46,6 +66,13 @@ class TranscriptFetchError(TranscriptError):
pass
class TranscriptTranslationError(TranscriptError):
    """
    Raised when a translation attempt on 3PlayMedia fails.
    """
class TranscriptConversionError(TranscriptError):
"""
An error occurred during srt to sjson conversion.
......@@ -84,9 +111,13 @@ class Cielo24CallbackHandlerView(APIView):
"""
Handle Cielo24 callback request.
"""
attrs = ('job_id', 'lang_code', 'org', 'video_id')
if not all([attr in request.query_params for attr in attrs]):
LOGGER.warn('[CIELO24 HANDLER] Required params are missing %s', request.query_params.keys())
required_attrs = ('job_id', 'lang_code', 'org', 'video_id')
missing = [attr for attr in required_attrs if attr not in request.query_params.keys()]
if missing:
LOGGER.warning(
'[CIELO24 HANDLER] Required params are missing %s',
missing,
)
return Response({}, status=status.HTTP_400_BAD_REQUEST)
CIELO24_TRANSCRIPT_COMPLETED.send_robust(
......@@ -123,14 +154,14 @@ def cielo24_transcript_callback(sender, **kwargs):
job_id
)
# get transcript preferences for an organization
# get transcript credentials for an organization
try:
transcript_prefs = TranscriptPreferences.objects.get(
transcript_prefs = TranscriptCredentials.objects.get(
org=org,
provider=TranscriptProvider.CIELO24,
)
except TranscriptPreferences.DoesNotExist:
LOGGER.exception('[CIELO24 TRANSCRIPTS] Unable to get transcript preferences for job_id=%s', job_id)
except TranscriptCredentials.DoesNotExist:
LOGGER.exception('[CIELO24 TRANSCRIPTS] Unable to get transcript credentials for job_id=%s', job_id)
# mark the transcript for a particular language as ready
try:
......@@ -145,7 +176,7 @@ def cielo24_transcript_callback(sender, **kwargs):
job_id
)
# if transcript preferences are missing then we can do nothing
# if transcript credentials are missing then we can do nothing
if not transcript_prefs and process_metadata:
process_metadata.status = TranscriptStatus.FAILED
process_metadata.save()
......@@ -179,7 +210,7 @@ def cielo24_transcript_callback(sender, **kwargs):
sjson_file_name = upload_sjson_to_s3(CONFIG, sjson)
except Exception:
LOGGER.exception(
'[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s',
'[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s.',
video_id,
lang_code,
job_id
......@@ -199,7 +230,7 @@ def cielo24_transcript_callback(sender, **kwargs):
# update transcript status for video in edx-val only if all language transcripts are ready
video_jobs = TranscriptProcessMetadata.objects.filter(video__studio_id=video_id)
if all(video_job.status == TranscriptStatus.READY for video_job in video_jobs):
val_api.update_video_status(process_metadata.video.studio_id, VideoStatus.TRANSCRIPTION_READY)
val_api.update_video_status(process_metadata.video.studio_id, VideoStatus.TRANSCRIPT_READY)
def fetch_srt_data(url, **request_params):
......@@ -266,3 +297,614 @@ def upload_sjson_to_s3(config, sjson_data):
)
k.set_contents_from_string(json.dumps(sjson_data))
return k.key
class ThreePlayMediaCallbackHandlerView(APIView):
    """
    View to handle 3PlayMedia callback requests.
    """
    parser_classes = (FormParser,)
    permission_classes = (AllowValidTranscriptProvider,)

    def post(self, request, **kwargs):
        """
        Handle 3PlayMedia callback request.

        `file_id` and `status` are read from the form encoded body while `org` and
        `edx_video_id` are read from the query string. When any of them is missing a warning
        is logged and nothing is dispatched, otherwise the `THREE_PLAY_TRANSCRIPTION_DONE`
        signal is sent. The response status is 200 in both cases.

        Arguments:
            request: callback request
            kwargs(dict): url keyword arguments (unused here)

        Returns:
            An empty response with status 200.
        """
        # Every required attribute is validated against the very source it is read from
        # below, so an attribute sent in the wrong place is reported as missing instead of
        # raising a KeyError while dispatching the signal.
        required_attrs = (
            ('file_id', request.data),
            ('status', request.data),
            ('org', request.query_params),
            ('edx_video_id', request.query_params),
        )
        received_attributes = list(request.data.keys()) + list(request.query_params.keys())
        missing = [attr for attr, source in required_attrs if attr not in source]
        if missing:
            LOGGER.warning(
                u'[3PlayMedia Callback] process_id=%s Received Attributes=%s Missing Attributes=%s',
                request.data.get('file_id'),
                received_attributes,
                missing,
            )
            return Response(status=status.HTTP_200_OK)

        # Dispatch 3playMedia transcription signal
        THREE_PLAY_TRANSCRIPTION_DONE.send_robust(
            sender=self,
            org=request.query_params['org'],
            edx_video_id=request.query_params['edx_video_id'],
            lang_code='en',
            file_id=request.data['file_id'],
            status=request.data['status'],
            # Following is going to be an error description if an error occurs during
            # 3playMedia transcription process
            error_description=request.data.get('error_description'),
        )
        return Response(status=status.HTTP_200_OK)
def get_translation_services(api_key):
    """
    Fetch the translation services offered by 3Play Media.

    Arguments:
        api_key(unicode): api key which is required to make an authentic call to 3Play Media

    Returns:
        list: available 3Play Media translation services.

    Raises:
        TranscriptTranslationError: when the request is not successful or the decoded
            response is not a list.
    """
    services_url = utils.build_url(THREE_PLAY_TRANSLATION_SERVICES_URL, apikey=api_key)
    response = requests.get(services_url)
    if not response.ok:
        error_message = u'[3PlayMedia Callback] Error while fetching the translation services -- {status}, {response}'
        raise TranscriptTranslationError(
            error_message.format(status=response.status_code, response=response.text)
        )

    # A successful response is expected to be a list of services, details:
    # http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    available_services = json.loads(response.text)
    if isinstance(available_services, list):
        return available_services

    raise TranscriptTranslationError(
        u'[3PlayMedia Callback] Expected list but got: -- {response}.'.format(response=response.text)
    )
def get_standard_translation_service(translation_services, target_language):
    """
    Find the standard level translation service for a language.

    Arguments:
        translation_services(list): List of available 3play media translation services.
        target_language(str): A language code whose standard translation service is needed.

    Returns:
        The id of the first service translating into `target_language` at the `standard`
        service level, or None when there is no such service.
    """
    # Lazily evaluated, so the scan stops at the first matching service.
    standard_service_ids = (
        service['id']
        for service in translation_services
        if service['target_language_iso_639_1_code'] == target_language
        and service['service_level'] == 'standard'
    )
    return next(standard_service_ids, None)
def place_translation_order(api_key, api_secret, translation_service_id, target_language, file_id):
    """
    Order a translation of a file from 3play media.

    Arguments:
        api_key(unicode): api key
        api_secret(unicode): api secret
        translation_service_id(unicode): translation service id got from 3Play Media
        target_language(unicode): A language code translation is being ordered
        file_id(unicode): 3play media file id / process id

    Returns:
        The decoded order response when the order has been placed successfully, None
        otherwise (the failure is logged).
    """
    order_url = THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=file_id)
    payload = {
        'apikey': api_key,
        'api_secret_key': api_secret,
        'translation_service_id': translation_service_id,
    }
    order_response = requests.post(order_url, json=payload)
    if not order_response.ok:
        LOGGER.error(
            '[3PlayMedia Callback] An error occurred during translation, target language=%s, file_id=%s, status=%s',
            target_language,
            file_id,
            order_response.status_code,
        )
        return None

    # The `success` attribute of the response tells whether the order has been placed:
    # http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    translation_order = json.loads(order_response.text)
    if translation_order.get('success'):
        return translation_order

    LOGGER.error(
        '[3PlayMedia Callback] Translation failed fot target language=%s, file_id=%s, response=%s',
        target_language,
        file_id,
        order_response.text,
    )
    return None
def order_translations(file_id, api_key, api_secret, target_languages):
    """
    Order translations on 3PlayMedia for all the target languages.

    Process:
        * Look up the pending translation processes of the file
        * Fetch the translation services available on 3PlayMedia
        * For each target language,
            - Pick its latest pending process (skip the language if there is none)
            - Find the standard translation service for the language
            - Order the translation from that service
            - Move the process to `in progress` along with the translation id received
              from 3Play, or to `failed` when no service is found or the order fails.

    Arguments:
        file_id(unicode): File identifier
        api_key(unicode): API key
        api_secret(unicode): API Secret
        target_languages(list): List of language codes

    Raises:
        TranscriptTranslationError: when an error occurred while fetching the translation services.
    """
    if not target_languages:
        return

    pending_processes = TranscriptProcessMetadata.objects.filter(
        process_id=file_id,
        provider=TranscriptProvider.THREE_PLAY,
        status=TranscriptStatus.PENDING,
        lang_code__in=target_languages,
    )

    try:
        available_services = get_translation_services(api_key)
    except TranscriptTranslationError:
        # Without the services nothing can be ordered: fail every pending process of this file.
        pending_processes.update(status=TranscriptStatus.FAILED)
        raise

    for target_language in target_languages:
        # 1 - Pick the process tracking this language.
        try:
            translation_process = pending_processes.filter(lang_code=target_language).latest()
        except TranscriptProcessMetadata.DoesNotExist:
            LOGGER.warning(
                u'[3PlayMedia Callback] process not found for target language %s -- process id %s',
                target_language,
                file_id,
            )
            continue

        # 2 - Find the standard translation service for the language.
        translation_service_id = get_standard_translation_service(available_services, target_language)
        if translation_service_id is None:
            translation_process.update(status=TranscriptStatus.FAILED)
            LOGGER.error(
                u'[3PlayMedia Callback] No translation service found for target language %s -- process id %s',
                target_language,
                file_id,
            )
            continue

        # 3 - Order the translation from that service.
        translation_order = place_translation_order(
            api_key=api_key,
            api_secret=api_secret,
            translation_service_id=translation_service_id,
            target_language=target_language,
            file_id=file_id,
        )
        if not translation_order:
            translation_process.update(status=TranscriptStatus.FAILED)
            continue

        translation_process.update(
            translation_id=translation_order['translation_id'],
            status=TranscriptStatus.IN_PROGRESS
        )
def validate_transcript_response(edx_video_id, file_id, transcript, lang_code, log_prefix):
    """
    Tell whether the content received from 3Play Media is a transcript.

    The content is expected to be SRT. If it can be decoded as JSON instead, it is treated
    as an error response: it is logged and the content is reported as invalid.

    Arguments:
        edx_video_id(unicode): studio video identifier
        file_id(unicode): file identifier
        transcript(unicode): SRT transcript content ideally
        lang_code(unicode): language code
        log_prefix(unicode): A prefix for the emitted logs

    Returns:
        bool: False when `transcript` is JSON decodable, True otherwise.
    """
    try:
        json.loads(transcript)
    except ValueError:
        # Not JSON, so this is not an error response.
        return True

    LOGGER.error(
        u'[%s] Transcript fetch error for video=%s -- lang_code=%s -- process=%s -- response=%s',
        log_prefix,
        edx_video_id,
        lang_code,
        file_id,
        transcript,
    )
    return False
def get_transcript_credentials(provider, org, edx_video_id, file_id, log_prefix):
    """
    Look up the transcript credentials of an organization for a provider.

    Arguments:
        provider(TranscriptProvider): transcript provider
        org(unicode): organization extracted from course id
        edx_video_id(unicode): studio video identifier, only used for logging
        file_id(unicode): file identifier or process identifier, only used for logging
        log_prefix(unicode): A prefix for the emitted logs

    Returns:
        The matching `TranscriptCredentials` or None (after logging) when there is none.
    """
    try:
        return TranscriptCredentials.objects.get(org=org, provider=provider)
    except TranscriptCredentials.DoesNotExist:
        LOGGER.exception(
            u'[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
            log_prefix,
            org,
            edx_video_id,
            file_id,
        )
        return None
@django.dispatch.receiver(THREE_PLAY_TRANSCRIPTION_DONE, dispatch_uid="three_play_transcription_done")
def three_play_transcription_callback(sender, **kwargs):
    """
    This is a receiver for 3Play Media callback signal.

    Arguments:
        sender: sender of the signal
        kwargs(dict): video transcription metadata; `org`, `edx_video_id`, `lang_code`,
            `file_id` and `status` are required, `error_description` is only read when
            `status` is 'error'.

    Process (when `status` is 'complete'):
        * download transcript(SRT) from 3PlayMedia
        * validate it; an error response fails the process and nothing else is done
        * convert SRT to SJSON
        * upload SJSON to AWS S3
        * update edx-val with the uploaded transcript
        * order translations for all the preferred languages
        * update transcript status in VAL when there is nothing to translate

    Raises:
        Any exception raised while converting/uploading the transcript, and any exception
        other than `TranscriptTranslationError` raised while ordering translations, is
        logged and re-raised.
    """
    log_prefix = u'3PlayMedia Callback'

    # Extract all the must have attributes
    org = kwargs['org']
    edx_video_id = kwargs['edx_video_id']
    lang_code = kwargs['lang_code']
    file_id = kwargs['file_id']
    state = kwargs['status']

    try:
        process = TranscriptProcessMetadata.objects.filter(
            provider=TranscriptProvider.THREE_PLAY,
            process_id=file_id,
            lang_code=lang_code,
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        LOGGER.exception(
            u'[3PlayMedia Callback] Unable to get transcript process for org=%s, edx_video_id=%s, file_id=%s.',
            org,
            edx_video_id,
            file_id,
        )
        return

    # On completion of a transcript
    # Indicates that the default video speech transcription has been done successfully.
    if state == COMPLETE:
        log_args = (edx_video_id, lang_code, file_id)

        # 1 - Retrieve transcript credentials
        transcript_secrets = get_transcript_credentials(
            provider=TranscriptProvider.THREE_PLAY,
            org=org,
            edx_video_id=edx_video_id,
            file_id=file_id,
            log_prefix=log_prefix,
        )
        if not transcript_secrets:
            process.update(status=TranscriptStatus.FAILED)
            return

        # 2 - Fetch the transcript from 3Play Media.
        try:
            srt_transcript = fetch_srt_data(
                THREE_PLAY_TRANSCRIPT_URL.format(file_id=file_id),
                apikey=transcript_secrets.api_key,
            )
        except TranscriptFetchError:
            LOGGER.exception(
                u'[3PlayMedia Callback] Fetch request failed for video=%s -- lang_code=%s -- process_id=%s',
                *log_args
            )
            process.update(status=TranscriptStatus.FAILED)
            return

        # 3 - Validate transcript content received from 3Play Media and mark the transcription process.
        is_valid_transcript = validate_transcript_response(
            edx_video_id=edx_video_id,
            file_id=file_id,
            transcript=srt_transcript,
            lang_code=lang_code,
            log_prefix=log_prefix,
        )
        if not is_valid_transcript:
            # An error response has been received instead of SRT content: there is nothing
            # to convert, upload or translate, so fail the process and stop right here.
            process.update(status=TranscriptStatus.FAILED)
            return

        process.update(status=TranscriptStatus.READY)

        # 4 - Convert SRT transcript to SJson format and upload it to S3.
        try:
            sjson_transcript = convert_srt_to_sjson(srt_transcript)
            sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
        except Exception:
            # in case of any exception, log and raise.
            LOGGER.exception(
                u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
                *log_args
            )
            raise

        # 5 - Update edx-val with completed transcript information.
        val_api = VALAPICall(video_proto=None, val_status=None)
        val_api.update_val_transcript(
            video_id=process.video.studio_id,
            lang_code=lang_code,
            name=sjson_file,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        # 6 - Translation Phase
        # That's the phase for kicking off translation processes for all the
        # preferred languages except the video's speech language.
        # Filtering (rather than `list.remove`) does not fail when the speech language is
        # not one of the preferred languages.
        target_languages = [
            language for language in process.video.preferred_languages if language != lang_code
        ]

        # Create the translation tracking processes for all the target languages.
        for target_language in target_languages:
            TranscriptProcessMetadata.objects.create(
                video=process.video,
                provider=TranscriptProvider.THREE_PLAY,
                process_id=file_id,
                lang_code=target_language,
                status=TranscriptStatus.PENDING,
            )

        # Order translations for target languages
        try:
            order_translations(file_id, transcript_secrets.api_key, transcript_secrets.api_secret, target_languages)
        except TranscriptTranslationError:
            LOGGER.exception(
                u'[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
                *log_args
            )
        except Exception:
            LOGGER.exception(
                u'[3PlayMedia Callback] Error while translating the transcripts - video=%s, lang_code=%s, file_id=%s',
                *log_args
            )
            raise

        # 7 - Update transcript status.
        # It will be for edx-val as well as edx-video-pipeline and this will be the case when
        # there is only one transcript language for a video(that is, already been processed).
        if not target_languages:
            val_api.update_video_status(process.video.studio_id, VideoStatus.TRANSCRIPT_READY)

        # On success, a happy farewell log.
        LOGGER.info(
            (u'[3PlayMedia Callback] Video speech transcription was successful for'
             u' video=%s -- lang_code=%s -- process_id=%s'),
            *log_args
        )

    elif state == ERROR:
        # Fail the process
        process.status = TranscriptStatus.FAILED
        process.save()
        # Log the error information
        LOGGER.error(
            u'[3PlayMedia Callback] Error while transcription - error=%s, org=%s, edx_video_id=%s, file_id=%s.',
            kwargs.get('error_description'),
            org,
            edx_video_id,
            file_id,
        )
    else:
        # Status must be either 'complete' or 'error'
        # more details on http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        LOGGER.error(
            u'[3PlayMedia Callback] Got invalid status - status=%s, org=%s, edx_video_id=%s, file_id=%s.',
            state,
            org,
            edx_video_id,
            file_id,
        )
def get_translation_status(api_key, file_id, translation_id, edx_video_id, lang_code):
    """
    Ask 3Play Media for the status of a translation process.

    Arguments:
        api_key(unicode): api key
        file_id(unicode): file identifier or process identifier
        translation_id(unicode): translation identifier associated with that file identifier
        edx_video_id(unicode): video studio identifier, only used for logging
        lang_code(unicode): language code, only used for logging

    Returns:
        The decoded translation status, or None (after logging) when the request is not
        successful or the response has a truthy `iserror` attribute.
    """
    status_url = THREE_PLAY_TRANSLATION_STATUS_URL.format(file_id=file_id, translation_id=translation_id)
    response = requests.get(utils.build_url(status_url, apikey=api_key))
    if not response.ok:
        LOGGER.error(
            (u'[3PlayMedia Task] Translation status request failed for video=%s -- '
             u'lang_code=%s -- process_id=%s -- status=%s'),
            edx_video_id,
            lang_code,
            file_id,
            response.status_code,
        )
        return None

    translation_status = json.loads(response.text)
    if not translation_status.get('iserror'):
        return translation_status

    LOGGER.error(
        (u'[3PlayMedia Task] unable to get translation status for video=%s -- '
         u'lang_code=%s -- process_id=%s -- response=%s'),
        edx_video_id,
        lang_code,
        file_id,
        response.text,
    )
    return None
def retrieve_three_play_translations():
    """
    Checks translation status on 3PlayMedia for all the progressing processes, fetches them if they're complete.

    Retrieval flow:
        1. Fetches 3PlayMedia translation processes whose status is `in progress` and which
           have a translation id
        2. For each process, retrieve the org-wide api keys
        3. Check translation status through 3PlayMedia
        4. If its done, mark the process as complete, fetch translated transcript, convert to sjson,
           upload it to s3 and finally, update it in edx-val.

    A process is failed when its credentials or its translation status cannot be obtained, or
    when the downloaded content is an error response. It is left `in progress` when the
    translation is not complete yet or when its download request fails.

    Raises:
        Any exception raised while converting/uploading a translation is logged and re-raised,
        which also stops the processing of the remaining processes.
    """
    log_prefix = u'3PlayMedia Task'
    translation_processes = TranscriptProcessMetadata.objects.filter(
        provider=TranscriptProvider.THREE_PLAY,
        status=TranscriptStatus.IN_PROGRESS,
    ).exclude(Q(translation_id__isnull=True) | Q(translation_id__exact=''))

    for translation_process in translation_processes:
        log_args = (
            translation_process.video.studio_id,
            translation_process.lang_code,
            translation_process.process_id,
        )
        # The org is extracted from the first comma separated entry of the course's `local_storedir`.
        course_id = translation_process.video.inst_class.local_storedir.split(',')[0]
        org = utils.extract_course_org(course_id=course_id)

        # Retrieve transcript credentials
        three_play_secrets = get_transcript_credentials(
            provider=TranscriptProvider.THREE_PLAY,
            org=org,
            edx_video_id=translation_process.video.studio_id,
            file_id=translation_process.process_id,
            log_prefix=log_prefix
        )
        if not three_play_secrets:
            # Fail the process
            translation_process.update(status=TranscriptStatus.FAILED)
            continue

        # Check transcript status
        translation_status = get_translation_status(
            three_play_secrets.api_key,
            translation_process.process_id,
            translation_process.translation_id,
            translation_process.video.studio_id,
            translation_process.lang_code,
        )
        if not translation_status:
            # Fail the process
            translation_process.update(status=TranscriptStatus.FAILED)
            continue

        # On a complete translation
        # `.get` keeps a response without a `state` attribute from raising a KeyError that
        # would abort the whole run; such a process is simply checked again on the next run.
        if translation_status.get('state') == COMPLETE:
            # 1 - Fetch translation content from 3Play Media.
            try:
                srt_transcript = fetch_srt_data(
                    url=THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
                        file_id=translation_process.process_id, translation_id=translation_process.translation_id
                    ),
                    apikey=three_play_secrets.api_key,
                )
            except TranscriptFetchError:
                LOGGER.exception(
                    u'[3PlayMedia Task] Translation download failed for video=%s -- lang_code=%s -- process_id=%s.',
                    *log_args
                )
                # The process stays `in progress`, so the download is attempted again on the next run.
                continue

            # 2 - Validate the translation's SRT content received from 3Play Media.
            is_transcript_valid = validate_transcript_response(
                edx_video_id=translation_process.video.studio_id,
                file_id=translation_process.process_id,
                transcript=srt_transcript,
                lang_code=translation_process.lang_code,
                log_prefix=log_prefix
            )
            if is_transcript_valid:
                translation_process.update(status=TranscriptStatus.READY)
            else:
                translation_process.update(status=TranscriptStatus.FAILED)
                continue

            # 3 - Convert SRT translation to SJson format and upload it to S3.
            try:
                sjson_transcript = convert_srt_to_sjson(srt_transcript)
                sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
            except Exception:
                # in case of any exception, log and raise.
                LOGGER.exception(
                    u'[3PlayMedia Task] translation failed for video=%s -- lang_code=%s -- process_id=%s',
                    *log_args
                )
                raise

            # 4 Update edx-val with completed transcript information
            val_api = VALAPICall(video_proto=None, val_status=None)
            val_api.update_val_transcript(
                video_id=translation_process.video.studio_id,
                lang_code=translation_process.lang_code,
                name=sjson_file,
                transcript_format=TRANSCRIPT_SJSON,
                provider=TranscriptProvider.THREE_PLAY,
            )

            # 5 - if all the processes for this video are complete, update video status in edx-val
            # update transcript status for video in edx-val as well as edx-video-pipeline.
            video_jobs = TranscriptProcessMetadata.objects.filter(video__studio_id=translation_process.video.studio_id)
            if all(video_job.status == TranscriptStatus.READY for video_job in video_jobs):
                val_api.update_video_status(translation_process.video.studio_id, VideoStatus.TRANSCRIPT_READY)
from __future__ import absolute_import
import os
import sys
from celery import Celery
import yaml
from VEDA_OS01.transcripts import retrieve_three_play_translations
"""
Start Celery Worker
......@@ -51,6 +51,18 @@ app.conf.update(
CELERY_ACCEPT_CONTENT=['pickle', 'json', 'msgpack', 'yaml']
)
# Periodic tasks run by celery beat.
app.conf.beat_schedule = {
    'check-3play-translations-every-30-seconds': {
        # Must be the exact name the task is registered under (see `name=` below); a name
        # that is not registered cannot be resolved to a task by the worker.
        'task': 'fetch_three_play_translations',
        'schedule': 30.0,
    },
}


@app.task(name='fetch_three_play_translations')
def fetch_three_play_translations():
    """
    Periodic task that checks the in-progress 3PlayMedia translations and retrieves
    the completed ones.
    """
    retrieve_three_play_translations()
@app.task(name='worker_encode')
def worker_task_fire(veda_id, encode_profile, jobid):
......
"""
3PlayMedia transcription unit tests
"""
import json
import urllib
import responses
from ddt import ddt, data, unpack
from mock import patch
from django.test import TestCase
from control.veda_deliver_3play import (
ThreePlayMediaClient,
ThreePlayMediaUrlError,
ThreePlayMediaPerformTranscriptionError,
)
from VEDA_OS01.models import (
Course,
TranscriptProcessMetadata,
Video,
ThreePlayTurnaround,
)
# Keyword arguments used to create the `Video` fixture in `setUp`; the
# `studio_id` value is also what the logger assertions in the tests expect.
VIDEO_DATA = {
'studio_id': '12345'
}
@ddt
class ThreePlayMediaClientTests(TestCase):
    """
    Unit tests for `ThreePlayMediaClient`.
    """

    def setUp(self):
        """
        Create the course and video fixtures plus the client keyword arguments shared by all tests.
        """
        self.course = Course.objects.create(
            course_name=u'Intro to VEDA',
            institution=u'MAx',
            edx_classid=u'123'
        )
        self.video = Video.objects.create(inst_class=self.course, **VIDEO_DATA)

        # Keyword arguments accepted by `ThreePlayMediaClient.__init__`.
        self.video_transcript_preferences = {
            'org': u'MAx',
            'video': self.video,
            'media_url': u'https://s3.amazonaws.com/bkt/video.mp4',
            'api_key': u'insecure_api_key',
            'api_secret': u'insecure_api_secret',
            'turnaround_level': ThreePlayTurnaround.DEFAULT,
            'callback_url': 'https://veda.edx.org/3playmedia/transcripts/handle/123123?org=MAx&edx_video_id=12345'
        }

    def assert_request(self, received_request, expected_request):
        """
        Assert that every attribute listed in `expected_request` matches `received_request`.

        The 'headers' entry is compared header by header, so headers present on the
        received request but not listed in the expectation are ignored.
        """
        for attr_name, expected_value in expected_request.items():
            actual_value = getattr(received_request, attr_name)
            if attr_name != 'headers':
                self.assertEqual(actual_value, expected_value)
                continue

            for header_name, header_value in expected_value.items():
                self.assertEqual(actual_value[header_name], header_value)

    @responses.activate
    @patch('control.veda_deliver_3play.LOGGER')
    def test_transcription_flow(self, mock_logger):
        """
        A reachable mp4 URL followed by a successful upload starts the transcription process.
        """
        preferences = self.video_transcript_preferences
        media_url = u'https://s3.amazonaws.com/bkt/video.mp4'
        upload_url = u'https://api.3playmedia.com/files'

        three_play_client = ThreePlayMediaClient(**preferences)
        responses.add(responses.HEAD, media_url, headers={'Content-Type': u'video/mp4'}, status=200)
        responses.add(responses.POST, upload_url, body=u'111222', status=200)

        three_play_client.generate_transcripts()

        # One HEAD request (URL validation) and one POST request (upload).
        self.assertEqual(len(responses.calls), 2)

        # Built the same way the client builds its payload.
        body = dict(
            # Mandatory attributes required for transcription
            link=preferences['media_url'],
            apikey=preferences['api_key'],
            api_secret_key=preferences['api_secret'],
            turnaround_level=preferences['turnaround_level'],
            callback_url=preferences['callback_url'],
        )
        head_request = {
            'url': media_url,
            'body': None,
            'method': 'HEAD',
        }
        post_request = {
            'url': upload_url,
            'body': json.dumps(body),
            'method': 'POST',
            'headers': {'Content-Type': 'application/json'}
        }
        for call_index, expectation in enumerate([head_request, post_request]):
            self.assert_request(responses.calls[call_index].request, expectation)

        self.assertEqual(TranscriptProcessMetadata.objects.count(), 1)
        mock_logger.info.assert_called_with(
            '[3PlayMedia] Transcription process has been started for video=%s, language=en.',
            VIDEO_DATA['studio_id'],
        )

    @data(
        {
            'headers': {'Content-Type': u'video/mp4'},
            'status': 400,
        },
        {
            'headers': {'Content-Type': u'application/json'},
            'status': 200,
        }
    )
    @responses.activate
    def test_validate_media_url(self, response):
        """
        An inaccessible URL or an unexpected content type is rejected with `ThreePlayMediaUrlError`.
        """
        responses.add(responses.HEAD, u'https://s3.amazonaws.com/bkt/video.mp4', **response)
        three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)

        with self.assertRaises(ThreePlayMediaUrlError):
            three_play_client.validate_media_url()

    @data(
        {
            'body': None,
            'status': 400,
        },
        {
            'body': json.dumps({'iserror': True, 'error': 'Submission has failed'}),
            'status': 200,
        }
    )
    @responses.activate
    def test_submit_media_exceptions(self, response):
        """
        A failed upload, or an upload answered with an error dict, raises
        `ThreePlayMediaPerformTranscriptionError`.
        """
        # The media URL itself is valid in both scenarios.
        responses.add(
            responses.HEAD,
            u'https://s3.amazonaws.com/bkt/video.mp4',
            headers={'Content-Type': u'video/mp4'},
            status=200,
        )
        responses.add(responses.POST, u'https://api.3playmedia.com/files', **response)
        three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)

        with self.assertRaises(ThreePlayMediaPerformTranscriptionError):
            three_play_client.submit_media()

    @data(
        (
            {
                'body': None,
                'status': 400,
            },
            {
                'body': '11111',
                'status': 200,
            },
        ),
        (
            {
                'headers': {'Content-Type': u'video/mp4'},
                'status': 200,
            },
            {
                'body': None,
                'status': 400,
            },
        )
    )
    @unpack
    @responses.activate
    @patch('control.veda_deliver_3play.LOGGER')
    def test_generate_transcripts_exceptions(self, first_response, second_response, mock_log):
        """
        3PlayMedia errors during transcript generation are logged and no process metadata is stored.
        """
        responses.add(responses.HEAD, u'https://s3.amazonaws.com/bkt/video.mp4', **first_response)
        responses.add(responses.POST, u'https://api.3playmedia.com/files', **second_response)

        three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)
        three_play_client.generate_transcripts()

        # The success message must not be logged; the failure must be.
        self.assertFalse(mock_log.info.called)
        mock_log.exception.assert_called_with(
            u'[3PlayMedia] Could not process transcripts for video=%s language=en.',
            VIDEO_DATA['studio_id'],
        )
        self.assertEqual(TranscriptProcessMetadata.objects.count(), 0)
import datetime
import ftplib
import logging
import os
import shutil
import sys
from os.path import expanduser
import boto
......@@ -16,11 +13,12 @@ from boto.s3.key import Key
from django.core.urlresolvers import reverse
import veda_deliver_xuetang
from control.veda_deliver_3play import ThreePlayMediaClient
from control_env import *
from veda_deliver_cielo import Cielo24Transcript
from veda_deliver_youtube import DeliverYoutube
from VEDA_OS01 import utils
from VEDA_OS01.models import (TranscriptPreferences, TranscriptProvider,
from VEDA_OS01.models import (TranscriptCredentials, TranscriptProvider,
VideoStatus)
from VEDA_OS01.utils import build_url
from veda_utils import ErrorObject, Metadata, Output, VideoProto
......@@ -190,7 +188,6 @@ class VedaDelivery:
self._CLEANUP()
self._THREEPLAY_UPLOAD()
# Transcription Process
# We only want to generate transcripts for `desktop_mp4` profile.
if self.encode_profile == 'desktop_mp4' and self.video_query.process_transcription:
......@@ -203,7 +200,6 @@ class VedaDelivery:
if self.video_query.provider == TranscriptProvider.CIELO24:
self.cielo24_transcription_flow()
def _INFORM_INTAKE(self):
"""
Collect all salient metadata and
......@@ -538,8 +534,8 @@ class VedaDelivery:
org = utils.extract_course_org(self.video_proto.platform_course_url[0])
try:
api_key = TranscriptPreferences.objects.get(org=org, provider=self.video_query.provider).api_key
except TranscriptPreferences.DoesNotExist:
api_key = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider).api_key
except TranscriptCredentials.DoesNotExist:
LOGGER.warn('[cielo24] Unable to find api_key for org=%s', org)
return None
......@@ -574,47 +570,54 @@ class VedaDelivery:
)
cielo24.start_transcription_flow()
def _THREEPLAY_UPLOAD(self):
if self.video_query.inst_class.tp_proc is False:
return None
if self.video_query.inst_class.mobile_override is False:
if self.encode_profile != 'desktop_mp4':
return None
ftp1 = ftplib.FTP(
self.auth_dict['threeplay_ftphost']
)
user = self.video_query.inst_class.tp_username.strip()
passwd = self.video_query.inst_class.tp_password.strip()
def start_3play_transcription_process(self):
"""
3PlayMedia Transcription Flow
"""
try:
ftp1.login(user, passwd)
except:
ErrorObject.print_error(
message='3Play Authentication Failure'
# Picks the first course from the list as there may be multiple
# course runs in that list (i.e. all having the same org).
org = utils.extract_course_org(self.video_proto.platform_course_url[0])
transcript_secrets = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider)
# update transcript status for video in edx-val
VALAPICall(video_proto=None, val_status=None).update_video_status(
self.video_query.studio_id, VideoStatus.TRANSCRIPTION_IN_PROGRESS
)
try:
ftp1.cwd(
self.video_query.inst_class.tp_speed
# Initialize 3playMedia client and start transcription process
s3_video_url = build_url(
self.auth_dict['s3_base_url'],
self.auth_dict['edx_s3_endpoint_bucket'],
self.encoded_file
)
except:
ftp1.mkd(
self.video_query.inst_class.tp_speed
callback_url = build_url(
self.auth_dict['veda_base_url'],
reverse(
'3play_media_callback',
args=[self.auth_dict['transcript_provider_request_token']]
),
# Additional attributes that'll come back with the callback
org=org,
edx_video_id=self.video_query.studio_id,
)
ftp1.cwd(
self.video_query.inst_class.tp_speed
three_play_media = ThreePlayMediaClient(
org=org,
video=self.video_query,
media_url=s3_video_url,
api_key=transcript_secrets.api_key,
api_secret=transcript_secrets.api_secret,
callback_url=callback_url,
turnaround_level=self.video_query.three_play_turnaround,
)
os.chdir(self.node_work_directory)
ftp1.storbinary(
'STOR ' + self.encoded_file,
open(os.path.join(
self.node_work_directory,
self.encoded_file
), 'rb')
)
three_play_media.generate_transcripts()
os.chdir(homedir)
except TranscriptCredentials.DoesNotExist:
LOGGER.warning(
'Transcript preference is not found for provider=%s, video=%s',
self.video_query.provider,
self.video_query.studio_id,
)
def _XUETANG_ROUTE(self):
if self.video_query.inst_class.xuetang_proc is False:
......
"""
3PlayMedia Transcription Client
"""
import json
import logging
import requests
import sys
from requests.packages.urllib3.exceptions import InsecurePlatformWarning
from VEDA_OS01.models import TranscriptProcessMetadata, TranscriptProvider, TranscriptStatus
from VEDA_OS01.utils import build_url
# Suppress urllib3's InsecurePlatformWarning.
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
class ThreePlayMediaError(Exception):
    """
    Base class for the errors raised while performing 3PlayMedia actions.
    """
class ThreePlayMediaLanguageNotFoundError(ThreePlayMediaError):
    """
    Raised when a language is not among the languages available in 3PlayMedia.
    """
class ThreePlayMediaPerformTranscriptionError(ThreePlayMediaError):
    """
    Raised when submitting media for transcription fails.
    """
class ThreePlayMediaUrlError(ThreePlayMediaError):
    """
    Raised when the media url is inaccessible or has an invalid content type.
    """
class ThreePlayMediaClient(object):
    """
    Client that submits a video to 3PlayMedia and records the started transcription process.
    """

    def __init__(self, org, video, media_url, api_key, api_secret, callback_url, turnaround_level):
        """
        Initialize 3play media client

        Arguments:
            org: organization the video belongs to (stored, not otherwise used by this class)
            video: video object; its `studio_id` is used in log messages and it is
                stored on the created `TranscriptProcessMetadata`
            media_url: URL of the media file to be transcribed
            api_key: 3PlayMedia API key
            api_secret: 3PlayMedia API secret key
            callback_url: URL sent to 3PlayMedia as `callback_url`
            turnaround_level: value sent to 3PlayMedia as `turnaround_level`
        """
        self.org = org
        self.video = video
        self.media_url = media_url
        self.api_key = api_key
        self.api_secret = api_secret
        self.callback_url = callback_url
        self.turnaround_level = turnaround_level

        # default attributes
        self.base_url = u'https://api.3playmedia.com/'
        self.upload_media_file_url = u'files/'
        self.available_languages_url = u'caption_imports/available_languages/'
        self.allowed_content_type = u'video/mp4'

    def validate_media_url(self):
        """
        Validates the media URL

        Issues a HEAD request against the media URL and checks both the response
        status and the reported content type.

        Raises:
            ThreePlayMediaUrlError: on invalid media url or content type
        """
        if not self.media_url:
            raise ThreePlayMediaUrlError('Invalid media URL "{media_url}".'.format(media_url=self.media_url))

        response = requests.head(url=self.media_url)
        if not response.ok:
            raise ThreePlayMediaUrlError('The URL "{media_url}" is not Accessible.'.format(media_url=self.media_url))

        # Use `.get` so a response without a Content-Type header is reported as an
        # invalid media URL instead of surfacing as an unrelated KeyError.
        content_type = response.headers.get('Content-Type')
        if content_type != self.allowed_content_type:
            raise ThreePlayMediaUrlError(
                'Media content-type should be "{allowed_type}". URL was "{media_url}", content-type was "{type}"'.format(
                    allowed_type=self.allowed_content_type,
                    media_url=self.media_url,
                    type=content_type,
                )
            )

    def submit_media(self):
        """
        Submits the media to perform transcription.

        Returns:
            the response text of the upload request (the 3PlayMedia file id)

        Raises:
            ThreePlayMediaUrlError: when the media URL does not pass validation
            ThreePlayMediaPerformTranscriptionError: error while transcription process
        """
        self.validate_media_url()

        # Prepare requests payload
        payload = dict(
            # Mandatory attributes required for transcription
            link=self.media_url,
            apikey=self.api_key,
            api_secret_key=self.api_secret,
            turnaround_level=self.turnaround_level,
            callback_url=self.callback_url,
        )
        upload_url = build_url(self.base_url, self.upload_media_file_url)
        response = requests.post(url=upload_url, json=payload)

        if not response.ok:
            raise ThreePlayMediaPerformTranscriptionError(
                'Upload file request failed with: {response} -- {status}'.format(
                    response=response.text, status=response.status_code
                )
            )

        # A normal response should be a text containing file id and if we're getting a deserializable dict, there
        # must be an error: http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        # NOTE(review): a body that is not valid JSON makes `json.loads` raise ValueError here - confirm
        # whether such a body should be treated as a file id or as a submission error.
        if isinstance(json.loads(response.text), dict):
            raise ThreePlayMediaPerformTranscriptionError(
                'Expected file id but got: {response}'.format(response=response.text)
            )

        return response.text

    def generate_transcripts(self):
        """
        Kicks off transcription process for default language.

        3PlayMedia specific errors are logged and swallowed; any other exception
        is logged and re-raised.
        """
        try:
            file_id = self.submit_media()

            # Track progress of transcription process
            TranscriptProcessMetadata.objects.create(
                video=self.video,
                process_id=file_id,
                lang_code=u'en',
                provider=TranscriptProvider.THREE_PLAY,
                status=TranscriptStatus.IN_PROGRESS,
            )

            # Successfully kicked off transcription process for a video with the given language.
            LOGGER.info(
                '[3PlayMedia] Transcription process has been started for video=%s, language=en.',
                self.video.studio_id,
            )
        except ThreePlayMediaError:
            LOGGER.exception(
                '[3PlayMedia] Could not process transcripts for video=%s language=en.',
                self.video.studio_id,
            )
        except Exception:
            LOGGER.exception(
                '[3PlayMedia] Unexpected error while transcription for video=%s language=en .',
                self.video.studio_id,
            )
            raise
def main():
    """
    Entry point used when this module is executed as a script; performs no work.
    """
    return None


if __name__ == '__main__':
    sys.exit(main())
......@@ -189,22 +189,22 @@ class Cielo24Transcript(object):
Returns:
cielo24 job id
"""
response = requests.get(
build_url(
self.cielo24_site,
self.cielo24_new_job,
v=1,
language='en',
api_token=self.api_key,
job_name=self.video.studio_id
)
create_job_url = build_url(
self.cielo24_site,
self.cielo24_new_job,
v=1,
language='en',
api_token=self.api_key,
job_name=self.video.studio_id
)
response = requests.get(create_job_url)
if not response.ok:
raise Cielo24CreateJobError(
'[CREATE JOB ERROR] status={} -- text={}'.format(
'[CREATE JOB ERROR] url={} -- status={} -- text={}'.format(
create_job_url,
response.status_code,
response.text
response.text,
)
)
......
......@@ -7,7 +7,7 @@ import boto.s3
from boto.exception import S3ResponseError, S3DataError
import yaml
from VEDA_OS01.models import TranscriptPreferences
from VEDA_OS01.models import TranscriptCredentials
from VEDA_OS01.utils import extract_course_org
try:
......@@ -237,12 +237,12 @@ class FileDiscovery(object):
# Make decision if this video needs the transcription as well.
try:
transcript_preferences = json.loads(transcript_preferences)
TranscriptPreferences.objects.get(
TranscriptCredentials.objects.get(
org=extract_course_org(course_url),
provider=transcript_preferences.get('provider')
)
process_transcription = True
except (TypeError, TranscriptPreferences.DoesNotExist):
except (TypeError, TranscriptCredentials.DoesNotExist):
# when the preferences are not set OR these are set to some data in invalid format OR these don't
# have associated 3rd party transcription provider API keys.
process_transcription = False
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment