Commit c0977ca6 by Qubad786 Committed by muzaffaryousaf

Add video source/speech language for 3PlayMedia.

parent 01fb6fac
......@@ -3,7 +3,7 @@
*.pyc
static/admin/
static/
sandbox.db
......
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2017-09-13 07:53
# Generated by Django 1.9 on 2017-10-03 10:22
from __future__ import unicode_literals
import VEDA_OS01.models
......@@ -40,7 +40,7 @@ class Migration(migrations.Migration):
('provider', models.CharField(choices=[(b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], max_length=50, verbose_name=b'Transcript provider')),
('process_id', models.CharField(max_length=255, verbose_name=b'Process id')),
('translation_id', models.CharField(blank=True, help_text=b'Keeps track of 3Play Translations', max_length=255, null=True, verbose_name=b'Translation id')),
('lang_code', models.CharField(max_length=8, verbose_name=b'Language code')),
('lang_code', models.CharField(max_length=50, verbose_name=b'Language code')),
('status', models.CharField(choices=[(b'PENDING', b'PENDING'), (b'IN PROGRESS', b'IN PROGRESS'), (b'FAILED', b'FAILED'), (b'READY', b'READY')], default=b'PENDING', max_length=50, verbose_name=b'Transcript status')),
],
options={
......@@ -75,6 +75,11 @@ class Migration(migrations.Migration):
),
migrations.AddField(
model_name='video',
name='source_language',
field=models.CharField(blank=True, help_text=b'This is only needed with 3Play Media Provider', max_length=50, null=True, verbose_name=b'video source language'),
),
migrations.AddField(
model_name='video',
name='three_play_turnaround',
field=models.CharField(blank=True, choices=[(b'extended_service', b'10-Day/Extended'), (b'default', b'4-Day/Default'), (b'expedited_service', b'2-Day/Expedited'), (b'rush_service', b'24 hour/Rush'), (b'same_day_service', b'Same Day')], max_length=20, null=True, verbose_name=b'3PlayMedia Turnaround'),
),
......
......@@ -469,6 +469,13 @@ class Video (models.Model):
null=True,
blank=True,
)
source_language = models.CharField(
'video source language',
help_text='This is only needed with 3Play Media Provider',
max_length=50,
null=True,
blank=True,
)
preferred_languages = ListField(blank=True, default=[])
class Meta:
......@@ -650,7 +657,7 @@ class TranscriptProcessMetadata(TimeStampedModel):
translation_id = models.CharField(
'Translation id', help_text='Keeps track of 3Play Translations', max_length=255, null=True, blank=True
)
lang_code = models.CharField('Language code', max_length=8)
lang_code = models.CharField('Language code', max_length=50)
status = models.CharField(
'Transcript status',
max_length=50,
......
......@@ -329,6 +329,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
self.org = u'MAx'
self.file_id = u'112233'
self.video_source_language = u'en'
self.edx_video_id = VIDEO_DATA['studio_id']
self.url = reverse('3play_media_callback', args=[CONFIG_DATA['transcript_provider_request_token']])
......@@ -341,6 +342,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
)
self.video = Video.objects.create(
inst_class=self.course,
source_language=self.video_source_language,
**VIDEO_DATA
)
......@@ -386,7 +388,10 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
"""
response = self.client.post(
# `build_url` strips the leading `/`, so put it back and add the necessary query params.
'/{}'.format(utils.build_url(self.url, edx_video_id=self.video.studio_id, org=self.org)),
'/{}'.format(utils.build_url(
self.url, edx_video_id=self.video.studio_id,
org=self.org, lang_code=self.video_source_language
)),
content_type='application/x-www-form-urlencoded',
data=urllib.urlencode(dict(file_id=self.file_id, status=state))
)
......@@ -446,7 +451,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
# Assert the logs
required_attrs = ['file_id', 'status', 'org', 'edx_video_id']
required_attrs = ['file_id', 'lang_code', 'status', 'org', 'edx_video_id']
received_attrs = request_data['data'].keys() + request_data['query_params'].keys()
missing_attrs = [attr for attr in required_attrs if attr not in received_attrs]
mock_logger.warning.assert_called_with(
......@@ -475,7 +480,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
"""
Tests the callback for all the non-success statuses.
"""
self.url = '/{}'.format(utils.build_url(self.url, edx_video_id='12345', org='MAx'))
self.url = '/{}'.format(utils.build_url(
self.url, edx_video_id='12345', org='MAx', lang_code=self.video_source_language
))
self.client.post(self.url, content_type='application/x-www-form-urlencoded', data=urllib.urlencode({
'file_id': self.file_id,
'status': state,
......@@ -993,8 +1000,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
'id': 30,
'source_language_name': 'English',
'source_language_iso_639_1_code': 'en',
'target_language_name': 'Romanian',
'target_language_iso_639_1_code': 'da',
'target_language_name': 'German',
'target_language_iso_639_1_code': 'de',
'service_level': 'standard',
'per_word_rate': 0.16
}]
......@@ -1005,7 +1012,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{
'method': 'error',
'args': (
'[3PlayMedia Callback] No translation service found for target language %s -- process id %s',
'[3PlayMedia Callback] No translation service found for '
'source language "%s" target language "%s" -- process id %s',
'en',
'ro',
'112233',
)
......
......@@ -333,7 +333,7 @@ class ThreePlayMediaCallbackHandlerView(APIView):
"""
Handle 3PlayMedia callback request.
"""
required_attrs = ['file_id', 'status', 'org', 'edx_video_id']
required_attrs = ['file_id', 'lang_code', 'status', 'org', 'edx_video_id']
received_attributes = request.data.keys() + request.query_params.keys()
missing = [attr for attr in required_attrs if attr not in received_attributes]
if missing:
......@@ -350,7 +350,7 @@ class ThreePlayMediaCallbackHandlerView(APIView):
sender=self,
org=request.query_params['org'],
edx_video_id=request.query_params['edx_video_id'],
lang_code='en',
lang_code=request.query_params['lang_code'],
file_id=request.data['file_id'],
status=request.data['status'],
# Following is going to be an error description if an error occurs during
......@@ -392,12 +392,13 @@ def get_translation_services(api_key):
return available_services
def get_standard_translation_service(translation_services, target_language):
def get_standard_translation_service(translation_services, source_language, target_language):
"""
Get standard translation service
Arguments:
translation_services(list): List of available 3play media translation services.
target_language(str): A language code whose standard translation service is needed.
source_language(unicode): A language code for video source/speech language.
target_language(unicode): A language code whose standard translation service is needed.
Returns:
A translation service id or None.
......@@ -405,6 +406,7 @@ def get_standard_translation_service(translation_services, target_language):
translation_service_id = None
for service in translation_services:
service_found = (
service['source_language_iso_639_1_code'] == source_language and
service['target_language_iso_639_1_code'] == target_language and
service['service_level'] == 'standard'
)
......@@ -455,7 +457,7 @@ def place_translation_order(api_key, api_secret, translation_service_id, target_
return translation_order
def order_translations(file_id, api_key, api_secret, target_languages):
def order_translations(file_id, api_key, api_secret, source_language, target_languages):
"""
Order translations on 3PlayMedia for all the target languages.
......@@ -472,6 +474,7 @@ def order_translations(file_id, api_key, api_secret, target_languages):
file_id(unicode): File identifier
api_key(unicode): API key
api_secret(unicode): API Secret
source_language(unicode): video source/speech language code
target_languages(list): List of language codes
Raises:
......@@ -508,12 +511,14 @@ def order_translations(file_id, api_key, api_secret, target_languages):
continue
# 2 - Find a standard service for translation for the target language.
translation_service_id = get_standard_translation_service(available_services, target_language)
translation_service_id = get_standard_translation_service(available_services, source_language, target_language)
if translation_service_id is None:
# Fail the process
translation_process.update(status=TranscriptStatus.FAILED)
LOGGER.error(
u'[3PlayMedia Callback] No translation service found for target language %s -- process id %s',
u'[3PlayMedia Callback] No translation service found for source language "%s" '
u'target language "%s" -- process id %s',
source_language,
target_language,
file_id,
)
......@@ -719,7 +724,13 @@ def three_play_transcription_callback(sender, **kwargs):
# Order translations for target languages
try:
order_translations(file_id, transcript_secrets.api_key, transcript_secrets.api_secret, target_languages)
order_translations(
file_id,
transcript_secrets.api_key,
transcript_secrets.api_secret,
source_language=lang_code,
target_languages=target_languages
)
except TranscriptTranslationError:
LOGGER.exception(
u'[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
......
......@@ -2,11 +2,10 @@
3PlayMedia transcription unit tests
"""
import json
import urllib
import responses
from ddt import ddt, data, unpack
from mock import patch
from mock import Mock, patch
from django.test import TestCase
from control.veda_deliver_3play import (
......@@ -20,9 +19,11 @@ from VEDA_OS01.models import (
Video,
ThreePlayTurnaround,
)
from VEDA_OS01.utils import build_url
VIDEO_DATA = {
'studio_id': '12345'
'studio_id': '12345',
'source_language': 'en'
}
......@@ -53,11 +54,16 @@ class ThreePlayMediaClientTests(TestCase):
'api_key': u'insecure_api_key',
'api_secret': u'insecure_api_secret',
'turnaround_level': ThreePlayTurnaround.DEFAULT,
'callback_url': 'https://veda.edx.org/3playmedia/transcripts/handle/123123?org=MAx&edx_video_id=12345',
'callback_url': build_url(
u'https://veda.edx.org/3playmedia/transcripts/handle/123123',
org=u'MAx',
edx_video_id=VIDEO_DATA['studio_id'],
lang_code=VIDEO_DATA['source_language'],
),
'three_play_api_base_url': 'https://api.3playmedia.com/',
}
def assert_request(self, received_request, expected_request):
def assert_request(self, received_request, expected_request, decode_func):
"""
Verify that `received_request` matches `expected_request`
"""
......@@ -67,6 +73,8 @@ class ThreePlayMediaClientTests(TestCase):
actual_headers = getattr(received_request, request_attr)
for attr, expect_value in expected_headers.iteritems():
self.assertEqual(actual_headers[attr], expect_value)
elif request_attr == 'body' and decode_func:
self.assertDictEqual(decode_func(received_request.body), expected_request[request_attr])
else:
self.assertEqual(getattr(received_request, request_attr), expected_request[request_attr])
......@@ -86,6 +94,16 @@ class ThreePlayMediaClientTests(TestCase):
)
responses.add(
responses.GET,
u'https://api.3playmedia.com/caption_imports/available_languages',
body=json.dumps([{
"iso_639_1_code": "en",
"language_id": 1,
}]),
status=200,
)
responses.add(
responses.POST,
u'https://api.3playmedia.com/files',
body=u'111222',
......@@ -94,8 +112,8 @@ class ThreePlayMediaClientTests(TestCase):
three_play_client.generate_transcripts()
# Total of 2 HTTP requests are made
self.assertEqual(len(responses.calls), 2)
# Total of 3 HTTP requests are made
self.assertEqual(len(responses.calls), 3)
body = dict(
# Mandatory attributes required for transcription
......@@ -104,6 +122,7 @@ class ThreePlayMediaClientTests(TestCase):
api_secret_key=self.video_transcript_preferences['api_secret'],
turnaround_level=self.video_transcript_preferences['turnaround_level'],
callback_url=self.video_transcript_preferences['callback_url'],
language_id=1,
)
expected_requests = [
......@@ -113,21 +132,32 @@ class ThreePlayMediaClientTests(TestCase):
'method': 'HEAD',
},
{
'url': u'https://api.3playmedia.com/caption_imports/available_languages?apikey=insecure_api_key',
'body': None,
'method': 'GET',
},
{
'url': u'https://api.3playmedia.com/files',
'body': json.dumps(body),
'body': body,
'method': 'POST',
'headers': {'Content-Type': 'application/json'}
'headers': {'Content-Type': 'application/json'},
'decode_func': json.loads
},
]
for position, expected_request in enumerate(expected_requests):
self.assert_request(responses.calls[position].request, expected_request)
self.assert_request(
received_request=responses.calls[position].request,
expected_request=expected_request,
decode_func=expected_request.pop('decode_func', None)
)
self.assertEqual(TranscriptProcessMetadata.objects.count(), 1)
mock_logger.info.assert_called_with(
'[3PlayMedia] Transcription process has been started for video=%s, language=en.',
'[3PlayMedia] Transcription process has been started for video=%s, source_language=%s.',
VIDEO_DATA['studio_id'],
VIDEO_DATA['source_language'],
)
@data(
......@@ -171,6 +201,13 @@ class ThreePlayMediaClientTests(TestCase):
headers={'Content-Type': u'video/mp4'},
status=200,
)
responses.add(responses.GET, u'https://api.3playmedia.com/caption_imports/available_languages', **{
'status': 200,
'body': json.dumps([{
"iso_639_1_code": "en",
"language_id": 1,
}])
})
responses.add(responses.POST, u'https://api.3playmedia.com/files', **response)
three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)
......@@ -179,41 +216,129 @@ class ThreePlayMediaClientTests(TestCase):
@data(
(
# Error
{
'body': None,
'status': 400,
},
# Success
{
'body': '[{"iso_639_1_code": "en", "language_id": 1}]',
'status': 200,
},
# Success
{
'body': '11111',
'status': 200,
},
),
(
# Success
{
'headers': {'Content-Type': u'video/mp4'},
'status': 200,
},
# Error
{
'body': None,
'status': 400,
},
# Success
{
'body': '11111',
'status': 200,
},
),
(
# Success
{
'headers': {'Content-Type': u'video/mp4'},
'status': 200,
},
# Error
{
'body': '{"error": "unauthorized"}',
'status': 200,
},
# Success
{
'body': '11111',
'status': 200,
},
),
(
# Success
{
'headers': {'Content-Type': u'video/mp4'},
'status': 200,
},
# Success
{
'body': '[{"iso_639_1_code": "en", "language_id": 1}]',
'status': 200,
},
# Error
{
'body': None,
'status': 400,
},
),
(
# Success
{
'headers': {'Content-Type': u'video/mp4'},
'status': 200,
},
# Success
{
'body': '[{"iso_639_1_code": "en", "language_id": 1}]',
'status': 200,
},
# Error
{
'body': '{"error": "unauthorized"}',
'status': 200,
},
)
)
@unpack
@responses.activate
@patch('control.veda_deliver_3play.LOGGER')
def test_generate_transcripts_exceptions(self, first_response, second_response, mock_log):
def test_generate_transcripts_exceptions(self, first_response, second_response, third_response, mock_log):
"""
Tests the proper exceptions during transcript generation.
"""
responses.add(responses.HEAD, u'https://s3.amazonaws.com/bkt/video.mp4', **first_response)
responses.add(responses.POST, u'https://api.3playmedia.com/files', **second_response)
responses.add(
responses.GET, u'https://api.3playmedia.com/caption_imports/available_languages', **second_response
)
responses.add(responses.POST, u'https://api.3playmedia.com/files', **third_response)
three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)
three_play_client.generate_transcripts()
self.assertFalse(mock_log.info.called)
mock_log.exception.assert_called_with(
u'[3PlayMedia] Could not process transcripts for video=%s language=en.',
u'[3PlayMedia] Could not process transcripts for video=%s source_language=%s.',
VIDEO_DATA['studio_id'],
VIDEO_DATA['source_language'],
)
self.assertEqual(TranscriptProcessMetadata.objects.count(), 0)
# `submit_media` is replaced with an explicit Mock, so `patch` injects no extra argument for it;
# only the patched LOGGER is passed in as `mock_log`.
@patch('control.veda_deliver_3play.LOGGER')
@patch('control.veda_deliver_3play.ThreePlayMediaClient.submit_media', Mock(side_effect=ValueError))
def test_generate_transcripts_unknown_exceptions(self, mock_log):
"""
Verify that unknown exceptions are logged and re-raised during transcript generation.

`submit_media` is forced to raise `ValueError`; the test expects that error to propagate out of
`generate_transcripts`, the failure to be reported through `LOGGER.exception`, no info log to be
emitted and no `TranscriptProcessMetadata` record to be created.
"""
three_play_client = ThreePlayMediaClient(**self.video_transcript_preferences)
# The unexpected error must not be swallowed by the client.
with self.assertRaises(ValueError):
three_play_client.generate_transcripts()
# The success log must not have been written.
self.assertFalse(mock_log.info.called)
mock_log.exception.assert_called_with(
u'[3PlayMedia] Unexpected error while transcription for video=%s source_language=%s.',
VIDEO_DATA['studio_id'],
VIDEO_DATA['source_language'],
)
# No transcript process should be recorded for a failed submission.
self.assertEqual(TranscriptProcessMetadata.objects.count(), 0)
......@@ -612,6 +612,7 @@ class VedaDelivery:
# Additional attributes that'll come back with the callback
org=org,
edx_video_id=self.video_query.studio_id,
lang_code=self.video_query.source_language,
)
three_play_media = ThreePlayMediaClient(
org=org,
......
......@@ -43,6 +43,13 @@ class ThreePlayMediaUrlError(ThreePlayMediaError):
pass
class ThreePlayMediaLanguagesRetrievalError(ThreePlayMediaError):
    """
    Raised when the languages supported by 3PlayMedia could not be retrieved.
    """
class ThreePlayMediaClient(object):
def __init__(
......@@ -94,6 +101,41 @@ class ThreePlayMediaClient(object):
)
)
def get_available_languages(self):
    """
    Gets all the 3Play Media supported languages.

    Returns:
        list: The decoded 3Play Media languages payload.

    Raises:
        ThreePlayMediaLanguagesRetrievalError: If the request fails, the response body
            is not valid JSON, or the decoded payload is not a list of languages.
    """
    response = requests.get(url=build_url(self.base_url, self.available_languages_url, apikey=self.api_key))
    if not response.ok:
        raise ThreePlayMediaLanguagesRetrievalError(
            'Error while retrieving available languages: {response} -- {status}'.format(
                response=response.text, status=response.status_code
            )
        )

    # A body that is not valid JSON would otherwise surface as a bare ValueError,
    # so report it as a languages retrieval error instead.
    try:
        available_languages = json.loads(response.text)
    except ValueError:
        raise ThreePlayMediaLanguagesRetrievalError(
            'Expected 3Play Media Supported languages but got: {response}'.format(response=response.text)
        )

    # A normal response should be a list containing 3Play Media supported languages and if we're getting
    # anything else (e.g. a dict), there must be an error:
    # https://support.3playmedia.com/hc/en-us/articles/227729968-Captions-Imports-API
    if not isinstance(available_languages, list):
        raise ThreePlayMediaLanguagesRetrievalError(
            'Expected 3Play Media Supported languages but got: {response}'.format(response=response.text)
        )

    return available_languages
def get_source_language_id(self, languages, source_language_code):
    """
    Extracts language id for a language that matches `source_language_code`
    from the given 3Play Media languages.

    Arguments:
        languages(list): 3PlayMedia supported languages; each entry is a dict that is
            looked up by its `iso_639_1_code` and `language_id` keys.
        source_language_code(unicode): A video source language code whose 3Play language id is required.

    Returns:
        The `language_id` of the first language whose `iso_639_1_code` equals
        `source_language_code`, or None when no language matches.
    """
    # `languages or []` lets a missing/empty language list fall through to "no match".
    for language in languages or []:
        # Entries without a language code are skipped rather than raising KeyError.
        if 'iso_639_1_code' in language and language['iso_639_1_code'] == source_language_code:
            return language.get('language_id')

    return None
def submit_media(self):
"""
Submits the media to perform transcription.
......@@ -111,6 +153,12 @@ class ThreePlayMediaClient(object):
turnaround_level=self.turnaround_level,
callback_url=self.callback_url,
)
available_languages = self.get_available_languages()
source_language_id = self.get_source_language_id(available_languages, self.video.source_language)
if source_language_id:
payload['language_id'] = source_language_id
upload_url = build_url(self.base_url, self.upload_media_file_url)
response = requests.post(url=upload_url, json=payload)
......@@ -140,24 +188,27 @@ class ThreePlayMediaClient(object):
TranscriptProcessMetadata.objects.create(
video=self.video,
process_id=file_id,
lang_code=u'en',
lang_code=self.video.source_language,
provider=TranscriptProvider.THREE_PLAY,
status=TranscriptStatus.IN_PROGRESS,
)
# Successfully kicked off transcription process for a video with the given language.
LOGGER.info(
'[3PlayMedia] Transcription process has been started for video=%s, language=en.',
'[3PlayMedia] Transcription process has been started for video=%s, source_language=%s.',
self.video.studio_id,
self.video.source_language,
)
except ThreePlayMediaError:
LOGGER.exception(
'[3PlayMedia] Could not process transcripts for video=%s language=en.',
'[3PlayMedia] Could not process transcripts for video=%s source_language=%s.',
self.video.studio_id,
self.video.source_language,
)
except Exception:
LOGGER.exception(
'[3PlayMedia] Unexpected error while transcription for video=%s language=en .',
'[3PlayMedia] Unexpected error while transcription for video=%s source_language=%s.',
self.video.studio_id,
self.video.source_language,
)
raise
......
......@@ -265,6 +265,7 @@ class FileDiscovery(object):
'cielo24_turnaround': transcript_preferences.get('cielo24_turnaround'),
'cielo24_fidelity': transcript_preferences.get('cielo24_fidelity'),
'preferred_languages': transcript_preferences.get('preferred_languages'),
'source_language': transcript_preferences.get('video_source_language'),
})
ingest = VedaIngest(
......
......@@ -70,6 +70,7 @@ class VideoProto():
self.cielo24_turnaround = kwargs.get('cielo24_turnaround', None)
self.cielo24_fidelity = kwargs.get('cielo24_fidelity', None)
self.preferred_languages = kwargs.get('preferred_languages', [])
self.source_language = kwargs.get('source_language', None)
# Determined Attributes
self.valid = False
......@@ -343,6 +344,7 @@ class VedaIngest:
v1.cielo24_turnaround = self.video_proto.cielo24_turnaround
v1.cielo24_fidelity = self.video_proto.cielo24_fidelity
v1.preferred_languages = self.video_proto.preferred_languages
v1.source_language = self.video_proto.source_language
"""
Files Below are all valid
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment