Commit 9bda5dca by M. Rehan Committed by GitHub

Merge pull request #44 from edx/mrehan/3play-translations-endpoint-fix

3Play Media translation process improvements
parents 58a52c63 5441feb6
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2017-10-12 12:03
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    # Schema migration for the VEDA_OS01 app: re-declares the `video` foreign key of
    # `transcriptprocessmetadata` with an explicit `related_name`.

    # Runs after the previous auto-generated VEDA_OS01 migration.
    dependencies = [
        ('VEDA_OS01', '0002_auto_20171009_1054'),
    ]

    operations = [
        # The FK targets VEDA_OS01.Video, cascades on delete and exposes the reverse
        # accessor `transcript_processes`.
        migrations.AlterField(
            model_name='transcriptprocessmetadata',
            name='video',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='transcript_processes', to='VEDA_OS01.Video'),
        ),
    ]
...@@ -6,6 +6,8 @@ import uuid ...@@ -6,6 +6,8 @@ import uuid
from django.db import models from django.db import models
from model_utils.models import TimeStampedModel from model_utils.models import TimeStampedModel
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
def _createHex(): def _createHex():
return uuid.uuid1().hex return uuid.uuid1().hex
...@@ -380,6 +382,23 @@ class Course (models.Model): ...@@ -380,6 +382,23 @@ class Course (models.Model):
unique=True unique=True
) )
@property
def org(self):
    """
    Organization of this course, derived from its first course run.

    `local_storedir` is read as a comma separated list of course ids; only the
    first entry is parsed. Returns None when `local_storedir` is empty/None or
    when the first entry is not a valid course key.
    """
    stored_course_ids = self.local_storedir
    if not stored_course_ids:
        return None

    first_course_id = stored_course_ids.split(',')[0]
    try:
        # Extract course organization.
        return CourseKey.from_string(first_course_id).org
    except InvalidKeyError:
        # An unparsable course id simply means the org is unknown.
        return None
def __unicode__(self): def __unicode__(self):
return u'{institution} {edx_class_id} {course_name}'.format( return u'{institution} {edx_class_id} {course_name}'.format(
institution=self.institution, institution=self.institution,
...@@ -660,7 +679,7 @@ class TranscriptProcessMetadata(TimeStampedModel): ...@@ -660,7 +679,7 @@ class TranscriptProcessMetadata(TimeStampedModel):
""" """
Model to contain third party transcript process metadata. Model to contain third party transcript process metadata.
""" """
video = models.ForeignKey(Video) video = models.ForeignKey(Video, related_name='transcript_processes')
provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES) provider = models.CharField('Transcript provider', max_length=50, choices=TranscriptProvider.CHOICES)
process_id = models.CharField('Process id', max_length=255) process_id = models.CharField('Process id', max_length=255)
translation_id = models.CharField( translation_id = models.CharField(
......
...@@ -354,6 +354,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -354,6 +354,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
self.video = Video.objects.create( self.video = Video.objects.create(
inst_class=self.course, inst_class=self.course,
source_language=self.video_source_language, source_language=self.video_source_language,
provider=TranscriptProvider.THREE_PLAY,
transcript_status=TranscriptStatus.IN_PROGRESS,
**VIDEO_DATA **VIDEO_DATA
) )
...@@ -408,6 +410,32 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -408,6 +410,32 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
) )
return response return response
def setup_translations_prereqs(self, file_id, translation_lang_map, preferred_languages):
    """
    Sets up pre-requisites for 3Play Media translations retrieval process.

    Arguments:
        file_id: process id shared by the speech transcript process and the
            translation processes created here.
        translation_lang_map: dict mapping a target language code to its translation id.
        preferred_languages: list of language codes saved on the test video.
    """
    # Update preferred languages.
    self.video.preferred_languages = preferred_languages
    self.video.save()

    # Assumes the speech transcript is ready. The lookup uses the `file_id` argument
    # (rather than `self.file_id`) so that it targets the same process id as the
    # translation processes created below.
    TranscriptProcessMetadata.objects.filter(
        process_id=file_id,
        lang_code=self.video_source_language,
    ).update(status=TranscriptStatus.READY)

    # Create translation processes for all the target languages and
    # set their statuses to 'IN PROGRESS'.
    for target_language, translation_id in translation_lang_map.iteritems():
        TranscriptProcessMetadata.objects.create(
            video=self.video,
            provider=TranscriptProvider.THREE_PLAY,
            process_id=file_id,
            translation_id=translation_id,
            lang_code=target_language,
            status=TranscriptStatus.IN_PROGRESS,
        )
def assert_request(self, received_request, expected_request, decode_func): def assert_request(self, received_request, expected_request, decode_func):
""" """
Verify that `received_request` matches `expected_request` Verify that `received_request` matches `expected_request`
...@@ -1086,41 +1114,26 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1086,41 +1114,26 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
# Setup an S3 bucket # Setup an S3 bucket
connection = self.setup_s3_bucket() connection = self.setup_s3_bucket()
# Setup translation processes # Setup translations
mock_translations = { translations_lang_map = {
'ro': '1z2x3c', 'ro': '1z2x3c',
'da': '1q2w3e', 'da': '1q2w3e',
} }
self.video.preferred_languages = ['en', 'ro', 'da'] self.setup_translations_prereqs(
self.video.save() file_id=self.file_id,
translation_lang_map=translations_lang_map,
# Assume the speech transcript is ready. preferred_languages=['en', 'ro', 'da']
TranscriptProcessMetadata.objects.filter( )
process_id=self.file_id,
lang_code='en'
).update(status=TranscriptStatus.READY)
# in progress translation processes (which will normally be done by the callback)
for lang_code, translation_id in mock_translations.iteritems():
TranscriptProcessMetadata.objects.create(
video=self.video,
provider=TranscriptProvider.THREE_PLAY,
process_id=self.file_id,
translation_id=translation_id,
lang_code=lang_code,
status=TranscriptStatus.IN_PROGRESS,
)
# Setup mock responses # Setup mock responses
for __, translation_id in mock_translations.iteritems(): translation_status_mock_response = []
responses.add( for target_language, translation_id in translations_lang_map.iteritems():
responses.GET, translation_status_mock_response.append({
transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format( 'id': translation_id,
file_id=self.file_id, translation_id=translation_id 'source_language_iso_639_1_code': 'en',
), 'target_language_iso_639_1_code': target_language,
json.dumps({'state': 'complete'}), 'state': 'complete'
status=200 })
)
responses.add( responses.add(
responses.GET, responses.GET,
...@@ -1136,29 +1149,42 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1136,29 +1149,42 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200) responses.add(responses.POST, CONFIG_DATA['val_transcript_create_url'], status=200)
responses.add(responses.PATCH, CONFIG_DATA['val_video_transcript_status_url'], status=200) responses.add(responses.PATCH, CONFIG_DATA['val_video_transcript_status_url'], status=200)
responses.add(
responses.GET,
transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id=self.file_id),
json.dumps(translation_status_mock_response),
status=200
)
# Call to retrieve translations # Call to retrieve translations
transcripts.retrieve_three_play_translations() transcripts.retrieve_three_play_translations()
# Total HTTP requests, 4 for first translation and 4 for second translation and 1 for updating video status. # Total HTTP requests, 1 for retrieving translations metadata, 3 for first translation and
self.assertEqual(len(responses.calls), 9) # 3 for second translation and 1 for updating video status.
self.assertEqual(len(responses.calls), 8)
position = 0 # Assert that the first request was made for getting translations metadata from 3Play Media.
for lang_code, translation_id in mock_translations.iteritems(): expected_video_status_update_request = {
'url': utils.build_url(
transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id=self.file_id),
apikey=self.transcript_prefs.api_key
)
}
self.assert_request(
responses.calls[0].request,
expected_video_status_update_request,
decode_func=json.loads,
)
position = 1
for lang_code, translation_id in translations_lang_map.iteritems():
expected_requests = [ expected_requests = [
# request - 1 # request - 1
{ {
'url': utils.build_url(transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format(
file_id=self.file_id, translation_id=translation_id
), apikey=self.transcript_prefs.api_key)
},
# request - 2
{
'url': utils.build_url(transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format( 'url': utils.build_url(transcripts.THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
file_id=self.file_id, translation_id=translation_id file_id=self.file_id, translation_id=translation_id
), apikey=self.transcript_prefs.api_key) ), apikey=self.transcript_prefs.api_key)
}, },
# request - 3 # request - 2
{ {
'url': CONFIG_DATA['val_token_url'], 'url': CONFIG_DATA['val_token_url'],
'body': { 'body': {
...@@ -1170,7 +1196,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1170,7 +1196,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
}, },
'decode_func': urlparse.parse_qs, 'decode_func': urlparse.parse_qs,
}, },
# request - 4 # request - 3
{ {
'url': CONFIG_DATA['val_transcript_create_url'], 'url': CONFIG_DATA['val_transcript_create_url'],
'body': { 'body': {
...@@ -1240,9 +1266,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1240,9 +1266,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
[ [
{ {
'method': responses.GET, 'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format( 'url': transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
file_id='112233', translation_id='1q2w3e'
),
'body': 'Your request was invalid.', 'body': 'Your request was invalid.',
'status': 400, 'status': 400,
} }
...@@ -1250,10 +1274,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1250,10 +1274,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{ {
'method': 'error', 'method': 'error',
'args': ( 'args': (
'[3PlayMedia Task] Translation status request failed for video=%s -- lang_code=%s -- ' '[3PlayMedia Task] Translations metadata request failed for video=%s -- process_id=%s -- status=%s',
'process_id=%s -- status=%s',
VIDEO_DATA['studio_id'], VIDEO_DATA['studio_id'],
'ro',
'112233', '112233',
400, 400,
) )
...@@ -1265,9 +1287,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1265,9 +1287,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
[ [
{ {
'method': responses.GET, 'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format( 'url': transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
file_id='112233', translation_id='1q2w3e'
),
'body': json.dumps({'iserror': True}), 'body': json.dumps({'iserror': True}),
'status': 200, 'status': 200,
} }
...@@ -1275,10 +1295,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1275,10 +1295,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{ {
'method': 'error', 'method': 'error',
'args': ( 'args': (
'[3PlayMedia Task] unable to get translation status for ' '[3PlayMedia Task] unable to get translations metadata for video=%s -- '
'video=%s -- lang_code=%s -- process_id=%s -- response=%s', 'process_id=%s -- response=%s',
VIDEO_DATA['studio_id'], VIDEO_DATA['studio_id'],
'ro',
'112233', '112233',
json.dumps({'iserror': True}), json.dumps({'iserror': True}),
) )
...@@ -1290,12 +1309,13 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1290,12 +1309,13 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
[ [
{ {
'method': responses.GET, 'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format( 'url': transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
file_id='112233', translation_id='1q2w3e' 'body': json.dumps([{
), 'id': '1q2w3e',
'body': json.dumps({ 'source_language_iso_639_1_code': 'en',
'target_language_iso_639_1_code': 'ro',
'state': 'complete' 'state': 'complete'
}), }]),
'status': 200, 'status': 200,
}, },
{ {
...@@ -1324,12 +1344,13 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1324,12 +1344,13 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
[ [
{ {
'method': responses.GET, 'method': responses.GET,
'url': transcripts.THREE_PLAY_TRANSLATION_STATUS_URL.format( 'url': transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
file_id='112233', translation_id='1q2w3e' 'body': json.dumps([{
), 'id': '1q2w3e',
'body': json.dumps({ 'source_language_iso_639_1_code': 'en',
'target_language_iso_639_1_code': 'ro',
'state': 'complete' 'state': 'complete'
}), }]),
'status': 200, 'status': 200,
}, },
{ {
...@@ -1366,17 +1387,10 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1366,17 +1387,10 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
""" """
# Setup translation processes # Setup translation processes
translation_id = '1q2w3e' translation_id = '1q2w3e'
self.video.preferred_languages = ['en', 'ro'] self.setup_translations_prereqs(
self.video.save() file_id=self.file_id,
translation_lang_map={'ro': translation_id},
# in progress translation processes (i.e. this was done as a part of callback) preferred_languages=['en', 'ro']
TranscriptProcessMetadata.objects.create(
video=self.video,
provider=TranscriptProvider.THREE_PLAY,
process_id=self.file_id,
translation_id=translation_id,
lang_code='ro',
status=TranscriptStatus.IN_PROGRESS,
) )
for response in mock_responses: for response in mock_responses:
...@@ -1391,9 +1405,150 @@ class ThreePlayTranscriptionCallbackTest(APITestCase): ...@@ -1391,9 +1405,150 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
# Assert the transcript translation process # Assert the transcript translation process
self.assertEqual( self.assertEqual(
TranscriptProcessMetadata.objects.get( TranscriptProcessMetadata.objects.get(
process_id=self.file_id,
provider=TranscriptProvider.THREE_PLAY, provider=TranscriptProvider.THREE_PLAY,
process_id=self.file_id,
translation_id=translation_id,
lang_code='ro' lang_code='ro'
).status, ).status,
transcript_status, transcript_status,
) )
@patch('VEDA_OS01.transcripts.LOGGER')
@patch('VEDA_OS01.transcripts.convert_srt_to_sjson', Mock(side_effect=ValueError))
def test_translations_retrieval_uncaught_exceptions(self, mock_logger):
    """
    With the SRT-to-sjson conversion patched to raise ValueError, verify that
    `convert_to_sjson_and_upload_to_s3` lets the error propagate and logs it.
    """
    studio_id = self.video.studio_id
    language = 'es'
    conversion_kwargs = dict(
        srt_transcript='invalid SRT content}',
        edx_video_id=studio_id,
        file_id=self.file_id,
        target_language=language,
    )

    with self.assertRaises(ValueError):
        transcripts.convert_to_sjson_and_upload_to_s3(**conversion_kwargs)

    # NOTE(review): the expected positional args are (video, file id, language) while the
    # message placeholders read video / lang_code / process_id -- confirm which order is intended.
    expected_log_call = (
        u'[3PlayMedia Task] translation failed for video=%s -- lang_code=%s -- process_id=%s',
        studio_id,
        self.file_id,
        language,
    )
    mock_logger.exception.assert_called_with(*expected_log_call)
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_with_zero_translation_process(self, mock_logger):
    """
    Running the retrieval without setting up any translation process for the
    video should only produce the informational "nothing in progress" log.
    """
    expected_message = '[3PlayMedia Task] video=%s does not have any translation process who is in progress.'

    # No translation prerequisites are created before the retrieval is triggered.
    transcripts.retrieve_three_play_translations()

    mock_logger.info.assert_called_with(expected_message, self.video.studio_id)
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_no_credentials(self, mock_logger):
    """
    Tests the translations retrieval when 3Play Media credentials are deleted from the data model.
    """
    target_language, translation_id = 'ro', '1q2w3e'
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={target_language: translation_id},
        preferred_languages=['en', target_language]
    )

    # Remove every stored credential so the retrieval cannot find an api key.
    TranscriptCredentials.objects.all().delete()

    transcripts.retrieve_three_play_translations()

    # The missing secrets must be reported through the exception log.
    mock_logger.exception.assert_called_with(
        '[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
        '3PlayMedia Task',
        self.org,
        self.video.studio_id,
        self.file_id,
    )

    # The tracking process of the translation must end up as failed.
    process_status = TranscriptProcessMetadata.objects.get(
        provider=TranscriptProvider.THREE_PLAY,
        process_id=self.file_id,
        translation_id=translation_id,
        lang_code=target_language
    ).status
    self.assertEqual(process_status, TranscriptStatus.FAILED)
@responses.activate
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_with_removed_translation_process(self, mock_logger):
    """
    Tests the translations retrieval when the metadata received from 3Play Media
    refers to a target language that has no tracking translation process.
    """
    translation_id = '1q2w3e'
    untracked_language = 'es'
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': translation_id},
        preferred_languages=['en', 'ro']
    )

    # The mocked metadata reports a completed translation for a language ('es')
    # that differs from the only tracked one ('ro').
    metadata_payload = [{
        'id': translation_id,
        'source_language_iso_639_1_code': 'en',
        'target_language_iso_639_1_code': untracked_language,
        'state': 'complete'
    }]
    responses.add(
        method=responses.GET,
        url=transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233'),
        body=json.dumps(metadata_payload),
        status=200
    )

    transcripts.retrieve_three_play_translations()

    expected_warning = (
        u'[3PlayMedia Task] Tracking process is either not found or already complete '
        u'-- process_id=%s -- target_language=%s -- translation_id=%s.'
    )
    mock_logger.warning.assert_called_with(
        expected_warning,
        '112233',
        untracked_language,
        translation_id,
    )
@data(None, 'invalid_course_id_1, invalid_course_id_2')
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translation_retrieval_with_invalid_course_id(self, course_runs, mock_logger):
    """
    Tests the translations retrieval when the associated course has no course ids
    or only invalid ones.

    Note:
        A course without a course id is unusual, but the case is covered because
        `Course.local_storedir` is declared as null=True, blank=True.
    """
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': '1q2w3e'},
        preferred_languages=['en', 'ro']
    )

    # Store the parametrized (missing or invalid) course runs on the course.
    course = self.course
    course.local_storedir = course_runs
    course.save()

    transcripts.retrieve_three_play_translations()

    # No organization can be resolved, so the secrets lookup is logged with org=None.
    expected_log_call = (
        u'[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
        '3PlayMedia Task',
        None,
        self.edx_video_id,
        self.file_id,
    )
    mock_logger.exception.assert_called_with(*expected_log_call)
...@@ -21,7 +21,7 @@ from rest_framework.views import APIView ...@@ -21,7 +21,7 @@ from rest_framework.views import APIView
from control.veda_val import VALAPICall from control.veda_val import VALAPICall
from VEDA_OS01 import utils from VEDA_OS01 import utils
from VEDA_OS01.models import (TranscriptCredentials, TranscriptProcessMetadata, from VEDA_OS01.models import (TranscriptCredentials, TranscriptProcessMetadata,
TranscriptProvider, TranscriptStatus) TranscriptProvider, TranscriptStatus, Video)
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning) requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
...@@ -62,9 +62,9 @@ THREE_PLAY_ORDER_TRANSLATION_URL = utils.build_url( ...@@ -62,9 +62,9 @@ THREE_PLAY_ORDER_TRANSLATION_URL = utils.build_url(
CONFIG['three_play_api_base_url'], CONFIG['three_play_api_base_url'],
'files/{file_id}/translations/order' 'files/{file_id}/translations/order'
) )
THREE_PLAY_TRANSLATION_STATUS_URL = utils.build_url( THREE_PLAY_TRANSLATIONS_METADATA_URL = utils.build_url(
CONFIG['three_play_api_transcript_url'], CONFIG['three_play_api_transcript_url'],
'files/{file_id}/translations/{translation_id}' 'files/{file_id}/translations'
) )
THREE_PLAY_TRANSLATION_DOWNLOAD_URL = utils.build_url( THREE_PLAY_TRANSLATION_DOWNLOAD_URL = utils.build_url(
CONFIG['three_play_api_transcript_url'], CONFIG['three_play_api_transcript_url'],
...@@ -790,133 +790,200 @@ def three_play_transcription_callback(sender, **kwargs): ...@@ -790,133 +790,200 @@ def three_play_transcription_callback(sender, **kwargs):
) )
def get_translation_status(api_key, file_id, translation_id, edx_video_id, lang_code): def get_translations_metadata(api_key, file_id, edx_video_id):
""" """
Get translation status for a translation process from 3Play Media. Get translations metadata from 3Play Media for a given file id.
Arguments: Arguments:
api_key(unicode): api key api_key(unicode): api key
file_id(unicode): file identifier or process identifier file_id(unicode): file identifier or process identifier
translation_id(unicode): translation identifier associated with that file identifier
edx_video_id(unicode): video studio identifier edx_video_id(unicode): video studio identifier
lang_code(unicode): language code
Returns: Returns:
A translation status retrieved from 3play media or None in case of a faulty response. A List containing the translations metadata for a file id or None
in case of a faulty response.
Example:
[
{
"id": 1234,
"translation_service_id": 12,
"source_language_name": "English",
"source_language_iso_639_1_code": "en",
"target_language_name": "French (Canada)",
"target_language_iso_639_1_code": "fr",
"state": "complete"
},
{
"id": 1345,
"translation_service_id": 32,
"source_language_name": "English",
"source_language_iso_639_1_code": "en",
"target_language_name": "German",
"target_language_iso_639_1_code": "de",
"state": "in_progress"
}
]
""" """
translation_status_url = utils.build_url( translations_metadata_url = utils.build_url(
THREE_PLAY_TRANSLATION_STATUS_URL.format( THREE_PLAY_TRANSLATIONS_METADATA_URL.format(
file_id=file_id, file_id=file_id,
translation_id=translation_id,
), ),
apikey=api_key apikey=api_key
) )
translation_status_response = requests.get(translation_status_url) translations_metadata_response = requests.get(translations_metadata_url)
if not translation_status_response.ok: if not translations_metadata_response.ok:
LOGGER.error( LOGGER.error(
(u'[3PlayMedia Task] Translation status request failed for video=%s -- ' u'[3PlayMedia Task] Translations metadata request failed for video=%s -- process_id=%s -- status=%s',
u'lang_code=%s -- process_id=%s -- status=%s'),
edx_video_id, edx_video_id,
lang_code,
file_id, file_id,
translation_status_response.status_code, translations_metadata_response.status_code,
) )
return return
translation_status = json.loads(translation_status_response.text) translations = json.loads(translations_metadata_response.text)
if translation_status.get('iserror'): if not isinstance(translations, list):
LOGGER.error( LOGGER.error(
(u'[3PlayMedia Task] unable to get translation status for video=%s -- ' u'[3PlayMedia Task] unable to get translations metadata for video=%s -- process_id=%s -- response=%s',
u'lang_code=%s -- process_id=%s -- response=%s'),
edx_video_id, edx_video_id,
lang_code,
file_id, file_id,
translation_status_response.text, translations_metadata_response.text,
) )
return return
return translation_status return translations
def retrieve_three_play_translations(): def get_in_progress_translation_processes(video):
""" """
Checks translation status on 3PlayMedia for all the progressing processes, fetches them if they're complete. Retrieves 'IN PROGRESS' translation tracking processes associated to a Video.
Retrieval flow:
1. Fetches 3PlayMedia translation processes whose status is `in progress`
2. For each process, retrieve the org-wide api keys
3. Check translation status through 3PlayMedia
4. If its done, mark the process as complete, fetch translated transcript, convert to sjson, upload it to s3 and
finally, update it in edx-val.
""" """
log_prefix = u'3PlayMedia Task' translation_processes = video.transcript_processes.filter(
translation_processes = TranscriptProcessMetadata.objects.filter(
provider=TranscriptProvider.THREE_PLAY, provider=TranscriptProvider.THREE_PLAY,
status=TranscriptStatus.IN_PROGRESS, status=TranscriptStatus.IN_PROGRESS,
).exclude(Q(translation_id__isnull=True) | Q(translation_id__exact='')) ).exclude(
Q(translation_id__isnull=True) | Q(translation_id__exact='')
)
return translation_processes
for translation_process in translation_processes:
log_args = ( def get_in_progress_translation_process(processes, file_id, translation_id, target_language):
translation_process.video.studio_id, """
translation_process.lang_code, Returns a single translation process from the given Processes.
translation_process.process_id, """
translation_process = None
try:
translation_process = processes.filter(
translation_id=translation_id,
lang_code=target_language,
process_id=file_id
).latest()
except TranscriptProcessMetadata.DoesNotExist:
LOGGER.warning(
(u'[3PlayMedia Task] Tracking process is either not found or already complete -- process_id=%s -- '
u'target_language=%s -- translation_id=%s.'),
file_id,
target_language,
translation_id
) )
course_id = translation_process.video.inst_class.local_storedir.split(',')[0] return translation_process
org = utils.extract_course_org(course_id=course_id)
# Retrieve transcript credentials
three_play_secrets = get_transcript_credentials( def get_transcript_content_from_3play_media(api_key, edx_video_id, file_id, translation_id, target_language):
provider=TranscriptProvider.THREE_PLAY, """
org=org, Get transcript content from 3Play Media in SRT format.
edx_video_id=translation_process.video.studio_id, """
file_id=translation_process.process_id, srt_transcript = None
log_prefix=log_prefix try:
transcript_url = THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(file_id=file_id, translation_id=translation_id)
srt_transcript = fetch_srt_data(url=transcript_url, apikey=api_key)
except TranscriptFetchError:
LOGGER.exception(
u'[3PlayMedia Task] Translation download failed for video=%s -- lang_code=%s -- process_id=%s.',
edx_video_id,
target_language,
file_id,
) )
if not three_play_secrets:
# Fail the process
translation_process.update(status=TranscriptStatus.FAILED)
continue
# Check transcript status return srt_transcript
translation_status = get_translation_status(
three_play_secrets.api_key,
translation_process.process_id, def convert_to_sjson_and_upload_to_s3(srt_transcript, edx_video_id, file_id, target_language):
translation_process.translation_id, """
translation_process.video.studio_id, Converts SRT content to sjson format, upload it to S3 and returns an S3 file path of the uploaded file.
translation_process.lang_code, Raises:
Logs and raises any unexpected Exception.
"""
try:
sjson_transcript = convert_srt_to_sjson(srt_transcript)
sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
except Exception:
# in case of any exception, log and raise.
LOGGER.exception(
u'[3PlayMedia Task] translation failed for video=%s -- lang_code=%s -- process_id=%s',
edx_video_id,
file_id,
target_language,
) )
raise
if not translation_status: return sjson_file
# Fail the process
translation_process.update(status=TranscriptStatus.FAILED)
continue
# On a complete translation
if translation_status['state'] == COMPLETE:
# 1 - Fetch translation content from 3Play Media. def handle_video_translations(video, translations, file_id, api_key, log_prefix):
try: """
srt_transcript = fetch_srt_data( It is a sub-module of `retrieve_three_play_translations` to handle
url=THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format( all the completed translations for a single video.
file_id=translation_process.process_id, translation_id=translation_process.translation_id
), Arguments:
apikey=three_play_secrets.api_key, video: Video data object whose translations need to be handled here.
) translations: A list containing translations metadata information received from 3play Media.
except TranscriptFetchError: file_id: It is file identifier that is assigned to a Video by 3Play Media.
LOGGER.exception( api_key: An api key to communicate to the 3Play Media.
u'[3PlayMedia Task] Translation download failed for video=%s -- lang_code=%s -- process_id=%s.', log_prefix: A logging prefix used by the main process.
*log_args
) Steps include:
- Fetch translated transcript content from 3Play Media.
- Validate the content of received translated transcript.
- Convert translated SRT transcript to SJson format and upload it to S3.
- Update edx-val for a completed transcript.
- update transcript status for video in edx-val as well as edx-video-pipeline.
"""
video_translation_processes = get_in_progress_translation_processes(video)
for translation_metadata in translations:
translation_id = translation_metadata['id']
translation_state = translation_metadata['state']
target_language = translation_metadata['target_language_iso_639_1_code']
if translation_state == COMPLETE:
# Fetch the corresponding tracking process.
translation_process = get_in_progress_translation_process(
video_translation_processes,
file_id=file_id,
translation_id=translation_id,
target_language=target_language
)
if translation_process is None:
continue
# 1 - Fetch translated transcript content from 3Play Media.
srt_transcript = get_transcript_content_from_3play_media(
api_key=api_key,
edx_video_id=video.studio_id,
file_id=file_id,
translation_id=translation_id,
target_language=target_language,
)
if srt_transcript is None:
continue continue
# 2 - Validate the translation's SRT content received from 3Play Media. # 2 - Validate the content of received translated transcript.
is_transcript_valid = validate_transcript_response( is_transcript_valid = validate_transcript_response(
edx_video_id=translation_process.video.studio_id, edx_video_id=video.studio_id,
file_id=translation_process.process_id, file_id=file_id,
transcript=srt_transcript, transcript=srt_transcript,
lang_code=translation_process.lang_code, lang_code=target_language,
log_prefix=log_prefix log_prefix=log_prefix
) )
if is_transcript_valid: if is_transcript_valid:
...@@ -926,33 +993,88 @@ def retrieve_three_play_translations(): ...@@ -926,33 +993,88 @@ def retrieve_three_play_translations():
continue continue
# 3 - Convert SRT translation to SJson format and upload it to S3. # 3 - Convert SRT translation to SJson format and upload it to S3.
try: sjson_file = convert_to_sjson_and_upload_to_s3(
sjson_transcript = convert_srt_to_sjson(srt_transcript) srt_transcript=srt_transcript,
sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript) target_language=target_language,
except Exception: edx_video_id=video.studio_id,
# in case of any exception, log and raise. file_id=file_id,
LOGGER.exception( )
u'[3PlayMedia Task] translation failed for video=%s -- lang_code=%s -- process_id=%s',
*log_args
)
raise
# 4 Update edx-val with completed transcript information # 4 Update edx-val with completed transcript information
val_api = VALAPICall(video_proto=None, val_status=None) val_api = VALAPICall(video_proto=None, val_status=None)
val_api.update_val_transcript( val_api.update_val_transcript(
video_id=translation_process.video.studio_id, video_id=video.studio_id,
lang_code=translation_process.lang_code, lang_code=target_language,
name=sjson_file, name=sjson_file,
transcript_format=TRANSCRIPT_SJSON, transcript_format=TRANSCRIPT_SJSON,
provider=TranscriptProvider.THREE_PLAY, provider=TranscriptProvider.THREE_PLAY,
) )
# 5 - if all the processes for this video are complete, update video status in edx-val # 5 - if all the processes for this video are complete, update transcript status
# update transcript status for video in edx-val as well as edx-video-pipeline. # for video in edx-val as well as edx-video-pipeline.
video_jobs = TranscriptProcessMetadata.objects.filter(video__studio_id=translation_process.video.studio_id) video_jobs = TranscriptProcessMetadata.objects.filter(video=video)
if all(video_job.status == TranscriptStatus.READY for video_job in video_jobs): if all(video_job.status == TranscriptStatus.READY for video_job in video_jobs):
utils.update_video_status( utils.update_video_status(
val_api_client=val_api, val_api_client=val_api,
video=translation_process.video, video=video,
status=TranscriptStatus.READY status=TranscriptStatus.READY
) )
def retrieve_three_play_translations():
    """
    Retrieves translations from 3PlayMedia for every video that is still being transcribed.

    Retrieval flow, per video whose provider is 3PlayMedia and whose transcript
    status is `in progress`:
        1. Collect the video's `in progress` translation processes; skip the video
           (with an info log) when there are none.
        2. Retrieve the transcript credentials for the organization of the video's course.
        3. Fetch the translations metadata for the video's file id.
        4. If either the credentials or the metadata cannot be obtained, mark the
           video's in progress translation processes as failed.
        5. Otherwise hand the video and its translations metadata over to
           `handle_video_translations`.
    """
    log_prefix = u'3PlayMedia Task'
    videos_in_progress = Video.objects.filter(
        provider=TranscriptProvider.THREE_PLAY,
        transcript_status=TranscriptStatus.IN_PROGRESS,
    )

    for video in videos_in_progress:
        edx_video_id = video.studio_id

        pending_processes = get_in_progress_translation_processes(video)
        if not pending_processes.exists():
            LOGGER.info(
                '[3PlayMedia Task] video=%s does not have any translation process who is in progress.',
                edx_video_id,
            )
            continue

        # Every process of a video shares one process id, which 3PlayMedia calls the `file_id`.
        file_id = pending_processes.first().process_id

        credentials = get_transcript_credentials(
            provider=TranscriptProvider.THREE_PLAY,
            org=video.inst_class.org,
            edx_video_id=edx_video_id,
            file_id=file_id,
            log_prefix=log_prefix
        )
        if not credentials:
            # Without credentials nothing can be fetched; fail the pending processes.
            pending_processes.update(status=TranscriptStatus.FAILED)
            continue

        api_key = credentials.api_key
        # The metadata carries the state of each translation of this file.
        translations = get_translations_metadata(
            api_key=api_key,
            file_id=file_id,
            edx_video_id=edx_video_id,
        )
        if translations is None:
            # A faulty metadata response fails the pending processes as well.
            pending_processes.update(status=TranscriptStatus.FAILED)
            continue

        handle_video_translations(
            video=video,
            translations=translations,
            file_id=file_id,
            api_key=api_key,
            log_prefix=log_prefix,
        )
...@@ -29,10 +29,7 @@ class TestValidation(TestCase): ...@@ -29,10 +29,7 @@ class TestValidation(TestCase):
videofile=self.videofile videofile=self.videofile
) )
@unittest.skipIf( @unittest.skip('Skipping this test due to unavailability of required ffprobe version.')
'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true',
'Skipping this test on Travis CI due to unavailability of required ffprobe version.'
)
def test_validation(self): def test_validation(self):
""" """
Check a known file for validity Check a known file for validity
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment