Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-video-pipeline
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-video-pipeline
Commits
7d9e88af
Commit
7d9e88af
authored
Sep 12, 2017
by
Qubad786
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
improve code, fix tests, address feedback
parent
7ee3449d
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
382 additions
and
249 deletions
+382
-249
VEDA_OS01/models.py
+11
-0
VEDA_OS01/tests/test_transcripts.py
+22
-22
VEDA_OS01/transcripts.py
+349
-227
No files found.
VEDA_OS01/models.py
View file @
7d9e88af
...
@@ -661,6 +661,17 @@ class TranscriptProcessMetadata(TimeStampedModel):
...
@@ -661,6 +661,17 @@ class TranscriptProcessMetadata(TimeStampedModel):
verbose_name_plural
=
'Transcript process metadata'
verbose_name_plural
=
'Transcript process metadata'
get_latest_by
=
'modified'
get_latest_by
=
'modified'
def update(self, **fields):
    """
    Updates a process.

    Every supplied keyword is set as an attribute on this instance, after
    which the instance is persisted with a single `save()` call.

    Keyword Arguments:
        fields(dict): dict containing all the fields to be updated.
    """
    # `items()` exists on both Python 2 and Python 3 dicts, whereas
    # `iteritems()` is only available on Python 2.
    for attr, value in fields.items():
        setattr(self, attr, value)

    self.save()
def
__unicode__
(
self
):
def
__unicode__
(
self
):
return
u'{video} - {provider} - {lang}'
.
format
(
return
u'{video} - {provider} - {lang}'
.
format
(
video
=
self
.
video
.
edx_id
,
video
=
self
.
video
.
edx_id
,
...
...
VEDA_OS01/tests/test_transcripts.py
View file @
7d9e88af
...
@@ -765,21 +765,22 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -765,21 +765,22 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{
'body'
:
json
.
dumps
({
'iserror'
:
True
}),
'content_type'
:
'application/json'
,
'status'
:
200
},
{
'body'
:
json
.
dumps
({
'iserror'
:
True
}),
'content_type'
:
'application/json'
,
'status'
:
200
},
'error'
,
'error'
,
(
(
'[3PlayMedia Task] Transcript fetch error for video=
%
s -- lang_code=
%
s -- process=
%
s -- response=
%
s'
,
u'[
%
s] Transcript fetch error for video=
%
s -- lang_code=
%
s -- process=
%
s -- response=
%
s'
,
ANY
,
u'3PlayMedia Callback'
,
ANY
,
u'12345'
,
ANY
,
u'en'
,
ANY
u'112233'
,
json
.
dumps
({
'iserror'
:
True
}),
),
),
),
),
(
(
{
'body'
:
None
,
'status'
:
400
},
{
'body'
:
None
,
'status'
:
400
},
'exception'
,
'exception'
,
(
(
'[3PlayMedia Callback] Fetch request failed for video=
%
s -- lang
=
%
s -- process_id=
%
s'
,
u'[3PlayMedia Callback] Fetch request failed for video=
%
s -- lang_code
=
%
s -- process_id=
%
s'
,
ANY
,
u'12345'
,
ANY
,
u'en'
,
ANY
,
u'112233'
,
),
),
)
)
)
)
...
@@ -870,10 +871,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -870,10 +871,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{
{
'method'
:
'exception'
,
'method'
:
'exception'
,
'args'
:
(
'args'
:
(
'[3PlayMedia Callback] Translation could not be performed - org=
%
s, edx_video_id=
%
s, '
'[3PlayMedia Callback] Translation could not be performed - video=
%
s, lang_code=
%
s, file_id=
%
s.'
,
'file_id=
%
s.'
,
'MAx'
,
'12345'
,
'12345'
,
'en'
,
'112233'
'112233'
)
)
}
}
...
@@ -893,10 +893,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -893,10 +893,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{
{
'method'
:
'exception'
,
'method'
:
'exception'
,
'args'
:
(
'args'
:
(
'[3PlayMedia Callback] Translation could not be performed - org=
%
s, edx_video_id=
%
s, '
'[3PlayMedia Callback] Translation could not be performed - video=
%
s, lang_code=
%
s, file_id=
%
s.'
,
'file_id=
%
s.'
,
'MAx'
,
'12345'
,
'12345'
,
'en'
,
'112233'
'112233'
)
)
}
}
...
@@ -1203,7 +1202,6 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1203,7 +1202,6 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
decode_func
=
json
.
loads
,
decode_func
=
json
.
loads
,
)
)
@data
(
@data
(
# not-an-ok response on translation status fetch request.
# not-an-ok response on translation status fetch request.
(
(
...
@@ -1228,7 +1226,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1228,7 +1226,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
400
,
400
,
)
)
},
},
TranscriptStatus
.
IN_PROGRESS
TranscriptStatus
.
FAILED
),
),
# 3Play Error response on fetching translations status.
# 3Play Error response on fetching translations status.
(
(
...
@@ -1245,7 +1243,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1245,7 +1243,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{
{
'method'
:
'error'
,
'method'
:
'error'
,
'args'
:
(
'args'
:
(
'[3PlayMedia Task] Translation error for video=
%
s -- lang_code=
%
s -- process_id=
%
s -- response=
%
s'
,
'[3PlayMedia Task] unable to get translation status for '
'video=
%
s -- lang_code=
%
s -- process_id=
%
s -- response=
%
s'
,
VIDEO_DATA
[
'studio_id'
],
VIDEO_DATA
[
'studio_id'
],
'ro'
,
'ro'
,
'112233'
,
'112233'
,
...
@@ -1268,7 +1267,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1268,7 +1267,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
'status'
:
200
,
'status'
:
200
,
},
},
{
{
'm
o
thod'
:
responses
.
GET
,
'm
e
thod'
:
responses
.
GET
,
'url'
:
transcripts
.
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
'url'
:
transcripts
.
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
file_id
=
'112233'
,
translation_id
=
'1q2w3e'
file_id
=
'112233'
,
translation_id
=
'1q2w3e'
),
),
...
@@ -1302,7 +1301,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1302,7 +1301,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
'status'
:
200
,
'status'
:
200
,
},
},
{
{
'm
o
thod'
:
responses
.
GET
,
'm
e
thod'
:
responses
.
GET
,
'url'
:
transcripts
.
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
'url'
:
transcripts
.
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
file_id
=
'112233'
,
translation_id
=
'1q2w3e'
file_id
=
'112233'
,
translation_id
=
'1q2w3e'
),
),
...
@@ -1314,7 +1313,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1314,7 +1313,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{
{
'method'
:
'error'
,
'method'
:
'error'
,
'args'
:
(
'args'
:
(
'[3PlayMedia Task] Translation error for video=
%
s -- lang_code=
%
s -- process_id=
%
s -- response=
%
s'
,
'[
%
s] Transcript fetch error for video=
%
s -- lang_code=
%
s -- process=
%
s -- response=
%
s'
,
'3PlayMedia Task'
,
VIDEO_DATA
[
'studio_id'
],
VIDEO_DATA
[
'studio_id'
],
'ro'
,
'ro'
,
'112233'
,
'112233'
,
...
@@ -1328,7 +1328,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1328,7 +1328,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
@responses.activate
@responses.activate
@mock_s3_deprecated
@mock_s3_deprecated
@patch
(
'VEDA_OS01.transcripts.LOGGER'
)
@patch
(
'VEDA_OS01.transcripts.LOGGER'
)
def
translations_retrieval_exceptions
(
self
,
mock_responses
,
expected_logging
,
transcript_status
,
mock_logger
):
def
t
est_t
ranslations_retrieval_exceptions
(
self
,
mock_responses
,
expected_logging
,
transcript_status
,
mock_logger
):
"""
"""
Tests possible error cases during translation fetch process form 3PlayMedia.
Tests possible error cases during translation fetch process form 3PlayMedia.
"""
"""
...
...
VEDA_OS01/transcripts.py
View file @
7d9e88af
...
@@ -337,6 +337,101 @@ class ThreePlayMediaCallbackHandlerView(APIView):
...
@@ -337,6 +337,101 @@ class ThreePlayMediaCallbackHandlerView(APIView):
return
Response
(
status
=
status
.
HTTP_200_OK
)
return
Response
(
status
=
status
.
HTTP_200_OK
)
def get_translation_services(api_key):
    """
    GET available 3Play Media Translation services

    Arguments:
        api_key(unicode): api key which is required to make an authentic call to 3Play Media

    Returns:
        Available 3Play Media Translation services (a list).

    Raises:
        TranscriptTranslationError: when the request is not successful, or when the
            response body is not valid JSON or does not decode to a list.
    """
    response = requests.get(utils.build_url(THREE_PLAY_TRANSLATION_SERVICES_URL, apikey=api_key))
    if not response.ok:
        raise TranscriptTranslationError(
            u'[3PlayMedia Callback] Error while fetching the translation services -- {status}, {response}'.format(
                status=response.status_code,
                response=response.text,
            )
        )

    # Response should be a list containing services, details:
    # http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    try:
        available_services = json.loads(response.text)
    except ValueError:
        # A body that is not JSON at all is reported through the same error as a
        # JSON body of the wrong shape, so callers only have to handle
        # TranscriptTranslationError.
        available_services = None

    if not isinstance(available_services, list):
        raise TranscriptTranslationError(
            u'[3PlayMedia Callback] Expected list but got: -- {response}.'.format(
                response=response.text,
            )
        )

    return available_services
def get_standard_translation_service(translation_services, target_language):
    """
    Look up the standard-level translation service for a language.

    Arguments:
        translation_services(list): List of available 3play media translation services.
        target_language(str): A language code whose standard translation service is needed.

    Returns:
        The `id` of the first service whose target language matches and whose
        service level is 'standard', or None when there is no such service.
    """
    # Lazily yields ids of matching services; only the first one is consumed.
    standard_service_ids = (
        candidate['id']
        for candidate in translation_services
        if candidate['target_language_iso_639_1_code'] == target_language
        and candidate['service_level'] == 'standard'
    )
    return next(standard_service_ids, None)
def place_translation_order(api_key, api_secret, translation_service_id, target_language, file_id):
    """
    Places a translation order on 3play media.

    Arguments:
        api_key(unicode): api key
        api_secret(unicode): api secret
        translation_service_id(unicode): translation service id got from 3Play Media
        target_language(unicode): A language code translation is being ordered
        file_id(unicode): 3play media file id / process id

    Returns:
        The decoded order response (dict) when it carries a truthy `success`
        attribute, otherwise None. Every failure is logged before returning.
    """
    order_response = requests.post(THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=file_id), json={
        'apikey': api_key,
        'api_secret_key': api_secret,
        'translation_service_id': translation_service_id,
    })

    if not order_response.ok:
        LOGGER.error(
            '[3PlayMedia Callback] An error occurred during translation, target language=%s, file_id=%s, status=%s',
            target_language,
            file_id,
            order_response.status_code,
        )
        return None

    # Translation Order API returns `success` attribute specifying whether the order has been placed
    # successfully: http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    try:
        translation_order = json.loads(order_response.text)
    except ValueError:
        # A body that is not JSON is handled as a failed order below instead of
        # letting the decoding error escape to the caller.
        translation_order = None

    if not isinstance(translation_order, dict) or not translation_order.get('success'):
        LOGGER.error(
            '[3PlayMedia Callback] Translation failed fot target language=%s, file_id=%s, response=%s',
            target_language,
            file_id,
            order_response.text,
        )
        return None

    return translation_order
def
order_translations
(
file_id
,
api_key
,
api_secret
,
target_languages
):
def
order_translations
(
file_id
,
api_key
,
api_secret
,
target_languages
):
"""
"""
Order translations on 3PlayMedia for all the target languages.
Order translations on 3PlayMedia for all the target languages.
...
@@ -359,6 +454,9 @@ def order_translations(file_id, api_key, api_secret, target_languages):
...
@@ -359,6 +454,9 @@ def order_translations(file_id, api_key, api_secret, target_languages):
Raises:
Raises:
TranscriptTranslationError: when an error occurred while fetching the translation services.
TranscriptTranslationError: when an error occurred while fetching the translation services.
"""
"""
if
not
target_languages
:
return
translation_processes
=
TranscriptProcessMetadata
.
objects
.
filter
(
translation_processes
=
TranscriptProcessMetadata
.
objects
.
filter
(
process_id
=
file_id
,
process_id
=
file_id
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
...
@@ -366,32 +464,16 @@ def order_translations(file_id, api_key, api_secret, target_languages):
...
@@ -366,32 +464,16 @@ def order_translations(file_id, api_key, api_secret, target_languages):
lang_code__in
=
target_languages
,
lang_code__in
=
target_languages
,
)
)
response
=
requests
.
get
(
utils
.
build_url
(
THREE_PLAY_TRANSLATION_SERVICES_URL
,
apikey
=
api_key
))
# Retrieve available translation services.
if
not
response
.
ok
:
try
:
# Fail all the pending translation processes associated with this file id.
available_services
=
get_translation_services
(
api_key
)
translation_processes
.
update
(
status
=
TranscriptStatus
.
FAILED
)
except
TranscriptTranslationError
:
raise
TranscriptTranslationError
(
u'[3PlayMedia Callback] Error while fetching the translation services -- {status}, {response}'
.
format
(
status
=
response
.
status_code
,
response
=
response
.
text
,
)
)
# Response should be a list containing services, details:
# http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
available_services
=
json
.
loads
(
response
.
text
)
if
not
isinstance
(
available_services
,
list
):
# Fail all the pending translation processes associated with this file id.
# Fail all the pending translation processes associated with this file id.
translation_processes
.
update
(
status
=
TranscriptStatus
.
FAILED
)
translation_processes
.
update
(
status
=
TranscriptStatus
.
FAILED
)
raise
raise
TranscriptTranslationError
(
u'[3PlayMedia Callback] Expected list but got: -- {response}.'
.
format
(
response
=
response
.
text
,
)
)
for
target_language
in
target_languages
:
for
target_language
in
target_languages
:
# 1 - get a translation process for the target language
try
:
try
:
translation_process
=
translation_processes
.
filter
(
lang_code
=
target_language
)
.
latest
()
translation_process
=
translation_processes
.
filter
(
lang_code
=
target_language
)
.
latest
()
except
TranscriptProcessMetadata
.
DoesNotExist
:
except
TranscriptProcessMetadata
.
DoesNotExist
:
...
@@ -402,63 +484,93 @@ def order_translations(file_id, api_key, api_secret, target_languages):
...
@@ -402,63 +484,93 @@ def order_translations(file_id, api_key, api_secret, target_languages):
)
)
continue
continue
# 1 - Find a standard service for translation in the target language.
# 2 - Find a standard service for translation for the target language.
translation_service_id
=
None
translation_service_id
=
get_standard_translation_service
(
available_services
,
target_language
)
for
service
in
available_services
:
service_found
=
(
service
[
'target_language_iso_639_1_code'
]
==
target_language
and
service
[
'service_level'
]
==
'standard'
)
if
service_found
:
translation_service_id
=
service
[
'id'
]
break
if
translation_service_id
is
None
:
if
translation_service_id
is
None
:
# Fail the process
# Fail the process
translation_process
.
status
=
TranscriptStatus
.
FAILED
translation_process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
translation_process
.
save
()
LOGGER
.
error
(
LOGGER
.
error
(
'[3PlayMedia Callback] No translation service found for target language
%
s -- process id
%
s'
,
u
'[3PlayMedia Callback] No translation service found for target language
%
s -- process id
%
s'
,
target_language
,
target_language
,
file_id
,
file_id
,
)
)
continue
continue
# 2 - At this point, we've got our service ready to use. Now, place an order for the translation.
# 3 - Place an order
response
=
requests
.
post
(
THREE_PLAY_ORDER_TRANSLATION_URL
.
format
(
file_id
=
file_id
),
json
=
{
# At this point, we've got our service ready to use. Now, place an order for the translation.
'apikey'
:
api_key
,
translation_order
=
place_translation_order
(
'api_secret_key'
:
api_secret
,
api_key
=
api_key
,
'translation_service_id'
:
translation_service_id
,
api_secret
=
api_secret
,
})
translation_service_id
=
translation_service_id
,
target_language
=
target_language
,
if
not
response
.
ok
:
file_id
=
file_id
,
# Fail the process
)
translation_process
.
status
=
TranscriptStatus
.
FAILED
if
translation_order
:
translation_process
.
save
()
translation_process
.
update
(
LOGGER
.
error
(
translation_id
=
translation_order
[
'translation_id'
],
'[3PlayMedia Callback] An error occurred during translation, target language=
%
s, file_id=
%
s, status=
%
s'
,
status
=
TranscriptStatus
.
IN_PROGRESS
target_language
,
file_id
,
response
.
status_code
,
)
)
continue
# Translation Order API returns `success` attribute specifying whether the order has been placed
# successfully: http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
translation_order
=
json
.
loads
(
response
.
text
)
if
translation_order
.
get
(
'success'
):
translation_process
.
status
=
TranscriptStatus
.
IN_PROGRESS
translation_process
.
translation_id
=
translation_order
[
'translation_id'
]
translation_process
.
save
()
else
:
else
:
translation_process
.
status
=
TranscriptStatus
.
FAILED
translation_process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
translation_process
.
save
()
LOGGER
.
error
(
'[3PlayMedia Callback] Translation failed fot target language=
%
s, file_id=
%
s, response=
%
s'
,
def
validate_transcript_response
(
edx_video_id
,
file_id
,
transcript
,
lang_code
,
log_prefix
):
target_language
,
"""
file_id
,
This validates transcript response received from 3Play Media.
response
.
text
,
)
Arguments:
edx_video_id(unicode): studio video identifier
file_id(unicode): file identifier
transcript(unicode): SRT transcript content ideally
lang_code(unicode): language code
log_prefix(unicode): A prefix for the emitted logs
transcript is going to be SRT content and if this is not so, then it'll be a json response
describing the error and process will be marked as failed. Error response will be logged
along with the validation.
"""
try
:
json
.
loads
(
transcript
)
# Log the details.
LOGGER
.
error
(
u'[
%
s] Transcript fetch error for video=
%
s -- lang_code=
%
s -- process=
%
s -- response=
%
s'
,
log_prefix
,
edx_video_id
,
lang_code
,
file_id
,
transcript
,
)
return
False
except
ValueError
:
pass
return
True
def get_transcript_credentials(provider, org, edx_video_id, file_id, log_prefix):
    """
    Look up the transcript credentials stored for an organization and provider.

    Arguments:
        provider(TranscriptProvider): transcript provider
        org(unicode): organization extracted from course id
        log_prefix(unicode): A prefix for the emitted logs
        edx_video_id(unicode): studio video identifier
        file_id(unicode): file identifier or process identifier

    Returns:
        The matching TranscriptCredentials record, or None when no record
        exists (the miss is logged together with the video and file ids).
    """
    try:
        return TranscriptCredentials.objects.get(org=org, provider=provider)
    except TranscriptCredentials.DoesNotExist:
        LOGGER.exception(
            u'[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
            log_prefix,
            org,
            edx_video_id,
            file_id,
        )
        return None
@django.dispatch.receiver
(
THREE_PLAY_TRANSCRIPTION_DONE
,
dispatch_uid
=
"three_play_transcription_done"
)
@django.dispatch.receiver
(
THREE_PLAY_TRANSCRIPTION_DONE
,
dispatch_uid
=
"three_play_transcription_done"
)
...
@@ -477,6 +589,7 @@ def three_play_transcription_callback(sender, **kwargs):
...
@@ -477,6 +589,7 @@ def three_play_transcription_callback(sender, **kwargs):
* order translations for all the preferred languages
* order translations for all the preferred languages
* update transcript status in VAL
* update transcript status in VAL
"""
"""
log_prefix
=
u'3PlayMedia Callback'
# Extract all the must have attributes
# Extract all the must have attributes
org
=
kwargs
[
'org'
]
org
=
kwargs
[
'org'
]
edx_video_id
=
kwargs
[
'edx_video_id'
]
edx_video_id
=
kwargs
[
'edx_video_id'
]
...
@@ -499,62 +612,51 @@ def three_play_transcription_callback(sender, **kwargs):
...
@@ -499,62 +612,51 @@ def three_play_transcription_callback(sender, **kwargs):
)
)
return
return
# On completion of a transcript
# Indicates that the default video speech transcription has been done successfully.
if
state
==
COMPLETE
:
if
state
==
COMPLETE
:
# Indicates that the default video speech transcription has been done successfully.
log_args
=
(
edx_video_id
,
lang_code
,
file_id
)
try
:
transcript_secrets
=
TranscriptCredentials
.
objects
.
get
(
org
=
org
,
provider
=
TranscriptProvider
.
THREE_PLAY
)
# 1 - Retrieve transcript credentials
except
TranscriptCredentials
.
DoesNotExist
:
transcript_secrets
=
get_transcript_credentials
(
# Fail the process
provider
=
TranscriptProvider
.
THREE_PLAY
,
process
.
status
=
TranscriptStatus
.
FAILED
org
=
org
,
process
.
save
()
edx_video_id
=
edx_video_id
,
# Log the failure
file_id
=
file_id
,
LOGGER
.
exception
(
log_prefix
=
log_prefix
,
u'[3PlayMedia Callback] Unable to get transcript secrets for org=
%
s, edx_video_id=
%
s, file_id=
%
s.'
,
)
org
,
if
not
transcript_secrets
:
edx_video_id
,
process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
file_id
,
)
return
return
#
Fetch the transcript from 3PlayMedia
#
2 - Fetch the transcript from 3Play Media.
try
:
try
:
srt_transcript
=
fetch_srt_data
(
srt_transcript
=
fetch_srt_data
(
THREE_PLAY_TRANSCRIPT_URL
.
format
(
file_id
=
file_id
),
THREE_PLAY_TRANSCRIPT_URL
.
format
(
file_id
=
file_id
),
apikey
=
transcript_secrets
.
api_key
,
apikey
=
transcript_secrets
.
api_key
,
)
)
except
TranscriptFetchError
:
except
TranscriptFetchError
:
process
.
status
=
TranscriptStatus
.
FAILED
process
.
save
()
LOGGER
.
exception
(
LOGGER
.
exception
(
'[3PlayMedia Callback] Fetch request failed for video=
%
s -- lang=
%
s -- process_id=
%
s'
,
u'[3PlayMedia Callback] Fetch request failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s'
,
edx_video_id
,
*
log_args
lang_code
,
file_id
)
return
# fetched transcript is going to be SRT content and if this is not so, it'll be a json response
# describing the error.
try
:
json
.
loads
(
srt_transcript
)
# Fail the process and log all the details.
process
.
status
=
TranscriptStatus
.
FAILED
process
.
save
()
LOGGER
.
error
(
'[3PlayMedia Task] Transcript fetch error for video=
%
s -- lang_code=
%
s -- process=
%
s -- response=
%
s'
,
process
.
video
.
studio_id
,
process
.
lang_code
,
process
.
process_id
,
srt_transcript
,
)
)
process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
return
return
except
ValueError
:
pass
# We've got the transcript from 3PlayMedia, now update process status accordingly.
# 3 - Validate transcript content received from 3Play Media and mark the transcription process.
process
.
status
=
TranscriptStatus
.
READY
is_valid_transcript
=
validate_transcript_response
(
process
.
save
()
edx_video_id
=
edx_video_id
,
file_id
=
file_id
,
transcript
=
srt_transcript
,
lang_code
=
lang_code
,
log_prefix
=
log_prefix
,
)
if
is_valid_transcript
:
process
.
update
(
status
=
TranscriptStatus
.
READY
)
else
:
process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
# 4 - Convert SRT transcript to SJson format and upload it to S3.
try
:
try
:
sjson_transcript
=
convert_srt_to_sjson
(
srt_transcript
)
sjson_transcript
=
convert_srt_to_sjson
(
srt_transcript
)
sjson_file
=
upload_sjson_to_s3
(
CONFIG
,
sjson_transcript
)
sjson_file
=
upload_sjson_to_s3
(
CONFIG
,
sjson_transcript
)
...
@@ -562,13 +664,11 @@ def three_play_transcription_callback(sender, **kwargs):
...
@@ -562,13 +664,11 @@ def three_play_transcription_callback(sender, **kwargs):
# in case of any exception, log and raise.
# in case of any exception, log and raise.
LOGGER
.
exception
(
LOGGER
.
exception
(
u'[3PlayMedia Callback] Request failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s'
,
u'[3PlayMedia Callback] Request failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s'
,
edx_video_id
,
*
log_args
lang_code
,
file_id
,
)
)
raise
raise
#
Update edx-val with completed transcript information
#
5 - Update edx-val with completed transcript information.
val_api
=
VALAPICall
(
video_proto
=
None
,
val_status
=
None
)
val_api
=
VALAPICall
(
video_proto
=
None
,
val_status
=
None
)
val_api
.
update_val_transcript
(
val_api
.
update_val_transcript
(
video_id
=
process
.
video
.
studio_id
,
video_id
=
process
.
video
.
studio_id
,
...
@@ -578,56 +678,48 @@ def three_play_transcription_callback(sender, **kwargs):
...
@@ -578,56 +678,48 @@ def three_play_transcription_callback(sender, **kwargs):
provider
=
TranscriptProvider
.
THREE_PLAY
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
)
)
# Translation Phase
# 6 - Translation Phase
# That's the phase for kicking off translation processes for all the
# preferred languages except the video's speech language.
target_languages
=
list
(
process
.
video
.
preferred_languages
)
target_languages
=
list
(
process
.
video
.
preferred_languages
)
# Remove the language that is already processed - in our case, its en.
target_languages
.
remove
(
lang_code
)
target_languages
.
remove
(
lang_code
)
# Check if the translations are needed.
# Create the translation tracking processes for all the target languages.
if
target_languages
:
for
target_language
in
target_languages
:
# Create the translation tracking processes for all the target languages.
TranscriptProcessMetadata
.
objects
.
create
(
for
target_language
in
target_languages
:
video
=
process
.
video
,
TranscriptProcessMetadata
.
objects
.
create
(
provider
=
TranscriptProvider
.
THREE_PLAY
,
video
=
process
.
video
,
process_id
=
file_id
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
lang_code
=
target_language
,
process_id
=
file_id
,
status
=
TranscriptStatus
.
PENDING
,
lang_code
=
target_language
,
)
status
=
TranscriptStatus
.
PENDING
,
)
try
:
# Order translations for target languages
# Order translations for target languages
try
:
order_translations
(
file_id
,
transcript_secrets
.
api_key
,
transcript_secrets
.
api_secret
,
target_languages
)
order_translations
(
file_id
,
transcript_secrets
.
api_key
,
transcript_secrets
.
api_secret
,
target_languages
)
except
TranscriptTranslationError
:
except
TranscriptTranslationError
:
LOGGER
.
exception
(
LOGGER
.
exception
(
u'[3PlayMedia Callback] Translation could not be performed - org=
%
s, edx_video_id=
%
s, file_id=
%
s.'
,
u'[3PlayMedia Callback] Translation could not be performed - video=
%
s, lang_code=
%
s, file_id=
%
s.'
,
org
,
*
log_args
edx_video_id
,
)
file_id
,
except
Exception
:
)
LOGGER
.
exception
(
except
Exception
:
u'[3PlayMedia Callback] Error while translating the transcripts - video=
%
s, lang_code=
%
s, file_id=
%
s'
,
LOGGER
.
exception
(
*
log_args
(
u'[3PlayMedia Callback] Error while translating the transcripts - org=
%
s, edx_video_id=
%
s, '
)
u'file_id=
%
s.'
),
raise
org
,
edx_video_id
,
file_id
,
)
raise
# in case if there is only one language which has already been processed.
# 7 - Update transcript status.
# It will be for edx-val as well as edx-video-pipeline and this will be the case when
# there is only one transcript language for a video(that is, already been processed).
if
not
target_languages
:
if
not
target_languages
:
val_api
.
update_video_status
(
val_api
.
update_video_status
(
process
.
video
.
studio_id
,
VideoStatus
.
TRANSCRIPT_READY
)
process
.
video
.
studio_id
,
VideoStatus
.
TRANSCRIPT_READY
)
# On success, a happy farewell log.
# On success, a happy farewell log.
LOGGER
.
info
(
LOGGER
.
info
(
u'[3PlayMedia Callback] Video speech transcription was successful for video=
%
s -- lang_code=
%
s -- '
(
u'[3PlayMedia Callback] Video speech transcription was successful for'
u'process_id=
%
s'
,
u' video=
%
s -- lang_code=
%
s -- process_id=
%
s'
),
edx_video_id
,
*
log_args
lang_code
,
file_id
,
)
)
elif
state
==
ERROR
:
elif
state
==
ERROR
:
...
@@ -654,6 +746,54 @@ def three_play_transcription_callback(sender, **kwargs):
...
@@ -654,6 +746,54 @@ def three_play_transcription_callback(sender, **kwargs):
)
)
def get_translation_status(api_key, file_id, translation_id, edx_video_id, lang_code):
    """
    Get translation status for a translation process from 3Play Media.

    Arguments:
        api_key(unicode): api key
        file_id(unicode): file identifier or process identifier
        translation_id(unicode): translation identifier associated with that file identifier
        edx_video_id(unicode): video studio identifier
        lang_code(unicode): language code

    Returns:
        A translation status retrieved from 3play media or None in case of a faulty response.
        A faulty response is a not-ok HTTP status, a body that is not a JSON object, or a
        JSON object carrying a truthy `iserror`; each of these is logged.
    """
    translation_status_url = utils.build_url(
        THREE_PLAY_TRANSLATION_STATUS_URL.format(
            file_id=file_id,
            translation_id=translation_id,
        ),
        apikey=api_key
    )
    translation_status_response = requests.get(translation_status_url)

    if not translation_status_response.ok:
        LOGGER.error(
            (
                u'[3PlayMedia Task] Translation status request failed for video=%s -- '
                u'lang_code=%s -- process_id=%s -- status=%s'
            ),
            edx_video_id,
            lang_code,
            file_id,
            translation_status_response.status_code,
        )
        return None

    try:
        translation_status = json.loads(translation_status_response.text)
    except ValueError:
        # A body that is not JSON is reported as a faulty response below rather
        # than raising out of this function.
        translation_status = None

    if not isinstance(translation_status, dict) or translation_status.get('iserror'):
        LOGGER.error(
            (
                u'[3PlayMedia Task] unable to get translation status for video=%s -- '
                u'lang_code=%s -- process_id=%s -- response=%s'
            ),
            edx_video_id,
            lang_code,
            file_id,
            translation_status_response.text,
        )
        return None

    return translation_status
def
retrieve_three_play_translations
():
def
retrieve_three_play_translations
():
"""
"""
Checks translation status on 3PlayMedia for all the progressing processes, fetches them if they're complete.
Checks translation status on 3PlayMedia for all the progressing processes, fetches them if they're complete.
...
@@ -665,63 +805,56 @@ def retrieve_three_play_translations():
...
@@ -665,63 +805,56 @@ def retrieve_three_play_translations():
4. If its done, mark the process as complete, fetch translated transcript, convert to sjson, upload it to s3 and
4. If its done, mark the process as complete, fetch translated transcript, convert to sjson, upload it to s3 and
finally, update it in edx-val.
finally, update it in edx-val.
"""
"""
log_prefix
=
u'3PlayMedia Task'
translation_processes
=
TranscriptProcessMetadata
.
objects
.
filter
(
translation_processes
=
TranscriptProcessMetadata
.
objects
.
filter
(
provider
=
TranscriptProvider
.
THREE_PLAY
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
status
=
TranscriptStatus
.
IN_PROGRESS
,
status
=
TranscriptStatus
.
IN_PROGRESS
,
)
.
exclude
(
Q
(
translation_id__isnull
=
True
)
|
Q
(
translation_id__exact
=
''
))
)
.
exclude
(
Q
(
translation_id__isnull
=
True
)
|
Q
(
translation_id__exact
=
''
))
for
translation_process
in
translation_processes
:
for
translation_process
in
translation_processes
:
log_args
=
(
translation_process
.
video
.
studio_id
,
translation_process
.
lang_code
,
translation_process
.
process_id
,
)
course_id
=
translation_process
.
video
.
inst_class
.
local_storedir
.
split
(
','
)[
0
]
course_id
=
translation_process
.
video
.
inst_class
.
local_storedir
.
split
(
','
)[
0
]
org
=
utils
.
extract_course_org
(
course_id
=
course_id
)
org
=
utils
.
extract_course_org
(
course_id
=
course_id
)
try
:
# Retrieve transcript credentials
three_play_secrets
=
TranscriptCredentials
.
objects
.
get
(
org
=
org
,
provider
=
TranscriptProvider
.
THREE_PLAY
)
three_play_secrets
=
get_transcript_credentials
(
except
TranscriptCredentials
.
DoesNotExist
:
provider
=
TranscriptProvider
.
THREE_PLAY
,
LOGGER
.
exception
(
org
=
org
,
u'[3PlayMedia Task] 3Play secrets not found for video=
%
s -- lang_code=
%
s -- process_id=
%
s'
,
edx_video_id
=
translation_process
.
video
.
studio_id
,
translation_process
.
video
.
studio_id
,
file_id
=
translation_process
.
process_id
,
translation_process
.
lang_code
,
log_prefix
=
log_prefix
translation_process
.
process_id
,
)
)
if
not
three_play_secrets
:
# Fail the process
translation_process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
continue
continue
translation_status_url
=
utils
.
build_url
(
# Check transcript status
THREE_PLAY_TRANSLATION_STATUS_URL
.
format
(
translation_status
=
get_translation_status
(
file_id
=
translation_process
.
process_id
,
three_play_secrets
.
api_key
,
translation_id
=
translation_process
.
translation_id
,
translation_process
.
process_id
,
),
translation_process
.
translation_id
,
apikey
=
three_play_secrets
.
api_key
translation_process
.
video
.
studio_id
,
translation_process
.
lang_code
,
)
)
response
=
requests
.
get
(
translation_status_url
)
if
not
response
.
ok
:
LOGGER
.
error
(
(
u'[3PlayMedia Task] Translation status request failed for video=
%
s -- '
u'lang_code=
%
s -- process_id=
%
s -- status=
%
s'
),
translation_process
.
video
.
studio_id
,
translation_process
.
lang_code
,
translation_process
.
process_id
,
response
.
status_code
,
)
continue
translation_status
=
json
.
loads
(
response
.
text
)
if
not
translation_status
:
if
translation_status
.
get
(
'iserror'
):
# Fail the process
translation_process
.
status
=
TranscriptStatus
.
FAILED
translation_process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
translation_process
.
save
()
LOGGER
.
error
(
(
u'[3PlayMedia Task] unable to get translation status for video=
%
s -- '
u'lang_code=
%
s -- process_id=
%
s -- response=
%
s'
),
translation_process
.
video
.
studio_id
,
translation_process
.
lang_code
,
translation_process
.
process_id
,
response
.
text
,
)
continue
continue
if
translation_status
[
'state'
]
==
'complete'
:
# On a complete translation
if
translation_status
[
'state'
]
==
COMPLETE
:
# 1 - Fetch translation content from 3Play Media.
try
:
try
:
response
=
fetch_srt_data
(
srt_transcript
=
fetch_srt_data
(
url
=
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
url
=
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
file_id
=
translation_process
.
process_id
,
translation_id
=
translation_process
.
translation_id
file_id
=
translation_process
.
process_id
,
translation_id
=
translation_process
.
translation_id
),
),
...
@@ -730,48 +863,37 @@ def retrieve_three_play_translations():
...
@@ -730,48 +863,37 @@ def retrieve_three_play_translations():
except
TranscriptFetchError
:
except
TranscriptFetchError
:
LOGGER
.
exception
(
LOGGER
.
exception
(
u'[3PlayMedia Task] Translation download failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s.'
,
u'[3PlayMedia Task] Translation download failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s.'
,
translation_process
.
video
.
studio_id
,
*
log_args
translation_process
.
lang_code
,
translation_process
.
process_id
)
)
continue
continue
# its going to be SRT content and `json.loads` should raise
# 2 - Validate the translation's SRT content received from 3Play Media.
# ValueError if its a valid response, otherwise it'll be json
is_transcript_valid
=
validate_transcript_response
(
# response in result of an error.
edx_video_id
=
translation_process
.
video
.
studio_id
,
try
:
file_id
=
translation_process
.
process_id
,
json
.
loads
(
response
)
transcript
=
srt_transcript
,
translation_process
.
status
=
TranscriptStatus
.
FAILED
lang_code
=
translation_process
.
lang_code
,
translation_process
.
save
()
log_prefix
=
log_prefix
LOGGER
.
error
(
)
u'[3PlayMedia Task] Translation error for video=
%
s -- lang_code=
%
s -- process_id=
%
s -- response=
%
s'
,
if
is_transcript_valid
:
translation_process
.
video
.
studio_id
,
translation_process
.
update
(
status
=
TranscriptStatus
.
READY
)
translation_process
.
lang_code
,
else
:
translation_process
.
process_id
,
translation_process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
response
.
text
,
)
continue
continue
except
ValueError
:
pass
# We've got the transcript from 3PlayMedia, now update process status accordingly.
translation_process
.
status
=
TranscriptStatus
.
READY
translation_process
.
save
()
# 3 - Convert SRT translation to SJson format and upload it to S3.
try
:
try
:
sjson_transcript
=
convert_srt_to_sjson
(
response
)
sjson_transcript
=
convert_srt_to_sjson
(
srt_transcript
)
sjson_file
=
upload_sjson_to_s3
(
CONFIG
,
sjson_transcript
)
sjson_file
=
upload_sjson_to_s3
(
CONFIG
,
sjson_transcript
)
except
Exception
:
except
Exception
:
# in case of any exception, log and raise.
# in case of any exception, log and raise.
LOGGER
.
exception
(
LOGGER
.
exception
(
u'[3PlayMedia Task] translation failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s'
,
u'[3PlayMedia Task] translation failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s'
,
translation_process
.
video
.
studio_id
,
*
log_args
translation_process
.
lang_code
,
translation_process
.
process_id
,
)
)
raise
raise
# Update edx-val with completed transcript information
#
4
Update edx-val with completed transcript information
val_api
=
VALAPICall
(
video_proto
=
None
,
val_status
=
None
)
val_api
=
VALAPICall
(
video_proto
=
None
,
val_status
=
None
)
val_api
.
update_val_transcript
(
val_api
.
update_val_transcript
(
video_id
=
translation_process
.
video
.
studio_id
,
video_id
=
translation_process
.
video
.
studio_id
,
...
@@ -781,8 +903,8 @@ def retrieve_three_play_translations():
...
@@ -781,8 +903,8 @@ def retrieve_three_play_translations():
provider
=
TranscriptProvider
.
THREE_PLAY
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
)
)
# if all the processes for this video are complete, update video status in edx-val
#
5 -
if all the processes for this video are complete, update video status in edx-val
# update transcript status for video in edx-val
only if all language transcripts are ready
# update transcript status for video in edx-val
as well as edx-video-pipeline.
video_jobs
=
TranscriptProcessMetadata
.
objects
.
filter
(
video__studio_id
=
translation_process
.
video
.
studio_id
)
video_jobs
=
TranscriptProcessMetadata
.
objects
.
filter
(
video__studio_id
=
translation_process
.
video
.
studio_id
)
if
all
(
video_job
.
status
==
TranscriptStatus
.
READY
for
video_job
in
video_jobs
):
if
all
(
video_job
.
status
==
TranscriptStatus
.
READY
for
video_job
in
video_jobs
):
val_api
.
update_video_status
(
translation_process
.
video
.
studio_id
,
VideoStatus
.
TRANSCRIPT_READY
)
val_api
.
update_video_status
(
translation_process
.
video
.
studio_id
,
VideoStatus
.
TRANSCRIPT_READY
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment