Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-video-pipeline
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-video-pipeline
Commits
9bda5dca
Commit
9bda5dca
authored
Oct 16, 2017
by
M. Rehan
Committed by
GitHub
Oct 16, 2017
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #44 from edx/mrehan/3play-translations-endpoint-fix
3Play Media translation process improvements
parents
58a52c63
5441feb6
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
495 additions
and
181 deletions
+495
-181
VEDA_OS01/migrations/0003_auto_20171012_1203.py
+21
-0
VEDA_OS01/models.py
+20
-1
VEDA_OS01/tests/test_transcripts.py
+231
-76
VEDA_OS01/transcripts.py
+222
-100
control/tests/test_video_validation.py
+1
-4
No files found.
VEDA_OS01/migrations/0003_auto_20171012_1203.py
0 → 100644
View file @
9bda5dca
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2017-10-12 12:03
from
__future__
import
unicode_literals
from
django.db
import
migrations
,
models
import
django.db.models.deletion
class Migration(migrations.Migration):
    """
    Alter the `video` foreign key of `transcriptprocessmetadata`.

    The field keeps pointing at `VEDA_OS01.Video` with cascading deletes and
    is declared with `related_name='transcript_processes'`.
    """

    # This migration is applied on top of the previous VEDA_OS01 migration.
    dependencies = [
        ('VEDA_OS01', '0002_auto_20171009_1054'),
    ]

    operations = [
        migrations.AlterField(
            model_name='transcriptprocessmetadata',
            name='video',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='transcript_processes',
                to='VEDA_OS01.Video',
            ),
        ),
    ]
VEDA_OS01/models.py
View file @
9bda5dca
...
@@ -6,6 +6,8 @@ import uuid
...
@@ -6,6 +6,8 @@ import uuid
from
django.db
import
models
from
django.db
import
models
from
model_utils.models
import
TimeStampedModel
from
model_utils.models
import
TimeStampedModel
from
opaque_keys
import
InvalidKeyError
from
opaque_keys.edx.keys
import
CourseKey
def
_createHex
():
def
_createHex
():
return
uuid
.
uuid1
()
.
hex
return
uuid
.
uuid1
()
.
hex
...
@@ -380,6 +382,23 @@ class Course (models.Model):
...
@@ -380,6 +382,23 @@ class Course (models.Model):
unique
=
True
unique
=
True
)
)
@property
def org(self):
    """
    Return the organization of the first course id stored in `local_storedir`.

    Returns:
        The `org` of the parsed course key, or None when `local_storedir`
        is empty or its first comma-separated entry is not a valid course key.
    """
    stored_course_ids = self.local_storedir
    if not stored_course_ids:
        return None

    # Only the first comma-separated course id is used to derive the organization.
    first_course_id = stored_course_ids.split(',')[0]
    try:
        return CourseKey.from_string(first_course_id).org
    except InvalidKeyError:
        return None
def
__unicode__
(
self
):
def
__unicode__
(
self
):
return
u'{institution} {edx_class_id} {course_name}'
.
format
(
return
u'{institution} {edx_class_id} {course_name}'
.
format
(
institution
=
self
.
institution
,
institution
=
self
.
institution
,
...
@@ -660,7 +679,7 @@ class TranscriptProcessMetadata(TimeStampedModel):
...
@@ -660,7 +679,7 @@ class TranscriptProcessMetadata(TimeStampedModel):
"""
"""
Model to contain third party transcript process metadata.
Model to contain third party transcript process metadata.
"""
"""
video
=
models
.
ForeignKey
(
Video
)
video
=
models
.
ForeignKey
(
Video
,
related_name
=
'transcript_processes'
)
provider
=
models
.
CharField
(
'Transcript provider'
,
max_length
=
50
,
choices
=
TranscriptProvider
.
CHOICES
)
provider
=
models
.
CharField
(
'Transcript provider'
,
max_length
=
50
,
choices
=
TranscriptProvider
.
CHOICES
)
process_id
=
models
.
CharField
(
'Process id'
,
max_length
=
255
)
process_id
=
models
.
CharField
(
'Process id'
,
max_length
=
255
)
translation_id
=
models
.
CharField
(
translation_id
=
models
.
CharField
(
...
...
VEDA_OS01/tests/test_transcripts.py
View file @
9bda5dca
...
@@ -354,6 +354,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -354,6 +354,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
self
.
video
=
Video
.
objects
.
create
(
self
.
video
=
Video
.
objects
.
create
(
inst_class
=
self
.
course
,
inst_class
=
self
.
course
,
source_language
=
self
.
video_source_language
,
source_language
=
self
.
video_source_language
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
transcript_status
=
TranscriptStatus
.
IN_PROGRESS
,
**
VIDEO_DATA
**
VIDEO_DATA
)
)
...
@@ -408,6 +410,32 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -408,6 +410,32 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
)
)
return
response
return
response
def setup_translations_prereqs(self, file_id, translation_lang_map, preferred_languages):
    """
    Sets up pre-requisites for 3Play Media translations retrieval process.

    Arguments:
        file_id: process (file) identifier under which the transcript
            processes are looked up and created.
        translation_lang_map (dict): maps a target language code to the
            translation id of its tracking process.
        preferred_languages (list): language codes saved on `self.video`.
    """
    # Update preferred languages.
    self.video.preferred_languages = preferred_languages
    self.video.save()

    # Assumes the speech transcript is ready. The lookup uses the `file_id`
    # argument (rather than `self.file_id`) so that it targets the same
    # process id as the translation processes created below.
    TranscriptProcessMetadata.objects.filter(
        process_id=file_id,
        lang_code=self.video_source_language,
    ).update(status=TranscriptStatus.READY)

    # Create translation processes for all the target languages and set
    # their statuses to 'IN PROGRESS'.
    for target_language, translation_id in translation_lang_map.iteritems():
        TranscriptProcessMetadata.objects.create(
            video=self.video,
            provider=TranscriptProvider.THREE_PLAY,
            process_id=file_id,
            translation_id=translation_id,
            lang_code=target_language,
            status=TranscriptStatus.IN_PROGRESS,
        )
def
assert_request
(
self
,
received_request
,
expected_request
,
decode_func
):
def
assert_request
(
self
,
received_request
,
expected_request
,
decode_func
):
"""
"""
Verify that `received_request` matches `expected_request`
Verify that `received_request` matches `expected_request`
...
@@ -1086,41 +1114,26 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1086,41 +1114,26 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
# Setup an S3 bucket
# Setup an S3 bucket
connection
=
self
.
setup_s3_bucket
()
connection
=
self
.
setup_s3_bucket
()
# Setup translation
processe
s
# Setup translations
mock_translations
=
{
translations_lang_map
=
{
'ro'
:
'1z2x3c'
,
'ro'
:
'1z2x3c'
,
'da'
:
'1q2w3e'
,
'da'
:
'1q2w3e'
,
}
}
self
.
video
.
preferred_languages
=
[
'en'
,
'ro'
,
'da'
]
self
.
setup_translations_prereqs
(
self
.
video
.
save
()
file_id
=
self
.
file_id
,
translation_lang_map
=
translations_lang_map
,
# Assume the speech transcript is ready.
preferred_languages
=
[
'en'
,
'ro'
,
'da'
]
TranscriptProcessMetadata
.
objects
.
filter
(
)
process_id
=
self
.
file_id
,
lang_code
=
'en'
)
.
update
(
status
=
TranscriptStatus
.
READY
)
# in progress translation processes (which will normally be done by the callback)
for
lang_code
,
translation_id
in
mock_translations
.
iteritems
():
TranscriptProcessMetadata
.
objects
.
create
(
video
=
self
.
video
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
process_id
=
self
.
file_id
,
translation_id
=
translation_id
,
lang_code
=
lang_code
,
status
=
TranscriptStatus
.
IN_PROGRESS
,
)
# Setup mock responses
# Setup mock responses
for
__
,
translation_id
in
mock_translations
.
iteritems
():
translation_status_mock_response
=
[]
responses
.
add
(
for
target_language
,
translation_id
in
translations_lang_map
.
iteritems
():
responses
.
GET
,
translation_status_mock_response
.
append
({
transcripts
.
THREE_PLAY_TRANSLATION_STATUS_URL
.
format
(
'id'
:
translation_id
,
file_id
=
self
.
file_id
,
translation_id
=
translation_id
'source_language_iso_639_1_code'
:
'en'
,
),
'target_language_iso_639_1_code'
:
target_language
,
json
.
dumps
({
'state'
:
'complete'
}),
'state'
:
'complete'
status
=
200
})
)
responses
.
add
(
responses
.
add
(
responses
.
GET
,
responses
.
GET
,
...
@@ -1136,29 +1149,42 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1136,29 +1149,42 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
responses
.
add
(
responses
.
POST
,
CONFIG_DATA
[
'val_transcript_create_url'
],
status
=
200
)
responses
.
add
(
responses
.
POST
,
CONFIG_DATA
[
'val_transcript_create_url'
],
status
=
200
)
responses
.
add
(
responses
.
PATCH
,
CONFIG_DATA
[
'val_video_transcript_status_url'
],
status
=
200
)
responses
.
add
(
responses
.
PATCH
,
CONFIG_DATA
[
'val_video_transcript_status_url'
],
status
=
200
)
responses
.
add
(
responses
.
GET
,
transcripts
.
THREE_PLAY_TRANSLATIONS_METADATA_URL
.
format
(
file_id
=
self
.
file_id
),
json
.
dumps
(
translation_status_mock_response
),
status
=
200
)
# Call to retrieve translations
# Call to retrieve translations
transcripts
.
retrieve_three_play_translations
()
transcripts
.
retrieve_three_play_translations
()
# Total HTTP requests, 4 for first translation and 4 for second translation and 1 for updating video status.
# Total HTTP requests, 1 for retrieving translations metadata, 3 for first translation and
self
.
assertEqual
(
len
(
responses
.
calls
),
9
)
# 3 for second translation and 1 for updating video status.
self
.
assertEqual
(
len
(
responses
.
calls
),
8
)
position
=
0
# Assert that the first request was made for getting translations metadata from 3Play Media.
for
lang_code
,
translation_id
in
mock_translations
.
iteritems
():
expected_video_status_update_request
=
{
'url'
:
utils
.
build_url
(
transcripts
.
THREE_PLAY_TRANSLATIONS_METADATA_URL
.
format
(
file_id
=
self
.
file_id
),
apikey
=
self
.
transcript_prefs
.
api_key
)
}
self
.
assert_request
(
responses
.
calls
[
0
]
.
request
,
expected_video_status_update_request
,
decode_func
=
json
.
loads
,
)
position
=
1
for
lang_code
,
translation_id
in
translations_lang_map
.
iteritems
():
expected_requests
=
[
expected_requests
=
[
# request - 1
# request - 1
{
{
'url'
:
utils
.
build_url
(
transcripts
.
THREE_PLAY_TRANSLATION_STATUS_URL
.
format
(
file_id
=
self
.
file_id
,
translation_id
=
translation_id
),
apikey
=
self
.
transcript_prefs
.
api_key
)
},
# request - 2
{
'url'
:
utils
.
build_url
(
transcripts
.
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
'url'
:
utils
.
build_url
(
transcripts
.
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
file_id
=
self
.
file_id
,
translation_id
=
translation_id
file_id
=
self
.
file_id
,
translation_id
=
translation_id
),
apikey
=
self
.
transcript_prefs
.
api_key
)
),
apikey
=
self
.
transcript_prefs
.
api_key
)
},
},
# request -
3
# request -
2
{
{
'url'
:
CONFIG_DATA
[
'val_token_url'
],
'url'
:
CONFIG_DATA
[
'val_token_url'
],
'body'
:
{
'body'
:
{
...
@@ -1170,7 +1196,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1170,7 +1196,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
},
},
'decode_func'
:
urlparse
.
parse_qs
,
'decode_func'
:
urlparse
.
parse_qs
,
},
},
# request -
4
# request -
3
{
{
'url'
:
CONFIG_DATA
[
'val_transcript_create_url'
],
'url'
:
CONFIG_DATA
[
'val_transcript_create_url'
],
'body'
:
{
'body'
:
{
...
@@ -1240,9 +1266,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1240,9 +1266,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
[
[
{
{
'method'
:
responses
.
GET
,
'method'
:
responses
.
GET
,
'url'
:
transcripts
.
THREE_PLAY_TRANSLATION_STATUS_URL
.
format
(
'url'
:
transcripts
.
THREE_PLAY_TRANSLATIONS_METADATA_URL
.
format
(
file_id
=
'112233'
),
file_id
=
'112233'
,
translation_id
=
'1q2w3e'
),
'body'
:
'Your request was invalid.'
,
'body'
:
'Your request was invalid.'
,
'status'
:
400
,
'status'
:
400
,
}
}
...
@@ -1250,10 +1274,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1250,10 +1274,8 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{
{
'method'
:
'error'
,
'method'
:
'error'
,
'args'
:
(
'args'
:
(
'[3PlayMedia Task] Translation status request failed for video=
%
s -- lang_code=
%
s -- '
'[3PlayMedia Task] Translations metadata request failed for video=
%
s -- process_id=
%
s -- status=
%
s'
,
'process_id=
%
s -- status=
%
s'
,
VIDEO_DATA
[
'studio_id'
],
VIDEO_DATA
[
'studio_id'
],
'ro'
,
'112233'
,
'112233'
,
400
,
400
,
)
)
...
@@ -1265,9 +1287,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1265,9 +1287,7 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
[
[
{
{
'method'
:
responses
.
GET
,
'method'
:
responses
.
GET
,
'url'
:
transcripts
.
THREE_PLAY_TRANSLATION_STATUS_URL
.
format
(
'url'
:
transcripts
.
THREE_PLAY_TRANSLATIONS_METADATA_URL
.
format
(
file_id
=
'112233'
),
file_id
=
'112233'
,
translation_id
=
'1q2w3e'
),
'body'
:
json
.
dumps
({
'iserror'
:
True
}),
'body'
:
json
.
dumps
({
'iserror'
:
True
}),
'status'
:
200
,
'status'
:
200
,
}
}
...
@@ -1275,10 +1295,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1275,10 +1295,9 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
{
{
'method'
:
'error'
,
'method'
:
'error'
,
'args'
:
(
'args'
:
(
'[3PlayMedia Task] unable to get translation
status for
'
'[3PlayMedia Task] unable to get translation
s metadata for video=
%
s --
'
'
video=
%
s -- lang_code=
%
s --
process_id=
%
s -- response=
%
s'
,
'process_id=
%
s -- response=
%
s'
,
VIDEO_DATA
[
'studio_id'
],
VIDEO_DATA
[
'studio_id'
],
'ro'
,
'112233'
,
'112233'
,
json
.
dumps
({
'iserror'
:
True
}),
json
.
dumps
({
'iserror'
:
True
}),
)
)
...
@@ -1290,12 +1309,13 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1290,12 +1309,13 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
[
[
{
{
'method'
:
responses
.
GET
,
'method'
:
responses
.
GET
,
'url'
:
transcripts
.
THREE_PLAY_TRANSLATION_STATUS_URL
.
format
(
'url'
:
transcripts
.
THREE_PLAY_TRANSLATIONS_METADATA_URL
.
format
(
file_id
=
'112233'
),
file_id
=
'112233'
,
translation_id
=
'1q2w3e'
'body'
:
json
.
dumps
([{
),
'id'
:
'1q2w3e'
,
'body'
:
json
.
dumps
({
'source_language_iso_639_1_code'
:
'en'
,
'target_language_iso_639_1_code'
:
'ro'
,
'state'
:
'complete'
'state'
:
'complete'
}),
}
]
),
'status'
:
200
,
'status'
:
200
,
},
},
{
{
...
@@ -1324,12 +1344,13 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1324,12 +1344,13 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
[
[
{
{
'method'
:
responses
.
GET
,
'method'
:
responses
.
GET
,
'url'
:
transcripts
.
THREE_PLAY_TRANSLATION_STATUS_URL
.
format
(
'url'
:
transcripts
.
THREE_PLAY_TRANSLATIONS_METADATA_URL
.
format
(
file_id
=
'112233'
),
file_id
=
'112233'
,
translation_id
=
'1q2w3e'
'body'
:
json
.
dumps
([{
),
'id'
:
'1q2w3e'
,
'body'
:
json
.
dumps
({
'source_language_iso_639_1_code'
:
'en'
,
'target_language_iso_639_1_code'
:
'ro'
,
'state'
:
'complete'
'state'
:
'complete'
}),
}
]
),
'status'
:
200
,
'status'
:
200
,
},
},
{
{
...
@@ -1366,17 +1387,10 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1366,17 +1387,10 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
"""
"""
# Setup translation processes
# Setup translation processes
translation_id
=
'1q2w3e'
translation_id
=
'1q2w3e'
self
.
video
.
preferred_languages
=
[
'en'
,
'ro'
]
self
.
setup_translations_prereqs
(
self
.
video
.
save
()
file_id
=
self
.
file_id
,
translation_lang_map
=
{
'ro'
:
translation_id
},
# in progress translation processes (i.e. this was done as a part of callback)
preferred_languages
=
[
'en'
,
'ro'
]
TranscriptProcessMetadata
.
objects
.
create
(
video
=
self
.
video
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
process_id
=
self
.
file_id
,
translation_id
=
translation_id
,
lang_code
=
'ro'
,
status
=
TranscriptStatus
.
IN_PROGRESS
,
)
)
for
response
in
mock_responses
:
for
response
in
mock_responses
:
...
@@ -1391,9 +1405,150 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
...
@@ -1391,9 +1405,150 @@ class ThreePlayTranscriptionCallbackTest(APITestCase):
# Assert the transcript translation process
# Assert the transcript translation process
self
.
assertEqual
(
self
.
assertEqual
(
TranscriptProcessMetadata
.
objects
.
get
(
TranscriptProcessMetadata
.
objects
.
get
(
process_id
=
self
.
file_id
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
process_id
=
self
.
file_id
,
translation_id
=
translation_id
,
lang_code
=
'ro'
lang_code
=
'ro'
)
.
status
,
)
.
status
,
transcript_status
,
transcript_status
,
)
)
@patch('VEDA_OS01.transcripts.LOGGER')
@patch('VEDA_OS01.transcripts.convert_srt_to_sjson', Mock(side_effect=ValueError))
def test_translations_retrieval_uncaught_exceptions(self, mock_logger):
    """
    Test that `convert_to_sjson_and_upload_to_s3` logs and raises any uncaught
    exception during the translation retrieval process.
    """
    conversion_kwargs = {
        'srt_transcript': 'invalid SRT content}',
        'edx_video_id': self.video.studio_id,
        'file_id': self.file_id,
        'target_language': 'es',
    }

    # `convert_srt_to_sjson` is patched above to raise ValueError, which must propagate.
    with self.assertRaises(ValueError):
        transcripts.convert_to_sjson_and_upload_to_s3(**conversion_kwargs)

    # NOTE(review): the positional args are (video, file_id, language) while the
    # message labels them video / lang_code / process_id -- confirm the intended order.
    expected_log_args = (
        u'[3PlayMedia Task] translation failed for video=%s -- lang_code=%s -- process_id=%s',
        self.video.studio_id,
        self.file_id,
        'es',
    )
    mock_logger.exception.assert_called_with(*expected_log_args)
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_with_zero_translation_process(self, mock_logger):
    """
    Tests the translations retrieval when a video doesn't have any 'in progress' translation processes.
    """
    # This test creates no translation process before running the retrieval.
    transcripts.retrieve_three_play_translations()

    # The retrieval is expected to log an info message naming the video.
    expected_log_args = (
        '[3PlayMedia Task] video=%s does not have any translation process who is in progress.',
        self.video.studio_id,
    )
    mock_logger.info.assert_called_with(*expected_log_args)
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_no_credentials(self, mock_logger):
    """
    Tests the translations retrieval when 3Play Media credentials are deleted from the data model.
    """
    romanian_translation_id = '1q2w3e'
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': romanian_translation_id},
        preferred_languages=['en', 'ro'],
    )

    # Remove every stored transcript credential, then try fetching translations.
    TranscriptCredentials.objects.all().delete()
    transcripts.retrieve_three_play_translations()

    # Assert the exception logs.
    expected_log_args = (
        '[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
        '3PlayMedia Task',
        self.org,
        self.video.studio_id,
        self.file_id,
    )
    mock_logger.exception.assert_called_with(*expected_log_args)

    # Assert the translation process status.
    process_lookup = {
        'provider': TranscriptProvider.THREE_PLAY,
        'process_id': self.file_id,
        'translation_id': romanian_translation_id,
        'lang_code': 'ro',
    }
    translation_process = TranscriptProcessMetadata.objects.get(**process_lookup)
    self.assertEqual(translation_process.status, TranscriptStatus.FAILED)
@responses.activate
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translations_retrieval_with_removed_translation_process(self, mock_logger):
    """
    Tests the translations retrieval when a tracking translation process is not there or deleted.
    """
    translation_id = '1q2w3e'
    non_existent_target_language = 'es'
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': translation_id},
        preferred_languages=['en', 'ro'],
    )

    # The mocked translations metadata reports a completed translation for a
    # language ('es') that has no tracking process; only 'ro' was set up above.
    metadata_url = transcripts.THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id='112233')
    metadata_payload = [{
        'id': translation_id,
        'source_language_iso_639_1_code': 'en',
        'target_language_iso_639_1_code': non_existent_target_language,
        'state': 'complete'
    }]
    responses.add(
        method=responses.GET,
        url=metadata_url,
        body=json.dumps(metadata_payload),
        status=200,
    )

    # Try fetching translations.
    transcripts.retrieve_three_play_translations()

    # A warning naming the process, language and translation id is expected.
    expected_message = (
        u'[3PlayMedia Task] Tracking process is either not found or already complete '
        u'-- process_id=%s -- target_language=%s -- translation_id=%s.'
    )
    mock_logger.warning.assert_called_with(
        expected_message,
        '112233',
        non_existent_target_language,
        translation_id,
    )
@data(None, 'invalid_course_id_1, invalid_course_id_2')
@patch('VEDA_OS01.transcripts.LOGGER')
def test_translation_retrieval_with_invalid_course_id(self, course_runs, mock_logger):
    """
    Tests the translations retrieval when an associated course does not have course ids
    or has some invalid course ids.

    Note:
        A course without a course id is unusual, but it has to be handled because
        `Course.local_storedir` is null=True, blank=True.
    """
    self.setup_translations_prereqs(
        file_id=self.file_id,
        translation_lang_map={'ro': '1q2w3e'},
        preferred_languages=['en', 'ro'],
    )

    # Store the missing / invalid course ids on the course under test.
    course = self.course
    course.local_storedir = course_runs
    course.save()

    # Now, try fetching translations.
    transcripts.retrieve_three_play_translations()

    # The logged org is expected to be None for both data cases.
    expected_log_args = (
        u'[%s] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
        '3PlayMedia Task',
        None,
        self.edx_video_id,
        self.file_id,
    )
    mock_logger.exception.assert_called_with(*expected_log_args)
VEDA_OS01/transcripts.py
View file @
9bda5dca
...
@@ -21,7 +21,7 @@ from rest_framework.views import APIView
...
@@ -21,7 +21,7 @@ from rest_framework.views import APIView
from
control.veda_val
import
VALAPICall
from
control.veda_val
import
VALAPICall
from
VEDA_OS01
import
utils
from
VEDA_OS01
import
utils
from
VEDA_OS01.models
import
(
TranscriptCredentials
,
TranscriptProcessMetadata
,
from
VEDA_OS01.models
import
(
TranscriptCredentials
,
TranscriptProcessMetadata
,
TranscriptProvider
,
TranscriptStatus
)
TranscriptProvider
,
TranscriptStatus
,
Video
)
requests
.
packages
.
urllib3
.
disable_warnings
(
InsecurePlatformWarning
)
requests
.
packages
.
urllib3
.
disable_warnings
(
InsecurePlatformWarning
)
...
@@ -62,9 +62,9 @@ THREE_PLAY_ORDER_TRANSLATION_URL = utils.build_url(
...
@@ -62,9 +62,9 @@ THREE_PLAY_ORDER_TRANSLATION_URL = utils.build_url(
CONFIG
[
'three_play_api_base_url'
],
CONFIG
[
'three_play_api_base_url'
],
'files/{file_id}/translations/order'
'files/{file_id}/translations/order'
)
)
THREE_PLAY_TRANSLATION
_STATUS
_URL
=
utils
.
build_url
(
THREE_PLAY_TRANSLATION
S_METADATA
_URL
=
utils
.
build_url
(
CONFIG
[
'three_play_api_transcript_url'
],
CONFIG
[
'three_play_api_transcript_url'
],
'files/{file_id}/translations
/{translation_id}
'
'files/{file_id}/translations'
)
)
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
=
utils
.
build_url
(
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
=
utils
.
build_url
(
CONFIG
[
'three_play_api_transcript_url'
],
CONFIG
[
'three_play_api_transcript_url'
],
...
@@ -790,133 +790,200 @@ def three_play_transcription_callback(sender, **kwargs):
...
@@ -790,133 +790,200 @@ def three_play_transcription_callback(sender, **kwargs):
)
)
def
get_translation
_status
(
api_key
,
file_id
,
translation_id
,
edx_video_id
,
lang_code
):
def
get_translation
s_metadata
(
api_key
,
file_id
,
edx_video_id
):
"""
"""
Get translation
status for a translation process from 3Play Media
.
Get translation
s metadata from 3Play Media for a given file id
.
Arguments:
Arguments:
api_key(unicode): api key
api_key(unicode): api key
file_id(unicode): file identifier or process identifier
file_id(unicode): file identifier or process identifier
translation_id(unicode): translation identifier associated with that file identifier
edx_video_id(unicode): video studio identifier
edx_video_id(unicode): video studio identifier
lang_code(unicode): language code
Returns:
Returns:
A translation status retrieved from 3play media or None in case of a faulty response.
A List containing the translations metadata for a file id or None
in case of a faulty response.
Example:
[
{
"id": 1234,
"translation_service_id": 12,
"source_language_name": "English",
"source_language_iso_639_1_code": "en",
"target_language_name": "French (Canada)",
"target_language_iso_639_1_code": "fr",
"state": "complete"
},
{
"id": 1345,
"translation_service_id": 32,
"source_language_name": "English",
"source_language_iso_639_1_code": "en",
"target_language_name": "German",
"target_language_iso_639_1_code": "de",
"state": "in_progress"
}
]
"""
"""
translation
_status
_url
=
utils
.
build_url
(
translation
s_metadata
_url
=
utils
.
build_url
(
THREE_PLAY_TRANSLATION
_STATUS
_URL
.
format
(
THREE_PLAY_TRANSLATION
S_METADATA
_URL
.
format
(
file_id
=
file_id
,
file_id
=
file_id
,
translation_id
=
translation_id
,
),
),
apikey
=
api_key
apikey
=
api_key
)
)
translation
_status_response
=
requests
.
get
(
translation_status
_url
)
translation
s_metadata_response
=
requests
.
get
(
translations_metadata
_url
)
if
not
translation
_status
_response
.
ok
:
if
not
translation
s_metadata
_response
.
ok
:
LOGGER
.
error
(
LOGGER
.
error
(
(
u'[3PlayMedia Task] Translation status request failed for video=
%
s -- '
u'[3PlayMedia Task] Translations metadata request failed for video=
%
s -- process_id=
%
s -- status=
%
s'
,
u'lang_code=
%
s -- process_id=
%
s -- status=
%
s'
),
edx_video_id
,
edx_video_id
,
lang_code
,
file_id
,
file_id
,
translation
_status
_response
.
status_code
,
translation
s_metadata
_response
.
status_code
,
)
)
return
return
translation
_status
=
json
.
loads
(
translation_status
_response
.
text
)
translation
s
=
json
.
loads
(
translations_metadata
_response
.
text
)
if
translation_status
.
get
(
'iserror'
):
if
not
isinstance
(
translations
,
list
):
LOGGER
.
error
(
LOGGER
.
error
(
(
u'[3PlayMedia Task] unable to get translation status for video=
%
s -- '
u'[3PlayMedia Task] unable to get translations metadata for video=
%
s -- process_id=
%
s -- response=
%
s'
,
u'lang_code=
%
s -- process_id=
%
s -- response=
%
s'
),
edx_video_id
,
edx_video_id
,
lang_code
,
file_id
,
file_id
,
translation
_status
_response
.
text
,
translation
s_metadata
_response
.
text
,
)
)
return
return
return
translation
_statu
s
return
translations
def
retrieve_three_play_translations
(
):
def
get_in_progress_translation_processes
(
video
):
"""
"""
Checks translation status on 3PlayMedia for all the progressing processes, fetches them if they're complete.
Retrieves 'IN PROGRESS' translation tracking processes associated to a Video.
Retrieval flow:
1. Fetches 3PlayMedia translation processes whose status is `in progress`
2. For each process, retrieve the org-wide api keys
3. Check translation status through 3PlayMedia
4. If its done, mark the process as complete, fetch translated transcript, convert to sjson, upload it to s3 and
finally, update it in edx-val.
"""
"""
log_prefix
=
u'3PlayMedia Task'
translation_processes
=
video
.
transcript_processes
.
filter
(
translation_processes
=
TranscriptProcessMetadata
.
objects
.
filter
(
provider
=
TranscriptProvider
.
THREE_PLAY
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
status
=
TranscriptStatus
.
IN_PROGRESS
,
status
=
TranscriptStatus
.
IN_PROGRESS
,
)
.
exclude
(
Q
(
translation_id__isnull
=
True
)
|
Q
(
translation_id__exact
=
''
))
)
.
exclude
(
Q
(
translation_id__isnull
=
True
)
|
Q
(
translation_id__exact
=
''
)
)
return
translation_processes
for
translation_process
in
translation_processes
:
log_args
=
(
def
get_in_progress_translation_process
(
processes
,
file_id
,
translation_id
,
target_language
):
translation_process
.
video
.
studio_id
,
"""
translation_process
.
lang_code
,
Returns a single translation process from the given Processes.
translation_process
.
process_id
,
"""
translation_process
=
None
try
:
translation_process
=
processes
.
filter
(
translation_id
=
translation_id
,
lang_code
=
target_language
,
process_id
=
file_id
)
.
latest
()
except
TranscriptProcessMetadata
.
DoesNotExist
:
LOGGER
.
warning
(
(
u'[3PlayMedia Task] Tracking process is either not found or already complete -- process_id=
%
s -- '
u'target_language=
%
s -- translation_id=
%
s.'
),
file_id
,
target_language
,
translation_id
)
)
course_id
=
translation_process
.
video
.
inst_class
.
local_storedir
.
split
(
','
)[
0
]
return
translation_process
org
=
utils
.
extract_course_org
(
course_id
=
course_id
)
# Retrieve transcript credentials
three_play_secrets
=
get_transcript_credentials
(
def
get_transcript_content_from_3play_media
(
api_key
,
edx_video_id
,
file_id
,
translation_id
,
target_language
):
provider
=
TranscriptProvider
.
THREE_PLAY
,
"""
org
=
org
,
Get transcript content from 3Play Media in SRT format.
edx_video_id
=
translation_process
.
video
.
studio_id
,
"""
file_id
=
translation_process
.
process_id
,
srt_transcript
=
None
log_prefix
=
log_prefix
try
:
transcript_url
=
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
file_id
=
file_id
,
translation_id
=
translation_id
)
srt_transcript
=
fetch_srt_data
(
url
=
transcript_url
,
apikey
=
api_key
)
except
TranscriptFetchError
:
LOGGER
.
exception
(
u'[3PlayMedia Task] Translation download failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s.'
,
edx_video_id
,
target_language
,
file_id
,
)
)
if
not
three_play_secrets
:
# Fail the process
translation_process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
continue
# Check transcript status
return
srt_transcript
translation_status
=
get_translation_status
(
three_play_secrets
.
api_key
,
translation_process
.
process_id
,
def
convert_to_sjson_and_upload_to_s3
(
srt_transcript
,
edx_video_id
,
file_id
,
target_language
):
translation_process
.
translation_id
,
"""
translation_process
.
video
.
studio_id
,
Converts SRT content to sjson format, upload it to S3 and returns an S3 file path of the uploaded file.
translation_process
.
lang_code
,
Raises:
Logs and raises any unexpected Exception.
"""
try
:
sjson_transcript
=
convert_srt_to_sjson
(
srt_transcript
)
sjson_file
=
upload_sjson_to_s3
(
CONFIG
,
sjson_transcript
)
except
Exception
:
# in case of any exception, log and raise.
LOGGER
.
exception
(
u'[3PlayMedia Task] translation failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s'
,
edx_video_id
,
file_id
,
target_language
,
)
)
raise
if
not
translation_status
:
return
sjson_file
# Fail the process
translation_process
.
update
(
status
=
TranscriptStatus
.
FAILED
)
continue
# On a complete translation
if
translation_status
[
'state'
]
==
COMPLETE
:
# 1 - Fetch translation content from 3Play Media.
def
handle_video_translations
(
video
,
translations
,
file_id
,
api_key
,
log_prefix
):
try
:
"""
srt_transcript
=
fetch_srt_data
(
It is a sub-module of `retrieve_three_play_translations` to handle
url
=
THREE_PLAY_TRANSLATION_DOWNLOAD_URL
.
format
(
all the completed translations for a single video.
file_id
=
translation_process
.
process_id
,
translation_id
=
translation_process
.
translation_id
),
Arguments:
apikey
=
three_play_secrets
.
api_key
,
video: Video data object whose translations need to be handled here.
)
translations: A list containing translations metadata information received from 3play Media.
except
TranscriptFetchError
:
file_id: It is file identifier that is assigned to a Video by 3Play Media.
LOGGER
.
exception
(
api_key: An api key to communicate to the 3Play Media.
u'[3PlayMedia Task] Translation download failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s.'
,
log_prefix: A logging prefix used by the main process.
*
log_args
)
Steps include:
- Fetch translated transcript content from 3Play Media.
- Validate the content of received translated transcript.
- Convert translated SRT transcript to SJson format and upload it to S3.
- Update edx-val for a completed transcript.
- update transcript status for video in edx-val as well as edx-video-pipeline.
"""
video_translation_processes
=
get_in_progress_translation_processes
(
video
)
for
translation_metadata
in
translations
:
translation_id
=
translation_metadata
[
'id'
]
translation_state
=
translation_metadata
[
'state'
]
target_language
=
translation_metadata
[
'target_language_iso_639_1_code'
]
if
translation_state
==
COMPLETE
:
# Fetch the corresponding tracking process.
translation_process
=
get_in_progress_translation_process
(
video_translation_processes
,
file_id
=
file_id
,
translation_id
=
translation_id
,
target_language
=
target_language
)
if
translation_process
is
None
:
continue
# 1 - Fetch translated transcript content from 3Play Media.
srt_transcript
=
get_transcript_content_from_3play_media
(
api_key
=
api_key
,
edx_video_id
=
video
.
studio_id
,
file_id
=
file_id
,
translation_id
=
translation_id
,
target_language
=
target_language
,
)
if
srt_transcript
is
None
:
continue
continue
# 2 - Validate the
translation's SRT content received from 3Play Media
.
# 2 - Validate the
content of received translated transcript
.
is_transcript_valid
=
validate_transcript_response
(
is_transcript_valid
=
validate_transcript_response
(
edx_video_id
=
translation_process
.
video
.
studio_id
,
edx_video_id
=
video
.
studio_id
,
file_id
=
translation_process
.
process
_id
,
file_id
=
file
_id
,
transcript
=
srt_transcript
,
transcript
=
srt_transcript
,
lang_code
=
t
ranslation_process
.
lang_cod
e
,
lang_code
=
t
arget_languag
e
,
log_prefix
=
log_prefix
log_prefix
=
log_prefix
)
)
if
is_transcript_valid
:
if
is_transcript_valid
:
...
@@ -926,33 +993,88 @@ def retrieve_three_play_translations():
...
@@ -926,33 +993,88 @@ def retrieve_three_play_translations():
continue
continue
# 3 - Convert SRT translation to SJson format and upload it to S3.
# 3 - Convert SRT translation to SJson format and upload it to S3.
try
:
sjson_file
=
convert_to_sjson_and_upload_to_s3
(
sjson_transcript
=
convert_srt_to_sjson
(
srt_transcript
)
srt_transcript
=
srt_transcript
,
sjson_file
=
upload_sjson_to_s3
(
CONFIG
,
sjson_transcript
)
target_language
=
target_language
,
except
Exception
:
edx_video_id
=
video
.
studio_id
,
# in case of any exception, log and raise.
file_id
=
file_id
,
LOGGER
.
exception
(
)
u'[3PlayMedia Task] translation failed for video=
%
s -- lang_code=
%
s -- process_id=
%
s'
,
*
log_args
)
raise
# 4 Update edx-val with completed transcript information
# 4 Update edx-val with completed transcript information
val_api
=
VALAPICall
(
video_proto
=
None
,
val_status
=
None
)
val_api
=
VALAPICall
(
video_proto
=
None
,
val_status
=
None
)
val_api
.
update_val_transcript
(
val_api
.
update_val_transcript
(
video_id
=
translation_process
.
video
.
studio_id
,
video_id
=
video
.
studio_id
,
lang_code
=
t
ranslation_process
.
lang_cod
e
,
lang_code
=
t
arget_languag
e
,
name
=
sjson_file
,
name
=
sjson_file
,
transcript_format
=
TRANSCRIPT_SJSON
,
transcript_format
=
TRANSCRIPT_SJSON
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
provider
=
TranscriptProvider
.
THREE_PLAY
,
)
)
# 5 - if all the processes for this video are complete, update
video status in edx-val
# 5 - if all the processes for this video are complete, update
transcript status
#
update transcript status
for video in edx-val as well as edx-video-pipeline.
# for video in edx-val as well as edx-video-pipeline.
video_jobs
=
TranscriptProcessMetadata
.
objects
.
filter
(
video
__studio_id
=
translation_process
.
video
.
studio_id
)
video_jobs
=
TranscriptProcessMetadata
.
objects
.
filter
(
video
=
video
)
if
all
(
video_job
.
status
==
TranscriptStatus
.
READY
for
video_job
in
video_jobs
):
if
all
(
video_job
.
status
==
TranscriptStatus
.
READY
for
video_job
in
video_jobs
):
utils
.
update_video_status
(
utils
.
update_video_status
(
val_api_client
=
val_api
,
val_api_client
=
val_api
,
video
=
translation_process
.
video
,
video
=
video
,
status
=
TranscriptStatus
.
READY
status
=
TranscriptStatus
.
READY
)
)
def retrieve_three_play_translations():
    """
    Poll 3PlayMedia for translations of every video whose transcripts are still in progress.

    For each video with the 3PlayMedia provider and an `in progress` transcript status:
    1. Look up the video's in-progress translation processes; skip the video (with a log line) if
       there are none.
    2. Use the `process_id` of the first such process as the 3PlayMedia `file_id`.
    3. Retrieve the transcript credentials for the video's organization; if they are unavailable,
       mark the in-progress translation processes as failed and move on.
    4. Retrieve the translations metadata for the file; if it is `None`, mark the in-progress
       translation processes as failed and move on.
    5. Hand the video, the translations metadata and the api key over to `handle_video_translations`.
    """
    log_prefix = u'3PlayMedia Task'

    for video in Video.objects.filter(
        provider=TranscriptProvider.THREE_PLAY,
        transcript_status=TranscriptStatus.IN_PROGRESS,
    ):
        # Translation processes of this video which are still in progress.
        pending_processes = get_in_progress_translation_processes(video)
        if not pending_processes.exists():
            LOGGER.info(
                '[3PlayMedia Task] video=%s does not have any translation process who is in progress.',
                video.studio_id,
            )
            continue

        # Process id remains same across all the processes of a video and it is also referred to as `file_id`.
        file_id = pending_processes.first().process_id

        # Credentials are looked up for the video's organization.
        credentials = get_transcript_credentials(
            provider=TranscriptProvider.THREE_PLAY,
            org=video.inst_class.org,
            edx_video_id=video.studio_id,
            file_id=file_id,
            log_prefix=log_prefix,
        )
        if not credentials:
            # Without credentials nothing can be fetched, so mark the pending processes as failed.
            pending_processes.update(status=TranscriptStatus.FAILED)
            continue

        # Translations metadata carries the state of each translation of the file.
        translations_metadata = get_translations_metadata(
            api_key=credentials.api_key,
            file_id=file_id,
            edx_video_id=video.studio_id,
        )
        if translations_metadata is None:
            pending_processes.update(status=TranscriptStatus.FAILED)
            continue

        handle_video_translations(
            video=video,
            translations=translations_metadata,
            file_id=file_id,
            api_key=credentials.api_key,
            log_prefix=log_prefix,
        )
control/tests/test_video_validation.py
View file @
9bda5dca
...
@@ -29,10 +29,7 @@ class TestValidation(TestCase):
...
@@ -29,10 +29,7 @@ class TestValidation(TestCase):
videofile
=
self
.
videofile
videofile
=
self
.
videofile
)
)
@unittest.skipIf
(
@unittest.skip
(
'Skipping this test due to unavailability of required ffprobe version.'
)
'TRAVIS'
in
os
.
environ
and
os
.
environ
[
'TRAVIS'
]
==
'true'
,
'Skipping this test on Travis CI due to unavailability of required ffprobe version.'
)
def
test_validation
(
self
):
def
test_validation
(
self
):
"""
"""
Check a known file for validity
Check a known file for validity
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment