Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-video-pipeline
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-video-pipeline
Commits
6c87fa83
Commit
6c87fa83
authored
Aug 28, 2017
by
Qubad786
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Integrate 3PlayMedia Transcription API
parent
1b69155e
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
752 additions
and
41 deletions
+752
-41
VEDA/urls.py
+6
-0
VEDA_OS01/models.py
+2
-0
VEDA_OS01/transcripts.py
+523
-0
control/celeryapp.py
+14
-2
control/veda_deliver.py
+42
-39
control/veda_deliver_3play.py
+165
-0
No files found.
VEDA/urls.py
View file @
6c87fa83
...
...
@@ -39,4 +39,10 @@ urlpatterns = [
view
=
transcripts
.
Cielo24CallbackHandlerView
.
as_view
(),
name
=
'cielo24_transcript_completed'
),
# 3PlayMedia callback handler view
url
(
regex
=
r'^3playmedia/transcripts/handle/(?P<token>[\w]+)$'
,
view
=
transcripts
.
ThreePlayMediaCallbackHandlerView
.
as_view
(),
name
=
'3play_media_callback'
)
]
VEDA_OS01/models.py
View file @
6c87fa83
...
...
@@ -647,6 +647,8 @@ class TranscriptProcessMetadata(TimeStampedModel):
video
=
models
.
ForeignKey
(
Video
)
provider
=
models
.
CharField
(
'Transcript provider'
,
max_length
=
50
,
choices
=
TranscriptProvider
.
CHOICES
)
process_id
=
models
.
CharField
(
'Process id'
,
max_length
=
255
)
# To keep track of 3Play Translations.
translation_id
=
models
.
CharField
(
'Translation id'
,
max_length
=
255
,
null
=
True
,
blank
=
True
)
lang_code
=
models
.
CharField
(
'Language code'
,
max_length
=
3
)
status
=
models
.
CharField
(
'Transcript status'
,
...
...
VEDA_OS01/transcripts.py
View file @
6c87fa83
...
...
@@ -9,9 +9,11 @@ import boto
import
django.dispatch
import
requests
from
boto.s3.key
import
Key
from
django.db.models
import
Q
from
pysrt
import
SubRipFile
from
requests.packages.urllib3.exceptions
import
InsecurePlatformWarning
from
rest_framework
import
status
from
rest_framework.parsers
import
FormParser
from
rest_framework.permissions
import
AllowAny
from
rest_framework.response
import
Response
from
rest_framework.views
import
APIView
...
...
@@ -26,11 +28,34 @@ requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)
logging
.
basicConfig
()
LOGGER
=
logging
.
getLogger
(
__name__
)
VALID_TRANSCRIPT_PROVIDERS
=
[
''
,
''
,
]
# 3PlayMedia possible send-along statuses for a transcription callback.
COMPLETE = 'complete'
ERROR = 'error'

# Transcript format
TRANSCRIPT_SJSON = 'sjson'

# Cielo24 callback signal and caption endpoint.
CIELO24_TRANSCRIPT_COMPLETED = django.dispatch.Signal(
    providing_args=['job_id', 'lang_code', 'org', 'video_id']
)
CIELO24_GET_CAPTION_URL = 'https://api.cielo24.com/api/job/get_caption'
CONFIG = utils.get_config()

# 3PlayMedia callback signal
THREE_PLAY_TRANSCRIPTION_DONE = django.dispatch.Signal(
    providing_args=['org', 'lang_code', 'edx_video_id', 'file_id', 'status', 'error_description']
)

# 3PlayMedia API URLs.
THREE_PLAY_TRANSCRIPT_URL = u'https://static.3playmedia.com/files/{file_id}/transcript.srt'
THREE_PLAY_TRANSLATION_SERVICES_URL = u'https://static.3playmedia.com/translation_services'
THREE_PLAY_ORDER_TRANSLATION_URL = u'https://api.3playmedia.com/files/{file_id}/translations/order'
THREE_PLAY_TRANSLATION_STATUS_URL = u'https://static.3playmedia.com/files/{file_id}/translations/{translation_id}'
THREE_PLAY_TRANSLATION_DOWNLOAD_URL = (
    u'https://static.3playmedia.com/files/{file_id}/translations/{translation_id}/'
    u'captions.srt'
)
class
TranscriptError
(
Exception
):
"""
...
...
@@ -46,6 +71,13 @@ class TranscriptFetchError(TranscriptError):
pass
class TranscriptTranslationError(TranscriptError):
    """
    An error occurred during the translation attempt on 3PlayMedia.
    """
class
TranscriptConversionError
(
TranscriptError
):
"""
An error occurred during srt to sjson conversion.
...
...
@@ -266,3 +298,494 @@ def upload_sjson_to_s3(config, sjson_data):
)
k
.
set_contents_from_string
(
json
.
dumps
(
sjson_data
))
return
k
.
key
class ThreePlayMediaCallbackHandlerView(APIView):
    """
    View to handle 3PlayMedia callback requests.
    """
    parser_classes = (FormParser,)
    permission_classes = (AllowValidTranscriptProvider,)

    def post(self, request, **kwargs):
        """
        Handle 3PlayMedia callback request.

        `file_id` and `status` (and optionally `error_description`) are read from the
        form-encoded body, while `org` and `edx_video_id` are read from the query string.
        When a required attribute is missing, a warning is logged and the request is
        still answered with HTTP 200; otherwise the transcription signal is dispatched.
        """
        # Each attribute is validated against the same source it is read from below,
        # so a misplaced attribute is reported as missing instead of raising a KeyError.
        required_data_attrs = ['file_id', 'status']
        required_query_attrs = ['org', 'edx_video_id']

        received_attributes = list(request.data.keys()) + list(request.query_params.keys())
        missing = [attr for attr in required_data_attrs if attr not in request.data]
        missing += [attr for attr in required_query_attrs if attr not in request.query_params]
        if missing:
            LOGGER.warning(
                u'[3PlayMedia Callback] process_id=%s Received Attributes=%s Missing Attributes=%s',
                request.data.get('file_id'),
                received_attributes,
                missing,
            )
            return Response(status=status.HTTP_200_OK)

        # Dispatch 3playMedia transcription signal
        THREE_PLAY_TRANSCRIPTION_DONE.send_robust(
            sender=self,
            org=request.query_params['org'],
            edx_video_id=request.query_params['edx_video_id'],
            lang_code='en',
            file_id=request.data['file_id'],
            status=request.data['status'],
            # Following is going to be an error description if an error occurs during
            # 3playMedia transcription process
            error_description=request.data.get('error_description'),
        )
        return Response(status=status.HTTP_200_OK)
def order_translations(file_id, api_key, api_secret, target_languages):
    """
    Order translations on 3PlayMedia for all the target languages.

    Process:
        * Fetch all the pending translations process for a file
        * Fetch all the translation services from 3PlayMedia
        * For each process,
            - Find suitable translation service
            - Order translation from that service
            - Move the process to `in progress` and update it with the
              translation id received from 3Play.

    Arguments:
        file_id(unicode): File identifier
        api_key(unicode): API key
        api_secret(unicode): API Secret
        target_languages(list): List of language codes

    Raises:
        TranscriptTranslationError: when an error occurred while fetching the translation services
            (failed request, or a response body that is not a JSON list).
    """
    translation_processes = TranscriptProcessMetadata.objects.filter(
        process_id=file_id,
        provider=TranscriptProvider.THREE_PLAY,
        status=TranscriptStatus.PENDING,
        lang_code__in=target_languages,
    )

    response = requests.get(utils.build_url(THREE_PLAY_TRANSLATION_SERVICES_URL, apikey=api_key))
    if not response.ok:
        # Fail all the pending translation processes associated with this file id.
        translation_processes.update(status=TranscriptStatus.FAILED)
        raise TranscriptTranslationError(
            u'[3PlayMedia Callback] Error while fetching the translation services -- {status}, {response}'.format(
                status=response.status_code,
                response=response.text,
            )
        )

    # Response should be a list containing services, details:
    # http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    try:
        available_services = json.loads(response.text)
    except ValueError:
        # A body that is not JSON at all is handled like any other unexpected payload below.
        available_services = None

    if not isinstance(available_services, list):
        # Fail all the pending translation processes associated with this file id.
        translation_processes.update(status=TranscriptStatus.FAILED)
        raise TranscriptTranslationError(
            u'[3PlayMedia Callback] Expected list but got: -- {response}.'.format(
                response=response.text,
            )
        )

    for target_language in target_languages:
        try:
            translation_process = translation_processes.filter(lang_code=target_language).latest()
        except TranscriptProcessMetadata.DoesNotExist:
            LOGGER.warning(
                u'[3PlayMedia Callback] process not found for target language %s -- process id %s',
                target_language,
                file_id,
            )
            continue

        # 1 - Find a standard service for translation in the target language.
        translation_service_id = None
        for service in available_services:
            if not isinstance(service, dict):
                # Skip malformed entries rather than aborting every remaining language.
                continue
            service_found = (
                service.get('target_language_iso_639_1_code') == target_language and
                service.get('service_level') == 'standard'
            )
            if service_found:
                translation_service_id = service.get('id')
                break

        if translation_service_id is None:
            # Fail the process
            translation_process.status = TranscriptStatus.FAILED
            translation_process.save()
            LOGGER.error(
                '[3PlayMedia Callback] No translation service found for target language %s -- process id %s',
                target_language,
                file_id,
            )
            continue

        # 2 - At this point, we've got our service ready to use. Now, place an order for the translation.
        response = requests.post(
            THREE_PLAY_ORDER_TRANSLATION_URL.format(file_id=file_id),
            data={
                'apikey': api_key,
                'api_secret_key': api_secret,
                'translation_service_id': translation_service_id,
            }
        )
        if not response.ok:
            # Fail the process
            translation_process.status = TranscriptStatus.FAILED
            translation_process.save()
            LOGGER.error(
                '[3PlayMedia Callback] An error occurred during translation, target language=%s, file_id=%s, status=%s',
                target_language,
                file_id,
                response.status_code,
            )
            continue

        # Translation Order API returns `success` attribute specifying whether the order has been placed
        # successfully: http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
        try:
            translation_order = json.loads(response.text)
        except ValueError:
            translation_order = None

        order_placed = isinstance(translation_order, dict) and bool(translation_order.get('success'))
        translation_id = translation_order.get('translation_id') if order_placed else None

        if order_placed and translation_id is not None:
            translation_process.status = TranscriptStatus.IN_PROGRESS
            translation_process.translation_id = translation_id
            translation_process.save()
        else:
            # Unparseable body, `success` not set, or no translation id to track: fail this language only.
            translation_process.status = TranscriptStatus.FAILED
            translation_process.save()
            LOGGER.error(
                '[3PlayMedia Callback] Translation failed for target language=%s, file_id=%s, response=%s',
                target_language,
                file_id,
                response.text,
            )
@django.dispatch.receiver(THREE_PLAY_TRANSCRIPTION_DONE, dispatch_uid="three_play_transcription_done")
def three_play_transcription_callback(sender, **kwargs):
    """
    Receiver for the 3PlayMedia transcription callback signal.

    Arguments:
        sender: sender of the signal
        kwargs(dict): video transcription metadata; `org`, `edx_video_id`, `lang_code`,
            `file_id` and `status` are required, `error_description` is optional.

    Process:
        * download transcript(SRT) from 3PlayMedia
        * convert SRT to SJSON
        * upload SJSON to AWS S3
        * order translations for all the preferred languages
        * update transcript status in VAL
    """
    # Extract all the must have attributes
    org = kwargs['org']
    edx_video_id = kwargs['edx_video_id']
    lang_code = kwargs['lang_code']
    file_id = kwargs['file_id']
    state = kwargs['status']

    try:
        process = TranscriptProcessMetadata.objects.filter(
            provider=TranscriptProvider.THREE_PLAY,
            process_id=file_id,
            lang_code=lang_code,
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        LOGGER.exception(
            u'[3PlayMedia Callback] Unable to get transcript process for org=%s, edx_video_id=%s, file_id=%s.',
            org,
            edx_video_id,
            file_id,
        )
        return

    if state == COMPLETE:
        # Indicates that the default video speech transcription has been done successfully.
        try:
            transcript_secrets = TranscriptPreferences.objects.get(org=org, provider=TranscriptProvider.THREE_PLAY)
        except TranscriptPreferences.DoesNotExist:
            # Fail the process
            process.status = TranscriptStatus.FAILED
            process.save()
            # Log the failure
            LOGGER.exception(
                u'[3PlayMedia Callback] Unable to get transcript secrets for org=%s, edx_video_id=%s, file_id=%s.',
                org,
                edx_video_id,
                file_id,
            )
            return

        # Fetch the transcript from 3PlayMedia
        try:
            srt_transcript = fetch_srt_data(
                THREE_PLAY_TRANSCRIPT_URL.format(file_id=file_id),
                apikey=transcript_secrets.api_key,
            )
        except TranscriptFetchError:
            process.status = TranscriptStatus.FAILED
            process.save()
            LOGGER.exception(
                '[3PlayMedia Callback] Fetch request failed for video=%s -- lang=%s -- process_id=%s',
                edx_video_id,
                lang_code,
                file_id
            )
            return

        # fetched transcript is going to be SRT content and if this is not so, it'll be a json response
        # describing the error.
        try:
            json.loads(srt_transcript)
        except ValueError:
            # Not JSON, i.e. the expected SRT content.
            pass
        else:
            # Fail the process and log all the details.
            process.status = TranscriptStatus.FAILED
            process.save()
            LOGGER.error(
                '[3PlayMedia Task] Transcript fetch error for video=%s -- lang_code=%s -- process=%s -- response=%s',
                process.video.studio_id,
                process.lang_code,
                process.process_id,
                srt_transcript,
            )
            return

        # We've got the transcript from 3PlayMedia, now update process status accordingly.
        process.status = TranscriptStatus.READY
        process.save()

        try:
            sjson_transcript = convert_srt_to_sjson(srt_transcript)
            sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
        except Exception:
            # in case of any exception, log and raise.
            LOGGER.exception(
                u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
                edx_video_id,
                lang_code,
                file_id,
            )
            raise

        # Update edx-val with completed transcript information
        val_api = VALAPICall(video_proto=None, val_status=None)
        val_api.update_val_transcript(
            video_id=process.video.studio_id,
            lang_code=lang_code,
            name=sjson_file,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        # Translation Phase
        # Drop the language that is already processed - in our case, its en. Filtering (rather than
        # `list.remove`) does not raise when that language is absent from the preferred languages.
        target_languages = [
            language for language in process.video.preferred_languages if language != lang_code
        ]

        # Check if the translations are needed.
        if target_languages:
            # Create the translation tracking processes for all the target languages.
            for target_language in target_languages:
                TranscriptProcessMetadata.objects.create(
                    video=process.video,
                    provider=TranscriptProvider.THREE_PLAY,
                    process_id=file_id,
                    lang_code=target_language,
                    status=TranscriptStatus.PENDING,
                )

            try:
                # Order translations for target languages
                order_translations(
                    file_id,
                    transcript_secrets.api_key,
                    transcript_secrets.api_secret,
                    target_languages
                )
            except TranscriptTranslationError:
                LOGGER.exception(
                    u'[3PlayMedia Callback] Translation could not be performed - org=%s, edx_video_id=%s, file_id=%s.',
                    org,
                    edx_video_id,
                    file_id,
                )
            except Exception:
                LOGGER.exception(
                    (
                        u'[3PlayMedia Callback] Error while translating the transcripts - org=%s, edx_video_id=%s, '
                        u'file_id=%s.'
                    ),
                    org,
                    edx_video_id,
                    file_id,
                )
                raise
        else:
            # in case if there is only one language which has already been processed.
            val_api.update_video_status(
                process.video.studio_id, VideoStatus.TRANSCRIPTION_READY
            )

        # On success, a happy farewell log.
        LOGGER.info(
            u'[3PlayMedia Callback] Video speech transcription was successful for video=%s -- lang_code=%s -- '
            u'process_id=%s',
            edx_video_id,
            lang_code,
            file_id,
        )

    elif state == ERROR:
        # Fail the process
        process.status = TranscriptStatus.FAILED
        process.save()
        # Log the error information; the description is optional, so a missing key must not raise here.
        LOGGER.error(
            u'[3PlayMedia Callback] Error while transcription - error=%s, org=%s, edx_video_id=%s, file_id=%s.',
            kwargs.get('error_description'),
            org,
            edx_video_id,
            file_id,
        )
    else:
        # Status must be either 'complete' or 'error'
        # more details on http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        LOGGER.error(
            u'[3PlayMedia Callback] Got invalid status - status=%s, org=%s, edx_video_id=%s, file_id=%s.',
            state,
            org,
            edx_video_id,
            file_id,
        )
def retrieve_three_play_translations():
    """
    Checks translation status on 3PlayMedia for all the progressing processes, fetches them if they're complete.

    Retrieval flow:
        1. Fetches 3PlayMedia translation processes whose status is `in progress`
        2. For each process, retrieve the org-wide api keys
        3. Check translation status through 3PlayMedia
        4. If its done, mark the process as complete, fetch translated transcript, convert to sjson, upload it to s3 and
           finally, update it in edx-val.

    A status response that cannot be parsed is logged and the process is left `in progress`,
    so it is checked again on the next run.
    """
    translation_processes = TranscriptProcessMetadata.objects.filter(
        provider=TranscriptProvider.THREE_PLAY,
        status=TranscriptStatus.IN_PROGRESS,
    ).exclude(
        # Only processes that already carry a translation id can be polled.
        Q(translation_id__isnull=True) | Q(translation_id__exact='')
    )

    for translation_process in translation_processes:
        # The first entry of the comma separated `local_storedir` is used to work out the org.
        course_id = translation_process.video.inst_class.local_storedir.split(',')[0]
        org = utils.extract_course_org(course_id=course_id)

        try:
            three_play_secrets = TranscriptPreferences.objects.get(org=org, provider=TranscriptProvider.THREE_PLAY)
        except TranscriptPreferences.DoesNotExist:
            LOGGER.exception(
                u'[3PlayMedia Task] 3Play secrets not found for video=%s -- lang_code=%s -- process_id=%s',
                translation_process.video.studio_id,
                translation_process.lang_code,
                translation_process.process_id,
            )
            continue

        translation_status_url = utils.build_url(
            THREE_PLAY_TRANSLATION_STATUS_URL.format(
                file_id=translation_process.process_id,
                translation_id=translation_process.translation_id,
            ),
            apikey=three_play_secrets.api_key
        )
        response = requests.get(translation_status_url)
        if not response.ok:
            LOGGER.error(
                (
                    u'[3PlayMedia Task] Translation status request failed for video=%s -- '
                    u'lang_code=%s -- process_id=%s -- status=%s'
                ),
                translation_process.video.studio_id,
                translation_process.lang_code,
                translation_process.process_id,
                response.status_code,
            )
            continue

        try:
            translation_status = json.loads(response.text)
        except ValueError:
            translation_status = None

        if not isinstance(translation_status, dict):
            # Unparseable status must not abort the run for the remaining processes.
            LOGGER.error(
                (
                    u'[3PlayMedia Task] Unexpected translation status response for video=%s -- '
                    u'lang_code=%s -- process_id=%s -- response=%s'
                ),
                translation_process.video.studio_id,
                translation_process.lang_code,
                translation_process.process_id,
                response.text,
            )
            continue

        if translation_status.get('iserror'):
            translation_process.status = TranscriptStatus.FAILED
            translation_process.save()
            LOGGER.error(
                (
                    u'[3PlayMedia Task] unable to get translation status for video=%s -- '
                    u'lang_code=%s -- process_id=%s -- response=%s'
                ),
                translation_process.video.studio_id,
                translation_process.lang_code,
                translation_process.process_id,
                response.text,
            )
            continue

        if translation_status.get('state') != 'complete':
            # Not finished yet (or no state reported); look at it again on the next run.
            continue

        translation_download_url = utils.build_url(
            THREE_PLAY_TRANSLATION_DOWNLOAD_URL.format(
                file_id=translation_process.process_id,
                translation_id=translation_process.translation_id,
            ),
            apikey=three_play_secrets.api_key
        )
        response = requests.get(translation_download_url)
        if not response.ok:
            LOGGER.error(
                u'[3PlayMedia Task] Translation download failed for video=%s -- lang_code=%s -- process_id=%s -- '
                u'status=%s',
                translation_process.video.studio_id,
                translation_process.lang_code,
                translation_process.process_id,
                response.status_code,
            )
            continue

        # its going to be SRT content and `json.loads` should raise
        # ValueError if its a valid response, otherwise it'll be json
        # response in result of an error.
        try:
            json.loads(response.text)
        except ValueError:
            pass
        else:
            translation_process.status = TranscriptStatus.FAILED
            translation_process.save()
            LOGGER.error(
                u'[3PlayMedia Task] Translation error for video=%s -- lang_code=%s -- process_id=%s -- response=%s',
                translation_process.video.studio_id,
                translation_process.lang_code,
                translation_process.process_id,
                response.text,
            )
            continue

        # We've got the transcript from 3PlayMedia, now update process status accordingly.
        translation_process.status = TranscriptStatus.READY
        translation_process.save()

        try:
            sjson_transcript = convert_srt_to_sjson(response.text)
            sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
        except Exception:
            # in case of any exception, log and raise (this ends the current run).
            LOGGER.exception(
                u'[3PlayMedia Task] translation failed for video=%s -- lang_code=%s -- process_id=%s',
                translation_process.video.studio_id,
                translation_process.lang_code,
                translation_process.process_id,
            )
            raise

        # Update edx-val with completed transcript information
        val_api = VALAPICall(video_proto=None, val_status=None)
        val_api.update_val_transcript(
            video_id=translation_process.video.studio_id,
            lang_code=translation_process.lang_code,
            name=sjson_file,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        # if all the processes for this video are complete, update video status in edx-val
        # update transcript status for video in edx-val only if all language transcripts are ready
        video_jobs = TranscriptProcessMetadata.objects.filter(video__studio_id=translation_process.video.studio_id)
        if all(video_job.status == TranscriptStatus.READY for video_job in video_jobs):
            val_api.update_video_status(translation_process.video.studio_id, VideoStatus.TRANSCRIPTION_READY)
control/celeryapp.py
View file @
6c87fa83
from
__future__
import
absolute_import
import
os
import
sys
from
celery
import
Celery
import
yaml
from
VEDA_OS01.transcripts
import
retrieve_three_play_translations
"""
Start Celery Worker
...
...
@@ -51,6 +51,18 @@ app.conf.update(
CELERY_ACCEPT_CONTENT
=
[
'pickle'
,
'json'
,
'msgpack'
,
'yaml'
]
)
app.conf.beat_schedule = {
    'check-3play-translations-every-30-seconds': {
        # Must be the exact name the task below is registered under, otherwise
        # the scheduler dispatches a task no worker knows about.
        'task': 'fetch_three_play_translations',
        'schedule': 30.0,
    },
}


@app.task(name='fetch_three_play_translations')
def fetch_three_play_translations():
    """
    Periodic task: poll 3PlayMedia for the translations that are in progress.
    """
    retrieve_three_play_translations()
@app.task
(
name
=
'worker_encode'
)
def
worker_task_fire
(
veda_id
,
encode_profile
,
jobid
):
...
...
control/veda_deliver.py
View file @
6c87fa83
import
datetime
import
ftplib
import
logging
import
os
import
shutil
import
sys
from
os.path
import
expanduser
import
boto
...
...
@@ -16,6 +13,7 @@ from boto.s3.key import Key
from
django.core.urlresolvers
import
reverse
import
veda_deliver_xuetang
from
control.veda_deliver_3play
import
ThreePLayMediaClient
from
control_env
import
*
from
veda_deliver_cielo
import
Cielo24Transcript
from
veda_deliver_youtube
import
DeliverYoutube
...
...
@@ -190,7 +188,6 @@ class VedaDelivery:
self
.
_CLEANUP
()
self
.
_THREEPLAY_UPLOAD
()
# Transcription Process
# We only want to generate transcripts for `desktop_mp4` profile.
if
self
.
encode_profile
==
'desktop_mp4'
and
self
.
video_query
.
process_transcription
:
...
...
@@ -203,7 +200,6 @@ class VedaDelivery:
if
self
.
video_query
.
provider
==
TranscriptProvider
.
CIELO24
:
self
.
cielo24_transcription_flow
()
def
_INFORM_INTAKE
(
self
):
"""
Collect all salient metadata and
...
...
@@ -574,48 +570,55 @@ class VedaDelivery:
)
cielo24
.
start_transcription_flow
()
def
_THREEPLAY_UPLOAD
(
self
):
if
self
.
video_query
.
inst_class
.
tp_proc
is
False
:
return
None
if
self
.
video_query
.
inst_class
.
mobile_override
is
False
:
if
self
.
encode_profile
!=
'desktop_mp4'
:
return
None
ftp1
=
ftplib
.
FTP
(
self
.
auth_dict
[
'threeplay_ftphost'
]
)
user
=
self
.
video_query
.
inst_class
.
tp_username
.
strip
()
passwd
=
self
.
video_query
.
inst_class
.
tp_password
.
strip
()
def
start_3play_transcription_process
(
self
):
"""
3PlayMedia Transcription Flow
"""
try
:
ftp1
.
login
(
user
,
passwd
)
except
:
ErrorObject
.
print_error
(
message
=
'3Play Authentication Failure'
# Picks the first course from the list as there may be multiple
# course runs in that list (i.e. all having the same org).
org
=
utils
.
extract_course_org
(
self
.
video_proto
.
platform_course_url
[
0
])
transcript_secrets
=
TranscriptPreferences
.
objects
.
get
(
org
=
org
,
provider
=
self
.
video_query
.
provider
)
# update transcript status for video in edx-val
VALAPICall
(
video_proto
=
None
,
val_status
=
None
)
.
update_video_status
(
self
.
video_query
.
studio_id
,
VideoStatus
.
TRANSCRIPTION_IN_PROGRESS
)
try
:
ftp1
.
cwd
(
self
.
video_query
.
inst_class
.
tp_speed
# Initialize 3playMedia client and start transcription process
s3_video_url
=
build_url
(
self
.
auth_dict
[
's3_base_url'
],
self
.
auth_dict
[
'edx_s3_endpoint_bucket'
],
self
.
encoded_file
)
except
:
ftp1
.
mkd
(
self
.
video_query
.
inst_class
.
tp_speed
callback_url
=
build_url
(
self
.
auth_dict
[
'veda_base_url'
],
reverse
(
'3play_media_callback'
,
args
=
[
self
.
auth_dict
[
'transcript_provider_request_token'
]]
),
# Additional attributes that'll come back with the callback
org
=
org
,
edx_video_id
=
self
.
video_query
.
studio_id
,
)
ftp1
.
cwd
(
self
.
video_query
.
inst_class
.
tp_speed
three_play_media
=
ThreePLayMediaClient
(
org
=
org
,
video
=
self
.
video_query
,
media_url
=
s3_video_url
,
api_key
=
transcript_secrets
.
api_key
,
api_secret
=
transcript_secrets
.
api_secret
,
callback_url
=
callback_url
,
turnaround_level
=
self
.
video_query
.
three_play_turnaround
,
)
os
.
chdir
(
self
.
node_work_directory
)
three_play_media
.
generate_transcripts
(
)
ftp1
.
storbinary
(
'STOR '
+
self
.
encoded_file
,
open
(
os
.
path
.
join
(
self
.
node_work_directory
,
self
.
encoded_file
),
'rb'
)
except
TranscriptPreferences
.
DoesNotExist
:
LOGGER
.
warning
(
'Transcript preference is not found for provider=
%
s, video=
%
s'
,
self
.
video_query
.
provider
,
self
.
video_query
.
studio_id
,
)
os
.
chdir
(
homedir
)
def
_XUETANG_ROUTE
(
self
):
if
self
.
video_query
.
inst_class
.
xuetang_proc
is
False
:
return
None
...
...
control/veda_deliver_3play.py
0 → 100644
View file @
6c87fa83
"""
3PlayMedia Transcription Client
"""
import
json
import
logging
import
requests
import
sys
from
requests.packages.urllib3.exceptions
import
InsecurePlatformWarning
from
VEDA_OS01.models
import
TranscriptProcessMetadata
,
TranscriptProvider
,
TranscriptStatus
from
VEDA_OS01.utils
import
build_url
requests
.
packages
.
urllib3
.
disable_warnings
(
InsecurePlatformWarning
)
LOGGER
=
logging
.
getLogger
(
__name__
)
class ThreePlayMediaError(Exception):
    """
    An error that occurs during 3PlayMedia actions.
    """


class ThreePlayMediaLanguageNotFoundError(ThreePlayMediaError):
    """
    An error when language is not found in available 3playMedia languages.
    """


class ThreePlayMediaPerformTranscriptionError(ThreePlayMediaError):
    """
    An error occurred while adding media for transcription.
    """


class ThreePlayMediaUrlError(ThreePlayMediaError):
    """
    Occurs when the media url is either inaccessible or of invalid content type.
    """
class ThreePLayMediaClient(object):
    """
    Client that submits a video to 3PlayMedia for transcription and records the
    resulting transcription process.
    """

    def __init__(self, org, video, media_url, api_key, api_secret, callback_url, turnaround_level):
        """
        Initialize 3play media client
        """
        self.org = org
        self.video = video
        self.media_url = media_url
        self.api_key = api_key
        self.api_secret = api_secret
        self.callback_url = callback_url
        self.turnaround_level = turnaround_level

        # default attributes
        self.base_url = u'https://api.3playmedia.com/'
        self.upload_media_file_url = u'files/'
        self.available_languages_url = u'caption_imports/available_languages/'
        self.allowed_content_type = u'video/mp4'

    def validate_media_url(self):
        """
        Validates the media URL

        Raises:
            3PlayMediaUrlError: on invalid media url or content type
        """
        if not self.media_url:
            raise ThreePlayMediaUrlError('Invalid media URL "{media_url}".'.format(media_url=self.media_url))

        response = requests.head(url=self.media_url)
        if not response.ok:
            raise ThreePlayMediaUrlError('The URL "{media_url}" is not Accessible.'.format(media_url=self.media_url))

        # A missing header is reported as a content-type mismatch instead of a KeyError.
        content_type = response.headers.get('Content-Type') or ''
        # Compare the media type only: parameters (e.g. `; charset=...`) and letter case are irrelevant.
        media_type = content_type.split(';')[0].strip().lower()
        if media_type != self.allowed_content_type:
            raise ThreePlayMediaUrlError(
                'Media content-type should be "{allowed_type}". URL was "{media_url}", content-type was "{type}"'.format(
                    allowed_type=self.allowed_content_type,
                    media_url=self.media_url,
                    type=content_type,
                )
            )

    @staticmethod
    def _extract_file_id(response_text):
        """
        Return the file id carried by an upload response body.

        A normal response should be a text containing file id and if we're getting a deserializable dict, there
        must be an error: http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods

        Raises:
            ThreePlayMediaPerformTranscriptionError: when the body is a JSON object
        """
        try:
            parsed_response = json.loads(response_text)
        except ValueError:
            # Plain text that is not JSON: this is the file id.
            return response_text

        if isinstance(parsed_response, dict):
            raise ThreePlayMediaPerformTranscriptionError(
                'Expected file id but got: {response}'.format(response=response_text)
            )

        # An all-digit file id is itself valid JSON (a number), so it parses without
        # a ValueError; it is still the file id and must be returned, not dropped.
        return response_text

    def submit_media(self):
        """
        Submits the media to perform transcription.

        Returns:
            the response text of the upload request, i.e. the 3PlayMedia file id

        Raises:
            ThreePlayMediaUrlError: when the media url does not pass validation
            ThreePlayMediaPerformTranscriptionError: error while transcription process
        """
        self.validate_media_url()
        # Prepare requests payload
        payload = dict(
            # Mandatory attributes required for transcription
            link=self.media_url,
            apikey=self.api_key,
            api_secret_key=self.api_secret,
            turnaround_level=self.turnaround_level,
            callback_url=self.callback_url,
        )
        upload_url = build_url(self.base_url, self.upload_media_file_url)
        response = requests.post(url=upload_url, data=json.dumps(payload), headers={'Content-Type': 'application/json'})

        if not response.ok:
            raise ThreePlayMediaPerformTranscriptionError(
                'Upload file request failed with: {response} -- {status}'.format(
                    response=response.text, status=response.status_code
                )
            )

        return self._extract_file_id(response.text)

    def generate_transcripts(self):
        """
        Kicks off transcription process for default language.

        3PlayMedia specific errors are logged and swallowed; any other error is logged and re-raised.
        """
        try:
            file_id = self.submit_media()
            # Track progress of transcription process
            TranscriptProcessMetadata.objects.create(
                video=self.video,
                process_id=file_id,
                lang_code=u'en',
                provider=TranscriptProvider.THREE_PLAY,
                status=TranscriptStatus.IN_PROGRESS,
            )
            # Successfully kicked off transcription process for a video with the given language.
            LOGGER.info(
                '[3PlayMedia] Transcription process has been started for video=%s, language=en.',
                self.video.studio_id,
            )
        except ThreePlayMediaError:
            LOGGER.exception(
                '[3PlayMedia] Could not process transcripts for video=%s language=en.',
                self.video.studio_id,
            )
        except Exception:
            LOGGER.exception(
                '[3PlayMedia] Unexpected error while transcription for video=%s language=en .',
                self.video.studio_id,
            )
            raise
def main():
    """
    Script entry point; intentionally does nothing.
    """
    pass


if __name__ == '__main__':
    sys.exit(main())
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment