Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-video-pipeline
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-video-pipeline
Commits
6c87fa83
Commit
6c87fa83
authored
Aug 28, 2017
by
Qubad786
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Integrate 3PlayMedia Transcription API
parent
1b69155e
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
230 additions
and
42 deletions
+230
-42
VEDA/urls.py
+6
-0
VEDA_OS01/models.py
+2
-0
VEDA_OS01/transcripts.py
+0
-0
control/celeryapp.py
+14
-2
control/veda_deliver.py
+43
-40
control/veda_deliver_3play.py
+165
-0
No files found.
VEDA/urls.py
View file @
6c87fa83
...
@@ -39,4 +39,10 @@ urlpatterns = [
...
@@ -39,4 +39,10 @@ urlpatterns = [
view
=
transcripts
.
Cielo24CallbackHandlerView
.
as_view
(),
view
=
transcripts
.
Cielo24CallbackHandlerView
.
as_view
(),
name
=
'cielo24_transcript_completed'
name
=
'cielo24_transcript_completed'
),
),
# 3PlayMedia callback handler view
url
(
regex
=
r'^3playmedia/transcripts/handle/(?P<token>[\w]+)$'
,
view
=
transcripts
.
ThreePlayMediaCallbackHandlerView
.
as_view
(),
name
=
'3play_media_callback'
)
]
]
VEDA_OS01/models.py
View file @
6c87fa83
...
@@ -647,6 +647,8 @@ class TranscriptProcessMetadata(TimeStampedModel):
...
@@ -647,6 +647,8 @@ class TranscriptProcessMetadata(TimeStampedModel):
video
=
models
.
ForeignKey
(
Video
)
video
=
models
.
ForeignKey
(
Video
)
provider
=
models
.
CharField
(
'Transcript provider'
,
max_length
=
50
,
choices
=
TranscriptProvider
.
CHOICES
)
provider
=
models
.
CharField
(
'Transcript provider'
,
max_length
=
50
,
choices
=
TranscriptProvider
.
CHOICES
)
process_id
=
models
.
CharField
(
'Process id'
,
max_length
=
255
)
process_id
=
models
.
CharField
(
'Process id'
,
max_length
=
255
)
# To keep track of 3Play Translations.
translation_id
=
models
.
CharField
(
'Translation id'
,
max_length
=
255
,
null
=
True
,
blank
=
True
)
lang_code
=
models
.
CharField
(
'Language code'
,
max_length
=
3
)
lang_code
=
models
.
CharField
(
'Language code'
,
max_length
=
3
)
status
=
models
.
CharField
(
status
=
models
.
CharField
(
'Transcript status'
,
'Transcript status'
,
...
...
VEDA_OS01/transcripts.py
View file @
6c87fa83
This diff is collapsed.
Click to expand it.
control/celeryapp.py
View file @
6c87fa83
from
__future__
import
absolute_import
from
__future__
import
absolute_import
import
os
import
sys
from
celery
import
Celery
from
celery
import
Celery
import
yaml
import
yaml
from
VEDA_OS01.transcripts
import
retrieve_three_play_translations
"""
"""
Start Celery Worker
Start Celery Worker
...
@@ -51,6 +51,18 @@ app.conf.update(
...
@@ -51,6 +51,18 @@ app.conf.update(
CELERY_ACCEPT_CONTENT
=
[
'pickle'
,
'json'
,
'msgpack'
,
'yaml'
]
CELERY_ACCEPT_CONTENT
=
[
'pickle'
,
'json'
,
'msgpack'
,
'yaml'
]
)
)
# Periodic (celery beat) schedule.
# The 'task' value has to be the exact name the task is registered under,
# i.e. the `name=` passed to `@app.task(...)` on the task function; a name
# that does not match any registered task cannot be executed by the worker.
app.conf.beat_schedule = {
    'check-3play-translations-every-30-seconds': {
        'task': 'fetch_three_play_translations',
        # Interval between runs, in seconds.
        'schedule': 30.0,
    },
}
@app.task(name='fetch_three_play_translations')
def fetch_three_play_translations():
    """
    Celery task registered as 'fetch_three_play_translations'.

    Delegates all of the work to `retrieve_three_play_translations` and
    discards its return value.
    """
    retrieve_three_play_translations()
@app.task
(
name
=
'worker_encode'
)
@app.task
(
name
=
'worker_encode'
)
def
worker_task_fire
(
veda_id
,
encode_profile
,
jobid
):
def
worker_task_fire
(
veda_id
,
encode_profile
,
jobid
):
...
...
control/veda_deliver.py
View file @
6c87fa83
import
datetime
import
datetime
import
ftplib
import
logging
import
logging
import
os
import
shutil
import
shutil
import
sys
from
os.path
import
expanduser
from
os.path
import
expanduser
import
boto
import
boto
...
@@ -16,6 +13,7 @@ from boto.s3.key import Key
...
@@ -16,6 +13,7 @@ from boto.s3.key import Key
from
django.core.urlresolvers
import
reverse
from
django.core.urlresolvers
import
reverse
import
veda_deliver_xuetang
import
veda_deliver_xuetang
from
control.veda_deliver_3play
import
ThreePLayMediaClient
from
control_env
import
*
from
control_env
import
*
from
veda_deliver_cielo
import
Cielo24Transcript
from
veda_deliver_cielo
import
Cielo24Transcript
from
veda_deliver_youtube
import
DeliverYoutube
from
veda_deliver_youtube
import
DeliverYoutube
...
@@ -190,7 +188,6 @@ class VedaDelivery:
...
@@ -190,7 +188,6 @@ class VedaDelivery:
self
.
_CLEANUP
()
self
.
_CLEANUP
()
self
.
_THREEPLAY_UPLOAD
()
# Transcription Process
# Transcription Process
# We only want to generate transcripts for `desktop_mp4` profile.
# We only want to generate transcripts for `desktop_mp4` profile.
if
self
.
encode_profile
==
'desktop_mp4'
and
self
.
video_query
.
process_transcription
:
if
self
.
encode_profile
==
'desktop_mp4'
and
self
.
video_query
.
process_transcription
:
...
@@ -203,7 +200,6 @@ class VedaDelivery:
...
@@ -203,7 +200,6 @@ class VedaDelivery:
if
self
.
video_query
.
provider
==
TranscriptProvider
.
CIELO24
:
if
self
.
video_query
.
provider
==
TranscriptProvider
.
CIELO24
:
self
.
cielo24_transcription_flow
()
self
.
cielo24_transcription_flow
()
def
_INFORM_INTAKE
(
self
):
def
_INFORM_INTAKE
(
self
):
"""
"""
Collect all salient metadata and
Collect all salient metadata and
...
@@ -574,47 +570,54 @@ class VedaDelivery:
...
@@ -574,47 +570,54 @@ class VedaDelivery:
)
)
cielo24
.
start_transcription_flow
()
cielo24
.
start_transcription_flow
()
def
_THREEPLAY_UPLOAD
(
self
):
def
start_3play_transcription_process
(
self
):
"""
if
self
.
video_query
.
inst_class
.
tp_proc
is
False
:
3PlayMedia Transcription Flow
return
None
"""
if
self
.
video_query
.
inst_class
.
mobile_override
is
False
:
if
self
.
encode_profile
!=
'desktop_mp4'
:
return
None
ftp1
=
ftplib
.
FTP
(
self
.
auth_dict
[
'threeplay_ftphost'
]
)
user
=
self
.
video_query
.
inst_class
.
tp_username
.
strip
()
passwd
=
self
.
video_query
.
inst_class
.
tp_password
.
strip
()
try
:
try
:
ftp1
.
login
(
user
,
passwd
)
# Picks the first course from the list as there may be multiple
except
:
# course runs in that list (i.e. all having the same org).
ErrorObject
.
print_error
(
org
=
utils
.
extract_course_org
(
self
.
video_proto
.
platform_course_url
[
0
])
message
=
'3Play Authentication Failure'
transcript_secrets
=
TranscriptPreferences
.
objects
.
get
(
org
=
org
,
provider
=
self
.
video_query
.
provider
)
# update transcript status for video in edx-val
VALAPICall
(
video_proto
=
None
,
val_status
=
None
)
.
update_video_status
(
self
.
video_query
.
studio_id
,
VideoStatus
.
TRANSCRIPTION_IN_PROGRESS
)
)
try
:
ftp1
.
cwd
(
# Initialize 3playMedia client and start transcription process
self
.
video_query
.
inst_class
.
tp_speed
s3_video_url
=
build_url
(
self
.
auth_dict
[
's3_base_url'
],
self
.
auth_dict
[
'edx_s3_endpoint_bucket'
],
self
.
encoded_file
)
)
except
:
callback_url
=
build_url
(
ftp1
.
mkd
(
self
.
auth_dict
[
'veda_base_url'
],
self
.
video_query
.
inst_class
.
tp_speed
reverse
(
'3play_media_callback'
,
args
=
[
self
.
auth_dict
[
'transcript_provider_request_token'
]]
),
# Additional attributes that'll come back with the callback
org
=
org
,
edx_video_id
=
self
.
video_query
.
studio_id
,
)
)
ftp1
.
cwd
(
three_play_media
=
ThreePLayMediaClient
(
self
.
video_query
.
inst_class
.
tp_speed
org
=
org
,
video
=
self
.
video_query
,
media_url
=
s3_video_url
,
api_key
=
transcript_secrets
.
api_key
,
api_secret
=
transcript_secrets
.
api_secret
,
callback_url
=
callback_url
,
turnaround_level
=
self
.
video_query
.
three_play_turnaround
,
)
)
os
.
chdir
(
self
.
node_work_directory
)
three_play_media
.
generate_transcripts
()
ftp1
.
storbinary
(
'STOR '
+
self
.
encoded_file
,
open
(
os
.
path
.
join
(
self
.
node_work_directory
,
self
.
encoded_file
),
'rb'
)
)
os
.
chdir
(
homedir
)
except
TranscriptPreferences
.
DoesNotExist
:
LOGGER
.
warning
(
'Transcript preference is not found for provider=
%
s, video=
%
s'
,
self
.
video_query
.
provider
,
self
.
video_query
.
studio_id
,
)
def
_XUETANG_ROUTE
(
self
):
def
_XUETANG_ROUTE
(
self
):
if
self
.
video_query
.
inst_class
.
xuetang_proc
is
False
:
if
self
.
video_query
.
inst_class
.
xuetang_proc
is
False
:
...
...
control/veda_deliver_3play.py
0 → 100644
View file @
6c87fa83
"""
3PlayMedia Transcription Client
"""
import
json
import
logging
import
requests
import
sys
from
requests.packages.urllib3.exceptions
import
InsecurePlatformWarning
from
VEDA_OS01.models
import
TranscriptProcessMetadata
,
TranscriptProvider
,
TranscriptStatus
from
VEDA_OS01.utils
import
build_url
# Silence the InsecurePlatformWarning raised by the urllib3 bundled with requests.
requests.packages.urllib3.disable_warnings(InsecurePlatformWarning)

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
class ThreePlayMediaError(Exception):
    """
    Base class for every error raised while talking to 3PlayMedia.
    """
class ThreePlayMediaLanguageNotFoundError(ThreePlayMediaError):
    """
    Raised when a language is missing from the available 3PlayMedia languages.
    """
class ThreePlayMediaPerformTranscriptionError(ThreePlayMediaError):
    """
    Raised when adding media for transcription fails.
    """
class ThreePlayMediaUrlError(ThreePlayMediaError):
    """
    Raised when the media url is inaccessible or has an invalid content type.
    """
class ThreePLayMediaClient(object):
    """
    Client that submits a media URL to 3PlayMedia for transcription and
    records the started transcription process.
    """

    def __init__(self, org, video, media_url, api_key, api_secret, callback_url, turnaround_level):
        """
        Initialize 3play media client

        Arguments:
            org: organization value; stored on the client as-is.
            video: video object; its `studio_id` is used in log messages and the
                object is attached to the `TranscriptProcessMetadata` record.
            media_url: URL of the media file, sent to 3PlayMedia as `link`.
            api_key: sent to 3PlayMedia as `apikey`.
            api_secret: sent to 3PlayMedia as `api_secret_key`.
            callback_url: sent to 3PlayMedia as `callback_url`.
            turnaround_level: sent to 3PlayMedia as `turnaround_level`.
        """
        self.org = org
        self.video = video
        self.media_url = media_url
        self.api_key = api_key
        self.api_secret = api_secret
        self.callback_url = callback_url
        self.turnaround_level = turnaround_level

        # default attributes
        self.base_url = u'https://api.3playmedia.com/'
        self.upload_media_file_url = u'files/'
        self.available_languages_url = u'caption_imports/available_languages/'
        self.allowed_content_type = u'video/mp4'

    def validate_media_url(self):
        """
        Validates the media URL

        Issues a HEAD request against the media URL and checks both the response
        status and the reported content type.

        Raises:
            ThreePlayMediaUrlError: on invalid media url or content type
        """
        if not self.media_url:
            raise ThreePlayMediaUrlError('Invalid media URL "{media_url}".'.format(media_url=self.media_url))

        response = requests.head(url=self.media_url)
        if not response.ok:
            raise ThreePlayMediaUrlError('The URL "{media_url}" is not Accessible.'.format(media_url=self.media_url))

        # Use `.get` so that a response without a Content-Type header is reported
        # as a ThreePlayMediaUrlError (like any other wrong content type) rather
        # than escaping as a KeyError.
        content_type = response.headers.get('Content-Type')
        if content_type != self.allowed_content_type:
            raise ThreePlayMediaUrlError(
                'Media content-type should be "{allowed_type}". URL was "{media_url}", content-type was "{type}"'.format(
                    allowed_type=self.allowed_content_type,
                    media_url=self.media_url,
                    type=content_type,
                )
            )

    @staticmethod
    def _extract_file_id(response_text):
        """
        Return the file id carried by an upload response body.

        A normal response should be a text containing file id and if we're getting a deserializable dict, there
        must be an error: http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods

        Arguments:
            response_text: body of the upload response.

        Returns:
            `response_text` unchanged, whenever it is not a JSON object.

        Raises:
            ThreePlayMediaPerformTranscriptionError: if the body deserializes to a dict.
        """
        try:
            decoded_response = json.loads(response_text)
        except ValueError:
            # Not JSON at all, i.e. a plain-text file id.
            return response_text

        if isinstance(decoded_response, dict):
            raise ThreePlayMediaPerformTranscriptionError(
                'Expected file id but got: {response}'.format(response=response_text)
            )

        # Valid JSON that is not a dict (e.g. a bare numeric file id) is still a
        # file id; it must be returned instead of silently yielding None.
        return response_text

    def submit_media(self):
        """
        Submits the media to perform transcription.

        Returns:
            The text of the upload response, which is expected to be the file id.

        Raises:
            ThreePlayMediaUrlError: if the media url does not pass validation
            ThreePlayMediaPerformTranscriptionError: error while transcription process
        """
        self.validate_media_url()

        # Prepare requests payload
        payload = dict(
            # Mandatory attributes required for transcription
            link=self.media_url,
            apikey=self.api_key,
            api_secret_key=self.api_secret,
            turnaround_level=self.turnaround_level,
            callback_url=self.callback_url,
        )
        upload_url = build_url(self.base_url, self.upload_media_file_url)
        response = requests.post(
            url=upload_url,
            data=json.dumps(payload),
            headers={'Content-Type': 'application/json'}
        )

        if not response.ok:
            raise ThreePlayMediaPerformTranscriptionError(
                'Upload file request failed with: {response} -- {status}'.format(
                    response=response.text, status=response.status_code
                )
            )

        return self._extract_file_id(response.text)

    def generate_transcripts(self):
        """
        Kicks off transcription process for default language.

        3PlayMedia errors are logged and swallowed; any other exception is
        logged and re-raised to the caller.
        """
        try:
            file_id = self.submit_media()

            # Track progress of transcription process
            TranscriptProcessMetadata.objects.create(
                video=self.video,
                process_id=file_id,
                lang_code=u'en',
                provider=TranscriptProvider.THREE_PLAY,
                status=TranscriptStatus.IN_PROGRESS,
            )

            # Successfully kicked off transcription process for a video with the given language.
            LOGGER.info(
                '[3PlayMedia] Transcription process has been started for video=%s, language=en.',
                self.video.studio_id,
            )
        except ThreePlayMediaError:
            LOGGER.exception(
                '[3PlayMedia] Could not process transcripts for video=%s language=en.',
                self.video.studio_id,
            )
        except Exception:
            LOGGER.exception(
                '[3PlayMedia] Unexpected error while transcription for video=%s language=en .',
                self.video.studio_id,
            )
            raise
def main():
    """
    Placeholder entry point; it performs no work and returns None.
    """
    return None


if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the exit status.
    exit_status = main()
    sys.exit(exit_status)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment