Commit 9c06bb65 by Qubad786 Committed by muzaffaryousaf

Use edx-val transcripts and translations collectively with contentstore.

Adds edx-val transcripts to the outcome of get_transcripts_info while the rest of the flow remains the same, and also adds a fallback to edx-val transcripts for mobile-accessible video endpoints.
parent ec7b78d6
...@@ -633,6 +633,16 @@ class TestTranscript(unittest.TestCase): ...@@ -633,6 +633,16 @@ class TestTranscript(unittest.TestCase):
with self.assertRaises(NotImplementedError): with self.assertRaises(NotImplementedError):
transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'sjson') transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'sjson')
def test_dummy_non_existent_transcript(self):
    """
    Verify that `Transcript.asset` raises `NotFoundError` when it is handed the
    dummy non-existent transcript name, either as the second positional argument
    or through the `filename` keyword.
    """
    dummy_name = transcripts_utils.NON_EXISTENT_TRANSCRIPT

    # Dummy name supplied as the second positional argument.
    with self.assertRaises(NotFoundError):
        transcripts_utils.Transcript.asset(None, dummy_name)

    # Dummy name supplied through the `filename` keyword.
    with self.assertRaises(NotFoundError):
        transcripts_utils.Transcript.asset(None, None, filename=dummy_name)
class TestSubsFilename(unittest.TestCase): class TestSubsFilename(unittest.TestCase):
""" """
......
...@@ -1001,7 +1001,8 @@ class TranscriptPreferencesTestCase(VideoUploadTestBase, CourseTestCase): ...@@ -1001,7 +1001,8 @@ class TranscriptPreferencesTestCase(VideoUploadTestBase, CourseTestCase):
{ {
'provider': TranscriptProvider.THREE_PLAY_MEDIA, 'provider': TranscriptProvider.THREE_PLAY_MEDIA,
'three_play_turnaround': 'default', 'three_play_turnaround': 'default',
'preferred_languages': ['en'] 'preferred_languages': ['en'],
'video_source_language': None, # TODO change this once we support source language in platform.
}, },
True, True,
'', '',
...@@ -1020,6 +1021,7 @@ class TranscriptPreferencesTestCase(VideoUploadTestBase, CourseTestCase): ...@@ -1020,6 +1021,7 @@ class TranscriptPreferencesTestCase(VideoUploadTestBase, CourseTestCase):
'cielo24_turnaround': preferences.get('cielo24_turnaround'), 'cielo24_turnaround': preferences.get('cielo24_turnaround'),
'three_play_turnaround': preferences.get('three_play_turnaround'), 'three_play_turnaround': preferences.get('three_play_turnaround'),
'preferred_languages': preferences.get('preferred_languages', []), 'preferred_languages': preferences.get('preferred_languages', []),
'video_source_language': preferences.get('video_source_language'),
} }
with patch( with patch(
......
...@@ -31,9 +31,9 @@ from xmodule.video_module.transcripts_utils import ( ...@@ -31,9 +31,9 @@ from xmodule.video_module.transcripts_utils import (
download_youtube_subs, download_youtube_subs,
GetTranscriptsFromYouTubeException, GetTranscriptsFromYouTubeException,
get_video_transcript_content, get_video_transcript_content,
generate_srt_from_sjson,
generate_subs_from_source, generate_subs_from_source,
get_transcripts_from_youtube, get_transcripts_from_youtube,
is_val_transcript_feature_enabled_for_course,
manage_video_subtitles_save, manage_video_subtitles_save,
remove_subs_from_store, remove_subs_from_store,
Transcript, Transcript,
...@@ -173,13 +173,15 @@ def download_transcripts(request): ...@@ -173,13 +173,15 @@ def download_transcripts(request):
sjson_transcript = contentstore().find(content_location).data sjson_transcript = contentstore().find(content_location).data
except NotFoundError: except NotFoundError:
# Try searching in VAL for the transcript as a last resort # Try searching in VAL for the transcript as a last resort
transcript = get_video_transcript_content( transcript = None
course_id=item.location.course_key, if is_val_transcript_feature_enabled_for_course(item.location.course_key):
language_code=u'en', transcript = get_video_transcript_content(
edx_video_id=item.edx_video_id, language_code=u'en',
youtube_id_1_0=item.youtube_id_1_0, edx_video_id=item.edx_video_id,
html5_sources=item.html5_sources, youtube_id_1_0=item.youtube_id_1_0,
) html5_sources=item.html5_sources,
)
if not transcript: if not transcript:
raise Http404 raise Http404
...@@ -303,14 +305,14 @@ def check_transcripts(request): ...@@ -303,14 +305,14 @@ def check_transcripts(request):
command, subs_to_use = _transcripts_logic(transcripts_presence, videos) command, subs_to_use = _transcripts_logic(transcripts_presence, videos)
if command == 'not_found': if command == 'not_found':
# Try searching in VAL for the transcript as a last resort # Try searching in VAL for the transcript as a last resort
video_transcript = get_video_transcript_content( if is_val_transcript_feature_enabled_for_course(item.location.course_key):
course_id=item.location.course_key, video_transcript = get_video_transcript_content(
language_code=u'en', language_code=u'en',
edx_video_id=item.edx_video_id, edx_video_id=item.edx_video_id,
youtube_id_1_0=item.youtube_id_1_0, youtube_id_1_0=item.youtube_id_1_0,
html5_sources=item.html5_sources, html5_sources=item.html5_sources,
) )
command = 'found' if video_transcript else command command = 'found' if video_transcript else command
transcripts_presence.update({ transcripts_presence.update({
'command': command, 'command': command,
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
</div> </div>
<div class='course-video-settings-wrapper'> <div class='course-video-settings-wrapper'>
<div class='course-video-settings-message-wrapper'></div> <div class='course-video-settings-message-wrapper'></div>
<span class="course-video-settings-title"><%- gettext('Transcript Settings') %></span> <span class="course-video-settings-title"><%- gettext('Course Video Settings') %></span>
<div class='transcript-preferance-wrapper transcript-provider-wrapper'> <div class='transcript-preferance-wrapper transcript-provider-wrapper'>
<label class='transcript-preferance-label' for='transcript-provider'><%- gettext('Transcript Provider') %><span class='error-icon' aria-hidden="true"></span></label> <label class='transcript-preferance-label' for='transcript-provider'><%- gettext('Transcript Provider') %><span class='error-icon' aria-hidden="true"></span></label>
<div class='transcript-provider-group' id='transcript-provider'></div> <div class='transcript-provider-group' id='transcript-provider'></div>
......
...@@ -27,6 +27,8 @@ except ImportError: ...@@ -27,6 +27,8 @@ except ImportError:
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
NON_EXISTENT_TRANSCRIPT = 'non_existent_dummy_file_name'
class TranscriptException(Exception): # pylint: disable=missing-docstring class TranscriptException(Exception): # pylint: disable=missing-docstring
pass pass
...@@ -498,12 +500,21 @@ def get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources): ...@@ -498,12 +500,21 @@ def get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources):
return external, video_ids return external, video_ids
def get_video_transcript_content(course_id, language_code, edx_video_id, youtube_id_1_0, html5_sources): def is_val_transcript_feature_enabled_for_course(course_id):
"""
Get edx-val transcript feature flag
Arguments:
course_id(CourseKey): Course key identifying a course whose feature flag is being inspected.
"""
return VideoTranscriptEnabledFlag.feature_enabled(course_id=course_id)
def get_video_transcript_content(language_code, edx_video_id, youtube_id_1_0, html5_sources):
""" """
Gets video transcript content, only if the corresponding feature flag is enabled for the given `course_id`. Gets video transcript content, only if the corresponding feature flag is enabled for the given `course_id`.
Arguments: Arguments:
course_id(CourseKey): Course key identifying a course
language_code(unicode): Language code of the requested transcript language_code(unicode): Language code of the requested transcript
edx_video_id(unicode): edx-val's video identifier edx_video_id(unicode): edx-val's video identifier
youtube_id_1_0(unicode): A youtube source identifier youtube_id_1_0(unicode): A youtube source identifier
...@@ -513,13 +524,33 @@ def get_video_transcript_content(course_id, language_code, edx_video_id, youtube ...@@ -513,13 +524,33 @@ def get_video_transcript_content(course_id, language_code, edx_video_id, youtube
A dict containing transcript's file name and its sjson content. A dict containing transcript's file name and its sjson content.
""" """
transcript = None transcript = None
if VideoTranscriptEnabledFlag.feature_enabled(course_id=course_id) and edxval_api: if edxval_api:
__, video_candidate_ids = get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources) __, video_candidate_ids = get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources)
transcript = edxval_api.get_video_transcript_data(video_candidate_ids, language_code) transcript = edxval_api.get_video_transcript_data(video_candidate_ids, language_code)
return transcript return transcript
def get_available_transcript_languages(edx_video_id, youtube_id_1_0, html5_sources):
    """
    Look up the transcript languages that edx-val reports for a video.

    Arguments:
        edx_video_id(unicode): edx-val's video identifier
        youtube_id_1_0(unicode): A youtube source identifier
        html5_sources(list): A list containing html5 sources

    Returns:
        Whatever `edxval_api.get_available_transcript_languages` returns for the
        candidate video ids (documented upstream as a list of distinct transcript
        language codes), or an empty list when `edxval_api` is falsy.
    """
    # Without edx-val there is nothing to query.
    if not edxval_api:
        return []

    # Only the candidate ids (second element) are needed; the first element is ignored.
    video_candidate_ids = get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources)[1]
    return edxval_api.get_available_transcript_languages(video_candidate_ids)
class Transcript(object): class Transcript(object):
""" """
Container for transcript methods. Container for transcript methods.
...@@ -569,6 +600,13 @@ class Transcript(object): ...@@ -569,6 +600,13 @@ class Transcript(object):
`location` is module location. `location` is module location.
""" """
# HACK Warning! This is temporary and will be removed once edx-val takes over the
# transcript module; until all the data is migrated to edx-val, contentstore only
# functions as a fallback. This check saves a contentstore hit for the hardcoded
# dummy non-existent transcript name.
if NON_EXISTENT_TRANSCRIPT in [subs_id, filename]:
raise NotFoundError
asset_filename = subs_filename(subs_id, lang) if not filename else filename asset_filename = subs_filename(subs_id, lang) if not filename else filename
return Transcript.get_asset(location, asset_filename) return Transcript.get_asset(location, asset_filename)
...@@ -608,10 +646,11 @@ class VideoTranscriptsMixin(object): ...@@ -608,10 +646,11 @@ class VideoTranscriptsMixin(object):
This is necessary for both VideoModule and VideoDescriptor. This is necessary for both VideoModule and VideoDescriptor.
""" """
def available_translations(self, transcripts, verify_assets=None): def available_translations(self, transcripts, verify_assets=None, include_val_transcripts=None):
"""Return a list of language codes for which we have transcripts. """
Return a list of language codes for which we have transcripts.
Args: Arguments:
verify_assets (boolean): If True, checks to ensure that the transcripts verify_assets (boolean): If True, checks to ensure that the transcripts
really exist in the contentstore. If False, we just look at the really exist in the contentstore. If False, we just look at the
VideoDescriptor fields and do not query the contentstore. One reason VideoDescriptor fields and do not query the contentstore. One reason
...@@ -621,8 +660,7 @@ class VideoTranscriptsMixin(object): ...@@ -621,8 +660,7 @@ class VideoTranscriptsMixin(object):
Defaults to `not FALLBACK_TO_ENGLISH_TRANSCRIPTS`. Defaults to `not FALLBACK_TO_ENGLISH_TRANSCRIPTS`.
transcripts (dict): A dict with all transcripts and a sub. transcripts (dict): A dict with all transcripts and a sub.
include_val_transcripts(boolean): If True, adds the edx-val transcript languages as well.
Defaults to False
""" """
translations = [] translations = []
if verify_assets is None: if verify_assets is None:
...@@ -639,7 +677,14 @@ class VideoTranscriptsMixin(object): ...@@ -639,7 +677,14 @@ class VideoTranscriptsMixin(object):
return translations return translations
# If we've gotten this far, we're going to verify that the transcripts # If we've gotten this far, we're going to verify that the transcripts
# being referenced are actually in the contentstore. # being referenced are actually either in the contentstore or in edx-val.
if include_val_transcripts:
translations = get_available_transcript_languages(
edx_video_id=self.edx_video_id,
youtube_id_1_0=self.youtube_id_1_0,
html5_sources=self.html5_sources
)
if sub: # check if sjson exists for 'en'. if sub: # check if sjson exists for 'en'.
try: try:
Transcript.asset(self.location, sub, 'en') Transcript.asset(self.location, sub, 'en')
...@@ -649,18 +694,20 @@ class VideoTranscriptsMixin(object): ...@@ -649,18 +694,20 @@ class VideoTranscriptsMixin(object):
except NotFoundError: except NotFoundError:
pass pass
else: else:
translations += ['en'] translations.append('en')
else: else:
translations += ['en'] translations.append('en')
for lang in other_langs: for lang in other_langs:
try: try:
Transcript.asset(self.location, None, None, other_langs[lang]) Transcript.asset(self.location, None, None, other_langs[lang])
except NotFoundError: except NotFoundError:
continue continue
translations += [lang]
return translations translations.append(lang)
# to clean redundant language codes.
return list(set(translations))
def get_transcript(self, transcripts, transcript_format='srt', lang=None): def get_transcript(self, transcripts, transcript_format='srt', lang=None):
""" """
...@@ -723,9 +770,13 @@ class VideoTranscriptsMixin(object): ...@@ -723,9 +770,13 @@ class VideoTranscriptsMixin(object):
transcript_language = u'en' transcript_language = u'en'
return transcript_language return transcript_language
def get_transcripts_info(self, is_bumper=False): def get_transcripts_info(self, is_bumper=False, include_val_transcripts=False):
""" """
Returns a transcript dictionary for the video. Returns a transcript dictionary for the video.
Arguments:
is_bumper(bool): If True, the request is for the bumper transcripts
include_val_transcripts(bool): If True, include edx-val transcripts as well
""" """
if is_bumper: if is_bumper:
transcripts = copy.deepcopy(get_bumper_settings(self).get('transcripts', {})) transcripts = copy.deepcopy(get_bumper_settings(self).get('transcripts', {}))
...@@ -739,6 +790,24 @@ class VideoTranscriptsMixin(object): ...@@ -739,6 +790,24 @@ class VideoTranscriptsMixin(object):
language_code: transcript_file language_code: transcript_file
for language_code, transcript_file in transcripts.items() if transcript_file != '' for language_code, transcript_file in transcripts.items() if transcript_file != ''
} }
# For phase 2, removing `include_val_transcripts` will make edx-val
# take over control of transcripts.
if include_val_transcripts:
transcript_languages = get_available_transcript_languages(
edx_video_id=self.edx_video_id,
youtube_id_1_0=self.youtube_id_1_0,
html5_sources=self.html5_sources
)
# HACK Warning! This is temporary and will be removed once edx-val takes over the
# transcript module; until all the data is migrated to edx-val, contentstore only
# functions as a fallback.
for language_code in transcript_languages:
if language_code == 'en' and not sub:
sub = NON_EXISTENT_TRANSCRIPT
elif not transcripts.get(language_code):
transcripts[language_code] = NON_EXISTENT_TRANSCRIPT
return { return {
"sub": sub, "sub": sub,
"transcripts": transcripts, "transcripts": transcripts,
......
...@@ -20,14 +20,15 @@ from opaque_keys.edx.locator import CourseLocator ...@@ -20,14 +20,15 @@ from opaque_keys.edx.locator import CourseLocator
from .transcripts_utils import ( from .transcripts_utils import (
get_or_create_sjson, get_or_create_sjson,
TranscriptException,
TranscriptsGenerationException,
generate_sjson_for_all_speeds, generate_sjson_for_all_speeds,
get_video_transcript_content, get_video_transcript_content,
youtube_speed_dict, is_val_transcript_feature_enabled_for_course,
Transcript,
save_to_store, save_to_store,
subs_filename subs_filename,
Transcript,
TranscriptException,
TranscriptsGenerationException,
youtube_speed_dict,
) )
...@@ -224,7 +225,8 @@ class VideoStudentViewHandlers(object): ...@@ -224,7 +225,8 @@ class VideoStudentViewHandlers(object):
For 'en' check if SJSON exists. For non-`en` check if SRT file exists. For 'en' check if SJSON exists. For non-`en` check if SRT file exists.
""" """
is_bumper = request.GET.get('is_bumper', False) is_bumper = request.GET.get('is_bumper', False)
transcripts = self.get_transcripts_info(is_bumper) feature_enabled = is_val_transcript_feature_enabled_for_course(self.course_id)
transcripts = self.get_transcripts_info(is_bumper, include_val_transcripts=feature_enabled)
if dispatch.startswith('translation'): if dispatch.startswith('translation'):
language = dispatch.replace('translation', '').strip('/') language = dispatch.replace('translation', '').strip('/')
...@@ -241,15 +243,17 @@ class VideoStudentViewHandlers(object): ...@@ -241,15 +243,17 @@ class VideoStudentViewHandlers(object):
try: try:
transcript = self.translation(request.GET.get('videoId', None), transcripts) transcript = self.translation(request.GET.get('videoId', None), transcripts)
except (TypeError, NotFoundError) as ex: except (TypeError, TranscriptException, NotFoundError) as ex:
# Catching `TranscriptException` because its also getting raised at places
# when transcript is not found in contentstore.
log.debug(ex.message) log.debug(ex.message)
# Try to return static URL redirection as last resort # Try to return static URL redirection as last resort
# if no translation is required # if no translation is required
response = self.get_static_transcript(request, transcripts) response = self.get_static_transcript(request, transcripts)
if response.status_code == 404: if response.status_code == 404 and feature_enabled:
# Try to get transcript from edx-val as a last resort.
transcript = get_video_transcript_content( transcript = get_video_transcript_content(
course_id=self.course_id, language_code=self.transcript_language,
language_code=language,
edx_video_id=self.edx_video_id, edx_video_id=self.edx_video_id,
youtube_id_1_0=self.youtube_id_1_0, youtube_id_1_0=self.youtube_id_1_0,
html5_sources=self.html5_sources, html5_sources=self.html5_sources,
...@@ -257,17 +261,13 @@ class VideoStudentViewHandlers(object): ...@@ -257,17 +261,13 @@ class VideoStudentViewHandlers(object):
if transcript: if transcript:
response = Response( response = Response(
transcript['content'], transcript['content'],
headerlist=[('Content-Language', language)], headerlist=[('Content-Language', self.transcript_language)],
charset='utf8', charset='utf8',
) )
response.content_type = Transcript.mime_types['sjson'] response.content_type = Transcript.mime_types['sjson']
return response return response
except ( except (UnicodeDecodeError, TranscriptsGenerationException) as ex:
TranscriptException,
UnicodeDecodeError,
TranscriptsGenerationException
) as ex:
log.info(ex.message) log.info(ex.message)
response = Response(status=404) response = Response(status=404)
else: else:
...@@ -280,44 +280,44 @@ class VideoStudentViewHandlers(object): ...@@ -280,44 +280,44 @@ class VideoStudentViewHandlers(object):
transcript_content, transcript_filename, transcript_mime_type = self.get_transcript( transcript_content, transcript_filename, transcript_mime_type = self.get_transcript(
transcripts, transcript_format=self.transcript_download_format, lang=lang transcripts, transcript_format=self.transcript_download_format, lang=lang
) )
except NotFoundError: except (ValueError, NotFoundError):
response = Response(status=404) response = Response(status=404)
# Make sure the language is set. # Check for transcripts in edx-val as a last resort if corresponding feature is enabled.
if lang is None: if feature_enabled:
lang = self.get_default_transcript_language(transcripts) # Make sure the language is set.
if not lang:
transcript = get_video_transcript_content( lang = self.get_default_transcript_language(transcripts)
course_id=self.course_id,
language_code=lang,
edx_video_id=self.edx_video_id,
youtube_id_1_0=self.youtube_id_1_0,
html5_sources=self.html5_sources,
)
if transcript:
transcript_content = Transcript.convert(
transcript['content'],
input_format='sjson',
output_format=self.transcript_download_format
)
# Construct the response transcript = get_video_transcript_content(
base_name, __ = os.path.splitext(os.path.basename(transcript['file_name'])) language_code=lang,
filename = '{base_name}.{ext}'.format( edx_video_id=self.edx_video_id,
base_name=base_name.encode('utf8'), youtube_id_1_0=self.youtube_id_1_0,
ext=self.transcript_download_format html5_sources=self.html5_sources,
)
response = Response(
transcript_content,
headerlist=[
('Content-Disposition', 'attachment; filename="{filename}"'.format(filename=filename)),
('Content-Language', lang),
],
charset='utf8',
) )
response.content_type = Transcript.mime_types[self.transcript_download_format] if transcript:
transcript_content = Transcript.convert(
transcript['content'],
input_format='sjson',
output_format=self.transcript_download_format
)
# Construct the response
base_name, __ = os.path.splitext(os.path.basename(transcript['file_name']))
filename = '{base_name}.{ext}'.format(
base_name=base_name.encode('utf8'),
ext=self.transcript_download_format
)
response = Response(
transcript_content,
headerlist=[
('Content-Disposition', 'attachment; filename="{filename}"'.format(filename=filename)),
('Content-Language', lang),
],
charset='utf8',
)
response.content_type = Transcript.mime_types[self.transcript_download_format]
return response return response
except (ValueError, KeyError, UnicodeDecodeError): except (KeyError, UnicodeDecodeError):
return Response(status=404) return Response(status=404)
else: else:
response = Response( response = Response(
...@@ -332,7 +332,11 @@ class VideoStudentViewHandlers(object): ...@@ -332,7 +332,11 @@ class VideoStudentViewHandlers(object):
elif dispatch.startswith('available_translations'): elif dispatch.startswith('available_translations'):
available_translations = self.available_translations(transcripts, verify_assets=True) available_translations = self.available_translations(
transcripts,
verify_assets=True,
include_val_transcripts=feature_enabled,
)
if available_translations: if available_translations:
response = Response(json.dumps(available_translations)) response = Response(json.dumps(available_translations))
response.content_type = 'application/json' response.content_type = 'application/json'
......
...@@ -44,7 +44,7 @@ from .bumper_utils import bumperize ...@@ -44,7 +44,7 @@ from .bumper_utils import bumperize
from .transcripts_utils import ( from .transcripts_utils import (
get_html5_ids, get_html5_ids,
get_video_ids_info, get_video_ids_info,
get_video_transcript_content, is_val_transcript_feature_enabled_for_course,
Transcript, Transcript,
VideoTranscriptsMixin, VideoTranscriptsMixin,
) )
...@@ -186,26 +186,14 @@ class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers, ...@@ -186,26 +186,14 @@ class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers,
elif sub or other_lang: elif sub or other_lang:
track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?') track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?')
if not track_url:
# Check transcript's availability in edx-val
transcript = get_video_transcript_content(
course_id=self.course_id,
language_code=self.transcript_language,
edx_video_id=self.edx_video_id,
youtube_id_1_0=self.youtube_id_1_0,
html5_sources=self.html5_sources,
)
if transcript:
track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?')
transcript_language = self.get_default_transcript_language(transcripts) transcript_language = self.get_default_transcript_language(transcripts)
native_languages = {lang: label for lang, label in settings.LANGUAGES if len(lang) == 2} native_languages = {lang: label for lang, label in settings.LANGUAGES if len(lang) == 2}
languages = { languages = {
lang: native_languages.get(lang, display) lang: native_languages.get(lang, display)
for lang, display in settings.ALL_LANGUAGES for lang, display in settings.ALL_LANGUAGES
if lang in other_lang if lang in other_lang
} }
if not other_lang or (other_lang and sub): if not other_lang or (other_lang and sub):
languages['en'] = 'English' languages['en'] = 'English'
...@@ -295,7 +283,9 @@ class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers, ...@@ -295,7 +283,9 @@ class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers,
if download_video_link and download_video_link.endswith('.m3u8'): if download_video_link and download_video_link.endswith('.m3u8'):
download_video_link = None download_video_link = None
track_url, transcript_language, sorted_languages = self.get_transcripts_for_student(self.get_transcripts_info()) feature_enabled = is_val_transcript_feature_enabled_for_course(self.course_id)
transcripts = self.get_transcripts_info(include_val_transcripts=feature_enabled)
track_url, transcript_language, sorted_languages = self.get_transcripts_for_student(transcripts=transcripts)
# CDN_VIDEO_URLS is only to be used here and will be deleted # CDN_VIDEO_URLS is only to be used here and will be deleted
# TODO(ali@edx.org): Delete this after the CDN experiment has completed. # TODO(ali@edx.org): Delete this after the CDN experiment has completed.
...@@ -1026,10 +1016,12 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler ...@@ -1026,10 +1016,12 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler
"file_size": 0, # File size is not relevant for external link "file_size": 0, # File size is not relevant for external link
} }
transcripts_info = self.get_transcripts_info() feature_enabled = is_val_transcript_feature_enabled_for_course(self.runtime.course_id.for_branch(None))
transcripts_info = self.get_transcripts_info(include_val_transcripts=feature_enabled)
available_translations = self.available_translations(transcripts_info, include_val_transcripts=feature_enabled)
transcripts = { transcripts = {
lang: self.runtime.handler_url(self, 'transcript', 'download', query="lang=" + lang, thirdparty=True) lang: self.runtime.handler_url(self, 'transcript', 'download', query="lang=" + lang, thirdparty=True)
for lang in self.available_translations(transcripts_info) for lang in available_translations
} }
return { return {
......
...@@ -189,6 +189,7 @@ class TestVideo(BaseTestXmodule): ...@@ -189,6 +189,7 @@ class TestVideo(BaseTestXmodule):
@attr(shard=1) @attr(shard=1)
@ddt.ddt
class TestTranscriptAvailableTranslationsDispatch(TestVideo): class TestTranscriptAvailableTranslationsDispatch(TestVideo):
""" """
Test video handler that provide available translations info. Test video handler that provide available translations info.
...@@ -247,6 +248,80 @@ class TestTranscriptAvailableTranslationsDispatch(TestVideo): ...@@ -247,6 +248,80 @@ class TestTranscriptAvailableTranslationsDispatch(TestVideo):
response = self.item.transcript(request=request, dispatch='available_translations') response = self.item.transcript(request=request, dispatch='available_translations')
self.assertEqual(json.loads(response.body), ['en', 'uk']) self.assertEqual(json.loads(response.body), ['en', 'uk'])
@patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
@patch('xmodule.video_module.transcripts_utils.get_available_transcript_languages')
@ddt.data(
    # (val_transcripts, sub, transcripts, result)
    (['en', 'uk', 'ro'], '', {}, ['en', 'uk', 'ro']),
    (['uk', 'ro'], True, {}, ['en', 'uk', 'ro']),
    (['de', 'ro'], True, {'uk': True, 'ro': False}, ['en', 'uk', 'de', 'ro']),
    (['de'], True, {'uk': True, 'ro': False}, ['en', 'uk', 'de']),
)
@ddt.unpack
def test_val_available_translations(self, val_transcripts, sub, transcripts, result, mock_get_transcript_languages):
    """
    Check the `available_translations` dispatch when the feature flag is on: the
    response should combine the video component's own transcript languages with
    the languages returned by the (mocked) edx-val lookup.
    """
    # Iterate over a copy because the `transcripts` dict is rewritten in place:
    # each boolean marker is replaced with a transcript file name.
    for lang_code, in_content_store in dict(transcripts).iteritems():
        if not in_content_store:
            # Points at a file that is never uploaded.
            transcripts[lang_code] = 'non_existent.srt.sjson'
            continue

        # NOTE(review): os.path.split returns (head, tail), so `file_name` here is the
        # directory part of the path -- confirm the basename was not intended. The same
        # value is used for both the upload and the field, so the two stay consistent.
        file_name, __ = os.path.split(self.srt_file.name)
        _upload_file(self.srt_file, self.item_descriptor.location, file_name)
        transcripts[lang_code] = file_name

    if sub:
        # Replace the truthy marker with the subs id derived from an uploaded sjson file.
        sjson_transcript = _create_file(json.dumps(self.subs))
        _upload_sjson_file(sjson_transcript, self.item_descriptor.location)
        sub = _get_subs_id(sjson_transcript.name)

    mock_get_transcript_languages.return_value = val_transcripts
    self.item.transcripts = transcripts
    self.item.sub = sub

    # Hit the available translations dispatch and compare ignoring order.
    response = self.item.transcript(
        request=Request.blank('/available_translations'),
        dispatch='available_translations',
    )
    self.assertItemsEqual(json.loads(response.body), result)
@patch(
    'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
    Mock(return_value=False),
)
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_available_transcript_languages')
def test_val_available_translations_feature_disabled(self, mock_get_available_transcript_languages):
    """
    Check the `available_translations` dispatch when the feature flag is off: even
    though the (mocked) edx-val API would return languages, the handler responds
    with a 404.
    """
    mock_get_available_transcript_languages.return_value = ['en', 'de', 'ro']

    response = self.item.transcript(
        request=Request.blank('/available_translations'),
        dispatch='available_translations',
    )

    self.assertEqual(response.status_code, 404)
@attr(shard=1) @attr(shard=1)
@ddt.ddt @ddt.ddt
......
...@@ -1315,6 +1315,7 @@ class TestVideoDescriptorStudentViewJson(TestCase): ...@@ -1315,6 +1315,7 @@ class TestVideoDescriptorStudentViewJson(TestCase):
self.transcript_url = "transcript_url" self.transcript_url = "transcript_url"
self.video = instantiate_descriptor(data=sample_xml) self.video = instantiate_descriptor(data=sample_xml)
self.video.runtime.handler_url = Mock(return_value=self.transcript_url) self.video.runtime.handler_url = Mock(return_value=self.transcript_url)
self.video.runtime.course_id = MagicMock()
def setup_val_video(self, associate_course_in_val=False): def setup_val_video(self, associate_course_in_val=False):
""" """
...@@ -1413,6 +1414,7 @@ class TestVideoDescriptorStudentViewJson(TestCase): ...@@ -1413,6 +1414,7 @@ class TestVideoDescriptorStudentViewJson(TestCase):
self.transcript_url = "transcript_url" self.transcript_url = "transcript_url"
self.video = instantiate_descriptor(data=sample_xml) self.video = instantiate_descriptor(data=sample_xml)
self.video.runtime.handler_url = Mock(return_value=self.transcript_url) self.video.runtime.handler_url = Mock(return_value=self.transcript_url)
self.video.runtime.course_id = MagicMock()
result = self.get_result() result = self.get_result()
self.verify_result_with_youtube_url(result) self.verify_result_with_youtube_url(result)
...@@ -1450,6 +1452,43 @@ class TestVideoDescriptorStudentViewJson(TestCase): ...@@ -1450,6 +1452,43 @@ class TestVideoDescriptorStudentViewJson(TestCase):
result = self.get_result(allow_cache_miss) result = self.get_result(allow_cache_miss)
self.verify_result_with_fallback_and_youtube(result) self.verify_result_with_fallback_and_youtube(result)
@ddt.data(
    # (transcripts, english_sub, val_transcripts, expected_transcripts)
    ({}, '', [], ['en']),
    ({}, '', ['de'], ['de']),
    ({}, '', ['en', 'de'], ['en', 'de']),
    ({}, 'en-subs', ['de'], ['en', 'de']),
    ({'uk': 1}, 'en-subs', ['de'], ['en', 'uk', 'de']),
    ({'uk': 1, 'de': 1}, 'en-subs', ['de', 'en'], ['en', 'uk', 'de']),
)
@ddt.unpack
@patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_available_transcript_languages')
def test_student_view_with_val_transcripts_enabled(self, transcripts, english_sub, val_transcripts,
                                                   expected_transcripts, mock_get_transcript_languages):
    """
    Check the transcript languages exposed by `student_view_data` when the edx-val
    transcripts feature flag is on.
    """
    # Configure the video's own transcripts and what the mocked edx-val call returns.
    self.video.sub = english_sub
    self.video.transcripts = transcripts
    mock_get_transcript_languages.return_value = val_transcripts

    actual_languages = self.get_result()['transcripts'].keys()

    # Order is irrelevant; only the set of language codes matters.
    self.assertItemsEqual(actual_languages, expected_transcripts)
@patch(
    'xmodule.video_module.transcripts_utils.edxval_api.get_available_transcript_languages',
    Mock(return_value=['ro', 'es']),
)
@patch(
    'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
    Mock(return_value=False),
)
def test_student_view_with_val_transcripts_disabled(self):
    """
    Test `student_view_data` with edx-val transcripts disabled.

    The edx-val lookup is mocked to report 'ro' and 'es', but with the feature
    flag patched off the result must contain only the `TEST_LANGUAGE` entry.
    """
    actual_transcripts = self.get_result()['transcripts']
    expected_transcripts = {self.TEST_LANGUAGE: self.transcript_url}
    self.assertDictEqual(actual_transcripts, expected_transcripts)
@attr(shard=1) @attr(shard=1)
class VideoDescriptorTest(TestCase, VideoDescriptorTestBase): class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
......
...@@ -11,6 +11,7 @@ from courseware.module_render import get_module_for_descriptor ...@@ -11,6 +11,7 @@ from courseware.module_render import get_module_for_descriptor
from util.module_utils import get_dynamic_descriptor_children from util.module_utils import get_dynamic_descriptor_children
from xmodule.modulestore.django import modulestore from xmodule.modulestore.django import modulestore
from xmodule.modulestore.mongo.base import BLOCK_TYPES_WITH_CHILDREN from xmodule.modulestore.mongo.base import BLOCK_TYPES_WITH_CHILDREN
from xmodule.video_module.transcripts_utils import is_val_transcript_feature_enabled_for_course
class BlockOutline(object): class BlockOutline(object):
...@@ -208,8 +209,12 @@ def video_summary(video_profiles, course_id, video_descriptor, request, local_ca ...@@ -208,8 +209,12 @@ def video_summary(video_profiles, course_id, video_descriptor, request, local_ca
size = default_encoded_video.get('file_size', 0) size = default_encoded_video.get('file_size', 0)
# Transcripts... # Transcripts...
transcripts_info = video_descriptor.get_transcripts_info() feature_enabled = is_val_transcript_feature_enabled_for_course(course_id)
transcript_langs = video_descriptor.available_translations(transcripts_info) transcripts_info = video_descriptor.get_transcripts_info(include_val_transcripts=feature_enabled)
transcript_langs = video_descriptor.available_translations(
transcripts=transcripts_info,
include_val_transcripts=feature_enabled
)
transcripts = { transcripts = {
lang: reverse( lang: reverse(
......
...@@ -2,12 +2,14 @@ ...@@ -2,12 +2,14 @@
""" """
Tests for video outline API Tests for video outline API
""" """
import ddt
import itertools import itertools
import json
from collections import namedtuple from collections import namedtuple
from mock import Mock
from uuid import uuid4 from uuid import uuid4
import ddt
from django.conf import settings from django.conf import settings
from edxval import api from edxval import api
from milestones.tests.utils import MilestonesTestCaseMixin from milestones.tests.utils import MilestonesTestCaseMixin
...@@ -876,6 +878,36 @@ class TestVideoSummaryList(TestVideoAPITestCase, MobileAuthTestMixin, MobileCour ...@@ -876,6 +878,36 @@ class TestVideoSummaryList(TestVideoAPITestCase, MobileAuthTestMixin, MobileCour
set(case.expected_transcripts) set(case.expected_transcripts)
) )
@ddt.data(
    ({}, '', [], ['en']),
    ({}, '', ['de'], ['de']),
    ({}, '', ['en', 'de'], ['en', 'de']),
    ({}, 'en-subs', ['de'], ['en', 'de']),
    ({'uk': 1}, 'en-subs', ['de'], ['en', 'uk', 'de']),
    ({'uk': 1, 'de': 1}, 'en-subs', ['de', 'en'], ['en', 'uk', 'de']),
)
@ddt.unpack
@patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_available_transcript_languages')
def test_val_transcripts_with_feature_enabled(
        self, transcripts, english_sub, val_transcripts, expected_transcripts, mock_get_transcript_languages
):
    """
    Test the video outline summary transcripts with edx-val transcripts enabled.

    Each data row is `(transcripts, english_sub, val_transcripts, expected_transcripts)`:
    the video's `transcripts` and `sub` fields, the languages the mocked edx-val
    lookup reports, and the language codes expected as keys of the summary's
    `transcripts` entry.
    """
    self.login_and_enroll()
    video_block = ItemFactory.create(
        parent=self.nameless_unit,
        category="video",
        edx_video_id=self.edx_video_id,
        display_name=u"test draft video omega 2 \u03a9"
    )

    # The edx-val language lookup is mocked, so it reports exactly this row's languages.
    mock_get_transcript_languages.return_value = val_transcripts
    video_block.sub = english_sub
    video_block.transcripts = transcripts
    modulestore().update_item(video_block, self.user.id)

    outline = self.api_response().data
    self.assertEqual(len(outline), 1)

    summary_transcripts = outline[0]['summary']['transcripts']
    self.assertItemsEqual(summary_transcripts.keys(), expected_transcripts)
@attr(shard=2) @attr(shard=2)
class TestTranscriptsDetail(TestVideoAPITestCase, MobileAuthTestMixin, MobileCourseAccessTestMixin, class TestTranscriptsDetail(TestVideoAPITestCase, MobileAuthTestMixin, MobileCourseAccessTestMixin,
...@@ -905,3 +937,57 @@ class TestTranscriptsDetail(TestVideoAPITestCase, MobileAuthTestMixin, MobileCou ...@@ -905,3 +937,57 @@ class TestTranscriptsDetail(TestVideoAPITestCase, MobileAuthTestMixin, MobileCou
self.video = self._create_video_with_subs(custom_subid=u'你好') self.video = self._create_video_with_subs(custom_subid=u'你好')
self.login_and_enroll() self.login_and_enroll()
self.api_response(expected_response_code=200, lang='en') self.api_response(expected_response_code=200, lang='en')
@patch(
    'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
    Mock(return_value=True),
)
@patch(
    'xmodule.video_module.transcripts_utils.edxval_api.get_available_transcript_languages',
    Mock(return_value=['uk']),
)
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
def test_val_transcript(self, mock_get_video_transcript_content):
    """
    Tests transcript retrieval view with val transcripts.

    The edx-val transcript data lookup is mocked to return a single-cue sjson
    transcript named 'edx.sjson'; the endpoint is expected to answer 200 for
    'uk' with that cue rendered as SRT and served as an 'edx.srt' attachment.
    """
    mock_get_video_transcript_content.return_value = {
        'content': json.dumps({
            'start': [10],
            'end': [100],
            'text': [u'Hi, welcome to Edx.'],
        }),
        'file_name': 'edx.sjson'
    }

    self.login_and_enroll()
    # Now, make request to retrieval endpoint
    response = self.api_response(expected_response_code=200, lang='uk')

    # Expected body (the sjson cue above in SRT form) and response headers
    expected_content = u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
    expected_headers = {
        'Content-Disposition': 'attachment; filename="edx.srt"',
        'Content-Type': 'application/x-subrip; charset=utf-8'
    }

    # Assert the actual response
    self.assertEqual(response.content, expected_content)
    # `.items()` instead of the Python 2-only `.iteritems()`, so the loop runs on both 2 and 3.
    for attribute, value in expected_headers.items():
        self.assertEqual(response.get(attribute), value)
@patch(
    'xmodule.video_module.transcripts_utils.edxval_api.get_available_transcript_languages',
    Mock(return_value=['uk']),
)
@patch(
    'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
    Mock(return_value=False),
)
def test_val_transcript_feature_disabled(self):
    """
    Tests transcript retrieval view with val transcripts when
    the corresponding feature is disabled.

    The edx-val lookup is mocked to report 'uk', but the feature flag is
    patched off, so requesting 'uk' is expected to return 404.
    """
    self.login_and_enroll()

    # val transcripts are disabled, so the retrieval endpoint must answer 404.
    requested_language = 'uk'
    self.api_response(expected_response_code=404, lang=requested_language)
...@@ -6,6 +6,7 @@ only displayed at the course level. This is because it makes it a lot easier to ...@@ -6,6 +6,7 @@ only displayed at the course level. This is because it makes it a lot easier to
optimize and reason about, and it avoids having to tackle the bigger problem of optimize and reason about, and it avoids having to tackle the bigger problem of
general XBlock representation in this rather specialized formatting. general XBlock representation in this rather specialized formatting.
""" """
import os
from functools import partial from functools import partial
from django.http import Http404, HttpResponse from django.http import Http404, HttpResponse
...@@ -16,6 +17,11 @@ from rest_framework.response import Response ...@@ -16,6 +17,11 @@ from rest_framework.response import Response
from mobile_api.models import MobileApiConfig from mobile_api.models import MobileApiConfig
from xmodule.exceptions import NotFoundError from xmodule.exceptions import NotFoundError
from xmodule.modulestore.django import modulestore from xmodule.modulestore.django import modulestore
from xmodule.video_module.transcripts_utils import (
get_video_transcript_content,
is_val_transcript_feature_enabled_for_course,
Transcript,
)
from ..decorators import mobile_course_access, mobile_view from ..decorators import mobile_course_access, mobile_view
from .serializers import BlockOutline, video_summary from .serializers import BlockOutline, video_summary
...@@ -111,14 +117,31 @@ class VideoTranscripts(generics.RetrieveAPIView): ...@@ -111,14 +117,31 @@ class VideoTranscripts(generics.RetrieveAPIView):
block_id = kwargs['block_id'] block_id = kwargs['block_id']
lang = kwargs['lang'] lang = kwargs['lang']
usage_key = BlockUsageLocator( usage_key = BlockUsageLocator(course.id, block_type='video', block_id=block_id)
course.id, block_type="video", block_id=block_id video_descriptor = modulestore().get_item(usage_key)
) feature_enabled = is_val_transcript_feature_enabled_for_course(usage_key.course_key)
try: try:
video_descriptor = modulestore().get_item(usage_key) transcripts = video_descriptor.get_transcripts_info(include_val_transcripts=feature_enabled)
transcripts = video_descriptor.get_transcripts_info()
content, filename, mimetype = video_descriptor.get_transcript(transcripts, lang=lang) content, filename, mimetype = video_descriptor.get_transcript(transcripts, lang=lang)
except (NotFoundError, ValueError, KeyError): except (ValueError, NotFoundError):
# Fallback mechanism for edx-val transcripts
transcript = None
if feature_enabled:
transcript = get_video_transcript_content(
language_code=lang,
edx_video_id=video_descriptor.edx_video_id,
youtube_id_1_0=video_descriptor.youtube_id_1_0,
html5_sources=video_descriptor.html5_sources,
)
if not transcript:
raise Http404(u'Transcript not found for {}, lang: {}'.format(block_id, lang))
base_name, __ = os.path.splitext(os.path.basename(transcript['file_name']))
filename = '{base_name}.srt'.format(base_name=base_name)
content = Transcript.convert(transcript['content'], 'sjson', 'srt')
mimetype = Transcript.mime_types['srt']
except KeyError:
raise Http404(u"Transcript not found for {}, lang: {}".format(block_id, lang)) raise Http404(u"Transcript not found for {}, lang: {}".format(block_id, lang))
response = HttpResponse(content, content_type=mimetype) response = HttpResponse(content, content_type=mimetype)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment