Commit 4a93d531 by Adam

Merge pull request #8436 from edx/waheed/tnl2122-fix-download-transcript-error-youtube-server

transcript name param url
parents de5e12d9 ecc3473d
......@@ -270,6 +270,107 @@ class TestDownloadYoutubeSubs(ModuleStoreTestCase):
self.clear_sub_content(good_youtube_sub)
@patch('xmodule.video_module.transcripts_utils.requests.get')
def test_get_transcript_name_youtube_server_success(self, mock_get):
    """
    The transcript name is read from the track listing returned by the
    (mocked) YouTube server for the configured language.

    The language is taken unchanged from settings.YOUTUBE['TEXT_API'];
    this test presumes that default is "en", so the 'Custom' track matches.
    """
    track_listing = """
<transcript_list>
<track id="1" name="Custom" lang_code="en" />
<track id="0" name="Custom1" lang_code="en-GB"/>
</transcript_list>
"""
    # Every requests.get call made by the code under test gets this reply.
    mock_get.return_value = Mock(status_code=200, text=track_listing, content=track_listing)

    text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
    text_api['params']['v'] = 'dummy_video_id'

    found_name = transcripts_utils.youtube_video_transcript_name(text_api)
    self.assertEqual(found_name, 'Custom')
@patch('xmodule.video_module.transcripts_utils.requests.get')
def test_get_transcript_name_youtube_server_no_transcripts(self, mock_get):
    """
    An empty transcript listing from the (mocked) server yields no
    transcript name, i.e. None.
    """
    empty_listing = "<transcript_list></transcript_list>"
    mock_get.return_value = Mock(status_code=200, text=empty_listing, content=empty_listing)

    text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
    text_api['params']['v'] = 'dummy_video_id'

    found_name = transcripts_utils.youtube_video_transcript_name(text_api)
    self.assertIsNone(found_name)
@patch('xmodule.video_module.transcripts_utils.requests.get')
def test_get_transcript_name_youtube_server_language_not_exist(self, mock_get):
    """
    If no track in the listing carries the requested language code,
    the transcript name is None.
    """
    track_listing = """
<transcript_list>
<track id="1" name="Custom" lang_code="en" />
<track id="0" name="Custom1" lang_code="en-GB"/>
</transcript_list>
"""
    mock_get.return_value = Mock(status_code=200, text=track_listing, content=track_listing)

    text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
    # 'abc' matches neither lang_code present in the listing above.
    text_api['params'].update({'v': 'dummy_video_id', 'lang': 'abc'})

    found_name = transcripts_utils.youtube_video_transcript_name(text_api)
    self.assertIsNone(found_name)
def mocked_requests_get(*args, **kwargs):
    """
    Stand-in for requests.get, installed through the mock's side_effect.

    Positional arguments (the URL) are ignored; the reply is chosen solely
    by comparing the keyword arguments against a few known `params` dicts.
    Anything unrecognised gets a 404 with empty text.
    """
    # pylint: disable=no-method-argument
    track_listing_xml = """
<transcript_list>
<track id="1" name="Custom" lang_code="en" />
<track id="0" name="Custom1" lang_code="en-GB"/>
</transcript_list>
"""
    transcript_xml = textwrap.dedent("""
<transcript>
<text start="0" dur="0.27"></text>
<text start="0.27" dur="2.45">Test text 1.</text>
<text start="2.72">Test text 2.</text>
<text start="5.43" dur="1.73">Test text 3.</text>
</transcript>
""")

    # (expected `params`, extra Mock attributes) pairs, all answered with 200.
    canned_replies = (
        # Fetch without a transcript name: succeeds but carries no text.
        ({'lang': 'en', 'v': 'good_id_2'},
         {'text': ''}),
        # Listing of available transcripts.
        ({'type': 'list', 'v': 'good_id_2'},
         {'text': track_listing_xml, 'content': track_listing_xml}),
        # Fetch with the transcript name: returns the actual transcript.
        ({'lang': 'en', 'v': 'good_id_2', 'name': 'Custom'},
         {'text': transcript_xml, 'content': transcript_xml}),
    )
    for expected_params, reply_attrs in canned_replies:
        if kwargs == {'params': expected_params}:
            return Mock(status_code=200, **reply_attrs)

    return Mock(status_code=404, text='')
@patch('xmodule.video_module.transcripts_utils.requests.get', side_effect=mocked_requests_get)
def test_downloading_subs_using_transcript_name(self, mock_get):
    """
    Downloading subs must request the transcript with the `name`
    parameter in the URL and store the resulting sjson asset.
    """
    video_id = 'good_id_2'
    expected_params = {'lang': 'en', 'v': 'good_id_2', 'name': 'Custom'}
    self.clear_sub_content(video_id)

    transcripts_utils.download_youtube_subs(video_id, self.course, settings)

    # At least one request must have carried the transcript name.
    mock_get.assert_any_call('http://video.google.com/timedtext', params=expected_params)

    # Check asset status after import of transcript.
    sjson_name = 'subs_{0}.srt.sjson'.format(video_id)
    asset_location = StaticContent.compute_location(self.course.id, sjson_name)
    self.assertTrue(contentstore().find(asset_location))

    self.clear_sub_content(video_id)
class TestGenerateSubsFromSource(TestDownloadYoutubeSubs):
"""Tests for `generate_subs_from_source` function."""
......
......@@ -94,7 +94,32 @@ def save_subs_to_store(subs, subs_id, item, language='en'):
return save_to_store(filedata, filename, 'application/json', item.location)
def get_transcripts_from_youtube(youtube_id, settings, i18n):
def youtube_video_transcript_name(youtube_text_api):
    """
    Look up the name of a video's transcript for the requested language.

    `youtube_text_api` is a dict with a 'url' entry and a 'params' dict
    holding the video id under 'v' and the language code under 'lang'.

    Returns the `name` attribute of the first <track> element whose
    `lang_code` equals the requested language, or None when the listing
    request does not return 200, has no text, or contains no such track.
    """
    request_params = youtube_text_api['params']
    listing_query = {'type': 'list', 'v': request_params['v']}
    wanted_language = request_params['lang']

    # Ask the server for the list of transcripts of this video, url-form:
    # http://video.google.com/timedtext?type=list&v={VideoId}
    listing = requests.get('http://' + youtube_text_api['url'], params=listing_query)
    if listing.status_code != 200 or not listing.text:
        return None

    # pylint: disable=no-member
    xml_parser = etree.XMLParser(encoding='utf-8')
    transcript_list = etree.fromstring(listing.content, parser=xml_parser)

    # Walk the direct children of the root; only <track> entries matter.
    for track in transcript_list:
        if track.tag != 'track':
            continue
        if track.get('lang_code', '') == wanted_language:
            return track.get('name')
    return None
def get_transcripts_from_youtube(youtube_id, settings, i18n, youtube_transcript_name=''):
"""
Gets transcripts from youtube for youtube_id.
......@@ -109,6 +134,12 @@ def get_transcripts_from_youtube(youtube_id, settings, i18n):
youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
youtube_text_api['params']['v'] = youtube_id
# if the transcript name is not empty on youtube server we have to pass
# name param in url in order to get transcript
# example http://video.google.com/timedtext?lang=en&v={VideoId}&name={transcript_name}
youtube_transcript_name = youtube_video_transcript_name(youtube_text_api)
if youtube_transcript_name:
youtube_text_api['params']['name'] = youtube_transcript_name
data = requests.get('http://' + youtube_text_api['url'], params=youtube_text_api['params'])
if data.status_code != 200 or not data.text:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment