Commit ec7b78d6 by Qubad786 Committed by muzaffaryousaf

Use edx-val transcripts as a fallback on the video component

Purpose of this, is to serve transcripts from edx-val if the requested transcripts are not found in the contentstore i.e. that's where all the transcripts are living to date.
parent 845bb7b8
# -*- coding: utf-8 -*-
""" Tests for transcripts_utils. """
import copy
import ddt
import textwrap
import unittest
from uuid import uuid4
import ddt
from django.conf import settings
from django.test.utils import override_settings
......
"""Tests for items views."""
import copy
import ddt
import json
import os
import tempfile
......@@ -10,7 +11,7 @@ from uuid import uuid4
from django.conf import settings
from django.core.urlresolvers import reverse
from django.test.utils import override_settings
from mock import patch
from mock import patch, Mock
from opaque_keys.edx.keys import UsageKey
from contentstore.tests.utils import CourseTestCase, mock_requests_get
......@@ -525,7 +526,70 @@ class TestDownloadTranscripts(BaseTranscripts):
self.assertEqual(resp.status_code, 404)
@patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
def test_download_fallback_transcript(self, mock_get_video_transcript_data):
"""
Verify that the val transcript is returned if its not found in content-store.
"""
mock_get_video_transcript_data.return_value = {
'content': json.dumps({
"start": [10],
"end": [100],
"text": ["Hi, welcome to Edx."],
}),
'file_name': 'edx.sjson'
}
self.item.data = textwrap.dedent("""
<video youtube="" sub="">
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.mp4"/>
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.webm"/>
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.ogv"/>
</video>
""")
modulestore().update_item(self.item, self.user.id)
download_transcripts_url = reverse('download_transcripts')
response = self.client.get(download_transcripts_url, {'locator': self.video_usage_key})
# Expected response
expected_content = u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
expected_headers = {
'content-disposition': 'attachment; filename="edx.srt"',
'content-type': 'application/x-subrip; charset=utf-8'
}
# Assert the actual response
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, expected_content)
for attribute, value in expected_headers.iteritems():
self.assertEqual(response.get(attribute), value)
@patch(
'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
Mock(return_value=False),
)
def test_download_fallback_transcript_feature_disabled(self):
"""
Verify the transcript download when feature is disabled.
"""
self.item.data = textwrap.dedent("""
<video youtube="" sub="">
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.mp4"/>
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.webm"/>
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.ogv"/>
</video>
""")
modulestore().update_item(self.item, self.user.id)
download_transcripts_url = reverse('download_transcripts')
response = self.client.get(download_transcripts_url, {'locator': self.video_usage_key})
# Assert the actual response
self.assertEqual(response.status_code, 404)
@ddt.ddt
class TestCheckTranscripts(BaseTranscripts):
"""
Tests for '/transcripts/check' url.
......@@ -760,6 +824,58 @@ class TestCheckTranscripts(BaseTranscripts):
self.assertEqual(resp.status_code, 400)
self.assertEqual(json.loads(resp.content).get('status'), 'Transcripts are supported only for "video" modules.')
@ddt.data(
(True, 'found'),
(False, 'not_found')
)
@ddt.unpack
@patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled')
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data', Mock(return_value=True))
def test_command_for_fallback_transcript(self, feature_enabled, expected_command, video_transcript_feature):
"""
Verify the command if a transcript is not found in content-store but
its there in edx-val.
"""
video_transcript_feature.return_value = feature_enabled
self.item.data = textwrap.dedent("""
<video youtube="" sub="">
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.mp4"/>
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.webm"/>
<source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.ogv"/>
</video>
""")
modulestore().update_item(self.item, self.user.id)
# Make request to check transcript view
data = {
'locator': unicode(self.video_usage_key),
'videos': [{
'type': 'html5',
'video': "",
'mode': 'mp4',
}]
}
check_transcripts_url = reverse('check_transcripts')
response = self.client.get(check_transcripts_url, {'data': json.dumps(data)})
# Assert the response
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
json.loads(response.content),
{
u'status': u'Success',
u'subs': u'',
u'youtube_local': False,
u'is_youtube_mode': False,
u'youtube_server': False,
u'command': expected_command,
u'current_item_subs': None,
u'youtube_diff': True,
u'html5_local': [],
u'html5_equal': False,
}
)
class TestSaveTranscripts(BaseTranscripts):
"""
......
......@@ -27,16 +27,18 @@ from xmodule.exceptions import NotFoundError
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.video_module.transcripts_utils import (
GetTranscriptsFromYouTubeException,
TranscriptsRequestValidationException,
copy_or_rename_transcript,
download_youtube_subs,
GetTranscriptsFromYouTubeException,
get_video_transcript_content,
generate_srt_from_sjson,
generate_subs_from_source,
get_transcripts_from_youtube,
manage_video_subtitles_save,
remove_subs_from_store,
youtube_video_transcript_name
Transcript,
TranscriptsRequestValidationException,
youtube_video_transcript_name,
)
__all__ = [
......@@ -144,6 +146,7 @@ def download_transcripts(request):
Raises Http404 if unsuccessful.
"""
locator = request.GET.get('locator')
subs_id = request.GET.get('subs_id')
if not locator:
log.debug('GET data without "locator" property.')
raise Http404
......@@ -154,31 +157,45 @@ def download_transcripts(request):
log.debug("Can't find item by locator.")
raise Http404
subs_id = request.GET.get('subs_id')
if not subs_id:
log.debug('GET data without "subs_id" property.')
raise Http404
if item.category != 'video':
log.debug('transcripts are supported only for video" modules.')
raise Http404
filename = 'subs_{0}.srt.sjson'.format(subs_id)
content_location = StaticContent.compute_location(item.location.course_key, filename)
try:
sjson_transcripts = contentstore().find(content_location)
log.debug("Downloading subs for %s id", subs_id)
str_subs = generate_srt_from_sjson(json.loads(sjson_transcripts.data), speed=1.0)
if not str_subs:
log.debug('generate_srt_from_sjson produces no subtitles')
raise Http404
response = HttpResponse(str_subs, content_type='application/x-subrip')
response['Content-Disposition'] = 'attachment; filename="{0}.srt"'.format(subs_id)
return response
if not subs_id:
raise NotFoundError
filename = subs_id
content_location = StaticContent.compute_location(
item.location.course_key,
'subs_{filename}.srt.sjson'.format(filename=filename),
)
sjson_transcript = contentstore().find(content_location).data
except NotFoundError:
log.debug("Can't find content in storage for %s subs", subs_id)
# Try searching in VAL for the transcript as a last resort
transcript = get_video_transcript_content(
course_id=item.location.course_key,
language_code=u'en',
edx_video_id=item.edx_video_id,
youtube_id_1_0=item.youtube_id_1_0,
html5_sources=item.html5_sources,
)
if not transcript:
raise Http404
filename = os.path.splitext(os.path.basename(transcript['file_name']))[0].encode('utf8')
sjson_transcript = transcript['content']
# convert sjson content into srt format.
transcript_content = Transcript.convert(sjson_transcript, input_format='sjson', output_format='srt')
if not transcript_content:
raise Http404
# Construct an HTTP response
response = HttpResponse(transcript_content, content_type='application/x-subrip; charset=utf-8')
response['Content-Disposition'] = 'attachment; filename="{filename}.srt"'.format(filename=filename)
return response
@login_required
def check_transcripts(request):
......@@ -284,6 +301,17 @@ def check_transcripts(request):
transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1])
command, subs_to_use = _transcripts_logic(transcripts_presence, videos)
if command == 'not_found':
# Try searching in VAL for the transcript as a last resort
video_transcript = get_video_transcript_content(
course_id=item.location.course_key,
language_code=u'en',
edx_video_id=item.edx_video_id,
youtube_id_1_0=item.youtube_id_1_0,
html5_sources=item.html5_sources,
)
command = 'found' if video_transcript else command
transcripts_presence.update({
'command': command,
'subs': subs_to_use,
......
......@@ -12,12 +12,18 @@ from pysrt import SubRipTime, SubRipItem, SubRipFile
from lxml import etree
from HTMLParser import HTMLParser
from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
from xmodule.exceptions import NotFoundError
from xmodule.contentstore.content import StaticContent
from xmodule.contentstore.django import contentstore
from .bumper_utils import get_bumper_settings
try:
from edxval import api as edxval_api
except ImportError:
edxval_api = None
log = logging.getLogger(__name__)
......@@ -474,8 +480,8 @@ def get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources):
Returns list internal or external video ids.
Arguments:
edx_video_id (str): edx_video_id
youtube_id_1_0 (str): youtube id
edx_video_id (unicode): edx_video_id
youtube_id_1_0 (unicode): youtube id
html5_sources (list): html5 video ids
Returns:
......@@ -492,6 +498,28 @@ def get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources):
return external, video_ids
def get_video_transcript_content(course_id, language_code, edx_video_id, youtube_id_1_0, html5_sources):
"""
Gets video transcript content, only if the corresponding feature flag is enabled for the given `course_id`.
Arguments:
course_id(CourseKey): Course key identifying a course
language_code(unicode): Language code of the requested transcript
edx_video_id(unicode): edx-val's video identifier
youtube_id_1_0(unicode): A youtube source identifier
html5_sources(list): A list containing html5 sources
Returns:
A dict containing transcript's file name and its sjson content.
"""
transcript = None
if VideoTranscriptEnabledFlag.feature_enabled(course_id=course_id) and edxval_api:
__, video_candidate_ids = get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources)
transcript = edxval_api.get_video_transcript_data(video_candidate_ids, language_code)
return transcript
class Transcript(object):
"""
Container for transcript methods.
......
......@@ -7,6 +7,8 @@ StudioViewHandlers are handlers for video descriptor instance.
import json
import logging
import os
from datetime import datetime
from webob import Response
......@@ -21,6 +23,7 @@ from .transcripts_utils import (
TranscriptException,
TranscriptsGenerationException,
generate_sjson_for_all_speeds,
get_video_transcript_content,
youtube_speed_dict,
Transcript,
save_to_store,
......@@ -242,7 +245,24 @@ class VideoStudentViewHandlers(object):
log.debug(ex.message)
# Try to return static URL redirection as last resort
# if no translation is required
return self.get_static_transcript(request, transcripts)
response = self.get_static_transcript(request, transcripts)
if response.status_code == 404:
transcript = get_video_transcript_content(
course_id=self.course_id,
language_code=language,
edx_video_id=self.edx_video_id,
youtube_id_1_0=self.youtube_id_1_0,
html5_sources=self.html5_sources,
)
if transcript:
response = Response(
transcript['content'],
headerlist=[('Content-Language', language)],
charset='utf8',
)
response.content_type = Transcript.mime_types['sjson']
return response
except (
TranscriptException,
UnicodeDecodeError,
......@@ -260,8 +280,44 @@ class VideoStudentViewHandlers(object):
transcript_content, transcript_filename, transcript_mime_type = self.get_transcript(
transcripts, transcript_format=self.transcript_download_format, lang=lang
)
except (NotFoundError, ValueError, KeyError, UnicodeDecodeError):
log.debug("Video@download exception")
except NotFoundError:
response = Response(status=404)
# Make sure the language is set.
if lang is None:
lang = self.get_default_transcript_language(transcripts)
transcript = get_video_transcript_content(
course_id=self.course_id,
language_code=lang,
edx_video_id=self.edx_video_id,
youtube_id_1_0=self.youtube_id_1_0,
html5_sources=self.html5_sources,
)
if transcript:
transcript_content = Transcript.convert(
transcript['content'],
input_format='sjson',
output_format=self.transcript_download_format
)
# Construct the response
base_name, __ = os.path.splitext(os.path.basename(transcript['file_name']))
filename = '{base_name}.{ext}'.format(
base_name=base_name.encode('utf8'),
ext=self.transcript_download_format
)
response = Response(
transcript_content,
headerlist=[
('Content-Disposition', 'attachment; filename="{filename}"'.format(filename=filename)),
('Content-Language', lang),
],
charset='utf8',
)
response.content_type = Transcript.mime_types[self.transcript_download_format]
return response
except (ValueError, KeyError, UnicodeDecodeError):
return Response(status=404)
else:
response = Response(
......
......@@ -24,7 +24,7 @@ from pkg_resources import resource_string
from django.conf import settings
from lxml import etree
from opaque_keys.edx.locator import AssetLocator
from openedx.core.djangoapps.video_config.models import HLSPlaybackEnabledFlag
from openedx.core.djangoapps.video_config.models import HLSPlaybackEnabledFlag, VideoTranscriptEnabledFlag
from openedx.core.lib.cache_utils import memoize_in_request_cache
from openedx.core.lib.license import LicenseMixin
from xblock.core import XBlock
......@@ -42,7 +42,11 @@ from xmodule.xml_module import deserialize_field, is_pointer_tag, name_to_pathna
from .bumper_utils import bumperize
from .transcripts_utils import (
Transcript, VideoTranscriptsMixin, get_html5_ids, get_video_ids_info
get_html5_ids,
get_video_ids_info,
get_video_transcript_content,
Transcript,
VideoTranscriptsMixin,
)
from .video_handlers import VideoStudentViewHandlers, VideoStudioViewHandlers
from .video_utils import create_youtube_string, format_xml_exception_message, get_poster, rewrite_video_url
......@@ -182,6 +186,18 @@ class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers,
elif sub or other_lang:
track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?')
if not track_url:
# Check transcript's availability in edx-val
transcript = get_video_transcript_content(
course_id=self.course_id,
language_code=self.transcript_language,
edx_video_id=self.edx_video_id,
youtube_id_1_0=self.youtube_id_1_0,
html5_sources=self.html5_sources,
)
if transcript:
track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?')
transcript_language = self.get_default_transcript_language(transcripts)
native_languages = {lang: label for lang, label in settings.LANGUAGES if len(lang) == 2}
......
......@@ -11,7 +11,7 @@ import ddt
import freezegun
from mock import MagicMock, Mock, patch
from nose.plugins.attrib import attr
from webob import Request
from webob import Request, Response
from common.test.utils import normalize_repr
from openedx.core.djangoapps.contentserver.caching import del_cached_content
......@@ -370,6 +370,55 @@ class TestTranscriptDownloadDispatch(TestVideo):
self.assertEqual(response.headers['Content-Type'], 'application/x-subrip; charset=utf-8')
self.assertEqual(response.headers['Content-Disposition'], 'attachment; filename="塞.srt"')
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
@patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
@patch('xmodule.video_module.VideoModule.get_transcript', Mock(side_effect=NotFoundError))
def test_download_fallback_transcript(self, mock_get_video_transcript_data):
"""
Verify val transcript is returned as a fallback if it is not found in the content store.
"""
mock_get_video_transcript_data.return_value = {
'content': json.dumps({
"start": [10],
"end": [100],
"text": ["Hi, welcome to Edx."],
}),
'file_name': 'edx.sjson'
}
# Make request to XModule transcript handler
request = Request.blank('/download')
response = self.item.transcript(request=request, dispatch='download')
# Expected response
expected_content = u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
expected_headers = {
'Content-Disposition': 'attachment; filename="edx.srt"',
'Content-Language': u'en',
'Content-Type': 'application/x-subrip; charset=utf-8'
}
# Assert the actual response
self.assertEqual(response.status_code, 200)
self.assertEqual(response.text, expected_content)
for attribute, value in expected_headers.iteritems():
self.assertEqual(response.headers[attribute], value)
@patch(
'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
Mock(return_value=False),
)
@patch('xmodule.video_module.VideoModule.get_transcript', Mock(side_effect=NotFoundError))
def test_download_fallback_transcript_feature_disabled(self):
"""
Verify val transcript if its feature is disabled.
"""
# Make request to XModule transcript handler
request = Request.blank('/download')
response = self.item.transcript(request=request, dispatch='download')
# Assert the actual response
self.assertEqual(response.status_code, 404)
@attr(shard=1)
@ddt.ddt
......@@ -602,6 +651,55 @@ class TestTranscriptTranslationGetDispatch(TestVideo):
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred, self.course.id):
store.update_item(self.course, self.user.id)
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
@patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
@patch('xmodule.video_module.VideoModule.translation', Mock(side_effect=NotFoundError))
@patch('xmodule.video_module.VideoModule.get_static_transcript', Mock(return_value=Response(status=404)))
def test_translation_fallback_transcript(self, mock_get_video_transcript_data):
"""
Verify that the val transcript is returned as a fallback,
if it is not found in the content store.
"""
transcript = {
'content': json.dumps({
"start": [10],
"end": [100],
"text": ["Hi, welcome to Edx."],
}),
'file_name': 'edx.sjson'
}
mock_get_video_transcript_data.return_value = transcript
# Make request to XModule transcript handler
response = self.item.transcript(request=Request.blank('/translation/en'), dispatch='translation/en')
# Expected headers
expected_headers = {
'Content-Language': 'en',
'Content-Type': 'application/json'
}
# Assert the actual response
self.assertEqual(response.status_code, 200)
self.assertEqual(response.text, transcript['content'])
for attribute, value in expected_headers.iteritems():
self.assertEqual(response.headers[attribute], value)
@patch(
'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
Mock(return_value=False),
)
@patch('xmodule.video_module.VideoModule.translation', Mock(side_effect=NotFoundError))
@patch('xmodule.video_module.VideoModule.get_static_transcript', Mock(return_value=Response(status=404)))
def test_translation_fallback_transcript_feature_disabled(self):
"""
Verify that val transcript is not returned when its feature is disabled.
"""
# Make request to XModule transcript handler
response = self.item.transcript(request=Request.blank('/translation/en'), dispatch='translation/en')
# Assert the actual response
self.assertEqual(response.status_code, 404)
@attr(shard=1)
class TestStudioTranscriptTranslationGetDispatch(TestVideo):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment