BLD-837: Fix download transcript behavior.

d9ec9cec · polesye · Adam Palay · 227197ec · d9ec9cec · d9ec9cec
Commit d9ec9cec authored Feb 13, 2014 by polesye Committed by Adam Palay Feb 14, 2014
Showing with 130 additions and 140 deletions

cms/djangoapps/contentstore/features/video-editor.py
+1 -1

common/lib/xmodule/xmodule/tests/test_video.py
+30 -0

common/lib/xmodule/xmodule/video_module.py
+81 -47

lms/djangoapps/courseware/tests/test_video_mongo.py
+18 -92

No files found.
--- a/cms/djangoapps/contentstore/features/video-editor.py
+++ b/cms/djangoapps/contentstore/features/video-editor.py
@@ -45,7 +45,7 @@ def correct_video_settings(_step):
        ['HTML5 Transcript', '', False],
        ['Show Transcript', 'True', False],
        ['Start Time', '00:00:00', False],
-        # ['Transcript Download Allowed', 'False', False],
+        ['Transcript Download Allowed', 'False', False],
        ['Video Download Allowed', 'False', False],
        ['Video Sources', '', False],
        ['Youtube ID', 'OEoXaMPEzfM', False],

--- a/common/lib/xmodule/xmodule/tests/test_video.py
+++ b/common/lib/xmodule/xmodule/tests/test_video.py
@@ -277,6 +277,36 @@ class VideoDescriptorImportTestCase(unittest.TestCase):
            'data': ''
        })
+    def test_from_xml_missing_download_track(self):
+        """
+        Ensure that attributes have the right values if they aren't
+        explicitly set in XML.
+        """
+        module_system = DummySystem(load_error_modules=True)
+        xml_data = '''
+            <video display_name="Test Video"
+                   youtube="1.0:p2Q6BrNhdh8,1.25:1EeWXzPdhSA"
+                   show_captions="true">
+              <source src="http://www.example.com/source.mp4"/>
+              <track src="http://www.example.com/track"/>
+            </video>
+        '''
+        output = VideoDescriptor.from_xml(xml_data, module_system, Mock())
+        self.assert_attributes_equal(output, {
+            'youtube_id_0_75': '',
+            'youtube_id_1_0': 'p2Q6BrNhdh8',
+            'youtube_id_1_25': '1EeWXzPdhSA',
+            'youtube_id_1_5': '',
+            'show_captions': True,
+            'start_time': datetime.timedelta(seconds=0.0),
+            'end_time': datetime.timedelta(seconds=0.0),
+            'track': 'http://www.example.com/track',
+            'download_track': True,
+            'download_video': True,
+            'html5_sources': ['http://www.example.com/source.mp4'],
+            'data': ''
+        })
    def test_from_xml_no_attributes(self):
        """
        Make sure settings are correct if none are explicitly set in XML.

--- a/common/lib/xmodule/xmodule/video_module.py
+++ b/common/lib/xmodule/xmodule/video_module.py
@@ -13,12 +13,12 @@ in XML.
 import json
 import logging
-from HTMLParser import HTMLParser
 from lxml import etree
 from pkg_resources import resource_string
 import datetime
 import copy
 from webob import Response
+from pysrt import SubRipTime, SubRipItem
 from django.conf import settings
@@ -116,8 +116,6 @@ class VideoFields(object):
        display_name="Video Sources",
        scope=Scope.settings,
    )
-    # `track` is deprecated field and should not be used in future.
-    # `download_track` is used instead.
    track = String(
        help="The external URL to download the timed transcript track. This appears as a link beneath the video.",
        display_name="Download Transcript",
@@ -215,14 +213,11 @@ class VideoModule(VideoFields, XModule):
            elif self.html5_sources:
                sources['main'] = self.html5_sources[0]
-        # Commented due to the reason described in BLD-811.
+        if self.download_track:
-        # if self.download_track:
+            if self.track:
-        #     if self.track:
+                track_url = self.track
-        #         track_url = self.track
+            elif self.sub:
-        #     elif self.sub:
+                track_url = self.runtime.handler_url(self, 'download_transcript')
-        #         track_url = self.runtime.handler_url(self, 'download_transcript')
-        track_url = self.track
        return self.system.render_template('video.html', {
            'ajax_url': self.system.ajax_url + '/save_user_state',
@@ -250,14 +245,14 @@ class VideoModule(VideoFields, XModule):
    def get_transcript(self, subs_id):
        '''
-        Returns transcript without timecodes.
+        Returns transcript in *.srt format.
        Args:
            `subs_id`: str, subtitles id
        Raises:
            - NotFoundError if cannot find transcript file in storage.
-            - ValueError if transcript file is incorrect JSON.
+            - ValueError if transcript file is empty or incorrect JSON.
            - KeyError if transcript file has incorrect format.
        '''
@@ -265,11 +260,13 @@ class VideoModule(VideoFields, XModule):
        content_location = StaticContent.compute_location(
            self.location.org, self.location.course, filename
        )
+        sjson_transcripts = contentstore().find(content_location)
+        str_subs = _generate_srt_from_sjson(json.loads(sjson_transcripts.data), speed=1.0)
+        if not str_subs:
+            log.debug('generate_srt_from_sjson produces no subtitles')
+            raise ValueError
-        data = contentstore().find(content_location).data
+        return str_subs
-        text = json.loads(data)['text']
-        return HTMLParser().unescape("\n".join(text))
    @XBlock.handler
@@ -289,9 +286,9 @@ class VideoModule(VideoFields, XModule):
        response = Response(
            subs,
            headerlist=[
-                ('Content-Disposition', 'attachment; filename="{0}.txt"'.format(self.sub)),
+                ('Content-Disposition', 'attachment; filename="{0}.srt"'.format(self.sub)),
            ])
-        response.content_type="text/plain; charset=utf-8"
+        response.content_type="application/x-subrip"
        return response
@@ -317,15 +314,6 @@ class VideoDescriptor(VideoFields, TabsEditingDescriptor, EmptyDataRawDescriptor
        '''
        Mostly handles backward compatibility issues.
-        Track was deprecated field, but functionality was reverted,
-        this is commented out because might be used in future.
-        ###
-        `track` is deprecated field.
-        If `track` field exists show `track` field on front-end as not-editable
-        but clearable. Dropdown `download_track` is a new field and it has value
-        True.
-        ###
        `source` is deprecated field.
        a) If `source` exists and `source` is not `html5_sources`: show `source`
            field on front-end as not-editable but clearable. Dropdown is a new
@@ -344,14 +332,6 @@ class VideoDescriptor(VideoFields, TabsEditingDescriptor, EmptyDataRawDescriptor
        editable_fields = self.editable_metadata_fields
-        # Commented due to the reason described in BLD-811.
-        # self.track_visible = False
-        # if self.track:
-        #     self.track_visible = True
-        #     download_track = editable_fields['download_track']
-        #     if not download_track['explicitly_set']:
-        #         self.download_track = True
        self.source_visible = False
        if self.source:
            # If `source` field value exist in the `html5_sources` field values,
@@ -369,16 +349,6 @@ class VideoDescriptor(VideoFields, TabsEditingDescriptor, EmptyDataRawDescriptor
    def editable_metadata_fields(self):
        editable_fields = super(VideoDescriptor, self).editable_metadata_fields
-        # Commented due to the reason described in BLD-811.
-        # if hasattr(self, 'track_visible'):
-        #     if self.track_visible:
-        #         editable_fields['track']['non_editable'] = True
-        #     else:
-        #         editable_fields.pop('track')
-        if 'download_track' in editable_fields:
-            editable_fields.pop('download_track')
        if hasattr(self, 'source_visible'):
            if self.source_visible:
                editable_fields['source']['non_editable'] = True
@@ -577,10 +547,17 @@ class VideoDescriptor(VideoFields, TabsEditingDescriptor, EmptyDataRawDescriptor
                    value = deserialize_field(cls.fields[attr], value)
                field_data[attr] = value
-        # Add `source` for backwards compatibility if xml doesn't  have `download_video`.
+        # For backwards compatibility: Add `source` if XML doesn't have `download_video`
+        # attribute.
        if 'download_video' not in field_data and sources:
            field_data['source'] = field_data['html5_sources'][0]
+        # For backwards compatibility: if XML doesn't have `download_track` attribute,
+        # it means that it is an old format. So, if `track` has some value,
+        # `download_track` needs to have value `True`.
+        if 'download_track' not in field_data and track is not None:
+            field_data['download_track'] = True
        return field_data
@@ -602,3 +579,60 @@ def _create_youtube_string(module):
                     for pair
                     in zip(youtube_speeds, youtube_ids)
                     if pair[1]])
+def _generate_subs(speed, source_speed, source_subs):
+    """
+    Generate transcripts from one speed to another speed.
+    Args:
+    `speed`: float, for this speed subtitles will be generated,
+    `source_speed`: float, speed of source_subs
+    `soource_subs`: dict, existing subtitles for speed `source_speed`.
+    Returns:
+    `subs`: dict, actual subtitles.
+    """
+    if speed == source_speed:
+        return source_subs
+    coefficient = 1.0 * speed / source_speed
+    subs = {
+        'start': [
+            int(round(timestamp * coefficient)) for
+            timestamp in source_subs['start']
+        ],
+        'end': [
+            int(round(timestamp * coefficient)) for
+            timestamp in source_subs['end']
+        ],
+        'text': source_subs['text']}
+    return subs
+def _generate_srt_from_sjson(sjson_subs, speed):
+    """Generate transcripts with speed = 1.0 from sjson to SubRip (*.srt).
+    :param sjson_subs: "sjson" subs.
+    :param speed: speed of `sjson_subs`.
+    :returns: "srt" subs.
+    """
+    output = ''
+    equal_len = len(sjson_subs['start']) == len(sjson_subs['end']) == len(sjson_subs['text'])
+    if not equal_len:
+        return output
+    sjson_speed_1 = _generate_subs(speed, 1, sjson_subs)
+    for i in range(len(sjson_speed_1['start'])):
+        item = SubRipItem(
+            index=i,
+            start=SubRipTime(milliseconds=sjson_speed_1['start'][i]),
+            end=SubRipTime(milliseconds=sjson_speed_1['end'][i]),
+            text=sjson_speed_1['text'][i]
+        )
+        output += (unicode(item))
+        output += '\n'
+    return output
--- a/lms/djangoapps/courseware/tests/test_video_mongo.py
+++ b/lms/djangoapps/courseware/tests/test_video_mongo.py
@@ -5,7 +5,6 @@ from mock import patch, PropertyMock
 import os
 import tempfile
 import textwrap
-import unittest
 from functools import partial
 from xmodule.contentstore.content import StaticContent
@@ -20,7 +19,6 @@ from xmodule.exceptions import NotFoundError
 class TestVideo(BaseTestXmodule):
    """Integration tests: web client + mongo."""
    CATEGORY = "video"
    DATA = SOURCE_XML
    METADATA = {}
@@ -71,7 +69,7 @@ class TestVideoYouTube(TestVideo):
            'general_speed': 1.0,
            'start': 3603.0,
            'sub': u'a_sub_file.srt.sjson',
-            'track': '',
+            'track': None,
            'youtube_streams': _create_youtube_string(self.item_module),
            'autoplay': settings.FEATURES.get('AUTOPLAY_VIDEOS', False),
            'yt_test_timeout': 1500,
@@ -126,7 +124,7 @@ class TestVideoNonYouTube(TestVideo):
            'general_speed': 1.0,
            'start': 3603.0,
            'sub': u'a_sub_file.srt.sjson',
-            'track': '',
+            'track': None,
            'youtube_streams': '1.00:OEoXaMPEzfM',
            'autoplay': settings.FEATURES.get('AUTOPLAY_VIDEOS', True),
            'yt_test_timeout': 1500,
@@ -145,7 +143,6 @@ class TestGetHtmlMethod(BaseTestXmodule):
    '''
    CATEGORY = "video"
    DATA = SOURCE_XML
-    maxDiff = None
    METADATA = {}
    def setUp(self):
@@ -222,14 +219,13 @@ class TestGetHtmlMethod(BaseTestXmodule):
            )
            self.initialize_module(data=DATA)
-            # track_url = self.item_descriptor.xmodule_runtime.handler_url(self.item_module, 'download_transcript')
+            track_url = self.item_descriptor.xmodule_runtime.handler_url(self.item_module, 'download_transcript')
            context = self.item_module.render('student_view').content
            expected_context.update({
                'ajax_url': self.item_descriptor.xmodule_runtime.ajax_url + '/save_user_state',
-                # 'track': track_url if data['expected_track_url'] == u'a_sub_file.srt.sjson' else data['expected_track_url'],
+                'track': track_url if data['expected_track_url'] == u'a_sub_file.srt.sjson' else data['expected_track_url'],
-                'track': u'http://www.example.com/track' if data['track'] else '',
                'sub': data['sub'],
                'id': self.item_module.location.html_id(),
            })
@@ -312,7 +308,7 @@ class TestGetHtmlMethod(BaseTestXmodule):
            'general_speed': 1.0,
            'start': 3603.0,
            'sub': u'a_sub_file.srt.sjson',
-            'track': '',
+            'track': None,
            'youtube_streams': '1.00:OEoXaMPEzfM',
            'autoplay': settings.FEATURES.get('AUTOPLAY_VIDEOS', True),
            'yt_test_timeout': 1500,
@@ -426,7 +422,7 @@ class TestVideoDescriptorInitialization(BaseTestXmodule):
            },
        }
        metadata = {
-            'track': '',
+            'track': None,
            'source': 'http://example.org/video.mp4',
            'html5_sources': ['http://youtu.be/OEoXaMPEzfM.mp4'],
        }
@@ -450,85 +446,6 @@ class TestVideoDescriptorInitialization(BaseTestXmodule):
        self.assertNotIn('source', fields)
        self.assertFalse(self.item_module.download_video)
-    @unittest.skip('Skipped due to the reason described in BLD-811')
-    def test_track_is_not_empty(self):
-        metatdata = {
-            'track': 'http://example.org/track',
-        }
-        self.initialize_module(metadata=metatdata)
-        fields = self.item_descriptor.editable_metadata_fields
-        self.assertIn('track', fields)
-        self.assertEqual(self.item_module.track, 'http://example.org/track')
-        self.assertTrue(self.item_module.download_track)
-        self.assertTrue(self.item_module.track_visible)
-    @unittest.skip('Skipped due to the reason described in BLD-811')
-    @patch('xmodule.x_module.XModuleDescriptor.editable_metadata_fields', new_callable=PropertyMock)
-    def test_download_track_is_explicitly_set(self, mock_editable_fields):
-        mock_editable_fields.return_value = {
-            'download_track': {
-                'default_value': False,
-                'explicitly_set': True,
-                'display_name': 'Transcript Download Allowed',
-                'help': 'Show a link beneath the video to allow students to download the transcript.',
-                'type': 'Boolean',
-                'value': False,
-                'field_name': 'download_track',
-                'options': [
-                    {'display_name': "True", "value": True},
-                    {'display_name': "False", "value": False}
-                ],
-            },
-            'track': {
-                'default_value': '',
-                'explicitly_set': False,
-                'display_name': 'Download Transcript',
-                'help': 'The external URL to download the timed transcript track.',
-                'type': 'Generic',
-                'value': u'http://example.org/track',
-                'field_name': 'track',
-                'options': [],
-            },
-            'source': {
-                'default_value': '',
-                'explicitly_set': False,
-                'display_name': 'Download Video',
-                'help': 'The external URL to download the video.',
-                'type': 'Generic',
-                'value': u'',
-                'field_name': 'source',
-                'options': [],
-            },
-        }
-        metadata = {
-            'source': '',
-            'track': 'http://example.org/track',
-        }
-        self.initialize_module(metadata=metadata)
-        fields = self.item_descriptor.editable_metadata_fields
-        self.assertIn('track', fields)
-        self.assertEqual(self.item_module.track, 'http://example.org/track')
-        self.assertFalse(self.item_module.download_track)
-        self.assertTrue(self.item_module.track_visible)
-    @unittest.skip('Skipped due to the reason described in BLD-811')
-    def test_track_is_empty(self):
-        metatdata = {
-            'track': '',
-        }
-        self.initialize_module(metadata=metatdata)
-        fields = self.item_descriptor.editable_metadata_fields
-        self.assertNotIn('track', fields)
-        self.assertEqual(self.item_module.track, '')
-        self.assertFalse(self.item_module.download_track)
-        self.assertFalse(self.item_module.track_visible)
 class TestVideoGetTranscriptsMethod(TestVideo):
    """
@@ -552,7 +469,7 @@ class TestVideoGetTranscriptsMethod(TestVideo):
        self.item_module.render('student_view')
        item = self.item_descriptor.xmodule_runtime.xmodule_instance
-        good_sjson = _create_file(content="""
+        good_sjson = _create_file(content=textwrap.dedent("""\
                {
                  "start": [
                    270,
@@ -567,13 +484,22 @@ class TestVideoGetTranscriptsMethod(TestVideo):
                    "Let&#39;s start with what is on your screen right now."
                  ]
                }
-            """)
+            """))
        _upload_file(good_sjson, self.item_module.location)
        subs_id = _get_subs_id(good_sjson.name)
        text = item.get_transcript(subs_id)
-        expected_text = "Hi, welcome to Edx.\nLet's start with what is on your screen right now."
+        expected_text = textwrap.dedent("""\
+            0
+            00:00:00,270 --> 00:00:02,720
+            Hi, welcome to Edx.
+            1
+            00:00:02,720 --> 00:00:05,430
+            Let&#39;s start with what is on your screen right now.
+            """)
        self.assertEqual(text, expected_text)