video component basic tab upload transcript to S3

EDUCATOR-1758

video component basic tab upload transcript to S3
EDUCATOR-1758
932a7892 · muhammad-ammar · 3cd9069a · 932a7892 · 932a7892 · 932a7892
Commit 932a7892 authored Nov 23, 2017 by muhammad-ammar
Showing with 77 additions and 24 deletions

cms/djangoapps/contentstore/views/tests/test_transcripts.py
+30 -11

cms/djangoapps/contentstore/views/transcripts_ajax.py
+23 -13

cms/envs/bok_choy.py
+13 -0

common/test/db_fixtures/video_config.json
+11 -0

No files found.
--- a/cms/djangoapps/contentstore/views/tests/test_transcripts.py
+++ b/cms/djangoapps/contentstore/views/tests/test_transcripts.py
@@ -21,6 +21,7 @@ from xmodule.contentstore.django import contentstore
 from xmodule.exceptions import NotFoundError
 from xmodule.modulestore.django import modulestore
 from xmodule.video_module import transcripts_utils
+from edxval import api as edxval_api
 TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
 TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
@@ -106,11 +107,11 @@ class TestUploadTranscripts(BaseTranscripts):
        self.good_srt_file = tempfile.NamedTemporaryFile(suffix='.srt')
        self.good_srt_file.write(textwrap.dedent("""
-            1
+            0
            00:00:10,500 --> 00:00:13,000
            Elephant's Dream
-            2
+            1
            00:00:15,000 --> 00:00:18,000
            At the left we can see...
        """))
@@ -134,6 +135,29 @@ class TestUploadTranscripts(BaseTranscripts):
        self.ufeff_srt_file = tempfile.NamedTemporaryFile(suffix='.srt')
+    def assert_transcript_upload(self, subs_id, expected_transcript_content):
+        """
+        Verify that transcript is uploaded as expected.
+        Arguments:
+            subs_id (str): subtitle id
+            expected_transcript_content (str): transcript content to be checked
+        """
+        # verify that the transcript is not stored in contentstore
+        content_location = StaticContent.compute_location(self.course.id, 'subs_{0}.srt.sjson'.format(subs_id))
+        with self.assertRaises(NotFoundError):
+            contentstore().find(content_location)
+        # verify uploaded transcript content
+        transcript_data = edxval_api.get_video_transcript_data([subs_id], 'en')
+        sjson_transcript = transcript_data['content']
+        uploaded_transcript_content = transcripts_utils.Transcript.convert(
+            sjson_transcript,
+            input_format='sjson',
+            output_format='srt'
+        )
+        self.assertIn(expected_transcript_content.strip(), uploaded_transcript_content.strip())
    def test_success_video_module_source_subs_uploading(self):
        self.item.data = textwrap.dedent("""
            <video youtube="">
@@ -161,9 +185,9 @@ class TestUploadTranscripts(BaseTranscripts):
        item = modulestore().get_item(self.video_usage_key)
        self.assertEqual(item.sub, filename)
-        content_location = StaticContent.compute_location(
+        # move the file pointer to start of the file
-            self.course.id, 'subs_{0}.srt.sjson'.format(filename))
+        self.good_srt_file.seek(0)
-        self.assertTrue(contentstore().find(content_location))
+        self.assert_transcript_upload(filename, self.good_srt_file.read())
    def test_fail_data_without_id(self):
        link = reverse('upload_transcripts')
@@ -346,12 +370,7 @@ class TestUploadTranscripts(BaseTranscripts):
        })
        self.assertEqual(resp.status_code, 200)
-        content_location = StaticContent.compute_location(
+        self.assert_transcript_upload(filename, 'Test ufeff characters')
-            self.course.id, 'subs_{0}.srt.sjson'.format(filename))
-        self.assertTrue(contentstore().find(content_location))
-        subs_text = json.loads(contentstore().find(content_location).data).get('text')
-        self.assertIn("Test ufeff characters", subs_text)
    def tearDown(self):
        super(TestUploadTranscripts, self).tearDown()

--- a/cms/djangoapps/contentstore/views/transcripts_ajax.py
+++ b/cms/djangoapps/contentstore/views/transcripts_ajax.py
@@ -11,14 +11,16 @@ import logging
 import os
 import requests
 from django.conf import settings
 from django.contrib.auth.decorators import login_required
 from django.core.exceptions import PermissionDenied
+from django.core.files.base import ContentFile
 from django.http import Http404, HttpResponse
 from django.utils.translation import ugettext as _
+from edxval import api as edxval_api
 from opaque_keys import InvalidKeyError
 from opaque_keys.edx.keys import UsageKey
 from student.auth import has_course_author_access
 from util.json_request import JsonResponse
 from xmodule.contentstore.content import StaticContent
@@ -27,18 +29,18 @@ from xmodule.exceptions import NotFoundError
 from xmodule.modulestore.django import modulestore
 from xmodule.modulestore.exceptions import ItemNotFoundError
 from xmodule.video_module.transcripts_utils import (
+    GetTranscriptsFromYouTubeException,
+    Transcript,
+    TranscriptsRequestValidationException,
    copy_or_rename_transcript,
    download_youtube_subs,
-    GetTranscriptsFromYouTubeException,
-    get_video_transcript_content,
    generate_subs_from_source,
    get_transcripts_from_youtube,
+    get_video_transcript_content,
    is_val_transcript_feature_enabled_for_course,
    manage_video_subtitles_save,
    remove_subs_from_store,
-    Transcript,
+    youtube_video_transcript_name
-    TranscriptsRequestValidationException,
-    youtube_video_transcript_name,
 )
 __all__ = [
@@ -119,16 +121,24 @@ def upload_transcripts(request):
    if video_list:
        sub_attr = source_subs_name
        try:
-            # Generate and save for 1.0 speed, will create subs_sub_attr.srt.sjson subtitles file in storage.
+            # Generate sjson subtitles from srt subtitles
-            generate_subs_from_source({1: sub_attr}, source_subs_ext, source_subs_filedata, item)
+            sjson_subs = generate_subs_from_source({}, source_subs_ext, source_subs_filedata, item)
            for video_dict in video_list:
-                video_name = video_dict['video']
+                video_id = video_dict['video']
-                # We are creating transcripts for every video source, if in future some of video sources would be deleted.
+                # We are creating transcripts for every video source in case a video source is deleted in future.
-                # Updates item.sub with `video_name` on success.
+                edxval_api.create_or_update_video_transcript(
-                copy_or_rename_transcript(video_name, sub_attr, item, user=request.user)
+                    video_id=video_id,
+                    language_code='en',
+                    file_name='subs.sjson',  # S3 filename will be `{uuid}.sjson` like 5d30d3ehebacf6163976388cae.sjson
+                    file_format='sjson',
+                    provider='Custom',
+                    file_data=ContentFile(json.dumps(sjson_subs)),
+                )
-            response['subs'] = item.sub
+            item.sub = video_id
+            item.save_with_metadata(request.user)
+            response['subs'] = video_id
            response['status'] = 'Success'
        except Exception as ex:
            return error_response(response, ex.message)

--- a/cms/envs/bok_choy.py
+++ b/cms/envs/bok_choy.py
@@ -144,6 +144,19 @@ if RELEASE_LINE == "master":
        'course_author': 'http://edx.readthedocs.io/projects/edx-partner-course-staff',
    }
+########################### Video Transcript #################################
+VIDEO_TRANSCRIPTS_SETTINGS = dict(
+    VIDEO_TRANSCRIPTS_MAX_BYTES=3 * 1024 * 1024,    # 3 MB
+    # Backend storage
+    # STORAGE_CLASS='storages.backends.s3boto.S3BotoStorage',
+    # STORAGE_KWARGS=dict(bucket='video-transcripts-bucket'),
+    STORAGE_KWARGS=dict(
+        location=MEDIA_ROOT,
+        base_url=MEDIA_URL,
+    ),
+    DIRECTORY_PREFIX='video-transcripts/',
+)
 #####################################################################
 # Lastly, see if the developer has any local overrides.
 try:

--- a/common/test/db_fixtures/video_config.json
+++ b/common/test/db_fixtures/video_config.json
+[
+  {
+    "pk": 2,
+    "model": "video_config.videotranscriptenabledflag",
+    "fields": {
+      "enabled": 1,
+      "enabled_for_all_courses": 1,
+      "change_date": "2016-04-21 10:19:32.034856"
+    }
+  }
+]