Merge pull request #134 from edx/import_video_transcripts

Import video transcripts

Merge pull request #134 from edx/import_video_transcripts
Import video transcripts
00b8ded8 · Mushtaq Ali · GitHub · 6469fc26 · 7ef924d0 · 00b8ded8
Unverified Commit 00b8ded8 authored Apr 13, 2018 by Mushtaq Ali Committed by GitHub Apr 13, 2018
10 changed files
--- a/edxval/api.py
+++ b/edxval/api.py
--- a/edxval/models.py
+++ b/edxval/models.py
@@ -25,7 +25,7 @@ from django.dispatch import receiver
 from django.utils.six import python_2_unicode_compatible
 from model_utils.models import TimeStampedModel

-from edxval.utils import (get_video_image_storage,
+from edxval.utils import (TranscriptFormat, get_video_image_storage,
                          get_video_transcript_storage, video_image_path,
                          video_transcript_path)

@@ -373,16 +373,6 @@ class TranscriptProviderType(object):
    )


-class TranscriptFormat(object):
-    SRT = 'srt'
-    SJSON = 'sjson'
-
-    CHOICES = (
-        (SRT, 'SubRip'),
-        (SJSON, 'SRT JSON')
-    )
-
-
 class CustomizableFileField(models.FileField):
    """
    Subclass of FileField that allows custom settings to not

--- a/edxval/tests/constants.py
+++ b/edxval/tests/constants.py
@@ -4,14 +4,19 @@
 Constants used for tests.
 """
 from edxval.models import (
-    TranscriptFormat,
    TranscriptProviderType,
    Cielo24Fidelity,
    Cielo24Turnaround,
    ThreePlayTurnaround
 )

+from edxval.utils import TranscriptFormat
+
 EDX_VIDEO_ID = "itchyjacket"
+
+EXPORT_IMPORT_COURSE_DIR = u'course'
+EXPORT_IMPORT_STATIC_DIR = u'static'
+
 """
 Generic Profiles for manually creating profile objects
 """
@@ -363,12 +368,46 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
    encoded_videos=[],
 )

+
+TRANSCRIPT_DATA = {
+    "overwatch": """
+1
+00:00:14,370 --> 00:00:16,530
+I am overwatch.
+
+2
+00:00:16,500 --> 00:00:18,600
+可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.""",
+    "flash": """
+1
+00:00:07,180 --> 00:00:08,460
+This is Flash line 1.""",
+    "wow": """{\n   "start": [10],\n   "end": [100],\n   "text": ["Hi, welcome to edxval."]\n}\n"""
+}
+
+VIDEO_TRANSCRIPT_CUSTOM_SRT = dict(
+    language_code='en',
+    transcript='edxval/tests/data/The_Flash.srt',
+    provider=TranscriptProviderType.CUSTOM,
+    file_format=TranscriptFormat.SRT,
+    file_data=TRANSCRIPT_DATA['flash']
+)
+
+VIDEO_TRANSCRIPT_CUSTOM_SJSON = dict(
+    language_code='en',
+    transcript='edxval/tests/data/wow.sjson',
+    provider=TranscriptProviderType.CUSTOM,
+    file_format=TranscriptFormat.SJSON,
+    file_data=TRANSCRIPT_DATA['wow']
+)
+
 VIDEO_TRANSCRIPT_CIELO24 = dict(
    video_id='super-soaker',
    language_code='en',
    transcript='edxval/tests/data/The_Flash.srt',
    provider=TranscriptProviderType.CIELO24,
    file_format=TranscriptFormat.SRT,
+    file_data=TRANSCRIPT_DATA['flash']
 )

 VIDEO_TRANSCRIPT_3PLAY = dict(
@@ -377,6 +416,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict(
    transcript='edxval/tests/data/wow.sjson',
    provider=TranscriptProviderType.THREE_PLAY_MEDIA,
    file_format=TranscriptFormat.SJSON,
+    file_data=TRANSCRIPT_DATA['wow']
 )

 TRANSCRIPT_PREFERENCES_CIELO24 = dict(

--- a/edxval/tests/data/wow.sjson
+++ b/edxval/tests/data/wow.sjson
 {
   "start": [10],
   "end": [100],
-   "text": ["Hi, welcome to edxval."],
+   "text": ["Hi, welcome to edxval."]
 }
--- a/edxval/tests/test_api.py
+++ b/edxval/tests/test_api.py
--- a/edxval/tests/test_views.py
+++ b/edxval/tests/test_views.py
@@ -8,10 +8,11 @@ from ddt import data, ddt, unpack
 from django.core.urlresolvers import reverse
 from rest_framework import status

-from edxval.models import (CourseVideo, Profile, TranscriptFormat,
+from edxval.models import (CourseVideo, Profile,
                           TranscriptProviderType, Video, VideoTranscript)
 from edxval.serializers import TranscriptSerializer
 from edxval.tests import APIAuthTestCase, constants
+from edxval.utils import TranscriptFormat


 class VideoDetail(APIAuthTestCase):
@@ -813,6 +814,7 @@ class VideoTranscriptViewTest(APIAuthTestCase):
        Tests POSTing transcript successfully.
        """
        post_transcript_data = dict(self.transcript_data)
+        post_transcript_data.pop('file_data')
        post_transcript_data['name'] = post_transcript_data.pop('transcript')

        response = self.client.post(self.url, post_transcript_data, format='json')

--- a/edxval/utils.py
+++ b/edxval/utils.py
@@ -2,8 +2,22 @@
 Util methods to be used in api and models.
 """

+import json
 from django.conf import settings
 from django.core.files.storage import get_storage_class
+from fs.path import combine
+from pysrt import SubRipFile
+
+
+class TranscriptFormat(object):  # Transcript file format identifiers.
+    SRT = 'srt'
+    SJSON = 'sjson'
+
+    CHOICES = (  # (stored value, display label) pairs; presumably for model field choices -- confirm
+        (SRT, 'SubRip'),
+        (SJSON, 'SRT JSON')
+    )
+

 # 3rd Party Transcription Plans
 THIRD_PARTY_TRANSCRIPTION_PLANS = {
@@ -169,3 +183,35 @@ def get_video_transcript_storage():
        # during edx-platform loading this method gets called but settings are not ready yet
        # so in that case we will return default(FileSystemStorage) storage class instance
        return get_storage_class()()
+
+
+def create_file_in_fs(file_data, file_name, file_system, static_dir):
+    """
+    Writes `file_data` to `file_name` under `static_dir` in the given file system.
+
+    Arguments:
+        file_data (str): Data to store into the file.
+        file_name (str): File name of the file to be created.
+        file_system (OSFS): File system in which the file is created.
+        static_dir (str): Directory, within `file_system`, to write the file into.
+    """
+    with file_system.open(combine(static_dir, file_name), 'wb') as f:
+        f.write(file_data)
+
+
+def get_transcript_format(transcript_content):
+    """
+    Returns the transcript format (a TranscriptFormat value) detected from the content.
+
+    Arguments:
+        transcript_content (str): Transcript file content.
+    """
+    try:
+        sjson_obj = json.loads(transcript_content)  # parsed value is unused; only parse success matters
+    except ValueError:
+        # Not valid JSON, so try SRT. With error_handling set to ERROR_RAISE,
+        # pysrt raises if the transcript cannot be parsed; that error propagates.
+        srt_subs = SubRipFile.from_string(transcript_content, error_handling=SubRipFile.ERROR_RAISE)
+        if len(srt_subs) > 0:
+            return TranscriptFormat.SRT
+    return TranscriptFormat.SJSON  # NOTE(review): also reached for non-JSON content with zero SRT subtitles -- confirm intended
--- a/edxval/views.py
+++ b/edxval/views.py
@@ -15,13 +15,13 @@ from rest_framework_oauth.authentication import OAuth2Authentication
 from edxval.api import create_or_update_video_transcript
 from edxval.models import (
    CourseVideo,
-    TranscriptFormat,
    TranscriptProviderType,
    Video,
    VideoImage,
    VideoTranscript
 )
 from edxval.serializers import VideoSerializer
+from edxval.utils import TranscriptFormat

 LOGGER = logging.getLogger(__name__)  # pylint: disable=C0103


--- a/requirements/base.in
+++ b/requirements/base.in
@@ -10,3 +10,4 @@ django-storages
 enum34
 lxml
 pillow
+pysrt==0.4.7
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def load_requirements(*requirements_paths):

 setup(
    name='edxval',
-    version='0.1.12',
+    version='0.1.13',
    author='edX',
    url='http://github.com/edx/edx-val',
    description='edx-val',