Import contentstore transcripts - EDUCATOR-2403

c8f8219a · Mushtaq Ali · b8a64a57 · c8f8219a · c8f8219a · c8f8219a
Commit c8f8219a authored Mar 13, 2018 by Mushtaq Ali
Showing with 55 additions and 19 deletions

edxval/api.py
+0 -0

edxval/models.py
+1 -11

edxval/tests/constants.py
+20 -6

edxval/tests/test_api.py
+0 -0

edxval/tests/test_views.py
+2 -1

edxval/utils.py
+30 -0

edxval/views.py
+1 -1

requirements/base.in
+1 -0

No files found.
--- a/edxval/api.py
+++ b/edxval/api.py
--- a/edxval/models.py
+++ b/edxval/models.py
@@ -25,7 +25,7 @@ from django.dispatch import receiver
 from django.utils.six import python_2_unicode_compatible
 from model_utils.models import TimeStampedModel
-from edxval.utils import (get_video_image_storage,
+from edxval.utils import (TranscriptFormat, get_video_image_storage,
                          get_video_transcript_storage, video_image_path,
                          video_transcript_path)
@@ -373,16 +373,6 @@ class TranscriptProviderType(object):
    )
-class TranscriptFormat(object):
-    SRT = 'srt'
-    SJSON = 'sjson'
-    CHOICES = (
-        (SRT, 'SubRip'),
-        (SJSON, 'SRT JSON')
-    )
 class CustomizableFileField(models.FileField):
    """
    Subclass of FileField that allows custom settings to not

--- a/edxval/tests/constants.py
+++ b/edxval/tests/constants.py
@@ -4,15 +4,17 @@
 Constants used for tests.
 """
 from edxval.models import (
-    TranscriptFormat,
    TranscriptProviderType,
    Cielo24Fidelity,
    Cielo24Turnaround,
    ThreePlayTurnaround
 )
+from edxval.utils import TranscriptFormat
 EDX_VIDEO_ID = "itchyjacket"
+EXPORT_IMPORT_COURSE_DIR = u'course'
 EXPORT_IMPORT_STATIC_DIR = u'static'
 """
@@ -380,13 +382,25 @@ I am overwatch.
 1
 00:00:07,180 --> 00:00:08,460
 This is Flash line 1.""",
-    "wow": {
+    "wow": """{\n   "start": [10],\n   "end": [100],\n   "text": ["Hi, welcome to edxval."]\n}\n"""
-        "start": [10],
-        "end": [100],
-        "text": ["Hi, welcome to edxval."],
-    }
 }
+# Test fixture: an English transcript with the CUSTOM provider in SRT format.
+# `file_data` is the 'flash' entry of TRANSCRIPT_DATA; `transcript` is the
+# path string of the corresponding .srt file.
+VIDEO_TRANSCRIPT_CUSTOM_SRT = dict(
+    language_code='en',
+    transcript='edxval/tests/data/The_Flash.srt',
+    provider=TranscriptProviderType.CUSTOM,
+    file_format=TranscriptFormat.SRT,
+    file_data=TRANSCRIPT_DATA['flash']
+)
+# Test fixture: an English transcript with the CUSTOM provider in SJSON format.
+# `file_data` is the 'wow' entry of TRANSCRIPT_DATA (a JSON document held as a
+# string); `transcript` is the path string of the corresponding .sjson file.
+VIDEO_TRANSCRIPT_CUSTOM_SJSON = dict(
+    language_code='en',
+    transcript='edxval/tests/data/wow.sjson',
+    provider=TranscriptProviderType.CUSTOM,
+    file_format=TranscriptFormat.SJSON,
+    file_data=TRANSCRIPT_DATA['wow']
+)
 VIDEO_TRANSCRIPT_CIELO24 = dict(
    video_id='super-soaker',
    language_code='en',

--- a/edxval/tests/test_api.py
+++ b/edxval/tests/test_api.py
--- a/edxval/tests/test_views.py
+++ b/edxval/tests/test_views.py
@@ -8,10 +8,11 @@ from ddt import data, ddt, unpack
 from django.core.urlresolvers import reverse
 from rest_framework import status
-from edxval.models import (CourseVideo, Profile, TranscriptFormat,
+from edxval.models import (CourseVideo, Profile,
                           TranscriptProviderType, Video, VideoTranscript)
 from edxval.serializers import TranscriptSerializer
 from edxval.tests import APIAuthTestCase, constants
+from edxval.utils import TranscriptFormat
 class VideoDetail(APIAuthTestCase):

--- a/edxval/utils.py
+++ b/edxval/utils.py
@@ -2,9 +2,21 @@
 Util methods to be used in api and models.
 """
+import json
 from django.conf import settings
 from django.core.files.storage import get_storage_class
 from fs.path import combine
+from pysrt import SubRipFile
+class TranscriptFormat(object):
+    """
+    Identifiers for the transcript file formats: SubRip ('srt') and
+    SRT JSON ('sjson').
+    """
+    SRT = 'srt'
+    SJSON = 'sjson'
+    # (value, label) pairs for the formats above.
+    # NOTE(review): presumably consumed as model field choices - confirm.
+    CHOICES = (
+        (SRT, 'SubRip'),
+        (SJSON, 'SRT JSON')
+    )
 # 3rd Party Transcription Plans
@@ -185,3 +197,21 @@ def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """
    with file_system.open(combine(static_dir, file_name), 'wb') as f:
        f.write(file_data)
+def get_transcript_format(transcript_content):
+    """
+    Detect the format of a transcript from its content.
+    Arguments:
+        transcript_content (str): Transcript file content.
+    Returns:
+        TranscriptFormat.SJSON if the content parses as JSON, otherwise
+        TranscriptFormat.SRT if it parses as SubRip with at least one
+        subtitle.
+    Raises:
+        Any exception raised by SubRipFile.from_string, which is called
+        with error_handling=SubRipFile.ERROR_RAISE when the content is
+        not valid JSON.
+    """
+    try:
+        # Only whether the content parses matters; the parsed value is unused.
+        json.loads(transcript_content)
+    except ValueError:
+        # Not JSON, so try SubRip. ERROR_RAISE is requested so that a parsing
+        # problem raises an exception instead of passing silently.
+        srt_subs = SubRipFile.from_string(transcript_content, error_handling=SubRipFile.ERROR_RAISE)
+        if len(srt_subs) > 0:
+            return TranscriptFormat.SRT
+        # NOTE(review): content that is not JSON and yields zero subtitles
+        # (e.g. an empty string) falls through and is reported as SJSON.
+        # Kept for backward compatibility - confirm this is intended.
+    return TranscriptFormat.SJSON
--- a/edxval/views.py
+++ b/edxval/views.py
@@ -15,13 +15,13 @@ from rest_framework_oauth.authentication import OAuth2Authentication
 from edxval.api import create_or_update_video_transcript
 from edxval.models import (
    CourseVideo,
-    TranscriptFormat,
    TranscriptProviderType,
    Video,
    VideoImage,
    VideoTranscript
 )
 from edxval.serializers import VideoSerializer
+from edxval.utils import TranscriptFormat
 LOGGER = logging.getLogger(__name__)  # pylint: disable=C0103

--- a/requirements/base.in
+++ b/requirements/base.in
@@ -10,3 +10,4 @@ django-storages
 enum34
 lxml
 pillow
+pysrt==0.4.7