Commit c8f8219a by Mushtaq Ali

Import contentstore transcripts - EDUCATOR-2403

parent b8a64a57
...@@ -25,7 +25,7 @@ from django.dispatch import receiver ...@@ -25,7 +25,7 @@ from django.dispatch import receiver
from django.utils.six import python_2_unicode_compatible from django.utils.six import python_2_unicode_compatible
from model_utils.models import TimeStampedModel from model_utils.models import TimeStampedModel
from edxval.utils import (get_video_image_storage, from edxval.utils import (TranscriptFormat, get_video_image_storage,
get_video_transcript_storage, video_image_path, get_video_transcript_storage, video_image_path,
video_transcript_path) video_transcript_path)
...@@ -373,16 +373,6 @@ class TranscriptProviderType(object): ...@@ -373,16 +373,6 @@ class TranscriptProviderType(object):
) )
class TranscriptFormat(object):
    """
    Identifiers for the transcript file formats handled here.
    """
    SRT = 'srt'
    SJSON = 'sjson'

    # Pairs of (format identifier, human-readable label).
    CHOICES = ((SRT, 'SubRip'), (SJSON, 'SRT JSON'))
class CustomizableFileField(models.FileField): class CustomizableFileField(models.FileField):
""" """
Subclass of FileField that allows custom settings to not Subclass of FileField that allows custom settings to not
......
...@@ -4,15 +4,17 @@ ...@@ -4,15 +4,17 @@
Constants used for tests. Constants used for tests.
""" """
from edxval.models import ( from edxval.models import (
TranscriptFormat,
TranscriptProviderType, TranscriptProviderType,
Cielo24Fidelity, Cielo24Fidelity,
Cielo24Turnaround, Cielo24Turnaround,
ThreePlayTurnaround ThreePlayTurnaround
) )
from edxval.utils import TranscriptFormat
EDX_VIDEO_ID = "itchyjacket" EDX_VIDEO_ID = "itchyjacket"
EXPORT_IMPORT_COURSE_DIR = u'course'
EXPORT_IMPORT_STATIC_DIR = u'static' EXPORT_IMPORT_STATIC_DIR = u'static'
""" """
...@@ -380,13 +382,25 @@ I am overwatch. ...@@ -380,13 +382,25 @@ I am overwatch.
1 1
00:00:07,180 --> 00:00:08,460 00:00:07,180 --> 00:00:08,460
This is Flash line 1.""", This is Flash line 1.""",
"wow": { "wow": """{\n "start": [10],\n "end": [100],\n "text": ["Hi, welcome to edxval."]\n}\n"""
"start": [10],
"end": [100],
"text": ["Hi, welcome to edxval."],
}
} }
# Fixture: English transcript from the custom provider in SRT format, with
# its file content taken from TRANSCRIPT_DATA['flash'].
VIDEO_TRANSCRIPT_CUSTOM_SRT = {
    'language_code': 'en',
    'transcript': 'edxval/tests/data/The_Flash.srt',
    'provider': TranscriptProviderType.CUSTOM,
    'file_format': TranscriptFormat.SRT,
    'file_data': TRANSCRIPT_DATA['flash'],
}
# Fixture: English transcript from the custom provider in SJSON format, with
# its file content taken from TRANSCRIPT_DATA['wow'].
VIDEO_TRANSCRIPT_CUSTOM_SJSON = {
    'language_code': 'en',
    'transcript': 'edxval/tests/data/wow.sjson',
    'provider': TranscriptProviderType.CUSTOM,
    'file_format': TranscriptFormat.SJSON,
    'file_data': TRANSCRIPT_DATA['wow'],
}
VIDEO_TRANSCRIPT_CIELO24 = dict( VIDEO_TRANSCRIPT_CIELO24 = dict(
video_id='super-soaker', video_id='super-soaker',
language_code='en', language_code='en',
......
...@@ -8,10 +8,11 @@ from ddt import data, ddt, unpack ...@@ -8,10 +8,11 @@ from ddt import data, ddt, unpack
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from rest_framework import status from rest_framework import status
from edxval.models import (CourseVideo, Profile, TranscriptFormat, from edxval.models import (CourseVideo, Profile,
TranscriptProviderType, Video, VideoTranscript) TranscriptProviderType, Video, VideoTranscript)
from edxval.serializers import TranscriptSerializer from edxval.serializers import TranscriptSerializer
from edxval.tests import APIAuthTestCase, constants from edxval.tests import APIAuthTestCase, constants
from edxval.utils import TranscriptFormat
class VideoDetail(APIAuthTestCase): class VideoDetail(APIAuthTestCase):
......
...@@ -2,9 +2,21 @@ ...@@ -2,9 +2,21 @@
Util methods to be used in api and models. Util methods to be used in api and models.
""" """
import json
from django.conf import settings from django.conf import settings
from django.core.files.storage import get_storage_class from django.core.files.storage import get_storage_class
from fs.path import combine from fs.path import combine
from pysrt import SubRipFile
class TranscriptFormat(object):
    """
    Identifiers for the transcript file formats handled here.
    """
    SRT = 'srt'
    SJSON = 'sjson'

    # Pairs of (format identifier, human-readable label).
    CHOICES = ((SRT, 'SubRip'), (SJSON, 'SRT JSON'))
# 3rd Party Transcription Plans # 3rd Party Transcription Plans
...@@ -185,3 +197,21 @@ def create_file_in_fs(file_data, file_name, file_system, static_dir): ...@@ -185,3 +197,21 @@ def create_file_in_fs(file_data, file_name, file_system, static_dir):
""" """
with file_system.open(combine(static_dir, file_name), 'wb') as f: with file_system.open(combine(static_dir, file_name), 'wb') as f:
f.write(file_data) f.write(file_data)
def get_transcript_format(transcript_content):
    """
    Returns transcript format.

    The content is first tried as JSON; anything that parses as JSON is
    reported as SJSON. Otherwise it is parsed as SubRip, and SRT is reported
    when at least one subtitle item was found.

    Arguments:
        transcript_content (str): Transcript file content.

    Returns:
        TranscriptFormat.SRT or TranscriptFormat.SJSON.

    Raises:
        Whatever `SubRipFile.from_string` raises in ERROR_RAISE mode when the
        content is neither valid JSON nor parseable SubRip.
    """
    try:
        # Only the success or failure of the parse matters; the decoded
        # object itself is not needed.
        json.loads(transcript_content)
    except ValueError:
        # With error handling (set to 'ERROR_RAISE'), we will be getting
        # the exception if something went wrong in parsing the transcript.
        srt_subs = SubRipFile.from_string(transcript_content, error_handling=SubRipFile.ERROR_RAISE)
        if srt_subs:
            return TranscriptFormat.SRT

    # NOTE(review): this is also reached when the content is not JSON and the
    # SubRip parse produced no items without raising; that case is reported
    # as SJSON, as before. Confirm this is intended.
    return TranscriptFormat.SJSON
...@@ -15,13 +15,13 @@ from rest_framework_oauth.authentication import OAuth2Authentication ...@@ -15,13 +15,13 @@ from rest_framework_oauth.authentication import OAuth2Authentication
from edxval.api import create_or_update_video_transcript from edxval.api import create_or_update_video_transcript
from edxval.models import ( from edxval.models import (
CourseVideo, CourseVideo,
TranscriptFormat,
TranscriptProviderType, TranscriptProviderType,
Video, Video,
VideoImage, VideoImage,
VideoTranscript VideoTranscript
) )
from edxval.serializers import VideoSerializer from edxval.serializers import VideoSerializer
from edxval.utils import TranscriptFormat
LOGGER = logging.getLogger(__name__) # pylint: disable=C0103 LOGGER = logging.getLogger(__name__) # pylint: disable=C0103
......
...@@ -10,3 +10,4 @@ django-storages ...@@ -10,3 +10,4 @@ django-storages
enum34 enum34
lxml lxml
pillow pillow
pysrt==0.4.7
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment