Unverified Commit 00b8ded8 by Mushtaq Ali Committed by GitHub

Merge pull request #134 from edx/import_video_transcripts

Import video transcripts
parents 6469fc26 7ef924d0
......@@ -25,7 +25,7 @@ from django.dispatch import receiver
from django.utils.six import python_2_unicode_compatible
from model_utils.models import TimeStampedModel
from edxval.utils import (get_video_image_storage,
from edxval.utils import (TranscriptFormat, get_video_image_storage,
get_video_transcript_storage, video_image_path,
video_transcript_path)
......@@ -373,16 +373,6 @@ class TranscriptProviderType(object):
)
class TranscriptFormat(object):
    """
    Transcript file format identifiers.

    CHOICES pairs each format value with a display name.
    """
    SRT, SJSON = 'srt', 'sjson'

    CHOICES = ((SRT, 'SubRip'), (SJSON, 'SRT JSON'))
class CustomizableFileField(models.FileField):
"""
Subclass of FileField that allows custom settings to not
......
......@@ -4,14 +4,19 @@
Constants used for tests.
"""
from edxval.models import (
TranscriptFormat,
TranscriptProviderType,
Cielo24Fidelity,
Cielo24Turnaround,
ThreePlayTurnaround
)
from edxval.utils import TranscriptFormat
EDX_VIDEO_ID = "itchyjacket"
EXPORT_IMPORT_COURSE_DIR = u'course'
EXPORT_IMPORT_STATIC_DIR = u'static'
"""
Generic Profiles for manually creating profile objects
"""
......@@ -363,12 +368,46 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
encoded_videos=[],
)
# Raw transcript payloads used as `file_data` by the transcript fixtures,
# keyed by a short fixture name. The string contents are test data and
# must be kept exactly as written.
TRANSCRIPT_DATA = {
# SubRip-style cue text with two cues; the second cue's text is non-ASCII.
"overwatch": """
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.""",
# SubRip-style cue text with a single cue (paired with TranscriptFormat.SRT fixtures).
"flash": """
1
00:00:07,180 --> 00:00:08,460
This is Flash line 1.""",
# JSON document with "start", "end" and "text" lists (paired with TranscriptFormat.SJSON fixtures).
"wow": """{\n "start": [10],\n "end": [100],\n "text": ["Hi, welcome to edxval."]\n}\n"""
}
# Transcript fixture: custom provider, SRT format, carrying the "flash" payload.
VIDEO_TRANSCRIPT_CUSTOM_SRT = {
    'language_code': 'en',
    'transcript': 'edxval/tests/data/The_Flash.srt',
    'provider': TranscriptProviderType.CUSTOM,
    'file_format': TranscriptFormat.SRT,
    'file_data': TRANSCRIPT_DATA['flash'],
}
# Transcript fixture: custom provider, SJSON format, carrying the "wow" payload.
VIDEO_TRANSCRIPT_CUSTOM_SJSON = {
    'language_code': 'en',
    'transcript': 'edxval/tests/data/wow.sjson',
    'provider': TranscriptProviderType.CUSTOM,
    'file_format': TranscriptFormat.SJSON,
    'file_data': TRANSCRIPT_DATA['wow'],
}
# Transcript fixture: Cielo24 provider, SRT format, for video 'super-soaker',
# carrying the "flash" payload.
VIDEO_TRANSCRIPT_CIELO24 = {
    'video_id': 'super-soaker',
    'language_code': 'en',
    'transcript': 'edxval/tests/data/The_Flash.srt',
    'provider': TranscriptProviderType.CIELO24,
    'file_format': TranscriptFormat.SRT,
    'file_data': TRANSCRIPT_DATA['flash'],
}
VIDEO_TRANSCRIPT_3PLAY = dict(
......@@ -377,6 +416,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict(
transcript='edxval/tests/data/wow.sjson',
provider=TranscriptProviderType.THREE_PLAY_MEDIA,
file_format=TranscriptFormat.SJSON,
file_data=TRANSCRIPT_DATA['wow']
)
TRANSCRIPT_PREFERENCES_CIELO24 = dict(
......
{
"start": [10],
"end": [100],
"text": ["Hi, welcome to edxval."],
}
\ No newline at end of file
"text": ["Hi, welcome to edxval."]
}
......@@ -8,10 +8,11 @@ from ddt import data, ddt, unpack
from django.core.urlresolvers import reverse
from rest_framework import status
from edxval.models import (CourseVideo, Profile, TranscriptFormat,
from edxval.models import (CourseVideo, Profile,
TranscriptProviderType, Video, VideoTranscript)
from edxval.serializers import TranscriptSerializer
from edxval.tests import APIAuthTestCase, constants
from edxval.utils import TranscriptFormat
class VideoDetail(APIAuthTestCase):
......@@ -813,6 +814,7 @@ class VideoTranscriptViewTest(APIAuthTestCase):
Tests POSTing transcript successfully.
"""
post_transcript_data = dict(self.transcript_data)
post_transcript_data.pop('file_data')
post_transcript_data['name'] = post_transcript_data.pop('transcript')
response = self.client.post(self.url, post_transcript_data, format='json')
......
......@@ -2,8 +2,22 @@
Util methods to be used in api and models.
"""
import json
from django.conf import settings
from django.core.files.storage import get_storage_class
from fs.path import combine
from pysrt import SubRipFile
class TranscriptFormat(object):
    """
    Transcript file format identifiers.

    CHOICES pairs each format value with a display name.
    """
    SRT, SJSON = 'srt', 'sjson'

    CHOICES = ((SRT, 'SubRip'), (SJSON, 'SRT JSON'))
# 3rd Party Transcription Plans
THIRD_PARTY_TRANSCRIPTION_PLANS = {
......@@ -169,3 +183,35 @@ def get_video_transcript_storage():
# during edx-platform loading this method gets called but settings are not ready yet
# so in that case we will return default(FileSystemStorage) storage class instance
return get_storage_class()()
def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """
    Write `file_data` to the file `file_name` under `static_dir` in `file_system`.

    Arguments:
        file_data (str): Data to store into the file.
        file_name (str): Name of the file to be created.
        file_system: File system to write into; it must provide
            `open(path, mode)` usable as a context manager (the previous
            documentation described it as an OSFS import file system).
        static_dir (str): Directory, within `file_system`, in which the file is created.
    """
    target_path = combine(static_dir, file_name)
    # Opened in binary write mode, exactly as callers previously relied on.
    with file_system.open(target_path, 'wb') as target_file:
        target_file.write(file_data)
def get_transcript_format(transcript_content):
    """
    Returns transcript format.

    Content that parses as JSON is reported as SJSON. Otherwise the content
    is parsed as SubRip, and SRT is reported when at least one subtitle is found.

    Arguments:
        transcript_content (str): Transcript file content.

    Returns:
        TranscriptFormat.SJSON or TranscriptFormat.SRT.

    Raises:
        Any exception `SubRipFile.from_string` raises under `ERROR_RAISE`
        when the content is not JSON and cannot be parsed as SubRip.
    """
    try:
        # Only whether the content parses as JSON matters here; the decoded
        # value itself is not used.
        json.loads(transcript_content)
    except ValueError:
        # With error handling (set to 'ERROR_RAISE'), we will be getting
        # the exception if something went wrong in parsing the transcript.
        srt_subs = SubRipFile.from_string(transcript_content, error_handling=SubRipFile.ERROR_RAISE)
        if len(srt_subs) > 0:
            return TranscriptFormat.SRT
        # NOTE(review): content that is not JSON and yields no subtitles
        # falls through and is reported as SJSON; this existing behavior is
        # preserved -- confirm with callers whether it is intended.
    return TranscriptFormat.SJSON
......@@ -15,13 +15,13 @@ from rest_framework_oauth.authentication import OAuth2Authentication
from edxval.api import create_or_update_video_transcript
from edxval.models import (
CourseVideo,
TranscriptFormat,
TranscriptProviderType,
Video,
VideoImage,
VideoTranscript
)
from edxval.serializers import VideoSerializer
from edxval.utils import TranscriptFormat
LOGGER = logging.getLogger(__name__) # pylint: disable=C0103
......
......@@ -10,3 +10,4 @@ django-storages
enum34
lxml
pillow
pysrt==0.4.7
......@@ -41,7 +41,7 @@ def load_requirements(*requirements_paths):
setup(
name='edxval',
version='0.1.12',
version='0.1.13',
author='edX',
url='http://github.com/edx/edx-val',
description='edx-val',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment