Commit ac5c5c23 by Qubad786 Committed by muhammad-ammar

Add transcript model and VEDA endpoint to create video transcripts.

parent cb392214
......@@ -8,3 +8,7 @@ omit =
**/tests/*
**/settings.py
**/migrations*
[html]
title = edx-val Python Test Coverage Report
directory = html_coverage
......@@ -69,3 +69,4 @@ venv/
venvs/
video-images/
video-transcripts/
......@@ -3,7 +3,7 @@ Admin file for django app edxval.
"""
from django.contrib import admin
from .models import Video, Profile, EncodedVideo, Subtitle, CourseVideo, VideoImage
from .models import Video, Profile, EncodedVideo, VideoTranscript, CourseVideo, VideoImage
class ProfileAdmin(admin.ModelAdmin): # pylint: disable=C0111
......@@ -48,6 +48,6 @@ class CourseVideoAdmin(admin.ModelAdmin):
admin.site.register(Profile, ProfileAdmin)
admin.site.register(Video, VideoAdmin)
admin.site.register(Subtitle)
admin.site.register(VideoTranscript)
admin.site.register(VideoImage, VideoImageAdmin)
admin.site.register(CourseVideo, CourseVideoAdmin)
......@@ -5,21 +5,18 @@ The internal API for VAL.
"""
import logging
from lxml.etree import Element, SubElement
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from enum import Enum
from lxml.etree import Element, SubElement
from django.core.exceptions import ValidationError, ObjectDoesNotExist
from django.core.files.base import ContentFile
from edxval.models import Video, EncodedVideo, CourseVideo, Profile, VideoImage
from edxval.serializers import VideoSerializer
from edxval.exceptions import ( # pylint: disable=unused-import
ValError,
ValInternalError,
ValVideoNotFoundError,
ValCannotCreateError,
ValCannotUpdateError
)
from edxval.exceptions import (InvalidTranscriptFormat,
InvalidTranscriptProvider, ValCannotCreateError,
ValCannotUpdateError, ValInternalError,
ValVideoNotFoundError)
from edxval.models import (CourseVideo, EncodedVideo, Profile,
TranscriptFormat, TranscriptProviderType, Video,
VideoImage, VideoTranscript)
from edxval.serializers import TranscriptSerializer, VideoSerializer
logger = logging.getLogger(__name__) # pylint: disable=C0103
......@@ -143,6 +140,92 @@ def update_video_status(edx_video_id, status):
video.save()
def get_video_transcript(video_id, language_code):
    """
    Look up the transcript stored for a video in a given language.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language_code: language code of the requested transcript.

    Returns:
        The matching VideoTranscript, or None when no such transcript exists.
    """
    try:
        return VideoTranscript.objects.get(video_id=video_id, language_code=language_code)
    except VideoTranscript.DoesNotExist:
        return None
def get_video_transcripts(video_id):
    """
    Collect the serialized transcripts of a video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.

    Returns:
        TranscriptSerializer output for every transcript stored for the
        video, or an empty list when the video has no transcripts.
    """
    matching_transcripts = VideoTranscript.objects.filter(video_id=video_id)
    if not matching_transcripts.exists():
        return []
    return TranscriptSerializer(matching_transcripts, many=True).data
def get_video_transcript_url(video_id, language_code):
    """
    Returns course video transcript url or None if no transcript

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language_code: language code of a video transcript
    """
    transcript = get_video_transcript(video_id, language_code)
    return transcript.url() if transcript else None
def create_or_update_video_transcript(
        video_id,
        language_code,
        file_name,
        file_format,
        provider,
        file_data=None,
):
    """
    Create or Update video transcript for an existing video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language_code: language code of a video transcript
        file_name: file name of a video transcript
        file_format: format of the transcript
        provider: transcript provider
        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.

    Returns:
        video transcript url

    Raises:
        InvalidTranscriptFormat: `file_format` is not one of TranscriptFormat.CHOICES.
        InvalidTranscriptProvider: `provider` is not one of TranscriptProviderType.CHOICES.
    """
    # The format is validated before the provider; nothing is stored unless both pass.
    supported_formats = dict(TranscriptFormat.CHOICES).keys()
    if file_format not in supported_formats:
        raise InvalidTranscriptFormat('{} transcript format is not supported'.format(file_format))

    supported_providers = dict(TranscriptProviderType.CHOICES).keys()
    if provider not in supported_providers:
        raise InvalidTranscriptProvider('{} transcript provider is not supported'.format(provider))

    # create_or_update returns (video_transcript, created); only the transcript is needed here.
    video_transcript = VideoTranscript.create_or_update(
        video_id, language_code, file_name, file_format, provider, file_data
    )[0]
    return video_transcript.url()
def get_course_video_image_url(course_id, edx_video_id):
"""
Returns course video image url or None if no image found
......@@ -246,11 +329,6 @@ def get_video_info(edx_video_id):
url: url of the video
file_size: size of the video in bytes
profile: ID of the profile
subtitles: a list of Subtitle dicts
fmt: file format (SRT or SJSON)
language: language code
content_url: url of file
url: api url to subtitle
}
Raises:
......
......@@ -48,3 +48,17 @@ class ValCannotUpdateError(ValError):
This error is raised when an object cannot be updated
"""
pass
class InvalidTranscriptFormat(ValError):
    """
    This error is raised when a transcript format is not supported
    """
class InvalidTranscriptProvider(ValError):
    """
    This error is raised when a transcript provider is not supported
    """
# -*- coding: utf-8 -*-
# Generated by Django 1.11.4 on 2017-08-23 04:15
from __future__ import unicode_literals
from django.db import migrations, models
import django.utils.timezone
import edxval.models
import model_utils.fields
# Schema migration that adds the table backing the VideoTranscript model.
class Migration(migrations.Migration):
# Applies on top of the '0005_videoimage' migration of the edxval app.
dependencies = [
('edxval', '0005_videoimage'),
]
operations = [
# Create the VideoTranscript table with its timestamp, file and metadata columns.
migrations.CreateModel(
name='VideoTranscript',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
('video_id', models.CharField(help_text=b'It can be an edx_video_id or an external video id', max_length=255)),
# Only blank/null are recorded for the file field; no storage or upload path appears here.
('transcript', edxval.models.CustomizableFileField(blank=True, null=True)),
('language_code', models.CharField(db_index=True, max_length=8)),
('provider', models.CharField(choices=[(b'Custom', b'Custom'), (b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')], default=b'Custom', max_length=30)),
('file_format', models.CharField(choices=[(b'srt', b'SubRip'), (b'sjson', b'SRT JSON')], db_index=True, max_length=20)),
],
),
# Allow at most one transcript per (video_id, language_code) pair.
migrations.AlterUniqueTogether(
name='videotranscript',
unique_together=set([('video_id', 'language_code')]),
),
]
......@@ -11,22 +11,23 @@ themselves. After these are resolved, errors such as a negative file_size or
invalid profile_name will be returned.
"""
from contextlib import closing
import json
import logging
import os
from contextlib import closing
from uuid import uuid4
from django.db import models
from django.dispatch import receiver
from django.core.exceptions import ValidationError
from django.core.validators import MinValueValidator, RegexValidator
from django.core.urlresolvers import reverse
from django.core.validators import MinValueValidator, RegexValidator
from django.db import models
from django.dispatch import receiver
from django.utils.six import python_2_unicode_compatible
from model_utils.models import TimeStampedModel
from edxval.utils import video_image_path, get_video_image_storage
from edxval.utils import (get_video_image_storage,
get_video_transcript_storage, video_image_path,
video_transcript_path)
logger = logging.getLogger(__name__) # pylint: disable=C0103
......@@ -129,7 +130,7 @@ class Video(models.Model):
qset = cls.objects.filter(
encoded_videos__profile__profile_name='youtube',
encoded_videos__url=youtube_id
).prefetch_related('encoded_videos', 'courses', 'subtitles')
).prefetch_related('encoded_videos', 'courses')
return qset
......@@ -335,6 +336,123 @@ class VideoImage(TimeStampedModel):
return storage.url(self.image.name)
class TranscriptProviderType(object):
    """
    Names of the transcript providers accepted for a video transcript.

    CHOICES pairs every provider name with itself, in the (value, label)
    form used for the `choices` option of a model field.
    """
    CUSTOM = 'Custom'
    THREE_PLAY_MEDIA = '3PlayMedia'
    CIELO24 = 'Cielo24'

    CHOICES = tuple(
        (provider, provider) for provider in (CUSTOM, THREE_PLAY_MEDIA, CIELO24)
    )
class TranscriptFormat(object):
    """
    File formats accepted for a video transcript.

    CHOICES lists (value, label) pairs: the stored format identifier
    followed by its display name.
    """
    SRT = 'srt'
    SJSON = 'sjson'

    CHOICES = (
        (SRT, 'SubRip'),
        (SJSON, 'SRT JSON'),
    )
class CustomizableFileField(models.FileField):
    """
    FileField whose storage-related options are kept out of migrations.

    Migrations would otherwise serialize (hard-code) optional settings
    such as the storage class and bucket name, and every configuration
    change would then require a new migration file.
    """
    def __init__(self, *args, **kwargs):
        # These options are always forced, overriding anything the caller passed in.
        kwargs['upload_to'] = video_transcript_path
        kwargs['storage'] = get_video_transcript_storage()
        kwargs['max_length'] = 255  # enough for uuid
        kwargs['blank'] = True
        kwargs['null'] = True
        super(CustomizableFileField, self).__init__(*args, **kwargs)

    def deconstruct(self):
        """
        Override base class method.

        Strips the configuration-dependent options from the serialized
        keyword arguments so they do not end up in migrations.
        """
        name, path, args, kwargs = super(CustomizableFileField, self).deconstruct()
        for configurable_option in ('upload_to', 'storage', 'max_length'):
            del kwargs[configurable_option]
        return name, path, args, kwargs
class VideoTranscript(TimeStampedModel):
    """
    Transcript for a video

    The video is referenced through `video_id`, which can be an
    edx_video_id or an external video id. A (video_id, language_code)
    pair is unique.
    """
    video_id = models.CharField(
        max_length=255,
        help_text='It can be an edx_video_id or an external video id',
    )
    transcript = CustomizableFileField()
    language_code = models.CharField(max_length=8, db_index=True)
    provider = models.CharField(
        max_length=30,
        choices=TranscriptProviderType.CHOICES,
        default=TranscriptProviderType.CUSTOM,
    )
    file_format = models.CharField(
        max_length=20,
        db_index=True,
        choices=TranscriptFormat.CHOICES,
    )

    class Meta:
        unique_together = ('video_id', 'language_code')

    @classmethod
    def create_or_update(cls, video_id, language_code, file_name, file_format, provider, file_data=None):
        """
        Create or update Transcript object.

        Arguments:
            video_id (str): unique id for a video
            language_code (str): language code
            file_name (str): File name of the transcript
            file_format (str): Format of transcript
            provider (str): Transcript provider
            file_data (InMemoryUploadedFile): File data to be saved

        Returns:
            Returns a tuple of (video_transcript, created).
        """
        record, is_new = cls.objects.get_or_create(video_id=video_id, language_code=language_code)

        # An existing record that receives new content gets its old file deleted first.
        if not is_new and file_data:
            record.transcript.delete()

        record.transcript.name = file_name
        record.file_format = file_format
        record.provider = provider

        if file_data:
            # `closing` closes the uploaded data once the save attempt is over.
            with closing(file_data) as open_transcript:
                # The content is stored under a uuid-based name that keeps the extension of `file_name`.
                stored_name = '{uuid}{ext}'.format(uuid=uuid4().hex, ext=os.path.splitext(file_name)[1])
                try:
                    record.transcript.save(stored_name, open_transcript)
                except Exception:  # pylint: disable=broad-except
                    logger.exception('VAL: Transcript save failed to storage for video_id [%s]', video_id)
                    raise

        record.save()
        return record, is_new

    def url(self):
        """
        Returns language transcript url for a particular language.
        """
        return get_video_transcript_storage().url(self.transcript.name)

    def __unicode__(self):
        return u'{lang} Transcript for {video}'.format(lang=self.language_code, video=self.video_id)
SUBTITLE_FORMATS = (
('srt', 'SubRip'),
('sjson', 'SRT JSON')
......
......@@ -5,9 +5,10 @@ Serialization is usually sent through the VideoSerializer which uses the
EncodedVideoSerializer which uses the profile_name as it's profile field.
"""
from rest_framework import serializers
from rest_framework.fields import IntegerField, DateTimeField
from rest_framework.fields import DateTimeField, IntegerField
from edxval.models import Profile, Video, EncodedVideo, Subtitle, CourseVideo, VideoImage
from edxval.models import (CourseVideo, EncodedVideo, Profile, Video,
VideoImage, VideoTranscript)
class EncodedVideoSerializer(serializers.ModelSerializer):
......@@ -50,38 +51,22 @@ class EncodedVideoSerializer(serializers.ModelSerializer):
return data.get('profile', None)
class SubtitleSerializer(serializers.ModelSerializer):
class TranscriptSerializer(serializers.ModelSerializer):
"""
Serializer for Subtitle objects
Serializer for VideoTranscript objects
"""
content_url = serializers.CharField(source='get_absolute_url', read_only=True)
content = serializers.CharField(write_only=True)
def validate(self, data):
"""
Validate that the subtitle is in the correct format
"""
value = data.get("content")
if data.get("fmt") == "sjson":
import json
try:
loaded = json.loads(value)
except ValueError:
raise serializers.ValidationError("Not in JSON format")
else:
data["content"] = json.dumps(loaded)
return data
class Meta: # pylint: disable=C1001, C0111
model = Subtitle
lookup_field = "id"
fields = (
"fmt",
"language",
"content_url",
"content",
)
model = VideoTranscript
lookup_field = 'video_id'
fields = ('video_id', 'url', 'language_code', 'provider', 'file_format')
url = serializers.SerializerMethodField()
def get_url(self, transcript):
"""
Retrieves the transcript url.
"""
return transcript.url()
class CourseSerializer(serializers.RelatedField):
"""
......@@ -118,7 +103,6 @@ class VideoSerializer(serializers.ModelSerializer):
encoded_videos takes a list of dicts EncodedVideo data.
"""
encoded_videos = EncodedVideoSerializer(many=True)
subtitles = SubtitleSerializer(many=True, required=False)
courses = CourseSerializer(
many=True,
read_only=False,
......@@ -170,7 +154,6 @@ class VideoSerializer(serializers.ModelSerializer):
"""
courses = validated_data.pop("courses", [])
encoded_videos = validated_data.pop("encoded_videos", [])
subtitles = validated_data.pop("subtitles", [])
video = Video.objects.create(**validated_data)
......@@ -179,11 +162,6 @@ class VideoSerializer(serializers.ModelSerializer):
for video_data in encoded_videos
)
Subtitle.objects.bulk_create(
Subtitle(video=video, **subtitle_data)
for subtitle_data in subtitles
)
# The CourseSerializer will already have converted the course data
# to CourseVideo models, so we can just set the video and save.
# Also create VideoImage objects if an image filename is present
......@@ -211,13 +189,6 @@ class VideoSerializer(serializers.ModelSerializer):
for video_data in validated_data.get("encoded_videos", [])
)
# Set subtitles
instance.subtitles.all().delete()
Subtitle.objects.bulk_create(
Subtitle(video=instance, **subtitle_data)
for subtitle_data in validated_data.get("subtitles", [])
)
# Set courses
# NOTE: for backwards compatibility with the DRF v2 behavior,
# we do NOT delete existing course videos during the update.
......
......@@ -190,3 +190,13 @@ VIDEO_IMAGE_SETTINGS = dict(
VIDEO_IMAGE_MIN_BYTES=100,
DIRECTORY_PREFIX='video-images/',
)
VIDEO_TRANSCRIPTS_SETTINGS = {
    # Backend storage
    # 'STORAGE_CLASS': 'storages.backends.s3boto.S3BotoStorage',
    # 'STORAGE_KWARGS': dict(bucket='video-transcripts-bucket'),
    'VIDEO_TRANSCRIPTS_MAX_BYTES': 3145728,  # 3 MB
    # If you are changing prefix value then update the .gitignore accordingly
    # so that transcripts created during tests due to upload should be ignored
    'DIRECTORY_PREFIX': 'video-transcripts/',
}
......@@ -3,6 +3,8 @@
"""
Constants used for tests.
"""
from edxval.models import TranscriptFormat, TranscriptProviderType
EDX_VIDEO_ID = "itchyjacket"
"""
Generic Profiles for manually creating profile objects
......@@ -388,3 +390,19 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
encoded_videos=[],
subtitles=[]
)
# English SRT transcript fixture attributed to the Cielo24 provider.
VIDEO_TRANSCRIPT_CIELO24 = {
    'video_id': 'super-soaker',
    'language_code': 'en',
    'transcript': 'wow.srt',
    'provider': TranscriptProviderType.CIELO24,
    'file_format': TranscriptFormat.SRT,
}
# German SJSON transcript fixture attributed to the 3PlayMedia provider.
VIDEO_TRANSCRIPT_3PLAY = {
    'video_id': 'super-soaker',
    'language_code': 'de',
    'transcript': 'wow.sjson',
    'provider': TranscriptProviderType.THREE_PLAY_MEDIA,
    'file_format': TranscriptFormat.SJSON,
}
1
00:00:07,180 --> 00:00:08,460
This is Arrow line 1.
2
00:00:08,460 --> 00:00:10,510
This is Arrow line 2.
3
00:00:10,510 --> 00:00:13,560
This is Arrow line 3.
4
00:00:13,560 --> 00:00:14,360
This is Arrow line 4.
5
00:00:14,370 --> 00:00:16,530
This is Arrow line 5.
6
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
\ No newline at end of file
1
00:00:07,180 --> 00:00:08,460
This is Flash line 1.
2
00:00:08,460 --> 00:00:10,510
This is Flash line 2.
3
00:00:10,510 --> 00:00:13,560
This is Flash line 3.
4
00:00:13,560 --> 00:00:14,360
This is Flash line 4.
5
00:00:14,370 --> 00:00:16,530
This is Flash line 5.
6
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
\ No newline at end of file
......@@ -5,27 +5,39 @@ Tests for the API for Video Abstraction Layer
import json
import mock
from mock import patch
from lxml import etree
from ddt import data, ddt, unpack
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.files.base import ContentFile
from django.core.files.images import ImageFile
from django.test import TestCase
from django.db import DatabaseError
from django.core.urlresolvers import reverse
from django.db import DatabaseError
from django.test import TestCase
from lxml import etree
from mock import patch
from rest_framework import status
from ddt import ddt, data, unpack
from django.conf import settings
from edxval.models import Profile, Video, EncodedVideo, CourseVideo, VideoImage, LIST_MAX_ITEMS
from edxval import api as api
from edxval.api import (
SortDirection,
ValCannotCreateError,
ValCannotUpdateError,
ValVideoNotFoundError,
VideoSortField,
)
from edxval.tests import constants, APIAuthTestCase
from edxval.api import (InvalidTranscriptFormat, InvalidTranscriptProvider,
SortDirection, ValCannotCreateError,
ValCannotUpdateError, ValVideoNotFoundError,
VideoSortField)
from edxval.models import (LIST_MAX_ITEMS, CourseVideo, EncodedVideo, Profile,
TranscriptFormat, TranscriptProviderType, Video,
VideoImage, VideoTranscript)
from edxval.tests import APIAuthTestCase, constants
# Transcript content uploaded through ContentFile in the transcript tests and
# compared verbatim with what is read back from the saved file.
# The second cue contains non-ASCII text.
FILE_DATA = """
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
"""
class SortedVideoTestMixin(object):
......@@ -759,7 +771,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
"""
Tests number of queries for a Video/EncodedVideo(1) pair
"""
with self.assertNumQueries(6):
with self.assertNumQueries(5):
api.get_video_info(constants.COMPLETE_SET_FISH.get("edx_video_id"))
def test_get_info_queries_for_one_encoded_video(self):
......@@ -771,7 +783,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
url, constants.COMPLETE_SET_STAR, format='json'
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(5):
with self.assertNumQueries(4):
api.get_video_info(constants.COMPLETE_SET_STAR.get("edx_video_id"))
def test_get_info_queries_for_only_video(self):
......@@ -783,7 +795,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
url, constants.VIDEO_DICT_ZEBRA, format='json'
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(4):
with self.assertNumQueries(3):
api.get_video_info(constants.VIDEO_DICT_ZEBRA.get("edx_video_id"))
......@@ -1460,3 +1472,215 @@ class CourseVideoImageTest(TestCase):
# Open the shared image file to verify it is not deleted
ImageFile(open(shared_image))
@ddt
class TranscriptTest(TestCase):
    """
    Tests to check transcript related functions.
    """
    def setUp(self):
        """
        Creates video and video transcript objects.
        """
        self.video1 = Video.objects.create(**constants.VIDEO_DICT_FISH)
        self.edx_video_id1 = self.video1.edx_video_id

        self.video2 = Video.objects.create(**constants.VIDEO_DICT_DIFFERENT_ID_FISH)
        self.edx_video_id2 = self.video2.edx_video_id

        # Expected-data copies of the fixtures, keyed by 'name' instead of 'transcript';
        # individual tests turn 'name' into the serialized 'url' field.
        self.transcript_data1 = dict(constants.VIDEO_TRANSCRIPT_CIELO24)
        self.transcript_data1['name'] = self.transcript_data1.pop('transcript')

        self.transcript_data2 = dict(constants.VIDEO_TRANSCRIPT_3PLAY)
        self.transcript_data2['name'] = self.transcript_data2.pop('transcript')

        self.transcript1 = VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_CIELO24)
        self.transcript2 = VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_3PLAY)

        self.video_id = '0987654321'
        self.arrow_transcript_path = 'edxval/tests/data/The_Arrow.srt'
        self.flash_transcript_path = 'edxval/tests/data/The_Flash.srt'

        # A transcript with real file content, used by the url, update and deletion tests.
        self.transcript_url = api.create_or_update_video_transcript(
            self.video_id,
            'ur',
            'The_Arrow.srt',
            TranscriptFormat.SRT,
            provider=TranscriptProviderType.CUSTOM,
            file_data=File(open(self.arrow_transcript_path)),
        )

    @data(
        {'video_id': 'super-soaker', 'language_code': 'en', 'result': True},
        {'video_id': 'super-soaker', 'language_code': 'ur', 'result': False},
        {'video_id': 'super123', 'language_code': 'en', 'result': False},
        {'video_id': 'super123', 'language_code': 'ur', 'result': False},
    )
    @unpack
    def test_get_video_transcript(self, video_id, language_code, result):
        """
        Verify that `get_video_transcript` api function works as expected.
        """
        transcript = api.get_video_transcript(video_id, language_code)
        if not result:
            self.assertIsNone(transcript)
        else:
            serialized_data = api.TranscriptSerializer(transcript).data
            transcript_data = dict(self.transcript_data1)
            transcript_data['url'] = transcript_data.pop('name')
            self.assertEqual(serialized_data, transcript_data)

    @data(
        {'video_id': 'super-soaker', 'result': True},
        {'video_id': 'super-soaker1', 'result': False},
    )
    @unpack
    def test_get_video_transcripts(self, video_id, result):
        """
        Verify that `get_video_transcripts` api function works as expected.
        """
        transcripts = api.get_video_transcripts(video_id)
        if result:
            self.assertEqual(len(transcripts), 2)
            # The API applies no ordering to the transcripts, so both sides are sorted
            # by language code instead of relying on the order rows come back in.
            actual_transcripts = sorted(transcripts, key=lambda transcript: transcript['language_code'])
            expected_transcripts = sorted(
                (self.transcript_data1, self.transcript_data2),
                key=lambda transcript_data: transcript_data['language_code']
            )
            for transcript, transcript_data in zip(actual_transcripts, expected_transcripts):
                # Compare against a copy so the fixtures built in setUp stay untouched.
                expected_data = dict(transcript_data)
                expected_data['url'] = expected_data.pop('name')
                self.assertEqual(transcript, expected_data)
        else:
            self.assertEqual(transcripts, [])

    def test_create_video_transcript(self):
        """
        Verify that `create_or_update_video_transcript` api function creates transcript if there is no already.
        """
        transcript_data = dict(self.transcript_data1)
        transcript_data['language_code'] = 'ur'

        video_transcript = api.get_video_transcript(transcript_data['video_id'], transcript_data['language_code'])
        self.assertIsNone(video_transcript)

        transcript_url = api.create_or_update_video_transcript(
            video_id=transcript_data['video_id'],
            language_code=transcript_data['language_code'],
            file_name=transcript_data['name'],
            file_format=transcript_data['file_format'],
            provider=transcript_data['provider'],
        )
        # No file data was supplied, so the url is expected to equal the given file name.
        self.assertEqual(transcript_url, transcript_data['name'])

        video_transcript = api.get_video_transcript(transcript_data['video_id'], transcript_data['language_code'])
        transcript_data['url'] = transcript_data.pop('name')
        self.assertEqual(
            transcript_data,
            api.TranscriptSerializer(video_transcript).data
        )

    @data(
        {'language_code': 'ur', 'has_url': True},
        {'language_code': 'xyz', 'has_url': False},
    )
    @unpack
    def test_get_video_transcript_url(self, language_code, has_url):
        """
        Verify that `get_video_transcript_url` api function works as expected.
        """
        transcript_url = api.get_video_transcript_url(self.video_id, language_code)
        if has_url:
            self.assertEqual(self.transcript_url, transcript_url)
        else:
            self.assertIsNone(transcript_url)

    @data(
        {
            'file_data': None,
            'file_format': TranscriptFormat.SJSON,
            'provider': TranscriptProviderType.CIELO24
        },
        {
            'file_data': ContentFile(FILE_DATA),
            'file_format': TranscriptFormat.SRT,
            'provider': TranscriptProviderType.THREE_PLAY_MEDIA
        },
    )
    @unpack
    def test_create_or_update_video_transcript(self, file_data, file_format, provider):
        """
        Verify that `create_or_update_video_transcript` api function updates existing transcript as expected.
        """
        video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
        self.assertIsNotNone(video_transcript)

        file_name = 'overwatch.{}'.format(file_format)
        transcript_url = api.create_or_update_video_transcript(
            self.video_id, 'ur', file_name, file_format, provider, file_data
        )
        video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')

        self.assertEqual(transcript_url, video_transcript.url())
        self.assertEqual(video_transcript.file_format, file_format)
        self.assertEqual(video_transcript.provider, provider)

        if file_data:
            # Uploaded content is stored under the configured directory prefix.
            self.assertTrue(transcript_url.startswith(settings.VIDEO_TRANSCRIPTS_SETTINGS['DIRECTORY_PREFIX']))
            self.assertEqual(video_transcript.transcript.name, transcript_url)
            with open(video_transcript.transcript.name) as saved_transcript:
                self.assertEqual(saved_transcript.read(), FILE_DATA)
        else:
            # Without file data only the name given by the caller is recorded.
            self.assertEqual(video_transcript.transcript.name, file_name)

    @data(
        {
            'file_format': '123',
            'provider': TranscriptProviderType.CIELO24,
            'exception': InvalidTranscriptFormat,
            'exception_message': '123 transcript format is not supported',
        },
        {
            'file_format': TranscriptFormat.SRT,
            'provider': 123,
            'exception': InvalidTranscriptProvider,
            'exception_message': '123 transcript provider is not supported',
        },
    )
    @unpack
    def test_create_or_update_video_exceptions(self, file_format, provider, exception, exception_message):
        """
        Verify that `create_or_update_video_transcript` api function raise exceptions on invalid values.
        """
        with self.assertRaises(exception) as transcript_exception:
            api.create_or_update_video_transcript(self.video_id, 'ur', 'overwatch.srt', file_format, provider)

        self.assertEqual(transcript_exception.exception.message, exception_message)

    def test_video_transcript_deletion(self):
        """
        Test video transcript deletion works as expected.
        """
        # get an existing video transcript
        video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
        existing_transcript_url = video_transcript.transcript.name

        # This will replace the transcript for an existing video and delete the existing transcript
        new_transcript_url = api.create_or_update_video_transcript(
            self.video_id,
            'ur',
            'overwatch.srt',
            TranscriptFormat.SRT,
            TranscriptProviderType.CIELO24,
            ContentFile(FILE_DATA)
        )

        # Verify that new transcript is set to video
        video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
        self.assertEqual(video_transcript.transcript.name, new_transcript_url)

        # verify that new data is written correctly
        with open(video_transcript.transcript.name) as saved_transcript:
            self.assertEqual(saved_transcript.read(), FILE_DATA)

        # Verify that an exception is raised if we try to open a deleted transcript file
        with self.assertRaises(IOError) as file_open_exception:
            File(open(existing_transcript_url))

        self.assertEqual(file_open_exception.exception.strerror, u'No such file or directory')
......@@ -3,13 +3,16 @@
Tests for Video Abstraction Layer views
"""
import json
from ddt import ddt, data, unpack
import unittest
from ddt import data, ddt, unpack
from django.core.urlresolvers import reverse
from rest_framework import status
from edxval.tests import constants, APIAuthTestCase
from edxval.models import Profile, Video, CourseVideo
from edxval.models import (CourseVideo, Profile, TranscriptFormat,
TranscriptProviderType, Video, VideoTranscript)
from edxval.serializers import TranscriptSerializer
from edxval.tests import APIAuthTestCase, constants
class VideoDetail(APIAuthTestCase):
......@@ -206,6 +209,7 @@ class VideoDetail(APIAuthTestCase):
)
self.assertEqual(len(videos[0].encoded_videos.all()), 1)
@unittest.skip("Skipping for now. We may need this later when we create transcripts alongwith video")
def test_update_remove_subtitles(self):
# Create some subtitles
self._create_videos(constants.COMPLETE_SET_STAR)
......@@ -665,7 +669,7 @@ class VideoListTest(APIAuthTestCase):
Tests number of queries for a Video with no Encoded Videos
"""
url = reverse('video-list')
with self.assertNumQueries(9):
with self.assertNumQueries(8):
self.client.post(url, constants.VIDEO_DICT_ZEBRA, format='json')
def test_queries_for_two_encoded_video(self):
......@@ -673,7 +677,7 @@ class VideoListTest(APIAuthTestCase):
Tests number of queries for a Video/EncodedVideo(2) pair
"""
url = reverse('video-list')
with self.assertNumQueries(15):
with self.assertNumQueries(13):
self.client.post(url, constants.COMPLETE_SET_FISH, format='json')
def test_queries_for_single_encoded_videos(self):
......@@ -681,7 +685,7 @@ class VideoListTest(APIAuthTestCase):
Tests number of queries for a Video/EncodedVideo(1) pair
"""
url = reverse('video-list')
with self.assertNumQueries(13):
with self.assertNumQueries(11):
self.client.post(url, constants.COMPLETE_SET_STAR, format='json')
......@@ -718,18 +722,19 @@ class VideoDetailTest(APIAuthTestCase):
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response = self.client.post(url, constants.VIDEO_DICT_ZEBRA, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(9):
with self.assertNumQueries(7):
self.client.get("/edxval/videos/").data
response = self.client.post(url, constants.COMPLETE_SET_FISH, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(12):
with self.assertNumQueries(9):
self.client.get("/edxval/videos/").data
response = self.client.post(url, constants.COMPLETE_SET_STAR, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(14):
with self.assertNumQueries(10):
self.client.get("/edxval/videos/").data
@unittest.skip("Skipping for now. We may need these later when we create transcripts alongwith video")
class SubtitleDetailTest(APIAuthTestCase):
"""
Tests for subtitle API
......@@ -811,6 +816,7 @@ class SubtitleDetailTest(APIAuthTestCase):
)
self.assertEqual(self.client.get(video_subtitles['content_url']).content, '{"start": "00:00:00"}')
@ddt
class VideoImagesViewTest(APIAuthTestCase):
"""
......@@ -897,3 +903,90 @@ class VideoImagesViewTest(APIAuthTestCase):
response.data['message'],
message
)
@ddt
class VideoTranscriptViewTest(APIAuthTestCase):
    """
    Tests VideoTranscriptView.
    """
    def setUp(self):
        """
        Tests setup.
        """
        self.url = reverse('create-video-transcript')
        self.video = Video.objects.create(**constants.VIDEO_DICT_FISH)
        self.transcript_data = constants.VIDEO_TRANSCRIPT_CIELO24
        super(VideoTranscriptViewTest, self).setUp()

    def test_create_transcript(self):
        """
        Tests POSTing transcript successfully.
        """
        # The endpoint takes the file name under 'name' rather than 'transcript'.
        payload = dict(self.transcript_data)
        payload['name'] = payload.pop('transcript')

        response = self.client.post(self.url, payload, format='json')
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        # In the serialized form the posted name shows up as 'url'.
        expected_data = dict(payload)
        expected_data['url'] = expected_data.pop('name')
        stored_transcript = VideoTranscript.objects.first()
        self.assertEqual(TranscriptSerializer(stored_transcript).data, expected_data)

    def test_update_existing_transcript(self):
        """
        Tests updating existing transcript works as expected.
        """
        VideoTranscript.objects.create(**self.transcript_data)

        payload = dict(self.transcript_data)
        payload['name'] = payload.pop('transcript')

        # Posting the same video and language again must be rejected.
        response = self.client.post(self.url, payload, format='json')
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        expected_message = (
            u'Can not override existing transcript for video "{video_id}" and language code "{language}".'.format(
                video_id=self.video.edx_video_id, language=payload['language_code'])
        )
        self.assertEqual(response.data['message'], expected_message)

    @data(
        {
            'post_data': {},
            'message': u'video_id and name and language_code and provider and file_format must be specified.'
        },
        {
            'post_data': {
                'video_id': 'super-soaker',
                'name': 'abc.xyz',
                'language_code': 'en',
                'provider': TranscriptProviderType.CIELO24,
                'file_format': 'xyz'
            },
            'message': u'"xyz" transcript file type is not supported. Supported formats are "{}"'.format(
                sorted(dict(TranscriptFormat.CHOICES).keys())
            )
        },
        {
            'post_data': {
                'video_id': 'super-soaker',
                'name': 'abc.srt',
                'language_code': 'en',
                'provider': 'xyz',
                'file_format': TranscriptFormat.SRT
            },
            'message': u'"xyz" provider is not supported. Supported transcription providers are "{}"'.format(
                sorted(dict(TranscriptProviderType.CHOICES).keys())
            )
        },
    )
    @unpack
    def test_error_responses(self, post_data, message):
        """
        Tests error responses occurred during POSTing.
        """
        error_response = self.client.post(self.url, post_data, format='json')
        self.assertEqual(error_response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertEqual(error_response.data['message'], message)
......@@ -17,14 +17,9 @@ urlpatterns = [
name="video-detail"
),
url(
r'^videos/(?P<video__edx_video_id>[-\w]+)/(?P<language>[-_\w]+)$',
views.SubtitleDetail.as_view(),
name="subtitle-detail"
),
url(
r'^videos/(?P<edx_video_id>[-\w]+)/(?P<language>[-_\w]+)/subtitle$',
views.get_subtitle,
name="subtitle-content"
r'^videos/video-transcripts/create/$',
views.VideoTranscriptView.as_view(),
name='create-video-transcript'
),
url(
r'^videos/video-images/update/$',
......
......@@ -29,3 +29,28 @@ def get_video_image_storage():
# during edx-platform loading this method gets called but settings are not ready yet
# so in that case we will return default(FileSystemStorage) storage class instance
return get_storage_class()()
def video_transcript_path(video_transcript_instance, filename):  # pylint:disable=unused-argument
    """
    Returns video transcript path.

    The path is the configured `DIRECTORY_PREFIX` (empty when not configured) followed
    by the file name. A missing `VIDEO_TRANSCRIPTS_SETTINGS` is treated like an empty
    one, in line with get_video_transcript_storage, which also tolerates its absence.

    Arguments:
        video_transcript_instance (VideoTranscript): This is passed automatically by models.CustomizableFileField
        filename (str): name of transcript file
    """
    # getattr with a default avoids an AttributeError when the setting is not defined.
    transcript_settings = getattr(settings, 'VIDEO_TRANSCRIPTS_SETTINGS', {})
    return u'{}{}'.format(transcript_settings.get('DIRECTORY_PREFIX', ''), filename)
def get_video_transcript_storage():
    """
    Build the django storage backend instance used for video transcripts.
    """
    if not hasattr(settings, 'VIDEO_TRANSCRIPTS_SETTINGS'):
        # during edx-platform loading this method gets called but settings are not ready yet
        # so in that case we will return default(FileSystemStorage) storage class instance
        return get_storage_class()()

    transcript_settings = settings.VIDEO_TRANSCRIPTS_SETTINGS
    storage_class = get_storage_class(transcript_settings.get('STORAGE_CLASS'))
    storage_kwargs = transcript_settings.get('STORAGE_KWARGS', {})
    return storage_class(**storage_kwargs)
"""
Views file for django app edxval.
"""
from rest_framework.views import APIView
from rest_framework import generics
from rest_framework.authentication import SessionAuthentication
from rest_framework_oauth.authentication import OAuth2Authentication
from rest_framework.permissions import DjangoModelPermissions
from rest_framework.response import Response
from rest_framework import status
import logging
from django.core.exceptions import ValidationError
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from django.core.exceptions import ValidationError
from django.views.decorators.http import last_modified
from rest_framework import generics, status
from rest_framework.authentication import SessionAuthentication
from rest_framework.permissions import DjangoModelPermissions
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework_oauth.authentication import OAuth2Authentication
from edxval.api import create_or_update_video_transcript, get_video_transcript, create_or_update_video_transcript
from edxval.models import (CourseVideo, Profile, TranscriptFormat,
TranscriptProviderType, Video, VideoImage,
VideoTranscript)
from edxval.serializers import TranscriptSerializer, VideoSerializer
from edxval.models import Video, Profile, Subtitle, CourseVideo, VideoImage
from edxval.serializers import (
VideoSerializer,
SubtitleSerializer
)
LOGGER = logging.getLogger(__name__) # pylint: disable=C0103
class ReadRestrictedDjangoModelPermissions(DjangoModelPermissions):
......@@ -92,15 +95,68 @@ class VideoDetail(generics.RetrieveUpdateDestroyAPIView):
serializer_class = VideoSerializer
class SubtitleDetail(MultipleFieldLookupMixin, generics.RetrieveUpdateDestroyAPIView):
class VideoTranscriptView(APIView):
"""
Gets a subtitle instance given its id
A Transcription View, used by edx-video-pipeline to create video transcripts.
"""
authentication_classes = (OAuth2Authentication, SessionAuthentication)
permission_classes = (ReadRestrictedDjangoModelPermissions,)
lookup_fields = ("video__edx_video_id", "language")
queryset = Subtitle.objects.all()
serializer_class = SubtitleSerializer
# noinspection PyMethodMayBeStatic
def post(self, request):
    """
    Creates a video transcript instance with the given information.

    Responds with HTTP 200 when the transcript gets created. Responds with HTTP 400,
    with a `message` in the response data, when a required param is missing, when the
    file format or the provider is not supported, or when a transcript is already
    found for the given video and language code.

    Arguments:
        request: A WSGI request.
    """
    attrs = ('video_id', 'name', 'language_code', 'provider', 'file_format')
    missing = [attr for attr in attrs if attr not in request.data]
    if missing:
        missing_attrs = ' and '.join(missing)
        # `Logger.warn` is a deprecated alias, `Logger.warning` is the supported method.
        LOGGER.warning('[VAL] Required transcript params are missing. %s', missing_attrs)
        return Response(
            status=status.HTTP_400_BAD_REQUEST,
            data=dict(message=u'{missing} must be specified.'.format(missing=missing_attrs))
        )

    video_id = request.data['video_id']
    language_code = request.data['language_code']
    transcript_name = request.data['name']
    provider = request.data['provider']
    file_format = request.data['file_format']

    # Sorted so that the list embedded in the error message has a stable order.
    supported_formats = sorted(dict(TranscriptFormat.CHOICES).keys())
    if file_format not in supported_formats:
        message = (
            u'"{format}" transcript file type is not supported. Supported formats are "{supported_formats}"'
        ).format(format=file_format, supported_formats=supported_formats)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    supported_providers = sorted(dict(TranscriptProviderType.CHOICES).keys())
    if provider not in supported_providers:
        message = (
            u'"{provider}" provider is not supported. Supported transcription providers are "{supported_providers}"'
        ).format(provider=provider, supported_providers=supported_providers)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    # This endpoint only creates: an already existing transcript is never overridden.
    transcript = get_video_transcript(video_id, language_code)
    if transcript is not None:
        message = (
            u'Can not override existing transcript for video "{video_id}" and language code "{language}".'
        ).format(video_id=video_id, language=language_code)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    create_or_update_video_transcript(
        video_id,
        language_code,
        transcript_name,
        file_format,
        provider,
    )
    return Response(status=status.HTTP_200_OK)
class VideoImagesView(APIView):
......@@ -148,19 +204,3 @@ class VideoImagesView(APIView):
)
return Response()
def _last_modified_subtitle(request, edx_video_id, language):  # pylint: disable=W0613
    """
    Returns the modification time of the matching subtitle, or None if there is none.

    Arguments:
        request: the incoming request (unused).
        edx_video_id: edx_video_id of the video the subtitle belongs to.
        language: language of the subtitle.
    """
    try:
        return Subtitle.objects.get(video__edx_video_id=edx_video_id, language=language).modified
    except Subtitle.DoesNotExist:
        # Nothing to base a Last-Modified check on; returning None lets the decorated
        # view run and answer with a 404 instead of failing with an unhandled exception.
        return None


@last_modified(last_modified_func=_last_modified_subtitle)
def get_subtitle(request, edx_video_id, language):  # pylint: disable=W0613
    """
    Return content of subtitle by id

    Responds with a 404 when no subtitle matches the given video id and language.
    """
    sub = get_object_or_404(Subtitle, video__edx_video_id=edx_video_id, language=language)
    return HttpResponse(sub.content, content_type=sub.content_type)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment