Commit 01e67fb1 by Muzaffar yousaf Committed by GitHub

Merge pull request #87 from edx/mrehan/val-transcripts-backend-api

VAL changes for Video Transcripts.
parents cb392214 c2b29bee
......@@ -8,3 +8,7 @@ omit =
**/tests/*
**/settings.py
**/migrations*
[html]
title = edx-val Python Test Coverage Report
directory = html_coverage
......@@ -68,4 +68,6 @@ logs/*/*.log*
venv/
venvs/
src/
video-images/
video-transcripts/
Christopher Lee <clee@edx.org>
Mushtaq Ali <mushtaak@gmail.com>
Muhammad Ammar <mammar@gmail.com>
Muhammad Rehan <mrehan@edx.org>
"""
Admin file for django app edxval.
"""
from django import forms
from django.contrib import admin
from .models import Video, Profile, EncodedVideo, Subtitle, CourseVideo, VideoImage
from .models import (CourseVideo, EncodedVideo, Profile, TranscriptPreference,
Video, VideoImage, VideoTranscript)
class ProfileAdmin(admin.ModelAdmin): # pylint: disable=C0111
......@@ -35,19 +37,53 @@ class VideoAdmin(admin.ModelAdmin): # pylint: disable=C0111
class VideoImageAdmin(admin.ModelAdmin):
    """
    Admin options for VideoImage records.
    """
    raw_id_fields = ('course_video', )
    list_display = ('get_course_video', 'image', 'generated_images')

    def get_course_video(self, obj):
        """
        Build the changelist label for the course video that owns the image.

        The label combines the course id and the edx_video_id of the
        related course video.
        """
        linked_course_video = obj.course_video
        label_template = u'"{course_id}" -- "{edx_video_id}" '
        return label_template.format(
            course_id=linked_course_video.course_id,
            edx_video_id=linked_course_video.video.edx_video_id,
        )

    # Column sorting and header text for the computed column above.
    get_course_video.admin_order_field = 'course_video'
    get_course_video.short_description = 'Course Video'

    model = VideoImage
    verbose_name = 'Video Image'
    verbose_name_plural = 'Video Images'
class CourseVideoAdmin(admin.ModelAdmin):
    """
    Admin options for CourseVideo records.
    """
    list_display = ('course_id', 'get_video_id', 'is_hidden')

    def get_video_id(self, obj):
        """
        Return the edx_video_id of the video linked to the given course video.
        """
        return obj.video.edx_video_id

    # Sort the computed column by the `video` field and give it a readable header.
    get_video_id.admin_order_field = 'video'
    get_video_id.short_description = 'edX Video Id'

    model = CourseVideo
    verbose_name = 'Course Video'
    verbose_name_plural = 'Course Videos'
class VideoTranscriptAdmin(admin.ModelAdmin):
    """
    Admin options for VideoTranscript records.
    """
    # Columns shown in the changelist.
    list_display = ('video_id', 'language_code', 'provider', 'file_format')
    model = VideoTranscript
class TranscriptPreferenceAdmin(admin.ModelAdmin):
    """
    Admin options for TranscriptPreference records.
    """
    # Columns shown in the changelist.
    list_display = ('course_id', 'provider', 'video_source_language', 'preferred_languages')
    model = TranscriptPreference
admin.site.register(Profile, ProfileAdmin)
admin.site.register(Video, VideoAdmin)
admin.site.register(Subtitle)
admin.site.register(VideoTranscript, VideoTranscriptAdmin)
admin.site.register(TranscriptPreference, TranscriptPreferenceAdmin)
admin.site.register(VideoImage, VideoImageAdmin)
admin.site.register(CourseVideo, CourseVideoAdmin)
......@@ -4,22 +4,22 @@
The internal API for VAL.
"""
import logging
from lxml.etree import Element, SubElement
from enum import Enum
from django.core.exceptions import ValidationError, ObjectDoesNotExist
from django.core.files.base import ContentFile
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from lxml import etree
from lxml.etree import Element, SubElement
from edxval.models import Video, EncodedVideo, CourseVideo, Profile, VideoImage
from edxval.serializers import VideoSerializer
from edxval.exceptions import ( # pylint: disable=unused-import
ValError,
ValInternalError,
ValVideoNotFoundError,
ValCannotCreateError,
ValCannotUpdateError
)
from edxval.exceptions import (InvalidTranscriptFormat,
InvalidTranscriptProvider, ValCannotCreateError,
ValCannotUpdateError, ValInternalError,
ValVideoNotFoundError)
from edxval.models import (CourseVideo, EncodedVideo, Profile,
TranscriptFormat, TranscriptPreference,
TranscriptProviderType, Video, VideoImage,
VideoTranscript)
from edxval.serializers import TranscriptPreferenceSerializer, TranscriptSerializer, VideoSerializer
from edxval.utils import THIRD_PARTY_TRANSCRIPTION_PLANS
logger = logging.getLogger(__name__) # pylint: disable=C0103
......@@ -143,6 +143,210 @@ def update_video_status(edx_video_id, status):
video.save()
def is_transcript_available(video_id, language_code=None):
    """
    Tell whether at least one transcript exists for a video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from
            external sources in a video component.
        language_code: optional language code of the requested transcript;
            when given, only transcripts in that language are considered.

    Returns:
        True if a matching transcript exists, False otherwise.
    """
    matching_transcripts = VideoTranscript.objects.filter(video_id=video_id)
    if language_code:
        matching_transcripts = matching_transcripts.filter(language_code=language_code)

    return matching_transcripts.exists()
def get_video_transcripts(video_id):
    """
    Return the serialized transcripts of a video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from
            external sources in a video component.

    Returns:
        Serialized transcript data for every transcript of the video, or an
        empty list when the video has none.
    """
    transcripts_set = VideoTranscript.objects.filter(video_id=video_id)
    if not transcripts_set.exists():
        return []

    return TranscriptSerializer(transcripts_set, many=True).data
def get_video_transcript(video_id, language_code):
    """
    Return serialized info for one transcript of a video.

    Arguments:
        video_id(unicode): A video id, it can be an edx_video_id or an external video id
            extracted from external sources of a video component.
        language_code(unicode): the language code of the requested transcript.

    Returns:
        Serialized transcript data, or None when no such transcript exists.
    """
    transcript = VideoTranscript.get_or_none(video_id=video_id, language_code=language_code)
    if not transcript:
        return None

    return TranscriptSerializer(transcript).data
def get_video_transcript_data(video_ids, language_code):
    """
    Get video transcript data

    Arguments:
        video_ids(list): list containing edx_video_id and external video ids extracted from
            external sources from a video component.
        language_code(unicode): it will be the language code of the requested transcript.

    Returns:
        A dict containing transcript file name and its content. It will be for a video whose
        transcript is found first while iterating the video ids. None if no video id has a
        transcript in the requested language.

    Raises:
        Any exception other than VideoTranscript.DoesNotExist raised while looking up or
        reading a transcript is logged and re-raised.
    """
    for video_id in video_ids:
        try:
            video_transcript = VideoTranscript.objects.get(video_id=video_id, language_code=language_code)
            transcript_file = video_transcript.transcript
            try:
                content = transcript_file.file.read()
            finally:
                # Release the storage handle opened by `.file`; close() does
                # nothing if the file was never opened.
                transcript_file.close()

            return dict(file_name=transcript_file.name, content=content)
        except VideoTranscript.DoesNotExist:
            # No transcript for this id in the requested language; try the next id.
            continue
        except Exception:
            logger.exception(
                '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
                video_id,
                language_code
            )
            raise

    return None
def get_available_transcript_languages(video_ids):
    """
    Collect the languages in which transcripts exist for the given video ids.

    Arguments:
        video_ids(list): list containing edx_video_id and external video ids extracted from
            external sources of a video component.

    Returns:
        A list containing unique transcript language codes for the video ids.
    """
    transcripts = VideoTranscript.objects.filter(video_id__in=video_ids)
    language_codes = transcripts.values_list('language_code', flat=True)
    # Several video ids may share a language, so collapse duplicates.
    unique_language_codes = set(language_codes)
    return list(unique_language_codes)
def get_video_transcript_url(video_id, language_code):
    """
    Return the url of a video transcript, or None if there is no transcript.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from
            external sources in a video component.
        language_code: language code of a video transcript
    """
    video_transcript = VideoTranscript.get_or_none(video_id, language_code)
    if not video_transcript:
        return None

    return video_transcript.url()
def create_or_update_video_transcript(
        video_id,
        language_code,
        file_name,
        file_format,
        provider,
        file_data=None,
):
    """
    Create or update the transcript of a video after validating its format and provider.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from
            external sources in a video component.
        language_code: language code of a video transcript
        file_name: file name of a video transcript
        file_format: format of the transcript
        provider: transcript provider
        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.

    Returns:
        video transcript url

    Raises:
        InvalidTranscriptFormat: if `file_format` is not one of TranscriptFormat.CHOICES
        InvalidTranscriptProvider: if `provider` is not one of TranscriptProviderType.CHOICES
    """
    supported_formats = [format_code for format_code, __ in TranscriptFormat.CHOICES]
    if file_format not in supported_formats:
        raise InvalidTranscriptFormat('{} transcript format is not supported'.format(file_format))

    supported_providers = [provider_code for provider_code, __ in TranscriptProviderType.CHOICES]
    if provider not in supported_providers:
        raise InvalidTranscriptProvider('{} transcript provider is not supported'.format(provider))

    # Only the transcript object is needed here, not the `created` flag.
    saved_transcript = VideoTranscript.create_or_update(
        video_id,
        language_code,
        file_name,
        file_format,
        provider,
        file_data,
    )[0]

    return saved_transcript.url()
def get_3rd_party_transcription_plans():
    """
    Retrieves 3rd party transcription plans.

    Returns:
        The THIRD_PARTY_TRANSCRIPTION_PLANS object imported from edxval.utils,
        returned as-is (not copied).
    """
    return THIRD_PARTY_TRANSCRIPTION_PLANS
def get_transcript_preferences(course_id):
    """
    Retrieve the course-wide transcript preferences.

    Arguments:
        course_id (str): course id

    Returns:
        Serialized transcript preferences, or None when the course has none.
    """
    try:
        course_preference = TranscriptPreference.objects.get(course_id=course_id)
    except TranscriptPreference.DoesNotExist:
        return None
    else:
        return TranscriptPreferenceSerializer(course_preference).data
def create_or_update_transcript_preferences(course_id, **preferences):
    """
    Create the course-wide transcript preferences, or update them if they exist.

    Arguments:
        course_id(str): course id

    Keyword Arguments:
        preferences(dict): field values to store on the preference record

    Returns:
        Serialized transcript preferences after the create/update.
    """
    # update_or_create returns (object, created); only the object is needed.
    saved_preference = TranscriptPreference.objects.update_or_create(
        course_id=course_id,
        defaults=preferences,
    )[0]
    return TranscriptPreferenceSerializer(saved_preference).data
def remove_transcript_preferences(course_id):
    """
    Delete the course-wide transcript preferences, if any.

    A course without stored preferences is silently ignored.

    Arguments:
        course_id(str): course id
    """
    try:
        course_preference = TranscriptPreference.objects.get(course_id=course_id)
    except TranscriptPreference.DoesNotExist:
        return
    else:
        course_preference.delete()
def get_course_video_image_url(course_id, edx_video_id):
"""
Returns course video image url or None if no image found
......@@ -246,11 +450,6 @@ def get_video_info(edx_video_id):
url: url of the video
file_size: size of the video in bytes
profile: ID of the profile
subtitles: a list of Subtitle dicts
fmt: file format (SRT or SJSON)
language: language code
content_url: url of file
url: api url to subtitle
}
Raises:
......@@ -504,13 +703,19 @@ def copy_course_videos(source_course_id, destination_course_id):
)
def export_to_xml(edx_video_id, course_id=None):
def export_to_xml(video_ids, course_id=None, external=False):
"""
Exports data about the given edx_video_id into the given xml object.
Exports data for a video into an xml object.
Args:
edx_video_id (str): The ID of the video to export
NOTE: For external video ids, only transcripts information will be added into xml.
If external=False, then edx_video_id is going to be on first index of the list.
Arguments:
video_ids (list): It can contain edx_video_id and/or multiple external video ids.
We are passing all video ids associated with a video component
so that we can export transcripts for each video id.
course_id (str): The ID of the course with which this video is associated
external (bool): True if first video id in `video_ids` is not edx_video_id else False
Returns:
An lxml video_asset element containing export data
......@@ -518,8 +723,16 @@ def export_to_xml(edx_video_id, course_id=None):
Raises:
ValVideoNotFoundError: if the video does not exist
"""
# val does not store external videos, so construct transcripts information only.
if external:
video_el = Element('video_asset')
return create_transcripts_xml(video_ids, video_el)
# for an internal video, first video id must be edx_video_id
video_id = video_ids[0]
video_image_name = ''
video = _get_video(edx_video_id)
video = _get_video(video_id)
try:
course_video = CourseVideo.objects.select_related('video_image').get(course_id=course_id, video=video)
......@@ -544,23 +757,56 @@ def export_to_xml(edx_video_id, course_id=None):
for name in ['profile', 'url', 'file_size', 'bitrate']
}
)
# Note: we are *not* exporting Subtitle data since it is not currently updated by VEDA or used
# by LMS/Studio.
return create_transcripts_xml(video_ids, video_el)
def create_transcripts_xml(video_ids, video_el):
    """
    Append transcript information for the given video ids to an xml element.

    A `transcripts` child is added only when at least one transcript exists.
    Each language is exported once: the first transcript encountered for a
    language wins and later ones in the same language are skipped.

    Arguments:
        video_ids (list): It can contain edx_video_id and/or multiple external video ids
        video_el (Element): lxml Element object

    Returns:
        lxml Element object with transcripts information
    """
    video_transcripts = VideoTranscript.objects.filter(video_id__in=video_ids)
    if not video_transcripts.exists():
        return video_el

    transcripts_el = SubElement(video_el, 'transcripts')
    seen_language_codes = set()
    for video_transcript in video_transcripts:
        language_code = video_transcript.language_code
        if language_code in seen_language_codes:
            continue

        transcript_attributes = {
            'video_id': video_transcript.video_id,
            'file_name': video_transcript.transcript.name,
            'language_code': language_code,
            'file_format': video_transcript.file_format,
            'provider': video_transcript.provider,
        }
        SubElement(transcripts_el, 'transcript', transcript_attributes)
        seen_language_codes.add(language_code)

    return video_el
def import_from_xml(xml, edx_video_id, course_id=None):
"""
Imports data from a video_asset element about the given edx_video_id.
Imports data from a video_asset element about the given video_id.
If the edx_video_id already exists, then no changes are made. If an unknown
profile is referenced by an encoded video, that encoding will be ignored.
Args:
xml: An lxml video_asset element containing import data
edx_video_id (str): The ID for the video content
Arguments:
xml (Element): An lxml video_asset element containing import data
edx_video_id (str): val video id
course_id (str): The ID of a course to associate the video with
(optional)
Raises:
ValCannotCreateError: if there is an error importing the video
......@@ -568,6 +814,10 @@ def import_from_xml(xml, edx_video_id, course_id=None):
if xml.tag != 'video_asset':
raise ValCannotCreateError('Invalid XML')
# if edx_video_id does not exist then create video transcripts only
if not edx_video_id:
return create_transcript_objects(xml)
# If video with edx_video_id already exists, associate it with the given course_id.
try:
video = Video.objects.get(edx_video_id=edx_video_id)
......@@ -583,6 +833,9 @@ def import_from_xml(xml, edx_video_id, course_id=None):
if image_file_name:
VideoImage.create_or_update(course_video, image_file_name)
# import transcripts
create_transcript_objects(xml)
return
except ValidationError as err:
logger.exception(err.message)
......@@ -617,3 +870,24 @@ def import_from_xml(xml, edx_video_id, course_id=None):
'bitrate': encoded_video_el.get('bitrate'),
})
create_video(data)
create_transcript_objects(xml)
def create_transcript_objects(xml):
    """
    Create or update a VideoTranscript for each transcript element found in the xml.

    Elements missing any required attribute are skipped with a warning, so a
    single malformed entry does not abort the rest of the import.

    Arguments:
        xml (Element): lxml Element object
    """
    # Attribute names, in the positional order VideoTranscript.create_or_update expects.
    required_attributes = ('video_id', 'language_code', 'file_name', 'file_format', 'provider')

    for transcript in xml.findall('.//transcripts/transcript'):
        # Keep the try body limited to the attribute lookups so that a KeyError
        # raised elsewhere is not misreported as missing xml attributes.
        try:
            transcript_args = [transcript.attrib[attribute] for attribute in required_attributes]
        except KeyError:
            logger.warning(
                "VAL: Required attributes are missing from xml, xml=[%s]",
                etree.tostring(transcript).strip()
            )
            continue

        VideoTranscript.create_or_update(*transcript_args)
......@@ -48,3 +48,17 @@ class ValCannotUpdateError(ValError):
This error is raised when an object cannot be updated
"""
pass
class InvalidTranscriptFormat(ValError):
    """
    Raised when a transcript format is not supported.
    """
class InvalidTranscriptProvider(ValError):
    """
    Raised when a transcript provider is not supported.
    """
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
import model_utils.fields
import django.utils.timezone
import edxval.models
class Migration(migrations.Migration):
    """
    Creates the TranscriptPreference and VideoTranscript models.
    """

    # Runs after edxval migration 0005_videoimage.
    dependencies = [
        ('edxval', '0005_videoimage'),
    ]

    operations = [
        # Transcript preferences; course_id is unique.
        migrations.CreateModel(
            name='TranscriptPreference',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, verbose_name='created', editable=False)),
                ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, verbose_name='modified', editable=False)),
                ('course_id', models.CharField(unique=True, max_length=255, verbose_name=b'Course ID')),
                ('provider', models.CharField(max_length=20, verbose_name=b'Provider', choices=[(b'Custom', b'Custom'), (b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')])),
                ('cielo24_fidelity', models.CharField(blank=True, max_length=20, null=True, verbose_name=b'Cielo24 Fidelity', choices=[(b'MECHANICAL', b'Mechanical, 75% Accuracy'), (b'PREMIUM', b'Premium, 95% Accuracy'), (b'PROFESSIONAL', b'Professional, 99% Accuracy')])),
                ('cielo24_turnaround', models.CharField(blank=True, max_length=20, null=True, verbose_name=b'Cielo24 Turnaround', choices=[(b'STANDARD', b'Standard, 48h'), (b'PRIORITY', b'Priority, 24h')])),
                ('three_play_turnaround', models.CharField(blank=True, max_length=20, null=True, verbose_name=b'3PlayMedia Turnaround', choices=[(b'extended_service', b'10-Day/Extended'), (b'default', b'4-Day/Default'), (b'expedited_service', b'2-Day/Expedited'), (b'rush_service', b'24 hour/Rush'), (b'same_day_service', b'Same Day')])),
                ('preferred_languages', edxval.models.ListField(default=[], verbose_name=b'Preferred Languages', max_items=50, blank=True)),
                ('video_source_language', models.CharField(help_text=b'This specifies the speech language of a Video.', max_length=50, null=True, verbose_name=b'Video Source Language', blank=True)),
            ],
            options={
                'abstract': False,
            },
        ),
        # Transcript file record keyed by a video id and a language code.
        migrations.CreateModel(
            name='VideoTranscript',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, verbose_name='created', editable=False)),
                ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, verbose_name='modified', editable=False)),
                ('video_id', models.CharField(help_text=b'It can be an edx_video_id or an external video id', max_length=255)),
                ('transcript', edxval.models.CustomizableFileField(null=True, blank=True)),
                ('language_code', models.CharField(max_length=50, db_index=True)),
                ('provider', models.CharField(default=b'Custom', max_length=30, choices=[(b'Custom', b'Custom'), (b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')])),
                ('file_format', models.CharField(db_index=True, max_length=20, choices=[(b'srt', b'SubRip'), (b'sjson', b'SRT JSON')])),
            ],
        ),
        # At most one transcript per (video_id, language_code) pair.
        migrations.AlterUniqueTogether(
            name='videotranscript',
            unique_together=set([('video_id', 'language_code')]),
        ),
    ]
......@@ -11,22 +11,23 @@ themselves. After these are resolved, errors such as a negative file_size or
invalid profile_name will be returned.
"""
from contextlib import closing
import json
import logging
import os
from contextlib import closing
from uuid import uuid4
from django.db import models
from django.dispatch import receiver
from django.core.exceptions import ValidationError
from django.core.validators import MinValueValidator, RegexValidator
from django.core.urlresolvers import reverse
from django.core.validators import MinValueValidator, RegexValidator
from django.db import models
from django.dispatch import receiver
from django.utils.six import python_2_unicode_compatible
from model_utils.models import TimeStampedModel
from edxval.utils import video_image_path, get_video_image_storage
from edxval.utils import (get_video_image_storage,
get_video_transcript_storage, video_image_path,
video_transcript_path)
logger = logging.getLogger(__name__) # pylint: disable=C0103
......@@ -129,7 +130,7 @@ class Video(models.Model):
qset = cls.objects.filter(
encoded_videos__profile__profile_name='youtube',
encoded_videos__url=youtube_id
).prefetch_related('encoded_videos', 'courses', 'subtitles')
).prefetch_related('encoded_videos', 'courses')
return qset
......@@ -209,13 +210,17 @@ class ListField(models.TextField):
"""
ListField use to store and retrieve list data.
"""
def __init__(self, max_items=LIST_MAX_ITEMS, *args, **kwargs):
self.max_items = max_items
super(ListField, self).__init__(*args, **kwargs)
def get_prep_value(self, value):
"""
Converts a list to its json represetation to store in database as text.
Converts a list to its json representation to store in database as text.
"""
if value and not isinstance(value, list):
raise ValidationError(u'ListField value {} is not a list.'.format(value))
return json.dumps(self.validate(value) or [])
return json.dumps(self.validate_list(value) or [])
def from_db_value(self, value, expression, connection, context):
"""
......@@ -232,7 +237,7 @@ class ListField(models.TextField):
# If a list is set then validated its items
if isinstance(value, list):
return self.validate(value)
py_list = self.validate_list(value)
else: # try to de-serialize value and expect list and then validate
try:
py_list = json.loads(value)
......@@ -240,13 +245,13 @@ class ListField(models.TextField):
if not isinstance(py_list, list):
raise TypeError
self.validate(py_list)
self.validate_list(py_list)
except (ValueError, TypeError):
raise ValidationError(u'Must be a valid list of strings.')
return py_list
def validate(self, value):
def validate_list(self, value):
"""
Validate data before saving to database.
......@@ -259,14 +264,23 @@ class ListField(models.TextField):
Raises:
ValidationError
"""
if len(value) > LIST_MAX_ITEMS:
raise ValidationError(u'list must not contain more than {} items.'.format(LIST_MAX_ITEMS))
if len(value) > self.max_items:
raise ValidationError(
u'list must not contain more than {max_items} items.'.format(max_items=self.max_items)
)
if all(isinstance(item, basestring) for item in value) is False:
raise ValidationError(u'list must only contain strings.')
return value
def deconstruct(self):
name, path, args, kwargs = super(ListField, self).deconstruct()
# Only include kwarg if it's not the default
if self.max_items != LIST_MAX_ITEMS:
kwargs['max_items'] = self.max_items
return name, path, args, kwargs
class VideoImage(TimeStampedModel):
"""
......@@ -335,6 +349,139 @@ class VideoImage(TimeStampedModel):
return storage.url(self.image.name)
class TranscriptProviderType(object):
    """
    Transcript providers.
    """
    CUSTOM = 'Custom'
    THREE_PLAY_MEDIA = '3PlayMedia'
    CIELO24 = 'Cielo24'
    # (value, label) pairs; each provider uses its value as its label.
    CHOICES = (
        (CUSTOM, CUSTOM),
        (THREE_PLAY_MEDIA, THREE_PLAY_MEDIA),
        (CIELO24, CIELO24),
    )
class TranscriptFormat(object):
    """
    Transcript file formats.
    """
    SRT = 'srt'
    SJSON = 'sjson'
    # (value, label) pairs.
    CHOICES = (
        (SRT, 'SubRip'),
        (SJSON, 'SRT JSON')
    )
class CustomizableFileField(models.FileField):
    """
    Subclass of FileField that allows custom settings to not
    be serialized (hard-coded) in migrations. Otherwise,
    migrations include optional settings for storage (such as
    the storage class and bucket name); we don't want to
    create new migration files for each configuration change.
    """
    def __init__(self, *args, **kwargs):
        """
        Force the transcript-specific field options, overriding any values
        passed by the caller for the same keyword arguments.
        """
        kwargs.update(dict(
            upload_to=video_transcript_path,
            storage=get_video_transcript_storage(),
            max_length=255,  # enough for uuid
            blank=True,
            null=True
        ))
        super(CustomizableFileField, self).__init__(*args, **kwargs)

    def deconstruct(self):
        """
        Override base class method.

        Drops the options that `__init__` always sets from configuration so
        that they are not written into migrations.

        Returns:
            The (name, path, args, kwargs) tuple from the base class, without
            the `upload_to`, `storage` and `max_length` keyword arguments.
        """
        name, path, args, kwargs = super(CustomizableFileField, self).deconstruct()
        # The base class may omit a key (e.g. `storage` when it equals the
        # default storage), so remove each one only if it is present.
        for configured_option in ('upload_to', 'storage', 'max_length'):
            kwargs.pop(configured_option, None)
        return name, path, args, kwargs
class VideoTranscript(TimeStampedModel):
    """
    A transcript file for a video id in a single language.
    """
    video_id = models.CharField(max_length=255, help_text='It can be an edx_video_id or an external video id')
    transcript = CustomizableFileField()
    language_code = models.CharField(max_length=50, db_index=True)
    provider = models.CharField(
        max_length=30,
        choices=TranscriptProviderType.CHOICES,
        default=TranscriptProviderType.CUSTOM,
    )
    file_format = models.CharField(max_length=20, db_index=True, choices=TranscriptFormat.CHOICES)

    class Meta:
        unique_together = ('video_id', 'language_code')

    @classmethod
    def get_or_none(cls, video_id, language_code):
        """
        Look up the transcript of a video in a language.

        Arguments:
            video_id(unicode): video id to which transcript may be associated
            language_code(unicode): language of the requested transcript

        Returns:
            The matching VideoTranscript, or None when there is none.
        """
        try:
            return cls.objects.get(video_id=video_id, language_code=language_code)
        except cls.DoesNotExist:
            return None

    @classmethod
    def create_or_update(cls, video_id, language_code, file_name, file_format, provider, file_data=None):
        """
        Create a transcript for (video_id, language_code) or update the existing one.

        Arguments:
            video_id (str): unique id for a video
            language_code (str): language code
            file_name (str): File name of the transcript
            file_format (str): Format of transcript
            provider (str): Transcript provider
            file_data (InMemoryUploadedFile): File data to be saved

        Returns:
            Returns a tuple of (video_transcript, created).
        """
        video_transcript, created = cls.objects.get_or_create(video_id=video_id, language_code=language_code)

        # An existing record that receives new data drops its old file first.
        if not created and file_data:
            video_transcript.transcript.delete()

        video_transcript.provider = provider
        video_transcript.file_format = file_format
        video_transcript.transcript.name = file_name

        if file_data:
            with closing(file_data) as upload:
                # Store under a generated name, keeping only the original extension.
                extension = os.path.splitext(file_name)[1]
                storage_name = '{uuid}{ext}'.format(uuid=uuid4().hex, ext=extension)
                try:
                    video_transcript.transcript.save(storage_name, upload)
                except Exception:  # pylint: disable=broad-except
                    logger.exception('VAL: Transcript save failed to storage for video_id [%s]', video_id)
                    raise

        video_transcript.save()
        return video_transcript, created

    def url(self):
        """
        Return the storage url of this transcript's file.
        """
        return get_video_transcript_storage().url(self.transcript.name)

    def __unicode__(self):
        return u'{lang} Transcript for {video}'.format(lang=self.language_code, video=self.video_id)
SUBTITLE_FORMATS = (
('srt', 'SubRip'),
('sjson', 'SRT JSON')
......@@ -376,6 +523,95 @@ class Subtitle(models.Model):
return 'text/plain'
class Cielo24Turnaround(object):
    """
    Cielo24 turnarounds.
    """
    STANDARD = 'STANDARD'
    PRIORITY = 'PRIORITY'
    # (value, label) pairs.
    CHOICES = (
        (STANDARD, 'Standard, 48h'),
        (PRIORITY, 'Priority, 24h'),
    )
class Cielo24Fidelity(object):
    """
    Cielo24 fidelity.
    """
    MECHANICAL = 'MECHANICAL'
    PREMIUM = 'PREMIUM'
    PROFESSIONAL = 'PROFESSIONAL'
    # (value, label) pairs.
    CHOICES = (
        (MECHANICAL, 'Mechanical, 75% Accuracy'),
        (PREMIUM, 'Premium, 95% Accuracy'),
        (PROFESSIONAL, 'Professional, 99% Accuracy'),
    )
class ThreePlayTurnaround(object):
    """
    3PlayMedia turnarounds.
    """
    EXTENDED_SERVICE = 'extended_service'
    DEFAULT = 'default'
    EXPEDITED_SERVICE = 'expedited_service'
    RUSH_SERVICE = 'rush_service'
    SAME_DAY_SERVICE = 'same_day_service'
    # (value, label) pairs.
    CHOICES = (
        (EXTENDED_SERVICE, '10-Day/Extended'),
        (DEFAULT, '4-Day/Default'),
        (EXPEDITED_SERVICE, '2-Day/Expedited'),
        (RUSH_SERVICE, '24 hour/Rush'),
        (SAME_DAY_SERVICE, 'Same Day'),
    )
class TranscriptPreference(TimeStampedModel):
    """
    Third Party Transcript Preferences for a Course
    """
    # One preference record per course (unique).
    course_id = models.CharField(verbose_name='Course ID', max_length=255, unique=True)
    provider = models.CharField(
        verbose_name='Provider',
        max_length=20,
        choices=TranscriptProviderType.CHOICES,
    )
    # The three provider-specific options below are all optional.
    cielo24_fidelity = models.CharField(
        verbose_name='Cielo24 Fidelity',
        max_length=20,
        choices=Cielo24Fidelity.CHOICES,
        null=True,
        blank=True,
    )
    cielo24_turnaround = models.CharField(
        verbose_name='Cielo24 Turnaround',
        max_length=20,
        choices=Cielo24Turnaround.CHOICES,
        null=True,
        blank=True,
    )
    three_play_turnaround = models.CharField(
        verbose_name='3PlayMedia Turnaround',
        max_length=20,
        choices=ThreePlayTurnaround.CHOICES,
        null=True,
        blank=True,
    )
    # NOTE(review): `default=[]` is a single list object shared as the field
    # default; Django recommends a callable such as `list`. Changing it would
    # alter the field definition and likely need a new migration -- confirm
    # before touching.
    preferred_languages = ListField(verbose_name='Preferred Languages', max_items=50, default=[], blank=True)
    video_source_language = models.CharField(
        verbose_name='Video Source Language',
        max_length=50,
        null=True,
        blank=True,
        help_text='This specifies the speech language of a Video.'
    )

    def __unicode__(self):
        return u'{course_id} - {provider}'.format(course_id=self.course_id, provider=self.provider)
@receiver(models.signals.post_save, sender=Video)
def video_status_update_callback(sender, **kwargs): # pylint: disable=unused-argument
"""
......
......@@ -5,9 +5,10 @@ Serialization is usually sent through the VideoSerializer which uses the
EncodedVideoSerializer which uses the profile_name as its profile field.
"""
from rest_framework import serializers
from rest_framework.fields import IntegerField, DateTimeField
from rest_framework.fields import DateTimeField, IntegerField
from edxval.models import Profile, Video, EncodedVideo, Subtitle, CourseVideo, VideoImage
from edxval.models import (CourseVideo, EncodedVideo, Profile, TranscriptPreference, Video,
VideoImage, VideoTranscript)
class EncodedVideoSerializer(serializers.ModelSerializer):
......@@ -50,37 +51,22 @@ class EncodedVideoSerializer(serializers.ModelSerializer):
return data.get('profile', None)
class SubtitleSerializer(serializers.ModelSerializer):
class TranscriptSerializer(serializers.ModelSerializer):
"""
Serializer for Subtitle objects
Serializer for VideoTranscript objects
"""
content_url = serializers.CharField(source='get_absolute_url', read_only=True)
content = serializers.CharField(write_only=True)
class Meta: # pylint: disable=C1001, C0111
model = VideoTranscript
lookup_field = 'video_id'
fields = ('video_id', 'url', 'language_code', 'provider', 'file_format')
def validate(self, data):
"""
Validate that the subtitle is in the correct format
"""
value = data.get("content")
if data.get("fmt") == "sjson":
import json
try:
loaded = json.loads(value)
except ValueError:
raise serializers.ValidationError("Not in JSON format")
else:
data["content"] = json.dumps(loaded)
return data
url = serializers.SerializerMethodField()
class Meta: # pylint: disable=C1001, C0111
model = Subtitle
lookup_field = "id"
fields = (
"fmt",
"language",
"content_url",
"content",
)
def get_url(self, transcript):
"""
Retrieves the transcript url.
"""
return transcript.url()
class CourseSerializer(serializers.RelatedField):
......@@ -118,7 +104,6 @@ class VideoSerializer(serializers.ModelSerializer):
encoded_videos takes a list of dicts EncodedVideo data.
"""
encoded_videos = EncodedVideoSerializer(many=True)
subtitles = SubtitleSerializer(many=True, required=False)
courses = CourseSerializer(
many=True,
read_only=False,
......@@ -170,7 +155,6 @@ class VideoSerializer(serializers.ModelSerializer):
"""
courses = validated_data.pop("courses", [])
encoded_videos = validated_data.pop("encoded_videos", [])
subtitles = validated_data.pop("subtitles", [])
video = Video.objects.create(**validated_data)
......@@ -179,11 +163,6 @@ class VideoSerializer(serializers.ModelSerializer):
for video_data in encoded_videos
)
Subtitle.objects.bulk_create(
Subtitle(video=video, **subtitle_data)
for subtitle_data in subtitles
)
# The CourseSerializer will already have converted the course data
# to CourseVideo models, so we can just set the video and save.
# Also create VideoImage objects if an image filename is present
......@@ -211,13 +190,6 @@ class VideoSerializer(serializers.ModelSerializer):
for video_data in validated_data.get("encoded_videos", [])
)
# Set subtitles
instance.subtitles.all().delete()
Subtitle.objects.bulk_create(
Subtitle(video=instance, **subtitle_data)
for subtitle_data in validated_data.get("subtitles", [])
)
# Set courses
# NOTE: for backwards compatibility with the DRF v2 behavior,
# we do NOT delete existing course videos during the update.
......@@ -229,3 +201,30 @@ class VideoSerializer(serializers.ModelSerializer):
VideoImage.create_or_update(course_video, image_name)
return instance
class TranscriptPreferenceSerializer(serializers.ModelSerializer):
    """
    Serializer for TranscriptPreference
    """

    class Meta:  # pylint: disable=C1001, C0111
        model = TranscriptPreference
        fields = (
            'course_id',
            'provider',
            'cielo24_fidelity',
            'cielo24_turnaround',
            'three_play_turnaround',
            'preferred_languages',
            'video_source_language',
            'modified',
        )

    # Populated by get_preferred_languages() below rather than by the default
    # model-field mapping.
    preferred_languages = serializers.SerializerMethodField()

    def get_preferred_languages(self, transcript_preference):
        """
        Returns python list for preferred_languages model field.

        Arguments:
            transcript_preference: the TranscriptPreference being serialized
        """
        return transcript_preference.preferred_languages
......@@ -190,3 +190,13 @@ VIDEO_IMAGE_SETTINGS = dict(
VIDEO_IMAGE_MIN_BYTES=100,
DIRECTORY_PREFIX='video-images/',
)
# Settings for video transcript files.
VIDEO_TRANSCRIPTS_SETTINGS = dict(
    # Backend storage (example of a custom storage configuration):
    # STORAGE_CLASS='storages.backends.s3boto.S3BotoStorage',
    # STORAGE_KWARGS=dict(bucket='video-transcripts-bucket'),
    # If you change the prefix value, update .gitignore accordingly so that
    # transcript files created by uploads during tests stay ignored.
    VIDEO_TRANSCRIPTS_MAX_BYTES=3145728,  # 3 MB
    DIRECTORY_PREFIX='video-transcripts/',
)
......@@ -3,6 +3,14 @@
"""
Constants used for tests.
"""
from edxval.models import (
TranscriptFormat,
TranscriptProviderType,
Cielo24Fidelity,
Cielo24Turnaround,
ThreePlayTurnaround
)
EDX_VIDEO_ID = "itchyjacket"
"""
Generic Profiles for manually creating profile objects
......@@ -388,3 +396,44 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
encoded_videos=[],
subtitles=[]
)
# Keyword arguments for creating VideoTranscript rows in tests.
# English SRT transcript from Cielo24 for the `super-soaker` video.
VIDEO_TRANSCRIPT_CIELO24 = dict(
    video_id='super-soaker',
    language_code='en',
    transcript='wow.srt',
    provider=TranscriptProviderType.CIELO24,
    file_format=TranscriptFormat.SRT,
)
# German SJSON transcript from 3PlayMedia for the same `super-soaker` video.
VIDEO_TRANSCRIPT_3PLAY = dict(
    video_id='super-soaker',
    language_code='de',
    transcript='wow.sjson',
    provider=TranscriptProviderType.THREE_PLAY_MEDIA,
    file_format=TranscriptFormat.SJSON,
)
# German SRT transcript with a custom provider, keyed by an external video id.
VIDEO_TRANSCRIPT_CUSTOM = dict(
    video_id='external_video_id',
    language_code='de',
    transcript='wow.srt',
    provider=TranscriptProviderType.CUSTOM,
    file_format=TranscriptFormat.SRT,
)
# Keyword arguments for TranscriptPreference rows in tests.
# Cielo24 preferences for the demo course (no 3Play turnaround given).
TRANSCRIPT_PREFERENCES_CIELO24 = dict(
    course_id='edX/DemoX/Demo_Course',
    provider=TranscriptProviderType.CIELO24,
    cielo24_fidelity=Cielo24Fidelity.PROFESSIONAL,
    cielo24_turnaround=Cielo24Turnaround.PRIORITY,
    preferred_languages=['ar'],
    video_source_language='en',
)
# 3PlayMedia preferences for the same demo course (no Cielo24 options given).
TRANSCRIPT_PREFERENCES_3PLAY = dict(
    course_id='edX/DemoX/Demo_Course',
    provider=TranscriptProviderType.THREE_PLAY_MEDIA,
    three_play_turnaround=ThreePlayTurnaround.SAME_DAY_SERVICE,
    preferred_languages=['ar', 'en'],
    video_source_language='en',
)
1
00:00:07,180 --> 00:00:08,460
This is Arrow line 1.
2
00:00:08,460 --> 00:00:10,510
This is Arrow line 2.
3
00:00:10,510 --> 00:00:13,560
This is Arrow line 3.
4
00:00:13,560 --> 00:00:14,360
This is Arrow line 4.
5
00:00:14,370 --> 00:00:16,530
This is Arrow line 5.
6
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
\ No newline at end of file
1
00:00:07,180 --> 00:00:08,460
This is Flash line 1.
2
00:00:08,460 --> 00:00:10,510
This is Flash line 2.
3
00:00:10,510 --> 00:00:13,560
This is Flash line 3.
4
00:00:13,560 --> 00:00:14,360
This is Flash line 4.
5
00:00:14,370 --> 00:00:16,530
This is Flash line 5.
6
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
\ No newline at end of file
......@@ -5,27 +5,40 @@ Tests for the API for Video Abstraction Layer
import json
import mock
from mock import patch
from lxml import etree
from ddt import data, ddt, unpack
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.files.base import ContentFile
from django.core.files.images import ImageFile
from django.test import TestCase
from django.db import DatabaseError
from django.core.urlresolvers import reverse
from django.db import DatabaseError
from django.test import TestCase
from lxml import etree
from mock import patch
from rest_framework import status
from ddt import ddt, data, unpack
from django.conf import settings
from edxval.models import Profile, Video, EncodedVideo, CourseVideo, VideoImage, LIST_MAX_ITEMS
from edxval import api as api
from edxval.api import (
SortDirection,
ValCannotCreateError,
ValCannotUpdateError,
ValVideoNotFoundError,
VideoSortField,
)
from edxval.tests import constants, APIAuthTestCase
from edxval.api import (InvalidTranscriptFormat, InvalidTranscriptProvider,
SortDirection, ValCannotCreateError,
ValCannotUpdateError, ValVideoNotFoundError,
VideoSortField)
from edxval.models import (LIST_MAX_ITEMS, CourseVideo, EncodedVideo, Profile,
TranscriptFormat, TranscriptProviderType, Video,
VideoImage, VideoTranscript, TranscriptPreference)
from edxval.tests import APIAuthTestCase, constants
from edxval import utils
# SRT-style transcript text (two cues, the second one non-ASCII) that the
# transcript tests wrap in a ContentFile and upload as file content.
FILE_DATA = """
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
"""
class SortedVideoTestMixin(object):
......@@ -759,7 +772,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
"""
Tests number of queries for a Video/EncodedVideo(1) pair
"""
with self.assertNumQueries(6):
with self.assertNumQueries(5):
api.get_video_info(constants.COMPLETE_SET_FISH.get("edx_video_id"))
def test_get_info_queries_for_one_encoded_video(self):
......@@ -771,7 +784,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
url, constants.COMPLETE_SET_STAR, format='json'
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(5):
with self.assertNumQueries(4):
api.get_video_info(constants.COMPLETE_SET_STAR.get("edx_video_id"))
def test_get_info_queries_for_only_video(self):
......@@ -783,7 +796,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
url, constants.VIDEO_DICT_ZEBRA, format='json'
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(4):
with self.assertNumQueries(3):
api.get_video_info(constants.VIDEO_DICT_ZEBRA.get("edx_video_id"))
......@@ -892,6 +905,17 @@ class ExportTest(TestCase):
**constants.ENCODED_VIDEO_DICT_HLS
)
# create external video transcripts
VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_CUSTOM)
video_transcript = dict(constants.VIDEO_TRANSCRIPT_CUSTOM, language_code=u'ar')
VideoTranscript.objects.create(**video_transcript)
video_transcript = dict(constants.VIDEO_TRANSCRIPT_CUSTOM, video_id=u'external_video_id2', language_code=u'fr')
VideoTranscript.objects.create(**video_transcript)
# create internal video transcripts
VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_CIELO24)
VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_3PLAY)
def assert_xml_equal(self, left, right):
"""
Assert that the given XML fragments have the same attributes, text, and
......@@ -917,7 +941,7 @@ class ExportTest(TestCase):
<video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/>
""")
self.assert_xml_equal(
api.export_to_xml(constants.VIDEO_DICT_STAR["edx_video_id"]),
api.export_to_xml([constants.VIDEO_DICT_STAR["edx_video_id"]]),
expected
)
......@@ -932,17 +956,73 @@ class ExportTest(TestCase):
<encoded_video url="http://www.meowmix.com" file_size="11" bitrate="22" profile="mobile"/>
<encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
<encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
<transcripts>
<transcript file_format="sjson" file_name="wow.sjson" language_code="de" provider="3PlayMedia" video_id="super-soaker"/>
<transcript file_format="srt" file_name="wow.srt" language_code="en" provider="Cielo24" video_id="super-soaker" />
</transcripts>
</video_asset>
""".format(image=image))
""".format(image=image, video_id=constants.VIDEO_DICT_FISH['edx_video_id']))
self.assert_xml_equal(
api.export_to_xml(constants.VIDEO_DICT_FISH['edx_video_id'], course_id),
api.export_to_xml([constants.VIDEO_DICT_FISH['edx_video_id']], course_id),
expected
)
def test_unknown_video(self):
with self.assertRaises(ValVideoNotFoundError):
api.export_to_xml("unknown_video")
api.export_to_xml(["unknown_video"])
def test_external_video_transcript(self):
    """
    Verify that transcript export for multiple external videos is working as expected.

    Two of the requested ids ('missing', 'missing2') have no transcripts, so
    only the transcripts of the two external ids are expected in the output.
    """
    video_ids = ['missing', 'external_video_id', 'missing2', 'external_video_id2']
    # The template has no placeholders, so it is parsed directly; the former
    # `.format(video_id='')` call on it was a no-op.
    expected = self.parse_xml("""
<video_asset>
<transcripts>
<transcript file_format="srt" file_name="wow.srt" language_code="ar" provider="Custom" video_id="external_video_id"/>
<transcript file_format="srt" file_name="wow.srt" language_code="de" provider="Custom" video_id="external_video_id"/>
<transcript file_format="srt" file_name="wow.srt" language_code="fr" provider="Custom" video_id="external_video_id2"/>
</transcripts>
</video_asset>
""")
    self.assert_xml_equal(
        api.export_to_xml(video_ids, external=True),
        expected
    )
def test_with_multiple_video_ids(self):
    """
    Verify that transcript export with multiple video ids is working as expected.

    An internal video id ('super-soaker') is exported together with an
    external one; the expected element carries the internal video's
    attributes and encodings plus transcripts from both ids.
    """
    video_ids = ['super-soaker', 'external_video_id']
    # NOTE(review): only one transcript per language code is expected, so the
    # `de` transcript of 'super-soaker' is absent -- presumably the export
    # de-duplicates by language; confirm against `api.export_to_xml`.
    expected = self.parse_xml("""
<video_asset client_video_id="Shallow Swordfish" duration="122.0" image="">
<encoded_video bitrate="22" file_size="11" profile="mobile" url="http://www.meowmix.com" />
<encoded_video bitrate="44" file_size="33" profile="desktop" url="http://www.meowmagic.com" />
<encoded_video bitrate="0" file_size="100" profile="hls" url="https://www.tmnt.com/tmnt101.m3u8" />
<transcripts>
<transcript file_format="srt" file_name="wow.srt" language_code="ar" provider="Custom" video_id="external_video_id" />
<transcript file_format="srt" file_name="wow.srt" language_code="de" provider="Custom" video_id="external_video_id"/>
<transcript file_format="srt" file_name="wow.srt" language_code="en" provider="Cielo24" video_id="super-soaker" />
</transcripts>
</video_asset>
""")
    self.assert_xml_equal(
        api.export_to_xml(video_ids),
        expected
    )
def test_external_no_video_transcript(self):
    """
    Exporting an external video id that has no transcripts yields an empty `video_asset` element.
    """
    exported = api.export_to_xml(['external_video_no_transcript'], external=True)
    empty_asset = self.parse_xml('<video_asset/>')
    self.assert_xml_equal(exported, empty_asset)
@ddt
......@@ -960,7 +1040,11 @@ class ImportTest(TestCase):
)
CourseVideo.objects.create(video=video, course_id='existing_course_id')
def make_import_xml(self, video_dict, encoded_video_dicts=None, image=None):
self.transcript_data1 = dict(constants.VIDEO_TRANSCRIPT_CIELO24, video_id='little-star')
self.transcript_data2 = dict(constants.VIDEO_TRANSCRIPT_3PLAY, video_id='little-star')
self.transcript_data3 = dict(self.transcript_data2, video_id='super-soaker')
def make_import_xml(self, video_dict, encoded_video_dicts=None, image=None, video_transcripts=None):
import_xml = etree.Element(
"video_asset",
attrib={
......@@ -981,6 +1065,22 @@ class ImportTest(TestCase):
for key, val in encoding_dict.items()
}
)
if video_transcripts:
transcripts_el = etree.SubElement(import_xml, 'transcripts')
for video_transcript in video_transcripts:
etree.SubElement(
transcripts_el,
'transcript',
{
'video_id': video_transcript['video_id'],
'file_name': video_transcript['transcript'],
'language_code': video_transcript['language_code'],
'file_format': video_transcript['file_format'],
'provider': video_transcript['provider'],
}
)
return import_xml
def assert_obj_matches_dict_for_keys(self, obj, dict_, keys):
......@@ -1007,18 +1107,44 @@ class ImportTest(TestCase):
api.import_from_xml(xml, edx_video_id, course_id)
self.assertFalse(Video.objects.filter(edx_video_id=edx_video_id).exists())
def assert_transcripts(self, video_id, expected_transcripts):
    """
    Assert that the transcripts stored for `video_id` match `expected_transcripts`.

    Arguments:
        video_id: id whose VideoTranscript rows are checked.
        expected_transcripts: list of dicts, each holding the file name
            under a `transcript` key plus `language_code` and the other
            serialized fields. The dicts are not modified.
    """
    # Verify total number of expected transcripts for a video
    video_transcripts = VideoTranscript.objects.filter(video_id=video_id)
    self.assertEqual(video_transcripts.count(), len(expected_transcripts))

    # Verify data for each transcript
    for expected_transcript in expected_transcripts:
        # Rename on a copy: the caller's fixture dict must keep its
        # `transcript` key so it can be reused after this assertion.
        expected = dict(expected_transcript)
        expected['name'] = expected.pop('transcript')

        # get the imported transcript and rename `url` key
        received = api.TranscriptSerializer(
            VideoTranscript.objects.get(video_id=video_id, language_code=expected['language_code'])
        ).data
        received['name'] = received.pop('url')

        self.assertDictEqual(received, expected)
def test_new_video_full(self):
new_course_id = "new_course_id"
new_course_id = 'new_course_id'
xml = self.make_import_xml(
video_dict=constants.VIDEO_DICT_STAR,
encoded_video_dicts=[constants.ENCODED_VIDEO_DICT_STAR, constants.ENCODED_VIDEO_DICT_FISH_HLS],
image=self.image_name
image=self.image_name,
video_transcripts=[self.transcript_data1, self.transcript_data2]
)
api.import_from_xml(xml, constants.VIDEO_DICT_STAR["edx_video_id"], new_course_id)
# there must not be any transcript before import
with self.assertRaises(VideoTranscript.DoesNotExist):
VideoTranscript.objects.get(video_id=constants.VIDEO_DICT_STAR['edx_video_id'])
video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_STAR["edx_video_id"])
api.import_from_xml(xml, constants.VIDEO_DICT_STAR['edx_video_id'], new_course_id)
video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_STAR['edx_video_id'])
self.assert_video_matches_dict(video, constants.VIDEO_DICT_STAR)
self.assert_encoded_video_matches_dict(
video.encoded_videos.get(profile__profile_name=constants.PROFILE_MOBILE),
......@@ -1031,6 +1157,11 @@ class ImportTest(TestCase):
course_video = video.courses.get(course_id=new_course_id)
self.assertTrue(course_video.video_image.image.name, self.image_name)
self.assert_transcripts(
constants.VIDEO_DICT_STAR['edx_video_id'],
[self.transcript_data1, self.transcript_data2]
)
def test_new_video_minimal(self):
edx_video_id = "test_edx_video_id"
......@@ -1048,11 +1179,13 @@ class ImportTest(TestCase):
@data(
# import into another course, where the video already exists, but is not associated with the course.
"new_course_id",
{'course_id': 'new_course_id', 'language_code': 'fr'},
# re-import case, where the video and course association already exists.
"existing_course_id"
{'course_id': 'existing_course_id', 'language_code': 'nl'}
)
def test_existing_video(self, course_id):
@unpack
def test_existing_video(self, course_id, language_code):
transcript_data = dict(self.transcript_data3, language_code=language_code)
xml = self.make_import_xml(
video_dict={
"client_video_id": "new_client_video_id",
......@@ -1067,8 +1200,14 @@ class ImportTest(TestCase):
"profile": "mobile",
},
],
image=self.image_name
image=self.image_name,
video_transcripts=[transcript_data]
)
# there must not be any transcript before import
with self.assertRaises(VideoTranscript.DoesNotExist):
VideoTranscript.objects.get(video_id=constants.VIDEO_DICT_FISH["edx_video_id"])
api.import_from_xml(xml, constants.VIDEO_DICT_FISH["edx_video_id"], course_id)
video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_FISH["edx_video_id"])
......@@ -1084,6 +1223,10 @@ class ImportTest(TestCase):
course_video = video.courses.get(course_id=course_id)
self.assertTrue(course_video.video_image.image.name, self.image_name)
self.assert_transcripts(
constants.VIDEO_DICT_FISH["edx_video_id"],
[transcript_data]
)
def test_existing_video_with_invalid_course_id(self):
xml = self.make_import_xml(video_dict=constants.VIDEO_DICT_FISH)
......@@ -1144,6 +1287,65 @@ class ImportTest(TestCase):
xml = self.make_import_xml(video_dict=constants.VIDEO_DICT_FISH)
self.assert_invalid_import(xml, "x" * 300)
def test_external_video_transcript(self):
    """
    Importing a `video_asset` that only carries transcripts creates those
    transcripts for the external video id.
    """
    external_video_id = 'little-star'
    xml_template = """
<video_asset>
<transcripts>
<transcript file_name="wow.srt" language_code="en" file_format="srt" provider='Cielo24' video_id="{video_id}"/>
<transcript file_name="wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/>
</transcripts>
</video_asset>
"""
    xml = etree.fromstring(xml_template.format(video_id=external_video_id))

    # Nothing may be stored for this id before the import runs.
    with self.assertRaises(VideoTranscript.DoesNotExist):
        VideoTranscript.objects.get(video_id=external_video_id)

    api.import_from_xml(xml, '')

    imported = [self.transcript_data1, self.transcript_data2]
    self.assert_transcripts(external_video_id, imported)
def test_external_no_video_transcript(self):
    """
    Importing an empty `video_asset` for an external video creates no transcripts.
    """
    empty_asset = etree.fromstring('<video_asset/>')
    api.import_from_xml(empty_asset, '')
    transcript_count = VideoTranscript.objects.count()
    self.assertEqual(transcript_count, 0)
@patch('edxval.api.logger')
def test_video_transcript_missing_attribute(self, mock_logger):
    """
    Verify that video transcript import working as expected if transcript xml data is missing.

    The first `transcript` element has no `video_id` attribute; the test
    expects a warning for it and only the second element to be stored.
    """
    video_id = 'little-star'
    # Transcript element that lacks the `video_id` attribute.
    transcript_xml = '<transcript file_name="wow.srt" language_code="en" file_format="srt" provider="Cielo24"/>'
    xml = etree.fromstring("""
<video_asset>
<transcripts>
{transcript_xml}
<transcript file_name="wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/>
</transcripts>
</video_asset>
""".format(transcript_xml=transcript_xml, video_id=video_id))

    # there should be no video transcript before import
    with self.assertRaises(VideoTranscript.DoesNotExist):
        VideoTranscript.objects.get(video_id=video_id)

    api.create_transcript_objects(xml)

    # The warning is expected to receive the incomplete element's markup.
    mock_logger.warn.assert_called_with(
        "VAL: Required attributes are missing from xml, xml=[%s]",
        transcript_xml
    )

    self.assert_transcripts(video_id, [self.transcript_data2])
class GetCourseVideoRemoveTest(TestCase):
"""
......@@ -1460,3 +1662,386 @@ class CourseVideoImageTest(TestCase):
# Open the shared image file to verify it is not deleted
ImageFile(open(shared_image))
@ddt
class TranscriptTest(TestCase):
"""
Tests to check transcript related functions.
"""
def setUp(self):
    """
    Creates video and video transcript objects.

    Besides the two fixture transcripts for the first video, a transcript
    with real file content (The_Arrow.srt, language 'ur') is uploaded for
    `self.video_id` and its returned url kept in `self.transcript_url`.
    """
    self.video1 = Video.objects.create(**constants.VIDEO_DICT_FISH)
    self.edx_video_id1 = self.video1.edx_video_id

    self.video2 = Video.objects.create(**constants.VIDEO_DICT_DIFFERENT_ID_FISH)
    self.edx_video_id2 = self.video2.edx_video_id

    # Copies of the fixtures with `transcript` renamed to `name`.
    self.transcript_data1 = dict(constants.VIDEO_TRANSCRIPT_CIELO24)
    self.transcript_data1['name'] = self.transcript_data1.pop('transcript')
    self.transcript_data2 = dict(constants.VIDEO_TRANSCRIPT_3PLAY)
    self.transcript_data2['name'] = self.transcript_data2.pop('transcript')

    self.transcript1 = VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_CIELO24)
    self.transcript2 = VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_3PLAY)

    self.video_id = '0987654321'
    self.arrow_transcript_path = 'edxval/tests/data/The_Arrow.srt'
    self.flash_transcript_path = 'edxval/tests/data/The_Flash.srt'

    # Close the source file once the upload is done instead of leaking the
    # handle for every test in this class.
    with open(self.arrow_transcript_path) as arrow_transcript:
        self.transcript_url = api.create_or_update_video_transcript(
            self.video_id,
            'ur',
            'The_Arrow.srt',
            TranscriptFormat.SRT,
            provider=TranscriptProviderType.CUSTOM,
            file_data=File(arrow_transcript),
        )
@data(
    {'video_id': 'super-soaker', 'language_code': 'en', 'expected_availability': True},
    {'video_id': 'super-soaker', 'language_code': None, 'expected_availability': True},
    {'video_id': 'super123', 'language_code': 'en', 'expected_availability': False},
    {'video_id': 'super-soaker', 'language_code': 'ro', 'expected_availability': False},
)
@unpack
def test_is_transcript_available(self, video_id, language_code, expected_availability):
    """
    Check `is_transcript_available` against each video id / language code pair.
    """
    self.assertEqual(
        api.is_transcript_available(video_id, language_code),
        expected_availability
    )
@data(
    {'video_id': 'non-existant-video', 'language_code': 'en'},
    {'video_id': '0987654321', 'language_code': 'en'},
)
@unpack
def test_get_video_transcript_not_found(self, video_id, language_code):
    """
    `get_video_transcript` returns None when no transcript matches the id and language.
    """
    missing_transcript = api.get_video_transcript(video_id, language_code)
    self.assertIsNone(missing_transcript)
def test_get_video_transcript(self):
    """
    `get_video_transcript` returns the serialized transcript uploaded in setUp.
    """
    video_id, language_code = u'0987654321', u'ur'
    expectation = {
        'video_id': video_id,
        'url': self.transcript_url,
        'file_format': TranscriptFormat.SRT,
        'provider': TranscriptProviderType.CUSTOM,
        'language_code': language_code
    }
    self.assertDictEqual(api.get_video_transcript(video_id, language_code), expectation)
@patch('edxval.api.logger')
def test_get_video_transcript_data_exception(self, mock_logger):
    """
    Verify that `get_video_transcript_data` logs and raises an exception.

    The 'super-soaker' / 'en' transcript was created in setUp from a bare
    file name without uploading any content, and an IOError is expected
    when its data is requested.
    """
    with self.assertRaises(IOError):
        api.get_video_transcript_data(video_ids=['super-soaker'], language_code=u'en')

    # Checked after the `with` block so that it runs once the error was raised.
    mock_logger.exception.assert_called_with(
        '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
        'super-soaker',
        'en',
    )
@data(
    {'video_ids': ['non-existant-video', 'another-non-existant-id'], 'language_code': 'en', 'result': None},
    {'video_ids': ['non-existant-video', '0987654321'], 'language_code': 'en', 'result': None},
)
@unpack
def test_get_video_transcript_data_not_found(self, video_ids, language_code, result):
    """
    `get_video_transcript_data` gives the expected result when none of the
    ids has a transcript in the requested language.
    """
    self.assertEqual(
        api.get_video_transcript_data(video_ids, language_code),
        result
    )
def test_get_video_transcript_data(self):
    """
    Verify that `get_video_transcript_data` api function works as expected.

    Of the two ids passed, only '0987654321' has an 'ur' transcript; its
    file name and content are expected back.
    """
    # Read the fixture through a context manager so the handle is closed
    # instead of being left open by a bare `open()` call.
    with open(self.arrow_transcript_path) as arrow_transcript:
        expected_content = arrow_transcript.read()

    expected_transcript = {
        'file_name': self.transcript_url,
        'content': expected_content
    }
    transcript = api.get_video_transcript_data(
        video_ids=['super-soaker', '0987654321'],
        language_code=u'ur'
    )
    self.assertDictEqual(transcript, expected_transcript)
@data(
    {'video_id': 'super-soaker', 'result': True},
    {'video_id': 'super-soaker1', 'result': False},
)
@unpack
def test_get_video_transcripts(self, video_id, result):
    """
    `get_video_transcripts` lists every transcript of a known video and
    returns an empty list for an unknown one.
    """
    transcripts = api.get_video_transcripts(video_id)

    if not result:
        self.assertEqual(transcripts, [])
        return

    self.assertEqual(len(transcripts), 2)
    # Expected order: the 3Play fixture first, then the Cielo24 one.
    expected_in_order = [self.transcript_data2, self.transcript_data1]
    for received, expected in zip(transcripts, expected_in_order):
        expected['url'] = expected.pop('name')
        self.assertEqual(received, expected)
def test_create_video_transcript(self):
    """
    `create_or_update_video_transcript` creates the transcript when none
    exists yet for the video id and language.
    """
    transcript_data = dict(self.transcript_data1, language_code='ur')
    video_id = transcript_data['video_id']
    language_code = transcript_data['language_code']

    # Precondition: no transcript for this video/language pair.
    with self.assertRaises(VideoTranscript.DoesNotExist):
        VideoTranscript.objects.get(
            video_id=video_id,
            language_code=language_code
        )

    transcript_url = api.create_or_update_video_transcript(
        video_id=video_id,
        language_code=language_code,
        file_name=transcript_data['name'],
        file_format=transcript_data['file_format'],
        provider=transcript_data['provider'],
    )
    self.assertEqual(transcript_url, transcript_data['name'])

    created_transcript = api.get_video_transcript(
        video_id=video_id,
        language_code=language_code
    )
    # The API reports the file name under `url`.
    transcript_data['url'] = transcript_data.pop('name')
    self.assertEqual(transcript_data, created_transcript)
@data(
    {'language_code': 'ur', 'has_url': True},
    {'language_code': 'xyz', 'has_url': False},
)
@unpack
def test_get_video_transcript_url(self, language_code, has_url):
    """
    `get_video_transcript_url` returns the stored url, or None when the
    language has no transcript.
    """
    transcript_url = api.get_video_transcript_url(self.video_id, language_code)
    if not has_url:
        self.assertIsNone(transcript_url)
        return
    self.assertEqual(self.transcript_url, transcript_url)
@data(
    {
        'file_data': None,
        'file_format': TranscriptFormat.SJSON,
        'provider': TranscriptProviderType.CIELO24
    },
    {
        'file_data': ContentFile(FILE_DATA),
        'file_format': TranscriptFormat.SRT,
        'provider': TranscriptProviderType.THREE_PLAY_MEDIA
    },
)
@unpack
def test_create_or_update_video_transcript(self, file_data, file_format, provider):
    """
    `create_or_update_video_transcript` updates the existing 'ur'
    transcript, with and without new file content.
    """
    language_code = 'ur'
    existing = VideoTranscript.objects.get(video_id=self.video_id, language_code=language_code)
    self.assertIsNotNone(existing)

    file_name = 'overwatch.{}'.format(file_format)
    transcript_url = api.create_or_update_video_transcript(
        self.video_id, language_code, file_name, file_format, provider, file_data
    )

    updated = VideoTranscript.objects.get(video_id=self.video_id, language_code=language_code)
    self.assertEqual(transcript_url, updated.url())
    self.assertEqual(updated.file_format, file_format)
    self.assertEqual(updated.provider, provider)

    if not file_data:
        # Without content only the given file name is recorded.
        self.assertEqual(updated.transcript.name, file_name)
        return

    directory_prefix = settings.VIDEO_TRANSCRIPTS_SETTINGS['DIRECTORY_PREFIX']
    self.assertTrue(transcript_url.startswith(directory_prefix))
    self.assertEqual(updated.transcript.name, transcript_url)
    with open(updated.transcript.name) as saved_transcript:
        self.assertEqual(saved_transcript.read(), FILE_DATA)
@data(
    {
        'file_format': '123',
        'provider': TranscriptProviderType.CIELO24,
        'exception': InvalidTranscriptFormat,
        'exception_message': '123 transcript format is not supported',
    },
    {
        'file_format': TranscriptFormat.SRT,
        'provider': 123,
        'exception': InvalidTranscriptProvider,
        'exception_message': '123 transcript provider is not supported',
    },
)
@unpack
def test_create_or_update_video_exceptions(self, file_format, provider, exception, exception_message):
    """
    Verify that `create_or_update_video_transcript` api function raise exceptions on invalid values.

    Each case passes either an unsupported file format or an unsupported
    provider and checks both the exception type and its message.
    """
    with self.assertRaises(exception) as transcript_exception:
        api.create_or_update_video_transcript(self.video_id, 'ur', 'overwatch.srt', file_format, provider)

    # Checked after the `with` block; inside it the line would be skipped
    # once the call above raises.
    self.assertEqual(transcript_exception.exception.message, exception_message)
def test_video_transcript_deletion(self):
    """
    Test video transcript deletion works as expected.

    Replaces the content of the 'ur' transcript uploaded in setUp and then
    checks that the new file is readable while the previous file can no
    longer be opened.
    """
    # get an existing video transcript
    video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
    existing_transcript_url = video_transcript.transcript.name

    # This will replace the transcript for an existing video and delete the existing transcript
    new_transcript_url = api.create_or_update_video_transcript(
        self.video_id,
        'ur',
        'overwatch.srt',
        TranscriptFormat.SRT,
        TranscriptProviderType.CIELO24,
        ContentFile(FILE_DATA)
    )

    # Verify that new transcript is set to video
    video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
    self.assertEqual(video_transcript.transcript.name, new_transcript_url)

    # verify that new data is written correctly
    with open(video_transcript.transcript.name) as saved_transcript:
        self.assertEqual(saved_transcript.read(), FILE_DATA)

    # Verify that an exception is raised if we try to open a deleted transcript file
    with self.assertRaises(IOError) as file_open_exception:
        File(open(existing_transcript_url))

    # Checked after the `with` block so that it runs once the IOError was raised.
    self.assertEqual(file_open_exception.exception.strerror, u'No such file or directory')
def test_get_available_transcript_languages(self):
    """
    `get_available_transcript_languages` reports each language once across
    all of the given video ids.
    """
    dupe_lang_video_id = 'duplicate_lang_video'
    duplicate_language_transcript = dict(constants.VIDEO_TRANSCRIPT_CIELO24, video_id=dupe_lang_video_id)
    VideoTranscript.objects.create(**duplicate_language_transcript)

    video_ids = [
        'super-soaker',           # has 'en' and 'de' transcripts
        self.video_id,            # has an 'ur' transcript
        dupe_lang_video_id,       # has an 'en' transcript (duplicate language)
        'non_existent_video_id',  # has no transcript at all
    ]
    available_languages = api.get_available_transcript_languages(video_ids=video_ids)
    self.assertItemsEqual(available_languages, ['de', 'en', 'ur'])
@ddt
class TranscriptPreferencesTest(TestCase):
    """
    Tests for the transcript preferences API functions.
    """
    def setUp(self):
        """
        Store Cielo24 preferences for the demo course and prepare the merged
        Cielo24 + 3Play values that the create/update tests expect.
        """
        self.course_id = 'edX/DemoX/Demo_Course'
        self.transcript_preferences = TranscriptPreference.objects.create(
            **constants.TRANSCRIPT_PREFERENCES_CIELO24
        )
        # 3Play values take precedence wherever both fixtures define a key.
        self.prefs = dict(
            constants.TRANSCRIPT_PREFERENCES_CIELO24,
            **constants.TRANSCRIPT_PREFERENCES_3PLAY
        )

    def assert_prefs(self, received, expected):
        """
        Assert that `received` equals `expected`, ignoring the `modified` timestamp.
        """
        # the modified datetime is dropped from `received` before comparing
        received.pop('modified')
        self.assertEqual(received, expected)

    def test_get_3rd_party_transcription_plans(self):
        """
        `get_3rd_party_transcription_plans` returns the plans defined in `utils`.
        """
        plans = api.get_3rd_party_transcription_plans()
        self.assertEqual(plans, utils.THIRD_PARTY_TRANSCRIPTION_PLANS)

    def test_get_transcript_preferences(self):
        """
        `get_transcript_preferences` returns the preferences stored in setUp.
        """
        cielo24_prefs = dict(constants.TRANSCRIPT_PREFERENCES_CIELO24, three_play_turnaround=None)
        self.assert_prefs(api.get_transcript_preferences(self.course_id), cielo24_prefs)

    def test_remove_transcript_preferences(self):
        """
        `remove_transcript_preferences` deletes the preferences of a course.
        """
        # Preferences exist before removal ...
        self.assertIsNotNone(api.get_transcript_preferences(self.course_id))

        api.remove_transcript_preferences(self.course_id)

        # ... and are gone afterwards.
        self.assertIsNone(api.get_transcript_preferences(self.course_id))

    def test_remove_transcript_preferences_not_found(self):
        """
        `remove_transcript_preferences` does not fail for a course without preferences.
        """
        course_id = 'dummy-course-id'

        # Nothing is stored for this course.
        self.assertIsNone(api.get_transcript_preferences(course_id))

        # The call must simply complete without raising.
        api.remove_transcript_preferences(course_id)

    def test_update_transcript_preferences(self):
        """
        `create_or_update_transcript_preferences` updates the existing course preferences.
        """
        updated = api.create_or_update_transcript_preferences(**constants.TRANSCRIPT_PREFERENCES_3PLAY)
        self.assert_prefs(updated, self.prefs)

    def test_create_transcript_preferences(self):
        """
        `create_or_update_transcript_preferences` creates preferences for a new course.
        """
        new_course_id = 'edX/DemoX/Astonomy'
        self.prefs['course_id'] = new_course_id

        # No preference is present for the new course id yet
        self.assertIsNone(api.get_transcript_preferences(new_course_id))

        # create new preference
        created = api.create_or_update_transcript_preferences(**self.prefs)
        self.assert_prefs(created, self.prefs)

        # The setUp preference plus the new one
        self.assertEqual(TranscriptPreference.objects.count(), 2)
......@@ -3,13 +3,16 @@
Tests for Video Abstraction Layer views
"""
import json
from ddt import ddt, data, unpack
import unittest
from ddt import data, ddt, unpack
from django.core.urlresolvers import reverse
from rest_framework import status
from edxval.tests import constants, APIAuthTestCase
from edxval.models import Profile, Video, CourseVideo
from edxval.models import (CourseVideo, Profile, TranscriptFormat,
TranscriptProviderType, Video, VideoTranscript)
from edxval.serializers import TranscriptSerializer
from edxval.tests import APIAuthTestCase, constants
class VideoDetail(APIAuthTestCase):
......@@ -206,6 +209,7 @@ class VideoDetail(APIAuthTestCase):
)
self.assertEqual(len(videos[0].encoded_videos.all()), 1)
@unittest.skip("Skipping for now. We may need this later when we create transcripts alongwith video")
def test_update_remove_subtitles(self):
# Create some subtitles
self._create_videos(constants.COMPLETE_SET_STAR)
......@@ -665,7 +669,7 @@ class VideoListTest(APIAuthTestCase):
Tests number of queries for a Video with no Encoded Videos
"""
url = reverse('video-list')
with self.assertNumQueries(9):
with self.assertNumQueries(8):
self.client.post(url, constants.VIDEO_DICT_ZEBRA, format='json')
def test_queries_for_two_encoded_video(self):
......@@ -673,7 +677,7 @@ class VideoListTest(APIAuthTestCase):
Tests number of queries for a Video/EncodedVideo(2) pair
"""
url = reverse('video-list')
with self.assertNumQueries(15):
with self.assertNumQueries(13):
self.client.post(url, constants.COMPLETE_SET_FISH, format='json')
def test_queries_for_single_encoded_videos(self):
......@@ -681,7 +685,7 @@ class VideoListTest(APIAuthTestCase):
Tests number of queries for a Video/EncodedVideo(1) pair
"""
url = reverse('video-list')
with self.assertNumQueries(13):
with self.assertNumQueries(11):
self.client.post(url, constants.COMPLETE_SET_STAR, format='json')
......@@ -718,18 +722,19 @@ class VideoDetailTest(APIAuthTestCase):
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response = self.client.post(url, constants.VIDEO_DICT_ZEBRA, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(9):
with self.assertNumQueries(7):
self.client.get("/edxval/videos/").data
response = self.client.post(url, constants.COMPLETE_SET_FISH, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(12):
with self.assertNumQueries(9):
self.client.get("/edxval/videos/").data
response = self.client.post(url, constants.COMPLETE_SET_STAR, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(14):
with self.assertNumQueries(10):
self.client.get("/edxval/videos/").data
@unittest.skip("Skipping for now. We may need these later when we create transcripts alongwith video")
class SubtitleDetailTest(APIAuthTestCase):
"""
Tests for subtitle API
......@@ -811,6 +816,7 @@ class SubtitleDetailTest(APIAuthTestCase):
)
self.assertEqual(self.client.get(video_subtitles['content_url']).content, '{"start": "00:00:00"}')
@ddt
class VideoImagesViewTest(APIAuthTestCase):
"""
......@@ -897,3 +903,135 @@ class VideoImagesViewTest(APIAuthTestCase):
response.data['message'],
message
)
@ddt
class VideoTranscriptViewTest(APIAuthTestCase):
    """
    Tests for the endpoint that creates video transcripts (VideoTranscriptView).
    """

    def setUp(self):
        """
        Resolve the endpoint URL, create a video and keep the transcript fixture at hand.
        """
        self.url = reverse('create-video-transcript')
        self.video = Video.objects.create(**constants.VIDEO_DICT_FISH)
        self.transcript_data = constants.VIDEO_TRANSCRIPT_CIELO24
        super(VideoTranscriptViewTest, self).setUp()

    def _build_post_payload(self):
        """
        Return a copy of the transcript fixture with its `transcript` key renamed to `name`,
        which is the key the endpoint expects in POSTed data.
        """
        payload = dict(self.transcript_data)
        payload['name'] = payload.pop('transcript')
        return payload

    def test_create_transcript(self):
        """
        POSTing valid data responds with 200 and stores a matching transcript.
        """
        payload = self._build_post_payload()
        response = self.client.post(self.url, payload, format='json')
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        stored_transcript = TranscriptSerializer(VideoTranscript.objects.first()).data
        # The serializer reports the transcript under `url` instead of `name`.
        expected_transcript = dict(payload)
        expected_transcript['url'] = expected_transcript.pop('name')
        self.assertEqual(stored_transcript, expected_transcript)

    def test_update_existing_transcript(self):
        """
        POSTing a transcript that already exists is rejected with 400 and an explanatory message.
        """
        VideoTranscript.objects.create(**self.transcript_data)

        payload = self._build_post_payload()
        response = self.client.post(self.url, payload, format='json')
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

        expected_message = (
            u'Can not override existing transcript for video "{video_id}" and language code "{language}".'
        ).format(video_id=self.video.edx_video_id, language=payload['language_code'])
        self.assertEqual(response.data['message'], expected_message)

    @data(
        # No params at all: every required param is reported as missing.
        dict(
            post_data={},
            message=u'video_id and name and language_code and provider and file_format must be specified.',
        ),
        # Unsupported transcript file format.
        dict(
            post_data=dict(
                video_id='super-soaker',
                name='abc.xyz',
                language_code='en',
                provider=TranscriptProviderType.CIELO24,
                file_format='xyz',
            ),
            message=u'"xyz" transcript file type is not supported. Supported formats are "{}"'.format(
                sorted(dict(TranscriptFormat.CHOICES))
            ),
        ),
        # Unsupported transcription provider.
        dict(
            post_data=dict(
                video_id='super-soaker',
                name='abc.srt',
                language_code='en',
                provider='xyz',
                file_format=TranscriptFormat.SRT,
            ),
            message=u'"xyz" provider is not supported. Supported transcription providers are "{}"'.format(
                sorted(dict(TranscriptProviderType.CHOICES))
            ),
        ),
    )
    @unpack
    def test_error_responses(self, post_data, message):
        """
        Invalid POST data is rejected with 400 and the expected error message.
        """
        response = self.client.post(self.url, post_data, format='json')
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertEqual(response.data['message'], message)
@ddt
class VideoStatusViewTest(APIAuthTestCase):
    """
    Tests for the endpoint that updates a video's status (VideoStatusView).
    """

    def setUp(self):
        """
        Resolve the endpoint URL and create a video from the FISH fixture.
        """
        self.url = reverse('video-status-update')
        self.video = Video.objects.create(**constants.VIDEO_DICT_FISH)
        super(VideoStatusViewTest, self).setUp()

    @data(
        # Required params are missing.
        dict(
            patch_data={},
            message=u'"edx_video_id and status" params must be specified.',
            status_code=status.HTTP_400_BAD_REQUEST,
        ),
        # Status value is not one of the accepted ones.
        dict(
            patch_data={'edx_video_id': 'super-soaker', 'status': 'fake'},
            message=u'"fake" is not a valid Video status.',
            status_code=status.HTTP_400_BAD_REQUEST,
        ),
        # No video exists for the given id.
        dict(
            patch_data={'edx_video_id': 'fake', 'status': 'transcript_ready'},
            message=u'Video is not found for specified edx_video_id: fake',
            status_code=status.HTTP_400_BAD_REQUEST,
        ),
        # Valid request: no message is expected in the response.
        dict(
            patch_data={'edx_video_id': 'super-soaker', 'status': 'transcript_ready'},
            message=None,
            status_code=status.HTTP_200_OK,
        ),
    )
    @unpack
    def test_transcript_status(self, patch_data, message, status_code):
        """
        PATCHing the video status yields the expected status code and message.
        """
        response = self.client.patch(self.url, patch_data, format='json')
        self.assertEqual(response.status_code, status_code)
        self.assertEqual(response.data.get('message'), message)
......@@ -9,22 +9,22 @@ from edxval import views
urlpatterns = [
url(r'^videos/$',
views.VideoList.as_view(),
name="video-list"
name='video-list'
),
url(
r'^videos/(?P<edx_video_id>[-\w]+)$',
views.VideoDetail.as_view(),
name="video-detail"
name='video-detail'
),
url(
r'^videos/(?P<video__edx_video_id>[-\w]+)/(?P<language>[-_\w]+)$',
views.SubtitleDetail.as_view(),
name="subtitle-detail"
r'^videos/status/$',
views.VideoStatusView.as_view(),
name='video-status-update'
),
url(
r'^videos/(?P<edx_video_id>[-\w]+)/(?P<language>[-_\w]+)/subtitle$',
views.get_subtitle,
name="subtitle-content"
r'^videos/video-transcripts/create/$',
views.VideoTranscriptView.as_view(),
name='create-video-transcript'
),
url(
r'^videos/video-images/update/$',
......
......@@ -5,6 +5,120 @@ Util methods to be used in api and models.
from django.conf import settings
from django.core.files.storage import get_storage_class
# 3rd Party Transcription Plans
#
# Keyed by provider name. Each provider entry carries a human-readable
# `display_name` and a `turnaround` mapping of turnaround codes to labels.
# Language codes map to display names throughout.
THIRD_PARTY_TRANSCRIPTION_PLANS = {
    'Cielo24': {
        'display_name': 'Cielo24',
        'turnaround': {
            'PRIORITY': 'Priority (24 hours)',
            'STANDARD': 'Standard (48 hours)'
        },
        # Cielo24 lists its languages per fidelity level.
        'fidelity': {
            'MECHANICAL': {
                'display_name': 'Mechanical (75% accuracy)',
                'languages': {
                    'nl': 'Dutch',
                    'en': 'English',
                    'fr': 'French',
                    'de': 'German',
                    'it': 'Italian',
                    'es': 'Spanish',
                }
            },
            'PREMIUM': {
                'display_name': 'Premium (95% accuracy)',
                'languages': {
                    'en': 'English',
                }
            },
            'PROFESSIONAL': {
                'display_name': 'Professional (99% accuracy)',
                'languages': {
                    'ar': 'Arabic',
                    'zh-tw': 'Chinese - Mandarin (Traditional)',
                    'zh-cmn': 'Chinese - Mandarin (Simplified)',
                    'zh-yue': 'Chinese - Cantonese (Traditional)',
                    'nl': 'Dutch',
                    'en': 'English',
                    'fr': 'French',
                    'de': 'German',
                    'he': 'Hebrew',
                    'hi': 'Hindi',
                    'it': 'Italian',
                    'ja': 'Japanese',
                    'ko': 'Korean',
                    'pt': 'Portuguese',
                    'ru': 'Russian',
                    'es': 'Spanish',
                    'tr': 'Turkish',
                }
            },
        }
    },
    '3PlayMedia': {
        'display_name': '3Play Media',
        'turnaround': {
            'same_day_service': 'Same day',
            'rush_service': '24 hours (rush)',
            'expedited_service': '2 days (expedited)',
            'default': '4 days (default)',
            'extended_service':'10 days (extended)'
        },
        # 3PlayMedia has a single language list (no fidelity levels).
        'languages': {
            'en': 'English',
            'fr': 'French',
            'de': 'German',
            'it': 'Italian',
            'nl': 'Dutch',
            'es': 'Spanish',
            'el': 'Greek',
            'pt': 'Portuguese',
            'zh': 'Chinese',
            'ar': 'Arabic',
            'he': 'Hebrew',
            'ru': 'Russian',
            'ja': 'Japanese',
            'sv': 'Swedish',
            'cs': 'Czech',
            'da': 'Danish',
            'fi': 'Finnish',
            'id': 'Indonesian',
            'ko': 'Korean',
            'no': 'Norwegian',
            'pl': 'Polish',
            'th': 'Thai',
            'tr': 'Turkish',
            'vi': 'Vietnamese',
            'ro': 'Romanian',
            'hu': 'Hungarian',
            'ms': 'Malay',
            'bg': 'Bulgarian',
            'tl': 'Tagalog',
            'sr': 'Serbian',
            'sk': 'Slovak',
            'uk': 'Ukrainian',
        },
        # Valid translations -- a mapping of source languages to the
        # translatable target languages.
        'translations': {
            'es': [
                'en'
            ],
            'en': [
                'el', 'en', 'zh', 'vi',
                'it', 'ar', 'cs', 'id',
                'es', 'ru', 'nl', 'pt',
                'no', 'tr', 'tl', 'th',
                'ro', 'pl', 'fr', 'bg',
                'uk', 'de', 'da', 'fi',
                'hu', 'ja', 'he', 'sr',
                'ko', 'sv', 'sk', 'ms'
            ],
        }
    }
}
def video_image_path(video_image_instance, filename): # pylint:disable=unused-argument
"""
......@@ -29,3 +143,28 @@ def get_video_image_storage():
# during edx-platform loading this method gets called but settings are not ready yet
# so in that case we will return default(FileSystemStorage) storage class instance
return get_storage_class()()
def video_transcript_path(video_transcript_instance, filename):  # pylint:disable=unused-argument
    """
    Returns video transcript path.

    The path is the configured `DIRECTORY_PREFIX` (empty when not configured)
    followed by the given filename.

    Arguments:
        video_transcript_instance (VideoTranscript): This is passed automatically by models.CustomizableFileField
        filename (str): name of transcript file
    """
    # The setting may be absent (get_video_transcript_storage guards against the same case),
    # so fall back to an empty config instead of raising AttributeError.
    transcript_settings = getattr(settings, 'VIDEO_TRANSCRIPTS_SETTINGS', {})
    return u'{}{}'.format(transcript_settings.get('DIRECTORY_PREFIX', ''), filename)
def get_video_transcript_storage():
    """
    Build the django storage backend instance used for video transcripts.

    Uses the `STORAGE_CLASS` and `STORAGE_KWARGS` entries of `VIDEO_TRANSCRIPTS_SETTINGS`
    when that setting exists; otherwise returns an instance of the default storage class.
    """
    if not hasattr(settings, 'VIDEO_TRANSCRIPTS_SETTINGS'):
        # during edx-platform loading this method gets called but settings are not ready yet
        # so in that case we will return default(FileSystemStorage) storage class instance
        return get_storage_class()()

    storage_class = get_storage_class(
        settings.VIDEO_TRANSCRIPTS_SETTINGS.get('STORAGE_CLASS'),
    )
    storage_kwargs = settings.VIDEO_TRANSCRIPTS_SETTINGS.get('STORAGE_KWARGS', {})
    return storage_class(**storage_kwargs)
"""
Views file for django app edxval.
"""
from rest_framework.views import APIView
from rest_framework import generics
from rest_framework.authentication import SessionAuthentication
from rest_framework_oauth.authentication import OAuth2Authentication
from rest_framework.permissions import DjangoModelPermissions
from rest_framework.response import Response
from rest_framework import status
import logging
from django.core.exceptions import ValidationError
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from django.core.exceptions import ValidationError
from django.views.decorators.http import last_modified
from rest_framework import generics, status
from rest_framework.authentication import SessionAuthentication
from rest_framework.permissions import DjangoModelPermissions
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework_oauth.authentication import OAuth2Authentication
from edxval.api import (create_or_update_video_transcript,
get_video_transcript, update_video_status)
from edxval.models import (CourseVideo, Profile, TranscriptFormat,
TranscriptProviderType, Video, VideoImage,
VideoTranscript)
from edxval.serializers import TranscriptSerializer, VideoSerializer
from edxval.models import Video, Profile, Subtitle, CourseVideo, VideoImage
from edxval.serializers import (
VideoSerializer,
SubtitleSerializer
)
LOGGER = logging.getLogger(__name__) # pylint: disable=C0103
# Statuses accepted by VideoStatusView.patch; any other value is rejected with HTTP 400.
VALID_VIDEO_STATUSES = [
    'transcription_in_progress',
    'transcript_ready',
]
class ReadRestrictedDjangoModelPermissions(DjangoModelPermissions):
......@@ -92,15 +101,116 @@ class VideoDetail(generics.RetrieveUpdateDestroyAPIView):
serializer_class = VideoSerializer
class SubtitleDetail(MultipleFieldLookupMixin, generics.RetrieveUpdateDestroyAPIView):
class VideoTranscriptView(APIView):
"""
Gets a subtitle instance given its id
A Transcription View, used by edx-video-pipeline to create video transcripts.
"""
authentication_classes = (OAuth2Authentication, SessionAuthentication)
# noinspection PyMethodMayBeStatic
def post(self, request):
    """
    Creates a video transcript instance with the given information.

    Arguments:
        request: A WSGI request. Its data must contain `video_id`, `name`,
            `language_code`, `provider` and `file_format`.

    Returns:
        Response: HTTP 200 when the transcript has been created. HTTP 400 with a
        `message` when a required param is missing, when the file format or the
        provider is not supported, or when a transcript already exists for the
        given video and language code.
    """
    attrs = ('video_id', 'name', 'language_code', 'provider', 'file_format')
    missing = [attr for attr in attrs if attr not in request.data]
    if missing:
        missing_params = ' and '.join(missing)
        # `Logger.warn` is a deprecated alias of `Logger.warning`.
        LOGGER.warning(
            '[VAL] Required transcript params are missing. %s', missing_params
        )
        return Response(
            status=status.HTTP_400_BAD_REQUEST,
            data=dict(message=u'{missing} must be specified.'.format(missing=missing_params))
        )

    video_id = request.data['video_id']
    language_code = request.data['language_code']
    transcript_name = request.data['name']
    provider = request.data['provider']
    file_format = request.data['file_format']

    supported_formats = sorted(dict(TranscriptFormat.CHOICES).keys())
    if file_format not in supported_formats:
        message = (
            u'"{format}" transcript file type is not supported. Supported formats are "{supported_formats}"'
        ).format(format=file_format, supported_formats=supported_formats)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    supported_providers = sorted(dict(TranscriptProviderType.CHOICES).keys())
    if provider not in supported_providers:
        message = (
            u'"{provider}" provider is not supported. Supported transcription providers are "{supported_providers}"'
        ).format(provider=provider, supported_providers=supported_providers)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    # Existing transcripts are never overridden through this endpoint.
    if VideoTranscript.get_or_none(video_id, language_code) is not None:
        message = (
            u'Can not override existing transcript for video "{video_id}" and language code "{language}".'
        ).format(video_id=video_id, language=language_code)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    create_or_update_video_transcript(
        video_id,
        language_code,
        transcript_name,
        file_format,
        provider,
    )
    return Response(status=status.HTTP_200_OK)
class VideoStatusView(APIView):
"""
A Video View to update the status of a video.
Note:
Currently, the valid statuses are `transcription_in_progress` and `transcript_ready` because it
was intended to only be used for video transcriptions but if you found it helpful to your needs, you
can add more statuses so that you can use it for updating other video statuses too.
"""
authentication_classes = (OAuth2Authentication, SessionAuthentication)
permission_classes = (ReadRestrictedDjangoModelPermissions,)
lookup_fields = ("video__edx_video_id", "language")
queryset = Subtitle.objects.all()
serializer_class = SubtitleSerializer
def patch(self, request):
    """
    Set a new status on the video identified by `edx_video_id`.

    Responds with HTTP 400 and a `message` when a required param is missing,
    when the status is not in VALID_VIDEO_STATUSES, or when no video matches
    the given id; responds with HTTP 200 and an empty payload otherwise.
    """
    required_params = ('edx_video_id', 'status')
    absent_params = [param for param in required_params if param not in request.data]
    if absent_params:
        error_message = u'"{missing}" params must be specified.'.format(missing=' and '.join(absent_params))
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': error_message})

    edx_video_id = request.data['edx_video_id']
    new_status = request.data['status']
    if new_status not in VALID_VIDEO_STATUSES:
        error_message = u'"{status}" is not a valid Video status.'.format(status=new_status)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': error_message})

    try:
        video = Video.objects.get(edx_video_id=edx_video_id)
        video.status = new_status
        video.save()
    except Video.DoesNotExist:
        error_message = u'Video is not found for specified edx_video_id: {edx_video_id}'.format(
            edx_video_id=edx_video_id
        )
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': error_message})

    return Response(status=status.HTTP_200_OK, data={})
class VideoImagesView(APIView):
......@@ -148,19 +258,3 @@ class VideoImagesView(APIView):
)
return Response()
def _last_modified_subtitle(request, edx_video_id, language):  # pylint: disable=W0613
    """
    Return the `modified` value of the subtitle matching the given video id and language.
    """
    subtitle = Subtitle.objects.get(video__edx_video_id=edx_video_id, language=language)
    return subtitle.modified
@last_modified(last_modified_func=_last_modified_subtitle)
def get_subtitle(request, edx_video_id, language):  # pylint: disable=W0613
    """
    Respond with the content of the subtitle matching the given video id and language,
    using the subtitle's own content type.
    """
    subtitle = Subtitle.objects.get(video__edx_video_id=edx_video_id, language=language)
    return HttpResponse(subtitle.content, content_type=subtitle.content_type)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment