Commit ed6a1588 by Qubad786 Committed by muzaffaryousaf

Add transcript model and VEDA endpoint to create video transcripts.

parent cb392214
...@@ -3,7 +3,7 @@ Admin file for django app edxval. ...@@ -3,7 +3,7 @@ Admin file for django app edxval.
""" """
from django.contrib import admin from django.contrib import admin
from .models import Video, Profile, EncodedVideo, Subtitle, CourseVideo, VideoImage from .models import Video, Profile, EncodedVideo, Transcript, CourseVideo, VideoImage
class ProfileAdmin(admin.ModelAdmin): # pylint: disable=C0111 class ProfileAdmin(admin.ModelAdmin): # pylint: disable=C0111
...@@ -48,6 +48,6 @@ class CourseVideoAdmin(admin.ModelAdmin): ...@@ -48,6 +48,6 @@ class CourseVideoAdmin(admin.ModelAdmin):
admin.site.register(Profile, ProfileAdmin) admin.site.register(Profile, ProfileAdmin)
admin.site.register(Video, VideoAdmin) admin.site.register(Video, VideoAdmin)
admin.site.register(Subtitle) admin.site.register(Transcript)
admin.site.register(VideoImage, VideoImageAdmin) admin.site.register(VideoImage, VideoImageAdmin)
admin.site.register(CourseVideo, CourseVideoAdmin) admin.site.register(CourseVideo, CourseVideoAdmin)
...@@ -9,16 +9,22 @@ from lxml.etree import Element, SubElement ...@@ -9,16 +9,22 @@ from lxml.etree import Element, SubElement
from enum import Enum from enum import Enum
from django.core.exceptions import ValidationError, ObjectDoesNotExist from django.core.exceptions import ValidationError, ObjectDoesNotExist
from django.core.files.base import ContentFile
from edxval.models import Video, EncodedVideo, CourseVideo, Profile, VideoImage from edxval.models import (
from edxval.serializers import VideoSerializer Video,
EncodedVideo,
CourseVideo,
Profile,
VideoImage,
Transcript,
TranscriptProviderType,
)
from edxval.serializers import VideoSerializer, TranscriptSerializer
from edxval.exceptions import ( # pylint: disable=unused-import from edxval.exceptions import ( # pylint: disable=unused-import
ValError,
ValInternalError, ValInternalError,
ValVideoNotFoundError, ValVideoNotFoundError,
ValCannotCreateError, ValCannotCreateError,
ValCannotUpdateError ValCannotUpdateError,
) )
logger = logging.getLogger(__name__) # pylint: disable=C0103 logger = logging.getLogger(__name__) # pylint: disable=C0103
...@@ -143,6 +149,87 @@ def update_video_status(edx_video_id, status): ...@@ -143,6 +149,87 @@ def update_video_status(edx_video_id, status):
video.save() video.save()
def get_video_transcript(video_id, lang_code):
    """
    Look up the transcript of a video for a single language.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        lang_code: the language code of the requested transcript.

    Returns:
        The matching Transcript instance, or None when no transcript exists
        for the given video_id and lang_code.
    """
    lookup = {'video_id': video_id, 'language': lang_code}
    try:
        return Transcript.objects.get(**lookup)
    except Transcript.DoesNotExist:
        # A missing transcript is reported as None rather than as an error.
        return None
def get_video_transcripts(video_id):
    """
    Collect the serialized transcripts of a video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.

    Returns:
        The TranscriptSerializer data for every transcript stored under
        video_id, or an empty list when there are none.
    """
    matching = Transcript.objects.filter(video_id=video_id)
    if not matching.exists():
        return []

    return TranscriptSerializer(matching, many=True).data
def create_video_transcript(
        video_id,
        language,
        transcript_url,
        transcript_format,
        provider=TranscriptProviderType.CUSTOM
):
    """
    Store a new transcript record for a video and return it serialized.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language: language code of a video transcript
        transcript_url: url of a video transcript
        transcript_format: format of the transcript
        provider: transcript provider

    Returns:
        The TranscriptSerializer data of the newly created transcript.

    Raises:
        IntegrityError: raises IntegrityError if there is an existing transcript with same video_id and language.
    """
    field_values = dict(
        video_id=video_id,
        language=language,
        transcript_url=transcript_url,
        fmt=transcript_format,
        provider=provider,
    )
    new_transcript = Transcript.objects.create(**field_values)
    return TranscriptSerializer(new_transcript).data
def update_video_transcript(
        video_id,
        language,
        transcript_url,
        transcript_format,
        provider=TranscriptProviderType.CUSTOM
):
    """
    Overwrite the stored details of an existing video transcript.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language: language code of a video transcript
        transcript_url: url of a video transcript
        transcript_format: format of the transcript
        provider: transcript provider

    Returns:
        The TranscriptSerializer data of the updated transcript.

    Raises:
        DoesNotExist: raises Transcript.DoesNotExist if there is not any transcript for the given video_id
            and language.
    """
    transcript = Transcript.objects.get(video_id=video_id, language=language)

    # Applied in the same order as the model fields were originally assigned.
    new_values = (
        ('language', language),
        ('transcript_url', transcript_url),
        ('fmt', transcript_format),
        ('provider', provider),
    )
    for field_name, value in new_values:
        setattr(transcript, field_name, value)

    transcript.save()
    return TranscriptSerializer(transcript).data
def get_course_video_image_url(course_id, edx_video_id): def get_course_video_image_url(course_id, edx_video_id):
""" """
Returns course video image url or None if no image found Returns course video image url or None if no image found
...@@ -246,11 +333,11 @@ def get_video_info(edx_video_id): ...@@ -246,11 +333,11 @@ def get_video_info(edx_video_id):
url: url of the video url: url of the video
file_size: size of the video in bytes file_size: size of the video in bytes
profile: ID of the profile profile: ID of the profile
subtitles: a list of Subtitle dicts transcripts: a list of Transcript dicts
fmt: file format (SRT or SJSON) fmt: file format (SRT or SJSON)
language: language code language: language code
content_url: url of file provider: transcript provider 3Play/Cielo24/Custom
url: api url to subtitle transcript_url: URL of the transcript file
} }
Raises: Raises:
...@@ -276,7 +363,12 @@ def get_video_info(edx_video_id): ...@@ -276,7 +363,12 @@ def get_video_info(edx_video_id):
] ]
} }
""" """
return VideoSerializer(_get_video(edx_video_id)).data serialized_video = VideoSerializer(_get_video(edx_video_id)).data
serialized_video.update({
'transcripts': get_video_transcripts(edx_video_id)
})
return serialized_video
def get_urls_for_profiles(edx_video_id, profiles): def get_urls_for_profiles(edx_video_id, profiles):
......
...@@ -335,45 +335,59 @@ class VideoImage(TimeStampedModel): ...@@ -335,45 +335,59 @@ class VideoImage(TimeStampedModel):
return storage.url(self.image.name) return storage.url(self.image.name)
class TranscriptProviderType(object):
    """
    Names of the providers a transcript can come from.
    """
    CUSTOM = 'Custom'
    THREE_PLAY_MEDIA = '3PlayMedia'
    CIELO24 = 'Cielo24'

    # (stored value, display label) pairs; each label equals its stored value.
    CHOICES = (
        (CUSTOM, CUSTOM),
        (THREE_PLAY_MEDIA, THREE_PLAY_MEDIA),
        (CIELO24, CIELO24),
    )
class TranscriptFormat(object):
    """
    File formats a transcript can be stored in.
    """
    SRT = 'srt'
    SJSON = 'sjson'

    # (stored value, display label) pairs.
    CHOICES = (
        (SRT, 'SubRip'),
        (SJSON, 'SRT JSON'),
    )
class Subtitle(models.Model): class Transcript(TimeStampedModel):
""" """
Subtitle for video Transcript for a video
Attributes: Attributes:
video: the video that the subtitles are for video_id: this is transcript's video.
fmt: the format of the subttitles file language: language of this transcript.
provider: source of this transcript (Custom/3PlayMedia/Cielo24 uploaded)
fmt: format of this transcript srt/sjson
status: status of the transcript (pending/in progress/ready)
transcript_url: relative path to transcript from an S3 bucket
""" """
created = models.DateTimeField(auto_now_add=True) # It can be an edx_video_id or an external video id (e.g. in case of external URLs - YT/MP4/WEBM etc.)
modified = models.DateTimeField(auto_now=True) video_id = models.CharField(max_length=255)
video = models.ForeignKey(Video, related_name="subtitles") transcript_url = models.TextField(null=True, blank=True)
fmt = models.CharField(max_length=20, db_index=True, choices=SUBTITLE_FORMATS)
language = models.CharField(max_length=8, db_index=True) language = models.CharField(max_length=8, db_index=True)
content = models.TextField(default='') provider = models.CharField(
max_length=30,
def __str__(self): choices=TranscriptProviderType.CHOICES,
return '%s Subtitle for %s' % (self.language, self.video) default=TranscriptProviderType.CUSTOM,
)
fmt = models.CharField(max_length=20, db_index=True, choices=TranscriptFormat.CHOICES)
def get_absolute_url(self): class Meta:
""" """
Returns the full url link to the edx_video_id A video_id can have at most one transcript per language
""" """
return reverse('subtitle-content', args=[self.video.edx_video_id, self.language]) unique_together = ("video_id", "language")
@property def __str__(self):
def content_type(self): return '{lang} Transcript for {video}'.format(lang=self.language, video=self.video_id)
"""
Sjson is returned as application/json, otherwise text/plain
"""
if self.fmt == 'sjson':
return 'application/json'
else:
return 'text/plain'
@receiver(models.signals.post_save, sender=Video) @receiver(models.signals.post_save, sender=Video)
......
...@@ -7,7 +7,7 @@ EncodedVideoSerializer which uses the profile_name as it's profile field. ...@@ -7,7 +7,7 @@ EncodedVideoSerializer which uses the profile_name as it's profile field.
from rest_framework import serializers from rest_framework import serializers
from rest_framework.fields import IntegerField, DateTimeField from rest_framework.fields import IntegerField, DateTimeField
from edxval.models import Profile, Video, EncodedVideo, Subtitle, CourseVideo, VideoImage from edxval.models import Profile, Video, EncodedVideo, CourseVideo, VideoImage, Transcript
class EncodedVideoSerializer(serializers.ModelSerializer): class EncodedVideoSerializer(serializers.ModelSerializer):
...@@ -50,37 +50,14 @@ class EncodedVideoSerializer(serializers.ModelSerializer): ...@@ -50,37 +50,14 @@ class EncodedVideoSerializer(serializers.ModelSerializer):
return data.get('profile', None) return data.get('profile', None)
class SubtitleSerializer(serializers.ModelSerializer): class TranscriptSerializer(serializers.ModelSerializer):
""" """
Serializer for Subtitle objects Serializer for Transcript objects
""" """
content_url = serializers.CharField(source='get_absolute_url', read_only=True)
content = serializers.CharField(write_only=True)
def validate(self, data):
"""
Validate that the subtitle is in the correct format
"""
value = data.get("content")
if data.get("fmt") == "sjson":
import json
try:
loaded = json.loads(value)
except ValueError:
raise serializers.ValidationError("Not in JSON format")
else:
data["content"] = json.dumps(loaded)
return data
class Meta: # pylint: disable=C1001, C0111 class Meta: # pylint: disable=C1001, C0111
model = Subtitle model = Transcript
lookup_field = "id" lookup_field = 'video_id'
fields = ( fields = ('video_id', 'transcript_url', 'language', 'provider', 'fmt')
"fmt",
"language",
"content_url",
"content",
)
class CourseSerializer(serializers.RelatedField): class CourseSerializer(serializers.RelatedField):
...@@ -118,7 +95,6 @@ class VideoSerializer(serializers.ModelSerializer): ...@@ -118,7 +95,6 @@ class VideoSerializer(serializers.ModelSerializer):
encoded_videos takes a list of dicts EncodedVideo data. encoded_videos takes a list of dicts EncodedVideo data.
""" """
encoded_videos = EncodedVideoSerializer(many=True) encoded_videos = EncodedVideoSerializer(many=True)
subtitles = SubtitleSerializer(many=True, required=False)
courses = CourseSerializer( courses = CourseSerializer(
many=True, many=True,
read_only=False, read_only=False,
...@@ -179,11 +155,6 @@ class VideoSerializer(serializers.ModelSerializer): ...@@ -179,11 +155,6 @@ class VideoSerializer(serializers.ModelSerializer):
for video_data in encoded_videos for video_data in encoded_videos
) )
Subtitle.objects.bulk_create(
Subtitle(video=video, **subtitle_data)
for subtitle_data in subtitles
)
# The CourseSerializer will already have converted the course data # The CourseSerializer will already have converted the course data
# to CourseVideo models, so we can just set the video and save. # to CourseVideo models, so we can just set the video and save.
# Also create VideoImage objects if an image filename is present # Also create VideoImage objects if an image filename is present
...@@ -211,13 +182,6 @@ class VideoSerializer(serializers.ModelSerializer): ...@@ -211,13 +182,6 @@ class VideoSerializer(serializers.ModelSerializer):
for video_data in validated_data.get("encoded_videos", []) for video_data in validated_data.get("encoded_videos", [])
) )
# Set subtitles
instance.subtitles.all().delete()
Subtitle.objects.bulk_create(
Subtitle(video=instance, **subtitle_data)
for subtitle_data in validated_data.get("subtitles", [])
)
# Set courses # Set courses
# NOTE: for backwards compatibility with the DRF v2 behavior, # NOTE: for backwards compatibility with the DRF v2 behavior,
# we do NOT delete existing course videos during the update. # we do NOT delete existing course videos during the update.
......
...@@ -17,14 +17,9 @@ urlpatterns = [ ...@@ -17,14 +17,9 @@ urlpatterns = [
name="video-detail" name="video-detail"
), ),
url( url(
r'^videos/(?P<video__edx_video_id>[-\w]+)/(?P<language>[-_\w]+)$', r'^videos/video-transcripts/create/$',
views.SubtitleDetail.as_view(), views.VideoTranscriptView.as_view(),
name="subtitle-detail" name='create-video-transcript'
),
url(
r'^videos/(?P<edx_video_id>[-\w]+)/(?P<language>[-_\w]+)/subtitle$',
views.get_subtitle,
name="subtitle-content"
), ),
url( url(
r'^videos/video-images/update/$', r'^videos/video-images/update/$',
......
...@@ -13,10 +13,11 @@ from django.shortcuts import get_object_or_404 ...@@ -13,10 +13,11 @@ from django.shortcuts import get_object_or_404
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from django.views.decorators.http import last_modified from django.views.decorators.http import last_modified
from edxval.models import Video, Profile, Subtitle, CourseVideo, VideoImage from edxval.api import get_video_transcript, update_video_transcript, create_video_transcript
from edxval.models import Video, Profile, Transcript, CourseVideo, VideoImage, TranscriptFormat, TranscriptProviderType
from edxval.serializers import ( from edxval.serializers import (
VideoSerializer, VideoSerializer,
SubtitleSerializer TranscriptSerializer,
) )
...@@ -92,15 +93,73 @@ class VideoDetail(generics.RetrieveUpdateDestroyAPIView): ...@@ -92,15 +93,73 @@ class VideoDetail(generics.RetrieveUpdateDestroyAPIView):
serializer_class = VideoSerializer serializer_class = VideoSerializer
class VideoTranscriptView(APIView):
    """
    A Transcription View, used by VEDA to create video transcripts.
    """
    authentication_classes = (OAuth2Authentication, SessionAuthentication)

    # noinspection PyMethodMayBeStatic
    def post(self, request):
        """
        Creates a video transcript instance with the given information.

        The request data must carry `video_id`, `language`, `url`, `format`
        and `provider`.

        Arguments:
            request: A WSGI request.

        Returns:
            A 200 response holding the serialized transcript when it was created.
            A 400 response with a `message` when a required field is missing,
            when the format or the provider is not one of the supported choices,
            or when a transcript already exists for the video_id and language.
        """
        attrs = ('video_id', 'language', 'url', 'format', 'provider')
        missing = [attr for attr in attrs if attr not in request.data]
        if missing:
            return Response(
                status=status.HTTP_400_BAD_REQUEST,
                data=dict(message=u'{missing} must be specified.'.format(missing=' and '.join(missing)))
            )

        video_id = request.data['video_id']
        language = request.data['language']
        transcript_url = request.data['url']
        transcript_format = request.data['format']
        provider = request.data['provider']

        supported_formats = dict(TranscriptFormat.CHOICES).keys()
        if transcript_format not in supported_formats:
            return Response(
                status=status.HTTP_400_BAD_REQUEST,
                data=dict(
                    # Trailing space keeps the two concatenated literals from running together.
                    message=(u'This transcript file type is not supported. Supported formats are '
                             u'{supported_formats}').format(supported_formats=supported_formats)
                )
            )

        supported_providers = dict(TranscriptProviderType.CHOICES).keys()
        if provider not in supported_providers:
            return Response(
                status=status.HTTP_400_BAD_REQUEST,
                data=dict(
                    # Trailing space keeps the two concatenated literals from running together.
                    message=(u'This provider is not supported. Supported transcription providers are '
                             u'{supported_providers}').format(supported_providers=supported_providers)
                )
            )

        transcript = get_video_transcript(video_id, language)
        if not transcript:
            serialized_transcript = create_video_transcript(
                video_id=video_id,
                language=language,
                transcript_url=transcript_url,
                transcript_format=transcript_format,
                # Forward the validated provider; without it every transcript
                # is stored with the default `Custom` provider.
                provider=provider,
            )
            response = Response(data=serialized_transcript, status=status.HTTP_200_OK)
        else:
            # Existing transcripts are never overwritten through this endpoint.
            response = Response(
                data=dict(
                    message=(u'Transcript for video "{video_id}" and lang code "{language}" already exists. '
                             u'It can not be overwritten.').format(video_id=video_id, language=language)
                ),
                status=status.HTTP_400_BAD_REQUEST
            )

        return response
class VideoImagesView(APIView): class VideoImagesView(APIView):
...@@ -148,19 +207,3 @@ class VideoImagesView(APIView): ...@@ -148,19 +207,3 @@ class VideoImagesView(APIView):
) )
return Response() return Response()
def _last_modified_subtitle(request, edx_video_id, language): # pylint: disable=W0613
"""
Returns the last modified subtitle
"""
return Subtitle.objects.get(video__edx_video_id=edx_video_id, language=language).modified
@last_modified(last_modified_func=_last_modified_subtitle)
def get_subtitle(request, edx_video_id, language): # pylint: disable=W0613
"""
Return content of subtitle by id
"""
sub = Subtitle.objects.get(video__edx_video_id=edx_video_id, language=language)
response = HttpResponse(sub.content, content_type=sub.content_type)
return response
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment