Commit 64462ad6 by Qubad786

refactor/add transcript api utils.

parent 7dc65bc4
......@@ -5,6 +5,7 @@ The internal API for VAL.
"""
import logging
from enum import Enum
from uuid import uuid4
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from lxml import etree
......@@ -39,6 +40,13 @@ class SortDirection(Enum):
desc = "desc"
def generate_video_id():
"""
Generates a video ID.
"""
return unicode(uuid4())
def create_video(video_data):
"""
Called on to create Video objects in the database
......@@ -78,6 +86,23 @@ def create_video(video_data):
raise ValCannotCreateError(serializer.errors)
def create_external_video(display_name):
"""
Create an external video.
Arguments:
display_name(unicode): Client title for the external video
"""
return create_video({
'edx_video_id': generate_video_id(),
'status': 'external',
'client_video_id': display_name,
'duration': 0,
'encoded_videos': [],
'courses': []
})
def update_video(video_data):
"""
Called on to update Video objects in the database
......@@ -213,30 +238,21 @@ def get_video_transcript(video_id, language_code):
return TranscriptSerializer(transcript).data if transcript else None
def get_video_transcript_data(video_ids, language_code):
def get_video_transcript_data(video_id, language_code):
"""
Get video transcript data
Arguments:
video_ids(list): list containing edx_video_id and external video ids extracted from
external sources from a video component.
video_id(unicode): An id identifying the Video.
language_code(unicode): it will be the language code of the requested transcript.
Returns:
A dict containing transcript file name and its content. It will be for a video whose transcript
found first while iterating the video ids.
A dict containing transcript file name and its content.
"""
transcript_data = None
for video_id in video_ids:
video_transcript = VideoTranscript.get_or_none(video_id, language_code)
if video_transcript:
try:
video_transcript = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=language_code)
transcript_data = dict(
file_name=video_transcript.filename,
content=video_transcript.transcript.file.read()
)
break
except VideoTranscript.DoesNotExist:
continue
return dict(file_name=video_transcript.filename, content=video_transcript.transcript.file.read())
except Exception:
logger.exception(
'[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
......@@ -245,26 +261,23 @@ def get_video_transcript_data(video_ids, language_code):
)
raise
return transcript_data
def get_available_transcript_languages(video_ids):
def get_available_transcript_languages(video_id):
"""
Get available transcript languages
Arguments:
video_ids(list): list containing edx_video_id and external video ids extracted from
external sources of a video component.
video_id(unicode): An id identifying the Video.
Returns:
A list containing unique transcript language codes for the video ids.
A list containing transcript language codes for the Video.
"""
available_languages = VideoTranscript.objects.filter(
video__edx_video_id__in=video_ids
video__edx_video_id=video_id
).values_list(
'language_code', flat=True
)
return list(set(available_languages))
return list(available_languages)
def get_video_transcript_url(video_id, language_code):
......@@ -280,6 +293,28 @@ def get_video_transcript_url(video_id, language_code):
return video_transcript.url()
def create_video_transcript(video_id, language_code, file_format, content, provider=TranscriptProviderType.CUSTOM):
"""
Create a video transcript.
Arguments:
video_id(unicode): An Id identifying the Video data model object.
language_code(unicode): A language code.
file_format(unicode): Transcript file format.
content(InMemoryUploadedFile): Transcript content.
provider(unicode): Transcript provider (it will be 'custom' by default if not selected).
"""
transcript_serializer = TranscriptSerializer(
data=dict(provider=provider, language_code=language_code, file_format=file_format),
context=dict(video_id=video_id),
)
if transcript_serializer.is_valid():
transcript_serializer.save(content=content)
return transcript_serializer.data
else:
raise ValCannotCreateError(transcript_serializer.errors)
def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None):
"""
Create or Update video transcript for an existing video.
......@@ -323,17 +358,16 @@ def delete_video_transcript(video_id, language_code):
Delete transcript for an existing video.
Arguments:
video_id: id of the video with which transcript is associated
language_code: language code of a video transcript
video_id: id identifying the video to which the transcript is associated.
language_code: language code of a video transcript.
"""
try:
video_transcript = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=language_code)
# delete the actual transcript file from storage
video_transcript = VideoTranscript.get_or_none(video_id, language_code)
if video_transcript:
# delete the transcript content from storage.
video_transcript.transcript.delete()
# delete the record from db
# delete the transcript metadata from db.
video_transcript.delete()
except VideoTranscript.DoesNotExist:
pass
logger.info('Transcript is removed for video "%s" and language code "%s"', video_id, language_code)
def get_3rd_party_transcription_plans():
......
......@@ -123,6 +123,18 @@ class Video(models.Model):
return self.edx_video_id
@classmethod
def get_or_none(cls, **filter_kwargs):
"""
Returns a video or None.
"""
try:
video = cls.objects.get(**filter_kwargs)
except cls.DoesNotExist:
video = None
return video
@classmethod
def by_youtube_id(cls, youtube_id):
"""
Look up video by youtube id
......@@ -448,6 +460,34 @@ class VideoTranscript(TimeStampedModel):
return transcript
@classmethod
def create(cls, video, language_code, file_format, content, provider):
"""
Create a Video Transcript.
Arguments:
video(Video): Video data model object
language_code(unicode): A language code.
file_format(unicode): Transcript file format.
content(InMemoryUploadedFile): Transcript content.
provider(unicode): Transcript provider.
"""
video_transcript = cls(video=video, language_code=language_code, file_format=file_format, provider=provider)
with closing(content) as transcript_content:
try:
file_name = '{uuid}.{ext}'.format(uuid=uuid4().hex, ext=video_transcript.file_format)
video_transcript.transcript.save(file_name, transcript_content)
video_transcript.save()
except Exception:
logger.exception(
'[VAL] Transcript save failed to storage for video_id "%s" language code "%s"',
video.edx_video_id,
language_code
)
raise
return video_transcript
@classmethod
def create_or_update(cls, video, language_code, metadata, file_data=None):
"""
Create or update Transcript object.
......@@ -481,7 +521,11 @@ class VideoTranscript(TimeStampedModel):
try:
video_transcript.transcript.save(file_name, transcript_file_data)
except Exception:
logger.exception('VAL: Transcript save failed to storage for video_id [%s]', video.edx_video_id)
logger.exception(
'[VAL] Transcript save failed to storage for video_id "%s" language code "%s"',
video.edx_video_id,
language_code
)
raise
video_transcript.save()
......
......@@ -74,6 +74,24 @@ class TranscriptSerializer(serializers.ModelSerializer):
"""
return transcript.url()
def validate(self, data):
"""
Validates the transcript data.
"""
video_id = self.context.get('video_id')
video = Video.get_or_none(edx_video_id=video_id)
if not video:
raise serializers.ValidationError('Video "{video_id}" is not valid.'.format(video_id=video_id))
data.update(video=video)
return data
def create(self, validated_data):
"""
Create the video transcript.
"""
return VideoTranscript.create(**validated_data)
class CourseSerializer(serializers.RelatedField):
"""
......
......@@ -30,6 +30,7 @@ from edxval.models import (LIST_MAX_ITEMS, CourseVideo, EncodedVideo, Profile,
TranscriptFormat, TranscriptPreference,
TranscriptProviderType, Video, VideoImage,
VideoTranscript)
from edxval.serializers import VideoSerializer
from edxval.tests import APIAuthTestCase, constants
......@@ -139,6 +140,21 @@ class CreateVideoTest(TestCase):
with self.assertRaises(ValCannotCreateError):
api.create_video(data)
def test_create_external_video(self):
"""
Tests the creation of an external video.
"""
expected_video = {
'status': u'external',
'client_video_id': u'Test Video',
'duration': 0,
'encoded_videos': [],
'courses': []
}
edx_video_id = api.create_external_video(display_name=expected_video['client_video_id'])
video = VideoSerializer(Video.objects.get(edx_video_id=edx_video_id)).data
self.assertDictContainsSubset(expected_video, video)
@ddt
class UpdateVideoTest(TestCase):
......@@ -1762,26 +1778,23 @@ class TranscriptTest(TestCase):
"""
Verify that `get_video_transcript_data` logs and raises an exception.
"""
video_id = u'medium-soaker'
language_code = u'zh'
with self.assertRaises(IOError):
api.get_video_transcript_data(video_ids=['medium-soaker'], language_code=u'zh')
api.get_video_transcript_data(video_id, language_code)
mock_logger.exception.assert_called_with(
'[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
'medium-soaker',
'zh',
video_id,
language_code,
)
@data(
{'video_ids': ['non-existant-video', 'another-non-existant-id'], 'language_code': 'en', 'result': None},
{'video_ids': ['non-existant-video', 'super-soaker'], 'language_code': 'zh', 'result': None},
)
@unpack
def test_get_video_transcript_data_not_found(self, video_ids, language_code, result):
def test_get_video_transcript_data_not_found(self):
"""
Verify that `get_video_transcript_data` api function works as expected.
Verify the `get_video_transcript_data` returns none if transcript is not present for a video.
"""
transcript = api.get_video_transcript_data(video_ids, language_code)
self.assertEqual(transcript, result)
transcript = api.get_video_transcript_data(u'non-existant-video', u'en')
self.assertIsNone(transcript)
@data(
('super-soaker', 'en', 'Shallow Swordfish-en.srt', 'edxval/tests/data/The_Flash.srt'),
......@@ -1796,10 +1809,7 @@ class TranscriptTest(TestCase):
'file_name': expected_file_name,
'content': File(open(expected_transcript_path)).read()
}
transcript = api.get_video_transcript_data(
video_ids=[video_id, '0987654321'],
language_code=language_code
)
transcript = api.get_video_transcript_data(video_id=video_id, language_code=language_code)
self.assertDictEqual(transcript, expected_transcript)
def test_get_video_transcript_url(self):
......@@ -1894,6 +1904,67 @@ class TranscriptTest(TestCase):
self.assertEqual(transcript_exception.exception.message, exception_message)
def test_create_video_transcript(self):
"""
Verify that `create_video_transcript` api function creates transcript as expected.
"""
edx_video_id = u'1234'
language_code = u'en'
transcript_props = dict(
video_id=edx_video_id,
language_code=language_code,
provider=TranscriptProviderType.THREE_PLAY_MEDIA,
file_format=TranscriptFormat.SRT,
content=ContentFile(FILE_DATA)
)
# setup video with the `edx_video_id` above.
self.setup_video_with_transcripts(
video_data=dict(constants.VIDEO_DICT_DIFFERENT_ID_FISH, edx_video_id=edx_video_id),
transcripts_data=[]
)
# Assert that 'en' transcript is not already present.
video_transcript = VideoTranscript.get_or_none(edx_video_id, language_code)
self.assertIsNone(video_transcript)
# Create the transcript
api.create_video_transcript(**transcript_props)
# Assert the transcript object and its content
video_transcript = VideoTranscript.get_or_none(edx_video_id, language_code)
self.assertIsNotNone(video_transcript)
self.assertEqual(video_transcript.file_format, transcript_props['file_format'])
self.assertEqual(video_transcript.provider, transcript_props['provider'])
with open(video_transcript.transcript.name) as created_transcript:
self.assertEqual(created_transcript.read(), FILE_DATA)
@data(
{
'video_id': 'super-soaker',
'language_code': 'en',
'file_format': '123',
'provider': TranscriptProviderType.CIELO24,
'exception_msg': '"123" is not a valid choice.'
},
{
'video_id': 'medium-soaker',
'language_code': 'en',
'file_format': TranscriptFormat.SRT,
'provider': 'unknown provider',
'exception_msg': '"unknown provider" is not a valid choice.'
}
)
@unpack
def test_create_video_transcript_exceptions(self, video_id, language_code, file_format, provider, exception_msg):
"""
Verify that `create_video_transcript` api function raise exceptions on invalid values.
"""
with self.assertRaises(ValCannotCreateError) as transcript_exception:
api.create_video_transcript(video_id, language_code, file_format, ContentFile(FILE_DATA), provider)
self.assertIn(exception_msg, unicode(transcript_exception.exception.message))
def test_video_transcript_deletion(self):
"""
Test video transcript deletion works as expected.
......@@ -1930,12 +2001,11 @@ class TranscriptTest(TestCase):
Verify that `get_available_transcript_languages` works as expected.
"""
# `super-soaker` has got 'en' and 'fr' transcripts
# `non_existent_video_id` that does not have transcript
video_ids = ['super-soaker', 'non_existent_video_id']
transcript_languages = api.get_available_transcript_languages(video_ids=video_ids)
transcript_languages = api.get_available_transcript_languages(video_id=u'super-soaker')
self.assertItemsEqual(transcript_languages, ['en', 'fr'])
def test_delete_video_transcript(self):
@patch('edxval.api.logger')
def test_delete_video_transcript(self, mock_logger):
"""
Verify that `delete_video_transcript` works as expected.
"""
......@@ -1954,6 +2024,11 @@ class TranscriptTest(TestCase):
# assert that the transcript does not exist on the path anymore.
self.assertFalse(os.path.exists(transcript_path))
self.assertEqual(VideoTranscript.objects.filter(**query_filter).count(), 0)
mock_logger.info.assert_called_with(
'Transcript is removed for video "%s" and language code "%s"',
query_filter['video__edx_video_id'],
query_filter['language_code']
)
@ddt
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment