Commit 64462ad6 by Qubad786

refactor/add transcript api utils.

parent 7dc65bc4
...@@ -5,6 +5,7 @@ The internal API for VAL. ...@@ -5,6 +5,7 @@ The internal API for VAL.
""" """
import logging import logging
from enum import Enum from enum import Enum
from uuid import uuid4
from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.exceptions import ObjectDoesNotExist, ValidationError
from lxml import etree from lxml import etree
...@@ -39,6 +40,13 @@ class SortDirection(Enum): ...@@ -39,6 +40,13 @@ class SortDirection(Enum):
desc = "desc" desc = "desc"
def generate_video_id():
    """
    Generates a new, random video ID.

    Returns:
        A text (unicode) string holding the canonical hyphenated form of a
        freshly generated version-4 UUID.
    """
    # Formatting into a ``u''`` literal produces a text string without relying
    # on the Python 2-only ``unicode`` builtin, so this also runs on Python 3.
    return u'{}'.format(uuid4())
def create_video(video_data): def create_video(video_data):
""" """
Called on to create Video objects in the database Called on to create Video objects in the database
...@@ -78,6 +86,23 @@ def create_video(video_data): ...@@ -78,6 +86,23 @@ def create_video(video_data):
raise ValCannotCreateError(serializer.errors) raise ValCannotCreateError(serializer.errors)
def create_external_video(display_name):
    """
    Create a video record that represents an externally hosted video.

    The record gets a freshly generated `edx_video_id`, the 'external' status,
    a zero duration and no encodings or courses.

    Arguments:
        display_name(unicode): Client title for the external video

    Returns:
        Whatever `create_video` returns for the new record.
    """
    external_video_data = dict(
        edx_video_id=generate_video_id(),
        status='external',
        client_video_id=display_name,
        duration=0,
        encoded_videos=[],
        courses=[],
    )
    return create_video(external_video_data)
def update_video(video_data): def update_video(video_data):
""" """
Called on to update Video objects in the database Called on to update Video objects in the database
...@@ -213,30 +238,21 @@ def get_video_transcript(video_id, language_code): ...@@ -213,30 +238,21 @@ def get_video_transcript(video_id, language_code):
return TranscriptSerializer(transcript).data if transcript else None return TranscriptSerializer(transcript).data if transcript else None
def get_video_transcript_data(video_ids, language_code): def get_video_transcript_data(video_id, language_code):
""" """
Get video transcript data Get video transcript data
Arguments: Arguments:
video_ids(list): list containing edx_video_id and external video ids extracted from video_id(unicode): An id identifying the Video.
external sources from a video component.
language_code(unicode): it will be the language code of the requested transcript. language_code(unicode): it will be the language code of the requested transcript.
Returns: Returns:
A dict containing transcript file name and its content. It will be for a video whose transcript A dict containing transcript file name and its content.
found first while iterating the video ids.
""" """
transcript_data = None video_transcript = VideoTranscript.get_or_none(video_id, language_code)
for video_id in video_ids: if video_transcript:
try: try:
video_transcript = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=language_code) return dict(file_name=video_transcript.filename, content=video_transcript.transcript.file.read())
transcript_data = dict(
file_name=video_transcript.filename,
content=video_transcript.transcript.file.read()
)
break
except VideoTranscript.DoesNotExist:
continue
except Exception: except Exception:
logger.exception( logger.exception(
'[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
...@@ -245,26 +261,23 @@ def get_video_transcript_data(video_ids, language_code): ...@@ -245,26 +261,23 @@ def get_video_transcript_data(video_ids, language_code):
) )
raise raise
return transcript_data
def get_available_transcript_languages(video_id):
def get_available_transcript_languages(video_ids):
""" """
Get available transcript languages Get available transcript languages
Arguments: Arguments:
video_ids(list): list containing edx_video_id and external video ids extracted from video_id(unicode): An id identifying the Video.
external sources of a video component.
Returns: Returns:
A list containing unique transcript language codes for the video ids. A list containing transcript language codes for the Video.
""" """
available_languages = VideoTranscript.objects.filter( available_languages = VideoTranscript.objects.filter(
video__edx_video_id__in=video_ids video__edx_video_id=video_id
).values_list( ).values_list(
'language_code', flat=True 'language_code', flat=True
) )
return list(set(available_languages)) return list(available_languages)
def get_video_transcript_url(video_id, language_code): def get_video_transcript_url(video_id, language_code):
...@@ -280,6 +293,28 @@ def get_video_transcript_url(video_id, language_code): ...@@ -280,6 +293,28 @@ def get_video_transcript_url(video_id, language_code):
return video_transcript.url() return video_transcript.url()
def create_video_transcript(video_id, language_code, file_format, content, provider=TranscriptProviderType.CUSTOM):
    """
    Create a transcript for a video through `TranscriptSerializer`.

    Arguments:
        video_id(unicode): An Id identifying the Video data model object.
        language_code(unicode): A language code.
        file_format(unicode): Transcript file format.
        content(InMemoryUploadedFile): Transcript content.
        provider(unicode): Transcript provider (it will be 'custom' by default if not selected).

    Returns:
        The serializer's `data` for the saved transcript.

    Raises:
        ValCannotCreateError: carrying the serializer errors when the supplied
            values do not pass validation.
    """
    transcript_fields = {
        'provider': provider,
        'language_code': language_code,
        'file_format': file_format,
    }
    # The video id travels in the serializer context rather than in its data.
    serializer = TranscriptSerializer(data=transcript_fields, context={'video_id': video_id})
    if not serializer.is_valid():
        raise ValCannotCreateError(serializer.errors)

    serializer.save(content=content)
    return serializer.data
def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None): def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None):
""" """
Create or Update video transcript for an existing video. Create or Update video transcript for an existing video.
...@@ -323,17 +358,16 @@ def delete_video_transcript(video_id, language_code): ...@@ -323,17 +358,16 @@ def delete_video_transcript(video_id, language_code):
Delete transcript for an existing video. Delete transcript for an existing video.
Arguments: Arguments:
video_id: id of the video with which transcript is associated video_id: id identifying the video to which the transcript is associated.
language_code: language code of a video transcript language_code: language code of a video transcript.
""" """
try: video_transcript = VideoTranscript.get_or_none(video_id, language_code)
video_transcript = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=language_code) if video_transcript:
# delete the actual transcript file from storage # delete the transcript content from storage.
video_transcript.transcript.delete() video_transcript.transcript.delete()
# delete the record from db # delete the transcript metadata from db.
video_transcript.delete() video_transcript.delete()
except VideoTranscript.DoesNotExist: logger.info('Transcript is removed for video "%s" and language code "%s"', video_id, language_code)
pass
def get_3rd_party_transcription_plans(): def get_3rd_party_transcription_plans():
......
...@@ -123,6 +123,18 @@ class Video(models.Model): ...@@ -123,6 +123,18 @@ class Video(models.Model):
return self.edx_video_id return self.edx_video_id
@classmethod @classmethod
def get_or_none(cls, **filter_kwargs):
    """
    Look up a single video matching the given filters.

    Arguments:
        filter_kwargs: lookup keyword arguments passed straight to `cls.objects.get`.

    Returns:
        The matching video, or None when `cls.DoesNotExist` is raised.
    """
    try:
        return cls.objects.get(**filter_kwargs)
    except cls.DoesNotExist:
        return None
@classmethod
def by_youtube_id(cls, youtube_id): def by_youtube_id(cls, youtube_id):
""" """
Look up video by youtube id Look up video by youtube id
...@@ -448,6 +460,34 @@ class VideoTranscript(TimeStampedModel): ...@@ -448,6 +460,34 @@ class VideoTranscript(TimeStampedModel):
return transcript return transcript
@classmethod @classmethod
def create(cls, video, language_code, file_format, content, provider):
    """
    Build a Video Transcript, store its content and save the record.

    Arguments:
        video(Video): Video data model object
        language_code(unicode): A language code.
        file_format(unicode): Transcript file format.
        content(InMemoryUploadedFile): Transcript content.
        provider(unicode): Transcript provider.

    Returns:
        The saved transcript object.

    Raises:
        Any exception raised while saving; it is logged and then re-raised.
    """
    new_transcript = cls(
        video=video,
        language_code=language_code,
        file_format=file_format,
        provider=provider,
    )
    # `closing` makes sure the content is closed whether or not saving succeeds.
    with closing(content) as open_content:
        try:
            # The stored file gets a random hex name with the format as extension.
            generated_name = '{uuid}.{ext}'.format(uuid=uuid4().hex, ext=new_transcript.file_format)
            new_transcript.transcript.save(generated_name, open_content)
            new_transcript.save()
        except Exception:
            logger.exception(
                '[VAL] Transcript save failed to storage for video_id "%s" language code "%s"',
                video.edx_video_id,
                language_code,
            )
            raise

    return new_transcript
@classmethod
def create_or_update(cls, video, language_code, metadata, file_data=None): def create_or_update(cls, video, language_code, metadata, file_data=None):
""" """
Create or update Transcript object. Create or update Transcript object.
...@@ -481,7 +521,11 @@ class VideoTranscript(TimeStampedModel): ...@@ -481,7 +521,11 @@ class VideoTranscript(TimeStampedModel):
try: try:
video_transcript.transcript.save(file_name, transcript_file_data) video_transcript.transcript.save(file_name, transcript_file_data)
except Exception: except Exception:
logger.exception('VAL: Transcript save failed to storage for video_id [%s]', video.edx_video_id) logger.exception(
'[VAL] Transcript save failed to storage for video_id "%s" language code "%s"',
video.edx_video_id,
language_code
)
raise raise
video_transcript.save() video_transcript.save()
......
...@@ -74,6 +74,24 @@ class TranscriptSerializer(serializers.ModelSerializer): ...@@ -74,6 +74,24 @@ class TranscriptSerializer(serializers.ModelSerializer):
""" """
return transcript.url() return transcript.url()
def validate(self, data):
    """
    Validate the transcript data against the video named in the context.

    Reads `video_id` from the serializer context, resolves it to a Video and
    adds that object to the data under the `video` key.

    Raises:
        serializers.ValidationError: when no video matches the context's `video_id`.
    """
    edx_video_id = self.context.get('video_id')
    matched_video = Video.get_or_none(edx_video_id=edx_video_id)
    if matched_video:
        data.update(video=matched_video)
        return data

    raise serializers.ValidationError('Video "{video_id}" is not valid.'.format(video_id=edx_video_id))
def create(self, validated_data):
    """
    Create the video transcript from validated data.

    Arguments:
        validated_data(dict): keyword arguments forwarded unchanged to `VideoTranscript.create`.

    Returns:
        Whatever `VideoTranscript.create` returns.
    """
    created_transcript = VideoTranscript.create(**validated_data)
    return created_transcript
class CourseSerializer(serializers.RelatedField): class CourseSerializer(serializers.RelatedField):
""" """
......
...@@ -30,6 +30,7 @@ from edxval.models import (LIST_MAX_ITEMS, CourseVideo, EncodedVideo, Profile, ...@@ -30,6 +30,7 @@ from edxval.models import (LIST_MAX_ITEMS, CourseVideo, EncodedVideo, Profile,
TranscriptFormat, TranscriptPreference, TranscriptFormat, TranscriptPreference,
TranscriptProviderType, Video, VideoImage, TranscriptProviderType, Video, VideoImage,
VideoTranscript) VideoTranscript)
from edxval.serializers import VideoSerializer
from edxval.tests import APIAuthTestCase, constants from edxval.tests import APIAuthTestCase, constants
...@@ -139,6 +140,21 @@ class CreateVideoTest(TestCase): ...@@ -139,6 +140,21 @@ class CreateVideoTest(TestCase):
with self.assertRaises(ValCannotCreateError): with self.assertRaises(ValCannotCreateError):
api.create_video(data) api.create_video(data)
def test_create_external_video(self):
    """
    Verify that `create_external_video` stores a video with the external defaults.
    """
    display_name = u'Test Video'
    expected_video = dict(
        status=u'external',
        client_video_id=display_name,
        duration=0,
        encoded_videos=[],
        courses=[],
    )
    edx_video_id = api.create_external_video(display_name=display_name)
    # Look the video up by the returned id and compare its serialized form.
    created_video = Video.objects.get(edx_video_id=edx_video_id)
    self.assertDictContainsSubset(expected_video, VideoSerializer(created_video).data)
@ddt @ddt
class UpdateVideoTest(TestCase): class UpdateVideoTest(TestCase):
...@@ -1762,26 +1778,23 @@ class TranscriptTest(TestCase): ...@@ -1762,26 +1778,23 @@ class TranscriptTest(TestCase):
""" """
Verify that `get_video_transcript_data` logs and raises an exception. Verify that `get_video_transcript_data` logs and raises an exception.
""" """
video_id = u'medium-soaker'
language_code = u'zh'
with self.assertRaises(IOError): with self.assertRaises(IOError):
api.get_video_transcript_data(video_ids=['medium-soaker'], language_code=u'zh') api.get_video_transcript_data(video_id, language_code)
mock_logger.exception.assert_called_with( mock_logger.exception.assert_called_with(
'[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
'medium-soaker', video_id,
'zh', language_code,
) )
@data( def test_get_video_transcript_data_not_found(self):
{'video_ids': ['non-existant-video', 'another-non-existant-id'], 'language_code': 'en', 'result': None},
{'video_ids': ['non-existant-video', 'super-soaker'], 'language_code': 'zh', 'result': None},
)
@unpack
def test_get_video_transcript_data_not_found(self, video_ids, language_code, result):
""" """
Verify that `get_video_transcript_data` api function works as expected. Verify the `get_video_transcript_data` returns none if transcript is not present for a video.
""" """
transcript = api.get_video_transcript_data(video_ids, language_code) transcript = api.get_video_transcript_data(u'non-existant-video', u'en')
self.assertEqual(transcript, result) self.assertIsNone(transcript)
@data( @data(
('super-soaker', 'en', 'Shallow Swordfish-en.srt', 'edxval/tests/data/The_Flash.srt'), ('super-soaker', 'en', 'Shallow Swordfish-en.srt', 'edxval/tests/data/The_Flash.srt'),
...@@ -1796,10 +1809,7 @@ class TranscriptTest(TestCase): ...@@ -1796,10 +1809,7 @@ class TranscriptTest(TestCase):
'file_name': expected_file_name, 'file_name': expected_file_name,
'content': File(open(expected_transcript_path)).read() 'content': File(open(expected_transcript_path)).read()
} }
transcript = api.get_video_transcript_data( transcript = api.get_video_transcript_data(video_id=video_id, language_code=language_code)
video_ids=[video_id, '0987654321'],
language_code=language_code
)
self.assertDictEqual(transcript, expected_transcript) self.assertDictEqual(transcript, expected_transcript)
def test_get_video_transcript_url(self): def test_get_video_transcript_url(self):
...@@ -1894,6 +1904,67 @@ class TranscriptTest(TestCase): ...@@ -1894,6 +1904,67 @@ class TranscriptTest(TestCase):
self.assertEqual(transcript_exception.exception.message, exception_message) self.assertEqual(transcript_exception.exception.message, exception_message)
def test_create_video_transcript(self):
    """
    Verify that `create_video_transcript` stores the transcript record and its content.
    """
    edx_video_id = u'1234'
    language_code = u'en'
    provider = TranscriptProviderType.THREE_PLAY_MEDIA
    file_format = TranscriptFormat.SRT
    content = ContentFile(FILE_DATA)

    # Set up a video with the `edx_video_id` above and no transcripts.
    video_data = dict(constants.VIDEO_DICT_DIFFERENT_ID_FISH, edx_video_id=edx_video_id)
    self.setup_video_with_transcripts(video_data=video_data, transcripts_data=[])

    # The 'en' transcript must not exist before the call.
    self.assertIsNone(VideoTranscript.get_or_none(edx_video_id, language_code))

    # Create the transcript.
    api.create_video_transcript(
        video_id=edx_video_id,
        language_code=language_code,
        provider=provider,
        file_format=file_format,
        content=content,
    )

    # Check the stored record and the content written to storage.
    stored_transcript = VideoTranscript.get_or_none(edx_video_id, language_code)
    self.assertIsNotNone(stored_transcript)
    self.assertEqual(stored_transcript.file_format, file_format)
    self.assertEqual(stored_transcript.provider, provider)
    with open(stored_transcript.transcript.name) as transcript_file:
        self.assertEqual(transcript_file.read(), FILE_DATA)
@data(
    dict(
        video_id='super-soaker',
        language_code='en',
        file_format='123',
        provider=TranscriptProviderType.CIELO24,
        exception_msg='"123" is not a valid choice.',
    ),
    dict(
        video_id='medium-soaker',
        language_code='en',
        file_format=TranscriptFormat.SRT,
        provider='unknown provider',
        exception_msg='"unknown provider" is not a valid choice.',
    ),
)
@unpack
def test_create_video_transcript_exceptions(self, video_id, language_code, file_format, provider, exception_msg):
    """
    Verify that `create_video_transcript` raises `ValCannotCreateError` for an
    invalid file format or an invalid provider.
    """
    with self.assertRaises(ValCannotCreateError) as raised:
        api.create_video_transcript(
            video_id=video_id,
            language_code=language_code,
            file_format=file_format,
            content=ContentFile(FILE_DATA),
            provider=provider,
        )

    # The error text must name the rejected value.
    self.assertIn(exception_msg, unicode(raised.exception.message))
def test_video_transcript_deletion(self): def test_video_transcript_deletion(self):
""" """
Test video transcript deletion works as expected. Test video transcript deletion works as expected.
...@@ -1930,12 +2001,11 @@ class TranscriptTest(TestCase): ...@@ -1930,12 +2001,11 @@ class TranscriptTest(TestCase):
Verify that `get_available_transcript_languages` works as expected. Verify that `get_available_transcript_languages` works as expected.
""" """
# `super-soaker` has got 'en' and 'fr' transcripts # `super-soaker` has got 'en' and 'fr' transcripts
# `non_existent_video_id` that does not have transcript transcript_languages = api.get_available_transcript_languages(video_id=u'super-soaker')
video_ids = ['super-soaker', 'non_existent_video_id']
transcript_languages = api.get_available_transcript_languages(video_ids=video_ids)
self.assertItemsEqual(transcript_languages, ['en', 'fr']) self.assertItemsEqual(transcript_languages, ['en', 'fr'])
def test_delete_video_transcript(self): @patch('edxval.api.logger')
def test_delete_video_transcript(self, mock_logger):
""" """
Verify that `delete_video_transcript` works as expected. Verify that `delete_video_transcript` works as expected.
""" """
...@@ -1954,6 +2024,11 @@ class TranscriptTest(TestCase): ...@@ -1954,6 +2024,11 @@ class TranscriptTest(TestCase):
# assert that the transcript does not exist on the path anymore. # assert that the transcript does not exist on the path anymore.
self.assertFalse(os.path.exists(transcript_path)) self.assertFalse(os.path.exists(transcript_path))
self.assertEqual(VideoTranscript.objects.filter(**query_filter).count(), 0) self.assertEqual(VideoTranscript.objects.filter(**query_filter).count(), 0)
mock_logger.info.assert_called_with(
'Transcript is removed for video "%s" and language code "%s"',
query_filter['video__edx_video_id'],
query_filter['language_code']
)
@ddt @ddt
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment