Commit f52b3d00 by M. Rehan Committed by GitHub

Merge pull request #96 from edx/mrehan/component-api-utils

Add api util to retrieve a transcript given a list of video ids.
parents be91bb5e f3f08d0a
......@@ -68,5 +68,6 @@ logs/*/*.log*
venv/
venvs/
src/
video-images/
video-transcripts/
Christopher Lee <clee@edx.org>
Mushtaq Ali <mushtaak@gmail.com>
Muhammad Ammar <mammar@gmail.com>
Muhammad Rehan <mrehan@edx.org>
......@@ -158,22 +158,6 @@ def is_transcript_available(video_id, language_code=None):
return transcript_set.exists()
def get_video_transcript(video_id, language_code):
    """
    Look up the transcript stored for a video in a given language.

    Arguments:
        video_id: an edx_video_id, or an external_id extracted from the external
            sources of a video component.
        language_code: language code of the requested transcript.

    Returns:
        The matching `VideoTranscript` object, or None when no such transcript exists.
    """
    try:
        return VideoTranscript.objects.get(video_id=video_id, language_code=language_code)
    except VideoTranscript.DoesNotExist:
        # A missing transcript is an expected outcome, not an error.
        return None
def get_video_transcripts(video_id):
"""
Get a video's transcripts
......@@ -190,6 +174,54 @@ def get_video_transcripts(video_id):
return transcripts
def get_video_transcript(video_id, language_code):
    """
    Fetch the serialized transcript info for a video in a given language.

    Arguments:
        video_id(unicode): a video id; either an edx_video_id or an external video id
            extracted from the external sources of a video component.
        language_code(unicode): language code of the requested transcript.

    Returns:
        The `TranscriptSerializer` data for the matching transcript, or None when
        no transcript is found.
    """
    found = VideoTranscript.get_or_none(video_id=video_id, language_code=language_code)
    if not found:
        return None

    return TranscriptSerializer(found).data
def get_video_transcript_data(video_ids, language_code):
    """
    Retrieve the transcript file name and content for the first video id that has one.

    Arguments:
        video_ids(list): edx_video_id and external video ids extracted from the
            external sources of a video component; they are tried in order.
        language_code(unicode): language code of the requested transcript.

    Returns:
        A dict with `file_name` and `content` keys for the first video id whose
        transcript is found, or None when none of the ids has a transcript in
        the requested language.

    Raises:
        Any exception other than `VideoTranscript.DoesNotExist` hit during the
        lookup or the file read; it is logged and then re-raised.
    """
    for video_id in video_ids:
        try:
            record = VideoTranscript.objects.get(video_id=video_id, language_code=language_code)
            # First hit wins: stop searching as soon as a transcript is readable.
            return {
                'file_name': record.transcript.name,
                'content': record.transcript.file.read(),
            }
        except VideoTranscript.DoesNotExist:
            # No transcript for this id; fall through to the next candidate.
            continue
        except Exception:
            logger.exception(
                '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
                video_id,
                language_code
            )
            raise

    return None
def get_video_transcript_url(video_id, language_code):
"""
Returns course video transcript url or None if no transcript
......@@ -198,8 +230,7 @@ def get_video_transcript_url(video_id, language_code):
video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
language_code: language code of a video transcript
"""
video_transcript = get_video_transcript(video_id, language_code)
video_transcript = VideoTranscript.get_or_none(video_id, language_code)
if video_transcript:
return video_transcript.url()
......
......@@ -418,6 +418,22 @@ class VideoTranscript(TimeStampedModel):
unique_together = ('video_id', 'language_code')
@classmethod
def get_or_none(cls, video_id, language_code):
    """
    Fetch the transcript object for a video/language pair, or None if there is none.

    Arguments:
        video_id(unicode): video id to which the transcript may be associated
        language_code(unicode): language of the requested transcript
    """
    try:
        return cls.objects.get(video_id=video_id, language_code=language_code)
    except cls.DoesNotExist:
        # Absence is reported as None rather than as an exception.
        return None
@classmethod
def create_or_update(cls, video_id, language_code, file_name, file_format, provider, file_data=None):
"""
Create or update Transcript object.
......
......@@ -1525,27 +1525,70 @@ class TranscriptTest(TestCase):
is_transcript_available = api.is_transcript_available(video_id, language_code)
self.assertEqual(is_transcript_available, expected_availability)
@data(
    {'video_id': 'non-existant-video', 'language_code': 'en'},
    {'video_id': '0987654321', 'language_code': 'en'},
)
@unpack
def test_get_video_transcript_not_found(self, video_id, language_code):
    """
    Verify that `get_video_transcript` returns None when no transcript matches.
    """
    transcript = api.get_video_transcript(video_id, language_code)
    self.assertIsNone(transcript)
def test_get_video_transcript(self):
    """
    Verify that `get_video_transcript` returns the serialized transcript when one is found.
    """
    video_id, language_code = u'0987654321', u'ur'
    expected = {
        'video_id': video_id,
        'url': self.transcript_url,
        'file_format': TranscriptFormat.SRT,
        'provider': TranscriptProviderType.CUSTOM,
        'language_code': language_code,
    }

    actual = api.get_video_transcript(video_id, language_code)

    self.assertDictEqual(actual, expected)
@patch('edxval.api.logger')
def test_get_video_transcript_data_exception(self, mock_logger):
    """
    Verify that `get_video_transcript_data` logs the failure and re-raises the exception.
    """
    video_id = 'super-soaker'

    with self.assertRaises(IOError):
        api.get_video_transcript_data(video_ids=[video_id], language_code=u'en')

    # The error must have been logged before it propagated to the caller.
    mock_logger.exception.assert_called_with(
        '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
        video_id,
        'en',
    )
@data(
{'video_id': 'super-soaker', 'language_code': 'en', 'result': True},
{'video_id': 'super-soaker', 'language_code': 'ur', 'result': False},
{'video_id': 'super123', 'language_code': 'en', 'result': False},
{'video_id': 'super123', 'language_code': 'ur', 'result': False},
{'video_ids': ['non-existant-video', 'another-non-existant-id'], 'language_code': 'en', 'result': None},
{'video_ids': ['non-existant-video', '0987654321'], 'language_code': 'en', 'result': None},
)
@unpack
def test_get_video_transcript(self, video_id, language_code, result):
def test_get_video_transcript_data_not_found(self, video_ids, language_code, result):
"""
Verify that `get_video_transcript` api function works as expected.
Verify that `get_video_transcript_data` api function works as expected.
"""
transcript = api.get_video_transcript(video_id, language_code)
transcript = api.get_video_transcript_data(video_ids, language_code)
self.assertEqual(transcript, result)
if not result:
self.assertEqual(transcript, None)
else:
serialized_data = api.TranscriptSerializer(transcript).data
transcript_data = dict(self.transcript_data1)
transcript_data['url'] = transcript_data.pop('name')
self.assertEqual(serialized_data, transcript_data)
def test_get_video_transcript_data(self):
    """
    Verify that `get_video_transcript_data` returns the file name and content of
    the first transcript found among the given video ids.
    """
    # Read the fixture inside a context manager so the file handle is closed
    # deterministically instead of being left open for the rest of the test run.
    with open(self.arrow_transcript_path) as transcript_file:
        expected_content = File(transcript_file).read()

    expected_transcript = {
        'file_name': self.transcript_url,
        'content': expected_content,
    }

    # 'super-soaker' is listed first but the expected data is built from this
    # test's transcript fixture -- presumably only '0987654321' has a 'ur'
    # transcript; confirm against the test setUp.
    transcript = api.get_video_transcript_data(
        video_ids=['super-soaker', '0987654321'],
        language_code=u'ur'
    )
    self.assertDictEqual(transcript, expected_transcript)
@data(
{'video_id': 'super-soaker', 'result': True},
......@@ -1573,8 +1616,11 @@ class TranscriptTest(TestCase):
transcript_data = dict(self.transcript_data1)
transcript_data['language_code'] = 'ur'
video_transcript = api.get_video_transcript(transcript_data['video_id'], transcript_data['language_code'])
self.assertIsNone(video_transcript)
with self.assertRaises(VideoTranscript.DoesNotExist):
VideoTranscript.objects.get(
video_id=transcript_data['video_id'],
language_code=transcript_data['language_code']
)
transcript_url = api.create_or_update_video_transcript(
video_id=transcript_data['video_id'],
......@@ -1585,12 +1631,12 @@ class TranscriptTest(TestCase):
)
self.assertEqual(transcript_url, transcript_data['name'])
video_transcript = api.get_video_transcript(transcript_data['video_id'], transcript_data['language_code'])
transcript_data['url'] = transcript_data.pop('name')
self.assertEqual(
transcript_data,
api.TranscriptSerializer(video_transcript).data
expected_transcript = api.get_video_transcript(
video_id=transcript_data['video_id'],
language_code=transcript_data['language_code']
)
transcript_data['url'] = transcript_data.pop('name')
self.assertEqual(transcript_data, expected_transcript)
@data(
{'language_code': 'ur', 'has_url': True},
......
......@@ -146,7 +146,7 @@ class VideoTranscriptView(APIView):
).format(provider=provider, supported_providers=supported_providers)
return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})
transcript = get_video_transcript(video_id, language_code)
transcript = VideoTranscript.get_or_none(video_id, language_code)
if transcript is None:
create_or_update_video_transcript(
video_id,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment