Unverified Commit e0b308bf by M. Rehan Committed by GitHub

Merge pull request #114 from edx/mrehan/download-transcripts-on-upload-page

Feature: Create/edit/delete transcript via Video Upload Page
parents cd3d5f30 83d2c3e5
......@@ -71,3 +71,6 @@ venvs/
src/
video-images/
video-transcripts/
### VisualStudioCode ###
.vscode/*
......@@ -6,8 +6,7 @@ env:
- TOXENV=django110
- TOXENV=django111
install:
- pip install -r requirements.txt
- pip install -r test-requirements.txt
- pip install tox
- pip install coveralls
script:
- tox
......
......@@ -247,7 +247,7 @@ def get_video_transcript_data(video_ids, language_code):
try:
video_transcript = VideoTranscript.objects.get(video_id=video_id, language_code=language_code)
transcript_data = dict(
file_name=video_transcript.transcript.name,
file_name=video_transcript.filename,
content=video_transcript.transcript.file.read()
)
break
......@@ -296,46 +296,57 @@ def get_video_transcript_url(video_id, language_code):
return video_transcript.url()
def create_or_update_video_transcript(
video_id,
language_code,
file_name,
file_format,
provider,
file_data=None,
):
def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None):
"""
Create or Update video transcript for an existing video.
Arguments:
video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
language_code: language code of a video transcript
file_name: file name of a video transcript
metadata (dict): A dict containing (to be overwritten) properties
file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.
file_format: format of the transcript
provider: transcript provider
Returns:
video transcript url
"""
if file_format not in dict(TranscriptFormat.CHOICES).keys():
# Filter wanted properties
metadata = {
prop: value
for prop, value in metadata.iteritems()
if prop in ['provider', 'language_code', 'file_name', 'file_format'] and value
}
file_format = metadata.get('file_format')
if file_format and file_format not in dict(TranscriptFormat.CHOICES).keys():
raise InvalidTranscriptFormat('{} transcript format is not supported'.format(file_format))
if provider not in dict(TranscriptProviderType.CHOICES).keys():
provider = metadata.get('provider')
if provider and provider not in dict(TranscriptProviderType.CHOICES).keys():
raise InvalidTranscriptProvider('{} transcript provider is not supported'.format(provider))
video_transcript, __ = VideoTranscript.create_or_update(
video_id,
language_code,
file_name,
file_format,
provider,
file_data,
)
video_transcript, __ = VideoTranscript.create_or_update(video_id, language_code, metadata, file_data)
return video_transcript.url()
def delete_video_transcript(video_id, language_code):
    """
    Delete the transcript of an existing video, if there is one.

    Both the stored transcript file and its database record are removed.
    The call does nothing when no transcript matches the given video and
    language.

    Arguments:
        video_id: id of the video with which transcript is associated
        language_code: language code of a video transcript
    """
    try:
        video_transcript = VideoTranscript.objects.get(video_id=video_id, language_code=language_code)
    except VideoTranscript.DoesNotExist:
        # Nothing to delete: a missing transcript is treated as already deleted.
        return

    # Delete the actual transcript file from storage. `save=False` skips the
    # model save that would otherwise happen right before the record itself
    # is deleted below.
    video_transcript.transcript.delete(save=False)
    # Delete the record from db.
    video_transcript.delete()
def get_3rd_party_transcription_plans():
"""
Retrieves 3rd party transcription plans.
......@@ -926,9 +937,11 @@ def create_transcript_objects(xml):
VideoTranscript.create_or_update(
transcript.attrib['video_id'],
transcript.attrib['language_code'],
transcript.attrib['file_name'],
transcript.attrib['file_format'],
transcript.attrib['provider'],
metadata=dict(
provider=transcript.attrib['provider'],
file_name=transcript.attrib['file_name'],
file_format=transcript.attrib['file_format'],
)
)
except KeyError:
logger.warn("VAL: Required attributes are missing from xml, xml=[%s]", etree.tostring(transcript).strip())
......@@ -417,6 +417,28 @@ class VideoTranscript(TimeStampedModel):
class Meta:
unique_together = ('video_id', 'language_code')
@property
def filename(self):
    """
    Build a readable file name for this transcript.

    The result has the form `<name>-<language_code>.<file_format>`. `<name>` is
    the `client_video_id` (with its extension stripped) of the `Video` whose
    `edx_video_id` equals this transcript's `video_id`; when no such `Video`
    exists, the transcript's own `video_id` is used instead.
    """
    try:
        matching_video = Video.objects.get(edx_video_id=self.video_id)
    except Video.DoesNotExist:
        # No video record for this id, so fall back to the raw video id.
        base_name = self.video_id
    else:
        base_name = os.path.splitext(matching_video.client_video_id)[0]

    return u'{name}-{language}.{format}'.format(
        name=base_name,
        language=self.language_code,
        format=self.file_format
    )
@classmethod
def get_or_none(cls, video_id, language_code):
"""
......@@ -434,16 +456,14 @@ class VideoTranscript(TimeStampedModel):
return transcript
@classmethod
def create_or_update(cls, video_id, language_code, file_name, file_format, provider, file_data=None):
def create_or_update(cls, video_id, language_code, metadata, file_data=None):
"""
Create or update Transcript object.
Arguments:
video_id (str): unique id for a video
language_code (str): language code
file_name (str): File name of the image
file_format (str): Format of transcript
provider (str): Transcript provider
language_code (str): language code for (to be created/updated) transcript
metadata (dict): A dict containing (to be overwritten) properties
file_data (InMemoryUploadedFile): File data to be saved
Returns:
......@@ -451,20 +471,24 @@ class VideoTranscript(TimeStampedModel):
"""
video_transcript, created = cls.objects.get_or_create(video_id=video_id, language_code=language_code)
# delete the existing transcript file
if not created and file_data:
video_transcript.transcript.delete()
for prop, value in metadata.iteritems():
if prop in ['language_code', 'file_format', 'provider']:
setattr(video_transcript, prop, value)
video_transcript.transcript.name = file_name
video_transcript.file_format = file_format
video_transcript.provider = provider
transcript_name = metadata.get('file_name')
if transcript_name:
video_transcript.transcript.name = transcript_name
elif file_data:
# Delete the existing transcript file and
# recreate with the new content
if not created:
video_transcript.transcript.delete()
if file_data:
with closing(file_data) as transcript_file_data:
file_name = '{uuid}{ext}'.format(uuid=uuid4().hex, ext=os.path.splitext(file_name)[1])
file_name = '{uuid}.{ext}'.format(uuid=uuid4().hex, ext=video_transcript.file_format)
try:
video_transcript.transcript.save(file_name, transcript_file_data)
except Exception: # pylint: disable=broad-except
except Exception:
logger.exception('VAL: Transcript save failed to storage for video_id [%s]', video_id)
raise
......
......@@ -373,7 +373,7 @@ VIDEO_TRANSCRIPT_CIELO24 = dict(
VIDEO_TRANSCRIPT_3PLAY = dict(
video_id='super-soaker',
language_code='de',
transcript='wow.sjson',
transcript='edxval/tests/data/wow.sjson',
provider=TranscriptProviderType.THREE_PLAY_MEDIA,
file_format=TranscriptFormat.SJSON,
)
......
{
"start": [10],
"end": [100],
"text": ["Hi, welcome to edxval."]
}
\ No newline at end of file
......@@ -3,9 +3,12 @@
Tests for the API for Video Abstraction Layer
"""
import json
import os
import tempfile
import mock
from ddt import data, ddt, unpack
from django.conf import settings
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.files.base import ContentFile
......@@ -16,18 +19,19 @@ from django.test import TestCase
from lxml import etree
from mock import patch
from rest_framework import status
from django.conf import settings
from edxval import api as api
from edxval import utils
from edxval.api import (InvalidTranscriptFormat, InvalidTranscriptProvider,
SortDirection, ValCannotCreateError,
ValCannotUpdateError, ValVideoNotFoundError,
VideoSortField)
from edxval.models import (LIST_MAX_ITEMS, CourseVideo, EncodedVideo, Profile,
TranscriptFormat, TranscriptProviderType, Video,
VideoImage, VideoTranscript, TranscriptPreference, ThirdPartyTranscriptCredentialsState)
ThirdPartyTranscriptCredentialsState,
TranscriptFormat, TranscriptPreference,
TranscriptProviderType, Video, VideoImage,
VideoTranscript)
from edxval.tests import APIAuthTestCase, constants
from edxval import utils
FILE_DATA = """
......@@ -137,7 +141,6 @@ class CreateVideoTest(TestCase):
api.create_video(data)
@ddt
class UpdateVideoTest(TestCase):
"""
......@@ -957,7 +960,7 @@ class ExportTest(TestCase):
<encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
<encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
<transcripts>
<transcript file_format="sjson" file_name="wow.sjson" language_code="de" provider="3PlayMedia" video_id="super-soaker"/>
<transcript file_format="sjson" file_name="edxval/tests/data/wow.sjson" language_code="de" provider="3PlayMedia" video_id="super-soaker"/>
<transcript file_format="srt" file_name="wow.srt" language_code="en" provider="Cielo24" video_id="super-soaker" />
</transcripts>
</video_asset>
......@@ -1296,7 +1299,7 @@ class ImportTest(TestCase):
<video_asset>
<transcripts>
<transcript file_name="wow.srt" language_code="en" file_format="srt" provider='Cielo24' video_id="{video_id}"/>
<transcript file_name="wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/>
<transcript file_name="edxval/tests/data/wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/>
</transcripts>
</video_asset>
""".format(video_id=external_video_id))
......@@ -1328,7 +1331,7 @@ class ImportTest(TestCase):
<video_asset>
<transcripts>
{transcript_xml}
<transcript file_name="wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/>
<transcript file_name="edxval/tests/data/wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/>
</transcripts>
</video_asset>
""".format(transcript_xml=transcript_xml, video_id=video_id))
......@@ -1694,11 +1697,33 @@ class TranscriptTest(TestCase):
self.transcript_url = api.create_or_update_video_transcript(
self.video_id,
'ur',
'The_Arrow.srt',
TranscriptFormat.SRT,
provider=TranscriptProviderType.CUSTOM,
file_data=File(open(self.arrow_transcript_path)),
metadata={'file_format': TranscriptFormat.SRT},
file_data=File(open(self.arrow_transcript_path))
)
# create a temporary transcript file
_, self.transcript_file = tempfile.mkstemp(
suffix='.srt',
dir='edxval/tests/data/'
)
with open(self.transcript_file, 'w') as outfile:
outfile.write(FILE_DATA)
self.transcript3 = VideoTranscript.objects.create(
video_id='super-soaker',
language_code='fr',
transcript=self.transcript_file,
provider=TranscriptProviderType.THREE_PLAY_MEDIA,
file_format=TranscriptFormat.SRT,
)
def tearDown(self):
    """
    Undo the test setup by removing the temporary transcript file,
    if it is still present on disk.
    """
    temp_transcript_path = self.transcript_file
    if not os.path.exists(temp_transcript_path):
        # The file is already gone, so there is nothing to clean up.
        return
    os.remove(temp_transcript_path)
@data(
{'video_id': 'super-soaker', 'language_code': 'en', 'expected_availability': True},
......@@ -1765,17 +1790,22 @@ class TranscriptTest(TestCase):
transcript = api.get_video_transcript_data(video_ids, language_code)
self.assertEqual(transcript, result)
def test_get_video_transcript_data(self):
@data(
('de', 'Shallow Swordfish-de.sjson', 'edxval/tests/data/wow.sjson'),
('ur', '0987654321-ur.srt', 'edxval/tests/data/The_Arrow.srt')
)
@unpack
def test_get_video_transcript_data(self, language_code, expected_file_name, expected_transcript_path):
"""
Verify that `get_video_transcript_data` api function works as expected.
"""
expected_transcript = {
'file_name': self.transcript_url,
'content': File(open(self.arrow_transcript_path)).read()
'file_name': expected_file_name,
'content': File(open(expected_transcript_path)).read()
}
transcript = api.get_video_transcript_data(
video_ids=['super-soaker', '0987654321'],
language_code=u'ur'
language_code=language_code
)
self.assertDictEqual(transcript, expected_transcript)
......@@ -1791,7 +1821,7 @@ class TranscriptTest(TestCase):
transcripts = api.get_video_transcripts(video_id)
if result:
self.assertEqual(len(transcripts), 2)
self.assertEqual(len(transcripts), 3)
for transcript, transcript_data in zip(transcripts, [self.transcript_data2, self.transcript_data1]):
transcript_data['url'] = transcript_data.pop('name')
self.assertEqual(transcript, transcript_data)
......@@ -1814,9 +1844,11 @@ class TranscriptTest(TestCase):
transcript_url = api.create_or_update_video_transcript(
video_id=transcript_data['video_id'],
language_code=transcript_data['language_code'],
file_name=transcript_data['name'],
file_format=transcript_data['file_format'],
provider=transcript_data['provider'],
metadata=dict(
file_name=transcript_data['name'],
file_format=transcript_data['file_format'],
provider=transcript_data['provider']
)
)
self.assertEqual(transcript_url, transcript_data['name'])
......@@ -1845,32 +1877,49 @@ class TranscriptTest(TestCase):
@data(
{
'file_data': None,
'file_name': 'overwatch.sjson',
'file_format': TranscriptFormat.SJSON,
'language_code': 'da',
'provider': TranscriptProviderType.CIELO24
},
{
'file_data': ContentFile(FILE_DATA),
'file_name': None,
'file_format': TranscriptFormat.SRT,
'language_code': 'es',
'provider': TranscriptProviderType.THREE_PLAY_MEDIA
},
)
@unpack
def test_create_or_update_video_transcript(self, file_data, file_format, provider):
def test_create_or_update_video_transcript(self, file_data, file_name, file_format, language_code, provider):
"""
Verify that `create_or_update_video_transcript` api function updates existing transcript as expected.
"""
video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
self.assertIsNotNone(video_transcript)
file_name = 'overwatch.{}'.format(file_format)
transcript_url = api.create_or_update_video_transcript(
self.video_id, 'ur', file_name, file_format, provider, file_data
video_id=self.video_id,
language_code='ur',
metadata=dict(
provider=provider,
language_code=language_code,
file_name=file_name,
file_format=file_format
),
file_data=file_data
)
video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
# Now, Querying Video Transcript with previous/old language code leads to DoesNotExist
with self.assertRaises(VideoTranscript.DoesNotExist):
VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
# Assert the updates to the transcript object
video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code=language_code)
self.assertEqual(transcript_url, video_transcript.url())
self.assertEqual(video_transcript.file_format, file_format)
self.assertEqual(video_transcript.provider, provider)
self.assertEqual(video_transcript.language_code, language_code)
if file_data:
self.assertTrue(transcript_url.startswith(settings.VIDEO_TRANSCRIPTS_SETTINGS['DIRECTORY_PREFIX']))
......@@ -1900,7 +1949,10 @@ class TranscriptTest(TestCase):
Verify that `create_or_update_video_transcript` api function raise exceptions on invalid values.
"""
with self.assertRaises(exception) as transcript_exception:
api.create_or_update_video_transcript(self.video_id, 'ur', 'overwatch.srt', file_format, provider)
api.create_or_update_video_transcript(self.video_id, 'ur', metadata={
'provider': provider,
'file_format': file_format
})
self.assertEqual(transcript_exception.exception.message, exception_message)
......@@ -1914,12 +1966,10 @@ class TranscriptTest(TestCase):
# This will replace the transcript for an existing video and delete the existing transcript
new_transcript_url = api.create_or_update_video_transcript(
self.video_id,
'ur',
'overwatch.srt',
TranscriptFormat.SRT,
TranscriptProviderType.CIELO24,
ContentFile(FILE_DATA)
video_id=self.video_id,
language_code='ur',
metadata=dict(provider=TranscriptProviderType.CIELO24),
file_data=ContentFile(FILE_DATA)
)
# Verify that new transcript is set to video
......@@ -1948,7 +1998,21 @@ class TranscriptTest(TestCase):
# `non_existent_video_id` that does not have transcript
video_ids = ['super-soaker', self.video_id, dupe_lang_video_id, 'non_existent_video_id']
transcript_languages = api.get_available_transcript_languages(video_ids=video_ids)
self.assertItemsEqual(transcript_languages, ['de', 'en', 'ur'])
self.assertItemsEqual(transcript_languages, ['de', 'en', 'ur', 'fr'])
def test_delete_video_transcript(self):
    """
    Verify that `delete_video_transcript` removes the transcript record
    together with its transcript file.
    """
    lookup = dict(video_id='super-soaker', language_code='fr')

    def matching_transcript_count():
        return VideoTranscript.objects.filter(**lookup).count()

    # Exactly one matching transcript exists before the deletion.
    self.assertEqual(matching_transcript_count(), 1)

    api.delete_video_transcript(**lookup)

    # Afterwards neither the file on disk nor the database record remains.
    self.assertFalse(os.path.exists(self.transcript_file))
    self.assertEqual(matching_transcript_count(), 0)
@ddt
......
......@@ -4,9 +4,7 @@ Views file for django app edxval.
import logging
from django.core.exceptions import ValidationError
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from django.views.decorators.http import last_modified
from rest_framework import generics, status
from rest_framework.authentication import SessionAuthentication
from rest_framework.permissions import DjangoModelPermissions
......@@ -14,12 +12,16 @@ from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework_oauth.authentication import OAuth2Authentication
from edxval.api import (create_or_update_video_transcript,
get_video_transcript, update_video_status)
from edxval.models import (CourseVideo, Profile, TranscriptFormat,
TranscriptProviderType, Video, VideoImage,
VideoTranscript)
from edxval.serializers import TranscriptSerializer, VideoSerializer
from edxval.api import create_or_update_video_transcript
from edxval.models import (
CourseVideo,
TranscriptFormat,
TranscriptProviderType,
Video,
VideoImage,
VideoTranscript
)
from edxval.serializers import VideoSerializer
LOGGER = logging.getLogger(__name__) # pylint: disable=C0103
......@@ -148,13 +150,11 @@ class VideoTranscriptView(APIView):
transcript = VideoTranscript.get_or_none(video_id, language_code)
if transcript is None:
create_or_update_video_transcript(
video_id,
language_code,
transcript_name,
file_format,
provider,
)
create_or_update_video_transcript(video_id, language_code, metadata={
'provider': provider,
'file_name': transcript_name,
'file_format': file_format
})
response = Response(status=status.HTTP_200_OK)
else:
message = (
......
......@@ -3,4 +3,3 @@ ddt==0.8.0
django-nose==1.4.4
mock==1.0.1
pylint==1.3.0
tox==2.7.0
......@@ -3,10 +3,10 @@ envlist = django{18,110,111}
[testenv]
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
django18: Django>=1.8,<1.9
django110: Django>=1.10,<1.11
django111: Django>=1.11,<2
-r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
commands =
python manage.py test {posargs}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment