Unverified Commit 04dd6fd9 by Mushtaq Ali Committed by GitHub

Merge pull request #130 from edx/mushtaq/import-transcripts

Import video transcripts
parents 6469fc26 b8a64a57
...@@ -8,6 +8,8 @@ from enum import Enum ...@@ -8,6 +8,8 @@ from enum import Enum
from uuid import uuid4 from uuid import uuid4
from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.core.files import File
from fs.path import combine
from lxml import etree from lxml import etree
from lxml.etree import Element, SubElement from lxml.etree import Element, SubElement
...@@ -20,7 +22,7 @@ from edxval.models import (CourseVideo, EncodedVideo, Profile, ...@@ -20,7 +22,7 @@ from edxval.models import (CourseVideo, EncodedVideo, Profile,
TranscriptProviderType, Video, VideoImage, TranscriptProviderType, Video, VideoImage,
VideoTranscript, ThirdPartyTranscriptCredentialsState) VideoTranscript, ThirdPartyTranscriptCredentialsState)
from edxval.serializers import TranscriptPreferenceSerializer, TranscriptSerializer, VideoSerializer from edxval.serializers import TranscriptPreferenceSerializer, TranscriptSerializer, VideoSerializer
from edxval.utils import THIRD_PARTY_TRANSCRIPTION_PLANS from edxval.utils import THIRD_PARTY_TRANSCRIPTION_PLANS, create_file_in_fs
logger = logging.getLogger(__name__) # pylint: disable=C0103 logger = logging.getLogger(__name__) # pylint: disable=C0103
...@@ -789,7 +791,7 @@ def export_to_xml(video_id, resource_fs, static_dir, course_id=None): ...@@ -789,7 +791,7 @@ def export_to_xml(video_id, resource_fs, static_dir, course_id=None):
video_id (str): Video id of the video to export transcripts. video_id (str): Video id of the video to export transcripts.
course_id (str): The ID of the course with which this video is associated. course_id (str): The ID of the course with which this video is associated.
static_dir (str): The Directory to store transcript file. static_dir (str): The Directory to store transcript file.
resource_fs (OSFS): The file system to store transcripts. resource_fs (OSFS): Export file system.
Returns: Returns:
An lxml video_asset element containing export data An lxml video_asset element containing export data
...@@ -837,22 +839,15 @@ def create_trancript_file(video_id, language_code, file_format, resource_fs, sta ...@@ -837,22 +839,15 @@ def create_trancript_file(video_id, language_code, file_format, resource_fs, sta
static_dir (str): The Directory to store transcript file. static_dir (str): The Directory to store transcript file.
resource_fs (OSFS): The file system to store transcripts. resource_fs (OSFS): The file system to store transcripts.
""" """
transcript_name = u'{static_dir}/{video_id}-{language_code}.{file_format}'.format( transcript_name = u'{video_id}-{language_code}.{file_format}'.format(
static_dir=static_dir,
video_id=video_id, video_id=video_id,
language_code=language_code, language_code=language_code,
file_format=file_format file_format=file_format
) )
try: transcript_data = get_video_transcript_data(video_id, language_code)
transcript_data = get_video_transcript_data(video_id, language_code) if transcript_data:
if transcript_data: transcript_content = transcript_data['content']
transcript_content = transcript_data['content'] create_file_in_fs(transcript_content, transcript_name, resource_fs, static_dir)
with resource_fs.open(transcript_name, 'wb') as f:
f.write(transcript_content)
except Exception:
# Do not raise exception in case no transcript file is found for now.
# TODO: Remove this - EDUCATOR-2173
pass
def create_transcripts_xml(video_id, video_el, resource_fs, static_dir): def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
...@@ -886,7 +881,6 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir): ...@@ -886,7 +881,6 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
transcripts_el, transcripts_el,
'transcript', 'transcript',
{ {
'file_name': video_transcript.transcript.name,
'language_code': language_code, 'language_code': language_code,
'file_format': file_format, 'file_format': file_format,
'provider': video_transcript.provider, 'provider': video_transcript.provider,
...@@ -897,7 +891,7 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir): ...@@ -897,7 +891,7 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
return video_el return video_el
def import_from_xml(xml, edx_video_id, course_id=None): def import_from_xml(xml, edx_video_id, resource_fs, static_dir, course_id=None):
""" """
Imports data from a video_asset element about the given video_id. Imports data from a video_asset element about the given video_id.
...@@ -907,6 +901,8 @@ def import_from_xml(xml, edx_video_id, course_id=None): ...@@ -907,6 +901,8 @@ def import_from_xml(xml, edx_video_id, course_id=None):
Arguments: Arguments:
xml (Element): An lxml video_asset element containing import data xml (Element): An lxml video_asset element containing import data
edx_video_id (str): val video id edx_video_id (str): val video id
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
course_id (str): The ID of a course to associate the video with course_id (str): The ID of a course to associate the video with
Raises: Raises:
...@@ -915,7 +911,7 @@ def import_from_xml(xml, edx_video_id, course_id=None): ...@@ -915,7 +911,7 @@ def import_from_xml(xml, edx_video_id, course_id=None):
if xml.tag != 'video_asset': if xml.tag != 'video_asset':
raise ValCannotCreateError('Invalid XML') raise ValCannotCreateError('Invalid XML')
# TODO this will be moved as a part of EDUCATOR-2173 # TODO this will be moved as a part of EDUCATOR-2403
if not edx_video_id: if not edx_video_id:
return return
...@@ -968,26 +964,43 @@ def import_from_xml(xml, edx_video_id, course_id=None): ...@@ -968,26 +964,43 @@ def import_from_xml(xml, edx_video_id, course_id=None):
'bitrate': encoded_video_el.get('bitrate'), 'bitrate': encoded_video_el.get('bitrate'),
}) })
create_video(data) create_video(data)
create_transcript_objects(xml) create_transcript_objects(xml, edx_video_id, resource_fs, static_dir)
def create_transcript_objects(xml, edx_video_id, resource_fs, static_dir):
    """
    Create VideoTranscript objects for the transcripts listed in the XML.

    For every `transcripts/transcript` element, a transcript record is created
    only when `get_video_transcript_data` returns nothing for the video and
    language. The transcript content is read from
    `<static_dir>/<edx_video_id>-<language_code>.<file_format>` in `resource_fs`.

    Elements missing a required attribute are logged and skipped.

    Arguments:
        xml (Element): lxml Element object.
        edx_video_id (str): Video id of the video.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    for transcript in xml.findall('.//transcripts/transcript'):
        try:
            file_format = transcript.attrib['file_format']
            language_code = transcript.attrib['language_code']
            transcript_data = get_video_transcript_data(edx_video_id, language_code)

            # Only create a record when no transcript exists for this language yet.
            if not transcript_data:
                # Look the provider up before touching the file system so that a
                # missing attribute is reported without opening a file first.
                provider = transcript.attrib['provider']
                transcript_file_name = u'{edx_video_id}-{language_code}.{file_format}'.format(
                    edx_video_id=edx_video_id,
                    language_code=language_code,
                    file_format=file_format
                )
                transcript_path = combine(static_dir, transcript_file_name)

                # Read the file from the import file system; the `with` block
                # guarantees the handle is closed once the record is created
                # (or if creation fails).
                with resource_fs.open(transcript_path) as transcript_file:
                    create_video_transcript(
                        video_id=edx_video_id,
                        language_code=language_code,
                        file_format=file_format,
                        content=File(transcript_file),
                        provider=provider
                    )
        except KeyError:
            # `logger.warn` is kept as-is: the existing test patches the logger
            # and asserts on `warn`.
            logger.warn("VAL: Required attributes are missing from xml, xml=[%s]", etree.tostring(transcript).strip())
...@@ -12,6 +12,9 @@ from edxval.models import ( ...@@ -12,6 +12,9 @@ from edxval.models import (
) )
EDX_VIDEO_ID = "itchyjacket" EDX_VIDEO_ID = "itchyjacket"
EXPORT_IMPORT_STATIC_DIR = u'static'
""" """
Generic Profiles for manually creating profile objects Generic Profiles for manually creating profile objects
""" """
...@@ -363,12 +366,34 @@ VIDEO_DICT_UPDATE_ANIMAL = dict( ...@@ -363,12 +366,34 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
encoded_videos=[], encoded_videos=[],
) )
# Transcript content fixtures keyed by a short name.
# "overwatch" and "flash" are subtitle text (cue number, time range, caption
# line); "flash" is the `file_data` of VIDEO_TRANSCRIPT_CIELO24 (SRT format).
# "wow" is a dict of start/end/text lists and is the `file_data` of
# VIDEO_TRANSCRIPT_3PLAY (SJSON format).
TRANSCRIPT_DATA = {
"overwatch": """
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.""",
"flash": """
1
00:00:07,180 --> 00:00:08,460
This is Flash line 1.""",
"wow": {
"start": [10],
"end": [100],
"text": ["Hi, welcome to edxval."],
}
}
# Cielo24 English SRT transcript fixture for the 'super-soaker' video.
# `transcript` is the stored file name; `file_data` is the transcript content.
VIDEO_TRANSCRIPT_CIELO24 = {
    'video_id': 'super-soaker',
    'language_code': 'en',
    'transcript': 'edxval/tests/data/The_Flash.srt',
    'provider': TranscriptProviderType.CIELO24,
    'file_format': TranscriptFormat.SRT,
    'file_data': TRANSCRIPT_DATA['flash'],
}
VIDEO_TRANSCRIPT_3PLAY = dict( VIDEO_TRANSCRIPT_3PLAY = dict(
...@@ -377,6 +402,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict( ...@@ -377,6 +402,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict(
transcript='edxval/tests/data/wow.sjson', transcript='edxval/tests/data/wow.sjson',
provider=TranscriptProviderType.THREE_PLAY_MEDIA, provider=TranscriptProviderType.THREE_PLAY_MEDIA,
file_format=TranscriptFormat.SJSON, file_format=TranscriptFormat.SJSON,
file_data=TRANSCRIPT_DATA['wow']
) )
TRANSCRIPT_PREFERENCES_CIELO24 = dict( TRANSCRIPT_PREFERENCES_CIELO24 = dict(
......
{ {
"start": [10], "start": [10],
"end": [100], "end": [100],
"text": ["Hi, welcome to edxval."], "text": ["Hi, welcome to edxval."]
} }
\ No newline at end of file
...@@ -38,18 +38,11 @@ from edxval.serializers import VideoSerializer ...@@ -38,18 +38,11 @@ from edxval.serializers import VideoSerializer
from edxval.tests import APIAuthTestCase, constants from edxval.tests import APIAuthTestCase, constants
def omit_attrs(dict, attrs_to_omit=()):  # pylint: disable=redefined-builtin
    """
    Return a new dict without the provided attributes.

    The input mapping is not modified.

    Arguments:
        dict (dict): Source mapping. The parameter name is kept for
            backward compatibility even though it shadows the builtin.
        attrs_to_omit (iterable): Keys to leave out of the result.
            Defaults to an empty tuple (immutable, so never shared state).

    Returns:
        dict: Copy of the source containing only keys not in `attrs_to_omit`.
    """
    # `.items()` works on both Python 2 and Python 3, unlike `.iteritems()`.
    return {attr: value for attr, value in dict.items() if attr not in attrs_to_omit}
class SortedVideoTestMixin(object): class SortedVideoTestMixin(object):
...@@ -938,15 +931,15 @@ class ExportTest(TestCase): ...@@ -938,15 +931,15 @@ class ExportTest(TestCase):
# create internal video transcripts # create internal video transcripts
transcript_data = dict(constants.VIDEO_TRANSCRIPT_CIELO24, video=video) transcript_data = dict(constants.VIDEO_TRANSCRIPT_CIELO24, video=video)
transcript_data.pop('video_id') transcript_data = omit_attrs(transcript_data, ['video_id', 'file_data'])
VideoTranscript.objects.create(**transcript_data) VideoTranscript.objects.create(**transcript_data)
transcript_data = dict(constants.VIDEO_TRANSCRIPT_3PLAY, video=video) transcript_data = dict(constants.VIDEO_TRANSCRIPT_3PLAY, video=video)
transcript_data.pop('video_id') transcript_data = omit_attrs(transcript_data, ['video_id', 'file_data'])
VideoTranscript.objects.create(**transcript_data) VideoTranscript.objects.create(**transcript_data)
self.temp_dir = mkdtemp() self.temp_dir = mkdtemp()
self.file_system = OSFS(self.temp_dir) self.file_system = OSFS(self.temp_dir)
self.file_system.makedir(STATIC_DIR, recreate=True) self.file_system.makedir(constants.EXPORT_IMPORT_STATIC_DIR, recreate=True)
self.addCleanup(shutil.rmtree, self.temp_dir) self.addCleanup(shutil.rmtree, self.temp_dir)
def assert_xml_equal(self, left, right): def assert_xml_equal(self, left, right):
...@@ -981,7 +974,7 @@ class ExportTest(TestCase): ...@@ -981,7 +974,7 @@ class ExportTest(TestCase):
<video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/> <video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/>
""") """)
self.assert_xml_equal( self.assert_xml_equal(
api.export_to_xml(constants.VIDEO_DICT_STAR['edx_video_id'], self.file_system, STATIC_DIR), api.export_to_xml(constants.VIDEO_DICT_STAR['edx_video_id'], self.file_system, constants.EXPORT_IMPORT_STATIC_DIR),
expected expected
) )
...@@ -993,7 +986,11 @@ class ExportTest(TestCase): ...@@ -993,7 +986,11 @@ class ExportTest(TestCase):
<video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/> <video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/>
""") """)
exported_xml = api.export_to_xml(constants.VIDEO_DICT_STAR['edx_video_id'], self.file_system, STATIC_DIR) exported_xml = api.export_to_xml(
constants.VIDEO_DICT_STAR['edx_video_id'],
self.file_system,
constants.EXPORT_IMPORT_STATIC_DIR
)
self.assert_xml_equal(exported_xml, expected) self.assert_xml_equal(exported_xml, expected)
# Verify that no transcript is present in the XML. # Verify that no transcript is present in the XML.
...@@ -1014,14 +1011,19 @@ class ExportTest(TestCase): ...@@ -1014,14 +1011,19 @@ class ExportTest(TestCase):
<encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/> <encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
<encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/> <encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
<transcripts> <transcripts>
<transcript file_format="sjson" file_name="edxval/tests/data/wow.sjson" language_code="de" provider="3PlayMedia" /> <transcript file_format="sjson" language_code="de" provider="3PlayMedia" />
<transcript file_format="srt" file_name="edxval/tests/data/The_Flash.srt" language_code="en" provider="Cielo24" /> <transcript file_format="srt" language_code="en" provider="Cielo24" />
</transcripts> </transcripts>
</video_asset> </video_asset>
""".format(image=image)) """.format(image=image))
self.assert_xml_equal( self.assert_xml_equal(
api.export_to_xml(constants.VIDEO_DICT_FISH['edx_video_id'], self.file_system, STATIC_DIR, course_id), api.export_to_xml(
constants.VIDEO_DICT_FISH['edx_video_id'],
self.file_system,
constants.EXPORT_IMPORT_STATIC_DIR,
course_id
),
expected expected
) )
...@@ -1032,7 +1034,7 @@ class ExportTest(TestCase): ...@@ -1032,7 +1034,7 @@ class ExportTest(TestCase):
language_code = 'en' language_code = 'en'
video_id = constants.VIDEO_DICT_FISH['edx_video_id'] video_id = constants.VIDEO_DICT_FISH['edx_video_id']
transcript_files = {'de': u'super-soaker-de.sjson', 'en': u'super-soaker-en.srt'} transcript_files = {'de': u'super-soaker-de.sjson', 'en': u'super-soaker-en.srt'}
expected_transcript_path = combine(self.temp_dir, STATIC_PATH) expected_transcript_path = combine(self.temp_dir, constants.EXPORT_IMPORT_STATIC_DIR)
expected_xml = self.parse_xml(""" expected_xml = self.parse_xml("""
<video_asset client_video_id="Shallow Swordfish" duration="122.0" image="image.jpg"> <video_asset client_video_id="Shallow Swordfish" duration="122.0" image="image.jpg">
...@@ -1040,19 +1042,19 @@ class ExportTest(TestCase): ...@@ -1040,19 +1042,19 @@ class ExportTest(TestCase):
<encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/> <encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
<encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/> <encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
<transcripts> <transcripts>
<transcript file_format="sjson" file_name="edxval/tests/data/wow.sjson" language_code="de" provider="3PlayMedia" /> <transcript file_format="sjson" language_code="de" provider="3PlayMedia" />
<transcript file_format="srt" file_name="edxval/tests/data/The_Flash.srt" language_code="en" provider="Cielo24" /> <transcript file_format="srt" language_code="en" provider="Cielo24" />
</transcripts> </transcripts>
</video_asset> </video_asset>
""") """)
exported_xml = api.export_to_xml(video_id, self.file_system, STATIC_DIR, 'test-course') exported_xml = api.export_to_xml(video_id, self.file_system, constants.EXPORT_IMPORT_STATIC_DIR, 'test-course')
# Assert video and transcript xml is exported correctly. # Assert video and transcript xml is exported correctly.
self.assert_xml_equal(exported_xml, expected_xml) self.assert_xml_equal(exported_xml, expected_xml)
# Verify transcript file is created. # Verify transcript file is created.
self.assertItemsEqual(transcript_files.values(), self.file_system.listdir(STATIC_PATH)) self.assertItemsEqual(transcript_files.values(), self.file_system.listdir(constants.EXPORT_IMPORT_STATIC_DIR))
# Also verify the content of created transcript file. # Also verify the content of created transcript file.
for language_code in transcript_files.keys(): for language_code in transcript_files.keys():
...@@ -1068,12 +1070,15 @@ class ExportTest(TestCase): ...@@ -1068,12 +1070,15 @@ class ExportTest(TestCase):
Test export with invalid video id. Test export with invalid video id.
""" """
with self.assertRaises(ValVideoNotFoundError): with self.assertRaises(ValVideoNotFoundError):
api.export_to_xml('unknown_video', self.file_system, STATIC_DIR) api.export_to_xml('unknown_video', self.file_system, constants.EXPORT_IMPORT_STATIC_DIR)
@ddt @ddt
class ImportTest(TestCase): class ImportTest(TestCase):
"""Tests import_from_xml""" """
Tests import_from_xml
"""
def setUp(self): def setUp(self):
self.image_name = 'image.jpg' self.image_name = 'image.jpg'
mobile_profile = Profile.objects.create(profile_name=constants.PROFILE_MOBILE) mobile_profile = Profile.objects.create(profile_name=constants.PROFILE_MOBILE)
...@@ -1090,6 +1095,12 @@ class ImportTest(TestCase): ...@@ -1090,6 +1095,12 @@ class ImportTest(TestCase):
self.transcript_data2 = dict(constants.VIDEO_TRANSCRIPT_3PLAY, video_id='little-star') self.transcript_data2 = dict(constants.VIDEO_TRANSCRIPT_3PLAY, video_id='little-star')
self.transcript_data3 = dict(self.transcript_data2, video_id='super-soaker') self.transcript_data3 = dict(self.transcript_data2, video_id='super-soaker')
self.temp_dir = mkdtemp()
self.file_system = OSFS(self.temp_dir)
self.file_system.makedir(constants.EXPORT_IMPORT_STATIC_DIR, recreate=True)
self.addCleanup(shutil.rmtree, self.temp_dir)
def make_import_xml(self, video_dict, encoded_video_dicts=None, image=None, video_transcripts=None): def make_import_xml(self, video_dict, encoded_video_dicts=None, image=None, video_transcripts=None):
import_xml = etree.Element( import_xml = etree.Element(
"video_asset", "video_asset",
...@@ -1115,18 +1126,32 @@ class ImportTest(TestCase): ...@@ -1115,18 +1126,32 @@ class ImportTest(TestCase):
if video_transcripts: if video_transcripts:
transcripts_el = etree.SubElement(import_xml, 'transcripts') transcripts_el = etree.SubElement(import_xml, 'transcripts')
for video_transcript in video_transcripts: for video_transcript in video_transcripts:
file_format = video_transcript['file_format']
language_code = video_transcript['language_code']
etree.SubElement( etree.SubElement(
transcripts_el, transcripts_el,
'transcript', 'transcript',
{ {
'video_id': video_transcript['video_id'], 'language_code': language_code,
'file_name': video_transcript['transcript'], 'file_format': file_format,
'language_code': video_transcript['language_code'],
'file_format': video_transcript['file_format'],
'provider': video_transcript['provider'], 'provider': video_transcript['provider'],
} }
) )
# Create transcript files
transcript_file_name = u'{edx_video_id}-{language_code}.{file_format}'.format(
edx_video_id=video_dict['edx_video_id'],
language_code=language_code,
file_format=file_format
)
utils.create_file_in_fs(
json.dumps(video_transcript['file_data']),
transcript_file_name,
self.file_system,
constants.EXPORT_IMPORT_STATIC_DIR
)
return import_xml return import_xml
def assert_obj_matches_dict_for_keys(self, obj, dict_, keys): def assert_obj_matches_dict_for_keys(self, obj, dict_, keys):
...@@ -1150,29 +1175,36 @@ class ImportTest(TestCase): ...@@ -1150,29 +1175,36 @@ class ImportTest(TestCase):
def assert_invalid_import(self, xml, course_id=None):
    """
    Assert that importing `xml` raises ValCannotCreateError and that no
    Video record is left behind for the test video id.
    """
    video_id = "test_edx_video_id"
    with self.assertRaises(ValCannotCreateError):
        api.import_from_xml(
            xml,
            video_id,
            self.file_system,
            constants.EXPORT_IMPORT_STATIC_DIR,
            course_id
        )
    leftover_videos = Video.objects.filter(edx_video_id=video_id)
    self.assertFalse(leftover_videos.exists())
def assert_transcripts(self, video_id, expected_transcripts):
    """
    Assert that the transcripts stored for `video_id` match `expected_transcripts`.

    Checks the number of stored transcript records, then compares each
    expected transcript against the serialized record for its language,
    including the JSON-decoded transcript content.
    """
    # The number of stored records must match the number of expected ones.
    stored_count = VideoTranscript.objects.filter(video__edx_video_id=video_id).count()
    self.assertEqual(stored_count, len(expected_transcripts))

    for expected in expected_transcripts:
        lang = expected['language_code']

        # Serialize the stored transcript record for this language.
        record = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=lang)
        received = api.TranscriptSerializer(record).data

        # Attach the decoded transcript content so it is compared as well.
        content = api.get_video_transcript_data(video_id, lang)['content']
        received['file_data'] = json.loads(content)

        # `url` (received) and `transcript` (expected) are not part of the comparison.
        self.assertDictEqual(
            omit_attrs(received, ['url']),
            omit_attrs(expected, ['transcript'])
        )
def test_new_video_full(self): def test_new_video_full(self):
new_course_id = 'new_course_id' new_course_id = 'new_course_id'
...@@ -1184,11 +1216,16 @@ class ImportTest(TestCase): ...@@ -1184,11 +1216,16 @@ class ImportTest(TestCase):
video_transcripts=[self.transcript_data1, self.transcript_data2] video_transcripts=[self.transcript_data1, self.transcript_data2]
) )
# there must not be any transcript before import # There must not be any transcript before import.
with self.assertRaises(VideoTranscript.DoesNotExist): self.assert_transcripts(constants.VIDEO_DICT_STAR['edx_video_id'], [])
VideoTranscript.objects.get(video__edx_video_id=constants.VIDEO_DICT_STAR['edx_video_id'])
api.import_from_xml(xml, constants.VIDEO_DICT_STAR['edx_video_id'], new_course_id) api.import_from_xml(
xml,
constants.VIDEO_DICT_STAR['edx_video_id'],
self.file_system,
constants.EXPORT_IMPORT_STATIC_DIR,
new_course_id
)
video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_STAR['edx_video_id']) video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_STAR['edx_video_id'])
self.assert_video_matches_dict(video, constants.VIDEO_DICT_STAR) self.assert_video_matches_dict(video, constants.VIDEO_DICT_STAR)
...@@ -1217,7 +1254,7 @@ class ImportTest(TestCase): ...@@ -1217,7 +1254,7 @@ class ImportTest(TestCase):
"duration": "0", "duration": "0",
} }
) )
api.import_from_xml(xml, edx_video_id) api.import_from_xml(xml, edx_video_id, self.file_system, constants.EXPORT_IMPORT_STATIC_DIR)
video = Video.objects.get(edx_video_id=edx_video_id) video = Video.objects.get(edx_video_id=edx_video_id)
self.assertFalse(video.encoded_videos.all().exists()) self.assertFalse(video.encoded_videos.all().exists())
...@@ -1234,29 +1271,35 @@ class ImportTest(TestCase): ...@@ -1234,29 +1271,35 @@ class ImportTest(TestCase):
transcript_data = dict(self.transcript_data3, language_code=language_code) transcript_data = dict(self.transcript_data3, language_code=language_code)
xml = self.make_import_xml( xml = self.make_import_xml(
video_dict={ video_dict={
"client_video_id": "new_client_video_id", 'edx_video_id': 'new_video_id',
"duration": 0, 'client_video_id': 'new_client_video_id',
'duration': 0,
}, },
encoded_video_dicts=[ encoded_video_dicts=[
constants.ENCODED_VIDEO_DICT_FISH_DESKTOP, constants.ENCODED_VIDEO_DICT_FISH_DESKTOP,
{ {
"url": "http://example.com/new_url", 'url': 'http://example.com/new_url',
"file_size": 2733256, 'file_size': 2733256,
"bitrate": 1597804, 'bitrate': 1597804,
"profile": "mobile", 'profile': 'mobile',
}, },
], ],
image=self.image_name, image=self.image_name,
video_transcripts=[transcript_data] video_transcripts=[transcript_data]
) )
# there must not be any transcript before import # There must not be any transcript before import.
with self.assertRaises(VideoTranscript.DoesNotExist): self.assert_transcripts(constants.VIDEO_DICT_FISH['edx_video_id'], [])
VideoTranscript.objects.get(video__edx_video_id=constants.VIDEO_DICT_FISH["edx_video_id"])
api.import_from_xml(xml, constants.VIDEO_DICT_FISH["edx_video_id"], course_id) api.import_from_xml(
xml,
constants.VIDEO_DICT_FISH['edx_video_id'],
self.file_system,
constants.EXPORT_IMPORT_STATIC_DIR,
course_id
)
video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_FISH["edx_video_id"]) video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_FISH['edx_video_id'])
self.assert_video_matches_dict(video, constants.VIDEO_DICT_FISH) self.assert_video_matches_dict(video, constants.VIDEO_DICT_FISH)
self.assert_encoded_video_matches_dict( self.assert_encoded_video_matches_dict(
video.encoded_videos.get(profile__profile_name=constants.PROFILE_MOBILE), video.encoded_videos.get(profile__profile_name=constants.PROFILE_MOBILE),
...@@ -1270,14 +1313,20 @@ class ImportTest(TestCase): ...@@ -1270,14 +1313,20 @@ class ImportTest(TestCase):
self.assertTrue(course_video.video_image.image.name, self.image_name) self.assertTrue(course_video.video_image.image.name, self.image_name)
self.assert_transcripts( self.assert_transcripts(
constants.VIDEO_DICT_FISH["edx_video_id"], constants.VIDEO_DICT_FISH['edx_video_id'],
[] []
) )
def test_existing_video_with_invalid_course_id(self): def test_existing_video_with_invalid_course_id(self):
xml = self.make_import_xml(video_dict=constants.VIDEO_DICT_FISH) xml = self.make_import_xml(video_dict=constants.VIDEO_DICT_FISH)
with self.assertRaises(ValCannotCreateError): with self.assertRaises(ValCannotCreateError):
api.import_from_xml(xml, edx_video_id=constants.VIDEO_DICT_FISH["edx_video_id"], course_id="x" * 300) api.import_from_xml(
xml,
constants.VIDEO_DICT_FISH['edx_video_id'],
self.file_system,
constants.EXPORT_IMPORT_STATIC_DIR,
course_id='x' * 300
)
def test_unknown_profile(self): def test_unknown_profile(self):
profile = "unknown_profile" profile = "unknown_profile"
...@@ -1293,9 +1342,14 @@ class ImportTest(TestCase): ...@@ -1293,9 +1342,14 @@ class ImportTest(TestCase):
} }
] ]
) )
api.import_from_xml(xml, constants.VIDEO_DICT_STAR["edx_video_id"]) api.import_from_xml(
xml,
constants.VIDEO_DICT_STAR['edx_video_id'],
self.file_system,
constants.EXPORT_IMPORT_STATIC_DIR
)
video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_STAR["edx_video_id"]) video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_STAR['edx_video_id'])
self.assertFalse(video.encoded_videos.filter(profile__profile_name=profile).exists()) self.assertFalse(video.encoded_videos.filter(profile__profile_name=profile).exists())
def test_invalid_tag(self): def test_invalid_tag(self):
...@@ -1333,37 +1387,151 @@ class ImportTest(TestCase): ...@@ -1333,37 +1387,151 @@ class ImportTest(TestCase):
xml = self.make_import_xml(video_dict=constants.VIDEO_DICT_FISH) xml = self.make_import_xml(video_dict=constants.VIDEO_DICT_FISH)
self.assert_invalid_import(xml, "x" * 300) self.assert_invalid_import(xml, "x" * 300)
# FIXME: EDUCATOR-2403
def test_external_no_video_transcript(self):
    """
    Verify that importing an empty video_asset with an empty video id
    creates no transcript records.
    """
    empty_asset = etree.fromstring('<video_asset/>')
    api.import_from_xml(empty_asset, '', self.file_system, constants.EXPORT_IMPORT_STATIC_DIR)

    transcript_count = VideoTranscript.objects.count()
    self.assertEqual(transcript_count, 0)
def test_import_transcript_attached_existing_video(self):
    """
    Verify that importing transcripts for an existing video that already has
    a transcript attached leaves its transcripts unchanged.
    """
    video_id = constants.VIDEO_DICT_FISH['edx_video_id']
    expected_video_transcripts = [self.transcript_data3]
    import_xml = self.make_import_xml(
        video_dict=constants.VIDEO_DICT_FISH,
        video_transcripts=expected_video_transcripts
    )

    # Verify video is present before.
    video = Video.objects.get(edx_video_id=video_id)
    self.assertIsNotNone(video)

    # Create internal video transcripts; `video_id` and `file_data` are not
    # passed to the VideoTranscript record.
    transcript_data = dict(constants.VIDEO_TRANSCRIPT_3PLAY, video=video)
    transcript_data = omit_attrs(transcript_data, ['video_id', 'file_data'])
    VideoTranscript.objects.create(**transcript_data)

    # Verify that video has expected transcripts before import.
    self.assert_transcripts(video_id, expected_video_transcripts)

    api.import_from_xml(
        import_xml,
        video_id,
        self.file_system,
        constants.EXPORT_IMPORT_STATIC_DIR,
        'test_course_id'
    )

    # Verify that video has expected transcripts after import.
    self.assert_transcripts(video_id, expected_video_transcripts)
def test_import_no_transcript_attached_existing_video(self):
    """
    Verify that importing transcripts for an existing video that has no
    transcript attached does not create any transcript record.
    """
    fish_video_id = constants.VIDEO_DICT_FISH['edx_video_id']
    exported_video_transcripts = [self.transcript_data1, self.transcript_data2]

    # The video must already exist before the import.
    self.assertIsNotNone(Video.objects.get(edx_video_id=fish_video_id))

    import_xml = self.make_import_xml(
        video_dict=constants.VIDEO_DICT_FISH,
        video_transcripts=exported_video_transcripts
    )

    # No transcript is attached before the import.
    self.assert_transcripts(fish_video_id, [])

    api.import_from_xml(
        import_xml,
        fish_video_id,
        self.file_system,
        constants.EXPORT_IMPORT_STATIC_DIR,
        'test_course_id'
    )

    # Still no transcript record after the import.
    self.assert_transcripts(fish_video_id, [])
def test_import_transcript_new_video(self):
    """
    Verify that importing a new video whose XML lists transcripts creates
    the transcript records with the expected data.
    """
    star_video_id = constants.VIDEO_DICT_STAR['edx_video_id']
    expected_video_transcripts = [self.transcript_data1, self.transcript_data2]
    import_xml = self.make_import_xml(
        video_dict=constants.VIDEO_DICT_STAR,
        video_transcripts=expected_video_transcripts
    )

    # The video must not exist before the import.
    with self.assertRaises(Video.DoesNotExist):
        Video.objects.get(edx_video_id=star_video_id)

    # Nor may it have any transcripts yet.
    self.assert_transcripts(star_video_id, [])

    api.import_from_xml(
        import_xml,
        star_video_id,
        self.file_system,
        constants.EXPORT_IMPORT_STATIC_DIR,
        'test_course_id'
    )

    # The transcript records now exist with the expected data.
    self.assert_transcripts(star_video_id, expected_video_transcripts)
@patch('edxval.api.logger') @patch('edxval.api.logger')
def test_video_transcript_missing_attribute(self, mock_logger): def test_video_transcript_missing_attribute(self, mock_logger):
""" """
Verify that video transcript import working as expected if transcript xml data is missing. Verify that video transcript import working as expected if transcript xml data is missing.
""" """
video_id = 'super-soaker' video_id = 'super-soaker'
transcript_xml = '<transcript file_name="wow.srt" language_code="en" file_format="srt" provider="Cielo24"/>' transcript_xml = '<transcript file_format="srt" provider="Cielo24"/>'
xml = etree.fromstring(""" xml = etree.fromstring("""
<video_asset> <video_asset>
<transcripts> <transcripts>
{transcript_xml} {transcript_xml}
<transcript file_name="edxval/tests/data/wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/> <transcript language_code="de" file_format="sjson" provider='3PlayMedia'/>
</transcripts> </transcripts>
</video_asset> </video_asset>
""".format(transcript_xml=transcript_xml, video_id=video_id)) """.format(transcript_xml=transcript_xml))
# there should be no video transcript before import # There should be no video transcript before import
with self.assertRaises(VideoTranscript.DoesNotExist): with self.assertRaises(VideoTranscript.DoesNotExist):
VideoTranscript.objects.get(video__edx_video_id=video_id) VideoTranscript.objects.get(video__edx_video_id=video_id)
api.create_transcript_objects(xml) # Create transcript files
utils.create_file_in_fs(
json.dumps(constants.TRANSCRIPT_DATA['wow']),
u'super-soaker-de.sjson',
self.file_system,
constants.EXPORT_IMPORT_STATIC_DIR
)
api.create_transcript_objects(xml, video_id, self.file_system, constants.EXPORT_IMPORT_STATIC_DIR)
mock_logger.warn.assert_called_with( mock_logger.warn.assert_called_with(
"VAL: Required attributes are missing from xml, xml=[%s]", "VAL: Required attributes are missing from xml, xml=[%s]",
...@@ -1718,7 +1886,7 @@ class TranscriptTest(TestCase): ...@@ -1718,7 +1886,7 @@ class TranscriptTest(TestCase):
'provider': TranscriptProviderType.CIELO24, 'provider': TranscriptProviderType.CIELO24,
'file_name': None, 'file_name': None,
'file_format': TranscriptFormat.SRT, 'file_format': TranscriptFormat.SRT,
'file_data': ContentFile(FILE_DATA) 'file_data': ContentFile(constants.TRANSCRIPT_DATA['overwatch'])
} }
] ]
) )
...@@ -1879,7 +2047,7 @@ class TranscriptTest(TestCase): ...@@ -1879,7 +2047,7 @@ class TranscriptTest(TestCase):
'provider': TranscriptProviderType.CIELO24 'provider': TranscriptProviderType.CIELO24
}, },
{ {
'file_data': ContentFile(FILE_DATA), 'file_data': ContentFile(constants.TRANSCRIPT_DATA['overwatch']),
'file_name': None, 'file_name': None,
'file_format': TranscriptFormat.SRT, 'file_format': TranscriptFormat.SRT,
'language_code': 'es', 'language_code': 'es',
...@@ -1922,7 +2090,7 @@ class TranscriptTest(TestCase): ...@@ -1922,7 +2090,7 @@ class TranscriptTest(TestCase):
self.assertTrue(transcript_url.startswith(settings.VIDEO_TRANSCRIPTS_SETTINGS['DIRECTORY_PREFIX'])) self.assertTrue(transcript_url.startswith(settings.VIDEO_TRANSCRIPTS_SETTINGS['DIRECTORY_PREFIX']))
self.assertEqual(video_transcript.transcript.name, transcript_url) self.assertEqual(video_transcript.transcript.name, transcript_url)
with open(video_transcript.transcript.name) as saved_transcript: with open(video_transcript.transcript.name) as saved_transcript:
self.assertEqual(saved_transcript.read(), FILE_DATA) self.assertEqual(saved_transcript.read(), constants.TRANSCRIPT_DATA['overwatch'])
else: else:
self.assertEqual(video_transcript.transcript.name, file_name) self.assertEqual(video_transcript.transcript.name, file_name)
...@@ -1966,7 +2134,7 @@ class TranscriptTest(TestCase): ...@@ -1966,7 +2134,7 @@ class TranscriptTest(TestCase):
language_code=language_code, language_code=language_code,
provider=TranscriptProviderType.THREE_PLAY_MEDIA, provider=TranscriptProviderType.THREE_PLAY_MEDIA,
file_format=TranscriptFormat.SRT, file_format=TranscriptFormat.SRT,
content=ContentFile(FILE_DATA) content=ContentFile(constants.TRANSCRIPT_DATA['overwatch'])
) )
# setup video with the `edx_video_id` above. # setup video with the `edx_video_id` above.
...@@ -1988,7 +2156,7 @@ class TranscriptTest(TestCase): ...@@ -1988,7 +2156,7 @@ class TranscriptTest(TestCase):
self.assertEqual(video_transcript.file_format, transcript_props['file_format']) self.assertEqual(video_transcript.file_format, transcript_props['file_format'])
self.assertEqual(video_transcript.provider, transcript_props['provider']) self.assertEqual(video_transcript.provider, transcript_props['provider'])
with open(video_transcript.transcript.name) as created_transcript: with open(video_transcript.transcript.name) as created_transcript:
self.assertEqual(created_transcript.read(), FILE_DATA) self.assertEqual(created_transcript.read(), constants.TRANSCRIPT_DATA['overwatch'])
@data( @data(
{ {
...@@ -2012,7 +2180,7 @@ class TranscriptTest(TestCase): ...@@ -2012,7 +2180,7 @@ class TranscriptTest(TestCase):
Verify that `create_video_transcript` api function raise exceptions on invalid values. Verify that `create_video_transcript` api function raise exceptions on invalid values.
""" """
with self.assertRaises(ValCannotCreateError) as transcript_exception: with self.assertRaises(ValCannotCreateError) as transcript_exception:
api.create_video_transcript(video_id, language_code, file_format, ContentFile(FILE_DATA), provider) api.create_video_transcript(video_id, language_code, file_format, ContentFile(constants.TRANSCRIPT_DATA['overwatch']), provider)
self.assertIn(exception_msg, unicode(transcript_exception.exception.message)) self.assertIn(exception_msg, unicode(transcript_exception.exception.message))
...@@ -2030,7 +2198,7 @@ class TranscriptTest(TestCase): ...@@ -2030,7 +2198,7 @@ class TranscriptTest(TestCase):
video_id=edx_video_id, video_id=edx_video_id,
language_code='en', language_code='en',
metadata=dict(provider=TranscriptProviderType.CIELO24), metadata=dict(provider=TranscriptProviderType.CIELO24),
file_data=ContentFile(FILE_DATA) file_data=ContentFile(constants.TRANSCRIPT_DATA['overwatch'])
) )
# Verify that new transcript is set to video # Verify that new transcript is set to video
...@@ -2039,7 +2207,7 @@ class TranscriptTest(TestCase): ...@@ -2039,7 +2207,7 @@ class TranscriptTest(TestCase):
# verify that new data is written correctly # verify that new data is written correctly
with open(video_transcript.transcript.name) as saved_transcript: with open(video_transcript.transcript.name) as saved_transcript:
self.assertEqual(saved_transcript.read(), FILE_DATA) self.assertEqual(saved_transcript.read(), constants.TRANSCRIPT_DATA['overwatch'])
# Verify that an exception is raised if we try to open a deleted transcript file # Verify that an exception is raised if we try to open a deleted transcript file
with self.assertRaises(IOError) as file_open_exception: with self.assertRaises(IOError) as file_open_exception:
...@@ -2088,22 +2256,22 @@ class TranscriptTest(TestCase): ...@@ -2088,22 +2256,22 @@ class TranscriptTest(TestCase):
language_code = 'en' language_code = 'en'
video_id = constants.VIDEO_DICT_FISH['edx_video_id'] video_id = constants.VIDEO_DICT_FISH['edx_video_id']
transcript_file_name = u'super-soaker-en.srt' transcript_file_name = u'super-soaker-en.srt'
expected_transcript_path = combine(self.temp_dir, combine(STATIC_PATH, transcript_file_name)) expected_transcript_path = combine(self.temp_dir, combine(constants.EXPORT_IMPORT_STATIC_DIR, transcript_file_name))
file_system = OSFS(self.temp_dir) file_system = OSFS(self.temp_dir)
file_system.makedir(STATIC_DIR, recreate=True) file_system.makedir(constants.EXPORT_IMPORT_STATIC_DIR, recreate=True)
# Create transcript file now. # Create transcript file now.
api.create_trancript_file( api.create_trancript_file(
video_id=video_id, video_id=video_id,
language_code=language_code, language_code=language_code,
file_format=TranscriptFormat.SRT, file_format=TranscriptFormat.SRT,
static_dir=STATIC_DIR, static_dir=constants.EXPORT_IMPORT_STATIC_DIR,
resource_fs=file_system resource_fs=file_system
) )
# Verify transcript file is created. # Verify transcript file is created.
self.assertTrue(transcript_file_name in file_system.listdir(STATIC_PATH)) self.assertTrue(transcript_file_name in file_system.listdir(constants.EXPORT_IMPORT_STATIC_DIR))
# Also verify the content of created transcript file. # Also verify the content of created transcript file.
expected_transcript_content = File(open(expected_transcript_path)).read() expected_transcript_content = File(open(expected_transcript_path)).read()
...@@ -2120,19 +2288,19 @@ class TranscriptTest(TestCase): ...@@ -2120,19 +2288,19 @@ class TranscriptTest(TestCase):
Tests that no transcript file is created in case of invalid scenario. Tests that no transcript file is created in case of invalid scenario.
""" """
file_system = OSFS(self.temp_dir) file_system = OSFS(self.temp_dir)
file_system.makedir(STATIC_DIR, recreate=True) file_system.makedir(constants.EXPORT_IMPORT_STATIC_DIR, recreate=True)
# Try to create transcript file now. # Try to create transcript file now.
api.create_trancript_file( api.create_trancript_file(
video_id=video_id, video_id=video_id,
language_code=language_code, language_code=language_code,
file_format=TranscriptFormat.SRT, file_format=TranscriptFormat.SRT,
static_dir=STATIC_DIR, static_dir=constants.EXPORT_IMPORT_STATIC_DIR,
resource_fs=file_system resource_fs=file_system
) )
# Verify no file is created. # Verify no file is created.
self.assertEqual(file_system.listdir(STATIC_PATH), []) self.assertEqual(file_system.listdir(constants.EXPORT_IMPORT_STATIC_DIR), [])
@ddt @ddt
......
...@@ -813,6 +813,7 @@ class VideoTranscriptViewTest(APIAuthTestCase): ...@@ -813,6 +813,7 @@ class VideoTranscriptViewTest(APIAuthTestCase):
Tests POSTing transcript successfully. Tests POSTing transcript successfully.
""" """
post_transcript_data = dict(self.transcript_data) post_transcript_data = dict(self.transcript_data)
post_transcript_data.pop('file_data')
post_transcript_data['name'] = post_transcript_data.pop('transcript') post_transcript_data['name'] = post_transcript_data.pop('transcript')
response = self.client.post(self.url, post_transcript_data, format='json') response = self.client.post(self.url, post_transcript_data, format='json')
......
...@@ -4,6 +4,8 @@ Util methods to be used in api and models. ...@@ -4,6 +4,8 @@ Util methods to be used in api and models.
from django.conf import settings from django.conf import settings
from django.core.files.storage import get_storage_class from django.core.files.storage import get_storage_class
from fs.path import combine
# 3rd Party Transcription Plans # 3rd Party Transcription Plans
THIRD_PARTY_TRANSCRIPTION_PLANS = { THIRD_PARTY_TRANSCRIPTION_PLANS = {
...@@ -169,3 +171,17 @@ def get_video_transcript_storage(): ...@@ -169,3 +171,17 @@ def get_video_transcript_storage():
# during edx-platform loading this method gets called but settings are not ready yet # during edx-platform loading this method gets called but settings are not ready yet
# so in that case we will return default(FileSystemStorage) storage class instance # so in that case we will return default(FileSystemStorage) storage class instance
return get_storage_class()() return get_storage_class()()
def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """
    Writes file in specific file system.

    Arguments:
        file_data (str): Data to store into the file. Text is encoded as UTF-8
            before writing; byte strings are written unchanged.
        file_name (str): File name of the file to be created.
        file_system (OSFS): File system in which the file is created.
        static_dir (str): Directory, relative to `file_system`, in which the file is written.
    """
    # The file is opened in binary mode, so text has to be encoded explicitly.
    # `type(u'')` is `unicode` on Python 2 and `str` on Python 3, which keeps
    # Python 2 byte strings untouched while avoiding the implicit ASCII codec.
    if isinstance(file_data, type(u'')):
        file_data = file_data.encode('utf-8')

    with file_system.open(combine(static_dir, file_name), 'wb') as f:
        f.write(file_data)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment