Commit b8a64a57 by Mushtaq Ali

Import DS video transcripts from course OLX - EDUCATOR-2173

parent 6469fc26
...@@ -8,6 +8,8 @@ from enum import Enum ...@@ -8,6 +8,8 @@ from enum import Enum
from uuid import uuid4 from uuid import uuid4
from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.core.files import File
from fs.path import combine
from lxml import etree from lxml import etree
from lxml.etree import Element, SubElement from lxml.etree import Element, SubElement
...@@ -20,7 +22,7 @@ from edxval.models import (CourseVideo, EncodedVideo, Profile, ...@@ -20,7 +22,7 @@ from edxval.models import (CourseVideo, EncodedVideo, Profile,
TranscriptProviderType, Video, VideoImage, TranscriptProviderType, Video, VideoImage,
VideoTranscript, ThirdPartyTranscriptCredentialsState) VideoTranscript, ThirdPartyTranscriptCredentialsState)
from edxval.serializers import TranscriptPreferenceSerializer, TranscriptSerializer, VideoSerializer from edxval.serializers import TranscriptPreferenceSerializer, TranscriptSerializer, VideoSerializer
from edxval.utils import THIRD_PARTY_TRANSCRIPTION_PLANS from edxval.utils import THIRD_PARTY_TRANSCRIPTION_PLANS, create_file_in_fs
logger = logging.getLogger(__name__) # pylint: disable=C0103 logger = logging.getLogger(__name__) # pylint: disable=C0103
...@@ -789,7 +791,7 @@ def export_to_xml(video_id, resource_fs, static_dir, course_id=None): ...@@ -789,7 +791,7 @@ def export_to_xml(video_id, resource_fs, static_dir, course_id=None):
video_id (str): Video id of the video to export transcripts. video_id (str): Video id of the video to export transcripts.
course_id (str): The ID of the course with which this video is associated. course_id (str): The ID of the course with which this video is associated.
static_dir (str): The Directory to store transcript file. static_dir (str): The Directory to store transcript file.
resource_fs (OSFS): The file system to store transcripts. resource_fs (OSFS): Export file system.
Returns: Returns:
An lxml video_asset element containing export data An lxml video_asset element containing export data
...@@ -837,22 +839,15 @@ def create_trancript_file(video_id, language_code, file_format, resource_fs, sta ...@@ -837,22 +839,15 @@ def create_trancript_file(video_id, language_code, file_format, resource_fs, sta
static_dir (str): The Directory to store transcript file. static_dir (str): The Directory to store transcript file.
resource_fs (OSFS): The file system to store transcripts. resource_fs (OSFS): The file system to store transcripts.
""" """
transcript_name = u'{static_dir}/{video_id}-{language_code}.{file_format}'.format( transcript_name = u'{video_id}-{language_code}.{file_format}'.format(
static_dir=static_dir,
video_id=video_id, video_id=video_id,
language_code=language_code, language_code=language_code,
file_format=file_format file_format=file_format
) )
try: transcript_data = get_video_transcript_data(video_id, language_code)
transcript_data = get_video_transcript_data(video_id, language_code) if transcript_data:
if transcript_data: transcript_content = transcript_data['content']
transcript_content = transcript_data['content'] create_file_in_fs(transcript_content, transcript_name, resource_fs, static_dir)
with resource_fs.open(transcript_name, 'wb') as f:
f.write(transcript_content)
except Exception:
# Do not raise exception in case no transcript file is found for now.
# TODO: Remove this - EDUCATOR-2173
pass
def create_transcripts_xml(video_id, video_el, resource_fs, static_dir): def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
...@@ -886,7 +881,6 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir): ...@@ -886,7 +881,6 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
transcripts_el, transcripts_el,
'transcript', 'transcript',
{ {
'file_name': video_transcript.transcript.name,
'language_code': language_code, 'language_code': language_code,
'file_format': file_format, 'file_format': file_format,
'provider': video_transcript.provider, 'provider': video_transcript.provider,
...@@ -897,7 +891,7 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir): ...@@ -897,7 +891,7 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
return video_el return video_el
def import_from_xml(xml, edx_video_id, course_id=None): def import_from_xml(xml, edx_video_id, resource_fs, static_dir, course_id=None):
""" """
Imports data from a video_asset element about the given video_id. Imports data from a video_asset element about the given video_id.
...@@ -907,6 +901,8 @@ def import_from_xml(xml, edx_video_id, course_id=None): ...@@ -907,6 +901,8 @@ def import_from_xml(xml, edx_video_id, course_id=None):
Arguments: Arguments:
xml (Element): An lxml video_asset element containing import data xml (Element): An lxml video_asset element containing import data
edx_video_id (str): val video id edx_video_id (str): val video id
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
course_id (str): The ID of a course to associate the video with course_id (str): The ID of a course to associate the video with
Raises: Raises:
...@@ -915,7 +911,7 @@ def import_from_xml(xml, edx_video_id, course_id=None): ...@@ -915,7 +911,7 @@ def import_from_xml(xml, edx_video_id, course_id=None):
if xml.tag != 'video_asset': if xml.tag != 'video_asset':
raise ValCannotCreateError('Invalid XML') raise ValCannotCreateError('Invalid XML')
# TODO this will be moved as a part of EDUCATOR-2173 # TODO this will be moved as a part of EDUCATOR-2403
if not edx_video_id: if not edx_video_id:
return return
...@@ -968,26 +964,43 @@ def import_from_xml(xml, edx_video_id, course_id=None): ...@@ -968,26 +964,43 @@ def import_from_xml(xml, edx_video_id, course_id=None):
'bitrate': encoded_video_el.get('bitrate'), 'bitrate': encoded_video_el.get('bitrate'),
}) })
create_video(data) create_video(data)
create_transcript_objects(xml) create_transcript_objects(xml, edx_video_id, resource_fs, static_dir)
def create_transcript_objects(xml): def create_transcript_objects(xml, edx_video_id, resource_fs, static_dir):
""" """
Create VideoTranscript objects. Create VideoTranscript objects.
Arguments: Arguments:
xml (Element): lxml Element object xml (Element): lxml Element object.
edx_video_id (str): Video id of the video.
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
""" """
for transcript in xml.findall('.//transcripts/transcript'): for transcript in xml.findall('.//transcripts/transcript'):
try: try:
create_or_update_video_transcript( file_format = transcript.attrib['file_format']
transcript.attrib['video_id'], language_code = transcript.attrib['language_code']
transcript.attrib['language_code'], transcript_data = get_video_transcript_data(edx_video_id, language_code)
metadata=dict(
provider=transcript.attrib['provider'], # First check if transcript record does not exist.
file_name=transcript.attrib['file_name'], if not transcript_data:
file_format=transcript.attrib['file_format'], transcript_file_name = u'{edx_video_id}-{language_code}.{file_format}'.format(
edx_video_id=edx_video_id,
language_code=language_code,
file_format=file_format
)
# Read file from import file system and attach File to transcript record in DS.
file_data = File(resource_fs.open(combine(static_dir, transcript_file_name)))
# Create transcript record.
create_video_transcript(
video_id=edx_video_id,
language_code=language_code,
file_format=file_format,
content=file_data,
provider=transcript.attrib['provider']
) )
)
except KeyError: except KeyError:
logger.warn("VAL: Required attributes are missing from xml, xml=[%s]", etree.tostring(transcript).strip()) logger.warn("VAL: Required attributes are missing from xml, xml=[%s]", etree.tostring(transcript).strip())
...@@ -12,6 +12,9 @@ from edxval.models import ( ...@@ -12,6 +12,9 @@ from edxval.models import (
) )
EDX_VIDEO_ID = "itchyjacket" EDX_VIDEO_ID = "itchyjacket"
EXPORT_IMPORT_STATIC_DIR = u'static'
""" """
Generic Profiles for manually creating profile objects Generic Profiles for manually creating profile objects
""" """
...@@ -363,12 +366,34 @@ VIDEO_DICT_UPDATE_ANIMAL = dict( ...@@ -363,12 +366,34 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
encoded_videos=[], encoded_videos=[],
) )
# Raw transcript contents used as test fixtures, keyed by a short fixture name.
# The entries are referenced as `file_data` by the VIDEO_TRANSCRIPT_* dicts below.
TRANSCRIPT_DATA = {
# SRT-style text with two cues; the second cue contains non-ASCII characters.
"overwatch": """
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.""",
# SRT-style text with a single cue (paired with The_Flash.srt below).
"flash": """
1
00:00:07,180 --> 00:00:08,460
This is Flash line 1.""",
# Dict with parallel start/end/text lists (paired with wow.sjson below).
"wow": {
"start": [10],
"end": [100],
"text": ["Hi, welcome to edxval."],
}
}
VIDEO_TRANSCRIPT_CIELO24 = dict( VIDEO_TRANSCRIPT_CIELO24 = dict(
video_id='super-soaker', video_id='super-soaker',
language_code='en', language_code='en',
transcript='edxval/tests/data/The_Flash.srt', transcript='edxval/tests/data/The_Flash.srt',
provider=TranscriptProviderType.CIELO24, provider=TranscriptProviderType.CIELO24,
file_format=TranscriptFormat.SRT, file_format=TranscriptFormat.SRT,
file_data=TRANSCRIPT_DATA['flash']
) )
VIDEO_TRANSCRIPT_3PLAY = dict( VIDEO_TRANSCRIPT_3PLAY = dict(
...@@ -377,6 +402,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict( ...@@ -377,6 +402,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict(
transcript='edxval/tests/data/wow.sjson', transcript='edxval/tests/data/wow.sjson',
provider=TranscriptProviderType.THREE_PLAY_MEDIA, provider=TranscriptProviderType.THREE_PLAY_MEDIA,
file_format=TranscriptFormat.SJSON, file_format=TranscriptFormat.SJSON,
file_data=TRANSCRIPT_DATA['wow']
) )
TRANSCRIPT_PREFERENCES_CIELO24 = dict( TRANSCRIPT_PREFERENCES_CIELO24 = dict(
......
{ {
"start": [10], "start": [10],
"end": [100], "end": [100],
"text": ["Hi, welcome to edxval."], "text": ["Hi, welcome to edxval."]
} }
\ No newline at end of file
...@@ -813,6 +813,7 @@ class VideoTranscriptViewTest(APIAuthTestCase): ...@@ -813,6 +813,7 @@ class VideoTranscriptViewTest(APIAuthTestCase):
Tests POSTing transcript successfully. Tests POSTing transcript successfully.
""" """
post_transcript_data = dict(self.transcript_data) post_transcript_data = dict(self.transcript_data)
post_transcript_data.pop('file_data')
post_transcript_data['name'] = post_transcript_data.pop('transcript') post_transcript_data['name'] = post_transcript_data.pop('transcript')
response = self.client.post(self.url, post_transcript_data, format='json') response = self.client.post(self.url, post_transcript_data, format='json')
......
...@@ -4,6 +4,8 @@ Util methods to be used in api and models. ...@@ -4,6 +4,8 @@ Util methods to be used in api and models.
from django.conf import settings from django.conf import settings
from django.core.files.storage import get_storage_class from django.core.files.storage import get_storage_class
from fs.path import combine
# 3rd Party Transcription Plans # 3rd Party Transcription Plans
THIRD_PARTY_TRANSCRIPTION_PLANS = { THIRD_PARTY_TRANSCRIPTION_PLANS = {
...@@ -169,3 +171,17 @@ def get_video_transcript_storage(): ...@@ -169,3 +171,17 @@ def get_video_transcript_storage():
# during edx-platform loading this method gets called but settings are not ready yet # during edx-platform loading this method gets called but settings are not ready yet
# so in that case we will return default(FileSystemStorage) storage class instance # so in that case we will return default(FileSystemStorage) storage class instance
return get_storage_class()() return get_storage_class()()
def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """
    Writes a file into the given file system.

    The target path is `static_dir` combined with `file_name`, and the file
    is opened in binary write mode.

    Arguments:
        file_data (str): Data to store into the file. Text is encoded as
            UTF-8 before writing; byte strings are written unchanged.
        file_name (str): File name of the file to be created.
        file_system (OSFS): The file system in which the file is created.
        static_dir (str): The directory, within `file_system`, to create the file in.
    """
    # The handle is opened in binary mode, so text has to be encoded
    # explicitly: writing it as-is raises TypeError on Python 3 and falls back
    # to an implicit ASCII encode (failing on non-ASCII text) on Python 2.
    if not isinstance(file_data, bytes) and hasattr(file_data, 'encode'):
        file_data = file_data.encode('utf-8')

    with file_system.open(combine(static_dir, file_name), 'wb') as f:
        f.write(file_data)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment