Export video transcripts - EDUCATOR-1789

1d994038 · Mushtaq Ali · 776b2439 · 1d994038 · 1d994038 · 1d994038
Commit 1d994038 authored Feb 01, 2018 by Mushtaq Ali
Hide whitespace changes
Inline Side-by-side

Showing with 182 additions and 55 deletions

edxval/api.py
+43 -21

edxval/tests/constants.py
+1 -1

edxval/tests/test_api.py
+137 -33

test-requirements.txt
+1 -0

No files found.
--- a/edxval/api.py
+++ b/edxval/api.py
@@ -778,7 +778,7 @@ def copy_course_videos(source_course_id, destination_course_id):
            )


-def export_to_xml(video_ids, course_id=None, external=False):
+def export_to_xml(video_id, resource_fs, static_dir, course_id=None):
    """
    Exports data for a video into an xml object.

@@ -786,11 +786,10 @@ def export_to_xml(video_ids, course_id=None, external=False):
          If external=False, then edx_video_id is going to be on first index of the list.

    Arguments:
-        video_ids (list): It can contain edx_video_id and/or multiple external video ids.
-                          We are passing all video ids associated with a video component
-                          so that we can export transcripts for each video id.
-        course_id (str): The ID of the course with which this video is associated
-        external (bool): True if first video id in `video_ids` is not edx_video_id else False
+        video_id (str): Video id of the video to export transcripts.
+        course_id (str): The ID of the course with which this video is associated.
+        static_dir (str): The Directory to store transcript file.
+        resource_fs (OSFS): The file system to store transcripts.

    Returns:
        An lxml video_asset element containing export data
@@ -798,13 +797,6 @@ def export_to_xml(video_ids, course_id=None, external=False):
    Raises:
        ValVideoNotFoundError: if the video does not exist
    """
-    # TODO: This will be removed as a part of EDUCATOR-1789
-    if external:
-        return Element('video_asset')
-
-    # for an internal video, first video id must be edx_video_id
-    video_id = video_ids[0]
-
    video_image_name = ''
    video = _get_video(video_id)

@@ -831,22 +823,48 @@ def export_to_xml(video_ids, course_id=None, external=False):
                for name in ['profile', 'url', 'file_size', 'bitrate']
            }
        )
+    return create_transcripts_xml(video_id, video_el, resource_fs, static_dir)

-    return create_transcripts_xml(video_ids, video_el)

+def create_trancript_file(video_id, language_code, file_format, resource_fs, static_dir):
+    """
+    Writes transcript file to file system.

-def create_transcripts_xml(video_ids, video_el):
+    Arguments:
+        video_id (str): Video id of the video transcript file is attached.
+        language_code (str): Language code of the transcript.
+        file_format (str): File format of the transcript file.
+        static_dir (str): The Directory to store transcript file.
+        resource_fs (OSFS): The file system to store transcripts.
+    """
+    transcript_name = u'{static_dir}/{video_id}-{language_code}.{file_format}'.format(
+        static_dir=static_dir,
+        video_id=video_id,
+        language_code=language_code,
+        file_format=file_format
+    )
+    transcript_data = get_video_transcript_data(video_id, language_code)
+    if transcript_data:
+        transcript_content = transcript_data['content']
+        with resource_fs.open(transcript_name, 'wb') as f:
+            f.write(transcript_content)
+
+
+def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
    """
-    Create xml for transcripts.
+    Creates xml for transcripts.
+    For each transcript elment, an associated transcript file is also created in course OLX.

    Arguments:
-        video_ids (list): It can contain edx_video_id and/or multiple external video ids
+        video_id (str): Video id of the video.
        video_el (Element): lxml Element object
+        static_dir (str): The Directory to store transcript file.
+        resource_fs (OSFS): The file system to store transcripts.

    Returns:
        lxml Element object with transcripts information
    """
-    video_transcripts = VideoTranscript.objects.filter(video__edx_video_id__in=video_ids).order_by('language_code')
+    video_transcripts = VideoTranscript.objects.filter(video__edx_video_id=video_id).order_by('language_code')
    # create transcripts node only when we have transcripts for a video
    if video_transcripts.exists():
        transcripts_el = SubElement(video_el, 'transcripts')
@@ -854,14 +872,18 @@ def create_transcripts_xml(video_ids, video_el):
    exported_language_codes = []
    for video_transcript in video_transcripts:
        if video_transcript.language_code not in exported_language_codes:
+            language_code = video_transcript.language_code
+            file_format = video_transcript.file_format
+
+            create_trancript_file(video_id, language_code, file_format, resource_fs, static_dir)
+
            SubElement(
                transcripts_el,
                'transcript',
                {
-                    'video_id': video_transcript.video.edx_video_id,
                    'file_name': video_transcript.transcript.name,
-                    'language_code': video_transcript.language_code,
-                    'file_format': video_transcript.file_format,
+                    'language_code': language_code,
+                    'file_format': file_format,
                    'provider': video_transcript.provider,
                }
            )

--- a/edxval/tests/constants.py
+++ b/edxval/tests/constants.py
@@ -366,7 +366,7 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
 VIDEO_TRANSCRIPT_CIELO24 = dict(
    video_id='super-soaker',
    language_code='en',
-    transcript='wow.srt',
+    transcript='edxval/tests/data/The_Flash.srt',
    provider=TranscriptProviderType.CIELO24,
    file_format=TranscriptFormat.SRT,
 )

--- a/edxval/tests/test_api.py
+++ b/edxval/tests/test_api.py
@@ -4,6 +4,7 @@ Tests for the API for Video Abstraction Layer
 """
 import json
 import os
+import shutil

 import mock
 from ddt import data, ddt, unpack
@@ -15,6 +16,9 @@ from django.core.files.images import ImageFile
 from django.core.urlresolvers import reverse
 from django.db import DatabaseError
 from django.test import TestCase
+from fs.osfs import OSFS
+from fs.path import combine
+from tempfile import mkdtemp
 from lxml import etree
 from mock import patch
 from rest_framework import status
@@ -34,6 +38,9 @@ from edxval.serializers import VideoSerializer
 from edxval.tests import APIAuthTestCase, constants


+STATIC_DIR = u'static'
+STATIC_PATH = u'/static'
+
 FILE_DATA = """
 1
 00:00:14,370 --> 00:00:16,530
@@ -901,7 +908,9 @@ class TestCopyCourse(TestCase):

 @ddt
 class ExportTest(TestCase):
-    """Tests export_to_xml"""
+    """
+    Tests export_to_xml method.
+    """
    def setUp(self):
        mobile_profile = Profile.objects.create(profile_name=constants.PROFILE_MOBILE)
        desktop_profile = Profile.objects.create(profile_name=constants.PROFILE_DESKTOP)
@@ -935,13 +944,20 @@ class ExportTest(TestCase):
        transcript_data.pop('video_id')
        VideoTranscript.objects.create(**transcript_data)

+        self.temp_dir = mkdtemp()
+        self.file_system = OSFS(self.temp_dir)
+        self.file_system.makedir(STATIC_DIR, recreate=True)
+        self.addCleanup(shutil.rmtree, self.temp_dir)
+
    def assert_xml_equal(self, left, right):
        """
        Assert that the given XML fragments have the same attributes, text, and
-        (recursively) children
+        (recursively) children.
        """
        def get_child_tags(elem):
-            """Extract the list of tag names for children of elem"""
+            """
+            Extract the list of tag names for children of elem.
+            """
            return [child.tag for child in elem]

        for attr in ['tag', 'attrib', 'text', 'tail']:
@@ -951,76 +967,108 @@ class ExportTest(TestCase):
            self.assert_xml_equal(left_child, right_child)

    def parse_xml(self, xml_str):
-        """Parse XML for comparison with export output"""
+        """
+        Parse XML for comparison with export output.
+        """
        parser = etree.XMLParser(remove_blank_text=True)
        return etree.XML(xml_str, parser=parser)

    def test_no_encodings(self):
+        """
+        Verify that transcript export for video with no encodings is working as expected.
+        """
        expected = self.parse_xml("""
            <video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/>
        """)
        self.assert_xml_equal(
-            api.export_to_xml([constants.VIDEO_DICT_STAR["edx_video_id"]]),
+            api.export_to_xml(constants.VIDEO_DICT_STAR['edx_video_id'], self.file_system, STATIC_DIR),
            expected
        )

+    def test_no_video_transcript(self):
+        """
+        Verify that transcript export for video with no transcript is working as expected.
+        """
+        expected = self.parse_xml("""
+            <video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/>
+        """)
+
+        exported_xml = api.export_to_xml(constants.VIDEO_DICT_STAR['edx_video_id'], self.file_system, STATIC_DIR)
+        self.assert_xml_equal(exported_xml, expected)
+
+        # Verify that no transcript is present in the XML.
+        self.assertIsNone(exported_xml.attrib.get('transcripts'))
+
    @data(
        {'course_id': None, 'image': ''},
        {'course_id': 'test-course', 'image': 'image.jpg'},
    )
    @unpack
    def test_basic(self, course_id, image):
+        """
+        Test that video export works as expected.
+        """
        expected = self.parse_xml("""
            <video_asset client_video_id="Shallow Swordfish" duration="122.0" image="{image}">
                <encoded_video url="http://www.meowmix.com" file_size="11" bitrate="22" profile="mobile"/>
                <encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
                <encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
                <transcripts>
-                    <transcript file_format="sjson" file_name="edxval/tests/data/wow.sjson" language_code="de" provider="3PlayMedia" video_id="{video_id}"/>
-                    <transcript file_format="srt" file_name="wow.srt" language_code="en" provider="Cielo24" video_id="{video_id}" />
+                    <transcript file_format="sjson" file_name="edxval/tests/data/wow.sjson" language_code="de" provider="3PlayMedia" />
+                    <transcript file_format="srt" file_name="edxval/tests/data/The_Flash.srt" language_code="en" provider="Cielo24" />
                </transcripts>
            </video_asset>
-        """.format(image=image, video_id=constants.VIDEO_DICT_FISH['edx_video_id']))
+        """.format(image=image))

        self.assert_xml_equal(
-            api.export_to_xml([constants.VIDEO_DICT_FISH['edx_video_id']], course_id),
+            api.export_to_xml(constants.VIDEO_DICT_FISH['edx_video_id'], self.file_system, STATIC_DIR, course_id),
            expected
        )

-    def test_unknown_video(self):
-        with self.assertRaises(ValVideoNotFoundError):
-            api.export_to_xml(["unknown_video"])
-
-    def test_with_multiple_video_ids(self):
+    def test_transcript_export(self):
        """
-        Verify that transcript export with multiple video ids is working as expected.
+        Test that transcript are exported correctly.
        """
-        video_ids = ['super-soaker', 'external_video_id']
-        expected = self.parse_xml("""
-            <video_asset client_video_id="Shallow Swordfish" duration="122.0" image="">
-                <encoded_video bitrate="22" file_size="11" profile="mobile" url="http://www.meowmix.com" />
-                <encoded_video bitrate="44" file_size="33" profile="desktop" url="http://www.meowmagic.com" />
-                <encoded_video bitrate="0" file_size="100" profile="hls" url="https://www.tmnt.com/tmnt101.m3u8" />
+        language_code = 'en'
+        video_id = constants.VIDEO_DICT_FISH['edx_video_id']
+        transcript_files = {'de': u'super-soaker-de.sjson', 'en': u'super-soaker-en.srt'}
+        expected_transcript_path = combine(self.temp_dir, STATIC_PATH)
+
+        expected_xml = self.parse_xml("""
+            <video_asset client_video_id="Shallow Swordfish" duration="122.0" image="image.jpg">
+                <encoded_video url="http://www.meowmix.com" file_size="11" bitrate="22" profile="mobile"/>
+                <encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
+                <encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
                <transcripts>
-                    <transcript file_format="sjson" file_name="edxval/tests/data/wow.sjson" language_code="de" provider="3PlayMedia" video_id="super-soaker"/>
-                    <transcript file_format="srt" file_name="wow.srt" language_code="en" provider="Cielo24" video_id="super-soaker" />
+                    <transcript file_format="sjson" file_name="edxval/tests/data/wow.sjson" language_code="de" provider="3PlayMedia" />
+                    <transcript file_format="srt" file_name="edxval/tests/data/The_Flash.srt" language_code="en" provider="Cielo24" />
                </transcripts>
            </video_asset>
        """)

-        self.assert_xml_equal(
-            api.export_to_xml(video_ids),
-            expected
-        )
+        exported_xml = api.export_to_xml(video_id, self.file_system, STATIC_DIR, 'test-course')

-    def test_external_no_video_transcript(self):
+        # Assert video and transcript xml is exported correctly.
+        self.assert_xml_equal(exported_xml, expected_xml)
+
+        # Verify transcript file is created.
+        self.assertItemsEqual(transcript_files.values(), self.file_system.listdir(STATIC_PATH))
+
+        # Also verify the content of created transcript file.
+        for language_code in transcript_files.keys():
+            expected_transcript_content = File(
+                open(combine(expected_transcript_path, transcript_files[language_code]))
+            ).read()
+            transcript = api.get_video_transcript_data(video_id=video_id, language_code=language_code)
+            self.assertEqual(transcript['content'], expected_transcript_content)
+
+
+    def test_unknown_video(self):
        """
-        Verify that transcript export for external video working as expected when there is no transcript.
+        Test export with invalid video id.
        """
-        self.assert_xml_equal(
-            api.export_to_xml(['external_video_no_transcript'], external=True),
-            self.parse_xml('<video_asset/>')
-        )
+        with self.assertRaises(ValVideoNotFoundError):
+            api.export_to_xml('unknown_video', self.file_system, STATIC_DIR)


 @ddt
@@ -1702,6 +1750,9 @@ class TranscriptTest(TestCase):
        self.v2_transcript1 = video_and_transcripts['transcripts']['de']
        self.v2_transcript2 = video_and_transcripts['transcripts']['zh']

+        self.temp_dir = mkdtemp()
+        self.addCleanup(shutil.rmtree, self.temp_dir)
+
    def setup_video_with_transcripts(self, video_data, transcripts_data):
        """
        Setup a video with transcripts and returns them
@@ -2030,6 +2081,59 @@ class TranscriptTest(TestCase):
            query_filter['language_code']
        )

+    def test_create_transcript_file(self):
+        """
+        Tests that transcript file is created correctly.
+        """
+        language_code = 'en'
+        video_id = constants.VIDEO_DICT_FISH['edx_video_id']
+        transcript_file_name = u'super-soaker-en.srt'
+        expected_transcript_path = combine(self.temp_dir, combine(STATIC_PATH, transcript_file_name))
+
+        file_system = OSFS(self.temp_dir)
+        file_system.makedir(STATIC_DIR, recreate=True)
+
+        # Create transcript file now.
+        api.create_trancript_file(
+            video_id=video_id,
+            language_code=language_code,
+            file_format=TranscriptFormat.SRT,
+            static_dir=STATIC_DIR,
+            resource_fs=file_system
+        )
+
+        # Verify transcript file is created.
+        self.assertTrue(transcript_file_name in file_system.listdir(STATIC_PATH))
+
+        # Also verify the content of created transcript file.
+        expected_transcript_content = File(open(expected_transcript_path)).read()
+        transcript = api.get_video_transcript_data(video_id=video_id, language_code=language_code)
+        self.assertEqual(transcript['content'], expected_transcript_content)
+
+    @data(
+        ('invalid-video-id', 'invalid-language-code'),
+        ('super-soaker', 'invalid-language-code')
+    )
+    @unpack
+    def test_no_create_transcript_file(self, video_id, language_code):
+        """
+        Tests that no transcript file is created in case of invalid scenario.
+        """
+        file_system = OSFS(self.temp_dir)
+        file_system.makedir(STATIC_DIR, recreate=True)
+
+        # Try to create transcript file now.
+        api.create_trancript_file(
+            video_id=video_id,
+            language_code=language_code,
+            file_format=TranscriptFormat.SRT,
+            static_dir=STATIC_DIR,
+            resource_fs=file_system
+        )
+
+        # Verify no file is created.
+        self.assertEqual(file_system.listdir(STATIC_PATH), [])
+

 @ddt
 class TranscriptPreferencesTest(TestCase):

--- a/test-requirements.txt
+++ b/test-requirements.txt
 coverage==3.7.1
 ddt==0.8.0
 django-nose==1.4.4
+fs==2.0.17
 mock==1.0.1
 pylint==1.3.0