Merge pull request #87 from edx/mrehan/val-transcripts-backend-api

VAL changes for Video Transcripts.

Merge pull request #87 from edx/mrehan/val-transcripts-backend-api
VAL changes for Video Transcripts.
01e67fb1 · Muzaffar yousaf · GitHub · cb392214 · c2b29bee · 01e67fb1
Commit 01e67fb1 authored Oct 16, 2017 by Muzaffar yousaf Committed by GitHub Oct 16, 2017
18 changed files
--- a/.coveragerc
+++ b/.coveragerc
@@ -8,3 +8,7 @@ omit =
    **/tests/*
    **/settings.py
    **/migrations*
+
+[html]
+title = edx-val Python Test Coverage Report
+directory = html_coverage
--- a/.gitignore
+++ b/.gitignore
@@ -68,4 +68,6 @@ logs/*/*.log*
 venv/
 venvs/

+src/
 video-images/
+video-transcripts/
--- a/AUTHORS
+++ b/AUTHORS
 Christopher Lee <clee@edx.org>
 Mushtaq Ali <mushtaak@gmail.com>
 Muhammad Ammar <mammar@gmail.com>
+Muhammad Rehan <mrehan@edx.org>
--- a/edxval/admin.py
+++ b/edxval/admin.py
 """
 Admin file for django app edxval.
 """
-
+from django import forms
 from django.contrib import admin
-from .models import Video, Profile, EncodedVideo, Subtitle, CourseVideo, VideoImage
+
+from .models import (CourseVideo, EncodedVideo, Profile, TranscriptPreference,
+                     Video, VideoImage, VideoTranscript)


 class ProfileAdmin(admin.ModelAdmin):  # pylint: disable=C0111
@@ -35,19 +37,53 @@ class VideoAdmin(admin.ModelAdmin):  # pylint: disable=C0111


 class VideoImageAdmin(admin.ModelAdmin):
+    raw_id_fields = ('course_video', )
+    list_display = ('get_course_video', 'image', 'generated_images')
+
+    def get_course_video(self, obj):
+        return u'"{course_id}" -- "{edx_video_id}" '.format(
+            course_id=obj.course_video.course_id,
+            edx_video_id=obj.course_video.video.edx_video_id
+        )
+
+    get_course_video.admin_order_field = 'course_video'
+    get_course_video.short_description = 'Course Video'
+
    model = VideoImage
+
    verbose_name = 'Video Image'
    verbose_name_plural = 'Video Images'


 class CourseVideoAdmin(admin.ModelAdmin):
+    list_display = ('course_id', 'get_video_id', 'is_hidden')
+
+    def get_video_id(self, obj):
+        return obj.video.edx_video_id
+
+    get_video_id.admin_order_field = 'video'
+    get_video_id.short_description = 'edX Video Id'
+
    model = CourseVideo
    verbose_name = 'Course Video'
    verbose_name_plural = 'Course Videos'


+class VideoTranscriptAdmin(admin.ModelAdmin):
+    list_display = ('video_id', 'language_code', 'provider', 'file_format')
+
+    model = VideoTranscript
+
+
+class TranscriptPreferenceAdmin(admin.ModelAdmin):
+    list_display = ('course_id', 'provider', 'video_source_language', 'preferred_languages')
+
+    model = TranscriptPreference
+
+
 admin.site.register(Profile, ProfileAdmin)
 admin.site.register(Video, VideoAdmin)
-admin.site.register(Subtitle)
+admin.site.register(VideoTranscript, VideoTranscriptAdmin)
+admin.site.register(TranscriptPreference, TranscriptPreferenceAdmin)
 admin.site.register(VideoImage, VideoImageAdmin)
 admin.site.register(CourseVideo, CourseVideoAdmin)
--- a/edxval/api.py
+++ b/edxval/api.py
@@ -4,22 +4,22 @@
 The internal API for VAL.
 """
 import logging
-
-from lxml.etree import Element, SubElement
 from enum import Enum

-from django.core.exceptions import ValidationError, ObjectDoesNotExist
-from django.core.files.base import ContentFile
+from django.core.exceptions import ObjectDoesNotExist, ValidationError
+from lxml import etree
+from lxml.etree import Element, SubElement

-from edxval.models import Video, EncodedVideo, CourseVideo, Profile, VideoImage
-from edxval.serializers import VideoSerializer
-from edxval.exceptions import (  # pylint: disable=unused-import
-    ValError,
-    ValInternalError,
-    ValVideoNotFoundError,
-    ValCannotCreateError,
-    ValCannotUpdateError
-)
+from edxval.exceptions import (InvalidTranscriptFormat,
+                               InvalidTranscriptProvider, ValCannotCreateError,
+                               ValCannotUpdateError, ValInternalError,
+                               ValVideoNotFoundError)
+from edxval.models import (CourseVideo, EncodedVideo, Profile,
+                           TranscriptFormat, TranscriptPreference,
+                           TranscriptProviderType, Video, VideoImage,
+                           VideoTranscript)
+from edxval.serializers import TranscriptPreferenceSerializer, TranscriptSerializer, VideoSerializer
+from edxval.utils import THIRD_PARTY_TRANSCRIPTION_PLANS

 logger = logging.getLogger(__name__)  # pylint: disable=C0103

@@ -143,6 +143,210 @@ def update_video_status(edx_video_id, status):
    video.save()


+def is_transcript_available(video_id, language_code=None):
+    """
+    Returns whether the transcripts are available for a video.
+
+    Arguments:
+        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
+        language_code: it will be the language code of the requested transcript.
+    """
+    filter_attrs = {'video_id': video_id}
+    if language_code:
+        filter_attrs['language_code'] = language_code
+
+    transcript_set = VideoTranscript.objects.filter(**filter_attrs)
+    return transcript_set.exists()
+
+
+def get_video_transcripts(video_id):
+    """
+    Get a video's transcripts
+
+    Arguments:
+        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
+    """
+    transcripts_set = VideoTranscript.objects.filter(video_id=video_id)
+
+    transcripts = []
+    if transcripts_set.exists():
+        transcripts = TranscriptSerializer(transcripts_set, many=True).data
+
+    return transcripts
+
+
+def get_video_transcript(video_id, language_code):
+    """
+    Get video transcript info
+
+    Arguments:
+        video_id(unicode): A video id, it can be an edx_video_id or an external video id extracted from
+        external sources of a video component.
+        language_code(unicode): it will be the language code of the requested transcript.
+    """
+    transcript = VideoTranscript.get_or_none(video_id=video_id, language_code=language_code)
+    return TranscriptSerializer(transcript).data if transcript else None
+
+
+def get_video_transcript_data(video_ids, language_code):
+    """
+    Get video transcript data
+
+    Arguments:
+        video_ids(list): list containing edx_video_id and external video ids extracted from
+        external sources from a video component.
+        language_code(unicode): it will be the language code of the requested transcript.
+
+    Returns:
+        A dict containing the transcript file name and its content, taken from the first video id
+        (in iteration order) that has a transcript for the language; None if no transcript is found.
+    """
+    transcript_data = None
+    for video_id in video_ids:
+        try:
+            video_transcript = VideoTranscript.objects.get(video_id=video_id, language_code=language_code)
+            transcript_data = dict(
+                file_name=video_transcript.transcript.name,
+                content=video_transcript.transcript.file.read()
+            )
+            break
+        except VideoTranscript.DoesNotExist:
+            continue
+        except Exception:
+            logger.exception(
+                '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
+                video_id,
+                language_code
+            )
+            raise
+
+    return transcript_data
+
+
+def get_available_transcript_languages(video_ids):
+    """
+    Get available transcript languages
+
+    Arguments:
+        video_ids(list): list containing edx_video_id and external video ids extracted from
+        external sources of a video component.
+
+    Returns:
+        A list containing unique transcript language codes for the video ids.
+    """
+    available_languages = VideoTranscript.objects.filter(
+        video_id__in=video_ids
+    ).values_list(
+        'language_code', flat=True
+    )
+    return list(set(available_languages))
+
+
+def get_video_transcript_url(video_id, language_code):
+    """
+    Returns course video transcript url or None if no transcript
+
+    Arguments:
+        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
+        language_code: language code of a video transcript
+    """
+    video_transcript = VideoTranscript.get_or_none(video_id, language_code)
+    if video_transcript:
+        return video_transcript.url()
+
+
+def create_or_update_video_transcript(
+        video_id,
+        language_code,
+        file_name,
+        file_format,
+        provider,
+        file_data=None,
+    ):
+    """
+    Create or Update video transcript for an existing video.
+
+    Arguments:
+        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
+        language_code: language code of a video transcript
+        file_name: file name of a video transcript
+        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.
+        file_format: format of the transcript
+        provider: transcript provider
+
+    Returns:
+        video transcript url
+    """
+    if file_format not in dict(TranscriptFormat.CHOICES).keys():
+        raise InvalidTranscriptFormat('{} transcript format is not supported'.format(file_format))
+
+    if provider not in dict(TranscriptProviderType.CHOICES).keys():
+        raise InvalidTranscriptProvider('{} transcript provider is not supported'.format(provider))
+
+    video_transcript, __ = VideoTranscript.create_or_update(
+        video_id,
+        language_code,
+        file_name,
+        file_format,
+        provider,
+        file_data,
+    )
+
+    return video_transcript.url()
+
+
+def get_3rd_party_transcription_plans():
+    """
+    Retrieves 3rd party transcription plans.
+    """
+    return THIRD_PARTY_TRANSCRIPTION_PLANS
+
+
+def get_transcript_preferences(course_id):
+    """
+    Retrieves course wide transcript preferences
+
+    Arguments:
+        course_id (str): course id
+    """
+    try:
+        transcript_preference = TranscriptPreference.objects.get(course_id=course_id)
+    except TranscriptPreference.DoesNotExist:
+        return
+
+    return TranscriptPreferenceSerializer(transcript_preference).data
+
+
+def create_or_update_transcript_preferences(course_id, **preferences):
+    """
+    Creates or updates course-wide transcript preferences
+
+    Arguments:
+        course_id(str): course id
+
+    Keyword Arguments:
+        preferences(dict): keyword arguments
+    """
+    transcript_preference, __ = TranscriptPreference.objects.update_or_create(
+        course_id=course_id, defaults=preferences
+    )
+    return TranscriptPreferenceSerializer(transcript_preference).data
+
+
+def remove_transcript_preferences(course_id):
+    """
+    Deletes course-wide transcript preferences.
+
+    Arguments:
+        course_id(str): course id
+    """
+    try:
+        transcript_preference = TranscriptPreference.objects.get(course_id=course_id)
+        transcript_preference.delete()
+    except TranscriptPreference.DoesNotExist:
+        pass
+
+
 def get_course_video_image_url(course_id, edx_video_id):
    """
    Returns course video image url or None if no image found
@@ -246,11 +450,6 @@ def get_video_info(edx_video_id):
                    url: url of the video
                    file_size: size of the video in bytes
                    profile: ID of the profile
-                subtitles: a list of Subtitle dicts
-                    fmt: file format (SRT or SJSON)
-                    language: language code
-                    content_url: url of file
-                    url: api url to subtitle
            }

    Raises:
@@ -504,13 +703,19 @@ def copy_course_videos(source_course_id, destination_course_id):
            )


-def export_to_xml(edx_video_id, course_id=None):
+def export_to_xml(video_ids, course_id=None, external=False):
    """
-    Exports data about the given edx_video_id into the given xml object.
+    Exports data for a video into an xml object.

-    Args:
-        edx_video_id (str): The ID of the video to export
+    NOTE: For external video ids, only transcripts information will be added into xml.
+          If external=False, then edx_video_id is going to be on first index of the list.
+
+    Arguments:
+        video_ids (list): It can contain edx_video_id and/or multiple external video ids.
+                          We are passing all video ids associated with a video component
+                          so that we can export transcripts for each video id.
        course_id (str): The ID of the course with which this video is associated
+        external (bool): True if first video id in `video_ids` is not edx_video_id else False

    Returns:
        An lxml video_asset element containing export data
@@ -518,8 +723,16 @@ def export_to_xml(edx_video_id, course_id=None):
    Raises:
        ValVideoNotFoundError: if the video does not exist
    """
+    # val does not store external videos, so construct transcripts information only.
+    if external:
+        video_el = Element('video_asset')
+        return create_transcripts_xml(video_ids, video_el)
+
+    # for an internal video, first video id must be edx_video_id
+    video_id = video_ids[0]
+
    video_image_name = ''
-    video = _get_video(edx_video_id)
+    video = _get_video(video_id)

    try:
        course_video = CourseVideo.objects.select_related('video_image').get(course_id=course_id, video=video)
@@ -544,23 +757,56 @@ def export_to_xml(edx_video_id, course_id=None):
                for name in ['profile', 'url', 'file_size', 'bitrate']
            }
        )
-    # Note: we are *not* exporting Subtitle data since it is not currently updated by VEDA or used
-    # by LMS/Studio.
+
+    return create_transcripts_xml(video_ids, video_el)
+
+
+def create_transcripts_xml(video_ids, video_el):
+    """
+    Create xml for transcripts.
+
+    Arguments:
+        video_ids (list): It can contain edx_video_id and/or multiple external video ids
+        video_el (Element): lxml Element object
+
+    Returns:
+        lxml Element object with transcripts information
+    """
+    video_transcripts = VideoTranscript.objects.filter(video_id__in=video_ids)
+    # create transcripts node only when we have transcripts for a video
+    if video_transcripts.exists():
+        transcripts_el = SubElement(video_el, 'transcripts')
+
+    exported_language_codes = []
+    for video_transcript in video_transcripts:
+        if video_transcript.language_code not in exported_language_codes:
+            SubElement(
+                transcripts_el,
+                'transcript',
+                {
+                    'video_id': video_transcript.video_id,
+                    'file_name': video_transcript.transcript.name,
+                    'language_code': video_transcript.language_code,
+                    'file_format': video_transcript.file_format,
+                    'provider': video_transcript.provider,
+                }
+            )
+            exported_language_codes.append(video_transcript.language_code)
+
    return video_el


 def import_from_xml(xml, edx_video_id, course_id=None):
    """
-    Imports data from a video_asset element about the given edx_video_id.
+    Imports data from a video_asset element about the given video_id.

    If the edx_video_id already exists, then no changes are made. If an unknown
    profile is referenced by an encoded video, that encoding will be ignored.

-    Args:
-        xml: An lxml video_asset element containing import data
-        edx_video_id (str): The ID for the video content
+    Arguments:
+        xml (Element): An lxml video_asset element containing import data
+        edx_video_id (str): val video id
        course_id (str): The ID of a course to associate the video with
-            (optional)

    Raises:
        ValCannotCreateError: if there is an error importing the video
@@ -568,6 +814,10 @@ def import_from_xml(xml, edx_video_id, course_id=None):
    if xml.tag != 'video_asset':
        raise ValCannotCreateError('Invalid XML')

+    # if no edx_video_id is given then create video transcripts only
+    if not edx_video_id:
+        return create_transcript_objects(xml)
+
    # If video with edx_video_id already exists, associate it with the given course_id.
    try:
        video = Video.objects.get(edx_video_id=edx_video_id)
@@ -583,6 +833,9 @@ def import_from_xml(xml, edx_video_id, course_id=None):
            if image_file_name:
                VideoImage.create_or_update(course_video, image_file_name)

+        # import transcripts
+        create_transcript_objects(xml)
+
        return
    except ValidationError as err:
        logger.exception(err.message)
@@ -617,3 +870,24 @@ def import_from_xml(xml, edx_video_id, course_id=None):
            'bitrate': encoded_video_el.get('bitrate'),
        })
    create_video(data)
+    create_transcript_objects(xml)
+
+
+def create_transcript_objects(xml):
+    """
+    Create VideoTranscript objects.
+
+    Arguments:
+        xml (Element): lxml Element object
+    """
+    for transcript in xml.findall('.//transcripts/transcript'):
+        try:
+            VideoTranscript.create_or_update(
+                transcript.attrib['video_id'],
+                transcript.attrib['language_code'],
+                transcript.attrib['file_name'],
+                transcript.attrib['file_format'],
+                transcript.attrib['provider'],
+            )
+        except KeyError:
+            logger.warn("VAL: Required attributes are missing from xml, xml=[%s]", etree.tostring(transcript).strip())
--- a/edxval/exceptions.py
+++ b/edxval/exceptions.py
@@ -48,3 +48,17 @@ class ValCannotUpdateError(ValError):
    This error is raised when an object cannot be updated
    """
    pass
+
+
+class InvalidTranscriptFormat(ValError):
+    """
+    This error is raised when a transcript format is not supported
+    """
+    pass
+
+
+class InvalidTranscriptProvider(ValError):
+    """
+    This error is raised when a transcript provider is not supported
+    """
+    pass
--- a/edxval/migrations/0006_auto_20171009_0725.py
+++ b/edxval/migrations/0006_auto_20171009_0725.py
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import model_utils.fields
+import django.utils.timezone
+import edxval.models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('edxval', '0005_videoimage'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='TranscriptPreference',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, verbose_name='created', editable=False)),
+                ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, verbose_name='modified', editable=False)),
+                ('course_id', models.CharField(unique=True, max_length=255, verbose_name=b'Course ID')),
+                ('provider', models.CharField(max_length=20, verbose_name=b'Provider', choices=[(b'Custom', b'Custom'), (b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')])),
+                ('cielo24_fidelity', models.CharField(blank=True, max_length=20, null=True, verbose_name=b'Cielo24 Fidelity', choices=[(b'MECHANICAL', b'Mechanical, 75% Accuracy'), (b'PREMIUM', b'Premium, 95% Accuracy'), (b'PROFESSIONAL', b'Professional, 99% Accuracy')])),
+                ('cielo24_turnaround', models.CharField(blank=True, max_length=20, null=True, verbose_name=b'Cielo24 Turnaround', choices=[(b'STANDARD', b'Standard, 48h'), (b'PRIORITY', b'Priority, 24h')])),
+                ('three_play_turnaround', models.CharField(blank=True, max_length=20, null=True, verbose_name=b'3PlayMedia Turnaround', choices=[(b'extended_service', b'10-Day/Extended'), (b'default', b'4-Day/Default'), (b'expedited_service', b'2-Day/Expedited'), (b'rush_service', b'24 hour/Rush'), (b'same_day_service', b'Same Day')])),
+                ('preferred_languages', edxval.models.ListField(default=[], verbose_name=b'Preferred Languages', max_items=50, blank=True)),
+                ('video_source_language', models.CharField(help_text=b'This specifies the speech language of a Video.', max_length=50, null=True, verbose_name=b'Video Source Language', blank=True)),
+            ],
+            options={
+                'abstract': False,
+            },
+        ),
+        migrations.CreateModel(
+            name='VideoTranscript',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, verbose_name='created', editable=False)),
+                ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, verbose_name='modified', editable=False)),
+                ('video_id', models.CharField(help_text=b'It can be an edx_video_id or an external video id', max_length=255)),
+                ('transcript', edxval.models.CustomizableFileField(null=True, blank=True)),
+                ('language_code', models.CharField(max_length=50, db_index=True)),
+                ('provider', models.CharField(default=b'Custom', max_length=30, choices=[(b'Custom', b'Custom'), (b'3PlayMedia', b'3PlayMedia'), (b'Cielo24', b'Cielo24')])),
+                ('file_format', models.CharField(db_index=True, max_length=20, choices=[(b'srt', b'SubRip'), (b'sjson', b'SRT JSON')])),
+            ],
+        ),
+        migrations.AlterUniqueTogether(
+            name='videotranscript',
+            unique_together=set([('video_id', 'language_code')]),
+        ),
+    ]
--- a/edxval/models.py
+++ b/edxval/models.py
@@ -11,22 +11,23 @@ themselves. After these are resolved, errors such as a negative file_size or
 invalid profile_name will be returned.
 """

-from contextlib import closing
 import json
 import logging
 import os
+from contextlib import closing
 from uuid import uuid4

-from django.db import models
-from django.dispatch import receiver
 from django.core.exceptions import ValidationError
-from django.core.validators import MinValueValidator, RegexValidator
 from django.core.urlresolvers import reverse
+from django.core.validators import MinValueValidator, RegexValidator
+from django.db import models
+from django.dispatch import receiver
 from django.utils.six import python_2_unicode_compatible
-
 from model_utils.models import TimeStampedModel

-from edxval.utils import video_image_path, get_video_image_storage
+from edxval.utils import (get_video_image_storage,
+                          get_video_transcript_storage, video_image_path,
+                          video_transcript_path)

 logger = logging.getLogger(__name__)  # pylint: disable=C0103

@@ -129,7 +130,7 @@ class Video(models.Model):
        qset = cls.objects.filter(
            encoded_videos__profile__profile_name='youtube',
            encoded_videos__url=youtube_id
-        ).prefetch_related('encoded_videos', 'courses', 'subtitles')
+        ).prefetch_related('encoded_videos', 'courses')
        return qset


@@ -209,13 +210,17 @@ class ListField(models.TextField):
    """
    ListField use to store and retrieve list data.
    """
+    def __init__(self, max_items=LIST_MAX_ITEMS, *args, **kwargs):
+        self.max_items = max_items
+        super(ListField, self).__init__(*args, **kwargs)
+
    def get_prep_value(self, value):
        """
-        Converts a list to its json represetation to store in database as text.
+        Converts a list to its json representation to store in database as text.
        """
        if value and not isinstance(value, list):
            raise ValidationError(u'ListField value {} is not a list.'.format(value))
-        return json.dumps(self.validate(value) or [])
+        return json.dumps(self.validate_list(value) or [])

    def from_db_value(self, value, expression, connection, context):
        """
@@ -232,7 +237,7 @@ class ListField(models.TextField):

        # If a list is set then validated its items
        if isinstance(value, list):
-            return self.validate(value)
+            py_list = self.validate_list(value)
        else:  # try to de-serialize value and expect list and then validate
            try:
                py_list = json.loads(value)
@@ -240,13 +245,13 @@ class ListField(models.TextField):
                if not isinstance(py_list, list):
                    raise TypeError

-                self.validate(py_list)
+                self.validate_list(py_list)
            except (ValueError, TypeError):
                raise ValidationError(u'Must be a valid list of strings.')

        return py_list

-    def validate(self, value):
+    def validate_list(self, value):
        """
        Validate data before saving to database.

@@ -259,14 +264,23 @@ class ListField(models.TextField):
        Raises:
            ValidationError
        """
-        if len(value) > LIST_MAX_ITEMS:
-            raise ValidationError(u'list must not contain more than {} items.'.format(LIST_MAX_ITEMS))
+        if len(value) > self.max_items:
+            raise ValidationError(
+                u'list must not contain more than {max_items} items.'.format(max_items=self.max_items)
+            )

        if all(isinstance(item, basestring) for item in value) is False:
            raise ValidationError(u'list must only contain strings.')

        return value

+    def deconstruct(self):
+        name, path, args, kwargs = super(ListField, self).deconstruct()
+        # Only include kwarg if it's not the default
+        if self.max_items != LIST_MAX_ITEMS:
+            kwargs['max_items'] = self.max_items
+        return name, path, args, kwargs
+

 class VideoImage(TimeStampedModel):
    """
@@ -335,6 +349,139 @@ class VideoImage(TimeStampedModel):
        return storage.url(self.image.name)


+class TranscriptProviderType(object):
+    CUSTOM = 'Custom'
+    THREE_PLAY_MEDIA = '3PlayMedia'
+    CIELO24 = 'Cielo24'
+
+    CHOICES = (
+        (CUSTOM, CUSTOM),
+        (THREE_PLAY_MEDIA, THREE_PLAY_MEDIA),
+        (CIELO24, CIELO24),
+    )
+
+
+class TranscriptFormat(object):
+    SRT = 'srt'
+    SJSON = 'sjson'
+
+    CHOICES = (
+        (SRT, 'SubRip'),
+        (SJSON, 'SRT JSON')
+    )
+
+
+class CustomizableFileField(models.FileField):
+    """
+    Subclass of FileField that allows custom settings to not
+    be serialized (hard-coded) in migrations. Otherwise,
+    migrations include optional settings for storage (such as
+    the storage class and bucket name); we don't want to
+    create new migration files for each configuration change.
+    """
+    def __init__(self, *args, **kwargs):
+        kwargs.update(dict(
+            upload_to=video_transcript_path,
+            storage=get_video_transcript_storage(),
+            max_length=255,  # enough for uuid
+            blank=True,
+            null=True
+        ))
+        super(CustomizableFileField, self).__init__(*args, **kwargs)
+
+    def deconstruct(self):
+        """
+        Override base class method.
+        """
+        name, path, args, kwargs = super(CustomizableFileField, self).deconstruct()
+        del kwargs['upload_to']
+        del kwargs['storage']
+        del kwargs['max_length']
+        return name, path, args, kwargs
+
+
+class VideoTranscript(TimeStampedModel):
+    """
+    Transcript for a video
+    """
+    video_id = models.CharField(max_length=255, help_text='It can be an edx_video_id or an external video id')
+    transcript = CustomizableFileField()
+    language_code = models.CharField(max_length=50, db_index=True)
+    provider = models.CharField(
+        max_length=30,
+        choices=TranscriptProviderType.CHOICES,
+        default=TranscriptProviderType.CUSTOM,
+    )
+    file_format = models.CharField(max_length=20, db_index=True, choices=TranscriptFormat.CHOICES)
+
+    class Meta:
+        unique_together = ('video_id', 'language_code')
+
+    @classmethod
+    def get_or_none(cls, video_id, language_code):
+        """
+        Returns a data model object if found or none otherwise.
+
+        Arguments:
+            video_id(unicode): video id to which transcript may be associated
+            language_code(unicode): language of the requested transcript
+        """
+        try:
+            transcript = cls.objects.get(video_id=video_id, language_code=language_code)
+        except cls.DoesNotExist:
+            transcript = None
+
+        return transcript
+
+    @classmethod
+    def create_or_update(cls, video_id, language_code, file_name, file_format, provider, file_data=None):
+        """
+        Create or update Transcript object.
+
+        Arguments:
+            video_id (str): unique id for a video
+            language_code (str): language code
+            file_name (str): File name of the transcript
+            file_format (str): Format of transcript
+            provider (str): Transcript provider
+            file_data (InMemoryUploadedFile): File data to be saved
+
+        Returns:
+            Returns a tuple of (video_transcript, created).
+        """
+        video_transcript, created = cls.objects.get_or_create(video_id=video_id, language_code=language_code)
+
+        # delete the existing transcript file
+        if not created and file_data:
+            video_transcript.transcript.delete()
+
+        video_transcript.transcript.name = file_name
+        video_transcript.file_format = file_format
+        video_transcript.provider = provider
+
+        if file_data:
+            with closing(file_data) as transcript_file_data:
+                file_name = '{uuid}{ext}'.format(uuid=uuid4().hex, ext=os.path.splitext(file_name)[1])
+                try:
+                    video_transcript.transcript.save(file_name, transcript_file_data)
+                except Exception:  # pylint: disable=broad-except
+                    logger.exception('VAL: Transcript save failed to storage for video_id [%s]', video_id)
+                    raise
+
+        video_transcript.save()
+        return video_transcript, created
+
+    def url(self):
+        """
+        Returns language transcript url for a particular language.
+        """
+        storage = get_video_transcript_storage()
+        return storage.url(self.transcript.name)
+
+    def __unicode__(self):
+        return u'{lang} Transcript for {video}'.format(lang=self.language_code, video=self.video_id)
+
+
 SUBTITLE_FORMATS = (
    ('srt', 'SubRip'),
    ('sjson', 'SRT JSON')
@@ -376,6 +523,95 @@ class Subtitle(models.Model):
            return 'text/plain'


+class Cielo24Turnaround(object):
+    """
+    Cielo24 turnarounds.
+    """
+    STANDARD = 'STANDARD'
+    PRIORITY = 'PRIORITY'
+    CHOICES = (
+        (STANDARD, 'Standard, 48h'),
+        (PRIORITY, 'Priority, 24h'),
+    )
+
+
+class Cielo24Fidelity(object):
+    """
+    Cielo24 fidelity.
+    """
+    MECHANICAL = 'MECHANICAL'
+    PREMIUM = 'PREMIUM'
+    PROFESSIONAL = 'PROFESSIONAL'
+    CHOICES = (
+        (MECHANICAL, 'Mechanical, 75% Accuracy'),
+        (PREMIUM, 'Premium, 95% Accuracy'),
+        (PROFESSIONAL, 'Professional, 99% Accuracy'),
+    )
+
+
+class ThreePlayTurnaround(object):
+    """
+    3PlayMedia turnarounds.
+    """
+    EXTENDED_SERVICE = 'extended_service'
+    DEFAULT = 'default'
+    EXPEDITED_SERVICE = 'expedited_service'
+    RUSH_SERVICE = 'rush_service'
+    SAME_DAY_SERVICE = 'same_day_service'
+
+    CHOICES = (
+        (EXTENDED_SERVICE, '10-Day/Extended'),
+        (DEFAULT, '4-Day/Default'),
+        (EXPEDITED_SERVICE, '2-Day/Expedited'),
+        (RUSH_SERVICE, '24 hour/Rush'),
+        (SAME_DAY_SERVICE, 'Same Day'),
+    )
+
+
+class TranscriptPreference(TimeStampedModel):
+    """
+    Third Party Transcript Preferences for a Course
+    """
+    course_id = models.CharField(verbose_name='Course ID', max_length=255, unique=True)
+    provider = models.CharField(
+        verbose_name='Provider',
+        max_length=20,
+        choices=TranscriptProviderType.CHOICES,
+    )
+    cielo24_fidelity = models.CharField(
+        verbose_name='Cielo24 Fidelity',
+        max_length=20,
+        choices=Cielo24Fidelity.CHOICES,
+        null=True,
+        blank=True,
+    )
+    cielo24_turnaround = models.CharField(
+        verbose_name='Cielo24 Turnaround',
+        max_length=20,
+        choices=Cielo24Turnaround.CHOICES,
+        null=True,
+        blank=True,
+    )
+    three_play_turnaround = models.CharField(
+        verbose_name='3PlayMedia Turnaround',
+        max_length=20,
+        choices=ThreePlayTurnaround.CHOICES,
+        null=True,
+        blank=True,
+    )
+    preferred_languages = ListField(verbose_name='Preferred Languages', max_items=50, default=[], blank=True)
+    video_source_language = models.CharField(
+        verbose_name='Video Source Language',
+        max_length=50,
+        null=True,
+        blank=True,
+        help_text='This specifies the speech language of a Video.'
+    )
+
+    def __unicode__(self):
+        return u'{course_id} - {provider}'.format(course_id=self.course_id, provider=self.provider)
+
+
 @receiver(models.signals.post_save, sender=Video)
 def video_status_update_callback(sender, **kwargs):  # pylint: disable=unused-argument
    """

--- a/edxval/serializers.py
+++ b/edxval/serializers.py
@@ -5,9 +5,10 @@ Serialization is usually sent through the VideoSerializer which uses the
 EncodedVideoSerializer which uses the profile_name as it's profile field.
 """
 from rest_framework import serializers
-from rest_framework.fields import IntegerField, DateTimeField
+from rest_framework.fields import DateTimeField, IntegerField

-from edxval.models import Profile, Video, EncodedVideo, Subtitle, CourseVideo, VideoImage
+from edxval.models import (CourseVideo, EncodedVideo, Profile, TranscriptPreference, Video,
+                           VideoImage, VideoTranscript)


 class EncodedVideoSerializer(serializers.ModelSerializer):
@@ -50,37 +51,22 @@ class EncodedVideoSerializer(serializers.ModelSerializer):
        return data.get('profile', None)


-class SubtitleSerializer(serializers.ModelSerializer):
+class TranscriptSerializer(serializers.ModelSerializer):
    """
-    Serializer for Subtitle objects
+    Serializer for VideoTranscript objects
    """
-    content_url = serializers.CharField(source='get_absolute_url', read_only=True)
-    content = serializers.CharField(write_only=True)
+    class Meta:  # pylint: disable=C1001, C0111
+        model = VideoTranscript
+        lookup_field = 'video_id'
+        fields = ('video_id', 'url', 'language_code', 'provider', 'file_format')

-    def validate(self, data):
-        """
-        Validate that the subtitle is in the correct format
-        """
-        value = data.get("content")
-        if data.get("fmt") == "sjson":
-            import json
-            try:
-                loaded = json.loads(value)
-            except ValueError:
-                raise serializers.ValidationError("Not in JSON format")
-            else:
-                data["content"] = json.dumps(loaded)
-        return data
+    url = serializers.SerializerMethodField()

-    class Meta:  # pylint: disable=C1001, C0111
-        model = Subtitle
-        lookup_field = "id"
-        fields = (
-            "fmt",
-            "language",
-            "content_url",
-            "content",
-        )
+    def get_url(self, transcript):
+        """
+        Retrieves the transcript url.
+        """
+        return transcript.url()


 class CourseSerializer(serializers.RelatedField):
@@ -118,7 +104,6 @@ class VideoSerializer(serializers.ModelSerializer):
    encoded_videos takes a list of dicts EncodedVideo data.
    """
    encoded_videos = EncodedVideoSerializer(many=True)
-    subtitles = SubtitleSerializer(many=True, required=False)
    courses = CourseSerializer(
        many=True,
        read_only=False,
@@ -170,7 +155,6 @@ class VideoSerializer(serializers.ModelSerializer):
        """
        courses = validated_data.pop("courses", [])
        encoded_videos = validated_data.pop("encoded_videos", [])
-        subtitles = validated_data.pop("subtitles", [])

        video = Video.objects.create(**validated_data)

@@ -179,11 +163,6 @@ class VideoSerializer(serializers.ModelSerializer):
            for video_data in encoded_videos
        )

-        Subtitle.objects.bulk_create(
-            Subtitle(video=video, **subtitle_data)
-            for subtitle_data in subtitles
-        )
-
        # The CourseSerializer will already have converted the course data
        # to CourseVideo models, so we can just set the video and save.
        # Also create VideoImage objects if an image filename is present
@@ -211,13 +190,6 @@ class VideoSerializer(serializers.ModelSerializer):
            for video_data in validated_data.get("encoded_videos", [])
        )

-        # Set subtitles
-        instance.subtitles.all().delete()
-        Subtitle.objects.bulk_create(
-            Subtitle(video=instance, **subtitle_data)
-            for subtitle_data in validated_data.get("subtitles", [])
-        )
-
        # Set courses
        # NOTE: for backwards compatibility with the DRF v2 behavior,
        # we do NOT delete existing course videos during the update.
@@ -229,3 +201,30 @@ class VideoSerializer(serializers.ModelSerializer):
                VideoImage.create_or_update(course_video, image_name)

        return instance
+
+
+class TranscriptPreferenceSerializer(serializers.ModelSerializer):
+    """
+    Serializer for TranscriptPreference
+    """
+
+    class Meta:  # pylint: disable=C1001, C0111
+        model = TranscriptPreference
+        fields = (
+            'course_id',
+            'provider',
+            'cielo24_fidelity',
+            'cielo24_turnaround',
+            'three_play_turnaround',
+            'preferred_languages',
+            'video_source_language',
+            'modified',
+        )
+
+    preferred_languages = serializers.SerializerMethodField()
+
+    def get_preferred_languages(self, transcript_preference):
+        """
+        Returns python list for preferred_languages model field.
+        """
+        return transcript_preference.preferred_languages
--- a/edxval/settings.py
+++ b/edxval/settings.py
@@ -190,3 +190,13 @@ VIDEO_IMAGE_SETTINGS = dict(
    VIDEO_IMAGE_MIN_BYTES=100,
    DIRECTORY_PREFIX='video-images/',
 )
+
+VIDEO_TRANSCRIPTS_SETTINGS = dict(
+    # Backend storage
+    # STORAGE_CLASS='storages.backends.s3boto.S3BotoStorage',
+    # STORAGE_KWARGS=dict(bucket='video-transcripts-bucket'),
+    # If you change the prefix value, update the .gitignore accordingly
+    # so that transcripts uploaded during tests are ignored
+    VIDEO_TRANSCRIPTS_MAX_BYTES=3145728,  # 3 MB
+    DIRECTORY_PREFIX='video-transcripts/',
+)
--- a/edxval/tests/constants.py
+++ b/edxval/tests/constants.py
@@ -3,6 +3,14 @@
 """
 Constants used for tests.
 """
+from edxval.models import (
+    TranscriptFormat,
+    TranscriptProviderType,
+    Cielo24Fidelity,
+    Cielo24Turnaround,
+    ThreePlayTurnaround
+)
+
 EDX_VIDEO_ID = "itchyjacket"
 """
 Generic Profiles for manually creating profile objects
@@ -388,3 +396,44 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
    encoded_videos=[],
    subtitles=[]
 )
+
+VIDEO_TRANSCRIPT_CIELO24 = dict(
+    video_id='super-soaker',
+    language_code='en',
+    transcript='wow.srt',
+    provider=TranscriptProviderType.CIELO24,
+    file_format=TranscriptFormat.SRT,
+)
+
+VIDEO_TRANSCRIPT_3PLAY = dict(
+    video_id='super-soaker',
+    language_code='de',
+    transcript='wow.sjson',
+    provider=TranscriptProviderType.THREE_PLAY_MEDIA,
+    file_format=TranscriptFormat.SJSON,
+)
+
+VIDEO_TRANSCRIPT_CUSTOM = dict(
+    video_id='external_video_id',
+    language_code='de',
+    transcript='wow.srt',
+    provider=TranscriptProviderType.CUSTOM,
+    file_format=TranscriptFormat.SRT,
+)
+
+TRANSCRIPT_PREFERENCES_CIELO24 = dict(
+    course_id='edX/DemoX/Demo_Course',
+    provider=TranscriptProviderType.CIELO24,
+    cielo24_fidelity=Cielo24Fidelity.PROFESSIONAL,
+    cielo24_turnaround=Cielo24Turnaround.PRIORITY,
+    preferred_languages=['ar'],
+    video_source_language='en',
+)
+
+TRANSCRIPT_PREFERENCES_3PLAY = dict(
+    course_id='edX/DemoX/Demo_Course',
+    provider=TranscriptProviderType.THREE_PLAY_MEDIA,
+    three_play_turnaround=ThreePlayTurnaround.SAME_DAY_SERVICE,
+    preferred_languages=['ar', 'en'],
+    video_source_language='en',
+)
--- a/edxval/tests/data/The_Arrow.srt
+++ b/edxval/tests/data/The_Arrow.srt
+1
+00:00:07,180 --> 00:00:08,460
+This is Arrow line 1.
+
+2
+00:00:08,460 --> 00:00:10,510
+This is Arrow line 2.
+
+3
+00:00:10,510 --> 00:00:13,560
+This is Arrow line 3.
+
+4
+00:00:13,560 --> 00:00:14,360
+This is Arrow line 4.
+
+5
+00:00:14,370 --> 00:00:16,530
+This is Arrow line 5.
+
+6
+00:00:16,500 --> 00:00:18,600
+可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
\ No newline at end of file
--- a/edxval/tests/data/The_Flash.srt
+++ b/edxval/tests/data/The_Flash.srt
+1
+00:00:07,180 --> 00:00:08,460
+This is Flash line 1.
+
+2
+00:00:08,460 --> 00:00:10,510
+This is Flash line 2.
+
+3
+00:00:10,510 --> 00:00:13,560
+This is Flash line 3.
+
+4
+00:00:13,560 --> 00:00:14,360
+This is Flash line 4.
+
+5
+00:00:14,370 --> 00:00:16,530
+This is Flash line 5.
+
+6
+00:00:16,500 --> 00:00:18,600
+可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
\ No newline at end of file
--- a/edxval/tests/test_api.py
+++ b/edxval/tests/test_api.py
@@ -5,27 +5,40 @@ Tests for the API for Video Abstraction Layer
 import json

 import mock
-from mock import patch
-from lxml import etree
-
+from ddt import data, ddt, unpack
 from django.core.exceptions import ValidationError
+from django.core.files import File
+from django.core.files.base import ContentFile
 from django.core.files.images import ImageFile
-from django.test import TestCase
-from django.db import DatabaseError
 from django.core.urlresolvers import reverse
+from django.db import DatabaseError
+from django.test import TestCase
+from lxml import etree
+from mock import patch
 from rest_framework import status
-from ddt import ddt, data, unpack
+from django.conf import settings

-from edxval.models import Profile, Video, EncodedVideo, CourseVideo, VideoImage, LIST_MAX_ITEMS
 from edxval import api as api
-from edxval.api import (
-    SortDirection,
-    ValCannotCreateError,
-    ValCannotUpdateError,
-    ValVideoNotFoundError,
-    VideoSortField,
-)
-from edxval.tests import constants, APIAuthTestCase
+from edxval.api import (InvalidTranscriptFormat, InvalidTranscriptProvider,
+                        SortDirection, ValCannotCreateError,
+                        ValCannotUpdateError, ValVideoNotFoundError,
+                        VideoSortField)
+from edxval.models import (LIST_MAX_ITEMS, CourseVideo, EncodedVideo, Profile,
+                           TranscriptFormat, TranscriptProviderType, Video,
+                           VideoImage, VideoTranscript, TranscriptPreference)
+from edxval.tests import APIAuthTestCase, constants
+from edxval import utils
+
+
+FILE_DATA = """
+1
+00:00:14,370 --> 00:00:16,530
+I am overwatch.
+
+2
+00:00:16,500 --> 00:00:18,600
+可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
+"""


 class SortedVideoTestMixin(object):
@@ -759,7 +772,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
        """
        Tests number of queries for a Video/EncodedVideo(1) pair
        """
-        with self.assertNumQueries(6):
+        with self.assertNumQueries(5):
            api.get_video_info(constants.COMPLETE_SET_FISH.get("edx_video_id"))

    def test_get_info_queries_for_one_encoded_video(self):
@@ -771,7 +784,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
            url, constants.COMPLETE_SET_STAR, format='json'
        )
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-        with self.assertNumQueries(5):
+        with self.assertNumQueries(4):
            api.get_video_info(constants.COMPLETE_SET_STAR.get("edx_video_id"))

    def test_get_info_queries_for_only_video(self):
@@ -783,7 +796,7 @@ class GetVideoInfoTestWithHttpCalls(APIAuthTestCase):
            url, constants.VIDEO_DICT_ZEBRA, format='json'
        )
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-        with self.assertNumQueries(4):
+        with self.assertNumQueries(3):
            api.get_video_info(constants.VIDEO_DICT_ZEBRA.get("edx_video_id"))


@@ -892,6 +905,17 @@ class ExportTest(TestCase):
            **constants.ENCODED_VIDEO_DICT_HLS
        )

+        # create external video transcripts
+        VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_CUSTOM)
+        video_transcript = dict(constants.VIDEO_TRANSCRIPT_CUSTOM, language_code=u'ar')
+        VideoTranscript.objects.create(**video_transcript)
+        video_transcript = dict(constants.VIDEO_TRANSCRIPT_CUSTOM, video_id=u'external_video_id2', language_code=u'fr')
+        VideoTranscript.objects.create(**video_transcript)
+
+        # create internal video transcripts
+        VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_CIELO24)
+        VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_3PLAY)
+
    def assert_xml_equal(self, left, right):
        """
        Assert that the given XML fragments have the same attributes, text, and
@@ -917,7 +941,7 @@ class ExportTest(TestCase):
            <video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/>
        """)
        self.assert_xml_equal(
-            api.export_to_xml(constants.VIDEO_DICT_STAR["edx_video_id"]),
+            api.export_to_xml([constants.VIDEO_DICT_STAR["edx_video_id"]]),
            expected
        )

@@ -932,17 +956,73 @@ class ExportTest(TestCase):
                <encoded_video url="http://www.meowmix.com" file_size="11" bitrate="22" profile="mobile"/>
                <encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
                <encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
+                <transcripts>
+                    <transcript file_format="sjson" file_name="wow.sjson" language_code="de" provider="3PlayMedia" video_id="super-soaker"/>
+                    <transcript file_format="srt" file_name="wow.srt" language_code="en" provider="Cielo24" video_id="super-soaker" />
+                </transcripts>
            </video_asset>
-        """.format(image=image))
+        """.format(image=image, video_id=constants.VIDEO_DICT_FISH['edx_video_id']))

        self.assert_xml_equal(
-            api.export_to_xml(constants.VIDEO_DICT_FISH['edx_video_id'], course_id),
+            api.export_to_xml([constants.VIDEO_DICT_FISH['edx_video_id']], course_id),
            expected
        )

    def test_unknown_video(self):
        with self.assertRaises(ValVideoNotFoundError):
-            api.export_to_xml("unknown_video")
+            api.export_to_xml(["unknown_video"])
+
+    def test_external_video_transcript(self):
+        """
+        Verify that transcript export for multiple external videos is working as expected.
+        """
+        video_ids = ['missing', 'external_video_id', 'missing2', 'external_video_id2']
+        expected = self.parse_xml("""
+            <video_asset>
+                <transcripts>
+                    <transcript file_format="srt" file_name="wow.srt" language_code="ar" provider="Custom" video_id="external_video_id"/>
+                    <transcript file_format="srt" file_name="wow.srt" language_code="de" provider="Custom" video_id="external_video_id"/>
+                    <transcript file_format="srt" file_name="wow.srt" language_code="fr" provider="Custom" video_id="external_video_id2"/>
+                </transcripts>
+            </video_asset>
+        """.format(video_id=''))
+
+        self.assert_xml_equal(
+            api.export_to_xml(video_ids, external=True),
+            expected
+        )
+
+    def test_with_multiple_video_ids(self):
+        """
+        Verify that transcript export with multiple video ids is working as expected.
+        """
+        video_ids = ['super-soaker', 'external_video_id']
+        expected = self.parse_xml("""
+            <video_asset client_video_id="Shallow Swordfish" duration="122.0" image="">
+                <encoded_video bitrate="22" file_size="11" profile="mobile" url="http://www.meowmix.com" />
+                <encoded_video bitrate="44" file_size="33" profile="desktop" url="http://www.meowmagic.com" />
+                <encoded_video bitrate="0" file_size="100" profile="hls" url="https://www.tmnt.com/tmnt101.m3u8" />
+                <transcripts>
+                    <transcript file_format="srt" file_name="wow.srt" language_code="ar" provider="Custom" video_id="external_video_id" />
+                    <transcript file_format="srt" file_name="wow.srt" language_code="de" provider="Custom" video_id="external_video_id"/>
+                    <transcript file_format="srt" file_name="wow.srt" language_code="en" provider="Cielo24" video_id="super-soaker" />
+                </transcripts>
+            </video_asset>
+        """)
+
+        self.assert_xml_equal(
+            api.export_to_xml(video_ids),
+            expected
+        )
+
+    def test_external_no_video_transcript(self):
+        """
+        Verify that transcript export for an external video works as expected when there is no transcript.
+        """
+        self.assert_xml_equal(
+            api.export_to_xml(['external_video_no_transcript'], external=True),
+            self.parse_xml('<video_asset/>')
+        )


 @ddt
@@ -960,7 +1040,11 @@ class ImportTest(TestCase):
        )
        CourseVideo.objects.create(video=video, course_id='existing_course_id')

-    def make_import_xml(self, video_dict, encoded_video_dicts=None, image=None):
+        self.transcript_data1 = dict(constants.VIDEO_TRANSCRIPT_CIELO24, video_id='little-star')
+        self.transcript_data2 = dict(constants.VIDEO_TRANSCRIPT_3PLAY, video_id='little-star')
+        self.transcript_data3 = dict(self.transcript_data2, video_id='super-soaker')
+
+    def make_import_xml(self, video_dict, encoded_video_dicts=None, image=None, video_transcripts=None):
        import_xml = etree.Element(
            "video_asset",
            attrib={
@@ -981,6 +1065,22 @@ class ImportTest(TestCase):
                    for key, val in encoding_dict.items()
                }
            )
+
+        if video_transcripts:
+            transcripts_el = etree.SubElement(import_xml, 'transcripts')
+            for video_transcript in video_transcripts:
+                etree.SubElement(
+                    transcripts_el,
+                    'transcript',
+                    {
+                        'video_id': video_transcript['video_id'],
+                        'file_name': video_transcript['transcript'],
+                        'language_code': video_transcript['language_code'],
+                        'file_format': video_transcript['file_format'],
+                        'provider': video_transcript['provider'],
+                    }
+                )
+
        return import_xml

    def assert_obj_matches_dict_for_keys(self, obj, dict_, keys):
@@ -1007,18 +1107,44 @@ class ImportTest(TestCase):
            api.import_from_xml(xml, edx_video_id, course_id)
        self.assertFalse(Video.objects.filter(edx_video_id=edx_video_id).exists())

+    def assert_transcripts(self, video_id, expected_transcripts):
+        """
+        Assert that the transcripts stored for `video_id` match `expected_transcripts`.
+        """
+        # Verify total number of expected transcripts for a video
+        video_transcripts = VideoTranscript.objects.filter(video_id=video_id)
+        self.assertEqual(video_transcripts.count(), len(expected_transcripts))
+
+        # Verify data for each transcript
+        for expected_transcript in expected_transcripts:
+            language_code = expected_transcript['language_code']
+            expected_transcript['name'] = expected_transcript.pop('transcript')
+
+            # get the imported transcript and rename `url` key
+            received = api.TranscriptSerializer(
+                VideoTranscript.objects.get(video_id=video_id, language_code=language_code)
+            ).data
+            received['name'] = received.pop('url')
+
+            self.assertDictEqual(received, expected_transcript)
+
    def test_new_video_full(self):
-        new_course_id = "new_course_id"
+        new_course_id = 'new_course_id'

        xml = self.make_import_xml(
            video_dict=constants.VIDEO_DICT_STAR,
            encoded_video_dicts=[constants.ENCODED_VIDEO_DICT_STAR, constants.ENCODED_VIDEO_DICT_FISH_HLS],
-            image=self.image_name
+            image=self.image_name,
+            video_transcripts=[self.transcript_data1, self.transcript_data2]
        )

-        api.import_from_xml(xml, constants.VIDEO_DICT_STAR["edx_video_id"], new_course_id)
+        # there must not be any transcript before import
+        with self.assertRaises(VideoTranscript.DoesNotExist):
+            VideoTranscript.objects.get(video_id=constants.VIDEO_DICT_STAR['edx_video_id'])

-        video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_STAR["edx_video_id"])
+        api.import_from_xml(xml, constants.VIDEO_DICT_STAR['edx_video_id'], new_course_id)
+
+        video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_STAR['edx_video_id'])
        self.assert_video_matches_dict(video, constants.VIDEO_DICT_STAR)
        self.assert_encoded_video_matches_dict(
            video.encoded_videos.get(profile__profile_name=constants.PROFILE_MOBILE),
@@ -1031,6 +1157,11 @@ class ImportTest(TestCase):
        course_video = video.courses.get(course_id=new_course_id)
        self.assertTrue(course_video.video_image.image.name, self.image_name)

+        self.assert_transcripts(
+            constants.VIDEO_DICT_STAR['edx_video_id'],
+            [self.transcript_data1, self.transcript_data2]
+        )
+
    def test_new_video_minimal(self):
        edx_video_id = "test_edx_video_id"

@@ -1048,11 +1179,13 @@ class ImportTest(TestCase):

    @data(
        # import into another course, where the video already exists, but is not associated with the course.
-        "new_course_id",
+        {'course_id': 'new_course_id', 'language_code': 'fr'},
        # re-import case, where the video and course association already exists.
-        "existing_course_id"
+        {'course_id': 'existing_course_id', 'language_code': 'nl'}
    )
-    def test_existing_video(self, course_id):
+    @unpack
+    def test_existing_video(self, course_id, language_code):
+        transcript_data = dict(self.transcript_data3, language_code=language_code)
        xml = self.make_import_xml(
            video_dict={
                "client_video_id": "new_client_video_id",
@@ -1067,8 +1200,14 @@ class ImportTest(TestCase):
                    "profile": "mobile",
                },
            ],
-            image=self.image_name
+            image=self.image_name,
+            video_transcripts=[transcript_data]
        )
+
+        # there must not be any transcript before import
+        with self.assertRaises(VideoTranscript.DoesNotExist):
+            VideoTranscript.objects.get(video_id=constants.VIDEO_DICT_FISH["edx_video_id"])
+
        api.import_from_xml(xml, constants.VIDEO_DICT_FISH["edx_video_id"], course_id)

        video = Video.objects.get(edx_video_id=constants.VIDEO_DICT_FISH["edx_video_id"])
@@ -1084,6 +1223,10 @@ class ImportTest(TestCase):
        course_video = video.courses.get(course_id=course_id)
        self.assertTrue(course_video.video_image.image.name, self.image_name)

+        self.assert_transcripts(
+            constants.VIDEO_DICT_FISH["edx_video_id"],
+            [transcript_data]
+        )

    def test_existing_video_with_invalid_course_id(self):
        xml = self.make_import_xml(video_dict=constants.VIDEO_DICT_FISH)
@@ -1144,6 +1287,65 @@ class ImportTest(TestCase):
        xml = self.make_import_xml(video_dict=constants.VIDEO_DICT_FISH)
        self.assert_invalid_import(xml, "x" * 300)

+    def test_external_video_transcript(self):
+        """
+        Verify that transcript import for an external video works as expected.
+        """
+        external_video_id = 'little-star'
+        xml = etree.fromstring("""
+            <video_asset>
+                <transcripts>
+                    <transcript file_name="wow.srt" language_code="en" file_format="srt" provider='Cielo24' video_id="{video_id}"/>
+                    <transcript file_name="wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/>
+                </transcripts>
+            </video_asset>
+        """.format(video_id=external_video_id))
+
+        with self.assertRaises(VideoTranscript.DoesNotExist):
+            VideoTranscript.objects.get(video_id=external_video_id)
+
+        api.import_from_xml(xml, '')
+        self.assert_transcripts(external_video_id, [self.transcript_data1, self.transcript_data2])
+
+    def test_external_no_video_transcript(self):
+        """
+        Verify that transcript import for an external video works as expected when there is no transcript.
+        """
+        api.import_from_xml(etree.fromstring('<video_asset/>'), '')
+        self.assertEqual(
+            VideoTranscript.objects.count(),
+            0
+        )
+
+    @patch('edxval.api.logger')
+    def test_video_transcript_missing_attribute(self, mock_logger):
+        """
+        Verify that transcript import skips a transcript element that is missing a required attribute.
+        """
+        video_id = 'little-star'
+        transcript_xml = '<transcript file_name="wow.srt" language_code="en" file_format="srt" provider="Cielo24"/>'
+        xml = etree.fromstring("""
+            <video_asset>
+                <transcripts>
+                    {transcript_xml}
+                    <transcript file_name="wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"/>
+                </transcripts>
+            </video_asset>
+        """.format(transcript_xml=transcript_xml, video_id=video_id))
+
+        # there should be no video transcript before import
+        with self.assertRaises(VideoTranscript.DoesNotExist):
+            VideoTranscript.objects.get(video_id=video_id)
+
+        api.create_transcript_objects(xml)
+
+        mock_logger.warn.assert_called_with(
+            "VAL: Required attributes are missing from xml, xml=[%s]",
+            transcript_xml
+        )
+
+        self.assert_transcripts(video_id, [self.transcript_data2])
+

 class GetCourseVideoRemoveTest(TestCase):
    """
@@ -1460,3 +1662,386 @@ class CourseVideoImageTest(TestCase):

        # Open the shared image file to verify it is not deleted
        ImageFile(open(shared_image))
+
+
+@ddt
+class TranscriptTest(TestCase):
+    """
+    Tests to check transcript related functions.
+    """
+    def setUp(self):
+        """
+        Creates video and video transcript objects.
+        """
+        self.video1 = Video.objects.create(**constants.VIDEO_DICT_FISH)
+        self.edx_video_id1 = self.video1.edx_video_id
+
+        self.video2 = Video.objects.create(**constants.VIDEO_DICT_DIFFERENT_ID_FISH)
+        self.edx_video_id2 = self.video2.edx_video_id
+
+        self.transcript_data1 = dict(constants.VIDEO_TRANSCRIPT_CIELO24)
+        self.transcript_data1['name'] = self.transcript_data1.pop('transcript')
+
+        self.transcript_data2 = dict(constants.VIDEO_TRANSCRIPT_3PLAY)
+        self.transcript_data2['name'] = self.transcript_data2.pop('transcript')
+
+        self.transcript1 = VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_CIELO24)
+        self.transcript2 = VideoTranscript.objects.create(**constants.VIDEO_TRANSCRIPT_3PLAY)
+
+        self.video_id = '0987654321'
+        self.arrow_transcript_path = 'edxval/tests/data/The_Arrow.srt'
+        self.flash_transcript_path = 'edxval/tests/data/The_Flash.srt'
+        self.transcript_url = api.create_or_update_video_transcript(
+            self.video_id,
+            'ur',
+            'The_Arrow.srt',
+            TranscriptFormat.SRT,
+            provider=TranscriptProviderType.CUSTOM,
+            file_data=File(open(self.arrow_transcript_path)),
+        )
+
+    @data(
+        {'video_id': 'super-soaker', 'language_code': 'en', 'expected_availability': True},
+        {'video_id': 'super-soaker', 'language_code': None, 'expected_availability': True},
+        {'video_id': 'super123', 'language_code': 'en', 'expected_availability': False},
+        {'video_id': 'super-soaker', 'language_code': 'ro', 'expected_availability': False},
+    )
+    @unpack
+    def test_is_transcript_available(self, video_id, language_code, expected_availability):
+        """
+        Verify that `is_transcript_available` api function works as expected.
+        """
+        is_transcript_available = api.is_transcript_available(video_id, language_code)
+        self.assertEqual(is_transcript_available, expected_availability)
+
+    @data(
+        {'video_id': 'non-existant-video', 'language_code': 'en'},
+        {'video_id': '0987654321', 'language_code': 'en'},
+    )
+    @unpack
+    def test_get_video_transcript_not_found(self, video_id, language_code):
+        """
+        Verify that `get_video_transcript` works as expected if transcript is not found.
+        """
+        self.assertIsNone(api.get_video_transcript(video_id, language_code))
+
+    def test_get_video_transcript(self):
+        """
+        Verify that `get_video_transcript` works as expected if transcript is found.
+        """
+        transcript = api.get_video_transcript(u'0987654321', u'ur')
+        expectation = {
+            'video_id': u'0987654321',
+            'url': self.transcript_url,
+            'file_format': TranscriptFormat.SRT,
+            'provider': TranscriptProviderType.CUSTOM,
+            'language_code': u'ur'
+        }
+        self.assertDictEqual(transcript, expectation)
+
+    @patch('edxval.api.logger')
+    def test_get_video_transcript_data_exception(self, mock_logger):
+        """
+        Verify that `get_video_transcript_data` logs and raises an exception.
+        """
+        with self.assertRaises(IOError):
+            api.get_video_transcript_data(video_ids=['super-soaker'], language_code=u'en')
+
+        mock_logger.exception.assert_called_with(
+            '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
+            'super-soaker',
+            'en',
+        )
+
+    @data(
+        {'video_ids': ['non-existant-video', 'another-non-existant-id'], 'language_code': 'en', 'result': None},
+        {'video_ids': ['non-existant-video', '0987654321'], 'language_code': 'en', 'result': None},
+    )
+    @unpack
+    def test_get_video_transcript_data_not_found(self, video_ids, language_code, result):
+        """
+        Verify that `get_video_transcript_data` api function returns None when no transcript is found.
+        """
+        transcript = api.get_video_transcript_data(video_ids, language_code)
+        self.assertEqual(transcript, result)
+
+    def test_get_video_transcript_data(self):
+        """
+        Verify that `get_video_transcript_data` api function works as expected.
+        """
+        expected_transcript = {
+            'file_name': self.transcript_url,
+            'content': File(open(self.arrow_transcript_path)).read()
+        }
+        transcript = api.get_video_transcript_data(
+            video_ids=['super-soaker', '0987654321'],
+            language_code=u'ur'
+        )
+        self.assertDictEqual(transcript, expected_transcript)
+
+    @data(
+        {'video_id': 'super-soaker', 'result': True},
+        {'video_id': 'super-soaker1', 'result': False},
+    )
+    @unpack
+    def test_get_video_transcripts(self, video_id, result):
+        """
+        Verify that `get_video_transcripts` api function works as expected.
+        """
+        transcripts = api.get_video_transcripts(video_id)
+
+        if result:
+            self.assertEqual(len(transcripts), 2)
+            for transcript, transcript_data in zip(transcripts, [self.transcript_data2, self.transcript_data1]):
+                transcript_data['url'] = transcript_data.pop('name')
+                self.assertEqual(transcript, transcript_data)
+        else:
+            self.assertEqual(transcripts, [])
+
+    def test_create_video_transcript(self):
+        """
+        Verify that `create_or_update_video_transcript` api function creates a transcript if one does not already exist.
+        """
+        transcript_data = dict(self.transcript_data1)
+        transcript_data['language_code'] = 'ur'
+
+        with self.assertRaises(VideoTranscript.DoesNotExist):
+            VideoTranscript.objects.get(
+                video_id=transcript_data['video_id'],
+                language_code=transcript_data['language_code']
+            )
+
+        transcript_url = api.create_or_update_video_transcript(
+            video_id=transcript_data['video_id'],
+            language_code=transcript_data['language_code'],
+            file_name=transcript_data['name'],
+            file_format=transcript_data['file_format'],
+            provider=transcript_data['provider'],
+        )
+        self.assertEqual(transcript_url, transcript_data['name'])
+
+        expected_transcript = api.get_video_transcript(
+            video_id=transcript_data['video_id'],
+            language_code=transcript_data['language_code']
+        )
+        transcript_data['url'] = transcript_data.pop('name')
+        self.assertEqual(transcript_data, expected_transcript)
+
+    @data(
+        {'language_code': 'ur', 'has_url': True},
+        {'language_code': 'xyz', 'has_url': False},
+    )
+    @unpack
+    def test_get_video_transcript_url(self, language_code, has_url):
+        """
+        Verify that `get_video_transcript_url` api function works as expected.
+        """
+        transcript_url = api.get_video_transcript_url(self.video_id, language_code)
+        if has_url:
+            self.assertEqual(self.transcript_url, transcript_url)
+        else:
+            self.assertIsNone(transcript_url)
+
+    @data(
+        {
+            'file_data': None,
+            'file_format': TranscriptFormat.SJSON,
+            'provider': TranscriptProviderType.CIELO24
+        },
+        {
+            'file_data': ContentFile(FILE_DATA),
+            'file_format': TranscriptFormat.SRT,
+            'provider': TranscriptProviderType.THREE_PLAY_MEDIA
+        },
+    )
+    @unpack
+    def test_create_or_update_video_transcript(self, file_data, file_format, provider):
+        """
+        Verify that `create_or_update_video_transcript` api function updates existing transcript as expected.
+        """
+        video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
+        self.assertIsNotNone(video_transcript)
+
+        file_name = 'overwatch.{}'.format(file_format)
+        transcript_url = api.create_or_update_video_transcript(
+            self.video_id, 'ur', file_name, file_format, provider, file_data
+        )
+        video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
+
+        self.assertEqual(transcript_url, video_transcript.url())
+        self.assertEqual(video_transcript.file_format, file_format)
+        self.assertEqual(video_transcript.provider, provider)
+
+        if file_data:
+            self.assertTrue(transcript_url.startswith(settings.VIDEO_TRANSCRIPTS_SETTINGS['DIRECTORY_PREFIX']))
+            self.assertEqual(video_transcript.transcript.name, transcript_url)
+            with open(video_transcript.transcript.name) as saved_transcript:
+                self.assertEqual(saved_transcript.read(), FILE_DATA)
+        else:
+            self.assertEqual(video_transcript.transcript.name, file_name)
+
+    @data(
+        {
+            'file_format': '123',
+            'provider': TranscriptProviderType.CIELO24,
+            'exception': InvalidTranscriptFormat,
+            'exception_message': '123 transcript format is not supported',
+        },
+        {
+            'file_format': TranscriptFormat.SRT,
+            'provider': 123,
+            'exception': InvalidTranscriptProvider,
+            'exception_message': '123 transcript provider is not supported',
+        },
+    )
+    @unpack
+    def test_create_or_update_video_exceptions(self, file_format, provider, exception, exception_message):
+        """
+        Verify that `create_or_update_video_transcript` api function raises exceptions on invalid values.
+        """
+        with self.assertRaises(exception) as transcript_exception:
+            api.create_or_update_video_transcript(self.video_id, 'ur', 'overwatch.srt', file_format, provider)
+
+        self.assertEqual(transcript_exception.exception.message, exception_message)
+
+    def test_video_transcript_deletion(self):
+        """
+        Test video transcript deletion works as expected.
+        """
+        # get an existing video transcript
+        video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
+        existing_transcript_url = video_transcript.transcript.name
+
+        # This will replace the transcript for an existing video and delete the existing transcript
+        new_transcript_url = api.create_or_update_video_transcript(
+            self.video_id,
+            'ur',
+            'overwatch.srt',
+            TranscriptFormat.SRT,
+            TranscriptProviderType.CIELO24,
+            ContentFile(FILE_DATA)
+        )
+
+        # Verify that new transcript is set to video
+        video_transcript = VideoTranscript.objects.get(video_id=self.video_id, language_code='ur')
+        self.assertEqual(video_transcript.transcript.name, new_transcript_url)
+
+        # verify that new data is written correctly
+        with open(video_transcript.transcript.name) as saved_transcript:
+            self.assertEqual(saved_transcript.read(), FILE_DATA)
+
+        # Verify that an exception is raised if we try to open a deleted transcript file
+        with self.assertRaises(IOError) as file_open_exception:
+            File(open(existing_transcript_url))
+
+        self.assertEqual(file_open_exception.exception.strerror, u'No such file or directory')
+
+    def test_get_available_transcript_languages(self):
+        """
+        Verify that `get_available_transcript_languages` works as expected.
+        """
+        dupe_lang_video_id = 'duplicate_lang_video'
+        VideoTranscript.objects.create(**dict(constants.VIDEO_TRANSCRIPT_CIELO24, video_id=dupe_lang_video_id))
+        # `super-soaker` has got 'en' and 'de' transcripts
+        # `self.video_id` has got 'ur' transcript
+        # `duplicate_lang_video` has got 'en' transcript
+        # `non_existent_video_id` does not have any transcript
+        video_ids = ['super-soaker', self.video_id, dupe_lang_video_id, 'non_existent_video_id']
+        transcript_languages = api.get_available_transcript_languages(video_ids=video_ids)
+        self.assertItemsEqual(transcript_languages, ['de', 'en', 'ur'])
+
+
+@ddt
+class TranscriptPreferencesTest(TestCase):
+    """
+    TranscriptPreferences API Tests
+    """
+    def setUp(self):
+        """
+        Tests setup
+        """
+        self.course_id = 'edX/DemoX/Demo_Course'
+        self.transcript_preferences = TranscriptPreference.objects.create(
+            **constants.TRANSCRIPT_PREFERENCES_CIELO24
+        )
+
+        self.prefs = dict(constants.TRANSCRIPT_PREFERENCES_CIELO24)
+        self.prefs.update(constants.TRANSCRIPT_PREFERENCES_3PLAY)
+
+    def assert_prefs(self, received, expected):
+        """
+        Compare `received` with `expected` and assert if not equal
+        """
+        # no need to compare modified datetime
+        del received['modified']
+        self.assertEqual(received, expected)
+
+    def test_get_3rd_party_transcription_plans(self):
+        """
+        Verify that `get_3rd_party_transcription_plans` api function works as expected
+        """
+        self.assertEqual(
+            api.get_3rd_party_transcription_plans(),
+            utils.THIRD_PARTY_TRANSCRIPTION_PLANS
+        )
+
+    def test_get_transcript_preferences(self):
+        """
+        Verify that `get_transcript_preferences` api function works as expected
+        """
+        cielo24_prefs = dict(constants.TRANSCRIPT_PREFERENCES_CIELO24)
+        cielo24_prefs['three_play_turnaround'] = None
+
+        transcript_preferences = api.get_transcript_preferences(self.course_id)
+        self.assert_prefs(transcript_preferences, cielo24_prefs)
+
+    def test_remove_transcript_preferences(self):
+        """
+        Verify that `remove_transcript_preferences` api method works as expected.
+        """
+        # Verify that transcript preferences exist.
+        transcript_preferences = api.get_transcript_preferences(self.course_id)
+        self.assertIsNotNone(transcript_preferences)
+
+        # Remove course wide transcript preferences.
+        api.remove_transcript_preferences(self.course_id)
+
+        # Verify now transcript preferences no longer exist.
+        transcript_preferences = api.get_transcript_preferences(self.course_id)
+        self.assertIsNone(transcript_preferences)
+
+    def test_remove_transcript_preferences_not_found(self):
+        """
+        Verify that `remove_transcript_preferences` api method works as expected when no record is found.
+        """
+        course_id = 'dummy-course-id'
+
+        # Verify that transcript preferences do not exist.
+        transcript_preferences = api.get_transcript_preferences(course_id)
+        self.assertIsNone(transcript_preferences)
+
+        # Verify that calling `remove_transcript_preferences` does not break the code.
+        api.remove_transcript_preferences(course_id)
+
+    def test_update_transcript_preferences(self):
+        """
+        Verify that `create_or_update_transcript_preferences` api function updates as expected
+        """
+        transcript_preferences = api.create_or_update_transcript_preferences(**constants.TRANSCRIPT_PREFERENCES_3PLAY)
+        self.assert_prefs(transcript_preferences, self.prefs)
+
+    def test_create_transcript_preferences(self):
+        """
+        Verify that `create_or_update_transcript_preferences` api function creates as expected
+        """
+        self.prefs['course_id'] = 'edX/DemoX/Astonomy'
+
+        # Verify that no preference is present for course id `edX/DemoX/Astonomy`
+        self.assertIsNone(api.get_transcript_preferences(self.prefs['course_id']))
+
+        # create new preference
+        transcript_preferences = api.create_or_update_transcript_preferences(**self.prefs)
+        self.assert_prefs(transcript_preferences, self.prefs)
+
+        # Verify that 2 preferences now exist
+        self.assertEqual(TranscriptPreference.objects.count(), 2)
--- a/edxval/tests/test_views.py
+++ b/edxval/tests/test_views.py
@@ -3,13 +3,16 @@
 Tests for Video Abstraction Layer views
 """
 import json
-from ddt import ddt, data, unpack
+import unittest

+from ddt import data, ddt, unpack
 from django.core.urlresolvers import reverse
 from rest_framework import status

-from edxval.tests import constants, APIAuthTestCase
-from edxval.models import Profile, Video, CourseVideo
+from edxval.models import (CourseVideo, Profile, TranscriptFormat,
+                           TranscriptProviderType, Video, VideoTranscript)
+from edxval.serializers import TranscriptSerializer
+from edxval.tests import APIAuthTestCase, constants


 class VideoDetail(APIAuthTestCase):
@@ -206,6 +209,7 @@ class VideoDetail(APIAuthTestCase):
        )
        self.assertEqual(len(videos[0].encoded_videos.all()), 1)

+    @unittest.skip("Skipping for now. We may need this later when we create transcripts alongwith video")
    def test_update_remove_subtitles(self):
        # Create some subtitles
        self._create_videos(constants.COMPLETE_SET_STAR)
@@ -665,7 +669,7 @@ class VideoListTest(APIAuthTestCase):
        Tests number of queries for a Video with no Encoded Videos
        """
        url = reverse('video-list')
-        with self.assertNumQueries(9):
+        with self.assertNumQueries(8):
            self.client.post(url, constants.VIDEO_DICT_ZEBRA, format='json')

    def test_queries_for_two_encoded_video(self):
@@ -673,7 +677,7 @@ class VideoListTest(APIAuthTestCase):
        Tests number of queries for a Video/EncodedVideo(2) pair
        """
        url = reverse('video-list')
-        with self.assertNumQueries(15):
+        with self.assertNumQueries(13):
            self.client.post(url, constants.COMPLETE_SET_FISH, format='json')

    def test_queries_for_single_encoded_videos(self):
@@ -681,7 +685,7 @@ class VideoListTest(APIAuthTestCase):
        Tests number of queries for a Video/EncodedVideo(1) pair
                """
        url = reverse('video-list')
-        with self.assertNumQueries(13):
+        with self.assertNumQueries(11):
            self.client.post(url, constants.COMPLETE_SET_STAR, format='json')


@@ -718,18 +722,19 @@ class VideoDetailTest(APIAuthTestCase):
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
        response = self.client.post(url, constants.VIDEO_DICT_ZEBRA, format='json')
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-        with self.assertNumQueries(9):
+        with self.assertNumQueries(7):
            self.client.get("/edxval/videos/").data
        response = self.client.post(url, constants.COMPLETE_SET_FISH, format='json')
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-        with self.assertNumQueries(12):
+        with self.assertNumQueries(9):
            self.client.get("/edxval/videos/").data
        response = self.client.post(url, constants.COMPLETE_SET_STAR, format='json')
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
-        with self.assertNumQueries(14):
+        with self.assertNumQueries(10):
            self.client.get("/edxval/videos/").data


+@unittest.skip("Skipping for now. We may need these later when we create transcripts alongwith video")
 class SubtitleDetailTest(APIAuthTestCase):
    """
    Tests for subtitle API
@@ -811,6 +816,7 @@ class SubtitleDetailTest(APIAuthTestCase):
        )
        self.assertEqual(self.client.get(video_subtitles['content_url']).content, '{"start": "00:00:00"}')

+
 @ddt
 class VideoImagesViewTest(APIAuthTestCase):
    """
@@ -897,3 +903,135 @@ class VideoImagesViewTest(APIAuthTestCase):
            response.data['message'],
            message
        )
+
+
+@ddt
+class VideoTranscriptViewTest(APIAuthTestCase):
+    """
+    Tests VideoTranscriptView.
+    """
+
+    def setUp(self):
+        """
+        Tests setup.
+        """
+        self.url = reverse('create-video-transcript')
+        self.video = Video.objects.create(**constants.VIDEO_DICT_FISH)
+        self.transcript_data = constants.VIDEO_TRANSCRIPT_CIELO24
+        super(VideoTranscriptViewTest, self).setUp()
+
+    def test_create_transcript(self):
+        """
+        Tests POSTing transcript successfully.
+        """
+        post_transcript_data = dict(self.transcript_data)
+        post_transcript_data['name'] = post_transcript_data.pop('transcript')
+
+        response = self.client.post(self.url, post_transcript_data, format='json')
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+        serialized_data = TranscriptSerializer(VideoTranscript.objects.first()).data
+        post_transcript_data['url'] = post_transcript_data.pop('name')
+        self.assertEqual(serialized_data, post_transcript_data)
+
+    def test_update_existing_transcript(self):
+        """
+        Tests that POSTing a transcript which already exists is rejected with a 400 error.
+        """
+        VideoTranscript.objects.create(**self.transcript_data)
+
+        post_transcript_data = dict(self.transcript_data)
+        post_transcript_data['name'] = post_transcript_data.pop('transcript')
+
+        response = self.client.post(self.url, post_transcript_data, format='json')
+
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(
+            response.data['message'],
+            u'Can not override existing transcript for video "{video_id}" and language code "{language}".'.format(
+                video_id=self.video.edx_video_id, language=post_transcript_data['language_code'])
+            )
+
+    @data(
+        {
+            'post_data': {},
+            'message': u'video_id and name and language_code and provider and file_format must be specified.'
+        },
+        {
+            'post_data': {
+                'video_id': 'super-soaker',
+                'name': 'abc.xyz',
+                'language_code': 'en',
+                'provider': TranscriptProviderType.CIELO24,
+                'file_format': 'xyz'
+            },
+            'message': u'"xyz" transcript file type is not supported. Supported formats are "{}"'.format(
+                sorted(dict(TranscriptFormat.CHOICES).keys())
+            )
+        },
+        {
+            'post_data': {
+                'video_id': 'super-soaker',
+                'name': 'abc.srt',
+                'language_code': 'en',
+                'provider': 'xyz',
+                'file_format': TranscriptFormat.SRT
+            },
+            'message': u'"xyz" provider is not supported. Supported transcription providers are "{}"'.format(
+                sorted(dict(TranscriptProviderType.CHOICES).keys())
+            )
+        },
+    )
+    @unpack
+    def test_error_responses(self, post_data, message):
+        """
+        Tests error responses returned when POSTing invalid data.
+        """
+        response = self.client.post(self.url, post_data, format='json')
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(response.data['message'], message)
+
+
+@ddt
+class VideoStatusViewTest(APIAuthTestCase):
+    """
+    VideoStatusView Tests.
+    """
+    def setUp(self):
+        """
+        Tests setup.
+        """
+        self.url = reverse('video-status-update')
+        self.video = Video.objects.create(**constants.VIDEO_DICT_FISH)
+        super(VideoStatusViewTest, self).setUp()
+
+    @data(
+        {
+            'patch_data': {},
+            'message': u'"edx_video_id and status" params must be specified.',
+            'status_code': status.HTTP_400_BAD_REQUEST,
+        },
+        {
+            'patch_data': {'edx_video_id': 'super-soaker', 'status': 'fake'},
+            'message': u'"fake" is not a valid Video status.',
+            'status_code': status.HTTP_400_BAD_REQUEST,
+        },
+        {
+            'patch_data': {'edx_video_id': 'fake', 'status': 'transcript_ready'},
+            'message': u'Video is not found for specified edx_video_id: fake',
+            'status_code': status.HTTP_400_BAD_REQUEST,
+        },
+        {
+            'patch_data': {'edx_video_id': 'super-soaker', 'status': 'transcript_ready'},
+            'message': None,
+            'status_code': status.HTTP_200_OK,
+        },
+    )
+    @unpack
+    def test_transcript_status(self, patch_data, message, status_code):
+        """
+        Tests PATCHing video transcript status.
+        """
+        response = self.client.patch(self.url, patch_data, format='json')
+        self.assertEqual(response.status_code, status_code)
+        self.assertEqual(response.data.get('message'), message)
--- a/edxval/urls.py
+++ b/edxval/urls.py
@@ -9,22 +9,22 @@ from edxval import views
 urlpatterns = [
    url(r'^videos/$',
        views.VideoList.as_view(),
-        name="video-list"
+        name='video-list'
    ),
    url(
        r'^videos/(?P<edx_video_id>[-\w]+)$',
        views.VideoDetail.as_view(),
-        name="video-detail"
+        name='video-detail'
    ),
    url(
-        r'^videos/(?P<video__edx_video_id>[-\w]+)/(?P<language>[-_\w]+)$',
-        views.SubtitleDetail.as_view(),
-        name="subtitle-detail"
+        r'^videos/status/$',
+        views.VideoStatusView.as_view(),
+        name='video-status-update'
    ),
    url(
-        r'^videos/(?P<edx_video_id>[-\w]+)/(?P<language>[-_\w]+)/subtitle$',
-        views.get_subtitle,
-        name="subtitle-content"
+        r'^videos/video-transcripts/create/$',
+        views.VideoTranscriptView.as_view(),
+        name='create-video-transcript'
    ),
    url(
        r'^videos/video-images/update/$',

--- a/edxval/utils.py
+++ b/edxval/utils.py
@@ -5,6 +5,120 @@ Util methods to be used in api and models.
 from django.conf import settings
 from django.core.files.storage import get_storage_class

+# 3rd Party Transcription Plans
+THIRD_PARTY_TRANSCRIPTION_PLANS = {
+
+    'Cielo24': {
+        'display_name': 'Cielo24',
+        'turnaround': {
+            'PRIORITY': 'Priority (24 hours)',
+            'STANDARD': 'Standard (48 hours)'
+        },
+        'fidelity': {
+            'MECHANICAL': {
+                'display_name': 'Mechanical (75% accuracy)',
+                'languages': {
+                    'nl': 'Dutch',
+                    'en': 'English',
+                    'fr': 'French',
+                    'de': 'German',
+                    'it': 'Italian',
+                    'es': 'Spanish',
+                }
+            },
+            'PREMIUM': {
+                'display_name': 'Premium (95% accuracy)',
+                'languages': {
+                    'en': 'English',
+                }
+            },
+            'PROFESSIONAL': {
+                'display_name': 'Professional (99% accuracy)',
+                'languages': {
+                    'ar': 'Arabic',
+                    'zh-tw': 'Chinese - Mandarin (Traditional)',
+                    'zh-cmn': 'Chinese - Mandarin (Simplified)',
+                    'zh-yue': 'Chinese - Cantonese (Traditional)',
+                    'nl': 'Dutch',
+                    'en': 'English',
+                    'fr': 'French',
+                    'de': 'German',
+                    'he': 'Hebrew',
+                    'hi': 'Hindi',
+                    'it': 'Italian',
+                    'ja': 'Japanese',
+                    'ko': 'Korean',
+                    'pt': 'Portuguese',
+                    'ru': 'Russian',
+                    'es': 'Spanish',
+                    'tr': 'Turkish',
+                }
+            },
+        }
+    },
+    '3PlayMedia': {
+        'display_name': '3Play Media',
+        'turnaround': {
+            'same_day_service': 'Same day',
+            'rush_service': '24 hours (rush)',
+            'expedited_service': '2 days (expedited)',
+            'default': '4 days (default)',
+            'extended_service':'10 days (extended)'
+        },
+        'languages': {
+            'en': 'English',
+            'fr': 'French',
+            'de': 'German',
+            'it': 'Italian',
+            'nl': 'Dutch',
+            'es': 'Spanish',
+            'el': 'Greek',
+            'pt': 'Portuguese',
+            'zh': 'Chinese',
+            'ar': 'Arabic',
+            'he': 'Hebrew',
+            'ru': 'Russian',
+            'ja': 'Japanese',
+            'sv': 'Swedish',
+            'cs': 'Czech',
+            'da': 'Danish',
+            'fi': 'Finnish',
+            'id': 'Indonesian',
+            'ko': 'Korean',
+            'no': 'Norwegian',
+            'pl': 'Polish',
+            'th': 'Thai',
+            'tr': 'Turkish',
+            'vi': 'Vietnamese',
+            'ro': 'Romanian',
+            'hu': 'Hungarian',
+            'ms': 'Malay',
+            'bg': 'Bulgarian',
+            'tl': 'Tagalog',
+            'sr': 'Serbian',
+            'sk': 'Slovak',
+            'uk': 'Ukrainian',
+        },
+        # Valid translations -- a mapping of source languages to the
+        # translatable target languages.
+        'translations': {
+            'es': [
+                'en'
+            ],
+            'en': [
+                'el', 'en', 'zh', 'vi',
+                'it', 'ar', 'cs', 'id',
+                'es', 'ru', 'nl', 'pt',
+                'no', 'tr', 'tl', 'th',
+                'ro', 'pl', 'fr', 'bg',
+                'uk', 'de', 'da', 'fi',
+                'hu', 'ja', 'he', 'sr',
+                'ko', 'sv', 'sk', 'ms'
+            ],
+        }
+    }
+}
+

 def video_image_path(video_image_instance, filename):  # pylint:disable=unused-argument
    """
@@ -29,3 +143,28 @@ def get_video_image_storage():
        # during edx-platform loading this method gets called but settings are not ready yet
        # so in that case we will return default(FileSystemStorage) storage class instance
        return get_storage_class()()
+
+
+def video_transcript_path(video_transcript_instance, filename):  # pylint:disable=unused-argument
+    """
+    Returns video transcript path.
+
+    Arguments:
+        video_transcript_instance (VideoTranscript): This is passed automatically by models.CustomizableFileField
+        filename (str): name of transcript file
+    """
+    return u'{}{}'.format(settings.VIDEO_TRANSCRIPTS_SETTINGS.get('DIRECTORY_PREFIX', ''), filename)
+
+
+def get_video_transcript_storage():
+    """
+    Return the configured django storage backend for video transcripts.
+    """
+    if hasattr(settings, 'VIDEO_TRANSCRIPTS_SETTINGS'):
+        return get_storage_class(
+            settings.VIDEO_TRANSCRIPTS_SETTINGS.get('STORAGE_CLASS'),
+        )(**settings.VIDEO_TRANSCRIPTS_SETTINGS.get('STORAGE_KWARGS', {}))
+    else:
+        # during edx-platform loading this method gets called but settings are not ready yet
+        # so in that case we will return default(FileSystemStorage) storage class instance
+        return get_storage_class()()
--- a/edxval/views.py
+++ b/edxval/views.py
 """
 Views file for django app edxval.
 """
-from rest_framework.views import APIView
-from rest_framework import generics
-from rest_framework.authentication import SessionAuthentication
-from rest_framework_oauth.authentication import OAuth2Authentication
-from rest_framework.permissions import DjangoModelPermissions
-from rest_framework.response import Response
-from rest_framework import status
+import logging
+
+from django.core.exceptions import ValidationError
 from django.http import HttpResponse
 from django.shortcuts import get_object_or_404
-from django.core.exceptions import ValidationError
 from django.views.decorators.http import last_modified
+from rest_framework import generics, status
+from rest_framework.authentication import SessionAuthentication
+from rest_framework.permissions import DjangoModelPermissions
+from rest_framework.response import Response
+from rest_framework.views import APIView
+from rest_framework_oauth.authentication import OAuth2Authentication
+
+from edxval.api import (create_or_update_video_transcript,
+                        get_video_transcript, update_video_status)
+from edxval.models import (CourseVideo, Profile, TranscriptFormat,
+                           TranscriptProviderType, Video, VideoImage,
+                           VideoTranscript)
+from edxval.serializers import TranscriptSerializer, VideoSerializer

-from edxval.models import Video, Profile, Subtitle, CourseVideo, VideoImage
-from edxval.serializers import (
-    VideoSerializer,
-    SubtitleSerializer
-)
+LOGGER = logging.getLogger(__name__)  # pylint: disable=C0103
+
+VALID_VIDEO_STATUSES = [
+    'transcription_in_progress',
+    'transcript_ready',
+]


 class ReadRestrictedDjangoModelPermissions(DjangoModelPermissions):
@@ -92,15 +101,116 @@ class VideoDetail(generics.RetrieveUpdateDestroyAPIView):
    serializer_class = VideoSerializer


-class SubtitleDetail(MultipleFieldLookupMixin, generics.RetrieveUpdateDestroyAPIView):
+class VideoTranscriptView(APIView):
    """
-    Gets a subtitle instance given its id
+    A Transcription View, used by edx-video-pipeline to create (but never overwrite) video transcripts.
+    """
+    authentication_classes = (OAuth2Authentication, SessionAuthentication)
+
+    # noinspection PyMethodMayBeStatic
+    def post(self, request):
+        """
+        Creates a video transcript; responds 400 on missing/unsupported params or an existing transcript.
+
+        Arguments:
+            request: A request whose data carries video_id, name, language_code, provider and file_format.
+        """
+        attrs = ('video_id', 'name', 'language_code', 'provider', 'file_format')
+        missing = [attr for attr in attrs if attr not in request.data]
+        if missing:
+            LOGGER.warning(
+                '[VAL] Required transcript params are missing. %s', ' and '.join(missing)
+            )
+            return Response(
+                status=status.HTTP_400_BAD_REQUEST,
+                data=dict(message=u'{missing} must be specified.'.format(missing=' and '.join(missing)))
+            )
+
+        video_id = request.data['video_id']
+        language_code = request.data['language_code']
+        transcript_name = request.data['name']
+        provider = request.data['provider']
+        file_format = request.data['file_format']
+
+        supported_formats = sorted(dict(TranscriptFormat.CHOICES).keys())
+        if file_format not in supported_formats:
+            message = (
+                u'"{format}" transcript file type is not supported. Supported formats are "{supported_formats}"'
+            ).format(format=file_format, supported_formats=supported_formats)
+            return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})
+
+        supported_providers = sorted(dict(TranscriptProviderType.CHOICES).keys())
+        if provider not in supported_providers:
+            message = (
+                u'"{provider}" provider is not supported. Supported transcription providers are "{supported_providers}"'
+            ).format(provider=provider, supported_providers=supported_providers)
+            return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})
+
+        transcript = VideoTranscript.get_or_none(video_id, language_code)  # an existing one is never overwritten
+        if transcript is None:
+            create_or_update_video_transcript(
+                video_id,
+                language_code,
+                transcript_name,
+                file_format,
+                provider,
+            )
+            response = Response(status=status.HTTP_200_OK)
+        else:
+            message = (
+                u'Can not override existing transcript for video "{video_id}" and language code "{language}".'
+            ).format(video_id=video_id, language=language_code)
+            response = Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})
+
+        return response
+
+
+class VideoStatusView(APIView):
+    """
+    API view that changes the status of an existing video.
+
+    Note:
+        Only `transcription_in_progress` and `transcript_ready` are accepted right now, since the
+        view was written for the video transcription flow. Further statuses can be added to the
+        valid status list if other workflows need to update a video's status through this view.
    """
    authentication_classes = (OAuth2Authentication, SessionAuthentication)
-    permission_classes = (ReadRestrictedDjangoModelPermissions,)
-    lookup_fields = ("video__edx_video_id", "language")
-    queryset = Subtitle.objects.all()
-    serializer_class = SubtitleSerializer
+
+    @staticmethod
+    def _bad_request(message):
+        """Builds a 400 response whose payload carries the given message."""
+        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})
+
+    def patch(self, request):
+        """
+        Set a video's status from the `edx_video_id` and `status` request params.
+
+        Responds with 400 and a `message` when a param is missing, the status is not
+        one of the valid video statuses, or no video matches the given edx_video_id.
+        """
+        # Both params are mandatory; report whichever ones are absent.
+        absent_params = [name for name in ('edx_video_id', 'status') if name not in request.data]
+        if absent_params:
+            return self._bad_request(
+                u'"{missing}" params must be specified.'.format(missing=' and '.join(absent_params))
+            )
+
+        edx_video_id, new_status = request.data['edx_video_id'], request.data['status']
+        if new_status not in VALID_VIDEO_STATUSES:
+            return self._bad_request(u'"{status}" is not a valid Video status.'.format(status=new_status))
+
+        try:
+            video = Video.objects.get(edx_video_id=edx_video_id)
+            video.status = new_status
+            video.save()
+        except Video.DoesNotExist:
+            # No video matches the given id: report it to the caller as a bad request.
+            return self._bad_request(
+                u'Video is not found for specified edx_video_id: {edx_video_id}'.format(edx_video_id=edx_video_id)
+            )
+
+        # The video was found and saved with its new status.
+        return Response(status=status.HTTP_200_OK, data={})


 class VideoImagesView(APIView):
@@ -148,19 +258,3 @@ class VideoImagesView(APIView):
            )

        return Response()
-
-
-def _last_modified_subtitle(request, edx_video_id, language):  # pylint: disable=W0613
-    """
-    Returns the last modified subtitle
-    """
-    return Subtitle.objects.get(video__edx_video_id=edx_video_id, language=language).modified
-
-@last_modified(last_modified_func=_last_modified_subtitle)
-def get_subtitle(request, edx_video_id, language): # pylint: disable=W0613
-    """
-    Return content of subtitle by id
-    """
-    sub = Subtitle.objects.get(video__edx_video_id=edx_video_id, language=language)
-    response = HttpResponse(sub.content, content_type=sub.content_type)
-    return response