Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-val
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-val
Commits
00b8ded8
Unverified
Commit
00b8ded8
authored
Apr 13, 2018
by
Mushtaq Ali
Committed by
GitHub
Apr 13, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #134 from edx/import_video_transcripts
Import video transcripts
parents
6469fc26
7ef924d0
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
906 additions
and
182 deletions
+906
-182
edxval/api.py
+182
-65
edxval/models.py
+1
-11
edxval/tests/constants.py
+41
-1
edxval/tests/data/wow.sjson
+2
-3
edxval/tests/test_api.py
+628
-99
edxval/tests/test_views.py
+3
-1
edxval/utils.py
+46
-0
edxval/views.py
+1
-1
requirements/base.in
+1
-0
setup.py
+1
-1
No files found.
edxval/api.py
View file @
00b8ded8
...
...
@@ -8,19 +8,25 @@ from enum import Enum
from
uuid
import
uuid4
from
django.core.exceptions
import
ObjectDoesNotExist
,
ValidationError
from
django.core.files
import
File
from
django.core.files.base
import
ContentFile
from
fs
import
open_fs
from
fs.errors
import
ResourceNotFound
from
fs.path
import
combine
from
lxml
import
etree
from
lxml.etree
import
Element
,
SubElement
from
pysrt.srtexc
import
Error
from
edxval.exceptions
import
(
InvalidTranscriptFormat
,
InvalidTranscriptProvider
,
ValCannotCreateError
,
ValCannotUpdateError
,
ValInternalError
,
ValVideoNotFoundError
)
from
edxval.models
import
(
CourseVideo
,
EncodedVideo
,
Profile
,
TranscriptFormat
,
TranscriptPreference
,
from
edxval.models
import
(
CourseVideo
,
EncodedVideo
,
Profile
,
TranscriptPreference
,
TranscriptProviderType
,
Video
,
VideoImage
,
VideoTranscript
,
ThirdPartyTranscriptCredentialsState
)
from
edxval.serializers
import
TranscriptPreferenceSerializer
,
TranscriptSerializer
,
VideoSerializer
from
edxval.utils
import
THIRD_PARTY_TRANSCRIPTION_PLANS
from
edxval.utils
import
TranscriptFormat
,
THIRD_PARTY_TRANSCRIPTION_PLANS
,
create_file_in_fs
,
get_transcript_format
logger
=
logging
.
getLogger
(
__name__
)
# pylint: disable=C0103
...
...
@@ -789,7 +795,7 @@ def export_to_xml(video_id, resource_fs, static_dir, course_id=None):
video_id (str): Video id of the video to export transcripts.
course_id (str): The ID of the course with which this video is associated.
static_dir (str): The Directory to store transcript file.
resource_fs (
OSFS): The file system to store transcripts
.
resource_fs (
SubFS): Export file system
.
Returns:
An lxml video_asset element containing export data
...
...
@@ -826,7 +832,7 @@ def export_to_xml(video_id, resource_fs, static_dir, course_id=None):
return
create_transcripts_xml
(
video_id
,
video_el
,
resource_fs
,
static_dir
)
def
create_trancript_file
(
video_id
,
language_code
,
file_format
,
resource_fs
,
static_dir
):
def
create_tran
s
cript_file
(
video_id
,
language_code
,
file_format
,
resource_fs
,
static_dir
):
"""
Writes transcript file to file system.
...
...
@@ -835,24 +841,17 @@ def create_trancript_file(video_id, language_code, file_format, resource_fs, sta
language_code (str): Language code of the transcript.
file_format (str): File format of the transcript file.
static_dir (str): The Directory to store transcript file.
resource_fs (
OS
FS): The file system to store transcripts.
resource_fs (
Sub
FS): The file system to store transcripts.
"""
transcript_name
=
u'{static_dir}/{video_id}-{language_code}.{file_format}'
.
format
(
static_dir
=
static_dir
,
transcript_name
=
u'{video_id}-{language_code}.{file_format}'
.
format
(
video_id
=
video_id
,
language_code
=
language_code
,
file_format
=
file_format
)
try
:
transcript_data
=
get_video_transcript_data
(
video_id
,
language_code
)
if
transcript_data
:
transcript_content
=
transcript_data
[
'content'
]
with
resource_fs
.
open
(
transcript_name
,
'wb'
)
as
f
:
f
.
write
(
transcript_content
)
except
Exception
:
# Do not raise exception in case no transcript file is found for now.
# TODO: Remove this - EDUCATOR-2173
pass
transcript_data
=
get_video_transcript_data
(
video_id
,
language_code
)
if
transcript_data
:
transcript_content
=
transcript_data
[
'content'
]
create_file_in_fs
(
transcript_content
,
transcript_name
,
resource_fs
,
static_dir
)
def
create_transcripts_xml
(
video_id
,
video_el
,
resource_fs
,
static_dir
):
...
...
@@ -864,7 +863,7 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
video_id (str): Video id of the video.
video_el (Element): lxml Element object
static_dir (str): The Directory to store transcript file.
resource_fs (
OS
FS): The file system to store transcripts.
resource_fs (
Sub
FS): The file system to store transcripts.
Returns:
lxml Element object with transcripts information
...
...
@@ -880,13 +879,18 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
language_code
=
video_transcript
.
language_code
file_format
=
video_transcript
.
file_format
create_trancript_file
(
video_id
,
language_code
,
file_format
,
resource_fs
,
static_dir
)
create_transcript_file
(
video_id
,
language_code
,
file_format
,
resource_fs
.
delegate_fs
(),
combine
(
u'course'
,
static_dir
)
# File system should not start from /draft directory.
)
SubElement
(
transcripts_el
,
'transcript'
,
{
'file_name'
:
video_transcript
.
transcript
.
name
,
'language_code'
:
language_code
,
'file_format'
:
file_format
,
'provider'
:
video_transcript
.
provider
,
...
...
@@ -897,7 +901,7 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
return
video_el
def
import_from_xml
(
xml
,
edx_video_id
,
course_id
=
None
):
def
import_from_xml
(
xml
,
edx_video_id
,
resource_fs
,
static_dir
,
external_transcripts
=
dict
(),
course_id
=
None
):
"""
Imports data from a video_asset element about the given video_id.
...
...
@@ -907,20 +911,30 @@ def import_from_xml(xml, edx_video_id, course_id=None):
Arguments:
xml (Element): An lxml video_asset element containing import data
edx_video_id (str): val video id
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
external_transcripts (dict): A dict containing the list of names of the external transcripts.
Example:
{
'en': ['The_Flash.srt', 'Harry_Potter.srt'],
'es': ['Green_Arrow.srt']
}
course_id (str): The ID of a course to associate the video with
Raises:
ValCannotCreateError: if there is an error importing the video
Returns:
edx_video_id (str): val video id.
"""
if
xml
.
tag
!=
'video_asset'
:
raise
ValCannotCreateError
(
'Invalid XML'
)
# TODO this will be moved as a part of EDUCATOR-2173
if
not
edx_video_id
:
return
# If video with edx_video_id already exists, associate it with the given course_id.
try
:
if
not
edx_video_id
:
raise
Video
.
DoesNotExist
video
=
Video
.
objects
.
get
(
edx_video_id
=
edx_video_id
)
logger
.
info
(
"edx_video_id '
%
s' present in course '
%
s' not imported because it exists in VAL."
,
...
...
@@ -934,60 +948,163 @@ def import_from_xml(xml, edx_video_id, course_id=None):
if
image_file_name
:
VideoImage
.
create_or_update
(
course_video
,
image_file_name
)
return
return
edx_video_id
except
ValidationError
as
err
:
logger
.
exception
(
err
.
message
)
raise
ValCannotCreateError
(
err
.
message_dict
)
except
Video
.
DoesNotExist
:
pass
# Video with edx_video_id did not exist, so create one from xml data.
data
=
{
'edx_video_id'
:
edx_video_id
,
'client_video_id'
:
xml
.
get
(
'client_video_id'
),
'duration'
:
xml
.
get
(
'duration'
),
'status'
:
'imported'
,
'encoded_videos'
:
[],
'courses'
:
[{
course_id
:
xml
.
get
(
'image'
)}]
if
course_id
else
[],
}
for
encoded_video_el
in
xml
.
iterfind
(
'encoded_video'
):
profile_name
=
encoded_video_el
.
get
(
'profile'
)
if
edx_video_id
:
# Video with edx_video_id did not exist, so create one from xml data.
data
=
{
'edx_video_id'
:
edx_video_id
,
'client_video_id'
:
xml
.
get
(
'client_video_id'
),
'duration'
:
xml
.
get
(
'duration'
),
'status'
:
'imported'
,
'encoded_videos'
:
[],
'courses'
:
[{
course_id
:
xml
.
get
(
'image'
)}]
if
course_id
else
[],
}
for
encoded_video_el
in
xml
.
iterfind
(
'encoded_video'
):
profile_name
=
encoded_video_el
.
get
(
'profile'
)
try
:
Profile
.
objects
.
get
(
profile_name
=
profile_name
)
except
Profile
.
DoesNotExist
:
logger
.
info
(
"Imported edx_video_id '
%
s' contains unknown profile '
%
s'."
,
edx_video_id
,
profile_name
)
continue
data
[
'encoded_videos'
]
.
append
({
'profile'
:
profile_name
,
'url'
:
encoded_video_el
.
get
(
'url'
),
'file_size'
:
encoded_video_el
.
get
(
'file_size'
),
'bitrate'
:
encoded_video_el
.
get
(
'bitrate'
),
})
# Create external video if no edx_video_id.
edx_video_id
=
create_video
(
data
)
else
:
edx_video_id
=
create_external_video
(
'External Video'
)
create_transcript_objects
(
xml
,
edx_video_id
,
resource_fs
,
static_dir
,
external_transcripts
)
return
edx_video_id
def
import_transcript_from_fs
(
edx_video_id
,
language_code
,
file_name
,
provider
,
resource_fs
,
static_dir
):
"""
Imports transcript file from file system and creates transcript record in DS.
Arguments:
edx_video_id (str): Video id of the video.
language_code (unicode): Language code of the requested transcript.
file_name (unicode): File name of the transcript file.
provider (unicode): Transcript provider.
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
"""
file_format
=
None
transcript_data
=
get_video_transcript_data
(
edx_video_id
,
language_code
)
# First check if transcript record does not exist.
if
not
transcript_data
:
# Read file from import file system and attach it to transcript record in DS.
try
:
Profile
.
objects
.
get
(
profile_name
=
profile_name
)
except
Profile
.
DoesNotExist
:
logger
.
info
(
"Imported edx_video_id '
%
s' contains unknown profile '
%
s'."
,
with
resource_fs
.
open
(
combine
(
static_dir
,
file_name
),
'rb'
)
as
f
:
file_content
=
f
.
read
()
file_content
=
file_content
.
decode
(
'utf-8-sig'
)
except
ResourceNotFound
as
exc
:
# Don't raise exception in case transcript file is not found in course OLX.
logger
.
warn
(
'[edx-val] "
%
s" transcript "
%
s" for video "
%
s" is not found.'
,
language_code
,
file_name
,
edx_video_id
)
return
except
UnicodeDecodeError
:
# Don't raise exception in case transcript contains non-utf8 content.
logger
.
warn
(
'[edx-val] "
%
s" transcript "
%
s" for video "
%
s" contains a non-utf8 file content.'
,
language_code
,
file_name
,
edx_video_id
)
return
# Get file format from transcript content.
try
:
file_format
=
get_transcript_format
(
file_content
)
except
Error
as
ex
:
# Don't raise exception, just don't create transcript record.
logger
.
warn
(
'[edx-val] Error while getting transcript format for video=
%
s -- language_code=
%
s --file_name=
%
s'
,
edx_video_id
,
profile_name
language_code
,
file_name
)
continue
data
[
'encoded_videos'
]
.
append
({
'profile'
:
profile_name
,
'url'
:
encoded_video_el
.
get
(
'url'
),
'file_size'
:
encoded_video_el
.
get
(
'file_size'
),
'bitrate'
:
encoded_video_el
.
get
(
'bitrate'
),
})
create_video
(
data
)
create_transcript_objects
(
xml
)
return
# Create transcript record.
create_video_transcript
(
video_id
=
edx_video_id
,
language_code
=
language_code
,
file_format
=
file_format
,
content
=
ContentFile
(
file_content
),
provider
=
provider
)
def
create_transcript_objects
(
xml
):
def
create_transcript_objects
(
xml
,
edx_video_id
,
resource_fs
,
static_dir
,
external_transcripts
):
"""
Create VideoTranscript objects.
Arguments:
xml (Element): lxml Element object
xml (Element): lxml Element object.
edx_video_id (str): Video id of the video.
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
external_transcripts (dict): A dict containing the list of names of the external transcripts.
Example:
{
'en': ['The_Flash.srt', 'Harry_Potter.srt'],
'es': ['Green_Arrow.srt']
}
"""
for
transcript
in
xml
.
findall
(
'.//transcripts/transcript'
):
try
:
create_or_update_video_transcript
(
transcript
.
attrib
[
'video_id'
],
transcript
.
attrib
[
'language_code'
],
metadata
=
dict
(
# File system should not start from /draft directory.
with
open_fs
(
resource_fs
.
root_path
.
split
(
'/drafts'
)[
0
])
as
file_system
:
# First import VAL transcripts.
for
transcript
in
xml
.
findall
(
'.//transcripts/transcript'
):
try
:
file_format
=
transcript
.
attrib
[
'file_format'
]
language_code
=
transcript
.
attrib
[
'language_code'
]
transcript_file_name
=
u'{edx_video_id}-{language_code}.{file_format}'
.
format
(
edx_video_id
=
edx_video_id
,
language_code
=
language_code
,
file_format
=
file_format
)
import_transcript_from_fs
(
edx_video_id
=
edx_video_id
,
language_code
=
transcript
.
attrib
[
'language_code'
],
file_name
=
transcript_file_name
,
provider
=
transcript
.
attrib
[
'provider'
],
file_name
=
transcript
.
attrib
[
'file_name'
],
file_format
=
transcript
.
attrib
[
'file_format'
],
resource_fs
=
file_system
,
static_dir
=
static_dir
)
except
KeyError
:
logger
.
warn
(
"VAL: Required attributes are missing from xml, xml=[
%
s]"
,
etree
.
tostring
(
transcript
)
.
strip
())
# This won't overwrite transcript for a language which is already present for the video.
for
language_code
,
transcript_file_names
in
external_transcripts
.
iteritems
():
for
transcript_file_name
in
transcript_file_names
:
import_transcript_from_fs
(
edx_video_id
=
edx_video_id
,
language_code
=
language_code
,
file_name
=
transcript_file_name
,
provider
=
TranscriptProviderType
.
CUSTOM
,
resource_fs
=
file_system
,
static_dir
=
static_dir
)
)
except
KeyError
:
logger
.
warn
(
"VAL: Required attributes are missing from xml, xml=[
%
s]"
,
etree
.
tostring
(
transcript
)
.
strip
())
edxval/models.py
View file @
00b8ded8
...
...
@@ -25,7 +25,7 @@ from django.dispatch import receiver
from
django.utils.six
import
python_2_unicode_compatible
from
model_utils.models
import
TimeStampedModel
from
edxval.utils
import
(
get_video_image_storage
,
from
edxval.utils
import
(
TranscriptFormat
,
get_video_image_storage
,
get_video_transcript_storage
,
video_image_path
,
video_transcript_path
)
...
...
@@ -373,16 +373,6 @@ class TranscriptProviderType(object):
)
class
TranscriptFormat
(
object
):
SRT
=
'srt'
SJSON
=
'sjson'
CHOICES
=
(
(
SRT
,
'SubRip'
),
(
SJSON
,
'SRT JSON'
)
)
class
CustomizableFileField
(
models
.
FileField
):
"""
Subclass of FileField that allows custom settings to not
...
...
edxval/tests/constants.py
View file @
00b8ded8
...
...
@@ -4,14 +4,19 @@
Constants used for tests.
"""
from
edxval.models
import
(
TranscriptFormat
,
TranscriptProviderType
,
Cielo24Fidelity
,
Cielo24Turnaround
,
ThreePlayTurnaround
)
from
edxval.utils
import
TranscriptFormat
EDX_VIDEO_ID
=
"itchyjacket"
EXPORT_IMPORT_COURSE_DIR
=
u'course'
EXPORT_IMPORT_STATIC_DIR
=
u'static'
"""
Generic Profiles for manually creating profile objects
"""
...
...
@@ -363,12 +368,46 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
encoded_videos
=
[],
)
TRANSCRIPT_DATA
=
{
"overwatch"
:
"""
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻."""
,
"flash"
:
"""
1
00:00:07,180 --> 00:00:08,460
This is Flash line 1."""
,
"wow"
:
"""{
\n
"start": [10],
\n
"end": [100],
\n
"text": ["Hi, welcome to edxval."]
\n
}
\n
"""
}
VIDEO_TRANSCRIPT_CUSTOM_SRT
=
dict
(
language_code
=
'en'
,
transcript
=
'edxval/tests/data/The_Flash.srt'
,
provider
=
TranscriptProviderType
.
CUSTOM
,
file_format
=
TranscriptFormat
.
SRT
,
file_data
=
TRANSCRIPT_DATA
[
'flash'
]
)
VIDEO_TRANSCRIPT_CUSTOM_SJSON
=
dict
(
language_code
=
'en'
,
transcript
=
'edxval/tests/data/wow.sjson'
,
provider
=
TranscriptProviderType
.
CUSTOM
,
file_format
=
TranscriptFormat
.
SJSON
,
file_data
=
TRANSCRIPT_DATA
[
'wow'
]
)
VIDEO_TRANSCRIPT_CIELO24
=
dict
(
video_id
=
'super-soaker'
,
language_code
=
'en'
,
transcript
=
'edxval/tests/data/The_Flash.srt'
,
provider
=
TranscriptProviderType
.
CIELO24
,
file_format
=
TranscriptFormat
.
SRT
,
file_data
=
TRANSCRIPT_DATA
[
'flash'
]
)
VIDEO_TRANSCRIPT_3PLAY
=
dict
(
...
...
@@ -377,6 +416,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict(
transcript
=
'edxval/tests/data/wow.sjson'
,
provider
=
TranscriptProviderType
.
THREE_PLAY_MEDIA
,
file_format
=
TranscriptFormat
.
SJSON
,
file_data
=
TRANSCRIPT_DATA
[
'wow'
]
)
TRANSCRIPT_PREFERENCES_CIELO24
=
dict
(
...
...
edxval/tests/data/wow.sjson
View file @
00b8ded8
{
"start": [10],
"end": [100],
"text": ["Hi, welcome to edxval."],
}
\ No newline at end of file
"text": ["Hi, welcome to edxval."]
}
edxval/tests/test_api.py
View file @
00b8ded8
...
...
@@ -30,26 +30,18 @@ from edxval.api import (InvalidTranscriptFormat, InvalidTranscriptProvider,
ValCannotUpdateError
,
ValVideoNotFoundError
,
VideoSortField
)
from
edxval.models
import
(
LIST_MAX_ITEMS
,
CourseVideo
,
EncodedVideo
,
Profile
,
ThirdPartyTranscriptCredentialsState
,
TranscriptFormat
,
TranscriptPreference
,
ThirdPartyTranscriptCredentialsState
,
TranscriptPreference
,
TranscriptProviderType
,
Video
,
VideoImage
,
VideoTranscript
)
from
edxval.serializers
import
VideoSerializer
from
edxval.tests
import
APIAuthTestCase
,
constants
STATIC_DIR
=
u'static'
STATIC_PATH
=
u'/static'
FILE_DATA
=
"""
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
"""
def
omit_attrs
(
dict
,
attrs_to_omit
=
[]):
"""
Omits provided attributes from the dict.
"""
return
{
attr
:
value
for
attr
,
value
in
dict
.
iteritems
()
if
attr
not
in
attrs_to_omit
}
class
SortedVideoTestMixin
(
object
):
...
...
@@ -938,15 +930,16 @@ class ExportTest(TestCase):
# create internal video transcripts
transcript_data
=
dict
(
constants
.
VIDEO_TRANSCRIPT_CIELO24
,
video
=
video
)
transcript_data
.
pop
(
'video_id'
)
transcript_data
=
omit_attrs
(
transcript_data
,
[
'video_id'
,
'file_data'
]
)
VideoTranscript
.
objects
.
create
(
**
transcript_data
)
transcript_data
=
dict
(
constants
.
VIDEO_TRANSCRIPT_3PLAY
,
video
=
video
)
transcript_data
.
pop
(
'video_id'
)
transcript_data
=
omit_attrs
(
transcript_data
,
[
'video_id'
,
'file_data'
]
)
VideoTranscript
.
objects
.
create
(
**
transcript_data
)
self
.
temp_dir
=
mkdtemp
()
self
.
file_system
=
OSFS
(
self
.
temp_dir
)
self
.
file_system
.
makedir
(
STATIC_DIR
,
recreate
=
True
)
delegate_fs
=
OSFS
(
self
.
temp_dir
)
self
.
file_system
=
delegate_fs
.
makedir
(
constants
.
EXPORT_IMPORT_COURSE_DIR
,
recreate
=
True
)
self
.
file_system
.
makedir
(
constants
.
EXPORT_IMPORT_STATIC_DIR
,
recreate
=
True
)
self
.
addCleanup
(
shutil
.
rmtree
,
self
.
temp_dir
)
def
assert_xml_equal
(
self
,
left
,
right
):
...
...
@@ -981,7 +974,7 @@ class ExportTest(TestCase):
<video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/>
"""
)
self
.
assert_xml_equal
(
api
.
export_to_xml
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
self
.
file_system
,
STATIC_DIR
),
api
.
export_to_xml
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_
STATIC_DIR
),
expected
)
...
...
@@ -993,7 +986,11 @@ class ExportTest(TestCase):
<video_asset client_video_id="TWINKLE TWINKLE" duration="122.0" image=""/>
"""
)
exported_xml
=
api
.
export_to_xml
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
self
.
file_system
,
STATIC_DIR
)
exported_xml
=
api
.
export_to_xml
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
self
.
assert_xml_equal
(
exported_xml
,
expected
)
# Verify that no transcript is present in the XML.
...
...
@@ -1014,14 +1011,19 @@ class ExportTest(TestCase):
<encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
<encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
<transcripts>
<transcript file_format="sjson"
file_name="edxval/tests/data/wow.sjson"
language_code="de" provider="3PlayMedia" />
<transcript file_format="srt"
file_name="edxval/tests/data/The_Flash.srt"
language_code="en" provider="Cielo24" />
<transcript file_format="sjson" language_code="de" provider="3PlayMedia" />
<transcript file_format="srt" language_code="en" provider="Cielo24" />
</transcripts>
</video_asset>
"""
.
format
(
image
=
image
))
self
.
assert_xml_equal
(
api
.
export_to_xml
(
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
self
.
file_system
,
STATIC_DIR
,
course_id
),
api
.
export_to_xml
(
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
course_id
),
expected
)
...
...
@@ -1032,7 +1034,10 @@ class ExportTest(TestCase):
language_code
=
'en'
video_id
=
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
]
transcript_files
=
{
'de'
:
u'super-soaker-de.sjson'
,
'en'
:
u'super-soaker-en.srt'
}
expected_transcript_path
=
combine
(
self
.
temp_dir
,
STATIC_PATH
)
expected_transcript_path
=
combine
(
self
.
temp_dir
,
combine
(
constants
.
EXPORT_IMPORT_COURSE_DIR
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
)
expected_xml
=
self
.
parse_xml
(
"""
<video_asset client_video_id="Shallow Swordfish" duration="122.0" image="image.jpg">
...
...
@@ -1040,19 +1045,19 @@ class ExportTest(TestCase):
<encoded_video url="http://www.meowmagic.com" file_size="33" bitrate="44" profile="desktop"/>
<encoded_video url="https://www.tmnt.com/tmnt101.m3u8" file_size="100" bitrate="0" profile="hls"/>
<transcripts>
<transcript file_format="sjson"
file_name="edxval/tests/data/wow.sjson"
language_code="de" provider="3PlayMedia" />
<transcript file_format="srt"
file_name="edxval/tests/data/The_Flash.srt"
language_code="en" provider="Cielo24" />
<transcript file_format="sjson" language_code="de" provider="3PlayMedia" />
<transcript file_format="srt" language_code="en" provider="Cielo24" />
</transcripts>
</video_asset>
"""
)
exported_xml
=
api
.
export_to_xml
(
video_id
,
self
.
file_system
,
STATIC_DIR
,
'test-course'
)
exported_xml
=
api
.
export_to_xml
(
video_id
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_
STATIC_DIR
,
'test-course'
)
# Assert video and transcript xml is exported correctly.
self
.
assert_xml_equal
(
exported_xml
,
expected_xml
)
# Verify transcript file is created.
self
.
assertItemsEqual
(
transcript_files
.
values
(),
self
.
file_system
.
listdir
(
STATIC_PATH
))
self
.
assertItemsEqual
(
transcript_files
.
values
(),
self
.
file_system
.
listdir
(
constants
.
EXPORT_IMPORT_STATIC_DIR
))
# Also verify the content of created transcript file.
for
language_code
in
transcript_files
.
keys
():
...
...
@@ -1068,12 +1073,15 @@ class ExportTest(TestCase):
Test export with invalid video id.
"""
with
self
.
assertRaises
(
ValVideoNotFoundError
):
api
.
export_to_xml
(
'unknown_video'
,
self
.
file_system
,
STATIC_DIR
)
api
.
export_to_xml
(
'unknown_video'
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_
STATIC_DIR
)
@ddt
class
ImportTest
(
TestCase
):
"""Tests import_from_xml"""
"""
Tests import_from_xml
"""
def
setUp
(
self
):
self
.
image_name
=
'image.jpg'
mobile_profile
=
Profile
.
objects
.
create
(
profile_name
=
constants
.
PROFILE_MOBILE
)
...
...
@@ -1090,6 +1098,16 @@ class ImportTest(TestCase):
self
.
transcript_data2
=
dict
(
constants
.
VIDEO_TRANSCRIPT_3PLAY
,
video_id
=
'little-star'
)
self
.
transcript_data3
=
dict
(
self
.
transcript_data2
,
video_id
=
'super-soaker'
)
self
.
temp_dir
=
mkdtemp
()
self
.
file_system
=
OSFS
(
self
.
temp_dir
)
self
.
file_system
.
makedir
(
constants
.
EXPORT_IMPORT_COURSE_DIR
,
recreate
=
True
)
self
.
file_system
.
makedir
(
constants
.
EXPORT_IMPORT_STATIC_DIR
,
recreate
=
True
)
self
.
addCleanup
(
shutil
.
rmtree
,
self
.
temp_dir
)
def
make_import_xml
(
self
,
video_dict
,
encoded_video_dicts
=
None
,
image
=
None
,
video_transcripts
=
None
):
import_xml
=
etree
.
Element
(
"video_asset"
,
...
...
@@ -1115,18 +1133,32 @@ class ImportTest(TestCase):
if
video_transcripts
:
transcripts_el
=
etree
.
SubElement
(
import_xml
,
'transcripts'
)
for
video_transcript
in
video_transcripts
:
file_format
=
video_transcript
[
'file_format'
]
language_code
=
video_transcript
[
'language_code'
]
etree
.
SubElement
(
transcripts_el
,
'transcript'
,
{
'video_id'
:
video_transcript
[
'video_id'
],
'file_name'
:
video_transcript
[
'transcript'
],
'language_code'
:
video_transcript
[
'language_code'
],
'file_format'
:
video_transcript
[
'file_format'
],
'language_code'
:
language_code
,
'file_format'
:
file_format
,
'provider'
:
video_transcript
[
'provider'
],
}
)
# Create transcript files
transcript_file_name
=
u'{edx_video_id}-{language_code}.{file_format}'
.
format
(
edx_video_id
=
video_dict
[
'edx_video_id'
],
language_code
=
language_code
,
file_format
=
file_format
)
utils
.
create_file_in_fs
(
video_transcript
[
'file_data'
],
transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
return
import_xml
def
assert_obj_matches_dict_for_keys
(
self
,
obj
,
dict_
,
keys
):
...
...
@@ -1150,29 +1182,41 @@ class ImportTest(TestCase):
def
assert_invalid_import
(
self
,
xml
,
course_id
=
None
):
edx_video_id
=
"test_edx_video_id"
with
self
.
assertRaises
(
ValCannotCreateError
):
api
.
import_from_xml
(
xml
,
edx_video_id
,
course_id
)
api
.
import_from_xml
(
xml
,
edx_video_id
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{},
course_id
)
self
.
assertFalse
(
Video
.
objects
.
filter
(
edx_video_id
=
edx_video_id
)
.
exists
())
def
assert_transcripts
(
self
,
video_id
,
expected_transcripts
):
"""
Compare `received` with `expected` and assert if not equal
Compare `received` with `expected` and assert if not equal
.
"""
# Verify total number of expected transcripts for a video
# Verify total number of expected transcripts for a video
.
video_transcripts
=
VideoTranscript
.
objects
.
filter
(
video__edx_video_id
=
video_id
)
self
.
assertEqual
(
video_transcripts
.
count
(),
len
(
expected_transcripts
))
# Verify data for each transcript
# Verify data for each transcript
.
for
expected_transcript
in
expected_transcripts
:
language_code
=
expected_transcript
[
'language_code'
]
expected_transcript
[
'name'
]
=
expected_transcript
.
pop
(
'transcript'
)
#
get the imported transcript and rename `url` key
received
=
api
.
TranscriptSerializer
(
#
Get the imported transcript and remove `url` key.
received
_transcript
=
api
.
TranscriptSerializer
(
VideoTranscript
.
objects
.
get
(
video__edx_video_id
=
video_id
,
language_code
=
language_code
)
)
.
data
received
[
'name'
]
=
received
.
pop
(
'url'
)
self
.
assertDictEqual
(
received
,
expected_transcript
)
# Assert transcript content
received_transcript
[
'file_data'
]
=
api
.
get_video_transcript_data
(
video_id
,
language_code
)[
'content'
]
# Omit not needed attrs.
expected_transcript
=
omit_attrs
(
expected_transcript
,
[
'transcript'
])
received_transcript
=
omit_attrs
(
received_transcript
,
[
'url'
])
self
.
assertDictEqual
(
received_transcript
,
expected_transcript
)
def
test_new_video_full
(
self
):
new_course_id
=
'new_course_id'
...
...
@@ -1184,11 +1228,18 @@ class ImportTest(TestCase):
video_transcripts
=
[
self
.
transcript_data1
,
self
.
transcript_data2
]
)
# there must not be any transcript before import
with
self
.
assertRaises
(
VideoTranscript
.
DoesNotExist
):
VideoTranscript
.
objects
.
get
(
video__edx_video_id
=
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
])
# There must not be any transcript before import.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
[])
api
.
import_from_xml
(
xml
,
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
new_course_id
)
edx_video_id
=
api
.
import_from_xml
(
xml
,
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{},
new_course_id
)
self
.
assertEqual
(
edx_video_id
,
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
])
video
=
Video
.
objects
.
get
(
edx_video_id
=
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
])
self
.
assert_video_matches_dict
(
video
,
constants
.
VIDEO_DICT_STAR
)
...
...
@@ -1217,7 +1268,7 @@ class ImportTest(TestCase):
"duration"
:
"0"
,
}
)
api
.
import_from_xml
(
xml
,
edx_video_id
)
api
.
import_from_xml
(
xml
,
edx_video_id
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
video
=
Video
.
objects
.
get
(
edx_video_id
=
edx_video_id
)
self
.
assertFalse
(
video
.
encoded_videos
.
all
()
.
exists
())
...
...
@@ -1234,29 +1285,37 @@ class ImportTest(TestCase):
transcript_data
=
dict
(
self
.
transcript_data3
,
language_code
=
language_code
)
xml
=
self
.
make_import_xml
(
video_dict
=
{
"client_video_id"
:
"new_client_video_id"
,
"duration"
:
0
,
'edx_video_id'
:
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
'client_video_id'
:
'new_client_video_id'
,
'duration'
:
0
,
},
encoded_video_dicts
=
[
constants
.
ENCODED_VIDEO_DICT_FISH_DESKTOP
,
{
"url"
:
"http://example.com/new_url"
,
"file_size"
:
2733256
,
"bitrate"
:
1597804
,
"profile"
:
"mobile"
,
'url'
:
'http://example.com/new_url'
,
'file_size'
:
2733256
,
'bitrate'
:
1597804
,
'profile'
:
'mobile'
,
},
],
image
=
self
.
image_name
,
video_transcripts
=
[
transcript_data
]
)
# there must not be any transcript before import
with
self
.
assertRaises
(
VideoTranscript
.
DoesNotExist
):
VideoTranscript
.
objects
.
get
(
video__edx_video_id
=
constants
.
VIDEO_DICT_FISH
[
"edx_video_id"
])
# There must not be any transcript before import.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
[])
api
.
import_from_xml
(
xml
,
constants
.
VIDEO_DICT_FISH
[
"edx_video_id"
],
course_id
)
edx_video_id
=
api
.
import_from_xml
(
xml
,
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{},
course_id
)
self
.
assertEqual
(
edx_video_id
,
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
])
video
=
Video
.
objects
.
get
(
edx_video_id
=
constants
.
VIDEO_DICT_FISH
[
"edx_video_id"
])
video
=
Video
.
objects
.
get
(
edx_video_id
=
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
])
self
.
assert_video_matches_dict
(
video
,
constants
.
VIDEO_DICT_FISH
)
self
.
assert_encoded_video_matches_dict
(
video
.
encoded_videos
.
get
(
profile__profile_name
=
constants
.
PROFILE_MOBILE
),
...
...
@@ -1270,14 +1329,21 @@ class ImportTest(TestCase):
self
.
assertTrue
(
course_video
.
video_image
.
image
.
name
,
self
.
image_name
)
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_FISH
[
"edx_video_id"
],
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
[]
)
def
test_existing_video_with_invalid_course_id
(
self
):
xml
=
self
.
make_import_xml
(
video_dict
=
constants
.
VIDEO_DICT_FISH
)
with
self
.
assertRaises
(
ValCannotCreateError
):
api
.
import_from_xml
(
xml
,
edx_video_id
=
constants
.
VIDEO_DICT_FISH
[
"edx_video_id"
],
course_id
=
"x"
*
300
)
api
.
import_from_xml
(
xml
,
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{},
course_id
=
'x'
*
300
)
def
test_unknown_profile
(
self
):
profile
=
"unknown_profile"
...
...
@@ -1293,9 +1359,14 @@ class ImportTest(TestCase):
}
]
)
api
.
import_from_xml
(
xml
,
constants
.
VIDEO_DICT_STAR
[
"edx_video_id"
])
api
.
import_from_xml
(
xml
,
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
video
=
Video
.
objects
.
get
(
edx_video_id
=
constants
.
VIDEO_DICT_STAR
[
"edx_video_id"
])
video
=
Video
.
objects
.
get
(
edx_video_id
=
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
])
self
.
assertFalse
(
video
.
encoded_videos
.
filter
(
profile__profile_name
=
profile
)
.
exists
())
def
test_invalid_tag
(
self
):
...
...
@@ -1337,33 +1408,487 @@ class ImportTest(TestCase):
"""
Verify that transcript import for external video working as expected when there is no transcript.
"""
api
.
import_from_xml
(
etree
.
fromstring
(
'<video_asset/>'
),
''
)
api
.
import_from_xml
(
etree
.
fromstring
(
'<video_asset/>'
),
''
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
self
.
assertEqual
(
VideoTranscript
.
objects
.
count
(),
0
)
@data
(
(
'external-transcript.srt'
,
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SRT
),
(
'external-transcript.sjson'
,
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SJSON
)
)
@unpack
def
test_external_video_transcript
(
self
,
transcript_file_name
,
transcript_data
):
"""
Verify that transcript import for external video working as expected when there is transcript present.
"""
# First create external transcript.
utils
.
create_file_in_fs
(
transcript_data
[
'file_data'
],
transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
# Verify that one video is present before import.
self
.
assertEqual
(
Video
.
objects
.
count
(),
1
)
# Verify that no transript was present before import.
self
.
assertEqual
(
VideoTranscript
.
objects
.
count
(),
0
)
# Import xml with empty edx video id.
edx_video_id
=
api
.
import_from_xml
(
etree
.
fromstring
(
'<video_asset/>'
),
''
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{
'en'
:
[
transcript_file_name
]
}
)
# Verify that a new video is created.
self
.
assertIsNotNone
(
edx_video_id
)
# Verify transcript record is created with correct data.
self
.
assert_transcripts
(
edx_video_id
,
[
dict
(
transcript_data
,
video_id
=
edx_video_id
)]
)
def
test_multiple_external_transcripts_different_langauges
(
self
):
"""
Verify that transcript import for external video working as expected when multiple transcripts are imported.
"""
# First create external transcripts.
en_transcript_file_name
=
'external-transcript-en.srt'
utils
.
create_file_in_fs
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SRT
[
'file_data'
],
en_transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
es_transcript_file_name
=
'external-transcript-es.srt'
utils
.
create_file_in_fs
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SRT
[
'file_data'
],
es_transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
# Verify that one video is present before import.
self
.
assertEqual
(
Video
.
objects
.
count
(),
1
)
# Verify that no transript was present before import.
self
.
assertEqual
(
VideoTranscript
.
objects
.
count
(),
0
)
# Import xml with empty edx video id.
edx_video_id
=
api
.
import_from_xml
(
etree
.
fromstring
(
'<video_asset/>'
),
''
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{
'en'
:
[
en_transcript_file_name
],
'es'
:
[
es_transcript_file_name
]
}
)
# Verify that new video is created.
self
.
assertIsNotNone
(
edx_video_id
)
# Verify transcript records are created with correct data.
expected_transcripts
=
[
dict
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SRT
,
video_id
=
edx_video_id
,
language_code
=
'en'
),
dict
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SRT
,
video_id
=
edx_video_id
,
language_code
=
'es'
)
]
self
.
assert_transcripts
(
edx_video_id
,
expected_transcripts
)
def
test_multiple_external_transcripts_for_language
(
self
):
"""
Verify that transcript import for external video working as expected when multiple transcripts present against
a language e.g. external english transcript is imported through sub and transcripts field.
"""
# First create external transcripts.
sub_transcript_file_name
=
'external-transcript-sub.srt'
utils
.
create_file_in_fs
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SRT
[
'file_data'
],
sub_transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
ext_transcript_file_name
=
'external-transcript-ext.sjson'
utils
.
create_file_in_fs
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SJSON
[
'file_data'
],
ext_transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
# Verify that one video is present before import.
self
.
assertEqual
(
Video
.
objects
.
count
(),
1
)
# Verify that no transript was present before import.
self
.
assertEqual
(
VideoTranscript
.
objects
.
count
(),
0
)
# Import xml with empty edx video id.
edx_video_id
=
api
.
import_from_xml
(
etree
.
fromstring
(
'<video_asset/>'
),
''
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{
'en'
:
[
sub_transcript_file_name
,
ext_transcript_file_name
]
}
)
# Verify that new video is created.
self
.
assertIsNotNone
(
edx_video_id
)
# Verify transcript record is created with correct data i.e sub field transcript.
expected_transcripts
=
[
dict
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SRT
,
video_id
=
edx_video_id
,
language_code
=
'en'
)
]
self
.
assert_transcripts
(
edx_video_id
,
expected_transcripts
)
def
test_external_internal_transcripts_conflict
(
self
):
"""
Tests that when importing both external and internal (VAL) transcripts, internal transcript is imported.
"""
# First create external transcript in sjson format.
en_transcript_file_name
=
'external-transcript-en.sjson'
utils
.
create_file_in_fs
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SJSON
[
'file_data'
],
en_transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
# Let's create internal transcript in srt format.
expected_val_transcript
=
[
self
.
transcript_data1
]
import_xml
=
self
.
make_import_xml
(
video_dict
=
constants
.
VIDEO_DICT_STAR
,
video_transcripts
=
expected_val_transcript
)
# Verify that one video is present before import.
self
.
assertEqual
(
Video
.
objects
.
count
(),
1
)
# Verify that no transript was present before import.
self
.
assertEqual
(
VideoTranscript
.
objects
.
count
(),
0
)
# Note that we have an external en transcript as well as internal en transcript.
edx_video_id
=
api
.
import_from_xml
(
import_xml
,
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{
'en'
:
[
en_transcript_file_name
]
}
)
# Verify that new video is created.
self
.
assertIsNotNone
(
edx_video_id
)
# Verify transcript record is created with internal transcript data.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
[
self
.
transcript_data1
]
)
def
test_external_internal_transcripts_different_languages
(
self
):
"""
Tests that when importing both external and internal (VAL) transcripts for different langauges, all transcripts
are imported correctly.
"""
edx_video_id
=
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
]
# First create external es transcript.
es_transcript_file_name
=
'external-transcript-es.sjson'
es_external_transcript
=
dict
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SJSON
,
video_id
=
edx_video_id
,
language_code
=
'es'
)
utils
.
create_file_in_fs
(
es_external_transcript
[
'file_data'
],
es_transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
# Let's create en internal transcript.
import_xml
=
self
.
make_import_xml
(
video_dict
=
constants
.
VIDEO_DICT_STAR
,
video_transcripts
=
[
self
.
transcript_data1
]
)
# Verify that one video is present before import.
self
.
assertEqual
(
Video
.
objects
.
count
(),
1
)
# Verify that no transript was present before import.
self
.
assertEqual
(
VideoTranscript
.
objects
.
count
(),
0
)
# Note that we have an external 'es' language transcript as well as an internal 'es' language transcript.
edx_video_id
=
api
.
import_from_xml
(
import_xml
,
edx_video_id
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{
'es'
:
[
es_transcript_file_name
]
}
)
# Verify all transcript records are created correctly.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
[
self
.
transcript_data1
,
es_external_transcript
]
)
@patch
(
'edxval.api.logger'
)
def
test_import_transcript_from_fs_resource_not_found
(
self
,
mock_logger
):
"""
Test that `import_transcript_from_fs` correctly logs if transcript file is not found in file system.
"""
language_code
=
'en'
edx_video_id
=
'test-edx-video-id'
file_name
=
'file-not-found.srt'
api
.
import_transcript_from_fs
(
edx_video_id
=
edx_video_id
,
language_code
=
language_code
,
file_name
=
file_name
,
provider
=
TranscriptProviderType
.
CUSTOM
,
resource_fs
=
self
.
file_system
,
static_dir
=
constants
.
EXPORT_IMPORT_STATIC_DIR
)
mock_logger
.
warn
.
assert_called_with
(
'[edx-val] "
%
s" transcript "
%
s" for video "
%
s" is not found.'
,
language_code
,
file_name
,
edx_video_id
)
@patch
(
'edxval.api.logger'
)
def
test_import_transcript_from_fs_invalid_format
(
self
,
mock_logger
):
"""
Test that `import_transcript_from_fs` correctly logs if we get error while retrieving transcript file format.
"""
language_code
=
'en'
edx_video_id
=
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
]
# First create transcript file.
invalid_transcript_file_name
=
'invalid-transcript.txt'
invalid_transcript
=
dict
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SJSON
,
video_id
=
edx_video_id
,
file_data
=
'This is an invalid transcript file data.'
)
utils
.
create_file_in_fs
(
invalid_transcript
[
'file_data'
],
invalid_transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
api
.
import_transcript_from_fs
(
edx_video_id
=
edx_video_id
,
language_code
=
language_code
,
file_name
=
invalid_transcript_file_name
,
provider
=
TranscriptProviderType
.
CUSTOM
,
resource_fs
=
self
.
file_system
,
static_dir
=
constants
.
EXPORT_IMPORT_STATIC_DIR
)
mock_logger
.
warn
.
assert_called_with
(
'[edx-val] Error while getting transcript format for video=
%
s -- language_code=
%
s --file_name=
%
s'
,
edx_video_id
,
language_code
,
invalid_transcript_file_name
)
@patch
(
'edxval.api.logger'
)
def
test_import_transcript_from_fs_bad_content
(
self
,
mock_logger
):
"""
Test that `import_transcript_from_fs` correctly logs if we get error while decoding transcript content.
"""
language_code
=
'en'
edx_video_id
=
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
]
# First create transcript file.
transcript_file_name
=
'invalid-transcript.txt'
invalid_transcript
=
dict
(
constants
.
VIDEO_TRANSCRIPT_CUSTOM_SJSON
,
video_id
=
edx_video_id
,
file_data
=
u'Привіт, edX вітає вас.'
.
encode
(
'cp1251'
)
)
utils
.
create_file_in_fs
(
invalid_transcript
[
'file_data'
],
transcript_file_name
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
api
.
import_transcript_from_fs
(
edx_video_id
=
edx_video_id
,
language_code
=
language_code
,
file_name
=
transcript_file_name
,
provider
=
TranscriptProviderType
.
CUSTOM
,
resource_fs
=
self
.
file_system
,
static_dir
=
constants
.
EXPORT_IMPORT_STATIC_DIR
)
mock_logger
.
warn
.
assert_called_with
(
'[edx-val] "
%
s" transcript "
%
s" for video "
%
s" contains a non-utf8 file content.'
,
language_code
,
transcript_file_name
,
edx_video_id
)
def
test_import_transcript_attached_existing_video
(
self
):
"""
Verify that transcript import for existing video with transcript attached is working as expected.
"""
expected_video_transcripts
=
[
self
.
transcript_data3
]
import_xml
=
self
.
make_import_xml
(
video_dict
=
constants
.
VIDEO_DICT_FISH
,
video_transcripts
=
expected_video_transcripts
)
# Verify video is present before.
video
=
Video
.
objects
.
get
(
edx_video_id
=
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
])
self
.
assertIsNotNone
(
video
)
# Create internal video transcripts
transcript_data
=
dict
(
constants
.
VIDEO_TRANSCRIPT_3PLAY
,
video
=
video
)
transcript_data
=
omit_attrs
(
transcript_data
,
[
'video_id'
,
'file_data'
])
transcript
=
VideoTranscript
.
objects
.
create
(
**
transcript_data
)
# Verify that video has expected transcripts before import.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
expected_video_transcripts
)
api
.
import_from_xml
(
import_xml
,
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{},
'test_course_id'
)
# Verify that video has expected transcripts after import.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
expected_video_transcripts
)
def
test_import_no_transcript_attached_existing_video
(
self
):
"""
Verify that transcript import for existing video with no transcript attached is working as expected.
"""
exported_video_transcripts
=
[
self
.
transcript_data1
,
self
.
transcript_data2
]
# Verify video is present before.
video
=
Video
.
objects
.
get
(
edx_video_id
=
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
])
self
.
assertIsNotNone
(
video
)
import_xml
=
self
.
make_import_xml
(
video_dict
=
constants
.
VIDEO_DICT_FISH
,
video_transcripts
=
exported_video_transcripts
)
# There must not be any transcript before import.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
[])
api
.
import_from_xml
(
import_xml
,
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{},
'test_course_id'
)
# Verify that no transcript record is created.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
],
[])
def
test_import_transcript_new_video
(
self
):
"""
Verify that transcript import for new video is working as expected when transcript is present in XML.
"""
expected_video_transcripts
=
[
self
.
transcript_data1
,
self
.
transcript_data2
]
import_xml
=
self
.
make_import_xml
(
video_dict
=
constants
.
VIDEO_DICT_STAR
,
video_transcripts
=
expected_video_transcripts
)
# Verify video is not present before.
with
self
.
assertRaises
(
Video
.
DoesNotExist
):
Video
.
objects
.
get
(
edx_video_id
=
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
])
# There must not be any transcript before import.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
[])
api
.
import_from_xml
(
import_xml
,
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{},
'test_course_id'
)
# Verify that transcript record is created with correct data.
self
.
assert_transcripts
(
constants
.
VIDEO_DICT_STAR
[
'edx_video_id'
],
expected_video_transcripts
)
@patch
(
'edxval.api.logger'
)
def
test_video_transcript_missing_attribute
(
self
,
mock_logger
):
"""
Verify that video transcript import working as expected if transcript xml data is missing.
"""
video_id
=
'super-soaker'
transcript_xml
=
'<transcript file_
name="wow.srt" language_code="en" file_
format="srt" provider="Cielo24"/>'
transcript_xml
=
'<transcript file_format="srt" provider="Cielo24"/>'
xml
=
etree
.
fromstring
(
"""
<video_asset>
<transcripts>
{transcript_xml}
<transcript
file_name="edxval/tests/data/wow.sjson" language_code="de" file_format="sjson" provider='3PlayMedia' video_id="{video_id}"
/>
<transcript
language_code="de" file_format="sjson" provider='3PlayMedia'
/>
</transcripts>
</video_asset>
"""
.
format
(
transcript_xml
=
transcript_xml
,
video_id
=
video_id
))
"""
.
format
(
transcript_xml
=
transcript_xml
))
#
t
here should be no video transcript before import
#
T
here should be no video transcript before import
with
self
.
assertRaises
(
VideoTranscript
.
DoesNotExist
):
VideoTranscript
.
objects
.
get
(
video__edx_video_id
=
video_id
)
api
.
create_transcript_objects
(
xml
)
# Create transcript files
utils
.
create_file_in_fs
(
constants
.
TRANSCRIPT_DATA
[
'wow'
],
u'super-soaker-de.sjson'
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
)
api
.
create_transcript_objects
(
xml
,
video_id
,
self
.
file_system
,
constants
.
EXPORT_IMPORT_STATIC_DIR
,
{})
mock_logger
.
warn
.
assert_called_with
(
"VAL: Required attributes are missing from xml, xml=[
%
s]"
,
...
...
@@ -1710,15 +2235,15 @@ class TranscriptTest(TestCase):
'language_code'
:
'en'
,
'provider'
:
TranscriptProviderType
.
THREE_PLAY_MEDIA
,
'file_name'
:
None
,
'file_format'
:
TranscriptFormat
.
SRT
,
'file_format'
:
utils
.
TranscriptFormat
.
SRT
,
'file_data'
:
File
(
open
(
self
.
flash_transcript_path
))
},
{
'language_code'
:
'fr'
,
'provider'
:
TranscriptProviderType
.
CIELO24
,
'file_name'
:
None
,
'file_format'
:
TranscriptFormat
.
SRT
,
'file_data'
:
ContentFile
(
FILE_DATA
)
'file_format'
:
utils
.
TranscriptFormat
.
SRT
,
'file_data'
:
ContentFile
(
constants
.
TRANSCRIPT_DATA
[
'overwatch'
]
)
}
]
)
...
...
@@ -1734,14 +2259,14 @@ class TranscriptTest(TestCase):
'language_code'
:
'de'
,
'provider'
:
TranscriptProviderType
.
CUSTOM
,
'file_name'
:
None
,
'file_format'
:
TranscriptFormat
.
SRT
,
'file_format'
:
utils
.
TranscriptFormat
.
SRT
,
'file_data'
:
File
(
open
(
self
.
arrow_transcript_path
))
},
{
'language_code'
:
'zh'
,
'provider'
:
TranscriptProviderType
.
CUSTOM
,
'file_name'
:
'non/existent/transcript/path'
,
'file_format'
:
TranscriptFormat
.
SRT
,
'file_format'
:
utils
.
TranscriptFormat
.
SRT
,
'file_data'
:
None
}
]
...
...
@@ -1818,7 +2343,7 @@ class TranscriptTest(TestCase):
expectation
=
{
'video_id'
:
u'super-soaker'
,
'url'
:
self
.
v1_transcript2
.
url
(),
'file_format'
:
TranscriptFormat
.
SRT
,
'file_format'
:
utils
.
TranscriptFormat
.
SRT
,
'provider'
:
TranscriptProviderType
.
CIELO24
,
'language_code'
:
u'fr'
}
...
...
@@ -1874,14 +2399,14 @@ class TranscriptTest(TestCase):
{
'file_data'
:
None
,
'file_name'
:
'overwatch.sjson'
,
'file_format'
:
TranscriptFormat
.
SJSON
,
'file_format'
:
utils
.
TranscriptFormat
.
SJSON
,
'language_code'
:
'da'
,
'provider'
:
TranscriptProviderType
.
CIELO24
},
{
'file_data'
:
ContentFile
(
FILE_DATA
),
'file_data'
:
ContentFile
(
constants
.
TRANSCRIPT_DATA
[
'overwatch'
]
),
'file_name'
:
None
,
'file_format'
:
TranscriptFormat
.
SRT
,
'file_format'
:
utils
.
TranscriptFormat
.
SRT
,
'language_code'
:
'es'
,
'provider'
:
TranscriptProviderType
.
THREE_PLAY_MEDIA
},
...
...
@@ -1922,7 +2447,7 @@ class TranscriptTest(TestCase):
self
.
assertTrue
(
transcript_url
.
startswith
(
settings
.
VIDEO_TRANSCRIPTS_SETTINGS
[
'DIRECTORY_PREFIX'
]))
self
.
assertEqual
(
video_transcript
.
transcript
.
name
,
transcript_url
)
with
open
(
video_transcript
.
transcript
.
name
)
as
saved_transcript
:
self
.
assertEqual
(
saved_transcript
.
read
(),
FILE_DATA
)
self
.
assertEqual
(
saved_transcript
.
read
(),
constants
.
TRANSCRIPT_DATA
[
'overwatch'
]
)
else
:
self
.
assertEqual
(
video_transcript
.
transcript
.
name
,
file_name
)
...
...
@@ -1936,7 +2461,7 @@ class TranscriptTest(TestCase):
},
{
'video_id'
:
'medium-soaker'
,
'file_format'
:
TranscriptFormat
.
SRT
,
'file_format'
:
utils
.
TranscriptFormat
.
SRT
,
'provider'
:
123
,
'exception'
:
InvalidTranscriptProvider
,
'exception_message'
:
'123 transcript provider is not supported'
,
...
...
@@ -1965,8 +2490,8 @@ class TranscriptTest(TestCase):
video_id
=
edx_video_id
,
language_code
=
language_code
,
provider
=
TranscriptProviderType
.
THREE_PLAY_MEDIA
,
file_format
=
TranscriptFormat
.
SRT
,
content
=
ContentFile
(
FILE_DATA
)
file_format
=
utils
.
TranscriptFormat
.
SRT
,
content
=
ContentFile
(
constants
.
TRANSCRIPT_DATA
[
'overwatch'
]
)
)
# setup video with the `edx_video_id` above.
...
...
@@ -1988,7 +2513,7 @@ class TranscriptTest(TestCase):
self
.
assertEqual
(
video_transcript
.
file_format
,
transcript_props
[
'file_format'
])
self
.
assertEqual
(
video_transcript
.
provider
,
transcript_props
[
'provider'
])
with
open
(
video_transcript
.
transcript
.
name
)
as
created_transcript
:
self
.
assertEqual
(
created_transcript
.
read
(),
FILE_DATA
)
self
.
assertEqual
(
created_transcript
.
read
(),
constants
.
TRANSCRIPT_DATA
[
'overwatch'
]
)
@data
(
{
...
...
@@ -2001,7 +2526,7 @@ class TranscriptTest(TestCase):
{
'video_id'
:
'medium-soaker'
,
'language_code'
:
'en'
,
'file_format'
:
TranscriptFormat
.
SRT
,
'file_format'
:
utils
.
TranscriptFormat
.
SRT
,
'provider'
:
'unknown provider'
,
'exception_msg'
:
'"unknown provider" is not a valid choice.'
}
...
...
@@ -2012,7 +2537,7 @@ class TranscriptTest(TestCase):
Verify that `create_video_transcript` api function raise exceptions on invalid values.
"""
with
self
.
assertRaises
(
ValCannotCreateError
)
as
transcript_exception
:
api
.
create_video_transcript
(
video_id
,
language_code
,
file_format
,
ContentFile
(
FILE_DATA
),
provider
)
api
.
create_video_transcript
(
video_id
,
language_code
,
file_format
,
ContentFile
(
constants
.
TRANSCRIPT_DATA
[
'overwatch'
]
),
provider
)
self
.
assertIn
(
exception_msg
,
unicode
(
transcript_exception
.
exception
.
message
))
...
...
@@ -2030,7 +2555,7 @@ class TranscriptTest(TestCase):
video_id
=
edx_video_id
,
language_code
=
'en'
,
metadata
=
dict
(
provider
=
TranscriptProviderType
.
CIELO24
),
file_data
=
ContentFile
(
FILE_DATA
)
file_data
=
ContentFile
(
constants
.
TRANSCRIPT_DATA
[
'overwatch'
]
)
)
# Verify that new transcript is set to video
...
...
@@ -2039,7 +2564,7 @@ class TranscriptTest(TestCase):
# verify that new data is written correctly
with
open
(
video_transcript
.
transcript
.
name
)
as
saved_transcript
:
self
.
assertEqual
(
saved_transcript
.
read
(),
FILE_DATA
)
self
.
assertEqual
(
saved_transcript
.
read
(),
constants
.
TRANSCRIPT_DATA
[
'overwatch'
]
)
# Verify that an exception is raised if we try to open a deleted transcript file
with
self
.
assertRaises
(
IOError
)
as
file_open_exception
:
...
...
@@ -2088,22 +2613,26 @@ class TranscriptTest(TestCase):
language_code
=
'en'
video_id
=
constants
.
VIDEO_DICT_FISH
[
'edx_video_id'
]
transcript_file_name
=
u'super-soaker-en.srt'
expected_transcript_path
=
combine
(
self
.
temp_dir
,
combine
(
STATIC_PATH
,
transcript_file_name
))
expected_transcript_path
=
combine
(
combine
(
self
.
temp_dir
,
constants
.
EXPORT_IMPORT_COURSE_DIR
),
combine
(
constants
.
EXPORT_IMPORT_STATIC_DIR
,
transcript_file_name
)
)
file_system
=
OSFS
(
self
.
temp_dir
)
file_system
.
makedir
(
STATIC_DIR
,
recreate
=
True
)
delegate_fs
=
OSFS
(
self
.
temp_dir
)
file_system
=
delegate_fs
.
makedir
(
constants
.
EXPORT_IMPORT_COURSE_DIR
,
recreate
=
True
)
file_system
.
makedir
(
constants
.
EXPORT_IMPORT_STATIC_DIR
,
recreate
=
True
)
# Create transcript file now.
api
.
create_trancript_file
(
api
.
create_tran
s
cript_file
(
video_id
=
video_id
,
language_code
=
language_code
,
file_format
=
TranscriptFormat
.
SRT
,
static_dir
=
STATIC_DIR
,
file_format
=
utils
.
TranscriptFormat
.
SRT
,
static_dir
=
constants
.
EXPORT_IMPORT_
STATIC_DIR
,
resource_fs
=
file_system
)
# Verify transcript file is created.
self
.
assertTrue
(
transcript_file_name
in
file_system
.
listdir
(
STATIC_PATH
))
self
.
assertTrue
(
transcript_file_name
in
file_system
.
listdir
(
constants
.
EXPORT_IMPORT_STATIC_DIR
))
# Also verify the content of created transcript file.
expected_transcript_content
=
File
(
open
(
expected_transcript_path
))
.
read
()
...
...
@@ -2120,19 +2649,19 @@ class TranscriptTest(TestCase):
Tests that no transcript file is created in case of invalid scenario.
"""
file_system
=
OSFS
(
self
.
temp_dir
)
file_system
.
makedir
(
STATIC_DIR
,
recreate
=
True
)
file_system
.
makedir
(
constants
.
EXPORT_IMPORT_
STATIC_DIR
,
recreate
=
True
)
# Try to create transcript file now.
api
.
create_trancript_file
(
api
.
create_tran
s
cript_file
(
video_id
=
video_id
,
language_code
=
language_code
,
file_format
=
TranscriptFormat
.
SRT
,
static_dir
=
STATIC_DIR
,
file_format
=
utils
.
TranscriptFormat
.
SRT
,
static_dir
=
constants
.
EXPORT_IMPORT_
STATIC_DIR
,
resource_fs
=
file_system
)
# Verify no file is created.
self
.
assertEqual
(
file_system
.
listdir
(
STATIC_PATH
),
[])
self
.
assertEqual
(
file_system
.
listdir
(
constants
.
EXPORT_IMPORT_STATIC_DIR
),
[])
@ddt
...
...
edxval/tests/test_views.py
View file @
00b8ded8
...
...
@@ -8,10 +8,11 @@ from ddt import data, ddt, unpack
from
django.core.urlresolvers
import
reverse
from
rest_framework
import
status
from
edxval.models
import
(
CourseVideo
,
Profile
,
TranscriptFormat
,
from
edxval.models
import
(
CourseVideo
,
Profile
,
TranscriptProviderType
,
Video
,
VideoTranscript
)
from
edxval.serializers
import
TranscriptSerializer
from
edxval.tests
import
APIAuthTestCase
,
constants
from
edxval.utils
import
TranscriptFormat
class
VideoDetail
(
APIAuthTestCase
):
...
...
@@ -813,6 +814,7 @@ class VideoTranscriptViewTest(APIAuthTestCase):
Tests POSTing transcript successfully.
"""
post_transcript_data
=
dict
(
self
.
transcript_data
)
post_transcript_data
.
pop
(
'file_data'
)
post_transcript_data
[
'name'
]
=
post_transcript_data
.
pop
(
'transcript'
)
response
=
self
.
client
.
post
(
self
.
url
,
post_transcript_data
,
format
=
'json'
)
...
...
edxval/utils.py
View file @
00b8ded8
...
...
@@ -2,8 +2,22 @@
Util methods to be used in api and models.
"""
import
json
from
django.conf
import
settings
from
django.core.files.storage
import
get_storage_class
from
fs.path
import
combine
from
pysrt
import
SubRipFile
class
TranscriptFormat
(
object
):
SRT
=
'srt'
SJSON
=
'sjson'
CHOICES
=
(
(
SRT
,
'SubRip'
),
(
SJSON
,
'SRT JSON'
)
)
# 3rd Party Transcription Plans
THIRD_PARTY_TRANSCRIPTION_PLANS
=
{
...
...
@@ -169,3 +183,35 @@ def get_video_transcript_storage():
# during edx-platform loading this method gets called but settings are not ready yet
# so in that case we will return default(FileSystemStorage) storage class instance
return
get_storage_class
()()
def
create_file_in_fs
(
file_data
,
file_name
,
file_system
,
static_dir
):
"""
Writes file in specific file system.
Arguments:
file_data (str): Data to store into the file.
file_name (str): File name of the file to be created.
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
"""
with
file_system
.
open
(
combine
(
static_dir
,
file_name
),
'wb'
)
as
f
:
f
.
write
(
file_data
)
def
get_transcript_format
(
transcript_content
):
"""
Returns transcript format.
Arguments:
transcript_content (str): Transcript file content.
"""
try
:
sjson_obj
=
json
.
loads
(
transcript_content
)
except
ValueError
:
# With error handling (set to 'ERROR_RAISE'), we will be getting
# the exception if something went wrong in parsing the transcript.
srt_subs
=
SubRipFile
.
from_string
(
transcript_content
,
error_handling
=
SubRipFile
.
ERROR_RAISE
)
if
len
(
srt_subs
)
>
0
:
return
TranscriptFormat
.
SRT
return
TranscriptFormat
.
SJSON
edxval/views.py
View file @
00b8ded8
...
...
@@ -15,13 +15,13 @@ from rest_framework_oauth.authentication import OAuth2Authentication
from
edxval.api
import
create_or_update_video_transcript
from
edxval.models
import
(
CourseVideo
,
TranscriptFormat
,
TranscriptProviderType
,
Video
,
VideoImage
,
VideoTranscript
)
from
edxval.serializers
import
VideoSerializer
from
edxval.utils
import
TranscriptFormat
LOGGER
=
logging
.
getLogger
(
__name__
)
# pylint: disable=C0103
...
...
requirements/base.in
View file @
00b8ded8
...
...
@@ -10,3 +10,4 @@ django-storages
enum34
lxml
pillow
pysrt==0.4.7
setup.py
View file @
00b8ded8
...
...
@@ -41,7 +41,7 @@ def load_requirements(*requirements_paths):
setup
(
name
=
'edxval'
,
version
=
'0.1.1
2
'
,
version
=
'0.1.1
3
'
,
author
=
'edX'
,
url
=
'http://github.com/edx/edx-val'
,
description
=
'edx-val'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment