Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-val
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-val
Commits
b8a64a57
Commit
b8a64a57
authored
Feb 27, 2018
by
Mushtaq Ali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Import DS video transcripts from course OLX - EDUCATOR-2173
parent
6469fc26
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
86 additions
and
31 deletions
+86
-31
edxval/api.py
+41
-28
edxval/tests/constants.py
+26
-0
edxval/tests/data/wow.sjson
+2
-3
edxval/tests/test_api.py
+0
-0
edxval/tests/test_views.py
+1
-0
edxval/utils.py
+16
-0
No files found.
edxval/api.py
View file @
b8a64a57
...
@@ -8,6 +8,8 @@ from enum import Enum
...
@@ -8,6 +8,8 @@ from enum import Enum
from
uuid
import
uuid4
from
uuid
import
uuid4
from
django.core.exceptions
import
ObjectDoesNotExist
,
ValidationError
from
django.core.exceptions
import
ObjectDoesNotExist
,
ValidationError
from
django.core.files
import
File
from
fs.path
import
combine
from
lxml
import
etree
from
lxml
import
etree
from
lxml.etree
import
Element
,
SubElement
from
lxml.etree
import
Element
,
SubElement
...
@@ -20,7 +22,7 @@ from edxval.models import (CourseVideo, EncodedVideo, Profile,
...
@@ -20,7 +22,7 @@ from edxval.models import (CourseVideo, EncodedVideo, Profile,
TranscriptProviderType
,
Video
,
VideoImage
,
TranscriptProviderType
,
Video
,
VideoImage
,
VideoTranscript
,
ThirdPartyTranscriptCredentialsState
)
VideoTranscript
,
ThirdPartyTranscriptCredentialsState
)
from
edxval.serializers
import
TranscriptPreferenceSerializer
,
TranscriptSerializer
,
VideoSerializer
from
edxval.serializers
import
TranscriptPreferenceSerializer
,
TranscriptSerializer
,
VideoSerializer
from
edxval.utils
import
THIRD_PARTY_TRANSCRIPTION_PLANS
from
edxval.utils
import
THIRD_PARTY_TRANSCRIPTION_PLANS
,
create_file_in_fs
logger
=
logging
.
getLogger
(
__name__
)
# pylint: disable=C0103
logger
=
logging
.
getLogger
(
__name__
)
# pylint: disable=C0103
...
@@ -789,7 +791,7 @@ def export_to_xml(video_id, resource_fs, static_dir, course_id=None):
...
@@ -789,7 +791,7 @@ def export_to_xml(video_id, resource_fs, static_dir, course_id=None):
video_id (str): Video id of the video to export transcripts.
video_id (str): Video id of the video to export transcripts.
course_id (str): The ID of the course with which this video is associated.
course_id (str): The ID of the course with which this video is associated.
static_dir (str): The Directory to store transcript file.
static_dir (str): The Directory to store transcript file.
resource_fs (OSFS):
The file system to store transcripts
.
resource_fs (OSFS):
Export file system
.
Returns:
Returns:
An lxml video_asset element containing export data
An lxml video_asset element containing export data
...
@@ -837,22 +839,15 @@ def create_trancript_file(video_id, language_code, file_format, resource_fs, sta
...
@@ -837,22 +839,15 @@ def create_trancript_file(video_id, language_code, file_format, resource_fs, sta
static_dir (str): The Directory to store transcript file.
static_dir (str): The Directory to store transcript file.
resource_fs (OSFS): The file system to store transcripts.
resource_fs (OSFS): The file system to store transcripts.
"""
"""
transcript_name
=
u'{static_dir}/{video_id}-{language_code}.{file_format}'
.
format
(
transcript_name
=
u'{video_id}-{language_code}.{file_format}'
.
format
(
static_dir
=
static_dir
,
video_id
=
video_id
,
video_id
=
video_id
,
language_code
=
language_code
,
language_code
=
language_code
,
file_format
=
file_format
file_format
=
file_format
)
)
try
:
transcript_data
=
get_video_transcript_data
(
video_id
,
language_code
)
transcript_data
=
get_video_transcript_data
(
video_id
,
language_code
)
if
transcript_data
:
if
transcript_data
:
transcript_content
=
transcript_data
[
'content'
]
transcript_content
=
transcript_data
[
'content'
]
create_file_in_fs
(
transcript_content
,
transcript_name
,
resource_fs
,
static_dir
)
with
resource_fs
.
open
(
transcript_name
,
'wb'
)
as
f
:
f
.
write
(
transcript_content
)
except
Exception
:
# Do not raise exception in case no transcript file is found for now.
# TODO: Remove this - EDUCATOR-2173
pass
def
create_transcripts_xml
(
video_id
,
video_el
,
resource_fs
,
static_dir
):
def
create_transcripts_xml
(
video_id
,
video_el
,
resource_fs
,
static_dir
):
...
@@ -886,7 +881,6 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
...
@@ -886,7 +881,6 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
transcripts_el
,
transcripts_el
,
'transcript'
,
'transcript'
,
{
{
'file_name'
:
video_transcript
.
transcript
.
name
,
'language_code'
:
language_code
,
'language_code'
:
language_code
,
'file_format'
:
file_format
,
'file_format'
:
file_format
,
'provider'
:
video_transcript
.
provider
,
'provider'
:
video_transcript
.
provider
,
...
@@ -897,7 +891,7 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
...
@@ -897,7 +891,7 @@ def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
return
video_el
return
video_el
def
import_from_xml
(
xml
,
edx_video_id
,
course_id
=
None
):
def
import_from_xml
(
xml
,
edx_video_id
,
resource_fs
,
static_dir
,
course_id
=
None
):
"""
"""
Imports data from a video_asset element about the given video_id.
Imports data from a video_asset element about the given video_id.
...
@@ -907,6 +901,8 @@ def import_from_xml(xml, edx_video_id, course_id=None):
...
@@ -907,6 +901,8 @@ def import_from_xml(xml, edx_video_id, course_id=None):
Arguments:
Arguments:
xml (Element): An lxml video_asset element containing import data
xml (Element): An lxml video_asset element containing import data
edx_video_id (str): val video id
edx_video_id (str): val video id
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
course_id (str): The ID of a course to associate the video with
course_id (str): The ID of a course to associate the video with
Raises:
Raises:
...
@@ -915,7 +911,7 @@ def import_from_xml(xml, edx_video_id, course_id=None):
...
@@ -915,7 +911,7 @@ def import_from_xml(xml, edx_video_id, course_id=None):
if
xml
.
tag
!=
'video_asset'
:
if
xml
.
tag
!=
'video_asset'
:
raise
ValCannotCreateError
(
'Invalid XML'
)
raise
ValCannotCreateError
(
'Invalid XML'
)
# TODO this will be moved as a part of EDUCATOR-2
17
3
# TODO this will be moved as a part of EDUCATOR-2
40
3
if
not
edx_video_id
:
if
not
edx_video_id
:
return
return
...
@@ -968,26 +964,43 @@ def import_from_xml(xml, edx_video_id, course_id=None):
...
@@ -968,26 +964,43 @@ def import_from_xml(xml, edx_video_id, course_id=None):
'bitrate'
:
encoded_video_el
.
get
(
'bitrate'
),
'bitrate'
:
encoded_video_el
.
get
(
'bitrate'
),
})
})
create_video
(
data
)
create_video
(
data
)
create_transcript_objects
(
xml
)
create_transcript_objects
(
xml
,
edx_video_id
,
resource_fs
,
static_dir
)
def
create_transcript_objects
(
xml
):
def
create_transcript_objects
(
xml
,
edx_video_id
,
resource_fs
,
static_dir
):
"""
"""
Create VideoTranscript objects.
Create VideoTranscript objects.
Arguments:
Arguments:
xml (Element): lxml Element object
xml (Element): lxml Element object.
edx_video_id (str): Video id of the video.
resource_fs (OSFS): Import file system.
static_dir (str): The Directory to retrieve transcript file.
"""
"""
for
transcript
in
xml
.
findall
(
'.//transcripts/transcript'
):
for
transcript
in
xml
.
findall
(
'.//transcripts/transcript'
):
try
:
try
:
create_or_update_video_transcript
(
file_format
=
transcript
.
attrib
[
'file_format'
]
transcript
.
attrib
[
'video_id'
],
language_code
=
transcript
.
attrib
[
'language_code'
]
transcript
.
attrib
[
'language_code'
],
transcript_data
=
get_video_transcript_data
(
edx_video_id
,
language_code
)
metadata
=
dict
(
provider
=
transcript
.
attrib
[
'provider'
],
# First check if transcript record does not exist.
file_name
=
transcript
.
attrib
[
'file_name'
],
if
not
transcript_data
:
file_format
=
transcript
.
attrib
[
'file_format'
],
transcript_file_name
=
u'{edx_video_id}-{language_code}.{file_format}'
.
format
(
edx_video_id
=
edx_video_id
,
language_code
=
language_code
,
file_format
=
file_format
)
# Read file from import file system and attach File to transcript record in DS.
file_data
=
File
(
resource_fs
.
open
(
combine
(
static_dir
,
transcript_file_name
)))
# Create transcript record.
create_video_transcript
(
video_id
=
edx_video_id
,
language_code
=
language_code
,
file_format
=
file_format
,
content
=
file_data
,
provider
=
transcript
.
attrib
[
'provider'
]
)
)
)
except
KeyError
:
except
KeyError
:
logger
.
warn
(
"VAL: Required attributes are missing from xml, xml=[
%
s]"
,
etree
.
tostring
(
transcript
)
.
strip
())
logger
.
warn
(
"VAL: Required attributes are missing from xml, xml=[
%
s]"
,
etree
.
tostring
(
transcript
)
.
strip
())
edxval/tests/constants.py
View file @
b8a64a57
...
@@ -12,6 +12,9 @@ from edxval.models import (
...
@@ -12,6 +12,9 @@ from edxval.models import (
)
)
EDX_VIDEO_ID
=
"itchyjacket"
EDX_VIDEO_ID
=
"itchyjacket"
EXPORT_IMPORT_STATIC_DIR
=
u'static'
"""
"""
Generic Profiles for manually creating profile objects
Generic Profiles for manually creating profile objects
"""
"""
...
@@ -363,12 +366,34 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
...
@@ -363,12 +366,34 @@ VIDEO_DICT_UPDATE_ANIMAL = dict(
encoded_videos
=
[],
encoded_videos
=
[],
)
)
TRANSCRIPT_DATA
=
{
"overwatch"
:
"""
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻."""
,
"flash"
:
"""
1
00:00:07,180 --> 00:00:08,460
This is Flash line 1."""
,
"wow"
:
{
"start"
:
[
10
],
"end"
:
[
100
],
"text"
:
[
"Hi, welcome to edxval."
],
}
}
VIDEO_TRANSCRIPT_CIELO24
=
dict
(
VIDEO_TRANSCRIPT_CIELO24
=
dict
(
video_id
=
'super-soaker'
,
video_id
=
'super-soaker'
,
language_code
=
'en'
,
language_code
=
'en'
,
transcript
=
'edxval/tests/data/The_Flash.srt'
,
transcript
=
'edxval/tests/data/The_Flash.srt'
,
provider
=
TranscriptProviderType
.
CIELO24
,
provider
=
TranscriptProviderType
.
CIELO24
,
file_format
=
TranscriptFormat
.
SRT
,
file_format
=
TranscriptFormat
.
SRT
,
file_data
=
TRANSCRIPT_DATA
[
'flash'
]
)
)
VIDEO_TRANSCRIPT_3PLAY
=
dict
(
VIDEO_TRANSCRIPT_3PLAY
=
dict
(
...
@@ -377,6 +402,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict(
...
@@ -377,6 +402,7 @@ VIDEO_TRANSCRIPT_3PLAY = dict(
transcript
=
'edxval/tests/data/wow.sjson'
,
transcript
=
'edxval/tests/data/wow.sjson'
,
provider
=
TranscriptProviderType
.
THREE_PLAY_MEDIA
,
provider
=
TranscriptProviderType
.
THREE_PLAY_MEDIA
,
file_format
=
TranscriptFormat
.
SJSON
,
file_format
=
TranscriptFormat
.
SJSON
,
file_data
=
TRANSCRIPT_DATA
[
'wow'
]
)
)
TRANSCRIPT_PREFERENCES_CIELO24
=
dict
(
TRANSCRIPT_PREFERENCES_CIELO24
=
dict
(
...
...
edxval/tests/data/wow.sjson
View file @
b8a64a57
{
{
"start": [10],
"start": [10],
"end": [100],
"end": [100],
"text": ["Hi, welcome to edxval."],
"text": ["Hi, welcome to edxval."]
}
}
\ No newline at end of file
edxval/tests/test_api.py
View file @
b8a64a57
This diff is collapsed.
Click to expand it.
edxval/tests/test_views.py
View file @
b8a64a57
...
@@ -813,6 +813,7 @@ class VideoTranscriptViewTest(APIAuthTestCase):
...
@@ -813,6 +813,7 @@ class VideoTranscriptViewTest(APIAuthTestCase):
Tests POSTing transcript successfully.
Tests POSTing transcript successfully.
"""
"""
post_transcript_data
=
dict
(
self
.
transcript_data
)
post_transcript_data
=
dict
(
self
.
transcript_data
)
post_transcript_data
.
pop
(
'file_data'
)
post_transcript_data
[
'name'
]
=
post_transcript_data
.
pop
(
'transcript'
)
post_transcript_data
[
'name'
]
=
post_transcript_data
.
pop
(
'transcript'
)
response
=
self
.
client
.
post
(
self
.
url
,
post_transcript_data
,
format
=
'json'
)
response
=
self
.
client
.
post
(
self
.
url
,
post_transcript_data
,
format
=
'json'
)
...
...
edxval/utils.py
View file @
b8a64a57
...
@@ -4,6 +4,8 @@ Util methods to be used in api and models.
...
@@ -4,6 +4,8 @@ Util methods to be used in api and models.
from
django.conf
import
settings
from
django.conf
import
settings
from
django.core.files.storage
import
get_storage_class
from
django.core.files.storage
import
get_storage_class
from
fs.path
import
combine
# 3rd Party Transcription Plans
# 3rd Party Transcription Plans
THIRD_PARTY_TRANSCRIPTION_PLANS
=
{
THIRD_PARTY_TRANSCRIPTION_PLANS
=
{
...
@@ -169,3 +171,17 @@ def get_video_transcript_storage():
...
@@ -169,3 +171,17 @@ def get_video_transcript_storage():
# during edx-platform loading this method gets called but settings are not ready yet
# during edx-platform loading this method gets called but settings are not ready yet
# so in that case we will return default(FileSystemStorage) storage class instance
# so in that case we will return default(FileSystemStorage) storage class instance
return
get_storage_class
()()
return
get_storage_class
()()
def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """
    Writes a file into a directory of the given file system.

    Arguments:
        file_data (str): Data to store into the file.
        file_name (str): File name of the file to be created.
        file_system (OSFS): File system in which the file is written; only
            its `open(path, mode)` method is used here.
        static_dir (str): The directory, inside `file_system`, in which the
            file is created.
    """
    # Build the target path with the fs-aware `combine` helper.
    file_path = combine(static_dir, file_name)
    # Open in binary mode so `file_data` is written as-is; an existing file
    # at this path is overwritten.
    with file_system.open(file_path, 'wb') as f:
        f.write(file_data)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment