Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-video-pipeline
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-video-pipeline
Commits
e7b38383
Commit
e7b38383
authored
Apr 18, 2018
by
Sofiya Semenova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Youtube callback XML -> CSV reformatting causes videos to never be marked as complete
parent
5c67dfa8
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
136 additions
and
18 deletions
+136
-18
youtube_callback/sftp_id_retrieve.py
+136
-18
No files found.
youtube_callback/sftp_id_retrieve.py
View file @
e7b38383
...
...
@@ -2,10 +2,12 @@
Check SFTP dropboxes for YT Video ID XML information
"""
import
csv
import
datetime
import
fnmatch
import
logging
import
os
import
re
import
shutil
import
sys
import
xml.etree.ElementTree
as
ET
...
...
@@ -48,21 +50,22 @@ def callfunction(course):
shutil
.
rmtree
(
workdir
)
os
.
mkdir
(
workdir
)
xml_downloader
(
course
)
xml_
csv_
downloader
(
course
)
for
file
in
os
.
listdir
(
workdir
):
upload_data
=
domxml_parser
(
file
)
if
'report-'
in
file
:
upload_data
=
domxml_parser
(
file
)
if
is_xml_file
(
file
)
else
csv_parser
(
file
)
if
upload_data
is
not
None
:
LOGGER
.
info
(
'[YOUTUBE_
CALLBACK] : {inst}{clss} {upload_data}'
.
format
(
inst
=
course
.
institution
,
clss
=
course
.
edx_classid
,
upload_data
=
upload_data
))
urlpatch
(
upload_data
)
if
upload_data
is
not
None
:
LOGGER
.
info
(
'[YOUTUBE
CALLBACK] : {inst}{clss} {upload_data}'
.
format
(
inst
=
course
.
institution
,
clss
=
course
.
edx_classid
,
upload_data
=
upload_data
))
urlpatch
(
upload_data
)
def
xml_downloader
(
course
):
def
xml_
csv_
downloader
(
course
):
"""
:param course:
...
...
@@ -89,17 +92,17 @@ def xml_downloader(course):
for
d
in
s1
.
listdir_attr
():
crawl_sftp
(
d
=
d
,
s1
=
s1
)
except
AuthenticationException
:
LOGGER
.
error
(
"[YOUTUBE
_
CALLBACK] : {inst}{clss} : Authentication Failed"
.
format
(
LOGGER
.
error
(
"[YOUTUBE
CALLBACK] : {inst}{clss} : Authentication Failed"
.
format
(
inst
=
course
.
institution
,
clss
=
course
.
edx_classid
))
except
SSHException
:
LOGGER
.
error
(
"[YOUTUBE
_
CALLBACK] : {inst}{clss} : Authentication Failed"
.
format
(
LOGGER
.
error
(
"[YOUTUBE
CALLBACK] : {inst}{clss} : Authentication Failed"
.
format
(
inst
=
course
.
institution
,
clss
=
course
.
edx_classid
))
except
IOError
:
LOGGER
.
error
(
"[YOUTUBE
_
CALLBACK] : {inst}{clss} : List Dir Failed"
.
format
(
LOGGER
.
error
(
"[YOUTUBE
CALLBACK] : {inst}{clss} : List Dir Failed"
.
format
(
inst
=
course
.
institution
,
clss
=
course
.
edx_classid
))
...
...
@@ -155,6 +158,8 @@ def crawl_sftp(d, s1):
return
except
SSHException
:
return
except
OSError
:
return
s1
.
cwd
(
'..'
)
...
...
@@ -164,10 +169,6 @@ def domxml_parser(file):
:param file:
:return:
"""
if
'status-'
not
in
file
:
return
upload_data
=
{
'datetime'
:
None
,
'status'
:
None
,
...
...
@@ -176,11 +177,18 @@ def domxml_parser(file):
'file_suffix'
:
None
,
'youtube_id'
:
None
}
try
:
tree
=
ET
.
parse
(
os
.
path
.
join
(
workdir
,
file
))
except
ET
.
ParseError
:
LOGGER
.
error
(
'[YOUTUBE CALLBACK] : Parse Error in domxml parser : file {filename}'
.
format
(
filename
=
file
))
return
except
IOError
:
LOGGER
.
error
(
'[YOUTUBE CALLBACK] : IO Error in domxml parser : file {filename}'
.
format
(
filename
=
file
))
return
root
=
tree
.
getroot
()
for
child
in
root
:
...
...
@@ -221,6 +229,105 @@ def domxml_parser(file):
return
upload_data
def csv_parser(filename):
    """
    Parse a YouTube ``report-*`` CSV found in ``workdir`` into an upload_data dict.

    The column positions are taken from the header row ("Status",
    "Video file", "Video ID"); a column that is absent falls back to index 0.
    When the file has several data rows, the values of the last row win.

    :param filename: string, name of the report file inside ``workdir``
    :return: upload_data : dict, or None when the file is missing or empty
    """
    # str.strip('report-') would remove any of the characters r/e/p/o/t/-
    # from BOTH ends (mangling ids such as "test123"), so drop the literal
    # prefix explicitly instead.
    report_prefix = 'report-'
    report_name = filename
    if report_name.startswith(report_prefix):
        report_name = report_name[len(report_prefix):]

    upload_data = {
        'datetime': None,
        'status': None,
        'duplicate_url': None,
        'edx_id': report_name.split('_')[0],
        'file_suffix': None,
        'youtube_id': None
    }

    report_path = os.path.join(workdir, filename)
    if not os.path.exists(report_path):
        LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} does not exist'.format(filename=filename))
        return

    # NOTE(review): 'rb' is the Python 2 convention for the csv module;
    # Python 3's csv.reader needs a text-mode file -- confirm the target
    # interpreter before porting.
    with open(report_path, 'rb') as csvfile:
        file_reader = csv.reader(csvfile, delimiter=',')
        try:
            headers = next(file_reader)
        except StopIteration:
            LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} exists but is empty'.format(filename=filename))
            return

        # First occurrence of each header, defaulting to column 0 when absent.
        status_index = headers.index("Status") if "Status" in headers else 0
        file_suffix_index = headers.index("Video file") if "Video file" in headers else 0
        youtube_id_index = headers.index("Video ID") if "Video ID" in headers else 0

        for row in file_reader:
            if not row:
                # csv.reader yields [] for blank lines; indexing it would raise.
                continue

            video_url = row[file_suffix_index]
            upload_data['status'] = row[status_index]
            if upload_data['status'] == "Errors":
                upload_data = _process_errors(upload_data, filename)

            upload_data['youtube_id'] = row[youtube_id_index]

            try:
                upload_data['file_suffix'] = video_url.split("_")[1].split(".")[0]
            except IndexError:
                # Kept as a string: urlpatch compares file_suffix to '100',
                # so an integer fallback could never match.
                upload_data['file_suffix'] = '100'

    return upload_data
def _process_errors(upload_data, reports_file):
    """
    Read the ``errors-*`` CSV that accompanies a report and record duplicate-video details.

    The errors file name is derived by replacing "report-" with "errors-" in
    ``reports_file``. For every row whose error code is
    "VIDEO_REJECTED_DUPLICATE" the status becomes "Duplicate" and, when the
    error message carries one, the bracketed video ID is stored in
    ``duplicate_url``. A missing/unreadable or empty errors file is logged
    and ``upload_data`` is returned unchanged.

    :param upload_data : dict (updated in place)
           reports_file : string
    :return: upload_data : dict
    """
    errors_file = os.path.join(workdir, reports_file.replace("report-", "errors-"))
    # Raw string so the bracket escapes reach the regex engine untouched;
    # whitespace before the opening bracket is matched leniently.
    error_message_pattern = re.compile(r'Duplicate video ID is\s*\[(?P<thing>[0-9a-zA-Z_-]*)\]')

    try:
        with open(errors_file, 'rb') as csvfile:
            file_reader = csv.reader(csvfile, delimiter=',')
            # A default avoids an uncaught StopIteration on an empty file.
            headers = next(file_reader, None)
            if headers is None:
                LOGGER.error('[YOUTUBE CALLBACK] : Error file {file} exists but is empty'.format(file=errors_file))
                return upload_data

            # First occurrence of each header, defaulting to column 0 when absent.
            error_code_index = headers.index("Error code") if "Error code" in headers else 0
            error_message_index = headers.index("Error message") if "Error message" in headers else 0

            for row in file_reader:
                if not row:
                    # csv.reader yields [] for blank lines; indexing it would raise.
                    continue
                if row[error_code_index] != "VIDEO_REJECTED_DUPLICATE":
                    continue

                upload_data['status'] = "Duplicate"
                youtube_id_search = error_message_pattern.search(row[error_message_index])
                if youtube_id_search:
                    upload_data['duplicate_url'] = youtube_id_search.groups()[0]
                else:
                    # The literals are joined BEFORE .format() is applied;
                    # with '+' only the last piece was formatted and
                    # {upload_data} was logged verbatim.
                    LOGGER.error(
                        (
                            '[YOUTUBE CALLBACK] : Youtube callback returned Duplicate Video error but '
                            'duplicate video ID could not be found. Upload data: {upload_data}. '
                            'CSV: {csv}'
                        ).format(upload_data=upload_data, csv=row)
                    )
    except IOError:
        LOGGER.error('[YOUTUBE CALLBACK] : Could not open error file {file}'.format(file=errors_file))

    return upload_data
def
urlpatch
(
upload_data
):
"""
...
...
@@ -235,7 +342,10 @@ def urlpatch(upload_data):
upload_data
[
'status'
]
=
'Failure'
return
if
upload_data
[
'status'
]
==
'Success'
:
if
upload_data
[
'status'
]
==
'Successful'
:
LOGGER
.
info
(
'[YOUTUBE CALLBACK] : Urlpatch : Upload status is successful : {upload_data}'
.
format
(
upload_data
=
upload_data
))
url_query
=
URL
.
objects
.
filter
(
encode_url
=
upload_data
[
'youtube_id'
]
)
...
...
@@ -321,6 +431,10 @@ def urlpatch(upload_data):
elif
upload_data
[
'status'
]
==
'Duplicate'
and
\
upload_data
[
'file_suffix'
]
==
'100'
:
LOGGER
.
info
(
'[YOUTUBE CALLBACK] : Urlpatch : Upload status is duplicate : {upload_data}'
.
format
(
upload_data
=
upload_data
))
url_query
=
URL
.
objects
.
filter
(
videoID
=
Video
.
objects
.
filter
(
edx_id
=
upload_data
[
'edx_id'
]
...
...
@@ -354,3 +468,7 @@ def urlpatch(upload_data):
encode_profile
=
'youtube'
)
ApiConn
.
call
()
def is_xml_file(file):
    """
    Tell whether a file name carries the ``.xml`` extension, ignoring case.

    :param file: string, file name to inspect
    :return: bool
    """
    lowered_name = file.lower()
    return lowered_name.endswith('.xml')
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment