Commit e7b38383 by Sofiya Semenova

Youtube callback XML -> CSV reformatting causes videos to never be marked as complete

parent 5c67dfa8
......@@ -2,10 +2,12 @@
Check SFTP dropboxes for YT Video ID XML information
"""
import csv
import datetime
import fnmatch
import logging
import os
import re
import shutil
import sys
import xml.etree.ElementTree as ET
......@@ -48,13 +50,14 @@ def callfunction(course):
shutil.rmtree(workdir)
os.mkdir(workdir)
xml_downloader(course)
xml_csv_downloader(course)
for file in os.listdir(workdir):
upload_data = domxml_parser(file)
if 'report-' in file:
upload_data = domxml_parser(file) if is_xml_file(file) else csv_parser(file)
if upload_data is not None:
LOGGER.info('[YOUTUBE_CALLBACK] : {inst}{clss} {upload_data}'.format(
LOGGER.info('[YOUTUBE CALLBACK] : {inst}{clss} {upload_data}'.format(
inst=course.institution,
clss=course.edx_classid,
upload_data=upload_data
......@@ -62,7 +65,7 @@ def callfunction(course):
urlpatch(upload_data)
def xml_downloader(course):
def xml_csv_downloader(course):
"""
:param course:
......@@ -89,17 +92,17 @@ def xml_downloader(course):
for d in s1.listdir_attr():
crawl_sftp(d=d, s1=s1)
except AuthenticationException:
LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format(
LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format(
inst=course.institution,
clss=course.edx_classid
))
except SSHException:
LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format(
LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format(
inst=course.institution,
clss=course.edx_classid
))
except IOError:
LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : List Dir Failed".format(
LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : List Dir Failed".format(
inst=course.institution,
clss=course.edx_classid
))
......@@ -155,6 +158,8 @@ def crawl_sftp(d, s1):
return
except SSHException:
return
except OSError:
return
s1.cwd('..')
......@@ -164,10 +169,6 @@ def domxml_parser(file):
:param file:
:return:
"""
if 'status-' not in file:
return
upload_data = {
'datetime': None,
'status': None,
......@@ -176,11 +177,18 @@ def domxml_parser(file):
'file_suffix': None,
'youtube_id': None
}
try:
tree = ET.parse(os.path.join(workdir, file))
except ET.ParseError:
LOGGER.error('[YOUTUBE CALLBACK] : Parse Error in domxml parser : file {filename}'.format(
filename=file
))
return
except IOError:
LOGGER.error('[YOUTUBE CALLBACK] : IO Error in domxml parser : file {filename}'.format(
filename=file
))
return
root = tree.getroot()
for child in root:
......@@ -221,6 +229,105 @@ def domxml_parser(file):
return upload_data
def csv_parser(filename):
    """
    Parse a YouTube ``report-*`` CSV file from ``workdir`` into upload data.

    The header row is searched for the "Status", "Video file" and "Video ID"
    columns; a column that is not present falls back to index 0. Every data
    row overwrites the values taken from the previous one, so the returned
    dict reflects the last usable row of the report.

    :param filename: string, name of the report file inside ``workdir``
    :return: upload_data : dict, or None when the file is missing or empty
    """
    report_prefix = 'report-'
    # str.strip('report-') removes any of the characters r/e/p/o/t/- from both
    # ends, which eats the start of IDs such as "edx123". Drop only the
    # literal prefix instead.
    if filename.startswith(report_prefix):
        base_name = filename[len(report_prefix):]
    else:
        base_name = filename

    upload_data = {
        'datetime': None,
        'status': None,
        'duplicate_url': None,
        'edx_id': base_name.split('_')[0],
        'file_suffix': None,
        'youtube_id': None
    }

    csv_path = os.path.join(workdir, filename)
    if not os.path.exists(csv_path):
        LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} does not exist'.format(
            filename=filename
        ))
        return

    # NOTE(review): binary mode is the Python 2 idiom for the csv module;
    # under Python 3 this would need text mode with newline='' -- confirm
    # the target interpreter before changing it.
    with open(csv_path, 'rb') as csvfile:
        file_reader = csv.reader(csvfile, delimiter=',')
        try:
            headers = next(file_reader)
        except StopIteration:
            LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} exists but is empty'.format(
                filename=filename
            ))
            return

        def _column_index(name):
            # First column with this title; column 0 when the title is absent.
            return headers.index(name) if name in headers else 0

        status_index = _column_index("Status")
        file_suffix_index = _column_index("Video file")
        youtube_id_index = _column_index("Video ID")
        required_width = max(status_index, file_suffix_index, youtube_id_index) + 1

        for row in file_reader:
            if len(row) < required_width:
                # Blank or truncated line: nothing usable, and indexing it
                # would raise IndexError.
                continue

            video_url = row[file_suffix_index]
            upload_data['status'] = row[status_index]
            if upload_data['status'] == "Errors":
                upload_data = _process_errors(upload_data, filename)
            upload_data['youtube_id'] = row[youtube_id_index]

            try:
                upload_data['file_suffix'] = video_url.split("_")[1].split(".")[0]
            except IndexError:
                # Keep the fallback a string: parsed suffixes are strings and
                # urlpatch compares file_suffix against '100'.
                upload_data['file_suffix'] = '100'

    return upload_data
def _process_errors(upload_data, reports_file):
    """
    Refine an "Errors" report status using the matching ``errors-*`` CSV file.

    The errors file name is derived from the report name by replacing
    "report-" with "errors-". For every row whose error code is
    VIDEO_REJECTED_DUPLICATE the status becomes "Duplicate" and, when the
    error message carries it, the duplicate video ID is stored in
    ``duplicate_url``. A missing, unreadable or empty errors file is logged
    and leaves ``upload_data`` untouched.

    :param upload_data : dict
           reports_file : string
    :return: upload_data : dict
    """
    errors_file = os.path.join(workdir, reports_file.replace("report-", "errors-"))
    # Raw string: "\[" is not a valid escape in a normal string literal.
    error_message_pattern = re.compile(r'Duplicate video ID is \[(?P<thing>[0-9a-zA-Z_-]*)\]')

    try:
        with open(errors_file, 'rb') as csvfile:
            file_reader = csv.reader(csvfile, delimiter=',')
            try:
                headers = next(file_reader)
            except StopIteration:
                # Without this guard an empty errors file would escape as an
                # uncaught StopIteration.
                LOGGER.error('[YOUTUBE CALLBACK] : Error file {file} is empty'.format(
                    file=errors_file
                ))
                return upload_data

            # First column with the given title; column 0 when it is absent.
            error_code_index = headers.index("Error code") if "Error code" in headers else 0
            error_message_index = headers.index("Error message") if "Error message" in headers else 0
            required_width = max(error_code_index, error_message_index) + 1

            for row in file_reader:
                if len(row) < required_width:
                    # Blank or truncated line; indexing it would raise IndexError.
                    continue
                if row[error_code_index] != "VIDEO_REJECTED_DUPLICATE":
                    continue

                upload_data['status'] = "Duplicate"
                youtube_id_search = error_message_pattern.search(row[error_message_index])
                if youtube_id_search:
                    upload_data['duplicate_url'] = youtube_id_search.group('thing')
                else:
                    # Parenthesise the joined literals so .format() fills every
                    # placeholder, not only the ones in the last fragment.
                    LOGGER.error(
                        (
                            '[YOUTUBE CALLBACK] : Youtube callback returned Duplicate Video error but '
                            'duplicate video ID could not be found. Upload data: {upload_data}. '
                            'CSV: {csv}'
                        ).format(
                            upload_data=upload_data,
                            csv=row
                        )
                    )
    except IOError:
        LOGGER.error('[YOUTUBE CALLBACK] : Could not open error file {file}'.format(
            file=errors_file
        ))

    return upload_data
def urlpatch(upload_data):
"""
......@@ -235,7 +342,10 @@ def urlpatch(upload_data):
upload_data['status'] = 'Failure'
return
if upload_data['status'] == 'Success':
if upload_data['status'] == 'Successful':
LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is successful : {upload_data}'.format(
upload_data=upload_data
))
url_query = URL.objects.filter(
encode_url=upload_data['youtube_id']
)
......@@ -321,6 +431,10 @@ def urlpatch(upload_data):
elif upload_data['status'] == 'Duplicate' and \
upload_data['file_suffix'] == '100':
LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is duplicate : {upload_data}'.format(
upload_data=upload_data
))
url_query = URL.objects.filter(
videoID=Video.objects.filter(
edx_id=upload_data['edx_id']
......@@ -354,3 +468,7 @@ def urlpatch(upload_data):
encode_profile='youtube'
)
ApiConn.call()
def is_xml_file(file):
    """
    Tell whether a file name ends with the ``.xml`` extension, ignoring case.

    :param file: string, file name to check
    :return: bool
    """
    xml_extension = '.xml'
    lowered_name = file.lower()
    return lowered_name.endswith(xml_extension)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment