edx / edx-video-pipeline · Commits

Commit 2f0d8d4b (unverified)
Authored Apr 20, 2018 by Sofiya Semenova, committed by GitHub on Apr 20, 2018

Merge pull request #100 from edx/sofiya/youtube-bug

Multiple courses stuck in YouTube renditions

Parents: 5c67dfa8, e7b38383

Showing 1 changed file, with 136 additions and 18 deletions.

youtube_callback/sftp_id_retrieve.py (+136 / -18), view file @ 2f0d8d4b
@@ -2,10 +2,12 @@
 Check SFTP dropboxes for YT Video ID XML information
 """
+import csv
 import datetime
 import fnmatch
 import logging
 import os
+import re
 import shutil
 import sys
 import xml.etree.ElementTree as ET
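The two new imports support the code added below: csv for reading YouTube's report and errors files, and re for pulling the duplicate video ID out of error messages.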
@@ -48,21 +50,22 @@ def callfunction(course):
         shutil.rmtree(workdir)
     os.mkdir(workdir)
-    xml_downloader(course)
+    xml_csv_downloader(course)
     for file in os.listdir(workdir):
-        upload_data = domxml_parser(file)
-        if upload_data is not None:
-            LOGGER.info('[YOUTUBE_CALLBACK] : {inst}{clss} {upload_data}'.format(
-                inst=course.institution,
-                clss=course.edx_classid,
-                upload_data=upload_data
-            ))
-            urlpatch(upload_data)
+        if 'report-' in file:
+            upload_data = domxml_parser(file) if is_xml_file(file) else csv_parser(file)
+            if upload_data is not None:
+                LOGGER.info('[YOUTUBE CALLBACK] : {inst}{clss} {upload_data}'.format(
+                    inst=course.institution,
+                    clss=course.edx_classid,
+                    upload_data=upload_data
+                ))
+                urlpatch(upload_data)
 
 
-def xml_downloader(course):
+def xml_csv_downloader(course):
     """
     :param course:
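callfunction now routes each report-* file to an XML or a CSV parser based on its extension. Below is a minimal, self-contained sketch of that dispatch pattern; the stub parsers and directory here are illustrative, not the module's real ones:

    import os

    def is_xml_file(path):
        # mirrors the helper this commit adds at the bottom of the module
        return path.lower().endswith('.xml')

    def parse_xml_report(name):   # stub standing in for domxml_parser
        return {'source': 'xml', 'file': name}

    def parse_csv_report(name):   # stub standing in for csv_parser
        return {'source': 'csv', 'file': name}

    def collect_reports(workdir):
        """Yield parsed upload data for every report-* file in workdir."""
        for name in os.listdir(workdir):
            if 'report-' not in name:
                continue  # only YouTube report files are parsed
            parser = parse_xml_report if is_xml_file(name) else parse_csv_report
            data = parser(name)
            if data is not None:
                yield data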
@@ -89,17 +92,17 @@ def xml_downloader(course):
             for d in s1.listdir_attr():
                 crawl_sftp(d=d, s1=s1)
     except AuthenticationException:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
     except SSHException:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
     except IOError:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : List Dir Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : List Dir Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
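This hunk only normalizes the log tag, dropping the underscore in '[YOUTUBE_CALLBACK]'. Note the SSHException branch logs the same 'Authentication Failed' message as the AuthenticationException branch, unchanged by this commit.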
@@ -155,6 +158,8 @@ def crawl_sftp(d, s1):
         return
     except SSHException:
         return
+    except OSError:
+        return
     s1.cwd('..')
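crawl_sftp gains an OSError handler that returns instead of raising, so one unreadable dropbox entry no longer takes down the whole crawl.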
@@ -164,10 +169,6 @@ def domxml_parser(file):
     :param file:
     :return:
     """
-    if 'status-' not in file:
-        return
     upload_data = {
         'datetime': None,
         'status': None,
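Dropping the 'status-' guard fits the new flow: callfunction now filters on 'report-' before calling domxml_parser, so the per-file prefix check moved up a level, and the expected prefix changed from status- to report-.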
@@ -176,11 +177,18 @@ def domxml_parser(file):
         'file_suffix': None,
         'youtube_id': None
     }
-    tree = ET.parse(os.path.join(workdir, file))
+    try:
+        tree = ET.parse(os.path.join(workdir, file))
+    except ET.ParseError:
+        LOGGER.error('[YOUTUBE CALLBACK] : Parse Error in domxml parser : file {filename}'.format(filename=file))
+        return
+    except IOError:
+        LOGGER.error('[YOUTUBE CALLBACK] : IO Error in domxml parser : file {filename}'.format(filename=file))
+        return
     root = tree.getroot()
     for child in root:
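Wrapping ET.parse keeps a single malformed or missing report from aborting the entire callback pass. A standalone equivalent of the guard (the function name here is illustrative):

    import xml.etree.ElementTree as ET

    def parse_report_root(path):
        """Return the XML root element, or None if the file is malformed or unreadable."""
        try:
            return ET.parse(path).getroot()
        except ET.ParseError:
            # malformed XML in the report file
            return None
        except IOError:
            # missing or unreadable file (IOError is an alias of OSError on Python 3)
            return None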
@@ -221,6 +229,105 @@ def domxml_parser(file):
     return upload_data
 
 
+def csv_parser(filename):
+    """
+    :param filename: string
+    :return: upload_data : dict
+    """
+    upload_data = {
+        'datetime': None,
+        'status': None,
+        'duplicate_url': None,
+        'edx_id': filename.strip('report-').split('_')[0],
+        'file_suffix': None,
+        'youtube_id': None
+    }
+    status_index = file_suffix_index = youtube_id_index = 0
+
+    if not os.path.exists(os.path.join(workdir, filename)):
+        LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} does not exist'.format(filename=filename))
+        return
+
+    with open(os.path.join(workdir, filename), 'rb') as csvfile:
+        file_reader = csv.reader(csvfile, delimiter=',')
+        try:
+            headers = next(file_reader)
+        except StopIteration:
+            LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} exists but is empty'.format(filename=filename))
+            return
+        for column in headers:
+            if column == "Status":
+                status_index = headers.index(column)
+            elif column == "Video file":
+                file_suffix_index = headers.index(column)
+            elif column == "Video ID":
+                youtube_id_index = headers.index(column)
+        for row in file_reader:
+            video_url = row[file_suffix_index]
+            upload_data['status'] = row[status_index]
+            if upload_data['status'] == "Errors":
+                upload_data = _process_errors(upload_data, filename)
+            upload_data['youtube_id'] = row[youtube_id_index]
+            try:
+                upload_data['file_suffix'] = video_url.split("_")[1].split(".")[0]
+            except IndexError:
+                upload_data['file_suffix'] = 100
+
+    return upload_data
+
+
+def _process_errors(upload_data, reports_file):
+    """
+    :param upload_data : dict
+           reports_file : string
+    :return: upload_data : dict
+    """
+    errors_file = os.path.join(workdir, reports_file.replace("report-", "errors-"))
+    error_code_index = error_message_index = 0
+    error_message_pattern = re.compile('Duplicate video ID is \[(?P<thing>[0-9a-zA-Z_-]*)\]')
+
+    try:
+        with open(errors_file, 'rb') as csvfile:
+            file_reader = csv.reader(csvfile, delimiter=',')
+            headers = next(file_reader)
+            for column in headers:
+                if column == "Error code":
+                    error_code_index = headers.index(column)
+                elif column == "Error message":
+                    error_message_index = headers.index(column)
+            for row in file_reader:
+                if row[error_code_index] == "VIDEO_REJECTED_DUPLICATE":
+                    upload_data['status'] = "Duplicate"
+                    error_message = row[error_message_index]
+                    youtube_id_search = error_message_pattern.search(error_message)
+                    if youtube_id_search:
+                        upload_data['duplicate_url'] = youtube_id_search.groups()[0]
+                    else:
+                        LOGGER.error(
+                            '[YOUTUBE CALLBACK] : Youtube callback returned Duplicate Video error but ' +
+                            'duplicate video ID could not be found. Upload data: {upload_data}. ' +
+                            'CSV: {csv}'.format(upload_data=upload_data, csv=row)
+                        )
+    except IOError:
+        LOGGER.error('[YOUTUBE CALLBACK] : Could not open error file {file}'.format(file=errors_file))
+
+    return upload_data
+
+
 def urlpatch(upload_data):
     """
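Three details in the new parsers deserve a flag. First, str.strip removes characters, not a prefix: filename.strip('report-') trims any leading or trailing run of the characters r, e, p, o, t, and -, so an edx_id beginning with one of those characters would be clipped; filename[len('report-'):] would be the prefix-safe spelling (an alternative, not what this commit does). Second, in the 'could not be found' log call, the method call binds tighter than +, so only the final 'CSV: {csv}' literal is formatted and '{upload_data}' is logged verbatim; str.format silently ignores the unused keyword, so nothing raises. Third, the duplicate-ID regex is easy to sanity-check in isolation; the sample message below is an assumption inferred from the pattern itself, not taken from YouTube's documentation:

    import re

    # Same pattern _process_errors compiles (written as a raw string here).
    pattern = re.compile(r'Duplicate video ID is \[(?P<thing>[0-9a-zA-Z_-]*)\]')

    # Hypothetical error message shaped the way the pattern expects.
    message = 'Video rejected: Duplicate video ID is [AbC-123_xyz]'
    match = pattern.search(message)
    if match:
        print(match.group('thing'))  # prints: AbC-123_xyz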
@@ -235,7 +342,10 @@ def urlpatch(upload_data):
         upload_data['status'] = 'Failure'
         return
 
-    if upload_data['status'] == 'Success':
+    if upload_data['status'] == 'Successful':
+        LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is successful : {upload_data}'.format(
+            upload_data=upload_data
+        ))
         url_query = URL.objects.filter(
             encode_url=upload_data['youtube_id']
         )
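The change from 'Success' to 'Successful' reads as the heart of the fix: YouTube's reports presumably spell the status 'Successful', so the old comparison never matched and uploads were never patched, which lines up with the PR title about courses stuck in YouTube renditions.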
@@ -321,6 +431,10 @@ def urlpatch(upload_data):
+    elif upload_data['status'] == 'Duplicate' and \
+            upload_data['file_suffix'] == '100':
+        LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is duplicate : {upload_data}'.format(
+            upload_data=upload_data
+        ))
         url_query = URL.objects.filter(
             videoID=Video.objects.filter(
                 edx_id=upload_data['edx_id']
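One subtlety here: on IndexError, csv_parser stores the integer 100 as file_suffix, while this branch compares against the string '100'. In Python, 100 == '100' is False, so the fallback sentinel never enters this path; only a literal '100' suffix parsed out of the video URL does. Whether that distinction is intentional isn't clear from the diff.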
@@ -354,3 +468,7 @@ def urlpatch(upload_data):
             encode_profile='youtube'
         )
         ApiConn.call()
+
+
+def is_xml_file(file):
+    return file.lower().endswith(('.xml'))
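A small style note on the new helper: ('.xml') is just a parenthesized string, not a one-element tuple, so endswith(('.xml')) behaves exactly like endswith('.xml'); the tuple form would only matter for checking several suffixes at once, and lower() already handles case here.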