Commit 9a18f255 by Gregory Martin

youtube callback

parent 3eb78149
"""
Youtube Primary Reporting / Callbacks
"""
import os
import sys
import datetime
from datetime import timedelta
import django
import newrelic.agent
from django.utils.timezone import utc
"""
Django setup
"""
project_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_path not in sys.path:
    sys.path.append(project_path)

os.environ['DJANGO_SETTINGS_MODULE'] = 'common.settings'
django.setup()

# Model imports have to come after django.setup()
from pipeline.models import Course, Video, Encode, URL

newrelic.agent.initialize(
    os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'veda_newrelic.ini'
    )
)
"""
Defaults
"""
data_window = datetime.datetime.utcnow().replace(tzinfo=utc) - \
timedelta(days=15)
def get_course(course_id):
course = Course.objects.get(
institution=course_id[0:3],
edx_classid=course_id[3:8]
)
return course
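
For example, the eight-character id used in the __main__ block at the end of this commit splits into institution 'COL' and class id 'ACGIM':

    course = get_course(course_id='COLACGIM')  # Course.objects.get(institution='COL', edx_classid='ACGIM')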


@newrelic.agent.background_task()
def generate_course_list():
    """
    Return the list of courses due for a YouTube callback check: courses
    with a recent state change and YouTube processing enabled, de-duplicated
    by YouTube login, plus the review channel when review videos are active.
    """
    course_list = []
    course_query = Course.objects.filter(
        previous_statechange__gt=data_window,
        yt_proc=True,
    )
    for course in course_query:
        if determine_missing_url(course_object=course) is True:
            if weed_dupes(course_list, course) is True:
                course_list.append(course)

    """
    Review Calls
    """
    review_date = datetime.datetime.utcnow().replace(tzinfo=utc) - \
        timedelta(days=10)
    review_query = Course.objects.filter(
        previous_statechange__gt=review_date,
        review_proc=True
    )
    if len(review_query) > 0:
        review_channel = Course.objects.get(
            institution='EDX',
            edx_classid='RVW01'
        )
        course_list.append(review_channel)

    return course_list


@newrelic.agent.background_task()
def weed_dupes(course_list, course):
    """
    Return True if no course already in course_list shares this course's
    YouTube login (each login only needs to be crawled once).
    """
    for c in course_list:
        if c.yt_logon == course.yt_logon:
            return False
    return True


@newrelic.agent.background_task()
def determine_missing_url(course_object):
    """
    Return True if any recent video in the course is missing a YouTube
    ('100' encode profile) URL and isn't corrupt or held for review.
    """
    video_query = Video.objects.filter(
        inst_class=course_object,
        video_trans_start__gt=data_window
    )
    for v in video_query:
        salient_video = Video.objects.filter(edx_id=v.edx_id).latest()
        if salient_video.video_trans_status != "Corrupt File" and \
                salient_video.video_trans_status != "Review Hold":
            yt_url_query = URL.objects.filter(
                videoID=salient_video,
                encode_profile=Encode.objects.get(
                    encode_suffix='100'
                )
            )
            if len(yt_url_query) == 0:
                return True
    return False


if __name__ == "__main__":
    pass
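
For illustration, a minimal sketch of exercising these helpers from a Django-aware shell (it assumes the same settings module and database as above):

    from youtube_callback.daemon import generate_course_list

    # courses whose recent videos still lack a YouTube URL, one per YouTube login
    for course in generate_course_list():
        print course.institution, course.edx_classid, course.yt_logon
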
"""
Check SFTP dropboxes for YT Video ID XML information
"""
import datetime
import django
import fnmatch
import newrelic.agent
import os
import pysftp
import shutil
import sys
import xml.etree.ElementTree as ET
from os.path import expanduser
from datetime import timedelta
from django.utils.timezone import utc

project_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_path not in sys.path:
    sys.path.append(project_path)

os.environ['DJANGO_SETTINGS_MODULE'] = 'common.settings'
django.setup()

from pipeline.models import Video, Encode, URL
from frontend.abvid_reporting import report_status
from control.veda_val import VALAPICall
from control.veda_utils import ErrorObject, Metadata, VideoProto
from youtube_callback.daemon import generate_course_list, get_course

newrelic.agent.initialize(
    os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'veda_newrelic.ini'
    )
)

"""
Defaults:
"""
homedir = expanduser("~")
workdir = os.path.join(homedir, 'download_data_holding')
YOUTUBE_LOOKBACK_DAYS = 15


@newrelic.agent.background_task()
def callfunction(course):
    """
    Clear the local holding directory, download the course's status XML
    files from the YouTube dropbox, then parse each one and patch URLs.

    :param course: Course object
    :return: None
    """
    if os.path.exists(workdir):
        shutil.rmtree(workdir)
    os.mkdir(workdir)

    xml_downloader(course)

    for file in os.listdir(workdir):
        print file
        upload_data = domxml_parser(file)
        if upload_data is not None:
            print upload_data
            urlpatch(upload_data)


@newrelic.agent.background_task()
def xml_downloader(course):
    """
    Connect to the YouTube partner SFTP dropbox with the course's login
    and crawl it for recent status XML files.

    :param course: Course object
    :return: None
    """
    private_key = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'static_files',
        'youtubekey'
    )
    cnopts = pysftp.CnOpts()
    cnopts.hostkeys = None
    try:
        with pysftp.Connection(
            'partnerupload.google.com',
            username=course.yt_logon,
            private_key=private_key,
            port=19321,
            cnopts=cnopts
        ) as s1:
            for d in s1.listdir_attr():
                crawl_sftp(d=d, s1=s1)
    except Exception:
        ErrorObject.print_error("Failed Auth: Youtube SFTP")
        return None


@newrelic.agent.background_task()
def crawl_sftp(d, s1):
    """
    Crawl the sftp dir and download the XML files for parsing

    :param d: directory entry (from listdir_attr)
    :param s1: sftp connection
    :return: None
    """
    dirtime = datetime.datetime.fromtimestamp(d.st_mtime)
    if dirtime < datetime.datetime.now() - timedelta(days=YOUTUBE_LOOKBACK_DAYS):
        return None
    if d.filename == "files_to_be_removed.txt":
        return None
    if d.filename == 'FAILED':
        return None
    try:
        s1.cwd(d.filename)
    except Exception:
        return None
    for f in s1.listdir_attr():
        filetime = datetime.datetime.fromtimestamp(f.st_mtime)
        if filetime > datetime.datetime.now() - timedelta(days=YOUTUBE_LOOKBACK_DAYS) and \
                fnmatch.fnmatch(f.filename, '*.xml'):
            """
            Determine if there's an extant download for this same ID
            """
            x = 0
            while True:
                """
                Just in case something runs out
                """
                if x > 20:
                    break
                file_to_find = f.filename.split('.')[0] + \
                    str(x) + \
                    '.' + \
                    f.filename.split('.')[1]
                if os.path.exists(os.path.join(workdir, file_to_find)):
                    x += 1
                else:
                    break
            print "%s : %s" % (f.filename, file_to_find)
            s1.get(
                f.filename,
                os.path.join(workdir, file_to_find)
            )
    s1.cwd('..')
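
The collision handling above just drops a counter in front of the extension before saving locally; with a hypothetical dropbox filename, these are the candidate local names it checks in order:

    # hypothetical filename, for illustration only
    f_name = 'status-EDXABVID2014-V000100_100.xml'
    for x in range(3):
        print f_name.split('.')[0] + str(x) + '.' + f_name.split('.')[1]
    # status-EDXABVID2014-V000100_1000.xml
    # status-EDXABVID2014-V000100_1001.xml
    # status-EDXABVID2014-V000100_1002.xml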


@newrelic.agent.background_task()
def domxml_parser(file):
    """
    Parse a downloaded YouTube status XML file into an upload_data dict.

    :param file: filename (within workdir) of a 'status-' XML file
    :return: upload_data dict, or None if the file can't be used
    """
    if 'status-' not in file:
        return None
    upload_data = {
        'datetime': None,
        'status': None,
        'duplicate_url': None,
        # take everything after the 'status-' prefix; str.strip('status-')
        # would also trim matching characters from the id itself
        'edx_id': file.split('status-', 1)[1].split('_')[0],
        'file_suffix': None,
        'youtube_id': None
    }
    try:
        tree = ET.parse(os.path.join(workdir, file))
    except ET.ParseError:
        return None
    root = tree.getroot()
    for child in root:
        if child.tag == 'timestamp':
            upload_data['datetime'] = datetime.datetime.strptime(
                child.text,
                '%Y-%m-%dT%H:%M:%S'
            ).replace(tzinfo=utc)
        elif child.tag == 'action':
            if child.get('name') == 'Process file':
                for c in child:
                    if c.tag == 'status_detail':
                        if c.text == 'The file size cannot be zero.':
                            return None
                    if c.tag == 'action':
                        if c.get('name') == 'Submit video':
                            for d in c:
                                if d.tag == 'status':
                                    upload_data['status'] = d.text
                                elif d.tag == 'status_detail':
                                    if d.text != 'Live!':
                                        if 'duplicate upload' in d.text:
                                            # last whitespace-delimited token, minus anything after a '.'
                                            upload_data['duplicate_url'] = d.text.split(' ')[-1].split('.')[0]
                                            upload_data['status'] = 'Duplicate'
                                elif d.tag == 'in_file':
                                    try:
                                        upload_data['file_suffix'] = d.text.split('\'')[1].split('_')[1].split('.')[0]  # nopep8
                                    except IndexError:
                                        upload_data['file_suffix'] = '100'
                                elif d.tag == 'id':
                                    upload_data['youtube_id'] = d.text
    return upload_data
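
A hedged sketch of the status XML shape the parser expects, reconstructed only from the tags read above (the root element name and everything not read by the parser are guesses), plus a quick way to exercise it:

    # hypothetical status file of the kind crawl_sftp drops into workdir
    sample = (
        "<upload_status>"
        "<timestamp>2016-01-01T00:00:00</timestamp>"
        "<action name='Process file'>"
        "<action name='Submit video'>"
        "<status>Success</status>"
        "<in_file>file 'EDXABVID2014-V000100_100.mp4'</in_file>"
        "<id>FAKEYTID001</id>"
        "</action></action>"
        "</upload_status>"
    )
    with open(os.path.join(workdir, 'status-EDXABVID2014-V000100_100.xml'), 'w') as xml_out:
        xml_out.write(sample)
    print domxml_parser('status-EDXABVID2014-V000100_100.xml')
    # {'status': 'Success', 'edx_id': 'EDXABVID2014-V000100', 'file_suffix': '100',
    #  'youtube_id': 'FAKEYTID001', 'duplicate_url': None, 'datetime': ...}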


@newrelic.agent.background_task()
def urlpatch(upload_data):
    """
    Record the YouTube URL for a successful upload, update video status
    and the VAL API, and flag/report duplicate uploads.

    :param upload_data: dict produced by domxml_parser
    :return: None
    """
    if upload_data['status'] == 'Failure':
        return None

    try:
        test_id = Video.objects.filter(edx_id=upload_data['edx_id']).latest()
    except Exception:
        upload_data['status'] = 'Failure'

    if upload_data['status'] == 'Success':
        url_query = URL.objects.filter(
            encode_url=upload_data['youtube_id']
        )
        if len(url_query) == 0:
            u1 = URL(
                videoID=Video.objects.filter(
                    edx_id=test_id.edx_id
                ).latest()
            )
            u1.encode_profile = Encode.objects.get(
                encode_suffix=upload_data['file_suffix']
            )
            u1.encode_url = upload_data['youtube_id']
            u1.url_date = upload_data['datetime']
            u1.encode_duration = test_id.video_orig_duration
            u1.encode_bitdepth = 0
            u1.encode_size = 0
            u1.save()

        """
        Report to Email
        """
        if 'EDXABVID' in upload_data['edx_id']:
            v1 = Video.objects.filter(edx_id=upload_data['edx_id']).latest()
            if v1.abvid_serial is not None:
                report_status(
                    status="Complete",
                    abvid_serial=v1.abvid_serial,
                    youtube_id=upload_data['youtube_id']
                )

        video_check = Video.objects.filter(edx_id=test_id.edx_id).latest()
        if video_check.video_trans_status == 'Youtube Duplicate':
            Video.objects.filter(
                edx_id=video_check.edx_id
            ).update(
                video_trans_status='Progress'
            )

        """
        Update Status & VAL
        """
        video_proto = VideoProto(
            veda_id=test_id.edx_id,
            val_id=test_id.studio_id,
            client_title=test_id.client_title,
            duration=test_id.video_orig_duration,
            bitrate='0',
            s3_filename=test_id.studio_id
        )
        print test_id.video_orig_duration

        VF = Metadata(
            video_object=test_id
        )
        encode_list = VF._FAULT(
            video_object=test_id
        )

        """
        Review can stop here
        """
        if upload_data['file_suffix'] == 'RVW':
            return None

        if len(encode_list) == 0:
            Video.objects.filter(
                edx_id=upload_data['edx_id']
            ).update(
                video_trans_status='Complete'
            )
            val_status = 'file_complete'
        else:
            val_status = 'transcode_active'

        VAC = VALAPICall(
            video_proto=video_proto,
            val_status=val_status,
            endpoint_url=upload_data['youtube_id'],
            encode_profile='youtube'
        )
        VAC.call()

    elif upload_data['status'] == 'Duplicate' and \
            upload_data['file_suffix'] == '100':
        url_query = URL.objects.filter(
            videoID=Video.objects.filter(
                edx_id=test_id.edx_id
            ).latest(),
            encode_profile=Encode.objects.get(
                encode_suffix=upload_data['file_suffix']
            )
        )
        if len(url_query) == 0:
            if 'EDXABVID' in upload_data['edx_id']:
                report_status(
                    status="Youtube Duplicate",
                    abvid_serial=test_id.abvid_serial,
                    youtube_id=''
                )
            Video.objects.filter(
                edx_id=upload_data['edx_id']
            ).update(
                video_trans_status='Youtube Duplicate'
            )
            video_proto = VideoProto(
                veda_id=test_id.edx_id,
                val_id=test_id.studio_id,
                client_title=test_id.client_title,
                duration=test_id.video_orig_duration,
                bitrate='0',
                s3_filename=test_id.studio_id
            )
            VAC = VALAPICall(
                video_proto=video_proto,
                val_status="duplicate",
                endpoint_url="DUPLICATE",
                encode_profile='youtube'
            )
            VAC.call()


if __name__ == "__main__":
    course = get_course(course_id='COLACGIM')
    if course is not None:
        callfunction(course)
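
The block above only exercises a single hard-coded course; a sketch of how a scheduled run could drive a full callback pass instead, using the daemon helpers already imported at the top of this file (assumption: one pass per invocation, e.g. from cron):

    # check the dropbox for every course that still needs a YouTube URL
    for course in generate_course_list():
        callfunction(course)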