abbey.py 27.8 KB
Newer Older
1
#!/usr/bin/env python -u
2 3 4 5
import sys
from argparse import ArgumentParser
import time
import json
John Jarvis committed
6
import yaml
7
import os
Fred Smith committed
8
import requests
9 10 11 12
try:
    import boto.ec2
    import boto.sqs
    from boto.vpc import VPCConnection
13
    from boto.exception import NoAuthHandlerFound, EC2ResponseError
14
    from boto.sqs.message import RawMessage
15
    from boto.ec2.blockdevicemapping import BlockDeviceType, BlockDeviceMapping
16 17 18 19
except ImportError:
    print "boto required for script"
    sys.exit(1)

20 21
from pprint import pprint

Feanil Patel committed
22
AMI_TIMEOUT = 2700  # time to wait for AMIs to complete(45 minutes)
EC2_RUN_TIMEOUT = 180  # time to wait for ec2 state transition
EC2_STATUS_TIMEOUT = 300  # time to wait for ec2 system status checks
NUM_TASKS = 5  # number of tasks for time summary report
# number of ansible STATS (run complete) events that poll_sqs_ansible
# waits for before it stops listening; the user-data script runs two
# ansible-playbook commands
NUM_PLAYBOOKS = 2
27

John Jarvis committed
28

29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
class Unbuffered:
    """
    Stream wrapper that flushes the wrapped stream after every write,
    not needed if PYTHONUNBUFFERED is set.

    Any attribute that is not defined here is looked up on the
    wrapped stream.
    """
    def __init__(self, stream):
        self.stream = stream

    def write(self, data):
        """Writes data to the wrapped stream and flushes it."""
        self.stream.write(data)
        self.stream.flush()

    def writelines(self, lines):
        """Writes all lines to the wrapped stream and flushes it."""
        # Without this method writelines() would be resolved by
        # __getattr__ on the wrapped stream and skip the flush.
        self.stream.writelines(lines)
        self.stream.flush()

    def __getattr__(self, attr):
        return getattr(self.stream, attr)

sys.stdout = Unbuffered(sys.stdout)


def parse_args():
    """
    Builds the command line parser for abbey and parses sys.argv.

    Returns the argparse namespace. --play, --deployment,
    --environment and --cache-id are required; --base-ami and
    --blessed are mutually exclusive.
    """
    parser = ArgumentParser()
    parser.add_argument('--noop', action='store_true',
                        help="don't actually run the cmds",
                        default=False)
    parser.add_argument('--secure-vars-file', required=False,
                        metavar="SECURE_VAR_FILE", default=None,
                        help="path to secure-vars from the root of "
                        "the secure repo. By default <deployment>.yml and "
                        "<environment>-<deployment>.yml will be used if they "
                        "exist in <secure-repo>/ansible/vars/. This secure file "
                        "will be used in addition to these if they exist.")
    parser.add_argument('--stack-name',
                        help="defaults to ENVIRONMENT-DEPLOYMENT",
                        metavar="STACK_NAME",
                        required=False)
    parser.add_argument('-p', '--play',
                        help='play name without the yml extension',
                        metavar="PLAY", required=True)
    parser.add_argument('--playbook-dir',
                        help='directory to find playbooks in',
                        default='configuration/playbooks/edx-east',
                        metavar="PLAYBOOKDIR", required=False)
    parser.add_argument('-d', '--deployment', metavar="DEPLOYMENT",
                        required=True)
    parser.add_argument('-e', '--environment', metavar="ENVIRONMENT",
                        required=True)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="turn on verbosity")
    parser.add_argument('--no-cleanup', action='store_true',
                        help="don't cleanup on failures")
    parser.add_argument('--vars', metavar="EXTRA_VAR_FILE",
                        help="path to extra var file", required=False)
    parser.add_argument('--configuration-version', required=False,
                        help="configuration repo gitref",
                        default="master")
    parser.add_argument('--configuration-secure-version', required=False,
                        help="configuration-secure repo gitref",
                        default="master")
    parser.add_argument('--configuration-secure-repo', required=False,
                        default="git@github.com:edx-ops/prod-secure",
                        help="repo to use for the secure files")
    parser.add_argument('--configuration-private-version', required=False,
                        help="configuration-private repo gitref",
                        default="master")
    parser.add_argument('--configuration-private-repo', required=False,
                        default="git@github.com:edx-ops/ansible-private",
                        help="repo to use for private playbooks")
    parser.add_argument('-c', '--cache-id', required=True,
                        help="unique id to use as part of cache prefix")
    parser.add_argument('-i', '--identity', required=False,
                        help="path to identity file for pulling "
                             "down configuration-secure",
                        default=None)
    parser.add_argument('-r', '--region', required=False,
                        default="us-east-1",
                        help="aws region")
    parser.add_argument('-k', '--keypair', required=False,
                        default="deployment",
                        help="AWS keypair to use for instance")
    parser.add_argument('-t', '--instance-type', required=False,
                        default="m1.large",
                        help="instance type to launch")
    parser.add_argument("--role-name", required=False,
                        default="abbey",
                        help="IAM role name to use (must exist)")
    # type=int: without it a value given on the command line is a
    # string, and python 2 never considers a number greater than a
    # string, so the delay check in the sqs poller would never fire.
    parser.add_argument("--msg-delay", required=False,
                        default=5, type=int,
                        help="How long to delay message display from sqs "
                             "to ensure ordering")
    parser.add_argument("--hipchat-room-id", required=False,
                        default=None,
                        help="The API ID of the Hipchat room to post "
                             "status messages to")
    parser.add_argument("--ansible-hipchat-room-id", required=False,
                        default='Hammer',
                        help="The room used by the abbey instance for "
                             "printing verbose ansible run data.")
    parser.add_argument("--hipchat-api-token", required=False,
                        default=None,
                        help="The API token for Hipchat integration")
    parser.add_argument("--callback-url", required=False,
                        default=None,
                        help="The callback URL to send notifications to")
    # type=int so that the volume size is numeric whether it comes
    # from the default or from the command line.
    parser.add_argument("--root-vol-size", required=False,
                        default=50, type=int,
                        help="The size of the root volume to use for the "
                             "abbey instance.")

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-b', '--base-ami', required=False,
                       help="ami to use as a base ami",
                       default="ami-0568456c")
    group.add_argument('--blessed', action='store_true',
                       help="Look up blessed ami for env-dep-play.",
                       default=False)

    return parser.parse_args()

146
def get_instance_sec_group(vpc_id):
    """
    Returns the id of the first security group found in the vpc
    vpc_id that is tagged with the play being built (args.play).

    Raises an Exception when no security group matches, the error
    is also written to stderr.
    """

    grp_details = ec2.get_all_security_groups(
        filters={
            'vpc_id': vpc_id,
            'tag:play': args.play
        }
    )

    if len(grp_details) < 1:
        error = "ERROR: Expected atleast one security group, got {}\n".format(
            len(grp_details))
        sys.stderr.write(error)
        # Stop here with a meaningful error, indexing the empty
        # result below would only surface as a bare IndexError.
        raise Exception(error.strip())

    return grp_details[0].id
160

John Jarvis committed
161

162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
def get_blessed_ami():
    """
    Looks up the AMI tagged as blessed for the current environment,
    deployment and play and returns its id.

    Raises an Exception unless exactly one image matches.
    """
    tag_filters = {
        'tag:environment': args.environment,
        'tag:deployment': args.deployment,
        'tag:play': args.play,
        'tag:blessed': True
    }
    candidates = ec2.get_all_images(filters=tag_filters)
    found = len(candidates)

    if found == 1:
        return candidates[0].id

    raise Exception(
        "ERROR: Expected only one blessed ami, got {}\n".format(found))
177

John Jarvis committed
178

179 180 181 182 183 184 185 186
def create_instance_args():
    """
    Looks up security group, subnet
    and returns arguments to pass into
    ec2.run_instances() including
    user data

    The subnet is looked up by cloudformation stack name and play
    first and, when nothing matches, by the cluster / environment /
    deployment tags. Exits with status 1 if no subnet is found.

    Reads the module level globals args, stack_name, run_id,
    extra_vars_yml, secure_vars_file and base_ami.
    """

    vpc = VPCConnection()
    subnet = vpc.get_all_subnets(
        filters={
            'tag:aws:cloudformation:stack-name': stack_name,
            'tag:play': args.play}
    )

    if len(subnet) < 1:
        #
        # try scheme for non-cloudformation builds
        #

        subnet = vpc.get_all_subnets(
            filters={
                'tag:cluster': args.play,
                'tag:environment': args.environment,
                'tag:deployment': args.deployment}
        )

    if len(subnet) < 1:
        sys.stderr.write("ERROR: Expected at least one subnet, got {}\n".format(
            len(subnet)))
        sys.exit(1)
    subnet_id = subnet[0].id
    vpc_id = subnet[0].vpc_id

    security_group_id = get_instance_sec_group(vpc_id)

    if args.identity:
        config_secure = 'true'
        with open(args.identity) as f:
            identity_contents = f.read()
    else:
        config_secure = 'false'
        identity_contents = "dummy"

    # Notes on the script below:
    #  - it is passed through str.format(), so it must not contain
    #    literal curly braces
    #  - the identity file is only written when config_secure is
    #    true, so its chmod lives in the same branch; the script runs
    #    with "set -e" and a chmod of a missing file would abort it
    #  - secure_vars_file is either the string "false" or a path, it
    #    is compared as a string and never executed as a command
    #  - SQS_REGION follows --region, which is the region the queue
    #    is created in
    user_data = """#!/bin/bash
set -x
set -e
exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
base_dir="/var/tmp/edx-cfg"
extra_vars="$base_dir/extra-vars-$$.yml"
secure_identity="$base_dir/secure-identity"
git_ssh="$base_dir/git_ssh.sh"
configuration_version="{configuration_version}"
configuration_secure_version="{configuration_secure_version}"
configuration_private_version="{configuration_private_version}"
environment="{environment}"
deployment="{deployment}"
play="{play}"
config_secure={config_secure}
git_repo_name="configuration"
git_repo="https://github.com/edx/$git_repo_name"
git_repo_secure="{configuration_secure_repo}"
git_repo_secure_name=$(basename $git_repo_secure .git)
git_repo_private="{configuration_private_repo}"
git_repo_private_name=$(basename $git_repo_private .git)
secure_vars_file="{secure_vars_file}"
environment_deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{environment}-{deployment}.yml"
deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{deployment}.yml"
instance_id=\\
$(curl http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null)
instance_ip=\\
$(curl http://169.254.169.254/latest/meta-data/local-ipv4 2>/dev/null)
instance_type=\\
$(curl http://169.254.169.254/latest/meta-data/instance-type 2>/dev/null)
playbook_dir="$base_dir/{playbook_dir}"

if $config_secure; then
    git_cmd="env GIT_SSH=$git_ssh git"
else
    git_cmd="git"
fi

ANSIBLE_ENABLE_SQS=true
SQS_NAME={queue_name}
SQS_REGION={region}
SQS_MSG_PREFIX="[ $instance_id $instance_ip $environment-$deployment $play ]"
PYTHONUNBUFFERED=1
HIPCHAT_TOKEN={hipchat_token}
HIPCHAT_ROOM={hipchat_room}
HIPCHAT_MSG_PREFIX="$environment-$deployment-$play: "
HIPCHAT_FROM="ansible-$instance_id"
HIPCHAT_MSG_COLOR=$(echo -e "yellow\\ngreen\\npurple\\ngray" | shuf | head -1)
# environment for ansible
export ANSIBLE_ENABLE_SQS SQS_NAME SQS_REGION SQS_MSG_PREFIX PYTHONUNBUFFERED
export HIPCHAT_TOKEN HIPCHAT_ROOM HIPCHAT_MSG_PREFIX HIPCHAT_FROM HIPCHAT_MSG_COLOR

if [[ ! -x /usr/bin/git || ! -x /usr/bin/pip ]]; then
    echo "Installing pkg dependencies"
    /usr/bin/apt-get update
    /usr/bin/apt-get install -y git python-pip python-apt \\
        git-core build-essential python-dev libxml2-dev \\
        libxslt-dev curl --force-yes
fi


rm -rf $base_dir
mkdir -p $base_dir
cd $base_dir

cat << EOF > $git_ssh
#!/bin/sh
exec /usr/bin/ssh -o StrictHostKeyChecking=no -i "$secure_identity" "\\$@"
EOF

chmod 755 $git_ssh

if $config_secure; then
    cat << EOF > $secure_identity
{identity_contents}
EOF
    chmod 400 $secure_identity
fi

cat << EOF >> $extra_vars
---
# extra vars passed into
# abbey.py including versions
# of all the repositories
{extra_vars_yml}

# abbey will always run fake migrations
# this is so that the application can come
# up healthy
fake_migrations: true

disable_edx_services: true
COMMON_TAG_EC2_INSTANCE: true

# abbey should never take instances in
# and out of elbs
elb_pre_post: false
EOF

$git_cmd clone $git_repo $git_repo_name
cd $git_repo_name
$git_cmd checkout $configuration_version
cd $base_dir

if $config_secure; then
    $git_cmd clone $git_repo_secure $git_repo_secure_name
    cd $git_repo_secure_name
    $git_cmd checkout $configuration_secure_version
    cd $base_dir
fi

if [[ ! -z $git_repo_private ]]; then
    $git_cmd clone $git_repo_private $git_repo_private_name
    cd $git_repo_private_name
    $git_cmd checkout $configuration_private_version
    cd $base_dir
fi


cd $base_dir/$git_repo_name
sudo pip install -r pre-requirements.txt
sudo pip install -r requirements.txt

cd $playbook_dir

if [[ -r "$deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$deployment_secure_vars"
fi

if [[ -r "$environment_deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$environment_deployment_secure_vars"
fi

if [[ "$secure_vars_file" != "false" ]]; then
    # --secure-vars-file is documented as a path from the root of
    # the secure repo, absolute paths are used as given
    if [[ "$secure_vars_file" != /* ]]; then
        secure_vars_file="$base_dir/$git_repo_secure_name/$secure_vars_file"
    fi
    extra_args_opts+=" -e@$secure_vars_file"
fi

extra_args_opts+=" -e@$extra_vars"

ansible-playbook -vvvv -c local -i "localhost," $play.yml $extra_args_opts
ansible-playbook -vvvv -c local -i "localhost," stop_all_edx_services.yml $extra_args_opts

rm -rf $base_dir

    """.format(
                hipchat_token=args.hipchat_api_token,
                hipchat_room=args.ansible_hipchat_room_id,
                configuration_version=args.configuration_version,
                configuration_secure_version=args.configuration_secure_version,
                configuration_secure_repo=args.configuration_secure_repo,
                configuration_private_version=args.configuration_private_version,
                configuration_private_repo=args.configuration_private_repo,
                environment=args.environment,
                deployment=args.deployment,
                play=args.play,
                playbook_dir=args.playbook_dir,
                config_secure=config_secure,
                identity_contents=identity_contents,
                queue_name=run_id,
                region=args.region,
                extra_vars_yml=extra_vars_yml,
                secure_vars_file=secure_vars_file)

    mapping = BlockDeviceMapping()
    root_vol = BlockDeviceType(size=args.root_vol_size,
                               volume_type='gp2')
    mapping['/dev/sda1'] = root_vol

    ec2_args = {
        'security_group_ids': [security_group_id],
        'subnet_id': subnet_id,
        'key_name': args.keypair,
        'image_id': base_ami,
        'instance_type': args.instance_type,
        'instance_profile_name': args.role_name,
        'user_data': user_data,
        'block_device_map': mapping,
    }

    return ec2_args


def poll_sqs_ansible():
    """
    Prints events to the console and
    blocks until a final STATS ansible
    event is read off of SQS.

    SQS does not guarantee FIFO, for that
    reason there is a buffer that will delay
    messages before they are printed to the
    console.

    Returns length of the ansible run.
    """
    oldest_msg_ts = 0
    buf = []
    task_report = []  # list of tasks for reporting
    last_task = None
423
    completed = 0
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453
    while True:
        messages = []
        while True:
            # get all available messages on the queue
            msgs = sqs_queue.get_messages(attributes='All')
            if not msgs:
                break
            messages.extend(msgs)

        for message in messages:
            recv_ts = float(
                message.attributes['ApproximateFirstReceiveTimestamp']) * .001
            sent_ts = float(message.attributes['SentTimestamp']) * .001
            try:
                msg_info = {
                    'msg': json.loads(message.get_body()),
                    'sent_ts': sent_ts,
                    'recv_ts': recv_ts,
                }
                buf.append(msg_info)
            except ValueError as e:
                print "!!! ERROR !!! unable to parse queue message, " \
                      "expecting valid json: {} : {}".format(
                          message.get_body(), e)
            if not oldest_msg_ts or recv_ts < oldest_msg_ts:
                oldest_msg_ts = recv_ts
            sqs_queue.delete_message(message)

        now = int(time.time())
        if buf:
Feanil Patel committed
454
            try:
455
                if (now - min([msg['recv_ts'] for msg in buf])) > args.msg_delay:
Feanil Patel committed
456 457 458 459 460 461 462 463 464 465 466
                    # sort by TS instead of recv_ts
                    # because the sqs timestamp is not as
                    # accurate
                    buf.sort(key=lambda k: k['msg']['TS'])
                    to_disp = buf.pop(0)
                    if 'START' in to_disp['msg']:
                        print '\n{:0>2.0f}:{:0>5.2f} {} : Starting "{}"'.format(
                            to_disp['msg']['TS'] / 60,
                            to_disp['msg']['TS'] % 60,
                            to_disp['msg']['PREFIX'],
                            to_disp['msg']['START']),
467

Feanil Patel committed
468 469 470 471 472 473 474 475 476 477 478 479
                    elif 'TASK' in to_disp['msg']:
                        print "\n{:0>2.0f}:{:0>5.2f} {} : {}".format(
                            to_disp['msg']['TS'] / 60,
                            to_disp['msg']['TS'] % 60,
                            to_disp['msg']['PREFIX'],
                            to_disp['msg']['TASK']),
                        last_task = to_disp['msg']['TASK']
                    elif 'OK' in to_disp['msg']:
                        if args.verbose:
                            print "\n"
                            for key, value in to_disp['msg']['OK'].iteritems():
                                print "    {:<15}{}".format(key, value)
480
                        else:
481 482 483 484 485 486
                            invocation = to_disp['msg']['OK']['invocation']
                            module = invocation['module_name']
                            # 'set_fact' does not provide a changed value.
                            if module == 'set_fact':
                                changed = "OK"
                            elif to_disp['msg']['OK']['changed']:
Feanil Patel committed
487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
                                changed = "*OK*"
                            else:
                                changed = "OK"
                            print " {}".format(changed),
                        task_report.append({
                            'TASK': last_task,
                            'INVOCATION': to_disp['msg']['OK']['invocation'],
                            'DELTA': to_disp['msg']['delta'],
                        })
                    elif 'FAILURE' in to_disp['msg']:
                        print " !!!! FAILURE !!!!",
                        for key, value in to_disp['msg']['FAILURE'].iteritems():
                            print "    {:<15}{}".format(key, value)
                        raise Exception("Failed Ansible run")
                    elif 'STATS' in to_disp['msg']:
                        print "\n{:0>2.0f}:{:0>5.2f} {} : COMPLETE".format(
                            to_disp['msg']['TS'] / 60,
                            to_disp['msg']['TS'] % 60,
                            to_disp['msg']['PREFIX'])
506

Feanil Patel committed
507 508 509 510 511 512 513 514 515
                        # Since 3 ansible plays get run.
                        # We see the COMPLETE message 3 times
                        # wait till the last one to end listening
                        # for new messages.
                        completed += 1
                        if completed >= NUM_PLAYBOOKS:
                            return (to_disp['msg']['TS'], task_report)
            except KeyError:
                print "Failed to print status from message: {}".format(to_disp)
516 517 518 519 520 521 522 523 524 525 526 527 528

        if not messages:
            # wait 1 second between sqs polls
            time.sleep(1)


def create_ami(instance_id, name, description):
    """
    Creates an AMI from instance_id without rebooting the
    instance, waits for it to become available and tags it.

    The image is tagged with environment, deployment, play, the
    configuration and configuration-secure versions, the cache id
    and every "version:" tag found on the instance.

    Returns the id of the new image.

    Raises an Exception when the image reports the failed state,
    on an unexpected EC2 error code, or when the image is not
    available after AMI_TIMEOUT polls.
    """

    params = {'instance_id': instance_id,
              'name': name,
              'description': description,
              'no_reboot': True}

    # pause between consecutive tagging calls
    AWS_API_WAIT_TIME = 1
    image_id = ec2.create_image(**params)
    print("Checking if image is ready.")
    for _ in xrange(AMI_TIMEOUT):
        try:
            img = ec2.get_image(image_id)
            if img.state == 'available':
                print("Tagging image.")
                image_tags = [
                    ("environment", args.environment),
                    ("deployment", args.deployment),
                    ("play", args.play),
                    ("version:configuration", "{} {}".format(
                        "http://github.com/edx/configuration",
                        args.configuration_version)),
                    ("version:configuration_secure", "{} {}".format(
                        args.configuration_secure_repo,
                        args.configuration_secure_version)),
                    ("cache_id", args.cache_id),
                ]
                for tag_name, tag_value in image_tags:
                    img.add_tag(tag_name, tag_value)
                    time.sleep(AWS_API_WAIT_TIME)

                # Get versions from the instance.
                tags = ec2.get_all_tags(filters={'resource-id': instance_id})
                for tag in tags:
                    if tag.name.startswith('version:'):
                        img.add_tag(tag.name, tag.value)
                        time.sleep(AWS_API_WAIT_TIME)
                break
            elif img.state == 'failed':
                # a failed image never becomes available, do not
                # wait for the timeout
                raise Exception("AMI {} entered the failed state".format(
                    image_id))
            else:
                time.sleep(1)
        except EC2ResponseError as e:
            # A freshly created image id may not be visible yet,
            # wait one second and poll again.
            if e.error_code == 'InvalidAMIID.NotFound':
                time.sleep(1)
            else:
                raise Exception("Unexpected error code: {}".format(
                    e.error_code))
    else:
        raise Exception("Timeout waiting for AMI to finish")

    return image_id
John Jarvis committed
572

John Jarvis committed
573

574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
def launch_and_configure(ec2_args):
    """
    Creates an sqs queue, launches an ec2 instance,
    configures it and creates an AMI. Polls
    SQS for updates
    """

    print "{:<40}".format(
        "Creating SQS queue and launching instance for {}:".format(run_id))
    print
    for k, v in ec2_args.iteritems():
        if k != 'user_data':
            print "    {:<25}{}".format(k, v)
    print

589 590
    global sqs_queue
    global instance_id
591 592 593 594 595
    sqs_queue = sqs.create_queue(run_id)
    sqs_queue.set_message_class(RawMessage)
    res = ec2.run_instances(**ec2_args)
    inst = res.instances[0]
    instance_id = inst.id
596

e0d committed
597 598
    print "{:<40}".format(
        "Waiting for instance {} to reach running status:".format(instance_id)),
599 600
    status_start = time.time()
    for _ in xrange(EC2_RUN_TIMEOUT):
601 602 603 604 605 606 607 608 609 610
        try:
            res = ec2.get_all_instances(instance_ids=[instance_id])
        except EC2ResponseError as e:
            if e.code == "InvalidInstanceID.NotFound":
                print("Instance not found({}), will try again.".format(
                    instance_id))
                time.sleep(1)
                continue
            else:
                raise(e)
611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
        if res[0].instances[0].state == 'running':
            status_delta = time.time() - status_start
            run_summary.append(('EC2 Launch', status_delta))
            print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(
                status_delta / 60,
                status_delta % 60)
            break
        else:
            time.sleep(1)
    else:
        raise Exception("Timeout waiting for running status: {} ".format(
            instance_id))

    print "{:<40}".format("Waiting for system status:"),
    system_start = time.time()
    for _ in xrange(EC2_STATUS_TIMEOUT):
        status = ec2.get_all_instance_status(inst.id)
        if status[0].system_status.status == u'ok':
            system_delta = time.time() - system_start
            run_summary.append(('EC2 Status Checks', system_delta))
            print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(
                system_delta / 60,
                system_delta % 60)
            break
        else:
            time.sleep(1)
    else:
        raise Exception("Timeout waiting for status checks: {} ".format(
            instance_id))

    print
    print "{:<40}".format(
        "Waiting for user-data, polling sqs for Ansible events:")

    (ansible_delta, task_report) = poll_sqs_ansible()
    run_summary.append(('Ansible run', ansible_delta))
    print
    print "{} longest Ansible tasks (seconds):".format(NUM_TASKS)
    for task in sorted(
            task_report, reverse=True,
            key=lambda k: k['DELTA'])[:NUM_TASKS]:
        print "{:0>3.0f} {}".format(task['DELTA'], task['TASK'])
        print "  - {}".format(task['INVOCATION'])
    print

    print "{:<40}".format("Creating AMI:"),
    ami_start = time.time()
    ami = create_ami(instance_id, run_id, run_id)
    ami_delta = time.time() - ami_start
    print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(
        ami_delta / 60,
        ami_delta % 60)
    run_summary.append(('AMI Build', ami_delta))
    total_time = time.time() - start_time
    all_stages = sum(run[1] for run in run_summary)
    if total_time - all_stages > 0:
        run_summary.append(('Other', total_time - all_stages))
    run_summary.append(('Total', total_time))

    return run_summary, ami
671

John Jarvis committed
672

e0d committed
673
def send_hipchat_message(message):
    """
    Prints message and forwards it to the configured notification
    targets: the callback url (--callback-url) and the hipchat room
    (--hipchat-api-token together with --hipchat-room-id).

    Both notifications are best effort, errors are printed and
    never raised, so that a notification problem can not hide the
    result of the run.
    """
    print(message)
    if args.callback_url:
        # NOTE(review): the message is appended to the url as it is,
        # characters such as "?" or "#" in it are not escaped.
        try:
            requests.get("{}/{}".format(args.callback_url, message),
                         timeout=30)
        except Exception as e:
            print("Callback notification resulted in an error: %s." % e)

    #If hipchat is configured send the details to the specified room
    if args.hipchat_api_token and args.hipchat_room_id:
        try:
            # imported here so that the hipchat package is only
            # needed when hipchat is configured
            import hipchat
            client = hipchat.HipChat(token=args.hipchat_api_token)
            client.message_room(args.hipchat_room_id, 'AbbeyNormal',
                                message)
        except Exception as e:
            print("Hipchat messaging resulted in an error: %s." % e)
e0d committed
687

688 689 690 691 692 693 694 695 696 697 698
if __name__ == '__main__':

    args = parse_args()

    run_summary = []

    start_time = time.time()

    if args.vars:
        with open(args.vars) as f:
            extra_vars_yml = f.read()
699
            extra_vars = yaml.load(extra_vars_yml)
700
    else:
701
        extra_vars_yml = ""
702
        extra_vars = {}
703

704
    if args.secure_vars_file:
705 706
        # explicit path to a single
        # secure var file
707
        secure_vars_file = args.secure_vars_file
708
    else:
709
        secure_vars_file = 'false'
710

711 712 713 714 715 716 717 718
    if args.stack_name:
        stack_name = args.stack_name
    else:
        stack_name = "{}-{}".format(args.environment, args.deployment)

    try:
        ec2 = boto.ec2.connect_to_region(args.region)
    except NoAuthHandlerFound:
719 720 721 722 723 724 725
        print 'Unable to connect to ec2 in region :{}'.format(args.region)
        sys.exit(1)

    try:
        sqs = boto.sqs.connect_to_region(args.region)
    except NoAuthHandlerFound:
        print 'Unable to connect to sqs in region :{}'.format(args.region)
726 727
        sys.exit(1)

728 729 730 731 732
    if args.blessed:
        base_ami = get_blessed_ami()
    else:
        base_ami = args.base_ami

733
    error_in_abbey_run = False
734 735 736 737
    try:
        sqs_queue = None
        instance_id = None

Feanil Patel committed
738
        run_id = "{}-abbey-{}-{}-{}".format(
739
            int(time.time() * 100), args.environment, args.deployment, args.play)
740 741 742

        ec2_args = create_instance_args()

743
        if args.noop:
John Jarvis committed
744 745
            print "Would have created sqs_queue with id: {}\nec2_args:".format(
                run_id)
746 747
            pprint(ec2_args)
            ami = "ami-00000"
748
        else:
749 750 751 752 753 754 755 756
            run_summary, ami = launch_and_configure(ec2_args)
            print
            print "Summary:\n"

            for run in run_summary:
                print "{:<30} {:0>2.0f}:{:0>5.2f}".format(
                    run[0], run[1] / 60, run[1] % 60)
            print "AMI: {}".format(ami)
e0d committed
757

John Jarvis committed
758 759 760 761 762
            message = 'Finished baking AMI {image_id} for {environment} {deployment} {play}.'.format(
                image_id=ami,
                environment=args.environment,
                deployment=args.deployment,
                play=args.play)
e0d committed
763 764

            send_hipchat_message(message)
e0d committed
765
    except Exception as e:
e0d committed
766
        message = 'An error occurred building AMI for {environment} ' \
e0d committed
767
            '{deployment} {play}.  The Exception was {exception}'.format(
e0d committed
768 769
                environment=args.environment,
                deployment=args.deployment,
e0d committed
770 771
                play=args.play,
                exception=repr(e))
e0d committed
772
        send_hipchat_message(message)
773
        error_in_abbey_run = True
774 775
    finally:
        print
776
        if not args.no_cleanup and not args.noop:
777 778 779 780 781 782
            if sqs_queue:
                print "Cleaning up - Removing SQS queue - {}".format(run_id)
                sqs.delete_queue(sqs_queue)
            if instance_id:
                print "Cleaning up - Terminating instance ID - {}".format(
                    instance_id)
783 784 785
            # Check to make sure we have an instance id.
            if instance_id:
                ec2.terminate_instances(instance_ids=[instance_id])
786 787
        if error_in_abbey_run:
            exit(1)