#!/usr/bin/env python
"""
abbey.py

Launches an EC2 instance, configures it with Ansible through
user-data, reports progress over SQS and creates an AMI from
the result.
"""
# NOTE: the interpreter is not started with "-u" from the shebang:
# on Linux "env" receives "python -u" as a single argument and fails.
# stdout is wrapped in Unbuffered below, which flushes on every write.
import json
import os
import sys
import time
from argparse import ArgumentParser
from pprint import pprint

import yaml

try:
    import boto.ec2
    import boto.sqs
    from boto.vpc import VPCConnection
    from boto.exception import NoAuthHandlerFound, EC2ResponseError
    from boto.sqs.message import RawMessage
    from boto.ec2.blockdevicemapping import BlockDeviceType, BlockDeviceMapping
except ImportError:
    print("boto required for script")
    sys.exit(1)

AMI_TIMEOUT = 600  # time to wait for AMIs to complete
EC2_RUN_TIMEOUT = 180  # time to wait for ec2 state transition
EC2_STATUS_TIMEOUT = 300  # time to wait for ec2 system status checks
NUM_TASKS = 5  # number of tasks for time summary report
# number of ansible-playbook runs in the user-data script; the SQS
# poller waits for this many STATS events before it returns
NUM_PLAYBOOKS = 2


class Unbuffered:
    """
    For unbuffered output, not
    needed if PYTHONUNBUFFERED is set

    Wraps a file-like object and flushes it after every write.
    Any attribute that is not defined here is looked up on the
    wrapped stream.
    """
    def __init__(self, stream):
        # stream: the file-like object to wrap (e.g. sys.stdout)
        self.stream = stream

    def write(self, data):
        """Write data to the wrapped stream and flush immediately."""
        self.stream.write(data)
        self.stream.flush()

    def writelines(self, lines):
        """Write a sequence of strings and flush immediately."""
        # without this override writelines() would be delegated through
        # __getattr__ and skip the flush
        self.stream.writelines(lines)
        self.stream.flush()

    def __getattr__(self, attr):
        # only called for attributes not found on the wrapper itself
        return getattr(self.stream, attr)

# Route everything written to stdout through the flushing wrapper so
# output shows up as soon as it is written.
sys.stdout = Unbuffered(sys.stdout)


def parse_args():
    """
    Builds the command line parser and parses sys.argv.

    --play, --deployment, --environment and --cache-id are required.
    --base-ami and --blessed are mutually exclusive.

    Returns the argparse Namespace with the parsed options.
    """
    parser = ArgumentParser()
    parser.add_argument('--noop', action='store_true',
                        help="don't actually run the cmds",
                        default=False)
    parser.add_argument('--secure-vars-file', required=False,
                        metavar="SECURE_VAR_FILE", default=None,
                        help="path to secure-vars from the root of "
                        "the secure repo. By default <deployment>.yml and "
                        "<environment>-<deployment>.yml will be used if they "
                        "exist in <secure-repo>/ansible/vars/. This secure file "
                        "will be used in addition to these if they exist.")
    parser.add_argument('--stack-name',
                        help="defaults to ENVIRONMENT-DEPLOYMENT",
                        metavar="STACK_NAME",
                        required=False)
    parser.add_argument('-p', '--play',
                        help='play name without the yml extension',
                        metavar="PLAY", required=True)
    parser.add_argument('--playbook-dir',
                        help='directory to find playbooks in',
                        default='configuration/playbooks/edx-east',
                        metavar="PLAYBOOKDIR", required=False)
    parser.add_argument('-d', '--deployment', metavar="DEPLOYMENT",
                        required=True)
    parser.add_argument('-e', '--environment', metavar="ENVIRONMENT",
                        required=True)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="turn on verbosity")
    parser.add_argument('--no-cleanup', action='store_true',
                        help="don't cleanup on failures")
    parser.add_argument('--vars', metavar="EXTRA_VAR_FILE",
                        help="path to extra var file", required=False)
    parser.add_argument('--refs', metavar="GIT_REFS_FILE",
                        help="path to a var file with app git refs", required=False)
    parser.add_argument('--configuration-version', required=False,
                        help="configuration repo branch(no hashes)",
                        default="master")
    parser.add_argument('--configuration-secure-version', required=False,
                        help="configuration-secure repo branch(no hashes)",
                        default="master")
    parser.add_argument('--configuration-secure-repo', required=False,
                        default="git@github.com:edx-ops/prod-secure",
                        help="repo to use for the secure files")
    parser.add_argument('--configuration-private-version', required=False,
                        help="configuration-private repo branch(no hashes)",
                        default="master")
    parser.add_argument('--configuration-private-repo', required=False,
                        default="git@github.com:edx-ops/ansible-private",
                        help="repo to use for private playbooks")
    parser.add_argument('-c', '--cache-id', required=True,
                        help="unique id to use as part of cache prefix")
    parser.add_argument('-i', '--identity', required=False,
                        help="path to identity file for pulling "
                             "down configuration-secure",
                        default=None)
    parser.add_argument('-r', '--region', required=False,
                        default="us-east-1",
                        help="aws region")
    parser.add_argument('-k', '--keypair', required=False,
                        default="deployment",
                        help="AWS keypair to use for instance")
    parser.add_argument('-t', '--instance-type', required=False,
                        default="m1.large",
                        help="instance type to launch")
    parser.add_argument("--role-name", required=False,
                        default="abbey",
                        help="IAM role name to use (must exist)")
    # the delay is compared against timestamps, so it has to be parsed
    # as a number; without a type a value given on the command line
    # stays a string
    parser.add_argument("--msg-delay", required=False,
                        default=5, type=float,
                        help="How long to delay message display from sqs "
                             "to ensure ordering")
    parser.add_argument("--hipchat-room-id", required=False,
                        default=None,
                        help="The API ID of the Hipchat room to post "
                             "status messages to")
    parser.add_argument("--hipchat-api-token", required=False,
                        default=None,
                        help="The API token for Hipchat integration")
    parser.add_argument("--root-vol-size", required=False,
                        default=50, type=int,
                        help="The size of the root volume to use for the "
                             "abbey instance.")

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-b', '--base-ami', required=False,
                       help="ami to use as a base ami",
                       default="ami-0568456c")
    group.add_argument('--blessed', action='store_true',
                       help="Look up blessed ami for env-dep-play.",
                       default=False)

    return parser.parse_args()


def get_instance_sec_group(vpc_id):
    """
    Looks up the security groups in the given VPC that are
    tagged with the current play and returns the id of the
    first one.

    Writes an error to stderr and exits with status 1 when no
    group matches, the same way a missing subnet is handled.
    """

    grp_details = ec2.get_all_security_groups(
        filters={
            'vpc_id': vpc_id,
            'tag:play': args.play
        }
    )

    if len(grp_details) < 1:
        sys.stderr.write("ERROR: Expected atleast one security group, got {}\n".format(
            len(grp_details)))
        # there is nothing to index into, stop here instead of
        # failing with an IndexError on the return below
        sys.exit(1)

    return grp_details[0].id


def get_blessed_ami():
    """
    Returns the id of the single AMI tagged as blessed for the
    current environment, deployment and play.

    Raises an Exception when the lookup does not return exactly
    one image.
    """
    blessed_filter = {
        'tag:environment': args.environment,
        'tag:deployment': args.deployment,
        'tag:play': args.play,
        'tag:blessed': True,
    }
    matches = ec2.get_all_images(filters=blessed_filter)
    found = len(matches)

    if found == 1:
        return matches[0].id

    raise Exception(
        "ERROR: Expected only one blessed ami, got {}\n".format(found))
def create_instance_args():
    """
    Looks up security group, subnet
    and returns arguments to pass into
    ec2.run_instances() including
    user data

    Reads the module level names args, stack_name, run_id,
    extra_vars_yml, git_refs_yml, secure_vars_file and base_ami.
    Exits with status 1 when no subnet is tagged for the stack
    and play.

    The user data is a bash script that checks out the
    configuration repos on the instance and runs two playbooks
    locally ($play.yml and stop_all_edx_services.yml).
    """

    # NOTE(review): VPCConnection() is created without a region, so it
    # presumably uses boto's default region rather than args.region -
    # confirm this is intended.
    vpc = VPCConnection()
    subnet = vpc.get_all_subnets(
        filters={
            'tag:aws:cloudformation:stack-name': stack_name,
            'tag:play': args.play}
    )
    if len(subnet) < 1:
        sys.stderr.write("ERROR: Expected at least one subnet, got {}\n".format(
            len(subnet)))
        sys.exit(1)
    subnet_id = subnet[0].id
    vpc_id = subnet[0].vpc_id

    security_group_id = get_instance_sec_group(vpc_id)

    if args.identity:
        config_secure = 'true'
        with open(args.identity) as f:
            identity_contents = f.read()
    else:
        config_secure = 'false'
        identity_contents = "dummy"

    # Notes on the script below:
    #  - SQS_REGION follows args.region because that is the region the
    #    queue is created in
    #  - the identity file only exists when config_secure is true, so
    #    its chmod lives inside that branch (the script runs with set -e)
    #  - secure_vars_file is either the literal "false" or a path that
    #    is passed to ansible as given; it is compared, never executed
    user_data = """#!/bin/bash
set -x
set -e
exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
base_dir="/var/tmp/edx-cfg"
extra_vars="$base_dir/extra-vars-$$.yml"
secure_identity="$base_dir/secure-identity"
git_ssh="$base_dir/git_ssh.sh"
configuration_version="{configuration_version}"
configuration_secure_version="{configuration_secure_version}"
configuration_private_version="{configuration_private_version}"
environment="{environment}"
deployment="{deployment}"
play="{play}"
config_secure={config_secure}
git_repo_name="configuration"
git_repo="https://github.com/edx/$git_repo_name"
git_repo_secure="{configuration_secure_repo}"
git_repo_secure_name="{configuration_secure_repo_basename}"
git_repo_private="{configuration_private_repo}"
git_repo_private_name=$(basename $git_repo_private .git)
secure_vars_file={secure_vars_file}
environment_deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{environment}-{deployment}.yml"
deployment_secure_vars="$base_dir/$git_repo_secure_name/ansible/vars/{deployment}.yml"
instance_id=\\
$(curl http://169.254.169.254/latest/meta-data/instance-id 2>/dev/null)
instance_ip=\\
$(curl http://169.254.169.254/latest/meta-data/local-ipv4 2>/dev/null)
instance_type=\\
$(curl http://169.254.169.254/latest/meta-data/instance-type 2>/dev/null)
playbook_dir="$base_dir/{playbook_dir}"

if $config_secure; then
    git_cmd="env GIT_SSH=$git_ssh git"
else
    git_cmd="git"
fi

ANSIBLE_ENABLE_SQS=true
SQS_NAME={queue_name}
SQS_REGION={region}
SQS_MSG_PREFIX="[ $instance_id $instance_ip $environment-$deployment $play ]"
PYTHONUNBUFFERED=1
HIPCHAT_TOKEN={hipchat_token}
HIPCHAT_ROOM={hipchat_room}
HIPCHAT_MSG_PREFIX="$environment-$deployment-$play: "
HIPCHAT_FROM="ansible-$instance_id"
HIPCHAT_MSG_COLOR=$(echo -e "yellow\\ngreen\\npurple\\ngray" | shuf | head -1)
# environment for ansible
export ANSIBLE_ENABLE_SQS SQS_NAME SQS_REGION SQS_MSG_PREFIX PYTHONUNBUFFERED
export HIPCHAT_TOKEN HIPCHAT_ROOM HIPCHAT_MSG_PREFIX HIPCHAT_FROM HIPCHAT_MSG_COLOR

if [[ ! -x /usr/bin/git || ! -x /usr/bin/pip ]]; then
    echo "Installing pkg dependencies"
    /usr/bin/apt-get update
    /usr/bin/apt-get install -y git python-pip python-apt \\
        git-core build-essential python-dev libxml2-dev \\
        libxslt-dev curl --force-yes
fi


rm -rf $base_dir
mkdir -p $base_dir
cd $base_dir

cat << EOF > $git_ssh
#!/bin/sh
exec /usr/bin/ssh -o StrictHostKeyChecking=no -i "$secure_identity" "\\$@"
EOF

chmod 755 $git_ssh

if $config_secure; then
    cat << EOF > $secure_identity
{identity_contents}
EOF
    chmod 400 $secure_identity
fi

cat << EOF >> $extra_vars
---
# extra vars passed into
# abbey.py including versions
# of all the repositories
{extra_vars_yml}

{git_refs_yml}

# abbey will always run fake migrations
# this is so that the application can come
# up healthy
fake_migrations: true

# Use the build number an the dynamic cache key.
EDXAPP_UPDATE_STATIC_FILES_KEY: true
edxapp_dynamic_cache_key: {deployment}-{environment}-{play}-{cache_id}

disable_edx_services: true

# abbey should never take instances in
# and out of elbs
elb_pre_post: false
EOF

$git_cmd clone $git_repo $git_repo_name
cd $git_repo_name
$git_cmd checkout $configuration_version
cd $base_dir

if $config_secure; then
    $git_cmd clone $git_repo_secure $git_repo_secure_name
    cd $git_repo_secure_name
    $git_cmd checkout $configuration_secure_version
    cd $base_dir
fi

if [[ ! -z $git_repo_private ]]; then
    $git_cmd clone $git_repo_private $git_repo_private_name
    cd $git_repo_private_name
    $git_cmd checkout $configuration_private_version
    cd $base_dir
fi


cd $base_dir/$git_repo_name
sudo pip install -r requirements.txt

cd $playbook_dir

if [[ -r "$deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$deployment_secure_vars"
fi

if [[ -r "$environment_deployment_secure_vars" ]]; then
    extra_args_opts+=" -e@$environment_deployment_secure_vars"
fi

if [[ "$secure_vars_file" != "false" ]]; then
    extra_args_opts+=" -e@$secure_vars_file"
fi

extra_args_opts+=" -e@$extra_vars"

ansible-playbook -vvvv -c local -i "localhost," $play.yml $extra_args_opts
ansible-playbook -vvvv -c local -i "localhost," stop_all_edx_services.yml $extra_args_opts

rm -rf $base_dir

    """.format(
        hipchat_token=args.hipchat_api_token,
        hipchat_room=args.hipchat_room_id,
        configuration_version=args.configuration_version,
        configuration_secure_version=args.configuration_secure_version,
        configuration_secure_repo=args.configuration_secure_repo,
        configuration_secure_repo_basename=os.path.basename(
            args.configuration_secure_repo),
        configuration_private_version=args.configuration_private_version,
        configuration_private_repo=args.configuration_private_repo,
        environment=args.environment,
        deployment=args.deployment,
        play=args.play,
        playbook_dir=args.playbook_dir,
        config_secure=config_secure,
        identity_contents=identity_contents,
        queue_name=run_id,
        region=args.region,
        extra_vars_yml=extra_vars_yml,
        git_refs_yml=git_refs_yml,
        secure_vars_file=secure_vars_file,
        cache_id=args.cache_id)

    # root volume sized from --root-vol-size
    mapping = BlockDeviceMapping()
    root_vol = BlockDeviceType(size=args.root_vol_size)
    mapping['/dev/sda1'] = root_vol

    ec2_args = {
        'security_group_ids': [security_group_id],
        'subnet_id': subnet_id,
        'key_name': args.keypair,
        'image_id': base_ami,
        'instance_type': args.instance_type,
        'instance_profile_name': args.role_name,
        'user_data': user_data,
        'block_device_map': mapping,
    }

    return ec2_args


def poll_sqs_ansible():
    """
    Prints events to the console and
    blocks until the final STATS ansible
    event is read off of SQS.

    SQS does not guarantee FIFO, for that
    reason there is a buffer that will delay
    messages before they are printed to the
    console.

    Returns a tuple of (TS of the last STATS message,
    list of task report dicts with TASK, INVOCATION and DELTA).

    Raises an Exception when a FAILURE event is received.
    """
    # NOTE: the print calls below are Python 2 print statements; a
    # trailing comma after the closing parenthesis suppresses the newline.
    buf = []
    task_report = []  # list of tasks for reporting
    last_task = None
    completed = 0
    # the delay may still be a string when it was given on the command line
    msg_delay = float(args.msg_delay)
    while True:
        messages = []
        while True:
            # get all available messages on the queue
            msgs = sqs_queue.get_messages(attributes='All')
            if not msgs:
                break
            messages.extend(msgs)

        for message in messages:
            recv_ts = float(
                message.attributes['ApproximateFirstReceiveTimestamp']) * .001
            sent_ts = float(message.attributes['SentTimestamp']) * .001
            try:
                body = json.loads(message.get_body())
            except ValueError as e:
                print("!!! ERROR !!! unable to parse queue message, "
                      "expecting valid json: {} : {}".format(
                          message.get_body(), e))
            else:
                if isinstance(body, dict) and 'TS' in body:
                    buf.append({
                        'msg': body,
                        'sent_ts': sent_ts,
                        'recv_ts': recv_ts,
                    })
                else:
                    # the buffer is ordered by TS; a message without it
                    # would make every later sort fail
                    print("!!! ERROR !!! queue message has no TS, "
                          "skipping: {}".format(body))
            sqs_queue.delete_message(message)

        now = int(time.time())
        if buf:
            # bound before the try so the except clause can always print it
            to_disp = None
            try:
                if (now - min(msg['recv_ts'] for msg in buf)) > msg_delay:
                    # sort by TS instead of recv_ts
                    # because the sqs timestamp is not as
                    # accurate
                    buf.sort(key=lambda k: k['msg']['TS'])
                    to_disp = buf.pop(0)
                    if 'START' in to_disp['msg']:
                        print('\n{:0>2.0f}:{:0>5.2f} {} : Starting "{}"'.format(
                            to_disp['msg']['TS'] / 60,
                            to_disp['msg']['TS'] % 60,
                            to_disp['msg']['PREFIX'],
                            to_disp['msg']['START'])),

                    elif 'TASK' in to_disp['msg']:
                        print("\n{:0>2.0f}:{:0>5.2f} {} : {}".format(
                            to_disp['msg']['TS'] / 60,
                            to_disp['msg']['TS'] % 60,
                            to_disp['msg']['PREFIX'],
                            to_disp['msg']['TASK'])),
                        last_task = to_disp['msg']['TASK']
                    elif 'OK' in to_disp['msg']:
                        if args.verbose:
                            print("\n")
                            for key, value in to_disp['msg']['OK'].iteritems():
                                print("    {:<15}{}".format(key, value))
                        else:
                            invocation = to_disp['msg']['OK']['invocation']
                            module = invocation['module_name']
                            # 'set_fact' does not provide a changed value.
                            if module == 'set_fact':
                                changed = "OK"
                            elif to_disp['msg']['OK']['changed']:
                                changed = "*OK*"
                            else:
                                changed = "OK"
                            print(" {}".format(changed)),
                        task_report.append({
                            'TASK': last_task,
                            'INVOCATION': to_disp['msg']['OK']['invocation'],
                            'DELTA': to_disp['msg']['delta'],
                        })
                    elif 'FAILURE' in to_disp['msg']:
                        print(" !!!! FAILURE !!!!"),
                        for key, value in to_disp['msg']['FAILURE'].iteritems():
                            print("    {:<15}{}".format(key, value))
                        raise Exception("Failed Ansible run")
                    elif 'STATS' in to_disp['msg']:
                        print("\n{:0>2.0f}:{:0>5.2f} {} : COMPLETE".format(
                            to_disp['msg']['TS'] / 60,
                            to_disp['msg']['TS'] % 60,
                            to_disp['msg']['PREFIX']))

                        # Every ansible-playbook run ends with a
                        # STATS event, so COMPLETE is seen
                        # NUM_PLAYBOOKS times; wait for the last
                        # one before we stop listening for new
                        # messages.
                        completed += 1
                        if completed >= NUM_PLAYBOOKS:
                            return (to_disp['msg']['TS'], task_report)
            except KeyError:
                print("Failed to print status from message: {}".format(to_disp))

        if not messages:
            # wait 1 second between sqs polls
            time.sleep(1)


def create_ami(instance_id, name, description):
    """
    Creates an AMI from the instance (without rebooting it),
    waits for the image to become available and tags it with
    the environment, deployment, play, configuration refs,
    cache id and one "refs:<repo>" tag per git ref.

    Returns the image id.

    Raises an Exception when the image reaches the 'failed'
    state, on an unexpected EC2 error code, or when the image
    is not available after AMI_TIMEOUT polls.
    """

    params = {'instance_id': instance_id,
              'name': name,
              'description': description,
              'no_reboot': True}

    # pause between tagging calls
    AWS_API_WAIT_TIME = 1

    tags = [
        ("environment", args.environment),
        ("deployment", args.deployment),
        ("play", args.play),
        ("configuration_ref", args.configuration_version),
        ("configuration_secure_ref", args.configuration_secure_version),
        ("configuration_secure_repo", args.configuration_secure_repo),
        ("cache_id", args.cache_id),
    ]
    # git_refs is None when the refs file was empty
    for repo, ref in (git_refs or {}).items():
        tags.append(("refs:{}".format(repo), ref))

    image_id = ec2.create_image(**params)
    print("Checking if image is ready.")
    for _ in xrange(AMI_TIMEOUT):
        try:
            img = ec2.get_image(image_id)
            if img is None:
                # the image is not visible yet, poll again
                time.sleep(1)
            elif img.state == 'available':
                print("Tagging image.")
                for key, value in tags:
                    img.add_tag(key, value)
                    time.sleep(AWS_API_WAIT_TIME)
                break
            elif img.state == 'failed':
                # no point in waiting for the timeout
                raise Exception("AMI creation failed: {}".format(image_id))
            else:
                time.sleep(1)
        except EC2ResponseError as e:
            if e.error_code == 'InvalidAMIID.NotFound':
                time.sleep(1)
            else:
                raise Exception("Unexpected error code: {}".format(
                    e.error_code))
    else:
        raise Exception("Timeout waiting for AMI to finish")

    return image_id


def launch_and_configure(ec2_args):
    """
    Creates an sqs queue, launches an ec2 instance,
    configures it and creates an AMI. Polls
    SQS for updates

    Sets the module level sqs_queue and instance_id so the
    caller can clean them up, and appends the timing of every
    stage to the module level run_summary.

    Returns a tuple of (run_summary, ami id).

    Raises an Exception when the instance does not reach the
    running state or does not pass its system status check in
    time.
    """
    # NOTE: the print calls below are Python 2 print statements; a
    # trailing comma after the closing parenthesis suppresses the newline.

    print("{:<40}".format(
        "Creating SQS queue and launching instance for {}:".format(run_id)))
    print
    for k, v in ec2_args.iteritems():
        if k != 'user_data':
            print("    {:<25}{}".format(k, v))
    print

    global sqs_queue
    global instance_id
    sqs_queue = sqs.create_queue(run_id)
    sqs_queue.set_message_class(RawMessage)
    res = ec2.run_instances(**ec2_args)
    inst = res.instances[0]
    instance_id = inst.id

    print("{:<40}".format(
        "Waiting for instance {} to reach running status:".format(instance_id))),
    status_start = time.time()
    for _ in xrange(EC2_RUN_TIMEOUT):
        res = ec2.get_all_instances(instance_ids=[instance_id])
        if res[0].instances[0].state == 'running':
            status_delta = time.time() - status_start
            run_summary.append(('EC2 Launch', status_delta))
            print("[ OK ] {:0>2.0f}:{:0>2.0f}".format(
                status_delta / 60,
                status_delta % 60))
            break
        else:
            time.sleep(1)
    else:
        raise Exception("Timeout waiting for running status: {} ".format(
            instance_id))

    print("{:<40}".format("Waiting for system status:")),
    system_start = time.time()
    for _ in xrange(EC2_STATUS_TIMEOUT):
        status = ec2.get_all_instance_status(inst.id)
        # an empty list means no status has been reported yet,
        # keep polling instead of failing on status[0]
        if status and status[0].system_status.status == u'ok':
            system_delta = time.time() - system_start
            run_summary.append(('EC2 Status Checks', system_delta))
            print("[ OK ] {:0>2.0f}:{:0>2.0f}".format(
                system_delta / 60,
                system_delta % 60))
            break
        else:
            time.sleep(1)
    else:
        raise Exception("Timeout waiting for status checks: {} ".format(
            instance_id))

    print
    print("{:<40}".format(
        "Waiting for user-data, polling sqs for Ansible events:"))

    (ansible_delta, task_report) = poll_sqs_ansible()
    run_summary.append(('Ansible run', ansible_delta))
    print
    print("{} longest Ansible tasks (seconds):".format(NUM_TASKS))
    for task in sorted(
            task_report, reverse=True,
            key=lambda k: k['DELTA'])[:NUM_TASKS]:
        print("{:0>3.0f} {}".format(task['DELTA'], task['TASK']))
        print("  - {}".format(task['INVOCATION']))
    print

    print("{:<40}".format("Creating AMI:")),
    ami_start = time.time()
    ami = create_ami(instance_id, run_id, run_id)
    ami_delta = time.time() - ami_start
    print("[ OK ] {:0>2.0f}:{:0>2.0f}".format(
        ami_delta / 60,
        ami_delta % 60))
    run_summary.append(('AMI Build', ami_delta))
    total_time = time.time() - start_time
    all_stages = sum(run[1] for run in run_summary)
    # time that is not accounted for by any of the stages above
    if total_time - all_stages > 0:
        run_summary.append(('Other', total_time - all_stages))
    run_summary.append(('Total', total_time))

    return run_summary, ami


def send_hipchat_message(message):
    """
    Prints the message and, when both a hipchat api token and a
    room id were given on the command line, also posts it to
    that room as 'AbbeyNormal'.

    Posting is best effort: any error, including a missing
    hipchat library, is printed and not raised.
    """
    print(message)
    # If hipchat is configured send the details to the specified room
    if args.hipchat_api_token and args.hipchat_room_id:
        try:
            # imported inside the try so a missing library is reported
            # like any other hipchat failure
            import hipchat
            client = hipchat.HipChat(token=args.hipchat_api_token)
            client.message_room(args.hipchat_room_id, 'AbbeyNormal',
                                message)
        except Exception as e:
            print("Hipchat messaging resulted in an error: %s." % e)


# Script entry point: parse the options, read the var files, connect to
# ec2 and sqs, bake the AMI, and clean up the queue and the instance
# afterwards (unless --no-cleanup or --noop was given).
if __name__ == '__main__':

    args = parse_args()

    run_summary = []

    start_time = time.time()

    # The var files are only ever treated as plain data, so they are
    # parsed with yaml.safe_load, which does not construct arbitrary
    # python objects. An empty file parses to None, hence the "or {}".
    if args.vars:
        with open(args.vars) as f:
            extra_vars_yml = f.read()
            extra_vars = yaml.safe_load(extra_vars_yml) or {}
    else:
        extra_vars_yml = ""
        extra_vars = {}

    if args.refs:
        with open(args.refs) as f:
            git_refs_yml = f.read()
            git_refs = yaml.safe_load(git_refs_yml) or {}
    else:
        git_refs_yml = ""
        git_refs = {}

    if args.secure_vars_file:
        # explicit path to a single
        # secure var file
        secure_vars_file = args.secure_vars_file
    else:
        # sentinel checked by the user-data script
        secure_vars_file = 'false'

    if args.stack_name:
        stack_name = args.stack_name
    else:
        stack_name = "{}-{}".format(args.environment, args.deployment)

    # connect_to_region returns None for a region it does not know,
    # that is treated like a failed connection
    try:
        ec2 = boto.ec2.connect_to_region(args.region)
    except NoAuthHandlerFound:
        ec2 = None
    if ec2 is None:
        print('Unable to connect to ec2 in region :{}'.format(args.region))
        sys.exit(1)

    try:
        sqs = boto.sqs.connect_to_region(args.region)
    except NoAuthHandlerFound:
        sqs = None
    if sqs is None:
        print('Unable to connect to sqs in region :{}'.format(args.region))
        sys.exit(1)

    if args.blessed:
        base_ami = get_blessed_ami()
    else:
        base_ami = args.base_ami

    error_in_abbey_run = False
    try:
        # set by launch_and_configure, read by the cleanup below
        sqs_queue = None
        instance_id = None

        run_id = "{}-abbey-{}-{}-{}".format(
            int(time.time() * 100), args.environment, args.deployment, args.play)

        ec2_args = create_instance_args()

        if args.noop:
            print("Would have created sqs_queue with id: {}\nec2_args:".format(
                run_id))
            pprint(ec2_args)
            ami = "ami-00000"
        else:
            run_summary, ami = launch_and_configure(ec2_args)
            print
            print("Summary:\n")

            for run in run_summary:
                print("{:<30} {:0>2.0f}:{:0>5.2f}".format(
                    run[0], run[1] / 60, run[1] % 60))
            print("AMI: {}".format(ami))

            message = 'Finished baking AMI {image_id} for {environment} {deployment} {play}.'.format(
                image_id=ami,
                environment=args.environment,
                deployment=args.deployment,
                play=args.play)

            send_hipchat_message(message)
    except Exception as e:
        message = 'An error occurred building AMI for {environment} ' \
            '{deployment} {play}.  The Exception was {exception}'.format(
                environment=args.environment,
                deployment=args.deployment,
                play=args.play,
                exception=repr(e))
        send_hipchat_message(message)
        error_in_abbey_run = True
    finally:
        print
        if not args.no_cleanup and not args.noop:
            if sqs_queue:
                print("Cleaning up - Removing SQS queue - {}".format(run_id))
                sqs.delete_queue(sqs_queue)
            # only terminate when an instance was actually launched
            if instance_id:
                print("Cleaning up - Terminating instance ID - {}".format(
                    instance_id))
                ec2.terminate_instances(instance_ids=[instance_id])
        if error_in_abbey_run:
            sys.exit(1)