Commit 86293769 by Feanil Patel Committed by GitHub

Merge pull request #3970 from edx/feanil/update_jenkins_admin

Update jenkins admin role.
parents d925b8ca c0ccfecf
# Configure an instance with the admin jenkins.
- name: install python2
hosts: all
become: True
gather_facts: False
roles:
- python
- name: Configure instance(s)
hosts: all
become: True
......
......@@ -30,16 +30,11 @@ JENKINS_ADMIN_AWS_CREDENTIALS: !!null
jenkins_admin_role_name: jenkins_admin
jenkins_admin_version: "1.630"
# repo for nodejs
jenkins_chrislea_ppa: "ppa:chris-lea/node.js"
jenkins_admin_version: "1.658"
#
# OS packages
#
jenkins_admin_debian_repos:
- "deb http://cosmos.cites.illinois.edu/pub/ubuntu/ precise-backports main universe"
jenkins_admin_debian_pkgs:
# These are copied from the edxapp
# role so that we can create virtualenvs
......@@ -56,7 +51,6 @@ jenkins_admin_debian_pkgs:
# libopenblas-base, it will cause
# problems for numpy
- gfortran
- libatlas3gf-base
- liblapack-dev
- g++
- libxml2-dev
......@@ -78,7 +72,6 @@ jenkins_admin_debian_pkgs:
- libpng12-dev
# for status.edx.org
- ruby
- ruby1.9.1
# for check-migrations
- mysql-client
# for aws cli scripting
......@@ -93,57 +86,10 @@ jenkins_admin_gem_pkgs:
jenkins_admin_redhat_pkgs: []
jenkins_admin_plugins:
- { name: "greenballs", version: "1.14" }
- { name: "rebuild", version: "1.21" }
- { name: "build-user-vars-plugin", version: "1.1" }
- { name: "matrix-auth", version: "1.2" }
- { name: "matrix-project", version: "1.3" }
- { name: "mailer", version: "1.9" }
- { name: "build-user-vars-plugin", version: "1.3" }
- { name: "credentials", version: "1.15" }
- { name: "ssh-credentials", version: "1.7.1" }
- { name: "ssh-agent", version: "1.4.1" }
- { name: "token-macro", version: "1.10" }
- { name: "parameterized-trigger", version: "2.25" }
- { name: "multiple-scms", version: "0.3" }
- { name: "maven-plugin", version: "2.5" }
- { name: "copy-project-link", version: "1.2" }
- { name: "scriptler", version: "2.6.1" }
- { name: "rebuild", version: "1.21" }
- { name: "ssh-slaves", version: "1.6" }
- { name: "translation", version: "1.11" }
- { name: "dynamicparameter", version: "0.2.0" }
- { name: "hipchat", version: "0.1.6" }
- { name: "throttle-concurrents", version: "1.8.3" }
- { name: "mask-passwords", version: "2.7.2" }
- { name: "jquery", version: "1.7.2-1" }
- { name: "dashboard-view", version: "2.9.4" }
- { name: "build-pipeline-plugin", version: "1.4.3" }
- { name: "s3", version: "0.6" }
- { name: "tmpcleaner", version: "1.1" }
- { name: "jobConfigHistory", version: "2.8" }
- { name: "build-timeout", version: "1.14" }
- { name: "next-build-number", version: "1.1" }
- { name: "nested-view", version: "1.14" }
- { name: "timestamper", version: "1.5.14" }
- { name: "github-api", version: "1.55" }
- { name: "postbuild-task", version: "1.8" }
- { name: "notification", version: "1.5" }
- { name: "copy-to-slave", version: "1.4.3" }
- { name: "github", version: "1.9.1" }
- { name: "copyartifact", version: "1.31" }
- { name: "shiningpanda", version: "0.21" }
- { name: "htmlpublisher", version: "1.3" }
- { name: "github-oauth", version: "0.20" }
- { name: "build-name-setter", version: "1.3" }
- { name: "jenkins-flowdock-plugin", version: "1.1.3" }
- { name: "simple-parameterized-builds-report", version: "1.3" }
- { name: "git-client", version: "1.19.0"}
- { name: "git", version: "2.4.0"}
jenkins_admin_plugins: [] # Plugins installed manually, not tracked here.
jenkins_admin_jobs:
- 'backup-jenkins'
# See templates directory for potential basic jobs you could add to your jenkins.
jenkins_admin_jobs: []
# Supervisor related settings
jenkins_supervisor_user: "{{ jenkins_user }}"
......
#!/bin/bash -x
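# This script monitors the primary and backup NATs. If the primary fails
# its ping health check, it e-mails an alert that includes the command
# for re-routing to the backup NAT; it does not change the route itself.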
set -e
# Health Check variables
# Number of echo requests sent per health check (passed to `ping -c`).
Num_Pings=3
# Reply timeout passed to `ping -W`.
Ping_Timeout=2
# Seconds to sleep after a healthy primary check before checking again.
Wait_Between_Pings=2
# NOTE(review): the next two values are not referenced anywhere in the
# visible script — confirm whether they are still needed.
Wait_for_Instance_Stop=60
Wait_for_Instance_Start=300
# Modulus applied to the loop counter that gates re-querying NAT IDs and IPs.
ID_UPDATE_INTERVAL=150
# Escape a string so it can be embedded inside a JSON string literal.
#   $1 - raw text
# Prints the escaped text (no surrounding quotes) on stdout.
_json_escape() {
    local raw=$1
    raw=${raw//\\/\\\\}      # backslash first, so later escapes are not doubled
    raw=${raw//\"/\\\"}
    raw=${raw//$'\n'/\\n}
    raw=${raw//$'\r'/\\r}
    raw=${raw//$'\t'/\\t}
    printf '%s' "$raw"
}

# Send an alert e-mail through AWS SES and echo the body to stdout.
#   $1 - subject; replaced by a generic "Unknown error" subject when empty
#        (e.g. when the function is invoked with no arguments by the trap)
#   $2 - plain-text body; may span several lines
# Reads VPC_NAME, AWS_MAIL_PROFILE, NAT_MONITOR_FROM_EMAIL and
# NAT_MONITOR_TO_EMAIL from the environment.
send_message() {
    local message_file=/var/tmp/message-$$.json
    local message_string=$1
    local message_body=$2
    local json_subject json_body

    # The operand must be quoted: subjects contain spaces, and an unquoted
    # test makes `[` fail with "too many arguments".
    if [ -z "$message_string" ]; then
        message_string="Unknown error for $VPC_NAME NAT monitor"
    fi

    # Escape both fields; a raw newline or double quote inside the body
    # would otherwise produce an invalid JSON document.
    json_subject=$(_json_escape "$message_string")
    json_body=$(_json_escape "$message_body")
    printf '{"Subject":{"Data":"%s"},"Body":{"Text":{"Data": "%s"}}}\n' \
        "$json_subject" "$json_body" > "$message_file"

    echo "$(date) -- $message_body"

    # Use the mail profile for this single command only, so the caller's
    # AWS_DEFAULT_PROFILE is left untouched even if the call fails.
    # NAT_MONITOR_TO_EMAIL is deliberately left unquoted so that a
    # space-separated list of recipients keeps working.
    AWS_DEFAULT_PROFILE="$AWS_MAIL_PROFILE" aws ses send-email \
        --from "$NAT_MONITOR_FROM_EMAIL" \
        --to $NAT_MONITOR_TO_EMAIL \
        --message "file://$message_file"
}
trap send_message ERR SIGHUP SIGINT SIGTERM
# Determine the NAT instance private IP so we can ping the other NAT instance, take over
# its route, and reboot it. Requires EC2 DescribeInstances, ReplaceRoute, and Start/RebootInstances
# permissions. The following example EC2 Roles policy will authorize these commands:
# {
# "Statement": [
# {
# "Action": [
# "ec2:DescribeInstances",
# "ec2:CreateRoute",
# "ec2:ReplaceRoute",
# "ec2:StartInstances",
# "ec2:StopInstances"
# ],
# "Effect": "Allow",
# "Resource": "*"
# }
# ]
# }
# Main monitoring loop.
# COUNTER counts loop iterations modulo ID_UPDATE_INTERVAL; the NAT IDs and
# IPs are (re-)queried from AWS whenever it is 0.
COUNTER=0
echo "$(date) -- Running NAT monitor"
while true; do
    # Re-check the IDs and IPs periodically.
    # This is useful in case the primary NAT changes by some
    # other means than this script.
    if [ "$COUNTER" -eq 0 ]; then
        # NAT instance variables
        PRIMARY_NAT_ID=$(aws ec2 describe-route-tables --filters Name=tag:aws:cloudformation:stack-name,Values=$VPC_NAME Name=tag:aws:cloudformation:logical-id,Values=PrivateRouteTable | jq -r '.RouteTables[].Routes[].InstanceId|strings')
        BACKUP_NAT_ID=$(aws ec2 describe-instances --filters Name=tag:aws:cloudformation:stack-name,Values=$VPC_NAME Name=tag:aws:cloudformation:logical-id,Values=NATDevice,BackupNATDevice | jq -r '.Reservations[].Instances[].InstanceId' | grep -v "$PRIMARY_NAT_ID")
        NAT_RT_ID=$(aws ec2 describe-route-tables --filters Name=tag:aws:cloudformation:stack-name,Values=$VPC_NAME Name=tag:aws:cloudformation:logical-id,Values=PrivateRouteTable | jq -r '.RouteTables[].RouteTableId')

        # Get the private IPs of both NAT instances.
        PRIMARY_NAT_IP=$(aws ec2 describe-instances --instance-ids $PRIMARY_NAT_ID | jq -r ".Reservations[].Instances[].PrivateIpAddress")
        BACKUP_NAT_IP=$(aws ec2 describe-instances --instance-ids $BACKUP_NAT_ID | jq -r ".Reservations[].Instances[].PrivateIpAddress")
    fi

    # Advance the counter on EVERY iteration. Incrementing it inside the
    # refresh branch above would leave it stuck at 1, so the IDs would never
    # be refreshed. Arithmetic expansion is used instead of `let` because
    # `let` returns a non-zero status when the result is 0, which would abort
    # the script under `set -e` each time the counter wraps.
    COUNTER=$(( (COUNTER + 1) % ID_UPDATE_INTERVAL ))

    # Check the health of the primary; only probe the backup if it fails.
    # `wc -l` (rather than `grep -c`) keeps the pipeline's exit status 0 when
    # there are no replies, so `set -e` does not fire.
    primary_pingresult=$(ping -c "$Num_Pings" -W "$Ping_Timeout" $PRIMARY_NAT_IP | grep time= | wc -l)
    if [ "$primary_pingresult" == "0" ]; then
        backup_pingresult=$(ping -c "$Num_Pings" -W "$Ping_Timeout" $BACKUP_NAT_IP | grep time= | wc -l)
        if [ "$backup_pingresult" == "0" ]; then
            send_message "Error monitoring NATs for $VPC_NAME." "ERROR -- Both NATs($PRIMARY_NAT_ID and $BACKUP_NAT_ID) were unreachable."
        else # Backup NAT is healthy.
            send_message "Primary $VPC_NAME NAT failed ping" "-- NAT($PRIMARY_NAT_ID) heartbeat failed, consider using $BACKUP_NAT_ID for $NAT_RT_ID default route
Command for re-routing:
aws ec2 replace-route --route-table-id $NAT_RT_ID --destination-cidr-block 0.0.0.0/0 --instance-id $BACKUP_NAT_ID"
        fi
    else
        echo "$(date) -- PRIMARY NAT ($PRIMARY_NAT_ID $PRIMARY_NAT_IP) reports healthy to pings"
        sleep "$Wait_Between_Pings"
    fi
done
......@@ -24,6 +24,9 @@ dependencies:
- role: jenkins_master
jenkins_plugins: "{{ jenkins_admin_plugins }}"
jenkins_version: "{{ jenkins_admin_version }}"
jenkins_deb_url: "https://pkg.jenkins.io/debian/binary/jenkins_{{ jenkins_version }}_all.deb"
jenkins_custom_plugins: []
jenkins_bundled_plugins: []
- role: supervisor
supervisor_app_dir: "{{ jenkins_supervisor_app_dir }}"
supervisor_data_dir: "{{ jenkins_supervisor_data_dir }}"
......
......@@ -33,13 +33,6 @@
- fail: msg="JENKINS_ADMIN_S3_PROFILE.secret_key is not defined."
when: JENKINS_ADMIN_S3_PROFILE.secret_key is not defined
- name: add admin specific apt repositories
apt_repository:
repo: "{{ item }}"
state: "present"
update_cache: "yes"
with_items: "{{ jenkins_admin_debian_repos }}"
- name: create the scripts directory
file:
path: "{{ jenkins_admin_scripts_dir }}"
......@@ -114,7 +107,7 @@
group: "{{ jenkins_group }}"
mode: 0755
state: directory
with_items: jenkins_admin_jobs
with_items: "{{ jenkins_admin_jobs }}"
- name: create admin job config files
template:
......@@ -123,12 +116,7 @@
owner: "{{ jenkins_user }}"
group: "{{ jenkins_group }}"
mode: 0644
with_items: jenkins_admin_jobs
# adding chris-lea nodejs repo
- name: add ppas for current versions of nodejs
apt_repository:
repo: "{{ jenkins_chrislea_ppa }}"
with_items: "{{ jenkins_admin_jobs }}"
- name: install system packages for edxapp virtualenvs
apt:
......@@ -153,11 +141,10 @@
state: present
version: "{{ item.version }}"
user_install: no
with_items: jenkins_admin_gem_pkgs
with_items: "{{ jenkins_admin_gem_pkgs }}"
- name: get s3 one time url
local_action:
module: "s3"
s3:
bucket: "{{ JENKINS_ADMIN_BACKUP_BUCKET }}"
object: "{{ JENKINS_ADMIN_BACKUP_S3_KEY }}"
mode: "geturl"
......@@ -168,7 +155,7 @@
- name: download s3 backup
get_url:
url: "{{ s3_one_time_url.url }}"
dest: "/tmp/{{ JENKINS_ADMIN_BACKUP_S3_KEY | basename }}"
dest: "/tmp/jenkins_backup.tar.gz"
mode: 0644
owner: "{{ jenkins_user }}"
when: JENKINS_ADMIN_BACKUP_BUCKET is defined and JENKINS_ADMIN_BACKUP_S3_KEY is defined
......@@ -192,8 +179,4 @@
service:
name: "jenkins"
state: "started"
when: JENKINS_ADMIN_BACKUP_BUCKET and JENKINS_BACKUP_S3_KEY
- include: nat_monitor.yml
tags:
- nat-monitor
when: JENKINS_ADMIN_BACKUP_BUCKET is defined and JENKINS_ADMIN_BACKUP_S3_KEY is defined
---
# Installs the NAT monitor script and runs it under supervisor.
#
# NAT monitors should be defined as a list of dictionaries,
# e.g.
# NAT_MONITORS:
#   - vpc_name: 'loadtest-edx'
#     region: 'us-east-1'
#     deployment: 'edx'
#
# To receive e-mails, SES should be set up with the
# AWS account that is defined by JENKINS_ADMIN_MAIL_PROFILE,
# and the from address should be verified.
# JENKINS_ADMIN_MAIL_PROFILE: 'aws_account_name'
# JENKINS_ADMIN_FROM_EMAIL: 'admin@example.com'
# JENKINS_ADMIN_TO_EMAIL: 'alert@example.com'

- fail:
    msg: "NAT_MONITORS is not defined."
  when: NAT_MONITORS is not defined

- name: upload the monitor script
  copy:
    dest: "{{ jenkins_admin_scripts_dir }}/nat-monitor.sh"
    src: "nat-monitor.sh"
    owner: "{{ jenkins_user }}"
    group: "{{ jenkins_group }}"
    mode: "0755"
  become_user: "{{ jenkins_user }}"
  notify: restart nat monitor

- name: create a supervisor config
  template:
    src: "nat-monitor.conf.j2"
    dest: "{{ jenkins_supervisor_available_dir }}/nat-monitor.conf"
    owner: "{{ jenkins_user }}"
    group: "{{ jenkins_group }}"
  become_user: "{{ jenkins_user }}"
  notify: restart nat monitor

- name: enable the supervisor config
  file:
    src: "{{ jenkins_supervisor_available_dir }}/nat-monitor.conf"
    dest: "{{ jenkins_supervisor_cfg_dir }}/nat-monitor.conf"
    state: link
    force: true
    mode: "0644"
  become_user: "{{ jenkins_user }}"
  when: not disable_edx_services
  notify: restart nat monitor

# Make supervisor pick up the new or changed config; a non-empty stdout
# from the update command is treated as a change.
- name: update supervisor configuration
  shell: "{{ jenkins_supervisor_ctl }} -c {{ jenkins_supervisor_cfg }} update"
  register: supervisor_update
  changed_when: supervisor_update.stdout is defined and supervisor_update.stdout != ""
  when: not disable_edx_services

# Have to use shell here because supervisorctl doesn't support
# process groups.
- name: ensure nat monitor is started
  shell: "{{ jenkins_supervisor_ctl }} -c {{ jenkins_supervisor_cfg }} start nat_monitor:*"
  when: not disable_edx_services
{# Renders one supervisor [program:...] section per NAT_MONITORS entry. Hyphens in vpc_name are replaced with underscores in the program name. #}
{% for m in NAT_MONITORS %}
[program:nat_monitor_{{ m.vpc_name|replace('-','_') }}]
environment=VPC_NAME="{{ m.vpc_name }}",AWS_DEFAULT_REGION="{{ m.region }}",AWS_DEFAULT_PROFILE="{{ m.deployment }}",AWS_MAIL_PROFILE="{{ JENKINS_ADMIN_MAIL_PROFILE }}",NAT_MONITOR_FROM_EMAIL="{{ JENKINS_ADMIN_FROM_EMAIL }}",NAT_MONITOR_TO_EMAIL="{{ JENKINS_ADMIN_TO_EMAIL }}"
user={{ jenkins_supervisor_service_user }}
directory={{ jenkins_admin_scripts_dir }}
stdout_logfile={{ jenkins_supervisor_log_dir }}/%(program_name)s-stdout.log
stderr_logfile={{ jenkins_supervisor_log_dir }}/%(program_name)s-stderr.log
command={{ jenkins_admin_scripts_dir }}/nat-monitor.sh
killasgroup=true
stopasgroup=true
{% endfor %}
{# The nat_monitor group lists every program rendered above, comma-separated. #}
[group:nat_monitor]
programs={%- for m in NAT_MONITORS %}nat_monitor_{{ m.vpc_name|replace('-','_') }}{%- if not loop.last %},{%- endif %}{%- endfor %}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment