Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
configuration
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
OpenEdx
configuration
Commits
d8f755c6
Commit
d8f755c6
authored
Jul 30, 2014
by
Feanil Patel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add a NAT monitoring role to jenkins_admin.
parent
5a9076ee
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
267 additions
and
2 deletions
+267
-2
playbooks/roles/aws/defaults/main.yml
+1
-1
playbooks/roles/jenkins_admin/defaults/main.yml
+15
-0
playbooks/roles/jenkins_admin/files/nat-monitor.sh
+117
-0
playbooks/roles/jenkins_admin/handlers/main.yml
+21
-0
playbooks/roles/jenkins_admin/meta/main.yml
+11
-0
playbooks/roles/jenkins_admin/tasks/main.yml
+24
-0
playbooks/roles/jenkins_admin/tasks/nat_monitor.yml
+43
-0
playbooks/roles/jenkins_admin/templates/edx/var/jenkins/aws_config.j2
+6
-0
playbooks/roles/jenkins_admin/templates/nat-monitor.conf.j2
+19
-0
playbooks/roles/jenkins_master/defaults/main.yml
+1
-1
playbooks/roles/supervisor/tasks/main.yml
+9
-0
No files found.
playbooks/roles/aws/defaults/main.yml
View file @
d8f755c6
...
...
@@ -56,7 +56,7 @@ aws_debian_pkgs:
aws_pip_pkgs
:
-
https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz
-
awscli
-
boto==2.
29.1
-
boto==2.
32.0
aws_redhat_pkgs
:
[]
aws_s3cmd_version
:
s3cmd-1.5.0-beta1
...
...
playbooks/roles/jenkins_admin/defaults/main.yml
View file @
d8f755c6
...
...
@@ -143,3 +143,18 @@ jenkins_admin_plugins:
jenkins_admin_jobs
:
-
'
backup-jenkins'
# Supervisor related settings
jenkins_supervisor_user
:
"
{{
jenkins_user
}}"
jenkins_supervisor_app_dir
:
"
{{
jenkins_home
}}/supervisor"
jenkins_supervisor_cfg_dir
:
"
{{
jenkins_supervisor_app_dir
}}/conf.d"
jenkins_supervisor_available_dir
:
"
{{
jenkins_supervisor_app_dir
}}/available.d"
jenkins_supervisor_data_dir
:
"
{{
jenkins_home
}}/supervisor/data"
jenkins_supervisor_cfg
:
"
{{
jenkins_supervisor_app_dir
}}/supervisord.conf"
jenkins_supervisor_log_dir
:
"
{{
COMMON_LOG_DIR
}}/supervisor/jenkins"
jenkins_supervisor_venv_dir
:
"
{{
jenkins_home
}}/venvs/supervisor"
jenkins_supervisor_venv_bin
:
"
{{
jenkins_supervisor_venv_dir
}}/bin"
jenkins_supervisor_ctl
:
"
{{
jenkins_supervisor_venv_bin
}}/supervisorctl"
jenkins_supervisor_service_user
:
"
{{
jenkins_user
}}"
jenkins_admin_scripts_dir
:
"
{{
jenkins_home
}}/scripts"
playbooks/roles/jenkins_admin/files/nat-monitor.sh
0 → 100644
View file @
d8f755c6
#!/bin/bash -x
#
# NAT failover monitor.
#
# This first section discovers, via the AWS CLI and jq:
#   PRIMARY_NAT_ID  the instance the VPC's private route table routes through
#   BACKUP_NAT_ID   the other NAT instance of the same CloudFormation stack
#   NAT_RT_ID       the id of the private route table
# The rest of the script pings the primary NAT and routes to the backup NAT
# if the primary fails.
#
# Required environment:
#   VPC_NAME  value of the aws:cloudformation:stack-name tag used to select
#             the stack's resources in the filters below
#
# NOTE: `pipefail` is deliberately NOT enabled. The health checks later in
# this script run `ping ... | grep ... | wc -l` and rely on a failing ping
# not aborting the script under `set -e`.
set -e

# Fail fast with a clear message instead of querying AWS with an empty
# "Values=" filter.
: "${VPC_NAME:?VPC_NAME must be set to the CloudFormation stack name of the VPC}"

# Log a message to stderr and abort the script.
nat_monitor_die() {
  echo "$(date) -- $*" >&2
  exit 1
}

# Filter shared by every lookup: restrict results to this VPC's stack.
stack_filter="Name=tag:aws:cloudformation:stack-name,Values=${VPC_NAME}"

# NAT instance variables
PRIMARY_NAT_ID=$(
  aws ec2 describe-route-tables \
    --filters "$stack_filter" \
      "Name=tag:aws:cloudformation:logical-id,Values=PrivateRouteTable" \
    | jq -r '.RouteTables[].Routes[].InstanceId|strings'
)
[ -n "$PRIMARY_NAT_ID" ] \
  || nat_monitor_die "No NAT instance found in the private route table of $VPC_NAME"

# Every NAT instance of the stack except the primary one. `|| true` keeps
# `set -e` from killing the script silently when grep filters everything out;
# the explicit check below reports the problem instead.
BACKUP_NAT_ID=$(
  aws ec2 describe-instances \
    --filters "$stack_filter" \
      "Name=tag:aws:cloudformation:logical-id,Values=NATDevice,BackupNATDevice" \
    | jq -r '.Reservations[].Instances[].InstanceId' \
    | grep -v -F -- "$PRIMARY_NAT_ID" || true
)
[ -n "$BACKUP_NAT_ID" ] \
  || nat_monitor_die "No backup NAT instance found for $VPC_NAME"

NAT_RT_ID=$(
  aws ec2 describe-route-tables \
    --filters "$stack_filter" \
      "Name=tag:aws:cloudformation:logical-id,Values=PrivateRouteTable" \
    | jq -r '.RouteTables[].RouteTableId'
)
[ -n "$NAT_RT_ID" ] \
  || nat_monitor_die "No private route table found for $VPC_NAME"

# Health Check variables
Num_Pings=3                  # echo requests per health check (ping -c)
Ping_Timeout=1               # value passed to ping -W
Wait_Between_Pings=2         # sleep after a healthy check
Wait_for_Instance_Stop=60    # sleep after asking the failed NAT to stop
Wait_for_Instance_Start=300  # sleep after starting the failed NAT again
# Escape a string for use inside a double-quoted JSON string.
# Handles backslash, double quote, newline, tab and carriage return; other
# control characters are passed through unchanged.
_json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\t'/\\t}
  s=${s//$'\r'/\\r}
  printf '%s' "$s"
}

#######################################
# Email a notification through SES and echo it (with a timestamp) to stdout.
# Globals:
#   VPC_NAME                (read) used in the fallback subject
#   AWS_MAIL_PROFILE        (read) awscli profile used for the SES call only
#   NAT_MONITOR_FROM_EMAIL  (read) sender address
#   NAT_MONITOR_TO_EMAIL    (read) recipient address(es)
# Arguments:
#   $1 - subject; a generic "Unknown error" subject is used when empty
#   $2 - body text; may be empty (e.g. when invoked from a trap)
# Returns:
#   the exit status of `aws ses send-email` (non-zero if the temp file could
#   not be created)
#######################################
send_message() {
  local subject=${1:-}
  local body=${2:-}
  local message_file escaped_subject escaped_body
  local rc=0

  if [ -z "$subject" ]; then
    subject="Unknown error for $VPC_NAME NAT monitor"
  fi

  # Escape both fields so quotes or backslashes cannot produce invalid JSON.
  escaped_subject=$(_json_escape "$subject")
  escaped_body=$(_json_escape "$body")

  # Unpredictable name, removed below, instead of a fixed /var/tmp/message-PID
  # file that was left behind.
  message_file=$(mktemp "${TMPDIR:-/tmp}/nat-monitor-message.XXXXXX") || return 1
  printf '{"Subject":{"Data":"%s"},"Body":{"Text":{"Data": "%s"}}}\n' \
    "$escaped_subject" "$escaped_body" > "$message_file"

  printf '%s -- %s\n' "$(date)" "$body"

  # The mail profile is applied to this single command only, so the caller's
  # AWS_DEFAULT_PROFILE is untouched even when the send fails.
  # NAT_MONITOR_TO_EMAIL is intentionally left unquoted, as before, so that a
  # space-separated list of recipients still expands to several arguments.
  # shellcheck disable=SC2086
  AWS_DEFAULT_PROFILE=$AWS_MAIL_PROFILE aws ses send-email \
    --from "$NAT_MONITOR_FROM_EMAIL" \
    --to $NAT_MONITOR_TO_EMAIL \
    --message "file://$message_file" || rc=$?

  rm -f -- "$message_file"
  return "$rc"
}
# A failing command triggers a notification; `set -e` then ends the script.
trap send_message ERR
# A trapped signal does not terminate bash on its own: once the handler
# returns, execution resumes. Exit explicitly after notifying so that
# HUP/INT/TERM (e.g. a supervisor stop request) actually stop the monitor.
# `|| true` keeps a failed notification from re-entering the ERR trap.
trap 'send_message || true; exit 1' SIGHUP SIGINT SIGTERM
# Look up the private IP of each NAT instance. The monitor pings these
# addresses, takes over the default route when the primary stops answering,
# and stops/starts the failed instance.
#
# The identity running this script needs EC2 DescribeInstances, ReplaceRoute
# and Start/StopInstances permissions. An example EC2 role policy that
# authorizes these commands:
# {
#   "Statement": [
#     {
#       "Action": [
#         "ec2:DescribeInstances",
#         "ec2:CreateRoute",
#         "ec2:ReplaceRoute",
#         "ec2:StartInstances",
#         "ec2:StopInstances"
#       ],
#       "Effect": "Allow",
#       "Resource": "*"
#     }
#   ]
# }
# NOTE(review): the script also calls `aws ec2 describe-route-tables` and
# `aws ses send-email`, which the example above does not list -- confirm they
# are granted elsewhere.

# Print the private IP address(es) reported for the given instance id.
# The argument is expanded unquoted on purpose, matching the original
# word-splitting behaviour.
nat_private_ip() {
  aws ec2 describe-instances --instance-ids $1 \
    | jq -r ".Reservations[].Instances[].PrivateIpAddress"
}

# Get the primary NAT instance's IP
PRIMARY_NAT_IP=$(nat_private_ip "$PRIMARY_NAT_ID")
# ... and the backup NAT instance's IP
BACKUP_NAT_IP=$(nat_private_ip "$BACKUP_NAT_ID")

echo $(date) "-- Running NAT monitor"
# Print how many echo replies the host in $1 returned to Num_Pings pings.
# Kept as `ping | grep | wc -l` on purpose: the pipeline's status is that of
# `wc`, so an unreachable host yields "0" without tripping `set -e`, whereas
# `grep -c` exits non-zero when nothing matches.
count_ping_replies() {
  ping -c "$Num_Pings" -W "$Ping_Timeout" "$1" | grep 'time=' | wc -l
}

# Set to 1 once the "both NATs unreachable" email has been sent, so that a
# long outage produces one email instead of one per loop iteration. Reset as
# soon as either NAT answers again.
both_down_alerted=0

while true; do
  # Check the health of the primary instance first.
  primary_pingresult=$(count_ping_replies "$PRIMARY_NAT_IP")

  if [[ "$primary_pingresult" != "0" ]]; then
    both_down_alerted=0
    echo "$(date) -- PRIMARY NAT ($PRIMARY_NAT_ID $PRIMARY_NAT_IP) reports healthy to pings"
    sleep "$Wait_Between_Pings"
    continue
  fi

  # Primary did not answer at all; only fail over if the backup does.
  backup_pingresult=$(count_ping_replies "$BACKUP_NAT_IP")

  if [[ "$backup_pingresult" == "0" ]]; then
    if [[ "$both_down_alerted" == "0" ]]; then
      send_message "Error monitoring NATs for $VPC_NAME." "ERROR -- Both NATs($PRIMARY_NAT_ID and $BACKUP_NAT_ID) were unreachable."
      both_down_alerted=1
    else
      # Already reported by email; keep a trace in the log only.
      echo "$(date) -- Both NATs($PRIMARY_NAT_ID and $BACKUP_NAT_ID) are still unreachable"
    fi
    continue
  fi

  # Backup NAT is healthy.
  both_down_alerted=0

  # Set HEALTHY variables to unhealthy (0)
  ROUTE_HEALTHY=0
  NAT_HEALTHY=0
  STOPPING_NAT=0

  # Primary NAT instance is unhealthy, loop while we try to fix it
  while [[ "$NAT_HEALTHY" == "0" ]]; do
    if [[ "$ROUTE_HEALTHY" == "0" ]]; then
      # Point the default route at the backup NAT (done once per failover).
      aws ec2 replace-route \
        --route-table-id "$NAT_RT_ID" \
        --destination-cidr-block 0.0.0.0/0 \
        --instance-id "$BACKUP_NAT_ID"
      send_message " Primary $VPC_NAME NAT failed" "-- NAT($PRIMARY_NAT_ID) heartbeat failed, using $BACKUP_NAT_ID for $NAT_RT_ID default route"
      ROUTE_HEALTHY=1
    fi

    # Check NAT state to see if we should stop it or start it again
    NAT_STATE=$(
      aws ec2 describe-instances --instance-ids "$PRIMARY_NAT_ID" \
        | jq -r ".Reservations[].Instances[].State.Name"
    )

    if [[ "$NAT_STATE" == "stopped" ]]; then
      echo "$(date) -- NAT($PRIMARY_NAT_ID) instance stopped, starting it back up"
      aws ec2 start-instances --instance-ids "$PRIMARY_NAT_ID"
      sleep "$Wait_for_Instance_Start"
    else
      # Ask for a stop only once per failover, then keep waiting.
      if [[ "$STOPPING_NAT" == "0" ]]; then
        echo "$(date) -- NAT($PRIMARY_NAT_ID) instance $NAT_STATE, attempting to stop for reboot"
        aws ec2 stop-instances --instance-ids "$PRIMARY_NAT_ID"
        STOPPING_NAT=1
      fi
      sleep "$Wait_for_Instance_Stop"
    fi

    # The failed NAT counts as recovered only when every ping is answered.
    unhealthy_nat_pingresult=$(count_ping_replies "$PRIMARY_NAT_IP")
    if [[ "$unhealthy_nat_pingresult" == "$Num_Pings" ]]; then
      NAT_HEALTHY=1
    fi
  done

  # Backup nat was healthy so we switched to it. It is now the primary.
  if [[ "$ROUTE_HEALTHY" == "1" ]]; then
    TEMP_NAT_ID=$PRIMARY_NAT_ID
    TEMP_NAT_IP=$PRIMARY_NAT_IP

    PRIMARY_NAT_ID=$BACKUP_NAT_ID
    PRIMARY_NAT_IP=$BACKUP_NAT_IP

    BACKUP_NAT_ID=$TEMP_NAT_ID
    BACKUP_NAT_IP=$TEMP_NAT_IP
  fi
done
playbooks/roles/jenkins_admin/handlers/main.yml
0 → 100644
View file @
d8f755c6
---
#
# edX Configuration
#
# github:     https://github.com/edx/configuration
# wiki:       https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license:    https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Handlers for role jenkins_admin
#
# Overview:
#
# Have to use shell here because supervisorctl doesn't support
# process groups.
# NOTE(review): "supervisorctl" above presumably means the Ansible
# supervisorctl module, since the command below is the supervisorctl CLI
# itself -- confirm.

# Restarts everything addressed by "nat_monitor:*", using the Jenkins-specific
# supervisorctl binary and supervisord config file. Skipped when
# disable_edx_services is true.
- name: restart nat monitor
  shell: "{{ jenkins_supervisor_ctl }} -c {{ jenkins_supervisor_cfg }} restart nat_monitor:*"
  when: not disable_edx_services
playbooks/roles/jenkins_admin/meta/main.yml
View file @
d8f755c6
...
...
@@ -20,4 +20,15 @@
# }
dependencies
:
-
common
-
aws
-
jenkins_master
-
role
:
supervisor
supervisor_app_dir
:
"
{{
jenkins_supervisor_app_dir
}}"
supervisor_data_dir
:
"
{{
jenkins_supervisor_data_dir
}}"
supervisor_log_dir
:
"
{{
jenkins_supervisor_log_dir
}}"
supervisor_venv_dir
:
"
{{
jenkins_supervisor_venv_dir
}}"
supervisor_service_user
:
"
{{
jenkins_supervisor_user
}}"
supervisor_available_dir
:
"
{{
jenkins_supervisor_available_dir
}}"
supervisor_cfg_dir
:
"
{{
jenkins_supervisor_cfg_dir
}}"
supervisor_service
:
"
supervisor.jenkins"
supervisor_http_bind_port
:
'
9003'
playbooks/roles/jenkins_admin/tasks/main.yml
View file @
d8f755c6
...
...
@@ -37,6 +37,10 @@
apt_repository
:
repo="{{ item }}" state=present update_cache=yes
with_items
:
jenkins_admin_debian_repos
-
name
:
create the scripts directory
file
:
path={{ jenkins_admin_scripts_dir }} state=directory
owner={{ jenkins_user }} group={{ jenkins_group }} mode=755
# We first download the plugins to a temp directory and include
# the version in the file name. That way, if we increment
# the version, the plugin will be updated in Jenkins
...
...
@@ -71,6 +75,24 @@
owner="{{ jenkins_user }}"
group="{{ jenkins_group }}"
mode="0600"
tags
:
-
aws-config
-
name
:
create the .aws directory
file
:
path={{ jenkins_home }}/.aws state=directory
owner={{ jenkins_user }} group={{ jenkins_group }} mode=700
tags
:
-
aws-config
-
name
:
configure the awscli profiles for jenkins
template
:
>
src="./{{ jenkins_home }}/aws_config.j2"
dest="{{ jenkins_home }}/.aws/config"
owner="{{ jenkins_user }}"
group="{{ jenkins_group }}"
mode="0600"
tags
:
-
aws-config
-
name
:
create the ssh directory
file
:
>
...
...
@@ -134,3 +156,5 @@
version={{ item.version }}
user_install=no
with_items
:
jenkins_admin_gem_pkgs
-
include
:
nat_monitor.yml
playbooks/roles/jenkins_admin/tasks/nat_monitor.yml
0 → 100644
View file @
d8f755c6
---
# Tasks that install the NAT monitor script and run it under the Jenkins
# supervisor instance.

# Stop early when the list of monitors to configure is missing.
- fail: msg="NAT_MONITORS is not defined."
  when: NAT_MONITORS is not defined

# Copy the monitor script from the role's files into the scripts directory.
- name: upload the monitor script
  copy:
    dest="{{ jenkins_admin_scripts_dir }}/nat-monitor.sh"
    src="nat-monitor.sh"
    owner="{{ jenkins_user }}"
    group="{{ jenkins_group }}"
    mode="755"
  sudo_user: "{{ jenkins_user }}"

# Render the supervisor program definitions into the "available" directory;
# a change triggers the restart handler.
- name: create a supervisor config
  template:
    src="nat-monitor.conf.j2" dest="{{ jenkins_supervisor_available_dir }}/nat-monitor.conf"
    owner="{{ jenkins_user }}"
    group="{{ jenkins_group }}"
  sudo_user: "{{ jenkins_user }}"
  notify: restart nat monitor

# Enable the config by symlinking it from the "available" directory into the
# supervisor config directory. Skipped when disable_edx_services is true.
- name: enable the supervisor config
  file:
    src="{{ jenkins_supervisor_available_dir }}/nat-monitor.conf"
    dest="{{ jenkins_supervisor_cfg_dir }}/nat-monitor.conf"
    state=link
    force=yes
    mode=0644
  sudo_user: "{{ jenkins_user }}"
  when: not disable_edx_services
  notify: restart nat monitor

# Run "supervisorctl update"; the task is reported as changed only when the
# command printed something on stdout.
- name: update supervisor configuration
  shell: "{{ jenkins_supervisor_ctl }} -c {{ jenkins_supervisor_cfg }} update"
  register: supervisor_update
  changed_when: supervisor_update.stdout is defined and supervisor_update.stdout != ""
  when: not disable_edx_services

# Have to use shell here because supervisorctl doesn't support
# process groups.
# NOTE(review): presumably refers to the Ansible supervisorctl module -- confirm.
- name: ensure nat monitor is started
  shell: "{{ jenkins_supervisor_ctl }} -c {{ jenkins_supervisor_cfg }} start nat_monitor:*"
  when: not disable_edx_services
playbooks/roles/jenkins_admin/templates/edx/var/jenkins/aws_config.j2
0 → 100644
View file @
d8f755c6
{#- Renders one awscli "[profile NAME]" section per entry of
    JENKINS_ADMIN_AWS_CREDENTIALS, a mapping of profile name to an object
    with access_id and secret_key. Uses .items() rather than .iteritems()
    because the latter only exists on Python 2 dictionaries. -#}
{% for deployment, creds in JENKINS_ADMIN_AWS_CREDENTIALS.items() %}
[profile {{deployment}}]
aws_access_key_id = {{ creds.access_id }}
aws_secret_access_key = {{ creds.secret_key }}
{% endfor %}
playbooks/roles/jenkins_admin/templates/nat-monitor.conf.j2
0 → 100644
View file @
d8f755c6
{#- Supervisor configuration for the NAT monitors: one [program:...] section
    per entry of NAT_MONITORS (each entry provides vpc_name, region and
    deployment), all collected in the "nat_monitor" group. Dashes in the VPC
    name are replaced by underscores in the program name.
    The log file names use %(program_name)s: the trailing "s" conversion is
    required by supervisor's string expansion and was missing, which consumed
    the "s" of "stdout"/"stderr" instead. -#}
{% for m in NAT_MONITORS %}
[program:nat_monitor_{{ m.vpc_name|replace('-','_') }}]
environment=VPC_NAME="{{ m.vpc_name }}",AWS_DEFAULT_REGION="{{ m.region }}",AWS_DEFAULT_PROFILE="{{ m.deployment }}",AWS_MAIL_PROFILE="{{ JENKINS_ADMIN_MAIL_PROFILE }}",NAT_MONITOR_FROM_EMAIL="{{ JENKINS_ADMIN_FROM_EMAIL }}",NAT_MONITOR_TO_EMAIL="{{ JENKINS_ADMIN_TO_EMAIL }}"
user={{ jenkins_supervisor_service_user }}
directory={{ jenkins_admin_scripts_dir }}
stdout_logfile={{ jenkins_supervisor_log_dir }}/%(program_name)s-stdout.log
stderr_logfile={{ jenkins_supervisor_log_dir }}/%(program_name)s-stderr.log
command={{ jenkins_admin_scripts_dir }}/nat-monitor.sh
killasgroup=true
stopasgroup=true
{% endfor %}
[group:nat_monitor]
programs={%- for m in NAT_MONITORS %}nat_monitor_{{ m.vpc_name|replace('-','_') }}{%- if not loop.last %},{%- endif %}{%- endfor %}
playbooks/roles/jenkins_master/defaults/main.yml
View file @
d8f755c6
...
...
@@ -4,7 +4,7 @@ jenkins_group: "edx"
jenkins_server_name
:
"
jenkins.testeng.edx.org"
jenkins_port
:
8080
jenkins_version
:
1.57
1
jenkins_version
:
1.57
4
jenkins_deb_url
:
"
http://pkg.jenkins-ci.org/debian/binary/jenkins_{{
jenkins_version
}}_all.deb"
jenkins_deb
:
"
jenkins_{{
jenkins_version
}}_all.deb"
...
...
playbooks/roles/supervisor/tasks/main.yml
View file @
d8f755c6
...
...
@@ -72,6 +72,15 @@
with_items
:
-
"
{{
supervisor_app_dir
}}"
-
"
{{
supervisor_venv_dir
}}"
-
name
:
create service user accessible dirs
file
:
>
name={{ item }}
state=directory
owner={{ supervisor_user }}
group={{ supervisor_service_user }}
mode="775"
with_items
:
-
"
{{
supervisor_cfg_dir
}}"
-
"
{{
supervisor_available_dir
}}"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment