Commit 83b0744a by Feanil Patel

Add back newlines.

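We need explicit newlines because some script lines are assembled by joining a literal string with a referenced value (e.g. `PRIMARY_NAT_ID=` followed by a `Ref`), so a newline cannot be used as the `Fn::Join` delimiter.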
parent fb29a2d4
......@@ -1709,128 +1709,128 @@
"PropagateAtLaunch":true
}
],
"UserData": { "Fn::Base64" : { "Fn::Join" : ["\n", [
"#!/bin/bash -v",
"mkdir -p /opt/edx/bin",
"cd /opt",
"apt-get update",
"apt-get install openjdk-6-jre-headless unzip -y",
"wget http://s3.amazonaws.com/ec2-downloads/ec2-api-tools.zip",
"unzip ec2-api-tools.zip",
"rm ec2-api-tools.zip",
"ln -sf ec2-api-tools-* ec2-api-tools",
"UserData": { "Fn::Base64" : { "Fn::Join" : ["", [
"#!/bin/bash -v\n",
"# Install the EC2 API command line tools under /opt/ec2-api-tools\n",
"mkdir -p /opt/edx/bin\n",
"cd /opt || exit 1\n",
"apt-get update\n",
"apt-get install openjdk-6-jre-headless unzip -y\n",
"# Fetch over HTTPS: the archive contains code that is later run as root\n",
"wget https://s3.amazonaws.com/ec2-downloads/ec2-api-tools.zip\n",
"unzip ec2-api-tools.zip\n",
"rm ec2-api-tools.zip\n",
"ln -sf ec2-api-tools-* ec2-api-tools\n",
"cat <<'EOF' > /opt/edx/bin/nat_monitor.sh",
"#!/bin/sh",
"# This script will monitor another NAT instance and take over its routes",
"# if communication with the other instance fails",
"",
"# NAT instance variables",
"# Other instance's IP to ping and route to grab if other node goes down",
"PRIMARY_NAT_ID=", { "Ref":"NATDevice" }, "",
"BACKUP_NAT_ID=", { "Ref": "BackupNATDevice" }, "",
"NAT_RT_ID=", { "Ref": "PrivateRouteTable" }, "",
"",
"# Specify the EC2 region that this will be running in (e.g. https://ec2.us-east-1.amazonaws.com)",
"EC2_URL=https://ec2.",{ "Ref": "AWS::Region" },".amazonaws.com",
"",
"# Health Check variables",
"Num_Pings=3",
"Ping_Timeout=1",
"Wait_Between_Pings=2",
"Wait_for_Instance_Stop=60",
"Wait_for_Instance_Start=300",
"",
"# leverage AWS security credentials provided by EC2 roles",
"# Setup environment for ec2 api tools",
"export EC2_HOME=/opt/ec2-api-tools",
"export AWS_IAM_HOME=/opt/IAMCli",
"export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64",
"PATH=/opt/ec2-api-tools/bin:$PATH",
"",
"# Determine the NAT instance private IP so we can ping the other NAT instance, take over",
"# its route, and reboot it. Requires EC2 DescribeInstances, ReplaceRoute, and Start/RebootInstances",
"# permissions. The following example EC2 Roles policy will authorize these commands:",
"# {",
"# \"Statement\": [",
"# {",
"# \"Action\": [",
"# \"ec2:DescribeInstances\",",
"# \"ec2:CreateRoute\",",
"# \"ec2:ReplaceRoute\",",
"# \"ec2:StartInstances\",",
"# \"ec2:StopInstances\"",
"# ],",
"# \"Effect\": \"Allow\",",
"# \"Resource\": \"*\"",
"# }",
"# ]",
"# }",
"",
"# Get the primary NAT instance's IP",
"PRIMARY_NAT_IP=`/opt/ec2-api-tools/bin/ec2-describe-instances $PRIMARY_NAT_ID -U $EC2_URL | grep PRIVATEIPADDRESS -m 1 | awk '{print $2;}'`",
"BACKUP_NAT_IP=`/opt/ec2-api-tools/bin/ec2-describe-instances $BACKUP_NAT_ID -U $EC2_URL | grep PRIVATEIPADDRESS -m 1 | awk '{print $2;}'`",
"",
"echo `date` \"-- Starting NAT monitor\"",
"",
"while [ . ]; do",
" # Check the health of both instances.",
" primary_pingresult=`ping -c $Num_Pings -W $Ping_Timeout $PRIMARY_NAT_IP| grep time= | wc -l`",
"",
" if [ \"$primary_pingresult\" == \"0\" ]; then",
" backup_pingresult=`ping -c $Num_Pings -W $Ping_Timeout $BACKUP_NAT_IP| grep time= | wc -l`",
" if [ \"$backup_pingresult\" == \"0\" ]; then",
" echo `date` \"-- Both NAT devices un reachable.\"",
" #TODO: Notify alert that both NATs are down.",
" else #Backup nat is healthy.",
" # Set HEALTHY variables to unhealthy (0)",
" ROUTE_HEALTHY=0",
" NAT_HEALTHY=0",
" STOPPING_NAT=0",
" while [ \"$NAT_HEALTHY\" == \"0\" ]; do",
" # Primary NAT instance is unhealthy, loop while we try to fix it",
" if [ \"$ROUTE_HEALTHY\" == \"0\" ]; then",
" echo `date` \"-- NAT($PRIMARY_NAT_ID) heartbeat failed, using $BACKUP_NAT_ID for $NAT_RT_ID default route\"",
" /opt/ec2-api-tools/bin/ec2-replace-route $NAT_RT_ID -r 0.0.0.0/0 -i $BACKUP_NAT_ID -U $EC2_URL",
" ROUTE_HEALTHY=1",
" fi",
" # Check NAT state to see if we should stop it or start it again",
" NAT_STATE=`/opt/ec2-api-tools/bin/ec2-describe-instances $PRIMARY_NAT_ID -U $EC2_URL | grep INSTANCE | awk '{print $5;}'`",
" if [ \"$NAT_STATE\" == \"stopped\" ]; then",
" echo `date` \"-- NAT($PRIMARY_NAT_ID) instance stopped, starting it back up\"",
" /opt/ec2-api-tools/bin/ec2-start-instances $PRIMARY_NAT_ID -U $EC2_URL",
" NAT_HEALTHY=1",
" sleep $Wait_for_Instance_Start",
" else",
" if [ \"$STOPPING_NAT\" == \"0\" ]; then",
" echo `date` \"-- NAT($PRIMARY_NAT_ID) instance $NAT_STATE, attempting to stop for reboot\"",
" /opt/ec2-api-tools/bin/ec2-stop-instances $PRIMARY_NAT_ID -U $EC2_URL",
" STOPPING_NAT=1",
" fi",
" sleep $Wait_for_Instance_Stop",
" fi",
" done",
"",
" # Backup nat was healthy so we switched to it. It is now the primary.",
" if [ \"$NAT_HEALTHY\" == \"1\" ]; then",
" TEMP_NAT_ID=$PRIMARY_NAT_ID",
" TEMP_NAT_IP=$PRIMARY_NAT_IP",
"",
" PRIMARY_NAT_ID=$BACKUP_NAT_ID",
" PRIMARY_NAT_IP=$BACKUP_NAT_IP",
"",
" BACKUP_NAT_ID=$TEMP_NAT_ID",
" BACKUP_NAT_IP=$TEMP_NAT_IP",
" fi",
" fi",
" else",
" sleep $Wait_Between_Pings",
" fi",
"done",
"EOF",
"chmod u+x /opt/edx/bin/nat_monitor.sh",
"echo '@reboot /root/nat_monitor.sh > /var/log/nat_monitor.log' | crontab",
"/opt/edx/bin/nat_monitor.sh > /var/log/nat_monitor.log &"
"cat <<'EOF' > /opt/edx/bin/nat_monitor.sh\n",
"#!/bin/sh\n",
"# This script will monitor another NAT instance and take over its routes\n",
"# if communication with the other instance fails\n",
"\n",
"# NAT instance variables\n",
"# IDs of the primary and backup NAT instances, and of the route table whose\n",
"# default route is moved to the backup when the primary stops answering pings\n",
"PRIMARY_NAT_ID=", { "Ref":"NATDevice" }, "\n",
"BACKUP_NAT_ID=", { "Ref": "BackupNATDevice" }, "\n",
"NAT_RT_ID=", { "Ref": "PrivateRouteTable" }, "\n",
"\n",
"# Specify the EC2 region that this will be running in (e.g. https://ec2.us-east-1.amazonaws.com)\n",
"EC2_URL=https://ec2.",{ "Ref": "AWS::Region" },".amazonaws.com\n",
"\n",
"# Health Check variables\n",
"Num_Pings=3\n",
"Ping_Timeout=1\n",
"Wait_Between_Pings=2\n",
"Wait_for_Instance_Stop=60\n",
"Wait_for_Instance_Start=300\n",
"\n",
"# leverage AWS security credentials provided by EC2 roles\n",
"# Setup environment for ec2 api tools\n",
"export EC2_HOME=/opt/ec2-api-tools\n",
"export AWS_IAM_HOME=/opt/IAMCli\n",
"export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64\n",
"PATH=/opt/ec2-api-tools/bin:$PATH\n",
"\n",
"# Determine the NAT instance private IP so we can ping the other NAT instance, take over\n",
"# its route, and restart it. Requires EC2 DescribeInstances, ReplaceRoute, and Start/StopInstances\n",
"# permissions. The following example EC2 Roles policy will authorize these commands:\n",
"# {\n",
"# \"Statement\": [\n",
"# {\n",
"# \"Action\": [\n",
"# \"ec2:DescribeInstances\",\n",
"# \"ec2:CreateRoute\",\n",
"# \"ec2:ReplaceRoute\",\n",
"# \"ec2:StartInstances\",\n",
"# \"ec2:StopInstances\"\n",
"# ],\n",
"# \"Effect\": \"Allow\",\n",
"# \"Resource\": \"*\"\n",
"# }\n",
"# ]\n",
"# }\n",
"\n",
"# Get the private IPs of the primary and backup NAT instances\n",
"PRIMARY_NAT_IP=`/opt/ec2-api-tools/bin/ec2-describe-instances $PRIMARY_NAT_ID -U $EC2_URL | grep PRIVATEIPADDRESS -m 1 | awk '{print $2;}'`\n",
"BACKUP_NAT_IP=`/opt/ec2-api-tools/bin/ec2-describe-instances $BACKUP_NAT_ID -U $EC2_URL | grep PRIVATEIPADDRESS -m 1 | awk '{print $2;}'`\n",
"\n",
"echo `date` \"-- Starting NAT monitor\"\n",
"\n",
"# NOTE: string comparisons below use POSIX '=' so they also work when /bin/sh is not bash\n",
"while [ . ]; do\n",
" # Check the health of both instances.\n",
" primary_pingresult=`ping -c $Num_Pings -W $Ping_Timeout $PRIMARY_NAT_IP| grep time= | wc -l`\n",
"\n",
" if [ \"$primary_pingresult\" = \"0\" ]; then\n",
" backup_pingresult=`ping -c $Num_Pings -W $Ping_Timeout $BACKUP_NAT_IP| grep time= | wc -l`\n",
" if [ \"$backup_pingresult\" = \"0\" ]; then\n",
" echo `date` \"-- Both NAT devices unreachable.\"\n",
" #TODO: Notify alert that both NATs are down.\n",
" else #Backup nat is healthy.\n",
" # Set HEALTHY variables to unhealthy (0)\n",
" ROUTE_HEALTHY=0\n",
" NAT_HEALTHY=0\n",
" STOPPING_NAT=0\n",
" while [ \"$NAT_HEALTHY\" = \"0\" ]; do\n",
" # Primary NAT instance is unhealthy, loop while we try to fix it\n",
" if [ \"$ROUTE_HEALTHY\" = \"0\" ]; then\n",
" echo `date` \"-- NAT($PRIMARY_NAT_ID) heartbeat failed, using $BACKUP_NAT_ID for $NAT_RT_ID default route\"\n",
" /opt/ec2-api-tools/bin/ec2-replace-route $NAT_RT_ID -r 0.0.0.0/0 -i $BACKUP_NAT_ID -U $EC2_URL\n",
" ROUTE_HEALTHY=1\n",
" fi\n",
" # Check NAT state to see if we should stop it or start it again\n",
" NAT_STATE=`/opt/ec2-api-tools/bin/ec2-describe-instances $PRIMARY_NAT_ID -U $EC2_URL | grep INSTANCE | awk '{print $5;}'`\n",
" if [ \"$NAT_STATE\" = \"stopped\" ]; then\n",
" echo `date` \"-- NAT($PRIMARY_NAT_ID) instance stopped, starting it back up\"\n",
" /opt/ec2-api-tools/bin/ec2-start-instances $PRIMARY_NAT_ID -U $EC2_URL\n",
" NAT_HEALTHY=1\n",
" sleep $Wait_for_Instance_Start\n",
" else\n",
" if [ \"$STOPPING_NAT\" = \"0\" ]; then\n",
" echo `date` \"-- NAT($PRIMARY_NAT_ID) instance $NAT_STATE, attempting to stop for reboot\"\n",
" /opt/ec2-api-tools/bin/ec2-stop-instances $PRIMARY_NAT_ID -U $EC2_URL\n",
" STOPPING_NAT=1\n",
" fi\n",
" sleep $Wait_for_Instance_Stop\n",
" fi\n",
" done\n",
"\n",
" # Backup nat was healthy so we switched to it. It is now the primary.\n",
" if [ \"$NAT_HEALTHY\" = \"1\" ]; then\n",
" TEMP_NAT_ID=$PRIMARY_NAT_ID\n",
" TEMP_NAT_IP=$PRIMARY_NAT_IP\n",
"\n",
" PRIMARY_NAT_ID=$BACKUP_NAT_ID\n",
" PRIMARY_NAT_IP=$BACKUP_NAT_IP\n",
"\n",
" BACKUP_NAT_ID=$TEMP_NAT_ID\n",
" BACKUP_NAT_IP=$TEMP_NAT_IP\n",
" fi\n",
" fi\n",
" else\n",
" sleep $Wait_Between_Pings\n",
" fi\n",
"done\n",
"EOF\n",
"chmod u+x /opt/edx/bin/nat_monitor.sh\n",
"# Relaunch the monitor at every boot from its installed location, then start it now\n",
"echo '@reboot /opt/edx/bin/nat_monitor.sh > /var/log/nat_monitor.log' | crontab -\n",
"/opt/edx/bin/nat_monitor.sh > /var/log/nat_monitor.log &\n"
]]}}
}
},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment