Commit cd011004 by Joseph Mulloy

Add alarm creation to celery script OPS-2545

parent b9093889
......@@ -35,19 +35,32 @@ class RedisWrapper(object):
class CwBotoWrapper(object):
def __init__(self):
self.cw = boto3.client('cloudwatch')
self.client = boto3.client('cloudwatch')
@backoff.on_exception(backoff.expo,
(botocore.exceptions.ClientError),
max_tries=max_tries)
def list_metrics(self, *args, **kwargs):
return self.cw.list_metrics(*args, **kwargs)
return self.client.list_metrics(*args, **kwargs)
@backoff.on_exception(backoff.expo,
(botocore.exceptions.ClientError),
max_tries=max_tries)
def put_metric_data(self, *args, **kwargs):
return self.cw.put_metric_data(*args, **kwargs)
return self.client.put_metric_data(*args, **kwargs)
@backoff.on_exception(backoff.expo,
(botocore.exceptions.ClientError),
max_tries=max_tries)
def describe_alarms_for_metric(self, *args, **kwargs):
return self.client.describe_alarms_for_metric(*args, **kwargs)
@backoff.on_exception(backoff.expo,
(botocore.exceptions.ClientError),
max_tries=max_tries)
def put_metric_alarm(self, *args, **kwargs):
return self.client.put_metric_alarm(*args, **kwargs)
@click.command()
@click.option('--host', '-h', default='localhost',
......@@ -58,22 +71,28 @@ class CwBotoWrapper(object):
help="Deployment (i.e. edx or edge)")
@click.option('--max-metrics', default=30,
help='Maximum number of CloudWatch metrics to publish')
def check_queues(host, port, environment, deploy, max_metrics):
@click.option('--threshold', default=50,
help='Maximum queue length before alarm notification is sent')
@click.option('--sns-arn', '-s', help='ARN for SNS alert topic')
def check_queues(host, port, environment, deploy, max_metrics, threshold,
sns_arn):
timeout = 1
namespace = "celery/{}-{}".format(environment, deploy)
r = RedisWrapper(host=host, port=port, socket_timeout=timeout,
socket_connect_timeout=timeout)
cw = CwBotoWrapper()
redis_client = RedisWrapper(host=host, port=port, socket_timeout=timeout,
socket_connect_timeout=timeout)
cloudwatch = CwBotoWrapper()
metric_name = 'queue_length'
dimension = 'queue'
response = cw.list_metrics(Namespace=namespace, MetricName=metric_name,
Dimensions=[{'Name': dimension}])
response = cloudwatch.list_metrics(Namespace=namespace,
MetricName=metric_name,
Dimensions=[{'Name': dimension}])
existing_queues = []
for m in response["Metrics"]:
existing_queues.extend(
[d['Value'] for d in m["Dimensions"] if d['Name'] == dimension])
redis_queues = set([k.decode() for k in r.keys() if r.type(k) == b'list'])
redis_queues = set([k.decode() for k in redis_client.keys()
if redis_client.type(k) == b'list'])
all_queues = existing_queues + list(
set(redis_queues).difference(existing_queues)
......@@ -95,10 +114,34 @@ def check_queues(host, port, environment, deploy, max_metrics):
"Name": dimension,
"Value": queue
}],
'Value': r.llen(queue)
'Value': redis_client.llen(queue)
})
cw.put_metric_data(Namespace=namespace, MetricData=metric_data)
cloudwatch.put_metric_data(Namespace=namespace, MetricData=metric_data)
for queue in queues:
dimensions = [{'Name': dimension, 'Value': queue}]
period = 300
evaluation_periods = 1
comparison_operator = "GreaterThanThreshold"
treat_missing_data = "missing"
statistic = "Maximum"
actions = [sns_arn]
alarm_name = "{} queue length over threshold".format(queue)
# Always create/configure alert to keep config up to date
cloudwatch.put_metric_alarm(AlarmName=alarm_name,
AlarmDescription=alarm_name,
Namespace=namespace,
MetricName=metric_name,
Dimensions=dimensions,
Period=period,
EvaluationPeriods=evaluation_periods,
TreatMissingData=treat_missing_data,
Threshold=threshold,
ComparisonOperator=comparison_operator,
Statistic=statistic,
AlarmActions=actions)
if __name__ == '__main__':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment