Unverified Commit aed6d562 by Joseph Mulloy Committed by GitHub

Merge pull request #4307 from edx/jdmulloy/ops2721/custom_redis_thresholds

Add custom thresholds to redis monitoring script OPS-2721
parents 146207c4 233c612f
...@@ -72,10 +72,17 @@ class CwBotoWrapper(object): ...@@ -72,10 +72,17 @@ class CwBotoWrapper(object):
@click.option('--max-metrics', default=30, @click.option('--max-metrics', default=30,
help='Maximum number of CloudWatch metrics to publish') help='Maximum number of CloudWatch metrics to publish')
@click.option('--threshold', default=50, @click.option('--threshold', default=50,
help='Maximum queue length before alarm notification is sent') help='Default maximum queue length before alarm notification is'
+ ' sent')
@click.option('--queue-threshold', type=(str, int), multiple=True,
help='Threshold per queue in format --queue-threshold'
+ ' {queue_name} {threshold}. May be used multiple times')
@click.option('--sns-arn', '-s', help='ARN for SNS alert topic', required=True) @click.option('--sns-arn', '-s', help='ARN for SNS alert topic', required=True)
def check_queues(host, port, environment, deploy, max_metrics, threshold, def check_queues(host, port, environment, deploy, max_metrics, threshold,
sns_arn): queue_threshold, sns_arn):
thresholds = dict(queue_threshold)
timeout = 1 timeout = 1
namespace = "celery/{}-{}".format(environment, deploy) namespace = "celery/{}-{}".format(environment, deploy)
redis_client = RedisWrapper(host=host, port=port, socket_timeout=timeout, redis_client = RedisWrapper(host=host, port=port, socket_timeout=timeout,
...@@ -101,7 +108,7 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold, ...@@ -101,7 +108,7 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold,
if len(all_queues) > max_metrics: if len(all_queues) > max_metrics:
# TODO: Use proper logging framework # TODO: Use proper logging framework
print("Warning! Too many metrics, refusing to publish more than {}" print("Warning! Too many metrics, refusing to publish more than {}"
.format(max_metrics)) .format(max_metrics))
# Take first max_metrics number of queues from all_queues and remove # Take first max_metrics number of queues from all_queues and remove
# queues that aren't in redis # queues that aren't in redis
...@@ -123,10 +130,14 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold, ...@@ -123,10 +130,14 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold,
for queue in queues: for queue in queues:
dimensions = [{'Name': dimension, 'Value': queue}] dimensions = [{'Name': dimension, 'Value': queue}]
period = 300 queue_threshold = threshold
evaluation_periods = 1 if queue in thresholds:
queue_threshold = thresholds[queue]
# Period is in seconds
period = 60
evaluation_periods = 15
comparison_operator = "GreaterThanThreshold" comparison_operator = "GreaterThanThreshold"
treat_missing_data = "missing" treat_missing_data = "notBreaching"
statistic = "Maximum" statistic = "Maximum"
actions = [sns_arn] actions = [sns_arn]
alarm_name = "{}-{} {} queue length over threshold".format(environment, alarm_name = "{}-{} {} queue length over threshold".format(environment,
...@@ -145,9 +156,11 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold, ...@@ -145,9 +156,11 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold,
Period=period, Period=period,
EvaluationPeriods=evaluation_periods, EvaluationPeriods=evaluation_periods,
TreatMissingData=treat_missing_data, TreatMissingData=treat_missing_data,
Threshold=threshold, Threshold=queue_threshold,
ComparisonOperator=comparison_operator, ComparisonOperator=comparison_operator,
Statistic=statistic, Statistic=statistic,
InsufficientDataActions=actions,
OKActions=actions,
AlarmActions=actions) AlarmActions=actions)
......
...@@ -3,3 +3,4 @@ redis==2.10.6 ...@@ -3,3 +3,4 @@ redis==2.10.6
click==6.7 click==6.7
boto3==1.5.4 boto3==1.5.4
backoff==1.4.3 backoff==1.4.3
awscli==1.14.32
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment