Unverified Commit aed6d562 by Joseph Mulloy Committed by GitHub

Merge pull request #4307 from edx/jdmulloy/ops2721/custom_redis_thresholds

Add custom thresholds to redis monitoring script OPS-2721
parents 146207c4 233c612f
......@@ -72,10 +72,17 @@ class CwBotoWrapper(object):
@click.option('--max-metrics', default=30,
help='Maximum number of CloudWatch metrics to publish')
@click.option('--threshold', default=50,
help='Maximum queue length before alarm notification is sent')
help='Default maximum queue length before alarm notification is'
+ ' sent')
@click.option('--queue-threshold', type=(str, int), multiple=True,
help='Threshold per queue in format --queue-threshold'
+ ' {queue_name} {threshold}. May be used multiple times')
@click.option('--sns-arn', '-s', help='ARN for SNS alert topic', required=True)
def check_queues(host, port, environment, deploy, max_metrics, threshold,
sns_arn):
queue_threshold, sns_arn):
thresholds = dict(queue_threshold)
timeout = 1
namespace = "celery/{}-{}".format(environment, deploy)
redis_client = RedisWrapper(host=host, port=port, socket_timeout=timeout,
......@@ -101,7 +108,7 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold,
if len(all_queues) > max_metrics:
# TODO: Use proper logging framework
print("Warning! Too many metrics, refusing to publish more than {}"
.format(max_metrics))
.format(max_metrics))
# Take first max_metrics number of queues from all_queues and remove
# queues that aren't in redis
......@@ -123,10 +130,14 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold,
for queue in queues:
dimensions = [{'Name': dimension, 'Value': queue}]
period = 300
evaluation_periods = 1
queue_threshold = threshold
if queue in thresholds:
queue_threshold = thresholds[queue]
# Period is in seconds
period = 60
evaluation_periods = 15
comparison_operator = "GreaterThanThreshold"
treat_missing_data = "missing"
treat_missing_data = "notBreaching"
statistic = "Maximum"
actions = [sns_arn]
alarm_name = "{}-{} {} queue length over threshold".format(environment,
......@@ -145,9 +156,11 @@ def check_queues(host, port, environment, deploy, max_metrics, threshold,
Period=period,
EvaluationPeriods=evaluation_periods,
TreatMissingData=treat_missing_data,
Threshold=threshold,
Threshold=queue_threshold,
ComparisonOperator=comparison_operator,
Statistic=statistic,
InsufficientDataActions=actions,
OKActions=actions,
AlarmActions=actions)
......
......@@ -3,3 +3,4 @@ redis==2.10.6
click==6.7
boto3==1.5.4
backoff==1.4.3
awscli==1.14.32
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment