Tweak alertmanager timeout + simplify down-scale of replicas

This commit is contained in:
Alex 2017-01-23 22:44:03 +00:00
parent 59ca597903
commit cdd5219200
2 changed files with 16 additions and 6 deletions

View File

@ -32,12 +32,21 @@ func scaleService(req requests.PrometheusAlert, c *client.Client) error {
} else {
return err
}
} else {
replicas = *service.Spec.Mode.Replicated.Replicas - uint64(5)
if replicas <= 0 {
} else { // Resolved event.
// Previously decremented by 5, but event only fires once, so set to 1/1.
if *service.Spec.Mode.Replicated.Replicas > 1 {
// replicas = *service.Spec.Mode.Replicated.Replicas - uint64(5)
// if replicas < 1 {
// replicas = 1
// }
// return nil
replicas = 1
} else {
return nil
}
}
log.Printf("Scaling %s to %d replicas.\n", serviceName, replicas)
service.Spec.Mode.Replicated.Replicas = &replicas

View File

@ -25,15 +25,15 @@ route:
# This ensures that multiple alerts for the same group that start firing
# shortly after one another are batched together in the first
# notification.
group_wait: 30s
group_wait: 5s
# After the first notification has been sent, wait 'group_interval' before
# sending a batch of new alerts that started firing for that group.
group_interval: 5m
group_interval: 10s
# If an alert has successfully been sent, wait 'repeat_interval' before
# resending it.
repeat_interval: 3h
repeat_interval: 30s
# A default receiver
receiver: scale-up
@ -66,3 +66,4 @@ receivers:
webhook_configs:
- url: http://gateway:8080/system/alert
send_resolved: true