Introduce gateway_function_invocation_total to track individual functions

Introduce prometheus_alertmanager into stack - have it fire into webhook stash
This commit is contained in:
Alex
2017-01-21 10:11:33 +00:00
parent a9e72cd0a3
commit ab2f8e85f3
8 changed files with 118 additions and 8 deletions

15
prometheus/alert.rules Normal file
View File

@ -0,0 +1,15 @@
# Raised for any scrape target whose `up` series reports 0.
ALERT service_down
  IF up == 0
# Raised when the per-second rate of gateway_function_invocation_total,
# computed over a 10s window, stays above 5 for 30s.
ALERT APIHighInvocationRate
  IF rate(gateway_function_invocation_total[10s]) > 5
  FOR 30s
  ANNOTATIONS {
    summary = "High invocation total on {{ $labels.instance }}",
    description = "High invocation total on {{ $labels.instance }}",
  }
  # `service` and `severity` are the labels the Alertmanager route matches on;
  # `value` carries the expression result at evaluation time.
  LABELS {
    service = "gateway",
    severity = "major",
    value = "{{$value}}",
  }

View File

@ -0,0 +1,68 @@
global:
  # SMTP smarthost, sender address and credentials used for e-mail
  # notifications.
  # NOTE(review): these look like placeholder credentials - confirm that no
  # real secret is ever committed here.
  smtp_smarthost: 'localhost:25'
  smtp_from: 'alertmanager@example.org'
  smtp_auth_username: 'alertmanager'
  smtp_auth_password: 'password'
  # Auth token for HipChat notifications.
  hipchat_auth_token: '1234556789'
  # Alternative HipChat host.
  hipchat_url: 'https://hipchat.foobar.org/'
# Glob pattern(s) selecting the notification template files to load.
templates:
  - '/etc/alertmanager/template/*.tmpl'
# The root route: every incoming alert enters the routing tree here.
route:
  # Labels by which incoming alerts are grouped together. For example,
  # multiple alerts arriving with cluster=A and alertname=LatencyHigh are
  # batched into a single group.
  group_by: ['alertname', 'cluster', 'service']
  # When an incoming alert creates a new group, wait at least 'group_wait'
  # before sending the initial notification, so that alerts of the same group
  # that start firing shortly after one another are batched into that first
  # notification.
  group_wait: 30s
  # After the first notification has been sent, wait 'group_interval' before
  # sending a batch of new alerts that started firing for that group.
  group_interval: 5m
  # After a notification has been sent successfully, wait 'repeat_interval'
  # before sending it again.
  repeat_interval: 3h
  # Default receiver, used when no child route matches.
  receiver: scale-up
  # All of the attributes above are inherited by child routes and can be
  # overwritten on each of them.
  # The child route tree.
  routes:
    # Gateway alerts of severity "major" go to the scale-up webhook.
    # 'receiver' is a property of the route itself, not a label matcher, so it
    # must sit beside 'match' rather than inside it; the alert rules attach no
    # 'receiver' label, so matching on one would never succeed.
    - match:
        service: gateway
        severity: major
      receiver: scale-up
# Inhibition rules mute a set of alerts while another alert is firing.
# Here, warning-level notifications are muted when the same alert is already
# firing at critical level.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    # Inhibit only when all of these labels have equal values on the source
    # and target alerts.
    equal: ['alertname', 'cluster', 'service']
# Notification targets that routes can refer to by name.
receivers:
  - name: 'scale-up'
    webhook_configs:
      # Alerts are POSTed to the webhook-stash function behind the gateway;
      # resolved notifications are delivered as well.
      - url: 'http://gateway:8080/function/func_webhookstash'
        send_resolved: true

View File

@ -7,12 +7,12 @@ global:
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'codelab-monitor'
monitor: 'faas-monitor'
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first.rules"
# - "second.rules"
- 'alert.rules'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
@ -29,6 +29,6 @@ scrape_configs:
- targets: ['localhost:9090']
- job_name: "gateway"
scrape_interval: "15s"
scrape_interval: 5s
static_configs:
- targets: ['gateway:8080']