Introduce gateway_function_invocation_total to track individual functions

Introduce prometheus_alertmanager into stack - have it fire into webhook stash
This commit is contained in:
Alex 2017-01-21 10:11:33 +00:00
parent a9e72cd0a3
commit ab2f8e85f3
8 changed files with 118 additions and 8 deletions

View File

@ -2,4 +2,3 @@
echo "Deploying stack"
docker stack rm func ; docker stack deploy func --compose-file docker-compose.yml

View File

@ -13,16 +13,32 @@ services:
image: quay.io/prometheus/prometheus:latest
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
command: "-config.file=/etc/prometheus/prometheus.yml -storage.local.path=/prometheus -storage.local.memory-chunks=10000"
- ./prometheus/alert.rules:/etc/prometheus/alert.rules
command: "-config.file=/etc/prometheus/prometheus.yml -storage.local.path=/prometheus -storage.local.memory-chunks=10000 --alertmanager.url=http://alertmanager:9093"
ports:
- 9090:9090
depends_on:
- gateway
- alertmanager
environment:
no_proxy: "gateway"
networks:
- functions
alertmanager:
image: quay.io/prometheus/alertmanager
environment:
no_proxy: "gateway"
volumes:
- ./prometheus/alertmanager.yml:/alertmanager.yml
command:
- '-config.file=/alertmanager.yml'
networks:
- functions
ports:
- 9093:9093
# Sample functions go here.
webhookstash:
image: alexellis2/faas-webhookstash:latest

View File

@ -10,4 +10,4 @@ docker rm -f gateway_extract
echo Building alexellis2/faas-gateway:latest
docker build -t alexellis2/faas-gateway:latest .
docker build -t alexellis2/faas-gateway:latest-dev .

View File

@ -11,6 +11,7 @@ type MetricOptions struct {
GatewayRequestsTotal prometheus.Counter
GatewayServerlessServedTotal prometheus.Counter
GatewayFunctions prometheus.Histogram
GatewayFunctionInvocation *prometheus.CounterVec
}
// PrometheusHandler Bootstraps prometheus for metrics collection

View File

@ -72,6 +72,8 @@ func isAlexa(requestBody []byte) AlexaRequestBody {
}
func invokeService(w http.ResponseWriter, r *http.Request, metrics metrics.MetricOptions, service string, requestBody []byte) {
metrics.GatewayFunctionInvocation.WithLabelValues(service).Add(1)
stamp := strconv.FormatInt(time.Now().Unix(), 10)
start := time.Now()
@ -171,15 +173,24 @@ func main() {
Name: "gateway_functions",
Help: "Gateway functions",
})
GatewayFunctionInvocation := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "gateway_function_invocation_total",
Help: "Individual function metrics",
},
[]string{"function_name"},
)
prometheus.Register(GatewayRequestsTotal)
prometheus.Register(GatewayServerlessServedTotal)
prometheus.Register(GatewayFunctions)
prometheus.Register(GatewayFunctionInvocation)
metricsOptions := metrics.MetricOptions{
GatewayRequestsTotal: GatewayRequestsTotal,
GatewayServerlessServedTotal: GatewayServerlessServedTotal,
GatewayFunctions: GatewayFunctions,
GatewayFunctionInvocation: GatewayFunctionInvocation,
}
r := mux.NewRouter()

15
prometheus/alert.rules Normal file
View File

@ -0,0 +1,15 @@
# service_down: fires for any series whose `up` value equals 0.
# No FOR clause is set on this rule.
ALERT service_down
IF up == 0
# APIHighInvocationRate: fires when rate() of gateway_function_invocation_total,
# taken over a 10s range, is above 5 and that condition has held for 30s.
# Summary and description are templated with the series' `instance` label.
# The alert carries the static labels service="gateway" and severity="major",
# plus a `value` label templated from $value.
ALERT APIHighInvocationRate
IF rate ( gateway_function_invocation_total [10s] ) > 5
FOR 30s
ANNOTATIONS {
summary = "High invocation total on {{ $labels.instance }}",
description = "High invocation total on {{ $labels.instance }}",
}
LABELS {
service = "gateway",
severity = "major",
value = "{{$value}}",
}

View File

@ -0,0 +1,68 @@
global:
# The smarthost and SMTP sender used for mail notifications.
smtp_smarthost: 'localhost:25'
smtp_from: 'alertmanager@example.org'
smtp_auth_username: 'alertmanager'
smtp_auth_password: 'password'
# The auth token for Hipchat.
hipchat_auth_token: '1234556789'
# Alternative host for Hipchat.
hipchat_url: 'https://hipchat.foobar.org/'
# The directory from which notification templates are read.
templates:
- '/etc/alertmanager/template/*.tmpl'
# The root route on which each incoming alert enters.
route:
# The labels by which incoming alerts are grouped together. For example,
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
# be batched into a single group.
group_by: ['alertname', 'cluster', 'service']
# When a new group of alerts is created by an incoming alert, wait at
# least 'group_wait' to send the initial notification.
# This ensures that multiple alerts for the same group that start
# firing shortly after one another are batched together in the first
# notification.
group_wait: 30s
# After the first notification has been sent, wait 'group_interval' to send a
# batch of new alerts that started firing for that group.
group_interval: 5m
# If an alert has successfully been sent, wait 'repeat_interval' before
# resending it.
repeat_interval: 3h
# A default receiver
receiver: scale-up
# All the above attributes are inherited by all child routes and can be
# overwritten on each.
# The child route trees.
routes:
- match:
service: gateway
receiver: scale-up
severity: major
# Inhibition rules allow muting a set of alerts while another alert is
# firing.
# We use this to mute any warning-level notifications if the same alert is
# already critical.
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
# Apply inhibition if the alertname is the same.
equal: ['alertname', 'cluster', 'service']
receivers:
- name: 'scale-up'
webhook_configs:
- url: http://gateway:8080/function/func_webhookstash
send_resolved: true

View File

@ -7,12 +7,12 @@ global:
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'codelab-monitor'
monitor: 'faas-monitor'
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first.rules"
# - "second.rules"
- 'alert.rules'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
@ -29,6 +29,6 @@ scrape_configs:
- targets: ['localhost:9090']
- job_name: "gateway"
scrape_interval: "15s"
scrape_interval: 5s
static_configs:
- targets: ['gateway:8080']