mirror of
https://github.com/openfaas/faas.git
synced 2025-06-16 12:16:47 +00:00
Introduce gateway_function_invocation_total to track individual functions
Introduce prometheus_alertmanager into stack - have it fire into webhook stash
This commit is contained in:
parent
a9e72cd0a3
commit
ab2f8e85f3
@ -2,4 +2,3 @@
|
||||
|
||||
echo "Deploying stack"
|
||||
docker stack rm func ; docker stack deploy func --compose-file docker-compose.yml
|
||||
|
||||
|
@ -7,22 +7,38 @@ services:
|
||||
- 8080:8080
|
||||
image: alexellis2/faas-gateway:latest-dev
|
||||
networks:
|
||||
- functions
|
||||
- functions
|
||||
|
||||
prometheus:
|
||||
image: quay.io/prometheus/prometheus:latest
|
||||
volumes:
|
||||
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
command: "-config.file=/etc/prometheus/prometheus.yml -storage.local.path=/prometheus -storage.local.memory-chunks=10000"
|
||||
- ./prometheus/alert.rules:/etc/prometheus/alert.rules
|
||||
|
||||
command: "-config.file=/etc/prometheus/prometheus.yml -storage.local.path=/prometheus -storage.local.memory-chunks=10000 --alertmanager.url=http://alertmanager:9093"
|
||||
ports:
|
||||
- 9090:9090
|
||||
depends_on:
|
||||
- gateway
|
||||
- alertmanager
|
||||
environment:
|
||||
no_proxy: "gateway"
|
||||
networks:
|
||||
- functions
|
||||
|
||||
alertmanager:
|
||||
image: quay.io/prometheus/alertmanager
|
||||
environment:
|
||||
no_proxy: "gateway"
|
||||
volumes:
|
||||
- ./prometheus/alertmanager.yml:/alertmanager.yml
|
||||
command:
|
||||
- '-config.file=/alertmanager.yml'
|
||||
networks:
|
||||
- functions
|
||||
ports:
|
||||
- 9093:9093
|
||||
|
||||
# Sample functions go here.
|
||||
webhookstash:
|
||||
image: alexellis2/faas-webhookstash:latest
|
||||
|
@ -10,4 +10,4 @@ docker rm -f gateway_extract
|
||||
|
||||
echo Building alexellis2/faas-gateway:latest
|
||||
|
||||
docker build -t alexellis2/faas-gateway:latest .
|
||||
docker build -t alexellis2/faas-gateway:latest-dev .
|
||||
|
@ -11,6 +11,7 @@ type MetricOptions struct {
|
||||
GatewayRequestsTotal prometheus.Counter
|
||||
GatewayServerlessServedTotal prometheus.Counter
|
||||
GatewayFunctions prometheus.Histogram
|
||||
GatewayFunctionInvocation *prometheus.CounterVec
|
||||
}
|
||||
|
||||
// PrometheusHandler Bootstraps prometheus for metrics collection
|
||||
|
@ -72,6 +72,8 @@ func isAlexa(requestBody []byte) AlexaRequestBody {
|
||||
}
|
||||
|
||||
func invokeService(w http.ResponseWriter, r *http.Request, metrics metrics.MetricOptions, service string, requestBody []byte) {
|
||||
metrics.GatewayFunctionInvocation.WithLabelValues(service).Add(1)
|
||||
|
||||
stamp := strconv.FormatInt(time.Now().Unix(), 10)
|
||||
|
||||
start := time.Now()
|
||||
@ -171,15 +173,24 @@ func main() {
|
||||
Name: "gateway_functions",
|
||||
Help: "Gateway functions",
|
||||
})
|
||||
GatewayFunctionInvocation := prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "gateway_function_invocation_total",
|
||||
Help: "Individual function metrics",
|
||||
},
|
||||
[]string{"function_name"},
|
||||
)
|
||||
|
||||
prometheus.Register(GatewayRequestsTotal)
|
||||
prometheus.Register(GatewayServerlessServedTotal)
|
||||
prometheus.Register(GatewayFunctions)
|
||||
prometheus.Register(GatewayFunctionInvocation)
|
||||
|
||||
metricsOptions := metrics.MetricOptions{
|
||||
GatewayRequestsTotal: GatewayRequestsTotal,
|
||||
GatewayServerlessServedTotal: GatewayServerlessServedTotal,
|
||||
GatewayFunctions: GatewayFunctions,
|
||||
GatewayFunctionInvocation: GatewayFunctionInvocation,
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
|
15
prometheus/alert.rules
Normal file
15
prometheus/alert.rules
Normal file
@ -0,0 +1,15 @@
|
||||
ALERT service_down
|
||||
IF up == 0
|
||||
|
||||
ALERT APIHighInvocationRate
|
||||
IF rate ( gateway_function_invocation_total [10s] ) > 5
|
||||
FOR 30s
|
||||
ANNOTATIONS {
|
||||
summary = "High invocation total on {{ $labels.instance }}",
|
||||
description = "High invocation total on {{ $labels.instance }}",
|
||||
}
|
||||
LABELS {
|
||||
service = "gateway",
|
||||
severity = "major",
|
||||
value = "{{$value}}",
|
||||
}
|
68
prometheus/alertmanager.yml
Normal file
68
prometheus/alertmanager.yml
Normal file
@ -0,0 +1,68 @@
|
||||
global:
|
||||
# The smarthost and SMTP sender used for mail notifications.
|
||||
smtp_smarthost: 'localhost:25'
|
||||
smtp_from: 'alertmanager@example.org'
|
||||
smtp_auth_username: 'alertmanager'
|
||||
smtp_auth_password: 'password'
|
||||
# The auth token for Hipchat.
|
||||
hipchat_auth_token: '1234556789'
|
||||
# Alternative host for Hipchat.
|
||||
hipchat_url: 'https://hipchat.foobar.org/'
|
||||
|
||||
# The directory from which notification templates are read.
|
||||
templates:
|
||||
- '/etc/alertmanager/template/*.tmpl'
|
||||
|
||||
# The root route on which each incoming alert enters.
|
||||
route:
|
||||
# The labels by which incoming alerts are grouped together. For example,
|
||||
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
|
||||
# be batched into a single group.
|
||||
group_by: ['alertname', 'cluster', 'service']
|
||||
|
||||
# When a new group of alerts is created by an incoming alert, wait at
|
||||
# least 'group_wait' to send the initial notification.
|
||||
# This ensures that multiple alerts for the same group that start
|
||||
# firing shortly after another are batched together on the first
|
||||
# notification.
|
||||
group_wait: 30s
|
||||
|
||||
# When the first notification was sent, wait 'group_interval' to send a batch
|
||||
# of new alerts that started firing for that group.
|
||||
group_interval: 5m
|
||||
|
||||
# If an alert has successfully been sent, wait 'repeat_interval' to
|
||||
# resend them.
|
||||
repeat_interval: 3h
|
||||
|
||||
# A default receiver
|
||||
receiver: scale-up
|
||||
|
||||
# All the above attributes are inherited by all child routes and can
|
||||
# be overwritten on each.
|
||||
|
||||
# The child route trees.
|
||||
routes:
|
||||
- match:
|
||||
service: gateway
|
||||
receiver: scale-up
|
||||
severity: major
|
||||
|
||||
|
||||
# Inhibition rules allow muting a set of alerts given that another alert is
|
||||
# firing.
|
||||
# We use this to mute any warning-level notifications if the same alert is
|
||||
# already critical.
|
||||
inhibit_rules:
|
||||
- source_match:
|
||||
severity: 'critical'
|
||||
target_match:
|
||||
severity: 'warning'
|
||||
# Apply inhibition if the alertname, cluster and service are the same.
|
||||
equal: ['alertname', 'cluster', 'service']
|
||||
|
||||
receivers:
|
||||
- name: 'scale-up'
|
||||
webhook_configs:
|
||||
- url: http://gateway:8080/function/func_webhookstash
|
||||
send_resolved: true
|
@ -7,12 +7,12 @@ global:
|
||||
# Attach these labels to any time series or alerts when communicating with
|
||||
# external systems (federation, remote storage, Alertmanager).
|
||||
external_labels:
|
||||
monitor: 'codelab-monitor'
|
||||
monitor: 'faas-monitor'
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
# - "first.rules"
|
||||
# - "second.rules"
|
||||
- 'alert.rules'
|
||||
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's Prometheus itself.
|
||||
@ -29,6 +29,6 @@ scrape_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
- job_name: "gateway"
|
||||
scrape_interval: "15s"
|
||||
scrape_interval: 5s
|
||||
static_configs:
|
||||
- targets: ['gateway:8080']
|
||||
|
Loading…
x
Reference in New Issue
Block a user