mirror of
https://github.com/openfaas/faas.git
synced 2025-06-14 11:16:47 +00:00
Add K8s specific files
This commit is contained in:
parent
ddc973c341
commit
518d2f41a0
3
prometheus/Dockerfile.alertmanager.k8s
Normal file
3
prometheus/Dockerfile.alertmanager.k8s
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
FROM prom/alertmanager:v0.7.1
|
||||||
|
|
||||||
|
COPY k8s.alertmanager.yml /alertmanager.yml
|
5
prometheus/Dockerfile.prometheus.k8s
Normal file
5
prometheus/Dockerfile.prometheus.k8s
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
FROM prom/prometheus:v1.5.2
|
||||||
|
|
||||||
|
COPY k8s.prometheus.yml /etc/prometheus/prometheus.yml
|
||||||
|
COPY k8s.alert.rules /etc/prometheus/alert.rules
|
||||||
|
|
16
prometheus/k8s.alert.rules
Normal file
16
prometheus/k8s.alert.rules
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
ALERT service_down
|
||||||
|
IF up == 0
|
||||||
|
|
||||||
|
ALERT APIHighInvocationRate
|
||||||
|
IF sum ( rate(gateway_function_invocation_total{code="200"}[10s]) ) by (function_name) > 5
|
||||||
|
FOR 5s
|
||||||
|
LABELS {
|
||||||
|
service = "gateway",
|
||||||
|
severity = "major",
|
||||||
|
value = "{{$value}}"
|
||||||
|
}
|
||||||
|
ANNOTATIONS {
|
||||||
|
summary = "High invocation total on {{ $labels.instance }}",
|
||||||
|
description = "High invocation total on {{ $labels.instance }}"
|
||||||
|
}
|
||||||
|
|
69
prometheus/k8s.alertmanager.yml
Normal file
69
prometheus/k8s.alertmanager.yml
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
global:
|
||||||
|
# The smarthost and SMTP sender used for mail notifications.
|
||||||
|
smtp_smarthost: 'localhost:25'
|
||||||
|
smtp_from: 'alertmanager@example.org'
|
||||||
|
smtp_auth_username: 'alertmanager'
|
||||||
|
smtp_auth_password: 'password'
|
||||||
|
# The auth token for Hipchat.
|
||||||
|
hipchat_auth_token: '1234556789'
|
||||||
|
# Alternative host for Hipchat.
|
||||||
|
hipchat_url: 'https://hipchat.foobar.org/'
|
||||||
|
|
||||||
|
# The directory from which notification templates are read.
|
||||||
|
templates:
|
||||||
|
- '/etc/alertmanager/template/*.tmpl'
|
||||||
|
|
||||||
|
# The root route on which each incoming alert enters.
|
||||||
|
route:
|
||||||
|
# The labels by which incoming alerts are grouped together. For example,
|
||||||
|
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
|
||||||
|
# be batched into a single group.
|
||||||
|
group_by: ['alertname', 'cluster', 'service']
|
||||||
|
|
||||||
|
# When a new group of alerts is created by an incoming alert, wait at
|
||||||
|
# least 'group_wait' to send the initial notification.
|
||||||
|
# This ensures that multiple alerts for the same group that start
|
||||||
|
# firing shortly after one another are batched together on the first
|
||||||
|
# notification.
|
||||||
|
group_wait: 5s
|
||||||
|
|
||||||
|
# When the first notification was sent, wait 'group_interval' to send a batch
|
||||||
|
# of new alerts that started firing for that group.
|
||||||
|
group_interval: 10s
|
||||||
|
|
||||||
|
# If an alert has successfully been sent, wait 'repeat_interval' to
|
||||||
|
# resend it.
|
||||||
|
repeat_interval: 30s
|
||||||
|
|
||||||
|
# A default receiver
|
||||||
|
receiver: scale-up
|
||||||
|
|
||||||
|
# All the above attributes are inherited by all child routes and can
|
||||||
|
# be overwritten on each.
|
||||||
|
|
||||||
|
# The child route trees.
|
||||||
|
routes:
|
||||||
|
- match:
|
||||||
|
service: gateway
|
||||||
|
receiver: scale-up
|
||||||
|
severity: major
|
||||||
|
|
||||||
|
|
||||||
|
# Inhibition rules allow muting a set of alerts given that another alert is
|
||||||
|
# firing.
|
||||||
|
# We use this to mute any warning-level notifications if the same alert is
|
||||||
|
# already critical.
|
||||||
|
inhibit_rules:
|
||||||
|
- source_match:
|
||||||
|
severity: 'critical'
|
||||||
|
target_match:
|
||||||
|
severity: 'warning'
|
||||||
|
# Apply inhibition only if alertname, cluster and service are all the same.
|
||||||
|
equal: ['alertname', 'cluster', 'service']
|
||||||
|
|
||||||
|
receivers:
|
||||||
|
- name: 'scale-up'
|
||||||
|
webhook_configs:
|
||||||
|
- url: http://gateway.default:8080/system/alert
|
||||||
|
send_resolved: true
|
||||||
|
|
40
prometheus/k8s.prometheus.yml
Normal file
40
prometheus/k8s.prometheus.yml
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# my global config
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s # By default, scrape targets every 15 seconds.
|
||||||
|
evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
|
||||||
|
# scrape_timeout is set to the global default (10s).
|
||||||
|
|
||||||
|
# Attach these labels to any time series or alerts when communicating with
|
||||||
|
# external systems (federation, remote storage, Alertmanager).
|
||||||
|
external_labels:
|
||||||
|
monitor: 'faas-monitor'
|
||||||
|
|
||||||
|
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||||
|
rule_files:
|
||||||
|
- 'alert.rules'
|
||||||
|
|
||||||
|
|
||||||
|
# Scrape configurations. The first job has exactly one endpoint to scrape:
|
||||||
|
# Here it's Prometheus itself.
|
||||||
|
scrape_configs:
|
||||||
|
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
|
||||||
|
# Override the global default and scrape targets from this job every 5 seconds.
|
||||||
|
scrape_interval: 5s
|
||||||
|
|
||||||
|
# metrics_path defaults to '/metrics'
|
||||||
|
# scheme defaults to 'http'.
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9090']
|
||||||
|
|
||||||
|
- job_name: "gateway"
|
||||||
|
scrape_interval: 5s
|
||||||
|
dns_sd_configs:
|
||||||
|
- names: ['gateway.default']
|
||||||
|
port: 8080
|
||||||
|
type: A
|
||||||
|
refresh_interval: 5s
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user