mirror of
https://github.com/openfaas/faas.git
synced 2025-06-08 16:26:47 +00:00
Upgrade prom and alert manager to 2.7.1 and 0.16.1
**What** - Removes the `value` label in the scale-up alert - Updates the annotations to use the `function_name` label instead of the `instance` label that was removed. - Per prometheus/prometheus#4836 and the related mailing list discussion https://groups.google.com/d/msg/prometheus-users/7Ul6ngc7Ogs/j_YDszV5BwAJ the alert value should not be included in the alert labels; otherwise each calculation of the alert is treated like a new alert and then the use of `for 5s` will not behave as expected. - Ports the resolution from openfaas/faas-netes#372 Signed-off-by: Lucas Roesler <roesler.lucas@gmail.com>
This commit is contained in:
parent
f2ac7b906c
commit
3bcc10a07e
@ -8,17 +8,17 @@ services:
|
||||
- functions
|
||||
environment:
|
||||
functions_provider_url: "http://faas-swarm:8080/"
|
||||
read_timeout: "5m5s" # Maximum time to read HTTP request
|
||||
write_timeout: "5m5s" # Maximum time to write HTTP response
|
||||
upstream_timeout: "5m" # Maximum duration of upstream function call - should be more than read_timeout and write_timeout
|
||||
dnsrr: "true" # Temporarily use dnsrr in place of VIP while issue persists on PWD
|
||||
read_timeout: "5m5s" # Maximum time to read HTTP request
|
||||
write_timeout: "5m5s" # Maximum time to write HTTP response
|
||||
upstream_timeout: "5m" # Maximum duration of upstream function call - should be more than read_timeout and write_timeout
|
||||
dnsrr: "true" # Temporarily use dnsrr in place of VIP while issue persists on PWD
|
||||
faas_nats_address: "nats"
|
||||
faas_nats_port: 4222
|
||||
direct_functions: "true" # Functions are invoked directly over the overlay network
|
||||
direct_functions: "true" # Functions are invoked directly over the overlay network
|
||||
direct_functions_suffix: ""
|
||||
basic_auth: "${BASIC_AUTH:-true}"
|
||||
secret_mount_path: "/run/secrets/"
|
||||
scale_from_zero: "true" # Enable if you want functions to scale from 0/0 to min replica count upon invoke
|
||||
scale_from_zero: "true" # Enable if you want functions to scale from 0/0 to min replica count upon invoke
|
||||
max_idle_conns: 1024
|
||||
max_idle_conns_per_host: 1024
|
||||
deploy:
|
||||
@ -34,7 +34,7 @@ services:
|
||||
window: 380s
|
||||
placement:
|
||||
constraints:
|
||||
- 'node.platform.os == linux'
|
||||
- "node.platform.os == linux"
|
||||
secrets:
|
||||
- basic-auth-user
|
||||
- basic-auth-password
|
||||
@ -43,20 +43,20 @@ services:
|
||||
faas-swarm:
|
||||
volumes:
|
||||
- "/var/run/docker.sock:/var/run/docker.sock"
|
||||
image: openfaas/faas-swarm:0.6.1
|
||||
image: openfaas/faas-swarm:0.6.1
|
||||
networks:
|
||||
- functions
|
||||
environment:
|
||||
read_timeout: "5m5s" # set both here, and on your functions
|
||||
write_timeout: "5m5s" # set both here, and on your functions
|
||||
read_timeout: "5m5s" # set both here, and on your functions
|
||||
write_timeout: "5m5s" # set both here, and on your functions
|
||||
DOCKER_API_VERSION: "1.30"
|
||||
basic_auth: "${BASIC_AUTH:-true}"
|
||||
secret_mount_path: "/run/secrets/"
|
||||
deploy:
|
||||
placement:
|
||||
constraints:
|
||||
- 'node.role == manager'
|
||||
- 'node.platform.os == linux'
|
||||
- "node.role == manager"
|
||||
- "node.platform.os == linux"
|
||||
resources:
|
||||
# limits: # Enable if you want to limit memory usage
|
||||
# memory: 100M
|
||||
@ -89,7 +89,7 @@ services:
|
||||
memory: 50M
|
||||
placement:
|
||||
constraints:
|
||||
- 'node.platform.os == linux'
|
||||
- "node.platform.os == linux"
|
||||
|
||||
queue-worker:
|
||||
image: openfaas/queue-worker:0.7.0
|
||||
@ -97,7 +97,7 @@ services:
|
||||
- functions
|
||||
environment:
|
||||
max_inflight: "1"
|
||||
ack_wait: "5m5s" # Max duration of any async task / request
|
||||
ack_wait: "5m5s" # Max duration of any async task / request
|
||||
basic_auth: "${BASIC_AUTH:-true}"
|
||||
secret_mount_path: "/run/secrets/"
|
||||
deploy:
|
||||
@ -113,7 +113,7 @@ services:
|
||||
window: 380s
|
||||
placement:
|
||||
constraints:
|
||||
- 'node.platform.os == linux'
|
||||
- "node.platform.os == linux"
|
||||
secrets:
|
||||
- basic-auth-user
|
||||
- basic-auth-password
|
||||
@ -127,12 +127,12 @@ services:
|
||||
environment:
|
||||
no_proxy: "gateway"
|
||||
configs:
|
||||
- source: prometheus_config
|
||||
target: /etc/prometheus/prometheus.yml
|
||||
- source: prometheus_rules
|
||||
target: /etc/prometheus/alert.rules.yml
|
||||
- source: prometheus_config
|
||||
target: /etc/prometheus/prometheus.yml
|
||||
- source: prometheus_rules
|
||||
target: /etc/prometheus/alert.rules.yml
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||
# - '-storage.local.path=/prometheus'
|
||||
ports:
|
||||
- 9090:9090
|
||||
@ -141,8 +141,8 @@ services:
|
||||
deploy:
|
||||
placement:
|
||||
constraints:
|
||||
- 'node.role == manager'
|
||||
- 'node.platform.os == linux'
|
||||
- "node.role == manager"
|
||||
- "node.platform.os == linux"
|
||||
resources:
|
||||
limits:
|
||||
memory: 500M
|
||||
@ -150,12 +150,12 @@ services:
|
||||
memory: 200M
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.15.0
|
||||
image: prom/alertmanager:v0.16.1
|
||||
environment:
|
||||
no_proxy: "gateway"
|
||||
command:
|
||||
- '--config.file=/alertmanager.yml'
|
||||
- '--storage.path=/alertmanager'
|
||||
- "--config.file=/alertmanager.yml"
|
||||
- "--storage.path=/alertmanager"
|
||||
networks:
|
||||
- functions
|
||||
# Uncomment the following port mapping if you wish to expose the Prometheus
|
||||
@ -170,29 +170,28 @@ services:
|
||||
memory: 20M
|
||||
placement:
|
||||
constraints:
|
||||
- 'node.role == manager'
|
||||
- 'node.platform.os == linux'
|
||||
- "node.role == manager"
|
||||
- "node.platform.os == linux"
|
||||
configs:
|
||||
- source: alertmanager_config
|
||||
target: /alertmanager.yml
|
||||
secrets:
|
||||
- basic-auth-password
|
||||
|
||||
|
||||
configs:
|
||||
prometheus_config:
|
||||
file: ./prometheus/prometheus.yml
|
||||
prometheus_rules:
|
||||
file: ./prometheus/alert.rules.yml
|
||||
alertmanager_config:
|
||||
file: ./prometheus/alertmanager.yml
|
||||
prometheus_config:
|
||||
file: ./prometheus/prometheus.yml
|
||||
prometheus_rules:
|
||||
file: ./prometheus/alert.rules.yml
|
||||
alertmanager_config:
|
||||
file: ./prometheus/alertmanager.yml
|
||||
|
||||
networks:
|
||||
functions:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
labels:
|
||||
- "openfaas=true"
|
||||
- "openfaas=true"
|
||||
|
||||
secrets:
|
||||
basic-auth-user:
|
||||
|
@ -9,7 +9,6 @@ groups:
|
||||
labels:
|
||||
service: gateway
|
||||
severity: major
|
||||
value: '{{$value}}'
|
||||
annotations:
|
||||
description: High invocation total on {{ $labels.instance }}
|
||||
summary: High invocation total on {{ $labels.instance }}
|
||||
description: High invocation total on {{ $labels.function_name }}
|
||||
summary: High invocation total on {{ $labels.function_name }}
|
||||
|
@ -2,15 +2,14 @@ ALERT service_down
|
||||
IF up == 0
|
||||
|
||||
ALERT APIHighInvocationRate
|
||||
IF sum ( rate(gateway_function_invocation_total{code="200"}[10s]) ) by (function_name) > 5
|
||||
IF sum ( rate(gateway_function_invocation_total{code="200"}[10s]) ) by (function_name) > 5
|
||||
FOR 5s
|
||||
LABELS {
|
||||
service = "gateway",
|
||||
severity = "major",
|
||||
value = "{{$value}}"
|
||||
}
|
||||
ANNOTATIONS {
|
||||
summary = "High invocation total on {{ $labels.instance }}",
|
||||
description = "High invocation total on {{ $labels.instance }}"
|
||||
}
|
||||
summary = "High invocation total on {{ $labels.function_name }}",
|
||||
description = "High invocation total on {{ $labels.function_name }}"
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user