Upgrade Prometheus and Alertmanager to 2.7.1 and 0.16.1

**What** - Removes the `value` label in the scale-up alert - Updates the annotations to use the `function_name` label instead of the `instance` label that was removed. - Per prometheus/prometheus#4836 and the related mailing list discussion https://groups.google.com/d/msg/prometheus-users/7Ul6ngc7Ogs/j_YDszV5BwAJ the alert value should not be included in the alert labels, otherwise each calculation of the alert is treated like a new alert and then the use of `for 5s` will not behave as expected. - Ports the resolution from openfaas/faas-netes#372 Signed-off-by: Lucas Roesler <roesler.lucas@gmail.com>
2025-06-08 16:26:47 +00:00 · 2019-02-21 10:01:40 +01:00 · 2019-02-21 10:01:40 +01:00 · 3bcc10a07e
commit 3bcc10a07e
parent f2ac7b906c
3 changed files with 40 additions and 43 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -8,17 +8,17 @@ services:
            - functions
        environment:
            functions_provider_url: "http://faas-swarm:8080/"
-            read_timeout:  "5m5s"           # Maximum time to read HTTP request
-            write_timeout: "5m5s"           # Maximum time to write HTTP response
-            upstream_timeout: "5m"          # Maximum duration of upstream function call - should be more than read_timeout and write_timeout
-            dnsrr: "true"                   # Temporarily use dnsrr in place of VIP while issue persists on PWD
+            read_timeout: "5m5s" # Maximum time to read HTTP request
+            write_timeout: "5m5s" # Maximum time to write HTTP response
+            upstream_timeout: "5m" # Maximum duration of upstream function call - should be more than read_timeout and write_timeout
+            dnsrr: "true" # Temporarily use dnsrr in place of VIP while issue persists on PWD
            faas_nats_address: "nats"
            faas_nats_port: 4222
-            direct_functions: "true"        # Functions are invoked directly over the overlay network
+            direct_functions: "true" # Functions are invoked directly over the overlay network
            direct_functions_suffix: ""
            basic_auth: "${BASIC_AUTH:-true}"
            secret_mount_path: "/run/secrets/"
-            scale_from_zero: "true"         # Enable if you want functions to scale from 0/0 to min replica count upon invoke
+            scale_from_zero: "true" # Enable if you want functions to scale from 0/0 to min replica count upon invoke
            max_idle_conns: 1024
            max_idle_conns_per_host: 1024
        deploy:
@ -34,7 +34,7 @@ services:
                window: 380s
            placement:
                constraints:
-                    - 'node.platform.os == linux'
+                    - "node.platform.os == linux"
        secrets:
            - basic-auth-user
            - basic-auth-password
@ -43,20 +43,20 @@ services:
    faas-swarm:
        volumes:
            - "/var/run/docker.sock:/var/run/docker.sock"
-        image:  openfaas/faas-swarm:0.6.1
+        image: openfaas/faas-swarm:0.6.1
        networks:
            - functions
        environment:
-            read_timeout:  "5m5s"       # set both here, and on your functions
-            write_timeout: "5m5s"       # set both here, and on your functions
+            read_timeout: "5m5s" # set both here, and on your functions
+            write_timeout: "5m5s" # set both here, and on your functions
            DOCKER_API_VERSION: "1.30"
            basic_auth: "${BASIC_AUTH:-true}"
            secret_mount_path: "/run/secrets/"
        deploy:
            placement:
                constraints:
-                    - 'node.role == manager'
-                    - 'node.platform.os == linux'
+                    - "node.role == manager"
+                    - "node.platform.os == linux"
            resources:
                # limits:   # Enable if you want to limit memory usage
                #     memory: 100M
@ -89,7 +89,7 @@ services:
                    memory: 50M
            placement:
                constraints:
-                    - 'node.platform.os == linux'
+                    - "node.platform.os == linux"

    queue-worker:
        image: openfaas/queue-worker:0.7.0
@ -97,7 +97,7 @@ services:
            - functions
        environment:
            max_inflight: "1"
-            ack_wait: "5m5s"    # Max duration of any async task / request
+            ack_wait: "5m5s" # Max duration of any async task / request
            basic_auth: "${BASIC_AUTH:-true}"
            secret_mount_path: "/run/secrets/"
        deploy:
@ -113,7 +113,7 @@ services:
                window: 380s
            placement:
                constraints:
-                    - 'node.platform.os == linux'
+                    - "node.platform.os == linux"
        secrets:
            - basic-auth-user
            - basic-auth-password
@ -127,12 +127,12 @@ services:
        environment:
            no_proxy: "gateway"
        configs:
-          - source: prometheus_config
-            target: /etc/prometheus/prometheus.yml
-          - source: prometheus_rules
-            target: /etc/prometheus/alert.rules.yml
+            - source: prometheus_config
+              target: /etc/prometheus/prometheus.yml
+            - source: prometheus_rules
+              target: /etc/prometheus/alert.rules.yml
        command:
-          - '--config.file=/etc/prometheus/prometheus.yml'
+            - "--config.file=/etc/prometheus/prometheus.yml"
        #   - '-storage.local.path=/prometheus'
        ports:
            - 9090:9090
@ -141,8 +141,8 @@ services:
        deploy:
            placement:
                constraints:
-                    - 'node.role == manager'
-                    - 'node.platform.os == linux'
+                    - "node.role == manager"
+                    - "node.platform.os == linux"
            resources:
                limits:
                    memory: 500M
@ -150,12 +150,12 @@ services:
                    memory: 200M

    alertmanager:
-        image: prom/alertmanager:v0.15.0
+        image: prom/alertmanager:v0.16.1
        environment:
            no_proxy: "gateway"
        command:
-            - '--config.file=/alertmanager.yml'
-            - '--storage.path=/alertmanager'
+            - "--config.file=/alertmanager.yml"
+            - "--storage.path=/alertmanager"
        networks:
            - functions
        # Uncomment the following port mapping if you wish to expose the Prometheus
@ -170,29 +170,28 @@ services:
                    memory: 20M
            placement:
                constraints:
-                    - 'node.role == manager'
-                    - 'node.platform.os == linux'
+                    - "node.role == manager"
+                    - "node.platform.os == linux"
        configs:
            - source: alertmanager_config
              target: /alertmanager.yml
        secrets:
            - basic-auth-password

-
 configs:
-     prometheus_config:
-         file: ./prometheus/prometheus.yml
-     prometheus_rules:
-         file: ./prometheus/alert.rules.yml
-     alertmanager_config:
-         file: ./prometheus/alertmanager.yml
+    prometheus_config:
+        file: ./prometheus/prometheus.yml
+    prometheus_rules:
+        file: ./prometheus/alert.rules.yml
+    alertmanager_config:
+        file: ./prometheus/alertmanager.yml

 networks:
    functions:
        driver: overlay
        attachable: true
        labels:
-          - "openfaas=true"
+            - "openfaas=true"

 secrets:
    basic-auth-user:
--- a/prometheus/alert.rules.yml
+++ b/prometheus/alert.rules.yml
@ -9,7 +9,6 @@ groups:
    labels:
      service: gateway
      severity: major
-      value: '{{$value}}'
    annotations:
-      description: High invocation total on {{ $labels.instance }}
-      summary: High invocation total on {{ $labels.instance }}
+      description: High invocation total on {{ $labels.function_name }}
+      summary: High invocation total on {{ $labels.function_name }}
--- a/prometheus/k8s.alert.rules
+++ b/prometheus/k8s.alert.rules
@ -2,15 +2,14 @@ ALERT service_down
  IF up == 0

 ALERT APIHighInvocationRate
-  IF sum ( rate(gateway_function_invocation_total{code="200"}[10s]) ) by (function_name) > 5 
+  IF sum ( rate(gateway_function_invocation_total{code="200"}[10s]) ) by (function_name) > 5
  FOR 5s
  LABELS {
    service = "gateway",
    severity = "major",
-    value = "{{$value}}"
  }
  ANNOTATIONS {
-    summary = "High invocation total on {{ $labels.instance }}",
-    description =  "High invocation total on {{ $labels.instance }}"
-  } 
+    summary = "High invocation total on {{ $labels.function_name }}",
+    description =  "High invocation total on {{ $labels.function_name }}"
+  }