Add K8s specific files

2025-06-14 11:16:47 +00:00 · 2017-07-25 20:44:14 +01:00 · 2017-07-25 20:44:14 +01:00 · 518d2f41a0
commit 518d2f41a0
parent ddc973c341
5 changed files with 133 additions and 0 deletions
--- a/prometheus/Dockerfile.alertmanager.k8s
+++ b/prometheus/Dockerfile.alertmanager.k8s
@ -0,0 +1,3 @@
 # Alertmanager image for the Kubernetes deployment: starts from the upstream
 # prom/alertmanager v0.7.1 image and adds the K8s-specific configuration.
 FROM prom/alertmanager:v0.7.1
 # The configuration is copied to /alertmanager.yml.
 # NOTE(review): presumably the container is started with a flag pointing at
 # this path - confirm against the deployment manifest.
 COPY k8s.alertmanager.yml /alertmanager.yml
--- a/prometheus/Dockerfile.prometheus.k8s
+++ b/prometheus/Dockerfile.prometheus.k8s
@ -0,0 +1,5 @@
 # Prometheus image for the Kubernetes deployment, based on the upstream
 # prom/prometheus v1.5.2 image.
 FROM prom/prometheus:v1.5.2
 # Install the K8s scrape configuration as /etc/prometheus/prometheus.yml.
 COPY k8s.prometheus.yml /etc/prometheus/prometheus.yml
 # Install the alert rules in the same directory as the configuration;
 # k8s.prometheus.yml lists 'alert.rules' under rule_files.
 COPY k8s.alert.rules /etc/prometheus/alert.rules
--- a/prometheus/k8s.alert.rules
+++ b/prometheus/k8s.alert.rules
@ -0,0 +1,16 @@
 # Fires for any series whose `up` value is 0. The rule has no FOR clause and
 # attaches no LABELS or ANNOTATIONS.
 ALERT service_down
  IF up == 0
 # Fires when the per-function rate of gateway invocations with code="200",
 # measured over a 10s window, stays above 5 per second for 5s.
 # The expression aggregates with `by (function_name)`, so function_name is
 # the only label left on the result. The annotations therefore name the
 # function; `instance` is dropped by the aggregation and would render as an
 # empty string.
 ALERT APIHighInvocationRate
  IF sum ( rate(gateway_function_invocation_total{code="200"}[10s]) ) by (function_name) > 5
  FOR 5s
  LABELS {
    service = "gateway",
    severity = "major",
    value = "{{$value}}"
  }
  ANNOTATIONS {
    summary = "High invocation total on {{ $labels.function_name }}",
    description =  "High invocation total on {{ $labels.function_name }}"
  }
--- a/prometheus/k8s.alertmanager.yml
+++ b/prometheus/k8s.alertmanager.yml
@ -0,0 +1,69 @@
 global:
  # NOTE(review): the values below look like sample placeholders, and the
  # only receiver defined in this file is a webhook - confirm whether the
  # mail and Hipchat settings are needed at all.
  # Hipchat: alternative API host and the auth token used with it.
  hipchat_url: 'https://hipchat.foobar.org/'
  hipchat_auth_token: '1234556789'
  # Mail notifications: smarthost, sender address and SMTP credentials.
  smtp_smarthost: 'localhost:25'
  smtp_from: 'alertmanager@example.org'
  smtp_auth_username: 'alertmanager'
  smtp_auth_password: 'password'
 # Glob matching the notification template files to load.
 templates: ['/etc/alertmanager/template/*.tmpl']
 # The root route: every incoming alert enters the routing tree here.
 route:
  # Labels used to group incoming alerts. For example, several alerts with
  # cluster=A and alertname=LatencyHigh are batched into a single group.
  group_by: ['alertname', 'cluster', 'service']
  # When an incoming alert creates a new group, wait at least 'group_wait'
  # before sending the initial notification, so that alerts for the same
  # group that start firing shortly afterwards are batched into it.
  group_wait: 5s
  # After the first notification, wait 'group_interval' before sending a
  # batch of new alerts that started firing for that group.
  group_interval: 10s
  # After a notification has been sent successfully, wait 'repeat_interval'
  # before resending it.
  repeat_interval: 30s
  # Default receiver, used when no child route matches.
  receiver: scale-up
  # All the above attributes are inherited by child routes and can be
  # overwritten on each.
  # The child route tree.
  routes:
  # Gateway alerts with severity "major" go to the scale-up receiver.
  # 'receiver' is a property of the route itself and must sit beside 'match';
  # nested inside 'match' it is read as a label matcher (receiver="scale-up")
  # that the alert rules shipped with this config never set.
  - match:
      service: gateway
      severity: major
    receiver: scale-up
 # Inhibition rules mute a set of alerts while another alert is firing.
 # This rule mutes warning-level notifications when the same alert is
 # already critical.
 # NOTE(review): the alert rules shipped alongside this file only set
 # severity = "major", so this rule looks like it never applies - confirm.
 inhibit_rules:
 - source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  # Apply inhibition only if alertname, cluster and service are all equal.
  equal: ['alertname', 'cluster', 'service']
 # Notification targets that routes can refer to by name.
 receivers:
 # 'scale-up' posts alerts to the gateway's /system/alert URL via a webhook.
 - name: 'scale-up'
  webhook_configs:
    - url: http://gateway.default:8080/system/alert
      # Also notify the webhook when an alert resolves.
      send_resolved: true
--- a/prometheus/k8s.prometheus.yml
+++ b/prometheus/k8s.prometheus.yml
@ -0,0 +1,40 @@
 # Global settings shared by all scrape jobs and by rule evaluation.
 global:
  # Scrape targets every 15 seconds unless a job overrides it.
  scrape_interval: 15s
  # Evaluate rules every 15 seconds.
  evaluation_interval: 15s
  # scrape_timeout is left at the global default (10s).
  # Labels attached to every time series or alert when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'faas-monitor'
 # Rule files are loaded once and then evaluated periodically, every global
 # 'evaluation_interval'.
 rule_files: ['alert.rules']
 # Scrape jobs: Prometheus itself and the gateway.
 scrape_configs:
  # Each job's name is added as a `job=<job_name>` label to every time series
  # scraped by that job.
  - job_name: 'prometheus'
    # Scrape every 5 seconds instead of the global default.
    scrape_interval: 5s
    # metrics_path defaults to '/metrics'; scheme defaults to 'http'.
    static_configs:
      - targets:
          - 'localhost:9090'
  - job_name: 'gateway'
    scrape_interval: 5s
    # Find gateway targets through DNS: A records for 'gateway.default',
    # port 8080, refreshed every 5 seconds.
    dns_sd_configs:
      - names:
          - 'gateway.default'
        type: A
        port: 8080
        refresh_interval: 5s
		`@ -0,0 +1,3 @@`
							`FROM prom/alertmanager:v0.7.1`

							`COPY k8s.alertmanager.yml /alertmanager.yml`