Add HTTP status code to histogram

The gateway_functions_seconds histogram did not include the HTTP
status code, which is important information for setting up SLOs.

Fixes: #1725

Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alex@openfaas.com>
This commit is contained in:
Alex Ellis (OpenFaaS Ltd) 2022-06-01 09:33:36 +01:00 committed by Alex Ellis
parent 96cfdee085
commit cc2f38938e
7 changed files with 25 additions and 85 deletions

View File

@ -1,33 +0,0 @@
// Copyright (c) Alex Ellis 2017. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
package handlers
import (
"encoding/json"
"io/ioutil"
"net/http"
"time"
"github.com/openfaas/faas/gateway/metrics"
"github.com/openfaas/faas/gateway/requests"
)
// MakeAsyncReport makes a handler for asynchronous invocations to report back into.
//
// The handler decodes the request body as a requests.AsyncReport, records the
// invocation (function name and status code) and the reported time taken via
// the supplied metrics, then replies with 202 Accepted. A missing, unreadable
// or unparseable body is rejected with 400 Bad Request and nothing is recorded,
// so malformed reports cannot create metric series for an empty function name.
func MakeAsyncReport(metrics metrics.MetricOptions) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// A request constructed by hand (e.g. in tests) may carry a nil Body.
		if r.Body == nil {
			http.Error(w, "request body is required", http.StatusBadRequest)
			return
		}
		defer r.Body.Close()

		body, err := ioutil.ReadAll(r.Body)
		if err != nil {
			http.Error(w, "unable to read request body", http.StatusBadRequest)
			return
		}

		report := requests.AsyncReport{}
		if err := json.Unmarshal(body, &report); err != nil {
			http.Error(w, "unable to parse async report", http.StatusBadRequest)
			return
		}

		trackInvocation(report.FunctionName, metrics, report.StatusCode)

		// NOTE(review): TimeTaken is converted straight to a time.Duration and
		// trackTimeExact observes that raw numeric value; presumably TimeTaken
		// is already expressed in seconds — confirm against requests.AsyncReport.
		trackTimeExact(time.Duration(report.TimeTaken), metrics, report.FunctionName)

		w.WriteHeader(http.StatusAccepted)
	}
}

View File

@ -1,31 +0,0 @@
// Copyright (c) Alex Ellis 2017. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
package handlers
import (
"strconv"
"time"
"github.com/openfaas/faas/gateway/metrics"
"github.com/prometheus/client_golang/prometheus"
)
// trackInvocation increments the GatewayFunctionInvocation counter for the
// given function name, labelling the sample with the status code rendered
// as a decimal string.
func trackInvocation(service string, metrics metrics.MetricOptions, code int) {
	labels := prometheus.Labels{
		"function_name": service,
		"code":          strconv.Itoa(code),
	}

	counter := metrics.GatewayFunctionInvocation.With(labels)
	counter.Inc()
}
// trackTime observes the time elapsed since then, in seconds, on the
// GatewayFunctionsHistogram series labelled with name.
func trackTime(then time.Time, metrics metrics.MetricOptions, name string) {
	elapsedSeconds := time.Since(then).Seconds()

	observer := metrics.GatewayFunctionsHistogram.WithLabelValues(name)
	observer.Observe(elapsedSeconds)
}
// trackTimeExact observes the raw numeric value of duration on the
// GatewayFunctionsHistogram series labelled with name.
//
// NOTE(review): unlike trackTime, the value is not passed through Seconds();
// the caller is presumably expected to supply a value already in the unit
// the histogram records — confirm against callers.
func trackTimeExact(duration time.Duration, metrics metrics.MetricOptions, name string) {
	value := float64(duration)

	observer := metrics.GatewayFunctionsHistogram.WithLabelValues(name)
	observer.Observe(value)
}

View File

@ -56,16 +56,17 @@ func (p PrometheusFunctionNotifier) Notify(method string, URL string, originalUR
}
}
code := strconv.Itoa(statusCode)
labels := prometheus.Labels{"function_name": serviceName, "code": code}
if event == "completed" {
seconds := duration.Seconds()
p.Metrics.GatewayFunctionsHistogram.
WithLabelValues(serviceName).
With(labels).
Observe(seconds)
code := strconv.Itoa(statusCode)
p.Metrics.GatewayFunctionInvocation.
With(prometheus.Labels{"function_name": serviceName, "code": code}).
With(labels).
Inc()
} else if event == "started" {
p.Metrics.GatewayFunctionInvocationStarted.WithLabelValues(serviceName).Inc()

View File

@ -175,11 +175,6 @@ func main() {
handlers.MakeCallIDMiddleware(handlers.MakeQueuedProxy(metricsOptions, natsQueue, trimURLTransformer, config.Namespace, functionQuery)),
forwardingNotifiers,
)
faasHandlers.AsyncReport = handlers.MakeNotifierWrapper(
handlers.MakeAsyncReport(metricsOptions),
forwardingNotifiers,
)
}
prometheusQuery := metrics.NewPrometheusQuery(config.PrometheusHost, config.PrometheusPort, &http.Client{})

View File

@ -48,7 +48,7 @@ func Test_Describe_DescribesThePrometheusMetrics(t *testing.T) {
}
d = <-ch
expectedGatewayFunctionsHistogramDesc := `Desc{fqName: "gateway_functions_seconds", help: "Function time taken", constLabels: {}, variableLabels: [function_name]}`
expectedGatewayFunctionsHistogramDesc := `Desc{fqName: "gateway_functions_seconds", help: "Function time taken", constLabels: {}, variableLabels: [function_name code]}`
actualGatewayFunctionsHistogramDesc := d.String()
if expectedGatewayFunctionsHistogramDesc != actualGatewayFunctionsHistogramDesc {
t.Errorf("Want\n%s\ngot\n%s", expectedGatewayFunctionsHistogramDesc, actualGatewayFunctionsHistogramDesc)

View File

@ -50,7 +50,7 @@ func BuildMetricsOptions() MetricOptions {
gatewayFunctionsHistogram := prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "gateway_functions_seconds",
Help: "Function time taken",
}, []string{"function_name"})
}, []string{"function_name", "code"})
gatewayFunctionInvocation := prometheus.NewCounterVec(
prometheus.CounterOpts{

View File

@ -32,7 +32,7 @@ type ExternalServiceQuery struct {
// NewExternalServiceQuery proxies service queries to external plugin via HTTP
func NewExternalServiceQuery(externalURL url.URL, authInjector middleware.AuthInjector) scaling.ServiceQuery {
timeout := 3 * time.Second
timeout := 5 * time.Second
proxyClient := http.Client{
Transport: &http.Transport{
@ -82,23 +82,33 @@ func (s ExternalServiceQuery) GetReplicas(serviceName, serviceNamespace string)
res, err := s.ProxyClient.Do(req)
if err != nil {
log.Println(urlPath, err)
log.Printf("Unable to connect to %s, error: %s", urlPath, err)
} else {
var body []byte
if res.Body != nil {
defer res.Body.Close()
body, _ = ioutil.ReadAll(res.Body)
}
if res.StatusCode == http.StatusOK {
bytesOut, _ := ioutil.ReadAll(res.Body)
err = json.Unmarshal(bytesOut, &function)
err = json.Unmarshal(body, &function)
if err != nil {
log.Println(urlPath, err)
log.Printf("Unable to unmarshal %s, error: %s", string(body), err)
}
log.Printf("GetReplicas [%s.%s] took: %fs", serviceName, serviceNamespace, time.Since(start).Seconds())
log.Printf("GetReplicas [%s.%s] took: %fs",
serviceName,
serviceNamespace,
time.Since(start).Seconds())
} else {
log.Printf("GetReplicas [%s.%s] took: %fs, code: %d\n", serviceName, serviceNamespace, time.Since(start).Seconds(), res.StatusCode)
log.Printf("GetReplicas [%s.%s] took: %fs, code: %d",
serviceName,
serviceNamespace,
time.Since(start).Seconds(),
res.StatusCode)
return emptyServiceQueryResponse, fmt.Errorf("server returned non-200 status code (%d) for function, %s", res.StatusCode, serviceName)
}
}
@ -118,15 +128,13 @@ func (s ExternalServiceQuery) GetReplicas(serviceName, serviceNamespace string)
extractedScalingFactor := extractLabelValue(labels[scaling.ScalingFactorLabel], scalingFactor)
targetLoad = extractLabelValue(labels[scaling.TargetLoadLabel], targetLoad)
if extractedScalingFactor >= 0 && extractedScalingFactor <= 100 {
if extractedScalingFactor > 0 && extractedScalingFactor <= 100 {
scalingFactor = extractedScalingFactor
} else {
log.Printf("Bad Scaling Factor: %d, is not in range of [0 - 100]. Will fallback to %d", extractedScalingFactor, scalingFactor)
}
}
log.Printf("GetReplicas [%s.%s] took: %fs", serviceName, serviceNamespace, time.Since(start).Seconds())
return scaling.ServiceQueryResponse{
Replicas: function.Replicas,
MaxReplicas: maxReplicas,