mirror of
https://github.com/openfaas/faas.git
synced 2025-06-08 16:26:47 +00:00
Add HTTP status code to histogram
The histogram for gateway_functions_seconds excluded the status code that gives important information for setting up SLOs. Fixes: #1725 Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alex@openfaas.com>
This commit is contained in:
parent
96cfdee085
commit
1d10e89aa2
@ -1,33 +0,0 @@
|
||||
// Copyright (c) Alex Ellis 2017. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/openfaas/faas/gateway/metrics"
|
||||
"github.com/openfaas/faas/gateway/requests"
|
||||
)
|
||||
|
||||
// MakeAsyncReport makes a handler for asynchronous invocations to report back into.
|
||||
func MakeAsyncReport(metrics metrics.MetricOptions) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
defer r.Body.Close()
|
||||
|
||||
report := requests.AsyncReport{}
|
||||
bytesOut, _ := ioutil.ReadAll(r.Body)
|
||||
json.Unmarshal(bytesOut, &report)
|
||||
|
||||
trackInvocation(report.FunctionName, metrics, report.StatusCode)
|
||||
|
||||
var taken time.Duration
|
||||
taken = time.Duration(report.TimeTaken)
|
||||
trackTimeExact(taken, metrics, report.FunctionName)
|
||||
|
||||
w.WriteHeader(http.StatusAccepted)
|
||||
}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
// Copyright (c) Alex Ellis 2017. All rights reserved.
|
||||
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
|
||||
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/openfaas/faas/gateway/metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func trackInvocation(service string, metrics metrics.MetricOptions, code int) {
|
||||
metrics.GatewayFunctionInvocation.With(
|
||||
prometheus.Labels{"function_name": service,
|
||||
"code": strconv.Itoa(code)}).Inc()
|
||||
}
|
||||
|
||||
func trackTime(then time.Time, metrics metrics.MetricOptions, name string) {
|
||||
since := time.Since(then)
|
||||
metrics.GatewayFunctionsHistogram.
|
||||
WithLabelValues(name).
|
||||
Observe(since.Seconds())
|
||||
}
|
||||
|
||||
func trackTimeExact(duration time.Duration, metrics metrics.MetricOptions, name string) {
|
||||
metrics.GatewayFunctionsHistogram.
|
||||
WithLabelValues(name).
|
||||
Observe(float64(duration))
|
||||
}
|
@ -56,16 +56,17 @@ func (p PrometheusFunctionNotifier) Notify(method string, URL string, originalUR
|
||||
}
|
||||
}
|
||||
|
||||
code := strconv.Itoa(statusCode)
|
||||
labels := prometheus.Labels{"function_name": serviceName, "code": code}
|
||||
|
||||
if event == "completed" {
|
||||
seconds := duration.Seconds()
|
||||
p.Metrics.GatewayFunctionsHistogram.
|
||||
WithLabelValues(serviceName).
|
||||
With(labels).
|
||||
Observe(seconds)
|
||||
|
||||
code := strconv.Itoa(statusCode)
|
||||
|
||||
p.Metrics.GatewayFunctionInvocation.
|
||||
With(prometheus.Labels{"function_name": serviceName, "code": code}).
|
||||
With(labels).
|
||||
Inc()
|
||||
} else if event == "started" {
|
||||
p.Metrics.GatewayFunctionInvocationStarted.WithLabelValues(serviceName).Inc()
|
||||
|
@ -175,11 +175,6 @@ func main() {
|
||||
handlers.MakeCallIDMiddleware(handlers.MakeQueuedProxy(metricsOptions, natsQueue, trimURLTransformer, config.Namespace, functionQuery)),
|
||||
forwardingNotifiers,
|
||||
)
|
||||
|
||||
faasHandlers.AsyncReport = handlers.MakeNotifierWrapper(
|
||||
handlers.MakeAsyncReport(metricsOptions),
|
||||
forwardingNotifiers,
|
||||
)
|
||||
}
|
||||
|
||||
prometheusQuery := metrics.NewPrometheusQuery(config.PrometheusHost, config.PrometheusPort, &http.Client{})
|
||||
|
@ -48,7 +48,7 @@ func Test_Describe_DescribesThePrometheusMetrics(t *testing.T) {
|
||||
}
|
||||
|
||||
d = <-ch
|
||||
expectedGatewayFunctionsHistogramDesc := `Desc{fqName: "gateway_functions_seconds", help: "Function time taken", constLabels: {}, variableLabels: [function_name]}`
|
||||
expectedGatewayFunctionsHistogramDesc := `Desc{fqName: "gateway_functions_seconds", help: "Function time taken", constLabels: {}, variableLabels: [function_name code]}`
|
||||
actualGatewayFunctionsHistogramDesc := d.String()
|
||||
if expectedGatewayFunctionsHistogramDesc != actualGatewayFunctionsHistogramDesc {
|
||||
t.Errorf("Want\n%s\ngot\n%s", expectedGatewayFunctionsHistogramDesc, actualGatewayFunctionsHistogramDesc)
|
||||
|
@ -50,7 +50,7 @@ func BuildMetricsOptions() MetricOptions {
|
||||
gatewayFunctionsHistogram := prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: "gateway_functions_seconds",
|
||||
Help: "Function time taken",
|
||||
}, []string{"function_name"})
|
||||
}, []string{"function_name", "code"})
|
||||
|
||||
gatewayFunctionInvocation := prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
|
@ -32,7 +32,7 @@ type ExternalServiceQuery struct {
|
||||
|
||||
// NewExternalServiceQuery proxies service queries to external plugin via HTTP
|
||||
func NewExternalServiceQuery(externalURL url.URL, authInjector middleware.AuthInjector) scaling.ServiceQuery {
|
||||
timeout := 3 * time.Second
|
||||
timeout := 5 * time.Second
|
||||
|
||||
proxyClient := http.Client{
|
||||
Transport: &http.Transport{
|
||||
@ -82,23 +82,33 @@ func (s ExternalServiceQuery) GetReplicas(serviceName, serviceNamespace string)
|
||||
|
||||
res, err := s.ProxyClient.Do(req)
|
||||
if err != nil {
|
||||
log.Println(urlPath, err)
|
||||
log.Printf("Unable to connect to %s, error: %s", urlPath, err)
|
||||
} else {
|
||||
|
||||
var body []byte
|
||||
if res.Body != nil {
|
||||
defer res.Body.Close()
|
||||
body, _ = ioutil.ReadAll(res.Body)
|
||||
}
|
||||
|
||||
if res.StatusCode == http.StatusOK {
|
||||
bytesOut, _ := ioutil.ReadAll(res.Body)
|
||||
err = json.Unmarshal(bytesOut, &function)
|
||||
err = json.Unmarshal(body, &function)
|
||||
if err != nil {
|
||||
log.Println(urlPath, err)
|
||||
log.Printf("Unable to unmarshal %s, error: %s", string(body), err)
|
||||
}
|
||||
log.Printf("GetReplicas [%s.%s] took: %fs", serviceName, serviceNamespace, time.Since(start).Seconds())
|
||||
|
||||
log.Printf("GetReplicas [%s.%s] took: %fs",
|
||||
serviceName,
|
||||
serviceNamespace,
|
||||
time.Since(start).Seconds())
|
||||
|
||||
} else {
|
||||
log.Printf("GetReplicas [%s.%s] took: %fs, code: %d\n", serviceName, serviceNamespace, time.Since(start).Seconds(), res.StatusCode)
|
||||
log.Printf("GetReplicas [%s.%s] took: %fs, code: %d",
|
||||
serviceName,
|
||||
serviceNamespace,
|
||||
time.Since(start).Seconds(),
|
||||
res.StatusCode)
|
||||
|
||||
return emptyServiceQueryResponse, fmt.Errorf("server returned non-200 status code (%d) for function, %s", res.StatusCode, serviceName)
|
||||
}
|
||||
}
|
||||
@ -118,15 +128,13 @@ func (s ExternalServiceQuery) GetReplicas(serviceName, serviceNamespace string)
|
||||
extractedScalingFactor := extractLabelValue(labels[scaling.ScalingFactorLabel], scalingFactor)
|
||||
targetLoad = extractLabelValue(labels[scaling.TargetLoadLabel], targetLoad)
|
||||
|
||||
if extractedScalingFactor >= 0 && extractedScalingFactor <= 100 {
|
||||
if extractedScalingFactor > 0 && extractedScalingFactor <= 100 {
|
||||
scalingFactor = extractedScalingFactor
|
||||
} else {
|
||||
log.Printf("Bad Scaling Factor: %d, is not in range of [0 - 100]. Will fallback to %d", extractedScalingFactor, scalingFactor)
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("GetReplicas [%s.%s] took: %fs", serviceName, serviceNamespace, time.Since(start).Seconds())
|
||||
|
||||
return scaling.ServiceQueryResponse{
|
||||
Replicas: function.Replicas,
|
||||
MaxReplicas: maxReplicas,
|
||||
|
Loading…
x
Reference in New Issue
Block a user