Export new metrics for OpenFaaS Pro scaling

* Add service target metric
* Add service min replicas metric
* Add scale type metric

These combined allow new auto-scaling modes and parameters for OpenFaaS Pro customers.

Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alexellis2@gmail.com>
2025-06-28 09:43:24 +00:00 · 2022-01-24 12:11:54 +00:00
parent 34735d61d0
commit d85d5e7239
23 changed files with 235 additions and 472 deletions
--- a/gateway/metrics/add_metrics.go
+++ b/gateway/metrics/add_metrics.go
@@ -35,9 +35,7 @@ func AddMetricsHandler(handler http.HandlerFunc, prometheusQuery PrometheusQuery
 				recorder.Code,
 				string(upstreamBody))

-			w.Header().Set("Content-Type", "text/plain")
-			w.WriteHeader(http.StatusInternalServerError)
-			w.Write([]byte(fmt.Sprintf("List functions responded with code %d", recorder.Code)))
+			http.Error(w, "Unexpected status code retriving functions from backend", http.StatusInternalServerError)
 			return
 		}

@@ -48,28 +46,33 @@ func AddMetricsHandler(handler http.HandlerFunc, prometheusQuery PrometheusQuery
 		if err != nil {
 			log.Printf("Metrics upstream error: %s", err)

-			w.Header().Set("Content-Type", "text/plain")
-			w.WriteHeader(http.StatusInternalServerError)
-			w.Write([]byte("Error parsing metrics from upstream provider/backend."))
+			http.Error(w, "Error parsing metrics from upstream provider/backend", http.StatusInternalServerError)
 			return
 		}

-		expr := url.QueryEscape(`sum(gateway_function_invocation_total{function_name=~".*", code=~".*"}) by (function_name, code)`)
-		// expr := "sum(gateway_function_invocation_total%7Bfunction_name%3D~%22.*%22%2C+code%3D~%22.*%22%7D)+by+(function_name%2C+code)"
-		results, fetchErr := prometheusQuery.Fetch(expr)
-		if fetchErr != nil {
-			log.Printf("Error querying Prometheus API: %s\n", fetchErr.Error())
-			w.Header().Set("Content-Type", "application/json")
-			w.WriteHeader(http.StatusOK)
-			w.Write(upstreamBody)
-			return
+		// Ensure values are empty first.
+		for i := range functions {
+			functions[i].InvocationCount = 0
 		}

-		mixIn(&functions, results)
+		if len(functions) > 0 {

-		bytesOut, marshalErr := json.Marshal(functions)
-		if marshalErr != nil {
-			log.Println(marshalErr)
+			ns := functions[0].Namespace
+			q := fmt.Sprintf(`sum(gateway_function_invocation_total{function_name=~".*.%s"}) by (function_name)`, ns)
+			// Restrict query results to only function names matching namespace suffix.
+
+			results, err := prometheusQuery.Fetch(url.QueryEscape(q))
+			if err != nil {
+				log.Printf("Error querying Prometheus: %s\n", err.Error())
+				return
+			}
+			mixIn(&functions, results)
+		}
+
+		bytesOut, err := json.Marshal(functions)
+		if err != nil {
+			log.Printf("Error serializing functions: %s", err)
+			http.Error(w, "error writing response after adding metrics", http.StatusInternalServerError)
 			return
 		}

@@ -85,25 +88,19 @@ func mixIn(functions *[]types.FunctionStatus, metrics *VectorQueryResponse) {
 		return
 	}

-	// Ensure values are empty first.
-	for i := range *functions {
-		(*functions)[i].InvocationCount = 0
-	}
-
 	for i, function := range *functions {
 		for _, v := range metrics.Data.Result {

 			if v.Metric.FunctionName == fmt.Sprintf("%s.%s", function.Name, function.Namespace) {
 				metricValue := v.Value[1]
-				switch metricValue.(type) {
+				switch value := metricValue.(type) {
 				case string:
-					f, strconvErr := strconv.ParseFloat(metricValue.(string), 64)
-					if strconvErr != nil {
-						log.Printf("Unable to convert value for metric: %s\n", strconvErr)
+					f, err := strconv.ParseFloat(value, 64)
+					if err != nil {
+						log.Printf("add_metrics: unable to convert value %q for metric: %s", value, err)
 						continue
 					}
 					(*functions)[i].InvocationCount += f
-					break
 				}
 			}
 		}
--- a/gateway/metrics/add_metrics_test.go
+++ b/gateway/metrics/add_metrics_test.go
@@ -83,7 +83,7 @@ func Test_FunctionsHandler_ReturnsJSONAndOneFunction(t *testing.T) {
 func makeFunctionsHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		functions := []types.FunctionStatus{
-			types.FunctionStatus{
+			{
 				Name:      "func_echoit",
 				Replicas:  0,
 				Namespace: "openfaas-fn",
--- a/gateway/metrics/exporter.go
+++ b/gateway/metrics/exporter.go
@@ -12,12 +12,14 @@ import (
 	"net/http"
 	"net/url"
 	"path"
+	"strconv"
 	"time"

 	"log"

 	"github.com/openfaas/faas-provider/auth"
 	types "github.com/openfaas/faas-provider/types"
+	"github.com/openfaas/faas/gateway/scaling"
 	"github.com/prometheus/client_golang/prometheus"
 )

@@ -46,6 +48,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
 	e.metricOptions.GatewayFunctionsHistogram.Describe(ch)
 	e.metricOptions.ServiceReplicasGauge.Describe(ch)
 	e.metricOptions.GatewayFunctionInvocationStarted.Describe(ch)
+	e.metricOptions.ServiceTargetLoadGauge.Describe(ch)

 	e.metricOptions.ServiceMetrics.Counter.Describe(ch)
 	e.metricOptions.ServiceMetrics.Histogram.Describe(ch)
@@ -59,6 +62,8 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
 	e.metricOptions.GatewayFunctionInvocationStarted.Collect(ch)

 	e.metricOptions.ServiceReplicasGauge.Reset()
+	e.metricOptions.ServiceTargetLoadGauge.Reset()
+
 	for _, service := range e.services {
 		var serviceName string
 		if len(service.Namespace) > 0 {
@@ -66,12 +71,54 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
 		} else {
 			serviceName = service.Name
 		}
+
+		// Set current replica count
 		e.metricOptions.ServiceReplicasGauge.
 			WithLabelValues(serviceName).
 			Set(float64(service.Replicas))
+
+		// Set minimum replicas
+		minReplicas := scaling.DefaultMinReplicas
+		if service.Labels != nil {
+			a := *service.Labels
+			if v, ok := a[scaling.MinScaleLabel]; ok && len(v) > 0 {
+				val, _ := strconv.Atoi(v)
+				minReplicas = val
+			}
+		}
+
+		e.metricOptions.ServiceMinReplicasGauge.
+			WithLabelValues(serviceName).
+			Set(float64(minReplicas))
+
+		// Set scale type
+		scaleType := scaling.DefaultTypeScale
+		if service.Labels != nil {
+			a := *service.Labels
+			if v, ok := a[scaling.ScaleTypeLabel]; ok && len(v) > 0 {
+				scaleType = v
+			}
+		}
+
+		// Set target load
+		targetScale := scaling.DefaultTargetLoad
+		if service.Labels != nil {
+			a := *service.Labels
+			if v, ok := a[scaling.TargetLoadLabel]; ok && len(v) > 0 {
+				val, _ := strconv.Atoi(v)
+				targetScale = val
+			}
+		}
+
+		e.metricOptions.ServiceTargetLoadGauge.
+			WithLabelValues(serviceName, scaleType).
+			Set(float64(targetScale))
+
 	}

 	e.metricOptions.ServiceReplicasGauge.Collect(ch)
+	e.metricOptions.ServiceMinReplicasGauge.Collect(ch)
+	e.metricOptions.ServiceTargetLoadGauge.Collect(ch)

 	e.metricOptions.ServiceMetrics.Counter.Collect(ch)
 	e.metricOptions.ServiceMetrics.Histogram.Collect(ch)
--- a/gateway/metrics/exporter_test.go
+++ b/gateway/metrics/exporter_test.go
@@ -44,21 +44,21 @@ func Test_Describe_DescribesThePrometheusMetrics(t *testing.T) {
 	expectedGatewayFunctionInvocationDesc := `Desc{fqName: "gateway_function_invocation_total", help: "Function metrics", constLabels: {}, variableLabels: [function_name code]}`
 	actualGatewayFunctionInvocationDesc := d.String()
 	if expectedGatewayFunctionInvocationDesc != actualGatewayFunctionInvocationDesc {
-		t.Errorf("Want %s, got: %s", expectedGatewayFunctionInvocationDesc, actualGatewayFunctionInvocationDesc)
+		t.Errorf("Want\n%s\ngot\n%s", expectedGatewayFunctionInvocationDesc, actualGatewayFunctionInvocationDesc)
 	}

 	d = <-ch
 	expectedGatewayFunctionsHistogramDesc := `Desc{fqName: "gateway_functions_seconds", help: "Function time taken", constLabels: {}, variableLabels: [function_name]}`
 	actualGatewayFunctionsHistogramDesc := d.String()
 	if expectedGatewayFunctionsHistogramDesc != actualGatewayFunctionsHistogramDesc {
-		t.Errorf("Want %s, got: %s", expectedGatewayFunctionsHistogramDesc, actualGatewayFunctionsHistogramDesc)
+		t.Errorf("Want\n%s\ngot\n%s", expectedGatewayFunctionsHistogramDesc, actualGatewayFunctionsHistogramDesc)
 	}

 	d = <-ch
-	expectedServiceReplicasGaugeDesc := `Desc{fqName: "gateway_service_count", help: "Service replicas", constLabels: {}, variableLabels: [function_name]}`
+	expectedServiceReplicasGaugeDesc := `Desc{fqName: "gateway_service_count", help: "Current count of replicas for function", constLabels: {}, variableLabels: [function_name]}`
 	actualServiceReplicasGaugeDesc := d.String()
 	if expectedServiceReplicasGaugeDesc != actualServiceReplicasGaugeDesc {
-		t.Errorf("Want %s, got: %s", expectedServiceReplicasGaugeDesc, actualServiceReplicasGaugeDesc)
+		t.Errorf("Want\n%s\ngot\n%s", expectedServiceReplicasGaugeDesc, actualServiceReplicasGaugeDesc)
 	}

 }
--- a/gateway/metrics/metrics.go
+++ b/gateway/metrics/metrics.go
@@ -16,8 +16,12 @@ type MetricOptions struct {
 	GatewayFunctionInvocation        *prometheus.CounterVec
 	GatewayFunctionsHistogram        *prometheus.HistogramVec
 	GatewayFunctionInvocationStarted *prometheus.CounterVec
-	ServiceReplicasGauge             *prometheus.GaugeVec
-	ServiceMetrics                   *ServiceMetricOptions
+
+	ServiceReplicasGauge    *prometheus.GaugeVec
+	ServiceMinReplicasGauge *prometheus.GaugeVec
+	ServiceTargetLoadGauge  *prometheus.GaugeVec
+
+	ServiceMetrics *ServiceMetricOptions
 }

 // ServiceMetricOptions provides RED metrics
@@ -62,11 +66,29 @@ func BuildMetricsOptions() MetricOptions {
 		prometheus.GaugeOpts{
 			Namespace: "gateway",
 			Name:      "service_count",
-			Help:      "Service replicas",
+			Help:      "Current count of replicas for function",
 		},
 		[]string{"function_name"},
 	)

+	serviceMinReplicas := prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: "gateway",
+			Name:      "service_min",
+			Help:      "Minium replicas for function",
+		},
+		[]string{"function_name"},
+	)
+
+	serviceTargetLoad := prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: "gateway",
+			Name:      "service_target_load",
+			Help:      "Target load for function",
+		},
+		[]string{"function_name", "scaling_type"},
+	)
+
 	// For automatic monitoring and alerting (RED method)
 	histogram := prometheus.NewHistogramVec(prometheus.HistogramOpts{
 		Subsystem: "http",
@@ -104,6 +126,8 @@ func BuildMetricsOptions() MetricOptions {
 		GatewayFunctionsHistogram:        gatewayFunctionsHistogram,
 		GatewayFunctionInvocation:        gatewayFunctionInvocation,
 		ServiceReplicasGauge:             serviceReplicas,
+		ServiceMinReplicasGauge:          serviceMinReplicas,
+		ServiceTargetLoadGauge:           serviceTargetLoad,
 		ServiceMetrics:                   serviceMetricOptions,
 		GatewayFunctionInvocationStarted: gatewayFunctionInvocationStarted,
 	}