Export new metrics for OpenFaaS Pro scaling

* Add service target load metric
* Add service min replicas metric
* Add scale type metric

Combined, these metrics enable new auto-scaling modes and parameters
for OpenFaaS Pro customers.

Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alexellis2@gmail.com>
This commit is contained in:
Alex Ellis (OpenFaaS Ltd)
2022-01-24 12:11:54 +00:00
committed by Alex Ellis
parent 34735d61d0
commit d85d5e7239
23 changed files with 235 additions and 472 deletions

View File

@ -35,9 +35,7 @@ func AddMetricsHandler(handler http.HandlerFunc, prometheusQuery PrometheusQuery
recorder.Code,
string(upstreamBody))
w.Header().Set("Content-Type", "text/plain")
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(fmt.Sprintf("List functions responded with code %d", recorder.Code)))
http.Error(w, "Unexpected status code retriving functions from backend", http.StatusInternalServerError)
return
}
@ -48,28 +46,33 @@ func AddMetricsHandler(handler http.HandlerFunc, prometheusQuery PrometheusQuery
if err != nil {
log.Printf("Metrics upstream error: %s", err)
w.Header().Set("Content-Type", "text/plain")
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte("Error parsing metrics from upstream provider/backend."))
http.Error(w, "Error parsing metrics from upstream provider/backend", http.StatusInternalServerError)
return
}
expr := url.QueryEscape(`sum(gateway_function_invocation_total{function_name=~".*", code=~".*"}) by (function_name, code)`)
// expr := "sum(gateway_function_invocation_total%7Bfunction_name%3D~%22.*%22%2C+code%3D~%22.*%22%7D)+by+(function_name%2C+code)"
results, fetchErr := prometheusQuery.Fetch(expr)
if fetchErr != nil {
log.Printf("Error querying Prometheus API: %s\n", fetchErr.Error())
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write(upstreamBody)
return
// Ensure values are empty first.
for i := range functions {
functions[i].InvocationCount = 0
}
mixIn(&functions, results)
if len(functions) > 0 {
bytesOut, marshalErr := json.Marshal(functions)
if marshalErr != nil {
log.Println(marshalErr)
ns := functions[0].Namespace
q := fmt.Sprintf(`sum(gateway_function_invocation_total{function_name=~".*.%s"}) by (function_name)`, ns)
// Restrict query results to only function names matching namespace suffix.
results, err := prometheusQuery.Fetch(url.QueryEscape(q))
if err != nil {
log.Printf("Error querying Prometheus: %s\n", err.Error())
return
}
mixIn(&functions, results)
}
bytesOut, err := json.Marshal(functions)
if err != nil {
log.Printf("Error serializing functions: %s", err)
http.Error(w, "error writing response after adding metrics", http.StatusInternalServerError)
return
}
@ -85,25 +88,19 @@ func mixIn(functions *[]types.FunctionStatus, metrics *VectorQueryResponse) {
return
}
// Ensure values are empty first.
for i := range *functions {
(*functions)[i].InvocationCount = 0
}
for i, function := range *functions {
for _, v := range metrics.Data.Result {
if v.Metric.FunctionName == fmt.Sprintf("%s.%s", function.Name, function.Namespace) {
metricValue := v.Value[1]
switch metricValue.(type) {
switch value := metricValue.(type) {
case string:
f, strconvErr := strconv.ParseFloat(metricValue.(string), 64)
if strconvErr != nil {
log.Printf("Unable to convert value for metric: %s\n", strconvErr)
f, err := strconv.ParseFloat(value, 64)
if err != nil {
log.Printf("add_metrics: unable to convert value %q for metric: %s", value, err)
continue
}
(*functions)[i].InvocationCount += f
break
}
}
}

View File

@ -83,7 +83,7 @@ func Test_FunctionsHandler_ReturnsJSONAndOneFunction(t *testing.T) {
func makeFunctionsHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
functions := []types.FunctionStatus{
types.FunctionStatus{
{
Name: "func_echoit",
Replicas: 0,
Namespace: "openfaas-fn",

View File

@ -12,12 +12,14 @@ import (
"net/http"
"net/url"
"path"
"strconv"
"time"
"log"
"github.com/openfaas/faas-provider/auth"
types "github.com/openfaas/faas-provider/types"
"github.com/openfaas/faas/gateway/scaling"
"github.com/prometheus/client_golang/prometheus"
)
@ -46,6 +48,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
e.metricOptions.GatewayFunctionsHistogram.Describe(ch)
e.metricOptions.ServiceReplicasGauge.Describe(ch)
e.metricOptions.GatewayFunctionInvocationStarted.Describe(ch)
e.metricOptions.ServiceTargetLoadGauge.Describe(ch)
e.metricOptions.ServiceMetrics.Counter.Describe(ch)
e.metricOptions.ServiceMetrics.Histogram.Describe(ch)
@ -59,6 +62,8 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
e.metricOptions.GatewayFunctionInvocationStarted.Collect(ch)
e.metricOptions.ServiceReplicasGauge.Reset()
e.metricOptions.ServiceTargetLoadGauge.Reset()
for _, service := range e.services {
var serviceName string
if len(service.Namespace) > 0 {
@ -66,12 +71,54 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
} else {
serviceName = service.Name
}
// Set current replica count
e.metricOptions.ServiceReplicasGauge.
WithLabelValues(serviceName).
Set(float64(service.Replicas))
// Set minimum replicas
minReplicas := scaling.DefaultMinReplicas
if service.Labels != nil {
a := *service.Labels
if v, ok := a[scaling.MinScaleLabel]; ok && len(v) > 0 {
val, _ := strconv.Atoi(v)
minReplicas = val
}
}
e.metricOptions.ServiceMinReplicasGauge.
WithLabelValues(serviceName).
Set(float64(minReplicas))
// Set scale type
scaleType := scaling.DefaultTypeScale
if service.Labels != nil {
a := *service.Labels
if v, ok := a[scaling.ScaleTypeLabel]; ok && len(v) > 0 {
scaleType = v
}
}
// Set target load
targetScale := scaling.DefaultTargetLoad
if service.Labels != nil {
a := *service.Labels
if v, ok := a[scaling.TargetLoadLabel]; ok && len(v) > 0 {
val, _ := strconv.Atoi(v)
targetScale = val
}
}
e.metricOptions.ServiceTargetLoadGauge.
WithLabelValues(serviceName, scaleType).
Set(float64(targetScale))
}
e.metricOptions.ServiceReplicasGauge.Collect(ch)
e.metricOptions.ServiceMinReplicasGauge.Collect(ch)
e.metricOptions.ServiceTargetLoadGauge.Collect(ch)
e.metricOptions.ServiceMetrics.Counter.Collect(ch)
e.metricOptions.ServiceMetrics.Histogram.Collect(ch)

View File

@ -44,21 +44,21 @@ func Test_Describe_DescribesThePrometheusMetrics(t *testing.T) {
expectedGatewayFunctionInvocationDesc := `Desc{fqName: "gateway_function_invocation_total", help: "Function metrics", constLabels: {}, variableLabels: [function_name code]}`
actualGatewayFunctionInvocationDesc := d.String()
if expectedGatewayFunctionInvocationDesc != actualGatewayFunctionInvocationDesc {
t.Errorf("Want %s, got: %s", expectedGatewayFunctionInvocationDesc, actualGatewayFunctionInvocationDesc)
t.Errorf("Want\n%s\ngot\n%s", expectedGatewayFunctionInvocationDesc, actualGatewayFunctionInvocationDesc)
}
d = <-ch
expectedGatewayFunctionsHistogramDesc := `Desc{fqName: "gateway_functions_seconds", help: "Function time taken", constLabels: {}, variableLabels: [function_name]}`
actualGatewayFunctionsHistogramDesc := d.String()
if expectedGatewayFunctionsHistogramDesc != actualGatewayFunctionsHistogramDesc {
t.Errorf("Want %s, got: %s", expectedGatewayFunctionsHistogramDesc, actualGatewayFunctionsHistogramDesc)
t.Errorf("Want\n%s\ngot\n%s", expectedGatewayFunctionsHistogramDesc, actualGatewayFunctionsHistogramDesc)
}
d = <-ch
expectedServiceReplicasGaugeDesc := `Desc{fqName: "gateway_service_count", help: "Service replicas", constLabels: {}, variableLabels: [function_name]}`
expectedServiceReplicasGaugeDesc := `Desc{fqName: "gateway_service_count", help: "Current count of replicas for function", constLabels: {}, variableLabels: [function_name]}`
actualServiceReplicasGaugeDesc := d.String()
if expectedServiceReplicasGaugeDesc != actualServiceReplicasGaugeDesc {
t.Errorf("Want %s, got: %s", expectedServiceReplicasGaugeDesc, actualServiceReplicasGaugeDesc)
t.Errorf("Want\n%s\ngot\n%s", expectedServiceReplicasGaugeDesc, actualServiceReplicasGaugeDesc)
}
}

View File

@ -16,8 +16,12 @@ type MetricOptions struct {
GatewayFunctionInvocation *prometheus.CounterVec
GatewayFunctionsHistogram *prometheus.HistogramVec
GatewayFunctionInvocationStarted *prometheus.CounterVec
ServiceReplicasGauge *prometheus.GaugeVec
ServiceMetrics *ServiceMetricOptions
ServiceReplicasGauge *prometheus.GaugeVec
ServiceMinReplicasGauge *prometheus.GaugeVec
ServiceTargetLoadGauge *prometheus.GaugeVec
ServiceMetrics *ServiceMetricOptions
}
// ServiceMetricOptions provides RED metrics
@ -62,11 +66,29 @@ func BuildMetricsOptions() MetricOptions {
prometheus.GaugeOpts{
Namespace: "gateway",
Name: "service_count",
Help: "Service replicas",
Help: "Current count of replicas for function",
},
[]string{"function_name"},
)
serviceMinReplicas := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "gateway",
Name: "service_min",
Help: "Minium replicas for function",
},
[]string{"function_name"},
)
serviceTargetLoad := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "gateway",
Name: "service_target_load",
Help: "Target load for function",
},
[]string{"function_name", "scaling_type"},
)
// For automatic monitoring and alerting (RED method)
histogram := prometheus.NewHistogramVec(prometheus.HistogramOpts{
Subsystem: "http",
@ -104,6 +126,8 @@ func BuildMetricsOptions() MetricOptions {
GatewayFunctionsHistogram: gatewayFunctionsHistogram,
GatewayFunctionInvocation: gatewayFunctionInvocation,
ServiceReplicasGauge: serviceReplicas,
ServiceMinReplicasGauge: serviceMinReplicas,
ServiceTargetLoadGauge: serviceTargetLoad,
ServiceMetrics: serviceMetricOptions,
GatewayFunctionInvocationStarted: gatewayFunctionInvocationStarted,
}