Mirror of https://github.com/openfaas/faas.git
Export new metrics for OpenFaaS Pro scaling
* Add service target metric
* Add service min replicas metric
* Add scale type metric

These combined allow new auto-scaling modes and parameters for OpenFaaS Pro customers.

Signed-off-by: Alex Ellis (OpenFaaS Ltd) <alexellis2@gmail.com>
committed by Alex Ellis
parent 34735d61d0
commit d85d5e7239
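The change adds two new gauge vectors, which the Prometheus client library exposes as gateway_service_min and gateway_service_target_load (the latter labelled by scaling_type), alongside the existing gateway_service_count series. As a rough sketch of why that matters for the new auto-scaling modes, an external controller could read the target-load series through the standard Prometheus HTTP API. The Prometheus address and the response-decoding struct below are illustrative assumptions, not part of this commit.

// query_target_load.go: sketch of reading the new gateway_service_target_load
// series via the Prometheus HTTP API (instant query). Not part of this commit.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	q := url.QueryEscape("gateway_service_target_load")
	resp, err := http.Get("http://127.0.0.1:9090/api/v1/query?query=" + q) // assumed Prometheus address
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Minimal view of the Prometheus HTTP API envelope:
	// data.result[].metric holds the labels, data.result[].value is [timestamp, value].
	var body struct {
		Data struct {
			Result []struct {
				Metric map[string]string `json:"metric"`
				Value  []interface{}     `json:"value"`
			} `json:"result"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		panic(err)
	}

	for _, r := range body.Data.Result {
		// Value[1] is the sample value as a string.
		fmt.Printf("function=%s scaling_type=%s target_load=%v\n",
			r.Metric["function_name"], r.Metric["scaling_type"], r.Value[1])
	}
}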
@@ -35,9 +35,7 @@ func AddMetricsHandler(handler http.HandlerFunc, prometheusQuery PrometheusQuery
 				recorder.Code,
 				string(upstreamBody))

-			w.Header().Set("Content-Type", "text/plain")
-			w.WriteHeader(http.StatusInternalServerError)
-			w.Write([]byte(fmt.Sprintf("List functions responded with code %d", recorder.Code)))
+			http.Error(w, "Unexpected status code retrieving functions from backend", http.StatusInternalServerError)
 			return
 		}

@@ -48,28 +46,33 @@ func AddMetricsHandler(handler http.HandlerFunc, prometheusQuery PrometheusQuery
 		if err != nil {
 			log.Printf("Metrics upstream error: %s", err)

-			w.Header().Set("Content-Type", "text/plain")
-			w.WriteHeader(http.StatusInternalServerError)
-			w.Write([]byte("Error parsing metrics from upstream provider/backend."))
+			http.Error(w, "Error parsing metrics from upstream provider/backend", http.StatusInternalServerError)
 			return
 		}

-		expr := url.QueryEscape(`sum(gateway_function_invocation_total{function_name=~".*", code=~".*"}) by (function_name, code)`)
-		// expr := "sum(gateway_function_invocation_total%7Bfunction_name%3D~%22.*%22%2C+code%3D~%22.*%22%7D)+by+(function_name%2C+code)"
-		results, fetchErr := prometheusQuery.Fetch(expr)
-		if fetchErr != nil {
-			log.Printf("Error querying Prometheus API: %s\n", fetchErr.Error())
-			w.Header().Set("Content-Type", "application/json")
-			w.WriteHeader(http.StatusOK)
-			w.Write(upstreamBody)
-			return
+		// Ensure values are empty first.
+		for i := range functions {
+			functions[i].InvocationCount = 0
 		}

-		mixIn(&functions, results)
+		if len(functions) > 0 {

-		bytesOut, marshalErr := json.Marshal(functions)
-		if marshalErr != nil {
-			log.Println(marshalErr)
+			ns := functions[0].Namespace
+			q := fmt.Sprintf(`sum(gateway_function_invocation_total{function_name=~".*.%s"}) by (function_name)`, ns)
+			// Restrict query results to only function names matching namespace suffix.
+
+			results, err := prometheusQuery.Fetch(url.QueryEscape(q))
+			if err != nil {
+				log.Printf("Error querying Prometheus: %s\n", err.Error())
+				return
+			}
+			mixIn(&functions, results)
+		}
+
+		bytesOut, err := json.Marshal(functions)
+		if err != nil {
+			log.Printf("Error serializing functions: %s", err)
+			http.Error(w, "error writing response after adding metrics", http.StatusInternalServerError)
+			return
+		}

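The handler now restricts the invocation-count query to the namespace of the functions being listed by matching on the function_name suffix. A minimal sketch of how that expression renders, assuming the openfaas-fn namespace used in the test fixture further down:

// Sketch only: rendering the namespace-scoped expression built above.
package main

import (
	"fmt"
	"net/url"
)

func main() {
	ns := "openfaas-fn"
	q := fmt.Sprintf(`sum(gateway_function_invocation_total{function_name=~".*.%s"}) by (function_name)`, ns)
	fmt.Println(q)
	// Prints:
	// sum(gateway_function_invocation_total{function_name=~".*.openfaas-fn"}) by (function_name)
	fmt.Println(url.QueryEscape(q)) // the escaped form passed to prometheusQuery.Fetch
}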
@@ -85,25 +88,19 @@ func mixIn(functions *[]types.FunctionStatus, metrics *VectorQueryResponse)
 		return
 	}

-	// Ensure values are empty first.
-	for i := range *functions {
-		(*functions)[i].InvocationCount = 0
-	}
-
 	for i, function := range *functions {
 		for _, v := range metrics.Data.Result {
-
 			if v.Metric.FunctionName == fmt.Sprintf("%s.%s", function.Name, function.Namespace) {
 				metricValue := v.Value[1]
-				switch metricValue.(type) {
+				switch value := metricValue.(type) {
 				case string:
-					f, strconvErr := strconv.ParseFloat(metricValue.(string), 64)
-					if strconvErr != nil {
-						log.Printf("Unable to convert value for metric: %s\n", strconvErr)
+					f, err := strconv.ParseFloat(value, 64)
+					if err != nil {
+						log.Printf("add_metrics: unable to convert value %q for metric: %s", value, err)
 						continue
 					}
 					(*functions)[i].InvocationCount += f
 					break
 				}
 			}
 		}
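The reworked switch binds the asserted string to value, removing the second type assertion that the old metricValue.(string) call performed. A self-contained sketch of the same parsing pattern, using a made-up instant-vector sample:

// Sketch of the value-parsing pattern used in mixIn: a Prometheus instant-vector
// sample decodes as [ <timestamp float64>, "<value string>" ], so the value is
// asserted to string and parsed into a float64. The sample data here is made up.
package main

import (
	"fmt"
	"log"
	"strconv"
)

func main() {
	sample := []interface{}{1643900000.123, "42"} // [timestamp, value]

	var invocations float64
	switch value := sample[1].(type) {
	case string:
		f, err := strconv.ParseFloat(value, 64)
		if err != nil {
			log.Printf("unable to convert value %q for metric: %s", value, err)
			break
		}
		invocations += f
	}

	fmt.Println(invocations) // 42
}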
@@ -83,7 +83,7 @@ func Test_FunctionsHandler_ReturnsJSONAndOneFunction(t *testing.T)
 func makeFunctionsHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		functions := []types.FunctionStatus{
-			types.FunctionStatus{
+			{
 				Name:      "func_echoit",
 				Replicas:  0,
 				Namespace: "openfaas-fn",
@@ -12,12 +12,14 @@ import (
 	"net/http"
 	"net/url"
 	"path"
+	"strconv"
 	"time"

 	"log"

 	"github.com/openfaas/faas-provider/auth"
 	types "github.com/openfaas/faas-provider/types"
+	"github.com/openfaas/faas/gateway/scaling"
 	"github.com/prometheus/client_golang/prometheus"
 )

@@ -46,6 +48,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc)
 	e.metricOptions.GatewayFunctionsHistogram.Describe(ch)
 	e.metricOptions.ServiceReplicasGauge.Describe(ch)
 	e.metricOptions.GatewayFunctionInvocationStarted.Describe(ch)
+	e.metricOptions.ServiceTargetLoadGauge.Describe(ch)

 	e.metricOptions.ServiceMetrics.Counter.Describe(ch)
 	e.metricOptions.ServiceMetrics.Histogram.Describe(ch)
@@ -59,6 +62,8 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric)
 	e.metricOptions.GatewayFunctionInvocationStarted.Collect(ch)

 	e.metricOptions.ServiceReplicasGauge.Reset()
+	e.metricOptions.ServiceTargetLoadGauge.Reset()
+
 	for _, service := range e.services {
 		var serviceName string
 		if len(service.Namespace) > 0 {
@@ -66,12 +71,54 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric)
 		} else {
 			serviceName = service.Name
 		}
+
+		// Set current replica count
 		e.metricOptions.ServiceReplicasGauge.
 			WithLabelValues(serviceName).
 			Set(float64(service.Replicas))
+
+		// Set minimum replicas
+		minReplicas := scaling.DefaultMinReplicas
+		if service.Labels != nil {
+			a := *service.Labels
+			if v, ok := a[scaling.MinScaleLabel]; ok && len(v) > 0 {
+				val, _ := strconv.Atoi(v)
+				minReplicas = val
+			}
+		}
+
+		e.metricOptions.ServiceMinReplicasGauge.
+			WithLabelValues(serviceName).
+			Set(float64(minReplicas))
+
+		// Set scale type
+		scaleType := scaling.DefaultTypeScale
+		if service.Labels != nil {
+			a := *service.Labels
+			if v, ok := a[scaling.ScaleTypeLabel]; ok && len(v) > 0 {
+				scaleType = v
+			}
+		}
+
+		// Set target load
+		targetScale := scaling.DefaultTargetLoad
+		if service.Labels != nil {
+			a := *service.Labels
+			if v, ok := a[scaling.TargetLoadLabel]; ok && len(v) > 0 {
+				val, _ := strconv.Atoi(v)
+				targetScale = val
+			}
+		}
+
+		e.metricOptions.ServiceTargetLoadGauge.
+			WithLabelValues(serviceName, scaleType).
+			Set(float64(targetScale))
+
 	}

 	e.metricOptions.ServiceReplicasGauge.Collect(ch)
+	e.metricOptions.ServiceMinReplicasGauge.Collect(ch)
+	e.metricOptions.ServiceTargetLoadGauge.Collect(ch)

 	e.metricOptions.ServiceMetrics.Counter.Collect(ch)
 	e.metricOptions.ServiceMetrics.Histogram.Collect(ch)
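Each of the three new look-ups in Collect follows the same shape: start from a package default, then override it when the function carries the corresponding label. A sketch of that pattern folded into a hypothetical helper; the helper name and example label key are illustrative, and unlike the commit it checks the Atoi error rather than discarding it:

// Sketch: the default-then-override pattern used for minReplicas, scaleType and
// targetScale in Collect, expressed as a hypothetical helper.
package main

import (
	"fmt"
	"strconv"
)

// intFromLabels returns def unless labels contains key with a valid integer value.
func intFromLabels(labels *map[string]string, key string, def int) int {
	if labels == nil {
		return def
	}
	if v, ok := (*labels)[key]; ok && len(v) > 0 {
		if val, err := strconv.Atoi(v); err == nil {
			return val
		}
	}
	return def
}

func main() {
	labels := map[string]string{"example.scale.min": "5"} // illustrative label key
	fmt.Println(intFromLabels(&labels, "example.scale.min", 1)) // 5
	fmt.Println(intFromLabels(nil, "example.scale.min", 1))     // 1 (default)
}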
@@ -44,21 +44,21 @@ func Test_Describe_DescribesThePrometheusMetrics(t *testing.T)
 	expectedGatewayFunctionInvocationDesc := `Desc{fqName: "gateway_function_invocation_total", help: "Function metrics", constLabels: {}, variableLabels: [function_name code]}`
 	actualGatewayFunctionInvocationDesc := d.String()
 	if expectedGatewayFunctionInvocationDesc != actualGatewayFunctionInvocationDesc {
-		t.Errorf("Want %s, got: %s", expectedGatewayFunctionInvocationDesc, actualGatewayFunctionInvocationDesc)
+		t.Errorf("Want\n%s\ngot\n%s", expectedGatewayFunctionInvocationDesc, actualGatewayFunctionInvocationDesc)
 	}

 	d = <-ch
 	expectedGatewayFunctionsHistogramDesc := `Desc{fqName: "gateway_functions_seconds", help: "Function time taken", constLabels: {}, variableLabels: [function_name]}`
 	actualGatewayFunctionsHistogramDesc := d.String()
 	if expectedGatewayFunctionsHistogramDesc != actualGatewayFunctionsHistogramDesc {
-		t.Errorf("Want %s, got: %s", expectedGatewayFunctionsHistogramDesc, actualGatewayFunctionsHistogramDesc)
+		t.Errorf("Want\n%s\ngot\n%s", expectedGatewayFunctionsHistogramDesc, actualGatewayFunctionsHistogramDesc)
 	}

 	d = <-ch
-	expectedServiceReplicasGaugeDesc := `Desc{fqName: "gateway_service_count", help: "Service replicas", constLabels: {}, variableLabels: [function_name]}`
+	expectedServiceReplicasGaugeDesc := `Desc{fqName: "gateway_service_count", help: "Current count of replicas for function", constLabels: {}, variableLabels: [function_name]}`
 	actualServiceReplicasGaugeDesc := d.String()
 	if expectedServiceReplicasGaugeDesc != actualServiceReplicasGaugeDesc {
-		t.Errorf("Want %s, got: %s", expectedServiceReplicasGaugeDesc, actualServiceReplicasGaugeDesc)
+		t.Errorf("Want\n%s\ngot\n%s", expectedServiceReplicasGaugeDesc, actualServiceReplicasGaugeDesc)
 	}

 }
@@ -16,8 +16,12 @@ type MetricOptions struct
 	GatewayFunctionInvocation        *prometheus.CounterVec
 	GatewayFunctionsHistogram        *prometheus.HistogramVec
 	GatewayFunctionInvocationStarted *prometheus.CounterVec
-	ServiceReplicasGauge             *prometheus.GaugeVec
-	ServiceMetrics                   *ServiceMetricOptions
+
+	ServiceReplicasGauge    *prometheus.GaugeVec
+	ServiceMinReplicasGauge *prometheus.GaugeVec
+	ServiceTargetLoadGauge  *prometheus.GaugeVec
+
+	ServiceMetrics *ServiceMetricOptions
 }

 // ServiceMetricOptions provides RED metrics
@@ -62,11 +66,29 @@ func BuildMetricsOptions() MetricOptions
 		prometheus.GaugeOpts{
 			Namespace: "gateway",
 			Name:      "service_count",
-			Help:      "Service replicas",
+			Help:      "Current count of replicas for function",
 		},
 		[]string{"function_name"},
 	)

+	serviceMinReplicas := prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: "gateway",
+			Name:      "service_min",
+			Help:      "Minimum replicas for function",
+		},
+		[]string{"function_name"},
+	)
+
+	serviceTargetLoad := prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: "gateway",
+			Name:      "service_target_load",
+			Help:      "Target load for function",
+		},
+		[]string{"function_name", "scaling_type"},
+	)
+
 	// For automatic monitoring and alerting (RED method)
 	histogram := prometheus.NewHistogramVec(prometheus.HistogramOpts{
 		Subsystem: "http",
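For reference, a small standalone sketch of how a gauge shaped like serviceTargetLoad above surfaces on a /metrics endpoint; the registry wiring, function name, scaling type and listen address are illustrative assumptions rather than gateway code:

// Sketch: registering a gauge like serviceTargetLoad and exposing it over /metrics.
package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	serviceTargetLoad := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "gateway",
			Name:      "service_target_load",
			Help:      "Target load for function",
		},
		[]string{"function_name", "scaling_type"},
	)

	reg := prometheus.NewRegistry()
	reg.MustRegister(serviceTargetLoad)

	// Illustrative values only.
	serviceTargetLoad.WithLabelValues("figlet.openfaas-fn", "rps").Set(50)

	// A scrape of :8081/metrics would then include a line such as:
	//   gateway_service_target_load{function_name="figlet.openfaas-fn",scaling_type="rps"} 50
	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	log.Fatal(http.ListenAndServe(":8081", nil))
}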
@@ -104,6 +126,8 @@ func BuildMetricsOptions() MetricOptions
 		GatewayFunctionsHistogram:        gatewayFunctionsHistogram,
 		GatewayFunctionInvocation:        gatewayFunctionInvocation,
 		ServiceReplicasGauge:             serviceReplicas,
+		ServiceMinReplicasGauge:          serviceMinReplicas,
+		ServiceTargetLoadGauge:           serviceTargetLoad,
 		ServiceMetrics:                   serviceMetricOptions,
 		GatewayFunctionInvocationStarted: gatewayFunctionInvocationStarted,
 	}