2016-09-09 19:28:19 +08:00
|
|
|
package middleware
|
|
|
|
|
|
|
|
import (
|
|
|
|
"net/http"
|
2017-09-07 04:24:10 +08:00
|
|
|
"strconv"
|
2016-09-09 19:28:19 +08:00
|
|
|
"strings"
|
2017-09-07 04:24:10 +08:00
|
|
|
"time"
|
2016-09-09 19:28:19 +08:00
|
|
|
|
2023-01-30 16:26:42 +08:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
|
2022-06-10 16:56:55 +08:00
|
|
|
"github.com/grafana/grafana/pkg/infra/log"
|
2019-02-24 06:35:26 +08:00
|
|
|
"github.com/grafana/grafana/pkg/infra/metrics"
|
2022-04-14 23:54:49 +08:00
|
|
|
"github.com/grafana/grafana/pkg/infra/tracing"
|
2023-08-16 21:05:19 +08:00
|
|
|
"github.com/grafana/grafana/pkg/middleware/requestmeta"
|
2022-01-27 01:44:20 +08:00
|
|
|
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
2023-08-16 21:05:19 +08:00
|
|
|
"github.com/grafana/grafana/pkg/setting"
|
2021-10-11 20:30:59 +08:00
|
|
|
"github.com/grafana/grafana/pkg/web"
|
2016-09-09 19:28:19 +08:00
|
|
|
)
|
|
|
|
|
2020-02-20 01:29:47 +08:00
|
|
|
var (
|
2025-09-03 21:47:38 +08:00
|
|
|
// Histogram buckets for the response time, in seconds
|
|
|
|
durationDefBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25}
|
|
|
|
|
|
|
|
// Histogram buckets for response sizes, in bytes
|
|
|
|
sizeDefBuckets = prometheus.ExponentialBuckets(128, 2, 16) // 128 bytes ... 4 MB
|
2020-02-20 01:29:47 +08:00
|
|
|
)
|
|
|
|
|
2023-08-16 21:05:19 +08:00
|
|
|
// RequestMetrics is a middleware handler that instruments the request.
|
|
|
|
func RequestMetrics(features featuremgmt.FeatureToggles, cfg *setting.Cfg, promRegister prometheus.Registerer) web.Middleware {
|
|
|
|
log := log.New("middleware.request-metrics")
|
|
|
|
|
|
|
|
httpRequestsInFlight := prometheus.NewGauge(
|
2020-02-20 01:29:47 +08:00
|
|
|
prometheus.GaugeOpts{
|
2021-05-11 20:37:03 +08:00
|
|
|
Namespace: "grafana",
|
|
|
|
Name: "http_request_in_flight",
|
|
|
|
Help: "A gauge of requests currently being served by Grafana.",
|
2020-02-20 01:29:47 +08:00
|
|
|
},
|
|
|
|
)
|
|
|
|
|
2024-02-06 16:29:41 +08:00
|
|
|
histogramLabels := []string{"handler", "status_code", "method", "status_source", "slo_group"}
|
2023-09-11 18:13:13 +08:00
|
|
|
|
2023-08-16 21:05:19 +08:00
|
|
|
if cfg.MetricsIncludeTeamLabel {
|
2023-10-03 18:25:01 +08:00
|
|
|
histogramLabels = append(histogramLabels, "grafana_team")
|
2023-08-16 21:05:19 +08:00
|
|
|
}
|
|
|
|
|
2025-09-03 21:47:38 +08:00
|
|
|
reqDurationOptions := prometheus.HistogramOpts{
|
2023-10-04 02:23:55 +08:00
|
|
|
Namespace: "grafana",
|
|
|
|
Name: "http_request_duration_seconds",
|
|
|
|
Help: "Histogram of latencies for HTTP requests.",
|
2025-09-03 21:47:38 +08:00
|
|
|
Buckets: durationDefBuckets,
|
|
|
|
}
|
|
|
|
|
|
|
|
reqSizeOptions := prometheus.HistogramOpts{
|
|
|
|
Namespace: "grafana",
|
|
|
|
Name: "http_response_size_bytes",
|
|
|
|
Help: "Histogram of request sizes for HTTP requests.",
|
|
|
|
Buckets: sizeDefBuckets, // 100B ... ~1MB
|
2023-10-04 02:23:55 +08:00
|
|
|
}
|
|
|
|
|
2023-11-15 04:50:27 +08:00
|
|
|
if features.IsEnabledGlobally(featuremgmt.FlagEnableNativeHTTPHistogram) {
|
2023-10-04 02:23:55 +08:00
|
|
|
// the recommended default value from the prom_client
|
|
|
|
// https://github.com/prometheus/client_golang/blob/main/prometheus/histogram.go#L411
|
2024-06-19 03:37:44 +08:00
|
|
|
// Giving this variable a value means the client will expose a native
|
|
|
|
// histogram.
|
2025-09-03 21:47:38 +08:00
|
|
|
reqDurationOptions.NativeHistogramBucketFactor = 1.1
|
|
|
|
reqSizeOptions.NativeHistogramBucketFactor = 1.1
|
2023-10-04 02:23:55 +08:00
|
|
|
// The default value in OTel. It probably good enough for us as well.
|
2025-09-03 21:47:38 +08:00
|
|
|
reqDurationOptions.NativeHistogramMaxBucketNumber = 160
|
|
|
|
reqSizeOptions.NativeHistogramMaxBucketNumber = 160
|
|
|
|
reqDurationOptions.NativeHistogramMinResetDuration = time.Hour
|
|
|
|
reqSizeOptions.NativeHistogramMinResetDuration = time.Hour
|
2024-06-19 03:37:44 +08:00
|
|
|
|
|
|
|
if features.IsEnabledGlobally(featuremgmt.FlagDisableClassicHTTPHistogram) {
|
|
|
|
// setting Buckets to nil with native options set means the classic
|
|
|
|
// histogram will no longer be exposed - this can be a good way to
|
|
|
|
// reduce cardinality in the exposed metrics
|
2025-09-03 21:47:38 +08:00
|
|
|
reqDurationOptions.Buckets = nil
|
|
|
|
reqSizeOptions.Buckets = nil
|
2024-06-19 03:37:44 +08:00
|
|
|
}
|
2023-10-04 02:23:55 +08:00
|
|
|
}
|
|
|
|
|
2023-08-16 21:05:19 +08:00
|
|
|
httpRequestDurationHistogram := prometheus.NewHistogramVec(
|
2025-09-03 21:47:38 +08:00
|
|
|
reqDurationOptions,
|
2023-08-16 21:05:19 +08:00
|
|
|
histogramLabels,
|
2020-10-20 15:44:38 +08:00
|
|
|
)
|
|
|
|
|
2025-09-03 21:47:38 +08:00
|
|
|
httpRequestSizeHistogram := prometheus.NewHistogramVec(
|
|
|
|
reqSizeOptions,
|
|
|
|
histogramLabels,
|
|
|
|
)
|
|
|
|
|
|
|
|
promRegister.MustRegister(httpRequestsInFlight, httpRequestDurationHistogram, httpRequestSizeHistogram)
|
2022-06-10 16:56:55 +08:00
|
|
|
|
2022-08-09 20:58:50 +08:00
|
|
|
return func(next http.Handler) http.Handler {
|
|
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
|
|
rw := web.Rw(w, r)
|
|
|
|
now := time.Now()
|
|
|
|
httpRequestsInFlight.Inc()
|
|
|
|
defer httpRequestsInFlight.Dec()
|
|
|
|
next.ServeHTTP(w, r)
|
2022-02-03 01:48:46 +08:00
|
|
|
|
2022-08-09 20:58:50 +08:00
|
|
|
status := rw.Status()
|
|
|
|
code := sanitizeCode(status)
|
2022-02-03 01:48:46 +08:00
|
|
|
|
2022-08-09 20:58:50 +08:00
|
|
|
handler := "unknown"
|
|
|
|
// TODO: do not depend on web.Context from the future
|
2023-03-31 21:38:09 +08:00
|
|
|
if routeOperation, exists := RouteOperationName(web.FromContext(r.Context()).Req); exists {
|
2022-08-09 20:58:50 +08:00
|
|
|
handler = routeOperation
|
2022-06-14 13:58:20 +08:00
|
|
|
} else {
|
2022-08-09 20:58:50 +08:00
|
|
|
// if grafana does not recognize the handler and returns 404 we should register it as `notfound`
|
|
|
|
if status == http.StatusNotFound {
|
|
|
|
handler = "notfound"
|
|
|
|
} else {
|
|
|
|
// log requests where we could not identify handler so we can register them.
|
2023-11-15 04:50:27 +08:00
|
|
|
if features.IsEnabled(r.Context(), featuremgmt.FlagLogRequestsInstrumentedAsUnknown) {
|
2022-08-09 20:58:50 +08:00
|
|
|
log.Warn("request instrumented as unknown", "path", r.URL.Path, "status_code", status)
|
|
|
|
}
|
2022-06-14 13:58:20 +08:00
|
|
|
}
|
2022-06-10 16:56:55 +08:00
|
|
|
}
|
2017-09-07 04:24:10 +08:00
|
|
|
|
2023-08-16 21:05:19 +08:00
|
|
|
labelValues := []string{handler, code, r.Method}
|
2023-09-11 18:13:13 +08:00
|
|
|
rmd := requestmeta.GetRequestMetaData(r.Context())
|
|
|
|
|
2024-02-06 16:29:41 +08:00
|
|
|
labelValues = append(labelValues, string(rmd.StatusSource), string(rmd.SLOGroup))
|
2023-09-11 18:13:13 +08:00
|
|
|
|
2023-08-16 21:05:19 +08:00
|
|
|
if cfg.MetricsIncludeTeamLabel {
|
|
|
|
labelValues = append(labelValues, rmd.Team)
|
|
|
|
}
|
|
|
|
|
2022-08-09 20:58:50 +08:00
|
|
|
// avoiding the sanitize functions for in the new instrumentation
|
|
|
|
// since they dont make much sense. We should remove them later.
|
2025-09-03 21:47:38 +08:00
|
|
|
durationHistogram := httpRequestDurationHistogram.
|
|
|
|
WithLabelValues(labelValues...)
|
|
|
|
sizeHistogram := httpRequestSizeHistogram.
|
2023-08-16 21:05:19 +08:00
|
|
|
WithLabelValues(labelValues...)
|
|
|
|
|
2023-10-04 02:23:55 +08:00
|
|
|
elapsedTime := time.Since(now).Seconds()
|
2025-09-03 21:47:38 +08:00
|
|
|
responseSize := float64(rw.Size())
|
2023-10-04 02:23:55 +08:00
|
|
|
|
2022-08-09 20:58:50 +08:00
|
|
|
if traceID := tracing.TraceIDFromContext(r.Context(), true); traceID != "" {
|
|
|
|
// Need to type-convert the Observer to an
|
|
|
|
// ExemplarObserver. This will always work for a
|
|
|
|
// HistogramVec.
|
2025-09-03 21:47:38 +08:00
|
|
|
durationHistogram.(prometheus.ExemplarObserver).ObserveWithExemplar(
|
2023-10-04 02:23:55 +08:00
|
|
|
elapsedTime, prometheus.Labels{"traceID": traceID},
|
2022-08-09 20:58:50 +08:00
|
|
|
)
|
2025-09-03 21:47:38 +08:00
|
|
|
sizeHistogram.(prometheus.ExemplarObserver).ObserveWithExemplar(
|
|
|
|
responseSize, prometheus.Labels{"traceID": traceID},
|
|
|
|
)
|
2023-10-04 02:23:55 +08:00
|
|
|
} else {
|
2025-09-03 21:47:38 +08:00
|
|
|
durationHistogram.Observe(elapsedTime)
|
|
|
|
sizeHistogram.Observe(responseSize)
|
2022-08-09 20:58:50 +08:00
|
|
|
}
|
2022-02-03 01:48:46 +08:00
|
|
|
|
2022-08-09 20:58:50 +08:00
|
|
|
switch {
|
|
|
|
case strings.HasPrefix(r.RequestURI, "/api/datasources/proxy"):
|
|
|
|
countProxyRequests(status)
|
|
|
|
case strings.HasPrefix(r.RequestURI, "/api/"):
|
|
|
|
countApiRequests(status)
|
|
|
|
default:
|
|
|
|
countPageRequests(status)
|
|
|
|
}
|
|
|
|
})
|
2016-09-09 19:28:19 +08:00
|
|
|
}
|
|
|
|
}
|
2016-09-12 19:29:31 +08:00
|
|
|
|
|
|
|
func countApiRequests(status int) {
|
|
|
|
switch status {
|
|
|
|
case 200:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MApiStatus.WithLabelValues("200").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
case 404:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MApiStatus.WithLabelValues("404").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
case 500:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MApiStatus.WithLabelValues("500").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
default:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MApiStatus.WithLabelValues("unknown").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func countPageRequests(status int) {
|
|
|
|
switch status {
|
|
|
|
case 200:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MPageStatus.WithLabelValues("200").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
case 404:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MPageStatus.WithLabelValues("404").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
case 500:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MPageStatus.WithLabelValues("500").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
default:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MPageStatus.WithLabelValues("unknown").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func countProxyRequests(status int) {
|
|
|
|
switch status {
|
|
|
|
case 200:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MProxyStatus.WithLabelValues("200").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
case 404:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MProxyStatus.WithLabelValues("400").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
case 500:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MProxyStatus.WithLabelValues("500").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
default:
|
2019-07-16 22:58:46 +08:00
|
|
|
metrics.MProxyStatus.WithLabelValues("unknown").Inc()
|
2016-09-12 19:29:31 +08:00
|
|
|
}
|
|
|
|
}
|
2017-09-07 04:24:10 +08:00
|
|
|
|
|
|
|
// sanitizeCode renders an HTTP status code as a decimal string. A value of 0
// means the wrapped http.Handler never set a status code; in that case "200"
// is returned, for consistency with the behavior of the stdlib.
func sanitizeCode(s int) string {
	switch s {
	case 0:
		return "200"
	default:
		return strconv.Itoa(s)
	}
}
|