Metrics: Add http_response_size_bytes metric (#110428)

* Metrics: Add http_response_size_bytes metric

* add better handler names for public asset paths

* fix tests

* comment

* remove debug log

* exemplar
This commit is contained in:
Josh Hunt 2025-09-03 14:47:38 +01:00 committed by GitHub
parent 4e28cba1c5
commit bda895ec03
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 44 additions and 17 deletions

View File

@ -18,9 +18,11 @@ import (
) )
var ( var (
// DefBuckets are histogram buckets for the response time (in seconds) // Histogram buckets for the response time, in seconds
// of a network service, including one that is responding very slowly. durationDefBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25}
defBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25}
// Histogram buckets for response sizes, in bytes
sizeDefBuckets = prometheus.ExponentialBuckets(128, 2, 16) // 128 bytes ... 4 MB
) )
// RequestMetrics is a middleware handler that instruments the request. // RequestMetrics is a middleware handler that instruments the request.
@ -41,11 +43,18 @@ func RequestMetrics(features featuremgmt.FeatureToggles, cfg *setting.Cfg, promR
histogramLabels = append(histogramLabels, "grafana_team") histogramLabels = append(histogramLabels, "grafana_team")
} }
histogramOptions := prometheus.HistogramOpts{ reqDurationOptions := prometheus.HistogramOpts{
Namespace: "grafana", Namespace: "grafana",
Name: "http_request_duration_seconds", Name: "http_request_duration_seconds",
Help: "Histogram of latencies for HTTP requests.", Help: "Histogram of latencies for HTTP requests.",
Buckets: defBuckets, Buckets: durationDefBuckets,
}
reqSizeOptions := prometheus.HistogramOpts{
Namespace: "grafana",
Name: "http_response_size_bytes",
Help: "Histogram of response sizes for HTTP requests.",
Buckets: sizeDefBuckets, // 128 bytes ... 4 MB
} }
if features.IsEnabledGlobally(featuremgmt.FlagEnableNativeHTTPHistogram) { if features.IsEnabledGlobally(featuremgmt.FlagEnableNativeHTTPHistogram) {
@ -53,25 +62,34 @@ func RequestMetrics(features featuremgmt.FeatureToggles, cfg *setting.Cfg, promR
// https://github.com/prometheus/client_golang/blob/main/prometheus/histogram.go#L411 // https://github.com/prometheus/client_golang/blob/main/prometheus/histogram.go#L411
// Giving this variable a value means the client will expose a native // Giving this variable a value means the client will expose a native
// histogram. // histogram.
histogramOptions.NativeHistogramBucketFactor = 1.1 reqDurationOptions.NativeHistogramBucketFactor = 1.1
reqSizeOptions.NativeHistogramBucketFactor = 1.1
// The default value in OTel. It's probably good enough for us as well. // The default value in OTel. It's probably good enough for us as well.
histogramOptions.NativeHistogramMaxBucketNumber = 160 reqDurationOptions.NativeHistogramMaxBucketNumber = 160
histogramOptions.NativeHistogramMinResetDuration = time.Hour reqSizeOptions.NativeHistogramMaxBucketNumber = 160
reqDurationOptions.NativeHistogramMinResetDuration = time.Hour
reqSizeOptions.NativeHistogramMinResetDuration = time.Hour
if features.IsEnabledGlobally(featuremgmt.FlagDisableClassicHTTPHistogram) { if features.IsEnabledGlobally(featuremgmt.FlagDisableClassicHTTPHistogram) {
// setting Buckets to nil with native options set means the classic // setting Buckets to nil with native options set means the classic
// histogram will no longer be exposed - this can be a good way to // histogram will no longer be exposed - this can be a good way to
// reduce cardinality in the exposed metrics // reduce cardinality in the exposed metrics
histogramOptions.Buckets = nil reqDurationOptions.Buckets = nil
reqSizeOptions.Buckets = nil
} }
} }
httpRequestDurationHistogram := prometheus.NewHistogramVec( httpRequestDurationHistogram := prometheus.NewHistogramVec(
histogramOptions, reqDurationOptions,
histogramLabels, histogramLabels,
) )
promRegister.MustRegister(httpRequestsInFlight, httpRequestDurationHistogram) httpRequestSizeHistogram := prometheus.NewHistogramVec(
reqSizeOptions,
histogramLabels,
)
promRegister.MustRegister(httpRequestsInFlight, httpRequestDurationHistogram, httpRequestSizeHistogram)
return func(next http.Handler) http.Handler { return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@ -111,20 +129,27 @@ func RequestMetrics(features featuremgmt.FeatureToggles, cfg *setting.Cfg, promR
// avoiding the sanitize functions in the new instrumentation // avoiding the sanitize functions in the new instrumentation
// since they don't make much sense. We should remove them later. // since they don't make much sense. We should remove them later.
histogram := httpRequestDurationHistogram. durationHistogram := httpRequestDurationHistogram.
WithLabelValues(labelValues...)
sizeHistogram := httpRequestSizeHistogram.
WithLabelValues(labelValues...) WithLabelValues(labelValues...)
elapsedTime := time.Since(now).Seconds() elapsedTime := time.Since(now).Seconds()
responseSize := float64(rw.Size())
if traceID := tracing.TraceIDFromContext(r.Context(), true); traceID != "" { if traceID := tracing.TraceIDFromContext(r.Context(), true); traceID != "" {
// Need to type-convert the Observer to an // Need to type-convert the Observer to an
// ExemplarObserver. This will always work for a // ExemplarObserver. This will always work for a
// HistogramVec. // HistogramVec.
histogram.(prometheus.ExemplarObserver).ObserveWithExemplar( durationHistogram.(prometheus.ExemplarObserver).ObserveWithExemplar(
elapsedTime, prometheus.Labels{"traceID": traceID}, elapsedTime, prometheus.Labels{"traceID": traceID},
) )
sizeHistogram.(prometheus.ExemplarObserver).ObserveWithExemplar(
responseSize, prometheus.Labels{"traceID": traceID},
)
} else { } else {
histogram.Observe(elapsedTime) durationHistogram.Observe(elapsedTime)
sizeHistogram.Observe(responseSize)
} }
switch { switch {

View File

@ -45,8 +45,10 @@ var unnamedHandlers = []struct {
pathPattern *regexp.Regexp pathPattern *regexp.Regexp
handler string handler string
}{ }{
{handler: "plugin-assets", pathPattern: regexp.MustCompile("^/public/plugins/")},
{handler: "public-build-assets", pathPattern: regexp.MustCompile("^/public/build/")}, // All Grafana core assets should come from this path
{handler: "public-assets", pathPattern: regexp.MustCompile("^/favicon.ico")}, {handler: "public-assets", pathPattern: regexp.MustCompile("^/favicon.ico")},
{handler: "public-assets", pathPattern: regexp.MustCompile("^/public/")}, {handler: "public-assets", pathPattern: regexp.MustCompile("^/public/")}, // Fallback for other assets, this should go down to 0
{handler: "/metrics", pathPattern: regexp.MustCompile("^/metrics")}, {handler: "/metrics", pathPattern: regexp.MustCompile("^/metrics")},
{handler: "/healthz", pathPattern: regexp.MustCompile("^/healthz")}, {handler: "/healthz", pathPattern: regexp.MustCompile("^/healthz")},
{handler: "/api/health", pathPattern: regexp.MustCompile("^/api/health")}, {handler: "/api/health", pathPattern: regexp.MustCompile("^/api/health")},

View File

@ -143,7 +143,7 @@ func TestFrontendService_Middleware(t *testing.T) {
metricsBody := recorder.Body.String() metricsBody := recorder.Body.String()
assert.Contains(t, metricsBody, "# TYPE grafana_http_request_duration_seconds histogram") assert.Contains(t, metricsBody, "# TYPE grafana_http_request_duration_seconds histogram")
assert.Contains(t, metricsBody, "grafana_http_request_duration_seconds_bucket{handler=\"public-assets\"") // assets 404 assert.Contains(t, metricsBody, "grafana_http_request_duration_seconds_bucket{handler=\"public-build-assets\"") // assets 404
assert.Contains(t, metricsBody, "grafana_http_request_duration_seconds_bucket{handler=\"/*\"") // index route assert.Contains(t, metricsBody, "grafana_http_request_duration_seconds_bucket{handler=\"/*\"") // index route
}) })