promqltest: Test for counter reset conflict warnings

This is an attempt to make sure that we are not accidentally warning about conflicting counter resets in rate calculation, see https://github.com/prometheus/prometheus/pull/17051#issuecomment-3226503416 . This is done by being more explicit about the warn expectation. However, as long as https://github.com/prometheus/prometheus/issues/15346 is not addressed, we won't be able to trigger the annotation this way anyway. However, we can play a trick, by wrapping a suitable expression in `histogram_count` or `histogram_sum`, which will invoke the `HistogramStatsIterator`, which in turn creates counter reset hints on the fly. So this commit also adds tests with that, both for absence of an annotation with `rate` and presence of an annotation with `sum_over_time`. Signed-off-by: beorn7 <beorn@grafana.com> test tbs Signed-off-by: beorn7 <beorn@grafana.com>
2025-08-27 18:19:30 +02:00 · 2025-08-27 18:19:30 +02:00 · 03588328d2
parent ba808d1736
commit 03588328d2
2 changed files with 98 additions and 34 deletions
--- a/promql/promqltest/testdata/functions.test
+++ b/promql/promqltest/testdata/functions.test
@ -254,15 +254,15 @@ eval instant at 20m irate(http_requests_histogram{path="/b"}[6m])
  	expect no_warn

 eval instant at 20m irate(http_requests_histogram{path="/c"}[20m])
-	expect warn
+	expect warn msg: PromQL warning: this native histogram metric is not a counter: "http_requests_histogram"
 	{path="/c"} {{sum:0.01 count:0.01 counter_reset_hint:gauge}}

 eval instant at 20m irate(http_requests_histogram{path="/d"}[20m])
-	expect warn
+	expect warn msg: PromQL warning: this native histogram metric is not a counter: "http_requests_histogram"
 	{path="/d"} {{sum:0.01 count:0.01 counter_reset_hint:gauge}}

 eval instant at 20m irate(http_requests_histogram{path="/e"}[20m])
-  expect warn
+  expect warn msg: PromQL warning: encountered a mix of histograms and floats for metric name "http_requests_histogram"

 eval instant at 20m irate(http_requests_histogram{path="/f"}[20m])
  	expect no_warn
@ -293,12 +293,12 @@ eval instant at 20m delta(http_requests_gauge[20m])

 # delta emits warn annotation for non-gauge histogram types.
 eval instant at 20m delta(http_requests_counter[20m])
-	expect warn
+	expect warn msg: PromQL warning: this native histogram metric is not a gauge: "http_requests_counter"
 	{path="/foo"} {{schema:0 sum:4 count:8 buckets:[4 4 4]}}

 # delta emits warn annotation for mix of histogram and floats.
 eval instant at 20m delta(http_requests_mix[20m])
-	expect warn
+	expect warn msg: PromQL warning: encountered a mix of histograms and floats for metric name "http_requests_mix"
 	#empty

 clear
@ -337,21 +337,21 @@ eval instant at 20m idelta(http_requests_histogram{path="/b"}[6m])
 	expect no_warn

 eval instant at 20m idelta(http_requests_histogram{path="/c"}[20m])
-	expect warn
+	expect warn msg: PromQL warning: this native histogram metric is not a gauge: "http_requests_histogram"
 	{path="/c"} {{sum:1 count:1 counter_reset_hint:gauge}}

 eval instant at 20m idelta(http_requests_histogram{path="/d"}[20m])
-	expect warn
+	expect warn msg: PromQL warning: this native histogram metric is not a gauge: "http_requests_histogram"
 	{path="/d"} {{sum:1 count:1 counter_reset_hint:gauge}}

 eval instant at 20m idelta(http_requests_histogram{path="/e"}[20m])
-  expect warn
+  expect warn msg: PromQL warning: encountered a mix of histograms and floats for metric name "http_requests_histogram"

 eval instant at 20m idelta(http_requests_histogram{path="/f"}[20m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains a mix of histograms with exponential and custom buckets schemas for metric name "http_requests_histogram"

 eval instant at 20m idelta(http_requests_histogram{path="/g"}[20m])
-	expect warn
+	expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "http_requests_histogram"

 clear

--- a/promql/promqltest/testdata/native_histograms.test
+++ b/promql/promqltest/testdata/native_histograms.test
@ -101,54 +101,56 @@ clear
 # with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket
 # positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0.
 load 5m
-	incr_histogram	{{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10
+    incr_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10

 eval instant at 5m histogram_count(incr_histogram)
-	{} 5
+    {} 5

 eval instant at 5m histogram_sum(incr_histogram)
-	{} 6
+    {} 6

 eval instant at 5m histogram_avg(incr_histogram)
-	{} 1.2
+    {} 1.2

 # We expect 3/5ths of the values to fall in the range 1 < x <= 2.
 eval instant at 5m histogram_fraction(1, 2, incr_histogram)
-	{} 0.6
+    {} 0.6

 # See explanation for exponential interpolation above.
 eval instant at 5m histogram_quantile(0.5, incr_histogram)
-	{} 1.414213562373095
+    {} 1.414213562373095


 eval instant at 50m incr_histogram
-	{__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}}
+    {__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}}

 eval instant at 50m histogram_count(incr_histogram)
-	{} 14
+    {} 14

 eval instant at 50m histogram_sum(incr_histogram)
-	{} 24
+    {} 24

 eval instant at 50m histogram_avg(incr_histogram)
    {} 1.7142857142857142

 # We expect 12/14ths of the values to fall in the range 1 < x <= 2.
 eval instant at 50m histogram_fraction(1, 2, incr_histogram)
-	{} 0.8571428571428571
+     {} 0.8571428571428571

 # See explanation for exponential interpolation above.
 eval instant at 50m histogram_quantile(0.5, incr_histogram)
-	{} 1.414213562373095
+     {} 1.414213562373095

 # Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum.
 eval instant at 50m rate(incr_histogram[10m])
+    expect no_warn
    {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}}

 # Calculate the 50th percentile of observations over the last 10m.
 # See explanation for exponential interpolation above.
 eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m]))
-	{} 1.414213562373095
+    expect no_warn
+    {} 1.414213562373095

 clear

@ -291,9 +293,11 @@ load 15s
    histogram_rate {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100

 eval instant at 5m rate(histogram_rate[45s])
+    expect no_warn
    {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}

 eval range from 5m to 5m30s step 30s rate(histogram_rate[45s])
+    expect no_warn
    {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1

 clear
@ -1044,13 +1048,16 @@ load 5m
     reset_in_bucket {{schema:0 count:4 sum:5 buckets:[1 2 1]}} {{schema:0 count:5 sum:6 buckets:[1 1 3]}} {{schema:0 count:6 sum:7 buckets:[1 2 3]}}

 eval instant at 10m increase(reset_in_bucket[15m])
+     expect no_warn
     {} {{count:9 sum:10.5 buckets:[1.5 3 4.5]}}

 # The following two test the "fast path" where only sum and count is decoded.
 eval instant at 10m histogram_count(increase(reset_in_bucket[15m]))
+     expect no_warn
     {} 9

 eval instant at 10m histogram_sum(increase(reset_in_bucket[15m]))
+     expect no_warn
     {} 10.5

 clear
@ -1076,12 +1083,12 @@ load 30s

 # Test the case where we only have two points for rate
 eval instant at 30s rate(some_metric[1m])
-    expect warn
+    expect warn msg: PromQL warning: this native histogram metric is not a counter: "some_metric"
    {} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}}

 # Test the case where we have more than two points for rate
 eval instant at 1m rate(some_metric[1m30s])
-    expect warn
+    expect warn msg: PromQL warning: this native histogram metric is not a counter: "some_metric"
    {} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}}

 clear
@ -1092,24 +1099,26 @@ load 30s

 # Start and end with exponential, with custom in the middle.
 eval instant at 1m rate(some_metric[1m30s])
-    expect warn
+    expect warn msg: PromQL warning: vector contains a mix of histograms with exponential and custom buckets schemas for metric name "some_metric"
    # Should produce no results.

 # Start and end with custom, with exponential in the middle.
 eval instant at 1m30s rate(some_metric[1m30s])
-    expect warn
+    expect warn msg: PromQL warning: vector contains a mix of histograms with exponential and custom buckets schemas for metric name "some_metric"
    # Should produce no results.

 # Start with custom, end with exponential. Return the exponential histogram divided by 48.
 # (The 1st sample is the NHCB with count:1. It is mostly ignored with the exception of the
 # count, which means the rate calculation extrapolates until the count hits 0.)
 eval instant at 1m rate(some_metric[1m])
+    expect no_warn
    {} {{count:0.08333333333333333 sum:0.10416666666666666 counter_reset_hint:gauge buckets:[0.020833333333333332 0.041666666666666664 0.020833333333333332]}}

 # Start with exponential, end with custom. Return the custom buckets histogram divided by 30.
 # (With the 2nd sample having a count of 1, the extrapolation to zero lands exactly at the
 # left boundary of the range, so no extrapolation limitation needed.)
 eval instant at 30s rate(some_metric[1m])
+    expect no_warn
    {} {{schema:-53 sum:0.03333333333333333 count:0.03333333333333333 custom_values:[5 10] buckets:[0.03333333333333333]}}

 clear
@ -1121,41 +1130,50 @@ load 1m
 # There is no change to the bucket count over time, thus rate is 0 in each bucket.
 # However native histograms do not represent empty buckets, so here the zeros are implicit.
 eval instant at 5m rate(const_histogram[5m])
+    expect no_warn
    {} {{schema:0 sum:0 count:0}}

 # Zero buckets mean no observations, thus the denominator in the average is 0
 # leading to 0/0, which is NaN.
 eval instant at 5m histogram_avg(rate(const_histogram[5m]))
+    expect no_warn
    {} NaN

 # Zero buckets mean no observations, so count is 0.
 eval instant at 5m histogram_count(rate(const_histogram[5m]))
+    expect no_warn
    {} 0.0

 # Zero buckets mean no observations and empty histogram has a sum of 0 by definition.
 eval instant at 5m histogram_sum(rate(const_histogram[5m]))
+    expect no_warn
    {} 0.0

 # Zero buckets mean no observations, thus the denominator in the fraction is 0,
 # leading to 0/0, which is NaN.
 eval instant at 5m histogram_fraction(0.0, 1.0, rate(const_histogram[5m]))
+    expect no_warn
    {} NaN

 # Workaround to calculate the observation count corresponding to NaN fraction.
 eval instant at 5m histogram_count(rate(const_histogram[5m])) == 0.0 or histogram_fraction(0.0, 1.0, rate(const_histogram[5m])) * histogram_count(rate(const_histogram[5m]))
+    expect no_warn
    {} 0.0

 # Zero buckets mean no observations, so there is no value that observations fall below,
 # which means that any quantile is a NaN.
 eval instant at 5m histogram_quantile(1.0, rate(const_histogram[5m]))
+    expect no_warn
    {} NaN

 # Zero buckets mean no observations, so there is no standard deviation.
 eval instant at 5m histogram_stddev(rate(const_histogram[5m]))
+    expect no_warn
    {} NaN

 # Zero buckets mean no observations, so there is no standard variance.
 eval instant at 5m histogram_stdvar(rate(const_histogram[5m]))
+    expect no_warn
    {} NaN

 clear
@ -1259,10 +1277,10 @@ load 6m
 # while the 3rd one has different ones.

 eval instant at 12m sum_over_time(nhcb_metric[13m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric"

 eval instant at 12m avg_over_time(nhcb_metric[13m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric"

 eval instant at 12m last_over_time(nhcb_metric[13m])
  expect no_warn
@ -1281,13 +1299,13 @@ eval instant at 12m changes(nhcb_metric[13m])
  {} 1

 eval instant at 12m delta(nhcb_metric[13m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric"

 eval instant at 12m increase(nhcb_metric[13m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric"

 eval instant at 12m rate(nhcb_metric[13m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric"

 eval instant at 12m resets(nhcb_metric[13m])
  expect no_warn
@ -1299,10 +1317,10 @@ eval instant at 12m resets(nhcb_metric[13m])
 # otherwise.

 eval instant at 18m sum_over_time(nhcb_metric[13m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric"

 eval instant at 18m avg_over_time(nhcb_metric[13m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric"

 eval instant at 18m last_over_time(nhcb_metric[13m])
  expect no_warn
@ -1321,7 +1339,7 @@ eval instant at 18m changes(nhcb_metric[13m])
  {} 1

 eval instant at 18m delta(nhcb_metric[13m])
-  expect warn
+  expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric"

 eval instant at 18m increase(nhcb_metric[13m])
  expect no_warn
@ -1485,6 +1503,7 @@ load 1m
 # Note that the 2nd bucket has an exaggerated increase of 2479.939393939394 (although
 # it has a value of only 2475 at the end of the range).
 eval instant at 55m increase(metric[90m])
+    expect no_warn
    {type="histogram"} {{count:2490 sum:50.303030303030305 counter_reset_hint:gauge buckets:[10.06060606060606 2479.939393939394]}}
    {type="counter"} 2490

@ -1492,6 +1511,7 @@ eval instant at 55m increase(metric[90m])
 # The 2nd bucket again has an exaggerated increase, but it is less obvious because of the
 # right-side extrapolation.
 eval instant at 54m30s increase(metric[90m])
+    expect no_warn
    {type="histogram"} {{count:2512.9166666666665 sum:50.76599326599326 counter_reset_hint:gauge buckets:[10.153198653198652 2502.7634680134674]}}
    {type="counter"} 2512.9166666666665

@ -1501,6 +1521,7 @@ eval instant at 54m30s increase(metric[90m])
 # easily here because the last sample in the range coincides with the boundary, where the 2nd bucket has
 # a value of 2475 but has increased by 2476.2045454545455 according to the returned result.
 eval instant at 55m increase(metric[55m15s])
+    expect no_warn
    {type="histogram"} {{count:2486.25 sum:50.227272727272734 counter_reset_hint:gauge buckets:[10.045454545454547 2476.2045454545455]}}
    {type="counter"} 2486.25

@ -1508,20 +1529,25 @@ eval instant at 55m increase(metric[55m15s])
 # This means no change of extrapolation is required for the histogram count (and neither for the float counter),
 # however, the 2nd bucket's extrapolation will reach zero within the range.
 eval instant at 54m30s increase(metric[54m45s])
+    expect no_warn
    {type="histogram"} {{count:2509.375 sum:50.69444444444444 counter_reset_hint:gauge buckets:[10.13888888888889 2499.236111111111]}}
    {type="counter"} 2509.375

 # Try the same, but now extract just the histogram count via `histogram_count`.
 eval instant at 55m histogram_count(increase(metric[90m]))
+    expect no_warn
    {type="histogram"} 2490

 eval instant at 54m30s histogram_count(increase(metric[90m]))
+    expect no_warn
    {type="histogram"} 2512.9166666666665

 eval instant at 55m histogram_count(increase(metric[55m15s]))
+    expect no_warn
    {type="histogram"} 2486.25

 eval instant at 54m30s histogram_count(increase(metric[54m45s]))
+    expect no_warn
    {type="histogram"} 2509.375

 clear
@ -1581,3 +1607,41 @@ eval instant at 1m histogram_quantile(0.5, myHistogram2)

 eval instant at 1m histogram_quantile(0.5, mixedHistogram)
 	expect warn msg: PromQL warning: vector contains a mix of classic and native histograms for metric name "mixedHistogram"
+
+
+clear
+
+load 1m
+    reset{timing="late"} {{schema:0 sum:1 count:0 buckets:[1 1 1]}} {{schema:0 sum:1 count:2 buckets:[1 1 1]}} {{schema:0 sum:1 count:3 buckets:[1 1 1]}}  {{schema:0 sum:1 count:2 buckets:[1 1 1]}}
+    reset{timing="early"} {{schema:0 sum:1 count:3 buckets:[1 1 1]}}  {{schema:0 sum:1 count:2 buckets:[1 1 1]}} {{schema:0 sum:1 count:2 buckets:[1 1 1]}} {{schema:0 sum:1 count:3 buckets:[1 1 1]}} 
+
+# Trigger an annotation about conflicting counter resets by going through the
+# HistogramStatsIterator, which creates counter reset hints on the fly.
+eval instant at 5m 1*histogram_count(sum_over_time(reset{timing="late"}[5m]))
+    expect warn msg: PromQL warning: conflicting counter resets during histogram addition
+    {timing="late"} 7
+
+eval instant at 5m 1*histogram_count(sum(reset))
+    expect warn msg: PromQL warning: conflicting counter resets during histogram aggregation
+    {} 5
+
+eval instant at 5m 1*histogram_count(avg(reset))
+    expect warn msg: PromQL warning: conflicting counter resets during histogram aggregation
+    {} 2.5
+
+# No annotation with the right timing.
+eval instant at 30s 1*histogram_count(sum(reset))
+    expect no_warn
+    {} 3
+
+eval instant at 30s 1*histogram_count(avg(reset))
+    expect no_warn
+    {} 1.5
+
+# Ensure that the annotation does not happen with rate.
+eval instant at 5m 1*histogram_count(rate(reset{timing="late"}[5m]))
+    expect no_warn
+    {timing="late"} 0.0175
+
+# NOTE: The `1*` part in the expressions above should not be needed.
+# It can be removed once https://github.com/prometheus/prometheus/pull/17127 is merged.