mirror of https://github.com/grafana/grafana.git
Alerting: Fix resolved notifications for same-label Error to Normal transitions (#106210)
Actionlint / Lint GitHub Actions files (push) Has been cancelled
Details
Backend Code Checks / Validate Backend Configs (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
CodeQL checks / Analyze (actions) (push) Has been cancelled
Details
CodeQL checks / Analyze (go) (push) Has been cancelled
Details
CodeQL checks / Analyze (javascript) (push) Has been cancelled
Details
Lint Frontend / Verify i18n (push) Has been cancelled
Details
Lint Frontend / Lint (push) Has been cancelled
Details
Lint Frontend / Typecheck (push) Has been cancelled
Details
Lint Frontend / Betterer (push) Has been cancelled
Details
golangci-lint / lint-go (push) Has been cancelled
Details
End-to-end tests / Build & Package Grafana (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (1) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (2) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (3) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (4) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (5) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (6) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (7) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
Reject GitHub secrets / reject-gh-secrets (push) Has been cancelled
Details
Run dashboard schema v2 e2e / dashboard-schema-v2-e2e (push) Has been cancelled
Details
Dispatch sync to mirror / dispatch-job (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (dashboards-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (panels-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (smoke-tests-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (various-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (old arch) (old-arch/dashboards-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (old arch) (old-arch/panels-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (old arch) (old-arch/smoke-tests-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (old arch) (old-arch/various-suite) (push) Has been cancelled
Details
Actionlint / Lint GitHub Actions files (push) Has been cancelled
Details
Backend Code Checks / Validate Backend Configs (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Backend Unit Tests / Grafana Enterprise (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
CodeQL checks / Analyze (actions) (push) Has been cancelled
Details
CodeQL checks / Analyze (go) (push) Has been cancelled
Details
CodeQL checks / Analyze (javascript) (push) Has been cancelled
Details
Lint Frontend / Verify i18n (push) Has been cancelled
Details
Lint Frontend / Lint (push) Has been cancelled
Details
Lint Frontend / Typecheck (push) Has been cancelled
Details
Lint Frontend / Betterer (push) Has been cancelled
Details
golangci-lint / lint-go (push) Has been cancelled
Details
End-to-end tests / Build & Package Grafana (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (1) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (2) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (3) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (4) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (5) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (6) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (7) (push) Has been cancelled
Details
Frontend tests / Unit tests (${{ matrix.chunk }} / 8) (8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Integration Tests / Sqlite (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Integration Tests / MySQL (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (1/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (2/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (3/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (4/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (5/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (6/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (7/8) (push) Has been cancelled
Details
Integration Tests / Postgres (${{ matrix.shard }}) (8/8) (push) Has been cancelled
Details
Reject GitHub secrets / reject-gh-secrets (push) Has been cancelled
Details
Run dashboard schema v2 e2e / dashboard-schema-v2-e2e (push) Has been cancelled
Details
Dispatch sync to mirror / dispatch-job (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (dashboards-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (panels-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (smoke-tests-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (various-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (old arch) (old-arch/dashboards-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (old arch) (old-arch/panels-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (old arch) (old-arch/smoke-tests-suite) (push) Has been cancelled
Details
End-to-end tests / ${{ matrix.suite }} (old arch) (old-arch/various-suite) (push) Has been cancelled
Details
What is this feature? Ensures that a resolved notification is sent when an alert state transitions from Error to Normal, after the configured number of evaluation intervals (the "Missing series evaluations to resolve" setting). Why do we need this feature? Before this change, when an alert transitioned from Error to Normal and the new Normal alert instance had the same labels, Grafana did not send a resolved notification for the Error alert state. Instead, the alert was resolved automatically in the Alertmanager a few evaluation intervals later, based on its endsAt. With this change, the resolved notification is sent after the configured number of evaluation intervals ("Missing series evaluations to resolve").
This commit is contained in:
parent
a7368e004b
commit
3bb4c92028
|
|
@ -109,11 +109,27 @@ func expandAnnotationsAndLabels(ctx context.Context, log log.Logger, alertRule *
|
|||
labels, _ := expand(ctx, log, alertRule.Title, alertRule.Labels, templateData, externalURL, result.EvaluatedAt)
|
||||
annotations, _ := expand(ctx, log, alertRule.Title, alertRule.Annotations, templateData, externalURL, result.EvaluatedAt)
|
||||
|
||||
lbs := make(data.Labels, len(extraLabels)+len(labels)+len(resultLabels))
|
||||
// If the result contains an error, we want to add the ref_id and datasource_uid labels
|
||||
// to the new state if the alert rule should be in the ErrorErrState.
|
||||
var errorLabels data.Labels
|
||||
if result.State == eval.Error && alertRule.ExecErrState == ngModels.ErrorErrState {
|
||||
refID, datasourceUID := datasourceErrorInfo(result.Error, alertRule)
|
||||
if refID != "" || datasourceUID != "" {
|
||||
errorLabels = data.Labels{
|
||||
"ref_id": refID,
|
||||
"datasource_uid": datasourceUID,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lbs := make(data.Labels, len(extraLabels)+len(labels)+len(resultLabels)+len(errorLabels))
|
||||
dupes := make(data.Labels)
|
||||
for key, val := range extraLabels {
|
||||
lbs[key] = val
|
||||
}
|
||||
for key, val := range errorLabels {
|
||||
lbs[key] = val
|
||||
}
|
||||
for key, val := range labels {
|
||||
ruleVal, ok := lbs[key]
|
||||
// if duplicate labels exist, reserved label will take precedence
|
||||
|
|
|
|||
|
|
@ -3519,7 +3519,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -3702,7 +3701,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -3811,7 +3809,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -3846,7 +3843,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -4006,7 +4002,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -4111,7 +4106,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
Error: datasourceError,
|
||||
State: eval.Normal,
|
||||
|
|
@ -4244,6 +4238,22 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
t2: {
|
||||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
LatestResult: newEvaluation(t1, eval.Error),
|
||||
StartsAt: t1,
|
||||
EndsAt: t1.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t1,
|
||||
LastSentAt: &t1,
|
||||
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
|
||||
"Error": datasourceError.Error(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Normal,
|
||||
|
|
@ -4251,11 +4261,28 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
StartsAt: t2,
|
||||
EndsAt: t2,
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t1,
|
||||
},
|
||||
},
|
||||
},
|
||||
t3: {
|
||||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Normal,
|
||||
StateReason: "MissingSeries",
|
||||
LatestResult: newEvaluation(t1, eval.Error),
|
||||
StartsAt: t1,
|
||||
EndsAt: t3,
|
||||
ResolvedAt: &t3,
|
||||
LastEvaluationTime: t3,
|
||||
LastSentAt: &t3,
|
||||
Error: datasourceError,
|
||||
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
|
||||
"Error": datasourceError.Error(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
|
|
@ -4265,7 +4292,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
StartsAt: t2,
|
||||
EndsAt: t2,
|
||||
LastEvaluationTime: t3,
|
||||
LastSentAt: &t1,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -4279,7 +4305,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
StartsAt: t2,
|
||||
EndsAt: t2,
|
||||
LastEvaluationTime: t4,
|
||||
LastSentAt: &t1,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -4442,7 +4467,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
Error: datasourceError,
|
||||
State: eval.Error,
|
||||
|
|
@ -4460,6 +4484,22 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
t3: {
|
||||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
LatestResult: newEvaluation(t2, eval.Error),
|
||||
StartsAt: t1,
|
||||
EndsAt: t2.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t1,
|
||||
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
|
||||
"Error": datasourceError.Error(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Normal,
|
||||
|
|
@ -4467,7 +4507,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
StartsAt: t3,
|
||||
EndsAt: t3,
|
||||
LastEvaluationTime: t3,
|
||||
LastSentAt: &t1,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -4475,12 +4514,22 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Normal,
|
||||
LatestResult: newEvaluation(t3, eval.Normal),
|
||||
StartsAt: t3,
|
||||
EndsAt: t3,
|
||||
LastEvaluationTime: t3,
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
LatestResult: newEvaluation(t4, eval.Error),
|
||||
Error: datasourceError,
|
||||
StartsAt: t4,
|
||||
StartsAt: t1,
|
||||
EndsAt: t4.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t4,
|
||||
LastSentAt: &t4,
|
||||
|
|
@ -4658,7 +4707,17 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Normal,
|
||||
LatestResult: newEvaluation(t1, eval.Normal),
|
||||
StartsAt: t1,
|
||||
EndsAt: t1,
|
||||
LastEvaluationTime: t1,
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -4760,7 +4819,17 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Pending,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Pending,
|
||||
LatestResult: newEvaluation(t1, eval.Alerting),
|
||||
StartsAt: t1,
|
||||
EndsAt: t1.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t1,
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -4854,7 +4923,17 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Pending,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Pending,
|
||||
LatestResult: newEvaluation(t1, eval.Alerting),
|
||||
StartsAt: t1,
|
||||
EndsAt: t1.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t1,
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -4872,14 +4951,31 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
t3: {
|
||||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
LatestResult: newEvaluation(t2, eval.Error),
|
||||
StartsAt: t2,
|
||||
EndsAt: t2.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t2,
|
||||
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
|
||||
"Error": datasourceError.Error(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Pending,
|
||||
State: &State{
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Pending,
|
||||
State: eval.Alerting,
|
||||
LatestResult: newEvaluation(t3, eval.Alerting),
|
||||
StartsAt: t3,
|
||||
EndsAt: t3.Add(ResendDelay * 4),
|
||||
FiredAt: &t3,
|
||||
LastEvaluationTime: t3,
|
||||
LastSentAt: &t2,
|
||||
LastSentAt: &t3,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -4999,6 +5095,22 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
t2: {
|
||||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
LatestResult: newEvaluation(t1, eval.Error),
|
||||
StartsAt: t1,
|
||||
EndsAt: t1.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t1,
|
||||
LastSentAt: &t1,
|
||||
Error: datasourceError,
|
||||
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
|
||||
"Error": datasourceError.Error(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Normal,
|
||||
|
|
@ -5006,7 +5118,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
StartsAt: t2,
|
||||
EndsAt: t2,
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t1, // TODO: Fix me. This should be t2 since we should be resolving the previous DatasourceError alert.
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -5081,7 +5192,6 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
|
|
@ -5100,15 +5210,30 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
|||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
LatestResult: newEvaluation(t1, eval.Error),
|
||||
StartsAt: t1,
|
||||
EndsAt: t1.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t1,
|
||||
LastSentAt: &t1,
|
||||
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
|
||||
"Error": datasourceError.Error(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
Labels: labels["system + rule"],
|
||||
State: eval.Error,
|
||||
Error: genericError,
|
||||
LatestResult: newEvaluation(t2, eval.Error),
|
||||
StartsAt: t1,
|
||||
StartsAt: t2,
|
||||
EndsAt: t2.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t1,
|
||||
LastSentAt: &t2,
|
||||
Annotations: genericErrorAnnotations,
|
||||
},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1016,7 +1016,7 @@ func TestProcessEvalResults(t *testing.T) {
|
|||
// TODO(@moustafab): figure out why this test doesn't fail as is
|
||||
desc: "classic condition, execution Error as Error (alerting -> query error -> alerting)",
|
||||
alertRule: baseRuleWith(m.WithErrorExecAs(models.ErrorErrState)),
|
||||
expectedAnnotations: 3,
|
||||
expectedAnnotations: 2,
|
||||
evalResults: map[time.Time]eval.Results{
|
||||
t1: {
|
||||
newResult(eval.WithState(eval.Alerting), eval.WithLabels(data.Labels{})),
|
||||
|
|
@ -1030,15 +1030,33 @@ func TestProcessEvalResults(t *testing.T) {
|
|||
},
|
||||
expectedStates: []*state.State{
|
||||
{
|
||||
Labels: labels["system + rule"],
|
||||
Labels: data.Labels{"label": "test", "system": "owned"},
|
||||
ResultFingerprint: data.Labels{}.Fingerprint(),
|
||||
State: eval.Alerting,
|
||||
LatestResult: newEvaluation(t3, eval.Alerting),
|
||||
StartsAt: t3,
|
||||
StartsAt: t1,
|
||||
EndsAt: t3.Add(state.ResendDelay * 4),
|
||||
FiredAt: &t3,
|
||||
FiredAt: &t1,
|
||||
LastEvaluationTime: t3,
|
||||
LastSentAt: &t1, // Resend delay is 30s, so last sent at is t1.
|
||||
LastSentAt: &t1,
|
||||
Annotations: map[string]string{
|
||||
"annotation": "test",
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: data.Labels{"system": "owned", "label": "test", "ref_id": "A", "datasource_uid": "datasource_uid_1"},
|
||||
ResultFingerprint: data.Labels{}.Fingerprint(),
|
||||
State: eval.Error,
|
||||
LatestResult: newEvaluation(t2, eval.Error),
|
||||
StartsAt: t2,
|
||||
EndsAt: t2.Add(state.ResendDelay * 4),
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t2,
|
||||
Error: expr.MakeQueryError("A", "test-datasource-uid", errors.New("this is an error")),
|
||||
Annotations: map[string]string{
|
||||
"Error": "[sse.dataQueryError] failed to execute query [A]: this is an error",
|
||||
"annotation": "test",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -219,16 +219,30 @@ func (a *State) Maintain(interval int64, evaluatedAt time.Time) {
|
|||
a.EndsAt = nextEndsTime(interval, evaluatedAt)
|
||||
}
|
||||
|
||||
// AddErrorInformation adds annotations to the state to indicate that an error occurred.
|
||||
// If addDatasourceInfoToLabels is true, the ref_id and datasource_uid are added to the labels,
|
||||
// otherwise, they are added to the annotations.
|
||||
func (a *State) AddErrorInformation(err error, rule *models.AlertRule, addDatasourceInfoToLabels bool) {
|
||||
// addErrorInfoToAnnotations adds annotations to the state to indicate that an error occurred.
|
||||
func (a *State) addErrorInfoToAnnotations(err error, rule *models.AlertRule) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
a.Annotations["Error"] = err.Error()
|
||||
|
||||
refID, datasourceUID := datasourceErrorInfo(err, rule)
|
||||
|
||||
if refID != "" || datasourceUID != "" {
|
||||
a.Annotations["ref_id"] = refID
|
||||
a.Annotations["datasource_uid"] = datasourceUID
|
||||
} else {
|
||||
// Remove the ref_id and datasource_uid from the annotations if they are present.
|
||||
// It can happen if the alert state hasn't changed, but the error is different now.
|
||||
delete(a.Annotations, "ref_id")
|
||||
delete(a.Annotations, "datasource_uid")
|
||||
}
|
||||
}
|
||||
|
||||
// datasourceErrorInfo returns ref_id and datasource_uid if the evaluation
|
||||
// failed because a query returned an error.
|
||||
func datasourceErrorInfo(err error, rule *models.AlertRule) (string, string) {
|
||||
// If the evaluation failed because a query returned an error then add the Ref ID and
|
||||
// Datasource UID as labels or annotations
|
||||
var utilError errutil.Error
|
||||
|
|
@ -236,22 +250,12 @@ func (a *State) AddErrorInformation(err error, rule *models.AlertRule, addDataso
|
|||
(errors.Is(err, expr.QueryError) || errors.Is(err, expr.ConversionError)) {
|
||||
for _, next := range rule.Data {
|
||||
if next.RefID == utilError.PublicPayload["refId"].(string) {
|
||||
if addDatasourceInfoToLabels {
|
||||
a.Labels["ref_id"] = next.RefID
|
||||
a.Labels["datasource_uid"] = next.DatasourceUID
|
||||
} else {
|
||||
a.Annotations["ref_id"] = next.RefID
|
||||
a.Annotations["datasource_uid"] = next.DatasourceUID
|
||||
}
|
||||
break
|
||||
return next.RefID, next.DatasourceUID
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Remove the ref_id and datasource_uid from the annotations if they are present.
|
||||
// It can happen if the alert state hasn't changed, but the error is different now.
|
||||
delete(a.Annotations, "ref_id")
|
||||
delete(a.Annotations, "datasource_uid")
|
||||
}
|
||||
|
||||
return "", ""
|
||||
}
|
||||
|
||||
func (a *State) SetNextValues(result eval.Result) {
|
||||
|
|
@ -464,12 +468,11 @@ func resultError(state *State, rule *models.AlertRule, result eval.Result, logge
|
|||
resultAlerting(state, rule, result, logger, models.StateReasonError)
|
||||
// This is a special case where Alerting and Pending should also have an error and reason
|
||||
state.Error = result.Error
|
||||
state.AddErrorInformation(result.Error, rule, false)
|
||||
state.addErrorInfoToAnnotations(result.Error, rule)
|
||||
case models.ErrorErrState:
|
||||
if state.State == eval.Error {
|
||||
prevEndsAt := state.EndsAt
|
||||
state.Error = result.Error
|
||||
state.AddErrorInformation(result.Error, rule, true)
|
||||
state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
logger.Debug("Keeping state",
|
||||
"state",
|
||||
|
|
@ -491,20 +494,23 @@ func resultError(state *State, rule *models.AlertRule, result eval.Result, logge
|
|||
"next_ends_at",
|
||||
nextEndsAt)
|
||||
state.SetError(result.Error, result.EvaluatedAt, nextEndsAt)
|
||||
state.AddErrorInformation(result.Error, rule, true)
|
||||
}
|
||||
// TODO: always add annotations
|
||||
if result.Error != nil {
|
||||
state.Annotations["Error"] = result.Error.Error()
|
||||
}
|
||||
case models.OkErrState:
|
||||
logger.Debug("Execution error state is Normal", "handler", "resultNormal", "previous_handler", handlerStr)
|
||||
resultNormal(state, rule, result, logger, "") // TODO: Should we add a reason?
|
||||
state.AddErrorInformation(result.Error, rule, false)
|
||||
state.addErrorInfoToAnnotations(result.Error, rule)
|
||||
case models.KeepLastErrState:
|
||||
logger := logger.New("previous_handler", handlerStr)
|
||||
resultKeepLast(state, rule, result, logger)
|
||||
state.AddErrorInformation(result.Error, rule, false)
|
||||
state.addErrorInfoToAnnotations(result.Error, rule)
|
||||
default:
|
||||
err := fmt.Errorf("unsupported execution error state: %s", rule.ExecErrState)
|
||||
state.SetError(err, state.StartsAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
|
||||
state.AddErrorInformation(result.Error, rule, false)
|
||||
state.addErrorInfoToAnnotations(result.Error, rule)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -798,20 +804,6 @@ func patch(newState, existingState *State, result eval.Result) {
|
|||
newState.Annotations[key] = value
|
||||
}
|
||||
}
|
||||
|
||||
// if the current state is "data source error" then it may have additional labels that may not exist in the new state.
|
||||
// See https://github.com/grafana/grafana/blob/c7fdf8ce706c2c9d438f5e6eabd6e580bac4946b/pkg/services/ngalert/state/state.go#L161-L163
|
||||
// copy known labels over to the new instance, it can help reduce flapping
|
||||
// TODO fix this?
|
||||
if existingState.State == eval.Error && result.State == eval.Error {
|
||||
setIfExist := func(lbl string) {
|
||||
if v, ok := existingState.Labels[lbl]; ok {
|
||||
newState.Labels[lbl] = v
|
||||
}
|
||||
}
|
||||
setIfExist("datasource_uid")
|
||||
setIfExist("ref_id")
|
||||
}
|
||||
}
|
||||
|
||||
func (a *State) transition(alertRule *models.AlertRule, result eval.Result, extraAnnotations data.Labels, logger log.Logger, takeImageFn takeImageFn) StateTransition {
|
||||
|
|
@ -831,27 +823,6 @@ func (a *State) transition(alertRule *models.AlertRule, result eval.Result, extr
|
|||
// Add the instance to the log context to help correlate log lines for a state
|
||||
logger = logger.New("instance", result.Instance)
|
||||
|
||||
// if the current state is Error but the result is different, then we need to clean up the extra labels
|
||||
// that were added after the state key was calculated
|
||||
// https://github.com/grafana/grafana/blob/1df4d332c982dc5e394201bb2ef35b442727ce63/pkg/services/ngalert/state/state.go#L298-L311
|
||||
// Usually, it happens in the case of classic conditions when the evalResult does not have labels.
|
||||
//
|
||||
// This is temporary change to make sure that the labels are not persistent in the state after it was in Error state
|
||||
// TODO yuri. Remove it when correct Error result with labels is provided
|
||||
if a.State == eval.Error && result.State != eval.Error {
|
||||
// This is possible because state was updated after the CacheID was calculated.
|
||||
_, curOk := a.Labels["ref_id"]
|
||||
_, resOk := result.Instance["ref_id"]
|
||||
if curOk && !resOk {
|
||||
delete(a.Labels, "ref_id")
|
||||
}
|
||||
_, curOk = a.Labels["datasource_uid"]
|
||||
_, resOk = result.Instance["datasource_uid"]
|
||||
if curOk && !resOk {
|
||||
delete(a.Labels, "datasource_uid")
|
||||
}
|
||||
}
|
||||
|
||||
switch result.State {
|
||||
case eval.Normal:
|
||||
logger.Debug("Setting next state", "handler", "resultNormal")
|
||||
|
|
|
|||
|
|
@ -1091,48 +1091,6 @@ func TestPatch(t *testing.T) {
|
|||
assert.Equal(t, orig.LastEvaluationTime, state.LastEvaluationTime)
|
||||
assert.Equal(t, orig.EvaluationDuration, state.EvaluationDuration)
|
||||
})
|
||||
|
||||
t.Run("if result Error and current state is Error it should copy datasource_uid and ref_id labels", func(t *testing.T) {
|
||||
state := randomSate(key)
|
||||
orig := state.Copy()
|
||||
current := randomSate(key)
|
||||
current.State = eval.Error
|
||||
current.Labels["datasource_uid"] = util.GenerateShortUID()
|
||||
current.Labels["ref_id"] = util.GenerateShortUID()
|
||||
|
||||
result := eval.Result{
|
||||
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
|
||||
State: eval.Error,
|
||||
}
|
||||
|
||||
expectedLabels := orig.Labels.Copy()
|
||||
expectedLabels["datasource_uid"] = current.Labels["datasource_uid"]
|
||||
expectedLabels["ref_id"] = current.Labels["ref_id"]
|
||||
|
||||
patch(&state, &current, result)
|
||||
|
||||
assert.Equal(t, expectedLabels, state.Labels)
|
||||
assert.Equal(t, current.State, state.State)
|
||||
assert.Equal(t, current.StateReason, state.StateReason)
|
||||
assert.Equal(t, current.Image, state.Image)
|
||||
assert.Equal(t, current.LatestResult, state.LatestResult)
|
||||
assert.Equal(t, current.Error, state.Error)
|
||||
assert.Equal(t, current.Values, state.Values)
|
||||
assert.Equal(t, current.StartsAt, state.StartsAt)
|
||||
assert.Equal(t, current.EndsAt, state.EndsAt)
|
||||
assert.Equal(t, current.ResolvedAt, state.ResolvedAt)
|
||||
assert.Equal(t, current.LastSentAt, state.LastSentAt)
|
||||
assert.Equal(t, current.LastEvaluationString, state.LastEvaluationString)
|
||||
|
||||
// Fields that should not change
|
||||
assert.Equal(t, orig.OrgID, state.OrgID)
|
||||
assert.Equal(t, orig.AlertRuleUID, state.AlertRuleUID)
|
||||
assert.Equal(t, orig.CacheID, state.CacheID)
|
||||
assert.Equal(t, orig.ResultFingerprint, state.ResultFingerprint)
|
||||
assert.Equal(t, orig.LastEvaluationTime, state.LastEvaluationTime)
|
||||
assert.Equal(t, orig.EvaluationDuration, state.EvaluationDuration)
|
||||
assert.EqualValues(t, orig.Annotations, state.Annotations)
|
||||
})
|
||||
}
|
||||
|
||||
func TestResultStateReason(t *testing.T) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue