mirror of https://github.com/grafana/grafana.git
Alerting: Metric to count imported from Prometheus rules (#100847)
This commit is contained in:
parent
2638de6aeb
commit
d44728f4e5
|
|
@ -457,9 +457,9 @@ func grafanaRuleGroupToPrometheus(group string, rules []models.AlertRule) (apimo
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, rule := range rules {
|
for i, rule := range rules {
|
||||||
promDefinition := rule.PrometheusRuleDefinition()
|
promDefinition, err := rule.PrometheusRuleDefinition()
|
||||||
if promDefinition == "" {
|
if err != nil {
|
||||||
return apimodels.PrometheusRuleGroup{}, fmt.Errorf("failed to get the Prometheus definition of the rule with UID %s", rule.UID)
|
return apimodels.PrometheusRuleGroup{}, fmt.Errorf("failed to get the Prometheus definition of the rule with UID %s: %w", rule.UID, err)
|
||||||
}
|
}
|
||||||
var r apimodels.PrometheusRule
|
var r apimodels.PrometheusRule
|
||||||
if err := yaml.Unmarshal([]byte(promDefinition), &r); err != nil {
|
if err := yaml.Unmarshal([]byte(promDefinition), &r); err != nil {
|
||||||
|
|
|
||||||
|
|
@ -115,7 +115,10 @@ func TestRouteConvertPrometheusPostRuleGroup(t *testing.T) {
|
||||||
require.Equal(t, fmt.Sprintf("[%s] %s", simpleGroup.Name, simpleGroup.Rules[0].Alert), remaining[0].Title)
|
require.Equal(t, fmt.Sprintf("[%s] %s", simpleGroup.Name, simpleGroup.Rules[0].Alert), remaining[0].Title)
|
||||||
promRuleYAML, err := yaml.Marshal(simpleGroup.Rules[0])
|
promRuleYAML, err := yaml.Marshal(simpleGroup.Rules[0])
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Equal(t, string(promRuleYAML), remaining[0].PrometheusRuleDefinition())
|
|
||||||
|
promDefinition, err := remaining[0].PrometheusRuleDefinition()
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, string(promRuleYAML), promDefinition)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("should fail to replace a provisioned rule group", func(t *testing.T) {
|
t.Run("should fail to replace a provisioned rule group", func(t *testing.T) {
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ type Scheduler struct {
|
||||||
Ticker *ticker.Metrics
|
Ticker *ticker.Metrics
|
||||||
EvaluationMissed *prometheus.CounterVec
|
EvaluationMissed *prometheus.CounterVec
|
||||||
SimplifiedEditorRules *prometheus.GaugeVec
|
SimplifiedEditorRules *prometheus.GaugeVec
|
||||||
|
PrometheusImportedRules *prometheus.GaugeVec
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
||||||
|
|
@ -192,5 +193,14 @@ func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
||||||
},
|
},
|
||||||
[]string{"org", "setting"},
|
[]string{"org", "setting"},
|
||||||
),
|
),
|
||||||
|
PrometheusImportedRules: promauto.With(r).NewGaugeVec(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Namespace: Namespace,
|
||||||
|
Subsystem: Subsystem,
|
||||||
|
Name: "prometheus_imported_rules",
|
||||||
|
Help: "The number of rules imported from a Prometheus-compatible source.",
|
||||||
|
},
|
||||||
|
[]string{"org"},
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -397,19 +397,18 @@ func WithoutInternalLabels() LabelOption {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (alertRule *AlertRule) ImportedFromPrometheus() bool {
|
func (alertRule *AlertRule) ImportedFromPrometheus() bool {
|
||||||
if alertRule.Metadata.PrometheusStyleRule == nil {
|
_, err := alertRule.PrometheusRuleDefinition()
|
||||||
return false
|
return err == nil
|
||||||
}
|
|
||||||
|
|
||||||
return alertRule.Metadata.PrometheusStyleRule.OriginalRuleDefinition != ""
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (alertRule *AlertRule) PrometheusRuleDefinition() string {
|
func (alertRule *AlertRule) PrometheusRuleDefinition() (string, error) {
|
||||||
if !alertRule.ImportedFromPrometheus() {
|
if alertRule.Metadata.PrometheusStyleRule != nil {
|
||||||
return ""
|
if alertRule.Metadata.PrometheusStyleRule.OriginalRuleDefinition != "" {
|
||||||
|
return alertRule.Metadata.PrometheusStyleRule.OriginalRuleDefinition, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return alertRule.Metadata.PrometheusStyleRule.OriginalRuleDefinition
|
return "", fmt.Errorf("prometheus rule definition is missing")
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLabels returns the labels specified as part of the alert rule.
|
// GetLabels returns the labels specified as part of the alert rule.
|
||||||
|
|
|
||||||
|
|
@ -1017,3 +1017,70 @@ func TestGeneratorFillsAllFields(t *testing.T) {
|
||||||
|
|
||||||
require.FailNow(t, "AlertRule generator does not populate fields", "skipped fields: %v", maps.Keys(fields))
|
require.FailNow(t, "AlertRule generator does not populate fields", "skipped fields: %v", maps.Keys(fields))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAlertRule_PrometheusRuleDefinition(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
rule AlertRule
|
||||||
|
expectedResult string
|
||||||
|
expectedErrorMsg string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "rule with prometheus definition",
|
||||||
|
rule: AlertRule{
|
||||||
|
Metadata: AlertRuleMetadata{
|
||||||
|
PrometheusStyleRule: &PrometheusStyleRule{
|
||||||
|
OriginalRuleDefinition: "groups:\n- name: example\n rules:\n - alert: HighRequestLatency\n expr: request_latency_seconds{job=\"myjob\"} > 0.5\n for: 10m\n labels:\n severity: page\n annotations:\n summary: High request latency",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedResult: "groups:\n- name: example\n rules:\n - alert: HighRequestLatency\n expr: request_latency_seconds{job=\"myjob\"} > 0.5\n for: 10m\n labels:\n severity: page\n annotations:\n summary: High request latency",
|
||||||
|
expectedErrorMsg: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "rule with empty prometheus definition",
|
||||||
|
rule: AlertRule{
|
||||||
|
Metadata: AlertRuleMetadata{
|
||||||
|
PrometheusStyleRule: &PrometheusStyleRule{
|
||||||
|
OriginalRuleDefinition: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedResult: "",
|
||||||
|
expectedErrorMsg: "prometheus rule definition is missing",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "rule with nil prometheus style rule",
|
||||||
|
rule: AlertRule{
|
||||||
|
Metadata: AlertRuleMetadata{
|
||||||
|
PrometheusStyleRule: nil,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedResult: "",
|
||||||
|
expectedErrorMsg: "prometheus rule definition is missing",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "rule with empty metadata",
|
||||||
|
rule: AlertRule{},
|
||||||
|
expectedResult: "",
|
||||||
|
expectedErrorMsg: "prometheus rule definition is missing",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result, err := tt.rule.PrometheusRuleDefinition()
|
||||||
|
isPrometheusRule := tt.rule.ImportedFromPrometheus()
|
||||||
|
|
||||||
|
if tt.expectedErrorMsg != "" {
|
||||||
|
require.Error(t, err)
|
||||||
|
require.Equal(t, tt.expectedErrorMsg, err.Error())
|
||||||
|
require.False(t, isPrometheusRule)
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, tt.expectedResult, result)
|
||||||
|
require.True(t, isPrometheusRule)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,8 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) {
|
||||||
orgsNfSettings := make(map[int64]int64)
|
orgsNfSettings := make(map[int64]int64)
|
||||||
// gauge for groups per org
|
// gauge for groups per org
|
||||||
groupsPerOrg := make(map[int64]map[string]struct{})
|
groupsPerOrg := make(map[int64]map[string]struct{})
|
||||||
|
// gauge for rules imported from Prometheus per org
|
||||||
|
orgsRulesPrometheusImported := make(map[int64]int64)
|
||||||
|
|
||||||
simplifiedEditorSettingsPerOrg := make(map[int64]map[string]int64) // orgID -> setting -> count
|
simplifiedEditorSettingsPerOrg := make(map[int64]map[string]int64) // orgID -> setting -> count
|
||||||
|
|
||||||
|
|
@ -86,6 +88,10 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if rule.ImportedFromPrometheus() {
|
||||||
|
orgsRulesPrometheusImported[rule.OrgID]++
|
||||||
|
}
|
||||||
|
|
||||||
// Count groups per org
|
// Count groups per org
|
||||||
orgGroups, ok := groupsPerOrg[rule.OrgID]
|
orgGroups, ok := groupsPerOrg[rule.OrgID]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
|
@ -100,6 +106,7 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) {
|
||||||
sch.metrics.SimpleNotificationRules.Reset()
|
sch.metrics.SimpleNotificationRules.Reset()
|
||||||
sch.metrics.Groups.Reset()
|
sch.metrics.Groups.Reset()
|
||||||
sch.metrics.SimplifiedEditorRules.Reset()
|
sch.metrics.SimplifiedEditorRules.Reset()
|
||||||
|
sch.metrics.PrometheusImportedRules.Reset()
|
||||||
|
|
||||||
// Set metrics
|
// Set metrics
|
||||||
for key, count := range buckets {
|
for key, count := range buckets {
|
||||||
|
|
@ -111,6 +118,9 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) {
|
||||||
for orgID, groups := range groupsPerOrg {
|
for orgID, groups := range groupsPerOrg {
|
||||||
sch.metrics.Groups.WithLabelValues(fmt.Sprint(orgID)).Set(float64(len(groups)))
|
sch.metrics.Groups.WithLabelValues(fmt.Sprint(orgID)).Set(float64(len(groups)))
|
||||||
}
|
}
|
||||||
|
for orgID, count := range orgsRulesPrometheusImported {
|
||||||
|
sch.metrics.PrometheusImportedRules.WithLabelValues(fmt.Sprint(orgID)).Set(float64(count))
|
||||||
|
}
|
||||||
for orgID, settings := range simplifiedEditorSettingsPerOrg {
|
for orgID, settings := range simplifiedEditorSettingsPerOrg {
|
||||||
for setting, count := range settings {
|
for setting, count := range settings {
|
||||||
sch.metrics.SimplifiedEditorRules.WithLabelValues(fmt.Sprint(orgID), setting).Set(float64(count))
|
sch.metrics.SimplifiedEditorRules.WithLabelValues(fmt.Sprint(orgID), setting).Set(float64(count))
|
||||||
|
|
|
||||||
|
|
@ -562,6 +562,7 @@ func TestSchedule_updateRulesMetrics(t *testing.T) {
|
||||||
sch := setupScheduler(t, ruleStore, nil, reg, nil, nil, nil)
|
sch := setupScheduler(t, ruleStore, nil, reg, nil, nil, nil)
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
const firstOrgID int64 = 1
|
const firstOrgID int64 = 1
|
||||||
|
const secondOrgID int64 = 2
|
||||||
|
|
||||||
t.Run("grafana_alerting_rule_group_rules metric should reflect the current state", func(t *testing.T) {
|
t.Run("grafana_alerting_rule_group_rules metric should reflect the current state", func(t *testing.T) {
|
||||||
// Without any rules there are no metrics
|
// Without any rules there are no metrics
|
||||||
|
|
@ -640,6 +641,62 @@ func TestSchedule_updateRulesMetrics(t *testing.T) {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("prometheus_imported_rules metric should reflect the current state", func(t *testing.T) {
|
||||||
|
// Without any imported rules there are no metrics
|
||||||
|
t.Run("it should not show metrics", func(t *testing.T) {
|
||||||
|
sch.updateRulesMetrics([]*models.AlertRule{})
|
||||||
|
|
||||||
|
expectedMetric := ""
|
||||||
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_prometheus_imported_rules")
|
||||||
|
require.NoError(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
alertRule1 := models.RuleGen.With(
|
||||||
|
models.RuleGen.WithOrgID(firstOrgID),
|
||||||
|
models.RuleGen.WithPrometheusOriginalRuleDefinition("1"),
|
||||||
|
).GenerateRef()
|
||||||
|
|
||||||
|
t.Run("it should show one imported rule in a single org", func(t *testing.T) {
|
||||||
|
sch.updateRulesMetrics([]*models.AlertRule{alertRule1})
|
||||||
|
|
||||||
|
expectedMetric := fmt.Sprintf(
|
||||||
|
`# HELP grafana_alerting_prometheus_imported_rules The number of rules imported from a Prometheus-compatible source.
|
||||||
|
# TYPE grafana_alerting_prometheus_imported_rules gauge
|
||||||
|
grafana_alerting_prometheus_imported_rules{org="%[1]d"} 1
|
||||||
|
`, alertRule1.OrgID)
|
||||||
|
|
||||||
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_prometheus_imported_rules")
|
||||||
|
require.NoError(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
alertRule2 := models.RuleGen.With(
|
||||||
|
models.RuleGen.WithOrgID(secondOrgID),
|
||||||
|
models.RuleGen.WithPrometheusOriginalRuleDefinition("1"),
|
||||||
|
).GenerateRef()
|
||||||
|
|
||||||
|
t.Run("it should show two imported rules in two orgs", func(t *testing.T) {
|
||||||
|
sch.updateRulesMetrics([]*models.AlertRule{alertRule1, alertRule2})
|
||||||
|
|
||||||
|
expectedMetric := fmt.Sprintf(
|
||||||
|
`# HELP grafana_alerting_prometheus_imported_rules The number of rules imported from a Prometheus-compatible source.
|
||||||
|
# TYPE grafana_alerting_prometheus_imported_rules gauge
|
||||||
|
grafana_alerting_prometheus_imported_rules{org="%[1]d"} 1
|
||||||
|
grafana_alerting_prometheus_imported_rules{org="%[2]d"} 1
|
||||||
|
`, alertRule1.OrgID, alertRule2.OrgID)
|
||||||
|
|
||||||
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_prometheus_imported_rules")
|
||||||
|
require.NoError(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("after removing all rules it should not show any metrics", func(t *testing.T) {
|
||||||
|
sch.updateRulesMetrics([]*models.AlertRule{})
|
||||||
|
|
||||||
|
expectedMetric := ""
|
||||||
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_prometheus_imported_rules")
|
||||||
|
require.NoError(t, err)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
t.Run("rule_groups metric should reflect the current state", func(t *testing.T) {
|
t.Run("rule_groups metric should reflect the current state", func(t *testing.T) {
|
||||||
const firstOrgID int64 = 1
|
const firstOrgID int64 = 1
|
||||||
const secondOrgID int64 = 2
|
const secondOrgID int64 = 2
|
||||||
|
|
|
||||||
|
|
@ -607,10 +607,7 @@ func (st DBstore) ListAlertRules(ctx context.Context, query *ngmodels.ListAlertR
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if query.ImportedPrometheusRule != nil { // remove false-positive hits from the result
|
if query.ImportedPrometheusRule != nil { // remove false-positive hits from the result
|
||||||
hasOriginalRuleDefinition := converted.Metadata.PrometheusStyleRule != nil && len(converted.Metadata.PrometheusStyleRule.OriginalRuleDefinition) > 0
|
if *query.ImportedPrometheusRule != converted.ImportedFromPrometheus() {
|
||||||
if *query.ImportedPrometheusRule && !hasOriginalRuleDefinition {
|
|
||||||
continue
|
|
||||||
} else if !*query.ImportedPrometheusRule && hasOriginalRuleDefinition {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -215,11 +215,7 @@ func (f *RuleStore) ListAlertRules(_ context.Context, q *models.ListAlertRulesQu
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if q.ImportedPrometheusRule != nil {
|
if q.ImportedPrometheusRule != nil {
|
||||||
hasOriginalRuleDefinition := r.PrometheusRuleDefinition() != ""
|
if *q.ImportedPrometheusRule != r.ImportedFromPrometheus() {
|
||||||
if *q.ImportedPrometheusRule && !hasOriginalRuleDefinition {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if !*q.ImportedPrometheusRule && hasOriginalRuleDefinition {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue