grafana/pkg/services/ngalert/models/testing.go

383 lines
10 KiB
Go
Raw Normal View History

package models
import (
"encoding/json"
"fmt"
"math/rand"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/services/folder"
"github.com/grafana/grafana/pkg/util"
)
type AlertRuleMutator func(*AlertRule)
// AlertRuleGen provides a factory function that generates a random AlertRule.
// The mutators arguments allows changing fields of the resulting structure
func AlertRuleGen(mutators ...AlertRuleMutator) func() *AlertRule {
return func() *AlertRule {
randNoDataState := func() NoDataState {
s := [...]NoDataState{
Alerting,
NoData,
OK,
}
return s[rand.Intn(len(s))]
}
randErrState := func() ExecutionErrorState {
s := [...]ExecutionErrorState{
AlertingErrState,
ErrorErrState,
OkErrState,
}
return s[rand.Intn(len(s))]
}
interval := (rand.Int63n(6) + 1) * 10
forInterval := time.Duration(interval*rand.Int63n(6)) * time.Second
var annotations map[string]string = nil
if rand.Int63()%2 == 0 {
annotations = GenerateAlertLabels(rand.Intn(5), "ann-")
}
var labels map[string]string = nil
if rand.Int63()%2 == 0 {
labels = GenerateAlertLabels(rand.Intn(5), "lbl-")
}
var dashUID *string = nil
var panelID *int64 = nil
if rand.Int63()%2 == 0 {
d := util.GenerateShortUID()
dashUID = &d
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 14:22:58 +08:00
p := rand.Int63n(1500)
panelID = &p
}
rule := &AlertRule{
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 14:22:58 +08:00
ID: rand.Int63n(1500),
OrgID: rand.Int63n(1500) + 1, // Prevent OrgID=0 as this does not pass alert rule validation.
Title: "TEST-ALERT-" + util.GenerateShortUID(),
Condition: "A",
Data: []AlertQuery{GenerateAlertQuery()},
Updated: time.Now().Add(-time.Duration(rand.Intn(100) + 1)),
IntervalSeconds: rand.Int63n(60) + 1,
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 14:22:58 +08:00
Version: rand.Int63n(1500), // Don't generate a rule ID too big for postgres
UID: util.GenerateShortUID(),
NamespaceUID: util.GenerateShortUID(),
DashboardUID: dashUID,
PanelID: panelID,
RuleGroup: "TEST-GROUP-" + util.GenerateShortUID(),
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 14:22:58 +08:00
RuleGroupIndex: rand.Intn(1500),
NoDataState: randNoDataState(),
ExecErrState: randErrState(),
For: forInterval,
Annotations: annotations,
Labels: labels,
}
for _, mutator := range mutators {
mutator(rule)
}
return rule
}
}
func WithNotEmptyLabels(count int, prefix string) AlertRuleMutator {
return func(rule *AlertRule) {
rule.Labels = GenerateAlertLabels(count, prefix)
}
}
func WithUniqueID() AlertRuleMutator {
usedID := make(map[int64]struct{})
return func(rule *AlertRule) {
for {
Alerting: Write and Delete multiple alert instances. (#55350) Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions" Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
2022-10-06 14:22:58 +08:00
id := rand.Int63n(1500)
if _, ok := usedID[id]; !ok {
usedID[id] = struct{}{}
rule.ID = id
return
}
}
}
}
func WithGroupIndex(groupIndex int) AlertRuleMutator {
return func(rule *AlertRule) {
rule.RuleGroupIndex = groupIndex
}
}
func WithUniqueGroupIndex() AlertRuleMutator {
usedIdx := make(map[int]struct{})
return func(rule *AlertRule) {
for {
idx := rand.Int()
if _, ok := usedIdx[idx]; !ok {
usedIdx[idx] = struct{}{}
rule.RuleGroupIndex = idx
return
}
}
}
}
func WithSequentialGroupIndex() AlertRuleMutator {
idx := 1
return func(rule *AlertRule) {
rule.RuleGroupIndex = idx
idx++
}
}
func WithOrgID(orgId int64) AlertRuleMutator {
return func(rule *AlertRule) {
rule.OrgID = orgId
}
}
func WithNamespace(namespace *folder.Folder) AlertRuleMutator {
return func(rule *AlertRule) {
rule.NamespaceUID = namespace.UID
}
}
func WithInterval(interval time.Duration) AlertRuleMutator {
return func(rule *AlertRule) {
rule.IntervalSeconds = int64(interval.Seconds())
}
}
func WithTitle(title string) AlertRuleMutator {
return func(rule *AlertRule) {
rule.Title = title
}
}
func WithFor(duration time.Duration) AlertRuleMutator {
return func(rule *AlertRule) {
rule.For = duration
}
}
func GenerateAlertLabels(count int, prefix string) data.Labels {
labels := make(data.Labels, count)
for i := 0; i < count; i++ {
labels[prefix+"key-"+util.GenerateShortUID()] = prefix + "value-" + util.GenerateShortUID()
}
return labels
}
func GenerateAlertQuery() AlertQuery {
f := rand.Intn(10) + 5
t := rand.Intn(f)
return AlertQuery{
DatasourceUID: util.GenerateShortUID(),
Model: json.RawMessage(fmt.Sprintf(`{
"%s": "%s",
"%s":"%d"
}`, util.GenerateShortUID(), util.GenerateShortUID(), util.GenerateShortUID(), rand.Int())),
RelativeTimeRange: RelativeTimeRange{
From: Duration(time.Duration(f) * time.Minute),
To: Duration(time.Duration(t) * time.Minute),
},
RefID: util.GenerateShortUID(),
QueryType: util.GenerateShortUID(),
}
}
// GenerateUniqueAlertRules generates many random alert rules and makes sure that they have unique UID.
// It returns a tuple where first element is a map where keys are UID of alert rule and the second element is a slice of the same rules
func GenerateUniqueAlertRules(count int, f func() *AlertRule) (map[string]*AlertRule, []*AlertRule) {
uIDs := make(map[string]*AlertRule, count)
result := make([]*AlertRule, 0, count)
for len(result) < count {
rule := f()
if _, ok := uIDs[rule.UID]; ok {
continue
}
result = append(result, rule)
uIDs[rule.UID] = rule
}
return uIDs, result
}
// GenerateAlertRulesSmallNonEmpty generates 1 to 5 rules using the provided generator
func GenerateAlertRulesSmallNonEmpty(f func() *AlertRule) []*AlertRule {
return GenerateAlertRules(rand.Intn(4)+1, f)
}
// GenerateAlertRules generates many random alert rules. Does not guarantee that rules are unique (by UID)
func GenerateAlertRules(count int, f func() *AlertRule) []*AlertRule {
result := make([]*AlertRule, 0, count)
for len(result) < count {
rule := f()
result = append(result, rule)
}
return result
}
// GenerateRuleKey generates a random alert rule key
func GenerateRuleKey(orgID int64) AlertRuleKey {
return AlertRuleKey{
OrgID: orgID,
UID: util.GenerateShortUID(),
}
}
// GenerateGroupKey generates a random group key
func GenerateGroupKey(orgID int64) AlertRuleGroupKey {
return AlertRuleGroupKey{
OrgID: orgID,
NamespaceUID: util.GenerateShortUID(),
RuleGroup: util.GenerateShortUID(),
}
}
// CopyRule creates a deep copy of AlertRule
func CopyRule(r *AlertRule) *AlertRule {
result := AlertRule{
ID: r.ID,
OrgID: r.OrgID,
Title: r.Title,
Condition: r.Condition,
Updated: r.Updated,
IntervalSeconds: r.IntervalSeconds,
Version: r.Version,
UID: r.UID,
NamespaceUID: r.NamespaceUID,
RuleGroup: r.RuleGroup,
RuleGroupIndex: r.RuleGroupIndex,
NoDataState: r.NoDataState,
ExecErrState: r.ExecErrState,
For: r.For,
}
if r.DashboardUID != nil {
dash := *r.DashboardUID
result.DashboardUID = &dash
}
if r.PanelID != nil {
p := *r.PanelID
result.PanelID = &p
}
for _, d := range r.Data {
q := AlertQuery{
RefID: d.RefID,
QueryType: d.QueryType,
RelativeTimeRange: d.RelativeTimeRange,
DatasourceUID: d.DatasourceUID,
}
q.Model = make([]byte, 0, cap(d.Model))
q.Model = append(q.Model, d.Model...)
result.Data = append(result.Data, q)
}
if r.Annotations != nil {
result.Annotations = make(map[string]string, len(r.Annotations))
for s, s2 := range r.Annotations {
result.Annotations[s] = s2
}
}
if r.Labels != nil {
result.Labels = make(map[string]string, len(r.Labels))
for s, s2 := range r.Labels {
result.Labels[s] = s2
}
}
return &result
}
func CreateClassicConditionExpression(refID string, inputRefID string, reducer string, operation string, threshold int) AlertQuery {
return AlertQuery{
RefID: refID,
QueryType: expr.DatasourceType,
DatasourceUID: expr.DatasourceUID,
// the format corresponds to model `ClassicConditionJSON` in /pkg/expr/classic/classic.go
Model: json.RawMessage(fmt.Sprintf(`
{
"refId": "%[1]s",
"hide": false,
"type": "classic_conditions",
"datasource": {
"uid": "%[6]s",
"type": "%[7]s"
},
"conditions": [
{
"type": "query",
"evaluator": {
"params": [
%[4]d
],
"type": "%[3]s"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"%[2]s"
]
},
"reducer": {
"params": [],
"type": "%[5]s"
}
}
]
}`, refID, inputRefID, operation, threshold, reducer, expr.DatasourceUID, expr.DatasourceType)),
}
}
Alerting: Add alert pausing feature (#60734) * Add field in alert_rule model, add state to alert_instance model, and state to eval * Remove paused state from eval package * Skip paused alert rules in scheduler * Add migration to add is_paused field to alert_rule table * Convert to postable alerts only if not normal, pernding, or paused * Handle paused eval results in state manager * Add Paused state to eval package * Add paused alerts logic in scheduler * Skip alert on scheduler * Remove paused status from eval package * Apply suggestions from code review Co-authored-by: George Robinson <george.robinson@grafana.com> * Remove state * Rethink schedule and manager for paused alerts * Change return to continue * Remove unused var * Rethink alert pausing * Paused alerts storing annotations * Only add one state transition * Revert boolean method renaming refactor * Revert take image refactor * Make registry errors public * Revert method extraction for getting a folder title * Revert variable renaming refactor * Undo unnecessary changes * Revert changes in test * Remove IsPause check in PatchPartiLAlertRule function * Use SetNormal to set state * Fix text by returning to old behaviour on alert rule deletion * Add test in schedule_unit_test.go to test ticks with paused alerts * Add coment to clarify usage of context.Background() * Add comment to clarify resetStateByRuleUID method usage * Move rule get to a more limited scope * Update pkg/services/ngalert/schedule/schedule.go Co-authored-by: George Robinson <george.robinson@grafana.com> * rum gofmt on pkg/services/ngalert/schedule/schedule.go * Remove defer cancel for context * Update pkg/services/ngalert/models/instance_test.go Co-authored-by: Santiago <santiagohernandez.1997@gmail.com> * Update pkg/services/ngalert/models/testing.go Co-authored-by: Santiago <santiagohernandez.1997@gmail.com> * Update pkg/services/ngalert/schedule/schedule_unit_test.go Co-authored-by: Santiago <santiagohernandez.1997@gmail.com> * Update pkg/services/ngalert/schedule/schedule_unit_test.go Co-authored-by: Santiago <santiagohernandez.1997@gmail.com> * Update pkg/services/ngalert/models/instance_test.go Co-authored-by: Santiago <santiagohernandez.1997@gmail.com> * skip scheduler rule state clean up on paused alert rule * Update pkg/services/ngalert/schedule/schedule.go Co-authored-by: Santiago <santiagohernandez.1997@gmail.com> * Fix mock in test * Add (hopefully) final suggestions * Use error channel from recordAnnotationsSync to cancel context * Run make gen-cue * Place pause alert check in channel update after version check * Reduce branching un update channel select * Add if for error and move code inside if in state manager ResetStateByRuleUID * Add reason to logs * Update pkg/services/ngalert/schedule/schedule.go Co-authored-by: George Robinson <george.robinson@grafana.com> * Do not delete alert rule routine, just exit on eval if is paused * Reduce branching and create-close a channel to avoid deadlocks * Separate state deletion and state reset (includes history saving) * Add current pause state in rule route in scheduler * Split clearState and bring errCh closer to RecordStatesAsync call * Change rule to ruleMeta in RecordStatesAsync * copy state to be able to modify it * Add timeout to context creation * Shorten the timeout * Use resetState is rule is paused and deleteState if rule is not paused * Remove Empty state reason * Save every rule change in historian * Add tests for DeleteStateByRuleUID and ResetStateByRuleUID * Remove useless line * Remove outdated comment Co-authored-by: George Robinson <george.robinson@grafana.com> Co-authored-by: Santiago <santiagohernandez.1997@gmail.com> Co-authored-by: Armand Grillet <2117580+armandgrillet@users.noreply.github.com>
2023-01-27 01:29:10 +08:00
type AlertInstanceMutator func(*AlertInstance)
// AlertInstanceGen provides a factory function that generates a random AlertInstance.
// The mutators arguments allows changing fields of the resulting structure.
func AlertInstanceGen(mutators ...AlertInstanceMutator) *AlertInstance {
var labels map[string]string = nil
if rand.Int63()%2 == 0 {
labels = GenerateAlertLabels(rand.Intn(5), "lbl-")
}
randState := func() InstanceStateType {
s := [...]InstanceStateType{
InstanceStateFiring,
InstanceStateNormal,
InstanceStatePending,
InstanceStateNoData,
InstanceStateError,
}
return s[rand.Intn(len(s))]
}
currentStateSince := time.Now().Add(-time.Duration(rand.Intn(100) + 1))
instance := &AlertInstance{
AlertInstanceKey: AlertInstanceKey{
RuleOrgID: rand.Int63n(1500),
RuleUID: util.GenerateShortUID(),
LabelsHash: util.GenerateShortUID(),
},
Labels: labels,
CurrentState: randState(),
CurrentReason: "TEST-REASON-" + util.GenerateShortUID(),
CurrentStateSince: currentStateSince,
CurrentStateEnd: currentStateSince.Add(time.Duration(rand.Intn(100) + 200)),
LastEvalTime: time.Now().Add(-time.Duration(rand.Intn(100) + 50)),
}
for _, mutator := range mutators {
mutator(instance)
}
return instance
}