Alerting: Add enablement flag for recording rules (#92032)

* Add an `enabled` flag (default false) to the [recording_rules] config section

* Force-disable recording rules if the feature toggle is not enabled
This commit is contained in:
Alexander Weaver 2024-08-19 12:01:00 -05:00 committed by GitHub
parent 9020eb4b17
commit ac5ebe6e4d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 65 additions and 45 deletions

View File

@ -1438,6 +1438,9 @@ max_age =
max_annotations_to_keep = max_annotations_to_keep =
[recording_rules] [recording_rules]
# Enable recording rules. You must provide write credentials below.
enabled = false
# Target URL (including write path) for recording rules. # Target URL (including write path) for recording rules.
url = url =

View File

@ -1432,6 +1432,9 @@ max_annotations_to_keep =
#################################### Recording Rules ##################### #################################### Recording Rules #####################
[recording_rules] [recording_rules]
# Enable recording rules. You must provide write credentials below.
enabled = false
# Target URL (including write path) for recording rules. # Target URL (including write path) for recording rules.
url = url =

View File

@ -346,6 +346,10 @@ func (ng *AlertNG) init() error {
evalFactory := eval.NewEvaluatorFactory(ng.Cfg.UnifiedAlerting, ng.DataSourceCache, ng.ExpressionService) evalFactory := eval.NewEvaluatorFactory(ng.Cfg.UnifiedAlerting, ng.DataSourceCache, ng.ExpressionService)
conditionValidator := eval.NewConditionValidator(ng.DataSourceCache, ng.ExpressionService, ng.pluginsStore) conditionValidator := eval.NewConditionValidator(ng.DataSourceCache, ng.ExpressionService, ng.pluginsStore)
if !ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagGrafanaManagedRecordingRules) {
// Force-disable the feature if the feature toggle is not on - sets us up for feature toggle removal.
ng.Cfg.UnifiedAlerting.RecordingRules.Enabled = false
}
recordingWriter, err := createRecordingWriter(ng.FeatureToggles, ng.Cfg.UnifiedAlerting.RecordingRules, ng.httpClientProvider, clk, ng.Metrics.GetRemoteWriterMetrics()) recordingWriter, err := createRecordingWriter(ng.FeatureToggles, ng.Cfg.UnifiedAlerting.RecordingRules, ng.httpClientProvider, clk, ng.Metrics.GetRemoteWriterMetrics())
if err != nil { if err != nil {
return fmt.Errorf("failed to initialize recording writer: %w", err) return fmt.Errorf("failed to initialize recording writer: %w", err)
@ -362,7 +366,7 @@ func (ng *AlertNG) init() error {
AppURL: appUrl, AppURL: appUrl,
EvaluatorFactory: evalFactory, EvaluatorFactory: evalFactory,
RuleStore: ng.store, RuleStore: ng.store,
FeatureToggles: ng.FeatureToggles, RecordingRulesCfg: ng.Cfg.UnifiedAlerting.RecordingRules,
Metrics: ng.Metrics.GetSchedulerMetrics(), Metrics: ng.Metrics.GetSchedulerMetrics(),
AlertSender: alertsRouter, AlertSender: alertsRouter,
Tracer: ng.tracer, Tracer: ng.tracer,
@ -667,7 +671,7 @@ func createRemoteAlertmanager(cfg remote.AlertmanagerConfig, kvstore kvstore.KVS
func createRecordingWriter(featureToggles featuremgmt.FeatureToggles, settings setting.RecordingRuleSettings, httpClientProvider httpclient.Provider, clock clock.Clock, m *metrics.RemoteWriter) (schedule.RecordingWriter, error) { func createRecordingWriter(featureToggles featuremgmt.FeatureToggles, settings setting.RecordingRuleSettings, httpClientProvider httpclient.Provider, clock clock.Clock, m *metrics.RemoteWriter) (schedule.RecordingWriter, error) {
logger := log.New("ngalert.writer") logger := log.New("ngalert.writer")
if featureToggles.IsEnabledGlobally(featuremgmt.FlagGrafanaManagedRecordingRules) { if settings.Enabled {
return writer.NewPrometheusWriter(settings, httpClientProvider, clock, logger, m) return writer.NewPrometheusWriter(settings, httpClientProvider, clock, logger, m)
} }

View File

@ -16,7 +16,6 @@ import (
"github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing" "github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/datasources" "github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/metrics"
@ -24,6 +23,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/state" "github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/org" "github.com/grafana/grafana/pkg/services/org"
"github.com/grafana/grafana/pkg/services/user" "github.com/grafana/grafana/pkg/services/user"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util" "github.com/grafana/grafana/pkg/util"
) )
@ -57,7 +57,7 @@ func newRuleFactory(
evalFactory eval.EvaluatorFactory, evalFactory eval.EvaluatorFactory,
ruleProvider ruleProvider, ruleProvider ruleProvider,
clock clock.Clock, clock clock.Clock,
featureToggles featuremgmt.FeatureToggles, rrCfg setting.RecordingRuleSettings,
met *metrics.Scheduler, met *metrics.Scheduler,
logger log.Logger, logger log.Logger,
tracer tracing.Tracer, tracer tracing.Tracer,
@ -73,7 +73,7 @@ func newRuleFactory(
maxAttempts, maxAttempts,
clock, clock,
evalFactory, evalFactory,
featureToggles, rrCfg,
logger, logger,
met, met,
tracer, tracer,

View File

@ -827,7 +827,7 @@ func TestRuleRoutine(t *testing.T) {
} }
func ruleFactoryFromScheduler(sch *schedule) ruleFactory { func ruleFactoryFromScheduler(sch *schedule) ruleFactory {
return newRuleFactory(sch.appURL, sch.disableGrafanaFolder, sch.maxAttempts, sch.alertsSender, sch.stateManager, sch.evaluatorFactory, &sch.schedulableAlertRules, sch.clock, sch.featureToggles, sch.metrics, sch.log, sch.tracer, sch.recordingWriter, sch.evalAppliedFunc, sch.stopAppliedFunc) return newRuleFactory(sch.appURL, sch.disableGrafanaFolder, sch.maxAttempts, sch.alertsSender, sch.stateManager, sch.evaluatorFactory, &sch.schedulableAlertRules, sch.clock, sch.rrCfg, sch.metrics, sch.log, sch.tracer, sch.recordingWriter, sch.evalAppliedFunc, sch.stopAppliedFunc)
} }
func stateForRule(rule *models.AlertRule, ts time.Time, evalState eval.State) *state.State { func stateForRule(rule *models.AlertRule, ts time.Time, evalState eval.State) *state.State {

View File

@ -15,10 +15,10 @@ import (
"github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing" "github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util" "github.com/grafana/grafana/pkg/util"
) )
@ -42,10 +42,10 @@ type recordingRule struct {
maxAttempts int64 maxAttempts int64
clock clock.Clock clock clock.Clock
evalFactory eval.EvaluatorFactory evalFactory eval.EvaluatorFactory
featureToggles featuremgmt.FeatureToggles cfg setting.RecordingRuleSettings
writer RecordingWriter writer RecordingWriter
// Event hooks that are only used in tests. // Event hooks that are only used in tests.
evalAppliedHook evalAppliedFunc evalAppliedHook evalAppliedFunc
@ -56,7 +56,7 @@ type recordingRule struct {
tracer tracing.Tracer tracer tracing.Tracer
} }
func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, ft featuremgmt.FeatureToggles, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule { func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, cfg setting.RecordingRuleSettings, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule {
ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key)) ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key))
return &recordingRule{ return &recordingRule{
key: key, key: key,
@ -69,7 +69,7 @@ func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAtte
evaluationDuration: atomic.NewDuration(0), evaluationDuration: atomic.NewDuration(0),
clock: clock, clock: clock,
evalFactory: evalFactory, evalFactory: evalFactory,
featureToggles: ft, cfg: cfg,
maxAttempts: maxAttempts, maxAttempts: maxAttempts,
evalAppliedHook: evalAppliedHook, evalAppliedHook: evalAppliedHook,
stopAppliedHook: stopAppliedHook, stopAppliedHook: stopAppliedHook,
@ -132,8 +132,8 @@ func (r *recordingRule) Run() error {
r.logger.Debug("Evaluation channel has been closed. Exiting") r.logger.Debug("Evaluation channel has been closed. Exiting")
return nil return nil
} }
if !r.featureToggles.IsEnabled(ctx, featuremgmt.FlagGrafanaManagedRecordingRules) { if !r.cfg.Enabled {
r.logger.Warn("Recording rule scheduled but toggle is not enabled. Skipping") r.logger.Warn("Recording rule scheduled but subsystem is not enabled. Skipping")
return nil return nil
} }
// TODO: Skipping the "evalRunning" guard that the alert rule routine does, because it seems to be dead code and impossible to hit. // TODO: Skipping the "evalRunning" guard that the alert rule routine does, because it seems to be dead code and impossible to hit.

View File

@ -19,10 +19,10 @@ import (
"github.com/grafana/grafana-plugin-sdk-go/backend/httpclient" "github.com/grafana/grafana-plugin-sdk-go/backend/httpclient"
"github.com/grafana/grafana/pkg/expr" "github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/metrics"
models "github.com/grafana/grafana/pkg/services/ngalert/models" models "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/writer" "github.com/grafana/grafana/pkg/services/ngalert/writer"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util" "github.com/grafana/grafana/pkg/util"
) )
@ -153,8 +153,10 @@ func TestRecordingRule(t *testing.T) {
} }
func blankRecordingRuleForTests(ctx context.Context) *recordingRule { func blankRecordingRuleForTests(ctx context.Context) *recordingRule {
ft := featuremgmt.WithFeatures(featuremgmt.FlagGrafanaManagedRecordingRules) st := setting.RecordingRuleSettings{
return newRecordingRule(context.Background(), models.AlertRuleKey{}, 0, nil, nil, ft, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil) Enabled: true,
}
return newRecordingRule(context.Background(), models.AlertRuleKey{}, 0, nil, nil, st, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil)
} }
func TestRecordingRule_Integration(t *testing.T) { func TestRecordingRule_Integration(t *testing.T) {

View File

@ -15,12 +15,12 @@ import (
"github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing" "github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state" "github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util/ticker" "github.com/grafana/grafana/pkg/util/ticker"
) )
@ -84,7 +84,7 @@ type schedule struct {
appURL *url.URL appURL *url.URL
disableGrafanaFolder bool disableGrafanaFolder bool
jitterEvaluations JitterStrategy jitterEvaluations JitterStrategy
featureToggles featuremgmt.FeatureToggles rrCfg setting.RecordingRuleSettings
metrics *metrics.Scheduler metrics *metrics.Scheduler
// lastUpdatedMetricsForOrgsAndGroups contains AlertRuleGroupKeyWithFolderFullpaths that // lastUpdatedMetricsForOrgsAndGroups contains AlertRuleGroupKeyWithFolderFullpaths that
@ -113,7 +113,7 @@ type SchedulerCfg struct {
C clock.Clock C clock.Clock
MinRuleInterval time.Duration MinRuleInterval time.Duration
DisableGrafanaFolder bool DisableGrafanaFolder bool
FeatureToggles featuremgmt.FeatureToggles RecordingRulesCfg setting.RecordingRuleSettings
AppURL *url.URL AppURL *url.URL
JitterEvaluations JitterStrategy JitterEvaluations JitterStrategy
EvaluatorFactory eval.EvaluatorFactory EvaluatorFactory eval.EvaluatorFactory
@ -146,7 +146,7 @@ func NewScheduler(cfg SchedulerCfg, stateManager *state.Manager) *schedule {
appURL: cfg.AppURL, appURL: cfg.AppURL,
disableGrafanaFolder: cfg.DisableGrafanaFolder, disableGrafanaFolder: cfg.DisableGrafanaFolder,
jitterEvaluations: cfg.JitterEvaluations, jitterEvaluations: cfg.JitterEvaluations,
featureToggles: cfg.FeatureToggles, rrCfg: cfg.RecordingRulesCfg,
stateManager: stateManager, stateManager: stateManager,
minRuleInterval: cfg.MinRuleInterval, minRuleInterval: cfg.MinRuleInterval,
schedulableAlertRules: alertRulesRegistry{rules: make(map[ngmodels.AlertRuleKey]*ngmodels.AlertRule)}, schedulableAlertRules: alertRulesRegistry{rules: make(map[ngmodels.AlertRuleKey]*ngmodels.AlertRule)},
@ -266,7 +266,7 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
sch.evaluatorFactory, sch.evaluatorFactory,
&sch.schedulableAlertRules, &sch.schedulableAlertRules,
sch.clock, sch.clock,
sch.featureToggles, sch.rrCfg,
sch.metrics, sch.metrics,
sch.log, sch.log,
sch.tracer, sch.tracer,

View File

@ -65,18 +65,21 @@ func TestProcessTicks(t *testing.T) {
cacheServ := &datasources.FakeCacheService{} cacheServ := &datasources.FakeCacheService{}
evaluator := eval.NewEvaluatorFactory(setting.UnifiedAlertingSettings{}, cacheServ, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil, featuremgmt.WithFeatures(), nil, tracing.InitializeTracerForTest())) evaluator := eval.NewEvaluatorFactory(setting.UnifiedAlertingSettings{}, cacheServ, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil, featuremgmt.WithFeatures(), nil, tracing.InitializeTracerForTest()))
rrSet := setting.RecordingRuleSettings{
Enabled: true,
}
schedCfg := SchedulerCfg{ schedCfg := SchedulerCfg{
BaseInterval: cfg.BaseInterval, BaseInterval: cfg.BaseInterval,
C: mockedClock, C: mockedClock,
AppURL: appUrl, AppURL: appUrl,
EvaluatorFactory: evaluator, EvaluatorFactory: evaluator,
RuleStore: ruleStore, RuleStore: ruleStore,
Metrics: testMetrics.GetSchedulerMetrics(), Metrics: testMetrics.GetSchedulerMetrics(),
AlertSender: notifier, AlertSender: notifier,
FeatureToggles: featuremgmt.WithFeatures(featuremgmt.FlagGrafanaManagedRecordingRules), RecordingRulesCfg: rrSet,
Tracer: testTracer, Tracer: testTracer,
Log: log.New("ngalert.scheduler"), Log: log.New("ngalert.scheduler"),
} }
managerCfg := state.ManagerCfg{ managerCfg := state.ManagerCfg{
Metrics: testMetrics.GetStateMetrics(), Metrics: testMetrics.GetStateMetrics(),
@ -854,23 +857,26 @@ func setupScheduler(t *testing.T, rs *fakeRulesStore, is *state.FakeInstanceStor
cfg := setting.UnifiedAlertingSettings{ cfg := setting.UnifiedAlertingSettings{
BaseInterval: time.Second, BaseInterval: time.Second,
MaxAttempts: 1, MaxAttempts: 1,
RecordingRules: setting.RecordingRuleSettings{
Enabled: true,
},
} }
fakeRecordingWriter := writer.FakeWriter{} fakeRecordingWriter := writer.FakeWriter{}
schedCfg := SchedulerCfg{ schedCfg := SchedulerCfg{
BaseInterval: cfg.BaseInterval, BaseInterval: cfg.BaseInterval,
MaxAttempts: cfg.MaxAttempts, MaxAttempts: cfg.MaxAttempts,
C: mockedClock, C: mockedClock,
AppURL: appUrl, AppURL: appUrl,
EvaluatorFactory: evaluator, EvaluatorFactory: evaluator,
RuleStore: rs, RuleStore: rs,
FeatureToggles: featuremgmt.WithFeatures(featuremgmt.FlagGrafanaManagedRecordingRules), RecordingRulesCfg: cfg.RecordingRules,
Metrics: m.GetSchedulerMetrics(), Metrics: m.GetSchedulerMetrics(),
AlertSender: senderMock, AlertSender: senderMock,
Tracer: testTracer, Tracer: testTracer,
Log: log.New("ngalert.scheduler"), Log: log.New("ngalert.scheduler"),
RecordingWriter: fakeRecordingWriter, RecordingWriter: fakeRecordingWriter,
} }
managerCfg := state.ManagerCfg{ managerCfg := state.ManagerCfg{
Metrics: m.GetStateMetrics(), Metrics: m.GetStateMetrics(),

View File

@ -124,6 +124,7 @@ type UnifiedAlertingSettings struct {
} }
type RecordingRuleSettings struct { type RecordingRuleSettings struct {
Enabled bool
URL string URL string
BasicAuthUsername string BasicAuthUsername string
BasicAuthPassword string BasicAuthPassword string
@ -421,6 +422,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
rr := iniFile.Section("recording_rules") rr := iniFile.Section("recording_rules")
uaCfgRecordingRules := RecordingRuleSettings{ uaCfgRecordingRules := RecordingRuleSettings{
Enabled: rr.Key("enabled").MustBool(false),
URL: rr.Key("url").MustString(""), URL: rr.Key("url").MustString(""),
BasicAuthUsername: rr.Key("basic_auth_username").MustString(""), BasicAuthUsername: rr.Key("basic_auth_username").MustString(""),
BasicAuthPassword: rr.Key("basic_auth_password").MustString(""), BasicAuthPassword: rr.Key("basic_auth_password").MustString(""),