Alerting: Add enablement flag for recording rules (#92032)

* Add enablement flag

* Disable if toggle not enabled
This commit is contained in:
Alexander Weaver 2024-08-19 12:01:00 -05:00 committed by GitHub
parent 9020eb4b17
commit ac5ebe6e4d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 65 additions and 45 deletions

View File

@ -1438,6 +1438,9 @@ max_age =
max_annotations_to_keep =
[recording_rules]
# Enable recording rules. When enabled, you must provide write credentials below.
enabled = false
# Target URL (including write path) for recording rules.
url =

View File

@ -1432,6 +1432,9 @@ max_annotations_to_keep =
#################################### Recording Rules #####################
[recording_rules]
# Enable recording rules. When enabled, you must provide write credentials below.
enabled = false
# Target URL (including write path) for recording rules.
url =

View File

@ -346,6 +346,10 @@ func (ng *AlertNG) init() error {
evalFactory := eval.NewEvaluatorFactory(ng.Cfg.UnifiedAlerting, ng.DataSourceCache, ng.ExpressionService)
conditionValidator := eval.NewConditionValidator(ng.DataSourceCache, ng.ExpressionService, ng.pluginsStore)
if !ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagGrafanaManagedRecordingRules) {
// Force-disable the feature if the feature toggle is not on - sets us up for feature toggle removal.
ng.Cfg.UnifiedAlerting.RecordingRules.Enabled = false
}
recordingWriter, err := createRecordingWriter(ng.FeatureToggles, ng.Cfg.UnifiedAlerting.RecordingRules, ng.httpClientProvider, clk, ng.Metrics.GetRemoteWriterMetrics())
if err != nil {
return fmt.Errorf("failed to initialize recording writer: %w", err)
@ -362,7 +366,7 @@ func (ng *AlertNG) init() error {
AppURL: appUrl,
EvaluatorFactory: evalFactory,
RuleStore: ng.store,
FeatureToggles: ng.FeatureToggles,
RecordingRulesCfg: ng.Cfg.UnifiedAlerting.RecordingRules,
Metrics: ng.Metrics.GetSchedulerMetrics(),
AlertSender: alertsRouter,
Tracer: ng.tracer,
@ -667,7 +671,7 @@ func createRemoteAlertmanager(cfg remote.AlertmanagerConfig, kvstore kvstore.KVS
func createRecordingWriter(featureToggles featuremgmt.FeatureToggles, settings setting.RecordingRuleSettings, httpClientProvider httpclient.Provider, clock clock.Clock, m *metrics.RemoteWriter) (schedule.RecordingWriter, error) {
logger := log.New("ngalert.writer")
if featureToggles.IsEnabledGlobally(featuremgmt.FlagGrafanaManagedRecordingRules) {
if settings.Enabled {
return writer.NewPrometheusWriter(settings, httpClientProvider, clock, logger, m)
}

View File

@ -16,7 +16,6 @@ import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
@ -24,6 +23,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/org"
"github.com/grafana/grafana/pkg/services/user"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
@ -57,7 +57,7 @@ func newRuleFactory(
evalFactory eval.EvaluatorFactory,
ruleProvider ruleProvider,
clock clock.Clock,
featureToggles featuremgmt.FeatureToggles,
rrCfg setting.RecordingRuleSettings,
met *metrics.Scheduler,
logger log.Logger,
tracer tracing.Tracer,
@ -73,7 +73,7 @@ func newRuleFactory(
maxAttempts,
clock,
evalFactory,
featureToggles,
rrCfg,
logger,
met,
tracer,

View File

@ -827,7 +827,7 @@ func TestRuleRoutine(t *testing.T) {
}
func ruleFactoryFromScheduler(sch *schedule) ruleFactory {
return newRuleFactory(sch.appURL, sch.disableGrafanaFolder, sch.maxAttempts, sch.alertsSender, sch.stateManager, sch.evaluatorFactory, &sch.schedulableAlertRules, sch.clock, sch.featureToggles, sch.metrics, sch.log, sch.tracer, sch.recordingWriter, sch.evalAppliedFunc, sch.stopAppliedFunc)
return newRuleFactory(sch.appURL, sch.disableGrafanaFolder, sch.maxAttempts, sch.alertsSender, sch.stateManager, sch.evaluatorFactory, &sch.schedulableAlertRules, sch.clock, sch.rrCfg, sch.metrics, sch.log, sch.tracer, sch.recordingWriter, sch.evalAppliedFunc, sch.stopAppliedFunc)
}
func stateForRule(rule *models.AlertRule, ts time.Time, evalState eval.State) *state.State {

View File

@ -15,10 +15,10 @@ import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
@ -42,10 +42,10 @@ type recordingRule struct {
maxAttempts int64
clock clock.Clock
evalFactory eval.EvaluatorFactory
featureToggles featuremgmt.FeatureToggles
writer RecordingWriter
clock clock.Clock
evalFactory eval.EvaluatorFactory
cfg setting.RecordingRuleSettings
writer RecordingWriter
// Event hooks that are only used in tests.
evalAppliedHook evalAppliedFunc
@ -56,7 +56,7 @@ type recordingRule struct {
tracer tracing.Tracer
}
func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, ft featuremgmt.FeatureToggles, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule {
func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, cfg setting.RecordingRuleSettings, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule {
ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key))
return &recordingRule{
key: key,
@ -69,7 +69,7 @@ func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAtte
evaluationDuration: atomic.NewDuration(0),
clock: clock,
evalFactory: evalFactory,
featureToggles: ft,
cfg: cfg,
maxAttempts: maxAttempts,
evalAppliedHook: evalAppliedHook,
stopAppliedHook: stopAppliedHook,
@ -132,8 +132,8 @@ func (r *recordingRule) Run() error {
r.logger.Debug("Evaluation channel has been closed. Exiting")
return nil
}
if !r.featureToggles.IsEnabled(ctx, featuremgmt.FlagGrafanaManagedRecordingRules) {
r.logger.Warn("Recording rule scheduled but toggle is not enabled. Skipping")
if !r.cfg.Enabled {
r.logger.Warn("Recording rule scheduled but subsystem is not enabled. Skipping")
return nil
}
// TODO: Skipping the "evalRunning" guard that the alert rule routine does, because it seems to be dead code and impossible to hit.

View File

@ -19,10 +19,10 @@ import (
"github.com/grafana/grafana-plugin-sdk-go/backend/httpclient"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
models "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/writer"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
@ -153,8 +153,10 @@ func TestRecordingRule(t *testing.T) {
}
func blankRecordingRuleForTests(ctx context.Context) *recordingRule {
ft := featuremgmt.WithFeatures(featuremgmt.FlagGrafanaManagedRecordingRules)
return newRecordingRule(context.Background(), models.AlertRuleKey{}, 0, nil, nil, ft, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil)
st := setting.RecordingRuleSettings{
Enabled: true,
}
return newRecordingRule(context.Background(), models.AlertRuleKey{}, 0, nil, nil, st, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil)
}
func TestRecordingRule_Integration(t *testing.T) {

View File

@ -15,12 +15,12 @@ import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util/ticker"
)
@ -84,7 +84,7 @@ type schedule struct {
appURL *url.URL
disableGrafanaFolder bool
jitterEvaluations JitterStrategy
featureToggles featuremgmt.FeatureToggles
rrCfg setting.RecordingRuleSettings
metrics *metrics.Scheduler
// lastUpdatedMetricsForOrgsAndGroups contains AlertRuleGroupKeyWithFolderFullpaths that
@ -113,7 +113,7 @@ type SchedulerCfg struct {
C clock.Clock
MinRuleInterval time.Duration
DisableGrafanaFolder bool
FeatureToggles featuremgmt.FeatureToggles
RecordingRulesCfg setting.RecordingRuleSettings
AppURL *url.URL
JitterEvaluations JitterStrategy
EvaluatorFactory eval.EvaluatorFactory
@ -146,7 +146,7 @@ func NewScheduler(cfg SchedulerCfg, stateManager *state.Manager) *schedule {
appURL: cfg.AppURL,
disableGrafanaFolder: cfg.DisableGrafanaFolder,
jitterEvaluations: cfg.JitterEvaluations,
featureToggles: cfg.FeatureToggles,
rrCfg: cfg.RecordingRulesCfg,
stateManager: stateManager,
minRuleInterval: cfg.MinRuleInterval,
schedulableAlertRules: alertRulesRegistry{rules: make(map[ngmodels.AlertRuleKey]*ngmodels.AlertRule)},
@ -266,7 +266,7 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
sch.evaluatorFactory,
&sch.schedulableAlertRules,
sch.clock,
sch.featureToggles,
sch.rrCfg,
sch.metrics,
sch.log,
sch.tracer,

View File

@ -65,18 +65,21 @@ func TestProcessTicks(t *testing.T) {
cacheServ := &datasources.FakeCacheService{}
evaluator := eval.NewEvaluatorFactory(setting.UnifiedAlertingSettings{}, cacheServ, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil, featuremgmt.WithFeatures(), nil, tracing.InitializeTracerForTest()))
rrSet := setting.RecordingRuleSettings{
Enabled: true,
}
schedCfg := SchedulerCfg{
BaseInterval: cfg.BaseInterval,
C: mockedClock,
AppURL: appUrl,
EvaluatorFactory: evaluator,
RuleStore: ruleStore,
Metrics: testMetrics.GetSchedulerMetrics(),
AlertSender: notifier,
FeatureToggles: featuremgmt.WithFeatures(featuremgmt.FlagGrafanaManagedRecordingRules),
Tracer: testTracer,
Log: log.New("ngalert.scheduler"),
BaseInterval: cfg.BaseInterval,
C: mockedClock,
AppURL: appUrl,
EvaluatorFactory: evaluator,
RuleStore: ruleStore,
Metrics: testMetrics.GetSchedulerMetrics(),
AlertSender: notifier,
RecordingRulesCfg: rrSet,
Tracer: testTracer,
Log: log.New("ngalert.scheduler"),
}
managerCfg := state.ManagerCfg{
Metrics: testMetrics.GetStateMetrics(),
@ -854,23 +857,26 @@ func setupScheduler(t *testing.T, rs *fakeRulesStore, is *state.FakeInstanceStor
cfg := setting.UnifiedAlertingSettings{
BaseInterval: time.Second,
MaxAttempts: 1,
RecordingRules: setting.RecordingRuleSettings{
Enabled: true,
},
}
fakeRecordingWriter := writer.FakeWriter{}
schedCfg := SchedulerCfg{
BaseInterval: cfg.BaseInterval,
MaxAttempts: cfg.MaxAttempts,
C: mockedClock,
AppURL: appUrl,
EvaluatorFactory: evaluator,
RuleStore: rs,
FeatureToggles: featuremgmt.WithFeatures(featuremgmt.FlagGrafanaManagedRecordingRules),
Metrics: m.GetSchedulerMetrics(),
AlertSender: senderMock,
Tracer: testTracer,
Log: log.New("ngalert.scheduler"),
RecordingWriter: fakeRecordingWriter,
BaseInterval: cfg.BaseInterval,
MaxAttempts: cfg.MaxAttempts,
C: mockedClock,
AppURL: appUrl,
EvaluatorFactory: evaluator,
RuleStore: rs,
RecordingRulesCfg: cfg.RecordingRules,
Metrics: m.GetSchedulerMetrics(),
AlertSender: senderMock,
Tracer: testTracer,
Log: log.New("ngalert.scheduler"),
RecordingWriter: fakeRecordingWriter,
}
managerCfg := state.ManagerCfg{
Metrics: m.GetStateMetrics(),

View File

@ -124,6 +124,7 @@ type UnifiedAlertingSettings struct {
}
type RecordingRuleSettings struct {
Enabled bool
URL string
BasicAuthUsername string
BasicAuthPassword string
@ -421,6 +422,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
rr := iniFile.Section("recording_rules")
uaCfgRecordingRules := RecordingRuleSettings{
Enabled: rr.Key("enabled").MustBool(false),
URL: rr.Key("url").MustString(""),
BasicAuthUsername: rr.Key("basic_auth_username").MustString(""),
BasicAuthPassword: rr.Key("basic_auth_password").MustString(""),