mirror of https://github.com/grafana/grafana.git
Alerting: Add state history backend to write ALERTS metric (#104361)
**What is this feature?**

This PR implements a new Prometheus historian backend that allows Grafana alerting to write alert state history as Prometheus-compatible `ALERTS` metrics to remote Prometheus-compatible data sources. The metric includes a few additional labels:

* `grafana_alertstate`: Grafana's full alert state, which is more granular than the Prometheus one.
* `grafana_rule_uid`: the Grafana alert rule UID.

Grafana states are carried in the `grafana_alertstate` label and are also mapped to Prometheus-compatible `alertstate` values:

| Grafana alert state | `alertstate`          | `grafana_alertstate`  |
|---------------------|-----------------------|-----------------------|
| `Alerting`          | `firing`              | `alerting`            |
| `Recovering`        | `firing`              | `recovering`          |
| `Pending`           | `pending`             | `pending`             |
| `Error`             | `firing`              | `error`               |
| `NoData`            | `firing`              | `nodata`              |
| `Normal`            | _(no metric emitted)_ | _(no metric emitted)_ |
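For illustration only, a minimal configuration sketch for enabling this backend, based on the settings this PR adds to the sample configuration (`my-prometheus-uid` is a placeholder for the UID of an existing Prometheus-compatible data source):

```ini
[unified_alerting.state_history]
enabled = true
backend = prometheus
# UID of the Prometheus-compatible data source that should receive the ALERTS metric (placeholder value).
prometheus_target_datasource_uid = my-prometheus-uid
# Optional overrides; these match the documented defaults.
prometheus_metric_name = ALERTS
prometheus_write_timeout = 10s
```

Once the backend is writing, the series can be queried on the target data source much like the built-in metric, e.g. `ALERTS{alertstate="firing", grafana_alertstate="recovering"}`, with `grafana_rule_uid` available to narrow results to a single rule.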
This commit is contained in:
parent 5448e8fb22
commit ad683f83ff
@@ -1475,8 +1475,10 @@ disabled_labels =
 # Enable the state history functionality in Unified Alerting. The previous states of alert rules will be visible in panels and in the UI.
 enabled = true

-# Select which pluggable state history backend to use. Either "annotations", "loki", or "multiple"
-# "loki" writes state history to an external Loki instance. "multiple" allows history to be written to multiple backends at once.
+# Select which pluggable state history backend to use. Either "annotations", "loki", "prometheus", or "multiple"
+# "loki" writes state history to an external Loki instance.
+# "prometheus" writes state history as ALERTS metrics to a Prometheus-compatible data source.
+# "multiple" allows history to be written to multiple backends at once.
+# Defaults to "annotations".
 backend =

@@ -1526,6 +1528,18 @@ loki_max_query_length = 721h
 # Default is 64kb
 loki_max_query_size = 65536

+# For "prometheus" only.
+# Target datasource UID for writing ALERTS metrics.
+prometheus_target_datasource_uid =
+
+# For "prometheus" only.
+# Metric name for the ALERTS metric. Default is "ALERTS".
+prometheus_metric_name = ALERTS
+
+# For "prometheus" only.
+# Timeout for writing ALERTS metrics to the target datasource. Default is 10s.
+prometheus_write_timeout = 10s
+
 [unified_alerting.state_history.external_labels]
 # Optional extra labels to attach to outbound state history records or log streams.
 # Any number of label key-value-pairs can be provided.
@@ -1454,8 +1454,10 @@ disabled_labels =
 # Enable the state history functionality in Unified Alerting. The previous states of alert rules will be visible in panels and in the UI.
 ; enabled = true

-# Select which pluggable state history backend to use. Either "annotations", "loki", or "multiple"
-# "loki" writes state history to an external Loki instance. "multiple" allows history to be written to multiple backends at once.
+# Select which pluggable state history backend to use. Either "annotations", "loki", "prometheus", or "multiple"
+# "loki" writes state history to an external Loki instance.
+# "prometheus" writes state history as ALERTS metrics to a Prometheus-compatible data source.
+# "multiple" allows history to be written to multiple backends at once.
+# Defaults to "annotations".
 ; backend = "multiple"

@@ -1505,6 +1507,18 @@ disabled_labels =
 # Default is 64kb
 ;loki_max_query_size = 65536

+# For "prometheus" only.
+# Target datasource UID for writing ALERTS metrics.
+; prometheus_target_datasource_uid = "my-prometheus-uid"
+
+# For "prometheus" only.
+# Metric name for the ALERTS metric. Default is "ALERTS".
+; prometheus_metric_name = "ALERTS"
+
+# For "prometheus" only.
+# Timeout for writing ALERTS metrics to the target datasource. Default is 10s.
+; prometheus_write_timeout = 10s
+
 [unified_alerting.state_history.external_labels]
 # Optional extra labels to attach to outbound state history records or log streams.
 # Any number of label key-value-pairs can be provided.
@@ -362,7 +362,21 @@ func (ng *AlertNG) init() error {
        FeatureToggles: ng.FeatureToggles,
    }

-   history, err := configureHistorianBackend(initCtx, ng.Cfg.UnifiedAlerting.StateHistory, ng.annotationsRepo, ng.dashboardService, ng.store, ng.Metrics.GetHistorianMetrics(), ng.Log, ng.tracer, ac.NewRuleService(ng.accesscontrol))
+   history, err := configureHistorianBackend(
+       initCtx,
+       ng.Cfg.UnifiedAlerting.StateHistory,
+       ng.annotationsRepo,
+       ng.dashboardService,
+       ng.store,
+       ng.Metrics.GetHistorianMetrics(),
+       ng.Log,
+       ng.tracer,
+       ac.NewRuleService(ng.accesscontrol),
+       ng.DataSourceService,
+       ng.httpClientProvider,
+       clk,
+       ng.Metrics.GetRemoteWriterMetrics(),
+   )
    if err != nil {
        return err
    }

@@ -606,7 +620,21 @@ type Historian interface {
    state.Historian
}

-func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingStateHistorySettings, ar annotations.Repository, ds dashboards.DashboardService, rs historian.RuleStore, met *metrics.Historian, l log.Logger, tracer tracing.Tracer, ac historian.AccessControl) (Historian, error) {
+func configureHistorianBackend(
+   ctx context.Context,
+   cfg setting.UnifiedAlertingStateHistorySettings,
+   ar annotations.Repository,
+   ds dashboards.DashboardService,
+   rs historian.RuleStore,
+   met *metrics.Historian,
+   l log.Logger,
+   tracer tracing.Tracer,
+   ac historian.AccessControl,
+   datasourceService datasources.DataSourceService,
+   httpClientProvider httpclient.Provider,
+   clock clock.Clock,
+   mw *metrics.RemoteWriter,
+) (Historian, error) {
    if !cfg.Enabled {
        met.Info.WithLabelValues("noop").Set(0)
        return historian.NewNopHistorian(), nil

@@ -621,7 +649,7 @@ func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingS
    if backend == historian.BackendTypeMultiple {
        primaryCfg := cfg
        primaryCfg.Backend = cfg.MultiPrimary
-       primary, err := configureHistorianBackend(ctx, primaryCfg, ar, ds, rs, met, l, tracer, ac)
+       primary, err := configureHistorianBackend(ctx, primaryCfg, ar, ds, rs, met, l, tracer, ac, datasourceService, httpClientProvider, clock, mw)
        if err != nil {
            return nil, fmt.Errorf("multi-backend target \"%s\" was misconfigured: %w", cfg.MultiPrimary, err)
        }

@@ -630,7 +658,7 @@ func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingS
        for _, b := range cfg.MultiSecondaries {
            secCfg := cfg
            secCfg.Backend = b
-           sec, err := configureHistorianBackend(ctx, secCfg, ar, ds, rs, met, l, tracer, ac)
+           sec, err := configureHistorianBackend(ctx, secCfg, ar, ds, rs, met, l, tracer, ac, datasourceService, httpClientProvider, clock, mw)
            if err != nil {
                return nil, fmt.Errorf("multi-backend target \"%s\" was miconfigured: %w", b, err)
            }

@@ -642,7 +670,8 @@ func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingS
    }
    if backend == historian.BackendTypeAnnotations {
        store := historian.NewAnnotationStore(ar, ds, met)
-       annotationBackendLogger := log.New("ngalert.state.historian", "backend", "annotations")
+       logCtx := log.WithContextualAttributes(ctx, []any{"backend", "annotations"})
+       annotationBackendLogger := log.New("ngalert.state.historian").FromContext(logCtx)
        return historian.NewAnnotationBackend(annotationBackendLogger, store, rs, met, ac), nil
    }
    if backend == historian.BackendTypeLoki {

@@ -651,7 +680,8 @@ func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingS
            return nil, fmt.Errorf("invalid remote loki configuration: %w", err)
        }
        req := historian.NewRequester()
-       lokiBackendLogger := log.New("ngalert.state.historian", "backend", "loki")
+       logCtx := log.WithContextualAttributes(ctx, []any{"backend", "loki"})
+       lokiBackendLogger := log.New("ngalert.state.historian").FromContext(logCtx)
        backend := historian.NewRemoteLokiBackend(lokiBackendLogger, lcfg, req, met, tracer, rs, ac)

        testConnCtx, cancelFunc := context.WithTimeout(ctx, 10*time.Second)

@@ -662,6 +692,25 @@ func configureHistorianBackend(ctx context.Context, cfg setting.UnifiedAlertingS
        return backend, nil
    }

+   if backend == historian.BackendTypePrometheus {
+       pcfg, err := historian.NewPrometheusConfig(cfg)
+       if err != nil {
+           return nil, fmt.Errorf("invalid remote prometheus configuration: %w", err)
+       }
+       writerCfg := writer.DatasourceWriterConfig{
+           Timeout: cfg.PrometheusWriteTimeout,
+       }
+       logCtx := log.WithContextualAttributes(ctx, []any{"backend", "prometheus"})
+       prometheusBackendLogger := log.New("ngalert.state.historian").FromContext(logCtx)
+       w := writer.NewDatasourceWriter(writerCfg, datasourceService, httpClientProvider, clock, prometheusBackendLogger, mw)
+       if w == nil {
+           return nil, fmt.Errorf("failed to create alert state metrics writer")
+       }
+       backend := historian.NewRemotePrometheusBackend(pcfg, w, prometheusBackendLogger)
+
+       return backend, nil
+   }
+
    return nil, fmt.Errorf("unrecognized state history backend: %s", backend)
}
@@ -90,7 +90,7 @@ func TestConfigureHistorianBackend(t *testing.T) {
        }
        ac := &acfakes.FakeRuleService{}

-       _, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac)
+       _, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac, nil, nil, nil, nil)

        require.ErrorContains(t, err, "unrecognized")
    })

@@ -106,7 +106,7 @@ func TestConfigureHistorianBackend(t *testing.T) {
        }
        ac := &acfakes.FakeRuleService{}

-       _, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac)
+       _, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac, nil, nil, nil, nil)

        require.ErrorContains(t, err, "multi-backend target")
        require.ErrorContains(t, err, "unrecognized")

@@ -124,7 +124,7 @@ func TestConfigureHistorianBackend(t *testing.T) {
        }
        ac := &acfakes.FakeRuleService{}

-       _, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac)
+       _, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac, nil, nil, nil, nil)

        require.ErrorContains(t, err, "multi-backend target")
        require.ErrorContains(t, err, "unrecognized")

@@ -143,7 +143,42 @@ func TestConfigureHistorianBackend(t *testing.T) {
        }
        ac := &acfakes.FakeRuleService{}

-       h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac)
+       h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac, nil, nil, nil, nil)

        require.NotNil(t, h)
        require.NoError(t, err)
    })

+   t.Run("fail initialization if prometheus backend missing datasource UID", func(t *testing.T) {
+       met := metrics.NewHistorianMetrics(prometheus.NewRegistry(), metrics.Subsystem)
+       logger := log.NewNopLogger()
+       tracer := tracing.InitializeTracerForTest()
+       cfg := setting.UnifiedAlertingStateHistorySettings{
+           Enabled: true,
+           Backend: "prometheus",
+           // Missing PrometheusTargetDatasourceUID
+       }
+       ac := &acfakes.FakeRuleService{}
+
+       _, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac, nil, nil, nil, nil)
+
+       require.Error(t, err)
+       require.ErrorContains(t, err, "datasource UID must not be empty")
+   })
+
+   t.Run("successful initialization of prometheus backend", func(t *testing.T) {
+       met := metrics.NewHistorianMetrics(prometheus.NewRegistry(), metrics.Subsystem)
+       logger := log.NewNopLogger()
+       tracer := tracing.InitializeTracerForTest()
+       cfg := setting.UnifiedAlertingStateHistorySettings{
+           Enabled:                       true,
+           Backend:                       "prometheus",
+           PrometheusMetricName:          "test_metric",
+           PrometheusTargetDatasourceUID: "test-prometheus-uid",
+       }
+       ac := &acfakes.FakeRuleService{}
+
+       h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac, nil, nil, nil, nil)
+
+       require.NotNil(t, h)
+       require.NoError(t, err)

@@ -160,7 +195,7 @@ func TestConfigureHistorianBackend(t *testing.T) {
        }
        ac := &acfakes.FakeRuleService{}

-       h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac)
+       h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac, nil, nil, nil, nil)

        require.NotNil(t, h)
        require.NoError(t, err)

@@ -183,7 +218,7 @@ grafana_alerting_state_history_info{backend="annotations"} 1
        }
        ac := &acfakes.FakeRuleService{}

-       h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac)
+       h, err := configureHistorianBackend(context.Background(), cfg, nil, nil, nil, met, logger, tracer, ac, nil, nil, nil, nil)

        require.NotNil(t, h)
        require.NoError(t, err)
@@ -17,6 +17,7 @@ const (
    BackendTypeAnnotations BackendType = "annotations"
    BackendTypeLoki        BackendType = "loki"
    BackendTypeMultiple    BackendType = "multiple"
+   BackendTypePrometheus  BackendType = "prometheus"
    BackendTypeNoop        BackendType = "noop"
)

@@ -27,6 +28,7 @@ func ParseBackendType(s string) (BackendType, error) {
        BackendTypeAnnotations: {},
        BackendTypeLoki:        {},
        BackendTypeMultiple:    {},
+       BackendTypePrometheus:  {},
        BackendTypeNoop:        {},
    }
    p := BackendType(norm)
@@ -0,0 +1,242 @@
package historian

import (
    "context"
    "errors"
    "fmt"
    "maps"
    "math"
    "strings"
    "time"

    "github.com/grafana/dataplane/sdata/numeric"
    "github.com/grafana/grafana-plugin-sdk-go/data"
    promValue "github.com/prometheus/prometheus/model/value"

    "github.com/grafana/grafana/pkg/infra/log"
    "github.com/grafana/grafana/pkg/services/ngalert/eval"
    "github.com/grafana/grafana/pkg/services/ngalert/models"
    "github.com/grafana/grafana/pkg/services/ngalert/state"
    history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model"
    "github.com/grafana/grafana/pkg/setting"
)

const (
    // Label names for the alert metric.
    alertNameLabel = "alertname"
    // alertStateLabel is the label used to indicate
    // the Prometheus-style alert state: firing or pending.
    alertStateLabel = "alertstate"
    // grafanaAlertStateLabel is the label used to indicate the Grafana-style
    // alert state: alerting, pending, recovering, etc.
    grafanaAlertStateLabel = "grafana_alertstate"
    alertRuleUIDLabel      = "grafana_rule_uid"
)

// isMetricEmittingState defines which evaluation states should emit ALERTS metrics.
// Basically every state that is not Normal should emit metrics currently,
// and is defined here as an allowed state.
func isMetricEmittingState(state eval.State) bool {
    metricEmittingStates := map[eval.State]struct{}{
        eval.Alerting:   {},
        eval.Pending:    {},
        eval.Recovering: {},
        eval.Error:      {},
        eval.NoData:     {},
    }

    _, ok := metricEmittingStates[state]

    return ok
}

// getPrometheusState maps Grafana states to Prometheus alert states.
// In Prometheus, the alertstate label in the ALERTS metric can be either "firing" or "pending",
// so we need to convert Grafana states accordingly.
func getPrometheusState(grafanaState eval.State) string {
    if grafanaState == eval.Recovering || grafanaState == eval.Alerting || grafanaState == eval.Error || grafanaState == eval.NoData {
        return "firing"
    }

    return strings.ToLower(grafanaState.String())
}

type seriesWriter interface {
    WriteDatasource(ctx context.Context, dsUID string, name string, t time.Time, frames data.Frames, orgID int64, extraLabels map[string]string) error
}

type PrometheusConfig struct {
    DatasourceUID string
    MetricName    string
}

func NewPrometheusConfig(cfg setting.UnifiedAlertingStateHistorySettings) (PrometheusConfig, error) {
    if cfg.PrometheusTargetDatasourceUID == "" {
        return PrometheusConfig{}, errors.New("datasource UID must not be empty")
    }

    if cfg.PrometheusMetricName == "" {
        return PrometheusConfig{}, errors.New("metric name must not be empty")
    }

    return PrometheusConfig{
        DatasourceUID: cfg.PrometheusTargetDatasourceUID,
        MetricName:    cfg.PrometheusMetricName,
    }, nil
}

type RemotePrometheusBackend struct {
    cfg        PrometheusConfig
    promWriter seriesWriter
    logger     log.Logger
}

func NewRemotePrometheusBackend(cfg PrometheusConfig, promWriter seriesWriter, logger log.Logger) *RemotePrometheusBackend {
    logger.Info("Initializing remote Prometheus backend", "datasourceUID", cfg.DatasourceUID)

    return &RemotePrometheusBackend{
        cfg:        cfg,
        promWriter: promWriter,
        logger:     logger,
    }
}

func (b *RemotePrometheusBackend) Query(ctx context.Context, query models.HistoryQuery) (*data.Frame, error) {
    return nil, fmt.Errorf("prometheus historian backend does not support querying")
}

func (b *RemotePrometheusBackend) Record(ctx context.Context, rule history_model.RuleMeta, transitions []state.StateTransition) <-chan error {
    errCh := make(chan error, 1)

    if len(transitions) == 0 {
        errCh <- nil
        close(errCh)
        return errCh
    }

    logger := b.logger.FromContext(ctx)

    var frames data.Frames

    for _, t := range transitions {
        transitionFrames := b.framesFor(ctx, rule, t)
        frames = append(frames, transitionFrames...)
    }

    if len(frames) == 0 {
        logger.Debug("No frames generated for alert state metric, nothing to write")
        errCh <- nil
        close(errCh)
        return errCh
    }

    st := transitions[0]

    go func() {
        defer func() {
            if r := recover(); r != nil {
                logger.Error("Panic in prometheus historian", "error", r)
                errCh <- fmt.Errorf("prometheus historian panic: %v", r)
            }
            close(errCh)
        }()

        var sendErr error
        if err := b.promWriter.WriteDatasource(ctx, b.cfg.DatasourceUID, b.cfg.MetricName, st.LastEvaluationTime, frames, st.OrgID, nil); err != nil {
            logger.Error("Failed to write alert state metrics batch", "error", err)
            sendErr = err
        }
        errCh <- sendErr
    }()

    return errCh
}

// framesFor converts a single StateTransition to multiple data.Frames to handle
// transitions that require both StaleNaN for previous state and active metric for current state.
//
// StaleNaN: in the case of a transition from a metric-emitting state to a non-emitting state,
// or when the series changes from one metric-emitting state to another, we should emit a StaleNaN sample
// for the previous state to stop it in Prometheus:
// https://prometheus.io/docs/specs/prw/remote_write_spec/#stale-markers
func (b *RemotePrometheusBackend) framesFor(ctx context.Context, rule history_model.RuleMeta, t state.StateTransition) []*data.Frame {
    samples := getSamples(t)
    if len(samples) == 0 {
        return nil
    }

    logger := b.logger.FromContext(ctx)

    baseLabels := removePrivateLabels(t.Labels)
    baseLabels[alertRuleUIDLabel] = t.AlertRuleUID
    baseLabels[alertNameLabel] = rule.Title

    frameMeta := &data.FrameMeta{
        Type:        data.FrameTypeNumericMulti,
        TypeVersion: numeric.MultiFrameVersionLatest,
    }

    frames := make([]*data.Frame, len(samples))

    for i, sample := range samples {
        labels := make(data.Labels, len(baseLabels)+2)
        maps.Copy(labels, baseLabels)
        labels[alertStateLabel] = sample.promState
        labels[grafanaAlertStateLabel] = sample.grafanaState

        logger.Debug("Creating metric with labels",
            "rule_uid", t.AlertRuleUID,
            "previous_state", t.PreviousState,
            "current_state", t.State.State,
            "last_evaluation_time", t.LastEvaluationTime,
            "rule_title", rule.Title,
            "labels", labels,
            "value", sample.value,
        )

        field := data.NewField("", labels, []float64{sample.value})
        frames[i] = data.NewFrame(b.cfg.MetricName, field)
        frames[i].SetMeta(frameMeta)
    }

    return frames
}

type sample struct {
    value        float64
    grafanaState string
    promState    string
}

// getSamples generates samples based on the state transition.
func getSamples(tr state.StateTransition) []*sample {
    curr, prev := tr.State.State, tr.PreviousState

    var samples []*sample

    // If transitioning from a metric-emitting state to a different state,
    // emit a StaleNaN sample for the previous state to stop it in Prometheus.
    if isMetricEmittingState(prev) && prev != curr {
        prevState := strings.ToLower(prev.String())
        prevPromState := getPrometheusState(prev)

        samples = append(samples, &sample{
            value:        math.Float64frombits(promValue.StaleNaN),
            grafanaState: prevState,
            promState:    prevPromState,
        })
    }

    if isMetricEmittingState(curr) {
        currState := strings.ToLower(curr.String())
        currPromState := getPrometheusState(curr)

        samples = append(samples, &sample{
            value:        1.0,
            grafanaState: currState,
            promState:    currPromState,
        })
    }

    return samples
}
@@ -0,0 +1,344 @@
package historian

import (
    "context"
    "errors"
    "math"
    "testing"
    "time"

    "github.com/grafana/dataplane/sdata/numeric"
    "github.com/grafana/grafana-plugin-sdk-go/data"
    promValue "github.com/prometheus/prometheus/model/value"
    "github.com/stretchr/testify/mock"
    "github.com/stretchr/testify/require"

    "github.com/grafana/grafana/pkg/infra/log"
    "github.com/grafana/grafana/pkg/services/ngalert/eval"
    ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
    "github.com/grafana/grafana/pkg/services/ngalert/state"
    history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model"
    "github.com/grafana/grafana/pkg/setting"
)

const (
    testMetricName = "test_metric_name"
)

type fakeRemoteWriter struct {
    mock.Mock
}

func (f *fakeRemoteWriter) WriteDatasource(ctx context.Context, dsUID string, name string, t time.Time, frames data.Frames, orgID int64, extraLabels map[string]string) error {
    args := f.Called(ctx, dsUID, name, t, frames, orgID, extraLabels)
    return args.Error(0)
}

type panicRemoteWriter struct {
    mock.Mock
    panicMessage string
}

func (p *panicRemoteWriter) WriteDatasource(ctx context.Context, dsUID string, name string, t time.Time, frames data.Frames, orgID int64, extraLabels map[string]string) error {
    p.Called(ctx, dsUID, name, t, frames, orgID, extraLabels)
    panic(p.panicMessage)
}

func TestNewRemotePrometheusBackend(t *testing.T) {
    cfg, err := NewPrometheusConfig(setting.UnifiedAlertingStateHistorySettings{
        PrometheusTargetDatasourceUID: "test-ds-uid",
        PrometheusMetricName:          testMetricName,
    })
    require.NoError(t, err)

    fakeWriter := new(fakeRemoteWriter)
    logger := log.NewNopLogger()

    backend := NewRemotePrometheusBackend(cfg, fakeWriter, logger)

    require.NotNil(t, backend)
    require.Equal(t, cfg.DatasourceUID, backend.cfg.DatasourceUID)
    require.Equal(t, fakeWriter, backend.promWriter)
    require.Equal(t, logger, backend.logger)
}

func createExpectedFrame(t *testing.T, ruleUID, ruleName, promState, grafanaState string, instanceLabels data.Labels, value float64) *data.Frame {
    t.Helper()

    labels := instanceLabels.Copy()
    labels[alertRuleUIDLabel] = ruleUID
    labels[alertNameLabel] = ruleName
    labels[alertStateLabel] = promState
    labels[grafanaAlertStateLabel] = grafanaState

    valueField := data.NewField("", labels, []float64{value})

    frame := data.NewFrame(testMetricName, valueField)
    frame.SetMeta(&data.FrameMeta{
        Type:        data.FrameTypeNumericMulti,
        TypeVersion: numeric.MultiFrameVersionLatest,
    })
    return frame
}

func createTransition(from, to eval.State, orgID int64, now time.Time) state.StateTransition {
    return state.StateTransition{
        State:         &state.State{AlertRuleUID: "rule-uid", OrgID: orgID, Labels: data.Labels{"instance": "server1"}, State: to, LastEvaluationTime: now},
        PreviousState: from,
    }
}

func assertFramesEqual(t *testing.T, actualFrames data.Frames, expectedFrames data.Frames) {
    t.Helper()

    require.Len(t, actualFrames, len(expectedFrames))

    for i, expectedFrame := range expectedFrames {
        actualFrame := actualFrames[i]
        require.Equal(t, expectedFrame.Name, actualFrame.Name)
        require.Len(t, actualFrame.Fields, 1)

        expectedField := expectedFrame.Fields[0]
        actualField := actualFrame.Fields[0]

        // Check labels
        require.Equal(t, expectedField.Labels, actualField.Labels)

        // Check values with NaN handling
        expectedValue := expectedField.At(0).(float64)
        actualValue := actualField.At(0).(float64)
        if math.IsNaN(expectedValue) {
            require.True(t, math.IsNaN(actualValue))
        } else {
            require.Equal(t, expectedValue, actualValue)
        }
    }
}

func TestPrometheusBackend_Record(t *testing.T) {
    cfg := PrometheusConfig{DatasourceUID: "test-ds-uid", MetricName: testMetricName}
    logger := log.NewNopLogger()
    ctx := context.Background()
    orgID := int64(1)
    now := time.Now()
    ruleMeta := history_model.RuleMeta{Title: "test rule"}

    testCases := []struct {
        name           string
        ruleMeta       history_model.RuleMeta
        states         []state.StateTransition
        expectedErr    error
        expectedFrames data.Frames
    }{
        {
            name:     "No states",
            ruleMeta: history_model.RuleMeta{Title: "Test Rule No States"},
            states:   []state.StateTransition{},
        },
        {
            name:     "normal state only (no metrics emitted)",
            ruleMeta: ruleMeta,
            states: []state.StateTransition{
                {State: &state.State{AlertRuleUID: "rule-uid-normal", OrgID: orgID, Labels: data.Labels{"label1": "value1"}, State: eval.Normal, LastEvaluationTime: now}},
            },
        },
        {
            name:     "remote writer error",
            ruleMeta: ruleMeta,
            states: []state.StateTransition{
                {State: &state.State{AlertRuleUID: "rule-uid-err", OrgID: orgID, Labels: data.Labels{}, State: eval.Alerting, LastEvaluationTime: now}},
            },
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid-err", "test rule", "firing", "alerting", data.Labels{}, 1),
            },
            expectedErr: errors.New("remote write failed"),
        },
        {
            name:     "internal labels are skipped",
            ruleMeta: ruleMeta,
            states: []state.StateTransition{
                {
                    State: &state.State{
                        AlertRuleUID:       "rule-uid-internal",
                        OrgID:              orgID,
                        Labels:             data.Labels{ngmodels.AutogeneratedRouteLabel: "ignored", "label1": "value1", "__label2": "value2"},
                        State:              eval.Alerting,
                        LastEvaluationTime: now,
                    },
                },
            },
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid-internal", "test rule", "firing", "alerting", data.Labels{"label1": "value1"}, 1.0),
            },
        },
        {
            name:     "mixed states (normal, pending, recovering, error, nodata)",
            ruleMeta: ruleMeta,
            states: []state.StateTransition{
                {State: &state.State{AlertRuleUID: "rule-uid-normal", OrgID: orgID, Labels: data.Labels{"state": "normal"}, State: eval.Normal, LastEvaluationTime: now}},
                {State: &state.State{AlertRuleUID: "rule-uid-pending", OrgID: orgID, Labels: data.Labels{"state": "pending"}, State: eval.Pending, LastEvaluationTime: now}},
                {State: &state.State{AlertRuleUID: "rule-uid-recovering", OrgID: orgID, Labels: data.Labels{"state": "recovering"}, State: eval.Recovering, LastEvaluationTime: now}},
                {State: &state.State{AlertRuleUID: "rule-uid-error", OrgID: orgID, Labels: data.Labels{"state": "error"}, State: eval.Error, LastEvaluationTime: now}},
                {State: &state.State{AlertRuleUID: "rule-uid-nodata", OrgID: orgID, Labels: data.Labels{"state": "nodata"}, State: eval.NoData, LastEvaluationTime: now}},
            },
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid-pending", "test rule", "pending", "pending", data.Labels{"state": "pending"}, 1.0),
                createExpectedFrame(t, "rule-uid-recovering", "test rule", "firing", "recovering", data.Labels{"state": "recovering"}, 1.0),
                createExpectedFrame(t, "rule-uid-error", "test rule", "firing", "error", data.Labels{"state": "error"}, 1.0),
                createExpectedFrame(t, "rule-uid-nodata", "test rule", "firing", "nodata", data.Labels{"state": "nodata"}, 1.0),
            },
        },

        // State transitions - Normal to other states (single active frame)
        {
            name:     "normal to alerting transition",
            ruleMeta: ruleMeta,
            states:   []state.StateTransition{createTransition(eval.Normal, eval.Alerting, orgID, now)},
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid", "test rule", "firing", "alerting", data.Labels{"instance": "server1"}, 1.0),
            },
        },
        {
            name:     "normal to pending transition",
            ruleMeta: ruleMeta,
            states:   []state.StateTransition{createTransition(eval.Normal, eval.Pending, orgID, now)},
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid", "test rule", "pending", "pending", data.Labels{"instance": "server1"}, 1.0),
            },
        },
        {
            name:     "normal to error transition",
            ruleMeta: ruleMeta,
            states:   []state.StateTransition{createTransition(eval.Normal, eval.Error, orgID, now)},
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid", "test rule", "firing", "error", data.Labels{"instance": "server1"}, 1.0),
            },
        },

        // Transitions to Normal (StaleNaN only)
        {
            name:     "alerting to normal transition",
            ruleMeta: ruleMeta,
            states:   []state.StateTransition{createTransition(eval.Alerting, eval.Normal, orgID, now)},
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid", "test rule", "firing", "alerting", data.Labels{"instance": "server1"}, math.Float64frombits(promValue.StaleNaN)),
            },
        },
        {
            name:     "error to normal transition",
            ruleMeta: ruleMeta,
            states:   []state.StateTransition{createTransition(eval.Error, eval.Normal, orgID, now)},
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid", "test rule", "firing", "error", data.Labels{"instance": "server1"}, math.Float64frombits(promValue.StaleNaN)),
            },
        },
        {
            name:     "pending to alerting transition",
            ruleMeta: ruleMeta,
            states:   []state.StateTransition{createTransition(eval.Pending, eval.Alerting, orgID, now)},
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid", "test rule", "pending", "pending", data.Labels{"instance": "server1"}, math.Float64frombits(promValue.StaleNaN)),
                createExpectedFrame(t, "rule-uid", "test rule", "firing", "alerting", data.Labels{"instance": "server1"}, 1.0),
            },
        },
        {
            name:     "alerting to recovering transition",
            ruleMeta: ruleMeta,
            states:   []state.StateTransition{createTransition(eval.Alerting, eval.Recovering, orgID, now)},
            expectedFrames: data.Frames{
                createExpectedFrame(t, "rule-uid", "test rule", "firing", "alerting", data.Labels{"instance": "server1"}, math.Float64frombits(promValue.StaleNaN)),
                createExpectedFrame(t, "rule-uid", "test rule", "firing", "recovering", data.Labels{"instance": "server1"}, 1.0),
            },
        },

        // No metric should be written
        {
            name:     "Normal to Normal transition",
            ruleMeta: ruleMeta,
            states:   []state.StateTransition{createTransition(eval.Normal, eval.Normal, orgID, now)},
        },
    }

    for _, tc := range testCases {
        t.Run(tc.name, func(t *testing.T) {
            fakeWriter := new(fakeRemoteWriter)
            backend := NewRemotePrometheusBackend(cfg, fakeWriter, logger)

            if tc.expectedFrames != nil {
                var extraLabels map[string]string
                fakeWriter.On(
                    "WriteDatasource", ctx, cfg.DatasourceUID, testMetricName, now, mock.Anything, orgID, extraLabels,
                ).Return(tc.expectedErr).Once().Run(func(args mock.Arguments) {
                    if tc.expectedErr == nil {
                        actualFrames := args.Get(4).(data.Frames)
                        assertFramesEqual(t, actualFrames, tc.expectedFrames)
                    }
                })
            }

            errCh := backend.Record(ctx, tc.ruleMeta, tc.states)
            err, ok := <-errCh
            require.True(t, ok)

            if tc.expectedErr == nil {
                require.Nil(t, err)
            } else {
                require.ErrorIs(t, err, tc.expectedErr)
            }

            fakeWriter.AssertExpectations(t)
            if tc.expectedFrames == nil {
                fakeWriter.AssertNotCalled(t, "WriteDatasource", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything)
            }
        })
    }
}

func TestPrometheusBackend_Query(t *testing.T) {
    cfg := PrometheusConfig{DatasourceUID: "test-ds-uid", MetricName: testMetricName}
    logger := log.NewNopLogger()
    fakeWriter := new(fakeRemoteWriter)

    backend := NewRemotePrometheusBackend(cfg, fakeWriter, logger)

    frame, err := backend.Query(context.Background(), ngmodels.HistoryQuery{})
    require.Error(t, err)
    require.Nil(t, frame)
    require.Contains(t, err.Error(), "prometheus historian backend does not support querying")
}

func TestPrometheusBackend_Record_PanicRecovery(t *testing.T) {
    cfg := PrometheusConfig{DatasourceUID: "test-ds-uid", MetricName: testMetricName}
    logger := log.NewNopLogger()
    ctx := context.Background()
    orgID := int64(1)
    now := time.Now()
    ruleMeta := history_model.RuleMeta{Title: "test rule"}

    panicMessage := "panic in WriteDatasource"
    panicWriter := &panicRemoteWriter{panicMessage: panicMessage}

    panicWriter.On("WriteDatasource", ctx, cfg.DatasourceUID, testMetricName, now, mock.Anything, orgID, mock.Anything).Once()

    backend := NewRemotePrometheusBackend(cfg, panicWriter, logger)

    states := []state.StateTransition{
        {State: &state.State{
            AlertRuleUID:       "rule-uid-panic",
            OrgID:              orgID,
            Labels:             data.Labels{"test": "panic"},
            State:              eval.Alerting,
            LastEvaluationTime: now,
        }},
    }

    errCh := backend.Record(ctx, ruleMeta, states)

    err, ok := <-errCh
    require.True(t, ok)
    require.Error(t, err)
    require.ErrorContains(t, err, "prometheus historian panic")
    require.ErrorContains(t, err, panicMessage)

    panicWriter.AssertExpectations(t)
}
@@ -2058,3 +2058,219 @@ func mergeLabels(a, b data.Labels) data.Labels {
    }
    return result
}

// TestStateManager_HistorianIntegration tests that the state manager properly sends
// all expected state transitions to the historian backend.
func TestStateManager_HistorianIntegration(t *testing.T) {
    baseInterval := 1 * time.Second
    tN := func(n int) time.Time {
        return time.Unix(0, 0).UTC().Add(time.Duration(n) * baseInterval)
    }
    t1 := tN(1)
    t2 := tN(2)
    t3 := tN(3)

    labels1 := data.Labels{"instance": "server1", "job": "webapp"}
    labels2 := data.Labels{"instance": "server2", "job": "webapp"}

    baseRule := &models.AlertRule{
        ID:              1,
        OrgID:           1,
        Title:           "test rule",
        UID:             "test-rule-uid",
        NamespaceUID:    "test-namespace",
        IntervalSeconds: 10,
        NoDataState:     models.NoData,
        ExecErrState:    models.ErrorErrState,
        For:             0,
    }

    type transition struct {
        previousState eval.State
        currentState  eval.State
    }

    scenarios := []struct {
        name                string
        rule                *models.AlertRule
        evaluations         map[time.Time][]eval.Result
        expectedTransitions map[time.Time][]transition
    }{
        {
            name: "1:normal -> 1:alerting -> 1:normal",
            rule: baseRule,
            evaluations: map[time.Time][]eval.Result{
                t1: {
                    {Instance: labels1, State: eval.Normal},
                },
                t2: {
                    {Instance: labels1, State: eval.Alerting},
                },
                t3: {
                    {Instance: labels1, State: eval.Normal},
                },
            },
            expectedTransitions: map[time.Time][]transition{
                t1: {
                    {previousState: eval.Normal, currentState: eval.Normal},
                },
                t2: {
                    {previousState: eval.Normal, currentState: eval.Alerting},
                },
                t3: {
                    {previousState: eval.Alerting, currentState: eval.Normal},
                },
            },
        },
        {
            name: "1:alerting, 2:alerting -> 2:alerting -> {}",
            rule: baseRule,
            evaluations: map[time.Time][]eval.Result{
                t1: {
                    {Instance: labels1, State: eval.Alerting},
                    {Instance: labels2, State: eval.Alerting},
                },
                t2: {
                    // labels1 is missing from this evaluation
                    {Instance: labels2, State: eval.Alerting},
                },
                t3: {
                    // Both labels1 and labels2 are missing
                },
            },
            expectedTransitions: map[time.Time][]transition{
                t1: {
                    {previousState: eval.Normal, currentState: eval.Alerting},
                    {previousState: eval.Normal, currentState: eval.Alerting},
                },
                t2: {
                    {previousState: eval.Alerting, currentState: eval.Alerting},
                    {previousState: eval.Alerting, currentState: eval.Alerting},
                },
                t3: {
                    {previousState: eval.Alerting, currentState: eval.Alerting},
                    {previousState: eval.Alerting, currentState: eval.Alerting},
                },
            },
        },
        {
            name: "1:alerting -> {} -> {}",
            rule: baseRule,
            evaluations: map[time.Time][]eval.Result{
                t1: {
                    {Instance: labels1, State: eval.Alerting, EvaluatedAt: t1},
                },
                t2: {
                    // labels1 is missing - first missing evaluation
                },
                t3: {
                    // labels1 is still missing - second missing evaluation
                },
            },
            expectedTransitions: map[time.Time][]transition{
                t1: {
                    {previousState: eval.Normal, currentState: eval.Alerting},
                },
                t2: {
                    {previousState: eval.Alerting, currentState: eval.Alerting},
                },
                t3: {
                    {previousState: eval.Alerting, currentState: eval.Alerting},
                },
            },
        },
        {
            name: "1:alerting -> 1:recovering -> 1:normal",
            rule: &models.AlertRule{
                ID:              1,
                OrgID:           1,
                Title:           "test rule",
                UID:             "test-rule-uid",
                NamespaceUID:    "test-namespace",
                IntervalSeconds: 10,
                NoDataState:     models.NoData,
                ExecErrState:    models.ErrorErrState,
                For:             0,
                KeepFiringFor:   10,
            },
            evaluations: map[time.Time][]eval.Result{
                t1: {
                    {Instance: labels1, State: eval.Alerting},
                },
                t2: {
                    {Instance: labels1, State: eval.Normal},
                },
                t3: {
                    {Instance: labels1, State: eval.Normal},
                },
            },
            expectedTransitions: map[time.Time][]transition{
                t1: {
                    {previousState: eval.Normal, currentState: eval.Alerting},
                },
                t2: {
                    {previousState: eval.Alerting, currentState: eval.Recovering},
                },
                t3: {
                    {previousState: eval.Recovering, currentState: eval.Normal},
                },
            },
        },
    }

    for _, scenario := range scenarios {
        t.Run(scenario.name, func(t *testing.T) {
            historian := &state.FakeHistorian{}

            cfg := state.ManagerCfg{
                Metrics:       metrics.NewNGAlert(prometheus.NewPedanticRegistry()).GetStateMetrics(),
                ExternalURL:   nil,
                InstanceStore: &state.FakeInstanceStore{},
                Images:        &state.NotAvailableImageService{},
                Clock:         clock.NewMock(),
                Historian:     historian,
                Tracer:        tracing.InitializeTracerForTest(),
                Log:           log.NewNopLogger(),
            }

            mgr := state.NewManager(cfg, state.NewNoopPersister())

            // Helper function to process one time step and verify historian
            processTimeStep := func(evalTime time.Time) {
                results := scenario.evaluations[evalTime]
                expectedTransitions := scenario.expectedTransitions[evalTime]

                for i := range results {
                    results[i].EvaluatedAt = evalTime
                }

                // Clear historian state transitions before the evaluation
                historian.StateTransitions = nil

                mgr.ProcessEvalResults(
                    context.Background(),
                    evalTime,
                    scenario.rule,
                    results,
                    make(data.Labels),
                    nil,
                )

                // Extract just the data we care about from the actual transitions
                actualTransitions := make([]transition, len(historian.StateTransitions))
                for i, t := range historian.StateTransitions {
                    actualTransitions[i] = transition{
                        previousState: t.PreviousState,
                        currentState:  t.State.State,
                    }
                }

                require.ElementsMatch(t, expectedTransitions, actualTransitions)
            }

            processTimeStep(t1)
            processTimeStep(t2)
            processTimeStep(t3)
        })
    }
}
@@ -67,6 +67,8 @@ const (
    lokiDefaultMaxQueryLength      = 721 * time.Hour // 30d1h, matches the default value in Loki
    defaultRecordingRequestTimeout = 10 * time.Second
    lokiDefaultMaxQuerySize        = 65536 // 64kb
+   defaultHistorianPrometheusWriteTimeout = 10 * time.Second
+   defaultHistorianPrometheusMetricName   = "ALERTS"
)

var (

@@ -192,6 +194,9 @@ type UnifiedAlertingStateHistorySettings struct {
    LokiBasicAuthUsername string
    LokiMaxQueryLength    time.Duration
    LokiMaxQuerySize      int
+   PrometheusMetricName          string
+   PrometheusTargetDatasourceUID string
+   PrometheusWriteTimeout        time.Duration
    MultiPrimary     string
    MultiSecondaries []string
    ExternalLabels   map[string]string

@@ -460,6 +465,9 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
        LokiMaxQuerySize: stateHistory.Key("loki_max_query_size").MustInt(lokiDefaultMaxQuerySize),
        MultiPrimary:     stateHistory.Key("primary").MustString(""),
        MultiSecondaries: splitTrim(stateHistory.Key("secondaries").MustString(""), ","),
+       PrometheusMetricName:          stateHistory.Key("prometheus_metric_name").MustString(defaultHistorianPrometheusMetricName),
+       PrometheusTargetDatasourceUID: stateHistory.Key("prometheus_target_datasource_uid").MustString(""),
+       PrometheusWriteTimeout:        stateHistory.Key("prometheus_write_timeout").MustDuration(defaultHistorianPrometheusWriteTimeout),
        ExternalLabels: stateHistoryLabels.KeysHash(),
    }
    uaCfg.StateHistory = uaCfgStateHistory