| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | // Copyright 2013 Prometheus Team
 | 
					
						
							|  |  |  | // Licensed under the Apache License, Version 2.0 (the "License");
 | 
					
						
							|  |  |  | // you may not use this file except in compliance with the License.
 | 
					
						
							|  |  |  | // You may obtain a copy of the License at
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // http://www.apache.org/licenses/LICENSE-2.0
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // Unless required by applicable law or agreed to in writing, software
 | 
					
						
							|  |  |  | // distributed under the License is distributed on an "AS IS" BASIS,
 | 
					
						
							|  |  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					
						
							|  |  |  | // See the License for the specific language governing permissions and
 | 
					
						
							|  |  |  | // limitations under the License.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package rules | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2013-04-26 22:02:52 +08:00
										 |  |  | 	"fmt" | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	"github.com/prometheus/prometheus/model" | 
					
						
							|  |  |  | 	"github.com/prometheus/prometheus/rules/ast" | 
					
						
							| 
									
										
										
										
											2013-05-07 19:15:10 +08:00
										 |  |  | 	"github.com/prometheus/prometheus/storage/metric" | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	"github.com/prometheus/prometheus/utility" | 
					
						
							|  |  |  | 	"time" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // States that active alerts can be in.
 | 
					
						
							|  |  |  | type alertState int | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-05-16 13:38:31 +08:00
										 |  |  | func (s alertState) String() string { | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	switch s { | 
					
						
							|  |  |  | 	case PENDING: | 
					
						
							| 
									
										
										
										
											2013-05-16 13:38:31 +08:00
										 |  |  | 		return "pending" | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	case FIRING: | 
					
						
							| 
									
										
										
										
											2013-05-16 13:38:31 +08:00
										 |  |  | 		return "firing" | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 		panic("undefined") | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							|  |  |  | 	PENDING alertState = iota | 
					
						
							|  |  |  | 	FIRING | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // alert is used to track active (pending/firing) alerts over time.
 | 
					
						
							|  |  |  | type alert struct { | 
					
						
							|  |  |  | 	// The name of the alert.
 | 
					
						
							| 
									
										
										
										
											2013-04-06 00:03:45 +08:00
										 |  |  | 	name string | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	// The vector element labelset triggering this alert.
 | 
					
						
							| 
									
										
										
										
											2013-04-06 00:03:45 +08:00
										 |  |  | 	metric model.Metric | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	// The state of the alert (PENDING or FIRING).
 | 
					
						
							| 
									
										
										
										
											2013-04-06 00:03:45 +08:00
										 |  |  | 	state alertState | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	// The time when the alert first transitioned into PENDING state.
 | 
					
						
							|  |  |  | 	activeSince time.Time | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // sample returns a Sample suitable for recording the alert.
 | 
					
						
							|  |  |  | func (a alert) sample(timestamp time.Time, value model.SampleValue) model.Sample { | 
					
						
							|  |  |  | 	recordedMetric := model.Metric{} | 
					
						
							|  |  |  | 	for label, value := range a.metric { | 
					
						
							|  |  |  | 		recordedMetric[label] = value | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	recordedMetric[model.MetricNameLabel] = model.AlertMetricName | 
					
						
							|  |  |  | 	recordedMetric[model.AlertNameLabel] = model.LabelValue(a.name) | 
					
						
							|  |  |  | 	recordedMetric[model.AlertStateLabel] = model.LabelValue(a.state.String()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return model.Sample{ | 
					
						
							|  |  |  | 		Metric:    recordedMetric, | 
					
						
							|  |  |  | 		Value:     value, | 
					
						
							|  |  |  | 		Timestamp: timestamp, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // An alerting rule generates alerts from its vector expression.
 | 
					
						
							|  |  |  | type AlertingRule struct { | 
					
						
							|  |  |  | 	// The name of the alert.
 | 
					
						
							| 
									
										
										
										
											2013-04-06 00:03:45 +08:00
										 |  |  | 	name string | 
					
						
							|  |  |  | 	// The vector expression from which to generate alerts.
 | 
					
						
							|  |  |  | 	vector ast.VectorNode | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	// The duration for which a labelset needs to persist in the expression
 | 
					
						
							|  |  |  | 	// output vector before an alert transitions from PENDING to FIRING state.
 | 
					
						
							|  |  |  | 	holdDuration time.Duration | 
					
						
							|  |  |  | 	// Extra labels to attach to the resulting alert sample vectors.
 | 
					
						
							| 
									
										
										
										
											2013-04-06 00:03:45 +08:00
										 |  |  | 	labels model.LabelSet | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	// A map of alerts which are currently active (PENDING or FIRING), keyed by
 | 
					
						
							|  |  |  | 	// the fingerprint of the labelset they correspond to.
 | 
					
						
							|  |  |  | 	activeAlerts map[model.Fingerprint]*alert | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (rule AlertingRule) Name() string { return rule.name } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-05-16 13:38:31 +08:00
										 |  |  | func (rule AlertingRule) EvalRaw(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) { | 
					
						
							| 
									
										
										
										
											2013-05-07 19:15:10 +08:00
										 |  |  | 	return ast.EvalVectorInstant(rule.vector, timestamp, storage) | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-05-16 13:38:31 +08:00
										 |  |  | func (rule AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) { | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	// Get the raw value of the rule expression.
 | 
					
						
							| 
									
										
										
										
											2013-05-07 19:15:10 +08:00
										 |  |  | 	exprResult, err := rule.EvalRaw(timestamp, storage) | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	if err != nil { | 
					
						
							| 
									
										
										
										
											2013-05-16 13:38:31 +08:00
										 |  |  | 		return nil, err | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Create pending alerts for any new vector elements in the alert expression.
 | 
					
						
							|  |  |  | 	resultFingerprints := utility.Set{} | 
					
						
							|  |  |  | 	for _, sample := range exprResult { | 
					
						
							| 
									
										
										
										
											2013-05-17 18:58:15 +08:00
										 |  |  | 		fp := *model.NewFingerprintFromMetric(sample.Metric) | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 		resultFingerprints.Add(fp) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if _, ok := rule.activeAlerts[fp]; !ok { | 
					
						
							|  |  |  | 			rule.activeAlerts[fp] = &alert{ | 
					
						
							|  |  |  | 				name:        rule.name, | 
					
						
							|  |  |  | 				metric:      sample.Metric, | 
					
						
							|  |  |  | 				state:       PENDING, | 
					
						
							|  |  |  | 				activeSince: timestamp, | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-05-16 13:38:31 +08:00
										 |  |  | 	vector := ast.Vector{} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | 	// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
 | 
					
						
							|  |  |  | 	for fp, activeAlert := range rule.activeAlerts { | 
					
						
							|  |  |  | 		if !resultFingerprints.Has(fp) { | 
					
						
							|  |  |  | 			vector = append(vector, activeAlert.sample(timestamp, 0)) | 
					
						
							|  |  |  | 			delete(rule.activeAlerts, fp) | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if activeAlert.state == PENDING && timestamp.Sub(activeAlert.activeSince) >= rule.holdDuration { | 
					
						
							|  |  |  | 			vector = append(vector, activeAlert.sample(timestamp, 0)) | 
					
						
							|  |  |  | 			activeAlert.state = FIRING | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		vector = append(vector, activeAlert.sample(timestamp, 1)) | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2013-05-16 13:38:31 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	return vector, nil | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-04-26 22:02:52 +08:00
										 |  |  | func (rule AlertingRule) ToDotGraph() string { | 
					
						
							|  |  |  | 	graph := fmt.Sprintf(`digraph "Rules" { | 
					
						
							|  |  |  | 	  %#p[shape="box",label="ALERT %s IF FOR %s"]; | 
					
						
							|  |  |  | 		%#p -> %#p; | 
					
						
							|  |  |  | 		%s | 
					
						
							|  |  |  | 	}`, &rule, rule.name, utility.DurationToString(rule.holdDuration), &rule, rule.vector, rule.vector.NodeTreeToDotGraph()) | 
					
						
							|  |  |  | 	return graph | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-04-24 17:51:40 +08:00
										 |  |  | // Construct a new AlertingRule.
 | 
					
						
							|  |  |  | func NewAlertingRule(name string, vector ast.VectorNode, holdDuration time.Duration, labels model.LabelSet) *AlertingRule { | 
					
						
							|  |  |  | 	return &AlertingRule{ | 
					
						
							|  |  |  | 		name:         name, | 
					
						
							|  |  |  | 		vector:       vector, | 
					
						
							|  |  |  | 		holdDuration: holdDuration, | 
					
						
							|  |  |  | 		labels:       labels, | 
					
						
							|  |  |  | 		activeAlerts: map[model.Fingerprint]*alert{}, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } |