2016-05-17 20:31:52 +08:00
|
|
|
package alerting
|
|
|
|
|
|
|
|
|
|
import (
|
2016-05-23 16:02:17 +08:00
|
|
|
"math/rand"
|
|
|
|
|
"strconv"
|
2016-05-17 20:31:52 +08:00
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"github.com/grafana/grafana/pkg/log"
|
|
|
|
|
m "github.com/grafana/grafana/pkg/models"
|
|
|
|
|
"github.com/grafana/grafana/pkg/setting"
|
2016-05-23 20:14:02 +08:00
|
|
|
"sync"
|
2016-05-17 20:31:52 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
func Init() {
|
|
|
|
|
if !setting.AlertingEnabled {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
log.Info("Alerting: Initializing scheduler...")
|
|
|
|
|
|
|
|
|
|
scheduler := NewScheduler()
|
2016-05-23 16:59:28 +08:00
|
|
|
go scheduler.Dispatch(&AlertRuleReader{})
|
|
|
|
|
go scheduler.Executor(&DummieExecutor{})
|
2016-05-17 20:31:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type Scheduler struct {
|
|
|
|
|
jobs []*AlertJob
|
|
|
|
|
runQueue chan *AlertJob
|
2016-05-23 20:14:02 +08:00
|
|
|
mtx sync.RWMutex
|
2016-05-23 16:02:17 +08:00
|
|
|
|
2016-05-23 16:59:28 +08:00
|
|
|
alertRuleFetcher RuleReader
|
|
|
|
|
|
2016-05-23 16:02:17 +08:00
|
|
|
serverId string
|
|
|
|
|
serverPosition int
|
|
|
|
|
clusterSize int
|
2016-05-17 20:31:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func NewScheduler() *Scheduler {
|
|
|
|
|
return &Scheduler{
|
|
|
|
|
jobs: make([]*AlertJob, 0),
|
|
|
|
|
runQueue: make(chan *AlertJob, 1000),
|
2016-05-23 16:02:17 +08:00
|
|
|
serverId: strconv.Itoa(rand.Intn(1000)),
|
2016-05-17 20:31:52 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
func (this *Scheduler) heartBeat() {
|
2016-05-23 16:02:17 +08:00
|
|
|
//write heartBeat to db.
|
|
|
|
|
//get the modulus position of active servers
|
|
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
log.Info("Heartbeat: Sending heartbeat from " + this.serverId)
|
|
|
|
|
this.clusterSize = 1
|
|
|
|
|
this.serverPosition = 1
|
2016-05-23 16:02:17 +08:00
|
|
|
}
|
|
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
func (this *Scheduler) Dispatch(reader RuleReader) {
|
2016-05-17 20:31:52 +08:00
|
|
|
reschedule := time.NewTicker(time.Second * 10)
|
|
|
|
|
secondTicker := time.NewTicker(time.Second)
|
2016-05-23 18:15:36 +08:00
|
|
|
heartbeat := time.NewTicker(time.Second * 5)
|
2016-05-17 20:31:52 +08:00
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
this.heartBeat()
|
|
|
|
|
this.updateJobs(reader.Fetch)
|
2016-05-17 20:31:52 +08:00
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
select {
|
|
|
|
|
case <-secondTicker.C:
|
2016-05-23 20:14:02 +08:00
|
|
|
this.queueJobs()
|
2016-05-17 20:31:52 +08:00
|
|
|
case <-reschedule.C:
|
2016-05-23 20:14:02 +08:00
|
|
|
this.updateJobs(reader.Fetch)
|
2016-05-23 18:15:36 +08:00
|
|
|
case <-heartbeat.C:
|
2016-05-23 20:14:02 +08:00
|
|
|
this.heartBeat()
|
2016-05-17 20:31:52 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
func (this *Scheduler) updateJobs(f func() []m.AlertRule) {
|
2016-05-23 16:59:28 +08:00
|
|
|
log.Debug("Scheduler: UpdateJobs()")
|
2016-05-17 20:31:52 +08:00
|
|
|
|
|
|
|
|
jobs := make([]*AlertJob, 0)
|
2016-05-23 18:04:34 +08:00
|
|
|
rules := f()
|
2016-05-23 16:02:17 +08:00
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
this.mtx.Lock()
|
|
|
|
|
defer this.mtx.Unlock()
|
|
|
|
|
|
|
|
|
|
for i := this.serverPosition - 1; i < len(rules); i += this.clusterSize {
|
2016-05-23 16:02:17 +08:00
|
|
|
rule := rules[i]
|
|
|
|
|
jobs = append(jobs, &AlertJob{
|
2016-05-23 17:45:15 +08:00
|
|
|
id: rule.Id,
|
2016-05-23 16:02:17 +08:00
|
|
|
name: rule.Title,
|
|
|
|
|
frequency: rule.Frequency,
|
|
|
|
|
rule: rule,
|
|
|
|
|
offset: int64(len(jobs)),
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
log.Debug("Scheduler: Selected %d jobs", len(jobs))
|
2016-05-17 20:31:52 +08:00
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
this.jobs = jobs
|
2016-05-17 20:31:52 +08:00
|
|
|
}
|
|
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
func (this *Scheduler) queueJobs() {
|
2016-05-17 20:31:52 +08:00
|
|
|
now := time.Now().Unix()
|
|
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
for _, job := range this.jobs {
|
|
|
|
|
if now%job.frequency == 0 && job.running == false {
|
2016-05-17 20:31:52 +08:00
|
|
|
log.Info("Scheduler: Putting job on to run queue: %s", job.name)
|
2016-05-23 20:14:02 +08:00
|
|
|
this.runQueue <- job
|
2016-05-17 20:31:52 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
func (this *Scheduler) Executor(executor Executor) {
|
|
|
|
|
for job := range this.runQueue {
|
|
|
|
|
log.Info("Executor: queue length %d", len(this.runQueue))
|
2016-05-17 20:31:52 +08:00
|
|
|
log.Info("Executor: executing %s", job.name)
|
2016-05-23 20:14:02 +08:00
|
|
|
go Measure(executor, job)
|
2016-05-17 20:31:52 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-23 20:14:02 +08:00
|
|
|
func Measure(exec Executor, rule *AlertJob) {
|
|
|
|
|
now := time.Now()
|
|
|
|
|
rule.running = true
|
|
|
|
|
exec.Execute(rule.rule)
|
|
|
|
|
rule.running = true
|
|
|
|
|
elapsed := time.Since(now)
|
|
|
|
|
log.Info("Schedular: exeuction took %v milli seconds", elapsed.Nanoseconds()/1000000)
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-17 20:31:52 +08:00
|
|
|
type AlertJob struct {
|
|
|
|
|
id int64
|
|
|
|
|
name string
|
|
|
|
|
frequency int64
|
|
|
|
|
offset int64
|
|
|
|
|
delay bool
|
2016-05-23 20:14:02 +08:00
|
|
|
running bool
|
2016-05-23 16:02:17 +08:00
|
|
|
rule m.AlertRule
|
2016-05-17 20:31:52 +08:00
|
|
|
}
|
|
|
|
|
|
2016-05-23 16:59:28 +08:00
|
|
|
// AlertResult describes the outcome of an alert execution. It is not used by
// any code visible in this part of the file.
type AlertResult struct {
	// id is presumably the id of the evaluated rule or job — TODO confirm.
	id int64

	// state holds the resulting state as a string.
	state string

	// duration is declared as a time.Time.
	// NOTE(review): the name suggests a time span (time.Duration) — confirm
	// the intended type with the code that fills it in.
	duration time.Time
}
|