Add tracing spans to promql (#4436)
* Add spans to promql
  Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>
* Simplify timer and span tracking.
  Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>
This commit is contained in:
		
							parent
							
								
									0b4d22b245
								
							
						
					
					
						commit
						71855a22a4
					
				|  | @ -85,7 +85,7 @@ type Query interface { | |||
| 	// Statement returns the parsed statement of the query.
 | ||||
| 	Statement() Statement | ||||
| 	// Stats returns statistics about the lifetime of the query.
 | ||||
| 	Stats() *stats.TimerGroup | ||||
| 	Stats() *stats.QueryTimers | ||||
| 	// Cancel signals that a running query execution should be aborted.
 | ||||
| 	Cancel() | ||||
| } | ||||
|  | @ -99,7 +99,7 @@ type query struct { | |||
| 	// Statement of the parsed query.
 | ||||
| 	stmt Statement | ||||
| 	// Timer stats for the query execution.
 | ||||
| 	stats *stats.TimerGroup | ||||
| 	stats *stats.QueryTimers | ||||
| 	// Result matrix for reuse.
 | ||||
| 	matrix Matrix | ||||
| 	// Cancellation function for the query.
 | ||||
|  | @ -115,7 +115,7 @@ func (q *query) Statement() Statement { | |||
| } | ||||
| 
 | ||||
| // Stats implements the Query interface.
 | ||||
| func (q *query) Stats() *stats.TimerGroup { | ||||
| func (q *query) Stats() *stats.QueryTimers { | ||||
| 	return q.stats | ||||
| } | ||||
| 
 | ||||
|  | @ -276,7 +276,7 @@ func (ng *Engine) newQuery(q storage.Queryable, expr Expr, start, end time.Time, | |||
| 	qry := &query{ | ||||
| 		stmt:      es, | ||||
| 		ng:        ng, | ||||
| 		stats:     stats.NewTimerGroup(), | ||||
| 		stats:     stats.NewQueryTimers(), | ||||
| 		queryable: q, | ||||
| 	} | ||||
| 	return qry | ||||
|  | @ -294,7 +294,7 @@ func (ng *Engine) newTestQuery(f func(context.Context) error) Query { | |||
| 		q:     "test statement", | ||||
| 		stmt:  testStmt(f), | ||||
| 		ng:    ng, | ||||
| 		stats: stats.NewTimerGroup(), | ||||
| 		stats: stats.NewQueryTimers(), | ||||
| 	} | ||||
| 	return qry | ||||
| } | ||||
|  | @ -310,25 +310,25 @@ func (ng *Engine) exec(ctx context.Context, q *query) (Value, error) { | |||
| 	ctx, cancel := context.WithTimeout(ctx, ng.timeout) | ||||
| 	q.cancel = cancel | ||||
| 
 | ||||
| 	execTimer := q.stats.GetTimer(stats.ExecTotalTime).Start() | ||||
| 	defer execTimer.Stop() | ||||
| 	queueTimer := q.stats.GetTimer(stats.ExecQueueTime).Start() | ||||
| 	execSpanTimer, ctx := q.stats.GetSpanTimer(ctx, stats.ExecTotalTime) | ||||
| 	defer execSpanTimer.Finish() | ||||
| 
 | ||||
| 	queueSpanTimer, _ := q.stats.GetSpanTimer(ctx, stats.ExecQueueTime, ng.metrics.queryQueueTime) | ||||
| 
 | ||||
| 	if err := ng.gate.Start(ctx); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer ng.gate.Done() | ||||
| 
 | ||||
| 	queueTimer.Stop() | ||||
| 	ng.metrics.queryQueueTime.Observe(queueTimer.ElapsedTime().Seconds()) | ||||
| 	queueSpanTimer.Finish() | ||||
| 
 | ||||
| 	// Cancel when execution is done or an error was raised.
 | ||||
| 	defer q.cancel() | ||||
| 
 | ||||
| 	const env = "query execution" | ||||
| 
 | ||||
| 	evalTimer := q.stats.GetTimer(stats.EvalTotalTime).Start() | ||||
| 	defer evalTimer.Stop() | ||||
| 	evalSpanTimer, ctx := q.stats.GetSpanTimer(ctx, stats.EvalTotalTime) | ||||
| 	defer evalSpanTimer.Finish() | ||||
| 
 | ||||
| 	// The base context might already be canceled on the first iteration (e.g. during shutdown).
 | ||||
| 	if err := contextDone(ctx, env); err != nil { | ||||
|  | @ -355,10 +355,9 @@ func durationMilliseconds(d time.Duration) int64 { | |||
| 
 | ||||
| // execEvalStmt evaluates the expression of an evaluation statement for the given time range.
 | ||||
| func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) (Value, error) { | ||||
| 	prepareTimer := query.stats.GetTimer(stats.QueryPreparationTime).Start() | ||||
| 	querier, err := ng.populateSeries(ctx, query.queryable, s) | ||||
| 	prepareTimer.Stop() | ||||
| 	ng.metrics.queryPrepareTime.Observe(prepareTimer.ElapsedTime().Seconds()) | ||||
| 	prepareSpanTimer, ctxPrepare := query.stats.GetSpanTimer(ctx, stats.QueryPreparationTime, ng.metrics.queryPrepareTime) | ||||
| 	querier, err := ng.populateSeries(ctxPrepare, query.queryable, s) | ||||
| 	prepareSpanTimer.Finish() | ||||
| 
 | ||||
| 	// XXX(fabxc): the querier returned by populateSeries might be instantiated,
 | ||||
| 	// so we must not return without closing it, irrespective of the error.
 | ||||
|  | @ -371,7 +370,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( | |||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	evalTimer := query.stats.GetTimer(stats.InnerEvalTime).Start() | ||||
| 	evalSpanTimer, _ := query.stats.GetSpanTimer(ctx, stats.InnerEvalTime, ng.metrics.queryInnerEval) | ||||
| 	// Instant evaluation. This is executed as a range evaluation with one step.
 | ||||
| 	if s.Start == s.End && s.Interval == 0 { | ||||
| 		start := timeMilliseconds(s.Start) | ||||
|  | @ -387,8 +386,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( | |||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		evalTimer.Stop() | ||||
| 		ng.metrics.queryInnerEval.Observe(evalTimer.ElapsedTime().Seconds()) | ||||
| 		evalSpanTimer.Finish() | ||||
| 
 | ||||
| 		mat, ok := val.(Matrix) | ||||
| 		if !ok { | ||||
|  | @ -427,8 +425,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( | |||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	evalTimer.Stop() | ||||
| 	ng.metrics.queryInnerEval.Observe(evalTimer.ElapsedTime().Seconds()) | ||||
| 	evalSpanTimer.Finish() | ||||
| 
 | ||||
| 	mat, ok := val.(Matrix) | ||||
| 	if !ok { | ||||
|  | @ -442,11 +439,10 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) ( | |||
| 
 | ||||
| 	// TODO(fabxc): order ensured by storage?
 | ||||
| 	// TODO(fabxc): where to ensure metric labels are a copy from the storage internals.
 | ||||
| 	sortTimer := query.stats.GetTimer(stats.ResultSortTime).Start() | ||||
| 	sortSpanTimer, _ := query.stats.GetSpanTimer(ctx, stats.ResultSortTime, ng.metrics.queryResultSort) | ||||
| 	sort.Sort(mat) | ||||
| 	sortTimer.Stop() | ||||
| 	sortSpanTimer.Finish() | ||||
| 
 | ||||
| 	ng.metrics.queryResultSort.Observe(sortTimer.ElapsedTime().Seconds()) | ||||
| 	return mat, nil | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,6 +13,13 @@ | |||
| 
 | ||||
| package stats | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 
 | ||||
| 	opentracing "github.com/opentracing/opentracing-go" | ||||
| 	"github.com/prometheus/client_golang/prometheus" | ||||
| ) | ||||
| 
 | ||||
| // QueryTiming identifies the code area or functionality in which time is spent
 | ||||
| // during a query.
 | ||||
| type QueryTiming int | ||||
|  | @ -47,6 +54,26 @@ func (s QueryTiming) String() string { | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| // SpanOperation returns a string representation of a QueryTiming span operation.
 | ||||
| func (s QueryTiming) SpanOperation() string { | ||||
| 	switch s { | ||||
| 	case EvalTotalTime: | ||||
| 		return "promqlEval" | ||||
| 	case ResultSortTime: | ||||
| 		return "promqlSort" | ||||
| 	case QueryPreparationTime: | ||||
| 		return "promqlPrepare" | ||||
| 	case InnerEvalTime: | ||||
| 		return "promqlInnerEval" | ||||
| 	case ExecQueueTime: | ||||
| 		return "promqlExecQueue" | ||||
| 	case ExecTotalTime: | ||||
| 		return "promqlExec" | ||||
| 	default: | ||||
| 		return "Unknown query timing" | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // queryTimings with all query timers mapped to durations.
 | ||||
| type queryTimings struct { | ||||
| 	EvalTotalTime        float64 `json:"evalTotalTime"` | ||||
|  | @ -64,10 +91,10 @@ type QueryStats struct { | |||
| 
 | ||||
| // NewQueryStats makes a QueryStats struct with all QueryTimings found in the
 | ||||
| // given QueryTimers.
 | ||||
| func NewQueryStats(tg *TimerGroup) *QueryStats { | ||||
| func NewQueryStats(tg *QueryTimers) *QueryStats { | ||||
| 	var qt queryTimings | ||||
| 
 | ||||
| 	for s, timer := range tg.timers { | ||||
| 	for s, timer := range tg.TimerGroup.timers { | ||||
| 		switch s { | ||||
| 		case EvalTotalTime: | ||||
| 			qt.EvalTotalTime = timer.Duration() | ||||
|  | @ -87,3 +114,44 @@ func NewQueryStats(tg *TimerGroup) *QueryStats { | |||
| 	qs := QueryStats{Timings: qt} | ||||
| 	return &qs | ||||
| } | ||||
| 
 | ||||
| // SpanTimer unifies tracing and timing, to reduce repetition.
 | ||||
| type SpanTimer struct { | ||||
| 	timer     *Timer | ||||
| 	observers []prometheus.Observer | ||||
| 
 | ||||
| 	span opentracing.Span | ||||
| } | ||||
| 
 | ||||
| func NewSpanTimer(ctx context.Context, operation string, timer *Timer, observers ...prometheus.Observer) (*SpanTimer, context.Context) { | ||||
| 	span, ctx := opentracing.StartSpanFromContext(ctx, operation) | ||||
| 	timer.Start() | ||||
| 
 | ||||
| 	return &SpanTimer{ | ||||
| 		timer:     timer, | ||||
| 		observers: observers, | ||||
| 
 | ||||
| 		span: span, | ||||
| 	}, ctx | ||||
| } | ||||
| 
 | ||||
| func (s *SpanTimer) Finish() { | ||||
| 	s.timer.Stop() | ||||
| 	s.span.Finish() | ||||
| 
 | ||||
| 	for _, obs := range s.observers { | ||||
| 		obs.Observe(s.timer.ElapsedTime().Seconds()) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| type QueryTimers struct { | ||||
| 	*TimerGroup | ||||
| } | ||||
| 
 | ||||
| func NewQueryTimers() *QueryTimers { | ||||
| 	return &QueryTimers{NewTimerGroup()} | ||||
| } | ||||
| 
 | ||||
| func (qs *QueryTimers) GetSpanTimer(ctx context.Context, qt QueryTiming, observers ...prometheus.Observer) (*SpanTimer, context.Context) { | ||||
| 	return NewSpanTimer(ctx, qt.SpanOperation(), qs.TimerGroup.GetTimer(qt), observers...) | ||||
| } | ||||
|  |  | |||
|  | @ -39,13 +39,13 @@ func TestTimerGroupNewTimer(t *testing.T) { | |||
| } | ||||
| 
 | ||||
| func TestQueryStatsWithTimers(t *testing.T) { | ||||
| 	tg := NewTimerGroup() | ||||
| 	timer := tg.GetTimer(ExecTotalTime) | ||||
| 	qt := NewQueryTimers() | ||||
| 	timer := qt.GetTimer(ExecTotalTime) | ||||
| 	timer.Start() | ||||
| 	time.Sleep(2 * time.Millisecond) | ||||
| 	timer.Stop() | ||||
| 
 | ||||
| 	qs := NewQueryStats(tg) | ||||
| 	qs := NewQueryStats(qt) | ||||
| 	actual, err := json.Marshal(qs) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Unexpected error during serialization: %v", err) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue