SSE: improve error handling in DSNode (#71468)

* add data source uid to QueryError

* add error and datasource uid to tracing

* split queryDataResponseToResults to two functions one to extract frame from the result, and another to convert it

* propagate logger with context to convertDataFramesToResults
This commit is contained in:
Yuri Tseretyan 2023-07-12 14:59:02 -04:00 committed by GitHub
parent 64aa5465ac
commit 9e76d6372c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 49 additions and 22 deletions

View File

@ -16,7 +16,7 @@ import (
"go.opentelemetry.io/otel/attribute"
)
func shouldUseDataplane(frames data.Frames, logger *log.ConcreteLogger, disable bool) (dt data.FrameType, b bool, e error) {
func shouldUseDataplane(frames data.Frames, logger log.Logger, disable bool) (dt data.FrameType, b bool, e error) {
if disable {
return
}

View File

@ -15,6 +15,7 @@ import (
"github.com/grafana/grafana/pkg/expr/classic"
"github.com/grafana/grafana/pkg/expr/mathexp"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/featuremgmt"
)
@ -24,8 +25,9 @@ var (
)
type QueryError struct {
RefID string
Err error
RefID string
DatasourceUID string
Err error
}
func (e QueryError) Error() string {
@ -212,6 +214,7 @@ func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s
return mathexp.Results{}, err
}
span.SetAttributes("datasource.type", dn.datasource.Type, attribute.Key("datasource.type").String(dn.datasource.Type))
span.SetAttributes("datasource.uid", dn.datasource.UID, attribute.Key("datasource.uid").String(dn.datasource.UID))
req := &backend.QueryDataRequest{
PluginContext: pCtx,
@ -234,6 +237,11 @@ func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s
if e != nil {
responseType = "error"
respStatus = "failure"
span.AddEvents([]string{"error", "message"},
[]tracing.EventValue{
{Str: fmt.Sprintf("%v", err)},
{Str: "failed to query data source"},
})
}
logger.Debug("Data source queried", "responseType", responseType)
useDataplane := strings.HasPrefix(responseType, "dataplane-")
@ -242,50 +250,69 @@ func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s
resp, err := s.dataService.QueryData(ctx, req)
if err != nil {
return mathexp.Results{}, err
return mathexp.Results{}, QueryError{
RefID: dn.refID,
DatasourceUID: dn.datasource.UID,
Err: err,
}
}
dataFrames, err := getResponseFrame(resp, dn.refID)
if err != nil {
return mathexp.Results{}, QueryError{
RefID: dn.refID,
DatasourceUID: dn.datasource.UID,
Err: err,
}
}
var result mathexp.Results
responseType, result, err = queryDataResponseToResults(ctx, resp, dn.refID, dn.datasource.Type, s)
responseType, result, err = convertDataFramesToResults(ctx, dataFrames, dn.datasource.Type, s, logger)
return result, err
}
func queryDataResponseToResults(ctx context.Context, resp *backend.QueryDataResponse, refID string, datasourceType string, s *Service) (string, mathexp.Results, error) {
vals := make([]mathexp.Value, 0)
func getResponseFrame(resp *backend.QueryDataResponse, refID string) (data.Frames, error) {
response, ok := resp.Responses[refID]
if !ok {
if len(resp.Responses) > 0 {
keys := make([]string, 0, len(resp.Responses))
for refID := range resp.Responses {
keys = append(keys, refID)
}
logger.Warn("Can't find response by refID. Return nodata", "responseRefIds", keys)
// This indicates that the RefID of the request was not included to the response, i.e. some problem in the data source plugin
keys := make([]string, 0, len(resp.Responses))
for refID := range resp.Responses {
keys = append(keys, refID)
}
return "no-data", mathexp.Results{Values: mathexp.Values{mathexp.NoData{}.New()}}, nil
logger.Warn("Can't find response by refID. Return nodata", "responseRefIds", keys)
return nil, nil
}
if response.Error != nil {
return "", mathexp.Results{}, QueryError{RefID: refID, Err: response.Error}
return nil, response.Error
}
return response.Frames, nil
}
func convertDataFramesToResults(ctx context.Context, frames data.Frames, datasourceType string, s *Service, logger log.Logger) (string, mathexp.Results, error) {
if len(frames) == 0 {
return "no-data", mathexp.Results{Values: mathexp.Values{mathexp.NewNoData()}}, nil
}
vals := make([]mathexp.Value, 0)
var dt data.FrameType
dt, useDataplane, _ := shouldUseDataplane(response.Frames, logger, s.features.IsEnabled(featuremgmt.FlagDisableSSEDataplane))
dt, useDataplane, _ := shouldUseDataplane(frames, logger, s.features.IsEnabled(featuremgmt.FlagDisableSSEDataplane))
if useDataplane {
logger.Debug("Handling SSE data source query through dataplane", "datatype", dt)
result, err := handleDataplaneFrames(ctx, s.tracer, dt, response.Frames)
result, err := handleDataplaneFrames(ctx, s.tracer, dt, frames)
return fmt.Sprintf("dataplane-%s", dt), result, err
}
if isAllFrameVectors(datasourceType, response.Frames) { // Prometheus Specific Handling
vals, err := framesToNumbers(response.Frames)
if isAllFrameVectors(datasourceType, frames) { // Prometheus Specific Handling
vals, err := framesToNumbers(frames)
if err != nil {
return "", mathexp.Results{}, fmt.Errorf("failed to read frames as numbers: %w", err)
}
return "vector", mathexp.Results{Values: vals}, nil
}
if len(response.Frames) == 1 {
frame := response.Frames[0]
if len(frames) == 1 {
frame := frames[0]
// Handle Untyped NoData
if len(frame.Fields) == 0 {
return "no-data", mathexp.Results{Values: mathexp.Values{mathexp.NoData{Frame: frame}}}, nil
@ -306,7 +333,7 @@ func queryDataResponseToResults(ctx context.Context, resp *backend.QueryDataResp
}
}
for _, frame := range response.Frames {
for _, frame := range frames {
// Check for TimeSeriesTypeNot in InfluxDB queries. A data frame of this type will cause
// the WideToMany() function to error out, which results in unhealthy alerts.
// This check should be removed once inconsistencies in data source responses are solved.