mirror of https://github.com/grafana/grafana.git
				
				
				
			SSE: improve error handling in DSNode (#71468)
* add data source uid to QueryError * add error and datasource uid to tracing * split queryDataResponseToResults to two functions one to extract frame from the result, and another to convert it * propagate logger with context to convertDataFramesToResults
This commit is contained in:
		
							parent
							
								
									64aa5465ac
								
							
						
					
					
						commit
						9e76d6372c
					
				|  | @ -16,7 +16,7 @@ import ( | |||
| 	"go.opentelemetry.io/otel/attribute" | ||||
| ) | ||||
| 
 | ||||
| func shouldUseDataplane(frames data.Frames, logger *log.ConcreteLogger, disable bool) (dt data.FrameType, b bool, e error) { | ||||
| func shouldUseDataplane(frames data.Frames, logger log.Logger, disable bool) (dt data.FrameType, b bool, e error) { | ||||
| 	if disable { | ||||
| 		return | ||||
| 	} | ||||
|  |  | |||
|  | @ -15,6 +15,7 @@ import ( | |||
| 	"github.com/grafana/grafana/pkg/expr/classic" | ||||
| 	"github.com/grafana/grafana/pkg/expr/mathexp" | ||||
| 	"github.com/grafana/grafana/pkg/infra/log" | ||||
| 	"github.com/grafana/grafana/pkg/infra/tracing" | ||||
| 	"github.com/grafana/grafana/pkg/services/datasources" | ||||
| 	"github.com/grafana/grafana/pkg/services/featuremgmt" | ||||
| ) | ||||
|  | @ -24,8 +25,9 @@ var ( | |||
| ) | ||||
| 
 | ||||
| type QueryError struct { | ||||
| 	RefID string | ||||
| 	Err   error | ||||
| 	RefID         string | ||||
| 	DatasourceUID string | ||||
| 	Err           error | ||||
| } | ||||
| 
 | ||||
| func (e QueryError) Error() string { | ||||
|  | @ -212,6 +214,7 @@ func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s | |||
| 		return mathexp.Results{}, err | ||||
| 	} | ||||
| 	span.SetAttributes("datasource.type", dn.datasource.Type, attribute.Key("datasource.type").String(dn.datasource.Type)) | ||||
| 	span.SetAttributes("datasource.uid", dn.datasource.UID, attribute.Key("datasource.uid").String(dn.datasource.UID)) | ||||
| 
 | ||||
| 	req := &backend.QueryDataRequest{ | ||||
| 		PluginContext: pCtx, | ||||
|  | @ -234,6 +237,11 @@ func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s | |||
| 		if e != nil { | ||||
| 			responseType = "error" | ||||
| 			respStatus = "failure" | ||||
| 			span.AddEvents([]string{"error", "message"}, | ||||
| 				[]tracing.EventValue{ | ||||
| 					{Str: fmt.Sprintf("%v", err)}, | ||||
| 					{Str: "failed to query data source"}, | ||||
| 				}) | ||||
| 		} | ||||
| 		logger.Debug("Data source queried", "responseType", responseType) | ||||
| 		useDataplane := strings.HasPrefix(responseType, "dataplane-") | ||||
|  | @ -242,50 +250,69 @@ func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s | |||
| 
 | ||||
| 	resp, err := s.dataService.QueryData(ctx, req) | ||||
| 	if err != nil { | ||||
| 		return mathexp.Results{}, err | ||||
| 		return mathexp.Results{}, QueryError{ | ||||
| 			RefID:         dn.refID, | ||||
| 			DatasourceUID: dn.datasource.UID, | ||||
| 			Err:           err, | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	dataFrames, err := getResponseFrame(resp, dn.refID) | ||||
| 	if err != nil { | ||||
| 		return mathexp.Results{}, QueryError{ | ||||
| 			RefID:         dn.refID, | ||||
| 			DatasourceUID: dn.datasource.UID, | ||||
| 			Err:           err, | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	var result mathexp.Results | ||||
| 	responseType, result, err = queryDataResponseToResults(ctx, resp, dn.refID, dn.datasource.Type, s) | ||||
| 	responseType, result, err = convertDataFramesToResults(ctx, dataFrames, dn.datasource.Type, s, logger) | ||||
| 	return result, err | ||||
| } | ||||
| 
 | ||||
| func queryDataResponseToResults(ctx context.Context, resp *backend.QueryDataResponse, refID string, datasourceType string, s *Service) (string, mathexp.Results, error) { | ||||
| 	vals := make([]mathexp.Value, 0) | ||||
| func getResponseFrame(resp *backend.QueryDataResponse, refID string) (data.Frames, error) { | ||||
| 	response, ok := resp.Responses[refID] | ||||
| 	if !ok { | ||||
| 		if len(resp.Responses) > 0 { | ||||
| 			keys := make([]string, 0, len(resp.Responses)) | ||||
| 			for refID := range resp.Responses { | ||||
| 				keys = append(keys, refID) | ||||
| 			} | ||||
| 			logger.Warn("Can't find response by refID. Return nodata", "responseRefIds", keys) | ||||
| 		// This indicates that the RefID of the request was not included to the response, i.e. some problem in the data source plugin
 | ||||
| 		keys := make([]string, 0, len(resp.Responses)) | ||||
| 		for refID := range resp.Responses { | ||||
| 			keys = append(keys, refID) | ||||
| 		} | ||||
| 		return "no-data", mathexp.Results{Values: mathexp.Values{mathexp.NoData{}.New()}}, nil | ||||
| 		logger.Warn("Can't find response by refID. Return nodata", "responseRefIds", keys) | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	if response.Error != nil { | ||||
| 		return "", mathexp.Results{}, QueryError{RefID: refID, Err: response.Error} | ||||
| 		return nil, response.Error | ||||
| 	} | ||||
| 	return response.Frames, nil | ||||
| } | ||||
| 
 | ||||
| func convertDataFramesToResults(ctx context.Context, frames data.Frames, datasourceType string, s *Service, logger log.Logger) (string, mathexp.Results, error) { | ||||
| 	if len(frames) == 0 { | ||||
| 		return "no-data", mathexp.Results{Values: mathexp.Values{mathexp.NewNoData()}}, nil | ||||
| 	} | ||||
| 
 | ||||
| 	vals := make([]mathexp.Value, 0) | ||||
| 	var dt data.FrameType | ||||
| 	dt, useDataplane, _ := shouldUseDataplane(response.Frames, logger, s.features.IsEnabled(featuremgmt.FlagDisableSSEDataplane)) | ||||
| 	dt, useDataplane, _ := shouldUseDataplane(frames, logger, s.features.IsEnabled(featuremgmt.FlagDisableSSEDataplane)) | ||||
| 	if useDataplane { | ||||
| 		logger.Debug("Handling SSE data source query through dataplane", "datatype", dt) | ||||
| 		result, err := handleDataplaneFrames(ctx, s.tracer, dt, response.Frames) | ||||
| 		result, err := handleDataplaneFrames(ctx, s.tracer, dt, frames) | ||||
| 		return fmt.Sprintf("dataplane-%s", dt), result, err | ||||
| 	} | ||||
| 
 | ||||
| 	if isAllFrameVectors(datasourceType, response.Frames) { // Prometheus Specific Handling
 | ||||
| 		vals, err := framesToNumbers(response.Frames) | ||||
| 	if isAllFrameVectors(datasourceType, frames) { // Prometheus Specific Handling
 | ||||
| 		vals, err := framesToNumbers(frames) | ||||
| 		if err != nil { | ||||
| 			return "", mathexp.Results{}, fmt.Errorf("failed to read frames as numbers: %w", err) | ||||
| 		} | ||||
| 		return "vector", mathexp.Results{Values: vals}, nil | ||||
| 	} | ||||
| 
 | ||||
| 	if len(response.Frames) == 1 { | ||||
| 		frame := response.Frames[0] | ||||
| 	if len(frames) == 1 { | ||||
| 		frame := frames[0] | ||||
| 		// Handle Untyped NoData
 | ||||
| 		if len(frame.Fields) == 0 { | ||||
| 			return "no-data", mathexp.Results{Values: mathexp.Values{mathexp.NoData{Frame: frame}}}, nil | ||||
|  | @ -306,7 +333,7 @@ func queryDataResponseToResults(ctx context.Context, resp *backend.QueryDataResp | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	for _, frame := range response.Frames { | ||||
| 	for _, frame := range frames { | ||||
| 		// Check for TimeSeriesTypeNot in InfluxDB queries. A data frame of this type will cause
 | ||||
| 		// the WideToMany() function to error out, which results in unhealthy alerts.
 | ||||
| 		// This check should be removed once inconsistencies in data source responses are solved.
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue