mirror of https://github.com/grafana/grafana.git
				
				
				
			Retry aborted transactions on Spanner. (#103289)
* Retry aborted transactions on Spanner.
This commit is contained in:
		
							parent
							
								
									f5beba1036
								
							
						
					
					
						commit
						413378dd3a
					
				|  | @ -2,15 +2,14 @@ package sqlstore | ||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
| 	"context" | 	"context" | ||||||
| 	"errors" |  | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"reflect" | 	"reflect" | ||||||
| 	"time" | 	"time" | ||||||
| 
 | 
 | ||||||
| 	"github.com/mattn/go-sqlite3" |  | ||||||
| 	"go.opentelemetry.io/otel/attribute" | 	"go.opentelemetry.io/otel/attribute" | ||||||
| 	"go.opentelemetry.io/otel/trace" | 	"go.opentelemetry.io/otel/trace" | ||||||
| 	"go.opentelemetry.io/otel/trace/noop" | 	"go.opentelemetry.io/otel/trace/noop" | ||||||
|  | 	"xorm.io/core" | ||||||
| 
 | 
 | ||||||
| 	"xorm.io/xorm" | 	"xorm.io/xorm" | ||||||
| 
 | 
 | ||||||
|  | @ -76,12 +75,12 @@ func startSessionOrUseExisting(ctx context.Context, engine *xorm.Engine, beginTr | ||||||
| // WithDbSession calls the callback with the session in the context (if exists).
 | // WithDbSession calls the callback with the session in the context (if exists).
 | ||||||
| // Otherwise it creates a new one that is closed upon completion.
 | // Otherwise it creates a new one that is closed upon completion.
 | ||||||
| // A session is stored in the context if sqlstore.InTransaction() has been previously called with the same context (and it's not committed/rolledback yet).
 | // A session is stored in the context if sqlstore.InTransaction() has been previously called with the same context (and it's not committed/rolledback yet).
 | ||||||
| // In case of sqlite3.ErrLocked or sqlite3.ErrBusy failure it will be retried at most five times before giving up.
 | // In case of retryable errors, callback will be retried at most five times before giving up.
 | ||||||
| func (ss *SQLStore) WithDbSession(ctx context.Context, callback DBTransactionFunc) error { | func (ss *SQLStore) WithDbSession(ctx context.Context, callback DBTransactionFunc) error { | ||||||
| 	return ss.withDbSession(ctx, ss.engine, callback) | 	return ss.withDbSession(ctx, ss.engine, callback) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (ss *SQLStore) retryOnLocks(ctx context.Context, callback DBTransactionFunc, sess *DBSession, retry int) func() (retryer.RetrySignal, error) { | func (ss *SQLStore) retryOnLocks(ctx context.Context, callback DBTransactionFunc, sess *DBSession, retry int, dialect core.Dialect) func() (retryer.RetrySignal, error) { | ||||||
| 	return func() (retryer.RetrySignal, error) { | 	return func() (retryer.RetrySignal, error) { | ||||||
| 		retry++ | 		retry++ | ||||||
| 
 | 
 | ||||||
|  | @ -89,9 +88,8 @@ func (ss *SQLStore) retryOnLocks(ctx context.Context, callback DBTransactionFunc | ||||||
| 
 | 
 | ||||||
| 		ctxLogger := tsclogger.FromContext(ctx) | 		ctxLogger := tsclogger.FromContext(ctx) | ||||||
| 
 | 
 | ||||||
| 		var sqlError sqlite3.Error | 		if r, ok := dialect.(xorm.DialectWithRetryableErrors); ok && r.RetryOnError(err) { | ||||||
| 		if errors.As(err, &sqlError) && (sqlError.Code == sqlite3.ErrLocked || sqlError.Code == sqlite3.ErrBusy) { | 			ctxLogger.Info("Database locked, sleeping then retrying", "error", err, "retry", retry, "code") | ||||||
| 			ctxLogger.Info("Database locked, sleeping then retrying", "error", err, "retry", retry, "code", sqlError.Code) |  | ||||||
| 			// retryer immediately returns the error (if there is one) without checking the response
 | 			// retryer immediately returns the error (if there is one) without checking the response
 | ||||||
| 			// therefore we only have to send it if we have reached the maximum retries
 | 			// therefore we only have to send it if we have reached the maximum retries
 | ||||||
| 			if retry >= ss.dbCfg.QueryRetries { | 			if retry >= ss.dbCfg.QueryRetries { | ||||||
|  | @ -120,7 +118,7 @@ func (ss *SQLStore) withDbSession(ctx context.Context, engine *xorm.Engine, call | ||||||
| 		defer sess.Close() | 		defer sess.Close() | ||||||
| 	} | 	} | ||||||
| 	retry := 0 | 	retry := 0 | ||||||
| 	return retryer.Retry(ss.retryOnLocks(ctx, callback, sess, retry), ss.dbCfg.QueryRetries, time.Millisecond*time.Duration(10), time.Second) | 	return retryer.Retry(ss.retryOnLocks(ctx, callback, sess, retry, engine.Dialect()), ss.dbCfg.QueryRetries, time.Millisecond*time.Duration(10), time.Second) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (sess *DBSession) InsertId(bean any, dialect migrator.Dialect) error { | func (sess *DBSession) InsertId(bean any, dialect migrator.Dialect) error { | ||||||
|  |  | ||||||
|  | @ -7,12 +7,17 @@ import ( | ||||||
| 	"testing" | 	"testing" | ||||||
| 
 | 
 | ||||||
| 	"github.com/mattn/go-sqlite3" | 	"github.com/mattn/go-sqlite3" | ||||||
| 	"github.com/stretchr/testify/assert" |  | ||||||
| 	"github.com/stretchr/testify/require" | 	"github.com/stretchr/testify/require" | ||||||
|  | 	"google.golang.org/grpc/codes" | ||||||
|  | 	grpcstatus "google.golang.org/grpc/status" | ||||||
|  | 
 | ||||||
|  | 	"github.com/grafana/grafana/pkg/services/sqlstore/migrator" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| func TestRetryingDisabled(t *testing.T) { | func TestIntegration_RetryingDisabled(t *testing.T) { | ||||||
| 	store, _ := InitTestDB(t) | 	store, _ := InitTestDB(t) | ||||||
|  | 	retryErrors := getRetryErrors(t, store) | ||||||
|  | 
 | ||||||
| 	require.Equal(t, 0, store.dbCfg.QueryRetries) | 	require.Equal(t, 0, store.dbCfg.QueryRetries) | ||||||
| 
 | 
 | ||||||
| 	funcToTest := map[string]func(ctx context.Context, callback DBTransactionFunc) error{ | 	funcToTest := map[string]func(ctx context.Context, callback DBTransactionFunc) error{ | ||||||
|  | @ -31,20 +36,16 @@ func TestRetryingDisabled(t *testing.T) { | ||||||
| 			require.Equal(t, 1, i) | 			require.Equal(t, 1, i) | ||||||
| 		}) | 		}) | ||||||
| 
 | 
 | ||||||
| 		errCodes := []sqlite3.ErrNo{sqlite3.ErrBusy, sqlite3.ErrLocked} | 		for _, e := range retryErrors { | ||||||
| 		for _, c := range errCodes { | 			t.Run(fmt.Sprintf("%s should return the sqlite3.Error %v immediately", name, e), func(t *testing.T) { | ||||||
| 			t.Run(fmt.Sprintf("%s should return the sqlite3.Error %v immediately", name, c.Error()), func(t *testing.T) { |  | ||||||
| 				i := 0 | 				i := 0 | ||||||
| 				callback := func(sess *DBSession) error { | 				callback := func(sess *DBSession) error { | ||||||
| 					i++ | 					i++ | ||||||
| 					return sqlite3.Error{Code: c} | 					return e | ||||||
| 				} | 				} | ||||||
| 				err := f(context.Background(), callback) | 				err := f(context.Background(), callback) | ||||||
| 				require.Error(t, err) | 				require.Error(t, err) | ||||||
| 				var driverErr sqlite3.Error |  | ||||||
| 				require.ErrorAs(t, err, &driverErr) |  | ||||||
| 				require.Equal(t, 1, i) | 				require.Equal(t, 1, i) | ||||||
| 				assert.Equal(t, c, driverErr.Code) |  | ||||||
| 			}) | 			}) | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | @ -61,8 +62,9 @@ func TestRetryingDisabled(t *testing.T) { | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func TestRetryingOnFailures(t *testing.T) { | func TestIntegration_RetryingOnFailures(t *testing.T) { | ||||||
| 	store, _ := InitTestDB(t) | 	store, _ := InitTestDB(t) | ||||||
|  | 	retryErrors := getRetryErrors(t, store) | ||||||
| 	store.dbCfg.QueryRetries = 5 | 	store.dbCfg.QueryRetries = 5 | ||||||
| 
 | 
 | ||||||
| 	funcToTest := map[string]func(ctx context.Context, callback DBTransactionFunc) error{ | 	funcToTest := map[string]func(ctx context.Context, callback DBTransactionFunc) error{ | ||||||
|  | @ -81,20 +83,16 @@ func TestRetryingOnFailures(t *testing.T) { | ||||||
| 			require.Equal(t, 1, i) | 			require.Equal(t, 1, i) | ||||||
| 		}) | 		}) | ||||||
| 
 | 
 | ||||||
| 		errCodes := []sqlite3.ErrNo{sqlite3.ErrBusy, sqlite3.ErrLocked} | 		for _, e := range retryErrors { | ||||||
| 		for _, c := range errCodes { | 			t.Run(fmt.Sprintf("%s should return the error %v if all retries have failed", name, e), func(t *testing.T) { | ||||||
| 			t.Run(fmt.Sprintf("%s should return the sqlite3.Error %v if all retries have failed", name, c.Error()), func(t *testing.T) { |  | ||||||
| 				i := 0 | 				i := 0 | ||||||
| 				callback := func(sess *DBSession) error { | 				callback := func(sess *DBSession) error { | ||||||
| 					i++ | 					i++ | ||||||
| 					return sqlite3.Error{Code: c} | 					return e | ||||||
| 				} | 				} | ||||||
| 				err := f(context.Background(), callback) | 				err := f(context.Background(), callback) | ||||||
| 				require.Error(t, err) | 				require.Error(t, err) | ||||||
| 				var driverErr sqlite3.Error |  | ||||||
| 				require.ErrorAs(t, err, &driverErr) |  | ||||||
| 				require.Equal(t, store.dbCfg.QueryRetries, i) | 				require.Equal(t, store.dbCfg.QueryRetries, i) | ||||||
| 				assert.Equal(t, c, driverErr.Code) |  | ||||||
| 			}) | 			}) | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | @ -107,7 +105,7 @@ func TestRetryingOnFailures(t *testing.T) { | ||||||
| 				case store.dbCfg.QueryRetries == i: | 				case store.dbCfg.QueryRetries == i: | ||||||
| 					err = nil | 					err = nil | ||||||
| 				default: | 				default: | ||||||
| 					err = sqlite3.Error{Code: sqlite3.ErrBusy} | 					err = retryErrors[0] | ||||||
| 				} | 				} | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
|  | @ -137,3 +135,18 @@ func TestRetryingOnFailures(t *testing.T) { | ||||||
| 	require.Equal(t, int64(4), val3) | 	require.Equal(t, int64(4), val3) | ||||||
| 	require.False(t, rows.Next()) // no more rows
 | 	require.False(t, rows.Next()) // no more rows
 | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | func getRetryErrors(t *testing.T, store *SQLStore) []error { | ||||||
|  | 	var retryErrors []error | ||||||
|  | 	switch store.GetDialect().DriverName() { | ||||||
|  | 	case migrator.SQLite: | ||||||
|  | 		retryErrors = []error{sqlite3.Error{Code: sqlite3.ErrBusy}, sqlite3.Error{Code: sqlite3.ErrLocked}} | ||||||
|  | 	case migrator.Spanner: | ||||||
|  | 		retryErrors = []error{grpcstatus.Error(codes.Aborted, "aborted transaction")} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if len(retryErrors) == 0 { | ||||||
|  | 		t.Skip("This test only works with sqlite or spanner") | ||||||
|  | 	} | ||||||
|  | 	return retryErrors | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -2,11 +2,9 @@ package sqlstore | ||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
| 	"context" | 	"context" | ||||||
| 	"errors" |  | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"time" | 	"time" | ||||||
| 
 | 
 | ||||||
| 	"github.com/mattn/go-sqlite3" |  | ||||||
| 	"xorm.io/xorm" | 	"xorm.io/xorm" | ||||||
| 
 | 
 | ||||||
| 	"github.com/grafana/grafana/pkg/bus" | 	"github.com/grafana/grafana/pkg/bus" | ||||||
|  | @ -63,16 +61,17 @@ func (ss *SQLStore) inTransactionWithRetryCtx(ctx context.Context, engine *xorm. | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// special handling of database locked errors for sqlite, then we can retry 5 times
 | 	// special handling of database locked errors for sqlite and spanner, then we can retry 5 times
 | ||||||
| 	var sqlError sqlite3.Error | 	if r, ok := engine.Dialect().(xorm.DialectWithRetryableErrors); ok { | ||||||
| 	if errors.As(err, &sqlError) && retry < ss.dbCfg.TransactionRetries && (sqlError.Code == sqlite3.ErrLocked || sqlError.Code == sqlite3.ErrBusy) { | 		if retry < ss.dbCfg.TransactionRetries && r.RetryOnError(err) { | ||||||
| 		if rollErr := sess.Rollback(); rollErr != nil { | 			if rollErr := sess.Rollback(); rollErr != nil { | ||||||
| 			return fmt.Errorf("rolling back transaction due to error failed: %s: %w", rollErr, err) | 				return fmt.Errorf("rolling back transaction due to error failed: %s: %w", rollErr, err) | ||||||
| 		} | 			} | ||||||
| 
 | 
 | ||||||
| 		time.Sleep(time.Millisecond * time.Duration(10)) | 			time.Sleep(time.Millisecond * time.Duration(10)) | ||||||
| 		ctxLogger.Info("Database locked, sleeping then retrying", "error", err, "retry", retry, "code", sqlError.Code) | 			ctxLogger.Info("Database locked, sleeping then retrying", "error", err, "retry", retry) | ||||||
| 		return ss.inTransactionWithRetryCtx(ctx, engine, bus, callback, retry+1) | 			return ss.inTransactionWithRetryCtx(ctx, engine, bus, callback, retry+1) | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
|  |  | ||||||
|  | @ -8,10 +8,12 @@ import ( | ||||||
| 	"strconv" | 	"strconv" | ||||||
| 	"strings" | 	"strings" | ||||||
| 
 | 
 | ||||||
|  | 	spannerclient "cloud.google.com/go/spanner" | ||||||
| 	_ "github.com/googleapis/go-sql-spanner" | 	_ "github.com/googleapis/go-sql-spanner" | ||||||
| 	spannerdriver "github.com/googleapis/go-sql-spanner" | 	spannerdriver "github.com/googleapis/go-sql-spanner" | ||||||
| 	"google.golang.org/api/option" | 	"google.golang.org/api/option" | ||||||
| 	"google.golang.org/grpc" | 	"google.golang.org/grpc" | ||||||
|  | 	"google.golang.org/grpc/codes" | ||||||
| 	"google.golang.org/grpc/credentials/insecure" | 	"google.golang.org/grpc/credentials/insecure" | ||||||
| 	"xorm.io/core" | 	"xorm.io/core" | ||||||
| ) | ) | ||||||
|  | @ -425,3 +427,7 @@ func SpannerConnectorConfigToClientOptions(connectorConfig spannerdriver.Connect | ||||||
| 	} | 	} | ||||||
| 	return opts | 	return opts | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | func (s *spanner) RetryOnError(err error) bool { | ||||||
|  | 	return err != nil && spannerclient.ErrCode(spannerclient.ToSpannerError(err)) == codes.Aborted | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -11,6 +11,7 @@ import ( | ||||||
| 	"regexp" | 	"regexp" | ||||||
| 	"strings" | 	"strings" | ||||||
| 
 | 
 | ||||||
|  | 	sqlite "github.com/mattn/go-sqlite3" | ||||||
| 	"xorm.io/core" | 	"xorm.io/core" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | @ -474,6 +475,14 @@ func (db *sqlite3) Filters() []core.Filter { | ||||||
| 	return []core.Filter{&core.IdFilter{}} | 	return []core.Filter{&core.IdFilter{}} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | func (db *sqlite3) RetryOnError(err error) bool { | ||||||
|  | 	var sqlError sqlite.Error | ||||||
|  | 	if errors.As(err, &sqlError) && (sqlError.Code == sqlite.ErrLocked || sqlError.Code == sqlite.ErrBusy) { | ||||||
|  | 		return true | ||||||
|  | 	} | ||||||
|  | 	return false | ||||||
|  | } | ||||||
|  | 
 | ||||||
| type sqlite3Driver struct { | type sqlite3Driver struct { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -117,7 +117,7 @@ func NewEngine(driverName string, dataSourceName string) (*Engine, error) { | ||||||
| 
 | 
 | ||||||
| 	runtime.SetFinalizer(engine, close) | 	runtime.SetFinalizer(engine, close) | ||||||
| 
 | 
 | ||||||
| 	if ext, ok := dialect.(DialectExt); ok { | 	if ext, ok := dialect.(DialectWithSequenceGenerator); ok { | ||||||
| 		engine.sequenceGenerator, err = ext.CreateSequenceGenerator(db.DB) | 		engine.sequenceGenerator, err = ext.CreateSequenceGenerator(db.DB) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, fmt.Errorf("failed to create sequence generator: %w", err) | 			return nil, fmt.Errorf("failed to create sequence generator: %w", err) | ||||||
|  | @ -138,9 +138,14 @@ type SequenceGenerator interface { | ||||||
| 	Reset() | 	Reset() | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| type DialectExt interface { | type DialectWithSequenceGenerator interface { | ||||||
| 	core.Dialect | 	core.Dialect | ||||||
| 
 | 
 | ||||||
| 	// CreateSequenceGenerator returns optional generator used to create AUTOINCREMENT ids for inserts.
 | 	// CreateSequenceGenerator returns optional generator used to create AUTOINCREMENT ids for inserts.
 | ||||||
| 	CreateSequenceGenerator(db *sql.DB) (SequenceGenerator, error) | 	CreateSequenceGenerator(db *sql.DB) (SequenceGenerator, error) | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | type DialectWithRetryableErrors interface { | ||||||
|  | 	core.Dialect | ||||||
|  | 	RetryOnError(err error) bool | ||||||
|  | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue