mirror of https://github.com/grafana/grafana.git
Retry aborted transactions on Spanner. (#103289)
* Retry aborted transactions on Spanner.
This commit is contained in:
parent
f5beba1036
commit
413378dd3a
|
|
@ -2,15 +2,14 @@ package sqlstore
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"reflect"
|
"reflect"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mattn/go-sqlite3"
|
|
||||||
"go.opentelemetry.io/otel/attribute"
|
"go.opentelemetry.io/otel/attribute"
|
||||||
"go.opentelemetry.io/otel/trace"
|
"go.opentelemetry.io/otel/trace"
|
||||||
"go.opentelemetry.io/otel/trace/noop"
|
"go.opentelemetry.io/otel/trace/noop"
|
||||||
|
"xorm.io/core"
|
||||||
|
|
||||||
"xorm.io/xorm"
|
"xorm.io/xorm"
|
||||||
|
|
||||||
|
|
@ -76,12 +75,12 @@ func startSessionOrUseExisting(ctx context.Context, engine *xorm.Engine, beginTr
|
||||||
// WithDbSession calls the callback with the session in the context (if exists).
|
// WithDbSession calls the callback with the session in the context (if exists).
|
||||||
// Otherwise it creates a new one that is closed upon completion.
|
// Otherwise it creates a new one that is closed upon completion.
|
||||||
// A session is stored in the context if sqlstore.InTransaction() has been previously called with the same context (and it's not committed/rolledback yet).
|
// A session is stored in the context if sqlstore.InTransaction() has been previously called with the same context (and it's not committed/rolledback yet).
|
||||||
// In case of sqlite3.ErrLocked or sqlite3.ErrBusy failure it will be retried at most five times before giving up.
|
// In case of retryable errors, callback will be retried at most five times before giving up.
|
||||||
func (ss *SQLStore) WithDbSession(ctx context.Context, callback DBTransactionFunc) error {
|
func (ss *SQLStore) WithDbSession(ctx context.Context, callback DBTransactionFunc) error {
|
||||||
return ss.withDbSession(ctx, ss.engine, callback)
|
return ss.withDbSession(ctx, ss.engine, callback)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ss *SQLStore) retryOnLocks(ctx context.Context, callback DBTransactionFunc, sess *DBSession, retry int) func() (retryer.RetrySignal, error) {
|
func (ss *SQLStore) retryOnLocks(ctx context.Context, callback DBTransactionFunc, sess *DBSession, retry int, dialect core.Dialect) func() (retryer.RetrySignal, error) {
|
||||||
return func() (retryer.RetrySignal, error) {
|
return func() (retryer.RetrySignal, error) {
|
||||||
retry++
|
retry++
|
||||||
|
|
||||||
|
|
@ -89,9 +88,8 @@ func (ss *SQLStore) retryOnLocks(ctx context.Context, callback DBTransactionFunc
|
||||||
|
|
||||||
ctxLogger := tsclogger.FromContext(ctx)
|
ctxLogger := tsclogger.FromContext(ctx)
|
||||||
|
|
||||||
var sqlError sqlite3.Error
|
if r, ok := dialect.(xorm.DialectWithRetryableErrors); ok && r.RetryOnError(err) {
|
||||||
if errors.As(err, &sqlError) && (sqlError.Code == sqlite3.ErrLocked || sqlError.Code == sqlite3.ErrBusy) {
|
ctxLogger.Info("Database locked, sleeping then retrying", "error", err, "retry", retry, "code")
|
||||||
ctxLogger.Info("Database locked, sleeping then retrying", "error", err, "retry", retry, "code", sqlError.Code)
|
|
||||||
// retryer immediately returns the error (if there is one) without checking the response
|
// retryer immediately returns the error (if there is one) without checking the response
|
||||||
// therefore we only have to send it if we have reached the maximum retries
|
// therefore we only have to send it if we have reached the maximum retries
|
||||||
if retry >= ss.dbCfg.QueryRetries {
|
if retry >= ss.dbCfg.QueryRetries {
|
||||||
|
|
@ -120,7 +118,7 @@ func (ss *SQLStore) withDbSession(ctx context.Context, engine *xorm.Engine, call
|
||||||
defer sess.Close()
|
defer sess.Close()
|
||||||
}
|
}
|
||||||
retry := 0
|
retry := 0
|
||||||
return retryer.Retry(ss.retryOnLocks(ctx, callback, sess, retry), ss.dbCfg.QueryRetries, time.Millisecond*time.Duration(10), time.Second)
|
return retryer.Retry(ss.retryOnLocks(ctx, callback, sess, retry, engine.Dialect()), ss.dbCfg.QueryRetries, time.Millisecond*time.Duration(10), time.Second)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sess *DBSession) InsertId(bean any, dialect migrator.Dialect) error {
|
func (sess *DBSession) InsertId(bean any, dialect migrator.Dialect) error {
|
||||||
|
|
|
||||||
|
|
@ -7,12 +7,17 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/mattn/go-sqlite3"
|
"github.com/mattn/go-sqlite3"
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
"google.golang.org/grpc/codes"
|
||||||
|
grpcstatus "google.golang.org/grpc/status"
|
||||||
|
|
||||||
|
"github.com/grafana/grafana/pkg/services/sqlstore/migrator"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestRetryingDisabled(t *testing.T) {
|
func TestIntegration_RetryingDisabled(t *testing.T) {
|
||||||
store, _ := InitTestDB(t)
|
store, _ := InitTestDB(t)
|
||||||
|
retryErrors := getRetryErrors(t, store)
|
||||||
|
|
||||||
require.Equal(t, 0, store.dbCfg.QueryRetries)
|
require.Equal(t, 0, store.dbCfg.QueryRetries)
|
||||||
|
|
||||||
funcToTest := map[string]func(ctx context.Context, callback DBTransactionFunc) error{
|
funcToTest := map[string]func(ctx context.Context, callback DBTransactionFunc) error{
|
||||||
|
|
@ -31,20 +36,16 @@ func TestRetryingDisabled(t *testing.T) {
|
||||||
require.Equal(t, 1, i)
|
require.Equal(t, 1, i)
|
||||||
})
|
})
|
||||||
|
|
||||||
errCodes := []sqlite3.ErrNo{sqlite3.ErrBusy, sqlite3.ErrLocked}
|
for _, e := range retryErrors {
|
||||||
for _, c := range errCodes {
|
t.Run(fmt.Sprintf("%s should return the sqlite3.Error %v immediately", name, e), func(t *testing.T) {
|
||||||
t.Run(fmt.Sprintf("%s should return the sqlite3.Error %v immediately", name, c.Error()), func(t *testing.T) {
|
|
||||||
i := 0
|
i := 0
|
||||||
callback := func(sess *DBSession) error {
|
callback := func(sess *DBSession) error {
|
||||||
i++
|
i++
|
||||||
return sqlite3.Error{Code: c}
|
return e
|
||||||
}
|
}
|
||||||
err := f(context.Background(), callback)
|
err := f(context.Background(), callback)
|
||||||
require.Error(t, err)
|
require.Error(t, err)
|
||||||
var driverErr sqlite3.Error
|
|
||||||
require.ErrorAs(t, err, &driverErr)
|
|
||||||
require.Equal(t, 1, i)
|
require.Equal(t, 1, i)
|
||||||
assert.Equal(t, c, driverErr.Code)
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -61,8 +62,9 @@ func TestRetryingDisabled(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRetryingOnFailures(t *testing.T) {
|
func TestIntegration_RetryingOnFailures(t *testing.T) {
|
||||||
store, _ := InitTestDB(t)
|
store, _ := InitTestDB(t)
|
||||||
|
retryErrors := getRetryErrors(t, store)
|
||||||
store.dbCfg.QueryRetries = 5
|
store.dbCfg.QueryRetries = 5
|
||||||
|
|
||||||
funcToTest := map[string]func(ctx context.Context, callback DBTransactionFunc) error{
|
funcToTest := map[string]func(ctx context.Context, callback DBTransactionFunc) error{
|
||||||
|
|
@ -81,20 +83,16 @@ func TestRetryingOnFailures(t *testing.T) {
|
||||||
require.Equal(t, 1, i)
|
require.Equal(t, 1, i)
|
||||||
})
|
})
|
||||||
|
|
||||||
errCodes := []sqlite3.ErrNo{sqlite3.ErrBusy, sqlite3.ErrLocked}
|
for _, e := range retryErrors {
|
||||||
for _, c := range errCodes {
|
t.Run(fmt.Sprintf("%s should return the error %v if all retries have failed", name, e), func(t *testing.T) {
|
||||||
t.Run(fmt.Sprintf("%s should return the sqlite3.Error %v if all retries have failed", name, c.Error()), func(t *testing.T) {
|
|
||||||
i := 0
|
i := 0
|
||||||
callback := func(sess *DBSession) error {
|
callback := func(sess *DBSession) error {
|
||||||
i++
|
i++
|
||||||
return sqlite3.Error{Code: c}
|
return e
|
||||||
}
|
}
|
||||||
err := f(context.Background(), callback)
|
err := f(context.Background(), callback)
|
||||||
require.Error(t, err)
|
require.Error(t, err)
|
||||||
var driverErr sqlite3.Error
|
|
||||||
require.ErrorAs(t, err, &driverErr)
|
|
||||||
require.Equal(t, store.dbCfg.QueryRetries, i)
|
require.Equal(t, store.dbCfg.QueryRetries, i)
|
||||||
assert.Equal(t, c, driverErr.Code)
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,7 +105,7 @@ func TestRetryingOnFailures(t *testing.T) {
|
||||||
case store.dbCfg.QueryRetries == i:
|
case store.dbCfg.QueryRetries == i:
|
||||||
err = nil
|
err = nil
|
||||||
default:
|
default:
|
||||||
err = sqlite3.Error{Code: sqlite3.ErrBusy}
|
err = retryErrors[0]
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
@ -137,3 +135,18 @@ func TestRetryingOnFailures(t *testing.T) {
|
||||||
require.Equal(t, int64(4), val3)
|
require.Equal(t, int64(4), val3)
|
||||||
require.False(t, rows.Next()) // no more rows
|
require.False(t, rows.Next()) // no more rows
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getRetryErrors(t *testing.T, store *SQLStore) []error {
|
||||||
|
var retryErrors []error
|
||||||
|
switch store.GetDialect().DriverName() {
|
||||||
|
case migrator.SQLite:
|
||||||
|
retryErrors = []error{sqlite3.Error{Code: sqlite3.ErrBusy}, sqlite3.Error{Code: sqlite3.ErrLocked}}
|
||||||
|
case migrator.Spanner:
|
||||||
|
retryErrors = []error{grpcstatus.Error(codes.Aborted, "aborted transaction")}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(retryErrors) == 0 {
|
||||||
|
t.Skip("This test only works with sqlite or spanner")
|
||||||
|
}
|
||||||
|
return retryErrors
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,9 @@ package sqlstore
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mattn/go-sqlite3"
|
|
||||||
"xorm.io/xorm"
|
"xorm.io/xorm"
|
||||||
|
|
||||||
"github.com/grafana/grafana/pkg/bus"
|
"github.com/grafana/grafana/pkg/bus"
|
||||||
|
|
@ -63,16 +61,17 @@ func (ss *SQLStore) inTransactionWithRetryCtx(ctx context.Context, engine *xorm.
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// special handling of database locked errors for sqlite, then we can retry 5 times
|
// special handling of database locked errors for sqlite and spanner, then we can retry 5 times
|
||||||
var sqlError sqlite3.Error
|
if r, ok := engine.Dialect().(xorm.DialectWithRetryableErrors); ok {
|
||||||
if errors.As(err, &sqlError) && retry < ss.dbCfg.TransactionRetries && (sqlError.Code == sqlite3.ErrLocked || sqlError.Code == sqlite3.ErrBusy) {
|
if retry < ss.dbCfg.TransactionRetries && r.RetryOnError(err) {
|
||||||
if rollErr := sess.Rollback(); rollErr != nil {
|
if rollErr := sess.Rollback(); rollErr != nil {
|
||||||
return fmt.Errorf("rolling back transaction due to error failed: %s: %w", rollErr, err)
|
return fmt.Errorf("rolling back transaction due to error failed: %s: %w", rollErr, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(time.Millisecond * time.Duration(10))
|
time.Sleep(time.Millisecond * time.Duration(10))
|
||||||
ctxLogger.Info("Database locked, sleeping then retrying", "error", err, "retry", retry, "code", sqlError.Code)
|
ctxLogger.Info("Database locked, sleeping then retrying", "error", err, "retry", retry)
|
||||||
return ss.inTransactionWithRetryCtx(ctx, engine, bus, callback, retry+1)
|
return ss.inTransactionWithRetryCtx(ctx, engine, bus, callback, retry+1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,12 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
spannerclient "cloud.google.com/go/spanner"
|
||||||
_ "github.com/googleapis/go-sql-spanner"
|
_ "github.com/googleapis/go-sql-spanner"
|
||||||
spannerdriver "github.com/googleapis/go-sql-spanner"
|
spannerdriver "github.com/googleapis/go-sql-spanner"
|
||||||
"google.golang.org/api/option"
|
"google.golang.org/api/option"
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
|
"google.golang.org/grpc/codes"
|
||||||
"google.golang.org/grpc/credentials/insecure"
|
"google.golang.org/grpc/credentials/insecure"
|
||||||
"xorm.io/core"
|
"xorm.io/core"
|
||||||
)
|
)
|
||||||
|
|
@ -425,3 +427,7 @@ func SpannerConnectorConfigToClientOptions(connectorConfig spannerdriver.Connect
|
||||||
}
|
}
|
||||||
return opts
|
return opts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *spanner) RetryOnError(err error) bool {
|
||||||
|
return err != nil && spannerclient.ErrCode(spannerclient.ToSpannerError(err)) == codes.Aborted
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
sqlite "github.com/mattn/go-sqlite3"
|
||||||
"xorm.io/core"
|
"xorm.io/core"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -474,6 +475,14 @@ func (db *sqlite3) Filters() []core.Filter {
|
||||||
return []core.Filter{&core.IdFilter{}}
|
return []core.Filter{&core.IdFilter{}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (db *sqlite3) RetryOnError(err error) bool {
|
||||||
|
var sqlError sqlite.Error
|
||||||
|
if errors.As(err, &sqlError) && (sqlError.Code == sqlite.ErrLocked || sqlError.Code == sqlite.ErrBusy) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
type sqlite3Driver struct {
|
type sqlite3Driver struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@ func NewEngine(driverName string, dataSourceName string) (*Engine, error) {
|
||||||
|
|
||||||
runtime.SetFinalizer(engine, close)
|
runtime.SetFinalizer(engine, close)
|
||||||
|
|
||||||
if ext, ok := dialect.(DialectExt); ok {
|
if ext, ok := dialect.(DialectWithSequenceGenerator); ok {
|
||||||
engine.sequenceGenerator, err = ext.CreateSequenceGenerator(db.DB)
|
engine.sequenceGenerator, err = ext.CreateSequenceGenerator(db.DB)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create sequence generator: %w", err)
|
return nil, fmt.Errorf("failed to create sequence generator: %w", err)
|
||||||
|
|
@ -138,9 +138,14 @@ type SequenceGenerator interface {
|
||||||
Reset()
|
Reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
type DialectExt interface {
|
type DialectWithSequenceGenerator interface {
|
||||||
core.Dialect
|
core.Dialect
|
||||||
|
|
||||||
// CreateSequenceGenerator returns optional generator used to create AUTOINCREMENT ids for inserts.
|
// CreateSequenceGenerator returns optional generator used to create AUTOINCREMENT ids for inserts.
|
||||||
CreateSequenceGenerator(db *sql.DB) (SequenceGenerator, error)
|
CreateSequenceGenerator(db *sql.DB) (SequenceGenerator, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type DialectWithRetryableErrors interface {
|
||||||
|
core.Dialect
|
||||||
|
RetryOnError(err error) bool
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue