mirror of https://github.com/minio/minio.git
				
				
				
			fix: make sure to use new restClient for healthcheck (#10026)
Without instantiating a new rest client we can have a recursive error which can lead to healthcheck returning always offline, this can prematurely take the servers offline.
This commit is contained in:
		
							parent
							
								
									c2fdf73491
								
							
						
					
					
						commit
						3b9fbf80ad
					
				|  | @ -205,11 +205,7 @@ func newBootstrapRESTClients(endpointZones EndpointZones) []*bootstrapRESTClient | |||
| 
 | ||||
| 			// Only proceed for remote endpoints.
 | ||||
| 			if !endpoint.IsLocal { | ||||
| 				clnt, err := newBootstrapRESTClient(endpoint) | ||||
| 				if err != nil { | ||||
| 					continue | ||||
| 				} | ||||
| 				clnts = append(clnts, clnt) | ||||
| 				clnts = append(clnts, newBootstrapRESTClient(endpoint)) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | @ -217,7 +213,7 @@ func newBootstrapRESTClients(endpointZones EndpointZones) []*bootstrapRESTClient | |||
| } | ||||
| 
 | ||||
| // Returns a new bootstrap client.
 | ||||
| func newBootstrapRESTClient(endpoint Endpoint) (*bootstrapRESTClient, error) { | ||||
| func newBootstrapRESTClient(endpoint Endpoint) *bootstrapRESTClient { | ||||
| 	serverURL := &url.URL{ | ||||
| 		Scheme: endpoint.Scheme, | ||||
| 		Host:   endpoint.Host, | ||||
|  | @ -233,19 +229,17 @@ func newBootstrapRESTClient(endpoint Endpoint) (*bootstrapRESTClient, error) { | |||
| 	} | ||||
| 
 | ||||
| 	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout) | ||||
| 	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	restClient := rest.NewClient(serverURL, trFn, newAuthToken) | ||||
| 	restClient.HealthCheckFn = func() bool { | ||||
| 		ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout) | ||||
| 		respBody, err := restClient.CallWithContext(ctx, bootstrapRESTMethodHealth, nil, nil, -1) | ||||
| 		// Instantiate a new rest client for healthcheck
 | ||||
| 		// to avoid recursive healthCheckFn()
 | ||||
| 		respBody, err := rest.NewClient(serverURL, trFn, newAuthToken).CallWithContext(ctx, bootstrapRESTMethodHealth, nil, nil, -1) | ||||
| 		xhttp.DrainBody(respBody) | ||||
| 		cancel() | ||||
| 		var ne *rest.NetworkError | ||||
| 		return !errors.Is(err, context.DeadlineExceeded) && !errors.As(err, &ne) | ||||
| 	} | ||||
| 
 | ||||
| 	return &bootstrapRESTClient{endpoint: endpoint, restClient: restClient}, nil | ||||
| 	return &bootstrapRESTClient{endpoint: endpoint, restClient: restClient} | ||||
| } | ||||
|  |  | |||
|  | @ -462,7 +462,7 @@ func azureToObjectError(err error, params ...string) error { | |||
| 
 | ||||
| func azureCodesToObjectError(err error, serviceCode string, statusCode int, bucket string, object string) error { | ||||
| 	switch serviceCode { | ||||
| 	case "ContainerNotFound": | ||||
| 	case "ContainerNotFound", "ContainerBeingDeleted": | ||||
| 		err = minio.BucketNotFound{Bucket: bucket} | ||||
| 	case "ContainerAlreadyExists": | ||||
| 		err = minio.BucketExists{Bucket: bucket} | ||||
|  |  | |||
|  | @ -26,7 +26,6 @@ import ( | |||
| 
 | ||||
| 	"github.com/minio/minio/cmd/http" | ||||
| 	xhttp "github.com/minio/minio/cmd/http" | ||||
| 	"github.com/minio/minio/cmd/logger" | ||||
| 	"github.com/minio/minio/cmd/rest" | ||||
| 	"github.com/minio/minio/pkg/dsync" | ||||
| ) | ||||
|  | @ -155,13 +154,12 @@ func newlockRESTClient(endpoint Endpoint) *lockRESTClient { | |||
| 	} | ||||
| 
 | ||||
| 	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout) | ||||
| 	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken) | ||||
| 	if err != nil { | ||||
| 		logger.Fatal(err, "Unable to create lock rest client") | ||||
| 	} | ||||
| 	restClient := rest.NewClient(serverURL, trFn, newAuthToken) | ||||
| 	restClient.HealthCheckFn = func() bool { | ||||
| 		ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout) | ||||
| 		respBody, err := restClient.CallWithContext(ctx, lockRESTMethodHealth, nil, nil, -1) | ||||
| 		// Instantiate a new rest client for healthcheck
 | ||||
| 		// to avoid recursive healthCheckFn()
 | ||||
| 		respBody, err := rest.NewClient(serverURL, trFn, newAuthToken).CallWithContext(ctx, lockRESTMethodHealth, nil, nil, -1) | ||||
| 		xhttp.DrainBody(respBody) | ||||
| 		cancel() | ||||
| 		var ne *rest.NetworkError | ||||
|  |  | |||
|  | @ -846,19 +846,14 @@ func newPeerRestClients(endpoints EndpointZones) []*peerRESTClient { | |||
| 	peerHosts := getRemoteHosts(endpoints) | ||||
| 	restClients := make([]*peerRESTClient, len(peerHosts)) | ||||
| 	for i, host := range peerHosts { | ||||
| 		client, err := newPeerRESTClient(host) | ||||
| 		if err != nil { | ||||
| 			logger.LogIf(GlobalContext, err) | ||||
| 			continue | ||||
| 		} | ||||
| 		restClients[i] = client | ||||
| 		restClients[i] = newPeerRESTClient(host) | ||||
| 	} | ||||
| 
 | ||||
| 	return restClients | ||||
| } | ||||
| 
 | ||||
| // Returns a peer rest client.
 | ||||
| func newPeerRESTClient(peer *xnet.Host) (*peerRESTClient, error) { | ||||
| func newPeerRESTClient(peer *xnet.Host) *peerRESTClient { | ||||
| 	scheme := "http" | ||||
| 	if globalIsSSL { | ||||
| 		scheme = "https" | ||||
|  | @ -879,20 +874,19 @@ func newPeerRESTClient(peer *xnet.Host) (*peerRESTClient, error) { | |||
| 	} | ||||
| 
 | ||||
| 	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout) | ||||
| 	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	restClient := rest.NewClient(serverURL, trFn, newAuthToken) | ||||
| 
 | ||||
| 	// Construct a new health function.
 | ||||
| 	restClient.HealthCheckFn = func() bool { | ||||
| 		ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout) | ||||
| 		respBody, err := restClient.CallWithContext(ctx, peerRESTMethodHealth, nil, nil, -1) | ||||
| 		// Instantiate a new rest client for healthcheck
 | ||||
| 		// to avoid recursive healthCheckFn()
 | ||||
| 		respBody, err := rest.NewClient(serverURL, trFn, newAuthToken).CallWithContext(ctx, peerRESTMethodHealth, nil, nil, -1) | ||||
| 		xhttp.DrainBody(respBody) | ||||
| 		cancel() | ||||
| 		var ne *rest.NetworkError | ||||
| 		return !errors.Is(err, context.DeadlineExceeded) && !errors.As(err, &ne) | ||||
| 	} | ||||
| 
 | ||||
| 	return &peerRESTClient{host: peer, restClient: restClient}, nil | ||||
| 	return &peerRESTClient{host: peer, restClient: restClient} | ||||
| } | ||||
|  |  | |||
|  | @ -158,7 +158,7 @@ func (c *Client) Close() { | |||
| } | ||||
| 
 | ||||
| // NewClient - returns new REST client.
 | ||||
| func NewClient(url *url.URL, newCustomTransport func() *http.Transport, newAuthToken func(aud string) string) (*Client, error) { | ||||
| func NewClient(url *url.URL, newCustomTransport func() *http.Transport, newAuthToken func(aud string) string) *Client { | ||||
| 	// Transport is exactly same as Go default in https://golang.org/pkg/net/http/#RoundTripper
 | ||||
| 	// except custom DialContext and TLSClientConfig.
 | ||||
| 	tr := newCustomTransport() | ||||
|  | @ -172,7 +172,7 @@ func NewClient(url *url.URL, newCustomTransport func() *http.Transport, newAuthT | |||
| 		MaxErrResponseSize:  4096, | ||||
| 		HealthCheckInterval: 200 * time.Millisecond, | ||||
| 		HealthCheckTimeout:  time.Second, | ||||
| 	}, nil | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // IsOnline returns whether the client is likely to be online.
 | ||||
|  |  | |||
|  | @ -660,15 +660,13 @@ func newStorageRESTClient(endpoint Endpoint) *storageRESTClient { | |||
| 	} | ||||
| 
 | ||||
| 	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout) | ||||
| 	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken) | ||||
| 	if err != nil { | ||||
| 		logger.Fatal(err, "Unable to initialize remote REST disks") | ||||
| 	} | ||||
| 
 | ||||
| 	restClient := rest.NewClient(serverURL, trFn, newAuthToken) | ||||
| 	restClient.HealthCheckInterval = 500 * time.Millisecond | ||||
| 	restClient.HealthCheckFn = func() bool { | ||||
| 		ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout) | ||||
| 		respBody, err := restClient.CallWithContext(ctx, storageRESTMethodHealth, nil, nil, -1) | ||||
| 		// Instantiate a new rest client for healthcheck
 | ||||
| 		// to avoid recursive healthCheckFn()
 | ||||
| 		respBody, err := rest.NewClient(serverURL, trFn, newAuthToken).CallWithContext(ctx, storageRESTMethodHealth, nil, nil, -1) | ||||
| 		xhttp.DrainBody(respBody) | ||||
| 		cancel() | ||||
| 		return !errors.Is(err, context.DeadlineExceeded) && toStorageErr(err) != errDiskNotFound | ||||
|  |  | |||
|  | @ -66,6 +66,13 @@ import ( | |||
| 
 | ||||
| // Tests should initNSLock only once.
 | ||||
| func init() { | ||||
| 	globalActiveCred = auth.Credentials{ | ||||
| 		AccessKey: auth.DefaultAccessKey, | ||||
| 		SecretKey: auth.DefaultSecretKey, | ||||
| 	} | ||||
| 
 | ||||
| 	globalConfigEncrypted = true | ||||
| 
 | ||||
| 	// disable ENVs which interfere with tests.
 | ||||
| 	for _, env := range []string{ | ||||
| 		crypto.EnvAutoEncryptionLegacy, | ||||
|  | @ -463,13 +470,6 @@ func newTestConfig(bucketLocation string, obj ObjectLayer) (err error) { | |||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	globalActiveCred = auth.Credentials{ | ||||
| 		AccessKey: auth.DefaultAccessKey, | ||||
| 		SecretKey: auth.DefaultSecretKey, | ||||
| 	} | ||||
| 
 | ||||
| 	globalConfigEncrypted = true | ||||
| 
 | ||||
| 	// Set a default region.
 | ||||
| 	config.SetRegion(globalServerConfig, bucketLocation) | ||||
| 
 | ||||
|  |  | |||
|  | @ -19,12 +19,12 @@ package dsync | |||
| import ( | ||||
| 	"context" | ||||
| 	"errors" | ||||
| 	golog "log" | ||||
| 	"math/rand" | ||||
| 	"os" | ||||
| 	"sync" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/console" | ||||
| 	"github.com/minio/minio/pkg/retry" | ||||
| ) | ||||
| 
 | ||||
|  | @ -37,9 +37,9 @@ func init() { | |||
| 	rand.Seed(time.Now().UnixNano()) | ||||
| } | ||||
| 
 | ||||
| func log(msg ...interface{}) { | ||||
| func log(format string, data ...interface{}) { | ||||
| 	if dsyncLog { | ||||
| 		golog.Println(msg...) | ||||
| 		console.Printf(format, data...) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
|  | @ -185,7 +185,7 @@ func lock(ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNa | |||
| 
 | ||||
| 			g := Granted{index: index} | ||||
| 			if c == nil { | ||||
| 				log("lock: nil locker") | ||||
| 				log("dsync: nil locker") | ||||
| 				ch <- g | ||||
| 				return | ||||
| 			} | ||||
|  | @ -200,11 +200,11 @@ func lock(ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNa | |||
| 			var err error | ||||
| 			if isReadLock { | ||||
| 				if locked, err = c.RLock(args); err != nil { | ||||
| 					log("Unable to call RLock", err) | ||||
| 					log("dsync: Unable to call RLock failed with %s for %#v at %s\n", err, args, c) | ||||
| 				} | ||||
| 			} else { | ||||
| 				if locked, err = c.Lock(args); err != nil { | ||||
| 					log("Unable to call Lock", err) | ||||
| 					log("dsync: Unable to call Lock failed with %s for %#v at %s\n", err, args, c) | ||||
| 				} | ||||
| 			} | ||||
| 
 | ||||
|  | @ -259,10 +259,10 @@ func lock(ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNa | |||
| 				// timeout happened, maybe one of the nodes is slow, count
 | ||||
| 				// number of locks to check whether we have quorum or not
 | ||||
| 				if !quorumMet(locks, isReadLock, dquorum, dquorumReads) { | ||||
| 					log("Quorum not met after timeout") | ||||
| 					log("Quorum not met after timeout\n") | ||||
| 					releaseAll(ds, locks, isReadLock, restClnts, lockNames...) | ||||
| 				} else { | ||||
| 					log("Quorum met after timeout") | ||||
| 					log("Quorum met after timeout\n") | ||||
| 				} | ||||
| 			} | ||||
| 
 | ||||
|  | @ -402,7 +402,7 @@ func unlock(ds *Dsync, locks []string, isReadLock bool, restClnts []NetLocker, n | |||
| // sendRelease sends a release message to a node that previously granted a lock
 | ||||
| func sendRelease(ds *Dsync, c NetLocker, uid string, isReadLock bool, names ...string) { | ||||
| 	if c == nil { | ||||
| 		log("Unable to call RUnlock", errors.New("netLocker is offline")) | ||||
| 		log("Unable to call RUnlock failed with %s\n", errors.New("netLocker is offline")) | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
|  | @ -412,11 +412,11 @@ func sendRelease(ds *Dsync, c NetLocker, uid string, isReadLock bool, names ...s | |||
| 	} | ||||
| 	if isReadLock { | ||||
| 		if _, err := c.RUnlock(args); err != nil { | ||||
| 			log("Unable to call RUnlock", err) | ||||
| 			log("dsync: Unable to call RUnlock failed with %s for %#v at %s\n", err, args, c) | ||||
| 		} | ||||
| 	} else { | ||||
| 		if _, err := c.Unlock(args); err != nil { | ||||
| 			log("Unable to call Unlock", err) | ||||
| 			log("dsync: Unable to call Unlock failed with %s for %#v at %s\n", err, args, c) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue