mirror of https://github.com/minio/minio.git
				
				
				
			fix: make sure to use new restClient for healthcheck (#10026)
Without instantiating a new rest client, we can hit a recursive error that leads to the healthcheck always returning offline; this can prematurely take the servers offline.
This commit is contained in:
		
							parent
							
								
									c2fdf73491
								
							
						
					
					
						commit
						3b9fbf80ad
					
				| 
						 | 
				
			
			@ -205,11 +205,7 @@ func newBootstrapRESTClients(endpointZones EndpointZones) []*bootstrapRESTClient
 | 
			
		|||
 | 
			
		||||
			// Only proceed for remote endpoints.
 | 
			
		||||
			if !endpoint.IsLocal {
 | 
			
		||||
				clnt, err := newBootstrapRESTClient(endpoint)
 | 
			
		||||
				if err != nil {
 | 
			
		||||
					continue
 | 
			
		||||
				}
 | 
			
		||||
				clnts = append(clnts, clnt)
 | 
			
		||||
				clnts = append(clnts, newBootstrapRESTClient(endpoint))
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -217,7 +213,7 @@ func newBootstrapRESTClients(endpointZones EndpointZones) []*bootstrapRESTClient
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
// Returns a new bootstrap client.
 | 
			
		||||
func newBootstrapRESTClient(endpoint Endpoint) (*bootstrapRESTClient, error) {
 | 
			
		||||
func newBootstrapRESTClient(endpoint Endpoint) *bootstrapRESTClient {
 | 
			
		||||
	serverURL := &url.URL{
 | 
			
		||||
		Scheme: endpoint.Scheme,
 | 
			
		||||
		Host:   endpoint.Host,
 | 
			
		||||
| 
						 | 
				
			
			@ -233,19 +229,17 @@ func newBootstrapRESTClient(endpoint Endpoint) (*bootstrapRESTClient, error) {
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout)
 | 
			
		||||
	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	restClient := rest.NewClient(serverURL, trFn, newAuthToken)
 | 
			
		||||
	restClient.HealthCheckFn = func() bool {
 | 
			
		||||
		ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout)
 | 
			
		||||
		respBody, err := restClient.CallWithContext(ctx, bootstrapRESTMethodHealth, nil, nil, -1)
 | 
			
		||||
		// Instantiate a new rest client for healthcheck
 | 
			
		||||
		// to avoid recursive healthCheckFn()
 | 
			
		||||
		respBody, err := rest.NewClient(serverURL, trFn, newAuthToken).CallWithContext(ctx, bootstrapRESTMethodHealth, nil, nil, -1)
 | 
			
		||||
		xhttp.DrainBody(respBody)
 | 
			
		||||
		cancel()
 | 
			
		||||
		var ne *rest.NetworkError
 | 
			
		||||
		return !errors.Is(err, context.DeadlineExceeded) && !errors.As(err, &ne)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return &bootstrapRESTClient{endpoint: endpoint, restClient: restClient}, nil
 | 
			
		||||
	return &bootstrapRESTClient{endpoint: endpoint, restClient: restClient}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -462,7 +462,7 @@ func azureToObjectError(err error, params ...string) error {
 | 
			
		|||
 | 
			
		||||
func azureCodesToObjectError(err error, serviceCode string, statusCode int, bucket string, object string) error {
 | 
			
		||||
	switch serviceCode {
 | 
			
		||||
	case "ContainerNotFound":
 | 
			
		||||
	case "ContainerNotFound", "ContainerBeingDeleted":
 | 
			
		||||
		err = minio.BucketNotFound{Bucket: bucket}
 | 
			
		||||
	case "ContainerAlreadyExists":
 | 
			
		||||
		err = minio.BucketExists{Bucket: bucket}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -26,7 +26,6 @@ import (
 | 
			
		|||
 | 
			
		||||
	"github.com/minio/minio/cmd/http"
 | 
			
		||||
	xhttp "github.com/minio/minio/cmd/http"
 | 
			
		||||
	"github.com/minio/minio/cmd/logger"
 | 
			
		||||
	"github.com/minio/minio/cmd/rest"
 | 
			
		||||
	"github.com/minio/minio/pkg/dsync"
 | 
			
		||||
)
 | 
			
		||||
| 
						 | 
				
			
			@ -155,13 +154,12 @@ func newlockRESTClient(endpoint Endpoint) *lockRESTClient {
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout)
 | 
			
		||||
	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		logger.Fatal(err, "Unable to create lock rest client")
 | 
			
		||||
	}
 | 
			
		||||
	restClient := rest.NewClient(serverURL, trFn, newAuthToken)
 | 
			
		||||
	restClient.HealthCheckFn = func() bool {
 | 
			
		||||
		ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout)
 | 
			
		||||
		respBody, err := restClient.CallWithContext(ctx, lockRESTMethodHealth, nil, nil, -1)
 | 
			
		||||
		// Instantiate a new rest client for healthcheck
 | 
			
		||||
		// to avoid recursive healthCheckFn()
 | 
			
		||||
		respBody, err := rest.NewClient(serverURL, trFn, newAuthToken).CallWithContext(ctx, lockRESTMethodHealth, nil, nil, -1)
 | 
			
		||||
		xhttp.DrainBody(respBody)
 | 
			
		||||
		cancel()
 | 
			
		||||
		var ne *rest.NetworkError
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -846,19 +846,14 @@ func newPeerRestClients(endpoints EndpointZones) []*peerRESTClient {
 | 
			
		|||
	peerHosts := getRemoteHosts(endpoints)
 | 
			
		||||
	restClients := make([]*peerRESTClient, len(peerHosts))
 | 
			
		||||
	for i, host := range peerHosts {
 | 
			
		||||
		client, err := newPeerRESTClient(host)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			logger.LogIf(GlobalContext, err)
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		restClients[i] = client
 | 
			
		||||
		restClients[i] = newPeerRESTClient(host)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return restClients
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Returns a peer rest client.
 | 
			
		||||
func newPeerRESTClient(peer *xnet.Host) (*peerRESTClient, error) {
 | 
			
		||||
func newPeerRESTClient(peer *xnet.Host) *peerRESTClient {
 | 
			
		||||
	scheme := "http"
 | 
			
		||||
	if globalIsSSL {
 | 
			
		||||
		scheme = "https"
 | 
			
		||||
| 
						 | 
				
			
			@ -879,20 +874,19 @@ func newPeerRESTClient(peer *xnet.Host) (*peerRESTClient, error) {
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout)
 | 
			
		||||
	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	restClient := rest.NewClient(serverURL, trFn, newAuthToken)
 | 
			
		||||
 | 
			
		||||
	// Construct a new health function.
 | 
			
		||||
	restClient.HealthCheckFn = func() bool {
 | 
			
		||||
		ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout)
 | 
			
		||||
		respBody, err := restClient.CallWithContext(ctx, peerRESTMethodHealth, nil, nil, -1)
 | 
			
		||||
		// Instantiate a new rest client for healthcheck
 | 
			
		||||
		// to avoid recursive healthCheckFn()
 | 
			
		||||
		respBody, err := rest.NewClient(serverURL, trFn, newAuthToken).CallWithContext(ctx, peerRESTMethodHealth, nil, nil, -1)
 | 
			
		||||
		xhttp.DrainBody(respBody)
 | 
			
		||||
		cancel()
 | 
			
		||||
		var ne *rest.NetworkError
 | 
			
		||||
		return !errors.Is(err, context.DeadlineExceeded) && !errors.As(err, &ne)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return &peerRESTClient{host: peer, restClient: restClient}, nil
 | 
			
		||||
	return &peerRESTClient{host: peer, restClient: restClient}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -158,7 +158,7 @@ func (c *Client) Close() {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
// NewClient - returns new REST client.
 | 
			
		||||
func NewClient(url *url.URL, newCustomTransport func() *http.Transport, newAuthToken func(aud string) string) (*Client, error) {
 | 
			
		||||
func NewClient(url *url.URL, newCustomTransport func() *http.Transport, newAuthToken func(aud string) string) *Client {
 | 
			
		||||
	// Transport is exactly same as Go default in https://golang.org/pkg/net/http/#RoundTripper
 | 
			
		||||
	// except custom DialContext and TLSClientConfig.
 | 
			
		||||
	tr := newCustomTransport()
 | 
			
		||||
| 
						 | 
				
			
			@ -172,7 +172,7 @@ func NewClient(url *url.URL, newCustomTransport func() *http.Transport, newAuthT
 | 
			
		|||
		MaxErrResponseSize:  4096,
 | 
			
		||||
		HealthCheckInterval: 200 * time.Millisecond,
 | 
			
		||||
		HealthCheckTimeout:  time.Second,
 | 
			
		||||
	}, nil
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IsOnline returns whether the client is likely to be online.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -660,15 +660,13 @@ func newStorageRESTClient(endpoint Endpoint) *storageRESTClient {
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout)
 | 
			
		||||
	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		logger.Fatal(err, "Unable to initialize remote REST disks")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	restClient := rest.NewClient(serverURL, trFn, newAuthToken)
 | 
			
		||||
	restClient.HealthCheckInterval = 500 * time.Millisecond
 | 
			
		||||
	restClient.HealthCheckFn = func() bool {
 | 
			
		||||
		ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout)
 | 
			
		||||
		respBody, err := restClient.CallWithContext(ctx, storageRESTMethodHealth, nil, nil, -1)
 | 
			
		||||
		// Instantiate a new rest client for healthcheck
 | 
			
		||||
		// to avoid recursive healthCheckFn()
 | 
			
		||||
		respBody, err := rest.NewClient(serverURL, trFn, newAuthToken).CallWithContext(ctx, storageRESTMethodHealth, nil, nil, -1)
 | 
			
		||||
		xhttp.DrainBody(respBody)
 | 
			
		||||
		cancel()
 | 
			
		||||
		return !errors.Is(err, context.DeadlineExceeded) && toStorageErr(err) != errDiskNotFound
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -66,6 +66,13 @@ import (
 | 
			
		|||
 | 
			
		||||
// Tests should initNSLock only once.
 | 
			
		||||
func init() {
 | 
			
		||||
	globalActiveCred = auth.Credentials{
 | 
			
		||||
		AccessKey: auth.DefaultAccessKey,
 | 
			
		||||
		SecretKey: auth.DefaultSecretKey,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	globalConfigEncrypted = true
 | 
			
		||||
 | 
			
		||||
	// disable ENVs which interfere with tests.
 | 
			
		||||
	for _, env := range []string{
 | 
			
		||||
		crypto.EnvAutoEncryptionLegacy,
 | 
			
		||||
| 
						 | 
				
			
			@ -463,13 +470,6 @@ func newTestConfig(bucketLocation string, obj ObjectLayer) (err error) {
 | 
			
		|||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	globalActiveCred = auth.Credentials{
 | 
			
		||||
		AccessKey: auth.DefaultAccessKey,
 | 
			
		||||
		SecretKey: auth.DefaultSecretKey,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	globalConfigEncrypted = true
 | 
			
		||||
 | 
			
		||||
	// Set a default region.
 | 
			
		||||
	config.SetRegion(globalServerConfig, bucketLocation)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -19,12 +19,12 @@ package dsync
 | 
			
		|||
import (
 | 
			
		||||
	"context"
 | 
			
		||||
	"errors"
 | 
			
		||||
	golog "log"
 | 
			
		||||
	"math/rand"
 | 
			
		||||
	"os"
 | 
			
		||||
	"sync"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"github.com/minio/minio/pkg/console"
 | 
			
		||||
	"github.com/minio/minio/pkg/retry"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -37,9 +37,9 @@ func init() {
 | 
			
		|||
	rand.Seed(time.Now().UnixNano())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func log(msg ...interface{}) {
 | 
			
		||||
func log(format string, data ...interface{}) {
 | 
			
		||||
	if dsyncLog {
 | 
			
		||||
		golog.Println(msg...)
 | 
			
		||||
		console.Printf(format, data...)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -185,7 +185,7 @@ func lock(ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNa
 | 
			
		|||
 | 
			
		||||
			g := Granted{index: index}
 | 
			
		||||
			if c == nil {
 | 
			
		||||
				log("lock: nil locker")
 | 
			
		||||
				log("dsync: nil locker")
 | 
			
		||||
				ch <- g
 | 
			
		||||
				return
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -200,11 +200,11 @@ func lock(ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNa
 | 
			
		|||
			var err error
 | 
			
		||||
			if isReadLock {
 | 
			
		||||
				if locked, err = c.RLock(args); err != nil {
 | 
			
		||||
					log("Unable to call RLock", err)
 | 
			
		||||
					log("dsync: Unable to call RLock failed with %s for %#v at %s\n", err, args, c)
 | 
			
		||||
				}
 | 
			
		||||
			} else {
 | 
			
		||||
				if locked, err = c.Lock(args); err != nil {
 | 
			
		||||
					log("Unable to call Lock", err)
 | 
			
		||||
					log("dsync: Unable to call Lock failed with %s for %#v at %s\n", err, args, c)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -259,10 +259,10 @@ func lock(ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNa
 | 
			
		|||
				// timeout happened, maybe one of the nodes is slow, count
 | 
			
		||||
				// number of locks to check whether we have quorum or not
 | 
			
		||||
				if !quorumMet(locks, isReadLock, dquorum, dquorumReads) {
 | 
			
		||||
					log("Quorum not met after timeout")
 | 
			
		||||
					log("Quorum not met after timeout\n")
 | 
			
		||||
					releaseAll(ds, locks, isReadLock, restClnts, lockNames...)
 | 
			
		||||
				} else {
 | 
			
		||||
					log("Quorum met after timeout")
 | 
			
		||||
					log("Quorum met after timeout\n")
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -402,7 +402,7 @@ func unlock(ds *Dsync, locks []string, isReadLock bool, restClnts []NetLocker, n
 | 
			
		|||
// sendRelease sends a release message to a node that previously granted a lock
 | 
			
		||||
func sendRelease(ds *Dsync, c NetLocker, uid string, isReadLock bool, names ...string) {
 | 
			
		||||
	if c == nil {
 | 
			
		||||
		log("Unable to call RUnlock", errors.New("netLocker is offline"))
 | 
			
		||||
		log("Unable to call RUnlock failed with %s\n", errors.New("netLocker is offline"))
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -412,11 +412,11 @@ func sendRelease(ds *Dsync, c NetLocker, uid string, isReadLock bool, names ...s
 | 
			
		|||
	}
 | 
			
		||||
	if isReadLock {
 | 
			
		||||
		if _, err := c.RUnlock(args); err != nil {
 | 
			
		||||
			log("Unable to call RUnlock", err)
 | 
			
		||||
			log("dsync: Unable to call RUnlock failed with %s for %#v at %s\n", err, args, c)
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		if _, err := c.Unlock(args); err != nil {
 | 
			
		||||
			log("Unable to call Unlock", err)
 | 
			
		||||
			log("dsync: Unable to call Unlock failed with %s for %#v at %s\n", err, args, c)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue