mirror of https://github.com/minio/minio.git
				
				
				
			fix: do not os.Exit(1) while writing goroutines during shutdown (#17640)
Also add jitter to the shutdown poll, so that we check whether the shutdown sequence has finished well before 500ms; this reduces the overall time taken during a "restart" of the service. This speeds up `mc admin service restart` during active I/O, and also ensures that systemd doesn't treat the returned 'error' as a failure; certain systemd configurations can cause it to 'auto-restart' the process by itself, which can interfere with `mc admin service restart`. With this change, restarting the service is noticeably snappier.
This commit is contained in:
		
							parent
							
								
									a566bcf613
								
							
						
					
					
						commit
						2d1cda2061
					
				|  | @ -46,21 +46,17 @@ func handleSignals() { | |||
| 	} | ||||
| 
 | ||||
| 	stopProcess := func() bool { | ||||
| 		var err, oerr error | ||||
| 
 | ||||
| 		// send signal to various go-routines that they need to quit.
 | ||||
| 		cancelGlobalContext() | ||||
| 
 | ||||
| 		if httpServer := newHTTPServerFn(); httpServer != nil { | ||||
| 			err = httpServer.Shutdown() | ||||
| 			if !errors.Is(err, http.ErrServerClosed) { | ||||
| 			if err := httpServer.Shutdown(); err != nil && !errors.Is(err, http.ErrServerClosed) { | ||||
| 				logger.LogIf(context.Background(), err) | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if objAPI := newObjectLayerFn(); objAPI != nil { | ||||
| 			oerr = objAPI.Shutdown(context.Background()) | ||||
| 			logger.LogIf(context.Background(), oerr) | ||||
| 			logger.LogIf(context.Background(), objAPI.Shutdown(context.Background())) | ||||
| 		} | ||||
| 
 | ||||
| 		if srv := newConsoleServerFn(); srv != nil { | ||||
|  | @ -71,7 +67,7 @@ func handleSignals() { | |||
| 			globalEventNotifier.RemoveAllBucketTargets() | ||||
| 		} | ||||
| 
 | ||||
| 		return (err == nil && oerr == nil) | ||||
| 		return true | ||||
| 	} | ||||
| 
 | ||||
| 	for { | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| // Copyright (c) 2015-2023 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
|  | @ -22,6 +22,7 @@ import ( | |||
| 	"crypto/tls" | ||||
| 	"errors" | ||||
| 	"log" | ||||
| 	"math/rand" | ||||
| 	"net" | ||||
| 	"net/http" | ||||
| 	"os" | ||||
|  | @ -42,7 +43,7 @@ var ( | |||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	serverShutdownPoll = 500 * time.Millisecond | ||||
| 	shutdownPollIntervalMax = 500 * time.Millisecond | ||||
| 
 | ||||
| 	// DefaultShutdownTimeout - default shutdown timeout to gracefully shutdown server.
 | ||||
| 	DefaultShutdownTimeout = 5 * time.Second | ||||
|  | @ -161,14 +162,32 @@ func (srv *Server) Shutdown() error { | |||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	pollIntervalBase := time.Millisecond | ||||
| 	nextPollInterval := func() time.Duration { | ||||
| 		// Add 10% jitter.
 | ||||
| 		interval := pollIntervalBase + time.Duration(rand.Intn(int(pollIntervalBase/10))) | ||||
| 		// Double and clamp for next time.
 | ||||
| 		pollIntervalBase *= 2 | ||||
| 		if pollIntervalBase > shutdownPollIntervalMax { | ||||
| 			pollIntervalBase = shutdownPollIntervalMax | ||||
| 		} | ||||
| 		return interval | ||||
| 	} | ||||
| 
 | ||||
| 	// Wait for opened connection to be closed up to Shutdown timeout.
 | ||||
| 	shutdownTimeout := srv.ShutdownTimeout | ||||
| 	shutdownTimer := time.NewTimer(shutdownTimeout) | ||||
| 	ticker := time.NewTicker(serverShutdownPoll) | ||||
| 	defer ticker.Stop() | ||||
| 	defer shutdownTimer.Stop() | ||||
| 
 | ||||
| 	timer := time.NewTimer(nextPollInterval()) | ||||
| 	defer timer.Stop() | ||||
| 	for { | ||||
| 		select { | ||||
| 		case <-shutdownTimer.C: | ||||
| 			if atomic.LoadInt32(&srv.requestCount) <= 0 { | ||||
| 				return nil | ||||
| 			} | ||||
| 
 | ||||
| 			// Write all running goroutines.
 | ||||
| 			tmp, err := os.CreateTemp("", "minio-goroutines-*.txt") | ||||
| 			if err == nil { | ||||
|  | @ -177,10 +196,11 @@ func (srv *Server) Shutdown() error { | |||
| 				return errors.New("timed out. some connections are still active. goroutines written to " + tmp.Name()) | ||||
| 			} | ||||
| 			return errors.New("timed out. some connections are still active") | ||||
| 		case <-ticker.C: | ||||
| 		case <-timer.C: | ||||
| 			if atomic.LoadInt32(&srv.requestCount) <= 0 { | ||||
| 				return nil | ||||
| 			} | ||||
| 			timer.Reset(nextPollInterval()) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue