| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | // Copyright (c) 2015-2024 MinIO, Inc.
 | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | //
 | 
					
						
							|  |  |  | // This file is part of MinIO Object Storage stack
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or
 | 
					
						
							|  |  |  | // (at your option) any later version.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU Affero General Public License for more details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License
 | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | //go:generate msgp -file=$GOFILE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | package cmd | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 	"encoding/binary" | 
					
						
							|  |  |  | 	"errors" | 
					
						
							|  |  |  | 	"fmt" | 
					
						
							|  |  |  | 	"io" | 
					
						
							|  |  |  | 	"sync" | 
					
						
							|  |  |  | 	"sync/atomic" | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 	"time" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-24 01:15:52 +08:00
										 |  |  | 	"github.com/google/uuid" | 
					
						
							| 
									
										
										
										
											2023-06-20 08:53:08 +08:00
										 |  |  | 	"github.com/minio/madmin-go/v3" | 
					
						
							| 
									
										
										
										
											2024-05-25 07:05:23 +08:00
										 |  |  | 	"github.com/minio/pkg/v3/wildcard" | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 	"github.com/tinylib/msgp/msgp" | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
										 |  |  | 	mrfOpsQueueSize = 100000 | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | const ( | 
					
						
							|  |  |  | 	healDir              = ".heal" | 
					
						
							|  |  |  | 	healMRFDir           = bucketMetaPrefix + SlashSeparator + healDir + SlashSeparator + "mrf" | 
					
						
							|  |  |  | 	healMRFMetaFormat    = 1 | 
					
						
							|  |  |  | 	healMRFMetaVersionV1 = 1 | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // PartialOperation is a successful upload/delete of an object
 | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | // but not written in all disks (having quorum)
 | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | type PartialOperation struct { | 
					
						
							|  |  |  | 	Bucket              string | 
					
						
							|  |  |  | 	Object              string | 
					
						
							|  |  |  | 	VersionID           string | 
					
						
							|  |  |  | 	Versions            []byte | 
					
						
							|  |  |  | 	SetIndex, PoolIndex int | 
					
						
							|  |  |  | 	Queued              time.Time | 
					
						
							|  |  |  | 	BitrotScan          bool | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // mrfState sncapsulates all the information
 | 
					
						
							|  |  |  | // related to the global background MRF.
 | 
					
						
							|  |  |  | type mrfState struct { | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 	opCh chan PartialOperation | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	closed  int32 | 
					
						
							|  |  |  | 	closing int32 | 
					
						
							|  |  |  | 	wg      sync.WaitGroup | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newMRFState() mrfState { | 
					
						
							|  |  |  | 	return mrfState{ | 
					
						
							|  |  |  | 		opCh: make(chan PartialOperation, mrfOpsQueueSize), | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Add a partial S3 operation (put/delete) when one or more disks are offline.
 | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | func (m *mrfState) addPartialOp(op PartialOperation) { | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
										 |  |  | 	if m == nil { | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 	if atomic.LoadInt32(&m.closed) == 1 { | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	m.wg.Add(1) | 
					
						
							|  |  |  | 	defer m.wg.Done() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if atomic.LoadInt32(&m.closing) == 1 { | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 	select { | 
					
						
							|  |  |  | 	case m.opCh <- op: | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | // Do not accept new MRF operations anymore and start to save
 | 
					
						
							|  |  |  | // the current heal status in one available disk
 | 
					
						
							|  |  |  | func (m *mrfState) shutdown() { | 
					
						
							|  |  |  | 	atomic.StoreInt32(&m.closing, 1) | 
					
						
							|  |  |  | 	m.wg.Wait() | 
					
						
							|  |  |  | 	close(m.opCh) | 
					
						
							|  |  |  | 	atomic.StoreInt32(&m.closed, 1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if len(m.opCh) > 0 { | 
					
						
							|  |  |  | 		healingLogEvent(context.Background(), "Saving MRF healing data (%d entries)", len(m.opCh)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	newReader := func() io.ReadCloser { | 
					
						
							|  |  |  | 		r, w := io.Pipe() | 
					
						
							|  |  |  | 		go func() { | 
					
						
							|  |  |  | 			// Initialize MRF meta header.
 | 
					
						
							|  |  |  | 			var data [4]byte | 
					
						
							|  |  |  | 			binary.LittleEndian.PutUint16(data[0:2], healMRFMetaFormat) | 
					
						
							|  |  |  | 			binary.LittleEndian.PutUint16(data[2:4], healMRFMetaVersionV1) | 
					
						
							|  |  |  | 			mw := msgp.NewWriter(w) | 
					
						
							|  |  |  | 			n, err := mw.Write(data[:]) | 
					
						
							|  |  |  | 			if err != nil { | 
					
						
							|  |  |  | 				w.CloseWithError(err) | 
					
						
							|  |  |  | 				return | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if n != len(data) { | 
					
						
							|  |  |  | 				w.CloseWithError(io.ErrShortWrite) | 
					
						
							|  |  |  | 				return | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			for item := range m.opCh { | 
					
						
							|  |  |  | 				err = item.EncodeMsg(mw) | 
					
						
							|  |  |  | 				if err != nil { | 
					
						
							|  |  |  | 					break | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			mw.Flush() | 
					
						
							|  |  |  | 			w.CloseWithError(err) | 
					
						
							|  |  |  | 		}() | 
					
						
							|  |  |  | 		return r | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	globalLocalDrivesMu.RLock() | 
					
						
							|  |  |  | 	localDrives := cloneDrives(globalLocalDrivesMap) | 
					
						
							|  |  |  | 	globalLocalDrivesMu.RUnlock() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, localDrive := range localDrives { | 
					
						
							|  |  |  | 		r := newReader() | 
					
						
							|  |  |  | 		err := localDrive.CreateFile(context.Background(), "", minioMetaBucket, pathJoin(healMRFDir, "list.bin"), -1, r) | 
					
						
							|  |  |  | 		r.Close() | 
					
						
							|  |  |  | 		if err == nil { | 
					
						
							|  |  |  | 			break | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *mrfState) startMRFPersistence() { | 
					
						
							|  |  |  | 	loadMRF := func(rc io.ReadCloser, opCh chan PartialOperation) error { | 
					
						
							|  |  |  | 		defer rc.Close() | 
					
						
							|  |  |  | 		var data [4]byte | 
					
						
							|  |  |  | 		n, err := rc.Read(data[:]) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if n != len(data) { | 
					
						
							|  |  |  | 			return errors.New("heal mrf: no data") | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// Read resync meta header
 | 
					
						
							|  |  |  | 		switch binary.LittleEndian.Uint16(data[0:2]) { | 
					
						
							|  |  |  | 		case healMRFMetaFormat: | 
					
						
							|  |  |  | 		default: | 
					
						
							|  |  |  | 			return fmt.Errorf("heal mrf: unknown format: %d", binary.LittleEndian.Uint16(data[0:2])) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		switch binary.LittleEndian.Uint16(data[2:4]) { | 
					
						
							|  |  |  | 		case healMRFMetaVersionV1: | 
					
						
							|  |  |  | 		default: | 
					
						
							|  |  |  | 			return fmt.Errorf("heal mrf: unknown version: %d", binary.LittleEndian.Uint16(data[2:4])) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		mr := msgp.NewReader(rc) | 
					
						
							|  |  |  | 		for { | 
					
						
							|  |  |  | 			op := PartialOperation{} | 
					
						
							|  |  |  | 			err = op.DecodeMsg(mr) | 
					
						
							|  |  |  | 			if err != nil { | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			opCh <- op | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	globalLocalDrivesMu.RLock() | 
					
						
							|  |  |  | 	localDrives := cloneDrives(globalLocalDrivesMap) | 
					
						
							|  |  |  | 	globalLocalDrivesMu.RUnlock() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, localDrive := range localDrives { | 
					
						
							|  |  |  | 		if localDrive == nil { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		rc, err := localDrive.ReadFileStream(context.Background(), minioMetaBucket, pathJoin(healMRFDir, "list.bin"), 0, -1) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		err = loadMRF(rc, m.opCh) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// finally delete the file after processing mrf entries
 | 
					
						
							|  |  |  | 		localDrive.Delete(GlobalContext, minioMetaBucket, pathJoin(healMRFDir, "list.bin"), DeleteOptions{}) | 
					
						
							|  |  |  | 		break | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
// healSleeper paces MRF healing: healRoutine takes one timer from it per
// queued operation and waits on that timer after the heal call returns.
// NOTE(review): the meaning of the (5, time.Second, false) arguments is
// defined by newDynamicSleeper, which is not visible here - confirm there.
var healSleeper = newDynamicSleeper(5, time.Second, false)
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | // healRoutine listens to new disks reconnection events and
 | 
					
						
							|  |  |  | // issues healing requests for queued objects belonging to the
 | 
					
						
							|  |  |  | // corresponding erasure set
 | 
					
						
							| 
									
										
										
										
											2024-02-28 15:02:14 +08:00
										 |  |  | func (m *mrfState) healRoutine(z *erasureServerPools) { | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 	for { | 
					
						
							|  |  |  | 		select { | 
					
						
							| 
									
										
										
										
											2024-02-28 15:02:14 +08:00
										 |  |  | 		case <-GlobalContext.Done(): | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 			return | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
										 |  |  | 		case u, ok := <-m.opCh: | 
					
						
							|  |  |  | 			if !ok { | 
					
						
							|  |  |  | 				return | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-20 11:21:15 +08:00
										 |  |  | 			// We might land at .metacache, .trash, .multipart
 | 
					
						
							|  |  |  | 			// no need to heal them skip, only when bucket
 | 
					
						
							|  |  |  | 			// is '.minio.sys'
 | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 			if u.Bucket == minioMetaBucket { | 
					
						
							| 
									
										
										
										
											2024-03-20 11:21:15 +08:00
										 |  |  | 				// No MRF needed for temporary objects
 | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 				if wildcard.Match("buckets/*/.metacache/*", u.Object) { | 
					
						
							| 
									
										
										
										
											2024-03-20 11:21:15 +08:00
										 |  |  | 					continue | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 				if wildcard.Match("tmp/*", u.Object) { | 
					
						
							| 
									
										
										
										
											2024-03-20 11:21:15 +08:00
										 |  |  | 					continue | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 				if wildcard.Match("multipart/*", u.Object) { | 
					
						
							| 
									
										
										
										
											2024-03-20 11:21:15 +08:00
										 |  |  | 					continue | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 				if wildcard.Match("tmp-old/*", u.Object) { | 
					
						
							| 
									
										
										
										
											2024-03-20 11:21:15 +08:00
										 |  |  | 					continue | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
										 |  |  | 			now := time.Now() | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 			if now.Sub(u.Queued) < time.Second { | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
										 |  |  | 				// let recently failed networks to reconnect
 | 
					
						
							|  |  |  | 				// making MRF wait for 1s before retrying,
 | 
					
						
							|  |  |  | 				// i.e 4 reconnect attempts.
 | 
					
						
							| 
									
										
										
										
											2023-12-05 03:33:39 +08:00
										 |  |  | 				time.Sleep(time.Second) | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
										 |  |  | 			// wait on timer per heal
 | 
					
						
							|  |  |  | 			wait := healSleeper.Timer(context.Background()) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-09 04:26:01 +08:00
										 |  |  | 			scan := madmin.HealNormalScan | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 			if u.BitrotScan { | 
					
						
							|  |  |  | 				scan = madmin.HealDeepScan | 
					
						
							| 
									
										
										
										
											2023-12-09 04:26:01 +08:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 			if u.Object == "" { | 
					
						
							|  |  |  | 				healBucket(u.Bucket, scan) | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
										 |  |  | 			} else { | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 				if len(u.Versions) > 0 { | 
					
						
							|  |  |  | 					vers := len(u.Versions) / 16 | 
					
						
							| 
									
										
										
										
											2024-04-24 01:15:52 +08:00
										 |  |  | 					if vers > 0 { | 
					
						
							| 
									
										
										
										
											2025-05-27 23:19:03 +08:00
										 |  |  | 						for i := range vers { | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 							healObject(u.Bucket, u.Object, uuid.UUID(u.Versions[16*i:]).String(), scan) | 
					
						
							| 
									
										
										
										
											2024-04-24 01:15:52 +08:00
										 |  |  | 						} | 
					
						
							|  |  |  | 					} | 
					
						
							| 
									
										
										
										
											2023-06-25 10:31:04 +08:00
										 |  |  | 				} else { | 
					
						
							| 
									
										
										
										
											2024-08-14 06:26:05 +08:00
										 |  |  | 					healObject(u.Bucket, u.Object, u.VersionID, scan) | 
					
						
							| 
									
										
										
										
											2023-06-25 10:31:04 +08:00
										 |  |  | 				} | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2021-08-26 08:46:20 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-19 22:47:42 +08:00
										 |  |  | 			wait() | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } |