| 
									
										
										
										
											2021-04-19 03:41:13 +08:00
										 |  |  | // Copyright (c) 2015-2021 MinIO, Inc.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This file is part of MinIO Object Storage stack
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or
 | 
					
						
							|  |  |  | // (at your option) any later version.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU Affero General Public License for more details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License
 | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | package cmd | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 	"bytes" | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 	"encoding/json" | 
					
						
							| 
									
										
										
										
											2020-08-19 05:37:26 +08:00
										 |  |  | 	"errors" | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 	"fmt" | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 	"io" | 
					
						
							| 
									
										
										
										
											2020-12-16 09:34:54 +08:00
										 |  |  | 	"sort" | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 	"strings" | 
					
						
							|  |  |  | 	"sync" | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | 	"time" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-16 22:30:05 +08:00
										 |  |  | 	"github.com/dustin/go-humanize" | 
					
						
							| 
									
										
										
										
											2021-05-06 23:52:02 +08:00
										 |  |  | 	"github.com/minio/madmin-go" | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 	"github.com/minio/minio-go/v7/pkg/set" | 
					
						
							| 
									
										
										
										
											2021-06-02 05:59:40 +08:00
										 |  |  | 	"github.com/minio/minio/internal/color" | 
					
						
							|  |  |  | 	"github.com/minio/minio/internal/logger" | 
					
						
							| 
									
										
										
										
											2021-05-29 06:17:01 +08:00
										 |  |  | 	"github.com/minio/pkg/console" | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
const (
	// defaultMonitorNewDiskInterval is the period of the timer used by
	// monitorLocalDisksAndHeal to check for local disks that need healing.
	defaultMonitorNewDiskInterval = time.Second * 10
	// healingTrackerFilename is the name of the file, stored under
	// bucketMetaPrefix inside minioMetaBucket, that holds the serialized
	// healingTracker of a disk.
	healingTrackerFilename        = ".healing.bin"
)
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | //go:generate msgp -file $GOFILE -unexported
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 
 | 
					
						
// healingTracker is used to persist healing information during a heal.
// It is encoded with the msgp-generated MarshalMsg/UnmarshalMsg methods and
// stored as healingTrackerFilename on the disk being healed.
type healingTracker struct {
	// disk is the storage the tracker is loaded from and saved to.
	// It is tagged `msg:"-"`, so it is not part of the msgp encoding.
	disk StorageAPI `msg:"-"`

	// Identity and location of the disk being healed, plus timestamps.
	// ID comes from GetDiskID and the three indexes from GetDiskLoc.
	// Path and Endpoint are the string forms of the disk and its endpoint.
	// Started is set when the tracker is created; LastUpdate on every save.
	ID         string
	PoolIndex  int
	SetIndex   int
	DiskIndex  int
	Path       string
	Endpoint   string
	Started    time.Time
	LastUpdate time.Time

	// Totals for the heal. They are not assigned in this part of the file;
	// presumably filled in by the healing routine (TODO confirm against callers).
	ObjectsTotalCount uint64
	ObjectsTotalSize  uint64

	// Item progress counters. resume() rewinds them to the Resume* values.
	ItemsHealed uint64
	ItemsFailed uint64

	// Byte progress counters. resume() rewinds them to the Resume* values.
	BytesDone   uint64
	BytesFailed uint64

	// Last object scanned.
	// Excluded from the JSON form (`json:"-"`) but copied by toHealingDisk.
	Bucket string `json:"-"`
	Object string `json:"-"`

	// Numbers when current bucket started healing,
	// for resuming with correct numbers.
	// bucketDone() snapshots the live counters into these fields.
	ResumeItemsHealed uint64 `json:"-"`
	ResumeItemsFailed uint64 `json:"-"`
	ResumeBytesDone   uint64 `json:"-"`
	ResumeBytesFailed uint64 `json:"-"`

	// Filled on startup/restarts.
	// Buckets still waiting to be healed; see setQueuedBuckets.
	QueuedBuckets []string

	// Filled during heal.
	// Buckets that finished healing; appended to by bucketDone.
	HealedBuckets []string
	// Add future tracking capabilities
	// Be sure that they are included in toHealingDisk
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // loadHealingTracker will load the healing tracker from the supplied disk.
 | 
					
						
							|  |  |  | // The disk ID will be validated against the loaded one.
 | 
					
						
							|  |  |  | func loadHealingTracker(ctx context.Context, disk StorageAPI) (*healingTracker, error) { | 
					
						
							|  |  |  | 	if disk == nil { | 
					
						
							|  |  |  | 		return nil, errors.New("loadHealingTracker: nil disk given") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	diskID, err := disk.GetDiskID() | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b, err := disk.ReadAll(ctx, minioMetaBucket, | 
					
						
							|  |  |  | 		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename)) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	var h healingTracker | 
					
						
							|  |  |  | 	_, err = h.UnmarshalMsg(b) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if h.ID != diskID && h.ID != "" { | 
					
						
							|  |  |  | 		return nil, fmt.Errorf("loadHealingTracker: disk id mismatch expected %s, got %s", h.ID, diskID) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	h.disk = disk | 
					
						
							|  |  |  | 	h.ID = diskID | 
					
						
							|  |  |  | 	return &h, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // newHealingTracker will create a new healing tracker for the disk.
 | 
					
						
							|  |  |  | func newHealingTracker(disk StorageAPI) *healingTracker { | 
					
						
							|  |  |  | 	diskID, _ := disk.GetDiskID() | 
					
						
							|  |  |  | 	h := healingTracker{ | 
					
						
							|  |  |  | 		disk:     disk, | 
					
						
							|  |  |  | 		ID:       diskID, | 
					
						
							|  |  |  | 		Path:     disk.String(), | 
					
						
							|  |  |  | 		Endpoint: disk.Endpoint().String(), | 
					
						
							|  |  |  | 		Started:  time.Now().UTC(), | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	h.PoolIndex, h.SetIndex, h.DiskIndex = disk.GetDiskLoc() | 
					
						
							|  |  |  | 	return &h | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // update will update the tracker on the disk.
 | 
					
						
							|  |  |  | // If the tracker has been deleted an error is returned.
 | 
					
						
							|  |  |  | func (h *healingTracker) update(ctx context.Context) error { | 
					
						
							|  |  |  | 	if h.disk.Healing() == nil { | 
					
						
							|  |  |  | 		return fmt.Errorf("healingTracker: disk %q is not marked as healing", h.ID) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 { | 
					
						
							|  |  |  | 		h.ID, _ = h.disk.GetDiskID() | 
					
						
							|  |  |  | 		h.PoolIndex, h.SetIndex, h.DiskIndex = h.disk.GetDiskLoc() | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return h.save(ctx) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // save will unconditionally save the tracker and will be created if not existing.
 | 
					
						
							|  |  |  | func (h *healingTracker) save(ctx context.Context) error { | 
					
						
							|  |  |  | 	if h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 { | 
					
						
							|  |  |  | 		// Attempt to get location.
 | 
					
						
							|  |  |  | 		if api := newObjectLayerFn(); api != nil { | 
					
						
							|  |  |  | 			if ep, ok := api.(*erasureServerPools); ok { | 
					
						
							|  |  |  | 				h.PoolIndex, h.SetIndex, h.DiskIndex, _ = ep.getPoolAndSet(h.ID) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	h.LastUpdate = time.Now().UTC() | 
					
						
							|  |  |  | 	htrackerBytes, err := h.MarshalMsg(nil) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	globalBackgroundHealState.updateHealStatus(h) | 
					
						
							|  |  |  | 	return h.disk.WriteAll(ctx, minioMetaBucket, | 
					
						
							|  |  |  | 		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename), | 
					
						
							|  |  |  | 		htrackerBytes) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // delete the tracker on disk.
 | 
					
						
							|  |  |  | func (h *healingTracker) delete(ctx context.Context) error { | 
					
						
							|  |  |  | 	return h.disk.Delete(ctx, minioMetaBucket, | 
					
						
							|  |  |  | 		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename), | 
					
						
							|  |  |  | 		false) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (h *healingTracker) isHealed(bucket string) bool { | 
					
						
							|  |  |  | 	for _, v := range h.HealedBuckets { | 
					
						
							|  |  |  | 		if v == bucket { | 
					
						
							|  |  |  | 			return true | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return false | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | // resume will reset progress to the numbers at the start of the bucket.
 | 
					
						
							|  |  |  | func (h *healingTracker) resume() { | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 	h.ItemsHealed = h.ResumeItemsHealed | 
					
						
							|  |  |  | 	h.ItemsFailed = h.ResumeItemsFailed | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 	h.BytesDone = h.ResumeBytesDone | 
					
						
							|  |  |  | 	h.BytesFailed = h.ResumeBytesFailed | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // bucketDone should be called when a bucket is done healing.
 | 
					
						
							|  |  |  | // Adds the bucket to the list of healed buckets and updates resume numbers.
 | 
					
						
							|  |  |  | func (h *healingTracker) bucketDone(bucket string) { | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 	h.ResumeItemsHealed = h.ItemsHealed | 
					
						
							|  |  |  | 	h.ResumeItemsFailed = h.ItemsFailed | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 	h.ResumeBytesDone = h.BytesDone | 
					
						
							|  |  |  | 	h.ResumeBytesFailed = h.BytesFailed | 
					
						
							|  |  |  | 	h.HealedBuckets = append(h.HealedBuckets, bucket) | 
					
						
							|  |  |  | 	for i, b := range h.QueuedBuckets { | 
					
						
							|  |  |  | 		if b == bucket { | 
					
						
							|  |  |  | 			// Delete...
 | 
					
						
							|  |  |  | 			h.QueuedBuckets = append(h.QueuedBuckets[:i], h.QueuedBuckets[i+1:]...) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // setQueuedBuckets will add buckets, but exclude any that is already in h.HealedBuckets.
 | 
					
						
							|  |  |  | // Order is preserved.
 | 
					
						
							|  |  |  | func (h *healingTracker) setQueuedBuckets(buckets []BucketInfo) { | 
					
						
							|  |  |  | 	s := set.CreateStringSet(h.HealedBuckets...) | 
					
						
							|  |  |  | 	h.QueuedBuckets = make([]string, 0, len(buckets)) | 
					
						
							|  |  |  | 	for _, b := range buckets { | 
					
						
							|  |  |  | 		if !s.Contains(b.Name) { | 
					
						
							|  |  |  | 			h.QueuedBuckets = append(h.QueuedBuckets, b.Name) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (h *healingTracker) printTo(writer io.Writer) { | 
					
						
							|  |  |  | 	b, err := json.MarshalIndent(h, "", "  ") | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		writer.Write([]byte(err.Error())) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	writer.Write(b) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // toHealingDisk converts the information to madmin.HealingDisk
 | 
					
						
							|  |  |  | func (h *healingTracker) toHealingDisk() madmin.HealingDisk { | 
					
						
							|  |  |  | 	return madmin.HealingDisk{ | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 		ID:                h.ID, | 
					
						
							|  |  |  | 		Endpoint:          h.Endpoint, | 
					
						
							|  |  |  | 		PoolIndex:         h.PoolIndex, | 
					
						
							|  |  |  | 		SetIndex:          h.SetIndex, | 
					
						
							|  |  |  | 		DiskIndex:         h.DiskIndex, | 
					
						
							|  |  |  | 		Path:              h.Path, | 
					
						
							|  |  |  | 		Started:           h.Started.UTC(), | 
					
						
							|  |  |  | 		LastUpdate:        h.LastUpdate.UTC(), | 
					
						
							|  |  |  | 		ObjectsTotalCount: h.ObjectsTotalCount, | 
					
						
							|  |  |  | 		ObjectsTotalSize:  h.ObjectsTotalSize, | 
					
						
							|  |  |  | 		ItemsHealed:       h.ItemsHealed, | 
					
						
							|  |  |  | 		ItemsFailed:       h.ItemsFailed, | 
					
						
							|  |  |  | 		BytesDone:         h.BytesDone, | 
					
						
							|  |  |  | 		BytesFailed:       h.BytesFailed, | 
					
						
							|  |  |  | 		Bucket:            h.Bucket, | 
					
						
							|  |  |  | 		Object:            h.Object, | 
					
						
							|  |  |  | 		QueuedBuckets:     h.QueuedBuckets, | 
					
						
							|  |  |  | 		HealedBuckets:     h.HealedBuckets, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		ObjectsHealed: h.ItemsHealed, // Deprecated July 2021
 | 
					
						
							|  |  |  | 		ObjectsFailed: h.ItemsFailed, // Deprecated July 2021
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-08 10:43:06 +08:00
										 |  |  | func initAutoHeal(ctx context.Context, objAPI ObjectLayer) { | 
					
						
							| 
									
										
										
										
											2020-12-02 05:50:33 +08:00
										 |  |  | 	z, ok := objAPI.(*erasureServerPools) | 
					
						
							| 
									
										
										
										
											2020-08-08 10:43:06 +08:00
										 |  |  | 	if !ok { | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	initBackgroundHealing(ctx, objAPI) // start quick background healing
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-14 03:57:08 +08:00
										 |  |  | 	bgSeq := mustGetHealSequence(ctx) | 
					
						
							| 
									
										
										
										
											2020-08-08 10:43:06 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | 	globalBackgroundHealState.pushHealLocalDisks(getLocalDisksToHeal()...) | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if drivesToHeal := globalBackgroundHealState.healDriveCount(); drivesToHeal > 0 { | 
					
						
							|  |  |  | 		logger.Info(fmt.Sprintf("Found drives to heal %d, waiting until %s to heal the content...", | 
					
						
							|  |  |  | 			drivesToHeal, defaultMonitorNewDiskInterval)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-08 10:43:06 +08:00
										 |  |  | 		// Heal any disk format and metadata early, if possible.
 | 
					
						
							| 
									
										
										
										
											2020-11-11 01:02:06 +08:00
										 |  |  | 		// Start with format healing
 | 
					
						
							|  |  |  | 		if err := bgSeq.healDiskFormat(); err != nil { | 
					
						
							| 
									
										
										
										
											2020-08-08 10:43:06 +08:00
										 |  |  | 			if newObjectLayerFn() != nil { | 
					
						
							|  |  |  | 				// log only in situations, when object layer
 | 
					
						
							|  |  |  | 				// has fully initialized.
 | 
					
						
							|  |  |  | 				logger.LogIf(bgSeq.ctx, err) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-11 01:02:06 +08:00
										 |  |  | 	if err := bgSeq.healDiskMeta(objAPI); err != nil { | 
					
						
							|  |  |  | 		if newObjectLayerFn() != nil { | 
					
						
							|  |  |  | 			// log only in situations, when object layer
 | 
					
						
							|  |  |  | 			// has fully initialized.
 | 
					
						
							|  |  |  | 			logger.LogIf(bgSeq.ctx, err) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 	go monitorLocalDisksAndHeal(ctx, z, bgSeq) | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | func getLocalDisksToHeal() (disksToHeal Endpoints) { | 
					
						
							|  |  |  | 	for _, ep := range globalEndpoints { | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 		for _, endpoint := range ep.Endpoints { | 
					
						
							|  |  |  | 			if !endpoint.IsLocal { | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			// Try to connect to the current endpoint
 | 
					
						
							|  |  |  | 			// and reformat if the current disk is not formatted
 | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | 			disk, _, err := connectEndpoint(endpoint) | 
					
						
							| 
									
										
										
										
											2020-08-19 05:37:26 +08:00
										 |  |  | 			if errors.Is(err, errUnformattedDisk) { | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 				disksToHeal = append(disksToHeal, endpoint) | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 			} else if err == nil && disk != nil && disk.Healing() != nil { | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | 				disksToHeal = append(disksToHeal, disk.Endpoint()) | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 	return disksToHeal | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | // monitorLocalDisksAndHeal - ensures that detected new disks are healed
 | 
					
						
							|  |  |  | //  1. Only the concerned erasure set will be listed and healed
 | 
					
						
							|  |  |  | //  2. Only the node hosting the disk is responsible to perform the heal
 | 
					
						
							| 
									
										
										
										
											2020-12-02 05:50:33 +08:00
										 |  |  | func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerPools, bgSeq *healSequence) { | 
					
						
							| 
									
										
										
										
											2020-01-10 18:35:06 +08:00
										 |  |  | 	// Perform automatic disk healing when a disk is replaced locally.
 | 
					
						
							| 
									
										
										
										
											2020-12-18 04:35:02 +08:00
										 |  |  | 	diskCheckTimer := time.NewTimer(defaultMonitorNewDiskInterval) | 
					
						
							|  |  |  | 	defer diskCheckTimer.Stop() | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | 	for { | 
					
						
							| 
									
										
										
										
											2020-03-23 03:16:36 +08:00
										 |  |  | 		select { | 
					
						
							|  |  |  | 		case <-ctx.Done(): | 
					
						
							|  |  |  | 			return | 
					
						
							| 
									
										
										
										
											2020-12-18 04:35:02 +08:00
										 |  |  | 		case <-diskCheckTimer.C: | 
					
						
							|  |  |  | 			// Reset to next interval.
 | 
					
						
							|  |  |  | 			diskCheckTimer.Reset(defaultMonitorNewDiskInterval) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-27 12:47:42 +08:00
										 |  |  | 			var erasureSetInPoolDisksToHeal []map[int][]StorageAPI | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 			healDisks := globalBackgroundHealState.getHealLocalDiskEndpoints() | 
					
						
							| 
									
										
										
										
											2020-09-17 12:14:35 +08:00
										 |  |  | 			if len(healDisks) > 0 { | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 				// Reformat disks
 | 
					
						
							| 
									
										
										
										
											2021-08-27 05:06:04 +08:00
										 |  |  | 				bgSeq.queueHealTask(healSource{bucket: SlashSeparator}, madmin.HealItemMetadata) | 
					
						
							| 
									
										
										
										
											2020-01-10 18:35:06 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 				// Ensure that reformatting disks is finished
 | 
					
						
							| 
									
										
										
										
											2021-08-27 05:06:04 +08:00
										 |  |  | 				bgSeq.queueHealTask(healSource{bucket: nopHeal}, madmin.HealItemMetadata) | 
					
						
							| 
									
										
										
										
											2020-07-16 22:30:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-17 12:14:35 +08:00
										 |  |  | 				logger.Info(fmt.Sprintf("Found drives to heal %d, proceeding to heal content...", | 
					
						
							|  |  |  | 					len(healDisks))) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-27 12:47:42 +08:00
										 |  |  | 				erasureSetInPoolDisksToHeal = make([]map[int][]StorageAPI, len(z.serverPools)) | 
					
						
							| 
									
										
										
										
											2020-12-02 05:50:33 +08:00
										 |  |  | 				for i := range z.serverPools { | 
					
						
							| 
									
										
										
										
											2021-01-27 12:47:42 +08:00
										 |  |  | 					erasureSetInPoolDisksToHeal[i] = map[int][]StorageAPI{} | 
					
						
							| 
									
										
										
										
											2020-09-17 12:14:35 +08:00
										 |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-06 06:34:41 +08:00
										 |  |  | 			if serverDebugLog && len(healDisks) > 0 { | 
					
						
							| 
									
										
										
										
											2021-01-19 18:40:52 +08:00
										 |  |  | 				console.Debugf(color.Green("healDisk:")+" disk check timer fired, attempting to heal %d drives\n", len(healDisks)) | 
					
						
							| 
									
										
										
										
											2020-12-18 08:52:47 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-17 12:14:35 +08:00
										 |  |  | 			// heal only if new disks found.
 | 
					
						
							|  |  |  | 			for _, endpoint := range healDisks { | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | 				disk, format, err := connectEndpoint(endpoint) | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 				if err != nil { | 
					
						
							|  |  |  | 					printEndpointError(endpoint, err, true) | 
					
						
							|  |  |  | 					continue | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2020-03-23 03:16:36 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-27 12:47:42 +08:00
										 |  |  | 				poolIdx := globalEndpoints.GetLocalPoolIdx(disk.Endpoint()) | 
					
						
							| 
									
										
										
										
											2021-01-07 01:35:47 +08:00
										 |  |  | 				if poolIdx < 0 { | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 					continue | 
					
						
							| 
									
										
										
										
											2019-11-20 09:42:27 +08:00
										 |  |  | 				} | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 				// Calculate the set index where the current endpoint belongs
 | 
					
						
							| 
									
										
										
										
											2021-01-07 01:35:47 +08:00
										 |  |  | 				z.serverPools[poolIdx].erasureDisksMu.RLock() | 
					
						
							| 
									
										
										
										
											2020-10-25 04:23:08 +08:00
										 |  |  | 				// Protect reading reference format.
 | 
					
						
							| 
									
										
										
										
											2021-01-07 01:35:47 +08:00
										 |  |  | 				setIndex, _, err := findDiskIndex(z.serverPools[poolIdx].format, format) | 
					
						
							|  |  |  | 				z.serverPools[poolIdx].erasureDisksMu.RUnlock() | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 				if err != nil { | 
					
						
							|  |  |  | 					printEndpointError(endpoint, err, false) | 
					
						
							|  |  |  | 					continue | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 				} | 
					
						
							| 
									
										
										
										
											2020-09-05 08:09:02 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-27 12:47:42 +08:00
										 |  |  | 				erasureSetInPoolDisksToHeal[poolIdx][setIndex] = append(erasureSetInPoolDisksToHeal[poolIdx][setIndex], disk) | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-15 04:07:07 +08:00
										 |  |  | 			buckets, _ := z.ListBuckets(ctx) | 
					
						
							| 
									
										
										
										
											2020-12-16 09:34:54 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 			buckets = append(buckets, BucketInfo{ | 
					
						
							|  |  |  | 				Name: pathJoin(minioMetaBucket, minioConfigPrefix), | 
					
						
							|  |  |  | 			}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-09 08:12:17 +08:00
										 |  |  | 			// Buckets data are dispersed in multiple zones/sets, make
 | 
					
						
							|  |  |  | 			// sure to heal all bucket metadata configuration.
 | 
					
						
							|  |  |  | 			buckets = append(buckets, []BucketInfo{ | 
					
						
							|  |  |  | 				{Name: pathJoin(minioMetaBucket, bucketMetaPrefix)}, | 
					
						
							|  |  |  | 			}...) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 09:34:54 +08:00
										 |  |  | 			// Heal latest buckets first.
 | 
					
						
							|  |  |  | 			sort.Slice(buckets, func(i, j int) bool { | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 				a, b := strings.HasPrefix(buckets[i].Name, minioMetaBucket), strings.HasPrefix(buckets[j].Name, minioMetaBucket) | 
					
						
							|  |  |  | 				if a != b { | 
					
						
							|  |  |  | 					return a | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2020-12-16 09:34:54 +08:00
										 |  |  | 				return buckets[i].Created.After(buckets[j].Created) | 
					
						
							|  |  |  | 			}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 			// TODO(klauspost): This will block until all heals are done,
 | 
					
						
							|  |  |  | 			// in the future this should be able to start healing other sets at once.
 | 
					
						
							|  |  |  | 			var wg sync.WaitGroup | 
					
						
							| 
									
										
										
										
											2021-01-27 12:47:42 +08:00
										 |  |  | 			for i, setMap := range erasureSetInPoolDisksToHeal { | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 				i := i | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | 				for setIndex, disks := range setMap { | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 					if len(disks) == 0 { | 
					
						
							|  |  |  | 						continue | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					wg.Add(1) | 
					
						
							|  |  |  | 					go func(setIndex int, disks []StorageAPI) { | 
					
						
							|  |  |  | 						defer wg.Done() | 
					
						
							|  |  |  | 						for _, disk := range disks { | 
					
						
							|  |  |  | 							logger.Info("Healing disk '%v' on %s pool", disk, humanize.Ordinal(i+1)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 							// So someone changed the drives underneath, healing tracker missing.
 | 
					
						
							|  |  |  | 							tracker, err := loadHealingTracker(ctx, disk) | 
					
						
							| 
									
										
										
										
											2020-11-19 01:34:46 +08:00
										 |  |  | 							if err != nil { | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 								logger.Info("Healing tracker missing on '%s', disk was swapped again on %s pool", disk, humanize.Ordinal(i+1)) | 
					
						
							|  |  |  | 								tracker = newHealingTracker(disk) | 
					
						
							| 
									
										
										
										
											2020-11-19 01:34:46 +08:00
										 |  |  | 							} | 
					
						
							| 
									
										
										
										
											2021-07-16 13:32:06 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 							// Load bucket totals
 | 
					
						
							|  |  |  | 							cache := dataUsageCache{} | 
					
						
							|  |  |  | 							if err := cache.load(ctx, z.serverPools[i].sets[setIndex], dataUsageCacheName); err == nil { | 
					
						
							|  |  |  | 								dataUsageInfo := cache.dui(dataUsageRoot, nil) | 
					
						
							|  |  |  | 								tracker.ObjectsTotalCount = dataUsageInfo.ObjectsTotalCount | 
					
						
							|  |  |  | 								tracker.ObjectsTotalSize = dataUsageInfo.ObjectsTotalSize | 
					
						
							|  |  |  | 							} | 
					
						
							| 
									
										
										
										
											2020-11-19 01:34:46 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 							tracker.PoolIndex, tracker.SetIndex, tracker.DiskIndex = disk.GetDiskLoc() | 
					
						
							|  |  |  | 							tracker.setQueuedBuckets(buckets) | 
					
						
							|  |  |  | 							if err := tracker.save(ctx); err != nil { | 
					
						
							| 
									
										
										
										
											2020-11-19 01:34:46 +08:00
										 |  |  | 								logger.LogIf(ctx, err) | 
					
						
							|  |  |  | 								// Unable to write healing tracker, permission denied or some
 | 
					
						
							|  |  |  | 								// other unexpected error occurred. Proceed to look for new
 | 
					
						
							|  |  |  | 								// disks to be healed again, we cannot proceed further.
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 								return | 
					
						
							| 
									
										
										
										
											2020-11-19 01:34:46 +08:00
										 |  |  | 							} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 							err = z.serverPools[i].sets[setIndex].healErasureSet(ctx, buckets, tracker) | 
					
						
							|  |  |  | 							if err != nil { | 
					
						
							|  |  |  | 								logger.LogIf(ctx, err) | 
					
						
							|  |  |  | 								continue | 
					
						
							|  |  |  | 							} | 
					
						
							| 
									
										
										
										
											2020-08-08 04:22:53 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 							logger.Info("Healing disk '%s' on %s pool complete", disk, humanize.Ordinal(i+1)) | 
					
						
							|  |  |  | 							var buf bytes.Buffer | 
					
						
							|  |  |  | 							tracker.printTo(&buf) | 
					
						
							|  |  |  | 							logger.Info("Summary:\n%s", buf.String()) | 
					
						
							|  |  |  | 							logger.LogIf(ctx, tracker.delete(ctx)) | 
					
						
							| 
									
										
										
										
											2020-09-25 00:53:38 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 							// Only upon success pop the healed disk.
 | 
					
						
							|  |  |  | 							globalBackgroundHealState.popHealLocalDisks(disk.Endpoint()) | 
					
						
							| 
									
										
										
										
											2020-09-29 10:39:32 +08:00
										 |  |  | 						} | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 					}(setIndex, disks) | 
					
						
							| 
									
										
										
										
											2019-11-20 09:42:27 +08:00
										 |  |  | 				} | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2021-03-05 06:36:23 +08:00
										 |  |  | 			wg.Wait() | 
					
						
							| 
									
										
										
										
											2019-10-29 01:27:49 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } |