| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | // Copyright (c) 2015-2024 MinIO, Inc.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This file is part of MinIO Object Storage stack
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or
 | 
					
						
							|  |  |  | // (at your option) any later version.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU Affero General Public License for more details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License
 | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package cmd | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2024-05-09 08:51:34 +08:00
										 |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | 	"sync" | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 	"time" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/minio/madmin-go/v3" | 
					
						
							|  |  |  | 	"github.com/minio/minio/internal/cachevalue" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // metricsCache - cache for metrics.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // When serving metrics, this cache is passed to the MetricsLoaderFn.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This cache is used for metrics that would result in network/storage calls.
 | 
					
						
							|  |  |  | type metricsCache struct { | 
					
						
							|  |  |  | 	dataUsageInfo       *cachevalue.Cache[DataUsageInfo] | 
					
						
							|  |  |  | 	esetHealthResult    *cachevalue.Cache[HealthResult] | 
					
						
							|  |  |  | 	driveMetrics        *cachevalue.Cache[storageMetrics] | 
					
						
							| 
									
										
										
										
											2024-04-17 13:10:25 +08:00
										 |  |  | 	memoryMetrics       *cachevalue.Cache[madmin.MemInfo] | 
					
						
							| 
									
										
										
										
											2024-04-24 07:56:12 +08:00
										 |  |  | 	cpuMetrics          *cachevalue.Cache[madmin.CPUMetrics] | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 	clusterDriveMetrics *cachevalue.Cache[storageMetrics] | 
					
						
							|  |  |  | 	nodesUpDown         *cachevalue.Cache[nodesOnline] | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newMetricsCache() *metricsCache { | 
					
						
							|  |  |  | 	return &metricsCache{ | 
					
						
							|  |  |  | 		dataUsageInfo:       newDataUsageInfoCache(), | 
					
						
							|  |  |  | 		esetHealthResult:    newESetHealthResultCache(), | 
					
						
							|  |  |  | 		driveMetrics:        newDriveMetricsCache(), | 
					
						
							| 
									
										
										
										
											2024-04-17 13:10:25 +08:00
										 |  |  | 		memoryMetrics:       newMemoryMetricsCache(), | 
					
						
							| 
									
										
										
										
											2024-04-24 07:56:12 +08:00
										 |  |  | 		cpuMetrics:          newCPUMetricsCache(), | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		clusterDriveMetrics: newClusterStorageInfoCache(), | 
					
						
							|  |  |  | 		nodesUpDown:         newNodesUpDownCache(), | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type nodesOnline struct { | 
					
						
							|  |  |  | 	Online, Offline int | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newNodesUpDownCache() *cachevalue.Cache[nodesOnline] { | 
					
						
							| 
									
										
										
										
											2024-05-09 08:51:34 +08:00
										 |  |  | 	loadNodesUpDown := func(ctx context.Context) (v nodesOnline, err error) { | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		v.Online, v.Offline = globalNotificationSys.GetPeerOnlineCount() | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return cachevalue.NewFromFunc(1*time.Minute, | 
					
						
							|  |  |  | 		cachevalue.Opts{ReturnLastGood: true}, | 
					
						
							|  |  |  | 		loadNodesUpDown) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | type driveIOStatMetrics struct { | 
					
						
							|  |  |  | 	readsPerSec    float64 | 
					
						
							|  |  |  | 	readsKBPerSec  float64 | 
					
						
							|  |  |  | 	readsAwait     float64 | 
					
						
							|  |  |  | 	writesPerSec   float64 | 
					
						
							|  |  |  | 	writesKBPerSec float64 | 
					
						
							|  |  |  | 	writesAwait    float64 | 
					
						
							|  |  |  | 	percUtil       float64 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // storageMetrics - cached storage metrics.
 | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | type storageMetrics struct { | 
					
						
							|  |  |  | 	storageInfo                              madmin.StorageInfo | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | 	ioStats                                  map[string]driveIOStatMetrics | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 	onlineDrives, offlineDrives, totalDrives int | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newDataUsageInfoCache() *cachevalue.Cache[DataUsageInfo] { | 
					
						
							| 
									
										
										
										
											2024-05-09 08:51:34 +08:00
										 |  |  | 	loadDataUsage := func(ctx context.Context) (u DataUsageInfo, err error) { | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		objLayer := newObjectLayerFn() | 
					
						
							|  |  |  | 		if objLayer == nil { | 
					
						
							|  |  |  | 			return | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Collect cluster level object metrics.
 | 
					
						
							|  |  |  | 		u, err = loadDataUsageFromBackend(GlobalContext, objLayer) | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return cachevalue.NewFromFunc(1*time.Minute, | 
					
						
							|  |  |  | 		cachevalue.Opts{ReturnLastGood: true}, | 
					
						
							|  |  |  | 		loadDataUsage) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newESetHealthResultCache() *cachevalue.Cache[HealthResult] { | 
					
						
							| 
									
										
										
										
											2024-05-09 08:51:34 +08:00
										 |  |  | 	loadHealth := func(ctx context.Context) (r HealthResult, err error) { | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		objLayer := newObjectLayerFn() | 
					
						
							|  |  |  | 		if objLayer == nil { | 
					
						
							|  |  |  | 			return | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		r = objLayer.Health(GlobalContext, HealthOptions{}) | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return cachevalue.NewFromFunc(1*time.Minute, | 
					
						
							|  |  |  | 		cachevalue.Opts{ReturnLastGood: true}, | 
					
						
							|  |  |  | 		loadHealth, | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | func getDiffStats(initialStats, currentStats madmin.DiskIOStats) madmin.DiskIOStats { | 
					
						
							|  |  |  | 	return madmin.DiskIOStats{ | 
					
						
							|  |  |  | 		ReadIOs:      currentStats.ReadIOs - initialStats.ReadIOs, | 
					
						
							|  |  |  | 		WriteIOs:     currentStats.WriteIOs - initialStats.WriteIOs, | 
					
						
							|  |  |  | 		ReadSectors:  currentStats.ReadSectors - initialStats.ReadSectors, | 
					
						
							|  |  |  | 		WriteSectors: currentStats.WriteSectors - initialStats.WriteSectors, | 
					
						
							|  |  |  | 		ReadTicks:    currentStats.ReadTicks - initialStats.ReadTicks, | 
					
						
							|  |  |  | 		WriteTicks:   currentStats.WriteTicks - initialStats.WriteTicks, | 
					
						
							|  |  |  | 		TotalTicks:   currentStats.TotalTicks - initialStats.TotalTicks, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func getDriveIOStatMetrics(ioStats madmin.DiskIOStats, duration time.Duration) (m driveIOStatMetrics) { | 
					
						
							|  |  |  | 	durationSecs := duration.Seconds() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	m.readsPerSec = float64(ioStats.ReadIOs) / durationSecs | 
					
						
							|  |  |  | 	m.readsKBPerSec = float64(ioStats.ReadSectors) * float64(sectorSize) / kib / durationSecs | 
					
						
							|  |  |  | 	if ioStats.ReadIOs > 0 { | 
					
						
							|  |  |  | 		m.readsAwait = float64(ioStats.ReadTicks) / float64(ioStats.ReadIOs) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	m.writesPerSec = float64(ioStats.WriteIOs) / durationSecs | 
					
						
							|  |  |  | 	m.writesKBPerSec = float64(ioStats.WriteSectors) * float64(sectorSize) / kib / durationSecs | 
					
						
							|  |  |  | 	if ioStats.WriteIOs > 0 { | 
					
						
							|  |  |  | 		m.writesAwait = float64(ioStats.WriteTicks) / float64(ioStats.WriteIOs) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// TotalTicks is in milliseconds
 | 
					
						
							|  |  |  | 	m.percUtil = float64(ioStats.TotalTicks) * 100 / (durationSecs * 1000) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | func newDriveMetricsCache() *cachevalue.Cache[storageMetrics] { | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | 	var ( | 
					
						
							|  |  |  | 		// prevDriveIOStats is used to calculate "per second"
 | 
					
						
							|  |  |  | 		// values for IOStat related disk metrics e.g. reads/sec.
 | 
					
						
							|  |  |  | 		prevDriveIOStats            map[string]madmin.DiskIOStats | 
					
						
							|  |  |  | 		prevDriveIOStatsMu          sync.RWMutex | 
					
						
							|  |  |  | 		prevDriveIOStatsRefreshedAt time.Time | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-09 08:51:34 +08:00
										 |  |  | 	loadDriveMetrics := func(ctx context.Context) (v storageMetrics, err error) { | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		objLayer := newObjectLayerFn() | 
					
						
							|  |  |  | 		if objLayer == nil { | 
					
						
							|  |  |  | 			return | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		storageInfo := objLayer.LocalStorageInfo(GlobalContext, true) | 
					
						
							|  |  |  | 		onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks) | 
					
						
							|  |  |  | 		totalDrives := onlineDrives.Merge(offlineDrives) | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		v = storageMetrics{ | 
					
						
							|  |  |  | 			storageInfo:   storageInfo, | 
					
						
							|  |  |  | 			onlineDrives:  onlineDrives.Sum(), | 
					
						
							|  |  |  | 			offlineDrives: offlineDrives.Sum(), | 
					
						
							|  |  |  | 			totalDrives:   totalDrives.Sum(), | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | 			ioStats:       map[string]driveIOStatMetrics{}, | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		currentStats := getCurrentDriveIOStats() | 
					
						
							|  |  |  | 		now := time.Now().UTC() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		prevDriveIOStatsMu.Lock() | 
					
						
							|  |  |  | 		if prevDriveIOStats != nil { | 
					
						
							|  |  |  | 			duration := now.Sub(prevDriveIOStatsRefreshedAt) | 
					
						
							|  |  |  | 			if duration.Seconds() > 1 { | 
					
						
							|  |  |  | 				for d, cs := range currentStats { | 
					
						
							|  |  |  | 					if ps, found := prevDriveIOStats[d]; found { | 
					
						
							|  |  |  | 						v.ioStats[d] = getDriveIOStatMetrics(getDiffStats(ps, cs), duration) | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		prevDriveIOStats = currentStats | 
					
						
							|  |  |  | 		prevDriveIOStatsRefreshedAt = now | 
					
						
							|  |  |  | 		prevDriveIOStatsMu.Unlock() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-04-12 01:46:34 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 	return cachevalue.NewFromFunc(1*time.Minute, | 
					
						
							|  |  |  | 		cachevalue.Opts{ReturnLastGood: true}, | 
					
						
							|  |  |  | 		loadDriveMetrics) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-24 07:56:12 +08:00
										 |  |  | func newCPUMetricsCache() *cachevalue.Cache[madmin.CPUMetrics] { | 
					
						
							| 
									
										
										
										
											2024-05-09 08:51:34 +08:00
										 |  |  | 	loadCPUMetrics := func(ctx context.Context) (v madmin.CPUMetrics, err error) { | 
					
						
							| 
									
										
										
										
											2024-04-24 07:56:12 +08:00
										 |  |  | 		var types madmin.MetricType = madmin.MetricsCPU | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		m := collectLocalMetrics(types, collectMetricsOpts{ | 
					
						
							|  |  |  | 			hosts: map[string]struct{}{ | 
					
						
							|  |  |  | 				globalLocalNodeName: {}, | 
					
						
							|  |  |  | 			}, | 
					
						
							|  |  |  | 		}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		for _, hm := range m.ByHost { | 
					
						
							|  |  |  | 			if hm.CPU != nil { | 
					
						
							|  |  |  | 				v = *hm.CPU | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return cachevalue.NewFromFunc(1*time.Minute, | 
					
						
							|  |  |  | 		cachevalue.Opts{ReturnLastGood: true}, | 
					
						
							|  |  |  | 		loadCPUMetrics) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-17 13:10:25 +08:00
										 |  |  | func newMemoryMetricsCache() *cachevalue.Cache[madmin.MemInfo] { | 
					
						
							| 
									
										
										
										
											2024-05-09 08:51:34 +08:00
										 |  |  | 	loadMemoryMetrics := func(ctx context.Context) (v madmin.MemInfo, err error) { | 
					
						
							| 
									
										
										
										
											2024-04-17 13:10:25 +08:00
										 |  |  | 		var types madmin.MetricType = madmin.MetricsMem | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		m := collectLocalMetrics(types, collectMetricsOpts{ | 
					
						
							|  |  |  | 			hosts: map[string]struct{}{ | 
					
						
							|  |  |  | 				globalLocalNodeName: {}, | 
					
						
							|  |  |  | 			}, | 
					
						
							|  |  |  | 		}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		for _, hm := range m.ByHost { | 
					
						
							|  |  |  | 			if hm.Mem != nil && len(hm.Mem.Info.Addr) > 0 { | 
					
						
							|  |  |  | 				v = hm.Mem.Info | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return cachevalue.NewFromFunc(1*time.Minute, | 
					
						
							|  |  |  | 		cachevalue.Opts{ReturnLastGood: true}, | 
					
						
							|  |  |  | 		loadMemoryMetrics) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | func newClusterStorageInfoCache() *cachevalue.Cache[storageMetrics] { | 
					
						
							| 
									
										
										
										
											2024-05-09 08:51:34 +08:00
										 |  |  | 	loadStorageInfo := func(ctx context.Context) (v storageMetrics, err error) { | 
					
						
							| 
									
										
										
										
											2024-03-10 17:15:15 +08:00
										 |  |  | 		objLayer := newObjectLayerFn() | 
					
						
							|  |  |  | 		if objLayer == nil { | 
					
						
							|  |  |  | 			return storageMetrics{}, nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		storageInfo := objLayer.StorageInfo(GlobalContext, true) | 
					
						
							|  |  |  | 		onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks) | 
					
						
							|  |  |  | 		totalDrives := onlineDrives.Merge(offlineDrives) | 
					
						
							|  |  |  | 		v = storageMetrics{ | 
					
						
							|  |  |  | 			storageInfo:   storageInfo, | 
					
						
							|  |  |  | 			onlineDrives:  onlineDrives.Sum(), | 
					
						
							|  |  |  | 			offlineDrives: offlineDrives.Sum(), | 
					
						
							|  |  |  | 			totalDrives:   totalDrives.Sum(), | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return cachevalue.NewFromFunc(1*time.Minute, | 
					
						
							|  |  |  | 		cachevalue.Opts{ReturnLastGood: true}, | 
					
						
							|  |  |  | 		loadStorageInfo, | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | } |