mirror of https://github.com/minio/minio.git
				
				
				
			
		
			
				
	
	
		
			488 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			488 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Go
		
	
	
	
| // Copyright (c) 2015-2024 MinIO, Inc.
 | |
| //
 | |
| // This file is part of MinIO Object Storage stack
 | |
| //
 | |
| // This program is free software: you can redistribute it and/or modify
 | |
| // it under the terms of the GNU Affero General Public License as published by
 | |
| // the Free Software Foundation, either version 3 of the License, or
 | |
| // (at your option) any later version.
 | |
| //
 | |
| // This program is distributed in the hope that it will be useful
 | |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| // GNU Affero General Public License for more details.
 | |
| //
 | |
| // You should have received a copy of the GNU Affero General Public License
 | |
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | |
| 
 | |
| package cmd
 | |
| 
 | |
| import (
 | |
| 	"slices"
 | |
| 	"strings"
 | |
| 
 | |
| 	"github.com/prometheus/client_golang/prometheus"
 | |
| 	"github.com/prometheus/client_golang/prometheus/collectors"
 | |
| )
 | |
| 
 | |
| // Collector paths.
 | |
| //
 | |
| // These are paths under the top-level /minio/metrics/v3 metrics endpoint. Each
 | |
| // of these paths returns a set of V3 metrics.
 | |
| //
 | |
| // Per-bucket metrics endpoints always start with /bucket and the bucket name is
 | |
| // appended to the path. e.g. if the collector path is /bucket/api, the endpoint
 | |
| // for the bucket "mybucket" would be /minio/metrics/v3/bucket/api/mybucket
 | |
| const (
 | |
| 	apiRequestsCollectorPath collectorPath = "/api/requests"
 | |
| 
 | |
| 	bucketAPICollectorPath         collectorPath = "/bucket/api"
 | |
| 	bucketReplicationCollectorPath collectorPath = "/bucket/replication"
 | |
| 
 | |
| 	systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
 | |
| 	systemDriveCollectorPath            collectorPath = "/system/drive"
 | |
| 	systemMemoryCollectorPath           collectorPath = "/system/memory"
 | |
| 	systemCPUCollectorPath              collectorPath = "/system/cpu"
 | |
| 	systemProcessCollectorPath          collectorPath = "/system/process"
 | |
| 
 | |
| 	debugGoCollectorPath collectorPath = "/debug/go"
 | |
| 
 | |
| 	clusterHealthCollectorPath       collectorPath = "/cluster/health"
 | |
| 	clusterUsageObjectsCollectorPath collectorPath = "/cluster/usage/objects"
 | |
| 	clusterUsageBucketsCollectorPath collectorPath = "/cluster/usage/buckets"
 | |
| 	clusterErasureSetCollectorPath   collectorPath = "/cluster/erasure-set"
 | |
| 	clusterIAMCollectorPath          collectorPath = "/cluster/iam"
 | |
| 	clusterConfigCollectorPath       collectorPath = "/cluster/config"
 | |
| 
 | |
| 	ilmCollectorPath           collectorPath = "/ilm"
 | |
| 	auditCollectorPath         collectorPath = "/audit"
 | |
| 	loggerWebhookCollectorPath collectorPath = "/logger/webhook"
 | |
| 	replicationCollectorPath   collectorPath = "/replication"
 | |
| 	notificationCollectorPath  collectorPath = "/notification"
 | |
| 	scannerCollectorPath       collectorPath = "/scanner"
 | |
| )
 | |
| 
 | |
| const (
 | |
| 	clusterBasePath = "/cluster"
 | |
| )
 | |
| 
 | |
| type metricsV3Collection struct {
 | |
| 	mgMap       map[collectorPath]*MetricsGroup
 | |
| 	bucketMGMap map[collectorPath]*MetricsGroup
 | |
| 
 | |
| 	// Gatherers for non-bucket MetricsGroup's
 | |
| 	mgGatherers map[collectorPath]prometheus.Gatherer
 | |
| 
 | |
| 	collectorPaths []collectorPath
 | |
| }
 | |
| 
 | |
| func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
 | |
| 	// Create all metric groups.
 | |
| 	apiRequestsMG := NewMetricsGroup(apiRequestsCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			apiRejectedAuthTotalMD,
 | |
| 			apiRejectedHeaderTotalMD,
 | |
| 			apiRejectedTimestampTotalMD,
 | |
| 			apiRejectedInvalidTotalMD,
 | |
| 
 | |
| 			apiRequestsWaitingTotalMD,
 | |
| 			apiRequestsIncomingTotalMD,
 | |
| 			apiRequestsInFlightTotalMD,
 | |
| 			apiRequestsTotalMD,
 | |
| 			apiRequestsErrorsTotalMD,
 | |
| 			apiRequests5xxErrorsTotalMD,
 | |
| 			apiRequests4xxErrorsTotalMD,
 | |
| 			apiRequestsCanceledTotalMD,
 | |
| 
 | |
| 			apiRequestsTTFBSecondsDistributionMD,
 | |
| 
 | |
| 			apiTrafficSentBytesMD,
 | |
| 			apiTrafficRecvBytesMD,
 | |
| 		},
 | |
| 		JoinLoaders(loadAPIRequestsHTTPMetrics, loadAPIRequestsTTFBMetrics,
 | |
| 			loadAPIRequestsNetworkMetrics),
 | |
| 	)
 | |
| 
 | |
| 	bucketAPIMG := NewBucketMetricsGroup(bucketAPICollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			bucketAPITrafficRecvBytesMD,
 | |
| 			bucketAPITrafficSentBytesMD,
 | |
| 
 | |
| 			bucketAPIRequestsInFlightMD,
 | |
| 			bucketAPIRequestsTotalMD,
 | |
| 			bucketAPIRequestsCanceledMD,
 | |
| 			bucketAPIRequests4xxErrorsMD,
 | |
| 			bucketAPIRequests5xxErrorsMD,
 | |
| 
 | |
| 			bucketAPIRequestsTTFBSecondsDistributionMD,
 | |
| 		},
 | |
| 		JoinBucketLoaders(loadBucketAPIHTTPMetrics, loadBucketAPITTFBMetrics),
 | |
| 	)
 | |
| 
 | |
| 	bucketReplicationMG := NewBucketMetricsGroup(bucketReplicationCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			bucketReplLastHrFailedBytesMD,
 | |
| 			bucketReplLastHrFailedCountMD,
 | |
| 			bucketReplLastMinFailedBytesMD,
 | |
| 			bucketReplLastMinFailedCountMD,
 | |
| 			bucketReplLatencyMsMD,
 | |
| 			bucketReplProxiedDeleteTaggingRequestsTotalMD,
 | |
| 			bucketReplProxiedGetRequestsFailuresMD,
 | |
| 			bucketReplProxiedGetRequestsTotalMD,
 | |
| 			bucketReplProxiedGetTaggingRequestsFailuresMD,
 | |
| 			bucketReplProxiedGetTaggingRequestsTotalMD,
 | |
| 			bucketReplProxiedHeadRequestsFailuresMD,
 | |
| 			bucketReplProxiedHeadRequestsTotalMD,
 | |
| 			bucketReplProxiedPutTaggingRequestsFailuresMD,
 | |
| 			bucketReplProxiedPutTaggingRequestsTotalMD,
 | |
| 			bucketReplSentBytesMD,
 | |
| 			bucketReplSentCountMD,
 | |
| 			bucketReplTotalFailedBytesMD,
 | |
| 			bucketReplTotalFailedCountMD,
 | |
| 			bucketReplProxiedDeleteTaggingRequestsFailuresMD,
 | |
| 		},
 | |
| 		loadBucketReplicationMetrics,
 | |
| 	)
 | |
| 
 | |
| 	systemNetworkInternodeMG := NewMetricsGroup(systemNetworkInternodeCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			internodeErrorsTotalMD,
 | |
| 			internodeDialedErrorsTotalMD,
 | |
| 			internodeDialAvgTimeNanosMD,
 | |
| 			internodeSentBytesTotalMD,
 | |
| 			internodeRecvBytesTotalMD,
 | |
| 		},
 | |
| 		loadNetworkInternodeMetrics,
 | |
| 	)
 | |
| 
 | |
| 	systemMemoryMG := NewMetricsGroup(systemMemoryCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			memTotalMD,
 | |
| 			memUsedMD,
 | |
| 			memFreeMD,
 | |
| 			memAvailableMD,
 | |
| 			memBuffersMD,
 | |
| 			memCacheMD,
 | |
| 			memSharedMD,
 | |
| 			memUsedPercMD,
 | |
| 		},
 | |
| 		loadMemoryMetrics,
 | |
| 	)
 | |
| 
 | |
| 	systemCPUMG := NewMetricsGroup(systemCPUCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			sysCPUAvgIdleMD,
 | |
| 			sysCPUAvgIOWaitMD,
 | |
| 			sysCPULoadMD,
 | |
| 			sysCPULoadPercMD,
 | |
| 			sysCPUNiceMD,
 | |
| 			sysCPUStealMD,
 | |
| 			sysCPUSystemMD,
 | |
| 			sysCPUUserMD,
 | |
| 		},
 | |
| 		loadCPUMetrics,
 | |
| 	)
 | |
| 
 | |
| 	systemProcessMG := NewMetricsGroup(systemProcessCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			processLocksReadTotalMD,
 | |
| 			processLocksWriteTotalMD,
 | |
| 			processCPUTotalSecondsMD,
 | |
| 			processGoRoutineTotalMD,
 | |
| 			processIORCharBytesMD,
 | |
| 			processIOReadBytesMD,
 | |
| 			processIOWCharBytesMD,
 | |
| 			processIOWriteBytesMD,
 | |
| 			processStarttimeSecondsMD,
 | |
| 			processUptimeSecondsMD,
 | |
| 			processFileDescriptorLimitTotalMD,
 | |
| 			processFileDescriptorOpenTotalMD,
 | |
| 			processSyscallReadTotalMD,
 | |
| 			processSyscallWriteTotalMD,
 | |
| 			processResidentMemoryBytesMD,
 | |
| 			processVirtualMemoryBytesMD,
 | |
| 			processVirtualMemoryMaxBytesMD,
 | |
| 		},
 | |
| 		loadProcessMetrics,
 | |
| 	)
 | |
| 
 | |
| 	systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			driveUsedBytesMD,
 | |
| 			driveFreeBytesMD,
 | |
| 			driveTotalBytesMD,
 | |
| 			driveUsedInodesMD,
 | |
| 			driveFreeInodesMD,
 | |
| 			driveTotalInodesMD,
 | |
| 			driveTimeoutErrorsMD,
 | |
| 			driveIOErrorsMD,
 | |
| 			driveAvailabilityErrorsMD,
 | |
| 			driveWaitingIOMD,
 | |
| 			driveAPILatencyMD,
 | |
| 			driveHealthMD,
 | |
| 
 | |
| 			driveOfflineCountMD,
 | |
| 			driveOnlineCountMD,
 | |
| 			driveCountMD,
 | |
| 
 | |
| 			// iostat related
 | |
| 			driveReadsPerSecMD,
 | |
| 			driveReadsKBPerSecMD,
 | |
| 			driveReadsAwaitMD,
 | |
| 			driveWritesPerSecMD,
 | |
| 			driveWritesKBPerSecMD,
 | |
| 			driveWritesAwaitMD,
 | |
| 			drivePercUtilMD,
 | |
| 		},
 | |
| 		loadDriveMetrics,
 | |
| 	)
 | |
| 
 | |
| 	clusterHealthMG := NewMetricsGroup(clusterHealthCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			healthDrivesOfflineCountMD,
 | |
| 			healthDrivesOnlineCountMD,
 | |
| 			healthDrivesCountMD,
 | |
| 
 | |
| 			healthNodesOfflineCountMD,
 | |
| 			healthNodesOnlineCountMD,
 | |
| 
 | |
| 			healthCapacityRawTotalBytesMD,
 | |
| 			healthCapacityRawFreeBytesMD,
 | |
| 			healthCapacityUsableTotalBytesMD,
 | |
| 			healthCapacityUsableFreeBytesMD,
 | |
| 		},
 | |
| 		JoinLoaders(loadClusterHealthDriveMetrics,
 | |
| 			loadClusterHealthNodeMetrics,
 | |
| 			loadClusterHealthCapacityMetrics),
 | |
| 	)
 | |
| 
 | |
| 	clusterUsageObjectsMG := NewMetricsGroup(clusterUsageObjectsCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			usageSinceLastUpdateSecondsMD,
 | |
| 			usageTotalBytesMD,
 | |
| 			usageObjectsCountMD,
 | |
| 			usageVersionsCountMD,
 | |
| 			usageDeleteMarkersCountMD,
 | |
| 			usageBucketsCountMD,
 | |
| 			usageObjectsDistributionMD,
 | |
| 			usageVersionsDistributionMD,
 | |
| 		},
 | |
| 		loadClusterUsageObjectMetrics,
 | |
| 	)
 | |
| 
 | |
| 	clusterUsageBucketsMG := NewMetricsGroup(clusterUsageBucketsCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			usageSinceLastUpdateSecondsMD,
 | |
| 			usageBucketTotalBytesMD,
 | |
| 			usageBucketObjectsTotalMD,
 | |
| 			usageBucketVersionsCountMD,
 | |
| 			usageBucketDeleteMarkersCountMD,
 | |
| 			usageBucketQuotaTotalBytesMD,
 | |
| 			usageBucketObjectSizeDistributionMD,
 | |
| 			usageBucketObjectVersionCountDistributionMD,
 | |
| 		},
 | |
| 		loadClusterUsageBucketMetrics,
 | |
| 	)
 | |
| 
 | |
| 	clusterErasureSetMG := NewMetricsGroup(clusterErasureSetCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			erasureSetOverallWriteQuorumMD,
 | |
| 			erasureSetOverallHealthMD,
 | |
| 			erasureSetReadQuorumMD,
 | |
| 			erasureSetWriteQuorumMD,
 | |
| 			erasureSetOnlineDrivesCountMD,
 | |
| 			erasureSetHealingDrivesCountMD,
 | |
| 			erasureSetHealthMD,
 | |
| 			erasureSetReadToleranceMD,
 | |
| 			erasureSetWriteToleranceMD,
 | |
| 			erasureSetReadHealthMD,
 | |
| 			erasureSetWriteHealthMD,
 | |
| 		},
 | |
| 		loadClusterErasureSetMetrics,
 | |
| 	)
 | |
| 
 | |
| 	clusterNotificationMG := NewMetricsGroup(notificationCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			notificationCurrentSendInProgressMD,
 | |
| 			notificationEventsErrorsTotalMD,
 | |
| 			notificationEventsSentTotalMD,
 | |
| 			notificationEventsSkippedTotalMD,
 | |
| 		},
 | |
| 		loadClusterNotificationMetrics,
 | |
| 	)
 | |
| 
 | |
| 	clusterIAMMG := NewMetricsGroup(clusterIAMCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			lastSyncDurationMillisMD,
 | |
| 			pluginAuthnServiceFailedRequestsMinuteMD,
 | |
| 			pluginAuthnServiceLastFailSecondsMD,
 | |
| 			pluginAuthnServiceLastSuccSecondsMD,
 | |
| 			pluginAuthnServiceSuccAvgRttMsMinuteMD,
 | |
| 			pluginAuthnServiceSuccMaxRttMsMinuteMD,
 | |
| 			pluginAuthnServiceTotalRequestsMinuteMD,
 | |
| 			sinceLastSyncMillisMD,
 | |
| 			syncFailuresMD,
 | |
| 			syncSuccessesMD,
 | |
| 		},
 | |
| 		loadClusterIAMMetrics,
 | |
| 	)
 | |
| 
 | |
| 	clusterReplicationMG := NewMetricsGroup(replicationCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			replicationAverageActiveWorkersMD,
 | |
| 			replicationAverageQueuedBytesMD,
 | |
| 			replicationAverageQueuedCountMD,
 | |
| 			replicationAverageDataTransferRateMD,
 | |
| 			replicationCurrentActiveWorkersMD,
 | |
| 			replicationCurrentDataTransferRateMD,
 | |
| 			replicationLastMinuteQueuedBytesMD,
 | |
| 			replicationLastMinuteQueuedCountMD,
 | |
| 			replicationMaxActiveWorkersMD,
 | |
| 			replicationMaxQueuedBytesMD,
 | |
| 			replicationMaxQueuedCountMD,
 | |
| 			replicationMaxDataTransferRateMD,
 | |
| 			replicationRecentBacklogCountMD,
 | |
| 		},
 | |
| 		loadClusterReplicationMetrics,
 | |
| 	)
 | |
| 
 | |
| 	clusterConfigMG := NewMetricsGroup(clusterConfigCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			configRRSParityMD,
 | |
| 			configStandardParityMD,
 | |
| 		},
 | |
| 		loadClusterConfigMetrics,
 | |
| 	)
 | |
| 
 | |
| 	scannerMG := NewMetricsGroup(scannerCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			scannerBucketScansFinishedMD,
 | |
| 			scannerBucketScansStartedMD,
 | |
| 			scannerDirectoriesScannedMD,
 | |
| 			scannerObjectsScannedMD,
 | |
| 			scannerVersionsScannedMD,
 | |
| 			scannerLastActivitySecondsMD,
 | |
| 		},
 | |
| 		loadClusterScannerMetrics,
 | |
| 	)
 | |
| 
 | |
| 	loggerWebhookMG := NewMetricsGroup(loggerWebhookCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			webhookFailedMessagesMD,
 | |
| 			webhookQueueLengthMD,
 | |
| 			webhookTotalMessagesMD,
 | |
| 		},
 | |
| 		loadLoggerWebhookMetrics,
 | |
| 	)
 | |
| 
 | |
| 	auditMG := NewMetricsGroup(auditCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			auditFailedMessagesMD,
 | |
| 			auditTargetQueueLengthMD,
 | |
| 			auditTotalMessagesMD,
 | |
| 		},
 | |
| 		loadAuditMetrics,
 | |
| 	)
 | |
| 
 | |
| 	ilmMG := NewMetricsGroup(ilmCollectorPath,
 | |
| 		[]MetricDescriptor{
 | |
| 			ilmExpiryPendingTasksMD,
 | |
| 			ilmTransitionActiveTasksMD,
 | |
| 			ilmTransitionPendingTasksMD,
 | |
| 			ilmTransitionMissedImmediateTasksMD,
 | |
| 			ilmVersionsScannedMD,
 | |
| 		},
 | |
| 		loadILMMetrics,
 | |
| 	)
 | |
| 
 | |
| 	allMetricGroups := []*MetricsGroup{
 | |
| 		apiRequestsMG,
 | |
| 		bucketAPIMG,
 | |
| 		bucketReplicationMG,
 | |
| 
 | |
| 		systemNetworkInternodeMG,
 | |
| 		systemDriveMG,
 | |
| 		systemMemoryMG,
 | |
| 		systemCPUMG,
 | |
| 		systemProcessMG,
 | |
| 
 | |
| 		clusterHealthMG,
 | |
| 		clusterUsageObjectsMG,
 | |
| 		clusterUsageBucketsMG,
 | |
| 		clusterErasureSetMG,
 | |
| 		clusterNotificationMG,
 | |
| 		clusterIAMMG,
 | |
| 		clusterReplicationMG,
 | |
| 		clusterConfigMG,
 | |
| 
 | |
| 		ilmMG,
 | |
| 		scannerMG,
 | |
| 		auditMG,
 | |
| 		loggerWebhookMG,
 | |
| 	}
 | |
| 
 | |
| 	// Bucket metrics are special, they always include the bucket label. These
 | |
| 	// metrics required a list of buckets to be passed to the loader, and the list
 | |
| 	// of buckets is not known until the request is made. So we keep a separate
 | |
| 	// map for bucket metrics and handle them specially.
 | |
| 
 | |
| 	// Add the serverName and poolIndex labels to all non-cluster metrics.
 | |
| 	//
 | |
| 	// Also create metric group maps and set the cache.
 | |
| 	metricsCache := newMetricsCache()
 | |
| 	mgMap := make(map[collectorPath]*MetricsGroup)
 | |
| 	bucketMGMap := make(map[collectorPath]*MetricsGroup)
 | |
| 	for _, mg := range allMetricGroups {
 | |
| 		if !strings.HasPrefix(string(mg.CollectorPath), clusterBasePath) {
 | |
| 			mg.AddExtraLabels(
 | |
| 				serverName, globalLocalNodeName,
 | |
| 				// poolIndex, strconv.Itoa(globalLocalPoolIdx),
 | |
| 			)
 | |
| 		}
 | |
| 		mg.SetCache(metricsCache)
 | |
| 		if mg.IsBucketMetricsGroup() {
 | |
| 			bucketMGMap[mg.CollectorPath] = mg
 | |
| 		} else {
 | |
| 			mgMap[mg.CollectorPath] = mg
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Prepare to register the collectors. Other than `MetricGroup` collectors,
 | |
| 	// we also have standard collectors like `GoCollector`.
 | |
| 
 | |
| 	// Create all Non-`MetricGroup` collectors here.
 | |
| 	collectors := map[collectorPath]prometheus.Collector{
 | |
| 		debugGoCollectorPath: collectors.NewGoCollector(),
 | |
| 	}
 | |
| 
 | |
| 	// Add all `MetricGroup` collectors to the map.
 | |
| 	for _, mg := range allMetricGroups {
 | |
| 		collectors[mg.CollectorPath] = mg
 | |
| 	}
 | |
| 
 | |
| 	// Helper function to register a collector and return a gatherer for it.
 | |
| 	mustRegister := func(c ...prometheus.Collector) prometheus.Gatherer {
 | |
| 		subRegistry := prometheus.NewRegistry()
 | |
| 		for _, col := range c {
 | |
| 			subRegistry.MustRegister(col)
 | |
| 		}
 | |
| 		r.MustRegister(subRegistry)
 | |
| 		return subRegistry
 | |
| 	}
 | |
| 
 | |
| 	// Register all collectors and create gatherers for them.
 | |
| 	gatherers := make(map[collectorPath]prometheus.Gatherer, len(collectors))
 | |
| 	collectorPaths := make([]collectorPath, 0, len(collectors))
 | |
| 	for path, collector := range collectors {
 | |
| 		gatherers[path] = mustRegister(collector)
 | |
| 		collectorPaths = append(collectorPaths, path)
 | |
| 	}
 | |
| 	slices.Sort(collectorPaths)
 | |
| 	return &metricsV3Collection{
 | |
| 		mgMap:          mgMap,
 | |
| 		bucketMGMap:    bucketMGMap,
 | |
| 		mgGatherers:    gatherers,
 | |
| 		collectorPaths: collectorPaths,
 | |
| 	}
 | |
| }
 |