Feat: Application Status Metrics & Structured Logs (#6857)

Feat: Application Status Metrics & Structured Logs

Signed-off-by: Brian Kane <briankane1@gmail.com>
This commit is contained in:
Brian Kane 2025-08-20 23:07:13 +01:00 committed by GitHub
parent 721c75e44a
commit 1a934e1618
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 817 additions and 0 deletions

View File

@ -99,6 +99,7 @@ helm install --create-namespace -n vela-system kubevela kubevela/vela-core --wai
| `featureGates.sharedDefinitionStorageForApplicationRevision` | use definition cache to reduce duplicated definition storage for application revision, must be used with InformerCacheFilterUnnecessaryFields | `true` |
| `featureGates.disableWorkflowContextConfigMapCache` | disable the workflow context's configmap informer cache | `true` |
| `featureGates.enableCueValidation` | enable the strict cue validation for cue required parameter fields | `false` |
| `featureGates.enableApplicationStatusMetrics` | enable application status metrics and structured logging | `false` |
### MultiCluster parameters

View File

@ -312,6 +312,7 @@ spec:
- "--feature-gates=SharedDefinitionStorageForApplicationRevision={{- .Values.featureGates.sharedDefinitionStorageForApplicationRevision | toString -}}"
- "--feature-gates=DisableWorkflowContextConfigMapCache={{- .Values.featureGates.disableWorkflowContextConfigMapCache | toString -}}"
- "--feature-gates=EnableCueValidation={{- .Values.featureGates.enableCueValidation | toString -}}"
- "--feature-gates=EnableApplicationStatusMetrics={{- .Values.featureGates.enableApplicationStatusMetrics | toString -}}"
{{ if .Values.authentication.enabled }}
{{ if .Values.authentication.withUser }}
- "--authentication-with-user"

View File

@ -124,6 +124,7 @@ optimize:
##@param featureGates.sharedDefinitionStorageForApplicationRevision use definition cache to reduce duplicated definition storage for application revision, must be used with InformerCacheFilterUnnecessaryFields
##@param featureGates.disableWorkflowContextConfigMapCache disable the workflow context's configmap informer cache
##@param featureGates.enableCueValidation enable the strict cue validation for cue required parameter fields
##@param featureGates.enableApplicationStatusMetrics enable application status metrics and structured logging
##@param
featureGates:
gzipResourceTracker: false
@ -140,6 +141,7 @@ featureGates:
sharedDefinitionStorageForApplicationRevision: true
disableWorkflowContextConfigMapCache: true
enableCueValidation: false
enableApplicationStatusMetrics: false
## @section MultiCluster parameters

View File

@ -475,6 +475,9 @@ func (r *Reconciler) writeStatusByMethod(ctx context.Context, method method, app
executor.StepStatusCache.Store(fmt.Sprintf("%s-%s", app.Name, app.Namespace), -1)
return err
}
if feature.DefaultMutableFeatureGate.Enabled(features.EnableApplicationStatusMetrics) {
r.updateMetricsAndLog(ctx, app)
}
return nil
}
@ -591,6 +594,9 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
// Setup adds a controller that reconciles App.
func Setup(mgr ctrl.Manager, args core.Args) error {
// Register application status metrics after feature gates are initialized
metrics.RegisterApplicationStatusMetrics()
reconciler := Reconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),

View File

@ -0,0 +1,206 @@
/*
Copyright 2021 The KubeVela Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package application
import (
"context"
workflowv1alpha1 "github.com/kubevela/workflow/api/v1alpha1"
"k8s.io/klog/v2"
"github.com/oam-dev/kubevela/apis/core.oam.dev/common"
"github.com/oam-dev/kubevela/apis/core.oam.dev/v1beta1"
"github.com/oam-dev/kubevela/pkg/monitor/metrics"
)
// HealthStatus summarises the health of an application's services.
type HealthStatus struct {
	// Healthy is the aggregate verdict for the whole application.
	Healthy bool
	// HealthyCount and UnhealthyCount are the per-state service tallies.
	HealthyCount, UnhealthyCount int
}
// updateMetricsAndLog publishes the application's health and phase gauges and
// then emits a single structured log entry describing its services and workflow.
// The context argument is currently unused.
func (r *Reconciler) updateMetricsAndLog(_ context.Context, app *v1beta1.Application) {
	services := app.Status.Services
	health := calculateHealthStatus(services)

	// Gauges are refreshed first; the log entry is built from the same status.
	updateHealthMetric(app, health.Healthy)
	updatePhaseMetrics(app)

	logApplicationStatus(
		app,
		health,
		buildWorkflowStatus(app.Status.Workflow),
		buildServiceDetails(services),
	)
}
// calculateHealthStatus tallies healthy and unhealthy services and derives the
// aggregate flag: the result is Healthy when no service is unhealthy, which
// includes the case of an empty service list.
func calculateHealthStatus(services []common.ApplicationComponentStatus) HealthStatus {
	var result HealthStatus
	for i := range services {
		if !services[i].Healthy {
			result.UnhealthyCount++
			continue
		}
		result.HealthyCount++
	}
	result.Healthy = result.UnhealthyCount == 0
	return result
}
// updateHealthMetric sets the ApplicationHealthStatus gauge for the application's
// name/namespace pair to 1 when healthy is true and to 0 otherwise.
func updateHealthMetric(app *v1beta1.Application, healthy bool) {
	var value float64 // zero value encodes "unhealthy"
	if healthy {
		value = 1
	}
	gauge := metrics.ApplicationHealthStatus.WithLabelValues(app.Name, app.Namespace)
	gauge.Set(value)
}
// updatePhaseMetrics refreshes the application phase gauge and keeps the
// workflow phase gauge in step with the application's current workflow status.
//
// The application phase is always written. The workflow phase is written only
// when a workflow status with a non-empty phase is present; otherwise any
// series previously exported for this application is removed so the gauge
// does not keep reporting the phase of a workflow status that no longer exists.
func updatePhaseMetrics(app *v1beta1.Application) {
	metrics.ApplicationPhase.
		WithLabelValues(app.Name, app.Namespace).
		Set(appPhaseToNumeric(app.Status.Phase))

	workflow := app.Status.Workflow
	if workflow == nil || workflow.Phase == "" {
		// No current workflow phase: drop the stale series instead of leaving
		// the last known value in place. Deleting a missing series is a no-op.
		metrics.WorkflowPhase.DeleteLabelValues(app.Name, app.Namespace)
		return
	}

	metrics.WorkflowPhase.
		WithLabelValues(app.Name, app.Namespace).
		Set(workflowPhaseToNumeric(workflow.Phase))
}
// buildWorkflowStatus flattens the workflow status into a map suitable for
// structured logging. A nil workflow yields an empty, non-nil map.
func buildWorkflowStatus(workflow *common.WorkflowStatus) map[string]interface{} {
	fields := map[string]interface{}{}
	if workflow != nil {
		fields["app_revision"] = workflow.AppRevision
		fields["finished"] = workflow.Finished
		fields["phase"] = workflow.Phase
		fields["message"] = workflow.Message
	}
	return fields
}
// buildServiceDetails converts each service status into a map for structured
// logging. The "details" key is only present when the service carries at least
// one detail entry. The returned slice is never nil.
func buildServiceDetails(services []common.ApplicationComponentStatus) []map[string]interface{} {
	out := make([]map[string]interface{}, len(services))
	for i := range services {
		svc := &services[i]
		entry := map[string]interface{}{
			"name":      svc.Name,
			"namespace": svc.Namespace,
			"cluster":   svc.Cluster,
			"healthy":   svc.Healthy,
			"message":   svc.Message,
		}
		if len(svc.Details) != 0 {
			entry["details"] = svc.Details
		}
		out[i] = entry
	}
	return out
}
// logApplicationStatus writes one structured "application update" log entry.
// Identifying fields (uid, name, namespace, phase, health) are emitted as
// top-level keys, and the full status snapshot is attached under "data".
func logApplicationStatus(app *v1beta1.Application, healthStatus HealthStatus, workflowStatus map[string]interface{}, serviceDetails []map[string]interface{}) {
	phase := string(app.Status.Phase)

	// Nested status section of the payload.
	status := map[string]interface{}{
		"phase":                    phase,
		"healthy":                  healthStatus.Healthy,
		"healthy_services_count":   healthStatus.HealthyCount,
		"unhealthy_services_count": healthStatus.UnhealthyCount,
		"services":                 serviceDetails,
		"workflow":                 workflowStatus,
	}

	payload := map[string]interface{}{
		"app_uid":   app.UID,
		"app_name":  app.Name,
		"version":   app.ResourceVersion,
		"namespace": app.Namespace,
		"labels":    app.Labels,
		"status":    status,
	}

	klog.InfoS("application update",
		"app_uid", app.UID,
		"app_name", app.Name,
		"namespace", app.Namespace,
		"phase", phase,
		"healthy", healthStatus.Healthy,
		"data", payload,
	)
}
// appPhaseToNumeric maps an application phase to the numeric code exported by
// the phase gauge. Phases not listed below map to -1.
func appPhaseToNumeric(phase common.ApplicationPhase) float64 {
	// The position of a phase in this table is its numeric code.
	ordered := [...]common.ApplicationPhase{
		common.ApplicationStarting,           // 0
		common.ApplicationRunning,            // 1
		common.ApplicationRendering,          // 2
		common.ApplicationPolicyGenerating,   // 3
		common.ApplicationRunningWorkflow,    // 4
		common.ApplicationWorkflowSuspending, // 5
		common.ApplicationWorkflowTerminated, // 6
		common.ApplicationWorkflowFailed,     // 7
		common.ApplicationUnhealthy,          // 8
		common.ApplicationDeleting,           // 9
	}
	for code, known := range ordered {
		if phase == known {
			return float64(code)
		}
	}
	return -1
}
// workflowPhaseToNumeric maps a workflow run phase to the numeric code exported
// by the workflow phase gauge. Phases not listed below map to -1.
func workflowPhaseToNumeric(phase workflowv1alpha1.WorkflowRunPhase) float64 {
	// The position of a phase in this table is its numeric code.
	ordered := [...]workflowv1alpha1.WorkflowRunPhase{
		workflowv1alpha1.WorkflowStateInitializing, // 0
		workflowv1alpha1.WorkflowStateSucceeded,    // 1
		workflowv1alpha1.WorkflowStateExecuting,    // 2
		workflowv1alpha1.WorkflowStateSuspending,   // 3
		workflowv1alpha1.WorkflowStateTerminated,   // 4
		workflowv1alpha1.WorkflowStateFailed,       // 5
		workflowv1alpha1.WorkflowStateSkipped,      // 6
	}
	for code, known := range ordered {
		if phase == known {
			return float64(code)
		}
	}
	return -1
}

View File

@ -0,0 +1,545 @@
/*
Copyright 2021 The KubeVela Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package application
import (
"context"
"testing"
workflowv1alpha1 "github.com/kubevela/workflow/api/v1alpha1"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/oam-dev/kubevela/apis/core.oam.dev/common"
"github.com/oam-dev/kubevela/apis/core.oam.dev/v1beta1"
"github.com/oam-dev/kubevela/pkg/monitor/metrics"
)
// TestCalculateHealthStatus checks the per-state counts and the aggregate flag
// for all-healthy, mixed, all-unhealthy and empty service lists.
func TestCalculateHealthStatus(t *testing.T) {
	// svc builds a component status carrying only the health flag.
	svc := func(healthy bool) common.ApplicationComponentStatus {
		return common.ApplicationComponentStatus{Healthy: healthy}
	}

	cases := []struct {
		name     string
		services []common.ApplicationComponentStatus
		expected HealthStatus
	}{
		{
			name:     "all services healthy",
			services: []common.ApplicationComponentStatus{svc(true), svc(true), svc(true)},
			expected: HealthStatus{Healthy: true, HealthyCount: 3, UnhealthyCount: 0},
		},
		{
			name:     "some unhealthy",
			services: []common.ApplicationComponentStatus{svc(true), svc(false), svc(true)},
			expected: HealthStatus{Healthy: false, HealthyCount: 2, UnhealthyCount: 1},
		},
		{
			name:     "all services unhealthy",
			services: []common.ApplicationComponentStatus{svc(false), svc(false)},
			expected: HealthStatus{Healthy: false, HealthyCount: 0, UnhealthyCount: 2},
		},
		{
			name:     "no services",
			services: []common.ApplicationComponentStatus{},
			expected: HealthStatus{Healthy: true, HealthyCount: 0, UnhealthyCount: 0},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.expected, calculateHealthStatus(tc.services))
		})
	}
}
// TestAppPhaseToNumeric pins the numeric code of every known application phase
// and the -1 fallback for an unrecognised one.
func TestAppPhaseToNumeric(t *testing.T) {
	type phaseCase struct {
		label string
		in    common.ApplicationPhase
		out   float64
	}

	cases := []phaseCase{
		{label: "starting", in: common.ApplicationStarting, out: 0},
		{label: "running", in: common.ApplicationRunning, out: 1},
		{label: "rendering", in: common.ApplicationRendering, out: 2},
		{label: "policy generating", in: common.ApplicationPolicyGenerating, out: 3},
		{label: "running workflow", in: common.ApplicationRunningWorkflow, out: 4},
		{label: "workflow suspending", in: common.ApplicationWorkflowSuspending, out: 5},
		{label: "workflow terminated", in: common.ApplicationWorkflowTerminated, out: 6},
		{label: "workflow failed", in: common.ApplicationWorkflowFailed, out: 7},
		{label: "unhealthy", in: common.ApplicationUnhealthy, out: 8},
		{label: "deleting", in: common.ApplicationDeleting, out: 9},
		{label: "unknown", in: common.ApplicationPhase("unknown"), out: -1},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			assert.Equal(t, tc.out, appPhaseToNumeric(tc.in))
		})
	}
}
// TestWorkflowPhaseToNumeric pins the numeric code of every known workflow run
// phase and the -1 fallback for an unrecognised one.
func TestWorkflowPhaseToNumeric(t *testing.T) {
	type phaseCase struct {
		label string
		in    workflowv1alpha1.WorkflowRunPhase
		out   float64
	}

	cases := []phaseCase{
		{label: "initializing", in: workflowv1alpha1.WorkflowStateInitializing, out: 0},
		{label: "succeeded", in: workflowv1alpha1.WorkflowStateSucceeded, out: 1},
		{label: "executing", in: workflowv1alpha1.WorkflowStateExecuting, out: 2},
		{label: "suspending", in: workflowv1alpha1.WorkflowStateSuspending, out: 3},
		{label: "terminated", in: workflowv1alpha1.WorkflowStateTerminated, out: 4},
		{label: "failed", in: workflowv1alpha1.WorkflowStateFailed, out: 5},
		{label: "skipped", in: workflowv1alpha1.WorkflowStateSkipped, out: 6},
		{label: "unknown", in: workflowv1alpha1.WorkflowRunPhase("unknown"), out: -1},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			assert.Equal(t, tc.out, workflowPhaseToNumeric(tc.in))
		})
	}
}
// TestBuildWorkflowStatus verifies that a nil workflow produces an empty map
// and that a populated workflow is flattened into the expected keys.
func TestBuildWorkflowStatus(t *testing.T) {
	populated := &common.WorkflowStatus{
		AppRevision: "rev-1",
		Finished:    true,
		Phase:       workflowv1alpha1.WorkflowStateSucceeded,
		Message:     "Workflow completed",
	}

	cases := []struct {
		name     string
		workflow *common.WorkflowStatus
		want     map[string]interface{}
	}{
		{
			name:     "nil workflow",
			workflow: nil,
			want:     map[string]interface{}{},
		},
		{
			name:     "workflow with data",
			workflow: populated,
			want: map[string]interface{}{
				"app_revision": "rev-1",
				"finished":     true,
				"phase":        workflowv1alpha1.WorkflowStateSucceeded,
				"message":      "Workflow completed",
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, buildWorkflowStatus(tc.workflow))
		})
	}
}
// TestBuildServiceDetails verifies the per-service maps, including that the
// "details" key appears only for services that carry detail entries.
func TestBuildServiceDetails(t *testing.T) {
	web := common.ApplicationComponentStatus{
		Name:      "web",
		Namespace: "default",
		Cluster:   "local",
		Healthy:   true,
		Message:   "Running",
		Details:   map[string]string{"replicas": "3"},
	}
	db := common.ApplicationComponentStatus{
		Name:      "db",
		Namespace: "default",
		Cluster:   "local",
		Healthy:   false,
		Message:   "Connection failed",
	}

	cases := []struct {
		name     string
		services []common.ApplicationComponentStatus
		want     []map[string]interface{}
	}{
		{
			name:     "empty services",
			services: []common.ApplicationComponentStatus{},
			want:     []map[string]interface{}{},
		},
		{
			name:     "services with details",
			services: []common.ApplicationComponentStatus{web, db},
			want: []map[string]interface{}{
				{
					"name":      "web",
					"namespace": "default",
					"cluster":   "local",
					"healthy":   true,
					"message":   "Running",
					"details":   map[string]string{"replicas": "3"},
				},
				{
					"name":      "db",
					"namespace": "default",
					"cluster":   "local",
					"healthy":   false,
					"message":   "Connection failed",
				},
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, buildServiceDetails(tc.services))
		})
	}
}
// TestUpdateHealthMetric checks that the health gauge reads 1 for a healthy
// application and 0 for an unhealthy one.
func TestUpdateHealthMetric(t *testing.T) {
	// Start from a clean gauge so earlier tests cannot influence the readings.
	metrics.ApplicationHealthStatus.Reset()

	// newApp returns a fresh application with the labels used by both cases.
	newApp := func() *v1beta1.Application {
		return &v1beta1.Application{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "test-app",
				Namespace: "default",
			},
		}
	}

	cases := []struct {
		name          string
		app           *v1beta1.Application
		healthy       bool
		expectedValue float64
	}{
		{name: "healthy application", app: newApp(), healthy: true, expectedValue: 1},
		{name: "unhealthy application", app: newApp(), healthy: false, expectedValue: 0},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			updateHealthMetric(tc.app, tc.healthy)

			gauge := metrics.ApplicationHealthStatus.WithLabelValues(tc.app.Name, tc.app.Namespace)
			assert.Equal(t, tc.expectedValue, testutil.ToFloat64(gauge))
		})
	}
}
func TestUpdatePhaseMetrics(t *testing.T) {
// Reset metrics before testing
metrics.ApplicationPhase.Reset()
metrics.WorkflowPhase.Reset()
tests := []struct {
name string
app *v1beta1.Application
expectedAppPhase float64
expectedWorkflowPhase float64
hasWorkflowMetric bool
}{
{
name: "app with workflow",
app: &v1beta1.Application{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app",
Namespace: "default",
},
Status: common.AppStatus{
Phase: common.ApplicationRunning,
Workflow: &common.WorkflowStatus{
Phase: workflowv1alpha1.WorkflowStateSucceeded,
},
},
},
expectedAppPhase: 1, // ApplicationRunning
expectedWorkflowPhase: 1, // WorkflowStateSucceeded
hasWorkflowMetric: true,
},
{
name: "app without workflow",
app: &v1beta1.Application{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app-2",
Namespace: "default",
},
Status: common.AppStatus{
Phase: common.ApplicationStarting,
},
},
expectedAppPhase: 0, // ApplicationStarting
hasWorkflowMetric: false,
},
{
name: "app with empty workflow phase",
app: &v1beta1.Application{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app-3",
Namespace: "default",
},
Status: common.AppStatus{
Phase: common.ApplicationUnhealthy,
Workflow: &common.WorkflowStatus{
Phase: "", // Empty phase
},
},
},
expectedAppPhase: 8, // ApplicationUnhealthy
hasWorkflowMetric: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
updatePhaseMetrics(tt.app)
appPhaseValue := testutil.ToFloat64(metrics.ApplicationPhase.WithLabelValues(
tt.app.Name,
tt.app.Namespace,
))
assert.Equal(t, tt.expectedAppPhase, appPhaseValue)
if tt.hasWorkflowMetric {
workflowPhaseValue := testutil.ToFloat64(metrics.WorkflowPhase.WithLabelValues(
tt.app.Name,
tt.app.Namespace,
))
assert.Equal(t, tt.expectedWorkflowPhase, workflowPhaseValue)
}
})
}
}
// TestLogApplicationStatus is a smoke test: logging must not panic for fully
// populated, minimal, and nil workflow/service inputs.
func TestLogApplicationStatus(t *testing.T) {
	type logCase struct {
		name           string
		app            *v1beta1.Application
		healthStatus   HealthStatus
		workflowStatus map[string]interface{}
		serviceDetails []map[string]interface{}
	}

	complete := logCase{
		name: "complete status",
		app: &v1beta1.Application{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "test-app",
				Namespace: "default",
				UID:       "12345",
				Labels:    map[string]string{"env": "prod"},
			},
			Status: common.AppStatus{Phase: common.ApplicationRunning},
		},
		healthStatus: HealthStatus{Healthy: true, HealthyCount: 2, UnhealthyCount: 0},
		workflowStatus: map[string]interface{}{
			"phase":    workflowv1alpha1.WorkflowStateSucceeded,
			"finished": true,
		},
		serviceDetails: []map[string]interface{}{
			{"name": "web", "healthy": true},
		},
	}

	minimal := logCase{
		name: "minimal status",
		app: &v1beta1.Application{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "test-app-minimal",
				Namespace: "default",
			},
			Status: common.AppStatus{Phase: common.ApplicationStarting},
		},
		healthStatus:   HealthStatus{Healthy: true},
		workflowStatus: map[string]interface{}{},
		serviceDetails: []map[string]interface{}{},
	}

	nilValues := logCase{
		name: "nil values",
		app: &v1beta1.Application{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "test-app-nil",
				Namespace: "default",
			},
		},
		healthStatus:   HealthStatus{},
		workflowStatus: nil,
		serviceDetails: nil,
	}

	for _, tc := range []logCase{complete, minimal, nilValues} {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			emit := func() {
				logApplicationStatus(tc.app, tc.healthStatus, tc.workflowStatus, tc.serviceDetails)
			}
			assert.NotPanics(t, emit)
		})
	}
}
func TestUpdateMetricsAndLogFunction(t *testing.T) {
// Reset metrics before testing
metrics.ApplicationHealthStatus.Reset()
metrics.ApplicationPhase.Reset()
metrics.WorkflowPhase.Reset()
tests := []struct {
name string
app *v1beta1.Application
}{
{
name: "complete application",
app: &v1beta1.Application{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app",
Namespace: "default",
UID: "12345",
},
Status: common.AppStatus{
Phase: common.ApplicationRunning,
Services: []common.ApplicationComponentStatus{
{
Name: "web",
Namespace: "default",
Healthy: true,
Message: "Running",
},
{
Name: "db",
Namespace: "default",
Healthy: false,
Message: "Starting",
},
},
Workflow: &common.WorkflowStatus{
Phase: workflowv1alpha1.WorkflowStateExecuting,
Finished: false,
AppRevision: "v1",
},
},
},
},
{
name: "application with no services",
app: &v1beta1.Application{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app-empty",
Namespace: "test",
},
Status: common.AppStatus{
Phase: common.ApplicationStarting,
Services: []common.ApplicationComponentStatus{},
},
},
},
{
name: "application with nil workflow",
app: &v1beta1.Application{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app-no-workflow",
Namespace: "default",
},
Status: common.AppStatus{
Phase: common.ApplicationUnhealthy,
Services: []common.ApplicationComponentStatus{
{
Name: "failing-service",
Healthy: false,
},
},
Workflow: nil,
},
},
},
}
r := &Reconciler{}
ctx := context.Background()
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.NotPanics(t, func() {
r.updateMetricsAndLog(ctx, tt.app)
})
labels := prometheus.Labels{
"app_name": tt.app.Name,
"namespace": tt.app.Namespace,
}
_, err := metrics.ApplicationHealthStatus.GetMetricWith(labels)
assert.NoError(t, err)
_, err = metrics.ApplicationPhase.GetMetricWith(labels)
assert.NoError(t, err)
})
}
}

View File

@ -114,6 +114,9 @@ const (
// EnableCueValidation enable strict cue validation fields for the required parameter field verification
EnableCueValidation = "EnableCueValidation"
// EnableApplicationStatusMetrics enable the collection and export of application status metrics and structured logging
EnableApplicationStatusMetrics = "EnableApplicationStatusMetrics"
)
var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
@ -139,6 +142,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
SharedDefinitionStorageForApplicationRevision: {Default: true, PreRelease: featuregate.Alpha},
DisableWorkflowContextConfigMapCache: {Default: true, PreRelease: featuregate.Alpha},
EnableCueValidation: {Default: false, PreRelease: featuregate.Beta},
EnableApplicationStatusMetrics: {Default: false, PreRelease: featuregate.Alpha},
}
func init() {

View File

@ -66,6 +66,27 @@ var (
Name: "workflow_step_phase_number",
Help: "workflow step phase number",
}, []string{"step_type", "phase"})
// ApplicationHealthStatus reports the overall health status of each application
ApplicationHealthStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "kubevela_application_health_status",
Help: "Application health status (1 = healthy, 0 = unhealthy)",
}, []string{"app_name", "namespace"})
// ApplicationPhase reports the numeric phase of each application
ApplicationPhase = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "kubevela_application_phase",
Help: "Application phase as numeric value (0=starting, 1=running, 2=rendering, 3=policy_generating, 4=running_workflow, " +
"5=workflow_suspending, 6=workflow_terminated, 7=workflow_failed, 8=unhealthy, 9=deleting, " +
"-1=unknown)",
}, []string{"app_name", "namespace"})
// WorkflowPhase reports the numeric phase of each workflow
WorkflowPhase = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "kubevela_application_workflow_phase",
Help: "Workflow phase as numeric value (0=initializing, 1=succeeded, 2=executing, 3=suspending, 4=terminated, " +
"5=failed, 6=skipped, -1=unknown)",
}, []string{"app_name", "namespace"})
)
var (

View File

@ -15,10 +15,13 @@ package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/metrics"
velametrics "github.com/kubevela/pkg/monitor/metrics"
"github.com/oam-dev/kubevela/pkg/features"
)
var (
@ -53,6 +56,10 @@ var collectorGroup = []prometheus.Collector{
ClusterCPUUsageGauge,
}
// applicationStatusMetricsRegistered records whether the application status
// collectors have already been handed to the registry.
var applicationStatusMetricsRegistered bool
func init() {
for _, collector := range collectorGroup {
if err := metrics.Registry.Register(collector); err != nil {
@ -60,3 +67,27 @@ func init() {
}
}
}
// RegisterApplicationStatusMetrics adds the application health, application
// phase and workflow phase collectors to the metrics registry.
//
// The feature gate is read at call time, so this should be invoked after the
// feature gate system is initialized. The call is a no-op when the gate is
// disabled or when a previous call already performed the registration.
// Individual registration failures are logged and do not stop the remaining
// collectors from being registered.
func RegisterApplicationStatusMetrics() {
	if applicationStatusMetricsRegistered ||
		!feature.DefaultMutableFeatureGate.Enabled(features.EnableApplicationStatusMetrics) {
		return
	}

	collectors := [...]prometheus.Collector{
		ApplicationHealthStatus,
		ApplicationPhase,
		WorkflowPhase,
	}
	for _, collector := range collectors {
		err := metrics.Registry.Register(collector)
		if err != nil {
			klog.Errorf("Failed to register application status metric: %v", err)
		}
	}

	// Marked as done even if some collectors failed, so later calls do not retry.
	applicationStatusMetricsRegistered = true
}