mirror of https://github.com/kubevela/kubevela.git
Feat: Application Status Metrics & Structured Logs (#6857)
Feat: Application Status Metrics & Structured Logs Signed-off-by: Brian Kane <briankane1@gmail.com>
This commit is contained in:
parent
721c75e44a
commit
1a934e1618
|
@ -99,6 +99,7 @@ helm install --create-namespace -n vela-system kubevela kubevela/vela-core --wai
|
||||||
| `featureGates.sharedDefinitionStorageForApplicationRevision` | use definition cache to reduce duplicated definition storage for application revision, must be used with InformerCacheFilterUnnecessaryFields | `true` |
|
| `featureGates.sharedDefinitionStorageForApplicationRevision` | use definition cache to reduce duplicated definition storage for application revision, must be used with InformerCacheFilterUnnecessaryFields | `true` |
|
||||||
| `featureGates.disableWorkflowContextConfigMapCache` | disable the workflow context's configmap informer cache | `true` |
|
| `featureGates.disableWorkflowContextConfigMapCache` | disable the workflow context's configmap informer cache | `true` |
|
||||||
| `featureGates.enableCueValidation` | enable the strict cue validation for cue required parameter fields | `false` |
|
| `featureGates.enableCueValidation` | enable the strict cue validation for cue required parameter fields | `false` |
|
||||||
|
| `featureGates.enableApplicationStatusMetrics` | enable application status metrics and structured logging | `false` |
|
||||||
|
|
||||||
### MultiCluster parameters
|
### MultiCluster parameters
|
||||||
|
|
||||||
|
|
|
@ -312,6 +312,7 @@ spec:
|
||||||
- "--feature-gates=SharedDefinitionStorageForApplicationRevision={{- .Values.featureGates.sharedDefinitionStorageForApplicationRevision | toString -}}"
|
- "--feature-gates=SharedDefinitionStorageForApplicationRevision={{- .Values.featureGates.sharedDefinitionStorageForApplicationRevision | toString -}}"
|
||||||
- "--feature-gates=DisableWorkflowContextConfigMapCache={{- .Values.featureGates.disableWorkflowContextConfigMapCache | toString -}}"
|
- "--feature-gates=DisableWorkflowContextConfigMapCache={{- .Values.featureGates.disableWorkflowContextConfigMapCache | toString -}}"
|
||||||
- "--feature-gates=EnableCueValidation={{- .Values.featureGates.enableCueValidation | toString -}}"
|
- "--feature-gates=EnableCueValidation={{- .Values.featureGates.enableCueValidation | toString -}}"
|
||||||
|
- "--feature-gates=EnableApplicationStatusMetrics={{- .Values.featureGates.enableApplicationStatusMetrics | toString -}}"
|
||||||
{{ if .Values.authentication.enabled }}
|
{{ if .Values.authentication.enabled }}
|
||||||
{{ if .Values.authentication.withUser }}
|
{{ if .Values.authentication.withUser }}
|
||||||
- "--authentication-with-user"
|
- "--authentication-with-user"
|
||||||
|
|
|
@ -124,6 +124,7 @@ optimize:
|
||||||
##@param featureGates.sharedDefinitionStorageForApplicationRevision use definition cache to reduce duplicated definition storage for application revision, must be used with InformerCacheFilterUnnecessaryFields
|
##@param featureGates.sharedDefinitionStorageForApplicationRevision use definition cache to reduce duplicated definition storage for application revision, must be used with InformerCacheFilterUnnecessaryFields
|
||||||
##@param featureGates.disableWorkflowContextConfigMapCache disable the workflow context's configmap informer cache
|
##@param featureGates.disableWorkflowContextConfigMapCache disable the workflow context's configmap informer cache
|
||||||
##@param featureGates.enableCueValidation enable the strict cue validation for cue required parameter fields
|
##@param featureGates.enableCueValidation enable the strict cue validation for cue required parameter fields
|
||||||
|
##@param featureGates.enableApplicationStatusMetrics enable application status metrics and structured logging
|
||||||
##@param
|
##@param
|
||||||
featureGates:
|
featureGates:
|
||||||
gzipResourceTracker: false
|
gzipResourceTracker: false
|
||||||
|
@ -140,6 +141,7 @@ featureGates:
|
||||||
sharedDefinitionStorageForApplicationRevision: true
|
sharedDefinitionStorageForApplicationRevision: true
|
||||||
disableWorkflowContextConfigMapCache: true
|
disableWorkflowContextConfigMapCache: true
|
||||||
enableCueValidation: false
|
enableCueValidation: false
|
||||||
|
enableApplicationStatusMetrics: false
|
||||||
|
|
||||||
## @section MultiCluster parameters
|
## @section MultiCluster parameters
|
||||||
|
|
||||||
|
|
|
@ -475,6 +475,9 @@ func (r *Reconciler) writeStatusByMethod(ctx context.Context, method method, app
|
||||||
executor.StepStatusCache.Store(fmt.Sprintf("%s-%s", app.Name, app.Namespace), -1)
|
executor.StepStatusCache.Store(fmt.Sprintf("%s-%s", app.Name, app.Namespace), -1)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if feature.DefaultMutableFeatureGate.Enabled(features.EnableApplicationStatusMetrics) {
|
||||||
|
r.updateMetricsAndLog(ctx, app)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -591,6 +594,9 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||||
|
|
||||||
// Setup adds a controller that reconciles App.
|
// Setup adds a controller that reconciles App.
|
||||||
func Setup(mgr ctrl.Manager, args core.Args) error {
|
func Setup(mgr ctrl.Manager, args core.Args) error {
|
||||||
|
// Register application status metrics after feature gates are initialized
|
||||||
|
metrics.RegisterApplicationStatusMetrics()
|
||||||
|
|
||||||
reconciler := Reconciler{
|
reconciler := Reconciler{
|
||||||
Client: mgr.GetClient(),
|
Client: mgr.GetClient(),
|
||||||
Scheme: mgr.GetScheme(),
|
Scheme: mgr.GetScheme(),
|
||||||
|
|
|
@ -0,0 +1,206 @@
|
||||||
|
/*
|
||||||
|
Copyright 2021 The KubeVela Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package application
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
workflowv1alpha1 "github.com/kubevela/workflow/api/v1alpha1"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
|
||||||
|
"github.com/oam-dev/kubevela/apis/core.oam.dev/common"
|
||||||
|
"github.com/oam-dev/kubevela/apis/core.oam.dev/v1beta1"
|
||||||
|
"github.com/oam-dev/kubevela/pkg/monitor/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
// HealthStatus is the aggregated health of an application's services as
// computed by calculateHealthStatus.
type HealthStatus struct {
	// Healthy is false as soon as at least one service reports unhealthy;
	// it stays true when there are no services at all.
	Healthy bool
	// HealthyCount is the number of services whose Healthy flag is set.
	HealthyCount int
	// UnhealthyCount is the number of services whose Healthy flag is not set.
	UnhealthyCount int
}
|
||||||
|
|
||||||
|
// updateMetricsAndLog updates Prometheus metrics and logs application status with service details
|
||||||
|
func (r *Reconciler) updateMetricsAndLog(_ context.Context, app *v1beta1.Application) {
|
||||||
|
healthStatus := calculateHealthStatus(app.Status.Services)
|
||||||
|
|
||||||
|
updateHealthMetric(app, healthStatus.Healthy)
|
||||||
|
updatePhaseMetrics(app)
|
||||||
|
|
||||||
|
workflowStatus := buildWorkflowStatus(app.Status.Workflow)
|
||||||
|
serviceDetails := buildServiceDetails(app.Status.Services)
|
||||||
|
logApplicationStatus(app, healthStatus, workflowStatus, serviceDetails)
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculateHealthStatus calculates the health status from services
|
||||||
|
func calculateHealthStatus(services []common.ApplicationComponentStatus) HealthStatus {
|
||||||
|
status := HealthStatus{
|
||||||
|
Healthy: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, svc := range services {
|
||||||
|
if svc.Healthy {
|
||||||
|
status.HealthyCount++
|
||||||
|
} else {
|
||||||
|
status.UnhealthyCount++
|
||||||
|
status.Healthy = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return status
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateHealthMetric updates the application health status metric
|
||||||
|
func updateHealthMetric(app *v1beta1.Application, healthy bool) {
|
||||||
|
healthValue := float64(1)
|
||||||
|
if !healthy {
|
||||||
|
healthValue = float64(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics.ApplicationHealthStatus.WithLabelValues(
|
||||||
|
app.Name,
|
||||||
|
app.Namespace,
|
||||||
|
).Set(healthValue)
|
||||||
|
}
|
||||||
|
|
||||||
|
// updatePhaseMetrics updates the application and workflow phase metrics
|
||||||
|
func updatePhaseMetrics(app *v1beta1.Application) {
|
||||||
|
metrics.ApplicationPhase.WithLabelValues(
|
||||||
|
app.Name,
|
||||||
|
app.Namespace,
|
||||||
|
).Set(appPhaseToNumeric(app.Status.Phase))
|
||||||
|
|
||||||
|
if app.Status.Workflow != nil && app.Status.Workflow.Phase != "" {
|
||||||
|
metrics.WorkflowPhase.WithLabelValues(
|
||||||
|
app.Name,
|
||||||
|
app.Namespace,
|
||||||
|
).Set(workflowPhaseToNumeric(app.Status.Workflow.Phase))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildWorkflowStatus builds workflow status information for logging
|
||||||
|
func buildWorkflowStatus(workflow *common.WorkflowStatus) map[string]interface{} {
|
||||||
|
if workflow == nil {
|
||||||
|
return make(map[string]interface{})
|
||||||
|
}
|
||||||
|
|
||||||
|
return map[string]interface{}{
|
||||||
|
"app_revision": workflow.AppRevision,
|
||||||
|
"finished": workflow.Finished,
|
||||||
|
"phase": workflow.Phase,
|
||||||
|
"message": workflow.Message,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildServiceDetails builds service details for logging
|
||||||
|
func buildServiceDetails(services []common.ApplicationComponentStatus) []map[string]interface{} {
|
||||||
|
serviceDetails := make([]map[string]interface{}, 0, len(services))
|
||||||
|
|
||||||
|
for _, svc := range services {
|
||||||
|
svcDetails := map[string]interface{}{
|
||||||
|
"name": svc.Name,
|
||||||
|
"namespace": svc.Namespace,
|
||||||
|
"cluster": svc.Cluster,
|
||||||
|
"healthy": svc.Healthy,
|
||||||
|
"message": svc.Message,
|
||||||
|
}
|
||||||
|
if len(svc.Details) > 0 {
|
||||||
|
svcDetails["details"] = svc.Details
|
||||||
|
}
|
||||||
|
serviceDetails = append(serviceDetails, svcDetails)
|
||||||
|
}
|
||||||
|
|
||||||
|
return serviceDetails
|
||||||
|
}
|
||||||
|
|
||||||
|
// logApplicationStatus logs the application status with structured data
|
||||||
|
func logApplicationStatus(app *v1beta1.Application, healthStatus HealthStatus, workflowStatus map[string]interface{}, serviceDetails []map[string]interface{}) {
|
||||||
|
statusDetails := map[string]interface{}{
|
||||||
|
"app_uid": app.UID,
|
||||||
|
"app_name": app.Name,
|
||||||
|
"version": app.ResourceVersion,
|
||||||
|
"namespace": app.Namespace,
|
||||||
|
"labels": app.Labels,
|
||||||
|
"status": map[string]interface{}{
|
||||||
|
"phase": string(app.Status.Phase),
|
||||||
|
"healthy": healthStatus.Healthy,
|
||||||
|
"healthy_services_count": healthStatus.HealthyCount,
|
||||||
|
"unhealthy_services_count": healthStatus.UnhealthyCount,
|
||||||
|
"services": serviceDetails,
|
||||||
|
"workflow": workflowStatus,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
klog.InfoS("application update",
|
||||||
|
"app_uid", app.UID,
|
||||||
|
"app_name", app.Name,
|
||||||
|
"namespace", app.Namespace,
|
||||||
|
"phase", string(app.Status.Phase),
|
||||||
|
"healthy", healthStatus.Healthy,
|
||||||
|
"data", statusDetails,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// appPhaseToNumeric converts application phase to numeric value for metrics
|
||||||
|
func appPhaseToNumeric(phase common.ApplicationPhase) float64 {
|
||||||
|
switch phase {
|
||||||
|
case common.ApplicationStarting:
|
||||||
|
return 0
|
||||||
|
case common.ApplicationRunning:
|
||||||
|
return 1
|
||||||
|
case common.ApplicationRendering:
|
||||||
|
return 2
|
||||||
|
case common.ApplicationPolicyGenerating:
|
||||||
|
return 3
|
||||||
|
case common.ApplicationRunningWorkflow:
|
||||||
|
return 4
|
||||||
|
case common.ApplicationWorkflowSuspending:
|
||||||
|
return 5
|
||||||
|
case common.ApplicationWorkflowTerminated:
|
||||||
|
return 6
|
||||||
|
case common.ApplicationWorkflowFailed:
|
||||||
|
return 7
|
||||||
|
case common.ApplicationUnhealthy:
|
||||||
|
return 8
|
||||||
|
case common.ApplicationDeleting:
|
||||||
|
return 9
|
||||||
|
default:
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// workflowPhaseToNumeric converts workflow phase to numeric value for metrics
|
||||||
|
func workflowPhaseToNumeric(phase workflowv1alpha1.WorkflowRunPhase) float64 {
|
||||||
|
switch phase {
|
||||||
|
case workflowv1alpha1.WorkflowStateInitializing:
|
||||||
|
return 0
|
||||||
|
case workflowv1alpha1.WorkflowStateSucceeded:
|
||||||
|
return 1
|
||||||
|
case workflowv1alpha1.WorkflowStateExecuting:
|
||||||
|
return 2
|
||||||
|
case workflowv1alpha1.WorkflowStateSuspending:
|
||||||
|
return 3
|
||||||
|
case workflowv1alpha1.WorkflowStateTerminated:
|
||||||
|
return 4
|
||||||
|
case workflowv1alpha1.WorkflowStateFailed:
|
||||||
|
return 5
|
||||||
|
case workflowv1alpha1.WorkflowStateSkipped:
|
||||||
|
return 6
|
||||||
|
default:
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,545 @@
|
||||||
|
/*
|
||||||
|
Copyright 2021 The KubeVela Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package application
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
workflowv1alpha1 "github.com/kubevela/workflow/api/v1alpha1"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
|
||||||
|
"github.com/oam-dev/kubevela/apis/core.oam.dev/common"
|
||||||
|
"github.com/oam-dev/kubevela/apis/core.oam.dev/v1beta1"
|
||||||
|
"github.com/oam-dev/kubevela/pkg/monitor/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCalculateHealthStatus(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
services []common.ApplicationComponentStatus
|
||||||
|
expected HealthStatus
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "all services healthy",
|
||||||
|
services: []common.ApplicationComponentStatus{
|
||||||
|
{Healthy: true},
|
||||||
|
{Healthy: true},
|
||||||
|
{Healthy: true},
|
||||||
|
},
|
||||||
|
expected: HealthStatus{
|
||||||
|
Healthy: true,
|
||||||
|
HealthyCount: 3,
|
||||||
|
UnhealthyCount: 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "some unhealthy",
|
||||||
|
services: []common.ApplicationComponentStatus{
|
||||||
|
{Healthy: true},
|
||||||
|
{Healthy: false},
|
||||||
|
{Healthy: true},
|
||||||
|
},
|
||||||
|
expected: HealthStatus{
|
||||||
|
Healthy: false,
|
||||||
|
HealthyCount: 2,
|
||||||
|
UnhealthyCount: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "all services unhealthy",
|
||||||
|
services: []common.ApplicationComponentStatus{
|
||||||
|
{Healthy: false},
|
||||||
|
{Healthy: false},
|
||||||
|
},
|
||||||
|
expected: HealthStatus{
|
||||||
|
Healthy: false,
|
||||||
|
HealthyCount: 0,
|
||||||
|
UnhealthyCount: 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no services",
|
||||||
|
services: []common.ApplicationComponentStatus{},
|
||||||
|
expected: HealthStatus{
|
||||||
|
Healthy: true,
|
||||||
|
HealthyCount: 0,
|
||||||
|
UnhealthyCount: 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := calculateHealthStatus(tt.services)
|
||||||
|
assert.Equal(t, tt.expected, got)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAppPhaseToNumeric(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
phase common.ApplicationPhase
|
||||||
|
want float64
|
||||||
|
}{
|
||||||
|
{"starting", common.ApplicationStarting, 0},
|
||||||
|
{"running", common.ApplicationRunning, 1},
|
||||||
|
{"rendering", common.ApplicationRendering, 2},
|
||||||
|
{"policy generating", common.ApplicationPolicyGenerating, 3},
|
||||||
|
{"running workflow", common.ApplicationRunningWorkflow, 4},
|
||||||
|
{"workflow suspending", common.ApplicationWorkflowSuspending, 5},
|
||||||
|
{"workflow terminated", common.ApplicationWorkflowTerminated, 6},
|
||||||
|
{"workflow failed", common.ApplicationWorkflowFailed, 7},
|
||||||
|
{"unhealthy", common.ApplicationUnhealthy, 8},
|
||||||
|
{"deleting", common.ApplicationDeleting, 9},
|
||||||
|
{"unknown", common.ApplicationPhase("unknown"), -1},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := appPhaseToNumeric(tt.phase)
|
||||||
|
assert.Equal(t, tt.want, got)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWorkflowPhaseToNumeric(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
phase workflowv1alpha1.WorkflowRunPhase
|
||||||
|
want float64
|
||||||
|
}{
|
||||||
|
{"initializing", workflowv1alpha1.WorkflowStateInitializing, 0},
|
||||||
|
{"succeeded", workflowv1alpha1.WorkflowStateSucceeded, 1},
|
||||||
|
{"executing", workflowv1alpha1.WorkflowStateExecuting, 2},
|
||||||
|
{"suspending", workflowv1alpha1.WorkflowStateSuspending, 3},
|
||||||
|
{"terminated", workflowv1alpha1.WorkflowStateTerminated, 4},
|
||||||
|
{"failed", workflowv1alpha1.WorkflowStateFailed, 5},
|
||||||
|
{"skipped", workflowv1alpha1.WorkflowStateSkipped, 6},
|
||||||
|
{"unknown", workflowv1alpha1.WorkflowRunPhase("unknown"), -1},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := workflowPhaseToNumeric(tt.phase)
|
||||||
|
assert.Equal(t, tt.want, got)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildWorkflowStatus(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
workflow *common.WorkflowStatus
|
||||||
|
want map[string]interface{}
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "nil workflow",
|
||||||
|
workflow: nil,
|
||||||
|
want: map[string]interface{}{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "workflow with data",
|
||||||
|
workflow: &common.WorkflowStatus{
|
||||||
|
AppRevision: "rev-1",
|
||||||
|
Finished: true,
|
||||||
|
Phase: workflowv1alpha1.WorkflowStateSucceeded,
|
||||||
|
Message: "Workflow completed",
|
||||||
|
},
|
||||||
|
want: map[string]interface{}{
|
||||||
|
"app_revision": "rev-1",
|
||||||
|
"finished": true,
|
||||||
|
"phase": workflowv1alpha1.WorkflowStateSucceeded,
|
||||||
|
"message": "Workflow completed",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := buildWorkflowStatus(tt.workflow)
|
||||||
|
assert.Equal(t, tt.want, got)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildServiceDetails(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
services []common.ApplicationComponentStatus
|
||||||
|
want []map[string]interface{}
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty services",
|
||||||
|
services: []common.ApplicationComponentStatus{},
|
||||||
|
want: []map[string]interface{}{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "services with details",
|
||||||
|
services: []common.ApplicationComponentStatus{
|
||||||
|
{
|
||||||
|
Name: "web",
|
||||||
|
Namespace: "default",
|
||||||
|
Cluster: "local",
|
||||||
|
Healthy: true,
|
||||||
|
Message: "Running",
|
||||||
|
Details: map[string]string{"replicas": "3"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "db",
|
||||||
|
Namespace: "default",
|
||||||
|
Cluster: "local",
|
||||||
|
Healthy: false,
|
||||||
|
Message: "Connection failed",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"name": "web",
|
||||||
|
"namespace": "default",
|
||||||
|
"cluster": "local",
|
||||||
|
"healthy": true,
|
||||||
|
"message": "Running",
|
||||||
|
"details": map[string]string{"replicas": "3"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "db",
|
||||||
|
"namespace": "default",
|
||||||
|
"cluster": "local",
|
||||||
|
"healthy": false,
|
||||||
|
"message": "Connection failed",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := buildServiceDetails(tt.services)
|
||||||
|
assert.Equal(t, tt.want, got)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateHealthMetric(t *testing.T) {
|
||||||
|
// Reset the metric before testing
|
||||||
|
metrics.ApplicationHealthStatus.Reset()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
app *v1beta1.Application
|
||||||
|
healthy bool
|
||||||
|
expectedValue float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "healthy application",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app",
|
||||||
|
Namespace: "default",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
healthy: true,
|
||||||
|
expectedValue: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "unhealthy application",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app",
|
||||||
|
Namespace: "default",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
healthy: false,
|
||||||
|
expectedValue: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
updateHealthMetric(tt.app, tt.healthy)
|
||||||
|
|
||||||
|
value := testutil.ToFloat64(metrics.ApplicationHealthStatus.WithLabelValues(
|
||||||
|
tt.app.Name,
|
||||||
|
tt.app.Namespace,
|
||||||
|
))
|
||||||
|
assert.Equal(t, tt.expectedValue, value)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdatePhaseMetrics(t *testing.T) {
|
||||||
|
// Reset metrics before testing
|
||||||
|
metrics.ApplicationPhase.Reset()
|
||||||
|
metrics.WorkflowPhase.Reset()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
app *v1beta1.Application
|
||||||
|
expectedAppPhase float64
|
||||||
|
expectedWorkflowPhase float64
|
||||||
|
hasWorkflowMetric bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "app with workflow",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app",
|
||||||
|
Namespace: "default",
|
||||||
|
},
|
||||||
|
Status: common.AppStatus{
|
||||||
|
Phase: common.ApplicationRunning,
|
||||||
|
Workflow: &common.WorkflowStatus{
|
||||||
|
Phase: workflowv1alpha1.WorkflowStateSucceeded,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedAppPhase: 1, // ApplicationRunning
|
||||||
|
expectedWorkflowPhase: 1, // WorkflowStateSucceeded
|
||||||
|
hasWorkflowMetric: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "app without workflow",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app-2",
|
||||||
|
Namespace: "default",
|
||||||
|
},
|
||||||
|
Status: common.AppStatus{
|
||||||
|
Phase: common.ApplicationStarting,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedAppPhase: 0, // ApplicationStarting
|
||||||
|
hasWorkflowMetric: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "app with empty workflow phase",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app-3",
|
||||||
|
Namespace: "default",
|
||||||
|
},
|
||||||
|
Status: common.AppStatus{
|
||||||
|
Phase: common.ApplicationUnhealthy,
|
||||||
|
Workflow: &common.WorkflowStatus{
|
||||||
|
Phase: "", // Empty phase
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedAppPhase: 8, // ApplicationUnhealthy
|
||||||
|
hasWorkflowMetric: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
updatePhaseMetrics(tt.app)
|
||||||
|
|
||||||
|
appPhaseValue := testutil.ToFloat64(metrics.ApplicationPhase.WithLabelValues(
|
||||||
|
tt.app.Name,
|
||||||
|
tt.app.Namespace,
|
||||||
|
))
|
||||||
|
assert.Equal(t, tt.expectedAppPhase, appPhaseValue)
|
||||||
|
|
||||||
|
if tt.hasWorkflowMetric {
|
||||||
|
workflowPhaseValue := testutil.ToFloat64(metrics.WorkflowPhase.WithLabelValues(
|
||||||
|
tt.app.Name,
|
||||||
|
tt.app.Namespace,
|
||||||
|
))
|
||||||
|
assert.Equal(t, tt.expectedWorkflowPhase, workflowPhaseValue)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLogApplicationStatus(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
app *v1beta1.Application
|
||||||
|
healthStatus HealthStatus
|
||||||
|
workflowStatus map[string]interface{}
|
||||||
|
serviceDetails []map[string]interface{}
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "complete status",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app",
|
||||||
|
Namespace: "default",
|
||||||
|
UID: "12345",
|
||||||
|
Labels: map[string]string{"env": "prod"},
|
||||||
|
},
|
||||||
|
Status: common.AppStatus{
|
||||||
|
Phase: common.ApplicationRunning,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
healthStatus: HealthStatus{
|
||||||
|
Healthy: true,
|
||||||
|
HealthyCount: 2,
|
||||||
|
UnhealthyCount: 0,
|
||||||
|
},
|
||||||
|
workflowStatus: map[string]interface{}{
|
||||||
|
"phase": workflowv1alpha1.WorkflowStateSucceeded,
|
||||||
|
"finished": true,
|
||||||
|
},
|
||||||
|
serviceDetails: []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"name": "web",
|
||||||
|
"healthy": true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "minimal status",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app-minimal",
|
||||||
|
Namespace: "default",
|
||||||
|
},
|
||||||
|
Status: common.AppStatus{
|
||||||
|
Phase: common.ApplicationStarting,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
healthStatus: HealthStatus{Healthy: true},
|
||||||
|
workflowStatus: map[string]interface{}{},
|
||||||
|
serviceDetails: []map[string]interface{}{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nil values",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app-nil",
|
||||||
|
Namespace: "default",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
healthStatus: HealthStatus{},
|
||||||
|
workflowStatus: nil,
|
||||||
|
serviceDetails: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
assert.NotPanics(t, func() {
|
||||||
|
logApplicationStatus(tt.app, tt.healthStatus, tt.workflowStatus, tt.serviceDetails)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateMetricsAndLogFunction(t *testing.T) {
|
||||||
|
// Reset metrics before testing
|
||||||
|
metrics.ApplicationHealthStatus.Reset()
|
||||||
|
metrics.ApplicationPhase.Reset()
|
||||||
|
metrics.WorkflowPhase.Reset()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
app *v1beta1.Application
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "complete application",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app",
|
||||||
|
Namespace: "default",
|
||||||
|
UID: "12345",
|
||||||
|
},
|
||||||
|
Status: common.AppStatus{
|
||||||
|
Phase: common.ApplicationRunning,
|
||||||
|
Services: []common.ApplicationComponentStatus{
|
||||||
|
{
|
||||||
|
Name: "web",
|
||||||
|
Namespace: "default",
|
||||||
|
Healthy: true,
|
||||||
|
Message: "Running",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "db",
|
||||||
|
Namespace: "default",
|
||||||
|
Healthy: false,
|
||||||
|
Message: "Starting",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Workflow: &common.WorkflowStatus{
|
||||||
|
Phase: workflowv1alpha1.WorkflowStateExecuting,
|
||||||
|
Finished: false,
|
||||||
|
AppRevision: "v1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "application with no services",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app-empty",
|
||||||
|
Namespace: "test",
|
||||||
|
},
|
||||||
|
Status: common.AppStatus{
|
||||||
|
Phase: common.ApplicationStarting,
|
||||||
|
Services: []common.ApplicationComponentStatus{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "application with nil workflow",
|
||||||
|
app: &v1beta1.Application{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "test-app-no-workflow",
|
||||||
|
Namespace: "default",
|
||||||
|
},
|
||||||
|
Status: common.AppStatus{
|
||||||
|
Phase: common.ApplicationUnhealthy,
|
||||||
|
Services: []common.ApplicationComponentStatus{
|
||||||
|
{
|
||||||
|
Name: "failing-service",
|
||||||
|
Healthy: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Workflow: nil,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
r := &Reconciler{}
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
assert.NotPanics(t, func() {
|
||||||
|
r.updateMetricsAndLog(ctx, tt.app)
|
||||||
|
})
|
||||||
|
|
||||||
|
labels := prometheus.Labels{
|
||||||
|
"app_name": tt.app.Name,
|
||||||
|
"namespace": tt.app.Namespace,
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := metrics.ApplicationHealthStatus.GetMetricWith(labels)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
_, err = metrics.ApplicationPhase.GetMetricWith(labels)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -114,6 +114,9 @@ const (
|
||||||
|
|
||||||
// EnableCueValidation enable strict cue validation fields for the required parameter field verification
|
// EnableCueValidation enable strict cue validation fields for the required parameter field verification
|
||||||
EnableCueValidation = "EnableCueValidation"
|
EnableCueValidation = "EnableCueValidation"
|
||||||
|
|
||||||
|
// EnableApplicationStatusMetrics enable the collection and export of application status metrics and structured logging
|
||||||
|
EnableApplicationStatusMetrics = "EnableApplicationStatusMetrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
|
var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
|
||||||
|
@ -139,6 +142,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
|
||||||
SharedDefinitionStorageForApplicationRevision: {Default: true, PreRelease: featuregate.Alpha},
|
SharedDefinitionStorageForApplicationRevision: {Default: true, PreRelease: featuregate.Alpha},
|
||||||
DisableWorkflowContextConfigMapCache: {Default: true, PreRelease: featuregate.Alpha},
|
DisableWorkflowContextConfigMapCache: {Default: true, PreRelease: featuregate.Alpha},
|
||||||
EnableCueValidation: {Default: false, PreRelease: featuregate.Beta},
|
EnableCueValidation: {Default: false, PreRelease: featuregate.Beta},
|
||||||
|
EnableApplicationStatusMetrics: {Default: false, PreRelease: featuregate.Alpha},
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
|
|
@ -66,6 +66,27 @@ var (
|
||||||
Name: "workflow_step_phase_number",
|
Name: "workflow_step_phase_number",
|
||||||
Help: "workflow step phase number",
|
Help: "workflow step phase number",
|
||||||
}, []string{"step_type", "phase"})
|
}, []string{"step_type", "phase"})
|
||||||
|
|
||||||
|
// ApplicationHealthStatus reports the overall health status of each application
|
||||||
|
ApplicationHealthStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "kubevela_application_health_status",
|
||||||
|
Help: "Application health status (1 = healthy, 0 = unhealthy)",
|
||||||
|
}, []string{"app_name", "namespace"})
|
||||||
|
|
||||||
|
// ApplicationPhase reports the numeric phase of each application
|
||||||
|
ApplicationPhase = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "kubevela_application_phase",
|
||||||
|
Help: "Application phase as numeric value (0=starting, 1=running, 2=rendering, 3=policy_generating, 4=running_workflow, " +
|
||||||
|
"5=workflow_suspending, 6=workflow_terminated, 7=workflow_failed, 8=unhealthy, 9=deleting, " +
|
||||||
|
"-1=unknown)",
|
||||||
|
}, []string{"app_name", "namespace"})
|
||||||
|
|
||||||
|
// WorkflowPhase reports the numeric phase of each workflow
|
||||||
|
WorkflowPhase = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "kubevela_application_workflow_phase",
|
||||||
|
Help: "Workflow phase as numeric value (0=initializing, 1=succeeded, 2=executing, 3=suspending, 4=terminated, " +
|
||||||
|
"5=failed, 6=skipped, -1=unknown)",
|
||||||
|
}, []string{"app_name", "namespace"})
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
|
@ -15,10 +15,13 @@ package metrics
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"k8s.io/apiserver/pkg/util/feature"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/metrics"
|
"sigs.k8s.io/controller-runtime/pkg/metrics"
|
||||||
|
|
||||||
velametrics "github.com/kubevela/pkg/monitor/metrics"
|
velametrics "github.com/kubevela/pkg/monitor/metrics"
|
||||||
|
|
||||||
|
"github.com/oam-dev/kubevela/pkg/features"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -53,6 +56,10 @@ var collectorGroup = []prometheus.Collector{
|
||||||
ClusterCPUUsageGauge,
|
ClusterCPUUsageGauge,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applicationStatusMetricsRegistered records whether the application status
// collectors have already been registered, so that registration happens at
// most once. It starts out false.
var applicationStatusMetricsRegistered bool
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
for _, collector := range collectorGroup {
|
for _, collector := range collectorGroup {
|
||||||
if err := metrics.Registry.Register(collector); err != nil {
|
if err := metrics.Registry.Register(collector); err != nil {
|
||||||
|
@ -60,3 +67,27 @@ func init() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RegisterApplicationStatusMetrics registers the application status metrics
|
||||||
|
// This should be called after the feature gate system is initialized
|
||||||
|
func RegisterApplicationStatusMetrics() {
|
||||||
|
if applicationStatusMetricsRegistered {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if feature.DefaultMutableFeatureGate.Enabled(features.EnableApplicationStatusMetrics) {
|
||||||
|
statusMetrics := []prometheus.Collector{
|
||||||
|
ApplicationHealthStatus,
|
||||||
|
ApplicationPhase,
|
||||||
|
WorkflowPhase,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, metric := range statusMetrics {
|
||||||
|
if err := metrics.Registry.Register(metric); err != nil {
|
||||||
|
klog.Errorf("Failed to register application status metric: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
applicationStatusMetricsRegistered = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue