/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package job
import (
"fmt"
batch "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
)
// matchPodFailurePolicy returns information about matching a given failed pod
// against the pod failure policy rules. The information is represented as an
// optional job failure message (present in case the pod matched a 'FailJob'
2022-10-31 23:50:45 +08:00
// rule), a boolean indicating if the failure should be counted towards
// backoffLimit (it should not be counted if the pod matched an 'Ignore' rule),
// and a pointer to the matched pod failure policy action.
func matchPodFailurePolicy ( podFailurePolicy * batch . PodFailurePolicy , failedPod * v1 . Pod ) ( * string , bool , * batch . PodFailurePolicyAction ) {
2022-08-04 14:21:32 +08:00
if podFailurePolicy == nil {
2022-10-31 23:50:45 +08:00
return nil , true , nil
2022-08-04 14:21:32 +08:00
}
2022-10-31 23:50:45 +08:00
ignore := batch . PodFailurePolicyActionIgnore
failJob := batch . PodFailurePolicyActionFailJob
count := batch . PodFailurePolicyActionCount
2022-08-04 14:21:32 +08:00
for index , podFailurePolicyRule := range podFailurePolicy . Rules {
if podFailurePolicyRule . OnExitCodes != nil {
if containerStatus := matchOnExitCodes ( & failedPod . Status , podFailurePolicyRule . OnExitCodes ) ; containerStatus != nil {
switch podFailurePolicyRule . Action {
case batch . PodFailurePolicyActionIgnore :
2022-10-31 23:50:45 +08:00
return nil , false , & ignore
2022-08-04 14:21:32 +08:00
case batch . PodFailurePolicyActionCount :
2022-10-31 23:50:45 +08:00
return nil , true , & count
2022-08-04 14:21:32 +08:00
case batch . PodFailurePolicyActionFailJob :
msg := fmt . Sprintf ( "Container %s for pod %s/%s failed with exit code %v matching %v rule at index %d" ,
containerStatus . Name , failedPod . Namespace , failedPod . Name , containerStatus . State . Terminated . ExitCode , podFailurePolicyRule . Action , index )
2022-10-31 23:50:45 +08:00
return & msg , true , & failJob
2022-08-04 14:21:32 +08:00
}
}
} else if podFailurePolicyRule . OnPodConditions != nil {
if podCondition := matchOnPodConditions ( & failedPod . Status , podFailurePolicyRule . OnPodConditions ) ; podCondition != nil {
switch podFailurePolicyRule . Action {
case batch . PodFailurePolicyActionIgnore :
2022-10-31 23:50:45 +08:00
return nil , false , & ignore
2022-08-04 14:21:32 +08:00
case batch . PodFailurePolicyActionCount :
2022-10-31 23:50:45 +08:00
return nil , true , & count
2022-08-04 14:21:32 +08:00
case batch . PodFailurePolicyActionFailJob :
msg := fmt . Sprintf ( "Pod %s/%s has condition %v matching %v rule at index %d" ,
failedPod . Namespace , failedPod . Name , podCondition . Type , podFailurePolicyRule . Action , index )
2022-10-31 23:50:45 +08:00
return & msg , true , & failJob
2022-08-04 14:21:32 +08:00
}
}
}
}
2022-10-31 23:50:45 +08:00
return nil , true , nil
2022-08-04 14:21:32 +08:00
}
2022-11-11 23:05:03 +08:00
// matchOnExitCodes returns a terminated container status that matches the error code requirement, if any exists.
// If the returned status is non-nil, it has a non-nil Terminated field.
2022-08-04 14:21:32 +08:00
func matchOnExitCodes ( podStatus * v1 . PodStatus , requirement * batch . PodFailurePolicyOnExitCodesRequirement ) * v1 . ContainerStatus {
if containerStatus := getMatchingContainerFromList ( podStatus . ContainerStatuses , requirement ) ; containerStatus != nil {
return containerStatus
}
return getMatchingContainerFromList ( podStatus . InitContainerStatuses , requirement )
}
// matchOnPodConditions returns a copy of the first pod condition (in the
// order they appear in podStatus.Conditions) whose Type and Status both equal
// those of at least one pattern in requirement, or nil if there is none.
func matchOnPodConditions(podStatus *v1.PodStatus, requirement []batch.PodFailurePolicyOnPodConditionsPattern) *v1.PodCondition {
	for i := range podStatus.Conditions {
		candidate := &podStatus.Conditions[i]
		for j := range requirement {
			if requirement[j].Type != candidate.Type || requirement[j].Status != candidate.Status {
				continue
			}
			// Hand back a copy rather than a pointer into the pod status slice.
			matched := *candidate
			return &matched
		}
	}
	return nil
}
2022-11-11 23:05:03 +08:00
// getMatchingContainerFromList returns the first terminated container status in the list that matches the error code requirement, or nil if none match.
// If the returned status is non-nil, it has a non-nil Terminated field
2022-08-04 14:21:32 +08:00
func getMatchingContainerFromList ( containerStatuses [ ] v1 . ContainerStatus , requirement * batch . PodFailurePolicyOnExitCodesRequirement ) * v1 . ContainerStatus {
for _ , containerStatus := range containerStatuses {
2022-11-11 23:05:03 +08:00
if containerStatus . State . Terminated == nil {
// This container is still be terminating. There is no exit code to match.
continue
}
2022-08-04 14:21:32 +08:00
if requirement . ContainerName == nil || * requirement . ContainerName == containerStatus . Name {
if containerStatus . State . Terminated . ExitCode != 0 {
if isOnExitCodesOperatorMatching ( containerStatus . State . Terminated . ExitCode , requirement ) {
return & containerStatus
}
}
}
}
return nil
}
// isOnExitCodesOperatorMatching reports whether exitCode satisfies the
// requirement's operator with respect to requirement.Values:
//   - In:    true when exitCode is one of the values;
//   - NotIn: true when exitCode is none of the values.
//
// Any other operator never matches.
func isOnExitCodesOperatorMatching(exitCode int32, requirement *batch.PodFailurePolicyOnExitCodesRequirement) bool {
	// resultIfListed is the answer when exitCode appears in Values; the
	// answer when it does not appear is the negation.
	var resultIfListed bool
	switch requirement.Operator {
	case batch.PodFailurePolicyOnExitCodesOpIn:
		resultIfListed = true
	case batch.PodFailurePolicyOnExitCodesOpNotIn:
		resultIfListed = false
	default:
		return false
	}

	for _, candidate := range requirement.Values {
		if candidate == exitCode {
			return resultIfListed
		}
	}
	return !resultIfListed
}