/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package daemon

import (
	"context"
	"fmt"
	"reflect"
	"sort"
	"sync"
	"time"

	"k8s.io/klog/v2"

	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	appsinformers "k8s.io/client-go/informers/apps/v1"
	coreinformers "k8s.io/client-go/informers/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	unversionedapps "k8s.io/client-go/kubernetes/typed/apps/v1"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	appslisters "k8s.io/client-go/listers/apps/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/flowcontrol"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/component-base/metrics/prometheus/ratelimiter"
	v1helper "k8s.io/component-helpers/scheduling/corev1"
	"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/daemon/util"
	"k8s.io/utils/integer"
)
const (
	// BurstReplicas is a rate limiter for booting pods on a lot of pods.
	// The value of 250 is chosen b/c values that are too high can cause registry DoS issues.
	BurstReplicas = 250

	// StatusUpdateRetries limits the number of retries if sending a status update to API server fails.
	StatusUpdateRetries = 1

	// BackoffGCInterval is the time that has to pass before next iteration of backoff GC is run
	BackoffGCInterval = 1 * time.Minute
)

// Reasons for DaemonSet events
const (
	// SelectingAllReason is added to an event when a DaemonSet selects all Pods.
	SelectingAllReason = "SelectingAll"
	// FailedPlacementReason is added to an event when a DaemonSet can't schedule a Pod to a specified node.
	FailedPlacementReason = "FailedPlacement"
	// FailedDaemonPodReason is added to an event when the status of a Pod of a DaemonSet is 'Failed'.
	FailedDaemonPodReason = "FailedDaemonPod"
)

// controllerKind contains the schema.GroupVersionKind for this controller type.
var controllerKind = apps.SchemeGroupVersion.WithKind("DaemonSet")

// DaemonSetsController is responsible for synchronizing DaemonSet objects stored
// in the system with actual running pods.
type DaemonSetsController struct {
	kubeClient    clientset.Interface
	eventRecorder record.EventRecorder
	podControl    controller.PodControlInterface
	crControl     controller.ControllerRevisionControlInterface

	// A dsc is temporarily suspended after creating/deleting these many replicas.
	// It resumes normal action after observing the watch events for them.
	burstReplicas int

	// To allow injection of syncDaemonSet for testing.
	syncHandler func(ctx context.Context, dsKey string) error
	// used for unit testing
	enqueueDaemonSet func(ds *apps.DaemonSet)
	// A TTLCache of pod creates/deletes each ds expects to see
	expectations controller.ControllerExpectationsInterface
	// dsLister can list/get daemonsets from the shared informer's store
	dsLister appslisters.DaemonSetLister
	// dsStoreSynced returns true if the daemonset store has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	dsStoreSynced cache.InformerSynced
	// historyLister can list/get ControllerRevisions from the shared informer's store
	historyLister appslisters.ControllerRevisionLister
	// historyStoreSynced returns true if the history store has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	historyStoreSynced cache.InformerSynced
	// podLister can list/get pods from the shared informer's store
	podLister corelisters.PodLister
	// podNodeIndex indexes pods by their nodeName
	podNodeIndex cache.Indexer
	// podStoreSynced returns true if the pod store has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	podStoreSynced cache.InformerSynced
	// nodeLister can list/get nodes from the shared informer's store
	nodeLister corelisters.NodeLister
	// nodeStoreSynced returns true if the node store has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	nodeStoreSynced cache.InformerSynced

	// DaemonSet keys that need to be synced.
	queue workqueue.RateLimitingInterface

	// failedPodsBackoff limits how often failed daemon pods are deleted per
	// (DaemonSet, node) pair, to avoid hot-looping against kubelets that keep
	// rejecting the pod.
	failedPodsBackoff *flowcontrol.Backoff
}
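
// A note on the expectations mechanism used throughout this controller: before
// issuing creates or deletes for a DaemonSet, syncNodes records how many of
// each it expects to observe (SetExpectations), and the pod event handlers
// decrement those counts as the corresponding watch events arrive
// (CreationObserved / DeletionObserved). The sync loop does not act on a
// DaemonSet again until its outstanding expectations are fulfilled or expire
// from the TTLCache, which keeps the controller from creating duplicate pods
// off a stale informer cache.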

// NewDaemonSetsController creates a new DaemonSetsController
func NewDaemonSetsController(
	daemonSetInformer appsinformers.DaemonSetInformer,
	historyInformer appsinformers.ControllerRevisionInformer,
	podInformer coreinformers.PodInformer,
	nodeInformer coreinformers.NodeInformer,
	kubeClient clientset.Interface,
	failedPodsBackoff *flowcontrol.Backoff,
) (*DaemonSetsController, error) {
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartStructuredLogging(0)
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})

	if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
		if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage("daemon_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil {
			return nil, err
		}
	}
	dsc := &DaemonSetsController{
		kubeClient:    kubeClient,
		eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "daemonset-controller"}),
		podControl: controller.RealPodControl{
			KubeClient: kubeClient,
			Recorder:   eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "daemonset-controller"}),
		},
		crControl: controller.RealControllerRevisionControl{
			KubeClient: kubeClient,
		},
		burstReplicas: BurstReplicas,
		expectations:  controller.NewControllerExpectations(),
		queue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "daemonset"),
	}

	daemonSetInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addDaemonset,
		UpdateFunc: dsc.updateDaemonset,
		DeleteFunc: dsc.deleteDaemonset,
	})
	dsc.dsLister = daemonSetInformer.Lister()
	dsc.dsStoreSynced = daemonSetInformer.Informer().HasSynced

	historyInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addHistory,
		UpdateFunc: dsc.updateHistory,
		DeleteFunc: dsc.deleteHistory,
	})
	dsc.historyLister = historyInformer.Lister()
	dsc.historyStoreSynced = historyInformer.Informer().HasSynced

	// Watch for creation/deletion of pods. The reason we watch is that we don't want a daemon set to create/delete
	// more pods until all the effects (expectations) of a daemon set's create/delete have been observed.
	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addPod,
		UpdateFunc: dsc.updatePod,
		DeleteFunc: dsc.deletePod,
	})
	dsc.podLister = podInformer.Lister()

	// This custom indexer will index pods based on their NodeName which will decrease the amount of pods we need to get in simulate() call.
	podInformer.Informer().GetIndexer().AddIndexers(cache.Indexers{
		"nodeName": indexByPodNodeName,
	})
	dsc.podNodeIndex = podInformer.Informer().GetIndexer()
	dsc.podStoreSynced = podInformer.Informer().HasSynced

	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addNode,
		UpdateFunc: dsc.updateNode,
	})
	dsc.nodeStoreSynced = nodeInformer.Informer().HasSynced
	dsc.nodeLister = nodeInformer.Lister()

	dsc.syncHandler = dsc.syncDaemonSet
	dsc.enqueueDaemonSet = dsc.enqueue

	dsc.failedPodsBackoff = failedPodsBackoff

	return dsc, nil
}
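
// A minimal sketch of how this constructor is typically wired up, assuming a
// shared informer factory and the backoff values used by kube-controller-manager
// (the real wiring lives there and may differ in detail):
//
//	dsc, err := NewDaemonSetsController(
//		informerFactory.Apps().V1().DaemonSets(),
//		informerFactory.Apps().V1().ControllerRevisions(),
//		informerFactory.Core().V1().Pods(),
//		informerFactory.Core().V1().Nodes(),
//		kubeClient,
//		flowcontrol.NewBackOff(1*time.Second, 15*time.Minute),
//	)
//	if err != nil {
//		return err
//	}
//	informerFactory.Start(ctx.Done())
//	go dsc.Run(ctx, 2)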

// indexByPodNodeName is a cache.IndexFunc that returns the pod's spec.nodeName
// as the index key, skipping pods that are unassigned or already terminated.
func indexByPodNodeName(obj interface{}) ([]string, error) {
	pod, ok := obj.(*v1.Pod)
	if !ok {
		return []string{}, nil
	}
	// We are only interested in active pods with nodeName set
	if len(pod.Spec.NodeName) == 0 || pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
		return []string{}, nil
	}
	return []string{pod.Spec.NodeName}, nil
}
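
// The "nodeName" index registered in NewDaemonSetsController is queried through
// the generic cache.Indexer API, roughly:
//
//	objs, err := dsc.podNodeIndex.ByIndex("nodeName", node.Name)
//
// which returns only the pods assigned to that node instead of listing every
// pod in the namespace and filtering.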

func (dsc *DaemonSetsController) addDaemonset(obj interface{}) {
	ds := obj.(*apps.DaemonSet)
	klog.V(4).Infof("Adding daemon set %s", ds.Name)
	dsc.enqueueDaemonSet(ds)
}

// Note: the informer's UpdateFunc passes (oldObj, newObj), so the parameters
// are declared in that order here.
func (dsc *DaemonSetsController) updateDaemonset(old, cur interface{}) {
	oldDS := old.(*apps.DaemonSet)
	curDS := cur.(*apps.DaemonSet)

	// TODO: make a KEP and fix informers to always call the delete event handler on re-create
	if curDS.UID != oldDS.UID {
		key, err := controller.KeyFunc(oldDS)
		if err != nil {
			utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", oldDS, err))
			return
		}
		dsc.deleteDaemonset(cache.DeletedFinalStateUnknown{
			Key: key,
			Obj: oldDS,
		})
	}

	klog.V(4).Infof("Updating daemon set %s", oldDS.Name)
	dsc.enqueueDaemonSet(curDS)
}

func (dsc *DaemonSetsController) deleteDaemonset(obj interface{}) {
	ds, ok := obj.(*apps.DaemonSet)
	if !ok {
		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj))
			return
		}
		ds, ok = tombstone.Obj.(*apps.DaemonSet)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a DaemonSet %#v", obj))
			return
		}
	}
	klog.V(4).Infof("Deleting daemon set %s", ds.Name)

	key, err := controller.KeyFunc(ds)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", ds, err))
		return
	}

	// Delete expectations for the DaemonSet so if we create a new one with the same name it starts clean
	dsc.expectations.DeleteExpectations(key)

	dsc.queue.Add(key)
}

// Run begins watching and syncing daemon sets.
func (dsc *DaemonSetsController) Run(ctx context.Context, workers int) {
	defer utilruntime.HandleCrash()
	defer dsc.queue.ShutDown()

	klog.Infof("Starting daemon sets controller")
	defer klog.Infof("Shutting down daemon sets controller")

	if !cache.WaitForNamedCacheSync("daemon sets", ctx.Done(), dsc.podStoreSynced, dsc.nodeStoreSynced, dsc.historyStoreSynced, dsc.dsStoreSynced) {
		return
	}

	for i := 0; i < workers; i++ {
		go wait.UntilWithContext(ctx, dsc.runWorker, time.Second)
	}

	go wait.Until(dsc.failedPodsBackoff.GC, BackoffGCInterval, ctx.Done())

	<-ctx.Done()
}

func (dsc *DaemonSetsController) runWorker(ctx context.Context) {
	for dsc.processNextWorkItem(ctx) {
	}
}

// processNextWorkItem deals with one key off the queue. It returns false when it's time to quit.
func (dsc *DaemonSetsController) processNextWorkItem(ctx context.Context) bool {
	dsKey, quit := dsc.queue.Get()
	if quit {
		return false
	}
	defer dsc.queue.Done(dsKey)

	err := dsc.syncHandler(ctx, dsKey.(string))
	if err == nil {
		dsc.queue.Forget(dsKey)
		return true
	}

	utilruntime.HandleError(fmt.Errorf("%v failed with: %v", dsKey, err))
	dsc.queue.AddRateLimited(dsKey)

	return true
}
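
// Note on the queue semantics above: Forget only clears the rate limiter's
// failure history for the key (the key itself leaves the queue via the
// deferred Done), while AddRateLimited re-enqueues the key after a backoff
// that grows with the number of consecutive failures recorded for it.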

func (dsc *DaemonSetsController) enqueue(ds *apps.DaemonSet) {
	key, err := controller.KeyFunc(ds)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", ds, err))
		return
	}

	// TODO: Handle overlapping controllers better. See comment in ReplicationManager.
	dsc.queue.Add(key)
}

func (dsc *DaemonSetsController) enqueueDaemonSetAfter(obj interface{}, after time.Duration) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
		return
	}

	// TODO: Handle overlapping controllers better. See comment in ReplicationManager.
	dsc.queue.AddAfter(key, after)
}

// getDaemonSetsForPod returns a list of DaemonSets that potentially match the pod.
func (dsc *DaemonSetsController) getDaemonSetsForPod(pod *v1.Pod) []*apps.DaemonSet {
	sets, err := dsc.dsLister.GetPodDaemonSets(pod)
	if err != nil {
		return nil
	}
	if len(sets) > 1 {
		// ControllerRef will ensure we don't do anything crazy, but more than one
		// item in this list nevertheless constitutes user error.
		utilruntime.HandleError(fmt.Errorf("user error! more than one daemon is selecting pods with labels: %+v", pod.Labels))
	}

	return sets
}

// getDaemonSetsForHistory returns a list of DaemonSets that potentially
// match a ControllerRevision.
func (dsc *DaemonSetsController) getDaemonSetsForHistory(history *apps.ControllerRevision) []*apps.DaemonSet {
	daemonSets, err := dsc.dsLister.GetHistoryDaemonSets(history)
	if err != nil || len(daemonSets) == 0 {
		return nil
	}
	if len(daemonSets) > 1 {
		// ControllerRef will ensure we don't do anything crazy, but more than one
		// item in this list nevertheless constitutes user error.
		klog.V(4).Infof("User error! more than one DaemonSet is selecting ControllerRevision %s/%s with labels: %#v",
			history.Namespace, history.Name, history.Labels)
	}
	return daemonSets
}

// addHistory enqueues the DaemonSet that manages a ControllerRevision when the ControllerRevision is created
// or when the controller manager is restarted.
func (dsc *DaemonSetsController) addHistory(obj interface{}) {
	history := obj.(*apps.ControllerRevision)
	if history.DeletionTimestamp != nil {
		// On a restart of the controller manager, it's possible for an object to
		// show up in a state that is already pending deletion.
		dsc.deleteHistory(history)
		return
	}

	// If it has a ControllerRef, that's all that matters.
	if controllerRef := metav1.GetControllerOf(history); controllerRef != nil {
		ds := dsc.resolveControllerRef(history.Namespace, controllerRef)
		if ds == nil {
			return
		}
		klog.V(4).Infof("ControllerRevision %s added.", history.Name)
		return
	}

	// Otherwise, it's an orphan. Get a list of all matching DaemonSets and sync
	// them to see if anyone wants to adopt it.
	daemonSets := dsc.getDaemonSetsForHistory(history)
	if len(daemonSets) == 0 {
		return
	}
	klog.V(4).Infof("Orphan ControllerRevision %s added.", history.Name)
	for _, ds := range daemonSets {
		dsc.enqueueDaemonSet(ds)
	}
}

// updateHistory figures out what DaemonSet(s) manage a ControllerRevision when the ControllerRevision
// is updated and wakes them up. If anything in the ControllerRevision has changed, we need to awaken
// both the old and new DaemonSets.
func (dsc *DaemonSetsController) updateHistory(old, cur interface{}) {
	curHistory := cur.(*apps.ControllerRevision)
	oldHistory := old.(*apps.ControllerRevision)
	if curHistory.ResourceVersion == oldHistory.ResourceVersion {
		// Periodic resync will send update events for all known ControllerRevisions.
		return
	}

	curControllerRef := metav1.GetControllerOf(curHistory)
	oldControllerRef := metav1.GetControllerOf(oldHistory)
	controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
	if controllerRefChanged && oldControllerRef != nil {
		// The ControllerRef was changed. Sync the old controller, if any.
		if ds := dsc.resolveControllerRef(oldHistory.Namespace, oldControllerRef); ds != nil {
			dsc.enqueueDaemonSet(ds)
		}
	}

	// If it has a ControllerRef, that's all that matters.
	if curControllerRef != nil {
		ds := dsc.resolveControllerRef(curHistory.Namespace, curControllerRef)
		if ds == nil {
			return
		}
		klog.V(4).Infof("ControllerRevision %s updated.", curHistory.Name)
		dsc.enqueueDaemonSet(ds)
		return
	}

	// Otherwise, it's an orphan. If anything changed, sync matching controllers
	// to see if anyone wants to adopt it now.
	labelChanged := !reflect.DeepEqual(curHistory.Labels, oldHistory.Labels)
	if labelChanged || controllerRefChanged {
		daemonSets := dsc.getDaemonSetsForHistory(curHistory)
		if len(daemonSets) == 0 {
			return
		}
		klog.V(4).Infof("Orphan ControllerRevision %s updated.", curHistory.Name)
		for _, ds := range daemonSets {
			dsc.enqueueDaemonSet(ds)
		}
	}
}

// deleteHistory enqueues the DaemonSet that manages a ControllerRevision when
// the ControllerRevision is deleted. obj could be an *apps.ControllerRevision, or
// a DeletedFinalStateUnknown marker item.
func (dsc *DaemonSetsController) deleteHistory(obj interface{}) {
	history, ok := obj.(*apps.ControllerRevision)
	// When a delete is dropped, the relist will notice a ControllerRevision in the store not
	// in the list, leading to the insertion of a tombstone object which contains
	// the deleted key/value. Note that this value might be stale. If the ControllerRevision
	// changed labels the new DaemonSet will not be woken up till the periodic resync.
	if !ok {
		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj))
			return
		}
		history, ok = tombstone.Obj.(*apps.ControllerRevision)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a ControllerRevision %#v", obj))
			return
		}
	}

	controllerRef := metav1.GetControllerOf(history)
	if controllerRef == nil {
		// No controller should care about orphans being deleted.
		return
	}
	ds := dsc.resolveControllerRef(history.Namespace, controllerRef)
	if ds == nil {
		return
	}
	klog.V(4).Infof("ControllerRevision %s deleted.", history.Name)
	dsc.enqueueDaemonSet(ds)
}

func (dsc *DaemonSetsController) addPod(obj interface{}) {
	pod := obj.(*v1.Pod)

	if pod.DeletionTimestamp != nil {
		// On a restart of the controller manager, it's possible a new pod shows up in a state that
		// is already pending deletion. Prevent the pod from being a creation observation.
		dsc.deletePod(pod)
		return
	}

	// If it has a ControllerRef, that's all that matters.
	if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil {
		ds := dsc.resolveControllerRef(pod.Namespace, controllerRef)
		if ds == nil {
			return
		}
		dsKey, err := controller.KeyFunc(ds)
		if err != nil {
			return
		}
		klog.V(4).Infof("Pod %s added.", pod.Name)
		dsc.expectations.CreationObserved(dsKey)
		dsc.enqueueDaemonSet(ds)
		return
	}

	// Otherwise, it's an orphan. Get a list of all matching DaemonSets and sync
	// them to see if anyone wants to adopt it.
	// DO NOT observe creation because no controller should be waiting for an
	// orphan.
	dss := dsc.getDaemonSetsForPod(pod)
	if len(dss) == 0 {
		return
	}
	klog.V(4).Infof("Orphan Pod %s added.", pod.Name)
	for _, ds := range dss {
		dsc.enqueueDaemonSet(ds)
	}
}

// When a pod is updated, figure out what sets manage it and wake them
// up. If the labels of the pod have changed we need to awaken both the old
// and new set. old and cur must be *v1.Pod types.
func (dsc *DaemonSetsController) updatePod(old, cur interface{}) {
	curPod := cur.(*v1.Pod)
	oldPod := old.(*v1.Pod)
	if curPod.ResourceVersion == oldPod.ResourceVersion {
		// Periodic resync will send update events for all known pods.
		// Two different versions of the same pod will always have different RVs.
		return
	}

	if curPod.DeletionTimestamp != nil {
		// When a pod is deleted gracefully its deletion timestamp is first modified to reflect a grace period,
		// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
		// for modification of the deletion timestamp and expect a ds to create more replicas asap, not wait
		// until the kubelet actually deletes the pod.
		dsc.deletePod(curPod)
		return
	}

	curControllerRef := metav1.GetControllerOf(curPod)
	oldControllerRef := metav1.GetControllerOf(oldPod)
	controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
	if controllerRefChanged && oldControllerRef != nil {
		// The ControllerRef was changed. Sync the old controller, if any.
		if ds := dsc.resolveControllerRef(oldPod.Namespace, oldControllerRef); ds != nil {
			dsc.enqueueDaemonSet(ds)
		}
	}

	// If it has a ControllerRef, that's all that matters.
	if curControllerRef != nil {
		ds := dsc.resolveControllerRef(curPod.Namespace, curControllerRef)
		if ds == nil {
			return
		}
		klog.V(4).Infof("Pod %s updated.", curPod.Name)
		dsc.enqueueDaemonSet(ds)
		changedToReady := !podutil.IsPodReady(oldPod) && podutil.IsPodReady(curPod)
		// See https://github.com/kubernetes/kubernetes/pull/38076 for more details
		if changedToReady && ds.Spec.MinReadySeconds > 0 {
			// Add a second to avoid milliseconds skew in AddAfter.
			// See https://github.com/kubernetes/kubernetes/issues/39785#issuecomment-279959133 for more info.
			dsc.enqueueDaemonSetAfter(ds, (time.Duration(ds.Spec.MinReadySeconds)*time.Second)+time.Second)
		}
		return
	}

	// Otherwise, it's an orphan. If anything changed, sync matching controllers
	// to see if anyone wants to adopt it now.
	dss := dsc.getDaemonSetsForPod(curPod)
	if len(dss) == 0 {
		return
	}
	klog.V(4).Infof("Orphan Pod %s updated.", curPod.Name)
	labelChanged := !reflect.DeepEqual(curPod.Labels, oldPod.Labels)
	if labelChanged || controllerRefChanged {
		for _, ds := range dss {
			dsc.enqueueDaemonSet(ds)
		}
	}
}

func (dsc *DaemonSetsController) deletePod(obj interface{}) {
	pod, ok := obj.(*v1.Pod)
	// When a delete is dropped, the relist will notice a pod in the store not
	// in the list, leading to the insertion of a tombstone object which contains
	// the deleted key/value. Note that this value might be stale. If the pod
	// changed labels the new daemonset will not be woken up till the periodic
	// resync.
	if !ok {
		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj))
			return
		}
		pod, ok = tombstone.Obj.(*v1.Pod)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a pod %#v", obj))
			return
		}
	}

	controllerRef := metav1.GetControllerOf(pod)
	if controllerRef == nil {
		// No controller should care about orphans being deleted.
		return
	}
	ds := dsc.resolveControllerRef(pod.Namespace, controllerRef)
	if ds == nil {
		return
	}
	dsKey, err := controller.KeyFunc(ds)
	if err != nil {
		return
	}
	klog.V(4).Infof("Pod %s deleted.", pod.Name)
	dsc.expectations.DeletionObserved(dsKey)
	dsc.enqueueDaemonSet(ds)
}

func (dsc *DaemonSetsController) addNode(obj interface{}) {
	// TODO: it'd be nice to pass a hint with these enqueues, so that each ds would only examine the added node (unless it has other work to do, too).
	dsList, err := dsc.dsLister.List(labels.Everything())
	if err != nil {
		klog.V(4).Infof("Error enqueueing daemon sets: %v", err)
		return
	}
	node := obj.(*v1.Node)
	for _, ds := range dsList {
		if shouldRun, _ := dsc.nodeShouldRunDaemonPod(node, ds); shouldRun {
			dsc.enqueueDaemonSet(ds)
		}
	}
}

// nodeInSameCondition returns true when the two condition lists contain the
// same set of effective conditions, i.e. conditions whose Status is True;
// conditions with any other status are ignored on both sides.
func nodeInSameCondition(old []v1.NodeCondition, cur []v1.NodeCondition) bool {
	if len(old) == 0 && len(cur) == 0 {
		return true
	}

	c1map := map[v1.NodeConditionType]v1.ConditionStatus{}
	for _, c := range old {
		if c.Status == v1.ConditionTrue {
			c1map[c.Type] = c.Status
		}
	}

	for _, c := range cur {
		if c.Status != v1.ConditionTrue {
			continue
		}

		if _, found := c1map[c.Type]; !found {
			return false
		}

		delete(c1map, c.Type)
	}

	return len(c1map) == 0
}
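
// For example, old = [Ready=True, MemoryPressure=False] and
// cur = [Ready=True, DiskPressure=False] are "in the same condition": only
// conditions whose Status is True participate in the comparison, and both
// lists have exactly {Ready} as their true set.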

// shouldIgnoreNodeUpdate returns true when the node update is irrelevant to
// DaemonSets: the effective conditions are unchanged and, once ResourceVersion
// and Conditions are normalized on the (pass-by-value) copies, the two nodes
// are semantically equal.
func shouldIgnoreNodeUpdate(oldNode, curNode v1.Node) bool {
	if !nodeInSameCondition(oldNode.Status.Conditions, curNode.Status.Conditions) {
		return false
	}
	oldNode.ResourceVersion = curNode.ResourceVersion
	oldNode.Status.Conditions = curNode.Status.Conditions
	return apiequality.Semantic.DeepEqual(oldNode, curNode)
}

func (dsc *DaemonSetsController) updateNode(old, cur interface{}) {
	oldNode := old.(*v1.Node)
	curNode := cur.(*v1.Node)
	if shouldIgnoreNodeUpdate(*oldNode, *curNode) {
		return
	}

	dsList, err := dsc.dsLister.List(labels.Everything())
	if err != nil {
		klog.V(4).Infof("Error listing daemon sets: %v", err)
		return
	}
	// TODO: it'd be nice to pass a hint with these enqueues, so that each ds would only examine the added node (unless it has other work to do, too).
	for _, ds := range dsList {
		oldShouldRun, oldShouldContinueRunning := dsc.nodeShouldRunDaemonPod(oldNode, ds)
		currentShouldRun, currentShouldContinueRunning := dsc.nodeShouldRunDaemonPod(curNode, ds)
		if (oldShouldRun != currentShouldRun) || (oldShouldContinueRunning != currentShouldContinueRunning) {
			dsc.enqueueDaemonSet(ds)
		}
	}
}

// getDaemonPods returns daemon pods owned by the given ds.
// This also reconciles ControllerRef by adopting/orphaning.
// Note that returned Pods are pointers to objects in the cache.
// If you want to modify one, you need to deep-copy it first.
func (dsc *DaemonSetsController) getDaemonPods(ctx context.Context, ds *apps.DaemonSet) ([]*v1.Pod, error) {
	selector, err := metav1.LabelSelectorAsSelector(ds.Spec.Selector)
	if err != nil {
		return nil, err
	}

	// List all pods to include those that don't match the selector anymore but
	// have a ControllerRef pointing to this controller.
	pods, err := dsc.podLister.Pods(ds.Namespace).List(labels.Everything())
	if err != nil {
		return nil, err
	}

	// If any adoptions are attempted, we should first recheck for deletion with
	// an uncached quorum read sometime after listing Pods (see #42639).
	dsNotDeleted := controller.RecheckDeletionTimestamp(func(ctx context.Context) (metav1.Object, error) {
		fresh, err := dsc.kubeClient.AppsV1().DaemonSets(ds.Namespace).Get(ctx, ds.Name, metav1.GetOptions{})
		if err != nil {
			return nil, err
		}
		if fresh.UID != ds.UID {
			return nil, fmt.Errorf("original DaemonSet %v/%v is gone: got uid %v, wanted %v", ds.Namespace, ds.Name, fresh.UID, ds.UID)
		}
		return fresh, nil
	})

	// Use ControllerRefManager to adopt/orphan as needed.
	cm := controller.NewPodControllerRefManager(dsc.podControl, ds, selector, controllerKind, dsNotDeleted)
	return cm.ClaimPods(ctx, pods)
}

// getNodesToDaemonPods returns a map from nodes to daemon pods (corresponding to ds) created for the nodes.
// This also reconciles ControllerRef by adopting/orphaning.
// Note that returned Pods are pointers to objects in the cache.
// If you want to modify one, you need to deep-copy it first.
func (dsc *DaemonSetsController) getNodesToDaemonPods(ctx context.Context, ds *apps.DaemonSet) (map[string][]*v1.Pod, error) {
	claimedPods, err := dsc.getDaemonPods(ctx, ds)
	if err != nil {
		return nil, err
	}
	// Group Pods by Node name.
	nodeToDaemonPods := make(map[string][]*v1.Pod)
	for _, pod := range claimedPods {
		nodeName, err := util.GetTargetNodeName(pod)
		if err != nil {
			klog.Warningf("Failed to get target node name of Pod %v/%v in DaemonSet %v/%v",
				pod.Namespace, pod.Name, ds.Namespace, ds.Name)
			continue
		}
		nodeToDaemonPods[nodeName] = append(nodeToDaemonPods[nodeName], pod)
	}

	return nodeToDaemonPods, nil
}
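
// A note on util.GetTargetNodeName above: a daemon pod scheduled by the
// default scheduler may not have spec.nodeName set yet, so the helper falls
// back to the target node name encoded in the pod's required NodeAffinity
// (the affinity written by ReplaceDaemonSetPodNodeNameNodeAffinity in
// syncNodes).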

// resolveControllerRef returns the controller referenced by a ControllerRef,
// or nil if the ControllerRef could not be resolved to a matching controller
// of the correct Kind.
func (dsc *DaemonSetsController) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *apps.DaemonSet {
	// We can't look up by UID, so look up by Name and then verify UID.
	// Don't even try to look up by Name if it's the wrong Kind.
	if controllerRef.Kind != controllerKind.Kind {
		return nil
	}
	ds, err := dsc.dsLister.DaemonSets(namespace).Get(controllerRef.Name)
	if err != nil {
		return nil
	}
	if ds.UID != controllerRef.UID {
		// The controller we found with this Name is not the same one that the
		// ControllerRef points to.
		return nil
	}
	return ds
}
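
// The UID comparison above matters when a DaemonSet is deleted and re-created
// under the same name: the lister would resolve the name to the new object,
// and without checking the UID the controller could attribute the old
// controller's children to the new one.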

// podsShouldBeOnNode figures out the DaemonSet pods to be created and deleted on the given node:
//   - nodesNeedingDaemonPods: the nodes that need to start a daemon pod
//   - podsToDelete: the pods that need to be deleted on the node
func (dsc *DaemonSetsController) podsShouldBeOnNode(
	node *v1.Node,
	nodeToDaemonPods map[string][]*v1.Pod,
	ds *apps.DaemonSet,
	hash string,
) (nodesNeedingDaemonPods, podsToDelete []string) {

	shouldRun, shouldContinueRunning := dsc.nodeShouldRunDaemonPod(node, ds)
	daemonPods, exists := nodeToDaemonPods[node.Name]

	switch {
	case shouldRun && !exists:
		// If daemon pod is supposed to be running on node, but isn't, create daemon pod.
		nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, node.Name)
	case shouldContinueRunning:
		// If a daemon pod failed, delete it
		// If there's non-daemon pods left on this node, we will create it in the next sync loop
		var daemonPodsRunning []*v1.Pod
		for _, pod := range daemonPods {
			if pod.DeletionTimestamp != nil {
				continue
			}
			if pod.Status.Phase == v1.PodFailed {
				// This is a critical place where DS is often fighting with kubelet that rejects pods.
				// We need to avoid hot looping and backoff.
				backoffKey := failedPodsBackoffKey(ds, node.Name)

				now := dsc.failedPodsBackoff.Clock.Now()
				inBackoff := dsc.failedPodsBackoff.IsInBackOffSinceUpdate(backoffKey, now)
				if inBackoff {
					delay := dsc.failedPodsBackoff.Get(backoffKey)
					klog.V(4).Infof("Deleting failed pod %s/%s on node %s has been limited by backoff - %v remaining",
						pod.Namespace, pod.Name, node.Name, delay)
					dsc.enqueueDaemonSetAfter(ds, delay)
					continue
				}

				dsc.failedPodsBackoff.Next(backoffKey, now)

				msg := fmt.Sprintf("Found failed daemon pod %s/%s on node %s, will try to kill it", pod.Namespace, pod.Name, node.Name)
				klog.V(2).Infof(msg)
				// Emit an event so that it's discoverable to users.
				dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedDaemonPodReason, msg)
				podsToDelete = append(podsToDelete, pod.Name)
			} else {
				daemonPodsRunning = append(daemonPodsRunning, pod)
			}
		}

		// When surge is not enabled, if there is more than 1 running pod on a node delete all but the oldest
		if !util.AllowsSurge(ds) {
			if len(daemonPodsRunning) <= 1 {
				// There are no excess pods to be pruned, and no pods to create
				break
			}

			sort.Sort(podByCreationTimestampAndPhase(daemonPodsRunning))
			for i := 1; i < len(daemonPodsRunning); i++ {
				podsToDelete = append(podsToDelete, daemonPodsRunning[i].Name)
			}
			break
		}

		if len(daemonPodsRunning) <= 1 {
			// There are no excess pods to be pruned
			if len(daemonPodsRunning) == 0 && shouldRun {
				// We are surging so we need to have at least one non-deleted pod on the node
				nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, node.Name)
			}
			break
		}

		// When surge is enabled, we allow 2 pods if and only if the oldest pod matching the current hash state
		// is not ready AND the oldest pod that doesn't match the current hash state is ready. All other pods are
		// deleted. If neither pod is ready, only the one matching the current hash revision is kept.
		var oldestNewPod, oldestOldPod *v1.Pod
		sort.Sort(podByCreationTimestampAndPhase(daemonPodsRunning))
		for _, pod := range daemonPodsRunning {
			if pod.Labels[apps.ControllerRevisionHashLabelKey] == hash {
				if oldestNewPod == nil {
					oldestNewPod = pod
					continue
				}
			} else {
				if oldestOldPod == nil {
					oldestOldPod = pod
					continue
				}
			}
			podsToDelete = append(podsToDelete, pod.Name)
		}
		if oldestNewPod != nil && oldestOldPod != nil {
			switch {
			case !podutil.IsPodReady(oldestOldPod):
				klog.V(5).Infof("Pod %s/%s from daemonset %s is no longer ready and will be replaced with newer pod %s", oldestOldPod.Namespace, oldestOldPod.Name, ds.Name, oldestNewPod.Name)
				podsToDelete = append(podsToDelete, oldestOldPod.Name)
			case podutil.IsPodAvailable(oldestNewPod, ds.Spec.MinReadySeconds, metav1.Time{Time: dsc.failedPodsBackoff.Clock.Now()}):
				klog.V(5).Infof("Pod %s/%s from daemonset %s is now ready and will replace older pod %s", oldestNewPod.Namespace, oldestNewPod.Name, ds.Name, oldestOldPod.Name)
				podsToDelete = append(podsToDelete, oldestOldPod.Name)
			}
		}

	case !shouldContinueRunning && exists:
		// If daemon pod isn't supposed to run on node, but it is, delete all daemon pods on node.
		for _, pod := range daemonPods {
			if pod.DeletionTimestamp != nil {
				continue
			}
			podsToDelete = append(podsToDelete, pod.Name)
		}
	}

	return nodesNeedingDaemonPods, podsToDelete
}
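
// To summarize the surge branch above: after pruning, at most two pods remain
// per node, the oldest pod matching the current hash ("new") and the oldest
// pod that doesn't ("old"), and they resolve as follows:
//
//	old pod no longer ready               -> delete the old pod immediately
//	new pod available for MinReadySeconds -> delete the old pod (handover done)
//	old ready, new not yet available      -> keep both (the surge window)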

// manage manages the scheduling and running of Pods of ds on nodes.
// After figuring out which nodes should run a Pod of ds but not yet running one and
// which nodes should not run a Pod of ds but currently running one, it calls function
// syncNodes with a list of pods to remove and a list of nodes to run a Pod of ds.
func (dsc *DaemonSetsController) manage(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, hash string) error {
	// Find out the pods which are created for the nodes by DaemonSet.
	nodeToDaemonPods, err := dsc.getNodesToDaemonPods(ctx, ds)
	if err != nil {
		return fmt.Errorf("couldn't get node to daemon pod mapping for daemon set %q: %v", ds.Name, err)
	}

	// For each node, if the node is running the daemon pod but isn't supposed to, kill the daemon
	// pod. If the node is supposed to run the daemon pod, but isn't, create the daemon pod on the node.
	var nodesNeedingDaemonPods, podsToDelete []string
	for _, node := range nodeList {
		nodesNeedingDaemonPodsOnNode, podsToDeleteOnNode := dsc.podsShouldBeOnNode(
			node, nodeToDaemonPods, ds, hash)

		nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, nodesNeedingDaemonPodsOnNode...)
		podsToDelete = append(podsToDelete, podsToDeleteOnNode...)
	}

	// Remove unscheduled pods assigned to nodes that no longer exist, since daemonset pods are
	// scheduled by the default scheduler. If the node doesn't exist, such pods are never scheduled
	// and can't be deleted by PodGCController.
	podsToDelete = append(podsToDelete, getUnscheduledPodsWithoutNode(nodeList, nodeToDaemonPods)...)

	// Label new pods using the hash label value of the current history when creating them
	if err = dsc.syncNodes(ds, podsToDelete, nodesNeedingDaemonPods, hash); err != nil {
		return err
	}

	return nil
}

// syncNodes deletes the given pods and creates new daemon set pods on the given nodes,
// returning an aggregate of any errors that occurred.
func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nodesNeedingDaemonPods []string, hash string) error {
	// We need to set expectations before creating/deleting pods to avoid race conditions.
	dsKey, err := controller.KeyFunc(ds)
	if err != nil {
		return fmt.Errorf("couldn't get key for object %#v: %v", ds, err)
	}

	createDiff := len(nodesNeedingDaemonPods)
	deleteDiff := len(podsToDelete)

	if createDiff > dsc.burstReplicas {
		createDiff = dsc.burstReplicas
	}
	if deleteDiff > dsc.burstReplicas {
		deleteDiff = dsc.burstReplicas
	}

	dsc.expectations.SetExpectations(dsKey, createDiff, deleteDiff)

	// error channel to communicate back failures. make the buffer big enough to avoid any blocking
	errCh := make(chan error, createDiff+deleteDiff)

	klog.V(4).Infof("Nodes needing daemon pods for daemon set %s: %+v, creating %d", ds.Name, nodesNeedingDaemonPods, createDiff)
	createWait := sync.WaitGroup{}
	// If the returned error is not nil we have a parse error.
	// The controller handles this via the hash.
	generation, err := util.GetTemplateGeneration(ds)
	if err != nil {
		generation = nil
	}
	template := util.CreatePodTemplate(ds.Spec.Template, generation, hash)
	// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
	// and double with each successful iteration in a kind of "slow start".
	// This handles attempts to start large numbers of pods that would
	// likely all fail with the same error. For example a project with a
	// low quota that attempts to create a large number of pods will be
	// prevented from spamming the API service with the pod create requests
	// after one of its pods fails. Conveniently, this also prevents the
	// event spam that those failures would generate.
	batchSize := integer.IntMin(createDiff, controller.SlowStartInitialBatchSize)
	for pos := 0; createDiff > pos; batchSize, pos = integer.IntMin(2*batchSize, createDiff-(pos+batchSize)), pos+batchSize {
		errorCount := len(errCh)
		createWait.Add(batchSize)
		for i := pos; i < pos+batchSize; i++ {
			go func(ix int) {
				defer createWait.Done()

				podTemplate := template.DeepCopy()
				// The pod's NodeAffinity will be updated to make sure the Pod is bound
				// to the target node by default scheduler. It is safe to do so because there
				// should be no conflicting node affinity with the target node.
				podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodNodeNameNodeAffinity(
					podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix])

				err := dsc.podControl.CreatePods(ds.Namespace, podTemplate,
					ds, metav1.NewControllerRef(ds, controllerKind))

				if err != nil {
					if apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
						// If the namespace is being torn down, we can safely ignore
						// this error since all subsequent creations will fail.
						return
					}
				}
				if err != nil {
					klog.V(2).Infof("Failed creation, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
					dsc.expectations.CreationObserved(dsKey)
					errCh <- err
					utilruntime.HandleError(err)
				}
			}(i)
		}
		createWait.Wait()
		// any skipped pods that we never attempted to start shouldn't be expected.
		skippedPods := createDiff - (batchSize + pos)
		if errorCount < len(errCh) && skippedPods > 0 {
			klog.V(2).Infof("Slow-start failure. Skipping creation of %d pods, decrementing expectations for set %q/%q", skippedPods, ds.Namespace, ds.Name)
			dsc.expectations.LowerExpectations(dsKey, skippedPods, 0)
			// The skipped pods will be retried later. The next controller resync will
			// retry the slow start process.
			break
		}
	}

	klog.V(4).Infof("Pods to delete for daemon set %s: %+v, deleting %d", ds.Name, podsToDelete, deleteDiff)
	deleteWait := sync.WaitGroup{}
	deleteWait.Add(deleteDiff)
	for i := 0; i < deleteDiff; i++ {
		go func(ix int) {
			defer deleteWait.Done()
			if err := dsc.podControl.DeletePod(ds.Namespace, podsToDelete[ix], ds); err != nil {
				dsc.expectations.DeletionObserved(dsKey)
				if !apierrors.IsNotFound(err) {
					klog.V(2).Infof("Failed deletion, decremented expectations for set %q/%q", ds.Namespace, ds.Name)
					errCh <- err
					utilruntime.HandleError(err)
				}
			}
		}(i)
	}
	deleteWait.Wait()

	// collect errors if any for proper reporting/retry logic in the controller
	errors := []error{}
	close(errCh)
	for err := range errCh {
		errors = append(errors, err)
	}
	return utilerrors.NewAggregate(errors)
}
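
// A worked example of the slow start above, assuming
// controller.SlowStartInitialBatchSize is 1: with createDiff = 10 the batches
// are sized 1, 2, 4 and then 3 (capped by the pods remaining). If any batch
// produces an error, the loop stops, and the expectations for the pods that
// were never attempted are lowered so a later sync retries them.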

// storeDaemonSetStatus updates the DaemonSet's status if any of the counters
// changed or the observed generation is behind, retrying on failure up to
// StatusUpdateRetries times against a freshly fetched object.
func storeDaemonSetStatus(
	ctx context.Context,
	dsClient unversionedapps.DaemonSetInterface,
	ds *apps.DaemonSet, desiredNumberScheduled,
	currentNumberScheduled,
	numberMisscheduled,
	numberReady,
	updatedNumberScheduled,
	numberAvailable,
	numberUnavailable int,
	updateObservedGen bool) error {
	if int(ds.Status.DesiredNumberScheduled) == desiredNumberScheduled &&
		int(ds.Status.CurrentNumberScheduled) == currentNumberScheduled &&
		int(ds.Status.NumberMisscheduled) == numberMisscheduled &&
		int(ds.Status.NumberReady) == numberReady &&
		int(ds.Status.UpdatedNumberScheduled) == updatedNumberScheduled &&
		int(ds.Status.NumberAvailable) == numberAvailable &&
		int(ds.Status.NumberUnavailable) == numberUnavailable &&
		ds.Status.ObservedGeneration >= ds.Generation {
		return nil
	}

	toUpdate := ds.DeepCopy()

	var updateErr, getErr error
	for i := 0; ; i++ {
		if updateObservedGen {
			toUpdate.Status.ObservedGeneration = ds.Generation
		}
		toUpdate.Status.DesiredNumberScheduled = int32(desiredNumberScheduled)
		toUpdate.Status.CurrentNumberScheduled = int32(currentNumberScheduled)
		toUpdate.Status.NumberMisscheduled = int32(numberMisscheduled)
		toUpdate.Status.NumberReady = int32(numberReady)
		toUpdate.Status.UpdatedNumberScheduled = int32(updatedNumberScheduled)
		toUpdate.Status.NumberAvailable = int32(numberAvailable)
		toUpdate.Status.NumberUnavailable = int32(numberUnavailable)

		if _, updateErr = dsClient.UpdateStatus(ctx, toUpdate, metav1.UpdateOptions{}); updateErr == nil {
			return nil
		}

		// Stop retrying if we exceed statusUpdateRetries - the DaemonSet will be requeued with a rate limit.
		if i >= StatusUpdateRetries {
			break
		}
		// Update the set with the latest resource version for the next poll
		if toUpdate, getErr = dsClient.Get(ctx, ds.Name, metav1.GetOptions{}); getErr != nil {
			// If the GET fails we can't trust status.Replicas anymore. This error
			// is bound to be more interesting than the update failure.
			return getErr
		}
	}
	return updateErr
}
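
// With StatusUpdateRetries = 1, the loop above makes at most two UpdateStatus
// attempts: the initial one, plus one retry against a freshly fetched object.
// A persistent conflict therefore surfaces as an error and defers the status
// update to a later, rate-limited resync of the DaemonSet.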

func (dsc *DaemonSetsController) updateDaemonSetStatus(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, hash string, updateObservedGen bool) error {
	klog.V(4).Infof("Updating daemon set status")
	nodeToDaemonPods, err := dsc.getNodesToDaemonPods(ctx, ds)
	if err != nil {
		return fmt.Errorf("couldn't get node to daemon pod mapping for daemon set %q: %v", ds.Name, err)
	}

	var desiredNumberScheduled, currentNumberScheduled, numberMisscheduled, numberReady, updatedNumberScheduled, numberAvailable int
	now := dsc.failedPodsBackoff.Clock.Now()
	for _, node := range nodeList {
		shouldRun, _ := dsc.nodeShouldRunDaemonPod(node, ds)
		scheduled := len(nodeToDaemonPods[node.Name]) > 0

		if shouldRun {
			desiredNumberScheduled++
			if scheduled {
				currentNumberScheduled++
				// Sort the daemon pods by creation time, so that the oldest is first.
				daemonPods := nodeToDaemonPods[node.Name]
				sort.Sort(podByCreationTimestampAndPhase(daemonPods))
				pod := daemonPods[0]
				if podutil.IsPodReady(pod) {
					numberReady++
					if podutil.IsPodAvailable(pod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}) {
						numberAvailable++
					}
				}
				// If the returned error is not nil we have a parse error.
				// The controller handles this via the hash.
				generation, err := util.GetTemplateGeneration(ds)
				if err != nil {
					generation = nil
				}
				if util.IsPodUpdated(pod, hash, generation) {
					updatedNumberScheduled++
				}
			}
		} else {
			if scheduled {
				numberMisscheduled++
			}
		}
	}
	numberUnavailable := desiredNumberScheduled - numberAvailable

	err = storeDaemonSetStatus(ctx, dsc.kubeClient.AppsV1().DaemonSets(ds.Namespace), ds, desiredNumberScheduled, currentNumberScheduled, numberMisscheduled, numberReady, updatedNumberScheduled, numberAvailable, numberUnavailable, updateObservedGen)
	if err != nil {
		return fmt.Errorf("error storing status for daemon set %#v: %v", ds, err)
	}

	// Resync the DaemonSet after MinReadySeconds as a last line of defense to guard against clock-skew.
	if ds.Spec.MinReadySeconds > 0 && numberReady != numberAvailable {
		dsc.enqueueDaemonSetAfter(ds, time.Duration(ds.Spec.MinReadySeconds)*time.Second)
	}
	return nil
}
func (dsc *DaemonSetsController) syncDaemonSet(ctx context.Context, key string) error {
	startTime := dsc.failedPodsBackoff.Clock.Now()
	defer func() {
		klog.V(4).Infof("Finished syncing daemon set %q (%v)", key, dsc.failedPodsBackoff.Clock.Now().Sub(startTime))
	}()

	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}
	ds, err := dsc.dsLister.DaemonSets(namespace).Get(name)
	if apierrors.IsNotFound(err) {
		klog.V(3).Infof("daemon set has been deleted %v", key)
		dsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return fmt.Errorf("unable to retrieve ds %v from store: %v", key, err)
	}

	nodeList, err := dsc.nodeLister.List(labels.Everything())
	if err != nil {
		return fmt.Errorf("couldn't get list of nodes when syncing daemon set %#v: %v", ds, err)
	}

	everything := metav1.LabelSelector{}
	if reflect.DeepEqual(ds.Spec.Selector, &everything) {
		dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, SelectingAllReason, "This daemon set is selecting all pods. A non-empty selector is required.")
		return nil
	}

	// Don't process a daemon set until all its creations and deletions have been processed.
	// For example if daemon set foo asked for 3 new daemon pods in the previous call to manage,
	// then we do not want to call manage on foo until the daemon pods have been created.
	dsKey, err := controller.KeyFunc(ds)
	if err != nil {
		return fmt.Errorf("couldn't get key for object %#v: %v", ds, err)
	}

	// If the DaemonSet is being deleted (either by foreground deletion or
	// orphan deletion), we cannot be sure if the DaemonSet history objects
	// it owned still exist -- those history objects can either be deleted
	// or orphaned. Garbage collector doesn't guarantee that it will delete
	// DaemonSet pods before deleting DaemonSet history objects, because
	// DaemonSet history doesn't own DaemonSet pods. We cannot reliably
	// calculate the status of a DaemonSet being deleted. Therefore, return
	// here without updating status for the DaemonSet being deleted.
	if ds.DeletionTimestamp != nil {
		return nil
	}

	// Construct histories of the DaemonSet, and get the hash of current history.
	cur, old, err := dsc.constructHistory(ctx, ds)
	if err != nil {
		return fmt.Errorf("failed to construct revisions of DaemonSet: %v", err)
	}
	hash := cur.Labels[apps.DefaultDaemonSetUniqueLabelKey]

	if !dsc.expectations.SatisfiedExpectations(dsKey) {
		// Only update status. Don't raise observedGeneration since controller didn't process object of that generation.
		return dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, false)
	}

	err = dsc.manage(ctx, ds, nodeList, hash)
	if err != nil {
		return err
	}

	// Process rolling updates if we're ready.
	if dsc.expectations.SatisfiedExpectations(dsKey) {
		switch ds.Spec.UpdateStrategy.Type {
		case apps.OnDeleteDaemonSetStrategyType:
		case apps.RollingUpdateDaemonSetStrategyType:
			err = dsc.rollingUpdate(ctx, ds, nodeList, hash)
		}
		if err != nil {
			return err
		}
	}

	err = dsc.cleanupHistory(ctx, ds, old)
	if err != nil {
		return fmt.Errorf("failed to clean up revisions of DaemonSet: %v", err)
	}

	return dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, true)
}
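
// Illustrative sketch (not part of the controller): syncDaemonSet is driven by
// workqueue keys of the form "<namespace>/<name>", the inverse of the
// cache.SplitMetaNamespaceKey call above. Hypothetical usage:
//
//	key, _ := cache.MetaNamespaceKeyFunc(ds) // e.g. "kube-system/fluentd"
//	err := dsc.syncDaemonSet(ctx, key)
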
// nodeShouldRunDaemonPod checks a set of preconditions against a (node, daemonset) pair and
// returns a summary. The returned booleans are:
// * shouldRun:
//     Returns true when a daemonset should run on the node if a daemonset pod is not already
//     running on that node.
// * shouldContinueRunning:
//     Returns true when a daemonset should continue running on a node if a daemonset pod is
//     already running on that node.
func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.DaemonSet) (bool, bool) {
	pod := NewPod(ds, node.Name)

	// If the daemon set specifies a node name, check that it matches with node.Name.
	if !(ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == node.Name) {
		return false, false
	}

	taints := node.Spec.Taints
	fitsNodeName, fitsNodeAffinity, fitsTaints := Predicates(pod, node, taints)
	if !fitsNodeName || !fitsNodeAffinity {
		return false, false
	}

	if !fitsTaints {
		// Scheduled daemon pods should continue running if they tolerate the NoExecute taint.
		_, hasUntoleratedTaint := v1helper.FindMatchingUntoleratedTaint(taints, pod.Spec.Tolerations, func(t *v1.Taint) bool {
			return t.Effect == v1.TaintEffectNoExecute
		})
		return false, !hasUntoleratedTaint
	}

	return true, true
}
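
// Illustrative sketch (not part of the controller): how a caller might act on the
// two booleans; shouldRun implies shouldContinueRunning. "node" and "ds" are
// assumed to be an existing *v1.Node and *apps.DaemonSet.
//
//	shouldRun, shouldContinueRunning := dsc.nodeShouldRunDaemonPod(node, ds)
//	switch {
//	case shouldRun:
//		// ensure a daemon pod is running on this node
//	case shouldContinueRunning:
//		// keep any existing daemon pod, but don't create a new one
//	default:
//		// delete any daemon pods scheduled onto this node
//	}
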
// Predicates checks if a DaemonSet's pod can run on a node.
func Predicates(pod *v1.Pod, node *v1.Node, taints []v1.Taint) (fitsNodeName, fitsNodeAffinity, fitsTaints bool) {
	fitsNodeName = len(pod.Spec.NodeName) == 0 || pod.Spec.NodeName == node.Name
	// Ignore parsing errors for backwards compatibility.
	fitsNodeAffinity, _ = nodeaffinity.GetRequiredNodeAffinity(pod).Match(node)
	_, hasUntoleratedTaint := v1helper.FindMatchingUntoleratedTaint(taints, pod.Spec.Tolerations, func(t *v1.Taint) bool {
		return t.Effect == v1.TaintEffectNoExecute || t.Effect == v1.TaintEffectNoSchedule
	})
	fitsTaints = !hasUntoleratedTaint
	return
}
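
// Illustrative sketch (not part of the controller): calling Predicates directly.
// "pod" and "node" are assumed to be existing objects; the taints are normally
// taken from node.Spec.Taints, as nodeShouldRunDaemonPod does above.
//
//	fitsNodeName, fitsNodeAffinity, fitsTaints := Predicates(pod, node, node.Spec.Taints)
//	canSchedule := fitsNodeName && fitsNodeAffinity && fitsTaints
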
// NewPod creates a new daemon pod from the DaemonSet's pod template, bound to the given node.
func NewPod(ds *apps.DaemonSet, nodeName string) *v1.Pod {
	newPod := &v1.Pod{Spec: ds.Spec.Template.Spec, ObjectMeta: ds.Spec.Template.ObjectMeta}
	newPod.Namespace = ds.Namespace
	newPod.Spec.NodeName = nodeName

	// Add default tolerations for DaemonSet pods.
	util.AddOrUpdateDaemonPodTolerations(&newPod.Spec)

	return newPod
}
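
// Illustrative sketch (not part of the controller): NewPod yields the pod the
// controller would place on a given node, and is what nodeShouldRunDaemonPod
// feeds into Predicates. "ds" is assumed to be an existing *apps.DaemonSet.
//
//	pod := NewPod(ds, "node-1")
//	// pod.Namespace == ds.Namespace, pod.Spec.NodeName == "node-1", and the
//	// default daemon-pod tolerations have been added to pod.Spec.
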
type podByCreationTimestampAndPhase []*v1.Pod

func (o podByCreationTimestampAndPhase) Len() int      { return len(o) }
func (o podByCreationTimestampAndPhase) Swap(i, j int) { o[i], o[j] = o[j], o[i] }

func (o podByCreationTimestampAndPhase) Less(i, j int) bool {
	// Scheduled pods come first.
	if len(o[i].Spec.NodeName) != 0 && len(o[j].Spec.NodeName) == 0 {
		return true
	}
	if len(o[i].Spec.NodeName) == 0 && len(o[j].Spec.NodeName) != 0 {
		return false
	}

	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}

	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}
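
// Illustrative sketch (not part of the controller): this ordering places scheduled
// pods before unscheduled ones and then sorts by creation time, so after sorting,
// index 0 is the oldest scheduled pod -- the one updateDaemonSetStatus inspects.
//
//	sort.Sort(podByCreationTimestampAndPhase(daemonPods))
//	oldest := daemonPods[0]
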
func failedPodsBackoffKey(ds *apps.DaemonSet, nodeName string) string {
	return fmt.Sprintf("%s/%d/%s", ds.UID, ds.Status.ObservedGeneration, nodeName)
}
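
// Illustrative sketch (not part of the controller): the backoff key scopes the
// failure counter to a DaemonSet UID, its observed generation, and a node (e.g.
// "d4c7d1ea-.../3/node-1"), so a spec change or a different node starts fresh.
//
//	key := failedPodsBackoffKey(ds, node.Name)
//	delay := dsc.failedPodsBackoff.Get(key)
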
// getUnscheduledPodsWithoutNode returns a list of unscheduled pods assigned to nodes that no
// longer exist. Such pods can't be deleted by PodGCController, so they have to be deleted by
// DaemonSetsController.
func getUnscheduledPodsWithoutNode(runningNodesList []*v1.Node, nodeToDaemonPods map[string][]*v1.Pod) []string {
	var results []string
	isNodeRunning := make(map[string]bool)
	for _, node := range runningNodesList {
		isNodeRunning[node.Name] = true
	}
	for n, pods := range nodeToDaemonPods {
		if !isNodeRunning[n] {
			for _, pod := range pods {
				if len(pod.Spec.NodeName) == 0 {
					results = append(results, pod.Name)
				}
			}
		}
	}
	return results
}
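
// Illustrative sketch (not part of the controller): how the helper above is
// typically used. "nodeList" and "nodeToDaemonPods" are assumed to come from
// the listers, as in updateDaemonSetStatus.
//
//	podsToDelete := getUnscheduledPodsWithoutNode(nodeList, nodeToDaemonPods)
//	// Each returned name is an unscheduled daemon pod mapped to a node that no
//	// longer exists; PodGCController won't clean these up.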