KAFKA-14296; Partition leaders are not demoted during kraft controlled shutdown (#12741)

When the `BrokerServer` starts its shutdown sequence, it transitions to `SHUTTING_DOWN` and sets `isShuttingDown` to `true`. With this state change, follower state transitions are short-circuited. As a result, a broker that was serving as leader keeps acting as leader until controlled shutdown completes. Instead, we want the leader and ISR state to be updated so that requests return NOT_LEADER and clients can find the new leader.
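Conceptually, the follower transition should always apply the new partition state, and controlled shutdown should only decide whether the fetcher is stopped or restarted. The sketch below illustrates that decision with simplified stand-in types (`FetcherAction`, `NoLeader` are illustrative, not the real `ReplicaManager` internals); the actual change is in the diff further down.

```scala
// Simplified sketch of the intended decision; types and the NoLeader constant
// are stand-ins for the real ReplicaManager/LeaderAndIsr structures.
object FollowerTransitionSketch {
  val NoLeader: Int = -1

  sealed trait FetcherAction
  case object StopFetcher extends FetcherAction
  case object StartFetcher extends FetcherAction
  case object KeepFetcher extends FetcherAction

  def fetcherAction(
    isInControlledShutdown: Boolean,
    leader: Int,
    isr: Set[Int],
    localBrokerId: Int,
    isNewLeaderEpoch: Boolean
  ): FetcherAction = {
    // The follower state itself is always updated first (even while shutting
    // down), so a former leader steps down and clients see
    // NOT_LEADER_OR_FOLLOWER instead of being served by a stale leader.
    if (isInControlledShutdown && (leader == NoLeader || !isr.contains(localBrokerId))) {
      // Partitions with no leader, or where this broker left the ISR, stop fetching.
      StopFetcher
    } else if (isNewLeaderEpoch) {
      // Otherwise the fetcher is restarted when the leader epoch changes.
      StartFetcher
    } else {
      KeepFetcher
    }
  }
}
```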

We missed this case while implementing https://github.com/apache/kafka/pull/12187.

This patch fixes the issue and updates an existing test to ensure that `isShuttingDown` has no effect. We should consider adding integration tests for this as well; that can be done separately.
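For reference, the updated unit test roughly follows the outline below. The helper names (`setupReplicaManagerWithMockedPurgatories`, `beginControlledShutdown`) come from the diff further down; `localId` and `time` are existing fixture fields of `ReplicaManagerTest`, and the metadata delta construction and final assertions are elided, so treat this as an outline rather than the verbatim test.

```scala
// Outline of the updated test flow (setup helpers and assertions elided).
val isShuttingDown = new AtomicBoolean(false)
val mockReplicaFetcherManager = mock(classOf[ReplicaFetcherManager])
val replicaManager = setupReplicaManagerWithMockedPurgatories(
  timer = new MockTimer(time),
  brokerId = localId,
  mockReplicaFetcherManager = Some(mockReplicaFetcherManager),
  isShuttingDown = isShuttingDown
)

// Flip the flag and begin controlled shutdown, then apply a metadata delta
// that removes this broker from the ISR: the local replica must still be
// demoted and its fetcher removed, i.e. isShuttingDown has no effect in KRaft.
isShuttingDown.set(true)
replicaManager.beginControlledShutdown()
```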

Reviewers: Ismael Juma <ismael@juma.me.uk>, José Armando García Sancio <jsancio@users.noreply.github.com>, Jason Gustafson <jason@confluent.io>
Author: David Jacot, 2022-10-14 01:59:19 +02:00 (committed by GitHub)
commit 5cff8f67b0, parent 0cb1d61413
2 changed files with 27 additions and 24 deletions

core/src/main/scala/kafka/server/ReplicaManager.scala

@@ -2142,7 +2142,6 @@ class ReplicaManager(val config: KafkaConfig,
   ): Unit = {
     stateChangeLogger.info(s"Transitioning ${localFollowers.size} partition(s) to " +
       "local followers.")
-    val shuttingDown = isShuttingDown.get()
     val partitionsToStartFetching = new mutable.HashMap[TopicPartition, Partition]
     val partitionsToStopFetching = new mutable.HashMap[TopicPartition, Boolean]
     val followerTopicSet = new mutable.HashSet[String]
@@ -2151,28 +2150,24 @@ class ReplicaManager(val config: KafkaConfig,
         try {
           followerTopicSet.add(tp.topic)
 
-          if (shuttingDown) {
-            stateChangeLogger.trace(s"Unable to start fetching $tp with topic " +
-              s"ID ${info.topicId} because the replica manager is shutting down.")
-          } else {
-            // We always update the follower state.
-            // - This ensure that a replica with no leader can step down;
-            // - This also ensures that the local replica is created even if the leader
-            //   is unavailable. This is required to ensure that we include the partition's
-            //   high watermark in the checkpoint file (see KAFKA-1647).
-            val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
-            val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))
+          // We always update the follower state.
+          // - This ensure that a replica with no leader can step down;
+          // - This also ensures that the local replica is created even if the leader
+          //   is unavailable. This is required to ensure that we include the partition's
+          //   high watermark in the checkpoint file (see KAFKA-1647).
+          val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
+          val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))
 
-            if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
-                !info.partition.isr.contains(config.brokerId))) {
-              // During controlled shutdown, replica with no leaders and replica
-              // where this broker is not in the ISR are stopped.
-              partitionsToStopFetching.put(tp, false)
-            } else if (isNewLeaderEpoch) {
-              // Otherwise, fetcher is restarted if the leader epoch has changed.
-              partitionsToStartFetching.put(tp, partition)
-            }
-          }
+          if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
+              !info.partition.isr.contains(config.brokerId))) {
+            // During controlled shutdown, replica with no leaders and replica
+            // where this broker is not in the ISR are stopped.
+            partitionsToStopFetching.put(tp, false)
+          } else if (isNewLeaderEpoch) {
+            // Otherwise, fetcher is restarted if the leader epoch has changed.
+            partitionsToStartFetching.put(tp, partition)
+          }
 
           changedPartitions.add(partition)
         } catch {
           case e: KafkaStorageException =>

core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala

@@ -21,7 +21,7 @@ import java.io.File
 import java.net.InetAddress
 import java.nio.file.Files
 import java.util
-import java.util.concurrent.atomic.{AtomicLong, AtomicReference}
+import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong, AtomicReference}
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 import java.util.stream.IntStream
 import java.util.{Collections, Optional, Properties}
@@ -2210,7 +2210,8 @@ class ReplicaManagerTest {
     aliveBrokerIds: Seq[Int] = Seq(0, 1),
     propsModifier: Properties => Unit = _ => {},
     mockReplicaFetcherManager: Option[ReplicaFetcherManager] = None,
-    mockReplicaAlterLogDirsManager: Option[ReplicaAlterLogDirsManager] = None
+    mockReplicaAlterLogDirsManager: Option[ReplicaAlterLogDirsManager] = None,
+    isShuttingDown: AtomicBoolean = new AtomicBoolean(false)
   ): ReplicaManager = {
     val props = TestUtils.createBrokerConfig(brokerId, TestUtils.MockZkConnect)
     props.put("log.dirs", TestUtils.tempRelativeDir("data").getAbsolutePath + "," + TestUtils.tempRelativeDir("data2").getAbsolutePath)
@@ -2245,6 +2246,7 @@ class ReplicaManagerTest {
       metadataCache = metadataCache,
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
       alterPartitionManager = alterPartitionManager,
+      isShuttingDown = isShuttingDown,
       delayedProducePurgatoryParam = Some(mockProducePurgatory),
       delayedFetchPurgatoryParam = Some(mockFetchPurgatory),
       delayedDeleteRecordsPurgatoryParam = Some(mockDeleteRecordsPurgatory),
@@ -3868,10 +3870,12 @@ class ReplicaManagerTest {
     val foo2 = new TopicPartition("foo", 2)
 
     val mockReplicaFetcherManager = mock(classOf[ReplicaFetcherManager])
+    val isShuttingDown = new AtomicBoolean(false)
     val replicaManager = setupReplicaManagerWithMockedPurgatories(
       timer = new MockTimer(time),
       brokerId = localId,
-      mockReplicaFetcherManager = Some(mockReplicaFetcherManager)
+      mockReplicaFetcherManager = Some(mockReplicaFetcherManager),
+      isShuttingDown = isShuttingDown
     )
 
     try {
@@ -3940,6 +3944,10 @@ class ReplicaManagerTest {
     reset(mockReplicaFetcherManager)
 
+    // The broker transitions to SHUTTING_DOWN state. This should not have
+    // any impact in KRaft mode.
+    isShuttingDown.set(true)
+
+    // The replica begins the controlled shutdown.
+    replicaManager.beginControlledShutdown()