mirror of https://github.com/apache/kafka.git
KAFKA-14296; Partition leaders are not demoted during kraft controlled shutdown (#12741)
When the `BrokerServer` begins its shutdown sequence, it transitions to `SHUTTING_DOWN` and sets `isShuttingDown` to `true`. With this state change, follower state changes are short-circuited, which means that a broker that was serving as leader keeps acting as the leader until controlled shutdown completes. Instead, we want the leader and ISR state to be updated so that requests return NOT_LEADER and clients can find the new leader. We missed this case while implementing https://github.com/apache/kafka/pull/12187. This patch fixes the issue and updates an existing test to ensure that `isShuttingDown` has no effect. We should consider adding integration tests for this as well; we can do that separately.

Reviewers: Ismael Juma <ismael@juma.me.uk>, José Armando García Sancio <jsancio@users.noreply.github.com>, Jason Gustafson <jason@confluent.io>
parent 0cb1d61413
commit 5cff8f67b0
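The heart of the change, condensed from the ReplicaManager diff below: the `shuttingDown` short-circuit is removed from the local-follower transition path, so `makeFollower` always updates the partition and leader/ISR state, and controlled shutdown only decides whether the fetcher is stopped or restarted. The following is a condensed sketch of the resulting flow, not the verbatim patch:

    // Always transition the local replica to follower, even while shutting down,
    // so that a former leader steps down and clients get NOT_LEADER and refresh
    // their metadata to find the new leader.
    val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
    val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))

    if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
        !info.partition.isr.contains(config.brokerId))) {
      // During controlled shutdown, stop fetching for partitions that have no
      // leader or where this broker is no longer in the ISR.
      partitionsToStopFetching.put(tp, false)
    } else if (isNewLeaderEpoch) {
      // Otherwise, restart the fetcher only when the leader epoch has changed.
      partitionsToStartFetching.put(tp, partition)
    }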
core/src/main/scala/kafka/server/ReplicaManager.scala

@@ -2142,7 +2142,6 @@ class ReplicaManager(val config: KafkaConfig,
   ): Unit = {
     stateChangeLogger.info(s"Transitioning ${localFollowers.size} partition(s) to " +
       "local followers.")
-    val shuttingDown = isShuttingDown.get()
     val partitionsToStartFetching = new mutable.HashMap[TopicPartition, Partition]
     val partitionsToStopFetching = new mutable.HashMap[TopicPartition, Boolean]
     val followerTopicSet = new mutable.HashSet[String]
@@ -2151,28 +2150,24 @@ class ReplicaManager(val config: KafkaConfig,
         try {
           followerTopicSet.add(tp.topic)

-          if (shuttingDown) {
-            stateChangeLogger.trace(s"Unable to start fetching $tp with topic " +
-              s"ID ${info.topicId} because the replica manager is shutting down.")
-          } else {
-            // We always update the follower state.
-            // - This ensure that a replica with no leader can step down;
-            // - This also ensures that the local replica is created even if the leader
-            //   is unavailable. This is required to ensure that we include the partition's
-            //   high watermark in the checkpoint file (see KAFKA-1647).
-            val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
-            val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))
+          // We always update the follower state.
+          // - This ensure that a replica with no leader can step down;
+          // - This also ensures that the local replica is created even if the leader
+          //   is unavailable. This is required to ensure that we include the partition's
+          //   high watermark in the checkpoint file (see KAFKA-1647).
+          val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
+          val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))

-            if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
-                !info.partition.isr.contains(config.brokerId))) {
-              // During controlled shutdown, replica with no leaders and replica
-              // where this broker is not in the ISR are stopped.
-              partitionsToStopFetching.put(tp, false)
-            } else if (isNewLeaderEpoch) {
-              // Otherwise, fetcher is restarted if the leader epoch has changed.
-              partitionsToStartFetching.put(tp, partition)
-            }
-          }
+          if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
+              !info.partition.isr.contains(config.brokerId))) {
+            // During controlled shutdown, replica with no leaders and replica
+            // where this broker is not in the ISR are stopped.
+            partitionsToStopFetching.put(tp, false)
+          } else if (isNewLeaderEpoch) {
+            // Otherwise, fetcher is restarted if the leader epoch has changed.
+            partitionsToStartFetching.put(tp, partition)
+          }
+
           changedPartitions.add(partition)
         } catch {
           case e: KafkaStorageException =>
core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala

@@ -21,7 +21,7 @@ import java.io.File
 import java.net.InetAddress
 import java.nio.file.Files
 import java.util
-import java.util.concurrent.atomic.{AtomicLong, AtomicReference}
+import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong, AtomicReference}
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 import java.util.stream.IntStream
 import java.util.{Collections, Optional, Properties}
@@ -2210,7 +2210,8 @@ class ReplicaManagerTest {
     aliveBrokerIds: Seq[Int] = Seq(0, 1),
     propsModifier: Properties => Unit = _ => {},
     mockReplicaFetcherManager: Option[ReplicaFetcherManager] = None,
-    mockReplicaAlterLogDirsManager: Option[ReplicaAlterLogDirsManager] = None
+    mockReplicaAlterLogDirsManager: Option[ReplicaAlterLogDirsManager] = None,
+    isShuttingDown: AtomicBoolean = new AtomicBoolean(false)
   ): ReplicaManager = {
     val props = TestUtils.createBrokerConfig(brokerId, TestUtils.MockZkConnect)
     props.put("log.dirs", TestUtils.tempRelativeDir("data").getAbsolutePath + "," + TestUtils.tempRelativeDir("data2").getAbsolutePath)
@@ -2245,6 +2246,7 @@ class ReplicaManagerTest {
       metadataCache = metadataCache,
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
       alterPartitionManager = alterPartitionManager,
+      isShuttingDown = isShuttingDown,
       delayedProducePurgatoryParam = Some(mockProducePurgatory),
       delayedFetchPurgatoryParam = Some(mockFetchPurgatory),
       delayedDeleteRecordsPurgatoryParam = Some(mockDeleteRecordsPurgatory),
@@ -3868,10 +3870,12 @@ class ReplicaManagerTest {
     val foo2 = new TopicPartition("foo", 2)

     val mockReplicaFetcherManager = mock(classOf[ReplicaFetcherManager])
+    val isShuttingDown = new AtomicBoolean(false)
     val replicaManager = setupReplicaManagerWithMockedPurgatories(
       timer = new MockTimer(time),
       brokerId = localId,
-      mockReplicaFetcherManager = Some(mockReplicaFetcherManager)
+      mockReplicaFetcherManager = Some(mockReplicaFetcherManager),
+      isShuttingDown = isShuttingDown
     )

     try {
@@ -3940,6 +3944,10 @@ class ReplicaManagerTest {

     reset(mockReplicaFetcherManager)

+    // The broker transitions to SHUTTING_DOWN state. This should not have
+    // any impact in KRaft mode.
+    isShuttingDown.set(true)
+
     // The replica begins the controlled shutdown.
     replicaManager.beginControlledShutdown()

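The commit message notes that the existing test was updated to ensure `isShuttingDown` has no effect in KRaft mode. A minimal sketch of how that property can be checked with the mocks set up above, assuming the test's existing fixtures and Mockito imports (the `topicsDelta`, `metadataImage`, and `foo0` names here are illustrative assumptions, not the verbatim test code):

    // Illustrative fragment: even with isShuttingDown set to true, applying a
    // metadata delta that removes this broker from the partition's ISR should
    // still transition the replica to follower and stop its fetcher.
    replicaManager.applyDelta(topicsDelta, metadataImage)
    verify(mockReplicaFetcherManager).removeFetcherForPartitions(Set(foo0))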