KAFKA-19522: avoid electing fenced lastKnownLeader (#20200)
CI / build (push) Waiting to run Details

This patch fixes a bug that allowed the last known leader to be elected as the partition leader while it was still fenced, i.e. before its next heartbeat removed the fence.
https://issues.apache.org/jira/browse/KAFKA-19522

Reviewers: Jun Rao <junrao@gmail.com>, TengYao Chi <frankvicky@apache.org>
This commit is contained in:
Calvin Liu 2025-07-20 01:16:37 -07:00 committed by GitHub
parent 908049fccc
commit c162d2eb14
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 1 deletions

View File

@ -311,9 +311,10 @@ public class PartitionChangeBuilder {
topicId, partitionId, Arrays.toString(partition.lastKnownElr));
return false;
}
if (isAcceptableLeader.test(partition.lastKnownElr[0])) {
if (!isAcceptableLeader.test(partition.lastKnownElr[0])) {
log.trace("Try to elect last known leader for {}-{} but last known leader is not alive. last known leader={}",
topicId, partitionId, partition.lastKnownElr[0]);
return false;
}
return true;
}

View File

@ -34,6 +34,7 @@ import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;
@ -1233,4 +1234,39 @@ public class PartitionChangeBuilderTest {
// No change to the partition.
assertEquals(Optional.empty(), builder.build());
}
/**
 * Checks that building a partition change yields no record when the single entry in the
 * last-known ELR is rejected by the predicate handed to the builder.
 *
 * <p>The partition starts with an empty ISR, an empty ELR, no leader (-1) and replica 1 as the
 * only last-known ELR member. The predicate passed to {@code PartitionChangeBuilder} answers
 * {@code false} for every replica, and the last-known-leader option for balanced recovery is
 * switched on. For every {@code Election} value the expected outcome is
 * {@code Optional.empty()}.
 *
 * @param type the election type supplied by {@code @EnumSource}
 */
@ParameterizedTest
@EnumSource(value = Election.class)
public void testEligibleLeaderReplicas_NotEligibleLastKnownLeader(Election type) {
    short recordVersion = 2;
    Uuid[] replicaDirectories = new Uuid[]{
        Uuid.fromString("zANDdMukTEqefOvHpmniMg"),
        Uuid.fromString("Ui2Eq8rbRiuW7m7uiPTRyg"),
        Uuid.fromString("MhgJOZrrTsKNcGM0XKK4aA"),
        Uuid.fromString("Y25PaCAmRfyGIKxAThhBAw")
    };

    // Leaderless partition: nothing in the ISR or ELR, replica 1 recorded as last-known ELR.
    PartitionRegistration leaderlessPartition = new PartitionRegistration.Builder()
        .setReplicas(new int[] {1, 2, 3, 4})
        .setDirectories(replicaDirectories)
        .setIsr(new int[] {})
        .setElr(new int[] {})
        .setLastKnownElr(new int[] {1})
        .setLeader(-1)
        .setLeaderRecoveryState(LeaderRecoveryState.RECOVERED)
        .setLeaderEpoch(100)
        .setPartitionEpoch(200)
        .build();
    Uuid topic = Uuid.fromString("FbrrdcfiR-KC2CPSTHaJrg");

    // The fourth constructor argument rejects every replica id.
    // NOTE(review): presumably this is the isAcceptableLeader predicate - confirm against
    // the PartitionChangeBuilder constructor.
    PartitionChangeBuilder changeBuilder = new PartitionChangeBuilder(
            leaderlessPartition,
            topic,
            0,
            replicaId -> false,
            metadataVersionForPartitionChangeRecordVersion(recordVersion),
            3)
        .setElection(type)
        .setEligibleLeaderReplicasEnabled(true)
        .setDefaultDirProvider(DEFAULT_DIR_PROVIDER)
        .setUseLastKnownLeaderInBalancedRecovery(true);
    changeBuilder.setTargetIsr(List.of());

    // No change to the partition.
    assertEquals(Optional.empty(), changeBuilder.build());
}
}