mirror of https://github.com/apache/kafka.git
KAFKA-15643: Fix error logged when unload is called on a broker that was never a coordinator. (#14657)
When a new leader is elected for a __consumer_offset partition, the followers are notified to unload the state. However, only the former leader is aware of it. The remaining follower prints out the following error: `ERROR [GroupCoordinator id=1] Execution of UnloadCoordinator(tp=__consumer_offsets-1, epoch=0) failed due to This is not the correct coordinator.. (org.apache.kafka.coordinator.group.runtime.CoordinatorRuntime)` The error is actually correct and expected when in the remaining follower case, however this could be misleading. This patch handles the case gracefully. Reviewers: David Jacot <djacot@confluent.io>
This commit is contained in:
parent
57fd8f4c36
commit
a48ca490e4
|
@ -1359,18 +1359,28 @@ public class CoordinatorRuntime<S extends CoordinatorShard<U>, U> implements Aut
|
|||
log.info("Scheduling unloading of metadata for {} with epoch {}", tp, partitionEpoch);
|
||||
|
||||
scheduleInternalOperation("UnloadCoordinator(tp=" + tp + ", epoch=" + partitionEpoch + ")", tp, () -> {
|
||||
withContextOrThrow(tp, context -> {
|
||||
if (context.epoch < partitionEpoch) {
|
||||
log.info("Started unloading metadata for {} with epoch {}.", tp, partitionEpoch);
|
||||
context.transitionTo(CoordinatorState.CLOSED);
|
||||
coordinators.remove(tp, context);
|
||||
log.info("Finished unloading metadata for {} with epoch {}.", tp, partitionEpoch);
|
||||
} else {
|
||||
log.info("Ignored unloading metadata for {} in epoch {} since current epoch is {}.",
|
||||
tp, partitionEpoch, context.epoch
|
||||
);
|
||||
CoordinatorContext context = coordinators.get(tp);
|
||||
if (context != null) {
|
||||
try {
|
||||
context.lock.lock();
|
||||
if (context.epoch < partitionEpoch) {
|
||||
log.info("Started unloading metadata for {} with epoch {}.", tp, partitionEpoch);
|
||||
context.transitionTo(CoordinatorState.CLOSED);
|
||||
coordinators.remove(tp, context);
|
||||
log.info("Finished unloading metadata for {} with epoch {}.", tp, partitionEpoch);
|
||||
} else {
|
||||
log.info("Ignored unloading metadata for {} in epoch {} since current epoch is {}.",
|
||||
tp, partitionEpoch, context.epoch
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
context.lock.unlock();
|
||||
}
|
||||
});
|
||||
} else {
|
||||
log.info("Ignored unloading metadata for {} in epoch {} since metadata was never loaded.",
|
||||
tp, partitionEpoch
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -554,6 +554,45 @@ public class CoordinatorRuntimeTest {
|
|||
// Getting the coordinator context fails because it no longer exists.
|
||||
assertThrows(NotCoordinatorException.class, () -> runtime.contextOrThrow(TP));
|
||||
}
|
||||
@Test
|
||||
public void testScheduleUnloadingWhenContextDoesntExist() {
|
||||
MockTimer timer = new MockTimer();
|
||||
MockPartitionWriter writer = mock(MockPartitionWriter.class);
|
||||
MockCoordinatorShardBuilderSupplier supplier = mock(MockCoordinatorShardBuilderSupplier.class);
|
||||
MockCoordinatorShardBuilder builder = mock(MockCoordinatorShardBuilder.class);
|
||||
MockCoordinatorShard coordinator = mock(MockCoordinatorShard.class);
|
||||
|
||||
CoordinatorRuntime<MockCoordinatorShard, String> runtime =
|
||||
new CoordinatorRuntime.Builder<MockCoordinatorShard, String>()
|
||||
.withTime(timer.time())
|
||||
.withTimer(timer)
|
||||
.withLoader(new MockCoordinatorLoader())
|
||||
.withEventProcessor(new DirectEventProcessor())
|
||||
.withPartitionWriter(writer)
|
||||
.withCoordinatorShardBuilderSupplier(supplier)
|
||||
.withCoordinatorRuntimeMetrics(mock(GroupCoordinatorRuntimeMetrics.class))
|
||||
.build();
|
||||
|
||||
when(builder.withSnapshotRegistry(any())).thenReturn(builder);
|
||||
when(builder.withLogContext(any())).thenReturn(builder);
|
||||
when(builder.withTime(any())).thenReturn(builder);
|
||||
when(builder.withTimer(any())).thenReturn(builder);
|
||||
when(builder.withTopicPartition(any())).thenReturn(builder);
|
||||
when(builder.build()).thenReturn(coordinator);
|
||||
when(supplier.get()).thenReturn(builder);
|
||||
|
||||
// No loading is scheduled. This is to check the case when a follower that was never a coordinator
|
||||
// is asked to unload its state. The unload event is skipped in this case.
|
||||
|
||||
// Schedule the unloading.
|
||||
runtime.scheduleUnloadOperation(TP, 11);
|
||||
|
||||
// Verify that onUnloaded is not called.
|
||||
verify(coordinator, times(0)).onUnloaded();
|
||||
|
||||
// Getting the coordinator context fails because it doesn't exist.
|
||||
assertThrows(NotCoordinatorException.class, () -> runtime.contextOrThrow(TP));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testScheduleUnloadingWithStalePartitionEpoch() {
|
||||
|
|
Loading…
Reference in New Issue