KAFKA-15643: Fix error logged when unload is called on a broker that was never a coordinator. (#14657)

When a new leader is elected for a __consumer_offsets partition, the followers are notified to unload their state. However, only the former leader actually has state for that partition. The remaining follower, which was never a coordinator for it, prints out the following error:
`ERROR [GroupCoordinator id=1] Execution of UnloadCoordinator(tp=__consumer_offsets-1, epoch=0) failed due to This is not the correct coordinator.. (org.apache.kafka.coordinator.group.runtime.CoordinatorRuntime)`
The error is actually correct and expected in the remaining-follower case; however, it can be misleading. This patch handles the case gracefully.

Reviewers: David Jacot <djacot@confluent.io>
This commit is contained in:
Ritika Reddy 2023-10-31 03:09:32 -07:00 committed by GitHub
parent 57fd8f4c36
commit a48ca490e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 60 additions and 11 deletions

View File

@ -1359,18 +1359,28 @@ public class CoordinatorRuntime<S extends CoordinatorShard<U>, U> implements Aut
log.info("Scheduling unloading of metadata for {} with epoch {}", tp, partitionEpoch);
scheduleInternalOperation("UnloadCoordinator(tp=" + tp + ", epoch=" + partitionEpoch + ")", tp, () -> {
withContextOrThrow(tp, context -> {
if (context.epoch < partitionEpoch) {
log.info("Started unloading metadata for {} with epoch {}.", tp, partitionEpoch);
context.transitionTo(CoordinatorState.CLOSED);
coordinators.remove(tp, context);
log.info("Finished unloading metadata for {} with epoch {}.", tp, partitionEpoch);
} else {
log.info("Ignored unloading metadata for {} in epoch {} since current epoch is {}.",
tp, partitionEpoch, context.epoch
);
CoordinatorContext context = coordinators.get(tp);
if (context != null) {
try {
context.lock.lock();
if (context.epoch < partitionEpoch) {
log.info("Started unloading metadata for {} with epoch {}.", tp, partitionEpoch);
context.transitionTo(CoordinatorState.CLOSED);
coordinators.remove(tp, context);
log.info("Finished unloading metadata for {} with epoch {}.", tp, partitionEpoch);
} else {
log.info("Ignored unloading metadata for {} in epoch {} since current epoch is {}.",
tp, partitionEpoch, context.epoch
);
}
} finally {
context.lock.unlock();
}
});
} else {
log.info("Ignored unloading metadata for {} in epoch {} since metadata was never loaded.",
tp, partitionEpoch
);
}
});
}

View File

@ -554,6 +554,45 @@ public class CoordinatorRuntimeTest {
// Getting the coordinator context fails because it no longer exists.
assertThrows(NotCoordinatorException.class, () -> runtime.contextOrThrow(TP));
}
/**
 * Schedules an unload for a partition that was never loaded on this runtime and
 * checks that the shard's unload callback is not invoked and that no context
 * exists for the partition afterwards.
 */
@Test
public void testScheduleUnloadingWhenContextDoesntExist() {
    final int unloadEpoch = 11;

    MockTimer mockTimer = new MockTimer();
    MockCoordinatorShard shard = mock(MockCoordinatorShard.class);
    MockCoordinatorShardBuilder shardBuilder = mock(MockCoordinatorShardBuilder.class);
    MockCoordinatorShardBuilderSupplier shardBuilderSupplier = mock(MockCoordinatorShardBuilderSupplier.class);
    MockPartitionWriter partitionWriter = mock(MockPartitionWriter.class);

    CoordinatorRuntime.Builder<MockCoordinatorShard, String> runtimeBuilder =
        new CoordinatorRuntime.Builder<MockCoordinatorShard, String>()
            .withTime(mockTimer.time())
            .withTimer(mockTimer)
            .withLoader(new MockCoordinatorLoader())
            .withEventProcessor(new DirectEventProcessor())
            .withPartitionWriter(partitionWriter)
            .withCoordinatorShardBuilderSupplier(shardBuilderSupplier)
            .withCoordinatorRuntimeMetrics(mock(GroupCoordinatorRuntimeMetrics.class));
    CoordinatorRuntime<MockCoordinatorShard, String> runtime = runtimeBuilder.build();

    // Wire the supplier -> builder -> shard chain; every fluent setter hands back the same builder.
    when(shardBuilderSupplier.get()).thenReturn(shardBuilder);
    when(shardBuilder.withTopicPartition(any())).thenReturn(shardBuilder);
    when(shardBuilder.withTimer(any())).thenReturn(shardBuilder);
    when(shardBuilder.withTime(any())).thenReturn(shardBuilder);
    when(shardBuilder.withLogContext(any())).thenReturn(shardBuilder);
    when(shardBuilder.withSnapshotRegistry(any())).thenReturn(shardBuilder);
    when(shardBuilder.build()).thenReturn(shard);

    // Deliberately no load is scheduled in this test: it mimics a follower that was never
    // a coordinator being asked to unload its state. The unload event is skipped in this case.
    runtime.scheduleUnloadOperation(TP, unloadEpoch);

    // The shard must never have been told it was unloaded.
    verify(shard, times(0)).onUnloaded();

    // There is no context for the partition, so looking it up is rejected.
    assertThrows(NotCoordinatorException.class, () -> runtime.contextOrThrow(TP));
}
@Test
public void testScheduleUnloadingWithStalePartitionEpoch() {