KAFKA-18632: Multibroker test improvements. (#18718)

Reviewers: Andrew Schofield <aschofield@confluent.io>
Sushant Mahajan 2025-01-29 22:33:43 +05:30 committed by GitHub
parent 048dfeffd0
commit 632aedcf4f
2 changed files with 209 additions and 114 deletions

kafka/server/metadata/ShareCoordinatorMetadataCacheHelperImpl.java

@@ -20,6 +20,7 @@ package kafka.server.metadata;
import kafka.server.MetadataCache;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.errors.CoordinatorNotAvailableException;
import org.apache.kafka.common.message.MetadataResponseData;
import org.apache.kafka.common.network.ListenerName;
import org.apache.kafka.common.protocol.Errors;

@@ -27,6 +28,9 @@ import org.apache.kafka.common.requests.MetadataResponse;
import org.apache.kafka.server.share.SharePartitionKey;
import org.apache.kafka.server.share.persister.ShareCoordinatorMetadataCacheHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashSet;
import java.util.List;
import java.util.Objects;

@@ -41,6 +45,7 @@ public class ShareCoordinatorMetadataCacheHelperImpl implements ShareCoordinatorMetadataCacheHelper {
    private final MetadataCache metadataCache;
    private final Function<SharePartitionKey, Integer> keyToPartitionMapper;
    private final ListenerName interBrokerListenerName;
    private final Logger log = LoggerFactory.getLogger(ShareCoordinatorMetadataCacheHelperImpl.class);

    public ShareCoordinatorMetadataCacheHelperImpl(
        MetadataCache metadataCache,

@@ -63,35 +68,39 @@ public class ShareCoordinatorMetadataCacheHelperImpl implements ShareCoordinatorMetadataCacheHelper {
    @Override
    public Node getShareCoordinator(SharePartitionKey key, String internalTopicName) {
        try {
            if (metadataCache.contains(internalTopicName)) {
                Set<String> topicSet = new HashSet<>();
                topicSet.add(internalTopicName);

                List<MetadataResponseData.MetadataResponseTopic> topicMetadata = CollectionConverters.asJava(
                    metadataCache.getTopicMetadata(
                        CollectionConverters.asScala(topicSet),
                        interBrokerListenerName,
                        false,
                        false
                    )
                );

                if (topicMetadata == null || topicMetadata.isEmpty() || topicMetadata.get(0).errorCode() != Errors.NONE.code()) {
                    return Node.noNode();
                } else {
                    int partition = keyToPartitionMapper.apply(key);
                    Optional<MetadataResponseData.MetadataResponsePartition> response = topicMetadata.get(0).partitions().stream()
                        .filter(responsePart -> responsePart.partitionIndex() == partition
                            && responsePart.leaderId() != MetadataResponse.NO_LEADER_ID)
                        .findFirst();

                    if (response.isPresent()) {
                        return OptionConverters.toJava(metadataCache.getAliveBrokerNode(response.get().leaderId(), interBrokerListenerName))
                            .orElse(Node.noNode());
                    } else {
                        return Node.noNode();
                    }
                }
            }
        } catch (CoordinatorNotAvailableException e) {
            log.warn("Coordinator not available", e);
        }
        return Node.noNode();
    }
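With the catch in place, a CoordinatorNotAvailableException raised while metadata is still converging (for example, right after the broker hosting the state-topic partition has been shut down) is logged and reported to the caller as Node.noNode() instead of being thrown. A minimal caller-side sketch, assuming a hypothetical awaitCoordinator helper that is not part of this commit, of how code built on ShareCoordinatorMetadataCacheHelper might poll until a live coordinator is elected:

    // Illustrative only: poll getShareCoordinator until a live node is returned.
    // awaitCoordinator and its parameters are hypothetical; Node.noNode() answers isEmpty() == true.
    Node awaitCoordinator(ShareCoordinatorMetadataCacheHelper helper,
                          SharePartitionKey key,
                          String internalTopicName,
                          long timeoutMs) throws InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (System.currentTimeMillis() < deadline) {
            Node node = helper.getShareCoordinator(key, internalTopicName);
            if (!node.isEmpty()) {
                return node;        // a live coordinator has been elected
            }
            Thread.sleep(100L);     // back off while leadership moves to another broker
        }
        return Node.noNode();
    }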

ShareConsumerTest.java

@@ -55,6 +55,8 @@ import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.common.test.ClusterInstance;
import org.apache.kafka.common.test.api.ClusterConfigProperty;
import org.apache.kafka.common.test.api.ClusterTest;

@@ -88,6 +90,7 @@ import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -1832,86 +1835,142 @@ public class ShareConsumerTest {
            @ClusterConfigProperty(key = "unstable.api.versions.enable", value = "true")
        }
    )
    @Timeout(90)
    public void testShareConsumerAfterCoordinatorMovement() throws Exception {
        setup();
        String topicName = "multipart";
        String groupId = "multipartGrp";
        Uuid topicId = createTopic(topicName, 3, 3);
        alterShareAutoOffsetReset(groupId, "earliest");

        ScheduledExecutorService service = Executors.newScheduledThreadPool(5);
        TopicPartition tpMulti = new TopicPartition(topicName, 0);

        // produce some messages
        ClientState prodState = new ClientState();
        final Set<String> produced = new HashSet<>();
        service.execute(() -> {
            int i = 0;
            try (Producer<String, String> producer = createProducer(Map.of(
                ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName(),
                ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()
            ))) {
                while (!prodState.done().get()) {
                    String key = "key-" + (i++);
                    ProducerRecord<String, String> record = new ProducerRecord<>(
                        tpMulti.topic(),
                        tpMulti.partition(),
                        null,
                        key,
                        "value"
                    );
                    try {
                        producer.send(record);
                        producer.flush();
                        // count only correctly produced records
                        prodState.count().incrementAndGet();
                        produced.add(key);
                    } catch (Exception e) {
                        // ignore
                    }
                }
            }
        });

        // consume messages - start after a small delay
        ClientState consState = new ClientState();
        // using a map here in case we want to debug specific keys
        Map<String, Integer> consumed = new HashMap<>();
        service.schedule(() -> {
            try (ShareConsumer<String, String> shareConsumer = createShareConsumer(groupId, Map.of(
                ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName(),
                ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()
            ))) {
                shareConsumer.subscribe(List.of(topicName));
                while (!consState.done().get()) {
                    ConsumerRecords<String, String> records = shareConsumer.poll(Duration.ofMillis(2000L));
                    consState.count().addAndGet(records.count());
                    records.forEach(rec -> consumed.compute(rec.key(), (k, v) -> v == null ? 1 : v + 1));
                    if (prodState.done().get() && records.count() == 0) {
                        consState.done().set(true);
                    }
                }
            }
        }, 100L, TimeUnit.MILLISECONDS);

        // To be closer to real world scenarios, we will execute after
        // some time has elapsed since the producer and consumer started
        // working.
        service.schedule(() -> {
            // Get the current node hosting the __share_group_state partition
            // on which tpMulti is hosted. Then shut down this node and wait
            // for it to be gracefully shut down. Then fetch the coordinator again
            // and verify that it has moved to some other broker.
            try (Admin admin = createAdminClient()) {
                SharePartitionKey key = SharePartitionKey.getInstance(groupId, new TopicIdPartition(topicId, tpMulti));
                int shareGroupStateTp = Utils.abs(key.asCoordinatorKey().hashCode()) % 3;
                List<Integer> curShareCoordNodeId = null;
                try {
                    curShareCoordNodeId = admin.describeTopics(List.of(Topic.SHARE_GROUP_STATE_TOPIC_NAME)).allTopicNames().get().get(Topic.SHARE_GROUP_STATE_TOPIC_NAME)
                        .partitions().stream()
                        .filter(info -> info.partition() == shareGroupStateTp)
                        .map(info -> info.leader().id())
                        .toList();
                } catch (Exception e) {
                    fail(e);
                }
                assertEquals(1, curShareCoordNodeId.size());

                // shutdown the coordinator
                KafkaBroker broker = cluster.brokers().get(curShareCoordNodeId.get(0));
                cluster.shutdownBroker(curShareCoordNodeId.get(0));
                // wait for it to be completely shut down
                broker.awaitShutdown();

                List<Integer> newShareCoordNodeId = null;
                try {
                    newShareCoordNodeId = admin.describeTopics(List.of(Topic.SHARE_GROUP_STATE_TOPIC_NAME)).allTopicNames().get().get(Topic.SHARE_GROUP_STATE_TOPIC_NAME)
                        .partitions().stream()
                        .filter(info -> info.partition() == shareGroupStateTp)
                        .map(info -> info.leader().id())
                        .toList();
                } catch (Exception e) {
                    fail(e);
                }
                assertEquals(1, newShareCoordNodeId.size());
                assertNotEquals(curShareCoordNodeId.get(0), newShareCoordNodeId.get(0));
            }
        }, 5L, TimeUnit.SECONDS);

        // stop the producer after some time (but after the coordinator shutdown)
        service.schedule(() -> prodState.done().set(true), 10L, TimeUnit.SECONDS);

        // wait for both producer and consumer to finish
        TestUtils.waitForCondition(
            () -> prodState.done().get() && consState.done().get(),
            45_000L,
            500L,
            () -> "prod/cons not done yet"
        );

        // Make sure we consumed all records. The consumed count could be higher
        // due to re-delivery, but that is expected since we only guarantee
        // at-least-once semantics.
        assertTrue(prodState.count().get() <= consState.count().get());
        Set<String> consumedKeys = consumed.keySet();
        assertTrue(produced.containsAll(consumedKeys) && consumedKeys.containsAll(produced));

        shutdownExecutorService(service);

        verifyShareGroupStateTopicRecordsProduced();
    }

    @ClusterTest(
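For reference, the lookup in the scheduled task above works because the share coordinator for a share-partition key is the leader of the __share_group_state partition that the key hashes to; this test cluster creates that topic with 3 partitions, hence the modulo 3. The mapping in isolation, as a sketch in which numStatePartitions is a hypothetical parameter standing in for the state topic's partition count:

    // Sketch of the key-to-partition mapping used in the test above.
    int shareStatePartitionFor(String groupId, Uuid topicId, TopicPartition tp, int numStatePartitions) {
        SharePartitionKey key = SharePartitionKey.getInstance(groupId, new TopicIdPartition(topicId, tp));
        // The broker leading this __share_group_state partition acts as the share coordinator.
        return Utils.abs(key.asCoordinatorKey().hashCode()) % numStatePartitions;
    }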
@@ -1929,6 +1988,8 @@ public class ShareConsumerTest {
            @ClusterConfigProperty(key = "unstable.api.versions.enable", value = "true")
        }
    )
    @Timeout(150)
    @Flaky("KAFKA-18665")
    public void testComplexShareConsumer() throws Exception {
        setup();
        String topicName = "multipart";
@@ -1936,19 +1997,18 @@ public class ShareConsumerTest {
        createTopic(topicName, 3, 3);
        TopicPartition multiTp = new TopicPartition(topicName, 0);

        ScheduledExecutorService service = Executors.newScheduledThreadPool(5);
        ClientState prodState = new ClientState();

        // produce messages until we want
        service.execute(() -> {
            try (Producer<byte[], byte[]> producer = createProducer()) {
                while (!prodState.done().get()) {
                    ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(multiTp.topic(), multiTp.partition(), null, "key".getBytes(), "value".getBytes());
                    producer.send(record);
                    producer.flush();
                    prodState.count().incrementAndGet();
                }
            }
        });
@@ -1961,28 +2021,45 @@ public class ShareConsumerTest {
            Map.of()
        );

        service.schedule(
            complexCons1,
            100L,
            TimeUnit.MILLISECONDS
        );

        // let the complex consumer read the messages
        service.schedule(() -> prodState.done().set(true), 10L, TimeUnit.SECONDS);

        // all messages which can be read are read, some would be redelivered
        TestUtils.waitForCondition(complexCons1::isDone, 45_000L, () -> "did not close!");

        assertTrue(prodState.count().get() < complexCons1.recordsRead());

        shutdownExecutorService(service);

        verifyShareGroupStateTopicRecordsProduced();
    }

    /**
     * Util class to encapsulate state for a consumer/producer
     * being executed by an {@link ExecutorService}.
     */
    private static class ClientState {
        private final AtomicBoolean done = new AtomicBoolean(false);
        private final AtomicInteger count = new AtomicInteger(0);

        AtomicBoolean done() {
            return done;
        }

        AtomicInteger count() {
            return count;
        }
    }

    private int produceMessages(int messageCount) {
        try (Producer<byte[], byte[]> producer = createProducer()) {
            ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(tp.topic(), tp.partition(), null, "key".getBytes(), "value".getBytes());
@@ -2217,9 +2294,7 @@ public class ShareConsumerTest {
        private final String topicName;
        private final Map<String, Object> configs = new HashMap<>();
        private final ClientState state = new ClientState();

        private final Predicate<ConsumerRecords<K, V>> exitCriteria;
        private final BiConsumer<ShareConsumer<K, V>, ConsumerRecord<K, V>> processFunc;
@@ -2267,31 +2342,42 @@ public class ShareConsumerTest {
        }

        void stop() {
            state.done().set(true);
        }

        @Override
        public void run() {
            try (ShareConsumer<K, V> consumer = new KafkaShareConsumer<>(configs)) {
                consumer.subscribe(Set.of(this.topicName));
                while (!state.done().get()) {
                    ConsumerRecords<K, V> records = consumer.poll(Duration.ofMillis(POLL_TIMEOUT_MS));
                    state.count().addAndGet(records.count());
                    if (exitCriteria.test(records)) {
                        state.done().set(true);
                    }
                    records.forEach(record -> processFunc.accept(consumer, record));
                }
            }
        }

        boolean isDone() {
            return state.done().get();
        }

        int recordsRead() {
            return state.count().get();
        }
    }

    private void shutdownExecutorService(ExecutorService service) {
        service.shutdown();
        try {
            if (!service.awaitTermination(5L, TimeUnit.SECONDS)) {
                service.shutdownNow();
            }
        } catch (Exception e) {
            service.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }
}
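Both tests now follow the same choreography: the clients run as pool tasks, a stop flag is flipped by a scheduled task rather than by sleeping inside a worker, and the pool is drained through shutdownExecutorService. The pattern in isolation, as an illustrative sketch whose names and timings are placeholders rather than part of the test class:

    // Illustrative sketch of the scheduling pattern used by the tests above.
    void runWorkerFor(long runSeconds) throws InterruptedException {
        ScheduledExecutorService service = Executors.newScheduledThreadPool(2);
        AtomicBoolean done = new AtomicBoolean(false);
        AtomicInteger count = new AtomicInteger(0);

        // Worker loops until the shared flag is flipped.
        service.execute(() -> {
            while (!done.get()) {
                count.incrementAndGet();   // stand-in for producer.send(...) or consumer.poll(...)
            }
        });
        // Flip the flag after a delay instead of sleeping inside a task.
        service.schedule(() -> done.set(true), runSeconds, TimeUnit.SECONDS);

        // Drain the pool; already-scheduled delayed tasks still run after shutdown() by default.
        service.shutdown();
        if (!service.awaitTermination(runSeconds + 5L, TimeUnit.SECONDS)) {
            service.shutdownNow();
        }
    }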