mirror of https://github.com/apache/kafka.git
MINOR: Fix some AssignmentsManager bugs (#14954)
- Add proper start & stop for AssignmentsManager's event loop
- Dedupe queued duplicate assignments
- Fix bug where directory ID is resolved too late

Co-authored-by: Gaurav Narula <gaurav_narula2@apple.com>
Reviewers: Colin P. McCabe <cmccabe@apache.org>
This commit is contained in:
parent
93b6df6173
commit
8c184b4743
|
@ -2325,6 +2325,8 @@ class ReplicaManager(val config: KafkaConfig,
|
|||
warn(s"Broker $localBrokerId stopped fetcher for partitions ${newOfflinePartitions.mkString(",")} and stopped moving logs " +
|
||||
s"for partitions ${partitionsWithOfflineFutureReplica.mkString(",")} because they are in the failed log directory $dir.")
|
||||
}
|
||||
// retrieve the UUID here because logManager.handleLogDirFailure handler removes it
|
||||
val uuid = logManager.directoryId(dir)
|
||||
logManager.handleLogDirFailure(dir)
|
||||
if (dir == config.metadataLogDir) {
|
||||
fatal(s"Shutdown broker because the metadata log dir $dir has failed")
|
||||
|
@ -2337,7 +2339,6 @@ class ReplicaManager(val config: KafkaConfig,
|
|||
Exit.halt(1)
|
||||
}
|
||||
if (zkClient.isEmpty) {
|
||||
val uuid = logManager.directoryId(dir)
|
||||
if (uuid.isDefined) {
|
||||
directoryEventHandler.handleFailure(uuid.get)
|
||||
} else {
|
||||
|
|
|
@ -19,6 +19,10 @@ package org.apache.kafka.server;
|
|||
|
||||
import org.apache.kafka.clients.ClientResponse;
|
||||
import org.apache.kafka.common.Uuid;
|
||||
import org.apache.kafka.common.message.AssignReplicasToDirsRequestData;
|
||||
import org.apache.kafka.common.message.AssignReplicasToDirsRequestData.DirectoryData;
|
||||
import org.apache.kafka.common.message.AssignReplicasToDirsRequestData.PartitionData;
|
||||
import org.apache.kafka.common.message.AssignReplicasToDirsRequestData.TopicData;
|
||||
import org.apache.kafka.common.message.AssignReplicasToDirsResponseData;
|
||||
import org.apache.kafka.common.protocol.Errors;
|
||||
import org.apache.kafka.common.requests.AssignReplicasToDirsRequest;
|
||||
|
@ -33,6 +37,7 @@ import org.apache.kafka.server.common.TopicIdPartition;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -84,12 +89,13 @@ public class AssignmentsManager {
|
|||
this.brokerEpochSupplier = brokerEpochSupplier;
|
||||
this.eventQueue = new KafkaEventQueue(time,
|
||||
new LogContext("[AssignmentsManager id=" + brokerId + "]"),
|
||||
"broker-" + brokerId + "-directory-assignments-manager-");
|
||||
"broker-" + brokerId + "-directory-assignments-manager-",
|
||||
new ShutdownEvent());
|
||||
channelManager.start();
|
||||
}
|
||||
|
||||
public void close() throws InterruptedException {
|
||||
eventQueue.close();
|
||||
channelManager.shutdown();
|
||||
}
|
||||
|
||||
public void onAssignment(TopicIdPartition topicPartition, Uuid dirId, Runnable callback) {
|
||||
|
@ -119,6 +125,16 @@ public class AssignmentsManager {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles shutdown of the {@link AssignmentsManager}.
|
||||
*/
|
||||
private class ShutdownEvent extends Event {
|
||||
@Override
|
||||
public void run() throws Exception {
|
||||
channelManager.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles new generated assignments, to be propagated to the controller.
|
||||
* Assignment events may be handled out of order, so for any two assignment
|
||||
|
@ -139,11 +155,18 @@ public class AssignmentsManager {
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
AssignmentEvent existing = pending.getOrDefault(partition, null);
|
||||
if (existing != null && existing.timestampNs > timestampNs) {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Dropping assignment {} because it's older than {}", this, existing);
|
||||
if (existing == null && inflight != null) {
|
||||
existing = inflight.getOrDefault(partition, null);
|
||||
}
|
||||
if (existing != null) {
|
||||
if (existing.dirId.equals(dirId)) {
|
||||
if (log.isDebugEnabled()) log.debug("Ignoring duplicate assignment {}", this);
|
||||
return;
|
||||
}
|
||||
if (existing.timestampNs > timestampNs) {
|
||||
if (log.isDebugEnabled()) log.debug("Dropping assignment {} because it's older than {}", this, existing);
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Received new assignment {}", this);
|
||||
|
@ -240,8 +263,8 @@ public class AssignmentsManager {
|
|||
Set<AssignmentEvent> completed = Utils.diff(HashSet::new, inflight.values().stream().collect(Collectors.toSet()), failed);
|
||||
completed.forEach(assignmentEvent -> assignmentEvent.callback.run());
|
||||
|
||||
log.warn("Re-queueing assignments: {}", failed);
|
||||
if (!failed.isEmpty()) {
|
||||
log.warn("Re-queueing assignments: {}", failed);
|
||||
for (AssignmentEvent event : failed) {
|
||||
pending.put(event.partition, event);
|
||||
}
|
||||
|
@ -376,4 +399,27 @@ public class AssignmentsManager {
|
|||
}
|
||||
return failures;
|
||||
}
|
||||
|
||||
// visible for testing
|
||||
static AssignReplicasToDirsRequestData buildRequestData(int brokerId, long brokerEpoch, Map<TopicIdPartition, Uuid> assignment) {
|
||||
Map<Uuid, DirectoryData> directoryMap = new HashMap<>();
|
||||
Map<Uuid, Map<Uuid, TopicData>> topicMap = new HashMap<>();
|
||||
for (Map.Entry<TopicIdPartition, Uuid> entry : assignment.entrySet()) {
|
||||
TopicIdPartition topicPartition = entry.getKey();
|
||||
Uuid directoryId = entry.getValue();
|
||||
DirectoryData directory = directoryMap.computeIfAbsent(directoryId, d -> new DirectoryData().setId(directoryId));
|
||||
TopicData topic = topicMap.computeIfAbsent(directoryId, d -> new HashMap<>())
|
||||
.computeIfAbsent(topicPartition.topicId(), topicId -> {
|
||||
TopicData data = new TopicData().setTopicId(topicId);
|
||||
directory.topics().add(data);
|
||||
return data;
|
||||
});
|
||||
PartitionData partition = new PartitionData().setPartitionIndex(topicPartition.partitionId());
|
||||
topic.partitions().add(partition);
|
||||
}
|
||||
return new AssignReplicasToDirsRequestData()
|
||||
.setBrokerId(brokerId)
|
||||
.setBrokerEpoch(brokerEpoch)
|
||||
.setDirectories(new ArrayList<>(directoryMap.values()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,11 @@ import org.junit.jupiter.api.BeforeEach;
|
|||
import org.junit.jupiter.api.Test;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
|
@ -41,6 +45,7 @@ import static org.apache.kafka.metadata.AssignmentsHelper.buildRequestData;
|
|||
import static org.apache.kafka.metadata.AssignmentsHelper.normalize;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.atMostOnce;
|
||||
import static org.mockito.Mockito.doAnswer;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.times;
|
||||
|
@ -71,11 +76,77 @@ public class AssignmentsManagerTest {
|
|||
manager.close();
|
||||
}
|
||||
|
||||
AssignReplicasToDirsRequestData normalize(AssignReplicasToDirsRequestData request) {
|
||||
request = request.duplicate();
|
||||
request.directories().sort(Comparator.comparing(
|
||||
AssignReplicasToDirsRequestData.DirectoryData::id));
|
||||
for (AssignReplicasToDirsRequestData.DirectoryData directory : request.directories()) {
|
||||
directory.topics().sort(Comparator.comparing(
|
||||
AssignReplicasToDirsRequestData.TopicData::topicId));
|
||||
for (AssignReplicasToDirsRequestData.TopicData topic : directory.topics()) {
|
||||
topic.partitions().sort(Comparator.comparing(
|
||||
AssignReplicasToDirsRequestData.PartitionData::partitionIndex));
|
||||
}
|
||||
}
|
||||
return request;
|
||||
}
|
||||
|
||||
void assertRequestEquals(AssignReplicasToDirsRequestData expected, AssignReplicasToDirsRequestData actual) {
|
||||
|
||||
void assertRequestEquals(
|
||||
AssignReplicasToDirsRequestData expected,
|
||||
AssignReplicasToDirsRequestData actual
|
||||
) {
|
||||
assertEquals(normalize(expected), normalize(actual));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testBuildRequestData() {
|
||||
Map<TopicIdPartition, Uuid> assignment = new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_1, 2), DIR_2);
|
||||
put(new TopicIdPartition(TOPIC_1, 3), DIR_3);
|
||||
put(new TopicIdPartition(TOPIC_1, 4), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_2, 5), DIR_2);
|
||||
}};
|
||||
AssignReplicasToDirsRequestData built = AssignmentsManager.buildRequestData(8, 100L, assignment);
|
||||
AssignReplicasToDirsRequestData expected = new AssignReplicasToDirsRequestData()
|
||||
.setBrokerId(8)
|
||||
.setBrokerEpoch(100L)
|
||||
.setDirectories(Arrays.asList(
|
||||
new AssignReplicasToDirsRequestData.DirectoryData()
|
||||
.setId(DIR_2)
|
||||
.setTopics(Arrays.asList(
|
||||
new AssignReplicasToDirsRequestData.TopicData()
|
||||
.setTopicId(TOPIC_1)
|
||||
.setPartitions(Collections.singletonList(
|
||||
new AssignReplicasToDirsRequestData.PartitionData()
|
||||
.setPartitionIndex(2))),
|
||||
new AssignReplicasToDirsRequestData.TopicData()
|
||||
.setTopicId(TOPIC_2)
|
||||
.setPartitions(Collections.singletonList(
|
||||
new AssignReplicasToDirsRequestData.PartitionData()
|
||||
.setPartitionIndex(5))))),
|
||||
new AssignReplicasToDirsRequestData.DirectoryData()
|
||||
.setId(DIR_3)
|
||||
.setTopics(Collections.singletonList(
|
||||
new AssignReplicasToDirsRequestData.TopicData()
|
||||
.setTopicId(TOPIC_1)
|
||||
.setPartitions(Collections.singletonList(
|
||||
new AssignReplicasToDirsRequestData.PartitionData()
|
||||
.setPartitionIndex(3))))),
|
||||
new AssignReplicasToDirsRequestData.DirectoryData()
|
||||
.setId(DIR_1)
|
||||
.setTopics(Collections.singletonList(
|
||||
new AssignReplicasToDirsRequestData.TopicData()
|
||||
.setTopicId(TOPIC_1)
|
||||
.setPartitions(Arrays.asList(
|
||||
new AssignReplicasToDirsRequestData.PartitionData()
|
||||
.setPartitionIndex(4),
|
||||
new AssignReplicasToDirsRequestData.PartitionData()
|
||||
.setPartitionIndex(1)))))));
|
||||
assertRequestEquals(expected, built);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAssignmentAggregation() throws InterruptedException {
|
||||
CountDownLatch readyToAssert = new CountDownLatch(1);
|
||||
|
@ -84,7 +155,8 @@ public class AssignmentsManagerTest {
|
|||
readyToAssert.countDown();
|
||||
}
|
||||
return null;
|
||||
}).when(channelManager).sendRequest(any(AssignReplicasToDirsRequest.Builder.class), any(ControllerRequestCompletionHandler.class));
|
||||
}).when(channelManager).sendRequest(any(AssignReplicasToDirsRequest.Builder.class),
|
||||
any(ControllerRequestCompletionHandler.class));
|
||||
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 1), DIR_1, () -> { });
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 2), DIR_2, () -> { });
|
||||
|
@ -96,19 +168,22 @@ public class AssignmentsManagerTest {
|
|||
manager.wakeup();
|
||||
}
|
||||
|
||||
ArgumentCaptor<AssignReplicasToDirsRequest.Builder> captor = ArgumentCaptor.forClass(AssignReplicasToDirsRequest.Builder.class);
|
||||
ArgumentCaptor<AssignReplicasToDirsRequest.Builder> captor =
|
||||
ArgumentCaptor.forClass(AssignReplicasToDirsRequest.Builder.class);
|
||||
verify(channelManager, times(1)).start();
|
||||
verify(channelManager).sendRequest(captor.capture(), any(ControllerRequestCompletionHandler.class));
|
||||
verify(channelManager, atMostOnce()).shutdown();
|
||||
verifyNoMoreInteractions(channelManager);
|
||||
assertEquals(1, captor.getAllValues().size());
|
||||
AssignReplicasToDirsRequestData actual = captor.getValue().build().data();
|
||||
AssignReplicasToDirsRequestData expected = buildRequestData(
|
||||
8, 100L, new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_1, 2), DIR_2);
|
||||
put(new TopicIdPartition(TOPIC_1, 3), DIR_3);
|
||||
put(new TopicIdPartition(TOPIC_1, 4), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_2, 5), DIR_2);
|
||||
}}
|
||||
8, 100L, new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_1, 2), DIR_2);
|
||||
put(new TopicIdPartition(TOPIC_1, 3), DIR_3);
|
||||
put(new TopicIdPartition(TOPIC_1, 4), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_2, 5), DIR_2);
|
||||
}}
|
||||
);
|
||||
assertRequestEquals(expected, actual);
|
||||
}
|
||||
|
@ -126,28 +201,38 @@ public class AssignmentsManagerTest {
|
|||
}
|
||||
if (readyToAssert.getCount() == 3) {
|
||||
invocation.getArgument(1, ControllerRequestCompletionHandler.class).onComplete(
|
||||
new ClientResponse(null, null, null, 0L, 0L, false, false,
|
||||
new UnsupportedVersionException("test unsupported version exception"), null, null)
|
||||
);
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 3), Uuid.fromString("xHLCnG54R9W3lZxTPnpk1Q"), () -> { });
|
||||
new ClientResponse(null, null, null, 0L, 0L, false, false,
|
||||
new UnsupportedVersionException("test unsupported version exception"), null, null));
|
||||
|
||||
// duplicate should be ignored
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 2), DIR_3, () -> { });
|
||||
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 3),
|
||||
Uuid.fromString("xHLCnG54R9W3lZxTPnpk1Q"), () -> { });
|
||||
}
|
||||
if (readyToAssert.getCount() == 2) {
|
||||
invocation.getArgument(1, ControllerRequestCompletionHandler.class).onComplete(
|
||||
new ClientResponse(null, null, null, 0L, 0L, false, false, null,
|
||||
new AuthenticationException("test authentication exception"), null)
|
||||
);
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 4), Uuid.fromString("RCYu1A0CTa6eEIpuKDOfxw"), () -> { });
|
||||
|
||||
// duplicate should be ignored
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 3),
|
||||
Uuid.fromString("xHLCnG54R9W3lZxTPnpk1Q"), () -> { });
|
||||
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 4),
|
||||
Uuid.fromString("RCYu1A0CTa6eEIpuKDOfxw"), () -> { });
|
||||
}
|
||||
if (readyToAssert.getCount() == 1) {
|
||||
invocation.getArgument(1, ControllerRequestCompletionHandler.class).onComplete(
|
||||
new ClientResponse(null, null, null, 0L, 0L, false, false, null, null,
|
||||
new AssignReplicasToDirsResponse(new AssignReplicasToDirsResponseData()
|
||||
.setErrorCode(Errors.NOT_CONTROLLER.code())
|
||||
.setThrottleTimeMs(0)))
|
||||
);
|
||||
new ClientResponse(null, null, null, 0L, 0L, false, false, null, null,
|
||||
new AssignReplicasToDirsResponse(new AssignReplicasToDirsResponseData()
|
||||
.setErrorCode(Errors.NOT_CONTROLLER.code())
|
||||
.setThrottleTimeMs(0))));
|
||||
}
|
||||
return null;
|
||||
}).when(channelManager).sendRequest(any(AssignReplicasToDirsRequest.Builder.class), any(ControllerRequestCompletionHandler.class));
|
||||
}).when(channelManager).sendRequest(any(AssignReplicasToDirsRequest.Builder.class),
|
||||
any(ControllerRequestCompletionHandler.class));
|
||||
|
||||
manager.onAssignment(new TopicIdPartition(TOPIC_1, 1), DIR_1, () -> { });
|
||||
while (!readyToAssert.await(1, TimeUnit.MILLISECONDS)) {
|
||||
|
@ -155,28 +240,32 @@ public class AssignmentsManagerTest {
|
|||
manager.wakeup();
|
||||
}
|
||||
|
||||
ArgumentCaptor<AssignReplicasToDirsRequest.Builder> captor = ArgumentCaptor.forClass(AssignReplicasToDirsRequest.Builder.class);
|
||||
verify(channelManager, times(5)).sendRequest(captor.capture(), any(ControllerRequestCompletionHandler.class));
|
||||
ArgumentCaptor<AssignReplicasToDirsRequest.Builder> captor =
|
||||
ArgumentCaptor.forClass(AssignReplicasToDirsRequest.Builder.class);
|
||||
verify(channelManager, times(1)).start();
|
||||
verify(channelManager, times(5)).sendRequest(captor.capture(),
|
||||
any(ControllerRequestCompletionHandler.class));
|
||||
verify(channelManager, atMostOnce()).shutdown();
|
||||
verifyNoMoreInteractions(channelManager);
|
||||
assertEquals(5, captor.getAllValues().size());
|
||||
assertRequestEquals(buildRequestData(
|
||||
8, 100L, new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
}}
|
||||
8, 100L, new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
}}
|
||||
), captor.getAllValues().get(0).build().data());
|
||||
assertRequestEquals(buildRequestData(
|
||||
8, 100L, new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_1, 2), DIR_3);
|
||||
}}
|
||||
8, 100L, new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_1, 2), DIR_3);
|
||||
}}
|
||||
), captor.getAllValues().get(1).build().data());
|
||||
assertRequestEquals(buildRequestData(
|
||||
8, 100L, new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_1, 2), DIR_3);
|
||||
put(new TopicIdPartition(TOPIC_1, 3), Uuid.fromString("xHLCnG54R9W3lZxTPnpk1Q"));
|
||||
put(new TopicIdPartition(TOPIC_1, 4), Uuid.fromString("RCYu1A0CTa6eEIpuKDOfxw"));
|
||||
}}
|
||||
8, 100L, new HashMap<TopicIdPartition, Uuid>() {{
|
||||
put(new TopicIdPartition(TOPIC_1, 1), DIR_1);
|
||||
put(new TopicIdPartition(TOPIC_1, 2), DIR_3);
|
||||
put(new TopicIdPartition(TOPIC_1, 3), Uuid.fromString("xHLCnG54R9W3lZxTPnpk1Q"));
|
||||
put(new TopicIdPartition(TOPIC_1, 4), Uuid.fromString("RCYu1A0CTa6eEIpuKDOfxw"));
|
||||
}}
|
||||
), captor.getAllValues().get(4).build().data());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue