mirror of https://github.com/apache/kafka.git
KAFKA-8286; Generalized Leader Election Admin RPC (KIP-460) (#6686)
Implements KIP-460: https://cwiki.apache.org/confluence/display/KAFKA/KIP-460%3A+Admin+Leader+Election+RPC. Reviewers: Jun Rao <junrao@gmail.com>, Jason Gustafson <jason@confluent.io>
commit 121308cc7a (parent 348e5d738f)

bin/kafka-leader-election.sh (new file)
@@ -0,0 +1,17 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

exec $(dirname $0)/kafka-run-class.sh kafka.admin.LeaderElectionCommand "$@"
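
A sample invocation of the new script (not shown in this diff; flags as defined in KIP-460): bin/kafka-leader-election.sh --bootstrap-server localhost:9092 --election-type preferred --all-topic-partitions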

bin/windows/kafka-leader-election.bat (new file)
@@ -0,0 +1,17 @@
@echo off
rem Licensed to the Apache Software Foundation (ASF) under one or more
rem contributor license agreements. See the NOTICE file distributed with
rem this work for additional information regarding copyright ownership.
rem The ASF licenses this file to You under the Apache License, Version 2.0
rem (the "License"); you may not use this file except in compliance with
rem the License. You may obtain a copy of the License at
rem
rem     http://www.apache.org/licenses/LICENSE-2.0
rem
rem Unless required by applicable law or agreed to in writing, software
rem distributed under the License is distributed on an "AS IS" BASIS,
rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
rem See the License for the specific language governing permissions and
rem limitations under the License.

"%~dp0kafka-run-class.bat" kafka.admin.LeaderElectionCommand %*

clients/src/main/java/org/apache/kafka/clients/admin/AdminClient.java
@@ -17,6 +17,14 @@
 
 package org.apache.kafka.clients.admin;
 
+import java.time.Duration;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import org.apache.kafka.common.ElectionType;
 import org.apache.kafka.common.Metric;
 import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.TopicPartition;
@@ -26,12 +34,6 @@ import org.apache.kafka.common.acl.AclBindingFilter;
 import org.apache.kafka.common.annotation.InterfaceStability;
 import org.apache.kafka.common.config.ConfigResource;
 
-import java.time.Duration;
-import java.util.Collection;
-import java.util.Map;
-import java.util.Properties;
-import java.util.concurrent.TimeUnit;
-
 /**
  * The administrative client for Kafka, which supports managing and inspecting topics, brokers, configurations and ACLs.
  *
@@ -839,35 +841,75 @@ public abstract class AdminClient implements AutoCloseable {
     }
 
     /**
-     * Elect the preferred broker of the given {@code partitions} as leader, or
-     * elect the preferred broker for all partitions as leader if the argument to {@code partitions} is null.
+     * Elect the preferred replica as leader for topic partitions.
      *
-     * This is a convenience method for {@link #electPreferredLeaders(Collection, ElectPreferredLeadersOptions)}
-     * with default options.
-     * See the overload for more details.
+     * This is a convenience method for {@link #electLeaders(ElectionType, Set, ElectLeadersOptions)}
+     * with preferred election type and default options.
      *
      * This operation is supported by brokers with version 2.2.0 or higher.
     *
     * @param partitions      The partitions for which the preferred leader should be elected.
     * @return                The ElectPreferredLeadersResult.
+     * @deprecated      Since 2.4.0. Use {@link #electLeaders(ElectionType, Set)}.
      */
+    @Deprecated
     public ElectPreferredLeadersResult electPreferredLeaders(Collection<TopicPartition> partitions) {
         return electPreferredLeaders(partitions, new ElectPreferredLeadersOptions());
     }
 
     /**
-     * Elect the preferred broker of the given {@code partitions} as leader, or
-     * elect the preferred broker for all partitions as leader if the argument to {@code partitions} is null.
+     * Elect the preferred replica as leader for topic partitions.
      *
-     * This operation is not transactional so it may succeed for some partitions while fail for others.
-     *
-     * It may take several seconds after this method returns
-     * success for all the brokers in the cluster to become aware that the partitions have new leaders.
-     * During this time, {@link AdminClient#describeTopics(Collection)}
-     * may not return information about the partitions' new leaders.
+     * This is a convenience method for {@link #electLeaders(ElectionType, Set, ElectLeadersOptions)}
+     * with preferred election type.
      *
      * This operation is supported by brokers with version 2.2.0 or higher.
      *
-     * <p>The following exceptions can be anticipated when calling {@code get()} on the futures obtained from
-     * the returned {@code ElectPreferredLeadersResult}:</p>
     * @param partitions      The partitions for which the preferred leader should be elected.
     * @param options         The options to use when electing the preferred leaders.
     * @return                The ElectPreferredLeadersResult.
+     * @deprecated      Since 2.4.0. Use {@link #electLeaders(ElectionType, Set, ElectLeadersOptions)}.
      */
+    @Deprecated
+    public ElectPreferredLeadersResult electPreferredLeaders(Collection<TopicPartition> partitions,
+                                                             ElectPreferredLeadersOptions options) {
+        final ElectLeadersOptions newOptions = new ElectLeadersOptions();
+        newOptions.timeoutMs(options.timeoutMs());
+        final Set<TopicPartition> topicPartitions = partitions == null ? null : new HashSet<>(partitions);
+
+        return new ElectPreferredLeadersResult(electLeaders(ElectionType.PREFERRED, topicPartitions, newOptions));
+    }
+
+    /**
+     * Elect a replica as leader for topic partitions.
+     *
+     * This is a convenience method for {@link #electLeaders(ElectionType, Set, ElectLeadersOptions)}
+     * with default options.
+     *
+     * @param electionType            The type of election to conduct.
+     * @param partitions              The topics and partitions for which to conduct elections.
+     * @return                        The ElectLeadersResult.
+     */
+    public ElectLeadersResult electLeaders(ElectionType electionType, Set<TopicPartition> partitions) {
+        return electLeaders(electionType, partitions, new ElectLeadersOptions());
+    }
+
+    /**
+     * Elect a replica as leader for the given {@code partitions}, or for all partitions if the argument
+     * to {@code partitions} is null.
+     *
+     * This operation is not transactional so it may succeed for some partitions while fail for others.
+     *
+     * It may take several seconds after this method returns success for all the brokers in the cluster
+     * to become aware that the partitions have new leaders. During this time,
+     * {@link AdminClient#describeTopics(Collection)} may not return information about the partitions'
+     * new leaders.
+     *
+     * This operation is supported by brokers with version 2.2.0 or later if preferred election is used;
+     * otherwise the brokers must be 2.4.0 or higher.
+     *
+     * <p>The following exceptions can be anticipated when calling {@code get()} on the future obtained
+     * from the returned {@code ElectLeadersResult}:</p>
      * <ul>
      * <li>{@link org.apache.kafka.common.errors.ClusterAuthorizationException}
      * if the authenticated user didn't have alter access to the cluster.</li>
@@ -883,12 +925,15 @@ public abstract class AdminClient implements AutoCloseable {
      * if the preferred leader was not alive or not in the ISR.</li>
      * </ul>
      *
-     * @param partitions      The partitions for which the preferred leader should be elected.
-     * @param options         The options to use when electing the preferred leaders.
-     * @return                The ElectPreferredLeadersResult.
+     * @param electionType            The type of election to conduct.
+     * @param partitions              The topics and partitions for which to conduct elections.
+     * @param options                 The options to use when electing the leaders.
+     * @return                        The ElectLeadersResult.
      */
-    public abstract ElectPreferredLeadersResult electPreferredLeaders(Collection<TopicPartition> partitions,
-                                                                      ElectPreferredLeadersOptions options);
+    public abstract ElectLeadersResult electLeaders(
+            ElectionType electionType,
+            Set<TopicPartition> partitions,
+            ElectLeadersOptions options);
 
     /**
      * Get the metrics kept by the adminClient
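
To make the new API concrete, a minimal usage sketch follows. It is not part of this commit; it assumes a broker at localhost:9092 and a topic named my-topic, and uses only the methods introduced above.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.ElectLeadersResult;
import org.apache.kafka.common.ElectionType;
import org.apache.kafka.common.TopicPartition;

public class ElectLeadersExample {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            Set<TopicPartition> partitions = new HashSet<>(Arrays.asList(
                    new TopicPartition("my-topic", 0),
                    new TopicPartition("my-topic", 1)));
            // Ask the controller to elect the preferred replica for each partition.
            ElectLeadersResult result = admin.electLeaders(ElectionType.PREFERRED, partitions);
            // partitions() maps each attempted partition to an empty Optional on success,
            // or to the error that caused that partition's election to fail.
            for (Map.Entry<TopicPartition, Optional<Throwable>> entry :
                    result.partitions().get().entrySet()) {
                System.out.println(entry.getKey() + " -> "
                        + entry.getValue().map(Throwable::getMessage).orElse("leader elected"));
            }
        }
    }
}

Calling result.all().get() instead fails fast, completing exceptionally on the first per-partition error.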

clients/src/main/java/org/apache/kafka/clients/admin/ElectLeadersOptions.java (new file)
@@ -0,0 +1,29 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.clients.admin;

import org.apache.kafka.common.annotation.InterfaceStability;

/**
 * Options for {@link AdminClient#electLeaders(ElectionType, Set, ElectLeadersOptions)}.
 *
 * The API of this class is evolving, see {@link AdminClient} for details.
 */
@InterfaceStability.Evolving
public final class ElectLeadersOptions extends AbstractOptions<ElectLeadersOptions> {
}

clients/src/main/java/org/apache/kafka/clients/admin/ElectLeadersResult.java (new file)
@@ -0,0 +1,76 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.clients.admin;

import java.util.Map;
import java.util.Optional;
import org.apache.kafka.common.KafkaFuture;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.annotation.InterfaceStability;
import org.apache.kafka.common.internals.KafkaFutureImpl;

/**
 * The result of {@link AdminClient#electLeaders(ElectionType, Set, ElectLeadersOptions)}
 *
 * The API of this class is evolving, see {@link AdminClient} for details.
 */
@InterfaceStability.Evolving
public final class ElectLeadersResult {
    private final KafkaFutureImpl<Map<TopicPartition, Optional<Throwable>>> electionFuture;

    ElectLeadersResult(KafkaFutureImpl<Map<TopicPartition, Optional<Throwable>>> electionFuture) {
        this.electionFuture = electionFuture;
    }

    /**
     * <p>Get a future for the topic partitions for which a leader election was attempted.
     * If the election succeeded then the value for a topic partition will be the empty Optional.
     * Otherwise the election failed and the Optional will be set with the error.</p>
     */
    public KafkaFuture<Map<TopicPartition, Optional<Throwable>>> partitions() {
        return electionFuture;
    }

    /**
     * Return a future which succeeds if all the topic elections succeed.
     */
    public KafkaFuture<Void> all() {
        final KafkaFutureImpl<Void> result = new KafkaFutureImpl<>();

        partitions().whenComplete(
            new KafkaFuture.BiConsumer<Map<TopicPartition, Optional<Throwable>>, Throwable>() {
                @Override
                public void accept(Map<TopicPartition, Optional<Throwable>> topicPartitions, Throwable throwable) {
                    if (throwable != null) {
                        result.completeExceptionally(throwable);
                    } else {
                        for (Optional<Throwable> exception : topicPartitions.values()) {
                            if (exception.isPresent()) {
                                result.completeExceptionally(exception.get());
                                return;
                            }
                        }
                        result.complete(null);
                    }
                }
            });

        return result;
    }
}

clients/src/main/java/org/apache/kafka/clients/admin/ElectPreferredLeadersOptions.java
@@ -18,14 +18,16 @@
 package org.apache.kafka.clients.admin;
 
 import org.apache.kafka.common.annotation.InterfaceStability;
 
 import java.util.Collection;
 
 /**
  * Options for {@link AdminClient#electPreferredLeaders(Collection, ElectPreferredLeadersOptions)}.
  *
  * The API of this class is evolving, see {@link AdminClient} for details.
+ *
+ * @deprecated Since 2.4.0. Use {@link AdminClient#electLeaders(ElectionType, Set, ElectLeadersOptions)}.
  */
 @InterfaceStability.Evolving
+@Deprecated
 public class ElectPreferredLeadersOptions extends AbstractOptions<ElectPreferredLeadersOptions> {
 }

clients/src/main/java/org/apache/kafka/clients/admin/ElectPreferredLeadersResult.java
@@ -17,33 +17,31 @@
|
 package org.apache.kafka.clients.admin;
 
-import org.apache.kafka.common.KafkaFuture;
-import org.apache.kafka.common.TopicPartition;
-import org.apache.kafka.common.annotation.InterfaceStability;
-import org.apache.kafka.common.errors.ApiException;
-import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
-import org.apache.kafka.common.internals.KafkaFutureImpl;
-import org.apache.kafka.common.protocol.Errors;
-import org.apache.kafka.common.requests.ApiError;
-
 import java.util.Collection;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
+import org.apache.kafka.common.KafkaFuture;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.annotation.InterfaceStability;
+import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
+import org.apache.kafka.common.internals.KafkaFutureImpl;
 
 /**
  * The result of {@link AdminClient#electPreferredLeaders(Collection, ElectPreferredLeadersOptions)}
  *
  * The API of this class is evolving, see {@link AdminClient} for details.
+ *
+ * @deprecated Since 2.4.0. Use {@link AdminClient#electLeaders(ElectionType, Set, ElectLeadersOptions)}.
  */
 @InterfaceStability.Evolving
+@Deprecated
 public class ElectPreferredLeadersResult {
+    private final ElectLeadersResult electionResult;
 
-    private final KafkaFutureImpl<Map<TopicPartition, ApiError>> electionFuture;
-    private final Set<TopicPartition> partitions;
-
-    ElectPreferredLeadersResult(KafkaFutureImpl<Map<TopicPartition, ApiError>> electionFuture, Set<TopicPartition> partitions) {
-        this.electionFuture = electionFuture;
-        this.partitions = partitions;
+    ElectPreferredLeadersResult(ElectLeadersResult electionResult) {
+        this.electionResult = electionResult;
     }
 
     /**
@@ -53,30 +51,28 @@ public class ElectPreferredLeadersResult {
      */
     public KafkaFuture<Void> partitionResult(final TopicPartition partition) {
         final KafkaFutureImpl<Void> result = new KafkaFutureImpl<>();
-        electionFuture.whenComplete(new KafkaFuture.BiConsumer<Map<TopicPartition, ApiError>, Throwable>() {
-            @Override
-            public void accept(Map<TopicPartition, ApiError> topicPartitions, Throwable throwable) {
-                if (throwable != null) {
-                    result.completeExceptionally(throwable);
-                } else if (!topicPartitions.containsKey(partition)) {
-                    result.completeExceptionally(new UnknownTopicOrPartitionException(
-                            "Preferred leader election for partition \"" + partition +
-                            "\" was not attempted"));
-                } else if (partitions == null && topicPartitions.isEmpty()) {
-                    // If partitions is null, we requested information about all partitions. In
-                    // that case, if topicPartitions is empty, that indicates a
-                    // CLUSTER_AUTHORIZATION_FAILED error.
-                    result.completeExceptionally(Errors.CLUSTER_AUTHORIZATION_FAILED.exception());
-                } else {
-                    ApiException exception = topicPartitions.get(partition).exception();
-                    if (exception == null) {
-                        result.complete(null);
-                    } else {
-                        result.completeExceptionally(exception);
+
+        electionResult.partitions().whenComplete(
+            new KafkaFuture.BiConsumer<Map<TopicPartition, Optional<Throwable>>, Throwable>() {
+                @Override
+                public void accept(Map<TopicPartition, Optional<Throwable>> topicPartitions, Throwable throwable) {
+                    if (throwable != null) {
+                        result.completeExceptionally(throwable);
+                    } else if (!topicPartitions.containsKey(partition)) {
+                        result.completeExceptionally(new UnknownTopicOrPartitionException(
+                                "Preferred leader election for partition \"" + partition +
+                                "\" was not attempted"));
+                    } else {
+                        Optional<Throwable> exception = topicPartitions.get(partition);
+                        if (exception.isPresent()) {
+                            result.completeExceptionally(exception.get());
+                        } else {
+                            result.complete(null);
+                        }
                     }
                 }
-            }
-        });
+            });
+
         return result;
     }
 
@@ -90,49 +86,27 @@ public class ElectPreferredLeadersResult {
      * with a null {@code partitions} argument.</p>
      */
     public KafkaFuture<Set<TopicPartition>> partitions() {
-        if (partitions != null) {
-            return KafkaFutureImpl.completedFuture(this.partitions);
-        } else {
-            final KafkaFutureImpl<Set<TopicPartition>> result = new KafkaFutureImpl<>();
-            electionFuture.whenComplete(new KafkaFuture.BiConsumer<Map<TopicPartition, ApiError>, Throwable>() {
-                @Override
-                public void accept(Map<TopicPartition, ApiError> topicPartitions, Throwable throwable) {
-                    if (throwable != null) {
-                        result.completeExceptionally(throwable);
-                    } else if (topicPartitions.isEmpty()) {
-                        result.completeExceptionally(Errors.CLUSTER_AUTHORIZATION_FAILED.exception());
-                    } else {
-                        for (ApiError apiError : topicPartitions.values()) {
-                            if (apiError.isFailure()) {
-                                result.completeExceptionally(apiError.exception());
-                            }
-                        }
-                        result.complete(topicPartitions.keySet());
+        final KafkaFutureImpl<Set<TopicPartition>> result = new KafkaFutureImpl<>();
+
+        electionResult.partitions().whenComplete(
+            new KafkaFuture.BiConsumer<Map<TopicPartition, Optional<Throwable>>, Throwable>() {
+                @Override
+                public void accept(Map<TopicPartition, Optional<Throwable>> topicPartitions, Throwable throwable) {
+                    if (throwable != null) {
+                        result.completeExceptionally(throwable);
+                    } else {
+                        result.complete(topicPartitions.keySet());
                     }
                 }
-            });
-            return result;
-        }
+            });
+
+        return result;
     }
 
     /**
      * Return a future which succeeds if all the topic elections succeed.
      */
     public KafkaFuture<Void> all() {
-        final KafkaFutureImpl<Void> result = new KafkaFutureImpl<>();
-        electionFuture.thenApply(new KafkaFuture.Function<Map<TopicPartition, ApiError>, Void>() {
-            @Override
-            public Void apply(Map<TopicPartition, ApiError> topicPartitions) {
-                for (ApiError apiError : topicPartitions.values()) {
-                    if (apiError.isFailure()) {
-                        result.completeExceptionally(apiError.exception());
-                        return null;
-                    }
-                }
-                result.complete(null);
-                return null;
-            }
-        });
-        return result;
+        return electionResult.all();
     }
 }

clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java
@@ -34,6 +34,7 @@ import org.apache.kafka.clients.consumer.internals.ConsumerProtocol;
 import org.apache.kafka.clients.consumer.internals.PartitionAssignor;
 import org.apache.kafka.common.Cluster;
 import org.apache.kafka.common.ConsumerGroupState;
+import org.apache.kafka.common.ElectionType;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.Metric;
@@ -89,22 +90,21 @@ import org.apache.kafka.common.requests.AlterConfigsResponse;
 import org.apache.kafka.common.requests.AlterReplicaLogDirsRequest;
 import org.apache.kafka.common.requests.AlterReplicaLogDirsResponse;
 import org.apache.kafka.common.requests.ApiError;
-import org.apache.kafka.common.requests.CreateAclsRequest;
 import org.apache.kafka.common.requests.CreateAclsRequest.AclCreation;
-import org.apache.kafka.common.requests.CreateAclsResponse;
+import org.apache.kafka.common.requests.CreateAclsRequest;
 import org.apache.kafka.common.requests.CreateAclsResponse.AclCreationResponse;
+import org.apache.kafka.common.requests.CreateAclsResponse;
 import org.apache.kafka.common.requests.CreateDelegationTokenRequest;
 import org.apache.kafka.common.requests.CreateDelegationTokenResponse;
-import org.apache.kafka.common.requests.CreatePartitionsRequest;
 import org.apache.kafka.common.requests.CreatePartitionsRequest.PartitionDetails;
+import org.apache.kafka.common.requests.CreatePartitionsRequest;
 import org.apache.kafka.common.requests.CreatePartitionsResponse;
 import org.apache.kafka.common.requests.CreateTopicsRequest;
 import org.apache.kafka.common.requests.CreateTopicsResponse;
 import org.apache.kafka.common.requests.DeleteAclsRequest;
-import org.apache.kafka.common.requests.DeleteAclsResponse;
 import org.apache.kafka.common.requests.DeleteAclsResponse.AclDeletionResult;
 import org.apache.kafka.common.requests.DeleteAclsResponse.AclFilterResponse;
-import org.apache.kafka.common.requests.FindCoordinatorRequest.CoordinatorType;
+import org.apache.kafka.common.requests.DeleteAclsResponse;
 import org.apache.kafka.common.requests.DeleteGroupsRequest;
 import org.apache.kafka.common.requests.DeleteGroupsResponse;
 import org.apache.kafka.common.requests.DeleteRecordsRequest;
@@ -121,10 +121,11 @@ import org.apache.kafka.common.requests.DescribeGroupsRequest;
 import org.apache.kafka.common.requests.DescribeGroupsResponse;
 import org.apache.kafka.common.requests.DescribeLogDirsRequest;
 import org.apache.kafka.common.requests.DescribeLogDirsResponse;
-import org.apache.kafka.common.requests.ElectPreferredLeadersRequest;
-import org.apache.kafka.common.requests.ElectPreferredLeadersResponse;
+import org.apache.kafka.common.requests.ElectLeadersRequest;
+import org.apache.kafka.common.requests.ElectLeadersResponse;
 import org.apache.kafka.common.requests.ExpireDelegationTokenRequest;
 import org.apache.kafka.common.requests.ExpireDelegationTokenResponse;
+import org.apache.kafka.common.requests.FindCoordinatorRequest.CoordinatorType;
 import org.apache.kafka.common.requests.FindCoordinatorRequest;
 import org.apache.kafka.common.requests.FindCoordinatorResponse;
 import org.apache.kafka.common.requests.IncrementalAlterConfigsRequest;
@@ -3016,25 +3017,33 @@ public class KafkaAdminClient extends AdminClient {
     }
 
     @Override
-    public ElectPreferredLeadersResult electPreferredLeaders(final Collection<TopicPartition> partitions,
-                                                             ElectPreferredLeadersOptions options) {
-        final Set<TopicPartition> partitionSet = partitions != null ? new HashSet<>(partitions) : null;
-        final KafkaFutureImpl<Map<TopicPartition, ApiError>> electionFuture = new KafkaFutureImpl<>();
+    public ElectLeadersResult electLeaders(
+            final ElectionType electionType,
+            final Set<TopicPartition> topicPartitions,
+            ElectLeadersOptions options) {
+        final KafkaFutureImpl<Map<TopicPartition, Optional<Throwable>>> electionFuture = new KafkaFutureImpl<>();
         final long now = time.milliseconds();
-        runnable.call(new Call("electPreferredLeaders", calcDeadlineMs(now, options.timeoutMs()),
+        runnable.call(new Call("electLeaders", calcDeadlineMs(now, options.timeoutMs()),
             new ControllerNodeProvider()) {
 
             @Override
             public AbstractRequest.Builder createRequest(int timeoutMs) {
-                return new ElectPreferredLeadersRequest.Builder(
-                        ElectPreferredLeadersRequest.toRequestData(partitions, timeoutMs));
+                return new ElectLeadersRequest.Builder(electionType, topicPartitions, timeoutMs);
             }
 
             @Override
             public void handleResponse(AbstractResponse abstractResponse) {
-                ElectPreferredLeadersResponse response = (ElectPreferredLeadersResponse) abstractResponse;
-                electionFuture.complete(
-                        ElectPreferredLeadersRequest.fromResponseData(response.data()));
+                ElectLeadersResponse response = (ElectLeadersResponse) abstractResponse;
+                Map<TopicPartition, Optional<Throwable>> result = ElectLeadersResponse.electLeadersResult(response.data());
+
+                // For version 0, errorCode is always 0, which maps to Errors.NONE
+                Errors error = Errors.forCode(response.data().errorCode());
+                if (error != Errors.NONE) {
+                    electionFuture.completeExceptionally(error.exception());
+                    return;
+                }
+
+                electionFuture.complete(result);
             }
 
             @Override
@@ -3042,6 +3051,7 @@ public class KafkaAdminClient extends AdminClient {
                 electionFuture.completeExceptionally(throwable);
             }
         }, now);
-        return new ElectPreferredLeadersResult(electionFuture, partitionSet);
+
+        return new ElectLeadersResult(electionFuture);
     }
 }

clients/src/main/java/org/apache/kafka/common/ElectionType.java (new file)
@@ -0,0 +1,48 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.common;

import java.util.Arrays;
import org.apache.kafka.common.annotation.InterfaceStability;

/**
 * The type of election to conduct for {@link org.apache.kafka.clients.admin.AdminClient#electLeaders(ElectionType, Set, ElectLeadersOptions)}.
 *
 * The API of this class is evolving, see {@link org.apache.kafka.clients.admin.AdminClient} for details.
 */
@InterfaceStability.Evolving
public enum ElectionType {
    PREFERRED((byte) 0), UNCLEAN((byte) 1);

    public final byte value;

    ElectionType(byte value) {
        this.value = value;
    }

    public static ElectionType valueOf(byte value) {
        if (value == PREFERRED.value) {
            return PREFERRED;
        } else if (value == UNCLEAN.value) {
            return UNCLEAN;
        } else {
            throw new IllegalArgumentException(
                    String.format("Value %s must be one of %s", value, Arrays.asList(ElectionType.values())));
        }
    }
}
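
A small sketch (not from the patch) of how the byte-valued enum round-trips through the wire format, where the int8 carried in ElectLeadersRequest v1+ is decoded with valueOf:

import org.apache.kafka.common.ElectionType;

public class ElectionTypeRoundTrip {
    public static void main(String[] args) {
        byte wire = ElectionType.UNCLEAN.value;            // serialized as int8 value 1
        ElectionType parsed = ElectionType.valueOf(wire);  // UNCLEAN
        System.out.println(wire + " -> " + parsed);
        // Unknown byte values are rejected rather than silently mapped.
        try {
            ElectionType.valueOf((byte) 2);
        } catch (IllegalArgumentException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}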

clients/src/main/java/org/apache/kafka/common/errors/ElectionNotNeededException.java (new file)
@@ -0,0 +1,28 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.common.errors;

public class ElectionNotNeededException extends InvalidMetadataException {

    public ElectionNotNeededException(String message) {
        super(message);
    }

    public ElectionNotNeededException(String message, Throwable cause) {
        super(message, cause);
    }
}

clients/src/main/java/org/apache/kafka/common/errors/EligibleLeadersNotAvailableException.java (new file)
@@ -0,0 +1,28 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.common.errors;

public class EligibleLeadersNotAvailableException extends InvalidMetadataException {

    public EligibleLeadersNotAvailableException(String message) {
        super(message);
    }

    public EligibleLeadersNotAvailableException(String message, Throwable cause) {
        super(message, cause);
    }
}
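
As context (an interpretation from KIP-460, not stated in the diff itself): EligibleLeadersNotAvailableException is expected when an election finds no eligible replica alive to become leader, typically an unclean election with no live replicas, while ElectionNotNeededException signals that the requested election would not change the current leader, e.g. the preferred replica already leads the partition. Both extend InvalidMetadataException, so clients treat them as retriable metadata errors.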

clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java
@@ -24,8 +24,8 @@ import org.apache.kafka.common.message.DeleteTopicsRequestData;
 import org.apache.kafka.common.message.DeleteTopicsResponseData;
 import org.apache.kafka.common.message.DescribeGroupsRequestData;
 import org.apache.kafka.common.message.DescribeGroupsResponseData;
-import org.apache.kafka.common.message.ElectPreferredLeadersRequestData;
-import org.apache.kafka.common.message.ElectPreferredLeadersResponseData;
+import org.apache.kafka.common.message.ElectLeadersRequestData;
+import org.apache.kafka.common.message.ElectLeadersResponseData;
 import org.apache.kafka.common.message.FindCoordinatorRequestData;
 import org.apache.kafka.common.message.FindCoordinatorResponseData;
 import org.apache.kafka.common.message.HeartbeatRequestData;
@@ -190,8 +190,8 @@ public enum ApiKeys {
     EXPIRE_DELEGATION_TOKEN(40, "ExpireDelegationToken", ExpireDelegationTokenRequest.schemaVersions(), ExpireDelegationTokenResponse.schemaVersions()),
     DESCRIBE_DELEGATION_TOKEN(41, "DescribeDelegationToken", DescribeDelegationTokenRequest.schemaVersions(), DescribeDelegationTokenResponse.schemaVersions()),
     DELETE_GROUPS(42, "DeleteGroups", DeleteGroupsRequest.schemaVersions(), DeleteGroupsResponse.schemaVersions()),
-    ELECT_PREFERRED_LEADERS(43, "ElectPreferredLeaders", ElectPreferredLeadersRequestData.SCHEMAS,
-        ElectPreferredLeadersResponseData.SCHEMAS),
+    ELECT_LEADERS(43, "ElectLeaders", ElectLeadersRequestData.SCHEMAS,
+        ElectLeadersResponseData.SCHEMAS),
     INCREMENTAL_ALTER_CONFIGS(44, "IncrementalAlterConfigs", IncrementalAlterConfigsRequestData.SCHEMAS,
         IncrementalAlterConfigsResponseData.SCHEMAS);
 

clients/src/main/java/org/apache/kafka/common/protocol/Errors.java
@@ -62,6 +62,8 @@ import org.apache.kafka.common.errors.LeaderNotAvailableException;
 import org.apache.kafka.common.errors.LogDirNotFoundException;
 import org.apache.kafka.common.errors.FencedInstanceIdException;
 import org.apache.kafka.common.errors.MemberIdRequiredException;
+import org.apache.kafka.common.errors.ElectionNotNeededException;
+import org.apache.kafka.common.errors.EligibleLeadersNotAvailableException;
 import org.apache.kafka.common.errors.NetworkException;
 import org.apache.kafka.common.errors.NotControllerException;
 import org.apache.kafka.common.errors.NotCoordinatorException;
@@ -306,7 +308,10 @@ public enum Errors {
     GROUP_MAX_SIZE_REACHED(81, "The consumer group has reached its max size.", GroupMaxSizeReachedException::new),
     FENCED_INSTANCE_ID(82, "The broker rejected this static consumer since " +
         "another consumer with the same group.instance.id has registered with a different member.id.",
-        FencedInstanceIdException::new);
+        FencedInstanceIdException::new),
+    ELIGIBLE_LEADERS_NOT_AVAILABLE(83, "Eligible topic partition leaders are not available",
+        EligibleLeadersNotAvailableException::new),
+    ELECTION_NOT_NEEDED(84, "Leader election not needed for topic partition", ElectionNotNeededException::new);
 
     private static final Logger log = LoggerFactory.getLogger(Errors.class);
 
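
A short sketch (not part of the patch) of how the two new codes surface on the client:

import org.apache.kafka.common.protocol.Errors;

public class NewElectionErrorCodes {
    public static void main(String[] args) {
        // Wire codes 83 and 84 map to the two new exception types added above.
        Errors error = Errors.forCode((short) 84);  // ELECTION_NOT_NEEDED
        RuntimeException e = error.exception("partition my-topic-0");
        System.out.println(error + " -> " + e.getClass().getSimpleName());
    }
}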

clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java
@@ -229,8 +229,8 @@ public abstract class AbstractRequest extends AbstractRequestResponse {
                 return new DescribeDelegationTokenRequest(struct, apiVersion);
             case DELETE_GROUPS:
                 return new DeleteGroupsRequest(struct, apiVersion);
-            case ELECT_PREFERRED_LEADERS:
-                return new ElectPreferredLeadersRequest(struct, apiVersion);
+            case ELECT_LEADERS:
+                return new ElectLeadersRequest(struct, apiVersion);
             case INCREMENTAL_ALTER_CONFIGS:
                 return new IncrementalAlterConfigsRequest(struct, apiVersion);
             default:

clients/src/main/java/org/apache/kafka/common/requests/AbstractResponse.java
@@ -156,8 +156,8 @@ public abstract class AbstractResponse extends AbstractRequestResponse {
                 return new DescribeDelegationTokenResponse(struct);
             case DELETE_GROUPS:
                 return new DeleteGroupsResponse(struct);
-            case ELECT_PREFERRED_LEADERS:
-                return new ElectPreferredLeadersResponse(struct, version);
+            case ELECT_LEADERS:
+                return new ElectLeadersResponse(struct, version);
             case INCREMENTAL_ALTER_CONFIGS:
                 return new IncrementalAlterConfigsResponse(struct, version);
             default:

clients/src/main/java/org/apache/kafka/common/requests/ApiError.java
@@ -50,6 +50,10 @@ public class ApiError {
         message = struct.getOrElse(ERROR_MESSAGE, null);
     }
 
+    public ApiError(Errors error) {
+        this(error, error.message());
+    }
+
     public ApiError(Errors error, String message) {
         this.error = error;
         this.message = message;

clients/src/main/java/org/apache/kafka/common/requests/ElectLeadersRequest.java (new file)
@@ -0,0 +1,134 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.common.requests;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.ElectionType;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.UnsupportedVersionException;
import org.apache.kafka.common.message.ElectLeadersRequestData.TopicPartitions;
import org.apache.kafka.common.message.ElectLeadersRequestData;
import org.apache.kafka.common.message.ElectLeadersResponseData.PartitionResult;
import org.apache.kafka.common.message.ElectLeadersResponseData.ReplicaElectionResult;
import org.apache.kafka.common.protocol.ApiKeys;
import org.apache.kafka.common.protocol.MessageUtil;
import org.apache.kafka.common.protocol.types.Struct;
import org.apache.kafka.common.utils.CollectionUtils;

public class ElectLeadersRequest extends AbstractRequest {
    public static class Builder extends AbstractRequest.Builder<ElectLeadersRequest> {
        private final ElectionType electionType;
        private final Collection<TopicPartition> topicPartitions;
        private final int timeoutMs;

        public Builder(ElectionType electionType, Collection<TopicPartition> topicPartitions, int timeoutMs) {
            super(ApiKeys.ELECT_LEADERS);
            this.electionType = electionType;
            this.topicPartitions = topicPartitions;
            this.timeoutMs = timeoutMs;
        }

        @Override
        public ElectLeadersRequest build(short version) {
            return new ElectLeadersRequest(toRequestData(version), version);
        }

        @Override
        public String toString() {
            return "ElectLeadersRequest("
                + "electionType=" + electionType
                + ", topicPartitions=" + ((topicPartitions == null) ? "null" : MessageUtil.deepToString(topicPartitions.iterator()))
                + ", timeoutMs=" + timeoutMs
                + ")";
        }

        private ElectLeadersRequestData toRequestData(short version) {
            if (electionType != ElectionType.PREFERRED && version == 0) {
                throw new UnsupportedVersionException("API Version 0 only supports PREFERRED election type");
            }

            ElectLeadersRequestData data = new ElectLeadersRequestData()
                .setTimeoutMs(timeoutMs);

            if (topicPartitions != null) {
                for (Map.Entry<String, List<Integer>> tp : CollectionUtils.groupPartitionsByTopic(topicPartitions).entrySet()) {
                    data.topicPartitions().add(new ElectLeadersRequestData.TopicPartitions().setTopic(tp.getKey()).setPartitionId(tp.getValue()));
                }
            } else {
                data.setTopicPartitions(null);
            }

            data.setElectionType(electionType.value);

            return data;
        }
    }

    private final ElectLeadersRequestData data;

    private ElectLeadersRequest(ElectLeadersRequestData data, short version) {
        super(ApiKeys.ELECT_LEADERS, version);
        this.data = data;
    }

    public ElectLeadersRequest(Struct struct, short version) {
        super(ApiKeys.ELECT_LEADERS, version);
        this.data = new ElectLeadersRequestData(struct, version);
    }

    public ElectLeadersRequestData data() {
        return data;
    }

    @Override
    public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) {
        ApiError apiError = ApiError.fromThrowable(e);
        List<ReplicaElectionResult> electionResults = new ArrayList<>();

        for (TopicPartitions topic : data.topicPartitions()) {
            ReplicaElectionResult electionResult = new ReplicaElectionResult();

            electionResult.setTopic(topic.topic());
            for (Integer partitionId : topic.partitionId()) {
                PartitionResult partitionResult = new PartitionResult();
                partitionResult.setPartitionId(partitionId);
                partitionResult.setErrorCode(apiError.error().code());
                partitionResult.setErrorMessage(apiError.message());

                electionResult.partitionResult().add(partitionResult);
            }

            electionResults.add(electionResult);
        }

        return new ElectLeadersResponse(throttleTimeMs, apiError.error().code(), electionResults, version());
    }

    public static ElectLeadersRequest parse(ByteBuffer buffer, short version) {
        return new ElectLeadersRequest(ApiKeys.ELECT_LEADERS.parseRequest(version, buffer), version);
    }

    @Override
    protected Struct toStruct() {
        return data.toStruct(version());
    }
}
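
The Builder's toRequestData leans on CollectionUtils.groupPartitionsByTopic to shape the flat partition collection into per-topic entries. An equivalent standalone sketch (hypothetical topic names, plain JDK collections):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;

public class GroupByTopicSketch {
    public static void main(String[] args) {
        List<TopicPartition> partitions = Arrays.asList(
                new TopicPartition("orders", 0),
                new TopicPartition("orders", 1),
                new TopicPartition("payments", 2));
        // topic -> list of partition ids, the shape ElectLeadersRequestData expects
        Map<String, List<Integer>> byTopic = new HashMap<>();
        for (TopicPartition tp : partitions) {
            byTopic.computeIfAbsent(tp.topic(), k -> new ArrayList<>()).add(tp.partition());
        }
        System.out.println(byTopic); // e.g. {orders=[0, 1], payments=[2]}
    }
}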

clients/src/main/java/org/apache/kafka/common/requests/ElectLeadersResponse.java (new file)
@@ -0,0 +1,129 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.common.requests;

import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.message.ElectLeadersResponseData.PartitionResult;
import org.apache.kafka.common.message.ElectLeadersResponseData.ReplicaElectionResult;
import org.apache.kafka.common.message.ElectLeadersResponseData;
import org.apache.kafka.common.protocol.ApiKeys;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.protocol.types.Struct;

public class ElectLeadersResponse extends AbstractResponse {

    private final short version;
    private final ElectLeadersResponseData data;

    public ElectLeadersResponse(Struct struct) {
        this(struct, ApiKeys.ELECT_LEADERS.latestVersion());
    }

    public ElectLeadersResponse(Struct struct, short version) {
        this.version = version;
        this.data = new ElectLeadersResponseData(struct, version);
    }

    public ElectLeadersResponse(
            int throttleTimeMs,
            short errorCode,
            List<ReplicaElectionResult> electionResults) {
        this(throttleTimeMs, errorCode, electionResults, ApiKeys.ELECT_LEADERS.latestVersion());
    }

    public ElectLeadersResponse(
            int throttleTimeMs,
            short errorCode,
            List<ReplicaElectionResult> electionResults,
            short version) {

        this.version = version;
        this.data = new ElectLeadersResponseData();

        data.setThrottleTimeMs(throttleTimeMs);

        if (version >= 1) {
            data.setErrorCode(errorCode);
        }

        data.setReplicaElectionResults(electionResults);
    }

    public ElectLeadersResponseData data() {
        return data;
    }

    public short version() {
        return version;
    }

    @Override
    protected Struct toStruct(short version) {
        return data.toStruct(version);
    }

    @Override
    public int throttleTimeMs() {
        return data.throttleTimeMs();
    }

    @Override
    public Map<Errors, Integer> errorCounts() {
        HashMap<Errors, Integer> counts = new HashMap<>();
        for (ReplicaElectionResult result : data.replicaElectionResults()) {
            for (PartitionResult partitionResult : result.partitionResult()) {
                Errors error = Errors.forCode(partitionResult.errorCode());
                counts.put(error, counts.getOrDefault(error, 0) + 1);
            }
        }
        return counts;
    }

    public static ElectLeadersResponse parse(ByteBuffer buffer, short version) {
        return new ElectLeadersResponse(ApiKeys.ELECT_LEADERS.responseSchema(version).read(buffer), version);
    }

    @Override
    public boolean shouldClientThrottle(short version) {
        return true;
    }

    public static Map<TopicPartition, Optional<Throwable>> electLeadersResult(ElectLeadersResponseData data) {
        Map<TopicPartition, Optional<Throwable>> map = new HashMap<>();

        for (ElectLeadersResponseData.ReplicaElectionResult topicResults : data.replicaElectionResults()) {
            for (ElectLeadersResponseData.PartitionResult partitionResult : topicResults.partitionResult()) {
                Optional<Throwable> value = Optional.empty();
                Errors error = Errors.forCode(partitionResult.errorCode());
                if (error != Errors.NONE) {
                    value = Optional.of(error.exception(partitionResult.errorMessage()));
                }

                map.put(new TopicPartition(topicResults.topic(), partitionResult.partitionId()),
                        value);
            }
        }

        return map;
    }
}

clients/src/main/java/org/apache/kafka/common/requests/ElectPreferredLeadersRequest.java (deleted)
@@ -1,129 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.common.requests;

import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.message.ElectPreferredLeadersRequestData;
import org.apache.kafka.common.message.ElectPreferredLeadersRequestData.TopicPartitions;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData.ReplicaElectionResult;
import org.apache.kafka.common.protocol.ApiKeys;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.protocol.types.Struct;
import org.apache.kafka.common.utils.CollectionUtils;

import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ElectPreferredLeadersRequest extends AbstractRequest {
    public static class Builder extends AbstractRequest.Builder<ElectPreferredLeadersRequest> {
        private final ElectPreferredLeadersRequestData data;

        public Builder(ElectPreferredLeadersRequestData data) {
            super(ApiKeys.ELECT_PREFERRED_LEADERS);
            this.data = data;
        }

        @Override
        public ElectPreferredLeadersRequest build(short version) {
            return new ElectPreferredLeadersRequest(data, version);
        }

        @Override
        public String toString() {
            return data.toString();
        }
    }

    public static ElectPreferredLeadersRequestData toRequestData(Collection<TopicPartition> partitions, int timeoutMs) {
        ElectPreferredLeadersRequestData d = new ElectPreferredLeadersRequestData()
            .setTimeoutMs(timeoutMs);
        if (partitions != null) {
            for (Map.Entry<String, List<Integer>> tp : CollectionUtils.groupPartitionsByTopic(partitions).entrySet()) {
                d.topicPartitions().add(new ElectPreferredLeadersRequestData.TopicPartitions().setTopic(tp.getKey()).setPartitionId(tp.getValue()));
            }
        } else {
            d.setTopicPartitions(null);
        }
        return d;
    }

    public static Map<TopicPartition, ApiError> fromResponseData(ElectPreferredLeadersResponseData data) {
        Map<TopicPartition, ApiError> map = new HashMap<>();
        for (ElectPreferredLeadersResponseData.ReplicaElectionResult topicResults : data.replicaElectionResults()) {
            for (ElectPreferredLeadersResponseData.PartitionResult partitionResult : topicResults.partitionResult()) {
                map.put(new TopicPartition(topicResults.topic(), partitionResult.partitionId()),
                        new ApiError(Errors.forCode(partitionResult.errorCode()),
                                partitionResult.errorMessage()));
            }
        }
        return map;
    }

    private final ElectPreferredLeadersRequestData data;
    private final short version;

    private ElectPreferredLeadersRequest(ElectPreferredLeadersRequestData data, short version) {
        super(ApiKeys.ELECT_PREFERRED_LEADERS, version);
        this.data = data;
        this.version = version;
    }

    public ElectPreferredLeadersRequest(Struct struct, short version) {
        super(ApiKeys.ELECT_PREFERRED_LEADERS, version);
        this.data = new ElectPreferredLeadersRequestData(struct, version);
        this.version = version;
    }

    public ElectPreferredLeadersRequestData data() {
        return data;
    }

    @Override
    public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) {
        ElectPreferredLeadersResponseData response = new ElectPreferredLeadersResponseData();
        response.setThrottleTimeMs(throttleTimeMs);
        ApiError apiError = ApiError.fromThrowable(e);
        for (TopicPartitions topic : data.topicPartitions()) {
            ReplicaElectionResult electionResult = new ReplicaElectionResult().setTopic(topic.topic());
            for (Integer partitionId : topic.partitionId()) {
                electionResult.partitionResult().add(new ElectPreferredLeadersResponseData.PartitionResult()
                        .setPartitionId(partitionId)
                        .setErrorCode(apiError.error().code())
                        .setErrorMessage(apiError.message()));
            }
            response.replicaElectionResults().add(electionResult);
        }
        return new ElectPreferredLeadersResponse(response);
    }

    public static ElectPreferredLeadersRequest parse(ByteBuffer buffer, short version) {
        return new ElectPreferredLeadersRequest(ApiKeys.ELECT_PREFERRED_LEADERS.parseRequest(version, buffer), version);
    }

    /**
     * Visible for testing.
     */
    @Override
    public Struct toStruct() {
        return data.toStruct(version);
    }
}

clients/src/main/java/org/apache/kafka/common/requests/ElectPreferredLeadersResponse.java (deleted)
@@ -1,83 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.common.requests;

import org.apache.kafka.common.message.ElectPreferredLeadersResponseData;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData.PartitionResult;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData.ReplicaElectionResult;
import org.apache.kafka.common.protocol.ApiKeys;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.protocol.types.Struct;

import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;

public class ElectPreferredLeadersResponse extends AbstractResponse {

    private final ElectPreferredLeadersResponseData data;

    public ElectPreferredLeadersResponse(ElectPreferredLeadersResponseData data) {
        this.data = data;
    }

    public ElectPreferredLeadersResponse(Struct struct, short version) {
        this.data = new ElectPreferredLeadersResponseData(struct, version);
    }

    public ElectPreferredLeadersResponse(Struct struct) {
        short latestVersion = (short) (ElectPreferredLeadersResponseData.SCHEMAS.length - 1);
        this.data = new ElectPreferredLeadersResponseData(struct, latestVersion);
    }

    public ElectPreferredLeadersResponseData data() {
        return data;
    }

    @Override
    protected Struct toStruct(short version) {
        return data.toStruct(version);
    }

    @Override
    public int throttleTimeMs() {
        return data.throttleTimeMs();
    }

    @Override
    public Map<Errors, Integer> errorCounts() {
        HashMap<Errors, Integer> counts = new HashMap<>();
        for (ReplicaElectionResult result : data.replicaElectionResults()) {
            for (PartitionResult partitionResult : result.partitionResult()) {
                Errors error = Errors.forCode(partitionResult.errorCode());
                counts.put(error, counts.getOrDefault(error, 0) + 1);
            }
        }
        return counts;
    }

    public static ElectPreferredLeadersResponse parse(ByteBuffer buffer, short version) {
        return new ElectPreferredLeadersResponse(
                ApiKeys.ELECT_PREFERRED_LEADERS.responseSchema(version).read(buffer), version);
    }

    @Override
    public boolean shouldClientThrottle(short version) {
        return version >= 3;
    }
}
@@ -16,17 +16,20 @@
{
  "apiKey": 43,
  "type": "request",
  "name": "ElectPreferredLeadersRequest",
  "validVersions": "0",
  "name": "ElectLeadersRequest",
  "validVersions": "0-1",
  "fields": [
    { "name": "ElectionType", "type": "int8", "versions": "1+",
      "about": "Type of elections to conduct for the partition. A value of '0' elects the preferred replica. A value of '1' elects the first live replica if there are no in-sync replicas." },
    { "name": "TopicPartitions", "type": "[]TopicPartitions", "versions": "0+", "nullableVersions": "0+",
      "about": "The topic partitions to elect the preferred leader of.",
      "about": "The topic partitions to elect leaders.",
      "fields": [
        { "name": "Topic", "type": "string", "versions": "0+", "entityType": "topicName",
          "about": "The name of a topic." },
        { "name": "PartitionId", "type": "[]int32", "versions": "0+",
          "about": "The partitions of this topic whose preferred leader should be elected" }
      ]},
          "about": "The partitions of this topic whose leader should be elected." }
      ]
    },
    { "name": "TimeoutMs", "type": "int32", "versions": "0+", "default": "60000",
      "about": "The time in ms to wait for the election to complete." }
  ]

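(The version bump above is what lets a single RPC carry both election types: version 1 adds ElectionType, while version 0 requests are implicitly preferred-leader elections. A rough sketch — topic names, partition numbers, and the 100 ms timeout are illustrative — of building a v1 request with the ElectLeadersRequest.Builder exercised by RequestResponseTest later in this patch:)

import java.util.List;
import static java.util.Arrays.asList;
import org.apache.kafka.common.ElectionType;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.requests.ElectLeadersRequest;

// Elect preferred leaders for two hypothetical partitions of topic "foo";
// per the schema above, passing null instead of a partition list means "all partitions".
List<TopicPartition> partitions = asList(new TopicPartition("foo", 0), new TopicPartition("foo", 1));
ElectLeadersRequest request = new ElectLeadersRequest.Builder(ElectionType.PREFERRED, partitions, 100)
    .build((short) 1); // version 1 is the first version that serializes ElectionType
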
@@ -16,11 +16,13 @@
{
  "apiKey": 43,
  "type": "response",
  "name": "ElectPreferredLeadersResponse",
  "validVersions": "0",
  "name": "ElectLeadersResponse",
  "validVersions": "0-1",
  "fields": [
    { "name": "ThrottleTimeMs", "type": "int32", "versions": "0+",
      "about": "The duration in milliseconds for which the request was throttled due to a quota violation, or zero if the request did not violate any quota." },
    { "name": "ErrorCode", "type": "int16", "versions": "1+", "ignorable": false,
      "about": "The top level response error code." },
    { "name": "ReplicaElectionResults", "type": "[]ReplicaElectionResult", "versions": "0+",
      "about": "The election results, or an empty array if the requester did not have permission and the request asks for all partitions.", "fields": [
        { "name": "Topic", "type": "string", "versions": "0+", "entityType": "topicName",

@@ -25,6 +25,7 @@ import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.consumer.internals.ConsumerProtocol;
import org.apache.kafka.clients.consumer.internals.PartitionAssignor;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.ElectionType;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.KafkaFuture;
import org.apache.kafka.common.Node;

@@ -51,26 +52,25 @@ import org.apache.kafka.common.errors.TimeoutException;
import org.apache.kafka.common.errors.TopicDeletionDisabledException;
import org.apache.kafka.common.errors.UnknownServerException;
import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
import org.apache.kafka.common.message.CreateTopicsResponseData;
import org.apache.kafka.common.message.CreateTopicsResponseData.CreatableTopicResult;
import org.apache.kafka.common.message.DeleteTopicsResponseData;
import org.apache.kafka.common.message.CreateTopicsResponseData;
import org.apache.kafka.common.message.DeleteTopicsResponseData.DeletableTopicResult;
import org.apache.kafka.common.message.DeleteTopicsResponseData;
import org.apache.kafka.common.message.DescribeGroupsResponseData;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData.PartitionResult;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData.ReplicaElectionResult;
import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData;
import org.apache.kafka.common.message.ElectLeadersResponseData.PartitionResult;
import org.apache.kafka.common.message.ElectLeadersResponseData.ReplicaElectionResult;
import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData.AlterConfigsResourceResult;
import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.requests.ApiError;
import org.apache.kafka.common.requests.CreateAclsResponse;
import org.apache.kafka.common.requests.CreateAclsResponse.AclCreationResponse;
import org.apache.kafka.common.requests.CreateAclsResponse;
import org.apache.kafka.common.requests.CreatePartitionsResponse;
import org.apache.kafka.common.requests.CreateTopicsRequest;
import org.apache.kafka.common.requests.CreateTopicsResponse;
import org.apache.kafka.common.requests.DeleteAclsResponse;
import org.apache.kafka.common.requests.DeleteAclsResponse.AclDeletionResult;
import org.apache.kafka.common.requests.DeleteAclsResponse.AclFilterResponse;
import org.apache.kafka.common.requests.DeleteAclsResponse;
import org.apache.kafka.common.requests.DeleteGroupsResponse;
import org.apache.kafka.common.requests.DeleteRecordsResponse;
import org.apache.kafka.common.requests.DeleteTopicsRequest;

@@ -78,7 +78,7 @@ import org.apache.kafka.common.requests.DeleteTopicsResponse;
import org.apache.kafka.common.requests.DescribeAclsResponse;
import org.apache.kafka.common.requests.DescribeConfigsResponse;
import org.apache.kafka.common.requests.DescribeGroupsResponse;
import org.apache.kafka.common.requests.ElectPreferredLeadersResponse;
import org.apache.kafka.common.requests.ElectLeadersResponse;
import org.apache.kafka.common.requests.FindCoordinatorResponse;
import org.apache.kafka.common.requests.IncrementalAlterConfigsResponse;
import org.apache.kafka.common.requests.ListGroupsResponse;

@@ -123,6 +123,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.fail;

/**

@@ -675,51 +676,59 @@ public class KafkaAdminClientTest {
    }

    @Test
    public void testElectPreferredLeaders() throws Exception {
    public void testElectLeaders() throws Exception {
        TopicPartition topic1 = new TopicPartition("topic", 0);
        TopicPartition topic2 = new TopicPartition("topic", 2);
        try (AdminClientUnitTestEnv env = mockClientEnv()) {
            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create());
            for (ElectionType electionType : ElectionType.values()) {
                env.kafkaClient().setNodeApiVersions(NodeApiVersions.create());

                // Test a call where one partition has an error.
                ApiError value = ApiError.fromThrowable(new ClusterAuthorizationException(null));
                ElectPreferredLeadersResponseData responseData = new ElectPreferredLeadersResponseData();
                ReplicaElectionResult r = new ReplicaElectionResult().setTopic(topic1.topic());
                r.partitionResult().add(new PartitionResult()
                    .setPartitionId(topic1.partition())
                    .setErrorCode(ApiError.NONE.error().code())
                    .setErrorMessage(ApiError.NONE.message()));
                r.partitionResult().add(new PartitionResult()
                    .setPartitionId(topic2.partition())
                    .setErrorCode(value.error().code())
                    .setErrorMessage(value.message()));
                responseData.replicaElectionResults().add(r);
                env.kafkaClient().prepareResponse(new ElectPreferredLeadersResponse(responseData));
                ElectPreferredLeadersResult results = env.adminClient().electPreferredLeaders(asList(topic1, topic2));
                results.partitionResult(topic1).get();
                TestUtils.assertFutureError(results.partitionResult(topic2), ClusterAuthorizationException.class);
                TestUtils.assertFutureError(results.all(), ClusterAuthorizationException.class);
                // Test a call where one partition has an error.
                ApiError value = ApiError.fromThrowable(new ClusterAuthorizationException(null));
                List<ReplicaElectionResult> electionResults = new ArrayList<>();
                ReplicaElectionResult electionResult = new ReplicaElectionResult();
                electionResult.setTopic(topic1.topic());
                // Add partition 1 result
                PartitionResult partition1Result = new PartitionResult();
                partition1Result.setPartitionId(topic1.partition());
                partition1Result.setErrorCode(value.error().code());
                partition1Result.setErrorMessage(value.message());
                electionResult.partitionResult().add(partition1Result);

                // Test a call where there are no errors.
                r.partitionResult().clear();
                r.partitionResult().add(new PartitionResult()
                    .setPartitionId(topic1.partition())
                    .setErrorCode(ApiError.NONE.error().code())
                    .setErrorMessage(ApiError.NONE.message()));
                r.partitionResult().add(new PartitionResult()
                    .setPartitionId(topic2.partition())
                    .setErrorCode(ApiError.NONE.error().code())
                    .setErrorMessage(ApiError.NONE.message()));
                env.kafkaClient().prepareResponse(new ElectPreferredLeadersResponse(responseData));
                // Add partition 2 result
                PartitionResult partition2Result = new PartitionResult();
                partition2Result.setPartitionId(topic2.partition());
                partition2Result.setErrorCode(value.error().code());
                partition2Result.setErrorMessage(value.message());
                electionResult.partitionResult().add(partition2Result);

                results = env.adminClient().electPreferredLeaders(asList(topic1, topic2));
                results.partitionResult(topic1).get();
                results.partitionResult(topic2).get();
                electionResults.add(electionResult);

                // Now try a timeout
                results = env.adminClient().electPreferredLeaders(asList(topic1, topic2), new ElectPreferredLeadersOptions().timeoutMs(100));
                TestUtils.assertFutureError(results.partitionResult(topic1), TimeoutException.class);
                TestUtils.assertFutureError(results.partitionResult(topic2), TimeoutException.class);
                env.kafkaClient().prepareResponse(new ElectLeadersResponse(0, Errors.NONE.code(), electionResults));
                ElectLeadersResult results = env.adminClient().electLeaders(
                    electionType,
                    new HashSet<>(asList(topic1, topic2)));
                assertEquals(results.partitions().get().get(topic2).get().getClass(), ClusterAuthorizationException.class);

                // Test a call where there are no errors, by mutating the internals of the election results.
                partition1Result.setErrorCode(ApiError.NONE.error().code());
                partition1Result.setErrorMessage(ApiError.NONE.message());

                partition2Result.setErrorCode(ApiError.NONE.error().code());
                partition2Result.setErrorMessage(ApiError.NONE.message());

                env.kafkaClient().prepareResponse(new ElectLeadersResponse(0, Errors.NONE.code(), electionResults));
                results = env.adminClient().electLeaders(electionType, new HashSet<>(asList(topic1, topic2)));
                assertFalse(results.partitions().get().get(topic1).isPresent());
                assertFalse(results.partitions().get().get(topic2).isPresent());

                // Now try a timeout
                results = env.adminClient().electLeaders(
                    electionType,
                    new HashSet<>(asList(topic1, topic2)),
                    new ElectLeadersOptions().timeoutMs(100));
                TestUtils.assertFutureError(results.partitions(), TimeoutException.class);
            }
        }
    }

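(As the test above shows, the generalized API exposes a single partitions() future whose value maps each TopicPartition to an Optional<Throwable>, empty on success. A rough sketch of driving it from application code — the bootstrap address and topic are placeholders, and error handling is elided:)

import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import static java.util.Arrays.asList;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.ElectLeadersResult;
import org.apache.kafka.common.ElectionType;
import org.apache.kafka.common.TopicPartition;

Properties props = new Properties();
props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
try (AdminClient admin = AdminClient.create(props)) {
    Set<TopicPartition> partitions = new HashSet<>(asList(new TopicPartition("foo", 0)));
    ElectLeadersResult result = admin.electLeaders(ElectionType.PREFERRED, partitions);
    // Each partition maps to an Optional<Throwable>; empty means the election succeeded.
    Map<TopicPartition, Optional<Throwable>> outcome = result.partitions().get();
    outcome.forEach((tp, error) ->
        System.out.println(tp + (error.isPresent() ? " failed: " + error.get() : " elected")));
}
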
@@ -21,6 +21,7 @@ import org.apache.kafka.common.Metric;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.ElectionType;
import org.apache.kafka.common.TopicPartitionInfo;
import org.apache.kafka.common.TopicPartitionReplica;
import org.apache.kafka.common.acl.AclBinding;

@@ -333,10 +334,20 @@ public class MockAdminClient extends AdminClient {
        throw new UnsupportedOperationException("Not implemented yet");
    }

    @Deprecated
    @Override
    public ElectPreferredLeadersResult electPreferredLeaders(Collection<TopicPartition> partitions, ElectPreferredLeadersOptions options) {
        throw new UnsupportedOperationException("Not implemented yet");
    }

    @Override
    public ElectLeadersResult electLeaders(
            ElectionType electionType,
            Set<TopicPartition> partitions,
            ElectLeadersOptions options) {
        throw new UnsupportedOperationException("Not implemented yet");
    }

    @Override
    public CreateAclsResult createAcls(Collection<AclBinding> acls, CreateAclsOptions options) {
        throw new UnsupportedOperationException("Not implemented yet");

@@ -18,6 +18,7 @@ package org.apache.kafka.common.requests;

import org.apache.kafka.common.Node;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.ElectionType;
import org.apache.kafka.common.acl.AccessControlEntry;
import org.apache.kafka.common.acl.AccessControlEntryFilter;
import org.apache.kafka.common.acl.AclBinding;

@@ -33,28 +34,30 @@ import org.apache.kafka.common.errors.SecurityDisabledException;
import org.apache.kafka.common.errors.UnknownServerException;
import org.apache.kafka.common.errors.UnsupportedVersionException;
import org.apache.kafka.common.message.ControlledShutdownRequestData;
import org.apache.kafka.common.message.ControlledShutdownResponseData;
import org.apache.kafka.common.message.ControlledShutdownResponseData.RemainingPartition;
import org.apache.kafka.common.message.ControlledShutdownResponseData.RemainingPartitionCollection;
import org.apache.kafka.common.message.CreateTopicsRequestData;
import org.apache.kafka.common.message.ControlledShutdownResponseData;
import org.apache.kafka.common.message.CreateTopicsRequestData.CreatableReplicaAssignment;
import org.apache.kafka.common.message.CreateTopicsRequestData.CreatableTopic;
import org.apache.kafka.common.message.CreateTopicsRequestData.CreateableTopicConfig;
import org.apache.kafka.common.message.CreateTopicsResponseData;
import org.apache.kafka.common.message.CreateTopicsRequestData;
import org.apache.kafka.common.message.CreateTopicsResponseData.CreatableTopicResult;
import org.apache.kafka.common.message.CreateTopicsResponseData;
import org.apache.kafka.common.message.DeleteTopicsRequestData;
import org.apache.kafka.common.message.DeleteTopicsResponseData;
import org.apache.kafka.common.message.DeleteTopicsResponseData.DeletableTopicResult;
import org.apache.kafka.common.message.DeleteTopicsResponseData;
import org.apache.kafka.common.message.DescribeGroupsRequestData;
import org.apache.kafka.common.message.DescribeGroupsResponseData;
import org.apache.kafka.common.message.ElectPreferredLeadersRequestData;
import org.apache.kafka.common.message.ElectPreferredLeadersRequestData.TopicPartitions;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData.PartitionResult;
import org.apache.kafka.common.message.ElectPreferredLeadersResponseData.ReplicaElectionResult;
import org.apache.kafka.common.message.ElectLeadersResponseData.PartitionResult;
import org.apache.kafka.common.message.ElectLeadersResponseData.ReplicaElectionResult;
import org.apache.kafka.common.message.FindCoordinatorRequestData;
import org.apache.kafka.common.message.HeartbeatRequestData;
import org.apache.kafka.common.message.HeartbeatResponseData;
import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData.AlterConfigsResource;
import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData.AlterableConfig;
import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData;
import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData.AlterConfigsResourceResult;
import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData;
import org.apache.kafka.common.message.InitProducerIdRequestData;
import org.apache.kafka.common.message.InitProducerIdResponseData;
import org.apache.kafka.common.message.JoinGroupRequestData;

@@ -67,11 +70,6 @@ import org.apache.kafka.common.message.SaslAuthenticateRequestData;
import org.apache.kafka.common.message.SaslAuthenticateResponseData;
import org.apache.kafka.common.message.SaslHandshakeRequestData;
import org.apache.kafka.common.message.SaslHandshakeResponseData;
import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData;
import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData.AlterConfigsResource;
import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData.AlterableConfig;
import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData;
import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData.AlterConfigsResourceResult;
import org.apache.kafka.common.network.ListenerName;
import org.apache.kafka.common.network.Send;
import org.apache.kafka.common.protocol.ApiKeys;

@@ -346,10 +344,10 @@ public class RequestResponseTest {
        checkRequest(createRenewTokenRequest(), true);
        checkErrorResponse(createRenewTokenRequest(), new UnknownServerException(), true);
        checkResponse(createRenewTokenResponse(), 0, true);
        checkRequest(createElectPreferredLeadersRequest(), true);
        checkRequest(createElectPreferredLeadersRequestNullPartitions(), true);
        checkErrorResponse(createElectPreferredLeadersRequest(), new UnknownServerException(), true);
        checkResponse(createElectPreferredLeadersResponse(), 0, true);
        checkRequest(createElectLeadersRequest(), true);
        checkRequest(createElectLeadersRequestNullPartitions(), true);
        checkErrorResponse(createElectLeadersRequest(), new UnknownServerException(), true);
        checkResponse(createElectLeadersResponse(), 1, true);
        checkRequest(createIncrementalAlterConfigsRequest(), true);
        checkErrorResponse(createIncrementalAlterConfigsRequest(), new UnknownServerException(), true);
        checkResponse(createIncrementalAlterConfigsResponse(), 0, true);

@@ -1515,32 +1513,36 @@
        return new DescribeDelegationTokenResponse(20, Errors.NONE, tokenList);
    }

    private ElectPreferredLeadersRequest createElectPreferredLeadersRequestNullPartitions() {
        return new ElectPreferredLeadersRequest.Builder(
                new ElectPreferredLeadersRequestData()
                    .setTimeoutMs(100)
                    .setTopicPartitions(null))
                .build((short) 0);
    private ElectLeadersRequest createElectLeadersRequestNullPartitions() {
        return new ElectLeadersRequest.Builder(ElectionType.PREFERRED, null, 100).build((short) 1);
    }

    private ElectPreferredLeadersRequest createElectPreferredLeadersRequest() {
        ElectPreferredLeadersRequestData data = new ElectPreferredLeadersRequestData()
            .setTimeoutMs(100);
        data.topicPartitions().add(new TopicPartitions().setTopic("data").setPartitionId(asList(1, 2)));
        return new ElectPreferredLeadersRequest.Builder(data).build((short) 0);
    private ElectLeadersRequest createElectLeadersRequest() {
        List<TopicPartition> partitions = asList(new TopicPartition("data", 1), new TopicPartition("data", 2));

        return new ElectLeadersRequest.Builder(ElectionType.PREFERRED, partitions, 100).build((short) 1);
    }

    private ElectPreferredLeadersResponse createElectPreferredLeadersResponse() {
        ElectPreferredLeadersResponseData data = new ElectPreferredLeadersResponseData().setThrottleTimeMs(200);
        ReplicaElectionResult resultsByTopic = new ReplicaElectionResult().setTopic("myTopic");
        resultsByTopic.partitionResult().add(new PartitionResult().setPartitionId(0)
            .setErrorCode(Errors.NONE.code())
            .setErrorMessage(Errors.NONE.message()));
        resultsByTopic.partitionResult().add(new PartitionResult().setPartitionId(1)
            .setErrorCode(Errors.UNKNOWN_TOPIC_OR_PARTITION.code())
            .setErrorMessage(Errors.UNKNOWN_TOPIC_OR_PARTITION.message()));
        data.replicaElectionResults().add(resultsByTopic);
        return new ElectPreferredLeadersResponse(data);
    private ElectLeadersResponse createElectLeadersResponse() {
        String topic = "myTopic";
        List<ReplicaElectionResult> electionResults = new ArrayList<>();
        ReplicaElectionResult electionResult = new ReplicaElectionResult();
        electionResult.setTopic(topic);
        electionResults.add(electionResult); // attach the topic result to the list returned below, otherwise the response would be empty
        // Add partition 1 result
        PartitionResult partitionResult = new PartitionResult();
        partitionResult.setPartitionId(0);
        partitionResult.setErrorCode(ApiError.NONE.error().code());
        partitionResult.setErrorMessage(ApiError.NONE.message());
        electionResult.partitionResult().add(partitionResult);

        // Add partition 2 result
        partitionResult = new PartitionResult();
        partitionResult.setPartitionId(1);
        partitionResult.setErrorCode(Errors.UNKNOWN_TOPIC_OR_PARTITION.code());
        partitionResult.setErrorMessage(Errors.UNKNOWN_TOPIC_OR_PARTITION.message());
        electionResult.partitionResult().add(partitionResult);

        return new ElectLeadersResponse(200, Errors.NONE.code(), electionResults);
    }

    private IncrementalAlterConfigsRequest createIncrementalAlterConfigsRequest() {

@@ -38,7 +38,7 @@ object Kafka extends Logging {
    // fact that this class ignores the first parameter which is interpreted as positional and mandatory
    // but would not be mandatory if --version is specified
    // This is a bit of an ugly crutch till we get a chance to rework the entire command line parsing
    val versionOpt = optionParser.accepts("version", "Print version information and exit.")
    optionParser.accepts("version", "Print version information and exit.")

    if (args.length == 0 || args.contains("--help")) {
      CommandLineUtils.printUsageAndDie(optionParser, "USAGE: java [options] %s server.properties [--override property=value]*".format(classOf[KafkaServer].getSimpleName()))

@@ -0,0 +1,240 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package kafka.admin

import java.util.Properties
import java.util.concurrent.ExecutionException
import joptsimple.util.EnumConverter
import kafka.common.AdminCommandFailedException
import kafka.utils.CommandDefaultOptions
import kafka.utils.CommandLineUtils
import kafka.utils.CoreUtils
import kafka.utils.Json
import kafka.utils.Logging
import org.apache.kafka.clients.admin.AdminClientConfig
import org.apache.kafka.clients.admin.{AdminClient => JAdminClient}
import org.apache.kafka.common.ElectionType
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.errors.ClusterAuthorizationException
import org.apache.kafka.common.errors.ElectionNotNeededException
import org.apache.kafka.common.errors.TimeoutException
import org.apache.kafka.common.utils.Utils
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.concurrent.duration._

object LeaderElectionCommand extends Logging {
  def main(args: Array[String]): Unit = {
    run(args, 30.second)
  }

  def run(args: Array[String], timeout: Duration): Unit = {
    val commandOptions = new LeaderElectionCommandOptions(args)
    CommandLineUtils.printHelpAndExitIfNeeded(
      commandOptions,
      "This tool attempts to elect a new leader for a set of topic partitions. The types of elections supported are preferred replica and unclean replica elections."
    )

    val electionType = commandOptions.options.valueOf(commandOptions.electionType)

    val jsonFileTopicPartitions = Option(commandOptions.options.valueOf(commandOptions.pathToJsonFile)).map { path =>
      parseReplicaElectionData(Utils.readFileAsString(path))
    }

    val singleTopicPartition = (
      Option(commandOptions.options.valueOf(commandOptions.topic)),
      Option(commandOptions.options.valueOf(commandOptions.partition))
    ) match {
      case (Some(topic), Some(partition)) => Some(Set(new TopicPartition(topic, partition)))
      case _ => None
    }

    /* Note: No need to look at --all-topic-partitions, as we want this to be None if it is used.
     * Jopt-Simple should already be validating that that option is required if neither --topic nor
     * --path-to-json-file is specified.
     */
    val topicPartitions = jsonFileTopicPartitions.orElse(singleTopicPartition)

    val adminClient = {
      val props = Option(commandOptions.options.valueOf(commandOptions.adminClientConfig)).map { config =>
        Utils.loadProps(config)
      }.getOrElse(new Properties())

      props.setProperty(
        AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG,
        commandOptions.options.valueOf(commandOptions.bootstrapServer)
      )
      props.setProperty(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, timeout.toMillis.toString)

      JAdminClient.create(props)
    }

    try {
      electLeaders(adminClient, electionType, topicPartitions)
    } finally {
      adminClient.close()
    }
  }

  private[this] def parseReplicaElectionData(jsonString: String): Set[TopicPartition] = {
    Json.parseFull(jsonString) match {
      case Some(js) =>
        js.asJsonObject.get("partitions") match {
          case Some(partitionsList) =>
            val partitionsRaw = partitionsList.asJsonArray.iterator.map(_.asJsonObject)
            val partitions = partitionsRaw.map { p =>
              val topic = p("topic").to[String]
              val partition = p("partition").to[Int]
              new TopicPartition(topic, partition)
            }.toBuffer
            val duplicatePartitions = CoreUtils.duplicates(partitions)
            if (duplicatePartitions.nonEmpty) {
              throw new AdminOperationException(
                s"Replica election data contains duplicate partitions: ${duplicatePartitions.mkString(",")}"
              )
            }
            partitions.toSet
          case None => throw new AdminOperationException("Replica election data is missing \"partitions\" field")
        }
      case None => throw new AdminOperationException("Replica election data is empty")
    }
  }

  private[this] def electLeaders(
    client: JAdminClient,
    electionType: ElectionType,
    topicPartitions: Option[Set[TopicPartition]]
  ): Unit = {
    val electionResults = try {
      val partitions = topicPartitions.map(_.asJava).orNull
      debug(s"Calling AdminClient.electLeaders($electionType, $partitions)")
      client.electLeaders(electionType, partitions).partitions.get.asScala
    } catch {
      case e: ExecutionException =>
        e.getCause match {
          case cause: TimeoutException =>
            val message = "Timeout waiting for election results"
            println(message)
            throw new AdminCommandFailedException(message, cause)
          case cause: ClusterAuthorizationException =>
            val message = "Not authorized to perform leader election"
            println(message)
            throw new AdminCommandFailedException(message, cause)
          case _ =>
            throw e
        }
      case e: Throwable =>
        println("Error while making request")
        throw e
    }

    val succeeded = mutable.Set.empty[TopicPartition]
    val noop = mutable.Set.empty[TopicPartition]
    val failed = mutable.Map.empty[TopicPartition, Throwable]

    electionResults.foreach { case (topicPartition, error) =>
      val _: Unit = if (error.isPresent) {
        error.get match {
          case _: ElectionNotNeededException => noop += topicPartition
          case _ => failed += topicPartition -> error.get
        }
      } else {
        succeeded += topicPartition
      }
    }

    if (succeeded.nonEmpty) {
      val partitions = succeeded.mkString(", ")
      println(s"Successfully completed leader election ($electionType) for partitions $partitions")
    }

    if (noop.nonEmpty) {
      val partitions = noop.mkString(", ")
      println(s"Valid replica already elected for partitions $partitions")
    }

    if (failed.nonEmpty) {
      val rootException = new AdminCommandFailedException(s"${failed.size} replica(s) could not be elected")
      failed.foreach { case (topicPartition, exception) =>
        println(s"Error completing leader election ($electionType) for partition: $topicPartition: $exception")
        rootException.addSuppressed(exception)
      }
      throw rootException
    }
  }
}

private final class LeaderElectionCommandOptions(args: Array[String]) extends CommandDefaultOptions(args) {
  val bootstrapServer = parser
    .accepts(
      "bootstrap-server",
      "A hostname and port for the broker to connect to, in the form host:port. Multiple comma-separated URLs can be given. REQUIRED.")
    .withRequiredArg
    .required
    .describedAs("host:port")
    .ofType(classOf[String])
  val adminClientConfig = parser
    .accepts(
      "admin.config",
      "Configuration properties file to pass to the admin client")
    .withRequiredArg
    .describedAs("config file")
    .ofType(classOf[String])

  val pathToJsonFile = parser
    .accepts(
      "path-to-json-file",
      "The JSON file with the list of partitions for which leader elections should be performed. This is an example format. \n{\"partitions\":\n\t[{\"topic\": \"foo\", \"partition\": 1},\n\t {\"topic\": \"foobar\", \"partition\": 2}]\n}\nNot allowed if --all-topic-partitions or --topic flags are specified.")
    .withRequiredArg
    .describedAs("Path to JSON file")
    .ofType(classOf[String])

  val topic = parser
    .accepts(
      "topic",
      "Name of topic for which to perform an election. Not allowed if --path-to-json-file or --all-topic-partitions is specified.")
    .availableUnless("path-to-json-file")
    .withRequiredArg
    .describedAs("topic name")
    .ofType(classOf[String])
  val partition = parser
    .accepts(
      "partition",
      "Partition id for which to perform an election. REQUIRED if --topic is specified.")
    .requiredIf("topic")
    .withRequiredArg
    .describedAs("partition id")
    .ofType(classOf[Integer])

  val allTopicPartitions = parser
    .accepts(
      "all-topic-partitions",
      "Perform election on all of the eligible topic partitions based on the type of election (see the --election-type flag). Not allowed if --topic or --path-to-json-file is specified.")
    .requiredUnless("path-to-json-file", "topic")

  val electionType = parser
    .accepts(
      "election-type",
      "Type of election to attempt. Possible values are \"preferred\" for preferred leader election or \"unclean\" for unclean leader election. If preferred election is selected, the election is only performed if the current leader is not the preferred leader for the topic partition. If unclean election is selected, the election is only performed if there is no leader for the topic partition. REQUIRED.")
    .withRequiredArg
    .required
    .describedAs("election type")
    .withValuesConvertedBy(ElectionTypeConverter)

  options = parser.parse(args: _*)
}

final object ElectionTypeConverter extends EnumConverter[ElectionType](classOf[ElectionType]) { }

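(For reference, unescaping the JSON embedded in the --path-to-json-file help text above gives the expected input format:

{"partitions":
    [{"topic": "foo", "partition": 1},
     {"topic": "foobar", "partition": 2}]
}

A typical invocation of the new tool — host and choice of flags illustrative, using only the options defined above — would be: bin/kafka-leader-election.sh --bootstrap-server localhost:9092 --election-type preferred --all-topic-partitions.)
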
@@ -16,32 +16,34 @@
 */
package kafka.admin

import collection.JavaConverters._
import collection._
import java.util.Properties
import java.util.concurrent.ExecutionException

import joptsimple.OptionSpecBuilder
import kafka.common.AdminCommandFailedException
import kafka.utils._
import kafka.zk.KafkaZkClient
import org.apache.kafka.clients.admin.AdminClientConfig
import org.apache.kafka.common.ElectionType
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.errors.ClusterAuthorizationException
import org.apache.kafka.common.errors.ElectionNotNeededException
import org.apache.kafka.common.errors.TimeoutException

import collection.JavaConverters._
import org.apache.kafka.common.utils.{Time, Utils}
import org.apache.kafka.common.security.JaasUtils
import org.apache.kafka.common.{KafkaFuture, TopicPartition}
import org.apache.kafka.common.utils.Time
import org.apache.kafka.common.utils.Utils
import org.apache.zookeeper.KeeperException.NodeExistsException

import collection._

object PreferredReplicaLeaderElectionCommand extends Logging {

  def main(args: Array[String]): Unit = {

    val timeout = 30000
    run(args, timeout)
  }

  def run(args: Array[String], timeout: Int = 30000): Unit = {
    println("This tool is deprecated. Please use the kafka-leader-election tool. Tracking issue: KAFKA-8405")
    val commandOpts = new PreferredReplicaLeaderElectionCommandOptions(args)
    CommandLineUtils.printHelpAndExitIfNeeded(commandOpts, "This tool causes leadership for each partition to be transferred back to the 'preferred replica';" +
      " it can be used to balance leadership among the servers.")

@@ -209,71 +211,68 @@ object PreferredReplicaLeaderElectionCommand extends Logging {

    val adminClient = org.apache.kafka.clients.admin.AdminClient.create(adminClientProps)

    /**
     * Wait until the given future has completed, then return whether it completed exceptionally,
     * because KafkaFuture.isCompletedExceptionally doesn't wait for a result.
     */
    private def completedExceptionally[T](future: KafkaFuture[T]): Boolean = {
      try {
        future.get()
        false
      } catch {
        case (_: Throwable) =>
          true
      }
    }

    override def electPreferredLeaders(partitionsFromUser: Option[Set[TopicPartition]]): Unit = {
      val partitions = partitionsFromUser match {
        case Some(partitionsFromUser) => partitionsFromUser.asJava
        case None => null
      }
      debug(s"Calling AdminClient.electPreferredLeaders($partitions)")
      val result = adminClient.electPreferredLeaders(partitions)
      // wait for all results
      debug(s"Calling AdminClient.electLeaders(ElectionType.PREFERRED, $partitions)")

      val attemptedPartitions = partitionsFromUser match {
        case Some(partitionsFromUser) => partitions.asScala
        case None => try {
          result.partitions().get.asScala
        } catch {
          case e: ExecutionException =>
            val cause = e.getCause
            if (cause.isInstanceOf[TimeoutException]) {
              // We timed out, or don't even know the attempted partitions
              println("Timeout waiting for election results")
            }
            throw new AdminCommandFailedException(null, cause)
          case e: Throwable =>
            // We don't even know the attempted partitions
            println("Error while making request")
            e.printStackTrace()
            return
        }
      }

      val (exceptional, ok) = attemptedPartitions.map(tp => tp -> result.partitionResult(tp)).
        partition { case (_, partitionResult) => completedExceptionally(partitionResult) }

      if (!ok.isEmpty) {
        println(s"Successfully completed preferred replica election for partitions ${ok.map { case (tp, future) => tp }.mkString(", ")}")
      }
      if (!exceptional.isEmpty) {
        val adminException = new AdminCommandFailedException(
          s"${exceptional.size} preferred replica(s) could not be elected")
        for ((partition, void) <- exceptional) {
          val exception = try {
            void.get()
            new AdminCommandFailedException("Exceptional future with no exception")
          } catch {
            case e: ExecutionException => e.getCause
      val electionResults = try {
        adminClient.electLeaders(ElectionType.PREFERRED, partitions).partitions.get.asScala
      } catch {
        case e: ExecutionException =>
          val cause = e.getCause
          if (cause.isInstanceOf[TimeoutException]) {
            println("Timeout waiting for election results")
            throw new AdminCommandFailedException("Timeout waiting for election results", cause)
          } else if (cause.isInstanceOf[ClusterAuthorizationException]) {
            println(s"Not authorized to perform leader election")
            throw new AdminCommandFailedException("Not authorized to perform leader election", cause)
          }
          println(s"Error completing preferred replica election for partition $partition: $exception")
          adminException.addSuppressed(exception)
        }
        throw adminException

          throw e
        case e: Throwable =>
          // We don't even know the attempted partitions
          println("Error while making request")
          e.printStackTrace()
          return
      }

      val succeeded = mutable.Set.empty[TopicPartition]
      val noop = mutable.Set.empty[TopicPartition]
      val failed = mutable.Map.empty[TopicPartition, Throwable]

      electionResults.foreach { case (topicPartition, error) =>
        val _: Unit = if (error.isPresent) {
          if (error.get.isInstanceOf[ElectionNotNeededException]) {
            noop += topicPartition
          } else {
            failed += topicPartition -> error.get
          }
        } else {
          succeeded += topicPartition
        }
      }

      if (!succeeded.isEmpty) {
        val partitions = succeeded.mkString(", ")
        println(s"Successfully completed preferred leader election for partitions $partitions")
      }

      if (!noop.isEmpty) {
        val partitions = noop.mkString(", ")
        println(s"Preferred replica already elected for partitions $partitions")
      }

      if (!failed.isEmpty) {
        val rootException = new AdminCommandFailedException(s"${failed.size} preferred replica(s) could not be elected")
        failed.foreach { case (topicPartition, exception) =>
          println(s"Error completing preferred leader election for partition: $topicPartition: $exception")
          rootException.addSuppressed(exception)
        }
        throw rootException
      }
    }

    override def close(): Unit = {

@@ -415,7 +415,7 @@ object TopicCommand extends Logging {
    }

    override def getTopics(topicWhitelist: Option[String], excludeInternalTopics: Boolean = false): Seq[String] = {
      val allTopics = zkClient.getAllTopicsInCluster.sorted
      val allTopics = zkClient.getAllTopicsInCluster.toSeq.sorted
      doGetTopics(allTopics, topicWhitelist, excludeInternalTopics)
    }

@@ -0,0 +1,47 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package kafka

import org.apache.kafka.common.ElectionType
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.requests.ElectLeadersRequest
import scala.collection.JavaConverters._
import scala.collection.breakOut

package object api {
  implicit final class ElectLeadersRequestOps(val self: ElectLeadersRequest) extends AnyVal {
    def topicPartitions: Set[TopicPartition] = {
      if (self.data.topicPartitions == null) {
        Set.empty
      } else {
        self.data.topicPartitions.asScala.flatMap { topicPartition =>
          topicPartition.partitionId.asScala.map { partitionId =>
            new TopicPartition(topicPartition.topic, partitionId)
          }
        }(breakOut)
      }
    }

    def electionType: ElectionType = {
      if (self.version == 0) {
        ElectionType.PREFERRED
      } else {
        ElectionType.valueOf(self.data.electionType)
      }
    }
  }
}

@@ -92,22 +92,22 @@ class DelayedOperations(topicPartition: TopicPartition,
                        deleteRecords: DelayedOperationPurgatory[DelayedDeleteRecords]) {

  def checkAndCompleteAll(): Unit = {
    val requestKey = new TopicPartitionOperationKey(topicPartition)
    val requestKey = TopicPartitionOperationKey(topicPartition)
    fetch.checkAndComplete(requestKey)
    produce.checkAndComplete(requestKey)
    deleteRecords.checkAndComplete(requestKey)
  }

  def checkAndCompleteFetch(): Unit = {
    fetch.checkAndComplete(new TopicPartitionOperationKey(topicPartition))
    fetch.checkAndComplete(TopicPartitionOperationKey(topicPartition))
  }

  def checkAndCompleteProduce(): Unit = {
    produce.checkAndComplete(new TopicPartitionOperationKey(topicPartition))
    produce.checkAndComplete(TopicPartitionOperationKey(topicPartition))
  }

  def checkAndCompleteDeleteRecords(): Unit = {
    deleteRecords.checkAndComplete(new TopicPartitionOperationKey(topicPartition))
    deleteRecords.checkAndComplete(TopicPartitionOperationKey(topicPartition))
  }

  def numDelayedDelete: Int = deleteRecords.numDelayed

@@ -33,8 +33,8 @@ object Election {
    leaderIsrAndControllerEpochOpt match {
      case Some(leaderIsrAndControllerEpoch) =>
        val isr = leaderIsrAndControllerEpoch.leaderAndIsr.isr
        val leaderOpt = PartitionLeaderElectionAlgorithms.offlinePartitionLeaderElection(assignment, isr,
          liveReplicas.toSet, uncleanLeaderElectionEnabled, controllerContext)
        val leaderOpt = PartitionLeaderElectionAlgorithms.offlinePartitionLeaderElection(
          assignment, isr, liveReplicas.toSet, uncleanLeaderElectionEnabled, controllerContext)
        val newLeaderAndIsrOpt = leaderOpt.map { leader =>
          val newIsr = if (isr.contains(leader)) isr.filter(replica => controllerContext.isReplicaOnline(replica, partition))
          else List(leader)

@@ -57,10 +57,13 @@ object Election {
   *
   * @return The election results
   */
  def leaderForOffline(controllerContext: ControllerContext,
                       partitionsWithUncleanLeaderElectionState: Seq[(TopicPartition, Option[LeaderIsrAndControllerEpoch], Boolean)]): Seq[ElectionResult] = {
    partitionsWithUncleanLeaderElectionState.map { case (partition, leaderIsrAndControllerEpochOpt, uncleanLeaderElectionEnabled) =>
      leaderForOffline(partition, leaderIsrAndControllerEpochOpt, uncleanLeaderElectionEnabled, controllerContext)
  def leaderForOffline(
    controllerContext: ControllerContext,
    partitionsWithUncleanLeaderElectionState: Seq[(TopicPartition, Option[LeaderIsrAndControllerEpoch], Boolean)]
  ): Seq[ElectionResult] = {
    partitionsWithUncleanLeaderElectionState.map {
      case (partition, leaderIsrAndControllerEpochOpt, uncleanLeaderElectionEnabled) =>
        leaderForOffline(partition, leaderIsrAndControllerEpochOpt, uncleanLeaderElectionEnabled, controllerContext)
    }
  }

@@ -22,14 +22,16 @@ import com.yammer.metrics.core.Gauge
import kafka.admin.AdminOperationException
import kafka.api._
import kafka.common._
import kafka.controller.KafkaController.ElectPreferredLeadersCallback
import kafka.controller.KafkaController.ElectLeadersCallback
import kafka.metrics.{KafkaMetricsGroup, KafkaTimer}
import kafka.server._
import kafka.utils._
import kafka.zk.KafkaZkClient.UpdateLeaderAndIsrResult
import kafka.zk._
import kafka.zookeeper.{StateChangeHandler, ZNodeChangeHandler, ZNodeChildChangeHandler}
import org.apache.kafka.common.{KafkaException, TopicPartition}
import org.apache.kafka.common.ElectionType
import org.apache.kafka.common.KafkaException
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.errors.{BrokerNotAvailableException, ControllerMovedException, StaleBrokerEpochException}
import org.apache.kafka.common.metrics.Metrics
import org.apache.kafka.common.protocol.Errors

@@ -42,16 +44,16 @@ import scala.collection.JavaConverters._
import scala.collection._
import scala.util.{Failure, Try}

sealed trait ElectionType
object AutoTriggered extends ElectionType
object ZkTriggered extends ElectionType
object AdminClientTriggered extends ElectionType
sealed trait ElectionTrigger
final case object AutoTriggered extends ElectionTrigger
final case object ZkTriggered extends ElectionTrigger
final case object AdminClientTriggered extends ElectionTrigger

object KafkaController extends Logging {
  val InitialControllerEpoch = 0
  val InitialControllerEpochZkVersion = 0

  type ElectPreferredLeadersCallback = (Map[TopicPartition, Int], Map[TopicPartition, ApiError]) => Unit
  type ElectLeadersCallback = Map[TopicPartition, Either[ApiError, Int]] => Unit
}

class KafkaController(val config: KafkaConfig,

@@ -272,7 +274,7 @@ class KafkaController(val config: KafkaConfig,
    maybeTriggerPartitionReassignment(controllerContext.partitionsBeingReassigned.keySet)
    topicDeletionManager.tryTopicDeletion()
    val pendingPreferredReplicaElections = fetchPendingPreferredReplicaElections()
    onPreferredReplicaElection(pendingPreferredReplicaElections, ZkTriggered)
    onReplicaElection(pendingPreferredReplicaElections, ElectionType.PREFERRED, ZkTriggered)
    info("Starting the controller scheduler")
    kafkaScheduler.startup()
    if (config.autoLeaderRebalanceEnable) {

@@ -487,7 +489,11 @@ class KafkaController(val config: KafkaConfig,
    info(s"New partition creation callback for ${newPartitions.mkString(",")}")
    partitionStateMachine.handleStateChanges(newPartitions.toSeq, NewPartition)
    replicaStateMachine.handleStateChanges(controllerContext.replicasForPartition(newPartitions).toSeq, NewReplica)
    partitionStateMachine.handleStateChanges(newPartitions.toSeq, OnlinePartition, Some(OfflinePartitionLeaderElectionStrategy))
    partitionStateMachine.handleStateChanges(
      newPartitions.toSeq,
      OnlinePartition,
      Some(OfflinePartitionLeaderElectionStrategy(false))
    )
    replicaStateMachine.handleStateChanges(controllerContext.replicasForPartition(newPartitions).toSeq, OnlineReplica)
  }

@@ -631,34 +637,53 @@ class KafkaController(val config: KafkaConfig,
    removePartitionsFromReassignedPartitions(partitionsToBeRemovedFromReassignment)
  }

  /**
   * Attempt to elect the preferred replica as leader for each of the given partitions.
   * @param partitions The partitions to have their preferred leader elected
   * @param electionType The election type
   * @return A map of failed elections where keys are partitions which had an error and the corresponding value is
   *         the exception that was thrown.
   * Attempt to elect a replica as leader for each of the given partitions.
   * @param partitions The partitions to have a new leader elected
   * @param electionType The type of election to perform
   * @param electionTrigger The reason for triggering this election
   * @return A map of failed and successful elections. The keys are the topic partitions and the corresponding values are
   *         either the exception that was thrown or the new leader and ISR.
   */
  private def onPreferredReplicaElection(partitions: Set[TopicPartition],
                                         electionType: ElectionType): Map[TopicPartition, Throwable] = {
    info(s"Starting preferred replica leader election for partitions ${partitions.mkString(",")}")
  private[this] def onReplicaElection(
    partitions: Set[TopicPartition],
    electionType: ElectionType,
    electionTrigger: ElectionTrigger
  ): Map[TopicPartition, Either[Throwable, LeaderAndIsr]] = {
    info(s"Starting replica leader election ($electionType) for partitions ${partitions.mkString(",")} triggered by $electionTrigger")
    try {
      val results = partitionStateMachine.handleStateChanges(partitions.toSeq, OnlinePartition,
        Some(PreferredReplicaPartitionLeaderElectionStrategy))
      if (electionType != AdminClientTriggered) {
        results.foreach { case (tp, throwable) =>
          if (throwable.isInstanceOf[ControllerMovedException]) {
            error(s"Error completing preferred replica leader election for partition $tp because controller has moved to another broker.", throwable)
            throw throwable
          } else {
            error(s"Error completing preferred replica leader election for partition $tp", throwable)
          }
      val strategy = electionType match {
        case ElectionType.PREFERRED => PreferredReplicaPartitionLeaderElectionStrategy
        case ElectionType.UNCLEAN =>
          /* Let's be conservative and only trigger unclean election if the election type is unclean and it was
           * triggered by the admin client
           */
          OfflinePartitionLeaderElectionStrategy(allowUnclean = electionTrigger == AdminClientTriggered)
      }

      val results = partitionStateMachine.handleStateChanges(
        partitions.toSeq,
        OnlinePartition,
        Some(strategy)
      )
      if (electionTrigger != AdminClientTriggered) {
        results.foreach {
          case (tp, Left(throwable)) =>
            if (throwable.isInstanceOf[ControllerMovedException]) {
              info(s"Error completing replica leader election ($electionType) for partition $tp because controller has moved to another broker.", throwable)
              throw throwable
            } else {
              error(s"Error completing replica leader election ($electionType) for partition $tp", throwable)
            }
          case (_, Right(_)) => // Ignored; no need to log or throw an exception for the success cases
        }
      }
      return results;

      results
    } finally {
      if (electionType != AdminClientTriggered)
        removePartitionsFromPreferredReplicaElection(partitions, electionType == AutoTriggered)
      if (electionTrigger != AdminClientTriggered) {
        removePartitionsFromPreferredReplicaElection(partitions, electionTrigger == AutoTriggered)
      }
    }
  }

@@ -898,7 +923,7 @@ class KafkaController(val config: KafkaConfig,
    if (!isTriggeredByAutoRebalance) {
      zkClient.deletePreferredReplicaElection(controllerContext.epochZkVersion)
      // Ensure we detect future preferred replica leader elections
      eventManager.put(PreferredReplicaLeaderElection(None))
      eventManager.put(ReplicaLeaderElection(None, ElectionType.PREFERRED, ZkTriggered))
    }
  }

@@ -943,16 +968,17 @@ class KafkaController(val config: KafkaConfig,
          // assigned replica list
          val newLeaderAndIsr = leaderAndIsr.newEpochAndZkVersion
          // update the new leadership decision in zookeeper or retry
          val UpdateLeaderAndIsrResult(successfulUpdates, _, failedUpdates) =
          val UpdateLeaderAndIsrResult(finishedUpdates, _) =
            zkClient.updateLeaderAndIsr(immutable.Map(partition -> newLeaderAndIsr), epoch, controllerContext.epochZkVersion)
          if (successfulUpdates.contains(partition)) {
            val finalLeaderAndIsr = successfulUpdates(partition)
            finalLeaderIsrAndControllerEpoch = Some(LeaderIsrAndControllerEpoch(finalLeaderAndIsr, epoch))
            info(s"Updated leader epoch for partition $partition to ${finalLeaderAndIsr.leaderEpoch}")
            true
          } else if (failedUpdates.contains(partition)) {
            throw failedUpdates(partition)
          } else false

          finishedUpdates.headOption.map {
            case (partition, Right(leaderAndIsr)) =>
              finalLeaderIsrAndControllerEpoch = Some(LeaderIsrAndControllerEpoch(leaderAndIsr, epoch))
              info(s"Updated leader epoch for partition $partition to ${leaderAndIsr.leaderEpoch}")
              true
            case (partition, Left(e)) =>
              throw e
          }.getOrElse(false)
        case None =>
          throw new IllegalStateException(s"Cannot update leader epoch for partition $partition as " +
            "leaderAndIsr path is empty. This could mean we somehow tried to reassign a partition that doesn't exist")

@@ -992,7 +1018,7 @@ class KafkaController(val config: KafkaConfig,
        controllerContext.partitionsBeingReassigned.isEmpty &&
        !topicDeletionManager.isTopicQueuedUpForDeletion(tp.topic) &&
        controllerContext.allTopics.contains(tp.topic))
      onPreferredReplicaElection(candidatePartitions.toSet, AutoTriggered)
      onReplicaElection(candidatePartitions.toSet, ElectionType.PREFERRED, AutoTriggered)
    }
  }
}

@@ -1465,71 +1491,95 @@ class KafkaController(val config: KafkaConfig,
    }
  }

  def electPreferredLeaders(partitions: Set[TopicPartition], callback: ElectPreferredLeadersCallback = { (_, _) => }): Unit = {
    eventManager.put(PreferredReplicaLeaderElection(Some(partitions), AdminClientTriggered, callback))
  def electLeaders(
    partitions: Set[TopicPartition],
    electionType: ElectionType,
    callback: ElectLeadersCallback
  ): Unit = {
    eventManager.put(ReplicaLeaderElection(Some(partitions), electionType, AdminClientTriggered, callback))
  }

  private def preemptPreferredReplicaLeaderElection(partitionsFromAdminClientOpt: Option[Set[TopicPartition]], callback: ElectPreferredLeadersCallback = (_, _) => {}): Unit = {
    callback(Map.empty, partitionsFromAdminClientOpt match {
      case Some(partitions) => partitions.map(partition => partition -> new ApiError(Errors.NOT_CONTROLLER, null)).toMap
      case None => Map.empty
    })
  private def preemptReplicaLeaderElection(
    partitionsFromAdminClientOpt: Option[Set[TopicPartition]],
    callback: ElectLeadersCallback
  ): Unit = {
    callback(
      partitionsFromAdminClientOpt.fold(Map.empty[TopicPartition, Either[ApiError, Int]]) { partitions =>
        partitions.map(partition => partition -> Left(new ApiError(Errors.NOT_CONTROLLER, null)))(breakOut)
      }
    )
  }

  private def processPreferredReplicaLeaderElection(partitionsFromAdminClientOpt: Option[Set[TopicPartition]],
                                                    electionType: ElectionType = ZkTriggered,
                                                    callback: ElectPreferredLeadersCallback = (_, _) => {}): Unit = {
  private def processReplicaLeaderElection(
    partitionsFromAdminClientOpt: Option[Set[TopicPartition]],
    electionType: ElectionType,
    electionTrigger: ElectionTrigger,
    callback: ElectLeadersCallback
  ): Unit = {
    if (!isActive) {
      callback(Map.empty, partitionsFromAdminClientOpt match {
        case Some(partitions) => partitions.map(partition => partition -> new ApiError(Errors.NOT_CONTROLLER, null)).toMap
        case None => Map.empty
      callback(partitionsFromAdminClientOpt.fold(Map.empty[TopicPartition, Either[ApiError, Int]]) { partitions =>
        partitions.map(partition => partition -> Left(new ApiError(Errors.NOT_CONTROLLER, null)))(breakOut)
      })
    } else {
      // We need to register the watcher if the path doesn't exist in order to detect future preferred replica
      // leader elections and we get the `path exists` check for free
      if (electionType == AdminClientTriggered || zkClient.registerZNodeChangeHandlerAndCheckExistence(preferredReplicaElectionHandler)) {
      if (electionTrigger == AdminClientTriggered || zkClient.registerZNodeChangeHandlerAndCheckExistence(preferredReplicaElectionHandler)) {
        val partitions = partitionsFromAdminClientOpt match {
          case Some(partitions) => partitions
          case None => zkClient.getPreferredReplicaElection
        }

        val (validPartitions, invalidPartitions) = partitions.partition(tp => controllerContext.allPartitions.contains(tp))
        invalidPartitions.foreach { p =>
          info(s"Skipping preferred replica leader election for partition ${p} since it doesn't exist.")
        val (knownPartitions, unknownPartitions) = partitions.partition(tp => controllerContext.allPartitions.contains(tp))
        unknownPartitions.foreach { p =>
          info(s"Skipping replica leader election ($electionType) for partition $p by $electionTrigger since it doesn't exist.")
        }

        val (partitionsBeingDeleted, livePartitions) = validPartitions.partition(partition =>
          topicDeletionManager.isTopicQueuedUpForDeletion(partition.topic))
        val (partitionsBeingDeleted, livePartitions) = knownPartitions.partition(partition =>
          topicDeletionManager.isTopicQueuedUpForDeletion(partition.topic))
        if (partitionsBeingDeleted.nonEmpty) {
          warn(s"Skipping preferred replica election for partitions $partitionsBeingDeleted " +
            s"since the respective topics are being deleted")
        }
        // partition those where preferred is already leader
        val (electablePartitions, alreadyPreferred) = livePartitions.partition { partition =>
          val assignedReplicas = controllerContext.partitionReplicaAssignment(partition)
          val preferredReplica = assignedReplicas.head
          val currentLeader = controllerContext.partitionLeadershipInfo(partition).leaderAndIsr.leader
          currentLeader != preferredReplica
          warn(s"Skipping replica leader election ($electionType) for partitions $partitionsBeingDeleted " +
            s"by $electionTrigger since the respective topics are being deleted")
        }

        val electionErrors = onPreferredReplicaElection(electablePartitions, electionType)
        val successfulPartitions = electablePartitions -- electionErrors.keySet
        val results = electionErrors.map { case (partition, ex) =>
          val apiError = if (ex.isInstanceOf[StateChangeFailedException])
            new ApiError(Errors.PREFERRED_LEADER_NOT_AVAILABLE, ex.getMessage)
          else
            ApiError.fromThrowable(ex)
          partition -> apiError
        // partition those that have a valid leader
        val (electablePartitions, alreadyValidLeader) = livePartitions.partition { partition =>
          electionType match {
            case ElectionType.PREFERRED =>
              val assignedReplicas = controllerContext.partitionReplicaAssignment(partition)
              val preferredReplica = assignedReplicas.head
|
||||
val currentLeader = controllerContext.partitionLeadershipInfo(partition).leaderAndIsr.leader
|
||||
currentLeader != preferredReplica
|
||||
|
||||
case ElectionType.UNCLEAN =>
|
||||
val currentLeader = controllerContext.partitionLeadershipInfo(partition).leaderAndIsr.leader
|
||||
currentLeader == LeaderAndIsr.NoLeader || !controllerContext.liveBrokerIds.contains(currentLeader)
|
||||
}
|
||||
}
|
||||
|
||||
val results = onReplicaElection(electablePartitions, electionType, electionTrigger).mapValues {
|
||||
case Left(ex) =>
|
||||
if (ex.isInstanceOf[StateChangeFailedException]) {
|
||||
val error = if (electionType == ElectionType.PREFERRED) {
|
||||
Errors.PREFERRED_LEADER_NOT_AVAILABLE
|
||||
} else {
|
||||
Errors.ELIGIBLE_LEADERS_NOT_AVAILABLE
|
||||
}
|
||||
Left(new ApiError(error, ex.getMessage))
|
||||
} else {
|
||||
Left(ApiError.fromThrowable(ex))
|
||||
}
|
||||
case Right(leaderAndIsr) => Right(leaderAndIsr.leader)
|
||||
} ++
|
||||
alreadyPreferred.map(_ -> ApiError.NONE) ++
|
||||
partitionsBeingDeleted.map(_ -> new ApiError(Errors.INVALID_TOPIC_EXCEPTION, "The topic is being deleted")) ++
|
||||
invalidPartitions.map ( tp => tp -> new ApiError(Errors.UNKNOWN_TOPIC_OR_PARTITION, s"The partition does not exist.")
|
||||
)
|
||||
debug(s"PreferredReplicaLeaderElection waiting: $successfulPartitions, results: $results")
|
||||
callback(successfulPartitions.map(
|
||||
tp => tp->controllerContext.partitionReplicaAssignment(tp).head).toMap,
|
||||
results)
|
||||
alreadyValidLeader.map(_ -> Left(new ApiError(Errors.ELECTION_NOT_NEEDED))) ++
|
||||
partitionsBeingDeleted.map(
|
||||
_ -> Left(new ApiError(Errors.INVALID_TOPIC_EXCEPTION, "The topic is being deleted"))
|
||||
) ++
|
||||
unknownPartitions.map(
|
||||
_ -> Left(new ApiError(Errors.UNKNOWN_TOPIC_OR_PARTITION, "The partition does not exist."))
|
||||
)
|
||||
|
||||
debug(s"Waiting for any successful result for election type ($electionType) by $electionTrigger for partitions: $results")
|
||||
callback(results)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
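A hedged sketch of driving the reworked controller API above: the electLeaders signature and the Either[ApiError, Int] callback payload come from this diff, while the controller handle and topic names are hypothetical stand-ins for whatever the calling layer holds.

// Illustrative only: `controller` is a hypothetical KafkaController reference.
import org.apache.kafka.common.{ElectionType, TopicPartition}

val partitions = Set(new TopicPartition("payments", 0), new TopicPartition("payments", 1))
controller.electLeaders(partitions, ElectionType.UNCLEAN, results =>
  results.foreach {
    case (tp, Left(apiError)) => println(s"$tp: election failed with ${apiError.error}") // per-partition error
    case (tp, Right(leaderId)) => println(s"$tp: broker $leaderId is now the leader")    // new leader id
  }
)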
@@ -1557,13 +1607,15 @@ class KafkaController(val config: KafkaConfig,
override def process(event: ControllerEvent): Unit = {
try {
event match {
// Used only in test cases
case event: MockEvent =>
// Used only in test cases
event.process()
case ShutdownEventThread =>
error("Received a ShutdownEventThread event. This type of event is supposed to be handled by ControllerEventThread")
case AutoPreferredReplicaLeaderElection =>
processAutoPreferredReplicaLeaderElection()
case PreferredReplicaLeaderElection(partitions, electionType, callback) =>
processPreferredReplicaLeaderElection(partitions, electionType, callback)
case ReplicaLeaderElection(partitions, electionType, electionTrigger, callback) =>
processReplicaLeaderElection(partitions, electionType, electionTrigger, callback)
case UncleanLeaderElectionEnable =>
processUncleanLeaderElectionEnable()
case TopicUncleanLeaderElectionEnable(topic) =>

@@ -1602,8 +1654,6 @@ class KafkaController(val config: KafkaConfig,
processIsrChangeNotification()
case Startup =>
processStartup()
case ShutdownEventThread =>
// not handled here
}
} catch {
case e: ControllerMovedException =>

@@ -1618,8 +1668,8 @@ class KafkaController(val config: KafkaConfig,

override def preempt(event: ControllerEvent): Unit = {
event match {
case PreferredReplicaLeaderElection(partitions, _, callback) =>
preemptPreferredReplicaLeaderElection(partitions, callback)
case ReplicaLeaderElection(partitions, _, _, callback) =>
preemptReplicaLeaderElection(partitions, callback)
case ControlledShutdown(id, brokerEpoch, callback) =>
preemptControlledShutdown(id, brokerEpoch, callback)
case _ =>

@@ -1699,7 +1749,7 @@ object IsrChangeNotificationHandler {
class PreferredReplicaElectionHandler(eventManager: ControllerEventManager) extends ZNodeChangeHandler {
override val path: String = PreferredReplicaElectionZNode.path

override def handleCreation(): Unit = eventManager.put(PreferredReplicaLeaderElection(None))
override def handleCreation(): Unit = eventManager.put(ReplicaLeaderElection(None, ElectionType.PREFERRED, ZkTriggered))
}

class ControllerChangeHandler(eventManager: ControllerEventManager) extends ZNodeChangeHandler {

@@ -1842,9 +1892,12 @@ case object IsrChangeNotification extends ControllerEvent {
override def state: ControllerState = ControllerState.IsrChange
}

case class PreferredReplicaLeaderElection(partitionsFromAdminClientOpt: Option[Set[TopicPartition]],
electionType: ElectionType = ZkTriggered,
callback: ElectPreferredLeadersCallback = (_,_) => {}) extends ControllerEvent {
case class ReplicaLeaderElection(
partitionsFromAdminClientOpt: Option[Set[TopicPartition]],
electionType: ElectionType,
electionTrigger: ElectionTrigger,
callback: ElectLeadersCallback = _ => {}
) extends ControllerEvent {
override def state: ControllerState = ControllerState.ManualLeaderBalance
}


@@ -21,13 +21,14 @@ import kafka.common.StateChangeFailedException
import kafka.controller.Election._
import kafka.server.KafkaConfig
import kafka.utils.Logging
import kafka.zk.KafkaZkClient
import kafka.zk.KafkaZkClient.UpdateLeaderAndIsrResult
import kafka.zk.{KafkaZkClient, TopicPartitionStateZNode}
import kafka.zk.TopicPartitionStateZNode
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.errors.ControllerMovedException
import org.apache.zookeeper.KeeperException
import org.apache.zookeeper.KeeperException.Code

import scala.collection.breakOut
import scala.collection.mutable

abstract class PartitionStateMachine(controllerContext: ControllerContext) extends Logging {

@@ -70,7 +71,7 @@ abstract class PartitionStateMachine(controllerContext: ControllerContext) exten
!controllerContext.isTopicQueuedUpForDeletion(partition.topic)
}.toSeq

handleStateChanges(partitionsToTrigger, OnlinePartition, Some(OfflinePartitionLeaderElectionStrategy))
handleStateChanges(partitionsToTrigger, OnlinePartition, Some(OfflinePartitionLeaderElectionStrategy(false)))
// TODO: If handleStateChanges catches an exception, it is not enough to bail out and log an error.
// It is important to trigger leader election for those partitions.
}

@@ -96,14 +97,18 @@ abstract class PartitionStateMachine(controllerContext: ControllerContext) exten
}
}

def handleStateChanges(partitions: Seq[TopicPartition],
targetState: PartitionState): Map[TopicPartition, Throwable] = {
def handleStateChanges(
partitions: Seq[TopicPartition],
targetState: PartitionState
): Map[TopicPartition, Either[Throwable, LeaderAndIsr]] = {
handleStateChanges(partitions, targetState, None)
}

def handleStateChanges(partitions: Seq[TopicPartition],
targetState: PartitionState,
leaderElectionStrategy: Option[PartitionLeaderElectionStrategy]): Map[TopicPartition, Throwable]
def handleStateChanges(
partitions: Seq[TopicPartition],
targetState: PartitionState,
leaderElectionStrategy: Option[PartitionLeaderElectionStrategy]
): Map[TopicPartition, Either[Throwable, LeaderAndIsr]]

}

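The abstract handleStateChanges above now folds successes and failures into a single Either-valued map. A minimal consumption sketch, assuming a concrete state machine instance and a partition list (both names hypothetical):

// Illustrative only: `stateMachine` and `toElect` are assumed to exist in the caller.
val results = stateMachine.handleStateChanges(
  toElect,
  OnlinePartition,
  Some(PreferredReplicaPartitionLeaderElectionStrategy)
)
results.foreach {
  case (tp, Right(leaderAndIsr)) => println(s"$tp is now led by broker ${leaderAndIsr.leader}")
  case (tp, Left(t)) => println(s"$tp failed to transition: ${t.getMessage}")
}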
@@ -130,31 +135,40 @@ class ZkPartitionStateMachine(config: KafkaConfig,
this.logIdent = s"[PartitionStateMachine controllerId=$controllerId] "

/**
* Try to change the state of the given partitions to the given targetState, using the given
* partitionLeaderElectionStrategyOpt if a leader election is required.
* @param partitions The partitions
* @param targetState The state
* @param partitionLeaderElectionStrategyOpt The leader election strategy if a leader election is required.
* @return partitions and corresponding throwable for those partitions which could not transition to the given state
*/
override def handleStateChanges(partitions: Seq[TopicPartition], targetState: PartitionState,
partitionLeaderElectionStrategyOpt: Option[PartitionLeaderElectionStrategy]): Map[TopicPartition, Throwable] = {
* Try to change the state of the given partitions to the given targetState, using the given
* partitionLeaderElectionStrategyOpt if a leader election is required.
* @param partitions The partitions
* @param targetState The state
* @param partitionLeaderElectionStrategyOpt The leader election strategy if a leader election is required.
* @return A map of failed and successful elections when targetState is OnlinePartitions. The keys are the
* topic partitions and the corresponding values are either the exception that was thrown or new
* leader & ISR.
*/
override def handleStateChanges(
partitions: Seq[TopicPartition],
targetState: PartitionState,
partitionLeaderElectionStrategyOpt: Option[PartitionLeaderElectionStrategy]
): Map[TopicPartition, Either[Throwable, LeaderAndIsr]] = {
if (partitions.nonEmpty) {
try {
controllerBrokerRequestBatch.newBatch()
val errors = doHandleStateChanges(partitions, targetState, partitionLeaderElectionStrategyOpt)
val result = doHandleStateChanges(
partitions,
targetState,
partitionLeaderElectionStrategyOpt
)
controllerBrokerRequestBatch.sendRequestsToBrokers(controllerContext.epoch)
errors
result
} catch {
case e: ControllerMovedException =>
error(s"Controller moved to another broker when moving some partitions to $targetState state", e)
throw e
case e: Throwable =>
error(s"Error while moving some partitions to $targetState state", e)
partitions.map { _ -> e }.toMap
partitions.map(_ -> Left(e))(breakOut)
}
} else {
Map.empty[TopicPartition, Throwable]
Map.empty
}
}


@@ -183,10 +197,15 @@ class ZkPartitionStateMachine(config: KafkaConfig,
* --nothing other than marking the partition state as NonExistentPartition
* @param partitions The partitions for which the state transition is invoked
* @param targetState The end state that the partition should be moved to
* @return A map of failed and successful elections when targetState is OnlinePartitions. The keys are the
* topic partitions and the corresponding values are either the exception that was thrown or new
* leader & ISR.
*/
private def doHandleStateChanges(partitions: Seq[TopicPartition],
targetState: PartitionState,
partitionLeaderElectionStrategyOpt: Option[PartitionLeaderElectionStrategy]): Map[TopicPartition, Throwable] = {
private def doHandleStateChanges(
partitions: Seq[TopicPartition],
targetState: PartitionState,
partitionLeaderElectionStrategyOpt: Option[PartitionLeaderElectionStrategy]
): Map[TopicPartition, Either[Throwable, LeaderAndIsr]] = {
val stateChangeLog = stateChangeLogger.withControllerEpoch(controllerContext.epoch)
partitions.foreach(partition => controllerContext.putPartitionStateIfNotExists(partition, NonExistentPartition))
val (validPartitions, invalidPartitions) = controllerContext.checkValidPartitionStateChange(partitions, targetState)

@@ -212,13 +231,23 @@ class ZkPartitionStateMachine(config: KafkaConfig,
}
}
if (partitionsToElectLeader.nonEmpty) {
val (successfulElections, failedElections) = electLeaderForPartitions(partitionsToElectLeader, partitionLeaderElectionStrategyOpt.get)
successfulElections.foreach { partition =>
stateChangeLog.trace(s"Changed partition $partition from ${partitionState(partition)} to $targetState with state " +
s"${controllerContext.partitionLeadershipInfo(partition).leaderAndIsr}")
controllerContext.putPartitionState(partition, OnlinePartition)
val electionResults = electLeaderForPartitions(
partitionsToElectLeader,
partitionLeaderElectionStrategyOpt.getOrElse(
throw new IllegalArgumentException("Election strategy is a required field when the target state is OnlinePartition")
)
)

electionResults.foreach {
case (partition, Right(leaderAndIsr)) =>
stateChangeLog.trace(
s"Changed partition $partition from ${partitionState(partition)} to $targetState with state $leaderAndIsr"
)
controllerContext.putPartitionState(partition, OnlinePartition)
case (_, Left(_)) => // Ignore; no need to update partition state on election error
}
failedElections

electionResults
} else {
Map.empty
}

@@ -293,24 +322,30 @@ class ZkPartitionStateMachine(config: KafkaConfig,
* Repeatedly attempt to elect leaders for multiple partitions until there are no more remaining partitions to retry.
* @param partitions The partitions that we're trying to elect leaders for.
* @param partitionLeaderElectionStrategy The election strategy to use.
* @return A pair whose first element is the partitions that successfully had a leader elected
* and whose second element is a map from failed partition to the corresponding thrown exception.
* @return A map of failed and successful elections. The keys are the topic partitions and the corresponding values are
* either the exception that was thrown or new leader & ISR.
*/
private def electLeaderForPartitions(partitions: Seq[TopicPartition],
partitionLeaderElectionStrategy: PartitionLeaderElectionStrategy): (Seq[TopicPartition], Map[TopicPartition, Throwable]) = {
val successfulElections = mutable.Buffer.empty[TopicPartition]
private def electLeaderForPartitions(
partitions: Seq[TopicPartition],
partitionLeaderElectionStrategy: PartitionLeaderElectionStrategy
): Map[TopicPartition, Either[Throwable, LeaderAndIsr]] = {
var remaining = partitions
var failures = Map.empty[TopicPartition, Throwable]
val finishedElections = mutable.Map.empty[TopicPartition, Either[Throwable, LeaderAndIsr]]

while (remaining.nonEmpty) {
val (success, updatesToRetry, failedElections) = doElectLeaderForPartitions(partitions, partitionLeaderElectionStrategy)
val (finished, updatesToRetry) = doElectLeaderForPartitions(remaining, partitionLeaderElectionStrategy)
remaining = updatesToRetry
successfulElections ++= success
failedElections.foreach { case (partition, e) =>
logFailedStateChange(partition, partitionState(partition), OnlinePartition, e)

finished.foreach {
case (partition, Left(e)) =>
logFailedStateChange(partition, partitionState(partition), OnlinePartition, e)
case (_, Right(_)) => // Ignore; success so no need to log failed state change
}
failures ++= failedElections

finishedElections ++= finished
}
(successfulElections, failures)

finishedElections.toMap
}

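electLeaderForPartitions above retries only the partitions that hit a ZooKeeper BADVERSION conflict while accumulating terminal results. The same pattern in a self-contained sketch, with all names invented:

import scala.collection.mutable

def retryUntilDone[K, V](keys: Seq[K])(
  attempt: Seq[K] => (Map[K, Either[Throwable, V]], Seq[K])
): Map[K, Either[Throwable, V]] = {
  var remaining = keys
  val finished = mutable.Map.empty[K, Either[Throwable, V]]
  while (remaining.nonEmpty) {
    val (done, toRetry) = attempt(remaining) // toRetry models BADVERSION conflicts
    finished ++= done                        // a later attempt may overwrite an earlier entry, as above
    remaining = toRetry
  }
  finished.toMap
}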
/**

@@ -319,21 +354,23 @@ class ZkPartitionStateMachine(config: KafkaConfig,
*
* @param partitions The partitions that we're trying to elect leaders for.
* @param partitionLeaderElectionStrategy The election strategy to use.
* @return A tuple of three values:
* 1. The partitions that successfully had a leader elected.
* @return A tuple of two values:
* 1. The partitions and the expected leader and isr that successfully had a leader elected. And exceptions
* corresponding to failed elections that should not be retried.
* 2. The partitions that we should retry due to a zookeeper BADVERSION conflict. Version conflicts can occur if
* the partition leader updated partition state while the controller attempted to update partition state.
* 3. Exceptions corresponding to failed elections that should not be retried.
*/
private def doElectLeaderForPartitions(partitions: Seq[TopicPartition], partitionLeaderElectionStrategy: PartitionLeaderElectionStrategy):
(Seq[TopicPartition], Seq[TopicPartition], Map[TopicPartition, Exception]) = {
private def doElectLeaderForPartitions(
partitions: Seq[TopicPartition],
partitionLeaderElectionStrategy: PartitionLeaderElectionStrategy
): (Map[TopicPartition, Either[Exception, LeaderAndIsr]], Seq[TopicPartition]) = {
val getDataResponses = try {
zkClient.getTopicPartitionStatesRaw(partitions)
} catch {
case e: Exception =>
return (Seq.empty, Seq.empty, partitions.map(_ -> e).toMap)
return (partitions.map(_ -> Left(e))(breakOut), Seq.empty)
}
val failedElections = mutable.Map.empty[TopicPartition, Exception]
val failedElections = mutable.Map.empty[TopicPartition, Either[Exception, LeaderAndIsr]]
val leaderIsrAndControllerEpochPerPartition = mutable.Buffer.empty[(TopicPartition, LeaderIsrAndControllerEpoch)]
getDataResponses.foreach { getDataResponse =>
val partition = getDataResponse.ctx.get.asInstanceOf[TopicPartition]

@@ -342,16 +379,17 @@ class ZkPartitionStateMachine(config: KafkaConfig,
val leaderIsrAndControllerEpochOpt = TopicPartitionStateZNode.decode(getDataResponse.data, getDataResponse.stat)
if (leaderIsrAndControllerEpochOpt.isEmpty) {
val exception = new StateChangeFailedException(s"LeaderAndIsr information doesn't exist for partition $partition in $currState state")
failedElections.put(partition, exception)
failedElections.put(partition, Left(exception))
}
leaderIsrAndControllerEpochPerPartition += partition -> leaderIsrAndControllerEpochOpt.get
} else if (getDataResponse.resultCode == Code.NONODE) {
val exception = new StateChangeFailedException(s"LeaderAndIsr information doesn't exist for partition $partition in $currState state")
failedElections.put(partition, exception)
failedElections.put(partition, Left(exception))
} else {
failedElections.put(partition, getDataResponse.resultException.get)
failedElections.put(partition, Left(getDataResponse.resultException.get))
}
}

val (invalidPartitionsForElection, validPartitionsForElection) = leaderIsrAndControllerEpochPerPartition.partition { case (_, leaderIsrAndControllerEpoch) =>
leaderIsrAndControllerEpoch.controllerEpoch > controllerContext.epoch
}

@@ -359,14 +397,19 @@ class ZkPartitionStateMachine(config: KafkaConfig,
val failMsg = s"aborted leader election for partition $partition since the LeaderAndIsr path was " +
s"already written by another controller. This probably means that the current controller $controllerId went through " +
s"a soft failure and another controller was elected with epoch ${leaderIsrAndControllerEpoch.controllerEpoch}."
failedElections.put(partition, new StateChangeFailedException(failMsg))
failedElections.put(partition, Left(new StateChangeFailedException(failMsg)))
}

if (validPartitionsForElection.isEmpty) {
return (Seq.empty, Seq.empty, failedElections.toMap)
return (failedElections.toMap, Seq.empty)
}

val (partitionsWithoutLeaders, partitionsWithLeaders) = partitionLeaderElectionStrategy match {
case OfflinePartitionLeaderElectionStrategy =>
val partitionsWithUncleanLeaderElectionState = collectUncleanLeaderElectionState(validPartitionsForElection)
case OfflinePartitionLeaderElectionStrategy(allowUnclean) =>
val partitionsWithUncleanLeaderElectionState = collectUncleanLeaderElectionState(
validPartitionsForElection,
allowUnclean
)
leaderForOffline(controllerContext, partitionsWithUncleanLeaderElectionState).partition(_.leaderAndIsr.isEmpty)
case ReassignPartitionLeaderElectionStrategy =>
leaderForReassign(controllerContext, validPartitionsForElection).partition(_.leaderAndIsr.isEmpty)

@@ -378,37 +421,79 @@ class ZkPartitionStateMachine(config: KafkaConfig,
partitionsWithoutLeaders.foreach { electionResult =>
val partition = electionResult.topicPartition
val failMsg = s"Failed to elect leader for partition $partition under strategy $partitionLeaderElectionStrategy"
failedElections.put(partition, new StateChangeFailedException(failMsg))
failedElections.put(partition, Left(new StateChangeFailedException(failMsg)))
}
val recipientsPerPartition = partitionsWithLeaders.map(result => result.topicPartition -> result.liveReplicas).toMap
val adjustedLeaderAndIsrs = partitionsWithLeaders.map(result => result.topicPartition -> result.leaderAndIsr.get).toMap
val UpdateLeaderAndIsrResult(successfulUpdates, updatesToRetry, failedUpdates) = zkClient.updateLeaderAndIsr(
val UpdateLeaderAndIsrResult(finishedUpdates, updatesToRetry) = zkClient.updateLeaderAndIsr(
adjustedLeaderAndIsrs, controllerContext.epoch, controllerContext.epochZkVersion)
successfulUpdates.foreach { case (partition, leaderAndIsr) =>
val replicas = controllerContext.partitionReplicaAssignment(partition)
val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, controllerContext.epoch)
controllerContext.partitionLeadershipInfo.put(partition, leaderIsrAndControllerEpoch)
controllerBrokerRequestBatch.addLeaderAndIsrRequestForBrokers(recipientsPerPartition(partition), partition,
leaderIsrAndControllerEpoch, replicas, isNew = false)
finishedUpdates.foreach { case (partition, result) =>
result.right.foreach { leaderAndIsr =>
val replicas = controllerContext.partitionReplicaAssignment(partition)
val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, controllerContext.epoch)
controllerContext.partitionLeadershipInfo.put(partition, leaderIsrAndControllerEpoch)
controllerBrokerRequestBatch.addLeaderAndIsrRequestForBrokers(recipientsPerPartition(partition), partition,
leaderIsrAndControllerEpoch, replicas, isNew = false)
}
}
(successfulUpdates.keys.toSeq, updatesToRetry, failedElections.toMap ++ failedUpdates)

(finishedUpdates ++ failedElections, updatesToRetry)
}

private def collectUncleanLeaderElectionState(leaderIsrAndControllerEpochs: Seq[(TopicPartition, LeaderIsrAndControllerEpoch)]):
Seq[(TopicPartition, Option[LeaderIsrAndControllerEpoch], Boolean)] = {
val (partitionsWithNoLiveInSyncReplicas, partitionsWithLiveInSyncReplicas) = leaderIsrAndControllerEpochs.partition { case (partition, leaderIsrAndControllerEpoch) =>
val liveInSyncReplicas = leaderIsrAndControllerEpoch.leaderAndIsr.isr.filter(replica => controllerContext.isReplicaOnline(replica, partition))
liveInSyncReplicas.isEmpty
/* For the provided set of topic partition and partition sync state it attempts to determine if unclean
* leader election should be performed. Unclean election should be performed if there are no live
* replicas that are in sync and unclean leader election is allowed (allowUnclean parameter is true or
* the topic has been configured to allow unclean election).
*
* @param leaderIsrAndControllerEpochs set of partitions to determine if unclean leader election should be
* allowed
* @param allowUnclean whether to allow unclean election without having to read the topic configuration
* @return a sequence of three element tuple:
* 1. topic partition
* 2. leader, isr and controller epoch. Some means election should be performed
* 3. allow unclean
*/
private def collectUncleanLeaderElectionState(
leaderIsrAndControllerEpochs: Seq[(TopicPartition, LeaderIsrAndControllerEpoch)],
allowUnclean: Boolean
): Seq[(TopicPartition, Option[LeaderIsrAndControllerEpoch], Boolean)] = {
val (partitionsWithNoLiveInSyncReplicas, partitionsWithLiveInSyncReplicas) = leaderIsrAndControllerEpochs.partition {
case (partition, leaderIsrAndControllerEpoch) =>
val liveInSyncReplicas = leaderIsrAndControllerEpoch
.leaderAndIsr
.isr
.filter(replica => controllerContext.isReplicaOnline(replica, partition))
liveInSyncReplicas.isEmpty
}
val (logConfigs, failed) = zkClient.getLogConfigs(partitionsWithNoLiveInSyncReplicas.map { case (partition, _) => partition.topic }, config.originals())
partitionsWithNoLiveInSyncReplicas.map { case (partition, leaderIsrAndControllerEpoch) =>
if (failed.contains(partition.topic)) {
logFailedStateChange(partition, partitionState(partition), OnlinePartition, failed(partition.topic))
(partition, None, false)
} else {
(partition, Option(leaderIsrAndControllerEpoch), logConfigs(partition.topic).uncleanLeaderElectionEnable.booleanValue())

val electionForPartitionWithoutLiveReplicas = if (allowUnclean) {
partitionsWithNoLiveInSyncReplicas.map { case (partition, leaderIsrAndControllerEpoch) =>
(partition, Option(leaderIsrAndControllerEpoch), true)
}
} ++ partitionsWithLiveInSyncReplicas.map { case (partition, leaderIsrAndControllerEpoch) => (partition, Option(leaderIsrAndControllerEpoch), false) }
} else {
val (logConfigs, failed) = zkClient.getLogConfigs(
partitionsWithNoLiveInSyncReplicas.map { case (partition, _) => partition.topic }(breakOut),
config.originals()
)

partitionsWithNoLiveInSyncReplicas.map { case (partition, leaderIsrAndControllerEpoch) =>
if (failed.contains(partition.topic)) {
logFailedStateChange(partition, partitionState(partition), OnlinePartition, failed(partition.topic))
(partition, None, false)
} else {
(
partition,
Option(leaderIsrAndControllerEpoch),
logConfigs(partition.topic).uncleanLeaderElectionEnable.booleanValue()
)
}
}
}

electionForPartitionWithoutLiveReplicas ++
partitionsWithLiveInSyncReplicas.map { case (partition, leaderIsrAndControllerEpoch) =>
(partition, Option(leaderIsrAndControllerEpoch), false)
}
}
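To make the three-element tuples returned above concrete, here is a hedged rendering of the shapes they can take; the topic name and the two placeholder states are invented for illustration:

import org.apache.kafka.common.TopicPartition

val t0State: LeaderIsrAndControllerEpoch = ??? // hypothetical state read from ZooKeeper
val t1State: LeaderIsrAndControllerEpoch = ??? // hypothetical state read from ZooKeeper
val shapes: Seq[(TopicPartition, Option[LeaderIsrAndControllerEpoch], Boolean)] = Seq(
  (new TopicPartition("t", 0), Some(t0State), true),  // no live in-sync replica, unclean election permitted
  (new TopicPartition("t", 1), Some(t1State), false), // live in-sync replica exists, clean election only
  (new TopicPartition("t", 2), None, false)           // topic config lookup failed, election skipped
)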
private def logInvalidTransition(partition: TopicPartition, targetState: PartitionState): Unit = {

@@ -458,10 +543,10 @@ object PartitionLeaderElectionAlgorithms {
}

sealed trait PartitionLeaderElectionStrategy
case object OfflinePartitionLeaderElectionStrategy extends PartitionLeaderElectionStrategy
case object ReassignPartitionLeaderElectionStrategy extends PartitionLeaderElectionStrategy
case object PreferredReplicaPartitionLeaderElectionStrategy extends PartitionLeaderElectionStrategy
case object ControlledShutdownPartitionLeaderElectionStrategy extends PartitionLeaderElectionStrategy
final case class OfflinePartitionLeaderElectionStrategy(allowUnclean: Boolean) extends PartitionLeaderElectionStrategy
final case object ReassignPartitionLeaderElectionStrategy extends PartitionLeaderElectionStrategy
final case object PreferredReplicaPartitionLeaderElectionStrategy extends PartitionLeaderElectionStrategy
final case object ControlledShutdownPartitionLeaderElectionStrategy extends PartitionLeaderElectionStrategy

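The offline strategy above becomes a case class so the per-request allowUnclean decision travels with the strategy instead of always being re-read from topic configuration. A hedged sketch of how callers might distinguish the variants:

// Sketch only; the sealed hierarchy matches this diff, the `describe` helper is hypothetical.
val adminTriggered = OfflinePartitionLeaderElectionStrategy(allowUnclean = true)  // e.g. unclean election via AdminClient
val autoTriggered = OfflinePartitionLeaderElectionStrategy(allowUnclean = false)  // defer to topic-level config

def describe(strategy: PartitionLeaderElectionStrategy): String = strategy match {
  case OfflinePartitionLeaderElectionStrategy(allowUnclean) => s"offline (unclean allowed: $allowUnclean)"
  case ReassignPartitionLeaderElectionStrategy => "reassignment"
  case PreferredReplicaPartitionLeaderElectionStrategy => "preferred replica"
  case ControlledShutdownPartitionLeaderElectionStrategy => "controlled shutdown"
}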
sealed trait PartitionState {
def state: Byte

@@ -20,12 +20,13 @@ import kafka.api.LeaderAndIsr
import kafka.common.StateChangeFailedException
import kafka.server.KafkaConfig
import kafka.utils.Logging
import kafka.zk.{KafkaZkClient, TopicPartitionStateZNode}
import kafka.zk.KafkaZkClient
import kafka.zk.KafkaZkClient.UpdateLeaderAndIsrResult
import kafka.zk.TopicPartitionStateZNode
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.errors.ControllerMovedException
import org.apache.zookeeper.KeeperException.Code

import scala.collection.breakOut
import scala.collection.mutable

abstract class ReplicaStateMachine(controllerContext: ControllerContext) extends Logging {

@@ -274,18 +275,23 @@ class ZkReplicaStateMachine(config: KafkaConfig,
* @param partitions The partitions from which we're trying to remove the replica from isr
* @return The updated LeaderIsrAndControllerEpochs of all partitions for which we successfully removed the replica from isr.
*/
private def removeReplicasFromIsr(replicaId: Int, partitions: Seq[TopicPartition]):
Map[TopicPartition, LeaderIsrAndControllerEpoch] = {
private def removeReplicasFromIsr(
replicaId: Int,
partitions: Seq[TopicPartition]
): Map[TopicPartition, LeaderIsrAndControllerEpoch] = {
var results = Map.empty[TopicPartition, LeaderIsrAndControllerEpoch]
var remaining = partitions
while (remaining.nonEmpty) {
val (successfulRemovals, removalsToRetry, failedRemovals) = doRemoveReplicasFromIsr(replicaId, remaining)
results ++= successfulRemovals
val (finishedRemoval, removalsToRetry) = doRemoveReplicasFromIsr(replicaId, remaining)
remaining = removalsToRetry
failedRemovals.foreach { case (partition, e) =>
val replica = PartitionAndReplica(partition, replicaId)
val currentState = controllerContext.replicaState(replica)
logFailedStateChange(replica, currentState, OfflineReplica, e)

finishedRemoval.foreach {
case (partition, Left(e)) =>
val replica = PartitionAndReplica(partition, replicaId)
val currentState = controllerContext.replicaState(replica)
logFailedStateChange(replica, currentState, OfflineReplica, e)
case (partition, Right(leaderIsrAndEpoch)) =>
results += partition -> leaderIsrAndEpoch
}
}
results

@@ -297,86 +303,117 @@ class ZkReplicaStateMachine(config: KafkaConfig,
*
* @param replicaId The replica being removed from isr of multiple partitions
* @param partitions The partitions from which we're trying to remove the replica from isr
* @return A tuple of three values:
* 1. The updated LeaderIsrAndControllerEpochs of all partitions for which we successfully removed the replica from isr.
* @return A tuple of two elements:
* 1. The updated Right[LeaderIsrAndControllerEpochs] of all partitions for which we successfully
* removed the replica from isr. Or Left[Exception] corresponding to failed removals that should
* not be retried
* 2. The partitions that we should retry due to a zookeeper BADVERSION conflict. Version conflicts can occur if
* the partition leader updated partition state while the controller attempted to update partition state.
* 3. Exceptions corresponding to failed removals that should not be retried.
*/
private def doRemoveReplicasFromIsr(replicaId: Int, partitions: Seq[TopicPartition]):
(Map[TopicPartition, LeaderIsrAndControllerEpoch],
Seq[TopicPartition],
Map[TopicPartition, Exception]) = {
val (leaderAndIsrs, partitionsWithNoLeaderAndIsrInZk, failedStateReads) = getTopicPartitionStatesFromZk(partitions)
val (leaderAndIsrsWithReplica, leaderAndIsrsWithoutReplica) = leaderAndIsrs.partition { case (_, leaderAndIsr) => leaderAndIsr.isr.contains(replicaId) }
val adjustedLeaderAndIsrs = leaderAndIsrsWithReplica.mapValues { leaderAndIsr =>
val newLeader = if (replicaId == leaderAndIsr.leader) LeaderAndIsr.NoLeader else leaderAndIsr.leader
val adjustedIsr = if (leaderAndIsr.isr.size == 1) leaderAndIsr.isr else leaderAndIsr.isr.filter(_ != replicaId)
leaderAndIsr.newLeaderAndIsr(newLeader, adjustedIsr)
private def doRemoveReplicasFromIsr(
replicaId: Int,
partitions: Seq[TopicPartition]
): (Map[TopicPartition, Either[Exception, LeaderIsrAndControllerEpoch]], Seq[TopicPartition]) = {
val (leaderAndIsrs, partitionsWithNoLeaderAndIsrInZk) = getTopicPartitionStatesFromZk(partitions)
val (leaderAndIsrsWithReplica, leaderAndIsrsWithoutReplica) = leaderAndIsrs.partition { case (_, result) =>
result.right.map { leaderAndIsr =>
leaderAndIsr.isr.contains(replicaId)
}.right.getOrElse(false)
}
val UpdateLeaderAndIsrResult(successfulUpdates, updatesToRetry, failedUpdates) = zkClient.updateLeaderAndIsr(
adjustedLeaderAndIsrs, controllerContext.epoch, controllerContext.epochZkVersion)
val exceptionsForPartitionsWithNoLeaderAndIsrInZk = partitionsWithNoLeaderAndIsrInZk.flatMap { partition =>
if (!controllerContext.isTopicQueuedUpForDeletion(partition.topic)) {
val exception = new StateChangeFailedException(s"Failed to change state of replica $replicaId for partition $partition since the leader and isr path in zookeeper is empty")
Option(partition -> exception)
} else None
}.toMap
val leaderIsrAndControllerEpochs = (leaderAndIsrsWithoutReplica ++ successfulUpdates).map { case (partition, leaderAndIsr) =>
val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, controllerContext.epoch)
controllerContext.partitionLeadershipInfo.put(partition, leaderIsrAndControllerEpoch)
partition -> leaderIsrAndControllerEpoch

val adjustedLeaderAndIsrs: Map[TopicPartition, LeaderAndIsr] = leaderAndIsrsWithReplica.flatMap {
case (partition, result) =>
result.right.toOption.map { leaderAndIsr =>
val newLeader = if (replicaId == leaderAndIsr.leader) LeaderAndIsr.NoLeader else leaderAndIsr.leader
val adjustedIsr = if (leaderAndIsr.isr.size == 1) leaderAndIsr.isr else leaderAndIsr.isr.filter(_ != replicaId)
partition -> leaderAndIsr.newLeaderAndIsr(newLeader, adjustedIsr)
}
}
(leaderIsrAndControllerEpochs, updatesToRetry, failedStateReads ++ exceptionsForPartitionsWithNoLeaderAndIsrInZk ++ failedUpdates)

val UpdateLeaderAndIsrResult(finishedPartitions, updatesToRetry) = zkClient.updateLeaderAndIsr(
adjustedLeaderAndIsrs,
controllerContext.epoch,
controllerContext.epochZkVersion
)

val exceptionsForPartitionsWithNoLeaderAndIsrInZk: Map[TopicPartition, Either[Exception, LeaderIsrAndControllerEpoch]] =
partitionsWithNoLeaderAndIsrInZk.flatMap { partition =>
if (!controllerContext.isTopicQueuedUpForDeletion(partition.topic)) {
val exception = new StateChangeFailedException(
s"Failed to change state of replica $replicaId for partition $partition since the leader and isr " +
"path in zookeeper is empty"
)
Option(partition -> Left(exception))
} else None
}(breakOut)

val leaderIsrAndControllerEpochs: Map[TopicPartition, Either[Exception, LeaderIsrAndControllerEpoch]] =
(leaderAndIsrsWithoutReplica ++ finishedPartitions).map { case (partition, result: Either[Exception, LeaderAndIsr]) =>
(
partition,
result.right.map { leaderAndIsr =>
val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, controllerContext.epoch)
controllerContext.partitionLeadershipInfo.put(partition, leaderIsrAndControllerEpoch)
leaderIsrAndControllerEpoch
}
)
}

(
leaderIsrAndControllerEpochs ++ exceptionsForPartitionsWithNoLeaderAndIsrInZk,
updatesToRetry
)
}

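A note on an idiom this patch uses throughout: scala.collection.breakOut lets a map call build the target collection type directly, skipping the intermediate collection that a trailing .toMap would require. A small self-contained example with invented names:

import org.apache.kafka.common.TopicPartition
import scala.collection.breakOut

val partitions = Seq(new TopicPartition("t", 0), new TopicPartition("t", 1))
val asErrors: Map[TopicPartition, Either[Exception, Int]] =
  partitions.map(tp => tp -> Left(new Exception("not controller")))(breakOut) // builds the Map directly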
/**
* Gets the partition state from zookeeper
* @param partitions the partitions whose state we want from zookeeper
* @return A tuple of three values:
* 1. The LeaderAndIsrs of partitions whose state we successfully read from zookeeper
* @return A tuple of two values:
* 1. The Right(LeaderAndIsrs) of partitions whose state we successfully read from zookeeper.
* The Left(Exception) corresponds to failed zookeeper lookups or states whose controller epoch exceeds our current epoch
* 2. The partitions that had no leader and isr state in zookeeper. This happens if the controller
* didn't finish partition initialization.
* 3. Exceptions corresponding to failed zookeeper lookups or states whose controller epoch exceeds our current epoch.
*/
private def getTopicPartitionStatesFromZk(partitions: Seq[TopicPartition]):
(Map[TopicPartition, LeaderAndIsr],
Seq[TopicPartition],
Map[TopicPartition, Exception]) = {
val leaderAndIsrs = mutable.Map.empty[TopicPartition, LeaderAndIsr]
val partitionsWithNoLeaderAndIsrInZk = mutable.Buffer.empty[TopicPartition]
val failed = mutable.Map.empty[TopicPartition, Exception]
private def getTopicPartitionStatesFromZk(
partitions: Seq[TopicPartition]
): (Map[TopicPartition, Either[Exception, LeaderAndIsr]], Seq[TopicPartition]) = {
val getDataResponses = try {
zkClient.getTopicPartitionStatesRaw(partitions)
} catch {
case e: Exception =>
partitions.foreach(partition => failed.put(partition, e))
return (leaderAndIsrs.toMap, partitionsWithNoLeaderAndIsrInZk, failed.toMap)
return (partitions.map(_ -> Left(e))(breakOut), Seq.empty)
}

val partitionsWithNoLeaderAndIsrInZk = mutable.Buffer.empty[TopicPartition]
val result = mutable.Map.empty[TopicPartition, Either[Exception, LeaderAndIsr]]

getDataResponses.foreach { getDataResponse =>
val partition = getDataResponse.ctx.get.asInstanceOf[TopicPartition]
if (getDataResponse.resultCode == Code.OK) {
val leaderIsrAndControllerEpochOpt = TopicPartitionStateZNode.decode(getDataResponse.data, getDataResponse.stat)
if (leaderIsrAndControllerEpochOpt.isEmpty) {
partitionsWithNoLeaderAndIsrInZk += partition
} else {
val leaderIsrAndControllerEpoch = leaderIsrAndControllerEpochOpt.get
if (leaderIsrAndControllerEpoch.controllerEpoch > controllerContext.epoch) {
val exception = new StateChangeFailedException("Leader and isr path written by another controller. This probably" +
s"means the current controller with epoch ${controllerContext.epoch} went through a soft failure and another " +
s"controller was elected with epoch ${leaderIsrAndControllerEpoch.controllerEpoch}. Aborting state change by this controller")
failed.put(partition, exception)
} else {
leaderAndIsrs.put(partition, leaderIsrAndControllerEpoch.leaderAndIsr)
}
val _: Unit = if (getDataResponse.resultCode == Code.OK) {
TopicPartitionStateZNode.decode(getDataResponse.data, getDataResponse.stat) match {
case None =>
partitionsWithNoLeaderAndIsrInZk += partition
case Some(leaderIsrAndControllerEpoch) =>
if (leaderIsrAndControllerEpoch.controllerEpoch > controllerContext.epoch) {
val exception = new StateChangeFailedException(
"Leader and isr path written by another controller. This probably " +
s"means the current controller with epoch ${controllerContext.epoch} went through a soft failure and " +
s"another controller was elected with epoch ${leaderIsrAndControllerEpoch.controllerEpoch}. Aborting " +
"state change by this controller"
)
result += (partition -> Left(exception))
} else {
result += (partition -> Right(leaderIsrAndControllerEpoch.leaderAndIsr))
}
}
} else if (getDataResponse.resultCode == Code.NONODE) {
partitionsWithNoLeaderAndIsrInZk += partition
} else {
failed.put(partition, getDataResponse.resultException.get)
result += (partition -> Left(getDataResponse.resultException.get))
}
}
(leaderAndIsrs.toMap, partitionsWithNoLeaderAndIsrInZk, failed.toMap)

(result.toMap, partitionsWithNoLeaderAndIsrInZk)
}

private def logSuccessfulTransition(replicaId: Int, partition: TopicPartition, currState: ReplicaState, targetState: ReplicaState): Unit = {

@@ -999,7 +999,10 @@ object LogManager {
val defaultLogConfig = LogConfig(defaultProps)

// read the log configurations from zookeeper
val (topicConfigs, failed) = zkClient.getLogConfigs(zkClient.getAllTopicsInCluster, defaultProps)
val (topicConfigs, failed) = zkClient.getLogConfigs(
zkClient.getAllTopicsInCluster,
defaultProps
)
if (!failed.isEmpty) throw failed.head._2

val cleanerConfig = LogCleaner.cleanerConfig(config)

@@ -23,15 +23,16 @@ import org.apache.kafka.common.requests.ApiError

import scala.collection.{Map, mutable}

/** A delayed elect preferred leader operation that can be created by the replica manager and watched
* in the elect preferred leader purgatory
/** A delayed elect leader operation that can be created by the replica manager and watched
* in the elect leader purgatory
*/
class DelayedElectPreferredLeader(delayMs: Long,
expectedLeaders: Map[TopicPartition, Int],
results: Map[TopicPartition, ApiError],
replicaManager: ReplicaManager,
responseCallback: Map[TopicPartition, ApiError] => Unit)
extends DelayedOperation(delayMs) {
class DelayedElectLeader(
delayMs: Long,
expectedLeaders: Map[TopicPartition, Int],
results: Map[TopicPartition, ApiError],
replicaManager: ReplicaManager,
responseCallback: Map[TopicPartition, ApiError] => Unit
) extends DelayedOperation(delayMs) {

var waitingPartitions = expectedLeaders
val fullResults = results.to[mutable.Set]

@@ -33,11 +33,16 @@ object DelayedOperationKey {
/* used by delayed-produce and delayed-fetch operations */
case class TopicPartitionOperationKey(topic: String, partition: Int) extends DelayedOperationKey {

def this(topicPartition: TopicPartition) = this(topicPartition.topic, topicPartition.partition)

override def keyLabel = "%s-%d".format(topic, partition)
}

object TopicPartitionOperationKey {
def apply(topicPartition: TopicPartition): TopicPartitionOperationKey = {
apply(topicPartition.topic, topicPartition.partition)
}
}

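The companion apply above replaces the auxiliary constructor, so call sites can drop new, as the KafkaApis change later in this patch does. A hedged usage sketch:

import org.apache.kafka.common.TopicPartition

val tp = new TopicPartition("orders", 3)
val key = TopicPartitionOperationKey(tp) // previously: new TopicPartitionOperationKey(tp)
assert(key.keyLabel == "orders-3")       // keyLabel formats "topic-partition"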
/* used by delayed-join-group operations */
|
||||
case class MemberKey(groupId: String, consumerId: String) extends DelayedOperationKey {
|
||||
|
||||
|
|
|
@ -17,15 +17,16 @@
|
|||
|
||||
package kafka.server
|
||||
|
||||
import java.lang.{Long => JLong}
|
||||
import java.lang.{Byte => JByte}
|
||||
import java.lang.{Long => JLong}
|
||||
import java.nio.ByteBuffer
|
||||
import java.util
|
||||
import java.util.Optional
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
import java.util.concurrent.atomic.AtomicInteger
|
||||
import java.util.{Collections, Optional, Properties}
|
||||
|
||||
import kafka.admin.{AdminUtils, RackAwareMode}
|
||||
import kafka.api.ElectLeadersRequestOps
|
||||
import kafka.api.{ApiVersion, KAFKA_0_11_0_IV0, KAFKA_2_3_IV0}
|
||||
import kafka.cluster.Partition
|
||||
import kafka.common.OffsetAndMetadata
|
||||
|
@ -47,9 +48,23 @@ import org.apache.kafka.common.errors._
|
|||
import org.apache.kafka.common.internals.FatalExitError
|
||||
import org.apache.kafka.common.internals.Topic.{GROUP_METADATA_TOPIC_NAME, TRANSACTION_STATE_TOPIC_NAME, isInternal}
|
||||
import org.apache.kafka.common.message.CreateTopicsRequestData.CreatableTopic
|
||||
import org.apache.kafka.common.message.CreateTopicsResponseData
|
||||
import org.apache.kafka.common.message.CreateTopicsResponseData.{CreatableTopicResult, CreatableTopicResultCollection}
|
||||
import org.apache.kafka.common.message._
|
||||
import org.apache.kafka.common.message.DeleteTopicsResponseData
|
||||
import org.apache.kafka.common.message.DeleteTopicsResponseData.{DeletableTopicResult, DeletableTopicResultCollection}
|
||||
import org.apache.kafka.common.message.DescribeGroupsResponseData
|
||||
import org.apache.kafka.common.message.ElectLeadersResponseData.PartitionResult;
|
||||
import org.apache.kafka.common.message.ElectLeadersResponseData.ReplicaElectionResult;
|
||||
import org.apache.kafka.common.message.FindCoordinatorResponseData
|
||||
import org.apache.kafka.common.message.HeartbeatResponseData
|
||||
import org.apache.kafka.common.message.InitProducerIdResponseData
|
||||
import org.apache.kafka.common.message.JoinGroupResponseData
|
||||
import org.apache.kafka.common.message.LeaveGroupResponseData
|
||||
import org.apache.kafka.common.message.OffsetCommitRequestData
|
||||
import org.apache.kafka.common.message.OffsetCommitResponseData
|
||||
import org.apache.kafka.common.message.SaslAuthenticateResponseData
|
||||
import org.apache.kafka.common.message.SaslHandshakeResponseData
|
||||
import org.apache.kafka.common.message.SyncGroupResponseData
|
||||
import org.apache.kafka.common.metrics.Metrics
|
||||
import org.apache.kafka.common.network.{ListenerName, Send}
|
||||
import org.apache.kafka.common.protocol.{ApiKeys, Errors}
|
||||
|
@ -153,7 +168,7 @@ class KafkaApis(val requestChannel: RequestChannel,
|
|||
case ApiKeys.EXPIRE_DELEGATION_TOKEN => handleExpireTokenRequest(request)
|
||||
case ApiKeys.DESCRIBE_DELEGATION_TOKEN => handleDescribeTokensRequest(request)
|
||||
case ApiKeys.DELETE_GROUPS => handleDeleteGroupsRequest(request)
|
||||
case ApiKeys.ELECT_PREFERRED_LEADERS => handleElectPreferredReplicaLeader(request)
|
||||
case ApiKeys.ELECT_LEADERS => handleElectReplicaLeader(request)
|
||||
case ApiKeys.INCREMENTAL_ALTER_CONFIGS => handleIncrementalAlterConfigsRequest(request)
|
||||
}
|
||||
} catch {
|
||||
|
@ -264,7 +279,7 @@ class KafkaApis(val requestChannel: RequestChannel,
|
|||
}
|
||||
if (replicaManager.hasDelayedElectionOperations) {
|
||||
updateMetadataRequest.partitionStates.asScala.foreach { case (tp, _) =>
|
||||
replicaManager.tryCompleteElection(new TopicPartitionOperationKey(tp))
|
||||
replicaManager.tryCompleteElection(TopicPartitionOperationKey(tp))
|
||||
}
|
||||
}
|
||||
sendResponseExemptThrottle(request, new UpdateMetadataResponse(Errors.NONE))
|
||||
|
@ -330,7 +345,7 @@ class KafkaApis(val requestChannel: RequestChannel,
|
|||
// Only enable static membership when IBP >= 2.3, because it is not safe for the broker to use the static member logic
|
||||
// until we are sure that all brokers support it. If static group being loaded by an older coordinator, it will discard
|
||||
// the group.instance.id field, so static members could accidentally become "dynamic", which leads to wrong states.
|
||||
var errorMap = new mutable.HashMap[TopicPartition, Errors]
|
||||
val errorMap = new mutable.HashMap[TopicPartition, Errors]
|
||||
for (topicData <- offsetCommitRequest.data().topics().asScala) {
|
||||
for (partitionData <- topicData.partitions().asScala) {
|
||||
val topicPartition = new TopicPartition(topicData.name(), partitionData.partitionIndex())
|
||||
|
@ -936,20 +951,20 @@ class KafkaApis(val requestChannel: RequestChannel,
|
|||
private def createTopic(topic: String,
|
||||
numPartitions: Int,
|
||||
replicationFactor: Int,
|
||||
properties: Properties = new Properties()): MetadataResponse.TopicMetadata = {
|
||||
properties: util.Properties = new util.Properties()): MetadataResponse.TopicMetadata = {
|
||||
try {
|
||||
adminZkClient.createTopic(topic, numPartitions, replicationFactor, properties, RackAwareMode.Safe)
|
||||
info("Auto creation of topic %s with %d partitions and replication factor %d is successful"
|
||||
.format(topic, numPartitions, replicationFactor))
|
||||
new MetadataResponse.TopicMetadata(Errors.LEADER_NOT_AVAILABLE, topic, isInternal(topic),
|
||||
java.util.Collections.emptyList())
|
||||
util.Collections.emptyList())
|
||||
} catch {
|
||||
case _: TopicExistsException => // let it go, possibly another broker created this topic
|
||||
new MetadataResponse.TopicMetadata(Errors.LEADER_NOT_AVAILABLE, topic, isInternal(topic),
|
||||
java.util.Collections.emptyList())
|
||||
util.Collections.emptyList())
|
||||
case ex: Throwable => // Catch all to prevent unhandled errors
|
||||
new MetadataResponse.TopicMetadata(Errors.forException(ex), topic, isInternal(topic),
|
||||
java.util.Collections.emptyList())
|
||||
util.Collections.emptyList())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -966,7 +981,7 @@ class KafkaApis(val requestChannel: RequestChannel,
|
|||
s"'${config.offsetsTopicReplicationFactor}' for the offsets topic (configured via " +
|
||||
s"'${KafkaConfig.OffsetsTopicReplicationFactorProp}'). This error can be ignored if the cluster is starting up " +
|
||||
s"and not all brokers are up yet.")
|
||||
-new MetadataResponse.TopicMetadata(Errors.COORDINATOR_NOT_AVAILABLE, topic, true, java.util.Collections.emptyList())
+new MetadataResponse.TopicMetadata(Errors.COORDINATOR_NOT_AVAILABLE, topic, true, util.Collections.emptyList())
 } else {
 createTopic(topic, config.offsetsTopicPartitions, config.offsetsTopicReplicationFactor.toInt,
 groupCoordinator.offsetsTopicConfigs)
@@ -977,7 +992,7 @@ class KafkaApis(val requestChannel: RequestChannel,
 s"'${config.transactionTopicReplicationFactor}' for the transactions state topic (configured via " +
 s"'${KafkaConfig.TransactionsTopicReplicationFactorProp}'). This error can be ignored if the cluster is starting up " +
 s"and not all brokers are up yet.")
-new MetadataResponse.TopicMetadata(Errors.COORDINATOR_NOT_AVAILABLE, topic, true, java.util.Collections.emptyList())
+new MetadataResponse.TopicMetadata(Errors.COORDINATOR_NOT_AVAILABLE, topic, true, util.Collections.emptyList())
 } else {
 createTopic(topic, config.transactionTopicPartitions, config.transactionTopicReplicationFactor.toInt,
 txnCoordinator.transactionTopicConfigs)
@@ -1004,13 +1019,13 @@ class KafkaApis(val requestChannel: RequestChannel,
 if (isInternal(topic)) {
 val topicMetadata = createInternalTopic(topic)
 if (topicMetadata.error == Errors.COORDINATOR_NOT_AVAILABLE)
-new MetadataResponse.TopicMetadata(Errors.INVALID_REPLICATION_FACTOR, topic, true, java.util.Collections.emptyList())
+new MetadataResponse.TopicMetadata(Errors.INVALID_REPLICATION_FACTOR, topic, true, util.Collections.emptyList())
 else
 topicMetadata
 } else if (allowAutoTopicCreation && config.autoCreateTopicsEnable) {
 createTopic(topic, config.numPartitions, config.defaultReplicationFactor)
 } else {
-new MetadataResponse.TopicMetadata(Errors.UNKNOWN_TOPIC_OR_PARTITION, topic, false, java.util.Collections.emptyList())
+new MetadataResponse.TopicMetadata(Errors.UNKNOWN_TOPIC_OR_PARTITION, topic, false, util.Collections.emptyList())
 }
 }
 topicResponses ++ responsesForNonExistentTopics
@@ -1048,7 +1063,7 @@ class KafkaApis(val requestChannel: RequestChannel,

 val unauthorizedForCreateTopicMetadata = unauthorizedForCreateTopics.map(topic =>
 new MetadataResponse.TopicMetadata(Errors.TOPIC_AUTHORIZATION_FAILED, topic, isInternal(topic),
-java.util.Collections.emptyList()))
+util.Collections.emptyList()))

 // do not disclose the existence of topics unauthorized for Describe, so we've not even checked if they exist or not
 val unauthorizedForDescribeTopicMetadata =
@@ -1057,7 +1072,7 @@ class KafkaApis(val requestChannel: RequestChannel,
 Set.empty[MetadataResponse.TopicMetadata]
 else
 unauthorizedForDescribeTopics.map(topic =>
-new MetadataResponse.TopicMetadata(Errors.TOPIC_AUTHORIZATION_FAILED, topic, false, java.util.Collections.emptyList()))
+new MetadataResponse.TopicMetadata(Errors.TOPIC_AUTHORIZATION_FAILED, topic, false, util.Collections.emptyList()))

 // In version 0, we returned an error when brokers with replicas were unavailable,
 // while in higher versions we simply don't include the broker in the returned broker list
@@ -1362,7 +1377,7 @@ class KafkaApis(val requestChannel: RequestChannel,
 .setProtocolName(JoinGroupResponse.UNKNOWN_PROTOCOL)
 .setLeader(JoinGroupResponse.UNKNOWN_MEMBER_ID)
 .setMemberId(JoinGroupResponse.UNKNOWN_MEMBER_ID)
-.setMembers(Collections.emptyList())
+.setMembers(util.Collections.emptyList())
 )
 )
 } else {
@@ -1676,7 +1691,7 @@ class KafkaApis(val requestChannel: RequestChannel,
 .setName(topic))
 }
 results.asScala.foreach(topic => {
-if (!authorize(request.session, Delete, Resource(Topic, topic.name, LITERAL)))
+if (!authorize(request.session, Delete, Resource(Topic, topic.name, LITERAL)))
 topic.setErrorCode(Errors.TOPIC_AUTHORIZATION_FAILED.code)
 else if (!metadataCache.contains(topic.name))
 topic.setErrorCode(Errors.UNKNOWN_TOPIC_OR_PARTITION.code)
@@ -2073,7 +2088,7 @@ class KafkaApis(val requestChannel: RequestChannel,
 case None =>
 sendResponseMaybeThrottle(request, requestThrottleMs =>
 new DescribeAclsResponse(requestThrottleMs,
-new ApiError(Errors.SECURITY_DISABLED, "No Authorizer is configured on the broker"), Collections.emptySet()))
+new ApiError(Errors.SECURITY_DISABLED, "No Authorizer is configured on the broker"), util.Collections.emptySet()))
 case Some(auth) =>
 val filter = describeAclsRequest.filter()
 val returnedAcls = auth.getAcls.toSeq.flatMap { case (resource, acls) =>
@@ -2278,7 +2293,7 @@ class KafkaApis(val requestChannel: RequestChannel,
 }.toMap, describeConfigsRequest.includeSynonyms)
 val unauthorizedConfigs = unauthorizedResources.map { resource =>
 val error = configsAuthorizationApiError(request.session, resource)
-resource -> new DescribeConfigsResponse.Config(error, Collections.emptyList[DescribeConfigsResponse.ConfigEntry])
+resource -> new DescribeConfigsResponse.Config(error, util.Collections.emptyList[DescribeConfigsResponse.ConfigEntry])
 }

 sendResponseMaybeThrottle(request, requestThrottleMs =>
@@ -2435,45 +2450,74 @@ class KafkaApis(val requestChannel: RequestChannel,
 true
 }

-def handleElectPreferredReplicaLeader(request: RequestChannel.Request): Unit = {
+def handleElectReplicaLeader(request: RequestChannel.Request): Unit = {

-val electionRequest = request.body[ElectPreferredLeadersRequest]
-val partitions =
-if (electionRequest.data().topicPartitions() == null) {
+val electionRequest = request.body[ElectLeadersRequest]
+
+def sendResponseCallback(
+error: ApiError
+)(
+results: Map[TopicPartition, ApiError]
+): Unit = {
+sendResponseMaybeThrottle(request, requestThrottleMs => {
+val adjustedResults = if (electionRequest.data().topicPartitions() == null) {
+/* When performing elections across all of the partitions we should only return
+ * partitions for which there was an election or which resulted in an error. In other
+ * words, partitions that didn't need election because they already have the correct
+ * leader are not returned to the client.
+ */
+results.filter { case (_, error) =>
+error.error != Errors.ELECTION_NOT_NEEDED
+}
+} else results
+
+val electionResults = new util.ArrayList[ReplicaElectionResult]()
+adjustedResults
+.groupBy { case (tp, _) => tp.topic }
+.foreach { case (topic, ps) =>
+val electionResult = new ReplicaElectionResult()
+
+electionResult.setTopic(topic)
+ps.foreach { case (topicPartition, error) =>
+val partitionResult = new PartitionResult()
+partitionResult.setPartitionId(topicPartition.partition)
+partitionResult.setErrorCode(error.error.code)
+partitionResult.setErrorMessage(error.message)
+electionResult.partitionResult.add(partitionResult)
+}
+
+electionResults.add(electionResult)
+}
+
+new ElectLeadersResponse(
+requestThrottleMs,
+error.error.code,
+electionResults,
+electionRequest.version
+)
+})
+}
+
+if (!authorize(request.session, Alter, Resource.ClusterResource)) {
+val error = new ApiError(Errors.CLUSTER_AUTHORIZATION_FAILED, null)
+val partitionErrors: Map[TopicPartition, ApiError] =
+electionRequest.topicPartitions.map(partition => partition -> error)(breakOut)
+
+sendResponseCallback(error)(partitionErrors)
+} else {
+val partitions = if (electionRequest.data().topicPartitions() == null) {
 metadataCache.getAllPartitions()
 } else {
-electionRequest.data().topicPartitions().asScala.flatMap{tp =>
-tp.partitionId().asScala.map(partitionId => new TopicPartition(tp.topic, partitionId))}.toSet
+electionRequest.topicPartitions
 }
-def sendResponseCallback(result: Map[TopicPartition, ApiError]): Unit = {
-sendResponseMaybeThrottle(request, requestThrottleMs => {
-val results = result.
-groupBy{case (tp, error) => tp.topic}.
-map{case (topic, ps) => new ElectPreferredLeadersResponseData.ReplicaElectionResult()
-.setTopic(topic)
-.setPartitionResult(ps.map{
-case (tp, error) =>
-new ElectPreferredLeadersResponseData.PartitionResult()
-.setErrorCode(error.error.code)
-.setErrorMessage(error.message())
-.setPartitionId(tp.partition)}.toList.asJava)}
-val data = new ElectPreferredLeadersResponseData()
-.setThrottleTimeMs(requestThrottleMs)
-.setReplicaElectionResults(results.toList.asJava)
-new ElectPreferredLeadersResponse(data)})
-}
-if (!authorize(request.session, Alter, Resource.ClusterResource)) {
-val error = new ApiError(Errors.CLUSTER_AUTHORIZATION_FAILED, null);
-val partitionErrors =
-if (electionRequest.data().topicPartitions() == null) {
-// Don't leak the set of partitions if the client lacks authz
-Map.empty[TopicPartition, ApiError]
-} else {
-partitions.map(partition => partition -> error).toMap
-}
-sendResponseCallback(partitionErrors)
-} else {
-replicaManager.electPreferredLeaders(controller, partitions, sendResponseCallback, electionRequest.data().timeoutMs())
+
+replicaManager.electLeaders(
+controller,
+partitions,
+electionRequest.electionType,
+sendResponseCallback(ApiError.NONE),
+electionRequest.data().timeoutMs()
+)
 }
 }
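The rewritten handler above reports a top-level error code plus per-partition results; clients reach it through the new AdminClient.electLeaders API exercised in the integration test further down in this diff. A minimal client-side sketch, assuming a broker at localhost:9092 and a hypothetical topic "my-topic" (electLeaders and ElectionType come from this commit; everything else is illustrative):

import java.util.Properties
import org.apache.kafka.clients.admin.AdminClient
import org.apache.kafka.common.{ElectionType, TopicPartition}
import scala.collection.JavaConverters._

object ElectLeadersSketch extends App {
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092") // assumed broker address
  val admin = AdminClient.create(props)
  try {
    // ElectionType.PREFERRED reproduces the old preferred-leader RPC; KIP-460
    // generalizes the request so other election types can reuse it.
    val result = admin.electLeaders(ElectionType.PREFERRED, Set(new TopicPartition("my-topic", 0)).asJava)
    // Each partition maps to an Optional error; an empty Optional means success.
    result.partitions.get().asScala.foreach { case (tp, err) =>
      if (err.isPresent) println(s"$tp failed: ${err.get.getMessage}")
      else println(s"$tp: leader elected")
    }
  } finally admin.close()
}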
@@ -32,20 +32,21 @@ import kafka.server.QuotaFactory.{QuotaManagers, UnboundedQuota}
 import kafka.server.checkpoints.{OffsetCheckpointFile, OffsetCheckpoints, SimpleOffsetCheckpoints}
 import kafka.utils._
 import kafka.zk.KafkaZkClient
+import org.apache.kafka.common.ElectionType
+import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.errors._
 import org.apache.kafka.common.internals.Topic
 import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.record.FileRecords.TimestampAndOffset
 import org.apache.kafka.common.record._
-import org.apache.kafka.common.requests.FetchResponse.AbortedTransaction
 import org.apache.kafka.common.requests.DescribeLogDirsResponse.{LogDirInfo, ReplicaInfo}
 import org.apache.kafka.common.requests.EpochEndOffset._
 import org.apache.kafka.common.requests.FetchRequest.PartitionData
+import org.apache.kafka.common.requests.FetchResponse.AbortedTransaction
 import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse
 import org.apache.kafka.common.requests._
 import org.apache.kafka.common.utils.Time
-import org.apache.kafka.common.TopicPartition
-import org.apache.kafka.common.record.FileRecords.TimestampAndOffset

 import scala.collection.JavaConverters._
 import scala.collection._
@@ -158,7 +159,7 @@ class ReplicaManager(val config: KafkaConfig,
 val delayedProducePurgatory: DelayedOperationPurgatory[DelayedProduce],
 val delayedFetchPurgatory: DelayedOperationPurgatory[DelayedFetch],
 val delayedDeleteRecordsPurgatory: DelayedOperationPurgatory[DelayedDeleteRecords],
-val delayedElectPreferredLeaderPurgatory: DelayedOperationPurgatory[DelayedElectPreferredLeader],
+val delayedElectLeaderPurgatory: DelayedOperationPurgatory[DelayedElectLeader],
 threadNamePrefix: Option[String]) extends Logging with KafkaMetricsGroup {

 def this(config: KafkaConfig,
@@ -184,8 +185,8 @@ class ReplicaManager(val config: KafkaConfig,
 DelayedOperationPurgatory[DelayedDeleteRecords](
 purgatoryName = "DeleteRecords", brokerId = config.brokerId,
 purgeInterval = config.deleteRecordsPurgatoryPurgeIntervalRequests),
-DelayedOperationPurgatory[DelayedElectPreferredLeader](
-purgatoryName = "ElectPreferredLeader", brokerId = config.brokerId),
+DelayedOperationPurgatory[DelayedElectLeader](
+purgatoryName = "ElectLeader", brokerId = config.brokerId),
 threadNamePrefix)
 }

@@ -305,11 +306,11 @@ class ReplicaManager(val config: KafkaConfig,

 def getLog(topicPartition: TopicPartition): Option[Log] = logManager.getLog(topicPartition)

-def hasDelayedElectionOperations: Boolean = delayedElectPreferredLeaderPurgatory.numDelayed != 0
+def hasDelayedElectionOperations: Boolean = delayedElectLeaderPurgatory.numDelayed != 0

 def tryCompleteElection(key: DelayedOperationKey): Unit = {
-val completed = delayedElectPreferredLeaderPurgatory.checkAndComplete(key)
-debug("Request key %s unblocked %d ElectPreferredLeader.".format(key.keyLabel, completed))
+val completed = delayedElectLeaderPurgatory.checkAndComplete(key)
+debug("Request key %s unblocked %d ElectLeader.".format(key.keyLabel, completed))
 }

 def startup() {
@@ -504,7 +505,7 @@ class ReplicaManager(val config: KafkaConfig,
 val delayedProduce = new DelayedProduce(timeout, produceMetadata, this, responseCallback, delayedProduceLock)

 // create a list of (topic, partition) pairs to use as keys for this delayed produce operation
-val producerRequestKeys = entriesPerPartition.keys.map(new TopicPartitionOperationKey(_)).toSeq
+val producerRequestKeys = entriesPerPartition.keys.map(TopicPartitionOperationKey(_)).toSeq

 // try to complete the request immediately, otherwise put it into the purgatory
 // this is because while the delayed produce operation is being created, new
@@ -704,7 +705,7 @@ class ReplicaManager(val config: KafkaConfig,
 val delayedDeleteRecords = new DelayedDeleteRecords(timeout, deleteRecordsStatus, this, responseCallback)

 // create a list of (topic, partition) pairs to use as keys for this delayed delete records operation
-val deleteRecordsRequestKeys = offsetPerPartition.keys.map(new TopicPartitionOperationKey(_)).toSeq
+val deleteRecordsRequestKeys = offsetPerPartition.keys.map(TopicPartitionOperationKey(_)).toSeq

 // try to complete the request immediately, otherwise put it into the purgatory
 // this is because while the delayed delete records operation is being created, new
@@ -883,7 +884,7 @@ class ReplicaManager(val config: KafkaConfig,
 val delayedFetch = new DelayedFetch(timeout, fetchMetadata, this, quota, responseCallback)

 // create a list of (topic, partition) pairs to use as keys for this delayed fetch operation
-val delayedFetchKeys = fetchPartitionStatus.map { case (tp, _) => new TopicPartitionOperationKey(tp) }
+val delayedFetchKeys = fetchPartitionStatus.map { case (tp, _) => TopicPartitionOperationKey(tp) }

 // try to complete the request immediately, otherwise put it into the purgatory;
 // this is because while the delayed fetch operation is being created, new requests
@@ -1326,7 +1327,7 @@ class ReplicaManager(val config: KafkaConfig,
 }

 partitionsToMakeFollower.foreach { partition =>
-val topicPartitionOperationKey = new TopicPartitionOperationKey(partition.topicPartition)
+val topicPartitionOperationKey = TopicPartitionOperationKey(partition.topicPartition)
 delayedProducePurgatory.checkAndComplete(topicPartitionOperationKey)
 delayedFetchPurgatory.checkAndComplete(topicPartitionOperationKey)
 }
@@ -1512,7 +1513,7 @@ class ReplicaManager(val config: KafkaConfig,
 delayedFetchPurgatory.shutdown()
 delayedProducePurgatory.shutdown()
 delayedDeleteRecordsPurgatory.shutdown()
-delayedElectPreferredLeaderPurgatory.shutdown()
+delayedElectLeaderPurgatory.shutdown()
 if (checkpointHW)
 checkpointHighWatermarks()
 info("Shut down completely")
@@ -1546,29 +1547,45 @@ class ReplicaManager(val config: KafkaConfig,
 }
 }

-def electPreferredLeaders(controller: KafkaController,
-partitions: Set[TopicPartition],
-responseCallback: Map[TopicPartition, ApiError] => Unit,
-requestTimeout: Long): Unit = {
+def electLeaders(
+controller: KafkaController,
+partitions: Set[TopicPartition],
+electionType: ElectionType,
+responseCallback: Map[TopicPartition, ApiError] => Unit,
+requestTimeout: Int
+): Unit = {

 val deadline = time.milliseconds() + requestTimeout

-def electionCallback(expectedLeaders: Map[TopicPartition, Int],
-results: Map[TopicPartition, ApiError]): Unit = {
+def electionCallback(results: Map[TopicPartition, Either[ApiError, Int]]): Unit = {
+val expectedLeaders = mutable.Map.empty[TopicPartition, Int]
+val failures = mutable.Map.empty[TopicPartition, ApiError]
+results.foreach {
+case (partition, Right(leader)) => expectedLeaders += partition -> leader
+case (partition, Left(error)) => failures += partition -> error
+}
+
 if (expectedLeaders.nonEmpty) {
-val watchKeys = expectedLeaders.map{
-case (tp, _) => new TopicPartitionOperationKey(tp)
-}.toSeq
-delayedElectPreferredLeaderPurgatory.tryCompleteElseWatch(
-new DelayedElectPreferredLeader(deadline - time.milliseconds(), expectedLeaders, results,
-this, responseCallback),
-watchKeys)
+val watchKeys: Seq[TopicPartitionOperationKey] = expectedLeaders.map{
+case (tp, _) => TopicPartitionOperationKey(tp)
+}(breakOut)
+
+delayedElectLeaderPurgatory.tryCompleteElseWatch(
+new DelayedElectLeader(
+math.max(0, deadline - time.milliseconds()),
+expectedLeaders,
+failures,
+this,
+responseCallback
+),
+watchKeys
+)
 } else {
 // There are no partitions actually being elected, so return immediately
-responseCallback(results)
+responseCallback(failures)
 }
 }

-controller.electPreferredLeaders(partitions, electionCallback)
+controller.electLeaders(partitions, electionType, electionCallback)
 }
 }
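electLeaders above now receives the controller's per-partition results as Either[ApiError, Int] and splits them into leaders still being awaited versus immediate failures. The same split, in a self-contained sketch with a String standing in for ApiError (illustrative only):

import org.apache.kafka.common.TopicPartition

object ElectionCallbackSketch extends App {
  // Right(leaderId) means the election went through; Left carries the error.
  val results: Map[TopicPartition, Either[String, Int]] = Map(
    new TopicPartition("a", 0) -> Right(1),
    new TopicPartition("b", 0) -> Left("PREFERRED_LEADER_NOT_AVAILABLE")
  )

  val expectedLeaders = results.collect { case (tp, Right(leader)) => tp -> leader }
  val failures = results.collect { case (tp, Left(error)) => tp -> error }

  // Mirrors the callback's branching: park in the purgatory only when at least
  // one partition actually changed leader, otherwise answer immediately.
  if (expectedLeaders.nonEmpty) println(s"watch until leaders confirm: $expectedLeaders")
  else println(s"respond now with failures: $failures")
}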
@@ -29,18 +29,18 @@ import kafka.security.auth.{Acl, Resource, ResourceType}
 import kafka.server.ConfigType
 import kafka.utils.Logging
 import kafka.zookeeper._
-import org.apache.kafka.common.{KafkaException, TopicPartition}
-import org.apache.kafka.common.resource.PatternType
 import org.apache.kafka.common.errors.ControllerMovedException
+import org.apache.kafka.common.resource.PatternType
 import org.apache.kafka.common.security.token.delegation.{DelegationToken, TokenInformation}
 import org.apache.kafka.common.utils.{Time, Utils}
+import org.apache.kafka.common.{KafkaException, TopicPartition}
 import org.apache.zookeeper.KeeperException.{Code, NodeExistsException}
 import org.apache.zookeeper.OpResult.{CreateResult, ErrorResult, SetDataResult}
 import org.apache.zookeeper.data.{ACL, Stat}
 import org.apache.zookeeper.{CreateMode, KeeperException, ZooKeeper}

 import scala.collection.mutable.ArrayBuffer
-import scala.collection.{Seq, mutable}
+import scala.collection.Seq
+import scala.collection.breakOut
+import scala.collection.mutable

 /**
 * Provides higher level Kafka-specific operations on top of the pipelined [[kafka.zookeeper.ZooKeeperClient]].
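The scala.collection.breakOut import added here is what lets several of the rewritten methods below build their final collection type in one pass, instead of mapping into an intermediate collection and then converting. A small stand-alone illustration (Scala 2.12 idiom; breakOut was removed in 2.13, and the names here are only for the example):

import scala.collection.breakOut

object BreakOutSketch extends App {
  val topics = Set("alpha", "beta")

  // Two steps: map produces a Set, then toSeq copies it into a Seq.
  val twoPass: Seq[String] = topics.map(t => s"topic:$t").toSeq

  // One step: the expected type Seq[String] selects the builder via breakOut,
  // so no intermediate Set is materialized.
  val onePass: Seq[String] = topics.map(t => s"topic:$t")(breakOut)

  println(twoPass.sorted == onePass.sorted)
}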
@@ -250,10 +250,11 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
 * @param expectedControllerEpochZkVersion expected controller epoch zkVersion.
 * @return UpdateLeaderAndIsrResult instance containing per partition results.
 */
-def updateLeaderAndIsr(leaderAndIsrs: Map[TopicPartition, LeaderAndIsr], controllerEpoch: Int, expectedControllerEpochZkVersion: Int): UpdateLeaderAndIsrResult = {
-val successfulUpdates = mutable.Map.empty[TopicPartition, LeaderAndIsr]
-val updatesToRetry = mutable.Buffer.empty[TopicPartition]
-val failed = mutable.Map.empty[TopicPartition, Exception]
+def updateLeaderAndIsr(
+leaderAndIsrs: Map[TopicPartition, LeaderAndIsr],
+controllerEpoch: Int,
+expectedControllerEpochZkVersion: Int
+): UpdateLeaderAndIsrResult = {
 val leaderIsrAndControllerEpochs = leaderAndIsrs.map { case (partition, leaderAndIsr) =>
 partition -> LeaderIsrAndControllerEpoch(leaderAndIsr, controllerEpoch)
 }
@@ -262,20 +263,26 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
 } catch {
 case e: ControllerMovedException => throw e
 case e: Exception =>
-leaderAndIsrs.keys.foreach(partition => failed.put(partition, e))
-return UpdateLeaderAndIsrResult(successfulUpdates.toMap, updatesToRetry, failed.toMap)
+return UpdateLeaderAndIsrResult(leaderAndIsrs.keys.map(_ -> Left(e))(breakOut), Seq.empty)
 }
-setDataResponses.foreach { setDataResponse =>
+
+val updatesToRetry = mutable.Buffer.empty[TopicPartition]
+val finished: Map[TopicPartition, Either[Exception, LeaderAndIsr]] = setDataResponses.flatMap { setDataResponse =>
 val partition = setDataResponse.ctx.get.asInstanceOf[TopicPartition]
 setDataResponse.resultCode match {
 case Code.OK =>
 val updatedLeaderAndIsr = leaderAndIsrs(partition).withZkVersion(setDataResponse.stat.getVersion)
-successfulUpdates.put(partition, updatedLeaderAndIsr)
-case Code.BADVERSION => updatesToRetry += partition
-case _ => failed.put(partition, setDataResponse.resultException.get)
+Some(partition -> Right(updatedLeaderAndIsr))
+case Code.BADVERSION =>
+// Update the buffer for partitions to retry
+updatesToRetry += partition
+None
+case _ =>
+Some(partition -> Left(setDataResponse.resultException.get))
 }
-}
-UpdateLeaderAndIsrResult(successfulUpdates.toMap, updatesToRetry, failed.toMap)
+}(breakOut)
+
+UpdateLeaderAndIsrResult(finished, updatesToRetry)
 }

 /**
@@ -286,8 +293,10 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
 * 1. The successfully gathered log configs
 * 2. Exceptions corresponding to failed log config lookups.
 */
-def getLogConfigs(topics: Seq[String], config: java.util.Map[String, AnyRef]):
-(Map[String, LogConfig], Map[String, Exception]) = {
+def getLogConfigs(
+topics: Set[String],
+config: java.util.Map[String, AnyRef]
+): (Map[String, LogConfig], Map[String, Exception]) = {
 val logConfigs = mutable.Map.empty[String, LogConfig]
 val failed = mutable.Map.empty[String, Exception]
 val configResponses = try {
@@ -453,11 +462,11 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
 * Gets all topics in the cluster.
 * @return sequence of topics in the cluster.
 */
-def getAllTopicsInCluster: Seq[String] = {
+def getAllTopicsInCluster: Set[String] = {
 val getChildrenResponse = retryRequestUntilConnected(GetChildrenRequest(TopicsZNode.path))
 getChildrenResponse.resultCode match {
-case Code.OK => getChildrenResponse.children
-case Code.NONODE => Seq.empty
+case Code.OK => getChildrenResponse.children.toSet
+case Code.NONODE => Set.empty
 case _ => throw getChildrenResponse.resultException.get
 }

@@ -1627,10 +1636,11 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
 retryRequestsUntilConnected(createRequests, expectedControllerEpochZkVersion)
 }

-private def getTopicConfigs(topics: Seq[String]): Seq[GetDataResponse] = {
-val getDataRequests = topics.map { topic =>
+private def getTopicConfigs(topics: Set[String]): Seq[GetDataResponse] = {
+val getDataRequests: Seq[GetDataRequest] = topics.map { topic =>
 GetDataRequest(ConfigEntityZNode.path(ConfigType.Topic, topic), ctx = Some(topic))
-}
+}(breakOut)

 retryRequestsUntilConnected(getDataRequests)
 }

@@ -1654,8 +1664,8 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
 }

 private def retryRequestsUntilConnected[Req <: AsyncRequest](requests: Seq[Req]): Seq[Req#Response] = {
-val remainingRequests = ArrayBuffer(requests: _*)
-val responses = new ArrayBuffer[Req#Response]
+val remainingRequests = mutable.ArrayBuffer(requests: _*)
+val responses = new mutable.ArrayBuffer[Req#Response]
 while (remainingRequests.nonEmpty) {
 val batchResponses = zooKeeperClient.handleRequests(remainingRequests)

@@ -1798,15 +1808,16 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
 object KafkaZkClient {

 /**
-* @param successfulPartitions The successfully updated partition states with adjusted znode versions.
+* @param finishedPartitions Partitions that finished either in successfully
+* updated partition states or failed with an exception.
 * @param partitionsToRetry The partitions that we should retry due to a zookeeper BADVERSION conflict. Version conflicts
 * can occur if the partition leader updated partition state while the controller attempted to
 * update partition state.
-* @param failedPartitions Exceptions corresponding to failed partition state updates.
 */
-case class UpdateLeaderAndIsrResult(successfulPartitions: Map[TopicPartition, LeaderAndIsr],
-partitionsToRetry: Seq[TopicPartition],
-failedPartitions: Map[TopicPartition, Exception])
+case class UpdateLeaderAndIsrResult(
+finishedPartitions: Map[TopicPartition, Either[Exception, LeaderAndIsr]],
+partitionsToRetry: Seq[TopicPartition]
+)

 /**
 * Create an instance of this class with the provided parameters.
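Callers of updateLeaderAndIsr now pattern match on the Either in finishedPartitions rather than juggling three separate maps. A sketch of the consuming side, with a local stand-in for kafka.api.LeaderAndIsr (types and data here are illustrative, not the real case class):

import org.apache.kafka.common.TopicPartition

object UpdateResultSketch extends App {
  case class LeaderAndIsr(leader: Int, isr: List[Int]) // stand-in for kafka.api.LeaderAndIsr

  case class UpdateLeaderAndIsrResult(
    finishedPartitions: Map[TopicPartition, Either[Exception, LeaderAndIsr]],
    partitionsToRetry: Seq[TopicPartition]
  )

  val result = UpdateLeaderAndIsrResult(
    finishedPartitions = Map(
      new TopicPartition("t", 0) -> Right(LeaderAndIsr(1, List(1, 2))),
      new TopicPartition("t", 1) -> Left(new IllegalStateException("znode update failed"))
    ),
    partitionsToRetry = Seq(new TopicPartition("t", 2)) // BADVERSION: retry later
  )

  result.finishedPartitions.foreach {
    case (tp, Right(lai)) => println(s"$tp now led by broker ${lai.leader}")
    case (tp, Left(e))    => println(s"$tp failed: ${e.getMessage}")
  }
  println(s"retry due to version conflict: ${result.partitionsToRetry}")
}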
@@ -35,7 +35,10 @@ import org.apache.kafka.clients.admin.NewTopic
 import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
 import org.apache.kafka.clients.producer.KafkaProducer
 import org.apache.kafka.clients.producer.ProducerRecord
-import org.apache.kafka.common.{ConsumerGroupState, TopicPartition, TopicPartitionReplica}
+import org.apache.kafka.common.ConsumerGroupState
+import org.apache.kafka.common.ElectionType
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.TopicPartitionReplica
 import org.apache.kafka.common.acl._
 import org.apache.kafka.common.config.ConfigResource
 import org.apache.kafka.common.errors._
@@ -1295,7 +1298,8 @@ class AdminClientIntegrationTest extends IntegrationTestHarness with Logging {
 zkClient.createPartitionReassignment(m)
 TestUtils.waitUntilTrue(
 () => preferredLeader(partition1) == preferred && preferredLeader(partition2) == preferred,
-s"Expected preferred leader to become $preferred, but is ${preferredLeader(partition1)} and ${preferredLeader(partition2)}", 10000)
+s"Expected preferred leader to become $preferred, but is ${preferredLeader(partition1)} and ${preferredLeader(partition2)}",
+10000)
 // Check the leader hasn't moved
 assertEquals(prior1, currentLeader(partition1))
 assertEquals(prior2, currentLeader(partition2))
@@ -1306,48 +1310,44 @@ class AdminClientIntegrationTest extends IntegrationTestHarness with Logging {
 assertEquals(0, currentLeader(partition2))

 // Noop election
-var electResult = client.electPreferredLeaders(asList(partition1))
-electResult.partitionResult(partition1).get()
+var electResult = client.electLeaders(ElectionType.PREFERRED, Set(partition1).asJava)
+var exception = electResult.partitions.get.get(partition1).get
+assertEquals(classOf[ElectionNotNeededException], exception.getClass)
+assertEquals("Leader election not needed for topic partition", exception.getMessage)
 assertEquals(0, currentLeader(partition1))

 // Noop election with null partitions
-electResult = client.electPreferredLeaders(null)
-electResult.partitionResult(partition1).get()
+electResult = client.electLeaders(ElectionType.PREFERRED, null)
+assertTrue(electResult.partitions.get.isEmpty)
 assertEquals(0, currentLeader(partition1))
-electResult.partitionResult(partition2).get()
 assertEquals(0, currentLeader(partition2))

 // Now change the preferred leader to 1
 changePreferredLeader(prefer1)

 // meaningful election
-electResult = client.electPreferredLeaders(asList(partition1))
-assertEquals(Set(partition1).asJava, electResult.partitions.get)
-electResult.partitionResult(partition1).get()
+electResult = client.electLeaders(ElectionType.PREFERRED, Set(partition1).asJava)
+assertEquals(Set(partition1).asJava, electResult.partitions.get.keySet)
+assertFalse(electResult.partitions.get.get(partition1).isPresent)
 waitForLeaderToBecome(partition1, 1)

 // topic 2 unchanged
-var e = intercept[ExecutionException](electResult.partitionResult(partition2).get()).getCause
-assertEquals(classOf[UnknownTopicOrPartitionException], e.getClass)
-assertEquals("Preferred leader election for partition \"elect-preferred-leaders-topic-2-0\" was not attempted",
-e.getMessage)
+assertFalse(electResult.partitions.get.containsKey(partition2))
 assertEquals(0, currentLeader(partition2))

 // meaningful election with null partitions
-electResult = client.electPreferredLeaders(null)
-assertEquals(Set(partition1, partition2), electResult.partitions.get.asScala.filterNot(_.topic == "__consumer_offsets"))
-electResult.partitionResult(partition1).get()
-waitForLeaderToBecome(partition1, 1)
-electResult.partitionResult(partition2).get()
+electResult = client.electLeaders(ElectionType.PREFERRED, null)
+assertEquals(Set(partition2), electResult.partitions.get.keySet.asScala)
+assertFalse(electResult.partitions.get.get(partition2).isPresent)
+waitForLeaderToBecome(partition2, 1)

 // unknown topic
 val unknownPartition = new TopicPartition("topic-does-not-exist", 0)
-electResult = client.electPreferredLeaders(asList(unknownPartition))
-assertEquals(Set(unknownPartition).asJava, electResult.partitions.get)
-e = intercept[ExecutionException](electResult.partitionResult(unknownPartition).get()).getCause
-assertEquals(classOf[UnknownTopicOrPartitionException], e.getClass)
-assertEquals("The partition does not exist.", e.getMessage)
+electResult = client.electLeaders(ElectionType.PREFERRED, Set(unknownPartition).asJava)
+assertEquals(Set(unknownPartition).asJava, electResult.partitions.get.keySet)
+exception = electResult.partitions.get.get(unknownPartition).get
+assertEquals(classOf[UnknownTopicOrPartitionException], exception.getClass)
+assertEquals("The partition does not exist.", exception.getMessage)
 assertEquals(1, currentLeader(partition1))
 assertEquals(1, currentLeader(partition2))

@@ -1355,18 +1355,18 @@ class AdminClientIntegrationTest extends IntegrationTestHarness with Logging {
 changePreferredLeader(prefer2)

 // mixed results
-electResult = client.electPreferredLeaders(asList(unknownPartition, partition1))
-assertEquals(Set(unknownPartition, partition1).asJava, electResult.partitions.get)
+electResult = client.electLeaders(ElectionType.PREFERRED, Set(unknownPartition, partition1).asJava)
+assertEquals(Set(unknownPartition, partition1).asJava, electResult.partitions.get.keySet)
 waitForLeaderToBecome(partition1, 2)
 assertEquals(1, currentLeader(partition2))
-e = intercept[ExecutionException](electResult.partitionResult(unknownPartition).get()).getCause
-assertEquals(classOf[UnknownTopicOrPartitionException], e.getClass)
-assertEquals("The partition does not exist.", e.getMessage)
+exception = electResult.partitions.get.get(unknownPartition).get
+assertEquals(classOf[UnknownTopicOrPartitionException], exception.getClass)
+assertEquals("The partition does not exist.", exception.getMessage)

-// dupe partitions
-electResult = client.electPreferredLeaders(asList(partition2, partition2))
-assertEquals(Set(partition2).asJava, electResult.partitions.get)
-electResult.partitionResult(partition2).get()
+// elect preferred leader for partition 2
+electResult = client.electLeaders(ElectionType.PREFERRED, Set(partition2).asJava)
+assertEquals(Set(partition2).asJava, electResult.partitions.get.keySet)
+assertFalse(electResult.partitions.get.get(partition2).isPresent)
 waitForLeaderToBecome(partition2, 2)

 // Now change the preferred leader to 1
@@ -1374,34 +1374,32 @@ class AdminClientIntegrationTest extends IntegrationTestHarness with Logging {
 // but shut it down...
 servers(1).shutdown()
 waitUntilTrue (() => {
-val description = client.describeTopics(Set(partition1.topic, partition2.topic).asJava).all().get()
+val description = client.describeTopics(Set(partition1.topic, partition2.topic).asJava).all.get
 val isr = description.asScala.values.flatMap(_.partitions.asScala.flatMap(_.isr.asScala))
 !isr.exists(_.id == 1)
 }, "Expect broker 1 to no longer be in any ISR")

 // ... now what happens if we try to elect the preferred leader and it's down?
-val shortTimeout = new ElectPreferredLeadersOptions().timeoutMs(10000)
-electResult = client.electPreferredLeaders(asList(partition1), shortTimeout)
-assertEquals(Set(partition1).asJava, electResult.partitions.get)
-e = intercept[ExecutionException](electResult.partitionResult(partition1).get()).getCause
-assertEquals(classOf[PreferredLeaderNotAvailableException], e.getClass)
-assertTrue(s"Wrong message ${e.getMessage}", e.getMessage.contains(
+val shortTimeout = new ElectLeadersOptions().timeoutMs(10000)
+electResult = client.electLeaders(ElectionType.PREFERRED, Set(partition1).asJava, shortTimeout)
+assertEquals(Set(partition1).asJava, electResult.partitions.get.keySet)
+exception = electResult.partitions.get.get(partition1).get
+assertEquals(classOf[PreferredLeaderNotAvailableException], exception.getClass)
+assertTrue(s"Wrong message ${exception.getMessage}", exception.getMessage.contains(
 "Failed to elect leader for partition elect-preferred-leaders-topic-1-0 under strategy PreferredReplicaPartitionLeaderElectionStrategy"))
 assertEquals(2, currentLeader(partition1))

 // preferred leader unavailable with null argument
-electResult = client.electPreferredLeaders(null, shortTimeout)
-e = intercept[ExecutionException](electResult.partitions.get()).getCause
-assertEquals(classOf[PreferredLeaderNotAvailableException], e.getClass)
+electResult = client.electLeaders(ElectionType.PREFERRED, null, shortTimeout)

-e = intercept[ExecutionException](electResult.partitionResult(partition1).get()).getCause
-assertEquals(classOf[PreferredLeaderNotAvailableException], e.getClass)
-assertTrue(s"Wrong message ${e.getMessage}", e.getMessage.contains(
+exception = electResult.partitions.get.get(partition1).get
+assertEquals(classOf[PreferredLeaderNotAvailableException], exception.getClass)
+assertTrue(s"Wrong message ${exception.getMessage}", exception.getMessage.contains(
 "Failed to elect leader for partition elect-preferred-leaders-topic-1-0 under strategy PreferredReplicaPartitionLeaderElectionStrategy"))

-e = intercept[ExecutionException](electResult.partitionResult(partition2).get()).getCause
-assertEquals(classOf[PreferredLeaderNotAvailableException], e.getClass)
-assertTrue(s"Wrong message ${e.getMessage}", e.getMessage.contains(
+exception = electResult.partitions.get.get(partition2).get
+assertEquals(classOf[PreferredLeaderNotAvailableException], exception.getClass)
+assertTrue(s"Wrong message ${exception.getMessage}", exception.getMessage.contains(
 "Failed to elect leader for partition elect-preferred-leaders-topic-2-0 under strategy PreferredReplicaPartitionLeaderElectionStrategy"))

 assertEquals(2, currentLeader(partition1))
|
|||
import org.apache.kafka.clients.consumer._
|
||||
import org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener
|
||||
import org.apache.kafka.clients.producer._
|
||||
import org.apache.kafka.common.ElectionType;
|
||||
import org.apache.kafka.common.acl.{AccessControlEntry, AccessControlEntryFilter, AclBinding, AclBindingFilter, AclOperation, AclPermissionType}
|
||||
import org.apache.kafka.common.config.ConfigResource
|
||||
import org.apache.kafka.common.errors._
|
||||
import org.apache.kafka.common.internals.Topic.GROUP_METADATA_TOPIC_NAME
|
||||
import org.apache.kafka.common.message._
|
||||
import org.apache.kafka.common.message.ControlledShutdownRequestData
|
||||
import org.apache.kafka.common.message.CreateTopicsRequestData
|
||||
import org.apache.kafka.common.message.CreateTopicsRequestData.{CreatableTopic, CreatableTopicCollection}
|
||||
import org.apache.kafka.common.message.DeleteTopicsRequestData
|
||||
import org.apache.kafka.common.message.DescribeGroupsRequestData
|
||||
import org.apache.kafka.common.message.FindCoordinatorRequestData
|
||||
import org.apache.kafka.common.message.HeartbeatRequestData
|
||||
import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData
|
||||
import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData.{AlterConfigsResource, AlterableConfig, AlterableConfigCollection}
|
||||
import org.apache.kafka.common.message.JoinGroupRequestData
|
||||
import org.apache.kafka.common.message.JoinGroupRequestData.JoinGroupRequestProtocolCollection
|
||||
import org.apache.kafka.common.message.LeaveGroupRequestData
|
||||
import org.apache.kafka.common.message.OffsetCommitRequestData
|
||||
import org.apache.kafka.common.message.SyncGroupRequestData
|
||||
import org.apache.kafka.common.network.ListenerName
|
||||
import org.apache.kafka.common.protocol.{ApiKeys, Errors}
|
||||
import org.apache.kafka.common.record.{CompressionType, MemoryRecords, Records, RecordBatch, SimpleRecord}
|
||||
|
@ -151,7 +162,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
|
|||
ApiKeys.ALTER_REPLICA_LOG_DIRS -> classOf[AlterReplicaLogDirsResponse],
|
||||
ApiKeys.DESCRIBE_LOG_DIRS -> classOf[DescribeLogDirsResponse],
|
||||
ApiKeys.CREATE_PARTITIONS -> classOf[CreatePartitionsResponse],
|
||||
ApiKeys.ELECT_PREFERRED_LEADERS -> classOf[ElectPreferredLeadersResponse],
|
||||
ApiKeys.ELECT_LEADERS -> classOf[ElectLeadersResponse],
|
||||
ApiKeys.INCREMENTAL_ALTER_CONFIGS -> classOf[IncrementalAlterConfigsResponse]
|
||||
)
|
||||
|
||||
|
@ -198,8 +209,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
|
|||
ApiKeys.DESCRIBE_LOG_DIRS -> ((resp: DescribeLogDirsResponse) =>
|
||||
if (resp.logDirInfos.size() > 0) resp.logDirInfos.asScala.head._2.error else Errors.CLUSTER_AUTHORIZATION_FAILED),
|
||||
ApiKeys.CREATE_PARTITIONS -> ((resp: CreatePartitionsResponse) => resp.errors.asScala.find(_._1 == topic).get._2.error),
|
||||
ApiKeys.ELECT_PREFERRED_LEADERS -> ((resp: ElectPreferredLeadersResponse) =>
|
||||
ElectPreferredLeadersRequest.fromResponseData(resp.data()).get(tp).error()),
|
||||
ApiKeys.ELECT_LEADERS -> ((resp: ElectLeadersResponse) => Errors.forCode(resp.data().errorCode())),
|
||||
ApiKeys.INCREMENTAL_ALTER_CONFIGS -> ((resp: IncrementalAlterConfigsResponse) =>
|
||||
IncrementalAlterConfigsResponse.fromResponseData(resp.data()).get(new ConfigResource(ConfigResource.Type.TOPIC, tp.topic)).error)
|
||||
)
|
||||
|
@ -240,7 +250,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
|
|||
ApiKeys.ALTER_REPLICA_LOG_DIRS -> clusterAlterAcl,
|
||||
ApiKeys.DESCRIBE_LOG_DIRS -> clusterDescribeAcl,
|
||||
ApiKeys.CREATE_PARTITIONS -> topicAlterAcl,
|
||||
ApiKeys.ELECT_PREFERRED_LEADERS -> clusterAlterAcl,
|
||||
ApiKeys.ELECT_LEADERS -> clusterAlterAcl,
|
||||
ApiKeys.INCREMENTAL_ALTER_CONFIGS -> topicAlterConfigsAcl
|
||||
)
|
||||
|
||||
|
@ -464,8 +474,11 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
|
|||
|
||||
private def addOffsetsToTxnRequest = new AddOffsetsToTxnRequest.Builder(transactionalId, 1, 1, group).build()
|
||||
|
||||
private def electPreferredLeadersRequest = new ElectPreferredLeadersRequest.Builder(
|
||||
ElectPreferredLeadersRequest.toRequestData(Collections.singleton(tp), 10000)).build()
|
||||
private def electLeadersRequest = new ElectLeadersRequest.Builder(
|
||||
ElectionType.PREFERRED,
|
||||
Collections.singleton(tp),
|
||||
10000
|
||||
).build()
|
||||
|
||||
@Test
|
||||
def testAuthorizationWithTopicExisting() {
|
||||
|
@ -501,7 +514,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
|
|||
ApiKeys.ADD_OFFSETS_TO_TXN -> addOffsetsToTxnRequest,
|
||||
// Check StopReplica last since some APIs depend on replica availability
|
||||
ApiKeys.STOP_REPLICA -> stopReplicaRequest,
|
||||
ApiKeys.ELECT_PREFERRED_LEADERS -> electPreferredLeadersRequest,
|
||||
ApiKeys.ELECT_LEADERS -> electLeadersRequest,
|
||||
ApiKeys.INCREMENTAL_ALTER_CONFIGS -> incrementalAlterConfigsRequest
|
||||
)
|
||||
|
||||
|
@ -549,7 +562,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
|
|||
ApiKeys.CREATE_PARTITIONS -> createPartitionsRequest,
|
||||
ApiKeys.DELETE_GROUPS -> deleteGroupsRequest,
|
||||
ApiKeys.OFFSET_FOR_LEADER_EPOCH -> offsetsForLeaderEpochRequest,
|
||||
ApiKeys.ELECT_PREFERRED_LEADERS -> electPreferredLeadersRequest
|
||||
ApiKeys.ELECT_LEADERS -> electLeadersRequest
|
||||
)
|
||||
|
||||
for ((key, request) <- requestKeyToRequest) {
|
||||
|
|
|
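The ACL tables above keep ELECT_LEADERS behind the same bar as its predecessor: Alter on the Cluster resource. At the raw-protocol level the builder now takes the election type, partitions, and timeout directly; a sketch matching the test helper (Builder arguments as in this diff, the wrapper object is hypothetical):

import java.util.Collections
import org.apache.kafka.common.{ElectionType, TopicPartition}
import org.apache.kafka.common.requests.ElectLeadersRequest

object ElectLeadersRequestSketch {
  val tp = new TopicPartition("topic", 0)

  // Builder(electionType, partitions, timeoutMs), replacing the old
  // toRequestData factory on ElectPreferredLeadersRequest.
  val request: ElectLeadersRequest = new ElectLeadersRequest.Builder(
    ElectionType.PREFERRED,
    Collections.singleton(tp),
    10000
  ).build()
}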
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package kafka.admin
-
-import java.util.Properties
-
-import kafka.server.{KafkaConfig, KafkaServer}
-import kafka.utils.{Logging, TestUtils}
-import kafka.zk.ZooKeeperTestHarness
-import org.apache.kafka.common.TopicPartition
-import org.junit.Assert.assertEquals
-import org.junit.{After, Test}
-
-import scala.collection.{Map, Set}
-
-class PreferredReplicaElectionCommandTest extends ZooKeeperTestHarness with Logging {
-var servers: Seq[KafkaServer] = Seq()
-
-@After
-override def tearDown() {
-TestUtils.shutdownServers(servers)
-super.tearDown()
-}
-
-@Test
-def testPreferredReplicaJsonData() {
-// write preferred replica json data to zk path
-val partitionsForPreferredReplicaElection = Set(new TopicPartition("test", 1), new TopicPartition("test2", 1))
-PreferredReplicaLeaderElectionCommand.writePreferredReplicaElectionData(zkClient, partitionsForPreferredReplicaElection)
-// try to read it back and compare with what was written
-val partitionsUndergoingPreferredReplicaElection = zkClient.getPreferredReplicaElection
-assertEquals("Preferred replica election ser-de failed", partitionsForPreferredReplicaElection,
-partitionsUndergoingPreferredReplicaElection)
-}
-
-@Test
-def testBasicPreferredReplicaElection() {
-val expectedReplicaAssignment = Map(0 -> List(0, 1, 2))
-val topic = "test"
-val partition = 0
-val preferredReplica = 0
-// create brokers
-val brokerRack = Map(0 -> "rack0", 1 -> "rack1", 2 -> "rack2")
-val serverConfigs = TestUtils.createBrokerConfigs(3, zkConnect, false, rackInfo = brokerRack).map(KafkaConfig.fromProps)
-// create the topic
-adminZkClient.createTopicWithAssignment(topic, config = new Properties, expectedReplicaAssignment)
-servers = serverConfigs.reverseMap(s => TestUtils.createServer(s))
-// broker 2 should be the leader since it was started first
-val currentLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition, oldLeaderOpt = None)
-// trigger preferred replica election
-val preferredReplicaElection = new PreferredReplicaLeaderElectionCommand(zkClient, Set(new TopicPartition(topic, partition)))
-preferredReplicaElection.moveLeaderToPreferredReplica()
-val newLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition, oldLeaderOpt = Some(currentLeader))
-assertEquals("Preferred replica election failed", preferredReplica, newLeader)
-}
-}
@@ -28,7 +28,10 @@ import kafka.server.{KafkaConfig, KafkaServer}
 import kafka.utils.{Logging, TestUtils, ZkUtils}
 import kafka.zk.ZooKeeperTestHarness
 import org.apache.kafka.common.TopicPartition
-import org.apache.kafka.common.errors.{ClusterAuthorizationException, PreferredLeaderNotAvailableException, TimeoutException, UnknownTopicOrPartitionException}
+import org.apache.kafka.common.errors.ClusterAuthorizationException
+import org.apache.kafka.common.errors.PreferredLeaderNotAvailableException
+import org.apache.kafka.common.errors.TimeoutException
+import org.apache.kafka.common.errors.UnknownTopicOrPartitionException
 import org.apache.kafka.common.network.ListenerName
 import org.junit.Assert._
 import org.junit.{After, Test}
@@ -65,8 +68,15 @@ class PreferredReplicaLeaderElectionCommandTest extends ZooKeeperTestHarness wit
 Map(tp -> assigment))
 }
 // wait until replica log is created on every broker
-TestUtils.waitUntilTrue(() => servers.forall(server => partitionsAndAssignments.forall(partitionAndAssignment => server.getLogManager().getLog(partitionAndAssignment._1).isDefined)),
-"Replicas for topic test not created")
+TestUtils.waitUntilTrue(
+() =>
+servers.forall { server =>
+partitionsAndAssignments.forall { partitionAndAssignment =>
+server.getLogManager().getLog(partitionAndAssignment._1).isDefined
+}
+},
+"Replicas for topic test not created"
+)
 }

 /** Bounce the given targetServer and wait for all servers to get metadata for the given partition */
@@ -291,8 +301,7 @@ class PreferredReplicaLeaderElectionCommandTest extends ZooKeeperTestHarness wit
 fail();
 } catch {
 case e: AdminCommandFailedException =>
-assertEquals("1 preferred replica(s) could not be elected", e.getMessage)
-assertTrue(e.getSuppressed()(0).getMessage.contains("Timed out waiting for a node assignment"))
+assertEquals("Timeout waiting for election results", e.getMessage)
 // Check we still have the same leader
 assertEquals(leader, getLeader(testPartition))
 } finally {
@@ -318,8 +327,8 @@ class PreferredReplicaLeaderElectionCommandTest extends ZooKeeperTestHarness wit
 fail();
 } catch {
 case e: AdminCommandFailedException =>
-assertEquals("1 preferred replica(s) could not be elected", e.getMessage)
-assertTrue(e.getSuppressed()(0).isInstanceOf[ClusterAuthorizationException])
+assertEquals("Not authorized to perform leader election", e.getMessage)
+assertTrue(e.getCause().isInstanceOf[ClusterAuthorizationException])
 // Check we still have the same leader
 assertEquals(leader, getLeader(testPartition))
 } finally {
@@ -327,6 +336,37 @@ class PreferredReplicaLeaderElectionCommandTest extends ZooKeeperTestHarness wit
 }
 }

+@Test
+def testPreferredReplicaJsonData() {
+// write preferred replica json data to zk path
+val partitionsForPreferredReplicaElection = Set(new TopicPartition("test", 1), new TopicPartition("test2", 1))
+PreferredReplicaLeaderElectionCommand.writePreferredReplicaElectionData(zkClient, partitionsForPreferredReplicaElection)
+// try to read it back and compare with what was written
+val partitionsUndergoingPreferredReplicaElection = zkClient.getPreferredReplicaElection
+assertEquals("Preferred replica election ser-de failed", partitionsForPreferredReplicaElection,
+partitionsUndergoingPreferredReplicaElection)
+}
+
+@Test
+def testBasicPreferredReplicaElection() {
+val expectedReplicaAssignment = Map(0 -> List(0, 1, 2))
+val topic = "test"
+val partition = 0
+val preferredReplica = 0
+// create brokers
+val brokerRack = Map(0 -> "rack0", 1 -> "rack1", 2 -> "rack2")
+val serverConfigs = TestUtils.createBrokerConfigs(3, zkConnect, false, rackInfo = brokerRack).map(KafkaConfig.fromProps)
+// create the topic
+adminZkClient.createTopicWithAssignment(topic, config = new Properties, expectedReplicaAssignment)
+servers = serverConfigs.reverseMap(s => TestUtils.createServer(s))
+// broker 2 should be the leader since it was started first
+val currentLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition, oldLeaderOpt = None)
+// trigger preferred replica election
+val preferredReplicaElection = new PreferredReplicaLeaderElectionCommand(zkClient, Set(new TopicPartition(topic, partition)))
+preferredReplicaElection.moveLeaderToPreferredReplica()
+val newLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition, oldLeaderOpt = Some(currentLeader))
+assertEquals("Preferred replica election failed", preferredReplica, newLeader)
+}
+}

 class PreferredReplicaLeaderElectionCommandTestAuthorizer extends SimpleAclAuthorizer {
@@ -16,19 +16,21 @@
 */
 package kafka.controller

+import kafka.api.LeaderAndIsr
 import kafka.common.StateChangeFailedException
 import kafka.controller.Election._
 import org.apache.kafka.common.TopicPartition

-import scala.collection.mutable
+import scala.collection.breakOut

 class MockPartitionStateMachine(controllerContext: ControllerContext,
 uncleanLeaderElectionEnabled: Boolean)
 extends PartitionStateMachine(controllerContext) {

-override def handleStateChanges(partitions: Seq[TopicPartition],
-targetState: PartitionState,
-leaderElectionStrategy: Option[PartitionLeaderElectionStrategy]): Map[TopicPartition, Throwable] = {
+override def handleStateChanges(
+partitions: Seq[TopicPartition],
+targetState: PartitionState,
+leaderElectionStrategy: Option[PartitionLeaderElectionStrategy]
+): Map[TopicPartition, Either[Throwable, LeaderAndIsr]] = {
 partitions.foreach(partition => controllerContext.putPartitionStateIfNotExists(partition, NonExistentPartition))
 val (validPartitions, invalidPartitions) = controllerContext.checkValidPartitionStateChange(partitions, targetState)
 if (invalidPartitions.nonEmpty) {
@@ -47,13 +49,13 @@ class MockPartitionStateMachine(controllerContext: ControllerContext,
 controllerContext.putPartitionState(partition, targetState)
 }

-val failedElections = doLeaderElections(partitionsToElectLeader, leaderElectionStrategy.get)
-val successfulElections = partitionsToElectLeader.filterNot(failedElections.keySet.contains)
-successfulElections.foreach { partition =>
-controllerContext.putPartitionState(partition, targetState)
+val electionResults = doLeaderElections(partitionsToElectLeader, leaderElectionStrategy.get)
+electionResults.foreach {
+case (partition, Right(_)) => controllerContext.putPartitionState(partition, targetState)
+case (_, Left(_)) => // Ignore; No need to update the context if the election failed
 }

-failedElections
+electionResults
 } else {
 validPartitions.foreach { partition =>
 controllerContext.putPartitionState(partition, targetState)
@@ -62,9 +64,10 @@ class MockPartitionStateMachine(controllerContext: ControllerContext,
 }
 }

-private def doLeaderElections(partitions: Seq[TopicPartition],
-leaderElectionStrategy: PartitionLeaderElectionStrategy): Map[TopicPartition, Throwable] = {
-val failedElections = mutable.Map.empty[TopicPartition, Exception]
+private def doLeaderElections(
+partitions: Seq[TopicPartition],
+leaderElectionStrategy: PartitionLeaderElectionStrategy
+): Map[TopicPartition, Either[Throwable, LeaderAndIsr]] = {
 val leaderIsrAndControllerEpochPerPartition = partitions.map { partition =>
 partition -> controllerContext.partitionLeadershipInfo(partition)
 }
@@ -72,17 +75,19 @@ class MockPartitionStateMachine(controllerContext: ControllerContext,
 val (invalidPartitionsForElection, validPartitionsForElection) = leaderIsrAndControllerEpochPerPartition.partition { case (_, leaderIsrAndControllerEpoch) =>
 leaderIsrAndControllerEpoch.controllerEpoch > controllerContext.epoch
 }
-invalidPartitionsForElection.foreach { case (partition, leaderIsrAndControllerEpoch) =>
+
+val failedElections = invalidPartitionsForElection.map { case (partition, leaderIsrAndControllerEpoch) =>
 val failMsg = s"aborted leader election for partition $partition since the LeaderAndIsr path was " +
 s"already written by another controller. This probably means that the current controller went through " +
 s"a soft failure and another controller was elected with epoch ${leaderIsrAndControllerEpoch.controllerEpoch}."
-failedElections.put(partition, new StateChangeFailedException(failMsg))
+
+partition -> Left(new StateChangeFailedException(failMsg))
 }

 val electionResults = leaderElectionStrategy match {
-case OfflinePartitionLeaderElectionStrategy =>
+case OfflinePartitionLeaderElectionStrategy(isUnclean) =>
 val partitionsWithUncleanLeaderElectionState = validPartitionsForElection.map { case (partition, leaderIsrAndControllerEpoch) =>
-(partition, Some(leaderIsrAndControllerEpoch), uncleanLeaderElectionEnabled)
+(partition, Some(leaderIsrAndControllerEpoch), isUnclean || uncleanLeaderElectionEnabled)
 }
 leaderForOffline(controllerContext, partitionsWithUncleanLeaderElectionState)
 case ReassignPartitionLeaderElectionStrategy =>
@@ -93,18 +98,22 @@ class MockPartitionStateMachine(controllerContext: ControllerContext,
 leaderForControlledShutdown(controllerContext, validPartitionsForElection)
 }

-for (electionResult <- electionResults) {
+val results: Map[TopicPartition, Either[Exception, LeaderAndIsr]] = electionResults.map { electionResult =>
 val partition = electionResult.topicPartition
-electionResult.leaderAndIsr match {
+val value = electionResult.leaderAndIsr match {
 case None =>
 val failMsg = s"Failed to elect leader for partition $partition under strategy $leaderElectionStrategy"
-failedElections.put(partition, new StateChangeFailedException(failMsg))
+Left(new StateChangeFailedException(failMsg))
 case Some(leaderAndIsr) =>
 val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, controllerContext.epoch)
 controllerContext.partitionLeadershipInfo.put(partition, leaderIsrAndControllerEpoch)
+Right(leaderAndIsr)
 }
-}
-failedElections.toMap
+
+partition -> value
+}(breakOut)
+
+results ++ failedElections
 }

 }
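OfflinePartitionLeaderElectionStrategy is now a case class carrying isUnclean, so a single ElectLeaders call with ElectionType.UNCLEAN can force an unclean election without flipping unclean.leader.election.enable cluster-wide; the mock honors it via isUnclean || uncleanLeaderElectionEnabled. A reduced sketch of that shape (strategy names follow the controller code, the surrounding logic is simplified):

object StrategySketch extends App {
  sealed trait PartitionLeaderElectionStrategy
  // Was a case object; the boolean now threads a per-request unclean override.
  final case class OfflinePartitionLeaderElectionStrategy(isUnclean: Boolean)
    extends PartitionLeaderElectionStrategy
  case object PreferredReplicaPartitionLeaderElectionStrategy extends PartitionLeaderElectionStrategy

  val topicConfigAllowsUnclean = false // stand-in for unclean.leader.election.enable

  def allowUnclean(s: PartitionLeaderElectionStrategy): Boolean = s match {
    case OfflinePartitionLeaderElectionStrategy(isUnclean) => isUnclean || topicConfigAllowsUnclean
    case _ => false
  }

  println(allowUnclean(OfflinePartitionLeaderElectionStrategy(true)))  // true: forced by the request
  println(allowUnclean(OfflinePartitionLeaderElectionStrategy(false))) // false: falls back to config
}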
@@ -20,8 +20,8 @@ import kafka.api.LeaderAndIsr
 import kafka.log.LogConfig
 import kafka.server.KafkaConfig
 import kafka.utils.TestUtils
-import kafka.zk.{KafkaZkClient, TopicPartitionStateZNode}
 import kafka.zk.KafkaZkClient.UpdateLeaderAndIsrResult
+import kafka.zk.{KafkaZkClient, TopicPartitionStateZNode}
 import kafka.zookeeper._
 import org.apache.kafka.common.TopicPartition
 import org.apache.zookeeper.KeeperException.Code

@@ -30,6 +30,7 @@ import org.easymock.EasyMock
 import org.junit.Assert._
 import org.junit.{Before, Test}
 import org.mockito.Mockito
+import scala.collection.breakOut
 
 class PartitionStateMachineTest {
   private var controllerContext: ControllerContext = null

@@ -65,7 +66,11 @@ class PartitionStateMachineTest {
 
   @Test
   def testInvalidNonexistentPartitionToOnlinePartitionTransition(): Unit = {
-    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Option(OfflinePartitionLeaderElectionStrategy))
+    partitionStateMachine.handleStateChanges(
+      partitions,
+      OnlinePartition,
+      Option(OfflinePartitionLeaderElectionStrategy(false))
+    )
     assertEquals(NonExistentPartition, partitionState(partition))
   }
 

@@ -88,7 +93,11 @@ class PartitionStateMachineTest {
       partition, leaderIsrAndControllerEpoch, Seq(brokerId), isNew = true))
     EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))
     EasyMock.replay(mockZkClient, mockControllerBrokerRequestBatch)
-    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Option(OfflinePartitionLeaderElectionStrategy))
+    partitionStateMachine.handleStateChanges(
+      partitions,
+      OnlinePartition,
+      Option(OfflinePartitionLeaderElectionStrategy(false))
+    )
     EasyMock.verify(mockZkClient, mockControllerBrokerRequestBatch)
     assertEquals(OnlinePartition, partitionState(partition))
   }

@@ -104,7 +113,11 @@ class PartitionStateMachineTest {
       .andThrow(new ZooKeeperClientException("test"))
     EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))
     EasyMock.replay(mockZkClient, mockControllerBrokerRequestBatch)
-    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Option(OfflinePartitionLeaderElectionStrategy))
+    partitionStateMachine.handleStateChanges(
+      partitions,
+      OnlinePartition,
+      Option(OfflinePartitionLeaderElectionStrategy(false))
+    )
     EasyMock.verify(mockZkClient, mockControllerBrokerRequestBatch)
     assertEquals(NewPartition, partitionState(partition))
   }

@@ -120,7 +133,11 @@ class PartitionStateMachineTest {
       .andReturn(Seq(CreateResponse(Code.NODEEXISTS, null, Some(partition), null, ResponseMetadata(0, 0))))
     EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))
     EasyMock.replay(mockZkClient, mockControllerBrokerRequestBatch)
-    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Option(OfflinePartitionLeaderElectionStrategy))
+    partitionStateMachine.handleStateChanges(
+      partitions,
+      OnlinePartition,
+      Option(OfflinePartitionLeaderElectionStrategy(false))
+    )
     EasyMock.verify(mockZkClient, mockControllerBrokerRequestBatch)
     assertEquals(NewPartition, partitionState(partition))
   }

@@ -157,7 +174,7 @@ class PartitionStateMachineTest {
     val leaderAndIsrAfterElection = leaderAndIsr.newLeader(brokerId)
     val updatedLeaderAndIsr = leaderAndIsrAfterElection.withZkVersion(2)
     EasyMock.expect(mockZkClient.updateLeaderAndIsr(Map(partition -> leaderAndIsrAfterElection), controllerEpoch, controllerContext.epochZkVersion))
-      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> updatedLeaderAndIsr), Seq.empty, Map.empty))
+      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
     EasyMock.expect(mockControllerBrokerRequestBatch.addLeaderAndIsrRequestForBrokers(Seq(brokerId),
       partition, LeaderIsrAndControllerEpoch(updatedLeaderAndIsr, controllerEpoch), Seq(brokerId), isNew = false))
     EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))

@@ -190,7 +207,7 @@ class PartitionStateMachineTest {
     val leaderAndIsrAfterElection = leaderAndIsr.newLeaderAndIsr(otherBrokerId, List(otherBrokerId))
     val updatedLeaderAndIsr = leaderAndIsrAfterElection.withZkVersion(2)
     EasyMock.expect(mockZkClient.updateLeaderAndIsr(Map(partition -> leaderAndIsrAfterElection), controllerEpoch, controllerContext.epochZkVersion))
-      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> updatedLeaderAndIsr), Seq.empty, Map.empty))
+      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
 
     // The leaderAndIsr request should be sent to both brokers, including the shutting down one
     EasyMock.expect(mockControllerBrokerRequestBatch.addLeaderAndIsrRequestForBrokers(Seq(brokerId, otherBrokerId),
@@ -240,18 +257,89 @@ class PartitionStateMachineTest {
       .andReturn(Seq(GetDataResponse(Code.OK, null, Some(partition),
         TopicPartitionStateZNode.encode(leaderIsrAndControllerEpoch), stat, ResponseMetadata(0, 0))))
 
-    EasyMock.expect(mockZkClient.getLogConfigs(Seq.empty, config.originals()))
+    EasyMock.expect(mockZkClient.getLogConfigs(Set.empty, config.originals()))
       .andReturn((Map(partition.topic -> LogConfig()), Map.empty))
     val leaderAndIsrAfterElection = leaderAndIsr.newLeader(brokerId)
     val updatedLeaderAndIsr = leaderAndIsrAfterElection.withZkVersion(2)
     EasyMock.expect(mockZkClient.updateLeaderAndIsr(Map(partition -> leaderAndIsrAfterElection), controllerEpoch, controllerContext.epochZkVersion))
-      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> updatedLeaderAndIsr), Seq.empty, Map.empty))
+      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
     EasyMock.expect(mockControllerBrokerRequestBatch.addLeaderAndIsrRequestForBrokers(Seq(brokerId),
       partition, LeaderIsrAndControllerEpoch(updatedLeaderAndIsr, controllerEpoch), Seq(brokerId), isNew = false))
     EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))
     EasyMock.replay(mockZkClient, mockControllerBrokerRequestBatch)
+
-    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Option(OfflinePartitionLeaderElectionStrategy))
+    partitionStateMachine.handleStateChanges(
+      partitions,
+      OnlinePartition,
+      Option(OfflinePartitionLeaderElectionStrategy(false))
+    )
     EasyMock.verify(mockZkClient, mockControllerBrokerRequestBatch)
     assertEquals(OnlinePartition, partitionState(partition))
   }
 
+  @Test
+  def testOfflinePartitionToUncleanOnlinePartitionTransition(): Unit = {
+    /* Starting scenario: Leader: X, Isr: [X], Replicas: [X, Y], LiveBrokers: [Y]
+     * Ending scenario: Leader: Y, Isr: [Y], Replicas: [X, Y], LiveBrokers: [Y]
+     *
+     * For the given starting scenario, verify that performing an unclean leader
+     * election on the offline partition results in the first live broker getting
+     * elected.
+     */
+    val leaderBrokerId = brokerId + 1
+    controllerContext.setLiveBrokerAndEpochs(Map(TestUtils.createBrokerAndEpoch(brokerId, "host", 0)))
+    controllerContext.updatePartitionReplicaAssignment(partition, Seq(leaderBrokerId, brokerId))
+    controllerContext.putPartitionState(partition, OfflinePartition)
+
+    val leaderAndIsr = LeaderAndIsr(leaderBrokerId, List(leaderBrokerId))
+    val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, controllerEpoch)
+    controllerContext.partitionLeadershipInfo.put(partition, leaderIsrAndControllerEpoch)
+
+    EasyMock.expect(mockControllerBrokerRequestBatch.newBatch())
+    EasyMock
+      .expect(mockZkClient.getTopicPartitionStatesRaw(partitions))
+      .andReturn(
+        Seq(
+          GetDataResponse(
+            Code.OK,
+            null,
+            Option(partition),
+            TopicPartitionStateZNode.encode(leaderIsrAndControllerEpoch),
+            new Stat(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+            ResponseMetadata(0, 0)
+          )
+        )
+      )
+
+    val leaderAndIsrAfterElection = leaderAndIsr.newLeaderAndIsr(brokerId, List(brokerId))
+    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withZkVersion(2)
+
+    EasyMock
+      .expect(
+        mockZkClient.updateLeaderAndIsr(
+          Map(partition -> leaderAndIsrAfterElection),
+          controllerEpoch,
+          controllerContext.epochZkVersion
+        )
+      )
+      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
+    EasyMock.expect(
+      mockControllerBrokerRequestBatch.addLeaderAndIsrRequestForBrokers(
+        Seq(brokerId),
+        partition,
+        LeaderIsrAndControllerEpoch(updatedLeaderAndIsr, controllerEpoch),
+        Seq(leaderBrokerId, brokerId),
+        false
+      )
+    )
+    EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))
+    EasyMock.replay(mockZkClient, mockControllerBrokerRequestBatch)
+
+    partitionStateMachine.handleStateChanges(
+      partitions,
+      OnlinePartition,
+      Option(OfflinePartitionLeaderElectionStrategy(true))
+    )
+    EasyMock.verify(mockZkClient, mockControllerBrokerRequestBatch)
+    assertEquals(OnlinePartition, partitionState(partition))
+  }
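
The unclean election exercised by this new test is what a client requests through the electLeaders API that this change introduces. A minimal sketch of such a call, assuming a broker at localhost:9092 and a topic named my_topic (both hypothetical), and using the KIP-460 result shape of one Optional error per partition:

import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.ElectLeadersOptions;
import org.apache.kafka.common.ElectionType;
import org.apache.kafka.common.TopicPartition;

public class UncleanElectionSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // hypothetical broker address

        try (AdminClient admin = AdminClient.create(props)) {
            // Ask the controller to elect a leader for partition 0 of my_topic,
            // allowing a replica outside the ISR to be chosen (unclean election).
            Set<TopicPartition> partitions =
                Collections.singleton(new TopicPartition("my_topic", 0));

            Map<TopicPartition, Optional<Throwable>> result = admin
                .electLeaders(ElectionType.UNCLEAN, partitions, new ElectLeadersOptions())
                .partitions()
                .get();

            // An empty Optional means the election for that partition succeeded.
            result.forEach((tp, error) ->
                System.out.println(tp + " -> " + (error.isPresent() ? error.get() : "ok")));
        }
    }
}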
@@ -272,7 +360,11 @@ class PartitionStateMachineTest {
     EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))
     EasyMock.replay(mockZkClient, mockControllerBrokerRequestBatch)
 
-    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Option(OfflinePartitionLeaderElectionStrategy))
+    partitionStateMachine.handleStateChanges(
+      partitions,
+      OnlinePartition,
+      Option(OfflinePartitionLeaderElectionStrategy(false))
+    )
     EasyMock.verify(mockZkClient, mockControllerBrokerRequestBatch)
     assertEquals(OfflinePartition, partitionState(partition))
   }

@@ -295,7 +387,11 @@ class PartitionStateMachineTest {
     EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))
     EasyMock.replay(mockZkClient, mockControllerBrokerRequestBatch)
 
-    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Option(OfflinePartitionLeaderElectionStrategy))
+    partitionStateMachine.handleStateChanges(
+      partitions,
+      OnlinePartition,
+      Option(OfflinePartitionLeaderElectionStrategy(false))
+    )
     EasyMock.verify(mockZkClient, mockControllerBrokerRequestBatch)
     assertEquals(OfflinePartition, partitionState(partition))
   }

@@ -327,18 +423,17 @@ class PartitionStateMachineTest {
     prepareMockToGetTopicPartitionsStatesRaw()
 
     def prepareMockToGetLogConfigs(): Unit = {
-      val topicsForPartitionsWithNoLiveInSyncReplicas = Seq()
-      EasyMock.expect(mockZkClient.getLogConfigs(topicsForPartitionsWithNoLiveInSyncReplicas, config.originals()))
+      EasyMock.expect(mockZkClient.getLogConfigs(Set.empty, config.originals()))
         .andReturn(Map.empty, Map.empty)
     }
     prepareMockToGetLogConfigs()
 
     def prepareMockToUpdateLeaderAndIsr(): Unit = {
-      val updatedLeaderAndIsr = partitions.map { partition =>
+      val updatedLeaderAndIsr: Map[TopicPartition, LeaderAndIsr] = partitions.map { partition =>
         partition -> leaderAndIsr.newLeaderAndIsr(brokerId, List(brokerId))
-      }.toMap
+      }(breakOut)
       EasyMock.expect(mockZkClient.updateLeaderAndIsr(updatedLeaderAndIsr, controllerEpoch, controllerContext.epochZkVersion))
-        .andReturn(UpdateLeaderAndIsrResult(updatedLeaderAndIsr, Seq.empty, Map.empty))
+        .andReturn(UpdateLeaderAndIsrResult(updatedLeaderAndIsr.mapValues(Right(_)), Seq.empty))
     }
     prepareMockToUpdateLeaderAndIsr()
   }

@@ -366,7 +461,7 @@ class PartitionStateMachineTest {
     partitionStateMachine.handleStateChanges(partitions, OfflinePartition)
     assertEquals(s"There should be ${partitions.size} offline partition(s)", partitions.size, controllerContext.offlinePartitionCount)
 
-    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Some(OfflinePartitionLeaderElectionStrategy))
+    partitionStateMachine.handleStateChanges(partitions, OnlinePartition, Some(OfflinePartitionLeaderElectionStrategy(false)))
     assertEquals(s"There should be no offline partition(s)", 0, controllerContext.offlinePartitionCount)
   }
 
@@ -222,7 +222,7 @@ class ReplicaStateMachineTest {
       Seq(GetDataResponse(Code.OK, null, Some(partition),
         TopicPartitionStateZNode.encode(leaderIsrAndControllerEpoch), stat, ResponseMetadata(0, 0))))
     EasyMock.expect(mockZkClient.updateLeaderAndIsr(Map(partition -> adjustedLeaderAndIsr), controllerEpoch, controllerContext.epochZkVersion))
-      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> updatedLeaderAndIsr), Seq.empty, Map.empty))
+      .andReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
     EasyMock.expect(mockControllerBrokerRequestBatch.addLeaderAndIsrRequestForBrokers(Seq(otherBrokerId),
       partition, updatedLeaderIsrAndControllerEpoch, replicaIds, isNew = false))
     EasyMock.expect(mockControllerBrokerRequestBatch.sendRequestsToBrokers(controllerEpoch))
@@ -201,7 +201,7 @@ object AbstractCoordinatorConcurrencyTest {
         })
       }
     }
-    val producerRequestKeys = entriesPerPartition.keys.map(new TopicPartitionOperationKey(_)).toSeq
+    val producerRequestKeys = entriesPerPartition.keys.map(TopicPartitionOperationKey(_)).toSeq
     watchKeys ++= producerRequestKeys
     producePurgatory.tryCompleteElseWatch(delayedProduce, producerRequestKeys)
     tryCompleteDelayedRequests()
@@ -659,8 +659,8 @@ class ReplicaManagerTest {
       purgatoryName = "Fetch", timer, reaperEnabled = false)
     val mockDeleteRecordsPurgatory = new DelayedOperationPurgatory[DelayedDeleteRecords](
       purgatoryName = "DeleteRecords", timer, reaperEnabled = false)
-    val mockElectPreferredLeaderPurgatory = new DelayedOperationPurgatory[DelayedElectPreferredLeader](
-      purgatoryName = "ElectPreferredLeader", timer, reaperEnabled = false)
+    val mockElectLeaderPurgatory = new DelayedOperationPurgatory[DelayedElectLeader](
+      purgatoryName = "ElectLeader", timer, reaperEnabled = false)
 
     // Mock network client to show leader offset of 5
     val quota = QuotaFactory.instantiate(config, metrics, time, "")

@@ -669,7 +669,7 @@ class ReplicaManagerTest {
     val replicaManager = new ReplicaManager(config, metrics, time, kafkaZkClient, mockScheduler, mockLogMgr,
       new AtomicBoolean(false), quota, mockBrokerTopicStats,
       metadataCache, mockLogDirFailureChannel, mockProducePurgatory, mockFetchPurgatory,
-      mockDeleteRecordsPurgatory, mockElectPreferredLeaderPurgatory, Option(this.getClass.getName)) {
+      mockDeleteRecordsPurgatory, mockElectLeaderPurgatory, Option(this.getClass.getName)) {
 
       override protected def createReplicaFetcherManager(metrics: Metrics,
                                                          time: Time,

@@ -819,13 +819,13 @@ class ReplicaManagerTest {
       purgatoryName = "Fetch", timer, reaperEnabled = false)
     val mockDeleteRecordsPurgatory = new DelayedOperationPurgatory[DelayedDeleteRecords](
       purgatoryName = "DeleteRecords", timer, reaperEnabled = false)
-    val mockDelayedElectPreferredLeaderPurgatory = new DelayedOperationPurgatory[DelayedElectPreferredLeader](
-      purgatoryName = "DelayedElectPreferredLeader", timer, reaperEnabled = false)
+    val mockDelayedElectLeaderPurgatory = new DelayedOperationPurgatory[DelayedElectLeader](
+      purgatoryName = "DelayedElectLeader", timer, reaperEnabled = false)
 
     new ReplicaManager(config, metrics, time, kafkaZkClient, new MockScheduler(time), mockLogMgr,
       new AtomicBoolean(false), QuotaFactory.instantiate(config, metrics, time, ""), new BrokerTopicStats,
       metadataCache, new LogDirFailureChannel(config.logDirs.size), mockProducePurgatory, mockFetchPurgatory,
-      mockDeleteRecordsPurgatory, mockDelayedElectPreferredLeaderPurgatory, Option(this.getClass.getName))
+      mockDeleteRecordsPurgatory, mockDelayedElectLeaderPurgatory, Option(this.getClass.getName))
   }
 
 }
@@ -21,14 +21,27 @@ import kafka.log.LogConfig
 import kafka.network.RequestChannel.Session
 import kafka.security.auth._
 import kafka.utils.TestUtils
-import org.apache.kafka.common.acl._
+import org.apache.kafka.common.ElectionType
+import org.apache.kafka.common.Node
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.acl.{AccessControlEntry, AccessControlEntryFilter, AclBinding, AclBindingFilter, AclOperation, AclPermissionType}
 import org.apache.kafka.common.config.ConfigResource
-import org.apache.kafka.common.message._
-import org.apache.kafka.common.resource.{PatternType, ResourcePattern, ResourcePatternFilter, ResourceType => AdminResourceType}
-import org.apache.kafka.common.{Node, TopicPartition}
+import org.apache.kafka.common.message.ControlledShutdownRequestData
+import org.apache.kafka.common.message.CreateTopicsRequestData
+import org.apache.kafka.common.message.CreateTopicsRequestData.{CreatableTopic, CreatableTopicCollection}
+import org.apache.kafka.common.message.DeleteTopicsRequestData
+import org.apache.kafka.common.message.DescribeGroupsRequestData
+import org.apache.kafka.common.message.FindCoordinatorRequestData
+import org.apache.kafka.common.message.HeartbeatRequestData
+import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData
+import org.apache.kafka.common.message.InitProducerIdRequestData
+import org.apache.kafka.common.message.JoinGroupRequestData
+import org.apache.kafka.common.message.JoinGroupRequestData.JoinGroupRequestProtocolCollection
+import org.apache.kafka.common.message.LeaveGroupRequestData
+import org.apache.kafka.common.message.OffsetCommitRequestData
+import org.apache.kafka.common.message.SaslAuthenticateRequestData
+import org.apache.kafka.common.message.SaslHandshakeRequestData
+import org.apache.kafka.common.message.SyncGroupRequestData
 import org.apache.kafka.common.metrics.{KafkaMetric, Quota, Sensor}
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.protocol.ApiKeys

@@ -36,6 +49,7 @@ import org.apache.kafka.common.protocol.types.Struct
 import org.apache.kafka.common.record._
 import org.apache.kafka.common.requests.CreateAclsRequest.AclCreation
 import org.apache.kafka.common.requests._
+import org.apache.kafka.common.resource.{PatternType, ResourcePattern, ResourcePatternFilter, ResourceType => AdminResourceType}
 import org.apache.kafka.common.security.auth.{AuthenticationContext, KafkaPrincipal, KafkaPrincipalBuilder, SecurityProtocol}
 import org.apache.kafka.common.utils.Sanitizer
 import org.apache.kafka.common.utils.SecurityUtils

@@ -429,14 +443,12 @@ class RequestQuotaTest extends BaseRequestTest {
         case ApiKeys.DELETE_GROUPS =>
           new DeleteGroupsRequest.Builder(Collections.singleton("test-group"))
 
-        case ApiKeys.ELECT_PREFERRED_LEADERS =>
-          val partition = new ElectPreferredLeadersRequestData.TopicPartitions()
-            .setPartitionId(Collections.singletonList(0))
-            .setTopic("my_topic")
-          new ElectPreferredLeadersRequest.Builder(
-            new ElectPreferredLeadersRequestData()
-              .setTimeoutMs(0)
-              .setTopicPartitions(Collections.singletonList(partition)))
+        case ApiKeys.ELECT_LEADERS =>
+          new ElectLeadersRequest.Builder(
+            ElectionType.PREFERRED,
+            Collections.singletonList(new TopicPartition("my_topic", 0)),
+            0
+          )
 
         case ApiKeys.INCREMENTAL_ALTER_CONFIGS =>
           new IncrementalAlterConfigsRequest.Builder(

@@ -542,7 +554,7 @@ class RequestQuotaTest extends BaseRequestTest {
       case ApiKeys.RENEW_DELEGATION_TOKEN => new RenewDelegationTokenResponse(response).throttleTimeMs
       case ApiKeys.DELETE_GROUPS => new DeleteGroupsResponse(response).throttleTimeMs
       case ApiKeys.OFFSET_FOR_LEADER_EPOCH => new OffsetsForLeaderEpochResponse(response).throttleTimeMs
-      case ApiKeys.ELECT_PREFERRED_LEADERS => new ElectPreferredLeadersResponse(response).throttleTimeMs
+      case ApiKeys.ELECT_LEADERS => new ElectLeadersResponse(response).throttleTimeMs
       case ApiKeys.INCREMENTAL_ALTER_CONFIGS =>
         new IncrementalAlterConfigsResponse(response, ApiKeys.INCREMENTAL_ALTER_CONFIGS.latestVersion()).throttleTimeMs
       case requestId => throw new IllegalArgumentException(s"No throttle time for $requestId")
@@ -159,7 +159,7 @@ class AdminZkClientTest extends ZooKeeperTestHarness with Logging with RackAware
     // simulate the ZK interactions that can happen when a topic is concurrently created by multiple processes
     val zkMock: KafkaZkClient = EasyMock.createNiceMock(classOf[KafkaZkClient])
     EasyMock.expect(zkMock.topicExists(topic)).andReturn(false)
-    EasyMock.expect(zkMock.getAllTopicsInCluster).andReturn(Seq("some.topic", topic, "some.other.topic"))
+    EasyMock.expect(zkMock.getAllTopicsInCluster).andReturn(Set("some.topic", topic, "some.other.topic"))
     EasyMock.replay(zkMock)
     val adminZkClient = new AdminZkClient(zkMock)
 
@@ -648,19 +648,19 @@ class KafkaZkClientTest extends ZooKeeperTestHarness {
     val emptyConfig = LogConfig(Collections.emptyMap())
     assertEquals("Non existent config, no defaults",
       (Map(topic1 -> emptyConfig), Map.empty),
-      zkClient.getLogConfigs(Seq(topic1), Collections.emptyMap()))
+      zkClient.getLogConfigs(Set(topic1), Collections.emptyMap()))
 
     val logProps2 = createLogProps(2048)
 
     zkClient.setOrCreateEntityConfigs(ConfigType.Topic, topic1, logProps)
     assertEquals("One existing and one non-existent topic",
       (Map(topic1 -> LogConfig(logProps), topic2 -> emptyConfig), Map.empty),
-      zkClient.getLogConfigs(Seq(topic1, topic2), Collections.emptyMap()))
+      zkClient.getLogConfigs(Set(topic1, topic2), Collections.emptyMap()))
 
     zkClient.setOrCreateEntityConfigs(ConfigType.Topic, topic2, logProps2)
     assertEquals("Two existing topics",
       (Map(topic1 -> LogConfig(logProps), topic2 -> LogConfig(logProps2)), Map.empty),
-      zkClient.getLogConfigs(Seq(topic1, topic2), Collections.emptyMap()))
+      zkClient.getLogConfigs(Set(topic1, topic2), Collections.emptyMap()))
 
     val logProps1WithMoreValues = createLogProps(1024)
     logProps1WithMoreValues.put(LogConfig.SegmentJitterMsProp, "100")

@@ -668,7 +668,7 @@ class KafkaZkClientTest extends ZooKeeperTestHarness {
 
     assertEquals("Config with defaults",
       (Map(topic1 -> LogConfig(logProps1WithMoreValues)), Map.empty),
-      zkClient.getLogConfigs(Seq(topic1),
+      zkClient.getLogConfigs(Set(topic1),
         Map[String, AnyRef](LogConfig.SegmentJitterMsProp -> "100", LogConfig.SegmentBytesProp -> "128").asJava))
   }
 

@@ -794,14 +794,20 @@ class KafkaZkClientTest extends ZooKeeperTestHarness {
       expectedPartitionsToRetry: Seq[TopicPartition],
       expectedFailedPartitions: Map[TopicPartition, (Class[_], String)],
       actualUpdateLeaderAndIsrResult: UpdateLeaderAndIsrResult): Unit = {
-    val failedPartitionsExcerpt =
-      actualUpdateLeaderAndIsrResult.failedPartitions.mapValues(e => (e.getClass, e.getMessage))
+    val failedPartitionsExcerpt = mutable.Map.empty[TopicPartition, (Class[_], String)]
+    val successfulPartitions = mutable.Map.empty[TopicPartition, LeaderAndIsr]
+
+    actualUpdateLeaderAndIsrResult.finishedPartitions.foreach {
+      case (partition, Left(e)) => failedPartitionsExcerpt += partition -> (e.getClass, e.getMessage)
+      case (partition, Right(leaderAndIsr)) => successfulPartitions += partition -> leaderAndIsr
+    }
+
     assertEquals("Permanently failed updates do not match expected",
       expectedFailedPartitions, failedPartitionsExcerpt)
     assertEquals("Retriable updates (due to BADVERSION) do not match expected",
       expectedPartitionsToRetry, actualUpdateLeaderAndIsrResult.partitionsToRetry)
     assertEquals("Successful updates do not match expected",
-      expectedSuccessfulPartitions, actualUpdateLeaderAndIsrResult.successfulPartitions)
+      expectedSuccessfulPartitions, successfulPartitions)
   }
 
   @Test
@@ -71,6 +71,12 @@
         </li>
     </ol>
 
+<h5><a id="upgrade_240_notable" href="#upgrade_240_notable">Notable changes in 2.4.0</a></h5>
+<ul>
+    <li>The <code>bin/kafka-preferred-replica-election.sh</code> command line tool has been deprecated. It has been replaced by <code>bin/kafka-leader-election.sh</code>.</li>
+    <li>The methods <code>electPreferredLeaders</code> in the Java <code>AdminClient</code> class have been deprecated in favor of the methods <code>electLeaders</code>.</li>
+</ul>
+
 <h5><a id="upgrade_230_notable" href="#upgrade_230_notable">Notable changes in 2.3.0</a></h5>
 <ul>
     <li>
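
For the deprecation noted above, a minimal migration sketch, assuming a broker at localhost:9092 (hypothetical); per KIP-460, passing null for the partition set requests a preferred election for every partition in the cluster:

import java.util.Properties;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.common.ElectionType;

public class PreferredElectionSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // hypothetical broker address

        try (AdminClient admin = AdminClient.create(props)) {
            // Replaces the deprecated electPreferredLeaders: a preferred-replica
            // election for all partitions (null means "all partitions" in KIP-460).
            admin.electLeaders(ElectionType.PREFERRED, null).partitions().get();
        }
    }
}

The replacement shell tool maps the same way; per the LeaderElectionCommand added in this commit, bin/kafka-leader-election.sh takes --bootstrap-server and --election-type PREFERRED or UNCLEAN, scoped by --all-topic-partitions, --topic with --partition, or --path-to-json-file.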
@@ -100,8 +100,10 @@ For a detailed description of spotbugs bug categories, see https://spotbugs.read
     </Match>
 
     <Match>
-        <!-- Add a suppression for the equals() method of NetworkClientBlockingOps. -->
-        <Class name="kafka.utils.NetworkClientBlockingOps"/>
+        <!-- Suppression for the equals() of extension methods. -->
+        <Or>
+            <Class name="kafka.api.package$ElectLeadersRequestOps"/>
+        </Or>
         <Bug pattern="EQ_UNUSUAL"/>
     </Match>
 
@@ -272,7 +272,7 @@ public class EmbeddedKafkaCluster extends ExternalResource {
      * @param timeoutMs the max time to wait for the topics to be deleted (does not block if {@code <= 0})
      */
     public void deleteAllTopicsAndWait(final long timeoutMs) throws InterruptedException {
-        final List<String> topics = JavaConverters.seqAsJavaListConverter(brokers[0].kafkaServer().zkClient().getAllTopicsInCluster()).asJava();
+        final Set<String> topics = JavaConverters.setAsJavaSetConverter(brokers[0].kafkaServer().zkClient().getAllTopicsInCluster()).asJava();
         for (final String topic : topics) {
             try {
                 brokers[0].deleteTopic(topic);

@@ -312,7 +312,7 @@ public class EmbeddedKafkaCluster extends ExternalResource {
             @Override
             public boolean conditionMet() {
                 final Set<String> allTopics = new HashSet<>(
-                    JavaConverters.seqAsJavaListConverter(brokers[0].kafkaServer().zkClient().getAllTopicsInCluster()).asJava());
+                    JavaConverters.setAsJavaSetConverter(brokers[0].kafkaServer().zkClient().getAllTopicsInCluster()).asJava());
                 return !allTopics.removeAll(deletedTopics);
             }
         }

@@ -326,8 +326,7 @@ public class EmbeddedKafkaCluster extends ExternalResource {
 
             @Override
             public boolean conditionMet() {
-                final Set<String> allTopics = new HashSet<>(
-                    JavaConverters.seqAsJavaListConverter(brokers[0].kafkaServer().zkClient().getAllTopicsInCluster()).asJava());
+                final Set<String> allTopics = JavaConverters.setAsJavaSetConverter(brokers[0].kafkaServer().zkClient().getAllTopicsInCluster()).asJava();
                 return allTopics.equals(remainingTopics);
             }
         }