Fetch session optimizations

This commit is contained in:
Lucas Bradstreet 2019-10-01 16:39:42 -07:00
parent bbb1e99940
commit 2614b2417a
3 changed files with 156 additions and 15 deletions

View File

@ -185,7 +185,18 @@ public class FetchSessionHandler {
* Another reason is that we make use of the list ordering to optimize the preparation of
* incremental fetch requests (see below).
*/
private LinkedHashMap<TopicPartition, PartitionData> next = new LinkedHashMap<>();
// Partitions requested for the upcoming fetch; consumed (nulled out) by build().
private LinkedHashMap<TopicPartition, PartitionData> next;
// Whether build() should hand out a defensive copy of sessionPartitions, or expose
// the live map directly to avoid the copy (replica-fetcher optimization).
private final boolean copySessionPartitions;
// Default builder: unsized map, and a defensive copy of session partitions on build().
Builder() {
this.next = new LinkedHashMap<>();
this.copySessionPartitions = true;
}
// Presizing builder; callers that own the result (e.g. the replica fetcher) may pass
// copySessionPartitions=false to skip the defensive copy in build().
Builder(int initialSize, boolean copySessionPartitions) {
this.next = new LinkedHashMap<>(initialSize);
this.copySessionPartitions = copySessionPartitions;
}
/**
* Mark that we want data from this partition in the upcoming fetch.
@ -207,6 +218,7 @@ public class FetchSessionHandler {
return new FetchRequestData(toSend, Collections.emptyList(), toSend, nextMetadata);
}
LinkedHashMap<TopicPartition, PartitionData> updated = new LinkedHashMap<>();
List<TopicPartition> added = new ArrayList<>();
List<TopicPartition> removed = new ArrayList<>();
List<TopicPartition> altered = new ArrayList<>();
@ -215,16 +227,13 @@ public class FetchSessionHandler {
Entry<TopicPartition, PartitionData> entry = iter.next();
TopicPartition topicPartition = entry.getKey();
PartitionData prevData = entry.getValue();
PartitionData nextData = next.get(topicPartition);
// process from next - removing the entry now so that later only
// added partitions are left
PartitionData nextData = next.remove(topicPartition);
if (nextData != null) {
if (prevData.equals(nextData)) {
// Omit this partition from the FetchRequest, because it hasn't changed
// since the previous request.
next.remove(topicPartition);
} else {
// Move the altered partition to the end of 'next'
next.remove(topicPartition);
next.put(topicPartition, nextData);
if (!prevData.equals(nextData)) {
// partition data was updated
updated.put(topicPartition, nextData);
entry.setValue(nextData);
altered.add(topicPartition);
}
@ -247,6 +256,7 @@ public class FetchSessionHandler {
break;
}
sessionPartitions.put(topicPartition, nextData);
updated.put(topicPartition, nextData);
added.add(topicPartition);
}
if (log.isDebugEnabled()) {
@ -255,10 +265,10 @@ public class FetchSessionHandler {
partitionsToLogString(altered), partitionsToLogString(removed),
partitionsToLogString(sessionPartitions.keySet()));
}
Map<TopicPartition, PartitionData> toSend =
Collections.unmodifiableMap(new LinkedHashMap<>(next));
Map<TopicPartition, PartitionData> curSessionPartitions =
Collections.unmodifiableMap(new LinkedHashMap<>(sessionPartitions));
Map<TopicPartition, PartitionData> toSend = Collections.unmodifiableMap(updated);
Map<TopicPartition, PartitionData> curSessionPartitions = copySessionPartitions
? Collections.unmodifiableMap(new LinkedHashMap<>(sessionPartitions))
: Collections.unmodifiableMap(sessionPartitions);
next = null;
return new FetchRequestData(toSend, Collections.unmodifiableList(removed),
curSessionPartitions, nextMetadata);
@ -269,6 +279,18 @@ public class FetchSessionHandler {
return new Builder();
}
/** A builder that allows for presizing the PartitionData hashmap, and avoiding making a
 * secondary copy of the sessionPartitions, in cases where this is not necessary.
 * This builder is primarily for use by the replica fetcher.
 * @param size the initial size of the PartitionData hashmap
 * @param copySessionPartitions boolean denoting whether the builder should make a deep copy of
 *                              session partitions
 */
public Builder newBuilder(int size, boolean copySessionPartitions) {
return new Builder(size, copySessionPartitions);
}
private String partitionsToLogString(Collection<TopicPartition> partitions) {
if (!log.isTraceEnabled()) {
return String.format("%d partition(s)", partitions.size());

View File

@ -244,7 +244,7 @@ class ReplicaFetcherThread(name: String,
override def buildFetch(partitionMap: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]] = {
val partitionsWithError = mutable.Set[TopicPartition]()
val builder = fetchSessionHandler.newBuilder()
val builder = fetchSessionHandler.newBuilder(partitionMap.size, false)
partitionMap.foreach { case (topicPartition, fetchState) =>
// We will not include a replica in the fetch request if it should be throttled.
if (fetchState.isReadyForFetch && !shouldFollowerThrottle(quota, fetchState, topicPartition)) {

View File

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.fetchsession;
import org.apache.kafka.clients.FetchSessionHandler;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.requests.FetchRequest;
import org.apache.kafka.common.requests.FetchResponse;
import org.apache.kafka.common.utils.LogContext;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 5)
@Measurement(iterations = 10)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class FetchSessionBenchmark {
    private static final LogContext LOG_CONTEXT = new LogContext("[BenchFetchSessionHandler]=");

    /** Number of partitions tracked in the fetch session. */
    @Param(value = {"10", "100", "1000"})
    private int partitionCount;

    /**
     * Approximate percentage of partitions whose fetch state changes between requests.
     * Values of 100 or more update every partition.
     */
    @Param(value = {"0", "10", "100", "1000"})
    private int updatedPercentage;

    /** Whether to exercise the presizing/no-copy newBuilder(int, boolean) overload. */
    @Param(value = {"false", "true"})
    private boolean presize;

    // Partition data fed into the builder on every benchmark invocation.
    private LinkedHashMap<TopicPartition, FetchRequest.PartitionData> fetches;
    private FetchSessionHandler handler;

    @Setup(Level.Trial)
    public void setUp() {
        fetches = new LinkedHashMap<>();
        handler = new FetchSessionHandler(LOG_CONTEXT, 1);
        FetchSessionHandler.Builder builder = handler.newBuilder();

        LinkedHashMap<TopicPartition, FetchResponse.PartitionData<MemoryRecords>> respMap = new LinkedHashMap<>();
        for (int i = 0; i < partitionCount; i++) {
            TopicPartition tp = new TopicPartition("foo", i);
            FetchRequest.PartitionData partitionData = new FetchRequest.PartitionData(0, 0, 200,
                    Optional.empty());
            fetches.put(tp, partitionData);
            builder.add(tp, partitionData);
            respMap.put(tp, new FetchResponse.PartitionData<>(
                    Errors.NONE,
                    0L,
                    0L,
                    0,
                    null,
                    null));
        }
        builder.build();
        // Build and handle an initial full response so that subsequent fetches are incremental.
        handler.handleResponse(new FetchResponse<>(Errors.NONE, respMap, 0, 1));

        int counter = 0;
        for (TopicPartition topicPartition: new ArrayList<>(fetches.keySet())) {
            if (shouldUpdate(counter)) {
                // Reorder the partition within the fetch map and change its fetch/log-start offsets
                // so that the builder sees it as altered.
                fetches.remove(topicPartition);
                fetches.put(topicPartition, new FetchRequest.PartitionData(50, 40, 200,
                        Optional.empty()));
            }
            counter++;
        }
    }

    /**
     * Returns true when the partition at position {@code counter} should be updated, so that
     * roughly {@code updatedPercentage} percent of partitions change between requests.
     * Guards against {@code updatedPercentage > 100}: there the integer division
     * {@code 100 / updatedPercentage} evaluates to 0 and {@code counter % 0} would throw
     * ArithmeticException, so such values simply update every partition.
     */
    private boolean shouldUpdate(int counter) {
        if (updatedPercentage <= 0)
            return false;
        if (updatedPercentage >= 100)
            return true;
        return counter % (100 / updatedPercentage) == 0;
    }

    @Benchmark
    @OutputTimeUnit(TimeUnit.NANOSECONDS)
    public void incrementalFetchSessionBuild() {
        FetchSessionHandler.Builder builder;
        if (presize)
            builder = handler.newBuilder(fetches.size(), true);
        else
            builder = handler.newBuilder();

        for (Map.Entry<TopicPartition, FetchRequest.PartitionData> entry: fetches.entrySet()) {
            builder.add(entry.getKey(), entry.getValue());
        }
        builder.build();
    }
}