mirror of https://github.com/apache/kafka.git
MINOR: Optimise performance of `Topic.validate()`
I included a JMH benchmark and the results follow. The implementation in this PR takes no more than 1/10th of the time when compared to trunk. I also included results for an alternative implementation that is a little slower than the one in the PR. Trunk: ```text TopicBenchmark.testValidate topic avgt 15 134.107 ± 3.956 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 316.241 ± 13.379 ns/op TopicBenchmark.testValidate very-long-topic-name_with_more_text avgt 15 636.026 ± 30.272 ns/op ``` Implementation in the PR: ```text TopicBenchmark.testValidate topic avgt 15 13.153 ± 0.383 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 26.139 ± 0.896 ns/op TopicBenchmark.testValidate very-long-topic-name.with_more_text avgt 15 44.829 ± 1.390 ns/op ``` Alternative implementation where boolean validChar = Character.isLetterOrDigit(c) || c == '.' || c == '_' || c == '-'; ```text TopicBenchmark.testValidate topic avgt 15 18.883 ± 1.044 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 36.696 ± 1.220 ns/op TopicBenchmark.testValidate very-long-topic-name_with_more_text avgt 15 65.956 ± 0.669 ns/op ``` Author: Ismael Juma <ismael@juma.me.uk> Reviewers: Guozhang Wang <wangguoz@gmail.com> Closes #3234 from ijuma/optimise-topic-is-valid
This commit is contained in:
parent
8e3ed7028e
commit
39eb31feae
|
@ -40,7 +40,7 @@
|
||||||
files=".*/protocol/Errors.java"/>
|
files=".*/protocol/Errors.java"/>
|
||||||
|
|
||||||
<suppress checks="BooleanExpressionComplexity"
|
<suppress checks="BooleanExpressionComplexity"
|
||||||
files="(Utils|KafkaLZ4BlockOutputStream|AclData).java"/>
|
files="(Utils|Topic|KafkaLZ4BlockOutputStream|AclData).java"/>
|
||||||
|
|
||||||
<suppress checks="CyclomaticComplexity"
|
<suppress checks="CyclomaticComplexity"
|
||||||
files="(ConsumerCoordinator|Fetcher|Sender|KafkaProducer|BufferPool|ConfigDef|RecordAccumulator|SsLTransportLayer|KerberosLogin|AbstractRequest|AbstractResponse|Selector|SslTransportLayer).java"/>
|
files="(ConsumerCoordinator|Fetcher|Sender|KafkaProducer|BufferPool|ConfigDef|RecordAccumulator|SsLTransportLayer|KerberosLogin|AbstractRequest|AbstractResponse|Selector|SslTransportLayer).java"/>
|
||||||
|
|
|
@ -21,7 +21,6 @@ import org.apache.kafka.common.utils.Utils;
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
public class Topic {
|
public class Topic {
|
||||||
|
|
||||||
|
@ -33,7 +32,6 @@ public class Topic {
|
||||||
Utils.mkSet(GROUP_METADATA_TOPIC_NAME, TRANSACTION_STATE_TOPIC_NAME));
|
Utils.mkSet(GROUP_METADATA_TOPIC_NAME, TRANSACTION_STATE_TOPIC_NAME));
|
||||||
|
|
||||||
private static final int MAX_NAME_LENGTH = 249;
|
private static final int MAX_NAME_LENGTH = 249;
|
||||||
private static final Pattern LEGAL_CHARS_PATTERN = Pattern.compile(LEGAL_CHARS + "+");
|
|
||||||
|
|
||||||
public static void validate(String topic) {
|
public static void validate(String topic) {
|
||||||
if (topic.isEmpty())
|
if (topic.isEmpty())
|
||||||
|
@ -77,6 +75,15 @@ public class Topic {
|
||||||
* Valid characters for Kafka topics are the ASCII alphanumerics, '.', '_', and '-'
|
* Valid characters for Kafka topics are the ASCII alphanumerics, '.', '_', and '-'
|
||||||
*/
|
*/
|
||||||
static boolean containsValidPattern(String topic) {
|
static boolean containsValidPattern(String topic) {
|
||||||
return LEGAL_CHARS_PATTERN.matcher(topic).matches();
|
for (int i = 0; i < topic.length(); ++i) {
|
||||||
|
char c = topic.charAt(i);
|
||||||
|
|
||||||
|
// We don't use Character.isLetterOrDigit(c) because it's slower
|
||||||
|
boolean validChar = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || c == '.' ||
|
||||||
|
c == '_' || c == '-';
|
||||||
|
if (!validChar)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -164,6 +164,7 @@ For a detailed description of findbugs bug categories, see http://findbugs.sourc
|
||||||
benchmarking. -->
|
benchmarking. -->
|
||||||
<Or>
|
<Or>
|
||||||
<Package name="org.apache.kafka.jmh.cache.generated"/>
|
<Package name="org.apache.kafka.jmh.cache.generated"/>
|
||||||
|
<Package name="org.apache.kafka.jmh.common.generated"/>
|
||||||
<Package name="org.apache.kafka.jmh.record.generated"/>
|
<Package name="org.apache.kafka.jmh.record.generated"/>
|
||||||
<Package name="org.apache.kafka.jmh.producer.generated"/>
|
<Package name="org.apache.kafka.jmh.producer.generated"/>
|
||||||
</Or>
|
</Or>
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.kafka.jmh.common;
|
||||||
|
|
||||||
|
import org.apache.kafka.common.internals.Topic;
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
@Fork(value = 1)
|
||||||
|
@Warmup(iterations = 5)
|
||||||
|
@Measurement(iterations = 15)
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
public class TopicBenchmark {
|
||||||
|
|
||||||
|
@State(Scope.Thread)
|
||||||
|
public static class BenchState {
|
||||||
|
@Param({"topic", "longer-topic-name", "very-long-topic-name.with_more_text"})
|
||||||
|
public String topicName;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public BenchState testValidate(BenchState state) {
|
||||||
|
// validate doesn't return anything, so return `state` to prevent the JVM from optimising the whole call away
|
||||||
|
Topic.validate(state.topicName);
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue