mirror of https://github.com/apache/kafka.git
MINOR: Optimise performance of `Topic.validate()`
I included a JMH benchmark and the results follow. The implementation in this PR takes no more than 1/10th of the time when compared to trunk. I also included results for an alternative implementation that is a little slower than the one in the PR. Trunk: ```text TopicBenchmark.testValidate topic avgt 15 134.107 ± 3.956 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 316.241 ± 13.379 ns/op TopicBenchmark.testValidate very-long-topic-name_with_more_text avgt 15 636.026 ± 30.272 ns/op ``` Implementation in the PR: ```text TopicBenchmark.testValidate topic avgt 15 13.153 ± 0.383 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 26.139 ± 0.896 ns/op TopicBenchmark.testValidate very-long-topic-name.with_more_text avgt 15 44.829 ± 1.390 ns/op ``` Alternative implementation where boolean validChar = Character.isLetterOrDigit(c) || c == '.' || c == '_' || c == '-'; ```text TopicBenchmark.testValidate topic avgt 15 18.883 ± 1.044 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 36.696 ± 1.220 ns/op TopicBenchmark.testValidate very-long-topic-name_with_more_text avgt 15 65.956 ± 0.669 ns/op ``` Author: Ismael Juma <ismael@juma.me.uk> Reviewers: Guozhang Wang <wangguoz@gmail.com> Closes #3234 from ijuma/optimise-topic-is-valid
This commit is contained in:
parent
8e3ed7028e
commit
39eb31feae
|
@ -40,7 +40,7 @@
|
|||
files=".*/protocol/Errors.java"/>
|
||||
|
||||
<suppress checks="BooleanExpressionComplexity"
|
||||
files="(Utils|KafkaLZ4BlockOutputStream|AclData).java"/>
|
||||
files="(Utils|Topic|KafkaLZ4BlockOutputStream|AclData).java"/>
|
||||
|
||||
<suppress checks="CyclomaticComplexity"
|
||||
files="(ConsumerCoordinator|Fetcher|Sender|KafkaProducer|BufferPool|ConfigDef|RecordAccumulator|SsLTransportLayer|KerberosLogin|AbstractRequest|AbstractResponse|Selector|SslTransportLayer).java"/>
|
||||
|
|
|
@ -21,7 +21,6 @@ import org.apache.kafka.common.utils.Utils;
|
|||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class Topic {
|
||||
|
||||
|
@ -33,7 +32,6 @@ public class Topic {
|
|||
Utils.mkSet(GROUP_METADATA_TOPIC_NAME, TRANSACTION_STATE_TOPIC_NAME));
|
||||
|
||||
private static final int MAX_NAME_LENGTH = 249;
|
||||
private static final Pattern LEGAL_CHARS_PATTERN = Pattern.compile(LEGAL_CHARS + "+");
|
||||
|
||||
public static void validate(String topic) {
|
||||
if (topic.isEmpty())
|
||||
|
@ -77,6 +75,15 @@ public class Topic {
|
|||
* Valid characters for Kafka topics are the ASCII alphanumerics, '.', '_', and '-'
|
||||
*/
|
||||
static boolean containsValidPattern(String topic) {
|
||||
return LEGAL_CHARS_PATTERN.matcher(topic).matches();
|
||||
for (int i = 0; i < topic.length(); ++i) {
|
||||
char c = topic.charAt(i);
|
||||
|
||||
// We don't use Character.isLetterOrDigit(c) because it's slower
|
||||
boolean validChar = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || c == '.' ||
|
||||
c == '_' || c == '-';
|
||||
if (!validChar)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -164,6 +164,7 @@ For a detailed description of findbugs bug categories, see http://findbugs.sourc
|
|||
benchmarking. -->
|
||||
<Or>
|
||||
<Package name="org.apache.kafka.jmh.cache.generated"/>
|
||||
<Package name="org.apache.kafka.jmh.common.generated"/>
|
||||
<Package name="org.apache.kafka.jmh.record.generated"/>
|
||||
<Package name="org.apache.kafka.jmh.producer.generated"/>
|
||||
</Or>
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.jmh.common;
|
||||
|
||||
import org.apache.kafka.common.internals.Topic;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
@Fork(value = 1)
|
||||
@Warmup(iterations = 5)
|
||||
@Measurement(iterations = 15)
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
public class TopicBenchmark {
|
||||
|
||||
@State(Scope.Thread)
|
||||
public static class BenchState {
|
||||
@Param({"topic", "longer-topic-name", "very-long-topic-name.with_more_text"})
|
||||
public String topicName;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public BenchState testValidate(BenchState state) {
|
||||
// validate doesn't return anything, so return `state` to prevent the JVM from optimising the whole call away
|
||||
Topic.validate(state.topicName);
|
||||
return state;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue