MINOR: Optimise performance of `Topic.validate()`

I included a JMH benchmark and the results follow. The
implementation in this PR takes no more than 1/10th
of the time when compared to trunk. I also included
results for an alternative implementation that is a little
slower than the one in the PR.

Trunk:
```text
TopicBenchmark.testValidate                                topic  avgt   15  134.107 ±  3.956  ns/op
TopicBenchmark.testValidate                    longer-topic-name  avgt   15  316.241 ± 13.379  ns/op
TopicBenchmark.testValidate  very-long-topic-name_with_more_text  avgt   15  636.026 ± 30.272  ns/op
```

Implementation in the PR:
```text
TopicBenchmark.testValidate                                topic  avgt   15  13.153 ± 0.383  ns/op
TopicBenchmark.testValidate                    longer-topic-name  avgt   15  26.139 ± 0.896  ns/op
TopicBenchmark.testValidate  very-long-topic-name.with_more_text  avgt   15  44.829 ± 1.390  ns/op
```

Alternative implementation where `boolean validChar = Character.isLetterOrDigit(c) || c == '.' || c == '_' || c == '-';`:
```text
TopicBenchmark.testValidate                                topic  avgt   15  18.883 ± 1.044  ns/op
TopicBenchmark.testValidate                    longer-topic-name  avgt   15  36.696 ± 1.220  ns/op
TopicBenchmark.testValidate  very-long-topic-name_with_more_text  avgt   15  65.956 ± 0.669  ns/op
```

Author: Ismael Juma <ismael@juma.me.uk>

Reviewers: Guozhang Wang <wangguoz@gmail.com>

Closes #3234 from ijuma/optimise-topic-is-valid
This commit is contained in:
Ismael Juma 2017-06-06 03:08:40 +01:00
parent 8e3ed7028e
commit 39eb31feae
4 changed files with 65 additions and 4 deletions

View File

@ -40,7 +40,7 @@
files=".*/protocol/Errors.java"/>
<suppress checks="BooleanExpressionComplexity"
files="(Utils|KafkaLZ4BlockOutputStream|AclData).java"/>
files="(Utils|Topic|KafkaLZ4BlockOutputStream|AclData).java"/>
<suppress checks="CyclomaticComplexity"
files="(ConsumerCoordinator|Fetcher|Sender|KafkaProducer|BufferPool|ConfigDef|RecordAccumulator|SsLTransportLayer|KerberosLogin|AbstractRequest|AbstractResponse|Selector|SslTransportLayer).java"/>

View File

@ -21,7 +21,6 @@ import org.apache.kafka.common.utils.Utils;
import java.util.Collections;
import java.util.Set;
import java.util.regex.Pattern;
public class Topic {
@ -33,7 +32,6 @@ public class Topic {
Utils.mkSet(GROUP_METADATA_TOPIC_NAME, TRANSACTION_STATE_TOPIC_NAME));
private static final int MAX_NAME_LENGTH = 249;
private static final Pattern LEGAL_CHARS_PATTERN = Pattern.compile(LEGAL_CHARS + "+");
public static void validate(String topic) {
if (topic.isEmpty())
@ -77,6 +75,15 @@ public class Topic {
* Valid characters for Kafka topics are the ASCII alphanumerics, '.', '_', and '-'
*/
static boolean containsValidPattern(String topic) {
    // Single pass over the name with plain range comparisons instead of a regex match. The method
    // returns false at the first character that is not an ASCII letter, an ASCII digit, '.', '_' or '-'.
    // An empty string contains no offending character and therefore yields true; validate() tests
    // topic.isEmpty() itself.
    for (int i = 0; i < topic.length(); ++i) {
        char c = topic.charAt(i);
        // We don't use Character.isLetterOrDigit(c) because it's slower; it would also accept
        // letters and digits outside the ASCII range.
        boolean validChar = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || c == '.' ||
                c == '_' || c == '-';
        if (!validChar)
            return false;
    }
    return true;
}
}

View File

@ -164,6 +164,7 @@ For a detailed description of findbugs bug categories, see http://findbugs.sourc
benchmarking. -->
<Or>
<Package name="org.apache.kafka.jmh.cache.generated"/>
<Package name="org.apache.kafka.jmh.common.generated"/>
<Package name="org.apache.kafka.jmh.record.generated"/>
<Package name="org.apache.kafka.jmh.producer.generated"/>
</Or>

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.common;
import org.apache.kafka.common.internals.Topic;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark for {@link Topic#validate(String)}.
 *
 * Results are reported as average time in nanoseconds per operation, using a single fork with
 * 5 warmup iterations and 15 measurement iterations.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 5)
@Measurement(iterations = 15)
@Fork(1)
@State(Scope.Benchmark)
public class TopicBenchmark {

    /**
     * Thread-scoped input holder; {@code topicName} takes each of the listed {@code @Param} values.
     */
    @State(Scope.Thread)
    public static class BenchState {
        @Param({"topic", "longer-topic-name", "very-long-topic-name.with_more_text"})
        public String topicName;
    }

    /**
     * Runs topic validation on the current parameter value.
     *
     * @param state holder of the topic name under test
     * @return the {@code state} instance that was passed in
     */
    @Benchmark
    public BenchState testValidate(BenchState state) {
        final String name = state.topicName;
        // Topic.validate has no return value, so `state` is handed back to keep the JVM from
        // optimising the whole call away
        Topic.validate(name);
        return state;
    }
}