From 39eb31feaeebfb184d98cc5d94da9148c2319d81 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 6 Jun 2017 03:08:40 +0100 Subject: [PATCH] MINOR: Optimise performance of `Topic.validate()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I included a JMH benchmark and the results follow. The implementation in this PR takes no more than 1/10th of the time when compared to trunk. I also included results for an alternative implementation that is a little slower than the one in the PR. Trunk: ```text TopicBenchmark.testValidate topic avgt 15 134.107 ± 3.956 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 316.241 ± 13.379 ns/op TopicBenchmark.testValidate very-long-topic-name_with_more_text avgt 15 636.026 ± 30.272 ns/op ``` Implementation in the PR: ```text TopicBenchmark.testValidate topic avgt 15 13.153 ± 0.383 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 26.139 ± 0.896 ns/op TopicBenchmark.testValidate very-long-topic-name.with_more_text avgt 15 44.829 ± 1.390 ns/op ``` Alternative implementation where boolean validChar = Character.isLetterOrDigit(c) || c == '.' || c == '_' || c == '-'; ```text TopicBenchmark.testValidate topic avgt 15 18.883 ± 1.044 ns/op TopicBenchmark.testValidate longer-topic-name avgt 15 36.696 ± 1.220 ns/op TopicBenchmark.testValidate very-long-topic-name_with_more_text avgt 15 65.956 ± 0.669 ns/op ``` Author: Ismael Juma Reviewers: Guozhang Wang Closes #3234 from ijuma/optimise-topic-is-valid --- checkstyle/suppressions.xml | 2 +- .../apache/kafka/common/internals/Topic.java | 13 +++-- gradle/findbugs-exclude.xml | 1 + .../kafka/jmh/common/TopicBenchmark.java | 53 +++++++++++++++++++ 4 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 jmh-benchmarks/src/main/java/org/apache/kafka/jmh/common/TopicBenchmark.java diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml index f2fb3d9e5e1..d21daf1348f 100644 --- a/checkstyle/suppressions.xml +++ b/checkstyle/suppressions.xml @@ -40,7 +40,7 @@ files=".*/protocol/Errors.java"/> + files="(Utils|Topic|KafkaLZ4BlockOutputStream|AclData).java"/> diff --git a/clients/src/main/java/org/apache/kafka/common/internals/Topic.java b/clients/src/main/java/org/apache/kafka/common/internals/Topic.java index dd6dbcf3a29..a5ef3357dd1 100644 --- a/clients/src/main/java/org/apache/kafka/common/internals/Topic.java +++ b/clients/src/main/java/org/apache/kafka/common/internals/Topic.java @@ -21,7 +21,6 @@ import org.apache.kafka.common.utils.Utils; import java.util.Collections; import java.util.Set; -import java.util.regex.Pattern; public class Topic { @@ -33,7 +32,6 @@ public class Topic { Utils.mkSet(GROUP_METADATA_TOPIC_NAME, TRANSACTION_STATE_TOPIC_NAME)); private static final int MAX_NAME_LENGTH = 249; - private static final Pattern LEGAL_CHARS_PATTERN = Pattern.compile(LEGAL_CHARS + "+"); public static void validate(String topic) { if (topic.isEmpty()) @@ -77,6 +75,15 @@ public class Topic { * Valid characters for Kafka topics are the ASCII alphanumerics, '.', '_', and '-' */ static boolean containsValidPattern(String topic) { - return LEGAL_CHARS_PATTERN.matcher(topic).matches(); + for (int i = 0; i < topic.length(); ++i) { + char c = topic.charAt(i); + + // We don't use Character.isLetterOrDigit(c) because it's slower + boolean validChar = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || c == '.' || + c == '_' || c == '-'; + if (!validChar) + return false; + } + return true; } } diff --git a/gradle/findbugs-exclude.xml b/gradle/findbugs-exclude.xml index 9262a782fb6..9cb1db88214 100644 --- a/gradle/findbugs-exclude.xml +++ b/gradle/findbugs-exclude.xml @@ -164,6 +164,7 @@ For a detailed description of findbugs bug categories, see http://findbugs.sourc benchmarking. --> + diff --git a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/common/TopicBenchmark.java b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/common/TopicBenchmark.java new file mode 100644 index 00000000000..fde239ea1d1 --- /dev/null +++ b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/common/TopicBenchmark.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.jmh.common; + +import org.apache.kafka.common.internals.Topic; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 5) +@Measurement(iterations = 15) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +public class TopicBenchmark { + + @State(Scope.Thread) + public static class BenchState { + @Param({"topic", "longer-topic-name", "very-long-topic-name.with_more_text"}) + public String topicName; + } + + @Benchmark + public BenchState testValidate(BenchState state) { + // validate doesn't return anything, so return `state` to prevent the JVM from optimising the whole call away + Topic.validate(state.topicName); + return state; + } +}