mirror of https://github.com/apache/kafka.git
165 lines · 6.8 KiB · Python
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from ducktape.tests.test import Test
|
|
from ducktape.mark.resource import cluster
|
|
from ducktape.mark import parametrize, matrix
|
|
from kafkatest.tests.kafka_test import KafkaTest
|
|
|
|
from kafkatest.services.performance.streams_performance import StreamsSimpleBenchmarkService
|
|
from kafkatest.services.zookeeper import ZookeeperService
|
|
from kafkatest.services.kafka import KafkaService
|
|
from kafkatest.version import DEV_BRANCH
|
|
|
|
# Individual benchmark test names, grouped by category. Each group shares the
# same topic-load requirements (see the LOAD PHASE in the test below).
STREAMS_SIMPLE_TESTS = ["streamprocess", "streamprocesswithsink", "streamprocesswithstatestore", "streamprocesswithwindowstore"]
STREAMS_COUNT_TESTS = ["streamcount", "streamcountwindowed"]
STREAMS_JOIN_TESTS = ["streamtablejoin", "streamstreamjoin", "tabletablejoin"]
NON_STREAMS_TESTS = ["consume", "consumeproduce"]

# Aggregate selectors: passing one of these as the `test` parameter fans out
# to every benchmark in the corresponding group (ALL_TEST runs all streams groups).
ALL_TEST = "all"
STREAMS_SIMPLE_TEST = "streams-simple"
STREAMS_COUNT_TEST = "streams-count"
STREAMS_JOIN_TEST = "streams-join"
|
|
|
|
|
class StreamsSimpleBenchmarkTest(Test):
    """
    Simple benchmark of Kafka Streams.
    """

    def __init__(self, test_context):
        super(StreamsSimpleBenchmarkTest, self).__init__(test_context)

        # these values could be updated in ad-hoc benchmarks
        self.key_skew = 0
        self.value_size = 1024
        # Plain int instead of the Python-2-only `10000000L` long literal:
        # equivalent on Python 2 (ints auto-promote) and valid on Python 3.
        self.num_records = 10000000
        self.num_threads = 1

        self.replication = 1

    @cluster(num_nodes=12)
    @matrix(test=["consume", "consumeproduce",
                  "streamprocess", "streamprocesswithsink", "streamprocesswithstatestore", "streamprocesswithwindowstore",
                  "streamcount", "streamcountwindowed",
                  "streamtablejoin", "streamstreamjoin", "tabletablejoin"],
            scale=[1])
    def test_simple_benchmark(self, test, scale):
        """
        Run simple Kafka Streams benchmark.

        :param test: a single benchmark name, or an aggregate selector
                     (ALL_TEST / STREAMS_SIMPLE_TEST / STREAMS_COUNT_TEST /
                     STREAMS_JOIN_TEST) which fans out to every benchmark in
                     the corresponding group.
        :param scale: number of Kafka broker nodes and parallel driver instances.
        :return: dict of collected results, keyed by "<metric>-<instance index>".
        """
        self.driver = [None] * (scale + 1)
        self.final = {}

        #############
        # SETUP PHASE
        #############
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()
        self.kafka = KafkaService(self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={
            'simpleBenchmarkSourceTopic1' : { 'partitions': scale, 'replication-factor': self.replication },
            'simpleBenchmarkSourceTopic2' : { 'partitions': scale, 'replication-factor': self.replication },
            'simpleBenchmarkSinkTopic' : { 'partitions': scale, 'replication-factor': self.replication },
            'yahooCampaigns' : { 'partitions': 20, 'replication-factor': self.replication },
            'yahooEvents' : { 'partitions': 20, 'replication-factor': self.replication }
        })
        self.kafka.log_level = "INFO"
        self.kafka.start()

        # Choose how much data to pre-load: join benchmarks consume two source
        # topics ("load-two"); every other category consumes one ("load-one").
        load_test = ""
        if test == ALL_TEST:
            load_test = "load-two"
        if test in STREAMS_JOIN_TESTS or test == STREAMS_JOIN_TEST:
            load_test = "load-two"
        if test in STREAMS_COUNT_TESTS or test == STREAMS_COUNT_TEST:
            load_test = "load-one"
        if test in STREAMS_SIMPLE_TESTS or test == STREAMS_SIMPLE_TEST:
            load_test = "load-one"
        if test in NON_STREAMS_TESTS:
            load_test = "load-one"

        ################
        # LOAD PHASE
        ################
        self.load_driver = StreamsSimpleBenchmarkService(self.test_context,
                                                         self.kafka,
                                                         load_test,
                                                         self.num_threads,
                                                         self.num_records,
                                                         self.key_skew,
                                                         self.value_size)
        self.load_driver.start()
        # NOTE: 3600 s is one hour; the original comment here claimed
        # "30 minutes", contradicting the code.
        self.load_driver.wait(3600)  # wait at most 1 hour
        self.load_driver.stop()

        # Aggregate selectors fan out to every benchmark in their group;
        # a single benchmark name is executed directly.
        if test == ALL_TEST:
            for single_test in STREAMS_SIMPLE_TESTS + STREAMS_COUNT_TESTS + STREAMS_JOIN_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_SIMPLE_TEST:
            for single_test in STREAMS_SIMPLE_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_COUNT_TEST:
            for single_test in STREAMS_COUNT_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_JOIN_TEST:
            for single_test in STREAMS_JOIN_TESTS:
                self.execute(single_test, scale)
        else:
            self.execute(test, scale)

        return self.final

    def execute(self, test, scale):
        """
        Run a single benchmark: start `scale` driver instances, wait for them
        to finish, then merge their throughput data and JMX statistics into
        self.final.

        :param test: individual benchmark name to run.
        :param scale: number of parallel driver instances.
        """
        ################
        # RUN PHASE
        ################
        for num in range(scale):
            self.driver[num] = StreamsSimpleBenchmarkService(self.test_context,
                                                             self.kafka,
                                                             test,
                                                             self.num_threads,
                                                             self.num_records,
                                                             self.key_skew,
                                                             self.value_size)
            self.driver[num].start()

        #######################
        # STOP + COLLECT PHASE
        #######################
        data = [None] * scale

        for num in range(scale):
            self.driver[num].wait()
            self.driver[num].stop()
            # Fail fast if the driver never reported a "Performance" line.
            self.driver[num].node.account.ssh("grep Performance %s" % self.driver[num].STDOUT_FILE, allow_fail=False)
            data[num] = self.driver[num].collect_data(self.driver[num].node, "")
            self.driver[num].read_jmx_output_all_nodes()

        for num in range(scale):
            # Suffix every metric key with the driver instance index so
            # results from parallel instances do not collide.
            for key in data[num]:
                self.final[key + "-" + str(num)] = data[num][key]

            for key in sorted(self.driver[num].jmx_stats[0]):
                self.logger.info("%s: %s" % (key, self.driver[num].jmx_stats[0][key]))

            self.final[test + "-jmx-avg-" + str(num)] = self.driver[num].average_jmx_value
            self.final[test + "-jmx-max-" + str(num)] = self.driver[num].maximum_jmx_value