# kafka/tests/kafkatest/benchmarks/streams/streams_simple_benchmark_test.py
# NOTE(review): file-viewer metadata ("165 lines", "6.8 KiB", "Python") was
# accidentally captured into this file; commented out so the module parses.

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ducktape.tests.test import Test
from ducktape.mark.resource import cluster
from ducktape.mark import parametrize, matrix
from kafkatest.tests.kafka_test import KafkaTest
from kafkatest.services.performance.streams_performance import StreamsSimpleBenchmarkService
from kafkatest.services.zookeeper import ZookeeperService
from kafkatest.services.kafka import KafkaService
from kafkatest.version import DEV_BRANCH
# Individual sub-test names understood by StreamsSimpleBenchmarkService,
# grouped by the kind of workload they exercise.
STREAMS_SIMPLE_TESTS = ["streamprocess", "streamprocesswithsink", "streamprocesswithstatestore", "streamprocesswithwindowstore"]
STREAMS_COUNT_TESTS = ["streamcount", "streamcountwindowed"]
STREAMS_JOIN_TESTS = ["streamtablejoin", "streamstreamjoin", "tabletablejoin"]
NON_STREAMS_TESTS = ["consume", "consumeproduce"]

# Group aliases: passing one of these as `test` runs every sub-test in the
# corresponding list above (ALL_TEST runs all Streams sub-tests).
ALL_TEST = "all"
STREAMS_SIMPLE_TEST = "streams-simple"
STREAMS_COUNT_TEST = "streams-count"
STREAMS_JOIN_TEST = "streams-join"
class StreamsSimpleBenchmarkTest(Test):
    """
    Simple benchmark of Kafka Streams.

    Starts a one-node ZooKeeper service and a Kafka cluster, pre-loads the
    benchmark source topics, then runs the requested SimpleBenchmark
    sub-test(s) and returns the collected throughput and JMX statistics.
    """

    def __init__(self, test_context):
        super(StreamsSimpleBenchmarkTest, self).__init__(test_context)

        # These values could be updated in ad-hoc benchmarks.
        self.key_skew = 0            # key-skew parameter passed to the benchmark driver
        self.value_size = 1024       # record value size, in bytes
        # Plain int literal: the py2-only "L" suffix was redundant (py2
        # auto-promotes) and is a syntax error under py3.
        self.num_records = 10000000
        self.num_threads = 1
        self.replication = 1         # replication factor for all benchmark topics

    @cluster(num_nodes=12)
    @matrix(test=["consume", "consumeproduce",
                  "streamprocess", "streamprocesswithsink", "streamprocesswithstatestore", "streamprocesswithwindowstore",
                  "streamcount", "streamcountwindowed",
                  "streamtablejoin", "streamstreamjoin", "tabletablejoin"],
            scale=[1])
    def test_simple_benchmark(self, test, scale):
        """
        Run simple Kafka Streams benchmark.

        :param test: sub-test name (or a group alias such as ALL_TEST) to run
        :param scale: number of Kafka broker nodes and benchmark driver instances
        :return: dict of collected benchmark metrics, keyed by metric name
        """
        self.driver = [None] * (scale + 1)
        self.final = {}

        #############
        # SETUP PHASE
        #############
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()
        self.kafka = KafkaService(self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={
            'simpleBenchmarkSourceTopic1' : { 'partitions': scale, 'replication-factor': self.replication },
            'simpleBenchmarkSourceTopic2' : { 'partitions': scale, 'replication-factor': self.replication },
            'simpleBenchmarkSinkTopic' : { 'partitions': scale, 'replication-factor': self.replication },
            'yahooCampaigns' : { 'partitions': 20, 'replication-factor': self.replication },
            'yahooEvents' : { 'partitions': 20, 'replication-factor': self.replication }
        })
        self.kafka.log_level = "INFO"
        self.kafka.start()

        # Pick the load-phase workload: join tests (and their group alias)
        # consume two source topics, everything else consumes one. The
        # conditions are mutually exclusive, so an elif chain replaces the
        # original run of independent ifs without changing which branch wins.
        load_test = ""
        if test == ALL_TEST:
            load_test = "load-two"
        elif test in STREAMS_JOIN_TESTS or test == STREAMS_JOIN_TEST:
            load_test = "load-two"
        elif test in STREAMS_COUNT_TESTS or test == STREAMS_COUNT_TEST:
            load_test = "load-one"
        elif test in STREAMS_SIMPLE_TESTS or test == STREAMS_SIMPLE_TEST:
            load_test = "load-one"
        elif test in NON_STREAMS_TESTS:
            load_test = "load-one"

        ################
        # LOAD PHASE
        ################
        self.load_driver = StreamsSimpleBenchmarkService(self.test_context,
                                                         self.kafka,
                                                         load_test,
                                                         self.num_threads,
                                                         self.num_records,
                                                         self.key_skew,
                                                         self.value_size)
        self.load_driver.start()
        self.load_driver.wait(3600)  # wait at most one hour (3600 s; comment previously said 30 minutes)
        self.load_driver.stop()

        # Dispatch: a group alias fans out to every sub-test in its group,
        # otherwise run the single named sub-test.
        if test == ALL_TEST:
            for single_test in STREAMS_SIMPLE_TESTS + STREAMS_COUNT_TESTS + STREAMS_JOIN_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_SIMPLE_TEST:
            for single_test in STREAMS_SIMPLE_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_COUNT_TEST:
            for single_test in STREAMS_COUNT_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_JOIN_TEST:
            for single_test in STREAMS_JOIN_TESTS:
                self.execute(single_test, scale)
        else:
            self.execute(test, scale)

        return self.final

    def execute(self, test, scale):
        """
        Run one benchmark sub-test on `scale` driver instances, then collect
        its output and JMX metrics into self.final.

        :param test: sub-test name understood by StreamsSimpleBenchmarkService
        :param scale: number of concurrent driver instances to start
        """
        ################
        # RUN PHASE
        ################
        for num in range(scale):
            self.driver[num] = StreamsSimpleBenchmarkService(self.test_context,
                                                             self.kafka,
                                                             test,
                                                             self.num_threads,
                                                             self.num_records,
                                                             self.key_skew,
                                                             self.value_size)
            self.driver[num].start()

        #######################
        # STOP + COLLECT PHASE
        #######################
        data = [None] * scale
        for num in range(scale):
            self.driver[num].wait()
            self.driver[num].stop()
            # Fail fast if the driver produced no "Performance" output line.
            self.driver[num].node.account.ssh("grep Performance %s" % self.driver[num].STDOUT_FILE, allow_fail=False)
            data[num] = self.driver[num].collect_data(self.driver[num].node, "")
            self.driver[num].read_jmx_output_all_nodes()

        # Merge per-driver results into the shared result dict, suffixing
        # each key with the driver index so instances don't clobber each other.
        for num in range(scale):
            for key in data[num]:
                self.final[key + "-" + str(num)] = data[num][key]

            for key in sorted(self.driver[num].jmx_stats[0]):
                self.logger.info("%s: %s" % (key, self.driver[num].jmx_stats[0][key]))

            self.final[test + "-jmx-avg-" + str(num)] = self.driver[num].average_jmx_value
            self.final[test + "-jmx-max-" + str(num)] = self.driver[num].maximum_jmx_value