2015-07-29 08:22:14 +08:00
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
|
|
# this work for additional information regarding copyright ownership.
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
# (the "License"); you may not use this file except in compliance with
|
|
|
|
# the License. You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
from ducktape.services.background_thread import BackgroundThreadService
|
2015-08-23 10:23:36 +08:00
|
|
|
from ducktape.utils.util import wait_until
|
2015-10-14 04:54:40 +08:00
|
|
|
from kafkatest.services.performance.jmx_mixin import JmxMixin
|
|
|
|
from kafkatest.services.performance import PerformanceService
|
2015-10-13 08:19:45 +08:00
|
|
|
from kafkatest.utils.security_config import SecurityConfig
|
2015-07-29 08:22:14 +08:00
|
|
|
|
2015-08-12 06:24:52 +08:00
|
|
|
import os
|
2015-08-23 10:23:36 +08:00
|
|
|
import subprocess
|
2015-10-14 04:54:40 +08:00
|
|
|
import itertools
|
2015-07-29 08:22:14 +08:00
|
|
|
|
|
|
|
def is_int(msg):
    """Default method used to check whether text pulled from console consumer is a message.

    Returns the parsed int, or None if msg is not parseable as an integer.
    """
    try:
        return int(msg)
    except (ValueError, TypeError):
        # ValueError: msg is a non-numeric string; TypeError: msg is not
        # string/number-like at all. Anything else (e.g. KeyboardInterrupt)
        # should propagate rather than be silently swallowed by a bare except.
        return None
|
|
|
|
|
|
|
|
"""
|
|
|
|
0.8.2.1 ConsoleConsumer options
|
|
|
|
|
|
|
|
The console consumer is a tool that reads data from Kafka and outputs it to standard output.
|
|
|
|
Option Description
|
|
|
|
------ -----------
|
|
|
|
--blacklist <blacklist> Blacklist of topics to exclude from
|
|
|
|
consumption.
|
|
|
|
--consumer.config <config file> Consumer config properties file.
|
|
|
|
--csv-reporter-enabled If set, the CSV metrics reporter will
|
|
|
|
be enabled
|
|
|
|
--delete-consumer-offsets If specified, the consumer path in
|
|
|
|
zookeeper is deleted when starting up
|
|
|
|
--formatter <class> The name of a class to use for
|
|
|
|
formatting kafka messages for
|
|
|
|
display. (default: kafka.tools.
|
|
|
|
DefaultMessageFormatter)
|
|
|
|
--from-beginning If the consumer does not already have
|
|
|
|
an established offset to consume
|
|
|
|
from, start with the earliest
|
|
|
|
message present in the log rather
|
|
|
|
than the latest message.
|
|
|
|
--max-messages <Integer: num_messages> The maximum number of messages to
|
|
|
|
consume before exiting. If not set,
|
|
|
|
consumption is continual.
|
|
|
|
--metrics-dir <metrics directory>       If csv-reporter-enable is set, and
|
|
|
|
this parameter is set, the csv
|
|
|
|
metrics will be output here
|
|
|
|
--property <prop>
|
|
|
|
--skip-message-on-error If there is an error when processing a
|
|
|
|
message, skip it instead of halt.
|
|
|
|
--topic <topic> The topic id to consume on.
|
|
|
|
--whitelist <whitelist> Whitelist of topics to include for
|
|
|
|
consumption.
|
|
|
|
--zookeeper <urls> REQUIRED: The connection string for
|
|
|
|
the zookeeper connection in the form
|
|
|
|
host:port. Multiple URLS can be
|
|
|
|
given to allow fail-over.
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
2015-10-14 04:54:40 +08:00
|
|
|
class ConsoleConsumer(JmxMixin, PerformanceService):
    """Ducktape service that runs kafka-console-consumer.sh on one or more nodes,
    collecting consumed messages per node in self.messages_consumed.
    """

    # Root directory for persistent output
    PERSISTENT_ROOT = "/mnt/console_consumer"
    STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stdout")
    STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stderr")
    LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
    LOG_FILE = os.path.join(LOG_DIR, "console_consumer.log")
    LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
    CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "console_consumer.properties")

    # Log files ducktape may collect from each node after a test run.
    logs = {
        "consumer_stdout": {
            "path": STDOUT_CAPTURE,
            "collect_default": False},
        "consumer_stderr": {
            "path": STDERR_CAPTURE,
            "collect_default": False},
        "consumer_log": {
            "path": LOG_FILE,
            "collect_default": True}
        }

    def __init__(self, context, num_nodes, kafka, topic, security_protocol=None, new_consumer=None, message_validator=None,
                 from_beginning=True, consumer_timeout_ms=None, client_id="console-consumer", jmx_object_names=None, jmx_attributes=None):
        """
        Args:
            context:                    standard context
            num_nodes:                  number of nodes to use (this should be 1)
            kafka:                      kafka service
            topic:                      consume from this topic
            security_protocol:          security protocol for Kafka connections
            new_consumer:               use new Kafka consumer if True; if None, it is inferred
                                        from the security protocol (SSL requires the new consumer)
            message_validator:          function which returns message or None
            from_beginning:             consume from beginning if True, else from the end
            consumer_timeout_ms:        corresponds to consumer.timeout.ms. consumer process ends if time between
                                        successively consumed messages exceeds this timeout. Setting this and
                                        waiting for the consumer to stop is a pretty good way to consume all messages
                                        in a topic.
            client_id:                  client id to pass through to the consumer properties
            jmx_object_names:           JMX MBean names to monitor (forwarded to JmxMixin)
            jmx_attributes:             JMX attributes to read; defaults to an empty list
        """
        # NOTE: a fresh list is created per instance here; a mutable default
        # argument ([]) would be shared across all ConsoleConsumer instances.
        JmxMixin.__init__(self, num_nodes, jmx_object_names,
                          [] if jmx_attributes is None else jmx_attributes)
        PerformanceService.__init__(self, context, num_nodes)
        self.kafka = kafka
        self.new_consumer = new_consumer
        self.args = {
            'topic': topic,
        }

        self.consumer_timeout_ms = consumer_timeout_ms

        self.from_beginning = from_beginning
        self.message_validator = message_validator
        # Per-node (1-indexed) lists of validated messages seen so far.
        self.messages_consumed = {idx: [] for idx in range(1, num_nodes + 1)}
        self.client_id = client_id

        # Process client configuration
        self.prop_file = self.render('console_consumer.properties', consumer_timeout_ms=self.consumer_timeout_ms, client_id=self.client_id)

        # Add security properties to the config. If security protocol is not specified,
        # use the default in the template properties.
        self.security_config = SecurityConfig(security_protocol, self.prop_file)
        self.security_protocol = self.security_config.security_protocol
        if self.new_consumer is None:
            # Default to the new consumer exactly when SSL is in use, since
            # only the new consumer supports SSL.
            self.new_consumer = self.security_protocol == SecurityConfig.SSL
        if self.security_protocol == SecurityConfig.SSL and not self.new_consumer:
            raise Exception("SSL protocol is supported only with the new consumer")
        self.prop_file += str(self.security_config)

    @property
    def start_cmd(self):
        """Build the shell command that launches kafka-console-consumer.sh on a node."""
        args = self.args.copy()
        args['zk_connect'] = self.kafka.zk.connect_setting()
        args['stdout'] = ConsoleConsumer.STDOUT_CAPTURE
        args['stderr'] = ConsoleConsumer.STDERR_CAPTURE
        args['config_file'] = ConsoleConsumer.CONFIG_FILE
        args['jmx_port'] = self.jmx_port

        cmd = "export LOG_DIR=%s;" % ConsoleConsumer.LOG_DIR
        cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\";" % ConsoleConsumer.LOG4J_CONFIG
        cmd += " JMX_PORT=%(jmx_port)d /opt/kafka/bin/kafka-console-consumer.sh --topic %(topic)s" \
            " --consumer.config %(config_file)s" % args

        # New consumer talks to brokers directly; old consumer goes through zookeeper.
        if self.new_consumer:
            cmd += " --new-consumer --bootstrap-server %s" % self.kafka.bootstrap_servers()
        else:
            cmd += " --zookeeper %(zk_connect)s" % args
        if self.from_beginning:
            cmd += " --from-beginning"

        # tee so output is both captured to a file and streamed back over ssh.
        cmd += " 2>> %(stderr)s | tee -a %(stdout)s &" % args
        return cmd

    def pids(self, node):
        """Return a list of console-consumer java process ids on the given node (empty if none)."""
        try:
            cmd = "ps ax | grep -i console_consumer | grep java | grep -v grep | awk '{print $1}'"
            pid_arr = [pid for pid in node.account.ssh_capture(cmd, allow_fail=True, callback=int)]
            return pid_arr
        except (subprocess.CalledProcessError, ValueError):
            # ssh failure or unparseable pid output simply means "no pids found"
            return []

    def alive(self, node):
        """Return True if a console-consumer process is running on the node."""
        return len(self.pids(node)) > 0

    def _worker(self, idx, node):
        """Background-thread body: start the consumer on `node` and collect its output."""
        node.account.ssh("mkdir -p %s" % ConsoleConsumer.PERSISTENT_ROOT, allow_fail=False)

        # Create and upload config file
        self.logger.info("console_consumer.properties:")
        self.logger.info(self.prop_file)
        node.account.create_file(ConsoleConsumer.CONFIG_FILE, self.prop_file)
        self.security_config.setup_node(node)

        # Create and upload log properties
        log_config = self.render('tools_log4j.properties', log_file=ConsoleConsumer.LOG_FILE)
        node.account.create_file(ConsoleConsumer.LOG4J_CONFIG, log_config)

        # Run and capture output
        cmd = self.start_cmd
        self.logger.debug("Console consumer %d command: %s", idx, cmd)

        consumer_output = node.account.ssh_capture(cmd, allow_fail=False)
        # Block on the first line so the consumer is known to be up before
        # attaching the JMX tool. next() (vs .next()) works on py2 and py3.
        first_line = next(consumer_output)
        self.start_jmx_tool(idx, node)
        for line in itertools.chain([first_line], consumer_output):
            msg = line.strip()
            if self.message_validator is not None:
                msg = self.message_validator(msg)
            if msg is not None:
                self.messages_consumed[idx].append(msg)

        self.read_jmx_output(idx, node)

    def start_node(self, node):
        PerformanceService.start_node(self, node)

    def stop_node(self, node):
        """Kill the consumer process on the node and wait for it to actually exit."""
        node.account.kill_process("console_consumer", allow_fail=True)
        wait_until(lambda: not self.alive(node), timeout_sec=10, backoff_sec=.2,
                   err_msg="Timed out waiting for consumer to stop.")

    def clean_node(self, node):
        """Remove all consumer state from the node, force-killing any leftover process."""
        if self.alive(node):
            self.logger.warn("%s %s was still alive at cleanup time. Killing forcefully..." %
                             (self.__class__.__name__, node.account))
        JmxMixin.clean_node(self, node)
        PerformanceService.clean_node(self, node)
        node.account.ssh("rm -rf %s" % ConsoleConsumer.PERSISTENT_ROOT, allow_fail=False)
        self.security_config.clean_node(node)
|