KAFKA-13231; `TransactionalMessageCopier.start_node` should wait until the process is fully started (#11264)

This patch ensures that the transactional message copier is fully started in `start_node`. Without this, it is possible that `stop_node` is called before the process has started, in which case the process is never stopped at all.

Reviewers: Jason Gustafson <jason@confluent.io>
This commit is contained in:
David Jacot 2021-08-27 08:28:14 +02:00 committed by GitHub
parent 8d5185d976
commit c4e1e23857
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 5 additions and 1 deletions

View File

@ -159,12 +159,16 @@ class TransactionalMessageCopier(KafkaPathResolverMixin, BackgroundThreadService
def alive(self, node):
    """Report whether a copier process is currently visible on ``node``.

    Returns True when ``self.pids(node)`` yields at least one pid,
    False otherwise.
    """
    running_pids = self.pids(node)
    return len(running_pids) >= 1
def start_node(self, node):
    """Start the copier on ``node`` and block until its process is visible.

    Delegates to ``BackgroundThreadService.start_node`` and then polls
    ``self.alive(node)`` for up to 60 seconds. Waiting here keeps a later
    ``stop_node`` from running before the process exists, in which case
    there would be no pid to signal.
    """
    BackgroundThreadService.start_node(self, node)

    def copier_is_running():
        return self.alive(node)

    failure_msg = "Node %s: Message Copier failed to start" % str(node.account)
    # wait_until is expected to fail with failure_msg if the timeout elapses.
    wait_until(copier_is_running, timeout_sec=60, err_msg=failure_msg)
def kill_node(self, node, clean_shutdown=True):
    """Signal every copier process on ``node`` and wait for all to exit.

    :param node: the node whose copier processes should be terminated.
    :param clean_shutdown: send SIGTERM when True, SIGKILL when False.

    Waits up to 60 seconds for ``self.pids(node)`` to become empty.
    """
    pids = self.pids(node)
    sig = signal.SIGTERM if clean_shutdown else signal.SIGKILL
    for pid in pids:
        node.account.signal(pid, sig)
    # Wait once, using the node-specific message. A preceding wait on the
    # same condition with a generic message would always fire first on
    # timeout and hide which node failed to stop.
    wait_until(lambda: len(self.pids(node)) == 0, timeout_sec=60,
               err_msg="Node %s: Message Copier failed to stop" % str(node.account))
def stop_node(self, node, clean_shutdown=True):
    """Stop the copier on ``node`` by delegating to ``kill_node``.

    :param node: the node whose copier should be stopped.
    :param clean_shutdown: forwarded unchanged to ``kill_node``.
    """
    self.kill_node(node, clean_shutdown=clean_shutdown)