From 97da746160a7e1f8306991d24cd106a1e5595d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Mon, 3 Mar 2025 16:56:36 +0100 Subject: [PATCH 1/5] v5_SUITE: Close all connections in `end_per_testcase/2` [Why] Many tests do not clean up their connections if they encounter a failure. This affects subsequent testcases negatively. --- deps/rabbitmq_mqtt/test/v5_SUITE.erl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/deps/rabbitmq_mqtt/test/v5_SUITE.erl b/deps/rabbitmq_mqtt/test/v5_SUITE.erl index a74cf0277b..44a1950944 100644 --- a/deps/rabbitmq_mqtt/test/v5_SUITE.erl +++ b/deps/rabbitmq_mqtt/test/v5_SUITE.erl @@ -206,10 +206,27 @@ end_per_testcase(T, Config) -> end_per_testcase0(T, Config). end_per_testcase0(Testcase, Config) -> + %% Terminate all connections and wait for sessions to terminate before + %% starting the next test case. + _ = rabbit_ct_broker_helpers:rpc( + Config, 0, + rabbit_networking, close_all_connections, [<<"test finished">>]), + _ = rabbit_ct_broker_helpers:rpc_all( + Config, + rabbit_mqtt, close_local_client_connections, [normal]), + eventually(?_assertEqual( + [], + rpc(Config, rabbit_mqtt, local_connection_pids, []))), %% Assert that every testcase cleaned up their MQTT sessions. + rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, delete_queues, []), eventually(?_assertEqual([], rpc(Config, rabbit_amqqueue, list, []))), rabbit_ct_helpers:testcase_finished(Config, Testcase). +delete_queues() -> + _ = [catch rabbit_amqqueue:delete(Q, false, false, <<"test finished">>) + || Q <- rabbit_amqqueue:list()], + ok. + %% ------------------------------------------------------------------- %% Testsuite cases %% ------------------------------------------------------------------- From 28870f380ce8299ecaefd4e3fa1a9cd83bb98d10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Tue, 25 Feb 2025 17:40:01 +0100 Subject: [PATCH 2/5] priority_queue_recovery_SUITE: Add suffix to RabbitMQ node names [Why] This helps debugging. --- deps/rabbit/test/priority_queue_recovery_SUITE.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deps/rabbit/test/priority_queue_recovery_SUITE.erl b/deps/rabbit/test/priority_queue_recovery_SUITE.erl index 9d6e7599da..b8792056d2 100644 --- a/deps/rabbit/test/priority_queue_recovery_SUITE.erl +++ b/deps/rabbit/test/priority_queue_recovery_SUITE.erl @@ -35,8 +35,10 @@ end_per_suite(Config) -> rabbit_ct_helpers:run_teardown_steps(Config). init_per_group(_, Config) -> + Suffix = rabbit_ct_helpers:testcase_absname(Config, "", "-"), Config1 = rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_count, 2} + {rmq_nodes_count, 2}, + {rmq_nodename_suffix, Suffix} ]), rabbit_ct_helpers:run_steps(Config1, rabbit_ct_broker_helpers:setup_steps() ++ From 43916da581a91fcb6b959cba71bed523daac2ac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Mon, 3 Mar 2025 10:48:43 +0100 Subject: [PATCH 3/5] logging_SUITE: Increase timetrap to 3 minutes [Why] We sometimes hit the 1-minute timetrap in CI even though the tests are running fine. --- deps/rabbit/test/logging_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/rabbit/test/logging_SUITE.erl b/deps/rabbit/test/logging_SUITE.erl index 696d0b5cde..5e89034a51 100644 --- a/deps/rabbit/test/logging_SUITE.erl +++ b/deps/rabbit/test/logging_SUITE.erl @@ -57,7 +57,7 @@ logging_to_syslog_works/1]). suite() -> - [{timetrap, {minutes, 1}}]. + [{timetrap, {minutes, 3}}]. all() -> [ From 0e7f92aba2292ca117d664e7e67529f118a258ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Thu, 27 Feb 2025 13:24:57 +0100 Subject: [PATCH 4/5] rabbit_stream_SUITE: Increase some timeouts --- .../src/test/java/com/rabbitmq/stream/FailureTest.java | 2 ++ .../src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/FailureTest.java b/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/FailureTest.java index 9ffaa051d7..cb6a80832f 100644 --- a/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/FailureTest.java +++ b/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/FailureTest.java @@ -221,6 +221,7 @@ public class FailureTest { () -> { connected.set(false); + try { Thread.sleep(2000); } catch (Exception e) {} Client locator = cf.get(new Client.ClientParameters().port(streamPortNode2())); // wait until there's a new leader @@ -467,6 +468,7 @@ public class FailureTest { // avoid long-running task in the IO thread executorService.submit( () -> { + try { Thread.sleep(2000); } catch (Exception e) {} Client.StreamMetadata m = metadataClient.metadata(stream).get(stream); int newReplicaPort = m.getReplicas().get(0).getPort(); diff --git a/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java b/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java index f50b194a4f..24718f87b9 100644 --- a/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java +++ b/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java @@ -28,6 +28,7 @@ import com.rabbitmq.stream.impl.Client.ClientParameters; import com.rabbitmq.stream.impl.Client.Response; import com.rabbitmq.stream.impl.Client.StreamMetadata; import java.util.Collections; +import java.time.Duration; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -57,7 +58,9 @@ public class LeaderLocatorTest { void clientLocalLocatorShouldMakeLeaderOnConnectedNode() { int[] ports = new int[] {TestUtils.streamPortNode1(), TestUtils.streamPortNode2()}; for (int port : ports) { - Client client = cf.get(new Client.ClientParameters().port(port)); + Client client = cf.get(new Client.ClientParameters() + .port(port) + .rpcTimeout(Duration.ofSeconds(30))); String s = UUID.randomUUID().toString(); try { Response response = From 3a278e7e7c48f05fdacdf90018f201b08c281b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Wed, 26 Feb 2025 14:00:08 +0100 Subject: [PATCH 5/5] rabbitmq-run.mk: Stop node in `start-background-broker` in case of error [Why] The CLI sometimes crashes early because it fails to configure the Erlang distribution. Because we use two CLI commands to watch the start of RabbitMQ, if one of them fails, the Make recipe will exit with an error, leaving the RabbitMQ node running. [How] We use a shell trap to stop the node if the shell is about to exit with an error. While here, we retry the `await_startup` CLI command several times because this is the one failing the most. This is until the crash is understood and a proper fix is committed. --- deps/rabbit_common/mk/rabbitmq-run.mk | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/deps/rabbit_common/mk/rabbitmq-run.mk b/deps/rabbit_common/mk/rabbitmq-run.mk index 926b2b1a51..480b6dd442 100644 --- a/deps/rabbit_common/mk/rabbitmq-run.mk +++ b/deps/rabbit_common/mk/rabbitmq-run.mk @@ -323,10 +323,13 @@ start-background-broker: node-tmpdir $(DIST_TARGET) $(BASIC_SCRIPT_ENV_SETTINGS) \ $(RABBITMQ_SERVER) \ $(REDIRECT_STDIO) & + trap 'test "$$?" = 0 || $(MAKE) stop-node' EXIT && \ ERL_LIBS="$(DIST_ERL_LIBS)" \ $(RABBITMQCTL) -n $(RABBITMQ_NODENAME) wait --timeout $(RMQCTL_WAIT_TIMEOUT) $(RABBITMQ_PID_FILE) && \ - ERL_LIBS="$(DIST_ERL_LIBS)" \ - $(RABBITMQCTL) --node $(RABBITMQ_NODENAME) await_startup + for i in $$(seq 1 10); do \ + ERL_LIBS="$(DIST_ERL_LIBS)" $(RABBITMQCTL) -n $(RABBITMQ_NODENAME) await_startup || sleep 1; \ + done && \ + ERL_LIBS="$(DIST_ERL_LIBS)" $(RABBITMQCTL) -n $(RABBITMQ_NODENAME) await_startup start-rabbit-on-node: $(exec_verbose) ERL_LIBS="$(DIST_ERL_LIBS)" \