Skip peer discovery clustering tests if multiple Khepri machine versions

... are being used at the same time.

[Why]
Depending on which node clusters with which, a node running an older
version of the Khepri Ra machine may not be able to apply Ra commands
and could be stuck.

There is no real solution and this is clearly an unsupported scenario. An
old node won't always be able to join a newer cluster.

[How]
In the testsuites, we skip clustering tests if we detect that multiple
Khepri Ra machine versions are being used.
This commit is contained in:
Jean-Sébastien Pédron 2025-02-12 17:13:24 +01:00
parent e8a302a249
commit 1f1a13521b
No known key found for this signature in database
GPG Key ID: 39E99761A5FD94CC
3 changed files with 53 additions and 8 deletions

View File

@ -173,7 +173,8 @@
user/1,
configured_metadata_store/1,
await_metadata_store_consistent/2
await_metadata_store_consistent/2,
do_nodes_run_same_ra_machine_version/2
]).
%% Internal functions exported to be used by rpc:call/4.
@ -1174,6 +1175,12 @@ ra_last_applied(ServerId) ->
#{last_applied := LastApplied} = ra:key_metrics(ServerId),
LastApplied.
%% Tells whether all results of `rpc_all(Config, RaMachineMod, version, [])'
%% are one and the same integer.
%%
%% The collected results are logged with ct:pal/2 before the comparison.
%% Returns `true' only when the first result is an integer and every other
%% result is exactly equal (`=:=') to it; returns `false' otherwise.
%% Fails with a `badmatch' if rpc_all/4 returns an empty list.
%%
%% NOTE(review): presumably rpc_all/4 yields one result per node described
%% by Config -- confirm against its definition.
do_nodes_run_same_ra_machine_version(Config, RaMachineMod) ->
    Versions = rpc_all(Config, RaMachineMod, version, []),
    [First | Others] = Versions,
    ct:pal("Ra machine versions of ~s: ~0p", [RaMachineMod, Versions]),
    case is_integer(First) of
        true ->
            %% No remaining result may differ from the first one.
            [] =:= [Other || Other <- Others, Other =/= First];
        false ->
            false
    end.
rewrite_node_config_file(Config, Node) ->
NodeConfig = get_node_config(Config, Node),
I = if

View File

@ -83,9 +83,27 @@ init_per_testcase(Testcase, Config)
case Config3 of
_ when is_list(Config3) ->
try
SameMacVer = (
rabbit_ct_broker_helpers:
do_nodes_run_same_ra_machine_version(
Config3, khepri_machine)),
case SameMacVer of
true ->
_ = rabbit_ct_broker_helpers:rpc_all(
Config3, rabbit_peer_discovery_backend, api_version, []),
Config3
Config3,
rabbit_peer_discovery_backend, api_version, []),
Config3;
false ->
Config5 = rabbit_ct_helpers:run_steps(
Config3,
rabbit_ct_client_helpers:teardown_steps()
++
rabbit_ct_broker_helpers:teardown_steps()),
rabbit_ct_helpers:testcase_finished(Config5, Testcase),
{skip,
"Nodes are using different Khepri Ra machine "
"versions; clustering will likely fail"}
end
catch
error:{exception, undef,
[{rabbit_peer_discovery_backend, api_version, _, _}

View File

@ -92,9 +92,27 @@ init_per_testcase(Testcase, Config)
case Config3 of
_ when is_list(Config3) ->
try
SameMacVer = (
rabbit_ct_broker_helpers:
do_nodes_run_same_ra_machine_version(
Config3, khepri_machine)),
case SameMacVer of
true ->
_ = rabbit_ct_broker_helpers:rpc_all(
Config3, rabbit_peer_discovery_backend, api_version, []),
Config3
Config3,
rabbit_peer_discovery_backend, api_version, []),
Config3;
false ->
Config5 = rabbit_ct_helpers:run_steps(
Config3,
rabbit_ct_client_helpers:teardown_steps()
++
rabbit_ct_broker_helpers:teardown_steps()),
rabbit_ct_helpers:testcase_finished(Config5, Testcase),
{skip,
"Nodes are using different Khepri Ra machine "
"versions; clustering will likely fail"}
end
catch
error:{exception, undef,
[{rabbit_peer_discovery_backend, api_version, _, _}
@ -239,7 +257,9 @@ wait_for_etcd(EtcdEndpoints) ->
Timeout = 60000,
rabbit_ct_helpers:await_condition(
fun() ->
case eetcd:open(test, EtcdEndpoints) of
Ret = eetcd:open(test, EtcdEndpoints),
ct:pal("Ret = ~p", [Ret]),
case Ret of
{ok, _Pid} -> true;
_ -> false
end