rabbit_db: `force_reset` command is unsupported with Khepri

[Why]
The `force_reset` command simply removes local files on disk for the
local node.

In the case of Ra, this can't work because the rest of the cluster does
not know about the forced-reset node. Therefore the leader will continue
to send `append_entry` commands to the reset node.

If that forced-reset node restarts and receives these messages, it will
either join the cluster again (because it's on an older Raft term) or it
will hit an assertion and exit (because it's on the same Raft term).

[How]
Given we can't really support this scenario and it has little value, the
command will now return an error if someone attempts a `force_reset` with
a node running Khepri.

This also deprecates the command: once Mnesia support is removed, the
command will be removed at the same time. This is noted in the
rabbitmqctl.8 manpage.
This commit is contained in:
Jean-Sébastien Pédron 2025-02-07 16:44:37 +01:00
parent 211fc5b45f
commit c78aec7d48
No known key found for this signature in database
GPG Key ID: 39E99761A5FD94CC
5 changed files with 19 additions and 44 deletions

View File

@ -346,7 +346,7 @@ next time it is started:
.sp .sp
.Dl rabbitmqctl force_boot .Dl rabbitmqctl force_boot
.\" ------------------------------------------------------------------ .\" ------------------------------------------------------------------
.It Cm force_reset .It Cm force_reset Em (deprecated)
.Pp .Pp
Forcefully returns a RabbitMQ node to its virgin state. Forcefully returns a RabbitMQ node to its virgin state.
.Pp .Pp
@ -359,6 +359,13 @@ management database state and cluster configuration.
It should only be used as a last resort if the database or cluster It should only be used as a last resort if the database or cluster
configuration has been corrupted. configuration has been corrupted.
.Pp .Pp
The
.Cm force_reset
command is
.Sy deprecated .
It remains available when the Mnesia metadata store is used.
It is unsupported with the Khepri metadata store.
.Pp
For For
.Cm reset .Cm reset
and and

View File

@ -163,11 +163,13 @@ force_reset_using_mnesia() ->
#{domain => ?RMQLOG_DOMAIN_DB}), #{domain => ?RMQLOG_DOMAIN_DB}),
rabbit_mnesia:force_reset(). rabbit_mnesia:force_reset().
-spec force_reset_using_khepri() -> no_return().
force_reset_using_khepri() -> force_reset_using_khepri() ->
?LOG_DEBUG( ?LOG_ERROR(
"DB: resetting node forcefully (using Khepri)", "DB: resetting node forcefully is unsupported with Khepri",
#{domain => ?RMQLOG_DOMAIN_DB}), #{domain => ?RMQLOG_DOMAIN_DB}),
rabbit_khepri:force_reset(). throw({error, "Forced reset is unsupported with Khepri"}).
-spec force_load_on_next_boot() -> Ret when -spec force_load_on_next_boot() -> Ret when
Ret :: ok. Ret :: ok.

View File

@ -168,8 +168,7 @@
-export([check_cluster_consistency/0, -export([check_cluster_consistency/0,
check_cluster_consistency/2, check_cluster_consistency/2,
node_info/0]). node_info/0]).
-export([reset/0, -export([reset/0]).
force_reset/0]).
-export([cluster_status_from_khepri/0, -export([cluster_status_from_khepri/0,
cli_cluster_status/0]). cli_cluster_status/0]).
@ -601,23 +600,6 @@ reset() ->
%% @private %% @private
force_reset() ->
case rabbit:is_running() of
false ->
ok = khepri:stop(?RA_CLUSTER_NAME),
DataDir = maps:get(data_dir, ra_system:fetch(?RA_SYSTEM)),
ok = rabbit_ra_systems:ensure_ra_system_stopped(?RA_SYSTEM),
ok = rabbit_file:recursive_delete(
filelib:wildcard(DataDir ++ "/*")),
_ = file:delete(rabbit_guid:filename()),
ok;
true ->
throw({error, rabbitmq_unexpectedly_running})
end.
%% @private
force_shrink_member_to_current_member() -> force_shrink_member_to_current_member() ->
ok = ra_server_proc:force_shrink_members_to_current_member( ok = ra_server_proc:force_shrink_members_to_current_member(
{?RA_CLUSTER_NAME, node()}). {?RA_CLUSTER_NAME, node()}).

View File

@ -953,22 +953,11 @@ force_reset_node_in_khepri(Config) ->
stop_join_start(Config, Rabbit, Hare), stop_join_start(Config, Rabbit, Hare),
stop_app(Config, Rabbit), stop_app(Config, Rabbit),
ok = force_reset(Config, Rabbit), {error, 69, Msg} = force_reset(Config, Rabbit),
assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]}, [Hare]), ?assertEqual(
%% Khepri is stopped, so it won't report anything. match,
assert_status({[Rabbit], [], [Rabbit], [Rabbit], []}, [Rabbit]), re:run(
%% Hare thinks that Rabbit is still clustered Msg, "Forced reset is unsupported with Khepri", [{capture, none}])).
assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]},
[Hare]),
ok = start_app(Config, Rabbit),
assert_not_clustered(Rabbit),
%% We can rejoin Rabbit and Hare. Unlike with Mnesia, we try to solve the
%% inconsistency instead of returning an error.
ok = stop_app(Config, Rabbit),
?assertEqual(ok, join_cluster(Config, Rabbit, Hare, false)),
ok = start_app(Config, Rabbit),
assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Rabbit, Hare]},
[Rabbit, Hare]).
status_with_alarm(Config) -> status_with_alarm(Config) ->
[Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config, [Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config,

View File

@ -55,7 +55,6 @@
kill_node_after/3, kill_node_after/3,
reset_node/2, reset_node/2,
force_reset_node/2,
forget_cluster_node/3, forget_cluster_node/3,
forget_cluster_node/4, forget_cluster_node/4,
@ -2159,10 +2158,6 @@ reset_node(Config, Node) ->
Name = get_node_config(Config, Node, nodename), Name = get_node_config(Config, Node, nodename),
rabbit_control_helper:command(reset, Name). rabbit_control_helper:command(reset, Name).
force_reset_node(Config, Node) ->
Name = get_node_config(Config, Node, nodename),
rabbit_control_helper:command(force_reset, Name).
forget_cluster_node(Config, Node, NodeToForget) -> forget_cluster_node(Config, Node, NodeToForget) ->
forget_cluster_node(Config, Node, NodeToForget, []). forget_cluster_node(Config, Node, NodeToForget, []).
forget_cluster_node(Config, Node, NodeToForget, Opts) -> forget_cluster_node(Config, Node, NodeToForget, Opts) ->