See #7209. Evaluate quorum queue membership periodically.
This commit is contained in:
parent
7037c887f3
commit
559a83d45f
|
@ -5,6 +5,7 @@ load("//:rabbitmq_home.bzl", "rabbitmq_home")
|
|||
load("//:rabbitmq_run.bzl", "rabbitmq_run")
|
||||
load(
|
||||
"//:rabbitmq.bzl",
|
||||
"ENABLE_FEATURE_MAYBE_EXPR",
|
||||
"RABBITMQ_DIALYZER_OPTS",
|
||||
"assert_suites",
|
||||
"rabbitmq_app",
|
||||
|
@ -330,6 +331,14 @@ rabbitmq_integration_suite(
|
|||
size = "medium",
|
||||
)
|
||||
|
||||
rabbitmq_integration_suite(
|
||||
name = "quorum_queue_member_reconciliation_SUITE",
|
||||
size = "medium",
|
||||
additional_beam = [
|
||||
":test_quorum_queue_utils_beam",
|
||||
],
|
||||
)
|
||||
|
||||
rabbitmq_integration_suite(
|
||||
name = "clustering_management_SUITE",
|
||||
size = "large",
|
||||
|
@ -665,6 +674,7 @@ rabbitmq_integration_suite(
|
|||
additional_beam = [
|
||||
":test_quorum_queue_utils_beam",
|
||||
],
|
||||
flaky = True,
|
||||
shard_count = 6,
|
||||
)
|
||||
|
||||
|
@ -1230,5 +1240,6 @@ eunit(
|
|||
":test_test_util_beam",
|
||||
":test_test_rabbit_event_handler_beam",
|
||||
],
|
||||
erl_extra_args = [ENABLE_FEATURE_MAYBE_EXPR],
|
||||
target = ":test_erlang_app",
|
||||
)
|
||||
|
|
|
@ -192,6 +192,7 @@ def all_beam_files(name = "all_beam_files"):
|
|||
"src/rabbit_queue_type_util.erl",
|
||||
"src/rabbit_quorum_memory_manager.erl",
|
||||
"src/rabbit_quorum_queue.erl",
|
||||
"src/rabbit_quorum_queue_periodic_membership_reconciliation.erl",
|
||||
"src/rabbit_ra_registry.erl",
|
||||
"src/rabbit_ra_systems.erl",
|
||||
"src/rabbit_reader.erl",
|
||||
|
@ -436,6 +437,7 @@ def all_test_beam_files(name = "all_test_beam_files"):
|
|||
"src/rabbit_queue_type_util.erl",
|
||||
"src/rabbit_quorum_memory_manager.erl",
|
||||
"src/rabbit_quorum_queue.erl",
|
||||
"src/rabbit_quorum_queue_periodic_membership_reconciliation.erl",
|
||||
"src/rabbit_ra_registry.erl",
|
||||
"src/rabbit_ra_systems.erl",
|
||||
"src/rabbit_reader.erl",
|
||||
|
@ -698,6 +700,7 @@ def all_srcs(name = "all_srcs"):
|
|||
"src/rabbit_queue_type_util.erl",
|
||||
"src/rabbit_quorum_memory_manager.erl",
|
||||
"src/rabbit_quorum_queue.erl",
|
||||
"src/rabbit_quorum_queue_periodic_membership_reconciliation.erl",
|
||||
"src/rabbit_ra_registry.erl",
|
||||
"src/rabbit_ra_systems.erl",
|
||||
"src/rabbit_reader.erl",
|
||||
|
@ -802,6 +805,7 @@ def test_suite_beam_files(name = "test_suite_beam_files"):
|
|||
erlc_opts = "//:test_erlc_opts",
|
||||
deps = ["//deps/amqp_client:erlang_app"],
|
||||
)
|
||||
|
||||
erlang_bytecode(
|
||||
name = "cluster_rename_SUITE_beam_files",
|
||||
testonly = True,
|
||||
|
@ -1972,3 +1976,12 @@ def test_suite_beam_files(name = "test_suite_beam_files"):
|
|||
erlc_opts = "//:test_erlc_opts",
|
||||
deps = ["//deps/amqp_client:erlang_app"],
|
||||
)
|
||||
erlang_bytecode(
|
||||
name = "quorum_queue_member_reconciliation_SUITE_beam_files",
|
||||
testonly = True,
|
||||
srcs = ["test/quorum_queue_member_reconciliation_SUITE.erl"],
|
||||
outs = ["test/quorum_queue_member_reconciliation_SUITE.beam"],
|
||||
app_name = "rabbit",
|
||||
erlc_opts = "//:test_erlc_opts",
|
||||
deps = ["//deps/amqp_client:erlang_app", "//deps/rabbitmq_ct_helpers:erlang_app"],
|
||||
)
|
||||
|
|
|
@ -2487,6 +2487,30 @@ end}.
|
|||
{mapping, "quorum_queue.property_equivalence.relaxed_checks_on_redeclaration", "rabbit.quorum_relaxed_checks_on_redeclaration", [
|
||||
{datatype, {enum, [true, false]}}]}.
|
||||
|
||||
|
||||
%%
|
||||
%% Quorum Queue membership reconciliation
|
||||
%%
|
||||
|
||||
{mapping, "quorum_queue.continuous_membership_reconciliation.enabled", "rabbit.quorum_membership_reconciliation_enabled", [
|
||||
{datatype, {enum, [true, false]}}]}.
|
||||
|
||||
{mapping, "quorum_queue.continuous_membership_reconciliation.auto_remove", "rabbit.quorum_membership_reconciliation_auto_remove", [
|
||||
{datatype, {enum, [true, false]}}]}.
|
||||
|
||||
{mapping, "quorum_queue.continuous_membership_reconciliation.interval", "rabbit.quorum_membership_reconciliation_interval", [
|
||||
{datatype, integer}, {validators, ["non_negative_integer"]}
|
||||
]}.
|
||||
|
||||
{mapping, "quorum_queue.continuous_membership_reconciliation.trigger_interval", "rabbit.quorum_membership_reconciliation_trigger_interval", [
|
||||
{datatype, integer}, {validators, ["non_negative_integer"]}
|
||||
]}.
|
||||
|
||||
{mapping, "quorum_queue.continuous_membership_reconciliation.target_group_size", "rabbit.quorum_membership_reconciliation_target_group_size", [
|
||||
{datatype, integer}, {validators, ["non_negative_integer"]}
|
||||
]}.
|
||||
|
||||
|
||||
%%
|
||||
%% Runtime parameters
|
||||
%%
|
||||
|
|
|
@ -175,6 +175,12 @@
|
|||
{requires, [rabbit_alarm, guid_generator]},
|
||||
{enables, core_initialized}]}).
|
||||
|
||||
-rabbit_boot_step({rabbit_quorum_queue_periodic_membership_reconciliation,
|
||||
[{description, "Quorums Queue membership reconciliation"},
|
||||
{mfa, {rabbit_sup, start_restartable_child,
|
||||
[rabbit_quorum_queue_periodic_membership_reconciliation]}},
|
||||
{requires, [database]}]}).
|
||||
|
||||
-rabbit_boot_step({rabbit_epmd_monitor,
|
||||
[{description, "epmd monitor"},
|
||||
{mfa, {rabbit_sup, start_restartable_child,
|
||||
|
|
|
@ -815,6 +815,7 @@ handle_dead_rabbit(Node, State = #state{partitions = Partitions,
|
|||
ok = rabbit_amqqueue:on_node_down(Node),
|
||||
ok = rabbit_alarm:on_node_down(Node),
|
||||
ok = rabbit_mnesia:on_node_down(Node),
|
||||
ok = rabbit_quorum_queue_periodic_membership_reconciliation:on_node_down(Node),
|
||||
%% If we have been partitioned, and we are now in the only remaining
|
||||
%% partition, we no longer care about partitions - forget them. Note
|
||||
%% that we do not attempt to deal with individual (other) partitions
|
||||
|
@ -843,7 +844,8 @@ ensure_keepalive_timer(State) ->
|
|||
%% Runs the node-up hooks for Node, in order: queues, alarms, mnesia and
%% finally the quorum queue membership reconciliation trigger. Every hook
%% is asserted to return `ok'.
%%
%% The diff view showed both the superseded final line
%% (`ok = rabbit_mnesia:on_node_up(Node).') and its replacement; only the
%% post-commit form is kept so the function is well-formed.
handle_live_rabbit(Node) ->
    ok = rabbit_amqqueue:on_node_up(Node),
    ok = rabbit_alarm:on_node_up(Node),
    ok = rabbit_mnesia:on_node_up(Node),
    ok = rabbit_quorum_queue_periodic_membership_reconciliation:on_node_up(Node).
|
||||
|
||||
maybe_autoheal(State = #state{partitions = []}) ->
|
||||
State;
|
||||
|
|
|
@ -282,14 +282,17 @@ do_filter_reachable(Members) ->
|
|||
Members).
|
||||
|
||||
-spec is_running(Node) -> IsRunning when
      Node :: node() | [node()],
      IsRunning :: boolean().
%% @doc Indicates if the given node is running. When given a list of nodes,
%% indicates if every node of the list is running, i.e. if
%% `filter_running/1' keeps all of them.
%%
%% @see filter_running/1.

is_running(Node) when is_atom(Node) ->
    [Node] =:= filter_running([Node]);
is_running(Nodes) when is_list(Nodes) ->
    %% Compare sorted lists so the order returned by filter_running/1 does
    %% not matter.
    lists:sort(Nodes) =:= lists:sort(filter_running(Nodes)).
|
||||
|
||||
|
||||
-spec list_running() -> Nodes when
|
||||
Nodes :: [node()].
|
||||
|
|
|
@ -367,11 +367,13 @@ validate(_VHost, <<"operator_policy">>, Name, Term, _User) ->
|
|||
notify(VHost, <<"policy">>, Name, Term0, ActingUser) ->
|
||||
Term = rabbit_data_coercion:atomize_keys(Term0),
|
||||
update_matched_objects(VHost, Term, ActingUser),
|
||||
rabbit_quorum_queue_periodic_membership_reconciliation:policy_set(),
|
||||
rabbit_event:notify(policy_set, [{name, Name}, {vhost, VHost},
|
||||
{user_who_performed_action, ActingUser} | Term]);
|
||||
notify(VHost, <<"operator_policy">>, Name, Term0, ActingUser) ->
|
||||
Term = rabbit_data_coercion:atomize_keys(Term0),
|
||||
update_matched_objects(VHost, Term, ActingUser),
|
||||
rabbit_quorum_queue_periodic_membership_reconciliation:policy_set(),
|
||||
rabbit_event:notify(policy_set, [{name, Name}, {vhost, VHost},
|
||||
{user_who_performed_action, ActingUser} | Term]).
|
||||
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
-module(rabbit_quorum_queue).
|
||||
|
||||
-behaviour(rabbit_queue_type).
|
||||
-behaviour(rabbit_policy_validator).
|
||||
-behaviour(rabbit_policy_merge_strategy).
|
||||
|
||||
-export([init/1,
|
||||
close/1,
|
||||
|
@ -35,8 +37,8 @@
|
|||
-export([format/1]).
|
||||
-export([open_files/1]).
|
||||
-export([peek/2, peek/3]).
|
||||
-export([add_member/4]).
|
||||
-export([delete_member/3]).
|
||||
-export([add_member/4, add_member/2]).
|
||||
-export([delete_member/3, delete_member/2]).
|
||||
-export([requeue/3]).
|
||||
-export([policy_changed/1]).
|
||||
-export([format_ra_event/3]).
|
||||
|
@ -65,6 +67,7 @@
|
|||
is_compatible/3,
|
||||
declare/2,
|
||||
is_stateful/0]).
|
||||
-export([validate_policy/1, merge_policy_value/3]).
|
||||
|
||||
-export([force_shrink_member_to_current_member/2,
|
||||
force_all_queues_shrink_member_to_current_member/0]).
|
||||
|
@ -114,6 +117,34 @@
|
|||
-define(ADD_MEMBER_TIMEOUT, 5000).
|
||||
-define(SNAPSHOT_INTERVAL, 8192). %% the ra default is 4096
|
||||
|
||||
%%----------- QQ policies ---------------------------------------------------
|
||||
|
||||
%% Registers this module as the validator (policy and operator policy) and
%% as the merge strategy for the `target-group-size' policy key. The
%% description now names the module that actually performs the periodic
%% reconciliation.
-rabbit_boot_step(
   {?MODULE,
    [{description, "QQ target group size policies. "
      "target-group-size controls the targeted number of "
      "member nodes for the queue. If set, RabbitMQ will try to "
      "grow the queue members to the target size. "
      "See module rabbit_quorum_queue_periodic_membership_reconciliation."},
     {mfa, {rabbit_registry, register,
            [policy_validator, <<"target-group-size">>, ?MODULE]}},
     {mfa, {rabbit_registry, register,
            [operator_policy_validator, <<"target-group-size">>, ?MODULE]}},
     {mfa, {rabbit_registry, register,
            [policy_merge_strategy, <<"target-group-size">>, ?MODULE]}},
     {requires, rabbit_registry},
     {enables, recovery}]}).
|
||||
|
||||
%% Policy validator callback: accepts the policy when `target-group-size'
%% is a strictly positive integer. Any other value, including a missing
%% key (reported as `none'), yields an error triple with a format string.
validate_policy(Args) ->
    case proplists:get_value(<<"target-group-size">>, Args, none) of
        Size when is_integer(Size), Size > 0 ->
            ok;
        Invalid ->
            {error, "~tp is not a valid qq target count value", [Invalid]}
    end.
|
||||
|
||||
%% Policy merge strategy callback: when both a policy and an operator
%% policy define `target-group-size', the larger of the two values wins.
merge_policy_value(<<"target-group-size">>, PolicyVal, OperatorVal) ->
    erlang:max(PolicyVal, OperatorVal).
|
||||
|
||||
%%----------- rabbit_queue_type ---------------------------------------------
|
||||
|
||||
-spec is_enabled() -> boolean().
|
||||
|
@ -215,6 +246,7 @@ start_cluster(Q) ->
|
|||
ok = rabbit_fifo_client:update_machine_state(LeaderId,
|
||||
ra_machine_config(NewQ)),
|
||||
notify_decorators(QName, startup),
|
||||
rabbit_quorum_queue_periodic_membership_reconciliation:queue_created(NewQ),
|
||||
rabbit_event:notify(queue_created,
|
||||
[{name, QName},
|
||||
{durable, Durable},
|
||||
|
@ -1093,6 +1125,8 @@ add_member(VHost, Name, Node, Timeout) ->
|
|||
E
|
||||
end.
|
||||
|
||||
%% Adds Node as a member of quorum queue Q using the default
%% ?ADD_MEMBER_TIMEOUT.
add_member(Q, Node) ->
    Timeout = ?ADD_MEMBER_TIMEOUT,
    add_member(Q, Node, Timeout).

|
||||
add_member(Q, Node, Timeout) when ?amqqueue_is_quorum(Q) ->
|
||||
{RaName, _} = amqqueue:get_pid(Q),
|
||||
QName = amqqueue:get_name(Q),
|
||||
|
|
|
@ -0,0 +1,256 @@
|
|||
%% This Source Code Form is subject to the terms of the Mozilla Public
|
||||
%% License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
%%
|
||||
%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved.
|
||||
%%
|
||||
|
||||
-module(rabbit_quorum_queue_periodic_membership_reconciliation).
|
||||
|
||||
-feature(maybe_expr, enable).
|
||||
|
||||
-behaviour(gen_server).
|
||||
|
||||
-export([on_node_up/1, on_node_down/1, queue_created/1, policy_set/0]).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
%% gen_server callbacks
|
||||
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
|
||||
code_change/3]).
|
||||
|
||||
-define(SERVER, ?MODULE).
|
||||
-define(DEFAULT_INTERVAL, 60_000*60).
|
||||
-define(DEFAULT_TRIGGER_INTERVAL, 10_000).
|
||||
-define(QUEUE_COUNT_START_RANDOM_SELECTION, 1_000).
|
||||
|
||||
-define(EVAL_MSG, membership_reconciliation).
|
||||
|
||||
-record(state, {timer_ref :: reference() | undefined,
|
||||
interval :: non_neg_integer(),
|
||||
trigger_interval :: non_neg_integer(),
|
||||
target_group_size :: non_neg_integer() | undefined,
|
||||
enabled :: boolean(),
|
||||
auto_remove :: boolean()}).
|
||||
|
||||
%%----------------------------------------------------------------------------
|
||||
%% Start
|
||||
%%----------------------------------------------------------------------------
|
||||
|
||||
%% Starts the reconciliation gen_server, locally registered as ?SERVER.
-spec start_link() -> rabbit_types:ok_pid_or_error().
start_link() ->
    ServerName = {local, ?SERVER},
    gen_server:start_link(ServerName, ?MODULE, [], []).
|
||||
|
||||
%%----------------------------------------------------------------------------
|
||||
%% API
|
||||
%%----------------------------------------------------------------------------
|
||||
|
||||
%% Notifies the server (asynchronously) that Node came up.
on_node_up(Node) ->
    Trigger = {membership_reconciliation_trigger, {node_up, Node}},
    gen_server:cast(?SERVER, Trigger).
|
||||
|
||||
%% Notifies the server (asynchronously) that Node went down.
on_node_down(Node) ->
    Trigger = {membership_reconciliation_trigger, {node_down, Node}},
    gen_server:cast(?SERVER, Trigger).
|
||||
|
||||
%% Notifies the server (asynchronously) that queue Q was created.
queue_created(Q) ->
    Trigger = {membership_reconciliation_trigger, {queue_created, Q}},
    gen_server:cast(?SERVER, Trigger).
|
||||
|
||||
%% Notifies the server (asynchronously) that a policy was set.
policy_set() ->
    Trigger = {membership_reconciliation_trigger, policy_set},
    gen_server:cast(?SERVER, Trigger).
|
||||
|
||||
%%----------------------------------------------------------------------------
|
||||
%% gen_server callbacks
|
||||
%%----------------------------------------------------------------------------
|
||||
|
||||
%% Reads the reconciliation settings from the `rabbit' application
%% environment. When the feature is enabled the first evaluation is
%% scheduled after `interval' milliseconds; otherwise no timer is started
%% and the process hibernates.
init([]) ->
    Setting = fun(Key, Default) ->
                      rabbit_misc:get_env(rabbit, Key, Default)
              end,
    Enabled = Setting(quorum_membership_reconciliation_enabled, false),
    AutoRemove = Setting(quorum_membership_reconciliation_auto_remove, false),
    Interval = Setting(quorum_membership_reconciliation_interval,
                       ?DEFAULT_INTERVAL),
    TriggerInterval = Setting(quorum_membership_reconciliation_trigger_interval,
                              ?DEFAULT_TRIGGER_INTERVAL),
    TargetGroupSize = Setting(quorum_membership_reconciliation_target_group_size,
                              undefined),
    State0 = #state{interval = Interval,
                    trigger_interval = TriggerInterval,
                    target_group_size = TargetGroupSize,
                    enabled = Enabled,
                    auto_remove = AutoRemove},
    case Enabled of
        false ->
            {ok, State0, hibernate};
        true ->
            TimerRef = erlang:send_after(Interval, self(), ?EVAL_MSG),
            {ok, State0#state{timer_ref = TimerRef}}
    end.
|
||||
|
||||
%% No synchronous API is defined: every call is answered with `ok' and the
%% state is left untouched.
handle_call(_Call, _Caller, State) ->
    {reply, ok, State}.
|
||||
|
||||
%% Reconciliation triggers. While the feature is disabled a trigger is
%% dropped and the process hibernates again. Otherwise the pending timer is
%% cancelled and a new one is armed to fire after `trigger_interval'.
%% Any other cast is ignored.
handle_cast({membership_reconciliation_trigger, _Reason},
            #state{enabled = false} = State) ->
    {noreply, State, hibernate};
handle_cast({membership_reconciliation_trigger, Reason},
            #state{timer_ref = PendingRef,
                   trigger_interval = Delay} = State) ->
    rabbit_log:debug("Quorum Queue membership reconciliation triggered: ~p",
                     [Reason]),
    _ = erlang:cancel_timer(PendingRef),
    NewRef = erlang:send_after(Delay, self(), ?EVAL_MSG),
    {noreply, State#state{timer_ref = NewRef}};
handle_cast(_Other, State) ->
    {noreply, State}.
|
||||
|
||||
%% Evaluation tick: reconciles the membership of the locally-led quorum
%% queues and re-arms the timer. When nothing had to be done (`noop') the
%% next tick is scheduled after `interval', otherwise after the shorter
%% `trigger_interval'.
%%
%% Any other message is ignored; when the feature is disabled the process
%% goes back to hibernation.
handle_info(?EVAL_MSG, #state{timer_ref = OldRef,
                              interval = Interval,
                              trigger_interval = TriggerInterval} = State) ->
    %% A trigger cast may have re-armed the timer while this tick was
    %% already sitting in the mailbox. Without cancelling it here, both
    %% timers would keep re-arming themselves forever.
    ok = cancel_pending_timer(OldRef),
    NewTimeout = case reconclitiate_quorum_queue_membership(State) of
                     noop ->
                         Interval;
                     _ ->
                         TriggerInterval
                 end,
    Ref = erlang:send_after(NewTimeout, self(), ?EVAL_MSG),
    {noreply, State#state{timer_ref = Ref}};
handle_info(_Info, #state{enabled = false} = State) ->
    {noreply, State, hibernate};
handle_info(_Info, State) ->
    {noreply, State}.

%% Cancels a possibly pending evaluation timer. `undefined' means that no
%% timer was ever armed; cancelling an already fired timer is harmless.
cancel_pending_timer(undefined) ->
    ok;
cancel_pending_timer(Ref) when is_reference(Ref) ->
    _ = erlang:cancel_timer(Ref),
    ok.
|
||||
|
||||
%% Nothing to clean up on shutdown.
terminate(_Why, _FinalState) ->
    ok.
|
||||
|
||||
%% The state needs no conversion on code upgrade.
code_change(_FromVsn, State, _Opts) ->
    {ok, State}.
|
||||
|
||||
%%----------------------------------------------------------------------------
|
||||
%% Internal functions
|
||||
%%----------------------------------------------------------------------------
|
||||
|
||||
%% Runs one reconciliation pass over every queue returned by
%% rabbit_amqqueue:list_local_leaders/0, using the current cluster
%% members and running nodes. Returns the accumulated result, starting
%% from `noop'.
reconclitiate_quorum_queue_membership(State) ->
    Leaders = rabbit_amqqueue:list_local_leaders(),
    ClusterMembers = rabbit_nodes:list_members(),
    RunningNodes = rabbit_nodes:list_running(),
    reconclitiate_quorum_members(ClusterMembers, RunningNodes, Leaders,
                                 State, noop).
|
||||
|
||||
%% Walks the list of locally-led queues and, for each one, either removes
%% members living on nodes that are no longer cluster members (when
%% auto-removal applies) or tries to grow the queue towards its target
%% size. The per-queue results are folded with update_result/2.
%%
%% A queue is skipped (`noop') when this node is not its Ra leader, when
%% this node is being drained, or when ra:members/2 returns anything other
%% than a success or a timeout. A timeout yields `ok'.
reconclitiate_quorum_members(_ExpectedNodes, _Running, [], _State, Result) ->
    Result;
reconclitiate_quorum_members(ExpectedNodes, Running, [Q | LocalLeaders],
                             #state{target_group_size = TargetSize} = State,
                             OldResult) ->
    Result =
        maybe
            %% Conditional match (`?='): a timeout or error reply must be
            %% routed to the `else' clauses below. A plain `=' would crash
            %% the server with badmatch and make those clauses unreachable.
            {ok, Members, {_, LeaderNode}} ?= ra:members(amqqueue:get_pid(Q), 500),
            %% Check if Leader is indeed this node
            LeaderNode ?= node(),
            %% And that this node is not in maintenance mode
            true ?= not rabbit_maintenance:is_being_drained_local_read(node()),
            MemberNodes = [Node || {_, Node} <- Members],
            DanglingNodes = MemberNodes -- ExpectedNodes,
            case maybe_remove(DanglingNodes, State) of
                false ->
                    maybe_add_member(Q, Running, MemberNodes,
                                     get_target_size(Q, TargetSize));
                true ->
                    remove_members(Q, DanglingNodes)
            end
        else
            {timeout, Reason} ->
                rabbit_log:debug("Find leader timeout: ~p", [Reason]),
                ok;
            _ ->
                noop
        end,
    reconclitiate_quorum_members(ExpectedNodes, Running, LocalLeaders, State,
                                 update_result(OldResult, Result)).
|
||||
|
||||
%% Tells whether members must be removed: only when auto-removal is
%% enabled and there is at least one dangling node.
maybe_remove(DanglingNodes, #state{auto_remove = AutoRemove}) ->
    case {AutoRemove, DanglingNodes} of
        {false, _} -> false;
        {true, []} -> false;
        {true, _} -> true
    end.
|
||||
|
||||
%% Adds one new member to Q when should_add_node/3 allows it. Candidates
%% are the running nodes that are not members yet, minus the nodes under
%% maintenance. Returns `ok' when an addition was attempted (whether it
%% succeeded or not) and `noop' otherwise.
maybe_add_member(Q, Running, MemberNodes, TargetSize) ->
    %% Filter out any new nodes under maintenance
    NonMembers = Running -- MemberNodes,
    Candidates = rabbit_maintenance:filter_out_drained_nodes_local_read(NonMembers),
    case should_add_node(MemberNodes, Candidates, TargetSize) of
        false ->
            noop;
        true ->
            %% In the future, sort the list of new nodes based on load,
            %% availability zones etc
            Chosen = select_node(Candidates),
            QName = amqqueue:get_name(Q),
            case rabbit_quorum_queue:add_member(Q, Chosen) of
                ok ->
                    rabbit_log:debug(
                      "Added node ~ts as a member to ~ts as "
                      "the queues target group size(#~w) is not met and "
                      "there are enough new nodes(#~w) in the cluster",
                      [Chosen, rabbit_misc:rs(QName), TargetSize,
                       length(Candidates)]);
                {error, Err} ->
                    rabbit_log:warning(
                      "~ts: failed to add member (replica) on node ~w, error: ~w",
                      [rabbit_misc:rs(QName), Chosen, Err])
            end,
            ok
    end.
|
||||
|
||||
%% Decides whether a member should be added. All of the following must
%% hold, checked in this order:
%%   - there is at least one candidate node;
%%   - the current member count is below the target size;
%%   - the current member count is even, or more than one candidate is
%%     available;
%%   - every current member other than this node is running.
should_add_node(MemberNodes, New, TargetSize) ->
    MemberCount = length(MemberNodes),
    CandidateCount = length(New),
    CandidateCount > 0
        andalso MemberCount < TargetSize
        andalso (rabbit_misc:is_even(MemberCount) orelse CandidateCount > 1)
        andalso rabbit_nodes:is_running(lists:delete(node(), MemberNodes)).
|
||||
|
||||
%% Effective target group size for Q: the larger of the globally
%% configured size (second argument, `undefined' when not configured) and
%% the size derived from the queue itself by get_target_size/1.
%%
%% The guard accepts 0 because the configuration schema validates the
%% setting as a non-negative integer; with `N > 0' a configured 0 crashed
%% the server with function_clause. `is_integer/1' also rejects
%% non-numeric terms, which would otherwise satisfy `N > 0' through
%% Erlang's term ordering.
get_target_size(Q, undefined) ->
    get_target_size(Q);
get_target_size(Q, N) when is_integer(N), N >= 0 ->
    max(N, get_target_size(Q)).
|
||||
|
||||
%% Target group size requested for Q itself: the larger of the
%% `target-group-size' policy value (0 when no policy applies) and the
%% `x-quorum-target-group-size' queue argument, when present.
get_target_size(Q) ->
    PolicyValue = case rabbit_policy:get(<<"target-group-size">>, Q) of
                      undefined ->
                          0;
                      PolicyN ->
                          PolicyN
                  end,
    Arguments = amqqueue:get_arguments(Q),
    case rabbit_misc:table_lookup(Arguments, <<"x-quorum-target-group-size">>) of
        undefined ->
            PolicyValue;
        {_Type, ArgN} ->
            %% table_lookup/2 returns {Type, Value}. Comparing the whole
            %% tuple with an integer would always pick the tuple, because
            %% tuples sort after numbers in Erlang's term order.
            max(ArgN, PolicyValue)
    end.
|
||||
|
||||
%% Removes the member of Q on each of the given nodes, one after the
%% other, logging every outcome. A failed removal does not stop the
%% remaining ones. Always returns `ok'.
remove_members(Q, Nodes) ->
    RemoveOne =
        fun(Node) ->
                case rabbit_quorum_queue:delete_member(Q, Node) of
                    ok ->
                        QName = amqqueue:get_name(Q),
                        rabbit_log:debug("~ts: Successfully removed member (replica) on node ~w",
                                         [rabbit_misc:rs(QName), Node]);
                    {error, Err} ->
                        QName = amqqueue:get_name(Q),
                        rabbit_log:warning("~ts: failed to remove member (replica) on node "
                                           "~w, error: ~w",
                                           [rabbit_misc:rs(QName), Node, Err])
                end
        end,
    lists:foreach(RemoveOne, Nodes),
    ok.
|
||||
|
||||
|
||||
%% Folds a per-queue result into the accumulated one so that a non-`noop'
%% outcome is never overwritten by a later `noop'.
update_result(Previous, Latest) ->
    case {Previous, Latest} of
        {noop, _} -> Latest;
        {_, noop} -> Previous;
        {Same, Same} -> Same
    end.
|
||||
|
||||
%% Picks the node to grow to: the only candidate when there is exactly
%% one, a uniformly random one otherwise.
select_node([Only]) ->
    Only;
select_node(Candidates) ->
    Index = rand:uniform(length(Candidates)),
    lists:nth(Index, Candidates).
|
|
@ -0,0 +1,232 @@
|
|||
%% This Source Code Form is subject to the terms of the Mozilla Public
|
||||
%% License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
%%
|
||||
%% Copyright (c) 2018-2023 VMware, Inc. or its affiliates. All rights reserved.
|
||||
|
||||
|
||||
-module(quorum_queue_member_reconciliation_SUITE).
|
||||
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("amqp_client/include/amqp_client.hrl").
|
||||
-include_lib("rabbitmq_ct_helpers/include/rabbit_assert.hrl").
|
||||
|
||||
-compile([nowarn_export_all, export_all]).
|
||||
|
||||
|
||||
%% Common Test entry point: a single top-level group.
all() ->
    [{group, unclustered}].
|
||||
|
||||
%% Group layout: the three test cases run in the `quorum_queue_3'
%% sub-group, nested in `unclustered'.
groups() ->
    TestCases = [auto_grow, auto_grow_drained_node, auto_shrink],
    [{unclustered, [], [{quorum_queue_3, [], TestCases}]}].
|
||||
|
||||
%% -------------------------------------------------------------------
|
||||
%% Testsuite setup/teardown.
|
||||
%% -------------------------------------------------------------------
|
||||
|
||||
%% Suite setup: enables membership reconciliation (with auto-removal),
%% shortens its intervals and sets a target group size of 3 through the
%% `rabbit' application environment.
init_per_suite(Config0) ->
    rabbit_ct_helpers:log_environment(),
    RabbitEnv = [{quorum_tick_interval, 1000},
                 {quorum_membership_reconciliation_enabled, true},
                 {quorum_membership_reconciliation_auto_remove, true},
                 {quorum_membership_reconciliation_interval, 5000},
                 {quorum_membership_reconciliation_trigger_interval, 2000},
                 {quorum_membership_reconciliation_target_group_size, 3}],
    Config1 = rabbit_ct_helpers:merge_app_env(Config0, {rabbit, RabbitEnv}),
    rabbit_ct_helpers:run_setup_steps(Config1, []).
|
||||
|
||||
%% Suite teardown: runs the generic teardown steps.
end_per_suite(SuiteConfig) ->
    rabbit_ct_helpers:run_teardown_steps(SuiteConfig).

|
||||
%% Group setup. `unclustered' only records that the nodes must not be
%% clustered at start. Any other group starts three broker nodes, named
%% after the group, with a net tick time of 10.
init_per_group(unclustered, Config) ->
    rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, false}]);
init_per_group(Group, Config) ->
    NodeCount = 3,
    NodeSettings = [{rmq_nodes_count, NodeCount},
                    {rmq_nodename_suffix, Group},
                    {tcp_ports_base}],
    WithNodes = rabbit_ct_helpers:set_config(Config, NodeSettings),
    WithTicktime = rabbit_ct_helpers:set_config(WithNodes, [{net_ticktime, 10}]),
    Steps = [fun merge_app_env/1 | rabbit_ct_broker_helpers:setup_steps()],
    rabbit_ct_helpers:run_steps(WithTicktime, Steps).
|
||||
|
||||
%% Group teardown: nothing to do for `unclustered'; any other group runs
%% the broker teardown steps.
end_per_group(unclustered, Config) ->
    Config;
end_per_group(_Group, Config) ->
    Steps = rabbit_ct_broker_helpers:teardown_steps(),
    rabbit_ct_helpers:run_steps(Config, Steps).
|
||||
|
||||
%% Test case setup: deletes every queue on node 0, derives the queue
%% names from the test case name and runs the client setup steps.
init_per_testcase(Testcase, Config) ->
    Started = rabbit_ct_helpers:testcase_started(Config, Testcase),
    rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, delete_queues, []),
    Q = rabbit_data_coercion:to_binary(Testcase),
    QueueNames = [{queue_name, Q},
                  {alt_queue_name, <<Q/binary, "_alt">>},
                  {alt_2_queue_name, <<Q/binary, "_alt_2">>}],
    WithNames = rabbit_ct_helpers:set_config(Started, QueueNames),
    rabbit_ct_helpers:run_steps(WithNames,
                                rabbit_ct_client_helpers:setup_steps()).
|
||||
|
||||
%% Adds the `rabbit' and `ra' application settings used by this suite to
%% the test configuration.
merge_app_env(Config) ->
    WithRabbit = rabbit_ct_helpers:merge_app_env(
                   Config, {rabbit, [{core_metrics_gc_interval, 100}]}),
    rabbit_ct_helpers:merge_app_env(
      WithRabbit, {ra, [{min_wal_roll_over_interval, 30000}]}).
|
||||
|
||||
%% Test case teardown: takes the second and third nodes out of the first
%% node's cluster again, then runs the client teardown steps.
end_per_testcase(Testcase, Config) ->
    [Leader | Followers] = [_, _, _] =
        rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
    reset_nodes(Followers, Leader),
    Steps = rabbit_ct_client_helpers:teardown_steps(),
    Finished = rabbit_ct_helpers:run_steps(Config, Steps),
    rabbit_ct_helpers:testcase_finished(Finished, Testcase).
|
||||
|
||||
%% For each node in turn: stop its app, make Leader forget it, reset it
%% and start it again. Every command must return `ok'.
reset_nodes(Nodes, Leader) ->
    ResetOne =
        fun(Node) ->
                ok = rabbit_control_helper:command(stop_app, Node),
                ok = rabbit_control_helper:command(forget_cluster_node, Leader,
                                                   [atom_to_list(Node)]),
                ok = rabbit_control_helper:command(reset, Node),
                ok = rabbit_control_helper:command(start_app, Node)
        end,
    lists:foreach(ResetOne, Nodes).
|
||||
|
||||
|
||||
%% -------------------------------------------------------------------
|
||||
%% Testcases.
|
||||
%% -------------------------------------------------------------------
|
||||
|
||||
%% Checks that a quorum queue declared on a single node is grown to three
%% members once three nodes are clustered, and is not grown while only
%% two are.
auto_grow(Config) ->
    [Server0, Server1, Server2] =
        rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
    Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),

    QQ = ?config(queue_name, Config),
    ?assertEqual({'queue.declare_ok', QQ, 0, 0},
                 declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
    RaId = {quorum_queue_utils:ra_name(QQ), Server0},

    %% There is only one node in the cluster at the moment
    {ok, Members, _} = ra:members(RaId),
    ?assertEqual(1, length(Members)),

    add_server_to_cluster(Server1, Server0),
    %% With 2 nodes in the cluster only one new node is available; adding
    %% it would leave the queue with an even number of members, so no
    %% member should be added. We sleep a while so the periodic check runs.
    timer:sleep(4000),
    {ok, Members, _} = ra:members(RaId),
    ?assertEqual(1, length(Members)),

    add_server_to_cluster(Server2, Server0),
    %% With 3 nodes in the cluster the target size can be reached, so
    %% eventually there should be 3 members
    wait_until(fun() ->
                       {ok, M, _} = ra:members(RaId),
                       3 =:= length(M)
               end).
|
||||
|
||||
%% Checks that a node marked as being drained is not used to grow a
%% quorum queue, and that the queue is grown to three members once the
%% node is no longer drained.
auto_grow_drained_node(Config) ->
    [Server0, Server1, Server2] =
        rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
    Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),

    QQ = ?config(queue_name, Config),
    ?assertEqual({'queue.declare_ok', QQ, 0, 0},
                 declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
    RaId = {quorum_queue_utils:ra_name(QQ), Server0},
    IsDrained = fun() ->
                        rabbit_ct_broker_helpers:is_being_drained_local_read(
                          Config, Server1)
                end,

    %% There is only one node in the cluster at the moment
    {ok, Members, _} = ra:members(RaId),
    ?assertEqual(1, length(Members)),

    add_server_to_cluster(Server1, Server0),
    %% Mark server1 as drained, which should mean the node is not a
    %% candidate for qq membership
    rabbit_ct_broker_helpers:mark_as_being_drained(Config, Server1),
    rabbit_ct_helpers:await_condition(fun() -> IsDrained() end, 10000),
    add_server_to_cluster(Server2, Server0),
    timer:sleep(5000),
    %% We have 3 nodes, but one is drained, so it will not be considered.
    {ok, Members1, _} = ra:members(RaId),
    ?assertEqual(1, length(Members1)),

    rabbit_ct_broker_helpers:unmark_as_being_drained(Config, Server1),
    rabbit_ct_helpers:await_condition(fun() -> not IsDrained() end, 10000),
    %% We have 3 nodes, none is being drained, so we should grow
    %% membership to 3
    wait_until(fun() ->
                       {ok, M, _} = ra:members(RaId),
                       3 =:= length(M)
               end).
|
||||
|
||||
|
||||
%% Checks that a three-member quorum queue shrinks to two members after
%% one node is stopped and forgotten by the cluster.
auto_shrink(Config) ->
    [Server0, Server1, Server2] =
        rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
    Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),
    add_server_to_cluster(Server1, Server0),
    add_server_to_cluster(Server2, Server0),

    QQ = ?config(queue_name, Config),
    ?assertEqual({'queue.declare_ok', QQ, 0, 0},
                 declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
    HasMembers = fun(Expected) ->
                         fun() ->
                                 {ok, M, _} = ra:members(
                                                {quorum_queue_utils:ra_name(QQ),
                                                 Server0}),
                                 Expected =:= length(M)
                         end
                 end,

    wait_until(HasMembers(3)),
    ok = rabbit_control_helper:command(stop_app, Server2),
    ok = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_db_cluster, forget_member,
                                      [Server2, false]),
    %% with one node 'forgotten', eventually the membership will shrink to 2
    wait_until(HasMembers(2)).
|
||||
|
||||
|
||||
|
||||
%% Makes Server join Leader's cluster: stop its app, join, start again.
%% Returns the result of the final `start_app' command.
add_server_to_cluster(Server, Leader) ->
    LeaderName = atom_to_list(Leader),
    ok = rabbit_control_helper:command(stop_app, Server),
    ok = rabbit_control_helper:command(join_cluster, Server, [LeaderName], []),
    rabbit_control_helper:command(start_app, Server).
|
||||
|
||||
%% Declares queue Q on channel Ch without any arguments.
declare(Ch, Q) ->
    NoArgs = [],
    declare(Ch, Q, NoArgs).
|
||||
|
||||
%% Declares the durable, non auto-delete queue Q on channel Ch with the
%% given arguments and returns the reply of the call.
declare(Ch, Q, Args) ->
    Declare = #'queue.declare'{queue = Q,
                               durable = true,
                               auto_delete = false,
                               arguments = Args},
    amqp_channel:call(Ch, Declare).
|
||||
|
||||
%% Waits for Condition to return `true', polling up to 60 times.
wait_until(Condition) ->
    Retries = 60,
    wait_until(Condition, Retries).
|
||||
|
||||
%% Polls Condition every 500 ms until it returns `true' (result: `ok').
%% Once the retries are used up, Condition is evaluated one last time
%% under an assertion, so a condition that never holds fails the test.
wait_until(Condition, 0) ->
    ?assertEqual(true, Condition());
wait_until(Condition, RetriesLeft) ->
    case Condition() =:= true of
        true ->
            ok;
        false ->
            timer:sleep(500),
            wait_until(Condition, RetriesLeft - 1)
    end.
|
||||
|
||||
|
||||
%% Deletes every queue listed by rabbit_amqqueue:list/0 and returns the
%% list of results.
delete_queues() ->
    DeleteOne = fun(Q) ->
                        rabbit_amqqueue:delete(Q, false, false, <<"dummy">>)
                end,
    lists:map(DeleteOne, rabbit_amqqueue:list()).
|
|
@ -82,6 +82,7 @@
|
|||
-export([maps_any/2]).
|
||||
-export([safe_ets_update_counter/3, safe_ets_update_counter/4, safe_ets_update_counter/5,
|
||||
safe_ets_update_element/3, safe_ets_update_element/4, safe_ets_update_element/5]).
|
||||
-export([is_even/1, is_odd/1]).
|
||||
|
||||
%% Horrible macro to use in guards
|
||||
-define(IS_BENIGN_EXIT(R),
|
||||
|
@ -1586,3 +1587,10 @@ maps_any_1(Pred, {K, V, I}) ->
|
|||
false ->
|
||||
maps_any_1(Pred, maps:next(I))
|
||||
end.
|
||||
|
||||
%% Returns `true' when the integer N is divisible by two.
-spec is_even(integer()) -> boolean().
is_even(N) ->
    N rem 2 =:= 0.
|
||||
%% Returns `true' when the integer N is not divisible by two.
-spec is_odd(integer()) -> boolean().
is_odd(N) ->
    N rem 2 =/= 0.
|
||||
|
|
|
@ -336,6 +336,7 @@
|
|||
<% if (queue_type == "quorum") { %>
|
||||
<span class="argument-link" field="arguments" key="x-delivery-limit" type="number">Delivery limit</span><span class="help" id="delivery-limit"></span>
|
||||
| <span class="argument-link" field="arguments" key="x-quorum-initial-group-size" type="number">Initial cluster size</span><span class="help" id="queue-initial-cluster-size"></span><br/>
|
||||
| <span class="argument-link" field="arguments" key="x-quorum-target-group-size" type="number">Target cluster size</span><span class="help" id="qourum-queue-target-group-size"></span>
|
||||
<span class="argument-link" field="arguments" key="x-dead-letter-strategy" type="string">Dead letter strategy</span><span class="help" id="queue-dead-letter-strategy"></span>
|
||||
<% } %>
|
||||
<% if (queue_type == "stream") { %>
|
||||
|
|
|
@ -736,6 +736,7 @@ rabbit:
|
|||
- rabbit_queue_type_util
|
||||
- rabbit_quorum_memory_manager
|
||||
- rabbit_quorum_queue
|
||||
- rabbit_quorum_queue_periodic_membership_reconciliation
|
||||
- rabbit_ra_registry
|
||||
- rabbit_ra_systems
|
||||
- rabbit_reader
|
||||
|
|
Loading…
Reference in New Issue