Prefer running nodes for replica selection
When declaring a quorum queue or a stream, select its replicas in the following order: 1. local RabbitMQ node (to have data locality for declaring client) 2. running RabbitMQ nodes 3. RabbitMQ nodes with least quorum queue or stream replicas (to have a "balanced" RabbitMQ cluster). From now on, quorum queues and streams behave the same way for replica selection strategy and leader locator strategy.
This commit is contained in:
parent
f903ef95cc
commit
1315b1d4b1
|
|
@ -0,0 +1,119 @@
|
|||
%% This Source Code Form is subject to the terms of the Mozilla Public
|
||||
%% License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
%%
|
||||
%% Copyright (c) 2007-2022 VMware, Inc. or its affiliates. All rights reserved.
|
||||
%%
|
||||
|
||||
-module(rabbit_queue_location).
|
||||
|
||||
-include("amqqueue.hrl").
|
||||
|
||||
-export([select_leader_and_followers/2]).
|
||||
|
||||
select_leader_and_followers(Q, Size)
|
||||
when (?amqqueue_is_quorum(Q) orelse ?amqqueue_is_stream(Q)) andalso is_integer(Size) ->
|
||||
QueueType = amqqueue:get_type(Q),
|
||||
GetQueues0 = get_queues_for_type(QueueType),
|
||||
{AllNodes, _DiscNodes, RunningNodes} = rabbit_mnesia:cluster_nodes(status),
|
||||
{Replicas, GetQueues} = select_replicas(Size, AllNodes, RunningNodes, GetQueues0),
|
||||
LeaderLocator = leader_locator(
|
||||
rabbit_queue_type_util:args_policy_lookup(
|
||||
<<"queue-leader-locator">>,
|
||||
fun (PolVal, _ArgVal) ->
|
||||
PolVal
|
||||
end, Q)),
|
||||
Leader = leader_node(LeaderLocator, Replicas, RunningNodes, GetQueues),
|
||||
Followers = lists:delete(Leader, Replicas),
|
||||
{Leader, Followers}.
|
||||
|
||||
select_replicas(Size, AllNodes, _, Fun)
|
||||
when length(AllNodes) =< Size ->
|
||||
{AllNodes, Fun};
|
||||
select_replicas(Size, _, RunningNodes, Fun)
|
||||
when length(RunningNodes) =:= Size ->
|
||||
{RunningNodes, Fun};
|
||||
select_replicas(Size, AllNodes, RunningNodes, GetQueues) ->
|
||||
%% Select nodes in the following order:
|
||||
%% 1. local node (to have data locality for declaring client)
|
||||
%% 2. running nodes
|
||||
%% 3. nodes with least replicas (to have a "balanced" RabbitMQ cluster).
|
||||
Local = node(),
|
||||
true = lists:member(Local, AllNodes),
|
||||
true = lists:member(Local, RunningNodes),
|
||||
Counters0 = maps:from_list([{Node, 0} || Node <- lists:delete(Local, AllNodes)]),
|
||||
Queues = GetQueues(),
|
||||
Counters = lists:foldl(fun(Q, Acc) ->
|
||||
#{nodes := Nodes} = amqqueue:get_type_state(Q),
|
||||
lists:foldl(fun(N, A)
|
||||
when is_map_key(N, A) ->
|
||||
maps:update_with(N, fun(C) -> C+1 end, A);
|
||||
(_, A) ->
|
||||
A
|
||||
end, Acc, Nodes)
|
||||
end, Counters0, Queues),
|
||||
L0 = maps:to_list(Counters),
|
||||
L1 = lists:sort(fun({N0, C0}, {N1, C1}) ->
|
||||
case {lists:member(N0, RunningNodes),
|
||||
lists:member(N1, RunningNodes)} of
|
||||
{true, false} ->
|
||||
true;
|
||||
{false, true} ->
|
||||
false;
|
||||
_ ->
|
||||
C0 =< C1
|
||||
end
|
||||
end, L0),
|
||||
{L2, _} = lists:split(Size - 1, L1),
|
||||
L = lists:map(fun({N, _}) -> N end, L2),
|
||||
{[Local | L], fun() -> Queues end}.
|
||||
|
||||
leader_locator(undefined) -> <<"client-local">>;
|
||||
leader_locator(Val) -> Val.
|
||||
|
||||
leader_node(<<"client-local">>, _, _, _) ->
|
||||
node();
|
||||
leader_node(<<"random">>, Nodes0, RunningNodes, _) ->
|
||||
Nodes = potential_leaders(Nodes0, RunningNodes),
|
||||
lists:nth(rand:uniform(length(Nodes)), Nodes);
|
||||
leader_node(<<"least-leaders">>, Nodes0, RunningNodes, GetQueues)
|
||||
when is_function(GetQueues, 0) ->
|
||||
Nodes = potential_leaders(Nodes0, RunningNodes),
|
||||
Counters0 = maps:from_list([{N, 0} || N <- Nodes]),
|
||||
Counters = lists:foldl(fun(Q, Acc) ->
|
||||
case amqqueue:get_pid(Q) of
|
||||
{RaName, LeaderNode}
|
||||
when is_atom(RaName), is_atom(LeaderNode), is_map_key(LeaderNode, Acc) ->
|
||||
maps:update_with(LeaderNode, fun(C) -> C+1 end, Acc);
|
||||
StreamLeaderPid
|
||||
when is_pid(StreamLeaderPid), is_map_key(node(StreamLeaderPid), Acc) ->
|
||||
maps:update_with(node(StreamLeaderPid), fun(C) -> C+1 end, Acc);
|
||||
_ ->
|
||||
Acc
|
||||
end
|
||||
end, Counters0, GetQueues()),
|
||||
{Node, _} = hd(lists:keysort(2, maps:to_list(Counters))),
|
||||
Node.
|
||||
|
||||
potential_leaders(Nodes, AllRunningNodes) ->
|
||||
RunningNodes = lists:filter(fun(N) ->
|
||||
lists:member(N, AllRunningNodes)
|
||||
end, Nodes),
|
||||
case rabbit_maintenance:filter_out_drained_nodes_local_read(RunningNodes) of
|
||||
[] ->
|
||||
%% All running nodes are drained. Let's place the leader on a drained node
|
||||
%% respecting the requested queue-leader-locator streategy.
|
||||
RunningNodes;
|
||||
Filtered ->
|
||||
Filtered
|
||||
end.
|
||||
|
||||
%% Return a function so that queues are fetched lazily (i.e. only when needed,
|
||||
%% and at most once when no amqqueue migration is going on).
|
||||
get_queues_for_type(QueueType) ->
|
||||
fun() -> rabbit_amqqueue:list_with_possible_retry(
|
||||
fun() ->
|
||||
mnesia:dirty_match_object(rabbit_queue,
|
||||
amqqueue:pattern_match_on_type(QueueType))
|
||||
end)
|
||||
end.
|
||||
|
|
@ -1,3 +1,10 @@
|
|||
%% This Source Code Form is subject to the terms of the Mozilla Public
|
||||
%% License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
%%
|
||||
%% Copyright (c) 2007-2022 VMware, Inc. or its affiliates. All rights reserved.
|
||||
%%
|
||||
|
||||
-module(rabbit_queue_type).
|
||||
-include("amqqueue.hrl").
|
||||
-include_lib("rabbit_common/include/resource.hrl").
|
||||
|
|
|
|||
|
|
@ -180,18 +180,10 @@ start_cluster(Q) ->
|
|||
rabbit_data_coercion:to_atom(ra:new_uid(N))
|
||||
end,
|
||||
Id = {RaName, node()},
|
||||
AllQuorumQs = rabbit_amqqueue:list_with_possible_retry(
|
||||
fun() ->
|
||||
mnesia:dirty_match_object(rabbit_queue,
|
||||
amqqueue:pattern_match_on_type(?MODULE))
|
||||
end),
|
||||
Nodes = select_quorum_nodes(QuorumSize, rabbit_nodes:all(), AllQuorumQs),
|
||||
LeaderLocator = leader_locator(args_policy_lookup(<<"queue-leader-locator">>,
|
||||
fun policyHasPrecedence/2, Q)),
|
||||
LeaderNode = leader_node(LeaderLocator, Nodes, AllQuorumQs),
|
||||
LeaderId = {RaName, LeaderNode},
|
||||
{Leader, Followers} = rabbit_queue_location:select_leader_and_followers(Q, QuorumSize),
|
||||
LeaderId = {RaName, Leader},
|
||||
NewQ0 = amqqueue:set_pid(Q, LeaderId),
|
||||
NewQ1 = amqqueue:set_type_state(NewQ0, #{nodes => Nodes}),
|
||||
NewQ1 = amqqueue:set_type_state(NewQ0, #{nodes => [Leader | Followers]}),
|
||||
|
||||
rabbit_log:debug("Will start up to ~w replicas for quorum queue ~s",
|
||||
[QuorumSize, rabbit_misc:rs(QName)]),
|
||||
|
|
@ -1608,62 +1600,6 @@ get_default_quorum_initial_group_size(Arguments) ->
|
|||
Val
|
||||
end.
|
||||
|
||||
select_quorum_nodes(Size, AllNodes, _)
|
||||
when length(AllNodes) =< Size ->
|
||||
AllNodes;
|
||||
select_quorum_nodes(Size, AllNodes, AllQuorumQs) ->
|
||||
%% Select local node (to have data locality for declaring client)
|
||||
%% and nodes with least quorum queue replicas (to have a "balanced" RabbitMQ cluster).
|
||||
Local = node(),
|
||||
true = lists:member(Local, AllNodes),
|
||||
Counters0 = maps:from_list([{Node, 0} || Node <- lists:delete(Local, AllNodes)]),
|
||||
Counters = lists:foldl(fun(Q, Acc) ->
|
||||
lists:foldl(fun(N, A)
|
||||
when is_map_key(N, A) ->
|
||||
maps:update_with(N, fun(C) -> C+1 end, A);
|
||||
(_, A) ->
|
||||
A
|
||||
end, Acc, get_nodes(Q))
|
||||
end, Counters0, AllQuorumQs),
|
||||
L0 = maps:to_list(Counters),
|
||||
L1 = lists:keysort(2, L0),
|
||||
{L, _} = lists:split(Size - 1, L1),
|
||||
LeastReplicas = lists:map(fun({N, _}) -> N end, L),
|
||||
[Local | LeastReplicas].
|
||||
|
||||
leader_locator(undefined) -> <<"client-local">>;
|
||||
leader_locator(Val) -> Val.
|
||||
|
||||
leader_node(<<"client-local">>, _, _) ->
|
||||
node();
|
||||
leader_node(<<"random">>, Nodes0, _) ->
|
||||
Nodes = potential_leaders(Nodes0),
|
||||
lists:nth(rand:uniform(length(Nodes)), Nodes);
|
||||
leader_node(<<"least-leaders">>, Nodes0, AllQuorumQs) ->
|
||||
Nodes = potential_leaders(Nodes0),
|
||||
Counters0 = maps:from_list([{N, 0} || N <- Nodes]),
|
||||
Counters = lists:foldl(fun(Q, Acc) ->
|
||||
case amqqueue:get_pid(Q) of
|
||||
{_, LeaderNode}
|
||||
when is_map_key(LeaderNode, Acc) ->
|
||||
maps:update_with(LeaderNode, fun(C) -> C+1 end, Acc);
|
||||
_ ->
|
||||
Acc
|
||||
end
|
||||
end, Counters0, AllQuorumQs),
|
||||
{Node, _} = hd(lists:keysort(2, maps:to_list(Counters))),
|
||||
Node.
|
||||
|
||||
potential_leaders(Nodes) ->
|
||||
case rabbit_maintenance:filter_out_drained_nodes_local_read(Nodes) of
|
||||
[] ->
|
||||
%% All nodes are drained. Let's place the leader on a drained node
|
||||
%% respecting the requested queue-leader-locator streategy.
|
||||
Nodes;
|
||||
Filtered ->
|
||||
Filtered
|
||||
end.
|
||||
|
||||
%% member with the current leader first
|
||||
members(Q) when ?amqqueue_is_quorum(Q) ->
|
||||
{RaName, LeaderNode} = amqqueue:get_pid(Q),
|
||||
|
|
|
|||
|
|
@ -86,30 +86,35 @@ is_enabled() ->
|
|||
-spec declare(amqqueue:amqqueue(), node()) ->
|
||||
{'new' | 'existing', amqqueue:amqqueue()} |
|
||||
{protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
|
||||
declare(Q0, Node) when ?amqqueue_is_stream(Q0) ->
|
||||
declare(Q0, _Node) when ?amqqueue_is_stream(Q0) ->
|
||||
case rabbit_queue_type_util:run_checks(
|
||||
[fun rabbit_queue_type_util:check_auto_delete/1,
|
||||
fun rabbit_queue_type_util:check_exclusive/1,
|
||||
fun rabbit_queue_type_util:check_non_durable/1],
|
||||
Q0) of
|
||||
ok ->
|
||||
create_stream(Q0, Node);
|
||||
create_stream(Q0);
|
||||
Err ->
|
||||
Err
|
||||
end.
|
||||
|
||||
create_stream(Q0, Node) ->
|
||||
create_stream(Q0) ->
|
||||
Arguments = amqqueue:get_arguments(Q0),
|
||||
QName = amqqueue:get_name(Q0),
|
||||
Opts = amqqueue:get_options(Q0),
|
||||
ActingUser = maps:get(user, Opts, ?UNKNOWN_USER),
|
||||
Conf0 = make_stream_conf(Node, Q0),
|
||||
Conf = apply_leader_locator_strategy(Conf0),
|
||||
#{leader_node := LeaderNode} = Conf,
|
||||
Conf0 = make_stream_conf(Q0),
|
||||
InitialClusterSize = initial_cluster_size(
|
||||
args_policy_lookup(<<"initial-cluster-size">>,
|
||||
fun policy_precedence/2, Q0)),
|
||||
{Leader, Followers} = rabbit_queue_location:select_leader_and_followers(Q0, InitialClusterSize),
|
||||
Conf = maps:merge(Conf0, #{nodes => [Leader | Followers],
|
||||
leader_node => Leader,
|
||||
replica_nodes => Followers}),
|
||||
Q1 = amqqueue:set_type_state(Q0, Conf),
|
||||
case rabbit_amqqueue:internal_declare(Q1, false) of
|
||||
{created, Q} ->
|
||||
case rabbit_stream_coordinator:new_stream(Q, LeaderNode) of
|
||||
case rabbit_stream_coordinator:new_stream(Q, Leader) of
|
||||
{ok, {ok, LeaderPid}, _} ->
|
||||
%% update record with leader pid
|
||||
set_leader_pid(LeaderPid, amqqueue:get_name(Q)),
|
||||
|
|
@ -770,21 +775,13 @@ delete_replica(VHost, Name, Node) ->
|
|||
E
|
||||
end.
|
||||
|
||||
make_stream_conf(Node, Q) ->
|
||||
make_stream_conf(Q) ->
|
||||
QName = amqqueue:get_name(Q),
|
||||
Name = stream_name(QName),
|
||||
%% MaxLength = args_policy_lookup(<<"max-length">>, policy_precedence/2, Q),
|
||||
MaxBytes = args_policy_lookup(<<"max-length-bytes">>, fun policy_precedence/2, Q),
|
||||
MaxAge = max_age(args_policy_lookup(<<"max-age">>, fun policy_precedence/2, Q)),
|
||||
MaxSegmentSizeBytes = args_policy_lookup(<<"stream-max-segment-size-bytes">>, fun policy_precedence/2, Q),
|
||||
LeaderLocator = queue_leader_locator(args_policy_lookup(<<"queue-leader-locator">>,
|
||||
fun policy_precedence/2, Q)),
|
||||
InitialClusterSize = initial_cluster_size(
|
||||
args_policy_lookup(<<"initial-cluster-size">>,
|
||||
fun policy_precedence/2, Q)),
|
||||
Replicas0 = rabbit_nodes:all() -- [Node],
|
||||
%% TODO: try to avoid nodes that are not connected
|
||||
Replicas = select_stream_nodes(InitialClusterSize - 1, Replicas0),
|
||||
Formatter = {?MODULE, format_osiris_event, [QName]},
|
||||
Retention = lists:filter(fun({_, R}) ->
|
||||
R =/= undefined
|
||||
|
|
@ -794,30 +791,9 @@ make_stream_conf(Node, Q) ->
|
|||
#{reference => QName,
|
||||
name => Name,
|
||||
retention => Retention,
|
||||
nodes => [Node | Replicas],
|
||||
leader_locator_strategy => LeaderLocator,
|
||||
leader_node => Node,
|
||||
replica_nodes => Replicas,
|
||||
event_formatter => Formatter,
|
||||
epoch => 1}).
|
||||
|
||||
select_stream_nodes(Size, All) when length(All) =< Size ->
|
||||
All;
|
||||
select_stream_nodes(Size, All) ->
|
||||
Node = node(),
|
||||
case lists:member(Node, All) of
|
||||
true ->
|
||||
select_stream_nodes(Size - 1, lists:delete(Node, All), [Node]);
|
||||
false ->
|
||||
select_stream_nodes(Size, All, [])
|
||||
end.
|
||||
|
||||
select_stream_nodes(0, _, Selected) ->
|
||||
Selected;
|
||||
select_stream_nodes(Size, Rest, Selected) ->
|
||||
S = lists:nth(rand:uniform(length(Rest)), Rest),
|
||||
select_stream_nodes(Size - 1, lists:delete(S, Rest), [S | Selected]).
|
||||
|
||||
update_stream_conf(undefined, #{} = Conf) ->
|
||||
Conf;
|
||||
update_stream_conf(Q, #{} = Conf) when ?is_amqqueue(Q) ->
|
||||
|
|
@ -846,9 +822,6 @@ max_age(Bin) when is_binary(Bin) ->
|
|||
max_age(Age) ->
|
||||
Age.
|
||||
|
||||
queue_leader_locator(undefined) -> <<"client-local">>;
|
||||
queue_leader_locator(Val) -> Val.
|
||||
|
||||
initial_cluster_size(undefined) ->
|
||||
length(rabbit_nodes:all());
|
||||
initial_cluster_size(Val) ->
|
||||
|
|
@ -1020,55 +993,3 @@ set_leader_pid(Pid, QName) ->
|
|||
_ ->
|
||||
ok
|
||||
end.
|
||||
|
||||
apply_leader_locator_strategy(#{leader_locator_strategy := <<"client-local">>} = Conf) ->
|
||||
Conf;
|
||||
apply_leader_locator_strategy(#{leader_node := Leader,
|
||||
replica_nodes := Replicas0,
|
||||
leader_locator_strategy := <<"random">>} = Conf) ->
|
||||
Replicas = [Leader | Replicas0],
|
||||
PotentialLeaders = potential_leaders(Replicas),
|
||||
NewLeader = lists:nth(rand:uniform(length(PotentialLeaders)), PotentialLeaders),
|
||||
NewReplicas = lists:delete(NewLeader, Replicas),
|
||||
Conf#{leader_node => NewLeader,
|
||||
replica_nodes => NewReplicas};
|
||||
apply_leader_locator_strategy(#{leader_node := Leader,
|
||||
replica_nodes := Replicas0,
|
||||
leader_locator_strategy := <<"least-leaders">>} = Conf) ->
|
||||
Replicas = [Leader | Replicas0],
|
||||
PotentialLeaders = potential_leaders(Replicas),
|
||||
Counters0 = maps:from_list([{R, 0} || R <- PotentialLeaders]),
|
||||
Counters = maps:to_list(
|
||||
lists:foldl(fun(Q, Acc) ->
|
||||
P = amqqueue:get_pid(Q),
|
||||
case amqqueue:get_type(Q) of
|
||||
?MODULE when is_pid(P) ->
|
||||
maps:update_with(node(P), fun(V) -> V + 1 end, 1, Acc);
|
||||
_ ->
|
||||
Acc
|
||||
end
|
||||
end, Counters0, rabbit_amqqueue:list())),
|
||||
Ordered = lists:keysort(2, Counters),
|
||||
%% We could have potentially introduced nodes that are not in the list of replicas if
|
||||
%% initial cluster size is smaller than the cluster size. Let's select the first one
|
||||
%% that is on the list of replicas
|
||||
NewLeader = select_first_matching_node(Ordered, Replicas),
|
||||
NewReplicas = lists:delete(NewLeader, Replicas),
|
||||
Conf#{leader_node => NewLeader,
|
||||
replica_nodes => NewReplicas}.
|
||||
|
||||
potential_leaders(Nodes) ->
|
||||
case rabbit_maintenance:filter_out_drained_nodes_local_read(Nodes) of
|
||||
[] ->
|
||||
%% All nodes are drained. Let's place the leader on a drained node
|
||||
%% respecting the requested queue-leader-locator streategy.
|
||||
Nodes;
|
||||
Filtered ->
|
||||
Filtered
|
||||
end.
|
||||
|
||||
select_first_matching_node([{N, _} | Rest], Replicas) ->
|
||||
case lists:member(N, Replicas) of
|
||||
true -> N;
|
||||
false -> select_first_matching_node(Rest, Replicas)
|
||||
end.
|
||||
|
|
|
|||
|
|
@ -84,7 +84,8 @@ groups() ->
|
|||
quorum_cluster_size_3,
|
||||
quorum_cluster_size_7,
|
||||
node_removal_is_not_quorum_critical,
|
||||
select_nodes_with_least_replicas
|
||||
select_nodes_with_least_replicas,
|
||||
select_nodes_with_least_replicas_node_down
|
||||
]},
|
||||
{clustered_with_partitions, [],
|
||||
[
|
||||
|
|
@ -1402,6 +1403,11 @@ declare_during_node_down(Config) ->
|
|||
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
|
||||
|
||||
RaName = ra_name(QQ),
|
||||
{ok, Members0, _} = ra:members({RaName, Server}),
|
||||
%% Since there are not sufficient running nodes, we expect that
|
||||
%% also stopped nodes are selected as replicas.
|
||||
Members = lists:map(fun({_, N}) -> N end, Members0),
|
||||
?assert(same_elements(Members, Servers)),
|
||||
timer:sleep(2000),
|
||||
rabbit_ct_broker_helpers:start_node(Config, DownServer),
|
||||
publish(Ch, QQ),
|
||||
|
|
@ -2736,18 +2742,51 @@ select_nodes_with_least_replicas(Config) ->
|
|||
declare(Ch, Q,
|
||||
[{<<"x-queue-type">>, longstr, <<"quorum">>},
|
||||
{<<"x-quorum-initial-group-size">>, long, 3}])),
|
||||
{ok, Members, _} = ra:members({ra_name(Q), Server}),
|
||||
?assertEqual(3, length(Members)),
|
||||
lists:map(fun({_, N}) -> N end, Members)
|
||||
{ok, Members0, _} = ra:members({ra_name(Q), Server}),
|
||||
?assertEqual(3, length(Members0)),
|
||||
lists:map(fun({_, N}) -> N end, Members0)
|
||||
end || Q <- Qs],
|
||||
%% Assert that second queue selected the nodes where first queue does not have replicas.
|
||||
?assertEqual(5, sets:size(sets:from_list(lists:flatten(Members)))),
|
||||
|
||||
[?assertMatch(#'queue.delete_ok'{},
|
||||
amqp_channel:call(Ch, #'queue.delete'{queue = Q}))
|
||||
|| Q <- Qs].
|
||||
|
||||
select_nodes_with_least_replicas_node_down(Config) ->
|
||||
[S1, S2 | _ ] = Servers = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
|
||||
?assertEqual(ok, rabbit_control_helper:command(stop_app, S2)),
|
||||
RunningNodes = lists:delete(S2, Servers),
|
||||
Ch = rabbit_ct_client_helpers:open_channel(Config, S1),
|
||||
Qs = [?config(queue_name, Config),
|
||||
?config(alt_queue_name, Config)],
|
||||
|
||||
timer:sleep(1000),
|
||||
Members = [begin
|
||||
?assertMatch({'queue.declare_ok', Q, 0, 0},
|
||||
declare(Ch, Q,
|
||||
[{<<"x-queue-type">>, longstr, <<"quorum">>},
|
||||
{<<"x-quorum-initial-group-size">>, long, 3}])),
|
||||
{ok, Members0, _} = ra:members({ra_name(Q), S1}),
|
||||
?assertEqual(3, length(Members0)),
|
||||
lists:map(fun({_, N}) -> N end, Members0)
|
||||
end || Q <- Qs],
|
||||
%% Assert that
|
||||
%% 1. no replicas got placed on a node which is down because there are sufficient running nodes, and
|
||||
%% 2. second queue selected the nodes where first queue does not have replicas.
|
||||
?assert(same_elements(lists:flatten(Members), RunningNodes)),
|
||||
|
||||
?assertEqual(ok, rabbit_control_helper:command(start_app, S2)),
|
||||
[?assertMatch(#'queue.delete_ok'{},
|
||||
amqp_channel:call(Ch, #'queue.delete'{queue = Q}))
|
||||
|| Q <- Qs].
|
||||
|
||||
%%----------------------------------------------------------------------------
|
||||
|
||||
same_elements(L1, L2)
|
||||
when is_list(L1), is_list(L2) ->
|
||||
lists:usort(L1) =:= lists:usort(L2).
|
||||
|
||||
declare(Ch, Q) ->
|
||||
declare(Ch, Q, []).
|
||||
|
||||
|
|
|
|||
|
|
@ -53,11 +53,15 @@ groups() ->
|
|||
leader_locator_policy,
|
||||
queue_size_on_declare,
|
||||
leader_locator_random_maintenance,
|
||||
leader_locator_least_leaders_maintenance
|
||||
leader_locator_least_leaders_maintenance,
|
||||
leader_locator_random,
|
||||
leader_locator_least_leaders,
|
||||
select_nodes_with_least_replicas
|
||||
]},
|
||||
{cluster_size_3_1, [], [shrink_coordinator_cluster]},
|
||||
{cluster_size_3_2, [], [recover,
|
||||
declare_with_node_down]},
|
||||
declare_with_node_down_1,
|
||||
declare_with_node_down_2]},
|
||||
{cluster_size_3_parallel_1, [parallel], [
|
||||
delete_replica,
|
||||
delete_last_replica,
|
||||
|
|
@ -68,9 +72,7 @@ groups() ->
|
|||
initial_cluster_size_two,
|
||||
initial_cluster_size_one_policy,
|
||||
leader_locator_client_local,
|
||||
declare_delete_same_stream,
|
||||
leader_locator_random,
|
||||
leader_locator_least_leaders
|
||||
declare_delete_same_stream
|
||||
]},
|
||||
{cluster_size_3_parallel_2, [parallel], all_tests()},
|
||||
{unclustered_size_3_1, [], [add_replica]},
|
||||
|
|
@ -708,19 +710,39 @@ restart_single_node(Config) ->
|
|||
|
||||
%% the failing case for this test relies on a particular random condition
|
||||
%% please never consider this a flake
|
||||
declare_with_node_down(Config) ->
|
||||
declare_with_node_down_1(Config) ->
|
||||
[Server1, Server2, Server3] = Servers = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
|
||||
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
|
||||
rabbit_ct_broker_helpers:stop_node(Config, Server2),
|
||||
Q = ?config(queue_name, Config),
|
||||
?assertEqual({'queue.declare_ok', Q, 0, 0},
|
||||
declare(Ch, Q, [{<<"x-queue-type">>, longstr, <<"stream">>}])),
|
||||
|
||||
declare(Ch, Q, [{<<"x-queue-type">>, longstr, <<"stream">>},
|
||||
{<<"x-initial-cluster-size">>, long, 3}])),
|
||||
check_leader_and_replicas(Config, [Server1, Server3]),
|
||||
%% Since there are not sufficient running nodes, we expect that
|
||||
%% also stopped nodes are selected as replicas.
|
||||
check_members(Config, Servers),
|
||||
rabbit_ct_broker_helpers:start_node(Config, Server2),
|
||||
check_leader_and_replicas(Config, Servers),
|
||||
ok.
|
||||
|
||||
declare_with_node_down_2(Config) ->
|
||||
[Server1, Server2, Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
|
||||
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
|
||||
rabbit_ct_broker_helpers:stop_node(Config, Server2),
|
||||
Q = ?config(queue_name, Config),
|
||||
?assertEqual({'queue.declare_ok', Q, 0, 0},
|
||||
declare(Ch, Q, [{<<"x-queue-type">>, longstr, <<"stream">>},
|
||||
{<<"x-initial-cluster-size">>, long, 2},
|
||||
{<<"x-queue-leader-locator">>, longstr, <<"random">>}])),
|
||||
check_leader_and_replicas(Config, [Server1, Server3]),
|
||||
%% Since there are sufficient running nodes, we expect that
|
||||
%% stopped nodes are not selected as replicas.
|
||||
check_members(Config, [Server1, Server3]),
|
||||
rabbit_ct_broker_helpers:start_node(Config, Server2),
|
||||
check_leader_and_replicas(Config, [Server1, Server3]),
|
||||
ok.
|
||||
|
||||
recover(Config) ->
|
||||
[Server | _] = Servers0 = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
|
||||
|
||||
|
|
@ -1887,16 +1909,16 @@ leader_locator_least_leaders(Config) ->
|
|||
|
||||
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
|
||||
Q = ?config(queue_name, Config),
|
||||
Bin = rabbit_data_coercion:to_binary(?FUNCTION_NAME),
|
||||
Q1 = <<Bin/binary, "_q1">>,
|
||||
Q2 = <<Bin/binary, "_q2">>,
|
||||
|
||||
Q1 = <<"q1">>,
|
||||
Q2 = <<"q2">>,
|
||||
?assertEqual({'queue.declare_ok', Q1, 0, 0},
|
||||
declare(Ch, Q1, [{<<"x-queue-type">>, longstr, <<"stream">>},
|
||||
{<<"x-queue-leader-locator">>, longstr, <<"client-local">>}])),
|
||||
?assertEqual({'queue.declare_ok', Q2, 0, 0},
|
||||
declare(Ch, Q2, [{<<"x-queue-type">>, longstr, <<"stream">>},
|
||||
{<<"x-queue-leader-locator">>, longstr, <<"client-local">>}])),
|
||||
|
||||
?assertEqual({'queue.declare_ok', Q, 0, 0},
|
||||
declare(Ch, Q, [{<<"x-queue-type">>, longstr, <<"stream">>},
|
||||
{<<"x-queue-leader-locator">>, longstr, <<"least-leaders">>}])),
|
||||
|
|
@ -1905,7 +1927,32 @@ leader_locator_least_leaders(Config) ->
|
|||
Leader = proplists:get_value(leader, Info),
|
||||
|
||||
?assert(lists:member(Leader, [Server2, Server3])),
|
||||
rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, delete_testcase_queue, [Q]).
|
||||
rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, delete_queues, [[Q2, Q1, Q]]).
|
||||
|
||||
select_nodes_with_least_replicas(Config) ->
|
||||
[Server1 | _ ] = Servers = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
|
||||
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
|
||||
Q = ?config(queue_name, Config),
|
||||
Bin = rabbit_data_coercion:to_binary(?FUNCTION_NAME),
|
||||
Q1 = <<Bin/binary, "_q1">>,
|
||||
Qs = [Q1, Q],
|
||||
|
||||
[Q1Members, QMembers] =
|
||||
lists:map(fun(Q0) ->
|
||||
?assertEqual({'queue.declare_ok', Q0, 0, 0},
|
||||
declare(Ch, Q0, [{<<"x-queue-type">>, longstr, <<"stream">>},
|
||||
{<<"x-initial-cluster-size">>, long, 2}])),
|
||||
Infos = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_amqqueue, info_all,
|
||||
[<<"/">>, [name, members]]),
|
||||
Name = rabbit_misc:r(<<"/">>, queue, Q0),
|
||||
[Info] = [Props || Props <- Infos, lists:member({name, Name}, Props)],
|
||||
proplists:get_value(members, Info)
|
||||
end, Qs),
|
||||
|
||||
%% We expect that the second stream chose nodes where the first stream does not have replicas.
|
||||
?assertEqual(lists:usort(Servers),
|
||||
lists:usort(Q1Members ++ QMembers)),
|
||||
rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, delete_queues, [Qs]).
|
||||
|
||||
leader_locator_least_leaders_maintenance(Config) ->
|
||||
[Server1, Server2, Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
|
||||
|
|
@ -2137,9 +2184,8 @@ purge(Config) ->
|
|||
|
||||
%%----------------------------------------------------------------------------
|
||||
|
||||
delete_queues() ->
|
||||
[{ok, _} = rabbit_amqqueue:delete(Q, false, false, <<"dummy">>)
|
||||
|| Q <- rabbit_amqqueue:list()].
|
||||
delete_queues(Qs) when is_list(Qs) ->
|
||||
lists:foreach(fun delete_testcase_queue/1, Qs).
|
||||
|
||||
delete_testcase_queue(Name) ->
|
||||
QName = rabbit_misc:r(<<"/">>, queue, Name),
|
||||
|
|
@ -2177,7 +2223,16 @@ check_leader_and_replicas(Config, Members, Tag) ->
|
|||
ct:pal("~s members ~w ~p", [?FUNCTION_NAME, Members, Info]),
|
||||
lists:member(proplists:get_value(leader, Info), Members)
|
||||
andalso (lists:sort(Members) == lists:sort(proplists:get_value(Tag, Info)))
|
||||
end, 60000).
|
||||
end, 60_000).
|
||||
|
||||
check_members(Config, ExpectedMembers) ->
|
||||
rabbit_ct_helpers:await_condition(
|
||||
fun () ->
|
||||
Info = find_queue_info(Config, 0, [members]),
|
||||
Members = proplists:get_value(members, Info),
|
||||
ct:pal("~s members ~w ~p", [?FUNCTION_NAME, Members, Info]),
|
||||
lists:sort(ExpectedMembers) == lists:sort(Members)
|
||||
end, 20_000).
|
||||
|
||||
publish(Ch, Queue) ->
|
||||
publish(Ch, Queue, <<"msg">>).
|
||||
|
|
|
|||
|
|
@ -1033,7 +1033,7 @@ is_mixed_versions(Config) ->
|
|||
%% -------------------------------------------------------------------
|
||||
|
||||
await_condition(ConditionFun) ->
|
||||
await_condition(ConditionFun, 10000).
|
||||
await_condition(ConditionFun, 10_000).
|
||||
|
||||
await_condition(ConditionFun, Timeout) ->
|
||||
Retries = ceil(Timeout / 50),
|
||||
|
|
|
|||
Loading…
Reference in New Issue