Don't perform CMQ leadership transfer when entering maintenance mode
In clusters with a lot of classic mirrored queues (say, 10s or 100s of thousands), the time this operation takes can be prohibitive for upgrades. Upgrades can instead use a health check to ensure that there are in-sync replicas before entering maintenance mode, in which case the transfer is not really necessary. All of the above is more obvious with the recent changes in #2749.
parent 50761cbe03
commit c7b9c39352
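The health check alluded to in the message can be sketched in a few lines of Erlang. This is a minimal illustration, not the shipped check (RabbitMQ ships a CLI variant, `rabbitmq-diagnostics check_if_node_is_mirror_sync_critical`); the module name and the reliance on the `synchronised_slave_pids` info item are assumptions here:

%% Minimal sketch, not the shipped health check: report local classic
%% queues in a vhost that currently have no synchronised mirror, i.e.
%% queues whose only in-sync replica would go away if this node drained.
-module(pre_drain_health_check).
-export([queues_without_synchronised_mirrors/1]).

queues_without_synchronised_mirrors(VHost) ->
    [Q || Q <- rabbit_amqqueue:list_local(VHost),
          begin
              Info = rabbit_amqqueue:info(Q, [synchronised_slave_pids]),
              proplists:get_value(synchronised_slave_pids, Info, []) =:= []
          end].

An upgrade script would refuse to drain the node for as long as this list is non-empty.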
@@ -88,7 +88,8 @@ do_drain() ->
     ReadableCandidates = readable_candidate_list(TransferCandidates),
     rabbit_log:info("Node will transfer primary replicas of its queues to ~b peers: ~s",
                     [length(TransferCandidates), ReadableCandidates]),
-    transfer_leadership_of_classic_mirrored_queues(TransferCandidates),
+    %% Note: only QQ leadership is transferred because it is a reasonably quick thing
+    %% to do even with a lot of queues in the cluster, unlike with CMQs.
     transfer_leadership_of_quorum_queues(TransferCandidates),
     stop_local_quorum_queue_followers(),
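For contrast, here is a rough sketch of why the quorum queue path stays cheap: each queue needs a single Raft-level leadership-transfer command and no data movement. Everything below is an assumption-laden illustration, not the committed `transfer_leadership_of_quorum_queues/1`; in particular `ra:transfer_leadership/2` and the `amqqueue` accessors are assumed APIs, and candidate selection is reduced to one target node:

%% Sketch only: ask Raft to hand leadership of each local quorum queue
%% to the member on TargetNode. No message data moves; followers already
%% hold the log, so this is one command per queue.
transfer_local_quorum_queue_leaders(VHost, TargetNode) ->
    [begin
         {RegName, _LeaderNode} = amqqueue:get_pid(Q),
         ra:transfer_leadership({RegName, node()}, {RegName, TargetNode})
     end || Q <- rabbit_amqqueue:list_local(VHost),
            amqqueue:get_type(Q) =:= rabbit_quorum_queue].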
@@ -248,6 +249,11 @@ transfer_leadership_of_quorum_queues(_TransferCandidates) ->
     rabbit_log:info("Leadership transfer for quorum queues hosted on this node has been initiated").

 -spec transfer_leadership_of_classic_mirrored_queues([node()]) -> ok.
+%% This function is no longer used by maintenance mode. We retain it in case
+%% classic mirrored queue leadership transfer is ever reconsidered.
+%%
+%% With a lot of CMQs in a cluster, the transfer procedure can take prohibitively long
+%% for a pre-upgrade task.
 transfer_leadership_of_classic_mirrored_queues([]) ->
     rabbit_log:warning("Skipping leadership transfer of classic mirrored queues: no candidate "
                        "(online, not under maintenance) nodes to transfer to!");
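To make "prohibitively long" concrete: the retained helper amounts to a sequential per-queue handoff, each one a synchronous promotion of an in-sync mirror. A sketch under the assumption that `rabbit_amqqueue:list_local_mirrored_classic_queues/0` and `rabbit_mirror_queue_misc:transfer_leadership/2` are the underlying helpers; `pick_candidate/1` is hypothetical:

%% Illustration of the cost model, not the committed body: CMQ leadership
%% moves one queue at a time.
transfer_all_cmq_leaders(TransferCandidates) ->
    Queues = rabbit_amqqueue:list_local_mirrored_classic_queues(),
    lists:foreach(
      fun (Q) ->
              Destination = pick_candidate(TransferCandidates),
              %% blocks until a synchronised mirror on Destination takes
              %% over, so total time grows linearly with the number of CMQs
              rabbit_mirror_queue_misc:transfer_leadership(Q, Destination)
      end, Queues).

With, say, 100,000 mirrored queues, even ~10 ms per handoff puts this loop at well over 15 minutes, which is the motivation for skipping it during drain.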
@@ -24,8 +24,7 @@ groups() ->
     {cluster_size_3, [], [
         maintenance_mode_status,
         listener_suspension_status,
-        client_connection_closure,
-        classic_mirrored_queue_leadership_transfer
+        client_connection_closure
     ]},
     {quorum_queues, [], [
         quorum_queue_leadership_transfer
@@ -211,33 +210,6 @@ client_connection_closure(Config) ->
     rabbit_ct_broker_helpers:revive_node(Config, A).


-classic_mirrored_queue_leadership_transfer(Config) ->
-    [A | _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
-    ct:pal("Picked node ~s for maintenance tests...", [A]),
-
-    rabbit_ct_helpers:await_condition(
-        fun () -> not rabbit_ct_broker_helpers:is_being_drained_local_read(Config, A) end, 10000),
-
-    PolicyPattern = <<"^cq.mirrored">>,
-    rabbit_ct_broker_helpers:set_ha_policy(Config, A, PolicyPattern, <<"all">>),
-
-    Conn = rabbit_ct_client_helpers:open_connection(Config, A),
-    {ok, Ch} = amqp_connection:open_channel(Conn),
-    QName = <<"cq.mirrored.1">>,
-    amqp_channel:call(Ch, #'queue.declare'{queue = QName, durable = true}),
-
-    ?assertEqual(1, length(rabbit_ct_broker_helpers:rpc(Config, A, rabbit_amqqueue, list_local, [<<"/">>]))),
-
-    rabbit_ct_broker_helpers:drain_node(Config, A),
-    rabbit_ct_helpers:await_condition(
-        fun () -> rabbit_ct_broker_helpers:is_being_drained_local_read(Config, A) end, 10000),
-
-    ?assertEqual(0, length(rabbit_ct_broker_helpers:rpc(Config, A, rabbit_amqqueue, list_local, [<<"/">>]))),
-
-    rabbit_ct_broker_helpers:revive_node(Config, A),
-    %% rabbit_ct_broker_helpers:set_ha_policy/4 uses pattern for policy name
-    rabbit_ct_broker_helpers:clear_policy(Config, A, PolicyPattern).
-
 quorum_queue_leadership_transfer(Config) ->
     [A | _] = Nodenames = rabbit_ct_broker_helpers:get_node_configs(
                             Config, nodename),
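Outside of CT, the drain/revive cycle the removed test exercised can still be driven directly. A sketch, assuming `rabbit_maintenance:drain/0` and `rabbit_maintenance:revive/0` are the functions behind `rabbitmq-upgrade drain` and `rabbitmq-upgrade revive` (module and function names inferred from the `do_drain/0` hunk above, not shown on this page):

%% Hypothetical driver for one node's maintenance window; Node is an
%% already-connected cluster member, e.g. 'rabbit@host-1'.
maintenance_window(Node) ->
    %% close client connections, suspend listeners, transfer QQ leaders
    ok = rpc:call(Node, rabbit_maintenance, drain, []),
    %% ... perform the upgrade steps here ...
    %% bring the node back into service
    ok = rpc:call(Node, rabbit_maintenance, revive, []).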