Skip peer discovery cleanup when backend returns error
Previously, if the peer discovery backend returned an error while discovering nodes, the `service_discovery_nodes/0` helper returned an empty list. During cleanup, this meant that every node unreachable during a partition was treated as unregistered with the backend and had destructive actions taken against it: `rabbit_db_cluster:forget_member/2` and `rabbit_quorum_queue:shrink_all/1`. The `list_nodes/0` callback can fail transiently, though, and a failure doesn't mean that the cluster is empty. It's safer to avoid cleaning up any nodes when the peer discovery backend fails to return the intended set of nodes.
This commit is contained in:
parent
200127c6a8
commit
5540c69c81
|
@ -240,19 +240,29 @@ maybe_cleanup(State, UnreachableNodes) ->
|
||||||
?LOG_DEBUG(
|
?LOG_DEBUG(
|
||||||
"Peer discovery: cleanup discovered unreachable nodes: ~tp",
|
"Peer discovery: cleanup discovered unreachable nodes: ~tp",
|
||||||
[UnreachableNodes]),
|
[UnreachableNodes]),
|
||||||
case lists:subtract(as_list(UnreachableNodes), as_list(service_discovery_nodes())) of
|
Module = rabbit_peer_discovery:backend(),
|
||||||
[] ->
|
case rabbit_peer_discovery:normalize(Module:list_nodes()) of
|
||||||
?LOG_DEBUG(
|
{ok, {OneOrMultipleNodes, _Type}} ->
|
||||||
"Peer discovery: all unreachable nodes are still "
|
DiscoveredNodes = as_list(OneOrMultipleNodes),
|
||||||
"registered with the discovery backend ~tp",
|
case lists:subtract(UnreachableNodes, DiscoveredNodes) of
|
||||||
[rabbit_peer_discovery:backend()],
|
[] ->
|
||||||
#{domain => ?RMQLOG_DOMAIN_PEER_DISC}),
|
?LOG_DEBUG(
|
||||||
ok;
|
"Peer discovery: all unreachable nodes are still "
|
||||||
Nodes ->
|
"registered with the discovery backend ~tp",
|
||||||
?LOG_DEBUG(
|
[rabbit_peer_discovery:backend()],
|
||||||
"Peer discovery: unreachable nodes are not registered "
|
#{domain => ?RMQLOG_DOMAIN_PEER_DISC}),
|
||||||
"with the discovery backend ~tp", [Nodes]),
|
ok;
|
||||||
maybe_remove_nodes(Nodes, State#state.warn_only)
|
Nodes ->
|
||||||
|
?LOG_DEBUG(
|
||||||
|
"Peer discovery: unreachable nodes are not registered "
|
||||||
|
"with the discovery backend ~tp", [Nodes]),
|
||||||
|
maybe_remove_nodes(Nodes, State#state.warn_only)
|
||||||
|
end;
|
||||||
|
{error, Reason} ->
|
||||||
|
?LOG_INFO(
|
||||||
|
"Peer discovery cleanup: ~tp returned error ~tp",
|
||||||
|
[Module, Reason]),
|
||||||
|
ok
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
|
@ -288,26 +298,3 @@ maybe_remove_nodes([Node | Nodes], false) ->
|
||||||
-spec unreachable_nodes() -> [node()].
|
-spec unreachable_nodes() -> [node()].
|
||||||
unreachable_nodes() ->
|
unreachable_nodes() ->
|
||||||
rabbit_nodes:list_unreachable().
|
rabbit_nodes:list_unreachable().
|
||||||
|
|
||||||
%%--------------------------------------------------------------------
|
|
||||||
%% @private
|
|
||||||
%% @doc Return the nodes that the service discovery backend knows about
|
|
||||||
%% @spec service_discovery_nodes() -> [node()]
|
|
||||||
%% @end
|
|
||||||
%%--------------------------------------------------------------------
|
|
||||||
-spec service_discovery_nodes() -> [node()].
|
|
||||||
service_discovery_nodes() ->
|
|
||||||
Module = rabbit_peer_discovery:backend(),
|
|
||||||
case rabbit_peer_discovery:normalize(Module:list_nodes()) of
|
|
||||||
{ok, {OneOrMultipleNodes, _Type}} ->
|
|
||||||
Nodes = as_list(OneOrMultipleNodes),
|
|
||||||
?LOG_DEBUG(
|
|
||||||
"Peer discovery cleanup: ~tp returned ~tp",
|
|
||||||
[Module, Nodes]),
|
|
||||||
Nodes;
|
|
||||||
{error, Reason} ->
|
|
||||||
?LOG_DEBUG(
|
|
||||||
"Peer discovery cleanup: ~tp returned error ~tp",
|
|
||||||
[Module, Reason]),
|
|
||||||
[]
|
|
||||||
end.
|
|
||||||
|
|
Loading…
Reference in New Issue