AWS peer discovery: ensure consistent hostname path ordering (#14557)

* AWS peer discovery: ensure consistent hostname path ordering

AWS EC2 API returns networkInterfaceSet and privateIpAddressesSet in
arbitrary order, causing non-deterministic hostname resolution during
peer discovery. This leads to inconsistent cluster formation.

Changes:
- Sort network interfaces by deviceIndex (0 first for primary ENI)
- Sort private IP addresses by primary flag (primary=true first)
- Add debug logging to show hostname path selection and sorting results
- Add comprehensive unit tests for sorting behavior

The sorting ensures deviceIndex=0 and primary=true IPs are consistently
selected first, making peer discovery deterministic across deployments.

* AWS peer discovery: ensure consistent hostname path ordering (address feedback on debug logs and sorting helper functions)
This commit is contained in:
Ben Nguyen 2025-09-23 14:19:07 -07:00 committed by GitHub
parent 668dbe2e2d
commit 4a324706a4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 152 additions and 14 deletions

View File

@ -351,10 +351,12 @@ get_hostname_by_tags(Tags) ->
%% Resolves the property path used to pull a hostname out of an EC2 instance
%% description.
%%
%% Reads `aws_use_private_ip` and `aws_hostname_path` from the backend
%% configuration map. When the configured path is exactly ["privateDnsName"]
%% and `aws_use_private_ip` is true, the path ["privateIpAddress"] is used
%% instead; any other configured path is returned unchanged. The chosen path
%% is logged at debug level before being returned.
%%
%% NOTE(review): this listing appears to interleave the pre-change lines
%% (`case HostnamePath of` / `end.`) with their replacements
%% (`FinalPath = case HostnamePath of` / `end,`) - confirm against the real
%% module before relying on the exact text below.
get_hostname_path() ->
UsePrivateIP = get_config_key(aws_use_private_ip, ?CONFIG_MODULE:config_map(?BACKEND_CONFIG_KEY)),
HostnamePath = get_config_key(aws_hostname_path, ?CONFIG_MODULE:config_map(?BACKEND_CONFIG_KEY)),
case HostnamePath of
FinalPath = case HostnamePath of
%% The guard only fires for the single-element path ["privateDnsName"].
["privateDnsName"] when UsePrivateIP -> ["privateIpAddress"];
P -> P
end.
end,
?LOG_DEBUG("AWS peer discovery using hostname path: ~tp", [FinalPath]),
FinalPath.
-spec get_hostname(path(), props()) -> string().
get_hostname(Path, Props) ->
@ -371,7 +373,37 @@ get_value(Key, Props) when is_integer(Key) ->
{"item", Props2} = lists:nth(Key, Props),
Props2;
get_value(Key, Props) ->
proplists:get_value(Key, Props).
Value = proplists:get_value(Key, Props),
sort_ec2_hostname_path_set_members(Key, Value).
%% Puts the members of an EC2 "set" value into a deterministic order so that
%% integer path segments always address the same member.
%%
%% - "networkInterfaceSet": ascending by the attachment's deviceIndex.
%% - "privateIpAddressesSet": members flagged primary="true" come first.
%% - any other key, or a non-list value: returned untouched.
%%
%% lists:sort/2 is stable, so members that compare equal keep their relative
%% order from the input.
-spec sort_ec2_hostname_path_set_members(string(), any()) -> any().
sort_ec2_hostname_path_set_members("networkInterfaceSet", Interfaces) when is_list(Interfaces) ->
    ByDeviceIndex =
        fun({"item", Left}, {"item", Right}) ->
                device_index(Left) =< device_index(Right)
        end,
    lists:sort(ByDeviceIndex, Interfaces);
sort_ec2_hostname_path_set_members("privateIpAddressesSet", Addresses) when is_list(Addresses) ->
    %% false < true in Erlang term order, so `>=` places primary entries first.
    PrimaryFirst =
        fun({"item", Left}, {"item", Right}) ->
                is_primary(Left) >= is_primary(Right)
        end,
    lists:sort(PrimaryFirst, Addresses);
sort_ec2_hostname_path_set_members(_Key, Other) ->
    Other.
%% Returns the numeric deviceIndex found under the "attachment" entry of a
%% network interface property list.
%%
%% The index may already be an integer or may be a decimal string; a string
%% must parse completely (no trailing characters), otherwise the match fails
%% and the call crashes.
-spec device_index(props()) -> integer().
device_index(Interface) ->
    Raw = proplists:get_value("deviceIndex",
                              proplists:get_value("attachment", Interface)),
    case Raw of
        Index when is_integer(Index) ->
            Index;
        Text when is_list(Text) ->
            {Parsed, []} = string:to_integer(Text),
            Parsed
    end.
%% Tells whether a private IP address property list is flagged as primary.
%% Only the exact string "true" counts; a missing key or any other value
%% yields false.
-spec is_primary(props()) -> boolean().
is_primary(IpAddress) ->
    proplists:get_value("primary", IpAddress) =:= "true".
-spec get_tags() -> tags().
get_tags() ->

View File

@ -23,7 +23,9 @@ groups() ->
{unit, [], [
maybe_add_tag_filters,
get_hostname_name_from_reservation_set,
registration_support
registration_support,
network_interface_sorting,
private_ip_address_sorting
]},
{lock, [], [
lock_single_node,
@ -75,12 +77,93 @@ get_hostname_name_from_reservation_set(_Config) ->
?assertEqual(Expectation,
rabbit_peer_discovery_aws:get_hostname_name_from_reservation_set(
reservation_set(), []))
end},
{"from private IP DNS in network interface",
fun() ->
os:putenv("AWS_HOSTNAME_PATH", "networkInterfaceSet,2,privateIpAddressesSet,1,privateDnsName"),
Expectation = ["ip-10-0-15-100.eu-west-1.compute.internal",
"ip-10-0-16-31.eu-west-1.compute.internal"],
?assertEqual(Expectation,
rabbit_peer_discovery_aws:get_hostname_name_from_reservation_set(
reservation_set(), []))
end}]
}).
%% The AWS backend is expected to report that it does not support registration.
registration_support(_Config) ->
    Supported = rabbit_peer_discovery_aws:supports_registration(),
    ?assertEqual(false, Supported).
%% Checks that network interfaces come back ordered by attachment deviceIndex
%% (0, 1, 2) regardless of the order in which they were supplied.
network_interface_sorting(_Config) ->
    %% Builds one networkInterfaceSet member with the given id and deviceIndex.
    Eni = fun(Id, DeviceIndex) ->
                  {"item", [
                            {"networkInterfaceId", Id},
                            {"attachment", [{"deviceIndex", DeviceIndex}]}
                           ]}
          end,
    %% Deliberately out of order: secondary, primary, tertiary.
    Unsorted = [Eni("eni-secondary", "1"),
                Eni("eni-primary", "0"),
                Eni("eni-tertiary", "2")],

    Sorted = rabbit_peer_discovery_aws:sort_ec2_hostname_path_set_members(
               "networkInterfaceSet", Unsorted),

    %% Nothing may be dropped or added by the sort.
    ?assertEqual(3, length(Sorted)),

    %% Position 1 is deviceIndex=0, position 2 is deviceIndex=1,
    %% position 3 is deviceIndex=2.
    Ids = [proplists:get_value("networkInterfaceId", Props) || {"item", Props} <- Sorted],
    ?assertEqual(["eni-primary", "eni-secondary", "eni-tertiary"], Ids).
%% Checks that the primary private IP address is moved to the front and that
%% the non-primary addresses keep their original relative order.
private_ip_address_sorting(_Config) ->
    %% Builds one privateIpAddressesSet member.
    Ip = fun(Address, DnsName, Primary) ->
                 {"item", [
                           {"privateIpAddress", Address},
                           {"privateDnsName", DnsName},
                           {"primary", Primary}
                          ]}
         end,
    %% The primary address sits in the middle of the input on purpose.
    Unsorted = [Ip("10.0.14.176", "ip-10-0-14-176.us-west-2.compute.internal", "false"),
                Ip("10.0.12.112", "ip-10-0-12-112.us-west-2.compute.internal", "true"),
                Ip("10.0.15.200", "ip-10-0-15-200.us-west-2.compute.internal", "false")],

    Sorted = rabbit_peer_discovery_aws:sort_ec2_hostname_path_set_members(
               "privateIpAddressesSet", Unsorted),

    ?assertEqual(3, length(Sorted)),

    %% Primary first; the two non-primary entries stay in input order.
    Observed = [{proplists:get_value("privateIpAddress", Props),
                 proplists:get_value("primary", Props)} || {"item", Props} <- Sorted],
    ?assertEqual([{"10.0.12.112", "true"},
                  {"10.0.14.176", "false"},
                  {"10.0.15.200", "false"}],
                 Observed).
lock_single_node(_Config) ->
LocalNode = node(),
Nodes = [LocalNode],
@ -141,16 +224,30 @@ reservation_set() ->
{"vpcId","vpc-4fe1562b"},
{"networkInterfaceSet", [
{"item",
[{"association",
[{"publicIp","203.0.113.11"},
{"publicDnsName",
"ec2-203-0-113-11.eu-west-1.compute.amazonaws.com"},
{"ipOwnerId","amazon"}]}]},
{"item",
[{"association",
[{"attachment", [{"deviceIndex", "1"}]},
{"association",
[{"publicIp","203.0.113.12"},
{"publicDnsName",
"ec2-203-0-113-12.eu-west-1.compute.amazonaws.com"},
{"ipOwnerId","amazon"}]},
{"privateIpAddressesSet", [
{"item", [
{"privateIpAddress", "10.0.15.101"},
{"privateDnsName", "ip-10-0-15-101.eu-west-1.compute.internal"},
{"primary", "false"}
]},
{"item", [
{"privateIpAddress", "10.0.15.100"},
{"privateDnsName", "ip-10-0-15-100.eu-west-1.compute.internal"},
{"primary", "true"}
]}
]}]},
{"item",
[{"attachment", [{"deviceIndex", "0"}]},
{"association",
[{"publicIp","203.0.113.11"},
{"publicDnsName",
"ec2-203-0-113-11.eu-west-1.compute.amazonaws.com"},
{"ipOwnerId","amazon"}]}]}]},
{"privateIpAddress","10.0.16.29"}]}]}]},
{"item", [{"reservationId","r-006cfdbf8d04c5f01"},
@ -171,15 +268,24 @@ reservation_set() ->
{"vpcId","vpc-4fe1562b"},
{"networkInterfaceSet", [
{"item",
[{"association",
[{"attachment", [{"deviceIndex", "0"}]},
{"association",
[{"publicIp","203.0.113.21"},
{"publicDnsName",
"ec2-203-0-113-21.eu-west-1.compute.amazonaws.com"},
{"ipOwnerId","amazon"}]}]},
{"item",
[{"association",
[{"attachment", [{"deviceIndex", "1"}]},
{"association",
[{"publicIp","203.0.113.22"},
{"publicDnsName",
"ec2-203-0-113-22.eu-west-1.compute.amazonaws.com"},
{"ipOwnerId","amazon"}]}]}]},
{"ipOwnerId","amazon"}]},
{"privateIpAddressesSet", [
{"item", [
{"privateIpAddress", "10.0.16.31"},
{"privateDnsName", "ip-10-0-16-31.eu-west-1.compute.internal"},
{"primary", "true"}
]}
]}]}]},
{"privateIpAddress","10.0.16.31"}]}]}]}].