Ra 2.16.5 - bug fixes and minor improvements

Ra improvements:

* Don't allow a non-voter to start elections
* Register with ra directory before initialising ra server.
* Trigger tick_timeout immediately after entering leader state.
* Set a configurable segment max size

This commit also includes a change to turn the quorum queue
become leader callback to become a noop and instead rely on
the more promptly tick_handler to handle the meta data store
update after a leader election.

This more prompt tick update means there should be a much shorter
gap between the queue metrics being deleted from the old leader
node to them being available again on the new node resulting
in smoother message count metrics.

Fix test that relied on waiting on too simplistic a property
before asserting.
This commit is contained in:
Karl Nilsson 2025-03-17 17:19:57 +00:00
parent 44657cd393
commit 4fe96dfd27
3 changed files with 51 additions and 57 deletions

View File

@ -425,11 +425,10 @@ local_or_remote_handler(ChPid, Module, Function, Args) ->
erpc:cast(Node, Module, Function, Args)
end.
become_leader(QName, Name) ->
%% as this function is called synchronously when a ra node becomes leader
%% we need to ensure there is no chance of blocking as else the ra node
%% may not be able to establish its leadership
spawn(fun () -> become_leader0(QName, Name) end).
become_leader(_QName, _Name) ->
%% noop now as we instead rely on the promt tick_timeout + repair to update
%% the meta data store after a leader change
ok.
become_leader0(QName, Name) ->
Fun = fun (Q1) ->
@ -580,7 +579,6 @@ handle_tick(QName,
Nodes) ->
%% this makes calls to remote processes so cannot be run inside the
%% ra server
Self = self(),
spawn(
fun() ->
try
@ -638,7 +636,7 @@ handle_tick(QName,
end}
| Infos0],
rabbit_core_metrics:queue_stats(QName, Infos),
ok = repair_leader_record(Q, Self),
ok = repair_leader_record(Q, Name),
case repair_amqqueue_nodes(Q) of
ok ->
ok;
@ -675,7 +673,7 @@ handle_tick(QName, Config, _Nodes) ->
rabbit_log:debug("~ts: handle tick received unexpected config format ~tp",
[rabbit_misc:rs(QName), Config]).
repair_leader_record(Q, Self) ->
repair_leader_record(Q, Name) ->
Node = node(),
case amqqueue:get_pid(Q) of
{_, Node} ->
@ -683,9 +681,8 @@ repair_leader_record(Q, Self) ->
ok;
_ ->
QName = amqqueue:get_name(Q),
rabbit_log:debug("~ts: repairing leader record",
[rabbit_misc:rs(QName)]),
{_, Name} = erlang:process_info(Self, registered_name),
rabbit_log:debug("~ts: updating leader record to current node ~b",
[rabbit_misc:rs(QName), Node]),
ok = become_leader0(QName, Name),
ok
end,

View File

@ -482,11 +482,6 @@ queues_enable_totals_test(Config) ->
Publish(<<"foo">>),
Fun = fun() ->
length(rabbit_ct_broker_helpers:rpc(Config, 0, ets, tab2list,
[queue_coarse_metrics])) == 2
end,
await_condition(Fun),
Queues = http_get(Config, "/queues/%2F"),
Queue = http_get(Config, "/queues/%2F/foo"),
@ -528,7 +523,9 @@ queues_enable_totals_test(Config) ->
?assert(not maps:is_key(message_stats, Queue)),
?assert(not maps:is_key(messages_details, Queue)),
?assert(not maps:is_key(reductions_details, Queue)),
true
end,
await_condition(Fun),
http_delete(Config, "/queues/%2F/foo", {group, '2xx'}),
http_delete(Config, "/queues/%2F/baz", {group, '2xx'}),
close_connection(Conn),

View File

@ -51,7 +51,7 @@ dep_khepri_mnesia_migration = hex 0.7.1
dep_meck = hex 1.0.0
dep_osiris = git https://github.com/rabbitmq/osiris v1.8.6
dep_prometheus = hex 4.11.0
dep_ra = hex 2.16.3
dep_ra = hex 2.16.5
dep_ranch = hex 2.2.0
dep_recon = hex 2.5.6
dep_redbug = hex 2.0.7