Fix delete_replica bug
It caused a lot of flakiness in rabbit_stream_queue_SUITE, in both the `delete_replica` and `delete_last_replica` test cases.
This commit is contained in:
parent
6052ecdc9c
commit
e65ba8347c
|
|
@ -1018,16 +1018,14 @@ update_stream0(#{system_time := _Ts},
|
||||||
_ ->
|
_ ->
|
||||||
false
|
false
|
||||||
end,
|
end,
|
||||||
|
|
||||||
case maps:get(Node, Members0) of
|
case maps:get(Node, Members0) of
|
||||||
#member{role = {replica, Epoch},
|
#member{role = {replica, Epoch},
|
||||||
current = {stopping, Idx},
|
current = {stopping, Idx},
|
||||||
state = _} = Member0
|
state = _} = Member0
|
||||||
when IsLeaderInCurrent ->
|
when IsLeaderInCurrent ->
|
||||||
%% A leader has already been selected so skip straight to ready state
|
%% A leader has already been selected so skip straight to ready state
|
||||||
Member = Member0#member{state = {ready, Epoch},
|
Member = update_target(Member0#member{state = {ready, Epoch},
|
||||||
target = Target,
|
current = undefined}, Target),
|
||||||
current = undefined},
|
|
||||||
Members1 = Members0#{Node => Member},
|
Members1 = Members0#{Node => Member},
|
||||||
Stream0#stream{members = Members1};
|
Stream0#stream{members = Members1};
|
||||||
#member{role = {_, Epoch},
|
#member{role = {_, Epoch},
|
||||||
|
|
@ -1037,9 +1035,8 @@ update_stream0(#{system_time := _Ts},
|
||||||
%% epoch
|
%% epoch
|
||||||
Member = case StoppedEpoch of
|
Member = case StoppedEpoch of
|
||||||
Epoch ->
|
Epoch ->
|
||||||
Member0#member{state = {stopped, StoppedEpoch, Tail},
|
update_target(Member0#member{state = {stopped, StoppedEpoch, Tail},
|
||||||
target = Target,
|
current = undefined}, Target);
|
||||||
current = undefined};
|
|
||||||
_ ->
|
_ ->
|
||||||
%% if stopped epoch is from another epoch
|
%% if stopped epoch is from another epoch
|
||||||
%% leave target as is to retry stop in current term
|
%% leave target as is to retry stop in current term
|
||||||
|
|
@ -1518,3 +1515,8 @@ set_running_to_stopped(Members) ->
|
||||||
M
|
M
|
||||||
end, Members).
|
end, Members).
|
||||||
|
|
||||||
|
update_target(#member{target = deleted} = Member, _) ->
|
||||||
|
%% A member whose target is `deleted` can never transition to another target
|
||||||
|
Member;
|
||||||
|
update_target(Member, Target) ->
|
||||||
|
Member#member{target = Target}.
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ all_tests() ->
|
||||||
delete_stream,
|
delete_stream,
|
||||||
delete_replica_leader,
|
delete_replica_leader,
|
||||||
delete_replica,
|
delete_replica,
|
||||||
|
delete_two_replicas,
|
||||||
delete_replica_2,
|
delete_replica_2,
|
||||||
leader_start_failed
|
leader_start_failed
|
||||||
].
|
].
|
||||||
|
|
@ -907,6 +908,79 @@ delete_replica(_) ->
|
||||||
{S4, []} = evaluate_stream(meta(?LINE), S4, []),
|
{S4, []} = evaluate_stream(meta(?LINE), S4, []),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
|
delete_two_replicas(_) ->
|
||||||
|
%% There was a race condition on the rabbit_stream_queue_SUITE testcases delete_replica
|
||||||
|
%% and delete_last_replica. A replica can sometimes restart after deletion as it transitions
|
||||||
|
%% again to running state. This test reproduces it. See `rabbit_stream_coordinator.erl`
|
||||||
|
%% line 1039, the processing of `member_stopped` command. The new function `update_target`
|
||||||
|
%% ensures this transition never happens.
|
||||||
|
%% This test reproduces the trace that leads to that error.
|
||||||
|
E = 1,
|
||||||
|
StreamId = atom_to_list(?FUNCTION_NAME),
|
||||||
|
LeaderPid = fake_pid(n1),
|
||||||
|
[Replica1, Replica2] = [fake_pid(n2), fake_pid(n3)],
|
||||||
|
N1 = node(LeaderPid),
|
||||||
|
N2 = node(Replica1),
|
||||||
|
%% second replica; it is the first one to be deleted below
|
||||||
|
N3 = node(Replica2),
|
||||||
|
|
||||||
|
S0 = started_stream(StreamId, LeaderPid, [Replica1, Replica2]),
|
||||||
|
From = {self(), make_ref()},
|
||||||
|
Idx1 = ?LINE,
|
||||||
|
Meta1 = (meta(Idx1))#{from => From},
|
||||||
|
S1 = update_stream(Meta1, {delete_replica, StreamId, #{node => N3}}, S0),
|
||||||
|
?assertMatch(#stream{target = running,
|
||||||
|
nodes = [N1, N2],
|
||||||
|
members = #{N1 := #member{target = stopped,
|
||||||
|
current = undefined,
|
||||||
|
state = {running, _, _}},
|
||||||
|
N2 := #member{target = stopped,
|
||||||
|
current = undefined,
|
||||||
|
state = {running, _, _}},
|
||||||
|
N3 := #member{target = deleted,
|
||||||
|
current = undefined,
|
||||||
|
state = {running, _, _}}
|
||||||
|
}},
|
||||||
|
S1),
|
||||||
|
{S2, Actions1} = evaluate_stream(Meta1, S1, []),
|
||||||
|
?assertMatch([{aux, {delete_member, StreamId, #{node := N3}, _}},
|
||||||
|
{aux, {stop, StreamId, #{node := N1, epoch := E}, _}},
|
||||||
|
{aux, {stop, StreamId, #{node := N2, epoch := E}, _}}],
|
||||||
|
lists:sort(Actions1)),
|
||||||
|
|
||||||
|
Idx2 = ?LINE,
|
||||||
|
Meta2 = (meta(Idx2))#{from => From},
|
||||||
|
S3 = update_stream(Meta2, {delete_replica, StreamId, #{node => N2}}, S2),
|
||||||
|
?assertMatch(#stream{target = running,
|
||||||
|
nodes = [N1],
|
||||||
|
members = #{N1 := #member{target = stopped,
|
||||||
|
current = {stopping, _},
|
||||||
|
state = {running, _, _}},
|
||||||
|
N2 := #member{target = deleted,
|
||||||
|
current = {stopping, _},
|
||||||
|
state = {running, _, _}},
|
||||||
|
N3 := #member{target = deleted,
|
||||||
|
current = {deleting, _},
|
||||||
|
state = {running, _, _}}
|
||||||
|
}},
|
||||||
|
S3),
|
||||||
|
{S4, []} = evaluate_stream(Meta2, S3, []),
|
||||||
|
|
||||||
|
|
||||||
|
Idx3 = ?LINE,
|
||||||
|
S5 = update_stream(meta(Idx3),
|
||||||
|
{member_stopped, StreamId, #{node => N2,
|
||||||
|
index => Idx1,
|
||||||
|
epoch => E,
|
||||||
|
tail => {E, 101}}},
|
||||||
|
S4),
|
||||||
|
%% A deleted member can never transition to another target.
|
||||||
|
?assertMatch(#stream{members = #{N2 := #member{target = deleted,
|
||||||
|
current = undefined,
|
||||||
|
state = {stopped, _, _}}}},
|
||||||
|
S5),
|
||||||
|
ok.
|
||||||
|
|
||||||
delete_replica_2(_) ->
|
delete_replica_2(_) ->
|
||||||
%% replica is deleted before it has been fully started
|
%% replica is deleted before it has been fully started
|
||||||
E = 1,
|
E = 1,
|
||||||
|
|
|
||||||
|
|
@ -186,7 +186,6 @@ merge_app_env(Config) ->
|
||||||
{rabbit, [{core_metrics_gc_interval, 100}]}).
|
{rabbit, [{core_metrics_gc_interval, 100}]}).
|
||||||
|
|
||||||
end_per_testcase(Testcase, Config) ->
|
end_per_testcase(Testcase, Config) ->
|
||||||
Q = ?config(queue_name, Config),
|
|
||||||
Config1 = rabbit_ct_helpers:run_steps(
|
Config1 = rabbit_ct_helpers:run_steps(
|
||||||
Config,
|
Config,
|
||||||
rabbit_ct_client_helpers:teardown_steps()),
|
rabbit_ct_client_helpers:teardown_steps()),
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue