Fix delete_replica bug
It caused a lot of flakiness in rabbit_stream_queue_SUITE, in both the `delete_replica` and `delete_last_replica` test cases.
This commit is contained in:
		
							parent
							
								
									6052ecdc9c
								
							
						
					
					
						commit
						e65ba8347c
					
				|  | @ -1018,16 +1018,14 @@ update_stream0(#{system_time := _Ts}, | |||
|                             _ -> | ||||
|                                 false | ||||
|                         end, | ||||
| 
 | ||||
|     case maps:get(Node, Members0) of | ||||
|         #member{role = {replica, Epoch}, | ||||
|                 current = {stopping, Idx}, | ||||
|                 state = _} = Member0 | ||||
|           when IsLeaderInCurrent -> | ||||
|             %% A leader has already been selected so skip straight to ready state | ||||
|             Member = Member0#member{state = {ready, Epoch}, | ||||
|                                     target = Target, | ||||
|                                     current = undefined}, | ||||
|             Member = update_target(Member0#member{state = {ready, Epoch}, | ||||
|                                                   current = undefined}, Target), | ||||
|             Members1 = Members0#{Node => Member}, | ||||
|             Stream0#stream{members = Members1}; | ||||
|         #member{role = {_, Epoch}, | ||||
|  | @ -1037,9 +1035,8 @@ update_stream0(#{system_time := _Ts}, | |||
|             %% epoch | ||||
|             Member = case StoppedEpoch of | ||||
|                          Epoch -> | ||||
|                              Member0#member{state = {stopped, StoppedEpoch, Tail}, | ||||
|                                             target = Target, | ||||
|                                             current = undefined}; | ||||
|                              update_target(Member0#member{state = {stopped, StoppedEpoch, Tail}, | ||||
|                                                           current = undefined}, Target); | ||||
|                          _ -> | ||||
|                              %% if stopped epoch is from another epoch | ||||
|                              %% leave target as is to retry stop in current term | ||||
|  | @ -1518,3 +1515,8 @@ set_running_to_stopped(Members) -> | |||
|                      M | ||||
|              end, Members). | ||||
| 
 | ||||
| update_target(#member{target = deleted} = Member, _) -> | ||||
|     %% A deleted member can never transition to another state | ||||
|     Member; | ||||
| update_target(Member, Target) -> | ||||
|     Member#member{target = Target}. | ||||
|  |  | |||
|  | @ -30,6 +30,7 @@ all_tests() -> | |||
|      delete_stream, | ||||
|      delete_replica_leader, | ||||
|      delete_replica, | ||||
|      delete_two_replicas, | ||||
|      delete_replica_2, | ||||
|      leader_start_failed | ||||
|     ]. | ||||
|  | @ -907,6 +908,79 @@ delete_replica(_) -> | |||
|     {S4, []} = evaluate_stream(meta(?LINE), S4, []), | ||||
|     ok. | ||||
| 
 | ||||
| delete_two_replicas(_) -> | ||||
|     %% There was a race condition in the rabbit_stream_queue_SUITE test cases | ||||
|     %% `delete_replica` and `delete_last_replica`: a replica could sometimes restart after | ||||
|     %% deletion because it transitioned back to the running state. See the processing of the | ||||
|     %% `member_stopped` command in `rabbit_stream_coordinator.erl` (line 1039). The new | ||||
|     %% function `update_target` ensures this transition never happens. | ||||
|     %% This test reproduces the trace that leads to that error. | ||||
|     E = 1, | ||||
|     StreamId = atom_to_list(?FUNCTION_NAME), | ||||
|     LeaderPid = fake_pid(n1), | ||||
|     [Replica1, Replica2] = [fake_pid(n2), fake_pid(n3)], | ||||
|     N1 = node(LeaderPid), | ||||
|     N2 = node(Replica1), | ||||
|     %% this is the first replica to be deleted | ||||
|     N3 = node(Replica2), | ||||
| 
 | ||||
|     S0 = started_stream(StreamId, LeaderPid, [Replica1, Replica2]), | ||||
|     From = {self(), make_ref()}, | ||||
|     Idx1 = ?LINE, | ||||
|     Meta1 = (meta(Idx1))#{from => From}, | ||||
|     S1 = update_stream(Meta1, {delete_replica, StreamId, #{node => N3}}, S0), | ||||
|     ?assertMatch(#stream{target = running, | ||||
|                          nodes = [N1, N2], | ||||
|                          members = #{N1 := #member{target = stopped, | ||||
|                                                    current = undefined, | ||||
|                                                    state = {running, _, _}}, | ||||
|                                      N2 := #member{target = stopped, | ||||
|                                                    current = undefined, | ||||
|                                                    state = {running, _, _}}, | ||||
|                                      N3 := #member{target = deleted, | ||||
|                                                    current = undefined, | ||||
|                                                    state = {running, _, _}} | ||||
|                                     }}, | ||||
|                  S1), | ||||
|     {S2, Actions1} = evaluate_stream(Meta1, S1, []), | ||||
|     ?assertMatch([{aux, {delete_member, StreamId, #{node := N3}, _}}, | ||||
|                   {aux, {stop, StreamId, #{node := N1, epoch := E}, _}}, | ||||
|                   {aux, {stop, StreamId, #{node := N2, epoch := E}, _}}], | ||||
|                  lists:sort(Actions1)), | ||||
| 
 | ||||
|     Idx2 = ?LINE, | ||||
|     Meta2 = (meta(Idx2))#{from => From}, | ||||
|     S3 = update_stream(Meta2, {delete_replica, StreamId, #{node => N2}}, S2), | ||||
|     ?assertMatch(#stream{target = running, | ||||
|                          nodes = [N1], | ||||
|                          members = #{N1 := #member{target = stopped, | ||||
|                                                    current = {stopping, _}, | ||||
|                                                    state = {running, _, _}}, | ||||
|                                      N2 := #member{target = deleted, | ||||
|                                                    current = {stopping, _}, | ||||
|                                                    state = {running, _, _}}, | ||||
|                                      N3 := #member{target = deleted, | ||||
|                                                    current = {deleting, _}, | ||||
|                                                    state = {running, _, _}} | ||||
|                                     }}, | ||||
|                  S3), | ||||
|     {S4, []} = evaluate_stream(Meta2, S3, []), | ||||
| 
 | ||||
| 
 | ||||
|     Idx3 = ?LINE, | ||||
|     S5 = update_stream(meta(Idx3), | ||||
|                        {member_stopped, StreamId, #{node => N2, | ||||
|                                                     index => Idx1, | ||||
|                                                     epoch => E, | ||||
|                                                     tail => {E, 101}}}, | ||||
|                        S4), | ||||
|     %% A deleted member can never transition to another target. | ||||
|     ?assertMatch(#stream{members = #{N2 := #member{target = deleted, | ||||
|                                                    current = undefined, | ||||
|                                                    state = {stopped, _, _}}}}, | ||||
|                  S5), | ||||
|     ok. | ||||
| 
 | ||||
| delete_replica_2(_) -> | ||||
|     %% replica is deleted before it has been fully started | ||||
|     E = 1, | ||||
|  |  | |||
|  | @ -186,7 +186,6 @@ merge_app_env(Config) -> | |||
|                                       {rabbit, [{core_metrics_gc_interval, 100}]}). | ||||
| 
 | ||||
| end_per_testcase(Testcase, Config) -> | ||||
|     Q = ?config(queue_name, Config), | ||||
|     Config1 = rabbit_ct_helpers:run_steps( | ||||
|                 Config, | ||||
|                 rabbit_ct_client_helpers:teardown_steps()), | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue