I thought of another race: in theory a queue_stats message could make it out of a dying node just in time, but then be overtaken by the queue_deleted event that was generated by another node. Fairly unlikely, but we should handle it.
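The diff below handles this by guarding the coarse queue-stats path with the same kind of existence check already applied to fine stats. A minimal sketch of that guard, using the object_exists/2 and record_sample_coarse/3 helpers from the diff (the surrounding rabbit_mgmt_db state and types are assumed):

    %% Sketch of the guard added below: drop coarse stats for a queue that
    %% has already been deleted, so a late queue_stats event cannot
    %% resurrect rows after its queue_deleted event has been processed.
    record_sample({coarse, {queue_stats, Q} = Id}, Args, State) ->
        case object_exists(Q, State) of
            true  -> record_sample_coarse({coarse, Id}, Args, State);
            false -> ok   %% stale sample from a dead node - ignore it
        end.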
parent 1a9cc3ba9b
commit 914f1fa751
@@ -424,7 +424,7 @@ handle_event(Event = #event{type = queue_deleted,
     Id = {coarse, {queue_stats, Name}},
     TS = floor(Timestamp, State),
     OldStats = lookup_element(OldTable, Id),
-    [record_sample(Id, {Key, -pget(Key, OldStats, 0), TS, State}, State)
+    [record_sample_coarse(Id, {Key, -pget(Key, OldStats, 0), TS, State}, State)
      || Key <- ?COARSE_QUEUE_STATS],
     delete_samples(channel_queue_stats,  {'_', Name}, State),
     delete_samples(queue_exchange_stats, {Name, '_'}, State),
@@ -606,9 +606,15 @@ append_sample(Key, Value, NewMS, OldStats, Id, State) ->
     record_sample(
       Id, {Key, Value - pget(Key, OldStats, 0), NewMS, State}, State).
 
+record_sample({coarse, {queue_stats, Q} = Id}, Args, State) ->
+    case object_exists(Q, State) of
+        true  -> record_sample_coarse({coarse, Id}, Args, State);
+        false -> io:format("Ignoring: ~p~n", [{Q, Args}]),
+                 ok
+    end;
+
 record_sample({coarse, Id}, Args, State) ->
-    record_sample0(Id, Args),
-    record_sample0({vhost_stats, vhost(Id, State)}, Args);
+    record_sample_coarse({coarse, Id}, Args, State);
 
 %% Deliveries / acks (Q -> Ch)
 record_sample({fine, {Ch, Q = #resource{kind = queue}}}, Args, State) ->
@@ -655,11 +661,16 @@ record_sample({fine, {_Ch,
         false -> ok
     end.
 
-%% We have to check the queue and exchange objects still exist for fine
-%% stats since their deleted event could be overtaken by a channel stats
-%% event which contains fine stats referencing them. That's also why we
-%% don't need to check the channels exist - their deleted event can't be
-%% overtaken by their own last stats event.
+%% We have to check the queue and exchange objects still exist since
+%% their deleted event could be overtaken by a channel stats event
+%% which contains fine stats referencing them. That's also why we
+%% don't need to check the channels exist - their deleted event can't
+%% be overtaken by their own last stats event.
+%%
+%% Also, sometimes the queue_deleted event is not emitted by the queue
+%% (in the nodedown case) - so it can overtake the final queue_stats
+%% event (which is not *guaranteed* to be lost). So we make a similar
+%% check for coarse queue stats.
 %%
 %% We can be sure that mnesia will be up to date by the time we receive
 %% the event (even though we dirty read) since the deletions are
@@ -685,6 +696,10 @@ record_sampleX(RenamePublishTo, X, {publish, Diff, TS, State}) ->
 record_sampleX(_RenamePublishTo, X, {Type, Diff, TS, State}) ->
     record_sample0({exchange_stats, X}, {Type, Diff, TS, State}).
 
+record_sample_coarse({coarse, Id}, Args, State) ->
+    record_sample0(Id, Args),
+    record_sample0({vhost_stats, vhost(Id, State)}, Args).
+
 record_sample0(Id0, {Key, Diff, TS, #state{aggregated_stats       = ETS,
                                            aggregated_stats_index = ETSi}}) ->
     Id = {Id0, Key},
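For context on the check that the comments above refer to: it amounts to asking whether the queue (or exchange) still has a row in Mnesia at the moment the sample is processed. A hedged sketch of one way such a check could look, assuming the standard rabbit_queue table and the dirty read mentioned in the comment; the actual object_exists/2 in the management database may be implemented differently:

    %% Hypothetical existence check (sketch, not the real implementation):
    %% the queue's row is removed from Mnesia before queue_deleted is
    %% emitted, so a dirty read is enough to tell whether it is still alive.
    object_exists(Name, _State) ->
        case mnesia:dirty_read(rabbit_queue, Name) of
            []  -> false;
            [_] -> true
        end.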