Store hash ring state in a single table
This implementation is significantly simpler and doesn't perform nearly as many Mnesia operations. Pair: @dcorbacho. References #37, #38. [#159822323]
This commit is contained in:
parent
e5e9316b9e
commit
a2d4f0bd87
|
|
@ -27,25 +27,12 @@
|
|||
-export([init/0]).
|
||||
-export([info/1, info/2]).
|
||||
|
||||
-record(bucket, {
|
||||
%% a {resource, bucket} pair
|
||||
%% where bucket is a non-negative integer
|
||||
id,
|
||||
%% a resource
|
||||
queue
|
||||
}).
|
||||
|
||||
-record(bucket_count, {
|
||||
exchange,
|
||||
count
|
||||
}).
|
||||
|
||||
-record(binding_buckets, {
|
||||
%% an {exchange, queue} pair because we
|
||||
%% assume that there's only one binding between
|
||||
%% a consistent hash exchange and a queue
|
||||
id,
|
||||
bucket_numbers = []
|
||||
-record(chx_hash_ring, {
|
||||
%% a resource
|
||||
exchange,
|
||||
%% a map of bucket => queue | exchange
|
||||
bucket_map,
|
||||
next_bucket_number
|
||||
}).
|
||||
|
||||
-rabbit_boot_step(
|
||||
|
|
@ -66,17 +53,9 @@
|
|||
{enables, external_infrastructure}]}).
|
||||
|
||||
%% This data model allows for efficient routing and exchange deletion
|
||||
%% but not efficient binding management. This is a future area of improvement.
|
||||
%% A couple of alternatives were considered, e.g. storing the entire ring state
|
||||
%% in a single map. Without an additional structure such as a balanced tree
|
||||
%% ring updates would be even less efficient (but easier to follow).
|
||||
%% but less efficient (linear) binding management.
|
||||
|
||||
%% maps buckets to queues
|
||||
-define(BUCKET_TABLE, rabbit_exchange_type_consistent_hash_bucket_queue).
|
||||
%% maps exchange to total the number of buckets
|
||||
-define(BUCKET_COUNT_TABLE, rabbit_exchange_type_consistent_hash_bucket_count).
|
||||
%% maps {exchange, queue} pairs to a list of buckets
|
||||
-define(BINDING_BUCKET_TABLE, rabbit_exchange_type_consistent_hash_binding_bucket).
|
||||
-define(HASH_RING_STATE_TABLE, rabbit_exchange_type_consistent_hash_ring_state).
|
||||
|
||||
-define(PROPERTIES, [<<"correlation_id">>, <<"message_id">>, <<"timestamp">>]).
|
||||
|
||||
|
|
@ -91,21 +70,26 @@ description() ->
|
|||
|
||||
serialise_events() -> false.
|
||||
|
||||
route(#exchange { name = Name,
|
||||
arguments = Args },
|
||||
#delivery { message = Msg }) ->
|
||||
case ets:lookup(?BUCKET_COUNT_TABLE, Name) of
|
||||
route(#exchange {name = Name,
|
||||
arguments = Args},
|
||||
#delivery {message = Msg}) ->
|
||||
case ets:lookup(?HASH_RING_STATE_TABLE, Name) of
|
||||
[] ->
|
||||
[];
|
||||
[#bucket_count{count = N}] ->
|
||||
K = value_to_hash(hash_on(Args), Msg),
|
||||
SelectedBucket = jump_consistent_hash(K, N),
|
||||
case mnesia:dirty_read({?BUCKET_TABLE, {Name, SelectedBucket}}) of
|
||||
[Bucket] -> [Bucket#bucket.queue];
|
||||
[] -> rabbit_log:warning("Bucket ~p not found", [SelectedBucket]),
|
||||
[]
|
||||
end
|
||||
[#chx_hash_ring{bucket_map = BM}] ->
|
||||
case maps:size(BM) of
|
||||
0 -> [];
|
||||
N ->
|
||||
K = value_to_hash(hash_on(Args), Msg),
|
||||
SelectedBucket = jump_consistent_hash(K, N),
|
||||
|
||||
case maps:get(SelectedBucket, BM, undefined) of
|
||||
undefined ->
|
||||
rabbit_log:warning("Bucket ~p not found", [SelectedBucket]),
|
||||
[];
|
||||
Queue -> [Queue]
|
||||
end
|
||||
end
|
||||
end.
|
||||
|
||||
validate(#exchange { arguments = Args }) ->
|
||||
|
|
@ -139,63 +123,64 @@ validate_binding(_X, #binding { key = K }) ->
|
|||
{error, {binding_invalid, "The binding key must be an integer: ~p", [K]}}
|
||||
end.
|
||||
|
||||
create(_Tx, _X) -> ok.
|
||||
maybe_initialise_hash_ring_state(transaction, X) ->
|
||||
case mnesia:read(?HASH_RING_STATE_TABLE, X) of
|
||||
[_] -> ok;
|
||||
[] ->
|
||||
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||
ok = mnesia:write(?HASH_RING_STATE_TABLE, #chx_hash_ring{
|
||||
exchange = X,
|
||||
next_bucket_number = 0,
|
||||
bucket_map = #{}}, write)
|
||||
end;
|
||||
|
||||
maybe_initialise_hash_ring_state(_, X) ->
|
||||
rabbit_misc:execute_mnesia_transaction(
|
||||
fun() -> maybe_initialise_hash_ring_state(transaction, X) end).
|
||||
|
||||
create(transaction, X) ->
|
||||
maybe_initialise_hash_ring_state(transaction, X);
|
||||
create(Tx, X) ->
|
||||
maybe_initialise_hash_ring_state(Tx, X).
|
||||
|
||||
delete(transaction, #exchange{name = Name}, _Bs) ->
|
||||
ok = mnesia:write_lock_table(?BUCKET_TABLE),
|
||||
ok = mnesia:write_lock_table(?BUCKET_COUNT_TABLE),
|
||||
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||
|
||||
Numbers = mnesia:select(?BUCKET_TABLE, [{
|
||||
#bucket{id = {Name, '$1'}, _ = '_'},
|
||||
[],
|
||||
['$1']
|
||||
}]),
|
||||
[mnesia:delete({?BUCKET_TABLE, {Name, N}})
|
||||
|| N <- Numbers],
|
||||
|
||||
Queues = mnesia:select(?BINDING_BUCKET_TABLE,
|
||||
[{
|
||||
#binding_buckets{id = {Name, '$1'}, _ = '_'},
|
||||
[],
|
||||
['$1']
|
||||
}]),
|
||||
[mnesia:delete({?BINDING_BUCKET_TABLE, {Name, Q}})
|
||||
|| Q <- Queues],
|
||||
|
||||
mnesia:delete({?BUCKET_COUNT_TABLE, Name}),
|
||||
ok;
|
||||
ok = mnesia:delete({?HASH_RING_STATE_TABLE, Name});
|
||||
delete(_Tx, _X, _Bs) ->
|
||||
ok.
|
||||
|
||||
policy_changed(_X1, _X2) -> ok.
|
||||
|
||||
add_binding(transaction, _X,
|
||||
#binding{source = S, destination = D, key = K}) ->
|
||||
add_binding(transaction, X,
|
||||
B = #binding{source = S, destination = D, key = K}) ->
|
||||
Weight = rabbit_data_coercion:to_integer(K),
|
||||
|
||||
mnesia:write_lock_table(?BUCKET_TABLE),
|
||||
mnesia:write_lock_table(?BUCKET_COUNT_TABLE),
|
||||
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||
|
||||
LastBucketNum = bucket_count_of(S),
|
||||
NewBucketCount = LastBucketNum + Weight,
|
||||
case mnesia:read(?HASH_RING_STATE_TABLE, S) of
|
||||
[State0 = #chx_hash_ring{bucket_map = BM0,
|
||||
next_bucket_number = NexN0}] ->
|
||||
NextN = NexN0 + Weight,
|
||||
%% hi/lo bucket counters are 0-based but weight is 1-based
|
||||
Range = lists:seq(NexN0, (NextN - 1)),
|
||||
BM = lists:foldl(fun(Key, Acc) ->
|
||||
maps:put(Key, D, Acc)
|
||||
end, BM0, Range),
|
||||
State = State0#chx_hash_ring{bucket_map = BM,
|
||||
next_bucket_number = NextN},
|
||||
|
||||
Numbers = lists:seq(LastBucketNum, (NewBucketCount - 1)),
|
||||
Buckets = [#bucket{id = {S, I}, queue = D} || I <- Numbers],
|
||||
|
||||
[ok = mnesia:write(?BUCKET_TABLE, B, write) || B <- Buckets],
|
||||
|
||||
mnesia:write(?BINDING_BUCKET_TABLE, #binding_buckets{id = {S, D},
|
||||
bucket_numbers = Numbers}, write),
|
||||
mnesia:write(?BUCKET_COUNT_TABLE, #bucket_count{exchange = S,
|
||||
count = NewBucketCount}, write),
|
||||
|
||||
ok;
|
||||
ok = mnesia:write(?HASH_RING_STATE_TABLE, State, write),
|
||||
ok;
|
||||
[] ->
|
||||
maybe_initialise_hash_ring_state(transaction, S),
|
||||
add_binding(transaction, X, B)
|
||||
end;
|
||||
add_binding(none, _X, _B) ->
|
||||
ok.
|
||||
|
||||
remove_bindings(transaction, _X, Bindings) ->
|
||||
mnesia:write_lock_table(?BUCKET_TABLE),
|
||||
mnesia:write_lock_table(?BUCKET_COUNT_TABLE),
|
||||
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||
|
||||
[remove_binding(B) || B <- Bindings],
|
||||
|
||||
|
|
@ -203,78 +188,52 @@ remove_bindings(transaction, _X, Bindings) ->
|
|||
remove_bindings(none, _X, _Bs) ->
|
||||
ok.
|
||||
|
||||
remove_binding(#binding{source = S, destination = D, key = K}) ->
|
||||
Weight = rabbit_data_coercion:to_integer(K),
|
||||
remove_binding(#binding{source = S, destination = D, key = RK}) ->
|
||||
Weight = rabbit_data_coercion:to_integer(RK),
|
||||
|
||||
[#binding_buckets{bucket_numbers = Numbers}] = mnesia:read(?BINDING_BUCKET_TABLE, {S, D}),
|
||||
LastNum = lists:last(Numbers),
|
||||
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||
|
||||
%% Delete all buckets for this {exchange, queue} pair
|
||||
[ok = mnesia:delete(?BUCKET_TABLE, {S, N}, write) || N <- Numbers],
|
||||
case mnesia:read(?HASH_RING_STATE_TABLE, S) of
|
||||
[State0 = #chx_hash_ring{bucket_map = BM0,
|
||||
next_bucket_number = NexN0}] ->
|
||||
%% Buckets with lower numbers stay as is; buckets that
|
||||
%% belong to this binding are removed; buckets with
|
||||
%% greater numbers are updated (their numbers are adjusted downwards by weight)
|
||||
BucketsOfThisBinding = maps:filter(fun (_K, V) -> V =:= D end, BM0),
|
||||
LastBucket = lists:last(maps:keys(BucketsOfThisBinding)),
|
||||
BucketsDownTheRing = maps:filter(fun (K, _) -> K > LastBucket end, BM0),
|
||||
|
||||
%% Buckets with lower numbers stay as is; buckets that
|
||||
%% belong to this binding are removed; buckets with
|
||||
%% greater numbers are updated (their numbers are adjusted downwards by weight)
|
||||
BucketsToUpdate = mnesia:select(?BUCKET_TABLE, [{
|
||||
#bucket{id = {S, '$1'}, _ = '_'},
|
||||
[
|
||||
{'>', '$1', LastNum}
|
||||
],
|
||||
['$_']
|
||||
}]),
|
||||
QueuesWithUpdatedBuckets = lists:usort([Q || #bucket{queue = Q} <- BucketsToUpdate]),
|
||||
[ok = mnesia:delete(?BUCKET_TABLE, Id, write) || #bucket{id = Id} <- BucketsToUpdate],
|
||||
%% hash ring state without the buckets of this binding
|
||||
BM1 = maps:fold(fun(K, _, Acc) -> maps:remove(K, Acc) end, BM0, BucketsOfThisBinding),
|
||||
%% final state with "down the ring" buckets updated
|
||||
BM2 = maps:fold(fun(K0, V, Acc) ->
|
||||
M = maps:remove(K0, Acc),
|
||||
maps:put(K0 - Weight, V, M)
|
||||
end, BM1, BucketsDownTheRing),
|
||||
|
||||
UpdatedBuckets = [B#bucket{id = {X, N - Weight}} || B = #bucket{id = {X, N}} <- BucketsToUpdate],
|
||||
[ok = mnesia:write(?BUCKET_TABLE, B, write) || B <- UpdatedBuckets],
|
||||
NextN = NexN0 - Weight,
|
||||
State = State0#chx_hash_ring{bucket_map = BM2,
|
||||
next_bucket_number = NextN},
|
||||
|
||||
%% There will be no buckets for this {exchange, queue} pair to track
|
||||
ok = mnesia:delete(?BINDING_BUCKET_TABLE, {S, D}, write),
|
||||
ok = mnesia:write(?HASH_RING_STATE_TABLE, State, write),
|
||||
|
||||
%% Update the counter
|
||||
TotalBucketsForX = bucket_count_of(S),
|
||||
mnesia:write(?BUCKET_COUNT_TABLE, #bucket_count{exchange = S,
|
||||
count = TotalBucketsForX - Weight}, write),
|
||||
|
||||
%% Update bucket numbers
|
||||
[begin
|
||||
case mnesia:read(?BINDING_BUCKET_TABLE, {S, Q}) of
|
||||
[] -> ok;
|
||||
[Val = #binding_buckets{bucket_numbers = BNs}] ->
|
||||
NewBNs = [N - Weight || N <- BNs],
|
||||
ok = mnesia:write(?BINDING_BUCKET_TABLE, Val#binding_buckets{bucket_numbers = NewBNs}, write)
|
||||
end
|
||||
end || Q <- QueuesWithUpdatedBuckets],
|
||||
ok = mnesia:delete(?BINDING_BUCKET_TABLE, {S, D}, write),
|
||||
|
||||
ok.
|
||||
ok;
|
||||
[] ->
|
||||
rabbit_log:warning("Can't remove binding: hash ring state for exchange ~s wasn't found",
|
||||
[rabbit_misc:rs(S)]),
|
||||
ok
|
||||
end.
|
||||
|
||||
|
||||
assert_args_equivalence(X, Args) ->
|
||||
rabbit_exchange:assert_args_equivalence(X, Args).
|
||||
|
||||
bucket_count_of(X) ->
|
||||
case ets:lookup(?BUCKET_COUNT_TABLE, X) of
|
||||
[] -> 0;
|
||||
[#bucket_count{count = N}] -> N
|
||||
end.
|
||||
|
||||
init() ->
|
||||
mnesia:create_table(?BUCKET_TABLE, [{record_name, bucket},
|
||||
{attributes, record_info(fields, bucket)},
|
||||
{type, ordered_set}]),
|
||||
mnesia:create_table(?BUCKET_COUNT_TABLE, [{record_name, bucket_count},
|
||||
{attributes, record_info(fields, bucket_count)},
|
||||
{type, ordered_set}]),
|
||||
mnesia:create_table(?BINDING_BUCKET_TABLE, [{record_name, binding_buckets},
|
||||
{attributes, record_info(fields, binding_buckets)},
|
||||
{type, ordered_set}]),
|
||||
|
||||
mnesia:add_table_copy(?BUCKET_TABLE, node(), ram_copies),
|
||||
mnesia:add_table_copy(?BUCKET_COUNT_TABLE, node(), ram_copies),
|
||||
mnesia:add_table_copy(?BINDING_BUCKET_TABLE, node(), ram_copies),
|
||||
|
||||
mnesia:wait_for_tables([?BUCKET_TABLE], 30000),
|
||||
mnesia:create_table(?HASH_RING_STATE_TABLE, [{record_name, chx_hash_ring},
|
||||
{attributes, record_info(fields, chx_hash_ring)},
|
||||
{type, ordered_set}]),
|
||||
mnesia:add_table_copy(?HASH_RING_STATE_TABLE, node(), ram_copies),
|
||||
mnesia:wait_for_tables([?HASH_RING_STATE_TABLE], 30000),
|
||||
ok.
|
||||
|
||||
%%
|
||||
|
|
|
|||
Loading…
Reference in New Issue