Store hash ring state in a single table
This implementation is significantly simpler and doesn't perform nearly as many Mnesia operations. Pair: @dcorbacho. References #37, #38. [#159822323]
This commit is contained in:
parent
e5e9316b9e
commit
a2d4f0bd87
|
|
@ -27,25 +27,12 @@
|
||||||
-export([init/0]).
|
-export([init/0]).
|
||||||
-export([info/1, info/2]).
|
-export([info/1, info/2]).
|
||||||
|
|
||||||
-record(bucket, {
|
-record(chx_hash_ring, {
|
||||||
%% a {resource, bucket} pair
|
%% a resource
|
||||||
%% where bucket is a non-negative integer
|
exchange,
|
||||||
id,
|
%% a map of bucket => queue | exchange
|
||||||
%% a resource
|
bucket_map,
|
||||||
queue
|
next_bucket_number
|
||||||
}).
|
|
||||||
|
|
||||||
-record(bucket_count, {
|
|
||||||
exchange,
|
|
||||||
count
|
|
||||||
}).
|
|
||||||
|
|
||||||
-record(binding_buckets, {
|
|
||||||
%% an {exchange, queue} pair because we
|
|
||||||
%% assume that there's only one binding between
|
|
||||||
%% a consistent hash exchange and a queue
|
|
||||||
id,
|
|
||||||
bucket_numbers = []
|
|
||||||
}).
|
}).
|
||||||
|
|
||||||
-rabbit_boot_step(
|
-rabbit_boot_step(
|
||||||
|
|
@ -66,17 +53,9 @@
|
||||||
{enables, external_infrastructure}]}).
|
{enables, external_infrastructure}]}).
|
||||||
|
|
||||||
%% This data model allows for efficient routing and exchange deletion
|
%% This data model allows for efficient routing and exchange deletion
|
||||||
%% but not efficient binding management. This is a future area of improvement.
|
%% but less efficient (linear) binding management.
|
||||||
%% A couple of alternatives were considered, e.g. storing the entire ring state
|
|
||||||
%% in a single map. Without an additional structure such as a balanced tree
|
|
||||||
%% ring updates would be even less efficient (but easier to follow).
|
|
||||||
|
|
||||||
%% maps buckets to queues
|
-define(HASH_RING_STATE_TABLE, rabbit_exchange_type_consistent_hash_ring_state).
|
||||||
-define(BUCKET_TABLE, rabbit_exchange_type_consistent_hash_bucket_queue).
|
|
||||||
%% maps exchange to the total number of buckets
|
|
||||||
-define(BUCKET_COUNT_TABLE, rabbit_exchange_type_consistent_hash_bucket_count).
|
|
||||||
%% maps {exchange, queue} pairs to a list of buckets
|
|
||||||
-define(BINDING_BUCKET_TABLE, rabbit_exchange_type_consistent_hash_binding_bucket).
|
|
||||||
|
|
||||||
-define(PROPERTIES, [<<"correlation_id">>, <<"message_id">>, <<"timestamp">>]).
|
-define(PROPERTIES, [<<"correlation_id">>, <<"message_id">>, <<"timestamp">>]).
|
||||||
|
|
||||||
|
|
@ -91,21 +70,26 @@ description() ->
|
||||||
|
|
||||||
serialise_events() -> false.
|
serialise_events() -> false.
|
||||||
|
|
||||||
route(#exchange { name = Name,
|
route(#exchange {name = Name,
|
||||||
arguments = Args },
|
arguments = Args},
|
||||||
#delivery { message = Msg }) ->
|
#delivery {message = Msg}) ->
|
||||||
case ets:lookup(?BUCKET_COUNT_TABLE, Name) of
|
case ets:lookup(?HASH_RING_STATE_TABLE, Name) of
|
||||||
[] ->
|
[] ->
|
||||||
[];
|
[];
|
||||||
[#bucket_count{count = N}] ->
|
[#chx_hash_ring{bucket_map = BM}] ->
|
||||||
K = value_to_hash(hash_on(Args), Msg),
|
case maps:size(BM) of
|
||||||
SelectedBucket = jump_consistent_hash(K, N),
|
0 -> [];
|
||||||
case mnesia:dirty_read({?BUCKET_TABLE, {Name, SelectedBucket}}) of
|
N ->
|
||||||
[Bucket] -> [Bucket#bucket.queue];
|
K = value_to_hash(hash_on(Args), Msg),
|
||||||
[] -> rabbit_log:warning("Bucket ~p not found", [SelectedBucket]),
|
SelectedBucket = jump_consistent_hash(K, N),
|
||||||
[]
|
|
||||||
end
|
|
||||||
|
|
||||||
|
case maps:get(SelectedBucket, BM, undefined) of
|
||||||
|
undefined ->
|
||||||
|
rabbit_log:warning("Bucket ~p not found", [SelectedBucket]),
|
||||||
|
[];
|
||||||
|
Queue -> [Queue]
|
||||||
|
end
|
||||||
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
validate(#exchange { arguments = Args }) ->
|
validate(#exchange { arguments = Args }) ->
|
||||||
|
|
@ -139,63 +123,64 @@ validate_binding(_X, #binding { key = K }) ->
|
||||||
{error, {binding_invalid, "The binding key must be an integer: ~p", [K]}}
|
{error, {binding_invalid, "The binding key must be an integer: ~p", [K]}}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
create(_Tx, _X) -> ok.
|
maybe_initialise_hash_ring_state(transaction, X) ->
|
||||||
|
case mnesia:read(?HASH_RING_STATE_TABLE, X) of
|
||||||
|
[_] -> ok;
|
||||||
|
[] ->
|
||||||
|
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||||
|
ok = mnesia:write(?HASH_RING_STATE_TABLE, #chx_hash_ring{
|
||||||
|
exchange = X,
|
||||||
|
next_bucket_number = 0,
|
||||||
|
bucket_map = #{}}, write)
|
||||||
|
end;
|
||||||
|
|
||||||
|
maybe_initialise_hash_ring_state(_, X) ->
|
||||||
|
rabbit_misc:execute_mnesia_transaction(
|
||||||
|
fun() -> maybe_initialise_hash_ring_state(transaction, X) end).
|
||||||
|
|
||||||
|
create(transaction, X) ->
|
||||||
|
maybe_initialise_hash_ring_state(transaction, X);
|
||||||
|
create(Tx, X) ->
|
||||||
|
maybe_initialise_hash_ring_state(Tx, X).
|
||||||
|
|
||||||
delete(transaction, #exchange{name = Name}, _Bs) ->
|
delete(transaction, #exchange{name = Name}, _Bs) ->
|
||||||
ok = mnesia:write_lock_table(?BUCKET_TABLE),
|
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||||
ok = mnesia:write_lock_table(?BUCKET_COUNT_TABLE),
|
|
||||||
|
|
||||||
Numbers = mnesia:select(?BUCKET_TABLE, [{
|
ok = mnesia:delete({?HASH_RING_STATE_TABLE, Name});
|
||||||
#bucket{id = {Name, '$1'}, _ = '_'},
|
|
||||||
[],
|
|
||||||
['$1']
|
|
||||||
}]),
|
|
||||||
[mnesia:delete({?BUCKET_TABLE, {Name, N}})
|
|
||||||
|| N <- Numbers],
|
|
||||||
|
|
||||||
Queues = mnesia:select(?BINDING_BUCKET_TABLE,
|
|
||||||
[{
|
|
||||||
#binding_buckets{id = {Name, '$1'}, _ = '_'},
|
|
||||||
[],
|
|
||||||
['$1']
|
|
||||||
}]),
|
|
||||||
[mnesia:delete({?BINDING_BUCKET_TABLE, {Name, Q}})
|
|
||||||
|| Q <- Queues],
|
|
||||||
|
|
||||||
mnesia:delete({?BUCKET_COUNT_TABLE, Name}),
|
|
||||||
ok;
|
|
||||||
delete(_Tx, _X, _Bs) ->
|
delete(_Tx, _X, _Bs) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
policy_changed(_X1, _X2) -> ok.
|
policy_changed(_X1, _X2) -> ok.
|
||||||
|
|
||||||
add_binding(transaction, _X,
|
add_binding(transaction, X,
|
||||||
#binding{source = S, destination = D, key = K}) ->
|
B = #binding{source = S, destination = D, key = K}) ->
|
||||||
Weight = rabbit_data_coercion:to_integer(K),
|
Weight = rabbit_data_coercion:to_integer(K),
|
||||||
|
|
||||||
mnesia:write_lock_table(?BUCKET_TABLE),
|
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||||
mnesia:write_lock_table(?BUCKET_COUNT_TABLE),
|
|
||||||
|
|
||||||
LastBucketNum = bucket_count_of(S),
|
case mnesia:read(?HASH_RING_STATE_TABLE, S) of
|
||||||
NewBucketCount = LastBucketNum + Weight,
|
[State0 = #chx_hash_ring{bucket_map = BM0,
|
||||||
|
next_bucket_number = NexN0}] ->
|
||||||
|
NextN = NexN0 + Weight,
|
||||||
|
%% hi/lo bucket counters are 0-based but weight is 1-based
|
||||||
|
Range = lists:seq(NexN0, (NextN - 1)),
|
||||||
|
BM = lists:foldl(fun(Key, Acc) ->
|
||||||
|
maps:put(Key, D, Acc)
|
||||||
|
end, BM0, Range),
|
||||||
|
State = State0#chx_hash_ring{bucket_map = BM,
|
||||||
|
next_bucket_number = NextN},
|
||||||
|
|
||||||
Numbers = lists:seq(LastBucketNum, (NewBucketCount - 1)),
|
ok = mnesia:write(?HASH_RING_STATE_TABLE, State, write),
|
||||||
Buckets = [#bucket{id = {S, I}, queue = D} || I <- Numbers],
|
ok;
|
||||||
|
[] ->
|
||||||
[ok = mnesia:write(?BUCKET_TABLE, B, write) || B <- Buckets],
|
maybe_initialise_hash_ring_state(transaction, S),
|
||||||
|
add_binding(transaction, X, B)
|
||||||
mnesia:write(?BINDING_BUCKET_TABLE, #binding_buckets{id = {S, D},
|
end;
|
||||||
bucket_numbers = Numbers}, write),
|
|
||||||
mnesia:write(?BUCKET_COUNT_TABLE, #bucket_count{exchange = S,
|
|
||||||
count = NewBucketCount}, write),
|
|
||||||
|
|
||||||
ok;
|
|
||||||
add_binding(none, _X, _B) ->
|
add_binding(none, _X, _B) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
remove_bindings(transaction, _X, Bindings) ->
|
remove_bindings(transaction, _X, Bindings) ->
|
||||||
mnesia:write_lock_table(?BUCKET_TABLE),
|
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||||
mnesia:write_lock_table(?BUCKET_COUNT_TABLE),
|
|
||||||
|
|
||||||
[remove_binding(B) || B <- Bindings],
|
[remove_binding(B) || B <- Bindings],
|
||||||
|
|
||||||
|
|
@ -203,78 +188,52 @@ remove_bindings(transaction, _X, Bindings) ->
|
||||||
remove_bindings(none, _X, _Bs) ->
|
remove_bindings(none, _X, _Bs) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
remove_binding(#binding{source = S, destination = D, key = K}) ->
|
remove_binding(#binding{source = S, destination = D, key = RK}) ->
|
||||||
Weight = rabbit_data_coercion:to_integer(K),
|
Weight = rabbit_data_coercion:to_integer(RK),
|
||||||
|
|
||||||
[#binding_buckets{bucket_numbers = Numbers}] = mnesia:read(?BINDING_BUCKET_TABLE, {S, D}),
|
mnesia:write_lock_table(?HASH_RING_STATE_TABLE),
|
||||||
LastNum = lists:last(Numbers),
|
|
||||||
|
|
||||||
%% Delete all buckets for this {exchange, queue} pair
|
case mnesia:read(?HASH_RING_STATE_TABLE, S) of
|
||||||
[ok = mnesia:delete(?BUCKET_TABLE, {S, N}, write) || N <- Numbers],
|
[State0 = #chx_hash_ring{bucket_map = BM0,
|
||||||
|
next_bucket_number = NexN0}] ->
|
||||||
|
%% Buckets with lower numbers stay as is; buckets that
|
||||||
|
%% belong to this binding are removed; buckets with
|
||||||
|
%% greater numbers are updated (their numbers are adjusted downwards by weight)
|
||||||
|
BucketsOfThisBinding = maps:filter(fun (_K, V) -> V =:= D end, BM0),
|
||||||
|
LastBucket = lists:last(maps:keys(BucketsOfThisBinding)),
|
||||||
|
BucketsDownTheRing = maps:filter(fun (K, _) -> K > LastBucket end, BM0),
|
||||||
|
|
||||||
%% Buckets with lower numbers stay as is; buckets that
|
%% hash ring state without the buckets of this binding
|
||||||
%% belong to this binding are removed; buckets with
|
BM1 = maps:fold(fun(K, _, Acc) -> maps:remove(K, Acc) end, BM0, BucketsOfThisBinding),
|
||||||
%% greater numbers are updated (their numbers are adjusted downwards by weight)
|
%% final state with "down the ring" buckets updated
|
||||||
BucketsToUpdate = mnesia:select(?BUCKET_TABLE, [{
|
BM2 = maps:fold(fun(K0, V, Acc) ->
|
||||||
#bucket{id = {S, '$1'}, _ = '_'},
|
M = maps:remove(K0, Acc),
|
||||||
[
|
maps:put(K0 - Weight, V, M)
|
||||||
{'>', '$1', LastNum}
|
end, BM1, BucketsDownTheRing),
|
||||||
],
|
|
||||||
['$_']
|
|
||||||
}]),
|
|
||||||
QueuesWithUpdatedBuckets = lists:usort([Q || #bucket{queue = Q} <- BucketsToUpdate]),
|
|
||||||
[ok = mnesia:delete(?BUCKET_TABLE, Id, write) || #bucket{id = Id} <- BucketsToUpdate],
|
|
||||||
|
|
||||||
UpdatedBuckets = [B#bucket{id = {X, N - Weight}} || B = #bucket{id = {X, N}} <- BucketsToUpdate],
|
NextN = NexN0 - Weight,
|
||||||
[ok = mnesia:write(?BUCKET_TABLE, B, write) || B <- UpdatedBuckets],
|
State = State0#chx_hash_ring{bucket_map = BM2,
|
||||||
|
next_bucket_number = NextN},
|
||||||
|
|
||||||
%% There will be no buckets for this {exchange, queue} pair to track
|
ok = mnesia:write(?HASH_RING_STATE_TABLE, State, write),
|
||||||
ok = mnesia:delete(?BINDING_BUCKET_TABLE, {S, D}, write),
|
|
||||||
|
|
||||||
%% Update the counter
|
ok;
|
||||||
TotalBucketsForX = bucket_count_of(S),
|
[] ->
|
||||||
mnesia:write(?BUCKET_COUNT_TABLE, #bucket_count{exchange = S,
|
rabbit_log:warning("Can't remove binding: hash ring state for exchange ~s wasn't found",
|
||||||
count = TotalBucketsForX - Weight}, write),
|
[rabbit_misc:rs(S)]),
|
||||||
|
ok
|
||||||
%% Update bucket numbers
|
end.
|
||||||
[begin
|
|
||||||
case mnesia:read(?BINDING_BUCKET_TABLE, {S, Q}) of
|
|
||||||
[] -> ok;
|
|
||||||
[Val = #binding_buckets{bucket_numbers = BNs}] ->
|
|
||||||
NewBNs = [N - Weight || N <- BNs],
|
|
||||||
ok = mnesia:write(?BINDING_BUCKET_TABLE, Val#binding_buckets{bucket_numbers = NewBNs}, write)
|
|
||||||
end
|
|
||||||
end || Q <- QueuesWithUpdatedBuckets],
|
|
||||||
ok = mnesia:delete(?BINDING_BUCKET_TABLE, {S, D}, write),
|
|
||||||
|
|
||||||
ok.
|
|
||||||
|
|
||||||
|
|
||||||
assert_args_equivalence(X, Args) ->
|
assert_args_equivalence(X, Args) ->
|
||||||
rabbit_exchange:assert_args_equivalence(X, Args).
|
rabbit_exchange:assert_args_equivalence(X, Args).
|
||||||
|
|
||||||
bucket_count_of(X) ->
|
|
||||||
case ets:lookup(?BUCKET_COUNT_TABLE, X) of
|
|
||||||
[] -> 0;
|
|
||||||
[#bucket_count{count = N}] -> N
|
|
||||||
end.
|
|
||||||
|
|
||||||
init() ->
|
init() ->
|
||||||
mnesia:create_table(?BUCKET_TABLE, [{record_name, bucket},
|
mnesia:create_table(?HASH_RING_STATE_TABLE, [{record_name, chx_hash_ring},
|
||||||
{attributes, record_info(fields, bucket)},
|
{attributes, record_info(fields, chx_hash_ring)},
|
||||||
{type, ordered_set}]),
|
{type, ordered_set}]),
|
||||||
mnesia:create_table(?BUCKET_COUNT_TABLE, [{record_name, bucket_count},
|
mnesia:add_table_copy(?HASH_RING_STATE_TABLE, node(), ram_copies),
|
||||||
{attributes, record_info(fields, bucket_count)},
|
mnesia:wait_for_tables([?HASH_RING_STATE_TABLE], 30000),
|
||||||
{type, ordered_set}]),
|
|
||||||
mnesia:create_table(?BINDING_BUCKET_TABLE, [{record_name, binding_buckets},
|
|
||||||
{attributes, record_info(fields, binding_buckets)},
|
|
||||||
{type, ordered_set}]),
|
|
||||||
|
|
||||||
mnesia:add_table_copy(?BUCKET_TABLE, node(), ram_copies),
|
|
||||||
mnesia:add_table_copy(?BUCKET_COUNT_TABLE, node(), ram_copies),
|
|
||||||
mnesia:add_table_copy(?BINDING_BUCKET_TABLE, node(), ram_copies),
|
|
||||||
|
|
||||||
mnesia:wait_for_tables([?BUCKET_TABLE], 30000),
|
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue