Convert raft_entry_commit_latency to seconds & be explicit about unit
This is a follow-up to https://github.com/rabbitmq/ra/pull/160. Had to introduce mf_convert/3 so that the METRICS_REQUIRING_CONVERSIONS proplist does not clash with METRICS_RAW proplists that have the same number of elements. This is begging to be refactored, but I know that @dcorbacho is working on https://github.com/rabbitmq/rabbitmq-prometheus/issues/26. Also modified the RabbitMQ-Quorum-Queues-Raft dashboard. Signed-off-by: Gerhard Lazu <gerhard@lazu.co.uk>
This commit is contained in:
parent
5602a9eb4c
commit
89efb964d9
|
|
@ -43,7 +43,7 @@
|
|||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"iteration": 1575376605605,
|
||||
"iteration": 1578410270904,
|
||||
"links": [
|
||||
{
|
||||
"icon": "doc",
|
||||
|
|
@ -227,7 +227,7 @@
|
|||
"reverseYBuckets": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rabbitmq_raft_entry_commit_latency * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info{rabbitmq_cluster=\"$rabbitmq_cluster\"}",
|
||||
"expr": "rabbitmq_raft_entry_commit_latency_seconds * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info{rabbitmq_cluster=\"$rabbitmq_cluster\"}",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
|
|
@ -250,7 +250,7 @@
|
|||
"xBucketSize": null,
|
||||
"yAxis": {
|
||||
"decimals": null,
|
||||
"format": "ms",
|
||||
"format": "s",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0",
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@
|
|||
{2, disk_space_available_limit_bytes, gauge, "Free disk space low watermark in bytes", disk_free_limit},
|
||||
{2, erlang_processes_limit, gauge, "Erlang processes limit", proc_total},
|
||||
{2, erlang_scheduler_run_queue, gauge, "Erlang scheduler run queue", run_queue},
|
||||
{2, erlang_net_ticktime_seconds, gauge, "Inter-node heartbeat interval in seconds", net_ticktime}
|
||||
{2, erlang_net_ticktime_seconds, gauge, "Inter-node heartbeat interval", net_ticktime}
|
||||
]},
|
||||
|
||||
{node_persister_metrics, [
|
||||
|
|
@ -155,8 +155,7 @@
|
|||
{3, raft_log_snapshot_index, gauge, "Raft log snapshot index"},
|
||||
{4, raft_log_last_applied_index, gauge, "Raft log last applied index"},
|
||||
{5, raft_log_commit_index, gauge, "Raft log commit index"},
|
||||
{6, raft_log_last_written_index, gauge, "Raft log last written index"},
|
||||
{7, raft_entry_commit_latency, gauge, "Time taken for an entry to be committed"}
|
||||
{6, raft_log_last_written_index, gauge, "Raft log last written index"}
|
||||
]},
|
||||
|
||||
{queue_coarse_metrics, [
|
||||
|
|
@ -201,11 +200,13 @@
|
|||
{2, 1000000, io_sync_time_seconds_total, counter, "Total I/O sync time", io_sync_time},
|
||||
{2, 1000000, io_seek_time_seconds_total, counter, "Total I/O seek time", io_seek_time},
|
||||
{2, 1000000, io_open_attempt_time_seconds_total, counter, "Total file open attempts time", io_file_handle_open_attempt_time}
|
||||
]},
|
||||
|
||||
{ra_metrics, [
|
||||
{7, 1000, raft_entry_commit_latency_seconds, gauge, "Time taken for a log entry to be committed"}
|
||||
]}
|
||||
]).
|
||||
|
||||
-define(METRICS, ?METRICS_RAW ++ ?METRICS_REQUIRING_CONVERSIONS).
|
||||
|
||||
-define(TOTALS, [
|
||||
%% ordering differs from metrics above, refer to list comprehension
|
||||
{connection_created, connections, gauge, "Connections currently open"},
|
||||
|
|
@ -227,7 +228,11 @@ collect_mf(_Registry, Callback) ->
|
|||
[begin
|
||||
Data = ets:tab2list(Table),
|
||||
mf(Callback, Contents, Data)
|
||||
end || {Table, Contents} <- ?METRICS],
|
||||
end || {Table, Contents} <- ?METRICS_RAW],
|
||||
[begin
|
||||
Data = ets:tab2list(Table),
|
||||
mf_convert(Callback, Contents, Data)
|
||||
end || {Table, Contents} <- ?METRICS_REQUIRING_CONVERSIONS],
|
||||
[begin
|
||||
Size = ets:info(Table, size),
|
||||
mf_totals(Callback, Name, Type, Help, Size)
|
||||
|
|
@ -295,7 +300,21 @@ mf(Callback, Contents, Data) ->
|
|||
{Type, Fun, Data}
|
||||
)
|
||||
)
|
||||
end || {Index, Name, Type, Help, Key} <- Contents],
|
||||
end || {Index, Name, Type, Help, Key} <- Contents].
|
||||
|
||||
mf_convert(Callback, Contents, Data) ->
|
||||
[begin
|
||||
Fun = fun(D) -> element(Index, D) / BaseUnitConversionFactor end,
|
||||
Callback(
|
||||
create_mf(
|
||||
?METRIC_NAME(Name),
|
||||
Help,
|
||||
catch_boolean(Type),
|
||||
?MODULE,
|
||||
{Type, Fun, Data}
|
||||
)
|
||||
)
|
||||
end || {Index, BaseUnitConversionFactor, Name, Type, Help} <- Contents],
|
||||
[begin
|
||||
Fun = fun(D) -> proplists:get_value(Key, element(Index, D)) / BaseUnitConversionFactor end,
|
||||
Callback(
|
||||
|
|
|
|||
|
|
@ -198,6 +198,7 @@ metrics_test(Config) ->
|
|||
%% Checking the first metric value in each ETS table that requires converting
|
||||
?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])),
|
||||
?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])),
|
||||
?assertEqual(match, re:run(Body, "^rabbitmq_raft_entry_commit_latency_seconds{", [{capture, none}, multiline])),
|
||||
%% Checking the first TOTALS metric value
|
||||
?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])).
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue