diff --git a/deps/rabbitmq_prometheus/docker/grafana/dashboards/RabbitMQ-Quorum-Queues-Raft.json b/deps/rabbitmq_prometheus/docker/grafana/dashboards/RabbitMQ-Quorum-Queues-Raft.json index 55135804e1..751f8ef88c 100644 --- a/deps/rabbitmq_prometheus/docker/grafana/dashboards/RabbitMQ-Quorum-Queues-Raft.json +++ b/deps/rabbitmq_prometheus/docker/grafana/dashboards/RabbitMQ-Quorum-Queues-Raft.json @@ -43,7 +43,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1575376605605, + "iteration": 1578410270904, "links": [ { "icon": "doc", @@ -227,7 +227,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "rabbitmq_raft_entry_commit_latency * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info{rabbitmq_cluster=\"$rabbitmq_cluster\"}", + "expr": "rabbitmq_raft_entry_commit_latency_seconds * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info{rabbitmq_cluster=\"$rabbitmq_cluster\"}", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -250,7 +250,7 @@ "xBucketSize": null, "yAxis": { "decimals": null, - "format": "ms", + "format": "s", "logBase": 1, "max": null, "min": "0", diff --git a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl index 0b87926b73..fbbed8f01e 100644 --- a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl +++ b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl @@ -129,7 +129,7 @@ {2, disk_space_available_limit_bytes, gauge, "Free disk space low watermark in bytes", disk_free_limit}, {2, erlang_processes_limit, gauge, "Erlang processes limit", proc_total}, {2, erlang_scheduler_run_queue, gauge, "Erlang scheduler run queue", run_queue}, - {2, erlang_net_ticktime_seconds, gauge, "Inter-node heartbeat interval in seconds", net_ticktime} + {2, erlang_net_ticktime_seconds, gauge, "Inter-node heartbeat interval", net_ticktime} ]}, {node_persister_metrics, [ @@ -155,8 +155,7 @@ {3, raft_log_snapshot_index, gauge, "Raft log snapshot index"}, {4, raft_log_last_applied_index, gauge, "Raft log last applied index"}, {5, raft_log_commit_index, gauge, "Raft log commit index"}, - {6, raft_log_last_written_index, gauge, "Raft log last written index"}, - {7, raft_entry_commit_latency, gauge, "Time taken for an entry to be committed"} + {6, raft_log_last_written_index, gauge, "Raft log last written index"} ]}, {queue_coarse_metrics, [ @@ -201,11 +200,13 @@ {2, 1000000, io_sync_time_seconds_total, counter, "Total I/O sync time", io_sync_time}, {2, 1000000, io_seek_time_seconds_total, counter, "Total I/O seek time", io_seek_time}, {2, 1000000, io_open_attempt_time_seconds_total, counter, "Total file open attempts time", io_file_handle_open_attempt_time} + ]}, + + {ra_metrics, [ + {7, 1000, raft_entry_commit_latency_seconds, gauge, "Time taken for a log entry to be committed"} ]} ]). --define(METRICS, ?METRICS_RAW ++ ?METRICS_REQUIRING_CONVERSIONS). - -define(TOTALS, [ %% ordering differs from metrics above, refer to list comprehension {connection_created, connections, gauge, "Connections currently open"}, @@ -227,7 +228,11 @@ collect_mf(_Registry, Callback) -> [begin Data = ets:tab2list(Table), mf(Callback, Contents, Data) - end || {Table, Contents} <- ?METRICS], + end || {Table, Contents} <- ?METRICS_RAW], + [begin + Data = ets:tab2list(Table), + mf_convert(Callback, Contents, Data) + end || {Table, Contents} <- ?METRICS_REQUIRING_CONVERSIONS], [begin Size = ets:info(Table, size), mf_totals(Callback, Name, Type, Help, Size) @@ -295,7 +300,21 @@ mf(Callback, Contents, Data) -> {Type, Fun, Data} ) ) - end || {Index, Name, Type, Help, Key} <- Contents], + end || {Index, Name, Type, Help, Key} <- Contents]. + +mf_convert(Callback, Contents, Data) -> + [begin + Fun = fun(D) -> element(Index, D) / BaseUnitConversionFactor end, + Callback( + create_mf( + ?METRIC_NAME(Name), + Help, + catch_boolean(Type), + ?MODULE, + {Type, Fun, Data} + ) + ) + end || {Index, BaseUnitConversionFactor, Name, Type, Help} <- Contents], [begin Fun = fun(D) -> proplists:get_value(Key, element(Index, D)) / BaseUnitConversionFactor end, Callback( diff --git a/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl b/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl index e63b74cbf1..8482620cbc 100644 --- a/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl +++ b/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl @@ -198,6 +198,7 @@ metrics_test(Config) -> %% Checking the first metric value in each ETS table that requires converting ?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])), ?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])), + ?assertEqual(match, re:run(Body, "^rabbitmq_raft_entry_commit_latency_seconds{", [{capture, none}, multiline])), %% Checking the first TOTALS metric value ?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])).