Convert raft_entry_commit_latency to seconds & be explicit about unit
This is a follow-up to https://github.com/rabbitmq/ra/pull/160. Had to introduce mf_convert/3 so that the METRICS_REQUIRING_CONVERSIONS proplist does not clash with the METRICS_RAW proplists that have the same number of elements. This is begging to be refactored, but I know that @dcorbacho is working on https://github.com/rabbitmq/rabbitmq-prometheus/issues/26. Also modified the RabbitMQ-Quorum-Queues-Raft dashboard. Signed-off-by: Gerhard Lazu <gerhard@lazu.co.uk>
This commit is contained in:
		
							parent
							
								
									5602a9eb4c
								
							
						
					
					
						commit
						89efb964d9
					
				| 
						 | 
					@ -43,7 +43,7 @@
 | 
				
			||||||
  "gnetId": null,
 | 
					  "gnetId": null,
 | 
				
			||||||
  "graphTooltip": 1,
 | 
					  "graphTooltip": 1,
 | 
				
			||||||
  "id": null,
 | 
					  "id": null,
 | 
				
			||||||
  "iteration": 1575376605605,
 | 
					  "iteration": 1578410270904,
 | 
				
			||||||
  "links": [
 | 
					  "links": [
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      "icon": "doc",
 | 
					      "icon": "doc",
 | 
				
			||||||
| 
						 | 
					@ -227,7 +227,7 @@
 | 
				
			||||||
      "reverseYBuckets": false,
 | 
					      "reverseYBuckets": false,
 | 
				
			||||||
      "targets": [
 | 
					      "targets": [
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
          "expr": "rabbitmq_raft_entry_commit_latency * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info{rabbitmq_cluster=\"$rabbitmq_cluster\"}",
 | 
					          "expr": "rabbitmq_raft_entry_commit_latency_seconds * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info{rabbitmq_cluster=\"$rabbitmq_cluster\"}",
 | 
				
			||||||
          "format": "time_series",
 | 
					          "format": "time_series",
 | 
				
			||||||
          "instant": false,
 | 
					          "instant": false,
 | 
				
			||||||
          "intervalFactor": 1,
 | 
					          "intervalFactor": 1,
 | 
				
			||||||
| 
						 | 
					@ -250,7 +250,7 @@
 | 
				
			||||||
      "xBucketSize": null,
 | 
					      "xBucketSize": null,
 | 
				
			||||||
      "yAxis": {
 | 
					      "yAxis": {
 | 
				
			||||||
        "decimals": null,
 | 
					        "decimals": null,
 | 
				
			||||||
        "format": "ms",
 | 
					        "format": "s",
 | 
				
			||||||
        "logBase": 1,
 | 
					        "logBase": 1,
 | 
				
			||||||
        "max": null,
 | 
					        "max": null,
 | 
				
			||||||
        "min": "0",
 | 
					        "min": "0",
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -129,7 +129,7 @@
 | 
				
			||||||
        {2, disk_space_available_limit_bytes, gauge, "Free disk space low watermark in bytes", disk_free_limit},
 | 
					        {2, disk_space_available_limit_bytes, gauge, "Free disk space low watermark in bytes", disk_free_limit},
 | 
				
			||||||
        {2, erlang_processes_limit, gauge, "Erlang processes limit", proc_total},
 | 
					        {2, erlang_processes_limit, gauge, "Erlang processes limit", proc_total},
 | 
				
			||||||
        {2, erlang_scheduler_run_queue, gauge, "Erlang scheduler run queue", run_queue},
 | 
					        {2, erlang_scheduler_run_queue, gauge, "Erlang scheduler run queue", run_queue},
 | 
				
			||||||
        {2, erlang_net_ticktime_seconds, gauge, "Inter-node heartbeat interval in seconds", net_ticktime}
 | 
					        {2, erlang_net_ticktime_seconds, gauge, "Inter-node heartbeat interval", net_ticktime}
 | 
				
			||||||
    ]},
 | 
					    ]},
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    {node_persister_metrics, [
 | 
					    {node_persister_metrics, [
 | 
				
			||||||
| 
						 | 
					@ -155,8 +155,7 @@
 | 
				
			||||||
        {3, raft_log_snapshot_index, gauge, "Raft log snapshot index"},
 | 
					        {3, raft_log_snapshot_index, gauge, "Raft log snapshot index"},
 | 
				
			||||||
        {4, raft_log_last_applied_index, gauge, "Raft log last applied index"},
 | 
					        {4, raft_log_last_applied_index, gauge, "Raft log last applied index"},
 | 
				
			||||||
        {5, raft_log_commit_index, gauge, "Raft log commit index"},
 | 
					        {5, raft_log_commit_index, gauge, "Raft log commit index"},
 | 
				
			||||||
        {6, raft_log_last_written_index, gauge, "Raft log last written index"},
 | 
					        {6, raft_log_last_written_index, gauge, "Raft log last written index"}
 | 
				
			||||||
        {7, raft_entry_commit_latency, gauge, "Time taken for an entry to be committed"}
 | 
					 | 
				
			||||||
    ]},
 | 
					    ]},
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    {queue_coarse_metrics, [
 | 
					    {queue_coarse_metrics, [
 | 
				
			||||||
| 
						 | 
					@ -201,11 +200,13 @@
 | 
				
			||||||
        {2, 1000000, io_sync_time_seconds_total, counter, "Total I/O sync time", io_sync_time},
 | 
					        {2, 1000000, io_sync_time_seconds_total, counter, "Total I/O sync time", io_sync_time},
 | 
				
			||||||
        {2, 1000000, io_seek_time_seconds_total, counter, "Total I/O seek time", io_seek_time},
 | 
					        {2, 1000000, io_seek_time_seconds_total, counter, "Total I/O seek time", io_seek_time},
 | 
				
			||||||
        {2, 1000000, io_open_attempt_time_seconds_total, counter, "Total file open attempts time", io_file_handle_open_attempt_time}
 | 
					        {2, 1000000, io_open_attempt_time_seconds_total, counter, "Total file open attempts time", io_file_handle_open_attempt_time}
 | 
				
			||||||
 | 
					    ]},
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    {ra_metrics, [
 | 
				
			||||||
 | 
					        {7, 1000, raft_entry_commit_latency_seconds, gauge, "Time taken for a log entry to be committed"}
 | 
				
			||||||
    ]}
 | 
					    ]}
 | 
				
			||||||
]).
 | 
					]).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-define(METRICS, ?METRICS_RAW ++ ?METRICS_REQUIRING_CONVERSIONS).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-define(TOTALS, [
 | 
					-define(TOTALS, [
 | 
				
			||||||
    %% ordering differs from metrics above, refer to list comprehension
 | 
					    %% ordering differs from metrics above, refer to list comprehension
 | 
				
			||||||
    {connection_created, connections, gauge, "Connections currently open"},
 | 
					    {connection_created, connections, gauge, "Connections currently open"},
 | 
				
			||||||
| 
						 | 
					@ -227,7 +228,11 @@ collect_mf(_Registry, Callback) ->
 | 
				
			||||||
    [begin
 | 
					    [begin
 | 
				
			||||||
         Data = ets:tab2list(Table),
 | 
					         Data = ets:tab2list(Table),
 | 
				
			||||||
         mf(Callback, Contents, Data)
 | 
					         mf(Callback, Contents, Data)
 | 
				
			||||||
     end || {Table, Contents} <- ?METRICS],
 | 
					     end || {Table, Contents} <- ?METRICS_RAW],
 | 
				
			||||||
 | 
					    [begin
 | 
				
			||||||
 | 
					         Data = ets:tab2list(Table),
 | 
				
			||||||
 | 
					         mf_convert(Callback, Contents, Data)
 | 
				
			||||||
 | 
					     end || {Table, Contents} <- ?METRICS_REQUIRING_CONVERSIONS],
 | 
				
			||||||
    [begin
 | 
					    [begin
 | 
				
			||||||
         Size = ets:info(Table, size),
 | 
					         Size = ets:info(Table, size),
 | 
				
			||||||
         mf_totals(Callback, Name, Type, Help, Size)
 | 
					         mf_totals(Callback, Name, Type, Help, Size)
 | 
				
			||||||
| 
						 | 
					@ -295,7 +300,21 @@ mf(Callback, Contents, Data) ->
 | 
				
			||||||
                {Type, Fun, Data}
 | 
					                {Type, Fun, Data}
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
    end || {Index, Name, Type, Help, Key} <- Contents],
 | 
					    end || {Index, Name, Type, Help, Key} <- Contents].
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mf_convert(Callback, Contents, Data) ->
 | 
				
			||||||
 | 
					    [begin
 | 
				
			||||||
 | 
					        Fun = fun(D) -> element(Index, D) / BaseUnitConversionFactor end,
 | 
				
			||||||
 | 
					        Callback(
 | 
				
			||||||
 | 
					            create_mf(
 | 
				
			||||||
 | 
					                ?METRIC_NAME(Name),
 | 
				
			||||||
 | 
					                Help,
 | 
				
			||||||
 | 
					                catch_boolean(Type),
 | 
				
			||||||
 | 
					                ?MODULE,
 | 
				
			||||||
 | 
					                {Type, Fun, Data}
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					    end || {Index, BaseUnitConversionFactor, Name, Type, Help} <- Contents],
 | 
				
			||||||
    [begin
 | 
					    [begin
 | 
				
			||||||
        Fun = fun(D) -> proplists:get_value(Key, element(Index, D)) / BaseUnitConversionFactor end,
 | 
					        Fun = fun(D) -> proplists:get_value(Key, element(Index, D)) / BaseUnitConversionFactor end,
 | 
				
			||||||
        Callback(
 | 
					        Callback(
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -198,6 +198,7 @@ metrics_test(Config) ->
 | 
				
			||||||
    %% Checking the first metric value in each ETS table that requires converting
 | 
					    %% Checking the first metric value in each ETS table that requires converting
 | 
				
			||||||
    ?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])),
 | 
					    ?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])),
 | 
				
			||||||
    ?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])),
 | 
					    ?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])),
 | 
				
			||||||
 | 
					    ?assertEqual(match, re:run(Body, "^rabbitmq_raft_entry_commit_latency_seconds{", [{capture, none}, multiline])),
 | 
				
			||||||
    %% Checking the first TOTALS metric value
 | 
					    %% Checking the first TOTALS metric value
 | 
				
			||||||
    ?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])).
 | 
					    ?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue