Add first version of RabbitMQ Raft metrics

Depends on https://github.com/rabbitmq/ra/tree/metrics_tweaks &
https://github.com/rabbitmq/rabbitmq-server/tree/qq_metrics_tweak

[#166819045]
This commit is contained in:
Gerhard Lazu 2019-06-20 20:11:31 +01:00
parent 31aa440bc4
commit 5e280c0281
9 changed files with 564 additions and 2 deletions

View File

@ -297,6 +297,10 @@ RUN rm /plugins/rabbitmq_management*.ez
COPY plugins/rabbitmq_management*.ez /plugins/ COPY plugins/rabbitmq_management*.ez /plugins/
RUN rm /plugins/rabbitmq_prometheus*.ez RUN rm /plugins/rabbitmq_prometheus*.ez
COPY plugins/rabbitmq_prometheus*.ez /plugins/ COPY plugins/rabbitmq_prometheus*.ez /plugins/
RUN rm /plugins/ra-*.ez
COPY plugins/ra-*.ez /plugins/
RUN rm /opt/rabbitmq/ebin/rabbit_quorum_queue.beam
COPY plugins/rabbit_quorum_queue.beam /opt/rabbitmq/ebin/
ARG RABBITMQ_PROMETHEUS_VERSION ARG RABBITMQ_PROMETHEUS_VERSION
RUN chmod --recursive --verbose a+r /plugins/*.ez && \ RUN chmod --recursive --verbose a+r /plugins/*.ez && \

View File

@ -132,6 +132,7 @@ DASHBOARDS_TO_PATH := $(CURDIR)/docker/grafana/dashboards
.PHONY: update_dashboards .PHONY: update_dashboards
update_dashboards: update_dashboards:
mv -fv $(DASHBOARDS_FROM_PATH)/RabbitMQ-Overview-*.json $(DASHBOARDS_TO_PATH)/RabbitMQ-Overview.json ; \ mv -fv $(DASHBOARDS_FROM_PATH)/RabbitMQ-Overview-*.json $(DASHBOARDS_TO_PATH)/RabbitMQ-Overview.json ; \
mv -fv $(DASHBOARDS_FROM_PATH)/RabbitMQ-Raft-*.json $(DASHBOARDS_TO_PATH)/RabbitMQ-Raft.json ; \
mv -fv $(DASHBOARDS_FROM_PATH)/Erlang-Distribution-*.json $(DASHBOARDS_TO_PATH)/Erlang-Distribution.json ; \ mv -fv $(DASHBOARDS_FROM_PATH)/Erlang-Distribution-*.json $(DASHBOARDS_TO_PATH)/Erlang-Distribution.json ; \
mv -fv $(DASHBOARDS_FROM_PATH)/Erlang-Memory-Allocators-*.json $(DASHBOARDS_TO_PATH)/Erlang-Memory-Allocators.json ; \ mv -fv $(DASHBOARDS_FROM_PATH)/Erlang-Memory-Allocators-*.json $(DASHBOARDS_TO_PATH)/Erlang-Memory-Allocators.json ; \
true true

View File

@ -0,0 +1,64 @@
# https://docs.docker.com/compose/compose-file/
#
# Three RabbitMQ nodes (rmq0-qq, rmq1-qq, rmq2-qq) plus one PerfTest container
# that declares quorum queues (QUEUE_ARGS below) and publishes/consumes on them.
version: "3.6"

# https://docs.docker.com/compose/compose-file/#networks
networks:
  rabbitmq-prometheus:

# https://docs.docker.com/compose/compose-file/#volumes
volumes:
  rabbitmq-prometheus_prometheus:
  rabbitmq-prometheus_grafana:

services:
  # First node. The &rabbitmq anchor is merged into rmq1-qq and rmq2-qq, which
  # override only the hostname and the published host ports.
  rmq0-qq: &rabbitmq
    # https://hub.docker.com/r/pivotalrabbitmq/rabbitmq-prometheus/tags
    image: pivotalrabbitmq/rabbitmq-prometheus:3.8.0-alpha.700-2019.06.20
    networks:
      - "rabbitmq-prometheus"
    # host:container. Each node publishes the same three container ports
    # (5672, 15672, 15692) on its own distinct host ports.
    ports:
      - "5679:5672"
      - "15679:15672"
      - "15699:15692"
    # https://unix.stackexchange.com/questions/71940/killing-tcp-connection-in-linux
    # https://en.wikipedia.org/wiki/Tcpkill
    # https://www.digitalocean.com/community/tutorials/iptables-essentials-common-firewall-rules-and-commands#block-an-ip-address
    # NOTE(review): all capabilities are granted, presumably so the tools linked
    # above (tcpkill, iptables) can be run inside the container - confirm, and
    # do not reuse this setting outside of a local test environment.
    cap_add:
      - ALL
    hostname: rmq0-qq
    environment:
      RABBITMQ_ERLANG_COOKIE: rabbitmq-qq
    volumes:
      - ./rabbitmq-qq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./rabbitmq-qq-definitions.json:/etc/rabbitmq/rabbitmq-definitions.json:ro
  rmq1-qq:
    << : *rabbitmq
    hostname: rmq1-qq
    ports:
      - "5680:5672"
      - "15680:15672"
      - "15700:15692"
  rmq2-qq:
    << : *rabbitmq
    hostname: rmq2-qq
    ports:
      - "5681:5672"
      - "15681:15672"
      - "15701:15692"
  stress-test-qq:
    image: &perf-test-image pivotalrabbitmq/perf-test:dev-2019.06.10
    networks:
      - "rabbitmq-prometheus"
    environment:
      # One URI per cluster node: rmq0-qq, rmq1-qq and rmq2-qq.
      URIS: "amqp://guest:guest@rmq0-qq:5672/%2f,amqp://guest:guest@rmq1-qq:5672/%2f,amqp://guest:guest@rmq2-qq:5672/%2f"
      # Queues qq1 .. qq10, declared as quorum queues capped at 1000 messages.
      QUEUE_PATTERN: "qq%d"
      QUEUE_PATTERN_FROM: 1
      QUEUE_PATTERN_TO: 10
      PRODUCERS: 10
      CONSUMERS: 10
      QUEUE_ARGS: x-queue-type=quorum,x-max-length=1000
      FLAG: persistent
      AUTO_DELETE: "false"
      RATE: 10
      AUTOACK: "false"
      SERVERS_STARTUP_TIMEOUT: &startup_timeout 30

View File

@ -0,0 +1,436 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "RabbitMQ Raft metrics",
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"id": 3,
"iteration": 1561057199575,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": true,
"cacheTimeout": null,
"dashLength": 10,
"dashes": false,
"description": "",
"fill": 10,
"gridPos": {
"h": 11,
"w": 24,
"x": 0,
"y": 0
},
"id": 62,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": true,
"max": true,
"min": false,
"rightSide": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": true,
"values": true
},
"lines": false,
"linewidth": 0,
"links": [],
"nullPointMode": "null as zero",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(rabbitmq_raft_log_last_written_index{cluster=\"$cluster\"} - rabbitmq_raft_log_commit_index{cluster=\"$cluster\"}) by(queue)",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{queue}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Uncommitted entries",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"cacheTimeout": null,
"cards": {
"cardPadding": null,
"cardRound": null
},
"color": {
"cardColor": "#37872D",
"colorScale": "sqrt",
"colorScheme": "interpolateRdYlGn",
"exponent": 0.3,
"mode": "opacity"
},
"dataFormat": "timeseries",
"description": "",
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 11
},
"heatmap": {},
"hideZeroBuckets": false,
"highlightCards": true,
"id": 64,
"legend": {
"show": false
},
"links": [],
"options": {},
"reverseYBuckets": false,
"targets": [
{
"expr": "rate(rabbitmq_raft_log_commit_index{cluster=\"$cluster\"}[$__interval])",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Entries committed / s",
"tooltip": {
"show": true,
"showHistogram": true
},
"type": "heatmap",
"xAxis": {
"show": true
},
"xBucketNumber": null,
"xBucketSize": null,
"yAxis": {
"decimals": null,
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true,
"splitFactor": null
},
"yBucketBound": "auto",
"yBucketNumber": null,
"yBucketSize": null
},
{
"aliasColors": {},
"bars": true,
"cacheTimeout": null,
"dashLength": 10,
"dashes": false,
"description": "",
"fill": 10,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 11
},
"id": 63,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": false,
"show": false,
"total": false,
"values": true
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rabbitmq_raft_term{cluster=\"$cluster\"}[30s])) by(queue)",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{queue}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Number of elections / s",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": true,
"cacheTimeout": null,
"dashLength": 10,
"dashes": false,
"description": "",
"fill": 0,
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 18
},
"id": 18,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": false,
"total": false,
"values": true
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
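"expr": "sum(rabbitmq_raft_log_last_written_index{cluster=\"$cluster\"} - rabbitmq_raft_log_snapshot_index{cluster=\"$cluster\"}) by(queue)",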
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Number of entries in the log",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "5s",
"schemaVersion": 18,
"style": "dark",
"tags": [
"pivotal",
"prometheus",
"rabbitmq"
],
"templating": {
"list": [
{
"allValue": null,
"current": {
"selected": true,
"text": "rabbitmq-qq",
"value": "rabbitmq-qq"
},
"datasource": "prometheus",
"definition": "label_values(rabbitmq_memory_used_bytes,cluster)",
"hide": 0,
"includeAll": false,
"label": "Cluster",
"multi": false,
"name": "cluster",
"options": [],
"query": "label_values(rabbitmq_memory_used_bytes,cluster)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"15s",
"30s",
"1m",
"5m",
"10m"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RabbitMQ-Raft",
"uid": "f1Mee9nZz",
"version": 18
}

View File

@ -39,3 +39,6 @@ scrape_configs:
- 'rmq0-dist-tls:15692' - 'rmq0-dist-tls:15692'
- 'rmq1-dist-tls:15692' - 'rmq1-dist-tls:15692'
- 'rmq2-dist-tls:15692' - 'rmq2-dist-tls:15692'
- 'rmq0-qq:15692'
- 'rmq1-qq:15692'
- 'rmq2-qq:15692'

View File

@ -0,0 +1,23 @@
{
"global_parameters": [
{"name": "cluster_name", "value": "rabbitmq-qq"}
],
"permissions": [
{
"configure": ".*",
"read": ".*",
"user": "guest",
"vhost": "/",
"write": ".*"
}
],
"users": [
{
"hashing_algorithm": "rabbit_password_hashing_sha256",
"name": "guest",
"password_hash": "hENva+fxJ7gnmaBK/WhwNHOYbvB53/QjNcqhtF4KqF7p21+x",
"tags": "administrator"
}
],
"vhosts": [{"name": "/"}]
}

View File

@ -0,0 +1,16 @@
# https://github.com/rabbitmq/rabbitmq-server/blob/master/docs/rabbitmq.conf.example
# Node configuration for the rmq0-qq / rmq1-qq / rmq2-qq cluster.
# NOTE(review): presumably lets the guest user connect from other hosts instead
# of loopback only - confirm against the RabbitMQ access control documentation.
loopback_users.guest = false
# TCP listener port.
listeners.tcp.default = 5672
# Management listener port, with SSL disabled.
management.listener.port = 15672
management.listener.ssl = false
# Memory high watermark expressed as an absolute amount.
vm_memory_high_watermark.absolute = 256MiB
# Peer discovery uses the classic config backend: the three members are listed explicitly.
cluster_formation.peer_discovery_backend = rabbit_peer_discovery_classic_config
cluster_formation.classic_config.nodes.1 = rabbit@rmq0-qq
cluster_formation.classic_config.nodes.2 = rabbit@rmq1-qq
cluster_formation.classic_config.nodes.3 = rabbit@rmq2-qq
# Path of the definitions file to load.
management.load_definitions = /etc/rabbitmq/rabbitmq-definitions.json
# background_gc_enabled = true

View File

@ -147,6 +147,14 @@
{2, queue_index_journal_write, counter, "Queue Index Journal write operations", queue_index_journal_write_count} {2, queue_index_journal_write, counter, "Queue Index Journal write operations", queue_index_journal_write_count}
]}, ]},
{ra_metrics, [
{2, raft_term, counter, "Raft member term"},
{3, raft_log_snapshot_index, counter, "Raft log snapshot index"},
{4, raft_log_last_applied_index, counter, "Raft log last applied index"},
{5, raft_log_commit_index, counter, "Raft log commit index"},
{6, raft_log_last_written_index, counter, "Raft log last written index"}
]},
{queue_coarse_metrics, [ {queue_coarse_metrics, [
{2, queue_messages_ready, gauge, "Messages ready to be delivered to consumers"}, {2, queue_messages_ready, gauge, "Messages ready to be delivered to consumers"},
{3, queue_messages_unacked, gauge, "Messages delivered to consumers but not yet acknowledged"}, {3, queue_messages_unacked, gauge, "Messages delivered to consumers but not yet acknowledged"},

View File

@ -78,8 +78,14 @@ init_per_group(with_metrics, Config0) ->
Ch = rabbit_ct_client_helpers:open_channel(Config3, A), Ch = rabbit_ct_client_helpers:open_channel(Config3, A),
Q = <<"prometheus_test_queue">>, Q = <<"prometheus_test_queue">>,
amqp_channel:call(Ch, #'queue.declare'{queue = Q}), amqp_channel:call(Ch,
amqp_channel:cast(Ch, #'basic.publish'{routing_key = Q}, #amqp_msg{payload = <<"msg">>}), #'queue.declare'{queue = Q,
durable = true,
arguments = [{<<"x-queue-type">>, longstr, <<"quorum">>}]
}),
amqp_channel:cast(Ch,
#'basic.publish'{routing_key = Q},
#amqp_msg{payload = <<"msg">>}),
timer:sleep(150), timer:sleep(150),
{#'basic.get_ok'{}, #amqp_msg{}} = amqp_channel:call(Ch, #'basic.get'{queue = Q}), {#'basic.get_ok'{}, #amqp_msg{}} = amqp_channel:call(Ch, #'basic.get'{queue = Q}),
timer:sleep(10000), timer:sleep(10000),
@ -167,6 +173,7 @@ metrics_test(Config) ->
?assertEqual(match, re:run(Body, "rabbitmq_file_descriptors_open", [{capture, none}])), ?assertEqual(match, re:run(Body, "rabbitmq_file_descriptors_open", [{capture, none}])),
?assertEqual(match, re:run(Body, "rabbitmq_file_descriptors_open_limit", [{capture, none}])), ?assertEqual(match, re:run(Body, "rabbitmq_file_descriptors_open_limit", [{capture, none}])),
?assertEqual(match, re:run(Body, "rabbitmq_io_read", [{capture, none}])), ?assertEqual(match, re:run(Body, "rabbitmq_io_read", [{capture, none}])),
?assertEqual(match, re:run(Body, "rabbitmq_raft_term", [{capture, none}])),
?assertEqual(match, re:run(Body, "rabbitmq_queue_messages_ready", [{capture, none}])), ?assertEqual(match, re:run(Body, "rabbitmq_queue_messages_ready", [{capture, none}])),
?assertEqual(match, re:run(Body, "rabbitmq_queue_consumers", [{capture, none}])), ?assertEqual(match, re:run(Body, "rabbitmq_queue_consumers", [{capture, none}])),
%% Checking the first TOTALS metric %% Checking the first TOTALS metric