From 6b2564811a6137f1fe639dee236f2538bb7160b1 Mon Sep 17 00:00:00 2001 From: Kaufman Ng Date: Sun, 7 Aug 2016 14:29:03 -0700 Subject: [PATCH] KAFKA-3479: Add new consumer metrics documentation added new consumer metrics section refactored common metrics into new section updated TOC Author: Kaufman Ng Reviewers: Jason Gustafson , Ewen Cheslack-Postava Closes #1361 from coughman/KAFKA-3479-consumer-metrics-doc --- .gitignore | 1 + docs/documentation.html | 5 + docs/ops.html | 447 +++++++++++++++++++++++++++++----------- 3 files changed, 338 insertions(+), 115 deletions(-) diff --git a/.gitignore b/.gitignore index 73972e6a4f3..b54fcf39fba 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ kafka.iws .vagrant Vagrantfile.local /logs +.DS_Store config/server-* config/zookeeper-* diff --git a/docs/documentation.html b/docs/documentation.html index 31dc03960c4..c3425c0deb0 100644 --- a/docs/documentation.html +++ b/docs/documentation.html @@ -110,6 +110,11 @@ Prior releases: 0.7.x, Ext4 Notes
  • 6.6 Monitoring +
  • 6.7 ZooKeeper
    • Stable Version diff --git a/docs/ops.html b/docs/ops.html index a8e72e7a58a..a9161b7ab63 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -689,6 +689,149 @@ We do graphing and alerting on the following metrics: +

      Common monitoring metrics for producer/consumer/connect

      + +The following metrics are available on producer/consumer/connector instances. For specific metrics, please see following sections. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Metric/Attribute nameDescriptionMbean name
      connection-close-rateConnections closed per second in the window.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      connection-creation-rateNew connections established per second in the window.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      network-io-rateThe average number of network operations (reads or writes) on all connections per second.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      outgoing-byte-rateThe average number of outgoing bytes sent per second to all servers.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      request-rateThe average number of requests sent per second.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      request-size-avgThe average size of all requests in the window.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      request-size-maxThe maximum size of any request sent in the window.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      incoming-byte-rateBytes/second read off all sockets.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      response-rateResponses received per second.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      select-rateNumber of times the I/O layer checked for new I/O to perform per second.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      io-wait-time-ns-avgThe average length of time the I/O thread spent waiting for a socket ready for reads or writes in nanoseconds.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      io-wait-ratioThe fraction of time the I/O thread spent waiting.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      io-time-ns-avgThe average length of time for I/O per select call in nanoseconds.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      io-ratioThe fraction of time the I/O thread spent doing I/O.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      connection-countThe current number of active connections.kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
      + +

      Common Per-broker metrics for producer/consumer/connect

      + +The following metrics are available on producer/consumer/connector instances. For specific metrics, please see following sections. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Metric/Attribute nameDescriptionMbean name
      outgoing-byte-rateThe average number of outgoing bytes sent per second for a node.kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)
      request-rateThe average number of requests sent per second for a node.kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)
      request-size-avgThe average size of all requests in the window for a node.kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)
      request-size-maxThe maximum size of any request sent in the window for a node.kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)
      incoming-byte-rateThe average number of bytes received per second for a node.kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)
      request-latency-avgThe average request latency in ms for a node.kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)
      request-latency-maxThe maximum request latency in ms for a node.kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)
      response-rateResponses received per second for a node.kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)
      +

      New producer monitoring

      The following metrics are available on new producer instances. @@ -794,121 +937,7 @@ The following metrics are available on new producer instances. The age in seconds of the current producer metadata being used. kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - connection-close-rate - Connections closed per second in the window. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - connection-creation-rate - New connections established per second in the window. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - network-io-rate - The average number of network operations (reads or writes) on all connections per second. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - outgoing-byte-rate - The average number of outgoing bytes sent per second to all servers. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - request-rate - The average number of requests sent per second. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - request-size-avg - The average size of all requests in the window. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - request-size-max - The maximum size of any request sent in the window. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - incoming-byte-rate - Bytes/second read off all sockets. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - response-rate - Responses received sent per second. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - select-rate - Number of times the I/O layer checked for new I/O to perform per second. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - io-wait-time-ns-avg - The average length of time the I/O thread spent waiting for a socket ready for reads or writes in nanoseconds. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - io-wait-ratio - The fraction of time the I/O thread spent waiting. 
- kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - io-time-ns-avg - The average length of time for I/O per select call in nanoseconds. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - io-ratio - The fraction of time the I/O thread spent doing I/O. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - connection-count - The current number of active connections. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) - - - outgoing-byte-rate - The average number of outgoing bytes sent per second for a node. - kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+) - - - request-rate - The average number of requests sent per second for a node. - kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+) - - - request-size-avg - The average size of all requests in the window for a node. - kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+) - - - request-size-max - The maximum size of any request sent in the window for a node. - kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+) - - - incoming-byte-rate - The average number of responses received per second for a node. - kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+) - - - request-latency-avg - The average request latency in ms for a node. - kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+) - - - request-latency-max - The maximum request latency in ms for a node. - kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+) - - - response-rate - Responses received sent per second for a node. - kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+) - + record-send-rate The average number of records sent per second for a topic. @@ -946,6 +975,194 @@ The following metrics are available on new producer instances. + +

      New consumer monitoring

      + +The following metrics are available on new consumer instances. + +
      Consumer Group Metrics
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Metric/Attribute nameDescriptionMbean name
      commit-latency-avgThe average time taken for a commit requestkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      commit-latency-maxThe max time taken for a commit requestkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      commit-rateThe number of commit calls per secondkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      assigned-partitionsThe number of partitions currently assigned to this consumerkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      heartbeat-response-time-maxThe max time taken to receive a response to a heartbeat requestkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      heartbeat-rateThe average number of heartbeats per secondkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      join-time-avgThe average time taken for a group rejoinkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      join-time-maxThe max time taken for a group rejoinkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      join-rateThe number of group joins per secondkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      sync-time-avgThe average time taken for a group synckafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      sync-time-maxThe max time taken for a group synckafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      sync-rateThe number of group syncs per secondkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      last-heartbeat-seconds-agoThe number of seconds since the last coordinator heartbeatkafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)
      + +
      Consumer Fetch Metrics
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Metric/Attribute nameDescriptionMbean name
      fetch-size-avgThe average number of bytes fetched per requestkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      fetch-size-maxThe maximum number of bytes fetched per requestkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      bytes-consumed-rateThe average number of bytes consumed per secondkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      records-per-request-avgThe average number of records in each requestkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      records-consumed-rateThe average number of records consumed per secondkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      fetch-latency-avgThe average time taken for a fetch requestkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      fetch-latency-maxThe max time taken for a fetch requestkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      fetch-rateThe number of fetch requests per secondkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      records-lag-maxThe maximum lag in terms of number of records for any partition in this windowkafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      fetch-throttle-time-avgThe average throttle time in mskafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      fetch-throttle-time-maxThe maximum throttle time in mskafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)
      + + +
      Topic-level Fetch Metrics
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Metric/Attribute nameDescriptionMbean name
      fetch-size-avgThe average number of bytes fetched per request for a specific topic.kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)
      fetch-size-maxThe maximum number of bytes fetched per request for a specific topic.kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)
      bytes-consumed-rateThe average number of bytes consumed per second for a specific topic.kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)
      records-per-request-avgThe average number of records in each request for a specific topic.kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)
      records-consumed-rateThe average number of records consumed per second for a specific topic.kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)
      + +
      Others
      + We recommend monitoring GC time and other stats and various server stats such as CPU utilization, I/O service time, etc. On the client side, we recommend monitoring the message/byte rate (global and per topic), request rate/size/time, and on the consumer side, max lag in messages among all partitions and min fetch request rate. For a consumer to keep up, max lag needs to be less than a threshold and min fetch rate needs to be larger than 0.