MINOR: Remove deprecated per-partition lag metrics

It takes O(n^2) time to instantiate a mbean with n attributes which can be very slow if the number of attributes of this mbean is large. This PR removes metrics whose number of attributes can grow with the number of partitions in the cluster to fix the performance issue. These metrics have already been marked for removal in 2.0 by KIP-225.

Author: Dong Lin <lindong28@gmail.com>

Reviewers: Ismael Juma <ismael@juma.me.uk>

Closes #5172 from lindong28/remove-deprecated-metrics
This commit is contained in:
Dong Lin 2018-06-11 23:32:30 -07:00
parent f0b1b46486
commit 4580d9f16a
5 changed files with 32 additions and 67 deletions

View File

@ -1447,16 +1447,6 @@ public class Fetcher<K, V> implements SubscriptionState.Listener, Closeable {
recordsLag.add(this.metrics.metricInstance(metricsRegistry.partitionRecordsLag, metricTags), new Value());
recordsLag.add(this.metrics.metricInstance(metricsRegistry.partitionRecordsLagMax, metricTags), new Max());
recordsLag.add(this.metrics.metricInstance(metricsRegistry.partitionRecordsLagAvg, metricTags), new Avg());
recordsLag.add(this.metrics.metricName(name,
metricsRegistry.partitionRecordsLagDeprecated.group(),
metricsRegistry.partitionRecordsLagDeprecated.description()), new Value());
recordsLag.add(this.metrics.metricName(name + "-max",
metricsRegistry.partitionRecordsLagMaxDeprecated.group(),
metricsRegistry.partitionRecordsLagMaxDeprecated.description()), new Max());
recordsLag.add(this.metrics.metricName(name + "-avg",
metricsRegistry.partitionRecordsLagAvgDeprecated.group(),
metricsRegistry.partitionRecordsLagAvgDeprecated.description()), new Avg());
}
recordsLag.record(lag);
}

View File

@ -54,10 +54,6 @@ public class FetcherMetricsRegistry {
public MetricNameTemplate partitionRecordsLead;
public MetricNameTemplate partitionRecordsLeadMin;
public MetricNameTemplate partitionRecordsLeadAvg;
// To remove in 2.0
public MetricNameTemplate partitionRecordsLagDeprecated;
public MetricNameTemplate partitionRecordsLagMaxDeprecated;
public MetricNameTemplate partitionRecordsLagAvgDeprecated;
public FetcherMetricsRegistry() {
this(new HashSet<String>(), "");
@ -128,13 +124,6 @@ public class FetcherMetricsRegistry {
this.topicRecordsConsumedTotal = new MetricNameTemplate("records-consumed-total", groupName,
"The total number of records consumed for a topic", topicTags);
this.partitionRecordsLagDeprecated = new MetricNameTemplate("{topic}-{partition}.records-lag", groupName,
"The latest lag of the partition (DEPRECATED use the tag based version instead)", tags);
this.partitionRecordsLagMaxDeprecated = new MetricNameTemplate("{topic}-{partition}.records-lag-max", groupName,
"The max lag of the partition (DEPRECATED use the tag based version instead)", tags);
this.partitionRecordsLagAvgDeprecated = new MetricNameTemplate("{topic}-{partition}.records-lag-avg", groupName,
"The average lag of the partition (DEPRECATED use the tag based version instead)", tags);
/***** Partition level *****/
Set<String> partitionTags = new HashSet<>(topicTags);
partitionTags.add("partition");
@ -177,9 +166,6 @@ public class FetcherMetricsRegistry {
topicRecordsPerRequestAvg,
topicRecordsConsumedRate,
topicRecordsConsumedTotal,
partitionRecordsLagDeprecated,
partitionRecordsLagAvgDeprecated,
partitionRecordsLagMaxDeprecated,
partitionRecordsLag,
partitionRecordsLagAvg,
partitionRecordsLagMax,

View File

@ -1529,7 +1529,6 @@ public class FetcherTest {
tags.put("topic", tp0.topic());
tags.put("partition", String.valueOf(tp0.partition()));
MetricName partitionLagMetric = metrics.metricName("records-lag", metricGroup, tags);
MetricName partitionLagMetricDeprecated = metrics.metricName(tp0 + ".records-lag", metricGroup);
Map<MetricName, KafkaMetric> allMetrics = metrics.metrics();
KafkaMetric recordsFetchLagMax = allMetrics.get(maxLagMetric);
@ -1544,9 +1543,6 @@ public class FetcherTest {
KafkaMetric partitionLag = allMetrics.get(partitionLagMetric);
assertEquals(50, partitionLag.value(), EPSILON);
KafkaMetric partitionLagDeprecated = allMetrics.get(partitionLagMetricDeprecated);
assertEquals(50, partitionLagDeprecated.value(), EPSILON);
// recordsFetchLagMax should be lso - offset of the last message after receiving a non-empty FetchResponse
MemoryRecordsBuilder builder = MemoryRecords.builder(ByteBuffer.allocate(1024), CompressionType.NONE,
TimestampType.CREATE_TIME, 0L);
@ -1559,7 +1555,6 @@ public class FetcherTest {
// verify de-registration of partition lag
subscriptions.unsubscribe();
assertFalse(allMetrics.containsKey(partitionLagMetric));
assertFalse(allMetrics.containsKey(partitionLagMetricDeprecated));
}
@Test

View File

@ -1534,10 +1534,6 @@ class PlaintextConsumerTest extends BaseConsumerTest {
val fetchLag = consumer.metrics.get(new MetricName("records-lag", "consumer-fetch-manager-metrics", "", tags))
assertNotNull(fetchLag)
val oldTags = Collections.singletonMap("client-id", "testPerPartitionLagMetricsCleanUpWithAssign")
val oldFetchLag = consumer.metrics.get(new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", oldTags))
assertEquals(fetchLag.metricValue(), oldFetchLag.metricValue())
val expectedLag = numMessages - records.count
assertEquals(s"The lag should be $expectedLag", expectedLag, fetchLag.value, epsilon)
@ -1594,15 +1590,12 @@ class PlaintextConsumerTest extends BaseConsumerTest {
records = consumer.poll(100)
!records.isEmpty
}, "Consumer did not consume any message before timeout.")
val oldTags = Collections.singletonMap("client-id", "testPerPartitionLagWithMaxPollRecords")
val oldLag = consumer.metrics.get(new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", oldTags))
val tags = new util.HashMap[String, String]()
tags.put("client-id", "testPerPartitionLagWithMaxPollRecords")
tags.put("topic", tp.topic())
tags.put("partition", String.valueOf(tp.partition()))
val lag = consumer.metrics.get(new MetricName("records-lag", "consumer-fetch-manager-metrics", "", tags))
assertEquals(oldLag.metricValue(), lag.metricValue())
assertEquals(s"The lag should be ${numMessages - records.count}", numMessages - records.count, lag.value, epsilon)
} finally {

View File

@ -80,6 +80,7 @@
JMX monitoring tools that do not automatically aggregate. To get the total count for a specific request type, the tool needs to be
updated to aggregate across different versions.
</li>
<li><a href="https://cwiki.apache.org/confluence/x/uaBzB">KIP-225</a> changed the metric "records.lag" to use tags for topic and partition. The original version with the name format "{topic}-{partition}.records-lag" has been removed.</li>
<li>The Scala producers, which have been deprecated since 0.10.0.0, have been removed. The Java producer has been the recommended option
since 0.9.0.0. Note that the behaviour of the default partitioner in the Java producer differs from the default partitioner
in the Scala producers. Users migrating should consider configuring a custom partitioner that retains the previous behaviour.</li>