mirror of https://github.com/apache/kafka.git

KAFKA-14486 Move LogCleanerManager to storage module (#19216)

Move LogCleanerManager and related classes to the storage module and rewrite them in Java.

Reviewers: TengYao Chi <kitingiao@gmail.com>, Jun Rao <junrao@gmail.com>, Mickael Maison <mickael.maison@gmail.com>, Chia-Ping Tsai <chia7712@gmail.com>

Parent: eb88e78373
Commit: 84b8fec089
Import rules (checkstyle import-control file):

@@ -80,6 +80,7 @@
   <subpackage name="storage.internals">
     <allow pkg="kafka.server"/>
     <allow pkg="kafka.log"/>
+    <allow pkg="kafka.utils"/>
     <allow pkg="com.fasterxml.jackson" />
     <allow pkg="com.yammer.metrics.core" />
     <allow pkg="org.apache.kafka.common" />
LogCleaner.scala:

@@ -21,11 +21,11 @@ import java.io.{File, IOException}
 import java.lang.{Long => JLong}
 import java.nio._
 import java.util
-import java.util.Date
+import java.util.{Date, Optional}
 import java.util.concurrent.TimeUnit
 import kafka.log.LogCleaner.{CleanerRecopyPercentMetricName, DeadThreadCountMetricName, MaxBufferUtilizationPercentMetricName, MaxCleanTimeMetricName, MaxCompactionDelayMetricsName}
 import kafka.server.{BrokerReconfigurable, KafkaConfig}
-import kafka.utils.{Logging, Pool}
+import kafka.utils.Logging
 import org.apache.kafka.common.{KafkaException, TopicPartition}
 import org.apache.kafka.common.config.ConfigException
 import org.apache.kafka.common.errors.{CorruptRecordException, KafkaStorageException}
@@ -36,12 +36,13 @@ import org.apache.kafka.common.utils.{BufferSupplier, Time}
 import org.apache.kafka.server.config.ServerConfigs
 import org.apache.kafka.server.metrics.KafkaMetricsGroup
 import org.apache.kafka.server.util.ShutdownableThread
-import org.apache.kafka.storage.internals.log.{AbortedTxn, CleanerConfig, LastRecord, LogCleaningAbortedException, LogDirFailureChannel, LogSegment, LogSegmentOffsetOverflowException, OffsetMap, SkimpyOffsetMap, ThreadShutdownException, TransactionIndex, UnifiedLog}
+import org.apache.kafka.storage.internals.log.{AbortedTxn, CleanerConfig, LastRecord, LogCleanerManager, LogCleaningAbortedException, LogCleaningException, LogDirFailureChannel, LogSegment, LogSegmentOffsetOverflowException, LogToClean, OffsetMap, PreCleanStats, SkimpyOffsetMap, ThreadShutdownException, TransactionIndex, UnifiedLog}
 import org.apache.kafka.storage.internals.utils.Throttler

 import scala.jdk.CollectionConverters._
 import scala.collection.mutable.ListBuffer
 import scala.collection.{Iterable, Seq, Set, mutable}
+import scala.jdk.OptionConverters.{RichOption, RichOptional}
 import scala.util.control.ControlThrowable

 /**
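Note on the added import: scala.jdk.OptionConverters supplies the toJava/toScala extension methods that the call sites below rely on to cross between Scala Option and java.util.Optional. A minimal sketch of the two directions (values are illustrative):

    import java.util.Optional
    import scala.jdk.OptionConverters.{RichOption, RichOptional}

    val toJavaSide: Optional[String] = Some("checkpoint").toJava  // None would become Optional.empty()
    val backToScala: Option[String] = toJavaSide.toScala          // Optional.empty() would become None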
@@ -93,13 +94,13 @@ import scala.util.control.ControlThrowable
  *
  * @param initialConfig Initial configuration parameters for the cleaner. Actual config may be dynamically updated.
  * @param logDirs The directories where offset checkpoints reside
- * @param logs The pool of logs
+ * @param logs The map of logs
  * @param logDirFailureChannel The channel used to add offline log dirs that may be encountered when cleaning the log
  * @param time A way to control the passage of time
  */
 class LogCleaner(initialConfig: CleanerConfig,
                  val logDirs: Seq[File],
-                 val logs: Pool[TopicPartition, UnifiedLog],
+                 val logs: util.concurrent.ConcurrentMap[TopicPartition, UnifiedLog],
                  val logDirFailureChannel: LogDirFailureChannel,
                  time: Time = Time.SYSTEM) extends Logging with BrokerReconfigurable {
   // Visible for test.
@@ -109,7 +110,7 @@ class LogCleaner(initialConfig: CleanerConfig,
   @volatile private var config = initialConfig

   /* for managing the state of partitions being cleaned. package-private to allow access in tests */
-  private[log] val cleanerManager = new LogCleanerManager(logDirs, logs, logDirFailureChannel)
+  private[log] val cleanerManager = new LogCleanerManager(logDirs.asJava, logs, logDirFailureChannel)

   /* a throttle used to limit the I/O of all the cleaner threads to a user-specified maximum rate */
   private[log] val throttler = new Throttler(config.maxIoBytesPerSecond, 300, "cleaner-io", "bytes", time)
@@ -249,7 +250,7 @@ class LogCleaner(initialConfig: CleanerConfig,
    * @param partitionToRemove The topicPartition to be removed, default none
    */
   def updateCheckpoints(dataDir: File, partitionToRemove: Option[TopicPartition] = None): Unit = {
-    cleanerManager.updateCheckpoints(dataDir, partitionToRemove = partitionToRemove)
+    cleanerManager.updateCheckpoints(dataDir, Optional.empty(), partitionToRemove.toJava)
   }

   /**
@@ -300,7 +301,7 @@ class LogCleaner(initialConfig: CleanerConfig,
    * @param topicPartitions The collection of topicPartitions to be resumed cleaning
    */
   def resumeCleaning(topicPartitions: Iterable[TopicPartition]): Unit = {
-    cleanerManager.resumeCleaning(topicPartitions)
+    cleanerManager.resumeCleaning(topicPartitions.toList.asJava)
   }

   /**
@@ -314,7 +315,7 @@ class LogCleaner(initialConfig: CleanerConfig,
    * @return A boolean indicating whether the work has completed before timeout
    */
   def awaitCleaned(topicPartition: TopicPartition, offset: Long, maxWaitMs: Long = 60000L): Boolean = {
-    def isCleaned = cleanerManager.allCleanerCheckpoints.get(topicPartition).fold(false)(_ >= offset)
+    def isCleaned = Option(cleanerManager.allCleanerCheckpoints.get(topicPartition)).fold(false)(_ >= offset)
     var remainingWaitMs = maxWaitMs
     while (!isCleaned && remainingWaitMs > 0) {
       val sleepTime = math.min(100, remainingWaitMs)
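allCleanerCheckpoints now comes from the Java manager as a java.util.Map, whose get returns the value or null rather than an Option, hence the Option(...) wrapper above. The pattern in isolation, using only standard-library types:

    val checkpoints = new java.util.HashMap[String, java.lang.Long]()
    checkpoints.put("topic-0", 7L)
    Option(checkpoints.get("topic-0")).fold(false)(_ >= 5L)  // Some(7) -> true
    Option(checkpoints.get("missing")).fold(false)(_ >= 5L)  // Option(null) == None -> false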
@@ -331,7 +332,7 @@ class LogCleaner(initialConfig: CleanerConfig,
    * @return A list of log partitions that retention threads can safely work on
    */
   def pauseCleaningForNonCompactedPartitions(): Iterable[(TopicPartition, UnifiedLog)] = {
-    cleanerManager.pauseCleaningForNonCompactedPartitions()
+    cleanerManager.pauseCleaningForNonCompactedPartitions().asScala.map(entry => (entry.getKey, entry.getValue))
   }

   // Only for testing
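The Java method returns pairs as java.util.Map.Entry rather than Scala tuples, so the Scala wrapper converts entry by entry. A self-contained sketch of the same shape (keys and values made up):

    import scala.jdk.CollectionConverters._

    val entries = java.util.List.of(java.util.Map.entry("log-0", 1), java.util.Map.entry("log-1", 2))
    val tuples: Iterable[(String, Int)] = entries.asScala.map(e => (e.getKey, e.getValue))  // (log-0,1), (log-1,2)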
@@ -409,7 +410,7 @@ class LogCleaner(initialConfig: CleanerConfig,
   @throws(classOf[LogCleaningException])
   private def cleanFilthiestLog(): Boolean = {
     val preCleanStats = new PreCleanStats()
-    val ltc = cleanerManager.grabFilthiestCompactedLog(time, preCleanStats)
+    val ltc = cleanerManager.grabFilthiestCompactedLog(time, preCleanStats).toScala
     val cleaned = ltc match {
       case None =>
         false
@@ -424,7 +425,7 @@ class LogCleaner(initialConfig: CleanerConfig,
           case e: Exception => throw new LogCleaningException(cleanable.log, e.getMessage, e)
         }
     }
-    val deletable: Iterable[(TopicPartition, UnifiedLog)] = cleanerManager.deletableLogs()
+    val deletable = cleanerManager.deletableLogs().asScala
     try {
       deletable.foreach { case (_, log) =>
         try {
@@ -435,7 +436,7 @@ class LogCleaner(initialConfig: CleanerConfig,
         }
       }
     } finally {
-      cleanerManager.doneDeleting(deletable.map(_._1))
+      cleanerManager.doneDeleting(deletable.keys.toList.asJava)
     }

     cleaned
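Conversions also run Scala-to-Java for collection arguments: resumeCleaning and doneDeleting now accept Java collections, hence the toList.asJava chains in the hunks above. Sketched on its own, with only the standard converters assumed:

    import scala.jdk.CollectionConverters._

    val partitions: Iterable[String] = Set("tp-0", "tp-1")
    val javaList: java.util.List[String] = partitions.toList.asJava  // materialize a stable list, then wrap it for the Java API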
@@ -1150,25 +1151,6 @@ private[log] class Cleaner(val id: Int,
     }
   }

-/**
- * A simple struct for collecting pre-clean stats
- */
-private class PreCleanStats {
-  var maxCompactionDelayMs = 0L
-  var delayedPartitions = 0
-  var cleanablePartitions = 0
-
-  def updateMaxCompactionDelay(delayMs: Long): Unit = {
-    maxCompactionDelayMs = Math.max(maxCompactionDelayMs, delayMs)
-    if (delayMs > 0) {
-      delayedPartitions += 1
-    }
-  }
-  def recordCleanablePartitions(numOfCleanables: Int): Unit = {
-    cleanablePartitions = numOfCleanables
-  }
-}
-
 /**
  * A simple struct for collecting stats about log cleaning
  */
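PreCleanStats itself survives the deletion: it is now imported from org.apache.kafka.storage.internals.log (see the import hunk above), and the Scala call sites keep the same shape. A short usage sketch mirroring cleanFilthiestLog (variable names as in the surrounding code):

    val preCleanStats = new PreCleanStats()
    preCleanStats.updateMaxCompactionDelay(compactionDelayMs)   // also bumps the delayed-partition count when the delay is positive
    preCleanStats.recordCleanablePartitions(cleanableLogs.size) // both calls appear unchanged in the Scala cleaner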
@@ -1221,22 +1203,6 @@ private class CleanerStats(time: Time = Time.SYSTEM) {

 }

-/**
- * Helper class for a log, its topic/partition, the first cleanable position, the first uncleanable dirty position,
- * and whether it needs compaction immediately.
- */
-private case class LogToClean(topicPartition: TopicPartition,
-                              log: UnifiedLog,
-                              firstDirtyOffset: Long,
-                              uncleanableOffset: Long,
-                              needCompactionNow: Boolean = false) extends Ordered[LogToClean] {
-  val cleanBytes: Long = log.logSegments(-1, firstDirtyOffset).asScala.map(_.size.toLong).sum
-  val (firstUncleanableOffset, cleanableBytes) = LogCleanerManager.calculateCleanableBytes(log, firstDirtyOffset, uncleanableOffset)
-  val totalBytes: Long = cleanBytes + cleanableBytes
-  val cleanableRatio: Double = cleanableBytes / totalBytes.toDouble
-  override def compare(that: LogToClean): Int = math.signum(this.cleanableRatio - that.cleanableRatio).toInt
-}
-
 /**
  * This is a helper class to facilitate tracking transaction state while cleaning the log. It maintains a set
  * of the ongoing aborted and committed transactions as the cleaner is working its way through the log. This
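LogToClean likewise moves behind the storage-module import. The ordering math it encapsulates is visible in the deleted Scala above and is worth restating, since grabFilthiestCompactedLog sorts by it; a worked sketch with illustrative byte counts:

    val cleanBytes = 400L                // segment bytes in [log start, firstDirtyOffset)
    val cleanableBytes = 600L            // segment bytes in [firstDirtyOffset, firstUncleanableOffset)
    val totalBytes = cleanBytes + cleanableBytes
    val cleanableRatio = cleanableBytes / totalBytes.toDouble  // 0.6; the "filthiest" log maximizes this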
LogCleanerManager.scala (the entire file is deleted; the class is rewritten in Java in the storage module):

@@ -1,686 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package kafka.log
-
-import java.lang.{Long => JLong}
-import java.io.File
-import java.util.concurrent.TimeUnit
-import java.util.concurrent.locks.ReentrantLock
-import kafka.utils.CoreUtils._
-import kafka.utils.{Logging, Pool}
-import org.apache.kafka.common.{KafkaException, TopicPartition}
-import org.apache.kafka.common.errors.KafkaStorageException
-import org.apache.kafka.common.utils.Time
-import org.apache.kafka.storage.internals.checkpoint.OffsetCheckpointFile
-import org.apache.kafka.storage.internals.log.{LogCleaningAbortedException, LogDirFailureChannel, UnifiedLog}
-import org.apache.kafka.server.metrics.KafkaMetricsGroup
-
-import java.util.Comparator
-import scala.collection.{Iterable, Seq, mutable}
-import scala.jdk.CollectionConverters._
-
-private[log] sealed trait LogCleaningState
-private[log] case object LogCleaningInProgress extends LogCleaningState
-private[log] case object LogCleaningAborted extends LogCleaningState
-private[log] case class LogCleaningPaused(pausedCount: Int) extends LogCleaningState
-
-private[log] class LogCleaningException(val log: UnifiedLog,
-                                        private val message: String,
-                                        private val cause: Throwable) extends KafkaException(message, cause)
-
-/**
- * This class manages the state of each partition being cleaned.
- * LogCleaningState defines the cleaning states that a TopicPartition can be in.
- * 1. None                   : No cleaning state in a TopicPartition. In this state, it can become LogCleaningInProgress
- *                             or LogCleaningPaused(1). Valid previous state are LogCleaningInProgress and LogCleaningPaused(1)
- * 2. LogCleaningInProgress  : The cleaning is currently in progress. In this state, it can become None when log cleaning is finished
- *                             or become LogCleaningAborted. Valid previous state is None.
- * 3. LogCleaningAborted     : The cleaning abort is requested. In this state, it can become LogCleaningPaused(1).
- *                             Valid previous state is LogCleaningInProgress.
- * 4-a. LogCleaningPaused(1) : The cleaning is paused once. No log cleaning can be done in this state.
- *                             In this state, it can become None or LogCleaningPaused(2).
- *                             Valid previous state is None, LogCleaningAborted or LogCleaningPaused(2).
- * 4-b. LogCleaningPaused(i) : The cleaning is paused i times where i>= 2. No log cleaning can be done in this state.
- *                             In this state, it can become LogCleaningPaused(i-1) or LogCleaningPaused(i+1).
- *                             Valid previous state is LogCleaningPaused(i-1) or LogCleaningPaused(i+1).
- */
-private[log] class LogCleanerManager(val logDirs: Seq[File],
-                                     val logs: Pool[TopicPartition, UnifiedLog],
-                                     val logDirFailureChannel: LogDirFailureChannel) extends Logging {
-  import LogCleanerManager._
-
-  private val metricsGroup = new KafkaMetricsGroup(this.getClass)
-
-  protected override def loggerName: String = classOf[LogCleaner].getName
-
-  // package-private for testing
-  private[log] val offsetCheckpointFile = "cleaner-offset-checkpoint"
-
-  /* the offset checkpoints holding the last cleaned point for each log */
-  @volatile private var checkpoints = logDirs.map(dir =>
-    (dir, new OffsetCheckpointFile(new File(dir, offsetCheckpointFile), logDirFailureChannel))).toMap
-
-  /* the set of logs currently being cleaned */
-  private val inProgress = mutable.HashMap[TopicPartition, LogCleaningState]()
-
-  /* the set of uncleanable partitions (partitions that have raised an unexpected error during cleaning)
-   * for each log directory */
-  private val uncleanablePartitions = mutable.HashMap[String, mutable.Set[TopicPartition]]()
-
-  /* a global lock used to control all access to the in-progress set and the offset checkpoints */
-  private val lock = new ReentrantLock
-
-  /* for coordinating the pausing and the cleaning of a partition */
-  private val pausedCleaningCond = lock.newCondition()
-
-  // Visible for testing
-  private[log] val gaugeMetricNameWithTag = new java.util.HashMap[String, java.util.List[java.util.Map[String, String]]]()
-
-  /* gauges for tracking the number of partitions marked as uncleanable for each log directory */
-  for (dir <- logDirs) {
-    val metricTag = Map("logDirectory" -> dir.getAbsolutePath).asJava
-    metricsGroup.newGauge(UncleanablePartitionsCountMetricName,
-      () => inLock(lock) { uncleanablePartitions.get(dir.getAbsolutePath).map(_.size).getOrElse(0) },
-      metricTag
-    )
-    gaugeMetricNameWithTag.computeIfAbsent(UncleanablePartitionsCountMetricName, _ => new java.util.ArrayList[java.util.Map[String, String]]())
-      .add(metricTag)
-  }
-
-  /* gauges for tracking the number of uncleanable bytes from uncleanable partitions for each log directory */
-  for (dir <- logDirs) {
-    val metricTag = Map("logDirectory" -> dir.getAbsolutePath).asJava
-    metricsGroup.newGauge(UncleanableBytesMetricName,
-      () => inLock(lock) {
-        uncleanablePartitions.get(dir.getAbsolutePath) match {
-          case Some(partitions) =>
-            val lastClean = allCleanerCheckpoints
-            val now = Time.SYSTEM.milliseconds
-            partitions.iterator.map { tp =>
-              Option(logs.get(tp)).map {
-                log =>
-                  val lastCleanOffset: Option[Long] = lastClean.get(tp)
-                  val offsetsToClean = cleanableOffsets(log, lastCleanOffset, now)
-                  val (_, uncleanableBytes) = calculateCleanableBytes(log, offsetsToClean.firstDirtyOffset, offsetsToClean.firstUncleanableDirtyOffset)
-                  uncleanableBytes
-              }.getOrElse(0L)
-            }.sum
-          case None => 0
-        }
-      },
-      metricTag
-    )
-    gaugeMetricNameWithTag.computeIfAbsent(UncleanableBytesMetricName, _ => new java.util.ArrayList[java.util.Map[String, String]]())
-      .add(metricTag)
-  }
-
-  /* a gauge for tracking the cleanable ratio of the dirtiest log */
-  @volatile private var dirtiestLogCleanableRatio = 0.0
-  metricsGroup.newGauge(MaxDirtyPercentMetricName, () => (100 * dirtiestLogCleanableRatio).toInt)
-
-  /* a gauge for tracking the time since the last log cleaner run, in milli seconds */
-  @volatile private var timeOfLastRun: Long = Time.SYSTEM.milliseconds
-  metricsGroup.newGauge(TimeSinceLastRunMsMetricName, () => Time.SYSTEM.milliseconds - timeOfLastRun)
-
-  /**
-   * @return the position processed for all logs.
-   */
-  def allCleanerCheckpoints: Map[TopicPartition, Long] = {
-    inLock(lock) {
-      checkpoints.values.flatMap(checkpoint => {
-        try {
-          checkpoint.read().asScala.map{ case (tp, offset) => tp -> Long2long(offset) }
-        } catch {
-          case e: KafkaStorageException =>
-            error(s"Failed to access checkpoint file ${checkpoint.file.getName} in dir ${checkpoint.file.getParentFile.getAbsolutePath}", e)
-            Map.empty[TopicPartition, Long]
-        }
-      }).toMap
-    }
-  }
-
-  /**
-   * Package private for unit test. Get the cleaning state of the partition.
-   */
-  private[log] def cleaningState(tp: TopicPartition): Option[LogCleaningState] = {
-    inLock(lock) {
-      inProgress.get(tp)
-    }
-  }
-
-  /**
-   * Package private for unit test. Set the cleaning state of the partition.
-   */
-  private[log] def setCleaningState(tp: TopicPartition, state: LogCleaningState): Unit = {
-    inLock(lock) {
-      inProgress.put(tp, state)
-    }
-  }
-
-  /**
-   * Choose the log to clean next and add it to the in-progress set. We recompute this
-   * each time from the full set of logs to allow logs to be dynamically added to the pool of logs
-   * the log manager maintains.
-   */
-  def grabFilthiestCompactedLog(time: Time, preCleanStats: PreCleanStats = new PreCleanStats()): Option[LogToClean] = {
-    inLock(lock) {
-      val now = time.milliseconds
-      this.timeOfLastRun = now
-      val lastClean = allCleanerCheckpoints
-
-      val dirtyLogs = logs.filter {
-        case (_, log) => log.config.compact
-      }.filterNot {
-        case (topicPartition, log) =>
-          inProgress.contains(topicPartition) || isUncleanablePartition(log, topicPartition)
-      }.map {
-        case (topicPartition, log) => // create a LogToClean instance for each
-          try {
-            val lastCleanOffset = lastClean.get(topicPartition)
-            val offsetsToClean = cleanableOffsets(log, lastCleanOffset, now)
-            // update checkpoint for logs with invalid checkpointed offsets
-            if (offsetsToClean.forceUpdateCheckpoint)
-              updateCheckpoints(log.parentDirFile, partitionToUpdateOrAdd = Option(topicPartition, offsetsToClean.firstDirtyOffset))
-            val compactionDelayMs = maxCompactionDelay(log, offsetsToClean.firstDirtyOffset, now)
-            preCleanStats.updateMaxCompactionDelay(compactionDelayMs)
-
-            LogToClean(topicPartition, log, offsetsToClean.firstDirtyOffset, offsetsToClean.firstUncleanableDirtyOffset, compactionDelayMs > 0)
-          } catch {
-            case e: Throwable => throw new LogCleaningException(log,
-              s"Failed to calculate log cleaning stats for partition $topicPartition", e)
-          }
-      }.filter(ltc => ltc.totalBytes > 0) // skip any empty logs
-
-      this.dirtiestLogCleanableRatio = if (dirtyLogs.nonEmpty) dirtyLogs.max.cleanableRatio else 0
-      // and must meet the minimum threshold for dirty byte ratio or have some bytes required to be compacted
-      val cleanableLogs = dirtyLogs.filter { ltc =>
-        (ltc.needCompactionNow && ltc.cleanableBytes > 0) || ltc.cleanableRatio > ltc.log.config.minCleanableRatio
-      }
-
-      if (cleanableLogs.isEmpty)
-        None
-      else {
-        preCleanStats.recordCleanablePartitions(cleanableLogs.size)
-        val filthiest = cleanableLogs.max
-        inProgress.put(filthiest.topicPartition, LogCleaningInProgress)
-        Some(filthiest)
-      }
-    }
-  }
-
-  /**
-   * Pause logs cleaning for logs that do not have compaction enabled
-   * and do not have other deletion or compaction in progress.
-   * This is to handle potential race between retention and cleaner threads when users
-   * switch topic configuration between compacted and non-compacted topic.
-   * @return retention logs that have log cleaning successfully paused
-   */
-  def pauseCleaningForNonCompactedPartitions(): Iterable[(TopicPartition, UnifiedLog)] = {
-    inLock(lock) {
-      val deletableLogs = logs.filter {
-        case (_, log) => !log.config.compact // pick non-compacted logs
-      }.filterNot {
-        case (topicPartition, _) => inProgress.contains(topicPartition) // skip any logs already in-progress
-      }
-
-      deletableLogs.foreach {
-        case (topicPartition, _) => inProgress.put(topicPartition, LogCleaningPaused(1))
-      }
-      deletableLogs
-    }
-  }
-
-  /**
-   * Find any logs that have compaction enabled. Mark them as being cleaned
-   * Include logs without delete enabled, as they may have segments
-   * that precede the start offset.
-   */
-  def deletableLogs(): Iterable[(TopicPartition, UnifiedLog)] = {
-    inLock(lock) {
-      val toClean = logs.filter { case (topicPartition, log) =>
-        !inProgress.contains(topicPartition) && log.config.compact &&
-          !isUncleanablePartition(log, topicPartition)
-      }
-      toClean.foreach { case (tp, _) => inProgress.put(tp, LogCleaningInProgress) }
-      toClean
-    }
-
-  }
-
-  /**
-   * Abort the cleaning of a particular partition, if it's in progress. This call blocks until the cleaning of
-   * the partition is aborted.
-   * This is implemented by first abortAndPausing and then resuming the cleaning of the partition.
-   */
-  def abortCleaning(topicPartition: TopicPartition): Unit = {
-    inLock(lock) {
-      abortAndPauseCleaning(topicPartition)
-      resumeCleaning(Seq(topicPartition))
-    }
-  }
-
-  /**
-   * Abort the cleaning of a particular partition if it's in progress, and pause any future cleaning of this partition.
-   * This call blocks until the cleaning of the partition is aborted and paused.
-   * 1. If the partition is not in progress, mark it as paused.
-   * 2. Otherwise, first mark the state of the partition as aborted.
-   * 3. The cleaner thread checks the state periodically and if it sees the state of the partition is aborted, it
-   *    throws a LogCleaningAbortedException to stop the cleaning task.
-   * 4. When the cleaning task is stopped, doneCleaning() is called, which sets the state of the partition as paused.
-   * 5. abortAndPauseCleaning() waits until the state of the partition is changed to paused.
-   * 6. If the partition is already paused, a new call to this function
-   *    will increase the paused count by one.
-   */
-  def abortAndPauseCleaning(topicPartition: TopicPartition): Unit = {
-    inLock(lock) {
-      inProgress.get(topicPartition) match {
-        case None =>
-          inProgress.put(topicPartition, LogCleaningPaused(1))
-        case Some(LogCleaningInProgress) =>
-          inProgress.put(topicPartition, LogCleaningAborted)
-        case Some(LogCleaningPaused(count)) =>
-          inProgress.put(topicPartition, LogCleaningPaused(count + 1))
-        case Some(s) =>
-          throw new IllegalStateException(s"Compaction for partition $topicPartition cannot be aborted and paused since it is in $s state.")
-      }
-      while (!isCleaningInStatePaused(topicPartition))
-        pausedCleaningCond.await(100, TimeUnit.MILLISECONDS)
-    }
-  }
-
-  /**
-   * Resume the cleaning of paused partitions.
-   * Each call of this function will undo one pause.
-   */
-  def resumeCleaning(topicPartitions: Iterable[TopicPartition]): Unit = {
-    inLock(lock) {
-      topicPartitions.foreach {
-        topicPartition =>
-          inProgress.get(topicPartition) match {
-            case None =>
-              throw new IllegalStateException(s"Compaction for partition $topicPartition cannot be resumed since it is not paused.")
-            case Some(state) =>
-              state match {
-                case LogCleaningPaused(count) if count == 1 =>
-                  inProgress.remove(topicPartition)
-                case LogCleaningPaused(count) if count > 1 =>
-                  inProgress.put(topicPartition, LogCleaningPaused(count - 1))
-                case s =>
-                  throw new IllegalStateException(s"Compaction for partition $topicPartition cannot be resumed since it is in $s state.")
-              }
-          }
-      }
-    }
-  }
-
-  /**
-   * Check if the cleaning for a partition is in a particular state. The caller is expected to hold lock while making the call.
-   */
-  private def isCleaningInState(topicPartition: TopicPartition, expectedState: LogCleaningState): Boolean = {
-    inProgress.get(topicPartition) match {
-      case None => false
-      case Some(state) =>
-        if (state == expectedState)
-          true
-        else
-          false
-    }
-  }
-
-  /**
-   * Check if the cleaning for a partition is paused. The caller is expected to hold lock while making the call.
-   */
-  private def isCleaningInStatePaused(topicPartition: TopicPartition): Boolean = {
-    inProgress.get(topicPartition) match {
-      case None => false
-      case Some(state) =>
-        state match {
-          case _: LogCleaningPaused =>
-            true
-          case _ =>
-            false
-        }
-    }
-  }
-
-  /**
-   * Check if the cleaning for a partition is aborted. If so, throw an exception.
-   */
-  def checkCleaningAborted(topicPartition: TopicPartition): Unit = {
-    inLock(lock) {
-      if (isCleaningInState(topicPartition, LogCleaningAborted))
-        throw new LogCleaningAbortedException()
-    }
-  }
-
-  /**
-   * Update checkpoint file, adding or removing partitions if necessary.
-   *
-   * @param dataDir The File object to be updated
-   * @param partitionToUpdateOrAdd The [TopicPartition, Long] map data to be updated. pass "none" if doing remove, not add
-   * @param partitionToRemove The TopicPartition to be removed
-   */
-  def updateCheckpoints(dataDir: File,
-                        partitionToUpdateOrAdd: Option[(TopicPartition, JLong)] = None,
-                        partitionToRemove: Option[TopicPartition] = None): Unit = {
-    inLock(lock) {
-      val checkpoint = checkpoints(dataDir)
-      if (checkpoint != null) {
-        try {
-          val currentCheckpoint = checkpoint.read().asScala.filter { case (tp, _) => logs.keys.contains(tp) }.toMap
-          // remove the partition offset if any
-          var updatedCheckpoint = partitionToRemove match {
-            case Some(topicPartition) => currentCheckpoint - topicPartition
-            case None => currentCheckpoint
-          }
-          // update or add the partition offset if any
-          updatedCheckpoint = partitionToUpdateOrAdd match {
-            case Some(updatedOffset) => updatedCheckpoint + updatedOffset
-            case None => updatedCheckpoint
-          }
-
-          checkpoint.write(updatedCheckpoint.asJava)
-        } catch {
-          case e: KafkaStorageException =>
-            error(s"Failed to access checkpoint file ${checkpoint.file.getName} in dir ${checkpoint.file.getParentFile.getAbsolutePath}", e)
-        }
-      }
-    }
-  }
-
-  /**
-   * alter the checkpoint directory for the topicPartition, to remove the data in sourceLogDir, and add the data in destLogDir
-   */
-  def alterCheckpointDir(topicPartition: TopicPartition, sourceLogDir: File, destLogDir: File): Unit = {
-    inLock(lock) {
-      try {
-        checkpoints.get(sourceLogDir).flatMap(_.read().asScala.get(topicPartition)) match {
-          case Some(offset) =>
-            debug(s"Removing the partition offset data in checkpoint file for '$topicPartition' " +
-              s"from ${sourceLogDir.getAbsoluteFile} directory.")
-            updateCheckpoints(sourceLogDir, partitionToRemove = Option(topicPartition))
-
-            debug(s"Adding the partition offset data in checkpoint file for '$topicPartition' " +
-              s"to ${destLogDir.getAbsoluteFile} directory.")
-            updateCheckpoints(destLogDir, partitionToUpdateOrAdd = Option(topicPartition, offset))
-          case None =>
-        }
-      } catch {
-        case e: KafkaStorageException =>
-          error(s"Failed to access checkpoint file in dir ${sourceLogDir.getAbsolutePath}", e)
-      }
-
-      val logUncleanablePartitions = uncleanablePartitions.getOrElse(sourceLogDir.toString, mutable.Set[TopicPartition]())
-      if (logUncleanablePartitions.contains(topicPartition)) {
-        logUncleanablePartitions.remove(topicPartition)
-        markPartitionUncleanable(destLogDir.toString, topicPartition)
-      }
-    }
-  }
-
-  /**
-   * Stop cleaning logs in the provided directory
-   *
-   * @param dir the absolute path of the log dir
-   */
-  def handleLogDirFailure(dir: String): Unit = {
-    warn(s"Stopping cleaning logs in dir $dir")
-    inLock(lock) {
-      checkpoints = checkpoints.filter { case (k, _) => k.getAbsolutePath != dir }
-    }
-  }
-
-  /**
-   * Truncate the checkpointed offset for the given partition if its checkpointed offset is larger than the given offset
-   */
-  def maybeTruncateCheckpoint(dataDir: File, topicPartition: TopicPartition, offset: JLong): Unit = {
-    inLock(lock) {
-      if (logs.get(topicPartition).config.compact) {
-        val checkpoint = checkpoints(dataDir)
-        if (checkpoint != null) {
-          val existing = checkpoint.read()
-          if (existing.getOrDefault(topicPartition, 0L) > offset) {
-            existing.put(topicPartition, offset)
-            checkpoint.write(existing)
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Save out the endOffset and remove the given log from the in-progress set, if not aborted.
-   */
-  def doneCleaning(topicPartition: TopicPartition, dataDir: File, endOffset: Long): Unit = {
-    inLock(lock) {
-      inProgress.get(topicPartition) match {
-        case Some(LogCleaningInProgress) =>
-          updateCheckpoints(dataDir, partitionToUpdateOrAdd = Option(topicPartition, endOffset))
-          inProgress.remove(topicPartition)
-        case Some(LogCleaningAborted) =>
-          inProgress.put(topicPartition, LogCleaningPaused(1))
-          pausedCleaningCond.signalAll()
-        case None =>
-          throw new IllegalStateException(s"State for partition $topicPartition should exist.")
-        case s =>
-          throw new IllegalStateException(s"In-progress partition $topicPartition cannot be in $s state.")
-      }
-    }
-  }
-
-  def doneDeleting(topicPartitions: Iterable[TopicPartition]): Unit = {
-    inLock(lock) {
-      topicPartitions.foreach {
-        topicPartition =>
-          inProgress.get(topicPartition) match {
-            case Some(LogCleaningInProgress) =>
-              inProgress.remove(topicPartition)
-            case Some(LogCleaningAborted) =>
-              inProgress.put(topicPartition, LogCleaningPaused(1))
-              pausedCleaningCond.signalAll()
-            case None =>
-              throw new IllegalStateException(s"State for partition $topicPartition should exist.")
-            case s =>
-              throw new IllegalStateException(s"In-progress partition $topicPartition cannot be in $s state.")
-          }
-      }
-    }
-  }
-
-  /**
-   * Returns an immutable set of the uncleanable partitions for a given log directory
-   * Only used for testing
-   */
-  private[log] def uncleanablePartitions(logDir: String): Set[TopicPartition] = {
-    var partitions: Set[TopicPartition] = Set()
-    inLock(lock) { partitions ++= uncleanablePartitions.getOrElse(logDir, partitions) }
-    partitions
-  }
-
-  def markPartitionUncleanable(logDir: String, partition: TopicPartition): Unit = {
-    inLock(lock) {
-      uncleanablePartitions.get(logDir) match {
-        case Some(partitions) =>
-          partitions.add(partition)
-        case None =>
-          uncleanablePartitions.put(logDir, mutable.Set(partition))
-      }
-    }
-  }
-
-  private def isUncleanablePartition(log: UnifiedLog, topicPartition: TopicPartition): Boolean = {
-    inLock(lock) {
-      uncleanablePartitions.get(log.parentDir).exists(partitions => partitions.contains(topicPartition))
-    }
-  }
-
-  def maintainUncleanablePartitions(): Unit = {
-    // Remove deleted partitions from uncleanablePartitions
-    inLock(lock) {
-      // Remove deleted partitions
-      uncleanablePartitions.values.foreach { partitions =>
-        partitions.filterInPlace(logs.contains)
-      }
-
-      // Remove entries with empty partition set.
-      uncleanablePartitions.filterInPlace {
-        case (_, partitions) => partitions.nonEmpty
-      }
-    }
-  }
-
-  def removeMetrics(): Unit = {
-    GaugeMetricNameNoTag.foreach(metricsGroup.removeMetric)
-    gaugeMetricNameWithTag.asScala.foreach { metricNameAndTags =>
-      metricNameAndTags._2.asScala.foreach { tag =>
-        metricsGroup.removeMetric(metricNameAndTags._1, tag)
-      }
-    }
-    gaugeMetricNameWithTag.clear()
-  }
-}
-
-/**
- * Helper class for the range of cleanable dirty offsets of a log and whether to update the checkpoint associated with
- * the log
- *
- * @param firstDirtyOffset the lower (inclusive) offset to begin cleaning from
- * @param firstUncleanableDirtyOffset the upper(exclusive) offset to clean to
- * @param forceUpdateCheckpoint whether to update the checkpoint associated with this log. if true, checkpoint should be
- *                              reset to firstDirtyOffset
- */
-private case class OffsetsToClean(firstDirtyOffset: Long,
-                                  firstUncleanableDirtyOffset: Long,
-                                  forceUpdateCheckpoint: Boolean = false) {
-}
-
-private[log] object LogCleanerManager extends Logging {
-  private val UncleanablePartitionsCountMetricName = "uncleanable-partitions-count"
-  private val UncleanableBytesMetricName = "uncleanable-bytes"
-  private val MaxDirtyPercentMetricName = "max-dirty-percent"
-  private val TimeSinceLastRunMsMetricName = "time-since-last-run-ms"
-
-  // Visible for testing
-  private[log] val GaugeMetricNameNoTag = Set(
-    MaxDirtyPercentMetricName,
-    TimeSinceLastRunMsMetricName
-  )
-
-  private def isCompactAndDelete(log: UnifiedLog): Boolean = {
-    log.config.compact && log.config.delete
-  }
-
-  /**
-   * get max delay between the time when log is required to be compacted as determined
-   * by maxCompactionLagMs and the current time.
-   */
-  private def maxCompactionDelay(log: UnifiedLog, firstDirtyOffset: Long, now: Long) : Long = {
-    val dirtyNonActiveSegments = log.nonActiveLogSegmentsFrom(firstDirtyOffset)
-    val firstBatchTimestamps = log.getFirstBatchTimestampForSegments(dirtyNonActiveSegments).stream.filter(_ > 0)
-
-    val earliestDirtySegmentTimestamp = firstBatchTimestamps.min(Comparator.naturalOrder()).orElse(Long.MaxValue)
-
-    val maxCompactionLagMs = math.max(log.config.maxCompactionLagMs, 0L)
-    val cleanUntilTime = now - maxCompactionLagMs
-
-    if (earliestDirtySegmentTimestamp < cleanUntilTime)
-      cleanUntilTime - earliestDirtySegmentTimestamp
-    else
-      0L
-  }
-
-  /**
-   * Returns the range of dirty offsets that can be cleaned.
-   *
-   * @param log the log
-   * @param lastCleanOffset the last checkpointed offset
-   * @param now the current time in milliseconds of the cleaning operation
-   * @return OffsetsToClean containing offsets for cleanable portion of log and whether the log checkpoint needs updating
-   */
-  def cleanableOffsets(log: UnifiedLog, lastCleanOffset: Option[Long], now: Long): OffsetsToClean = {
-    // If the log segments are abnormally truncated and hence the checkpointed offset is no longer valid;
-    // reset to the log starting offset and log the error
-    val (firstDirtyOffset, forceUpdateCheckpoint) = {
-      val logStartOffset = log.logStartOffset
-      val checkpointDirtyOffset = lastCleanOffset.getOrElse(logStartOffset)
-
-      if (checkpointDirtyOffset < logStartOffset) {
-        // Don't bother with the warning if compact and delete are enabled.
-        if (!isCompactAndDelete(log))
-          warn(s"Resetting first dirty offset of ${log.name} to log start offset $logStartOffset " +
-            s"since the checkpointed offset $checkpointDirtyOffset is invalid.")
-        (logStartOffset, true)
-      } else if (checkpointDirtyOffset > log.logEndOffset) {
-        // The dirty offset has gotten ahead of the log end offset. This could happen if there was data
-        // corruption at the end of the log. We conservatively assume that the full log needs cleaning.
-        warn(s"The last checkpoint dirty offset for partition ${log.name} is $checkpointDirtyOffset, " +
-          s"which is larger than the log end offset ${log.logEndOffset}. Resetting to the log start offset $logStartOffset.")
-        (logStartOffset, true)
-      } else {
-        (checkpointDirtyOffset, false)
-      }
-    }
-
-    val minCompactionLagMs = math.max(log.config.compactionLagMs, 0L)
-
-    // Find the first segment that cannot be cleaned. We cannot clean past:
-    // 1. The active segment
-    // 2. The last stable offset (including the high watermark)
-    // 3. Any segments closer to the head of the log than the minimum compaction lag time
-    val firstUncleanableDirtyOffset: Long = Seq(
-
-      // we do not clean beyond the last stable offset
-      Some(log.lastStableOffset),
-
-      // the active segment is always uncleanable
-      Option(log.activeSegment.baseOffset),
-
-      // the first segment whose largest message timestamp is within a minimum time lag from now
-      if (minCompactionLagMs > 0) {
-        // dirty log segments
-        val dirtyNonActiveSegments = log.nonActiveLogSegmentsFrom(firstDirtyOffset)
-        dirtyNonActiveSegments.asScala.find { s =>
-          val isUncleanable = s.largestTimestamp > now - minCompactionLagMs
-          debug(s"Checking if log segment may be cleaned: log='${log.name}' segment.baseOffset=${s.baseOffset} " +
-            s"segment.largestTimestamp=${s.largestTimestamp}; now - compactionLag=${now - minCompactionLagMs}; " +
-            s"is uncleanable=$isUncleanable")
-          isUncleanable
-        }.map(_.baseOffset)
-      } else None
-    ).flatten.min
-
-    debug(s"Finding range of cleanable offsets for log=${log.name}. Last clean offset=$lastCleanOffset " +
-      s"now=$now => firstDirtyOffset=$firstDirtyOffset firstUncleanableOffset=$firstUncleanableDirtyOffset " +
-      s"activeSegment.baseOffset=${log.activeSegment.baseOffset}")

-    OffsetsToClean(firstDirtyOffset, math.max(firstDirtyOffset, firstUncleanableDirtyOffset), forceUpdateCheckpoint)
-  }
-
-  /**
-   * Given the first dirty offset and an uncleanable offset, calculates the total cleanable bytes for this log
-   * @return the biggest uncleanable offset and the total amount of cleanable bytes
-   */
-  def calculateCleanableBytes(log: UnifiedLog, firstDirtyOffset: Long, uncleanableOffset: Long): (Long, Long) = {
-    val firstUncleanableSegment = log.nonActiveLogSegmentsFrom(uncleanableOffset).asScala.headOption.getOrElse(log.activeSegment)
-    val firstUncleanableOffset = firstUncleanableSegment.baseOffset
-    val cleanableBytes = log.logSegments(math.min(firstDirtyOffset, firstUncleanableOffset), firstUncleanableOffset).asScala.map(_.size.toLong).sum
-
-    (firstUncleanableOffset, cleanableBytes)
-  }
-
-}
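That is the end of the old Scala manager; the commit replaces it with a Java LogCleanerManager in org.apache.kafka.storage.internals.log, consumed from Scala through the conversions shown earlier. One visible signature change, grounded in the test updates below: calculateCleanableBytes now returns a java.util.Map.Entry instead of a Scala (Long, Long) pair. A sketch of the new call shape (the getKey half is implied by the Entry type; only getValue appears in this diff):

    import org.apache.kafka.storage.internals.log.LogCleanerManager

    val entry = LogCleanerManager.calculateCleanableBytes(log, 0L, uncleanableOffset)
    val firstUncleanableOffset = entry.getKey  // formerly ._1
    val cleanableBytes = entry.getValue        // formerly ._2, as in the test hunks below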
LogManager.scala:

@@ -83,11 +83,11 @@ class LogManager(logDirs: Seq[File],
   private val metricsGroup = new KafkaMetricsGroup(this.getClass)

   private val logCreationOrDeletionLock = new Object
-  private val currentLogs = new Pool[TopicPartition, UnifiedLog]()
+  private val currentLogs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
   // Future logs are put in the directory with "-future" suffix. Future log is created when user wants to move replica
   // from one log directory to another log directory on the same broker. The directory of the future log will be renamed
   // to replace the current log of the partition after the future log catches up with the current log
-  private val futureLogs = new Pool[TopicPartition, UnifiedLog]()
+  private val futureLogs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
   // Each element in the queue contains the log object to be deleted and the time it is scheduled for deletion.
   private val logsToBeDeleted = new LinkedBlockingQueue[(UnifiedLog, Long)]()
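kafka.utils.Pool was essentially a Scala-friendly wrapper over ConcurrentHashMap; with the raw map in its place, plain Java calls (get, put, remove) keep working, but Scala collection operations need an explicit asScala view, which is why the read sites below gain that call. In isolation:

    import java.util.concurrent.ConcurrentHashMap
    import scala.jdk.CollectionConverters._

    val logs = new ConcurrentHashMap[String, Int]()
    logs.put("topic-0", 1)
    val nonEmpty = logs.asScala.filter { case (_, v) => v > 0 }  // Scala ops through the mutable Map view
    logs.remove("topic-0")                                       // direct Java access needs no conversion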
@@ -230,8 +230,8 @@ class LogManager(logDirs: Seq[File],
     if (cleaner != null)
       cleaner.handleLogDirFailure(dir)

-    def removeOfflineLogs(logs: Pool[TopicPartition, UnifiedLog]): Iterable[TopicPartition] = {
-      val offlineTopicPartitions: Iterable[TopicPartition] = logs.collect {
+    def removeOfflineLogs(logs: util.concurrent.ConcurrentMap[TopicPartition, UnifiedLog]): Iterable[TopicPartition] = {
+      val offlineTopicPartitions: Iterable[TopicPartition] = logs.asScala.collect {
         case (tp, log) if log.parentDir == dir => tp
       }
       offlineTopicPartitions.foreach { topicPartition => {
@@ -1180,7 +1180,7 @@ class LogManager(logDirs: Seq[File],
   }

   private def findAbandonedFutureLogs(brokerId: Int, newTopicsImage: TopicsImage): Iterable[(UnifiedLog, Option[UnifiedLog])] = {
-    futureLogs.values.flatMap { futureLog =>
+    futureLogs.asScala.values.flatMap { futureLog =>
       val topicId = futureLog.topicId.orElseThrow(() =>
         new RuntimeException(s"The log dir $futureLog does not have a topic ID, " +
           "which is not allowed when running in KRaft mode.")
@@ -1386,7 +1386,7 @@ class LogManager(logDirs: Seq[File],
         // prevent cleaner from working on same partitions when changing cleanup policy
         cleaner.pauseCleaningForNonCompactedPartitions()
       } else {
-        currentLogs.filter {
+        currentLogs.asScala.filter {
           case (_, log) => !log.config.compact
         }
       }
@@ -1418,10 +1418,10 @@ class LogManager(logDirs: Seq[File],
   /**
    * Get all the partition logs
    */
-  def allLogs: Iterable[UnifiedLog] = currentLogs.values ++ futureLogs.values
+  def allLogs: Iterable[UnifiedLog] = currentLogs.asScala.values ++ futureLogs.asScala.values

   def logsByTopic(topic: String): Seq[UnifiedLog] = {
-    (currentLogs.toList ++ futureLogs.toList).collect {
+    (currentLogs.asScala.toList ++ futureLogs.asScala.toList).collect {
       case (topicPartition, log) if topicPartition.topic == topic => log
     }
   }
@@ -1437,8 +1437,8 @@ class LogManager(logDirs: Seq[File],
     def addToDir(tp: TopicPartition, log: UnifiedLog): Unit = {
       byDir.getOrElseUpdate(log.parentDir, new mutable.AnyRefMap[TopicPartition, UnifiedLog]()).put(tp, log)
     }
-    currentLogs.foreachEntry(addToDir)
-    futureLogs.foreachEntry(addToDir)
+    currentLogs.asScala.foreachEntry(addToDir)
+    futureLogs.asScala.foreachEntry(addToDir)
     byDir
   }
@@ -1466,7 +1466,7 @@ class LogManager(logDirs: Seq[File],
   private def flushDirtyLogs(): Unit = {
     debug("Checking for dirty logs to flush...")

-    for ((topicPartition, log) <- currentLogs.toList ++ futureLogs.toList) {
+    for ((topicPartition, log) <- currentLogs.asScala.toList ++ futureLogs.asScala.toList) {
       try {
         val timeSinceLastFlush = time.milliseconds - log.lastFlushTime
         debug(s"Checking if flush is needed on ${topicPartition.topic} flush interval ${log.config.flushMs}" +
@@ -1480,7 +1480,7 @@ class LogManager(logDirs: Seq[File],
       }
     }

-  private def removeLogAndMetrics(logs: Pool[TopicPartition, UnifiedLog], tp: TopicPartition): Option[UnifiedLog] = {
+  private def removeLogAndMetrics(logs: util.concurrent.ConcurrentMap[TopicPartition, UnifiedLog], tp: TopicPartition): Option[UnifiedLog] = {
     val removedLog = logs.remove(tp)
     if (removedLog != null) {
       removedLog.removeLogMetrics()
AbstractLogCleanerIntegrationTest.scala:

@@ -19,7 +19,7 @@ package kafka.log
 import java.io.File
 import java.nio.file.Files
 import java.util.{Optional, Properties}
-import kafka.utils.{Pool, TestUtils}
+import kafka.utils.TestUtils
 import kafka.utils.Implicits._
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.compress.Compression
@@ -93,7 +93,7 @@ abstract class AbstractLogCleanerIntegrationTest {
                   cleanerIoBufferSize: Option[Int] = None,
                   propertyOverrides: Properties = new Properties()): LogCleaner = {

-    val logMap = new Pool[TopicPartition, UnifiedLog]()
+    val logMap = new java.util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
     for (partition <- partitions) {
       val dir = new File(logDir, s"${partition.topic}-${partition.partition}")
       Files.createDirectories(dir.toPath)
LogCleanerIntegrationTest.scala:

@@ -25,7 +25,7 @@ import org.apache.kafka.common.compress.Compression
 import org.apache.kafka.common.record.RecordBatch
 import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.kafka.server.util.MockTime
-import org.apache.kafka.storage.internals.log.UnifiedLog
+import org.apache.kafka.storage.internals.log.{LogCleanerManager, UnifiedLog}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, Test}

@@ -78,8 +78,8 @@ class LogCleanerIntegrationTest extends AbstractLogCleanerIntegrationTest {
     val uncleanableBytesGauge = getGauge[Long]("uncleanable-bytes", uncleanableDirectory)

     TestUtils.waitUntilTrue(() => uncleanablePartitionsCountGauge.value() == 2, "There should be 2 uncleanable partitions", 2000L)
-    val expectedTotalUncleanableBytes = LogCleanerManager.calculateCleanableBytes(log, 0, log.logSegments.asScala.last.baseOffset)._2 +
-      LogCleanerManager.calculateCleanableBytes(log2, 0, log2.logSegments.asScala.last.baseOffset)._2
+    val expectedTotalUncleanableBytes = LogCleanerManager.calculateCleanableBytes(log, 0, log.logSegments.asScala.last.baseOffset).getValue +
+      LogCleanerManager.calculateCleanableBytes(log2, 0, log2.logSegments.asScala.last.baseOffset).getValue
     TestUtils.waitUntilTrue(() => uncleanableBytesGauge.value() == expectedTotalUncleanableBytes,
       s"There should be $expectedTotalUncleanableBytes uncleanable bytes", 1000L)

@ -172,7 +172,7 @@ class LogCleanerIntegrationTest extends AbstractLogCleanerIntegrationTest {
|
||||||
cleaner.awaitCleaned(new TopicPartition("log", 0), firstBlockCleanableSegmentOffset)
|
cleaner.awaitCleaned(new TopicPartition("log", 0), firstBlockCleanableSegmentOffset)
|
||||||
|
|
||||||
val read1 = readFromLog(log)
|
val read1 = readFromLog(log)
|
||||||
val lastCleaned = cleaner.cleanerManager.allCleanerCheckpoints(new TopicPartition("log", 0))
|
val lastCleaned = cleaner.cleanerManager.allCleanerCheckpoints.get(new TopicPartition("log", 0))
|
||||||
assertTrue(lastCleaned >= firstBlockCleanableSegmentOffset,
|
assertTrue(lastCleaned >= firstBlockCleanableSegmentOffset,
|
||||||
s"log cleaner should have processed at least to offset $firstBlockCleanableSegmentOffset, but lastCleaned=$lastCleaned")
|
s"log cleaner should have processed at least to offset $firstBlockCleanableSegmentOffset, but lastCleaned=$lastCleaned")
|
||||||
|
|
||||||
|
@ -187,7 +187,7 @@ class LogCleanerIntegrationTest extends AbstractLogCleanerIntegrationTest {
|
||||||
|
|
||||||
assertEquals(appends1, read2, s"log should only contains zero keys now")
|
assertEquals(appends1, read2, s"log should only contains zero keys now")
|
||||||
|
|
||||||
val lastCleaned2 = cleaner.cleanerManager.allCleanerCheckpoints(new TopicPartition("log", 0))
|
val lastCleaned2 = cleaner.cleanerManager.allCleanerCheckpoints.get(new TopicPartition("log", 0))
|
||||||
val secondBlockCleanableSegmentOffset = activeSegAtT1.baseOffset
|
val secondBlockCleanableSegmentOffset = activeSegAtT1.baseOffset
|
||||||
assertTrue(lastCleaned2 >= secondBlockCleanableSegmentOffset,
|
assertTrue(lastCleaned2 >= secondBlockCleanableSegmentOffset,
|
||||||
s"log cleaner should have processed at least to offset $secondBlockCleanableSegmentOffset, but lastCleaned=$lastCleaned2")
|
s"log cleaner should have processed at least to offset $secondBlockCleanableSegmentOffset, but lastCleaned=$lastCleaned2")
|
||||||
|
|
|
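The checkpoint lookups above change shape because the rewritten manager returns a java.util.Map[TopicPartition, JLong] instead of a Scala Map, so Scala's apply-style lookup becomes Map#get. A minimal sketch of the calling pattern, assuming a cleaner with the new accessor in scope (names taken from the tests above):

    import org.apache.kafka.common.TopicPartition

    // Java Map#get returns null (not None) when the key is absent, so wrap the
    // lookup in Option if the partition may be missing from the checkpoint file.
    val checkpoints = cleaner.cleanerManager.allCleanerCheckpoints
    val lastCleaned: Option[Long] =
      Option(checkpoints.get(new TopicPartition("log", 0))).map(_.longValue)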
@@ -92,7 +92,7 @@ class LogCleanerLagIntegrationTest extends AbstractLogCleanerIntegrationTest wit

 val compactedSize = log.logSegments(0L, activeSegAtT0.baseOffset).asScala.map(_.size).sum
 debug(s"after cleaning the compacted size up to active segment at T0: $compactedSize")
-val lastCleaned = cleaner.cleanerManager.allCleanerCheckpoints(new TopicPartition("log", 0))
+val lastCleaned = cleaner.cleanerManager.allCleanerCheckpoints.get(new TopicPartition("log", 0))
 assertTrue(lastCleaned >= firstBlock1SegmentBaseOffset, s"log cleaner should have processed up to offset $firstBlock1SegmentBaseOffset, but lastCleaned=$lastCleaned")
 assertTrue(sizeUpToActiveSegmentAtT0 > compactedSize, s"log should have been compacted: size up to offset of active segment at T0=$sizeUpToActiveSegmentAtT0 compacted size=$compactedSize")
 }
@@ -28,7 +28,8 @@ import org.apache.kafka.common.record._
 import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.coordinator.transaction.TransactionLogConfig
 import org.apache.kafka.server.util.MockTime
-import org.apache.kafka.storage.internals.log.{AppendOrigin, LocalLog, LogConfig, LogDirFailureChannel, LogLoader, LogOffsetsListener, LogSegment, LogSegments, LogStartOffsetIncrementReason, ProducerStateManager, ProducerStateManagerConfig, UnifiedLog}
+import org.apache.kafka.storage.internals.log.LogCleaningState.{LOG_CLEANING_ABORTED, LOG_CLEANING_IN_PROGRESS}
+import org.apache.kafka.storage.internals.log.{AppendOrigin, LocalLog, LogCleanerManager, LogCleaningException, LogCleaningState, LogConfig, LogDirFailureChannel, LogLoader, LogOffsetsListener, LogSegment, LogSegments, LogStartOffsetIncrementReason, LogToClean, PreCleanStats, ProducerStateManager, ProducerStateManagerConfig, UnifiedLog}
 import org.apache.kafka.storage.log.metrics.BrokerTopicStats
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, Test}

@@ -37,6 +38,8 @@ import java.lang.{Long => JLong}
 import java.util
 import java.util.concurrent.ConcurrentHashMap
 import scala.collection.mutable
+import scala.jdk.CollectionConverters._
+import scala.jdk.OptionConverters.RichOptional

 /**
 * Unit tests for the log cleaning logic

@@ -46,7 +49,7 @@ class LogCleanerManagerTest extends Logging {
 val tmpDir: File = TestUtils.tempDir()
 val tmpDir2: File = TestUtils.tempDir()
 val logDir: File = TestUtils.randomPartitionLogDir(tmpDir)
-val logDir2: File = TestUtils.randomPartitionLogDir(tmpDir)
+val logDir2: File = TestUtils.randomPartitionLogDir(tmpDir2)
 val topicPartition = new TopicPartition("log", 0)
 val topicPartition2 = new TopicPartition("log2", 0)
 val logProps = new Properties()

@@ -58,21 +61,21 @@ class LogCleanerManagerTest extends Logging {
 val offset = 999
 val producerStateManagerConfig = new ProducerStateManagerConfig(TransactionLogConfig.PRODUCER_ID_EXPIRATION_MS_DEFAULT, false)

-val cleanerCheckpoints: mutable.Map[TopicPartition, Long] = mutable.Map[TopicPartition, Long]()
+val cleanerCheckpoints: mutable.Map[TopicPartition, JLong] = mutable.Map[TopicPartition, JLong]()

-class LogCleanerManagerMock(logDirs: Seq[File],
-logs: Pool[TopicPartition, UnifiedLog],
+class LogCleanerManagerMock(logDirs: util.List[File],
+logs: util.concurrent.ConcurrentMap[TopicPartition, UnifiedLog],
 logDirFailureChannel: LogDirFailureChannel) extends LogCleanerManager(logDirs, logs, logDirFailureChannel) {
-override def allCleanerCheckpoints: Map[TopicPartition, Long] = {
-cleanerCheckpoints.toMap
+override def allCleanerCheckpoints: util.Map[TopicPartition, JLong] = {
+cleanerCheckpoints.toMap.asJava
 }

-override def updateCheckpoints(dataDir: File, partitionToUpdateOrAdd: Option[(TopicPartition, JLong)] = None,
-partitionToRemove: Option[TopicPartition] = None): Unit = {
+override def updateCheckpoints(dataDir: File, partitionToUpdateOrAdd: Optional[util.Map.Entry[TopicPartition, JLong]],
+partitionToRemove: Optional[TopicPartition]): Unit = {
 assert(partitionToRemove.isEmpty, "partitionToRemove argument with value not yet handled")
-val (tp, offset) = partitionToUpdateOrAdd.getOrElse(
-throw new IllegalArgumentException("partitionToUpdateOrAdd==None argument not yet handled"))
-cleanerCheckpoints.put(tp, offset)
+val entry = partitionToUpdateOrAdd.orElseThrow(() =>
+new IllegalArgumentException("partitionToUpdateOrAdd==None argument not yet handled"))
+cleanerCheckpoints.put(entry.getKey, entry.getValue)
 }
 }
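With LogCleanerManager now a Java class, the test mock extends it using Java types end to end: util.List for directories, ConcurrentMap for logs, and explicit Optional parameters, since Java methods cannot declare Scala-style default arguments. A hedged sketch of the same subclassing pattern (the class name here is hypothetical):

    // Sketch only: records checkpoint updates instead of writing checkpoint files.
    class RecordingCleanerManager(dirs: util.List[File],
                                  logs: util.concurrent.ConcurrentMap[TopicPartition, UnifiedLog])
      extends LogCleanerManager(dirs, logs, null) {
      private val seen = mutable.Map[TopicPartition, JLong]()
      override def updateCheckpoints(dataDir: File,
                                     partitionToUpdateOrAdd: Optional[util.Map.Entry[TopicPartition, JLong]],
                                     partitionToRemove: Optional[TopicPartition]): Unit = {
        partitionToUpdateOrAdd.ifPresent(e => seen.put(e.getKey, e.getValue))
        partitionToRemove.ifPresent(tp => seen.remove(tp))
      }
    }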
@@ -83,8 +86,8 @@ class LogCleanerManagerTest extends Logging {

 private def setupIncreasinglyFilthyLogs(partitions: Seq[TopicPartition],
 startNumBatches: Int,
-batchIncrement: Int): Pool[TopicPartition, UnifiedLog] = {
-val logs = new Pool[TopicPartition, UnifiedLog]()
+batchIncrement: Int): util.concurrent.ConcurrentMap[TopicPartition, UnifiedLog] = {
+val logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
 var numBatches = startNumBatches

 for (tp <- partitions) {

@@ -146,12 +149,12 @@
 batchesPerSegment = 2
 )

-val logsPool = new Pool[TopicPartition, UnifiedLog]()
+val logsPool = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
 logsPool.put(tp, log)
 val cleanerManager = createCleanerManagerMock(logsPool)
 cleanerCheckpoints.put(tp, 1)

-val thrownException = assertThrows(classOf[LogCleaningException], () => cleanerManager.grabFilthiestCompactedLog(time).get)
+val thrownException = assertThrows(classOf[LogCleaningException], () => cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).get)
 assertEquals(log, thrownException.log)
 assertTrue(thrownException.getCause.isInstanceOf[IllegalStateException])
 }

@@ -168,7 +171,7 @@
 val cleanerManager = createCleanerManagerMock(logs)
 partitions.foreach(partition => cleanerCheckpoints.put(partition, 20))

-val filthiestLog: LogToClean = cleanerManager.grabFilthiestCompactedLog(time).get
+val filthiestLog: LogToClean = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).get
 assertEquals(tp2, filthiestLog.topicPartition)
 assertEquals(tp2, filthiestLog.log.topicPartition)
 }

@@ -187,7 +190,7 @@

 cleanerManager.markPartitionUncleanable(logs.get(tp2).dir.getParent, tp2)

-val filthiestLog: LogToClean = cleanerManager.grabFilthiestCompactedLog(time).get
+val filthiestLog: LogToClean = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).get
 assertEquals(tp1, filthiestLog.topicPartition)
 assertEquals(tp1, filthiestLog.log.topicPartition)
 }

@@ -204,9 +207,9 @@
 val cleanerManager = createCleanerManagerMock(logs)
 partitions.foreach(partition => cleanerCheckpoints.put(partition, 20))

-cleanerManager.setCleaningState(tp2, LogCleaningInProgress)
+cleanerManager.setCleaningState(tp2, LOG_CLEANING_IN_PROGRESS)

-val filthiestLog: LogToClean = cleanerManager.grabFilthiestCompactedLog(time).get
+val filthiestLog: LogToClean = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).get
 assertEquals(tp1, filthiestLog.topicPartition)
 assertEquals(tp1, filthiestLog.log.topicPartition)
 }

@@ -223,11 +226,11 @@
 val cleanerManager = createCleanerManagerMock(logs)
 partitions.foreach(partition => cleanerCheckpoints.put(partition, 20))

-cleanerManager.setCleaningState(tp2, LogCleaningInProgress)
+cleanerManager.setCleaningState(tp2, LOG_CLEANING_IN_PROGRESS)
 cleanerManager.markPartitionUncleanable(logs.get(tp1).dir.getParent, tp1)

-val filthiestLog: Option[LogToClean] = cleanerManager.grabFilthiestCompactedLog(time)
-assertEquals(None, filthiestLog)
+val filthiestLog: Optional[LogToClean] = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats())
+assertEquals(Optional.empty(), filthiestLog)
 }

 @Test
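grabFilthiestCompactedLog now takes a PreCleanStats accumulator and returns java.util.Optional[LogToClean] rather than a Scala Option, which is why the assertion above compares against Optional.empty(). A sketch of idiomatic handling on the Scala side, using the same scala.jdk.OptionConverters import the test file gained:

    val stats = new PreCleanStats()
    cleanerManager.grabFilthiestCompactedLog(time, stats).toScala match {
      case Some(logToClean) => println(s"filthiest: ${logToClean.topicPartition}")
      case None             => println("no log is currently eligible for compaction")
    }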
@@ -237,7 +240,7 @@
 val cleanerManager = createCleanerManagerMock(logs)
 cleanerCheckpoints.put(tp, 200)

-val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time).get
+val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).get
 assertEquals(0L, filthiestLog.firstDirtyOffset)
 }

@@ -251,7 +254,7 @@
 val cleanerManager = createCleanerManagerMock(logs)
 cleanerCheckpoints.put(tp, 0L)

-val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time).get
+val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).get
 assertEquals(10L, filthiestLog.firstDirtyOffset)
 }

@@ -260,7 +263,7 @@
 val tp = new TopicPartition("foo", 0)
 val log = createLog(segmentSize = 2048, TopicConfig.CLEANUP_POLICY_COMPACT, tp)

-val logs = new Pool[TopicPartition, UnifiedLog]()
+val logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
 logs.put(tp, log)

 appendRecords(log, numRecords = 3)

@@ -275,8 +278,8 @@
 cleanerCheckpoints.put(tp, 0L)

 // The active segment is uncleanable and hence not filthy from the POV of the CleanerManager.
-val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time)
-assertEquals(None, filthiestLog)
+val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats())
+assertEquals(Optional.empty(), filthiestLog)
 }

 @Test

@@ -287,7 +290,7 @@

 val tp = new TopicPartition("foo", 0)

-val logs = new Pool[TopicPartition, UnifiedLog]()
+val logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
 val log = createLog(2048, TopicConfig.CLEANUP_POLICY_COMPACT, topicPartition = tp)
 logs.put(tp, log)

@@ -301,8 +304,8 @@
 cleanerCheckpoints.put(tp, 3L)

 // These segments are uncleanable and hence not filthy
-val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time)
-assertEquals(None, filthiestLog)
+val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats())
+assertEquals(Optional.empty(), filthiestLog)
 }

 /**

@@ -375,12 +378,12 @@
 log.updateConfig(config)

 // log cleanup inprogress, the log is not available for compaction
-val cleanable = cleanerManager.grabFilthiestCompactedLog(time)
+val cleanable = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).toScala
 assertEquals(0, cleanable.size, "should have 0 logs ready to be compacted")

 // log cleanup finished, and log can be picked up for compaction
-cleanerManager.resumeCleaning(deletableLog.map(_._1))
-val cleanable2 = cleanerManager.grabFilthiestCompactedLog(time)
+cleanerManager.resumeCleaning(deletableLog.asScala.map(_.getKey).toList.asJava)
+val cleanable2 = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).toScala
 assertEquals(1, cleanable2.size, "should have 1 logs ready to be compacted")

 // update cleanup policy to delete
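pauseCleaningForNonCompactedPartitions now returns a Java collection of Map.Entry values, and resumeCleaning expects a java.util.List, hence the asScala/asJava round trip threaded through the test above. A minimal sketch of that conversion, under the same assumptions:

    import scala.jdk.CollectionConverters._

    val deletable = cleanerManager.pauseCleaningForNonCompactedPartitions()
    // Each entry pairs a TopicPartition with its log; resuming needs only the keys.
    cleanerManager.resumeCleaning(deletable.asScala.map(_.getKey).toList.asJava)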
@@ -393,7 +396,7 @@
 assertEquals(0, deletableLog2.size, "should have 0 logs ready to be deleted")

 // compaction done, should have 1 log eligible for log cleanup
-cleanerManager.doneDeleting(Seq(cleanable2.get.topicPartition))
+cleanerManager.doneDeleting(Seq(cleanable2.get.topicPartition).asJava)
 val deletableLog3 = cleanerManager.pauseCleaningForNonCompactedPartitions()
 assertEquals(1, deletableLog3.size, "should have 1 logs ready to be deleted")
 }

@@ -405,11 +408,11 @@
 val cleanerManager: LogCleanerManager = createCleanerManager(log)

 // expect the checkpoint offset is not the expectedOffset before doing updateCheckpoints
-assertNotEquals(offset, cleanerManager.allCleanerCheckpoints.getOrElse(topicPartition, 0))
+assertNotEquals(offset, cleanerManager.allCleanerCheckpoints.getOrDefault(topicPartition, 0))

-cleanerManager.updateCheckpoints(logDir, partitionToUpdateOrAdd = Option(topicPartition, offset))
+cleanerManager.updateCheckpoints(logDir, Optional.of(util.Map.entry(topicPartition, offset)), Optional.empty())
 // expect the checkpoint offset is now updated to the expected offset after doing updateCheckpoints
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition))
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition))
 }

 @Test

@@ -419,12 +422,12 @@
 val cleanerManager: LogCleanerManager = createCleanerManager(log)

 // write some data into the cleaner-offset-checkpoint file
-cleanerManager.updateCheckpoints(logDir, partitionToUpdateOrAdd = Option(topicPartition, offset))
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition))
+cleanerManager.updateCheckpoints(logDir, Optional.of(util.Map.entry(topicPartition, offset)), Optional.empty())
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition))

 // updateCheckpoints should remove the topicPartition data in the logDir
-cleanerManager.updateCheckpoints(logDir, partitionToRemove = Option(topicPartition))
-assertFalse(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
+cleanerManager.updateCheckpoints(logDir, Optional.empty(), Optional.of(topicPartition))
+assertFalse(cleanerManager.allCleanerCheckpoints.containsKey(topicPartition))
 }

 @Test
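Without Scala default arguments, the add/update and remove cases of updateCheckpoints are distinguished by which Optional is populated. A sketch of the two call shapes exercised above:

    // Add or update the checkpoint entry for `topicPartition`:
    cleanerManager.updateCheckpoints(logDir,
      Optional.of(util.Map.entry(topicPartition, offset)), Optional.empty())

    // Remove the checkpoint entry for `topicPartition`:
    cleanerManager.updateCheckpoints(logDir,
      Optional.empty(), Optional.of(topicPartition))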
@@ -434,15 +437,15 @@
 val cleanerManager: LogCleanerManager = createCleanerManager(log)

 // write some data into the cleaner-offset-checkpoint file in logDir and logDir2
-cleanerManager.updateCheckpoints(logDir, partitionToUpdateOrAdd = Option(topicPartition, offset))
-cleanerManager.updateCheckpoints(logDir2, partitionToUpdateOrAdd = Option(topicPartition2, offset))
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition))
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition2))
+cleanerManager.updateCheckpoints(logDir, Optional.of(util.Map.entry(topicPartition, offset)), Optional.empty())
+cleanerManager.updateCheckpoints(logDir2, Optional.of(util.Map.entry(topicPartition2, offset)), Optional.empty())
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition))
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition2))

 cleanerManager.handleLogDirFailure(logDir.getAbsolutePath)
 // verify the partition data in logDir is gone, and data in logDir2 is still there
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition2))
-assertFalse(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition2))
+assertFalse(cleanerManager.allCleanerCheckpoints.containsKey(topicPartition))
 }

 @Test

@@ -454,15 +457,15 @@
 val higherOffset = 1000L

 // write some data into the cleaner-offset-checkpoint file in logDir
-cleanerManager.updateCheckpoints(logDir, partitionToUpdateOrAdd = Option(topicPartition, offset))
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition))
+cleanerManager.updateCheckpoints(logDir, Optional.of(util.Map.entry(topicPartition, offset)), Optional.empty())
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition))

 // we should not truncate the checkpoint data for checkpointed offset <= the given offset (higherOffset)
 cleanerManager.maybeTruncateCheckpoint(logDir, topicPartition, higherOffset)
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition))
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition))
 // we should truncate the checkpoint data for checkpointed offset > the given offset (lowerOffset)
 cleanerManager.maybeTruncateCheckpoint(logDir, topicPartition, lowerOffset)
-assertEquals(lowerOffset, cleanerManager.allCleanerCheckpoints(topicPartition))
+assertEquals(lowerOffset, cleanerManager.allCleanerCheckpoints.get(topicPartition))
 }

 @Test

@@ -472,17 +475,17 @@
 val cleanerManager: LogCleanerManager = createCleanerManager(log)

 // write some data into the cleaner-offset-checkpoint file in logDir
-cleanerManager.updateCheckpoints(logDir, partitionToUpdateOrAdd = Option(topicPartition, offset))
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition))
+cleanerManager.updateCheckpoints(logDir, Optional.of(util.Map.entry(topicPartition, offset)), Optional.empty())
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition))

 cleanerManager.alterCheckpointDir(topicPartition, logDir, logDir2)
 // verify we still can get the partition offset after alterCheckpointDir
 // This data should locate in logDir2, not logDir
-assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition))
+assertEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition))

 // force delete the logDir2 from checkpoints, so that the partition data should also be deleted
 cleanerManager.handleLogDirFailure(logDir2.getAbsolutePath)
-assertFalse(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
+assertFalse(cleanerManager.allCleanerCheckpoints.containsKey(topicPartition))
 }

 /**

@@ -498,11 +501,11 @@
 val pausedPartitions = cleanerManager.pauseCleaningForNonCompactedPartitions()
 // Log truncation happens due to unclean leader election
 cleanerManager.abortAndPauseCleaning(log.topicPartition)
-cleanerManager.resumeCleaning(Seq(log.topicPartition))
+cleanerManager.resumeCleaning(Seq(log.topicPartition).asJava)
 // log cleanup finishes and pausedPartitions are resumed
-cleanerManager.resumeCleaning(pausedPartitions.map(_._1))
+cleanerManager.resumeCleaning(pausedPartitions.asScala.map(_.getKey).toList.asJava)

-assertEquals(None, cleanerManager.cleaningState(log.topicPartition))
+assertEquals(Optional.empty(), cleanerManager.cleaningState(log.topicPartition))
 }

 /**

@@ -519,9 +522,9 @@
 // Broker processes StopReplicaRequest with delete=true
 cleanerManager.abortCleaning(log.topicPartition)
 // log cleanup finishes and pausedPartitions are resumed
-cleanerManager.resumeCleaning(pausedPartitions.map(_._1))
+cleanerManager.resumeCleaning(pausedPartitions.asScala.map(_.getKey).toList.asJava)

-assertEquals(None, cleanerManager.cleaningState(log.topicPartition))
+assertEquals(Optional.empty(), cleanerManager.cleaningState(log.topicPartition))
 }

 /**
@@ -554,7 +557,7 @@

 log.updateHighWatermark(50)

-val lastCleanOffset = Some(0L)
+val lastCleanOffset = Optional.of(0L.asInstanceOf[JLong])
 val cleanableOffsets = LogCleanerManager.cleanableOffsets(log, lastCleanOffset, time.milliseconds)
 assertEquals(0L, cleanableOffsets.firstDirtyOffset, "The first cleanable offset starts at the beginning of the log.")
 assertEquals(log.highWatermark, log.lastStableOffset, "The high watermark equals the last stable offset as no transactions are in progress")

@@ -576,7 +579,7 @@

 log.updateHighWatermark(log.logEndOffset)

-val lastCleanOffset = Some(0L)
+val lastCleanOffset = Optional.of(0L.asInstanceOf[JLong])
 val cleanableOffsets = LogCleanerManager.cleanableOffsets(log, lastCleanOffset, time.milliseconds)
 assertEquals(0L, cleanableOffsets.firstDirtyOffset, "The first cleanable offset starts at the beginning of the log.")
 assertEquals(log.activeSegment.baseOffset, cleanableOffsets.firstUncleanableDirtyOffset, "The first uncleanable offset begins with the active segment.")

@@ -608,7 +611,7 @@

 log.updateHighWatermark(log.logEndOffset)

-val lastCleanOffset = Some(0L)
+val lastCleanOffset = Optional.of(0L.asInstanceOf[JLong])
 val cleanableOffsets = LogCleanerManager.cleanableOffsets(log, lastCleanOffset, time.milliseconds)
 assertEquals(0L, cleanableOffsets.firstDirtyOffset, "The first cleanable offset starts at the beginning of the log.")
 assertEquals(activeSegAtT0.baseOffset, cleanableOffsets.firstUncleanableDirtyOffset, "The first uncleanable offset begins with the second block of log entries.")

@@ -635,7 +638,7 @@

 time.sleep(compactionLag + 1)

-val lastCleanOffset = Some(0L)
+val lastCleanOffset = Optional.of(0L.asInstanceOf[JLong])
 val cleanableOffsets = LogCleanerManager.cleanableOffsets(log, lastCleanOffset, time.milliseconds)
 assertEquals(0L, cleanableOffsets.firstDirtyOffset, "The first cleanable offset starts at the beginning of the log.")
 assertEquals(log.activeSegment.baseOffset, cleanableOffsets.firstUncleanableDirtyOffset, "The first uncleanable offset begins with active segment.")

@@ -647,7 +650,7 @@
 val logs = setupIncreasinglyFilthyLogs(Seq(tp), startNumBatches = 20, batchIncrement = 5)
 logs.get(tp).maybeIncrementLogStartOffset(10L, LogStartOffsetIncrementReason.ClientRecordDeletion)

-var lastCleanOffset = Some(15L)
+var lastCleanOffset = Optional.of(15L.asInstanceOf[JLong])
 var cleanableOffsets = LogCleanerManager.cleanableOffsets(logs.get(tp), lastCleanOffset, time.milliseconds)
 assertFalse(cleanableOffsets.forceUpdateCheckpoint, "Checkpoint offset should not be reset if valid")
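cleanableOffsets now takes java.util.Optional[JLong] for the last clean offset, and a bare Scala Long will not auto-box into it, hence the asInstanceOf[JLong] casts above. A one-line sketch of the boxing (the valueOf spelling is an equivalent alternative, not what the diff uses):

    // Explicit boxing for the Java Optional parameter:
    val lastClean: Optional[JLong] = Optional.of(15L.asInstanceOf[JLong])
    // Equivalent:
    val lastClean2: Optional[JLong] = Optional.of(java.lang.Long.valueOf(15L))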
@@ -655,7 +658,7 @@
 cleanableOffsets = LogCleanerManager.cleanableOffsets(logs.get(tp), lastCleanOffset, time.milliseconds)
 assertTrue(cleanableOffsets.forceUpdateCheckpoint, "Checkpoint offset needs to be reset if less than log start offset")

-lastCleanOffset = Some(25L)
+lastCleanOffset = Optional.of(25L)
 cleanableOffsets = LogCleanerManager.cleanableOffsets(logs.get(tp), lastCleanOffset, time.milliseconds)
 assertTrue(cleanableOffsets.forceUpdateCheckpoint, "Checkpoint offset needs to be reset if greater than log end offset")
 }

@@ -682,7 +685,7 @@

 time.sleep(compactionLag + 1)
 // although the compaction lag has been exceeded, the undecided data should not be cleaned
-var cleanableOffsets = LogCleanerManager.cleanableOffsets(log, Some(0L), time.milliseconds())
+var cleanableOffsets = LogCleanerManager.cleanableOffsets(log, Optional.of(0L), time.milliseconds())
 assertEquals(0L, cleanableOffsets.firstDirtyOffset)
 assertEquals(0L, cleanableOffsets.firstUncleanableDirtyOffset)

@@ -693,14 +696,14 @@
 log.updateHighWatermark(4L)

 // the first segment should now become cleanable immediately
-cleanableOffsets = LogCleanerManager.cleanableOffsets(log, Some(0L), time.milliseconds())
+cleanableOffsets = LogCleanerManager.cleanableOffsets(log, Optional.of(0L), time.milliseconds())
 assertEquals(0L, cleanableOffsets.firstDirtyOffset)
 assertEquals(3L, cleanableOffsets.firstUncleanableDirtyOffset)

 time.sleep(compactionLag + 1)

 // the second segment becomes cleanable after the compaction lag
-cleanableOffsets = LogCleanerManager.cleanableOffsets(log, Some(0L), time.milliseconds())
+cleanableOffsets = LogCleanerManager.cleanableOffsets(log, Optional.of(0L), time.milliseconds())
 assertEquals(0L, cleanableOffsets.firstDirtyOffset)
 assertEquals(4L, cleanableOffsets.firstUncleanableDirtyOffset)
 }

@@ -717,20 +720,20 @@

 assertThrows(classOf[IllegalStateException], () => cleanerManager.doneCleaning(topicPartition, log.dir, 1))

-cleanerManager.setCleaningState(topicPartition, LogCleaningPaused(1))
+cleanerManager.setCleaningState(topicPartition, LogCleaningState.logCleaningPaused(1))
 assertThrows(classOf[IllegalStateException], () => cleanerManager.doneCleaning(topicPartition, log.dir, 1))

-cleanerManager.setCleaningState(topicPartition, LogCleaningInProgress)
+cleanerManager.setCleaningState(topicPartition, LOG_CLEANING_IN_PROGRESS)
 val endOffset = 1L
 cleanerManager.doneCleaning(topicPartition, log.dir, endOffset)
 assertTrue(cleanerManager.cleaningState(topicPartition).isEmpty)
-assertTrue(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
-assertEquals(Some(endOffset), cleanerManager.allCleanerCheckpoints.get(topicPartition))
+assertTrue(cleanerManager.allCleanerCheckpoints.containsKey(topicPartition))
+assertEquals(Some(endOffset), Option(cleanerManager.allCleanerCheckpoints.get(topicPartition)))

-cleanerManager.setCleaningState(topicPartition, LogCleaningAborted)
+cleanerManager.setCleaningState(topicPartition, LOG_CLEANING_ABORTED)
 cleanerManager.doneCleaning(topicPartition, log.dir, endOffset)
-assertEquals(LogCleaningPaused(1), cleanerManager.cleaningState(topicPartition).get)
+assertEquals(LogCleaningState.logCleaningPaused(1), cleanerManager.cleaningState(topicPartition).get)
-assertTrue(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
+assertTrue(cleanerManager.allCleanerCheckpoints.containsKey(topicPartition))
 }

 @Test
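The Scala case objects LogCleaningInProgress and LogCleaningAborted map to the Java constants LOG_CLEANING_IN_PROGRESS and LOG_CLEANING_ABORTED, while the counted LogCleaningPaused(n) becomes the LogCleaningState.logCleaningPaused(n) factory. A sketch of the state round trip the test above walks through, assuming that API:

    cleanerManager.setCleaningState(topicPartition, LOG_CLEANING_IN_PROGRESS)
    cleanerManager.doneCleaning(topicPartition, log.dir, 1L) // clears the state and checkpoints the offset

    // Paused states carry their pause count, so equality checks go through the factory too:
    cleanerManager.setCleaningState(topicPartition, LogCleaningState.logCleaningPaused(1))
    assert(cleanerManager.cleaningState(topicPartition).get == LogCleaningState.logCleaningPaused(1))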
@@ -740,18 +743,18 @@
 val cleanerManager: LogCleanerManager = createCleanerManager(log)
 val tp = new TopicPartition("log", 0)

-assertThrows(classOf[IllegalStateException], () => cleanerManager.doneDeleting(Seq(tp)))
+assertThrows(classOf[IllegalStateException], () => cleanerManager.doneDeleting(Seq(tp).asJava))

-cleanerManager.setCleaningState(tp, LogCleaningPaused(1))
-assertThrows(classOf[IllegalStateException], () => cleanerManager.doneDeleting(Seq(tp)))
+cleanerManager.setCleaningState(tp, LogCleaningState.logCleaningPaused(1))
+assertThrows(classOf[IllegalStateException], () => cleanerManager.doneDeleting(Seq(tp).asJava))

-cleanerManager.setCleaningState(tp, LogCleaningInProgress)
-cleanerManager.doneDeleting(Seq(tp))
+cleanerManager.setCleaningState(tp, LOG_CLEANING_IN_PROGRESS)
+cleanerManager.doneDeleting(Seq(tp).asJava)
 assertTrue(cleanerManager.cleaningState(tp).isEmpty)

-cleanerManager.setCleaningState(tp, LogCleaningAborted)
-cleanerManager.doneDeleting(Seq(tp))
-assertEquals(LogCleaningPaused(1), cleanerManager.cleaningState(tp).get)
+cleanerManager.setCleaningState(tp, LOG_CLEANING_ABORTED)
+cleanerManager.doneDeleting(Seq(tp).asJava)
+assertEquals(LogCleaningState.logCleaningPaused(1), cleanerManager.cleaningState(tp).get)
 }

 /**

@@ -766,8 +769,8 @@
 val cleanerManager = createCleanerManagerMock(logs)
 cleanerCheckpoints.put(tp, 15L)

-val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time)
-assertEquals(None, filthiestLog, "Log should not be selected for cleaning")
+val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats())
+assertEquals(Optional.empty(), filthiestLog, "Log should not be selected for cleaning")
 assertEquals(20L, cleanerCheckpoints(tp), "Unselected log should have checkpoint offset updated")
 }

@@ -788,19 +791,19 @@
 cleanerCheckpoints.put(tp0, 10L)
 cleanerCheckpoints.put(tp1, 5L)

-val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time).get
+val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time, new PreCleanStats()).get
 assertEquals(tp1, filthiestLog.topicPartition, "Dirtier log should be selected")
 assertEquals(15L, cleanerCheckpoints(tp0), "Unselected log should have checkpoint offset updated")
 }

 private def createCleanerManager(log: UnifiedLog): LogCleanerManager = {
-val logs = new Pool[TopicPartition, UnifiedLog]()
+val logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
 logs.put(topicPartition, log)
-new LogCleanerManager(Seq(logDir, logDir2), logs, null)
+new LogCleanerManager(Seq(logDir, logDir2).asJava, logs, null)
 }

-private def createCleanerManagerMock(pool: Pool[TopicPartition, UnifiedLog]): LogCleanerManagerMock = {
-new LogCleanerManagerMock(Seq(logDir), pool, null)
+private def createCleanerManagerMock(pool: util.concurrent.ConcurrentMap[TopicPartition, UnifiedLog]): LogCleanerManagerMock = {
+new LogCleanerManagerMock(Seq(logDir).asJava, pool, null)
 }

 private def createLog(segmentSize: Int,
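The helpers above show the full Java-typed constructor: a java.util.List of log directories and a ConcurrentMap of logs (the tests pass null for the LogDirFailureChannel; production code supplies a real one). A minimal construction sketch:

    import scala.jdk.CollectionConverters._

    val logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog]()
    logs.put(topicPartition, log)
    val manager = new LogCleanerManager(Seq(logDir, logDir2).asJava, logs, null)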
@@ -29,7 +29,7 @@ import org.apache.kafka.common.utils.Time
 import org.apache.kafka.server.config.ServerConfigs
 import org.apache.kafka.server.util.MockTime
 import org.apache.kafka.storage.internals.checkpoint.OffsetCheckpointFile
-import org.apache.kafka.storage.internals.log.{CleanerConfig, LogConfig, UnifiedLog}
+import org.apache.kafka.storage.internals.log.{CleanerConfig, LogCleanerManager, LogConfig, UnifiedLog}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.extension.ExtensionContext
 import org.junit.jupiter.params.ParameterizedTest

@@ -86,7 +86,7 @@ class LogCleanerParameterizedIntegrationTest extends AbstractLogCleanerIntegrati
 // and make sure its gone from checkpoint file
 cleaner.logs.remove(topicPartitions(0))
 cleaner.updateCheckpoints(logDir, partitionToRemove = Option(topicPartitions(0)))
-val checkpoints = new OffsetCheckpointFile(new File(logDir, cleaner.cleanerManager.offsetCheckpointFile), null).read()
+val checkpoints = new OffsetCheckpointFile(new File(logDir, LogCleanerManager.OFFSET_CHECKPOINT_FILE), null).read()
 // we expect partition 0 to be gone
 assertFalse(checkpoints.containsKey(topicPartitions(0)))
 }

@@ -318,7 +318,7 @@ class LogCleanerParameterizedIntegrationTest extends AbstractLogCleanerIntegrati
 // TopicConfig.MIN_CLEANABLE_DIRTY_RATIO_CONFIG
 val topicPartition = new TopicPartition(topic, partitionId)
 cleaner.awaitCleaned(topicPartition, firstDirty)
-val lastCleaned = cleaner.cleanerManager.allCleanerCheckpoints(topicPartition)
+val lastCleaned = cleaner.cleanerManager.allCleanerCheckpoints.get(topicPartition)
 assertTrue(lastCleaned >= firstDirty, s"log cleaner should have processed up to offset $firstDirty, but lastCleaned=$lastCleaned")
 }
@@ -19,7 +19,7 @@ package kafka.log

 import kafka.log.LogCleaner.{MaxBufferUtilizationPercentMetricName, MaxCleanTimeMetricName, MaxCompactionDelayMetricsName}
 import kafka.server.KafkaConfig
-import kafka.utils.{CoreUtils, Logging, Pool, TestUtils}
+import kafka.utils.{CoreUtils, Logging, TestUtils}
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.compress.Compression
 import org.apache.kafka.common.config.TopicConfig

@@ -29,7 +29,7 @@ import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.coordinator.transaction.TransactionLogConfig
 import org.apache.kafka.server.metrics.{KafkaMetricsGroup, KafkaYammerMetrics}
 import org.apache.kafka.server.util.MockTime
-import org.apache.kafka.storage.internals.log.{AbortedTxn, AppendOrigin, CleanerConfig, LocalLog, LogAppendInfo, LogCleaningAbortedException, LogConfig, LogDirFailureChannel, LogFileUtils, LogLoader, LogOffsetsListener, LogSegment, LogSegments, LogStartOffsetIncrementReason, OffsetMap, ProducerStateManager, ProducerStateManagerConfig, UnifiedLog}
+import org.apache.kafka.storage.internals.log.{AbortedTxn, AppendOrigin, CleanerConfig, LocalLog, LogAppendInfo, LogCleanerManager, LogCleaningAbortedException, LogConfig, LogDirFailureChannel, LogFileUtils, LogLoader, LogOffsetsListener, LogSegment, LogSegments, LogStartOffsetIncrementReason, LogToClean, OffsetMap, ProducerStateManager, ProducerStateManagerConfig, UnifiedLog}
 import org.apache.kafka.storage.internals.utils.Throttler
 import org.apache.kafka.storage.log.metrics.BrokerTopicStats
 import org.junit.jupiter.api.Assertions._

@@ -78,7 +78,7 @@ class LogCleanerTest extends Logging {
 try {
 val logCleaner = new LogCleaner(new CleanerConfig(true),
 logDirs = Array(TestUtils.tempDir(), TestUtils.tempDir()),
-logs = new Pool[TopicPartition, UnifiedLog](),
+logs = new ConcurrentHashMap[TopicPartition, UnifiedLog](),
 logDirFailureChannel = new LogDirFailureChannel(1),
 time = time)
 val metricsToVerify = new java.util.HashMap[String, java.util.List[java.util.Map[String, String]]]()

@@ -99,13 +99,13 @@

 // verify that each metric in `LogCleanerManager` is removed
 val mockLogCleanerManagerMetricsGroup = mockMetricsGroupCtor.constructed.get(1)
-LogCleanerManager.GaugeMetricNameNoTag.foreach(metricName => verify(mockLogCleanerManagerMetricsGroup).newGauge(ArgumentMatchers.eq(metricName), any()))
+LogCleanerManager.GAUGE_METRIC_NAME_NO_TAG.forEach(metricName => verify(mockLogCleanerManagerMetricsGroup).newGauge(ArgumentMatchers.eq(metricName), any()))
 metricsToVerify.asScala.foreach { metricNameAndTags =>
 metricNameAndTags._2.asScala.foreach { tags =>
 verify(mockLogCleanerManagerMetricsGroup).newGauge(ArgumentMatchers.eq(metricNameAndTags._1), any(), ArgumentMatchers.eq(tags))
 }
 }
-LogCleanerManager.GaugeMetricNameNoTag.foreach(verify(mockLogCleanerManagerMetricsGroup).removeMetric(_))
+LogCleanerManager.GAUGE_METRIC_NAME_NO_TAG.forEach(verify(mockLogCleanerManagerMetricsGroup).removeMetric(_))
 metricsToVerify.asScala.foreach { metricNameAndTags =>
 metricNameAndTags._2.asScala.foreach { tags =>
 verify(mockLogCleanerManagerMetricsGroup).removeMetric(ArgumentMatchers.eq(metricNameAndTags._1), ArgumentMatchers.eq(tags))

@@ -124,7 +124,7 @@
 def testMetricsActiveAfterReconfiguration(): Unit = {
 val logCleaner = new LogCleaner(new CleanerConfig(true),
 logDirs = Array(TestUtils.tempDir()),
-logs = new Pool[TopicPartition, UnifiedLog](),
+logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog](),
 logDirFailureChannel = new LogDirFailureChannel(1),
 time = time)
@@ -285,7 +285,7 @@
 log.roll()

 // clean the log with only one message removed
-cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 2, log.activeSegment.baseOffset))
+cleaner.clean(new LogToClean(log, 2, log.activeSegment.baseOffset, false))

 assertTrue(log.logSegments.iterator.next().log.channel.size < originalMaxFileSize,
 "Cleaned segment file should be trimmed to its real size.")
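LogToClean is now constructed directly as a Java class: the explicit TopicPartition argument is gone (the partition comes from the log itself), and the trailing boolean — presumably whether the log must be compacted immediately regardless of dirty ratio — is spelled out rather than defaulted. A hedged sketch of the new call shape:

    // Old Scala apply: LogToClean(new TopicPartition("test", 0), log, firstDirty, uncleanable)
    // New Java constructor; the meaning of the final flag is inferred from the old
    // Scala default parameter, not confirmed by this diff.
    cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))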
@@ -309,7 +309,7 @@ class LogCleanerTest extends Logging {
    appendIdempotentAsLeader(log, pid3, producerEpoch)(Seq(1, 4))

    log.roll()
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))
    assertEquals(List(2, 5, 7), lastOffsetsPerBatchInLog(log))
    assertEquals(Map(pid1 -> 2, pid2 -> 2, pid3 -> 1), lastSequencesInLog(log))
    assertEquals(List(2, 3, 1, 4), LogTestUtils.keysInLog(log))
@@ -341,7 +341,7 @@ class LogCleanerTest extends Logging {
    // do one more append and a round of cleaning to force another deletion from producer 1's batch
    appendIdempotentAsLeader(log, pid4, producerEpoch)(Seq(2))
    log.roll()
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))
    assertEquals(Map(pid1 -> 2, pid2 -> 2, pid3 -> 1, pid4 -> 0), lastSequencesInLog(log))
    assertEquals(List(2, 5, 7, 8), lastOffsetsPerBatchInLog(log))
    assertEquals(List(3, 1, 4, 2), LogTestUtils.keysInLog(log))
@@ -485,7 +485,7 @@ class LogCleanerTest extends Logging {
    val abortedTransactions = log.collectAbortedTransactions(log.logStartOffset, log.logEndOffset)

    log.roll()
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))
    assertEquals(List(3, 2), LogTestUtils.keysInLog(log))
    assertEquals(List(3, 6, 7, 8, 9), offsetsInLog(log))

@@ -525,7 +525,7 @@ class LogCleanerTest extends Logging {
    log.appendAsLeader(abortMarker(pid1, producerEpoch), 0, AppendOrigin.COORDINATOR)

    // we have only cleaned the records in the first segment
-   val dirtyOffset = cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))._1
+   val dirtyOffset = cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))._1
    assertEquals(List(2, 3, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10), LogTestUtils.keysInLog(log))

    log.roll()
@@ -535,13 +535,12 @@ class LogCleanerTest extends Logging {
    appendProducer1(Seq(12))

    // finally only the keys from pid3 should remain
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, dirtyOffset, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false))
    assertEquals(List(2, 3, 6, 7, 8, 9, 11, 12), LogTestUtils.keysInLog(log))
  }

  @Test
  def testCommitMarkerRemoval(): Unit = {
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(Int.MaxValue)
    val logProps = new Properties()
    logProps.put(TopicConfig.SEGMENT_BYTES_CONFIG, 256: java.lang.Integer)
@@ -559,7 +558,7 @@ class LogCleanerTest extends Logging {
    log.roll()

    // cannot remove the marker in this pass because there are still valid records
-   var dirtyOffset = cleaner.doClean(LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   var dirtyOffset = cleaner.doClean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertEquals(List(1, 3, 2), LogTestUtils.keysInLog(log))
    assertEquals(List(0, 2, 3, 4, 5), offsetsInLog(log))

@@ -568,17 +567,17 @@ class LogCleanerTest extends Logging {
    log.roll()

    // the first cleaning preserves the commit marker (at offset 3) since there were still records for the transaction
-   dirtyOffset = cleaner.doClean(LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   dirtyOffset = cleaner.doClean(new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertEquals(List(2, 1, 3), LogTestUtils.keysInLog(log))
    assertEquals(List(3, 4, 5, 6, 7, 8), offsetsInLog(log))

    // clean again with same timestamp to verify marker is not removed early
-   dirtyOffset = cleaner.doClean(LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   dirtyOffset = cleaner.doClean(new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertEquals(List(2, 1, 3), LogTestUtils.keysInLog(log))
    assertEquals(List(3, 4, 5, 6, 7, 8), offsetsInLog(log))

    // clean again with max timestamp to verify the marker is removed
-   dirtyOffset = cleaner.doClean(LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = Long.MaxValue)._1
+   dirtyOffset = cleaner.doClean(new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = Long.MaxValue)._1
    assertEquals(List(2, 1, 3), LogTestUtils.keysInLog(log))
    assertEquals(List(4, 5, 6, 7, 8), offsetsInLog(log))
  }
@@ -589,7 +588,6 @@ class LogCleanerTest extends Logging {
   */
  @Test
  def testDeletedBatchesWithNoMessagesRead(): Unit = {
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(capacity = Int.MaxValue, maxMessageSize = 100)
    val logProps = new Properties()
    logProps.put(TopicConfig.MAX_MESSAGE_BYTES_CONFIG, 100: java.lang.Integer)
@@ -607,19 +605,18 @@ class LogCleanerTest extends Logging {
    log.appendAsLeader(commitMarker(producerId, producerEpoch), 0, AppendOrigin.COORDINATOR)
    log.roll()

-   cleaner.doClean(LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   cleaner.doClean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(2), LogTestUtils.keysInLog(log))
    assertEquals(List(1, 3, 4), offsetsInLog(log))

    // In the first pass, the deleteHorizon for {Producer2: Commit} is set. In the second pass, it's removed.
-   runTwoPassClean(cleaner, LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   runTwoPassClean(cleaner, new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(2), LogTestUtils.keysInLog(log))
    assertEquals(List(3, 4), offsetsInLog(log))
  }

  @Test
  def testCommitMarkerRetentionWithEmptyBatch(): Unit = {
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(Int.MaxValue)
    val logProps = new Properties()
    logProps.put(TopicConfig.SEGMENT_BYTES_CONFIG, 256: java.lang.Integer)
@@ -647,14 +644,14 @@ class LogCleanerTest extends Logging {

    // first time through the records are removed
    // Expected State: [{Producer1: EmptyBatch}, {Producer2: EmptyBatch}, {Producer2: Commit}, {2}, {3}, {Producer1: Commit}]
-   var dirtyOffset = cleaner.doClean(LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   var dirtyOffset = cleaner.doClean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertEquals(List(2, 3), LogTestUtils.keysInLog(log))
    assertEquals(List(4, 5, 6, 7), offsetsInLog(log))
    assertEquals(List(1, 3, 4, 5, 6, 7), lastOffsetsPerBatchInLog(log))

    // the empty batch remains if cleaned again because it still holds the last sequence
    // Expected State: [{Producer1: EmptyBatch}, {Producer2: EmptyBatch}, {Producer2: Commit}, {2}, {3}, {Producer1: Commit}]
-   dirtyOffset = cleaner.doClean(LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   dirtyOffset = cleaner.doClean(new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertEquals(List(2, 3), LogTestUtils.keysInLog(log))
    assertEquals(List(4, 5, 6, 7), offsetsInLog(log))
    assertEquals(List(1, 3, 4, 5, 6, 7), lastOffsetsPerBatchInLog(log))
@@ -668,14 +665,14 @@ class LogCleanerTest extends Logging {

    // Expected State: [{Producer1: EmptyBatch}, {Producer2: Commit}, {2}, {3}, {Producer1: Commit}, {Producer2: 1}, {Producer2: Commit}]
    // The deleteHorizon for {Producer2: Commit} is still not set yet.
-   dirtyOffset = cleaner.doClean(LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   dirtyOffset = cleaner.doClean(new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertEquals(List(2, 3, 1), LogTestUtils.keysInLog(log))
    assertEquals(List(4, 5, 6, 7, 8, 9), offsetsInLog(log))
    assertEquals(List(1, 4, 5, 6, 7, 8, 9), lastOffsetsPerBatchInLog(log))

    // Expected State: [{Producer1: EmptyBatch}, {2}, {3}, {Producer1: Commit}, {Producer2: 1}, {Producer2: Commit}]
    // In the first pass, the deleteHorizon for {Producer2: Commit} is set. In the second pass, it's removed.
-   dirtyOffset = runTwoPassClean(cleaner, LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   dirtyOffset = runTwoPassClean(cleaner, new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(2, 3, 1), LogTestUtils.keysInLog(log))
    assertEquals(List(5, 6, 7, 8, 9), offsetsInLog(log))
    assertEquals(List(1, 5, 6, 7, 8, 9), lastOffsetsPerBatchInLog(log))
@@ -683,7 +680,6 @@ class LogCleanerTest extends Logging {

  @Test
  def testCleanEmptyControlBatch(): Unit = {
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(Int.MaxValue)
    val logProps = new Properties()
    logProps.put(TopicConfig.SEGMENT_BYTES_CONFIG, 256: java.lang.Integer)
@@ -701,14 +697,14 @@ class LogCleanerTest extends Logging {
    // Expected State: [{Producer1: EmptyBatch}], [{2}, {3}]
    // In the first pass, the deleteHorizon for the commit marker is set. In the second pass, the commit marker is removed
    // but the empty batch is retained for preserving the producer epoch.
-   var dirtyOffset = runTwoPassClean(cleaner, LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   var dirtyOffset = runTwoPassClean(cleaner, new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(2, 3), LogTestUtils.keysInLog(log))
    assertEquals(List(1, 2), offsetsInLog(log))
    assertEquals(List(0, 1, 2), lastOffsetsPerBatchInLog(log))

    // the empty control batch does not cause an exception when cleaned
    // Expected State: [{Producer1: EmptyBatch}], [{2}, {3}]
-   dirtyOffset = cleaner.doClean(LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = Long.MaxValue)._1
+   dirtyOffset = cleaner.doClean(new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = Long.MaxValue)._1
    assertEquals(List(2, 3), LogTestUtils.keysInLog(log))
    assertEquals(List(1, 2), offsetsInLog(log))
    assertEquals(List(0, 1, 2), lastOffsetsPerBatchInLog(log))
@@ -716,7 +712,6 @@ class LogCleanerTest extends Logging {

  @Test
  def testCommittedTransactionSpanningSegments(): Unit = {
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(Int.MaxValue)
    val logProps = new Properties()
    logProps.put(TopicConfig.SEGMENT_BYTES_CONFIG, 128: java.lang.Integer)
@@ -732,14 +727,13 @@ class LogCleanerTest extends Logging {
    log.roll()

    // Both the record and the marker should remain after cleaning
-   runTwoPassClean(cleaner, LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   runTwoPassClean(cleaner, new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(0, 1), offsetsInLog(log))
    assertEquals(List(0, 1), lastOffsetsPerBatchInLog(log))
  }

  @Test
  def testAbortedTransactionSpanningSegments(): Unit = {
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(Int.MaxValue)
    val logProps = new Properties()
    logProps.put(TopicConfig.SEGMENT_BYTES_CONFIG, 128: java.lang.Integer)
@@ -757,19 +751,18 @@ class LogCleanerTest extends Logging {
    // Both the batch and the marker should remain after cleaning. The batch is retained
    // because it is the last entry for this producerId. The marker is retained because
    // there are still batches remaining from this transaction.
-   cleaner.doClean(LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   cleaner.doClean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(1), offsetsInLog(log))
    assertEquals(List(0, 1), lastOffsetsPerBatchInLog(log))

    // The empty batch and the marker are still retained after a second cleaning.
-   cleaner.doClean(LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = Long.MaxValue)
+   cleaner.doClean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = Long.MaxValue)
    assertEquals(List(1), offsetsInLog(log))
    assertEquals(List(0, 1), lastOffsetsPerBatchInLog(log))
  }

  @Test
  def testAbortMarkerRemoval(): Unit = {
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(Int.MaxValue)
    val logProps = new Properties()
    logProps.put(TopicConfig.SEGMENT_BYTES_CONFIG, 256: java.lang.Integer)
@@ -787,12 +780,12 @@ class LogCleanerTest extends Logging {
    log.roll()

    // Aborted records are removed, but the abort marker is still preserved.
-   val dirtyOffset = cleaner.doClean(LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   val dirtyOffset = cleaner.doClean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertEquals(List(3), LogTestUtils.keysInLog(log))
    assertEquals(List(3, 4, 5), offsetsInLog(log))

    // In the first pass, the delete horizon for the abort marker is set. In the second pass, the abort marker is removed.
-   runTwoPassClean(cleaner, LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   runTwoPassClean(cleaner, new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(3), LogTestUtils.keysInLog(log))
    assertEquals(List(4, 5), offsetsInLog(log))
  }
@@ -804,7 +797,6 @@ class LogCleanerTest extends Logging {

    val producerEpoch = 0.toShort
    val producerId = 1L
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(Int.MaxValue)
    val logProps = new Properties()
    logProps.put(TopicConfig.SEGMENT_BYTES_CONFIG, 2048: java.lang.Integer)
@@ -826,19 +818,18 @@ class LogCleanerTest extends Logging {

    // Both transactional batches will be cleaned. The last one will remain in the log
    // as an empty batch in order to preserve the producer sequence number and epoch
-   cleaner.doClean(LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   cleaner.doClean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(1, 3, 4, 5), offsetsInLog(log))
    assertEquals(List(1, 2, 3, 4, 5), lastOffsetsPerBatchInLog(log))

    // In the first pass, the delete horizon for the first marker is set. In the second pass, the first marker is removed.
-   runTwoPassClean(cleaner, LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   runTwoPassClean(cleaner, new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(3, 4, 5), offsetsInLog(log))
    assertEquals(List(2, 3, 4, 5), lastOffsetsPerBatchInLog(log))
  }

  @Test
  def testAbortMarkerRetentionWithEmptyBatch(): Unit = {
-   val tp = new TopicPartition("test", 0)
    val cleaner = makeCleaner(Int.MaxValue)
    val logProps = new Properties()
    logProps.put(TopicConfig.SEGMENT_BYTES_CONFIG, 256: java.lang.Integer)
@@ -863,14 +854,14 @@ class LogCleanerTest extends Logging {
    assertAbortedTransactionIndexed()

    // first time through the records are removed
-   var dirtyOffset = cleaner.doClean(LogToClean(tp, log, 0L, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   var dirtyOffset = cleaner.doClean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertAbortedTransactionIndexed()
    assertEquals(List(), LogTestUtils.keysInLog(log))
    assertEquals(List(2), offsetsInLog(log)) // abort marker is retained
    assertEquals(List(1, 2), lastOffsetsPerBatchInLog(log)) // empty batch is retained

    // the empty batch remains if cleaned again because it still holds the last sequence
-   dirtyOffset = runTwoPassClean(cleaner, LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   dirtyOffset = runTwoPassClean(cleaner, new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertAbortedTransactionIndexed()
    assertEquals(List(), LogTestUtils.keysInLog(log))
    assertEquals(List(2), offsetsInLog(log)) // abort marker is still retained
@@ -880,14 +871,14 @@ class LogCleanerTest extends Logging {
    appendProducer(Seq(1))
    log.roll()

-   dirtyOffset = cleaner.doClean(LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)._1
+   dirtyOffset = cleaner.doClean(new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)._1
    assertAbortedTransactionIndexed()
    assertEquals(List(1), LogTestUtils.keysInLog(log))
    assertEquals(List(2, 3), offsetsInLog(log)) // abort marker is not yet gone because we read the empty batch
    assertEquals(List(2, 3), lastOffsetsPerBatchInLog(log)) // but we do not preserve the empty batch

    // In the first pass, the delete horizon for the abort marker is set. In the second pass, the abort marker is removed.
-   dirtyOffset = runTwoPassClean(cleaner, LogToClean(tp, log, dirtyOffset, log.activeSegment.baseOffset), currentTime = largeTimestamp)
+   dirtyOffset = runTwoPassClean(cleaner, new LogToClean(log, dirtyOffset, log.activeSegment.baseOffset, false), currentTime = largeTimestamp)
    assertEquals(List(1), LogTestUtils.keysInLog(log))
    assertEquals(List(3), offsetsInLog(log)) // abort marker is gone
    assertEquals(List(3), lastOffsetsPerBatchInLog(log))
@@ -1020,7 +1011,7 @@ class LogCleanerTest extends Logging {
    while (log.numberOfSegments < 4)
      log.appendAsLeader(record(log.logEndOffset.toInt, log.logEndOffset.toInt), 0)

-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0, log.activeSegment.baseOffset, false))
    val keys = LogTestUtils.keysInLog(log).toSet
    assertTrue((0 until leo.toInt by 2).forall(!keys.contains(_)), "None of the keys we deleted should still exist.")
  }
@@ -1044,7 +1035,7 @@ class LogCleanerTest extends Logging {

    val initialLogSize = log.size

-   val (endOffset, stats) = cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 2, log.activeSegment.baseOffset))
+   val (endOffset, stats) = cleaner.clean(new LogToClean(log, 2, log.activeSegment.baseOffset, false))
    assertEquals(5, endOffset)
    assertEquals(5, stats.messagesRead)
    assertEquals(initialLogSize, stats.bytesRead)
@@ -1070,7 +1061,7 @@ class LogCleanerTest extends Logging {
    // roll the segment, so we can clean the messages already appended
    log.roll()

-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))
    assertEquals(List(1, 3, 4), lastOffsetsPerBatchInLog(log))
    assertEquals(Map(1L -> 0, 2L -> 1, 3L -> 0), lastSequencesInLog(log))
    assertEquals(List(0, 1), LogTestUtils.keysInLog(log))
@@ -1093,7 +1084,7 @@ class LogCleanerTest extends Logging {
    log.appendAsLeader(abortMarker(producerId, producerEpoch), 0, AppendOrigin.COORDINATOR)
    log.roll()

-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))
    assertEquals(List(2, 3), lastOffsetsPerBatchInLog(log))
    assertEquals(Map(producerId -> 2), lastSequencesInLog(log))
    assertEquals(List(), LogTestUtils.keysInLog(log))
@@ -1102,7 +1093,7 @@ class LogCleanerTest extends Logging {
    // Append a new entry from the producer and verify that the empty batch is cleaned up
    appendProducer(Seq(1, 5))
    log.roll()
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))

    assertEquals(List(3, 5), lastOffsetsPerBatchInLog(log))
    assertEquals(Map(producerId -> 4), lastSequencesInLog(log))
@@ -1149,7 +1140,7 @@ class LogCleanerTest extends Logging {
    assertEquals(List(0, 1, 2, 3), offsetsInLog(log))

    // After cleaning, the marker should not be removed
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, false))
    assertEquals(List(0, 1, 2, 3), lastOffsetsPerBatchInLog(log))
    assertEquals(List(0, 1, 2, 3), offsetsInLog(log))
  }
@@ -1172,16 +1163,16 @@ class LogCleanerTest extends Logging {
    log.roll()

    // clean the log with only one message removed
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 2, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 2, log.activeSegment.baseOffset, false))
    assertEquals(List(1, 0, 1, 0), LogTestUtils.keysInLog(log))
    assertEquals(List(1, 2, 3, 4), offsetsInLog(log))

    // continue to make progress, even though we can only clean one message at a time
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 3, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 3, log.activeSegment.baseOffset, false))
    assertEquals(List(0, 1, 0), LogTestUtils.keysInLog(log))
    assertEquals(List(2, 3, 4), offsetsInLog(log))

-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 4, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 4, log.activeSegment.baseOffset, false))
    assertEquals(List(1, 0), LogTestUtils.keysInLog(log))
    assertEquals(List(3, 4), offsetsInLog(log))
  }
@@ -1218,7 +1209,7 @@ class LogCleanerTest extends Logging {
    assertTrue(distinctValuesBySegment.reverse.tail.forall(_ > N),
      "Test is not effective unless each segment contains duplicates. Increase segment size or decrease number of keys.")

-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0, firstUncleanableOffset))
+   cleaner.clean(new LogToClean(log, 0, firstUncleanableOffset, false))

    val distinctValuesBySegmentAfterClean = distinctValuesBySegment

@@ -1241,7 +1232,7 @@ class LogCleanerTest extends Logging {
    for (_ <- 0 until 6)
      log.appendAsLeader(createRecords, 0)

-   val logToClean = LogToClean(new TopicPartition("test", 0), log, log.activeSegment.baseOffset, log.activeSegment.baseOffset)
+   val logToClean = new LogToClean(log, log.activeSegment.baseOffset, log.activeSegment.baseOffset, false)

    assertEquals(logToClean.totalBytes, log.size - log.activeSegment.size,
      "Total bytes of LogToClean should equal size of all segments excluding the active segment")
@@ -1261,7 +1252,7 @@ class LogCleanerTest extends Logging {

    // segments [0,1] are clean; segments [2, 3] are cleanable; segments [4,5] are uncleanable
    val segs = log.logSegments.asScala.toSeq
-   val logToClean = LogToClean(new TopicPartition("test", 0), log, segs(2).baseOffset, segs(4).baseOffset)
+   val logToClean = new LogToClean(log, segs(2).baseOffset, segs(4).baseOffset, false)

    val expectedCleanSize = segs.take(2).map(_.size).sum
    val expectedCleanableSize = segs.slice(2, 4).map(_.size).sum
@@ -1301,7 +1292,7 @@ class LogCleanerTest extends Logging {
    log.appendAsLeader(record(log.logEndOffset.toInt, log.logEndOffset.toInt), 0)

    val expectedSizeAfterCleaning = log.size - sizeWithUnkeyedMessages
-   val (_, stats) = cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0, log.activeSegment.baseOffset))
+   val (_, stats) = cleaner.clean(new LogToClean(log, 0, log.activeSegment.baseOffset, false))

    assertEquals(0, unkeyedMessageCountInLog(log), "Log should only contain keyed messages after cleaning.")
    assertEquals(expectedSizeAfterCleaning, log.size, "Log should only contain keyed messages after cleaning.")
@@ -1472,7 +1463,7 @@ class LogCleanerTest extends Logging {
    // segments will not group even if their size is very small
    assertEquals(totalSegments - notCleanableSegments, groups.size)
    // do a clean to empty the first 2 segments
-   cleaner.clean(LogToClean(log.topicPartition, log, 0, firstUncleanableOffset))
+   cleaner.clean(new LogToClean(log, 0, firstUncleanableOffset, false))
    assertEquals(totalSegments, log.numberOfSegments)
    assertEquals(0, log.logSegments.asScala.head.size)

@@ -1482,7 +1473,7 @@ class LogCleanerTest extends Logging {
    assertEquals(noneEmptySegment + 1, groups.size)

    // trigger a clean; the 2 empty segments should be cleaned into 1
-   cleaner.clean(LogToClean(log.topicPartition, log, 0, firstUncleanableOffset))
+   cleaner.clean(new LogToClean(log, 0, firstUncleanableOffset, false))
    assertEquals(totalSegments - 1, log.numberOfSegments)
  }

@@ -1883,7 +1874,7 @@ class LogCleanerTest extends Logging {

    log.roll()

-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0, log.activeSegment.baseOffset, false))

    for (segment <- log.logSegments.asScala; batch <- segment.log.batches.asScala; record <- batch.asScala) {
      assertTrue(record.hasMagic(batch.magic))
@@ -1927,14 +1918,14 @@ class LogCleanerTest extends Logging {
      key = "0".getBytes,
      timestamp = time.milliseconds() + logConfig.deleteRetentionMs + 10000), 0)
    log.roll()
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 0, log.activeSegment.baseOffset, false))
    // Append a tombstone with a small timestamp and roll out a new log segment.
    log.appendAsLeader(TestUtils.singletonRecords(value = null,
      key = "0".getBytes,
      timestamp = time.milliseconds() - logConfig.deleteRetentionMs - 10000), 0)
    log.roll()

-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 1, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 1, log.activeSegment.baseOffset, false))
    assertEquals(1, log.logSegments.asScala.head.log.batches.iterator.next().lastOffset,
      "The tombstone should be retained.")
    // Append a message and roll out another log segment.
@@ -1942,7 +1933,7 @@ class LogCleanerTest extends Logging {
      key = "1".getBytes,
      timestamp = time.milliseconds()), 0)
    log.roll()
-   cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 2, log.activeSegment.baseOffset))
+   cleaner.clean(new LogToClean(log, 2, log.activeSegment.baseOffset, false))
    assertEquals(1, log.logSegments.asScala.head.log.batches.iterator.next().lastOffset,
      "The tombstone should be retained.")
  }
@@ -1967,7 +1958,7 @@ class LogCleanerTest extends Logging {
    // active segment record
    log.appendAsFollower(messageWithOffset(1015, 1015, 11L), Int.MaxValue)

-   val (nextDirtyOffset, _) = cleaner.clean(LogToClean(log.topicPartition, log, 0L, log.activeSegment.baseOffset, needCompactionNow = true))
+   val (nextDirtyOffset, _) = cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, true))
    assertEquals(log.activeSegment.baseOffset, nextDirtyOffset,
      "Cleaning point should pass offset gap")
  }
@@ -1986,7 +1977,7 @@ class LogCleanerTest extends Logging {
    // active segment record
    log.appendAsFollower(messageWithOffset(1015, 1015, 30L), Int.MaxValue)

-   val (nextDirtyOffset, _) = cleaner.clean(LogToClean(log.topicPartition, log, 0L, log.activeSegment.baseOffset, needCompactionNow = true))
+   val (nextDirtyOffset, _) = cleaner.clean(new LogToClean(log, 0L, log.activeSegment.baseOffset, true))
    assertEquals(log.activeSegment.baseOffset, nextDirtyOffset,
      "Cleaning point should pass offset gap in multiple segments")
  }
@@ -1996,7 +1987,7 @@ class LogCleanerTest extends Logging {
  def testMaxCleanTimeSecs(): Unit = {
    val logCleaner = new LogCleaner(new CleanerConfig(true),
      logDirs = Array(TestUtils.tempDir()),
-     logs = new Pool[TopicPartition, UnifiedLog](),
+     logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog](),
      logDirFailureChannel = new LogDirFailureChannel(1),
      time = time)

@@ -2020,7 +2011,7 @@ class LogCleanerTest extends Logging {

    val logCleaner = new LogCleaner(LogCleaner.cleanerConfig(new KafkaConfig(oldKafkaProps)),
      logDirs = Array(TestUtils.tempDir()),
-     logs = new Pool[TopicPartition, UnifiedLog](),
+     logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog](),
      logDirFailureChannel = new LogDirFailureChannel(1),
      time = time) {
      // shutdown() and startup() are called in LogCleaner.reconfigure().
@@ -2048,7 +2039,7 @@ class LogCleanerTest extends Logging {
    val logCleaner = new LogCleaner(
      new CleanerConfig(true),
      logDirs = Array(TestUtils.tempDir(), TestUtils.tempDir()),
-     logs = new Pool[TopicPartition, UnifiedLog](),
+     logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog](),
      logDirFailureChannel = new LogDirFailureChannel(1),
      time = time
    )
@@ -2100,7 +2091,7 @@ class LogCleanerTest extends Logging {
    val logCleaner = new LogCleaner(
      new CleanerConfig(true),
      logDirs = Array(TestUtils.tempDir(), TestUtils.tempDir()),
-     logs = new Pool[TopicPartition, UnifiedLog](),
+     logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog](),
      logDirFailureChannel = new LogDirFailureChannel(1),
      time = time
    )
@@ -2152,7 +2143,7 @@ class LogCleanerTest extends Logging {
    val logCleaner = new LogCleaner(
      new CleanerConfig(true),
      logDirs = Array(TestUtils.tempDir(), TestUtils.tempDir()),
-     logs = new Pool[TopicPartition, UnifiedLog](),
+     logs = new util.concurrent.ConcurrentHashMap[TopicPartition, UnifiedLog](),
      logDirFailureChannel = new LogDirFailureChannel(1),
      time = time
    )
@@ -2171,28 +2162,28 @@ class LogCleanerTest extends Logging {

      val cleaner1 = new logCleaner.CleanerThread(1)
      cleaner1.lastStats = new CleanerStats(time)
-     cleaner1.lastPreCleanStats.maxCompactionDelayMs = 1_000L
+     cleaner1.lastPreCleanStats.maxCompactionDelayMs(1_000L)
      cleaners += cleaner1

      val cleaner2 = new logCleaner.CleanerThread(2)
      cleaner2.lastStats = new CleanerStats(time)
-     cleaner2.lastPreCleanStats.maxCompactionDelayMs = 2_000L
+     cleaner2.lastPreCleanStats.maxCompactionDelayMs(2_000L)
      cleaners += cleaner2

      val cleaner3 = new logCleaner.CleanerThread(3)
      cleaner3.lastStats = new CleanerStats(time)
-     cleaner3.lastPreCleanStats.maxCompactionDelayMs = 3_000L
+     cleaner3.lastPreCleanStats.maxCompactionDelayMs(3_000L)
      cleaners += cleaner3

      // expect the gauge value to reflect the maximum CompactionDelay
      assertMaxCompactionDelay(3)

      // Update CompactionDelay and verify the gauge value updates
-     cleaner1.lastPreCleanStats.maxCompactionDelayMs = 4_000L
+     cleaner1.lastPreCleanStats.maxCompactionDelayMs(4_000L)
      assertMaxCompactionDelay(4)

      // All CleanerThreads have the same CompactionDelay
-     cleaners.foreach(_.lastPreCleanStats.maxCompactionDelayMs = 1_500L)
+     cleaners.foreach(_.lastPreCleanStats.maxCompactionDelayMs(1_500L))
      assertMaxCompactionDelay(1)
    } finally {
      logCleaner.shutdown()
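The `maxCompactionDelayMs` updates above turn from Scala var assignments into method calls because `PreCleanStats` now lives in the Java storage module (see the import change in LogCleaner.scala). A small sketch of the setter-style call; the no-argument constructor is an assumption:

    import org.apache.kafka.storage.internals.log.PreCleanStats;

    class PreCleanStatsSketch {
        static void recordDelay() {
            PreCleanStats stats = new PreCleanStats(); // assumed no-arg constructor
            stats.maxCompactionDelayMs(2_000L);        // was `stats.maxCompactionDelayMs = 2000L` in Scala
        }
    }
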
@@ -43,7 +43,7 @@ import org.apache.kafka.server.storage.log.{FetchIsolation, UnexpectedAppendOffs
import org.apache.kafka.server.util.{KafkaScheduler, MockTime, Scheduler}
import org.apache.kafka.storage.internals.checkpoint.{LeaderEpochCheckpointFile, PartitionMetadataFile}
import org.apache.kafka.storage.internals.epoch.LeaderEpochFileCache
-import org.apache.kafka.storage.internals.log.{AbortedTxn, AppendOrigin, EpochEntry, LogConfig, LogFileUtils, LogOffsetMetadata, LogOffsetSnapshot, LogOffsetsListener, LogSegment, LogSegments, LogStartOffsetIncrementReason, OffsetResultHolder, OffsetsOutOfOrderException, ProducerStateManager, ProducerStateManagerConfig, RecordValidationException, UnifiedLog, VerificationGuard}
+import org.apache.kafka.storage.internals.log.{AbortedTxn, AppendOrigin, EpochEntry, LogConfig, LogFileUtils, LogOffsetMetadata, LogOffsetSnapshot, LogOffsetsListener, LogSegment, LogSegments, LogStartOffsetIncrementReason, LogToClean, OffsetResultHolder, OffsetsOutOfOrderException, ProducerStateManager, ProducerStateManagerConfig, RecordValidationException, UnifiedLog, VerificationGuard}
import org.apache.kafka.storage.internals.utils.Throttler
import org.apache.kafka.storage.log.metrics.{BrokerTopicMetrics, BrokerTopicStats}
import org.junit.jupiter.api.Assertions._
@@ -1100,7 +1100,7 @@ class UnifiedLogTest {

    // Clean segments. This should delete everything except the active segment,
    // since only the key "a" exists.
-   cleaner.clean(LogToClean(log.topicPartition, log, 0, log.logEndOffset))
+   cleaner.clean(new LogToClean(log, 0, log.logEndOffset, false))
    log.deleteOldSegments()
    // Sleep to breach the file delete delay and run scheduled file deletion tasks
    mockTime.sleep(1)
@@ -279,15 +279,6 @@ For a detailed description of spotbugs bug categories, see https://spotbugs.read
        <Bug pattern="RV_RETURN_VALUE_IGNORED"/>
    </Match>

-   <Match>
-       <!-- Suppress a warning about ignoring the return value of await.
-            This is done intentionally because we use other clues to determine
-            if the wait was cut short. -->
-       <Package name="kafka.log"/>
-       <Source name="LogCleanerManager.scala"/>
-       <Bug pattern="RV_RETURN_VALUE_IGNORED,RV_RETURN_VALUE_IGNORED_BAD_PRACTICE"/>
-   </Match>
-
    <Match>
        <!-- Suppress some warnings about intentional switch statement fallthrough. -->
        <Class name="org.apache.kafka.connect.runtime.WorkerConnector"/>
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.util;
+
+import java.util.Objects;
+import java.util.concurrent.locks.Lock;
+import java.util.function.Supplier;
+
+/**
+ * A utility class providing helper methods for working with {@link Lock} objects.
+ * This class simplifies the usage of locks by encapsulating common patterns,
+ * such as acquiring and releasing locks in a safe manner.
+ */
+public class LockUtils {
+
+    /**
+     * Executes the given {@link Supplier} within the context of the specified {@link Lock}.
+     * The lock is acquired before executing the supplier and released after the execution,
+     * ensuring that the lock is always released, even if an exception is thrown.
+     *
+     * @param <T> the type of the result returned by the supplier
+     * @param lock the lock to be acquired and released
+     * @param supplier the supplier to be executed within the lock context
+     * @return the result of the supplier
+     * @throws NullPointerException if either {@code lock} or {@code supplier} is null
+     */
+    public static <T> T inLock(Lock lock, Supplier<T> supplier) {
+        Objects.requireNonNull(lock, "Lock must not be null");
+        Objects.requireNonNull(supplier, "Supplier must not be null");
+
+        lock.lock();
+        try {
+            return supplier.get();
+        } finally {
+            lock.unlock();
+        }
+    }
+}
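A short usage sketch for the new helper (illustrative, not part of the patch): `inLock` collapses the usual acquire/try/finally boilerplate into a single expression while keeping the same release guarantee.

    import java.util.concurrent.locks.ReentrantLock;

    import static org.apache.kafka.server.util.LockUtils.inLock;

    class LockUtilsExample {
        private final ReentrantLock lock = new ReentrantLock();
        private int counter = 0;

        // Equivalent to lock.lock(); try { ... } finally { lock.unlock(); }
        int incrementAndGet() {
            return inLock(lock, () -> ++counter);
        }
    }
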
@ -0,0 +1,798 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.storage.internals.log;

import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.KafkaStorageException;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.server.metrics.KafkaMetricsGroup;
import org.apache.kafka.storage.internals.checkpoint.OffsetCheckpointFile;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.kafka.server.util.LockUtils.inLock;

/**
 * This class manages the state (see {@link LogCleaningState}) of each partition being cleaned.
 * <ul>
 * <li>1. None : No cleaning state for the TopicPartition. In this state, it can become LogCleaningInProgress
 *       or LogCleaningPaused(1). Valid previous states are LogCleaningInProgress and LogCleaningPaused(1).</li>
 * <li>2. LogCleaningInProgress : The cleaning is currently in progress. In this state, it can become None when log cleaning is finished
 *       or become LogCleaningAborted. Valid previous state is None.</li>
 * <li>3. LogCleaningAborted : The cleaning abort is requested. In this state, it can become LogCleaningPaused(1).
 *       Valid previous state is LogCleaningInProgress.</li>
 * <li>4-a. LogCleaningPaused(1) : The cleaning is paused once. No log cleaning can be done in this state.
 *       In this state, it can become None or LogCleaningPaused(2).
 *       Valid previous states are None, LogCleaningAborted and LogCleaningPaused(2).</li>
 * <li>4-b. LogCleaningPaused(i) : The cleaning is paused i times where i >= 2. No log cleaning can be done in this state.
 *       In this state, it can become LogCleaningPaused(i-1) or LogCleaningPaused(i+1).
 *       Valid previous state is LogCleaningPaused(i-1) or LogCleaningPaused(i+1).</li>
 * </ul>
 */
public class LogCleanerManager {
    public static final String OFFSET_CHECKPOINT_FILE = "cleaner-offset-checkpoint";

    private static final Logger LOG = LoggerFactory.getLogger("kafka.log.LogCleaner");

    private static final String UNCLEANABLE_PARTITIONS_COUNT_METRIC_NAME = "uncleanable-partitions-count";
    private static final String UNCLEANABLE_BYTES_METRIC_NAME = "uncleanable-bytes";
    private static final String MAX_DIRTY_PERCENT_METRIC_NAME = "max-dirty-percent";
    private static final String TIME_SINCE_LAST_RUN_MS_METRIC_NAME = "time-since-last-run-ms";

    // Visible for testing
    public static final Set<String> GAUGE_METRIC_NAME_NO_TAG = Set.of(MAX_DIRTY_PERCENT_METRIC_NAME, TIME_SINCE_LAST_RUN_MS_METRIC_NAME);

    // For compatibility, metrics are defined under the `kafka.log.LogCleanerManager` class
    private final KafkaMetricsGroup metricsGroup = new KafkaMetricsGroup("kafka.log", "LogCleanerManager");

    /**
     * The set of logs currently being cleaned.
     */
    private final Map<TopicPartition, LogCleaningState> inProgress = new HashMap<>();

    /**
     * The set of uncleanable partitions (partitions that have raised an unexpected error during cleaning)
     * for each log directory.
     */
    private final Map<String, Set<TopicPartition>> uncleanablePartitions = new HashMap<>();

    /**
     * A global lock used to control all access to the in-progress set and the offset checkpoints.
     */
    private final Lock lock = new ReentrantLock();

    /**
     * For coordinating the pausing and the cleaning of a partition.
     */
    private final Condition pausedCleaningCond = lock.newCondition();

    private final Map<String, List<Map<String, String>>> gaugeMetricNameWithTag = new HashMap<>();

    private final ConcurrentMap<TopicPartition, UnifiedLog> logs;

    /**
     * The offset checkpoints holding the last cleaned point for each log.
     */
    private volatile Map<File, OffsetCheckpointFile> checkpoints;

    private volatile double dirtiestLogCleanableRatio;
    private volatile long timeOfLastRun;

    @SuppressWarnings({"this-escape"})
    public LogCleanerManager(
            List<File> logDirs,
            ConcurrentMap<TopicPartition, UnifiedLog> logs,
            LogDirFailureChannel logDirFailureChannel
    ) {
        this.logs = logs;
        checkpoints = logDirs.stream()
                .collect(Collectors.toMap(
                        dir -> dir,
                        dir -> {
                            try {
                                return new OffsetCheckpointFile(new File(dir, OFFSET_CHECKPOINT_FILE), logDirFailureChannel);
                            } catch (IOException e) {
                                throw new RuntimeException(e);
                            }
                        }
                ));

        registerMetrics(logDirs);
    }

    private void registerMetrics(List<File> logDirs) {
        // gauges for tracking the number of partitions marked as uncleanable for each log directory
        for (File dir : logDirs) {
            Map<String, String> metricTag = Map.of("logDirectory", dir.getAbsolutePath());
            metricsGroup.newGauge(
                    UNCLEANABLE_PARTITIONS_COUNT_METRIC_NAME,
                    () -> inLock(lock, () -> uncleanablePartitions.getOrDefault(dir.getAbsolutePath(), Set.of()).size()),
                    metricTag
            );

            gaugeMetricNameWithTag
                    .computeIfAbsent(UNCLEANABLE_PARTITIONS_COUNT_METRIC_NAME, k -> new ArrayList<>())
                    .add(metricTag);
        }

        // gauges for tracking the number of uncleanable bytes from uncleanable partitions for each log directory
        for (File dir : logDirs) {
            Map<String, String> metricTag = Map.of("logDirectory", dir.getAbsolutePath());
            metricsGroup.newGauge(
                    UNCLEANABLE_BYTES_METRIC_NAME,
                    () -> inLock(lock, () -> {
                        Set<TopicPartition> partitions = uncleanablePartitions.get(dir.getAbsolutePath());

                        if (partitions == null) {
                            return 0;
                        } else {
                            Map<TopicPartition, Long> lastClean = allCleanerCheckpoints();
                            long now = Time.SYSTEM.milliseconds();
                            return partitions.stream()
                                    .mapToLong(tp -> {
                                        UnifiedLog log = logs.get(tp);
                                        if (log != null) {
                                            Optional<Long> lastCleanOffset = Optional.ofNullable(lastClean.get(tp));
                                            try {
                                                OffsetsToClean offsetsToClean = cleanableOffsets(log, lastCleanOffset, now);
                                                return calculateCleanableBytes(log, offsetsToClean.firstDirtyOffset(),
                                                        offsetsToClean.firstUncleanableDirtyOffset()).getValue();
                                            } catch (IOException e) {
                                                throw new RuntimeException(e);
                                            }
                                        } else {
                                            return 0L;
                                        }
                                    }).sum();
                        }
                    }),
                    metricTag
            );

            gaugeMetricNameWithTag
                    .computeIfAbsent(UNCLEANABLE_BYTES_METRIC_NAME, k -> new ArrayList<>())
                    .add(metricTag);
        }

        // a gauge for tracking the cleanable ratio of the dirtiest log
        dirtiestLogCleanableRatio = 0.0;
        metricsGroup.newGauge(MAX_DIRTY_PERCENT_METRIC_NAME, () -> (int) (100 * dirtiestLogCleanableRatio));

        // a gauge for tracking the time since the last log cleaner run, in milliseconds
        timeOfLastRun = Time.SYSTEM.milliseconds();
        metricsGroup.newGauge(TIME_SINCE_LAST_RUN_MS_METRIC_NAME, () -> Time.SYSTEM.milliseconds() - timeOfLastRun);
    }

    public Map<String, List<Map<String, String>>> gaugeMetricNameWithTag() {
        return gaugeMetricNameWithTag;
    }

    /**
     * @return the last cleaned position (checkpointed offset) for each log.
     */
    public Map<TopicPartition, Long> allCleanerCheckpoints() {
        return inLock(lock, () -> checkpoints.values().stream()
                .flatMap(checkpoint -> {
                    try {
                        return checkpoint.read().entrySet().stream();
                    } catch (KafkaStorageException e) {
                        LOG.error("Failed to access checkpoint file {} in dir {}",
                                checkpoint.file().getName(), checkpoint.file().getParentFile().getAbsolutePath(), e);
                        return Stream.empty();
                    }
                })
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
    }

    /**
     * Public for unit test. Get the cleaning state of the partition.
     */
    public Optional<LogCleaningState> cleaningState(TopicPartition tp) {
        return inLock(lock, () -> Optional.ofNullable(inProgress.get(tp)));
    }

    /**
     * Public for unit test. Set the cleaning state of the partition.
     */
    public void setCleaningState(TopicPartition tp, LogCleaningState state) {
        inLock(lock, () -> inProgress.put(tp, state));
    }

    /**
     * Choose the log to clean next and add it to the in-progress set. We recompute this
     * each time from the full set of logs to allow logs to be dynamically added to the pool of logs
     * the log manager maintains.
     */
    public Optional<LogToClean> grabFilthiestCompactedLog(Time time, PreCleanStats preCleanStats) {
        return inLock(lock, () -> {
            long now = time.milliseconds();
            timeOfLastRun = now;
            Map<TopicPartition, Long> lastClean = allCleanerCheckpoints();

            List<LogToClean> dirtyLogs = logs.entrySet().stream()
                    .filter(entry -> entry.getValue().config().compact &&
                            !inProgress.containsKey(entry.getKey()) &&
                            !isUncleanablePartition(entry.getValue(), entry.getKey())
                    )
                    .map(entry -> {
                        // create a LogToClean instance for each
                        TopicPartition topicPartition = entry.getKey();
                        UnifiedLog log = entry.getValue();
                        try {
                            Long lastCleanOffset = lastClean.get(topicPartition);
                            OffsetsToClean offsetsToClean = cleanableOffsets(log, Optional.ofNullable(lastCleanOffset), now);
                            // update checkpoint for logs with invalid checkpointed offsets
                            if (offsetsToClean.forceUpdateCheckpoint()) {
                                updateCheckpoints(log.parentDirFile(), Optional.of(Map.entry(topicPartition, offsetsToClean.firstDirtyOffset())), Optional.empty());
                            }
                            long compactionDelayMs = maxCompactionDelay(log, offsetsToClean.firstDirtyOffset(), now);
                            preCleanStats.updateMaxCompactionDelay(compactionDelayMs);

                            return new LogToClean(log, offsetsToClean.firstDirtyOffset(),
                                    offsetsToClean.firstUncleanableDirtyOffset(), compactionDelayMs > 0);
                        } catch (Throwable e) {
                            throw new LogCleaningException(log, "Failed to calculate log cleaning stats for partition " + topicPartition, e);
                        }
                    })
                    .filter(ltc -> ltc.totalBytes() > 0) // skip any empty logs
                    .toList();

            dirtiestLogCleanableRatio = dirtyLogs.isEmpty()
                    ? 0
                    : dirtyLogs.stream()
                            .mapToDouble(LogToClean::cleanableRatio)
                            .max()
                            .orElse(0.0);
            // and must meet the minimum threshold for dirty byte ratio or have some bytes required to be compacted
            List<LogToClean> cleanableLogs = dirtyLogs.stream()
                    .filter(ltc -> (ltc.needCompactionNow() && ltc.cleanableBytes() > 0) || ltc.cleanableRatio() > ltc.log().config().minCleanableRatio)
                    .toList();

            if (cleanableLogs.isEmpty()) {
                return Optional.empty();
            } else {
                preCleanStats.recordCleanablePartitions(cleanableLogs.size());
                LogToClean filthiest = cleanableLogs.stream()
                        .max(Comparator.comparingDouble(LogToClean::cleanableRatio))
                        .orElseThrow(() -> new IllegalStateException("No filthiest log found"));

                inProgress.put(filthiest.topicPartition(), LogCleaningState.LOG_CLEANING_IN_PROGRESS);
                return Optional.of(filthiest);
            }
        });
    }

    /**
     * Pause log cleaning for logs that do not have compaction enabled
     * and do not have other deletion or compaction in progress.
     * This handles a potential race between the retention and cleaner threads when users
     * switch a topic's configuration between compacted and non-compacted.
     *
     * @return retention logs that have log cleaning successfully paused
     */
    public List<Map.Entry<TopicPartition, UnifiedLog>> pauseCleaningForNonCompactedPartitions() {
        return inLock(lock, () -> {
            List<Map.Entry<TopicPartition, UnifiedLog>> deletableLogs = logs.entrySet().stream()
                    .filter(entry -> !entry.getValue().config().compact) // pick non-compacted logs
                    .filter(entry -> !inProgress.containsKey(entry.getKey())) // skip any logs already in-progress
                    .collect(Collectors.toList());

            deletableLogs.forEach(entry -> inProgress.put(entry.getKey(), LogCleaningState.logCleaningPaused(1)));

            return deletableLogs;
        });
    }

    /**
     * Find any logs that have compaction enabled and mark them as being cleaned.
     * Include logs without delete enabled, as they may have segments
     * that precede the start offset.
     */
    public Map<TopicPartition, UnifiedLog> deletableLogs() {
        return inLock(lock, () -> {
            Map<TopicPartition, UnifiedLog> toClean = logs.entrySet().stream()
                    .filter(entry -> {
                        TopicPartition topicPartition = entry.getKey();
                        UnifiedLog log = entry.getValue();
                        return !inProgress.containsKey(topicPartition) && log.config().compact &&
                                !isUncleanablePartition(log, topicPartition);
                    })
                    .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
            toClean.forEach((partition, log) -> inProgress.put(partition, LogCleaningState.LOG_CLEANING_IN_PROGRESS));
            return toClean;
        });
    }

    /**
     * Abort the cleaning of a particular partition, if it's in progress. This call blocks until the cleaning of
     * the partition is aborted.
     * This is implemented by first calling abortAndPauseCleaning() and then resumeCleaning() for the partition.
     */
    public void abortCleaning(TopicPartition topicPartition) {
        inLock(lock, () -> {
            abortAndPauseCleaning(topicPartition);
            resumeCleaning(List.of(topicPartition));
            return null;
        });
    }

    /**
     * Abort the cleaning of a particular partition if it's in progress, and pause any future cleaning of this partition.
     * This call blocks until the cleaning of the partition is aborted and paused.
     * <ol>
     * <li>If the partition is not in progress, mark it as paused.</li>
     * <li>Otherwise, first mark the state of the partition as aborted.</li>
     * <li>The cleaner thread checks the state periodically and if it sees the state of the partition is aborted, it
     * throws a LogCleaningAbortedException to stop the cleaning task.</li>
     * <li>When the cleaning task is stopped, doneCleaning() is called, which sets the state of the partition as paused.</li>
     * <li>abortAndPauseCleaning() waits until the state of the partition is changed to paused.</li>
     * <li>If the partition is already paused, a new call to this function
     * will increase the paused count by one.</li>
     * </ol>
     */
    public void abortAndPauseCleaning(TopicPartition topicPartition) {
        inLock(lock, () -> {
            LogCleaningState state = inProgress.get(topicPartition);

            if (state == null) {
                inProgress.put(topicPartition, LogCleaningState.logCleaningPaused(1));
            } else if (state == LogCleaningState.LOG_CLEANING_IN_PROGRESS) {
                inProgress.put(topicPartition, LogCleaningState.LOG_CLEANING_ABORTED);
            } else if (state instanceof LogCleaningState.LogCleaningPaused logCleaningPaused) {
                inProgress.put(topicPartition, LogCleaningState.logCleaningPaused(logCleaningPaused.pausedCount() + 1));
            } else {
                throw new IllegalStateException("Compaction for partition " + topicPartition +
                        " cannot be aborted and paused since it is in " + state + " state.");
            }

            while (!isCleaningInStatePaused(topicPartition)) {
                try {
                    pausedCleaningCond.await(100, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }
            }

            return null;
        });
    }

    /**
     * Resume the cleaning of paused partitions.
     * Each call of this function will undo one pause.
     */
    public void resumeCleaning(List<TopicPartition> topicPartitions) {
        inLock(lock, () -> {
            topicPartitions.forEach(topicPartition -> {
                LogCleaningState state = inProgress.get(topicPartition);

                if (state == null) {
                    throw new IllegalStateException("Compaction for partition " + topicPartition + " cannot be resumed since it is not paused.");
                }

                if (state instanceof LogCleaningState.LogCleaningPaused logCleaningPaused) {
                    if (logCleaningPaused.pausedCount() == 1) {
                        inProgress.remove(topicPartition);
                    } else if (logCleaningPaused.pausedCount() > 1) {
                        inProgress.put(topicPartition, LogCleaningState.logCleaningPaused(logCleaningPaused.pausedCount() - 1));
                    }
                } else {
                    throw new IllegalStateException("Compaction for partition " + topicPartition +
                            " cannot be resumed since it is in " + state + " state.");
                }
            });

            return null;
        });
    }

    /**
     * Check if the cleaning for a partition is in a particular state. The caller is expected to hold lock while making the call.
     */
    private boolean isCleaningInState(TopicPartition topicPartition, LogCleaningState expectedState) {
        LogCleaningState state = inProgress.get(topicPartition);

        if (state == null) {
            return false;
        } else {
            return state == expectedState;
        }
    }

    /**
     * Check if the cleaning for a partition is paused. The caller is expected to hold lock while making the call.
     */
    private boolean isCleaningInStatePaused(TopicPartition topicPartition) {
        LogCleaningState state = inProgress.get(topicPartition);

        if (state == null) {
            return false;
        } else {
            return state instanceof LogCleaningState.LogCleaningPaused;
        }
    }

    /**
     * Check if the cleaning for a partition is aborted. If so, throw an exception.
     */
    public void checkCleaningAborted(TopicPartition topicPartition) {
        inLock(lock, () -> {
            if (isCleaningInState(topicPartition, LogCleaningState.LOG_CLEANING_ABORTED)) {
                throw new LogCleaningAbortedException();
            }
            return null;
        });
    }

    /**
     * Update the checkpoint file, adding or removing partitions if necessary.
     *
     * @param dataDir The log directory whose checkpoint file is to be updated
     * @param partitionToUpdateOrAdd The [TopicPartition, Long] map entry to be updated or added; pass {@code Optional.empty()} when removing rather than adding
     * @param partitionToRemove The TopicPartition to be removed, if any
     */
    public void updateCheckpoints(
            File dataDir,
            Optional<Map.Entry<TopicPartition, Long>> partitionToUpdateOrAdd,
            Optional<TopicPartition> partitionToRemove
    ) {
        inLock(lock, () -> {
            OffsetCheckpointFile checkpoint = checkpoints.get(dataDir);
            if (checkpoint != null) {
                try {
                    Map<TopicPartition, Long> currentCheckpoint = checkpoint.read().entrySet().stream()
                            .filter(entry -> logs.containsKey(entry.getKey()))
                            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

                    Map<TopicPartition, Long> updatedCheckpoint = new HashMap<>(currentCheckpoint);

                    // Remove the partition offset if present
                    partitionToRemove.ifPresent(updatedCheckpoint::remove);

                    // Update or add the partition offset if present
                    partitionToUpdateOrAdd.ifPresent(entry -> updatedCheckpoint.put(entry.getKey(), entry.getValue()));

                    // Write back the updated checkpoint
                    checkpoint.write(updatedCheckpoint);
                } catch (KafkaStorageException e) {
                    LOG.error("Failed to access checkpoint file {} in dir {}",
                            checkpoint.file().getName(), checkpoint.file().getParentFile().getAbsolutePath(), e);
                }
            }

            return null;
        });
    }

    /**
     * Alter the checkpoint directory for the topicPartition, to remove the data in sourceLogDir and add the data in destLogDir.
     */
    public void alterCheckpointDir(TopicPartition topicPartition, File sourceLogDir, File destLogDir) {
        inLock(lock, () -> {
            try {
                Optional<Long> offsetOpt = Optional.ofNullable(checkpoints.get(sourceLogDir))
                        .flatMap(checkpoint -> Optional.ofNullable(checkpoint.read().get(topicPartition)));

                offsetOpt.ifPresent(offset -> {
                    LOG.debug("Removing the partition offset data in checkpoint file for '{}' from {} directory.",
                            topicPartition, sourceLogDir.getAbsoluteFile());
                    updateCheckpoints(sourceLogDir, Optional.empty(), Optional.of(topicPartition));

                    LOG.debug("Adding the partition offset data in checkpoint file for '{}' to {} directory.",
                            topicPartition, destLogDir.getAbsoluteFile());
                    updateCheckpoints(destLogDir, Optional.of(Map.entry(topicPartition, offset)), Optional.empty());
                });
            } catch (KafkaStorageException e) {
                LOG.error("Failed to access checkpoint file in dir {}", sourceLogDir.getAbsolutePath(), e);
            }

            Set<TopicPartition> logUncleanablePartitions = uncleanablePartitions.getOrDefault(sourceLogDir.toString(), Collections.emptySet());
            if (logUncleanablePartitions.contains(topicPartition)) {
                logUncleanablePartitions.remove(topicPartition);
                markPartitionUncleanable(destLogDir.toString(), topicPartition);
            }

            return null;
        });
    }

    /**
     * Stop cleaning logs in the provided directory.
     *
     * @param dir the absolute path of the log dir
     */
    public void handleLogDirFailure(String dir) {
        LOG.warn("Stopping cleaning logs in dir {}", dir);
        inLock(lock, () -> {
            checkpoints = checkpoints.entrySet().stream()
                    .filter(entry -> !entry.getKey().getAbsolutePath().equals(dir))
                    .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

            return null;
        });
    }

    /**
     * Truncate the checkpointed offset for the given partition if its checkpointed offset is larger than the given offset.
     */
    public void maybeTruncateCheckpoint(File dataDir, TopicPartition topicPartition, long offset) {
        inLock(lock, () -> {
            if (logs.get(topicPartition).config().compact) {
                OffsetCheckpointFile checkpoint = checkpoints.get(dataDir);
                if (checkpoint != null) {
                    Map<TopicPartition, Long> existing = checkpoint.read();
                    if (existing.getOrDefault(topicPartition, 0L) > offset) {
                        existing.put(topicPartition, offset);
                        checkpoint.write(existing);
                    }
                }
            }

            return null;
        });
    }

    /**
     * Save out the endOffset and remove the given log from the in-progress set, if not aborted.
     */
    public void doneCleaning(TopicPartition topicPartition, File dataDir, long endOffset) {
        inLock(lock, () -> {
            LogCleaningState state = inProgress.get(topicPartition);

            if (state == null) {
                throw new IllegalStateException("State for partition " + topicPartition + " should exist.");
            } else if (state == LogCleaningState.LOG_CLEANING_IN_PROGRESS) {
                updateCheckpoints(dataDir, Optional.of(Map.entry(topicPartition, endOffset)), Optional.empty());
                inProgress.remove(topicPartition);
            } else if (state == LogCleaningState.LOG_CLEANING_ABORTED) {
                inProgress.put(topicPartition, LogCleaningState.logCleaningPaused(1));
                pausedCleaningCond.signalAll();
            } else {
                throw new IllegalStateException("In-progress partition " + topicPartition + " cannot be in " + state + " state.");
            }

            return null;
        });
    }

    public void doneDeleting(List<TopicPartition> topicPartitions) {
        inLock(lock, () -> {
            topicPartitions.forEach(topicPartition -> {
                LogCleaningState logCleaningState = inProgress.get(topicPartition);

                if (logCleaningState == null) {
                    throw new IllegalStateException("State for partition " + topicPartition + " should exist.");
                } else if (logCleaningState == LogCleaningState.LOG_CLEANING_IN_PROGRESS) {
                    inProgress.remove(topicPartition);
                } else if (logCleaningState == LogCleaningState.LOG_CLEANING_ABORTED) {
                    inProgress.put(topicPartition, LogCleaningState.logCleaningPaused(1));
                    pausedCleaningCond.signalAll();
                } else {
                    throw new IllegalStateException("In-progress partition " + topicPartition + " cannot be in " + logCleaningState + " state.");
                }
            });

            return null;
        });
    }

    /**
     * Returns an immutable set of the uncleanable partitions for a given log directory.
     * Only used for testing.
     */
    public Set<TopicPartition> uncleanablePartitions(String logDir) {
        return inLock(lock, () -> {
            Set<TopicPartition> partitions = uncleanablePartitions.get(logDir);
            return partitions != null ? Set.copyOf(partitions) : Set.of();
        });
    }

    public void markPartitionUncleanable(String logDir, TopicPartition partition) {
        inLock(lock, () -> {
            Set<TopicPartition> partitions = uncleanablePartitions.computeIfAbsent(logDir, dir -> new HashSet<>());
            partitions.add(partition);

            return null;
        });
    }

    private boolean isUncleanablePartition(UnifiedLog log, TopicPartition topicPartition) {
        return inLock(lock, () -> Optional.ofNullable(uncleanablePartitions.get(log.parentDir()))
                .map(partitions -> partitions.contains(topicPartition))
                .orElse(false)
        );
    }

    public void maintainUncleanablePartitions() {
        // Remove deleted partitions from uncleanablePartitions
        inLock(lock, () -> {
            // Remove deleted partitions
            uncleanablePartitions.values().forEach(partitions ->
                    partitions.removeIf(partition -> !logs.containsKey(partition)));

            // Remove entries with empty partition set.
            uncleanablePartitions.entrySet().removeIf(entry -> entry.getValue().isEmpty());

            return null;
        });
    }

    public void removeMetrics() {
        GAUGE_METRIC_NAME_NO_TAG.forEach(metricsGroup::removeMetric);
        gaugeMetricNameWithTag.forEach((metricName, tags) ->
                tags.forEach(tag -> metricsGroup.removeMetric(metricName, tag)));
        gaugeMetricNameWithTag.clear();
    }

    private static boolean isCompactAndDelete(UnifiedLog log) {
        return log.config().compact && log.config().delete;
    }

    /**
     * Get the maximum delay between the time when a log is required to be compacted,
     * as determined by maxCompactionLagMs, and the current time.
     */
    private static long maxCompactionDelay(UnifiedLog log, long firstDirtyOffset, long now) {
        List<LogSegment> dirtyNonActiveSegments = log.nonActiveLogSegmentsFrom(firstDirtyOffset);
        Stream<Long> firstBatchTimestamps = log.getFirstBatchTimestampForSegments(dirtyNonActiveSegments).stream()
                .filter(timestamp -> timestamp > 0);

        long earliestDirtySegmentTimestamp = firstBatchTimestamps.min(Comparator.naturalOrder()).orElse(Long.MAX_VALUE);

        long maxCompactionLagMs = Math.max(log.config().maxCompactionLagMs, 0L);
        long cleanUntilTime = now - maxCompactionLagMs;

        return earliestDirtySegmentTimestamp < cleanUntilTime ? cleanUntilTime - earliestDirtySegmentTimestamp : 0L;
    }

    /**
     * Returns the range of dirty offsets that can be cleaned.
     *
     * @param log the log
     * @param lastCleanOffset the last checkpointed offset
     * @param now the current time in milliseconds of the cleaning operation
     * @return OffsetsToClean containing offsets for the cleanable portion of the log and whether the log checkpoint needs updating
     * @throws IOException if an I/O error occurs
     */
    public static OffsetsToClean cleanableOffsets(UnifiedLog log, Optional<Long> lastCleanOffset, long now) throws IOException {
        // If the log segments are abnormally truncated and hence the checkpointed offset is no longer valid,
        // reset to the log start offset and log the error

        long logStartOffset = log.logStartOffset();
        long checkpointDirtyOffset = lastCleanOffset.orElse(logStartOffset);

        long firstDirtyOffset;
        boolean forceUpdateCheckpoint;

        if (checkpointDirtyOffset < logStartOffset) {
            // Don't bother with the warning if compact and delete are enabled.
            if (!isCompactAndDelete(log))
                LOG.warn("Resetting first dirty offset of {} to log start offset {} since the checkpointed offset {} is invalid.",
                        log.name(), logStartOffset, checkpointDirtyOffset);

            firstDirtyOffset = logStartOffset;
            forceUpdateCheckpoint = true;
        } else if (checkpointDirtyOffset > log.logEndOffset()) {
            // The dirty offset has gotten ahead of the log end offset. This could happen if there was data
            // corruption at the end of the log. We conservatively assume that the full log needs cleaning.
            LOG.warn("The last checkpoint dirty offset for partition {} is {}, " +
                    "which is larger than the log end offset {}. Resetting to the log start offset {}.",
                    log.name(), checkpointDirtyOffset, log.logEndOffset(), logStartOffset);

            firstDirtyOffset = logStartOffset;
            forceUpdateCheckpoint = true;
        } else {
            firstDirtyOffset = checkpointDirtyOffset;
            forceUpdateCheckpoint = false;
        }

        long minCompactionLagMs = Math.max(log.config().compactionLagMs, 0L);

        // Find the first segment that cannot be cleaned. We cannot clean past:
        // 1. The active segment
        // 2. The last stable offset (including the high watermark)
        // 3. Any segments closer to the head of the log than the minimum compaction lag time
        long firstUncleanableDirtyOffset = Stream.of(
                // we do not clean beyond the last stable offset
                Optional.of(log.lastStableOffset()),

                // the active segment is always uncleanable
                Optional.of(log.activeSegment().baseOffset()),

                // the first segment whose largest message timestamp is within a minimum time lag from now
                minCompactionLagMs > 0 ? findFirstUncleanableSegment(log, firstDirtyOffset, now, minCompactionLagMs) : Optional.<Long>empty()
        )
                .flatMap(Optional::stream)
                .min(Long::compare)
                .orElseThrow(() -> new IllegalStateException("No uncleanable offset found"));

        LOG.debug("Finding range of cleanable offsets for log={}. Last clean offset={} " +
                "now={} => firstDirtyOffset={} firstUncleanableOffset={} activeSegment.baseOffset={}",
                log.name(), lastCleanOffset, now, firstDirtyOffset, firstUncleanableDirtyOffset, log.activeSegment().baseOffset());

        return new OffsetsToClean(firstDirtyOffset, Math.max(firstDirtyOffset, firstUncleanableDirtyOffset), forceUpdateCheckpoint);
    }

    /**
     * Given the first dirty offset and an uncleanable offset, calculates the total cleanable bytes for this log.
     *
     * @return the biggest uncleanable offset and the total amount of cleanable bytes
     */
    public static Map.Entry<Long, Long> calculateCleanableBytes(UnifiedLog log, long firstDirtyOffset, long uncleanableOffset) {
        List<LogSegment> nonActiveSegments = log.nonActiveLogSegmentsFrom(uncleanableOffset);
        LogSegment firstUncleanableSegment = nonActiveSegments.isEmpty() ? log.activeSegment() : nonActiveSegments.get(0);
        long firstUncleanableOffset = firstUncleanableSegment.baseOffset();
        long cleanableBytes = log.logSegments(Math.min(firstDirtyOffset, firstUncleanableOffset), firstUncleanableOffset).stream()
                .mapToLong(LogSegment::size)
                .sum();

        return Map.entry(firstUncleanableOffset, cleanableBytes);
    }

    private static Optional<Long> findFirstUncleanableSegment(UnifiedLog log, long firstDirtyOffset, long now, long minCompactionLagMs) throws IOException {
        List<LogSegment> dirtyNonActiveSegments = log.nonActiveLogSegmentsFrom(firstDirtyOffset);

        for (LogSegment segment : dirtyNonActiveSegments) {
            boolean isUncleanable = segment.largestTimestamp() > now - minCompactionLagMs;

            LOG.debug("Checking if log segment may be cleaned: log='{}' segment.baseOffset={} " +
                    "segment.largestTimestamp={}; now - compactionLag={}; is uncleanable={}",
                    log.name(), segment.baseOffset(), segment.largestTimestamp(), now - minCompactionLagMs, isUncleanable);

            if (isUncleanable) {
                return Optional.of(segment.baseOffset());
            }
        }

        return Optional.empty();
    }

    /**
     * Helper class for the range of cleanable dirty offsets of a log and whether to update the checkpoint associated with
     * the log.
     *
     * @param firstDirtyOffset the lower (inclusive) offset to begin cleaning from
     * @param firstUncleanableDirtyOffset the upper (exclusive) offset to clean to
     * @param forceUpdateCheckpoint whether to update the checkpoint associated with this log. If true, the checkpoint should be
     *                              reset to firstDirtyOffset
     */
    public record OffsetsToClean(long firstDirtyOffset, long firstUncleanableDirtyOffset,
                                 boolean forceUpdateCheckpoint) {
    }
}
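An illustrative standalone sketch (not from this commit, with made-up offsets) of how cleanableOffsets() derives firstUncleanableDirtyOffset: the cleanable range ends at the earliest of the last stable offset, the active segment's base offset, and, when a minimum compaction lag is configured, the base offset of the first segment that is still too recent:

import java.util.Optional;
import java.util.stream.Stream;

public class UncleanableOffsetSketch {
    public static void main(String[] args) {
        long lastStableOffset = 5_000L;          // no cleaning past the last stable offset
        long activeSegmentBaseOffset = 4_200L;   // the active segment is always uncleanable
        Optional<Long> firstTooRecentSegment = Optional.of(3_800L); // from the min-lag check

        long firstUncleanableDirtyOffset = Stream.of(
                        Optional.of(lastStableOffset),
                        Optional.of(activeSegmentBaseOffset),
                        firstTooRecentSegment)
                .flatMap(Optional::stream)
                .min(Long::compare)
                .orElseThrow();

        System.out.println(firstUncleanableDirtyOffset); // 3800
    }
}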
@ -0,0 +1,34 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.storage.internals.log;

import org.apache.kafka.common.KafkaException;

/**
 * An exception indicating a failure during log cleaning operations.
 * This exception typically wraps the root cause of the cleaning failure and provides
 * additional context about the partition and log being cleaned.
 */
public class LogCleaningException extends KafkaException {
    public final UnifiedLog log;

    public LogCleaningException(UnifiedLog log, String message, Throwable cause) {
        super(message, cause);

        this.log = log;
    }
}
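A hypothetical sketch of how a caller might react to this exception, using only methods defined in this commit (grabFilthiestCompactedLog and markPartitionUncleanable); the helper name and surrounding structure are illustrative, not part of the commit:

import java.util.Optional;

import org.apache.kafka.common.utils.Time;
import org.apache.kafka.storage.internals.log.LogCleanerManager;
import org.apache.kafka.storage.internals.log.LogCleaningException;
import org.apache.kafka.storage.internals.log.LogToClean;
import org.apache.kafka.storage.internals.log.PreCleanStats;

public class CleanerErrorHandlingSketch {
    // Hypothetical helper: quarantine the failing partition so later scans skip it.
    static Optional<LogToClean> grabFilthiestSafely(LogCleanerManager manager, Time time) {
        try {
            return manager.grabFilthiestCompactedLog(time, new PreCleanStats());
        } catch (LogCleaningException e) {
            manager.markPartitionUncleanable(e.log.parentDir(), e.log.topicPartition());
            return Optional.empty();
        }
    }
}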
@ -0,0 +1,72 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.storage.internals.log;

import java.util.Objects;

/**
 * LogCleaningState defines the cleaning states that a TopicPartition can be in.
 */
public sealed interface LogCleaningState {
    LogCleaningInProgress LOG_CLEANING_IN_PROGRESS = new LogCleaningInProgress();

    LogCleaningAborted LOG_CLEANING_ABORTED = new LogCleaningAborted();

    static LogCleaningPaused logCleaningPaused(int pausedCount) {
        return new LogCleaningPaused(pausedCount);
    }

    final class LogCleaningInProgress implements LogCleaningState {
        private LogCleaningInProgress() {}
    }

    final class LogCleaningAborted implements LogCleaningState {
        private LogCleaningAborted() {}
    }

    final class LogCleaningPaused implements LogCleaningState {
        private final int pausedCount;

        private LogCleaningPaused(int pausedCount) {
            this.pausedCount = pausedCount;
        }

        public int pausedCount() {
            return pausedCount;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            LogCleaningPaused that = (LogCleaningPaused) o;
            return pausedCount == that.pausedCount;
        }

        @Override
        public int hashCode() {
            return Objects.hashCode(pausedCount);
        }

        @Override
        public String toString() {
            return "LogCleaningPaused{" +
                    "pausedCount=" + pausedCount +
                    '}';
        }
    }
}
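An illustrative standalone sketch of the pause-count semantics (each abortAndPauseCleaning() call adds one pause, each resumeCleaning() call removes one, and cleaning may resume only when no pauses remain); the driver class is hypothetical:

import org.apache.kafka.storage.internals.log.LogCleaningState;

public class PauseCountSketch {
    // Mirrors the transition in abortAndPauseCleaning(): a new pause on top of an
    // existing LogCleaningPaused(i) yields LogCleaningPaused(i + 1).
    static LogCleaningState pauseOnceMore(LogCleaningState state) {
        if (state instanceof LogCleaningState.LogCleaningPaused paused) {
            return LogCleaningState.logCleaningPaused(paused.pausedCount() + 1);
        }
        return LogCleaningState.logCleaningPaused(1);
    }

    public static void main(String[] args) {
        LogCleaningState s = LogCleaningState.logCleaningPaused(1);
        s = pauseOnceMore(s);
        System.out.println(s);                                               // LogCleaningPaused{pausedCount=2}
        System.out.println(s.equals(LogCleaningState.logCleaningPaused(2))); // true: value-based equality
    }
}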
@ -0,0 +1,136 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.storage.internals.log;

import org.apache.kafka.common.TopicPartition;

import java.util.Map;
import java.util.Objects;

/**
 * Helper class for a log, its topic/partition, the first cleanable position, the first uncleanable dirty position,
 * and whether it needs compaction immediately.
 */
public final class LogToClean implements Comparable<LogToClean> {
    private final TopicPartition topicPartition;
    private final UnifiedLog log;
    private final long firstDirtyOffset;
    private final boolean needCompactionNow;
    private final long cleanBytes;
    private final long firstUncleanableOffset;
    private final long cleanableBytes;
    private final long totalBytes;
    private final double cleanableRatio;

    public LogToClean(UnifiedLog log, long firstDirtyOffset, long uncleanableOffset, boolean needCompactionNow) {
        this.log = log;
        this.topicPartition = log.topicPartition();
        this.firstDirtyOffset = firstDirtyOffset;
        this.needCompactionNow = needCompactionNow;

        this.cleanBytes = log.logSegments(-1, firstDirtyOffset).stream()
                .mapToLong(LogSegment::size)
                .sum();

        Map.Entry<Long, Long> cleanableBytesResult = LogCleanerManager.calculateCleanableBytes(log, firstDirtyOffset, uncleanableOffset);
        this.firstUncleanableOffset = cleanableBytesResult.getKey();
        this.cleanableBytes = cleanableBytesResult.getValue();

        this.totalBytes = cleanBytes + cleanableBytes;
        this.cleanableRatio = (double) cleanableBytes / totalBytes;
    }

    public TopicPartition topicPartition() {
        return topicPartition;
    }

    public UnifiedLog log() {
        return log;
    }

    public long firstDirtyOffset() {
        return firstDirtyOffset;
    }

    boolean needCompactionNow() {
        return needCompactionNow;
    }

    public long cleanBytes() {
        return cleanBytes;
    }

    public long firstUncleanableOffset() {
        return firstUncleanableOffset;
    }

    public long cleanableBytes() {
        return cleanableBytes;
    }

    public long totalBytes() {
        return totalBytes;
    }

    public double cleanableRatio() {
        return cleanableRatio;
    }

    @Override
    public int compareTo(LogToClean that) {
        return Double.compare(this.cleanableRatio, that.cleanableRatio);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        LogToClean that = (LogToClean) o;
        return firstDirtyOffset == that.firstDirtyOffset &&
                needCompactionNow == that.needCompactionNow &&
                cleanBytes == that.cleanBytes &&
                firstUncleanableOffset == that.firstUncleanableOffset &&
                cleanableBytes == that.cleanableBytes &&
                totalBytes == that.totalBytes &&
                Double.compare(that.cleanableRatio, cleanableRatio) == 0 &&
                topicPartition.equals(that.topicPartition) &&
                log.equals(that.log);
    }

    @Override
    public int hashCode() {
        return Objects.hash(
                topicPartition, log, firstDirtyOffset, needCompactionNow, cleanBytes,
                firstUncleanableOffset, cleanableBytes, totalBytes, cleanableRatio
        );
    }

    @Override
    public String toString() {
        return "LogToClean{" +
                "topicPartition=" + topicPartition +
                ", log=" + log +
                ", firstDirtyOffset=" + firstDirtyOffset +
                ", needCompactionNow=" + needCompactionNow +
                ", cleanBytes=" + cleanBytes +
                ", firstUncleanableOffset=" + firstUncleanableOffset +
                ", cleanableBytes=" + cleanableBytes +
                ", totalBytes=" + totalBytes +
                ", cleanableRatio=" + cleanableRatio +
                '}';
    }
}
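As a worked example with made-up sizes: a log with cleanBytes = 400 MB (the already-cleaned head, before firstDirtyOffset) and cleanableBytes = 600 MB (dirty but cleanable) has totalBytes = 1000 MB and cleanableRatio = 600 / 1000 = 0.6. That passes the default min.cleanable.dirty.ratio of 0.5, and grabFilthiestCompactedLog() would rank this log above any other candidate with a lower ratio.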
@ -0,0 +1,54 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.storage.internals.log;

/**
 * A simple struct for collecting pre-clean stats.
 */
public class PreCleanStats {
    private long maxCompactionDelayMs = 0L;
    private int delayedPartitions = 0;
    private int cleanablePartitions = 0;

    public void updateMaxCompactionDelay(long delayMs) {
        maxCompactionDelayMs = Math.max(maxCompactionDelayMs, delayMs);
        if (delayMs > 0) {
            delayedPartitions++;
        }
    }

    public void recordCleanablePartitions(int numOfCleanables) {
        cleanablePartitions = numOfCleanables;
    }

    public int cleanablePartitions() {
        return cleanablePartitions;
    }

    public int delayedPartitions() {
        return delayedPartitions;
    }

    public long maxCompactionDelayMs() {
        return maxCompactionDelayMs;
    }

    // for testing
    public void maxCompactionDelayMs(long maxCompactionDelayMs) {
        this.maxCompactionDelayMs = maxCompactionDelayMs;
    }
}
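A minimal usage sketch for PreCleanStats with illustrative values; the driver class is hypothetical:

import org.apache.kafka.storage.internals.log.PreCleanStats;

public class PreCleanStatsSketch {
    public static void main(String[] args) {
        PreCleanStats stats = new PreCleanStats();
        stats.updateMaxCompactionDelay(0L);     // on-time partition: not counted as delayed
        stats.updateMaxCompactionDelay(2_500L); // overdue partition: counted, raises the max
        stats.recordCleanablePartitions(3);

        System.out.println(stats.maxCompactionDelayMs()); // 2500
        System.out.println(stats.delayedPartitions());    // 1
        System.out.println(stats.cleanablePartitions());  // 3
    }
}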