KAFKA-19334 MetadataShell execution unintentionally deletes lock file (#19817)
CI / build (push) Has been cancelled Details

## Summary
- MetadataShell may deletes lock file unintentionally when it exists or
fails to acquire lock. If there's running server, this causes unexpected
result as below:
  * MetadataShell succeeds on 2nd run unexpectedly
  * Even worse, LogManager/RaftManager's lock also no longer work from
concurrent Kafka process startup

Reviewers: TengYao Chi <frankvicky@apache.org>
# Conflicts:
#	shell/src/test/java/org/apache/kafka/shell/MetadataShellIntegrationTest.java
This commit is contained in:
Okada Haruki 2025-06-09 13:24:26 +09:00 committed by frankvicky
parent ded7653066
commit 1cc14f6343
3 changed files with 23 additions and 11 deletions

View File

@ -91,4 +91,12 @@ public class FileLock {
}
channel.close();
}
/**
* Unlock the file and close the associated FileChannel
*/
public synchronized void unlockAndClose() throws IOException {
unlock();
channel.close();
}
}

View File

@ -128,7 +128,7 @@ public final class MetadataShell {
"directory before proceeding.");
}
} catch (Throwable e) {
fileLock.destroy();
fileLock.unlockAndClose();
throw e;
}
return fileLock;
@ -232,9 +232,9 @@ public final class MetadataShell {
Utils.closeQuietly(snapshotFileReader, "raftManager");
if (fileLock != null) {
try {
fileLock.destroy();
fileLock.unlockAndClose();
} catch (Exception e) {
log.error("Error destroying fileLock", e);
log.error("Error cleaning up fileLock", e);
} finally {
fileLock = null;
}

View File

@ -33,7 +33,7 @@ import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -91,18 +91,22 @@ public class MetadataShellIntegrationTest {
if (canLock) {
assertEquals(NoSuchFileException.class,
assertThrows(ExecutionException.class,
() -> env.shell.run(Collections.emptyList())).
() -> env.shell.run(List.of())).
getCause().getClass());
} else {
FileLock fileLock = new FileLock(new File(env.tempDir, ".lock"));
try {
fileLock.lock();
// We had a bug where the shell can lock the directory unintentionally
// at the 2nd run, so we check that it fails (See KAFKA-19334)
for (int i = 0; i < 2; i++) {
assertEquals("Unable to lock " + env.tempDir.getAbsolutePath() +
". Please ensure that no broker or controller process is using this " +
"directory before proceeding.",
assertThrows(RuntimeException.class,
() -> env.shell.run(Collections.emptyList())).
() -> env.shell.run(List.of())).
getMessage());
}
} finally {
fileLock.destroy();
}