KAFKA-19334 MetadataShell execution unintentionally deletes lock file (#19817)
CI / build (push) Has been cancelled Details

## Summary
- MetadataShell may deletes lock file unintentionally when it exists or
fails to acquire lock. If there's running server, this causes unexpected
result as below:
  * MetadataShell succeeds on 2nd run unexpectedly
  * Even worse, LogManager/RaftManager's lock also no longer work from
concurrent Kafka process startup

Reviewers: TengYao Chi <frankvicky@apache.org>
# Conflicts:
#	shell/src/test/java/org/apache/kafka/shell/MetadataShellIntegrationTest.java
This commit is contained in:
Okada Haruki 2025-06-09 13:24:26 +09:00 committed by frankvicky
parent ded7653066
commit 1cc14f6343
3 changed files with 23 additions and 11 deletions

View File

@ -91,4 +91,12 @@ public class FileLock {
} }
channel.close(); channel.close();
} }
/**
* Unlock the file and close the associated FileChannel
*/
public synchronized void unlockAndClose() throws IOException {
unlock();
channel.close();
}
} }

View File

@ -128,7 +128,7 @@ public final class MetadataShell {
"directory before proceeding."); "directory before proceeding.");
} }
} catch (Throwable e) { } catch (Throwable e) {
fileLock.destroy(); fileLock.unlockAndClose();
throw e; throw e;
} }
return fileLock; return fileLock;
@ -232,9 +232,9 @@ public final class MetadataShell {
Utils.closeQuietly(snapshotFileReader, "raftManager"); Utils.closeQuietly(snapshotFileReader, "raftManager");
if (fileLock != null) { if (fileLock != null) {
try { try {
fileLock.destroy(); fileLock.unlockAndClose();
} catch (Exception e) { } catch (Exception e) {
log.error("Error destroying fileLock", e); log.error("Error cleaning up fileLock", e);
} finally { } finally {
fileLock = null; fileLock = null;
} }

View File

@ -33,7 +33,7 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.NoSuchFileException; import java.nio.file.NoSuchFileException;
import java.util.Collections; import java.util.List;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
@ -91,18 +91,22 @@ public class MetadataShellIntegrationTest {
if (canLock) { if (canLock) {
assertEquals(NoSuchFileException.class, assertEquals(NoSuchFileException.class,
assertThrows(ExecutionException.class, assertThrows(ExecutionException.class,
() -> env.shell.run(Collections.emptyList())). () -> env.shell.run(List.of())).
getCause().getClass()); getCause().getClass());
} else { } else {
FileLock fileLock = new FileLock(new File(env.tempDir, ".lock")); FileLock fileLock = new FileLock(new File(env.tempDir, ".lock"));
try { try {
fileLock.lock(); fileLock.lock();
assertEquals("Unable to lock " + env.tempDir.getAbsolutePath() + // We had a bug where the shell can lock the directory unintentionally
". Please ensure that no broker or controller process is using this " + // at the 2nd run, so we check that it fails (See KAFKA-19334)
"directory before proceeding.", for (int i = 0; i < 2; i++) {
assertThrows(RuntimeException.class, assertEquals("Unable to lock " + env.tempDir.getAbsolutePath() +
() -> env.shell.run(Collections.emptyList())). ". Please ensure that no broker or controller process is using this " +
getMessage()); "directory before proceeding.",
assertThrows(RuntimeException.class,
() -> env.shell.run(List.of())).
getMessage());
}
} finally { } finally {
fileLock.destroy(); fileLock.destroy();
} }