KAFKA-19334 MetadataShell execution unintentionally deletes lock file (#19817)

## Summary
- MetadataShell may deletes lock file unintentionally when it exists or
fails to acquire lock. If there's running server, this causes unexpected
result as below:
  * MetadataShell succeeds on 2nd run unexpectedly
  * Even worse, LogManager/RaftManager's lock also no longer work from
concurrent Kafka process startup

Reviewers: TengYao Chi <frankvicky@apache.org>
This commit is contained in:
Okada Haruki 2025-06-09 13:24:26 +09:00 committed by GitHub
parent df73133f3b
commit e2500186cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 21 additions and 9 deletions

View File

@ -91,4 +91,12 @@ public class FileLock {
}
channel.close();
}
/**
* Unlock the file and close the associated FileChannel
*/
public synchronized void unlockAndClose() throws IOException {
unlock();
channel.close();
}
}

View File

@ -114,7 +114,7 @@ public final class MetadataShell {
"directory before proceeding.");
}
} catch (Throwable e) {
fileLock.destroy();
fileLock.unlockAndClose();
throw e;
}
return fileLock;
@ -186,9 +186,9 @@ public final class MetadataShell {
Utils.closeQuietly(snapshotFileReader, "snapshotFileReader");
if (fileLock != null) {
try {
fileLock.destroy();
fileLock.unlockAndClose();
} catch (Exception e) {
log.error("Error destroying fileLock", e);
log.error("Error cleaning up fileLock", e);
} finally {
fileLock = null;
}

View File

@ -97,12 +97,16 @@ public class MetadataShellIntegrationTest {
FileLock fileLock = new FileLock(new File(env.tempDir, ".lock"));
try {
fileLock.lock();
assertEquals("Unable to lock " + env.tempDir.getAbsolutePath() +
". Please ensure that no broker or controller process is using this " +
"directory before proceeding.",
assertThrows(RuntimeException.class,
() -> env.shell.run(List.of())).
getMessage());
// We had a bug where the shell can lock the directory unintentionally
// at the 2nd run, so we check that it fails (See KAFKA-19334)
for (int i = 0; i < 2; i++) {
assertEquals("Unable to lock " + env.tempDir.getAbsolutePath() +
". Please ensure that no broker or controller process is using this " +
"directory before proceeding.",
assertThrows(RuntimeException.class,
() -> env.shell.run(List.of())).
getMessage());
}
} finally {
fileLock.destroy();
}