KAFKA-19334 MetadataShell execution unintentionally deletes lock file (#19817)

## Summary
- MetadataShell may deletes lock file unintentionally when it exists or
fails to acquire lock. If there's running server, this causes unexpected
result as below:
  * MetadataShell succeeds on 2nd run unexpectedly
  * Even worse, LogManager/RaftManager's lock also no longer work from
concurrent Kafka process startup

Reviewers: TengYao Chi <frankvicky@apache.org>
This commit is contained in:
Okada Haruki 2025-06-09 13:24:26 +09:00 committed by GitHub
parent df73133f3b
commit e2500186cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 21 additions and 9 deletions

View File

@ -91,4 +91,12 @@ public class FileLock {
} }
channel.close(); channel.close();
} }
/**
* Unlock the file and close the associated FileChannel
*/
public synchronized void unlockAndClose() throws IOException {
unlock();
channel.close();
}
} }

View File

@ -114,7 +114,7 @@ public final class MetadataShell {
"directory before proceeding."); "directory before proceeding.");
} }
} catch (Throwable e) { } catch (Throwable e) {
fileLock.destroy(); fileLock.unlockAndClose();
throw e; throw e;
} }
return fileLock; return fileLock;
@ -186,9 +186,9 @@ public final class MetadataShell {
Utils.closeQuietly(snapshotFileReader, "snapshotFileReader"); Utils.closeQuietly(snapshotFileReader, "snapshotFileReader");
if (fileLock != null) { if (fileLock != null) {
try { try {
fileLock.destroy(); fileLock.unlockAndClose();
} catch (Exception e) { } catch (Exception e) {
log.error("Error destroying fileLock", e); log.error("Error cleaning up fileLock", e);
} finally { } finally {
fileLock = null; fileLock = null;
} }

View File

@ -97,12 +97,16 @@ public class MetadataShellIntegrationTest {
FileLock fileLock = new FileLock(new File(env.tempDir, ".lock")); FileLock fileLock = new FileLock(new File(env.tempDir, ".lock"));
try { try {
fileLock.lock(); fileLock.lock();
assertEquals("Unable to lock " + env.tempDir.getAbsolutePath() + // We had a bug where the shell can lock the directory unintentionally
". Please ensure that no broker or controller process is using this " + // at the 2nd run, so we check that it fails (See KAFKA-19334)
"directory before proceeding.", for (int i = 0; i < 2; i++) {
assertThrows(RuntimeException.class, assertEquals("Unable to lock " + env.tempDir.getAbsolutePath() +
() -> env.shell.run(List.of())). ". Please ensure that no broker or controller process is using this " +
getMessage()); "directory before proceeding.",
assertThrows(RuntimeException.class,
() -> env.shell.run(List.of())).
getMessage());
}
} finally { } finally {
fileLock.destroy(); fileLock.destroy();
} }