Speed up `Jenkins._cleanUpDisconnectComputers`

This commit is contained in:
Jesse Glick 2025-09-24 15:26:51 -04:00
parent 59691663a5
commit cfebcec2e8
No known key found for this signature in database
GPG Key ID: 1DDA69D94B624311
3 changed files with 13 additions and 27 deletions

View File

@ -94,10 +94,6 @@ public abstract class AbstractCIBase extends Node implements ItemGroup<TopLevelI
ViewJob.interruptReloadThread();
}
/**
* Kills the given computer by delegating to {@link Computer#kill()}.
*
* @param c the computer to kill
*/
protected void killComputer(Computer c) {
c.kill();
}
private final Set<String> disabledAdministrativeMonitors = new HashSet<>();
/**
@ -267,12 +263,12 @@ public abstract class AbstractCIBase extends Node implements ItemGroup<TopLevelI
// we need to start the process of reducing the executors on all computers as distinct
// from the killing action which should not excessively use the Queue lock.
for (Computer c : old) {
c.inflictMortalWound();
c.setNumExecutors(0);
}
});
for (Computer c : old) {
// when we get to here, the number of executors should be zero so this call should not need the Queue.lock
killComputer(c);
c.kill();
}
getQueue().scheduleMaintenance();
Listeners.notify(ComputerListener.class, false, ComputerListener::onConfigurationChange);

View File

@ -827,23 +827,6 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces
setNumExecutors(0);
}
/**
* Called by {@link Jenkins#updateComputerList(boolean, Collection)} to notify {@link Computer} that it will be discarded.
*
* <p>
* This implementation drops the executor count to zero by calling {@code setNumExecutors(0)}.
*
* <p>
* Note that at this point {@link #getNode()} returns null.
*
* <p>
* Note that the Queue lock is already held when this method is called.
*
* @see #onRemoved()
*/
@Restricted(NoExternalUse.class)
@GuardedBy("hudson.model.Queue.lock")
/*package*/ void inflictMortalWound() {
setNumExecutors(0);
}
/**
* Called by {@link Jenkins} when this computer is removed.
*
@ -865,7 +848,7 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces
* Calling path, *means protected by Queue.withLock
*
* Computer.doConfigSubmit -> Computer.replaceBy ->Jenkins.setNodes* ->Computer.setNode
* AbstractCIBase.updateComputerList->Computer.inflictMortalWound*
* AbstractCIBase.updateComputerList->Computer.setNumExecutors*
* AbstractCIBase.updateComputerList->AbstractCIBase.updateComputer* ->Computer.setNode
* AbstractCIBase.updateComputerList->Computer.kill
* Computer.constructor->Computer.setNode
@ -873,8 +856,9 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces
*
* @param n number of executors
*/
@Restricted(NoExternalUse.class)
@GuardedBy("hudson.model.Queue.lock")
private void setNumExecutors(int n) {
public void setNumExecutors(int n) {
this.numExecutors = n;
final int diff = executors.size() - n;

View File

@ -3775,7 +3775,7 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve
for (Computer c : getComputersCollection()) {
try {
c.interrupt();
killComputer(c);
c.setNumExecutors(0);
pending.add(c.disconnect(null));
} catch (OutOfMemoryError e) {
// we should just propagate this, no point trying to log
@ -3950,9 +3950,15 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve
if (!pending.isEmpty()) {
LOGGER.log(Main.isUnitTest ? Level.FINE : Level.INFO, "Waiting for node disconnection completion");
}
long end = System.nanoTime() + Duration.ofSeconds(10).toNanos();
for (Future<?> f : pending) {
try {
f.get(10, TimeUnit.SECONDS); // if clean up operation didn't complete in time, we fail the test
long remaining = end - System.nanoTime();
if (remaining <= 0) {
LOGGER.warning("Ran out of time waiting for agents to disconnect");
break;
}
f.get(remaining, TimeUnit.NANOSECONDS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break; // someone wants us to die now. quick!