Speed up `Jenkins._cleanUpDisconnectComputers`

This commit is contained in:
Jesse Glick 2025-09-24 15:26:51 -04:00
parent 59691663a5
commit cfebcec2e8
No known key found for this signature in database
GPG Key ID: 1DDA69D94B624311
3 changed files with 13 additions and 27 deletions

View File

@ -94,10 +94,6 @@ public abstract class AbstractCIBase extends Node implements ItemGroup<TopLevelI
ViewJob.interruptReloadThread(); ViewJob.interruptReloadThread();
} }
protected void killComputer(Computer c) {
c.kill();
}
private final Set<String> disabledAdministrativeMonitors = new HashSet<>(); private final Set<String> disabledAdministrativeMonitors = new HashSet<>();
/** /**
@ -267,12 +263,12 @@ public abstract class AbstractCIBase extends Node implements ItemGroup<TopLevelI
// we need to start the process of reducing the executors on all computers as distinct // we need to start the process of reducing the executors on all computers as distinct
// from the killing action which should not excessively use the Queue lock. // from the killing action which should not excessively use the Queue lock.
for (Computer c : old) { for (Computer c : old) {
c.inflictMortalWound(); c.setNumExecutors(0);
} }
}); });
for (Computer c : old) { for (Computer c : old) {
// when we get to here, the number of executors should be zero so this call should not need the Queue.lock // when we get to here, the number of executors should be zero so this call should not need the Queue.lock
killComputer(c); c.kill();
} }
getQueue().scheduleMaintenance(); getQueue().scheduleMaintenance();
Listeners.notify(ComputerListener.class, false, ComputerListener::onConfigurationChange); Listeners.notify(ComputerListener.class, false, ComputerListener::onConfigurationChange);

View File

@ -827,23 +827,6 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces
setNumExecutors(0); setNumExecutors(0);
} }
/**
* Called by {@link Jenkins#updateComputerList(boolean, Collection)} to notify {@link Computer} that it will be discarded.
*
* <p>
* Note that at this point {@link #getNode()} returns null.
*
* <p>
* Note that the Queue lock is already held when this method is called.
*
* @see #onRemoved()
*/
@Restricted(NoExternalUse.class)
@GuardedBy("hudson.model.Queue.lock")
/*package*/ void inflictMortalWound() {
setNumExecutors(0);
}
/** /**
* Called by {@link Jenkins} when this computer is removed. * Called by {@link Jenkins} when this computer is removed.
* *
@ -865,7 +848,7 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces
* Calling path, *means protected by Queue.withLock * Calling path, *means protected by Queue.withLock
* *
* Computer.doConfigSubmit -> Computer.replaceBy ->Jenkins.setNodes* ->Computer.setNode * Computer.doConfigSubmit -> Computer.replaceBy ->Jenkins.setNodes* ->Computer.setNode
* AbstractCIBase.updateComputerList->Computer.inflictMortalWound* * AbstractCIBase.updateComputerList->Computer.setNumExecutors*
* AbstractCIBase.updateComputerList->AbstractCIBase.updateComputer* ->Computer.setNode * AbstractCIBase.updateComputerList->AbstractCIBase.updateComputer* ->Computer.setNode
* AbstractCIBase.updateComputerList->Computer.kill
* Computer.constructor->Computer.setNode * Computer.constructor->Computer.setNode
@ -873,8 +856,9 @@ public /*transient*/ abstract class Computer extends Actionable implements Acces
* *
* @param n number of executors * @param n number of executors
*/ */
@Restricted(NoExternalUse.class)
@GuardedBy("hudson.model.Queue.lock") @GuardedBy("hudson.model.Queue.lock")
private void setNumExecutors(int n) { public void setNumExecutors(int n) {
this.numExecutors = n; this.numExecutors = n;
final int diff = executors.size() - n; final int diff = executors.size() - n;

View File

@ -3775,7 +3775,7 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve
for (Computer c : getComputersCollection()) { for (Computer c : getComputersCollection()) {
try { try {
c.interrupt(); c.interrupt();
killComputer(c); c.setNumExecutors(0);
pending.add(c.disconnect(null)); pending.add(c.disconnect(null));
} catch (OutOfMemoryError e) { } catch (OutOfMemoryError e) {
// we should just propagate this, no point trying to log // we should just propagate this, no point trying to log
@ -3950,9 +3950,15 @@ public class Jenkins extends AbstractCIBase implements DirectlyModifiableTopLeve
if (!pending.isEmpty()) { if (!pending.isEmpty()) {
LOGGER.log(Main.isUnitTest ? Level.FINE : Level.INFO, "Waiting for node disconnection completion"); LOGGER.log(Main.isUnitTest ? Level.FINE : Level.INFO, "Waiting for node disconnection completion");
} }
long end = System.nanoTime() + Duration.ofSeconds(10).toNanos();
for (Future<?> f : pending) { for (Future<?> f : pending) {
try { try {
f.get(10, TimeUnit.SECONDS); // if clean up operation didn't complete in time, we fail the test long remaining = end - System.nanoTime();
if (remaining <= 0) {
LOGGER.warning("Ran out of time waiting for agents to disconnect");
break;
}
f.get(remaining, TimeUnit.NANOSECONDS);
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
break; // someone wants us to die now. quick! break; // someone wants us to die now. quick!