Fix some daily CI issues (#14217)
CI / test-ubuntu-latest (push) Waiting to run Details
CI / test-sanitizer-address (push) Waiting to run Details
CI / build-debian-old (push) Waiting to run Details
CI / build-macos-latest (push) Waiting to run Details
CI / build-32bit (push) Waiting to run Details
CI / build-libc-malloc (push) Waiting to run Details
CI / build-centos-jemalloc (push) Waiting to run Details
CI / build-old-chain-jemalloc (push) Waiting to run Details
Codecov / code-coverage (push) Waiting to run Details
External Server Tests / test-external-standalone (push) Waiting to run Details
External Server Tests / test-external-cluster (push) Waiting to run Details
External Server Tests / test-external-nodebug (push) Waiting to run Details
Spellcheck / Spellcheck (push) Waiting to run Details

1) Fix the timeout of `Active defrag big keys: standalone`
Using a pipe to write commands may cause the write to block if the read
buffer becomes full.

2) Fix the failure of `Main db not affected when fail to diskless load`
test
If the master was killed in a slow environment, then after
`cluster-node-timeout` (3s in our test), running keyspace commands on
the replica will get a CLUSTERDOWN error.

3) Fix the failure of `Test shutdown hook` test
ASAN can intercept a signal, so I guess that when we send SIGCONT after
SIGTERM to kill the server, it might start doing some work again,
causing the process to close very slowly.
This commit is contained in:
debing.sun 2025-07-28 10:53:57 +08:00 committed by GitHub
parent ecd5e639ed
commit fe3f0aa252
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 37 additions and 13 deletions

View File

@ -80,7 +80,14 @@ test "Main db not affected when fail to diskless load" {
fail "Fail to full sync"
}
# Replica keys and keys to slots map still both are right
assert_equal {1} [$replica get $slot0_key]
# Replica keys and keys to slots map still both are right.
# CLUSTERDOWN errors are acceptable here because the cluster may be in a transient state
# due to the timing relationship with cluster-node-timeout.
if {[catch {$replica get $slot0_key} result]} {
assert_match "*CLUSTERDOWN*" $result
} else {
assert_equal {1} $result
}
assert_equal $slot0_key [$replica CLUSTER GETKEYSINSLOT 0 1]
}

View File

@ -221,9 +221,10 @@ proc is_alive pid {
}
proc stop_instance pid {
catch {exec kill $pid}
# Node might have been stopped in the test
# Send SIGCONT before SIGTERM, otherwise shutdown may be slow with ASAN.
catch {exec kill -SIGCONT $pid}
catch {exec kill $pid}
if {$::valgrind} {
set max_wait 120000
} else {

View File

@ -95,9 +95,10 @@ proc kill_server config {
# kill server and wait for the process to be totally exited
send_data_packet $::test_server_fd server-killing $pid
catch {exec kill $pid}
# Node might have been stopped in the test
# Send SIGCONT before SIGTERM, otherwise shutdown may be slow with ASAN.
catch {exec kill -SIGCONT $pid}
catch {exec kill $pid}
if {$::valgrind} {
set max_wait 120000
} else {

View File

@ -334,32 +334,47 @@ run_solo {defrag} {
set expected_frag 1.49
if {$::accurate} {
# scale the hash to 1m fields in order to have a measurable latency
set count 0
for {set j 10000} {$j < 1000000} {incr j} {
$rd hset bighash $j [concat "asdfasdfasdf" $j]
}
for {set j 10000} {$j < 1000000} {incr j} {
incr count
if {$count % 10000 == 0} {
for {set k 0} {$k < 10000} {incr k} {
$rd read ; # Discard replies
}
}
}
# creating that big hash, increased used_memory, so the relative frag goes down
set expected_frag 1.3
}
# add a mass of string keys
set count 0
for {set j 0} {$j < 500000} {incr j} {
$rd setrange $j 150 a
}
for {set j 0} {$j < 500000} {incr j} {
incr count
if {$count % 10000 == 0} {
for {set k 0} {$k < 10000} {incr k} {
$rd read ; # Discard replies
}
}
}
assert_equal [r dbsize] 500016
# create some fragmentation
set count 0
for {set j 0} {$j < 500000} {incr j 2} {
$rd del $j
}
for {set j 0} {$j < 500000} {incr j 2} {
incr count
if {$count % 10000 == 0} {
for {set k 0} {$k < 10000} {incr k} {
$rd read ; # Discard replies
}
}
}
assert_equal [r dbsize] 250016
# start defrag