From fe3f0aa252158bd45eb0bd68e21b362bf865532a Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Mon, 28 Jul 2025 10:53:57 +0800 Subject: [PATCH] Fix some daily CI issues (#14217) 1) Fix the timeout of `Active defrag big keys: standalone` Using a pipe to write commands may cause the write to block if the read buffer becomes full. 2) Fix the failure of `Main db not affected when fail to diskless load` test If the master was killed in a slow environment, then after `cluster-node-timeout` (3s in our test), running keyspace commands on the replica will get a CLUSTERDOWN error. 3) Fix the failure of `Test shutdown hook` test ASAN can intercept a signal, so I guess that when we send SIGCONT after SIGTERM to kill the server, it might start doing some work again, causing the process to close very slowly. --- .../cluster/tests/17-diskless-load-swapdb.tcl | 11 +++++-- tests/instances.tcl | 3 +- tests/support/server.tcl | 3 +- tests/unit/memefficiency.tcl | 33 ++++++++++++++----- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/tests/cluster/tests/17-diskless-load-swapdb.tcl b/tests/cluster/tests/17-diskless-load-swapdb.tcl index 7a56ec783..cb81b9fdb 100644 --- a/tests/cluster/tests/17-diskless-load-swapdb.tcl +++ b/tests/cluster/tests/17-diskless-load-swapdb.tcl @@ -80,7 +80,14 @@ test "Main db not affected when fail to diskless load" { fail "Fail to full sync" } - # Replica keys and keys to slots map still both are right - assert_equal {1} [$replica get $slot0_key] + # Replica keys and keys to slots map still both are right. + # CLUSTERDOWN errors are acceptable here because the cluster may be in a transient state + # due to the timing relationship with cluster-node-timeout. 
+ if {[catch {$replica get $slot0_key} result]} { + assert_match "*CLUSTERDOWN*" $result + } else { + assert_equal {1} $result + } + assert_equal $slot0_key [$replica CLUSTER GETKEYSINSLOT 0 1] } diff --git a/tests/instances.tcl b/tests/instances.tcl index 7406f14c1..05b8507a1 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -221,9 +221,10 @@ proc is_alive pid { } proc stop_instance pid { - catch {exec kill $pid} # Node might have been stopped in the test + # Send SIGCONT before SIGTERM, otherwise shutdown may be slow with ASAN. catch {exec kill -SIGCONT $pid} + catch {exec kill $pid} if {$::valgrind} { set max_wait 120000 } else { diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 9640fa547..47e347be4 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -95,9 +95,10 @@ proc kill_server config { # kill server and wait for the process to be totally exited send_data_packet $::test_server_fd server-killing $pid - catch {exec kill $pid} # Node might have been stopped in the test + # Send SIGCONT before SIGTERM, otherwise shutdown may be slow with ASAN. 
catch {exec kill -SIGCONT $pid} + catch {exec kill $pid} if {$::valgrind} { set max_wait 120000 } else { diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index af516f7fe..71d0e511d 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -334,31 +334,46 @@ run_solo {defrag} { set expected_frag 1.49 if {$::accurate} { # scale the hash to 1m fields in order to have a measurable the latency + set count 0 for {set j 10000} {$j < 1000000} {incr j} { $rd hset bighash $j [concat "asdfasdfasdf" $j] - } - for {set j 10000} {$j < 1000000} {incr j} { - $rd read ; # Discard replies + + incr count + if {$count % 10000 == 0} { + for {set k 0} {$k < 10000} {incr k} { + $rd read ; # Discard replies + } + } } # creating that big hash, increased used_memory, so the relative frag goes down set expected_frag 1.3 } # add a mass of string keys + set count 0 for {set j 0} {$j < 500000} {incr j} { $rd setrange $j 150 a - } - for {set j 0} {$j < 500000} {incr j} { - $rd read ; # Discard replies + + incr count + if {$count % 10000 == 0} { + for {set k 0} {$k < 10000} {incr k} { + $rd read ; # Discard replies + } + } } assert_equal [r dbsize] 500016 # create some fragmentation + set count 0 for {set j 0} {$j < 500000} {incr j 2} { $rd del $j - } - for {set j 0} {$j < 500000} {incr j 2} { - $rd read ; # Discard replies + + incr count + if {$count % 10000 == 0} { + for {set k 0} {$k < 10000} {incr k} { + $rd read ; # Discard replies + } + } } assert_equal [r dbsize] 250016