mirror of https://github.com/redis/redis.git
				
				
				
			make replication tests more stable on slow machines
Solve a few race conditions in replication-related tests that cause failures on slow machines. Bugfix in the slave buffers test: since the test is executed twice, each time with a different command count, the threshold for the delta can't be a constant.
This commit is contained in:
		
							parent
							
								
									0a6090bfd8
								
							
						
					
					
						commit
						ba809f26d4
					
				|  | @ -166,12 +166,15 @@ start_server {} { | ||||||
|         # Pick a random slave |         # Pick a random slave | ||||||
|         set slave_id [expr {($master_id+1)%5}] |         set slave_id [expr {($master_id+1)%5}] | ||||||
|         set sync_count [status $R($master_id) sync_full] |         set sync_count [status $R($master_id) sync_full] | ||||||
|  |         set sync_partial [status $R($master_id) sync_partial_ok] | ||||||
|         catch { |         catch { | ||||||
|             $R($slave_id) config rewrite |             $R($slave_id) config rewrite | ||||||
|             $R($slave_id) debug restart |             $R($slave_id) debug restart | ||||||
|         } |         } | ||||||
|  |         # note: just waiting for connected_slaves==4 has a race condition since | ||||||
|  |         # we might do the check before the master realized that the slave disconnected | ||||||
|         wait_for_condition 50 1000 { |         wait_for_condition 50 1000 { | ||||||
|             [status $R($master_id) connected_slaves] == 4 |             [status $R($master_id) sync_partial_ok] == $sync_partial + 1 | ||||||
|         } else { |         } else { | ||||||
|             fail "Replica not reconnecting" |             fail "Replica not reconnecting" | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  | @ -79,6 +79,32 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec | ||||||
|                 stop_bg_complex_data $load_handle0 |                 stop_bg_complex_data $load_handle0 | ||||||
|                 stop_bg_complex_data $load_handle1 |                 stop_bg_complex_data $load_handle1 | ||||||
|                 stop_bg_complex_data $load_handle2 |                 stop_bg_complex_data $load_handle2 | ||||||
|  | 
 | ||||||
|  |                 # Wait for the slave to reach the "online" | ||||||
|  |                 # state from the POV of the master. | ||||||
|  |                 set retry 5000 | ||||||
|  |                 while {$retry} { | ||||||
|  |                     set info [$master info] | ||||||
|  |                     if {[string match {*slave0:*state=online*} $info]} { | ||||||
|  |                         break | ||||||
|  |                     } else { | ||||||
|  |                         incr retry -1 | ||||||
|  |                         after 100 | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 if {$retry == 0} { | ||||||
|  |                     error "assertion:Slave not correctly synchronized" | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 # Wait that slave acknowledge it is online so | ||||||
|  |                 # we are sure that DBSIZE and DEBUG DIGEST will not | ||||||
|  |                 # fail because of timing issues. (-LOADING error) | ||||||
|  |                 wait_for_condition 5000 100 { | ||||||
|  |                     [lindex [$slave role] 3] eq {connected} | ||||||
|  |                 } else { | ||||||
|  |                     fail "Slave still not connected after some time" | ||||||
|  |                 }   | ||||||
|  | 
 | ||||||
|                 set retry 10 |                 set retry 10 | ||||||
|                 while {$retry && ([$master debug digest] ne [$slave debug digest])}\ |                 while {$retry && ([$master debug digest] ne [$slave debug digest])}\ | ||||||
|                 { |                 { | ||||||
|  |  | ||||||
|  | @ -161,7 +161,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             # make sure master doesn't disconnect slave because of timeout |             # make sure master doesn't disconnect slave because of timeout | ||||||
|             $master config set repl-timeout 300 ;# 5 minutes |             $master config set repl-timeout 1200 ;# 20 minutes (for valgrind and slow machines) | ||||||
|             $master config set maxmemory-policy allkeys-random |             $master config set maxmemory-policy allkeys-random | ||||||
|             $master config set client-output-buffer-limit "replica 100000000 100000000 300" |             $master config set client-output-buffer-limit "replica 100000000 100000000 300" | ||||||
|             $master config set repl-backlog-size [expr {10*1024}] |             $master config set repl-backlog-size [expr {10*1024}] | ||||||
|  | @ -212,7 +212,8 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} | ||||||
| 
 | 
 | ||||||
|             assert {[$master dbsize] == 100} |             assert {[$master dbsize] == 100} | ||||||
|             assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers |             assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers | ||||||
|             assert {$delta < 50*1024 && $delta > -50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB |             set delta_max [expr {$cmd_count / 2}] ;# 1 byte unaccounted for, with 1M commands will consume some 1MB | ||||||
|  |             assert {$delta < $delta_max && $delta > -$delta_max} | ||||||
| 
 | 
 | ||||||
|             $master client kill type slave |             $master client kill type slave | ||||||
|             set killed_used [s -1 used_memory] |             set killed_used [s -1 used_memory] | ||||||
|  | @ -221,7 +222,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} | ||||||
|             set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}] |             set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}] | ||||||
|             set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}] |             set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}] | ||||||
|             assert {$killed_slave_buf == 0} |             assert {$killed_slave_buf == 0} | ||||||
|             assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB |             assert {$delta_no_repl > -$delta_max && $delta_no_repl < $delta_max} | ||||||
| 
 | 
 | ||||||
|         } |         } | ||||||
|         # unfreeze slave process (after the 'test' succeeded or failed, but before we attempt to terminate the server |         # unfreeze slave process (after the 'test' succeeded or failed, but before we attempt to terminate the server | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue