Fix vrand ping pong (#14183)

VRANDMEMBER had a bug when exactly two elements were present in the
vector set: we selected a fixed number of random paths to take, and this
would always lead to the same element. Please back-port this PR to
Redis 8.x.
This commit is contained in:
Salvatore Sanfilippo 2025-07-18 12:19:14 +02:00 committed by GitHub
parent d86cf66101
commit b528788f65
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 44 additions and 2 deletions

View File

@ -1975,6 +1975,13 @@ hnswNode *hnsw_random_node(HNSW *index, int slot) {
double logN = log2(index->node_count + 1);
uint32_t num_walks = (uint32_t)(logN * c);
/* Avoid the ping-pong effect: imagine there are just two nodes and
* the number of walks selected is even. We will select always the
* first element of the graph; conversely, if it is odd, we will always
* select the other element. One way to add more selection randomness is
* to randomly add '1' or '0' to the number of walks to perform. */
num_walks += rand() & 1;
// Perform random walk at level 0.
for (uint32_t i = 0; i < num_walks; i++) {
if (current->layers[0].num_links == 0) return current;

View File

@ -82,5 +82,5 @@ class HNSWPersistence(TestCase):
f"Projected vectors: Score mismatch for {key}: " + \
f"before={initial_projected[key]:.6f}, after={reloaded_projected[key]:.6f}"
self.redis.del(f"{self.test_key}:normal")
self.redis.del(f"{self.test_key}:projected")
self.redis.delete(f"{self.test_key}:normal")
self.redis.delete(f"{self.test_key}:projected")

View File

@ -0,0 +1,35 @@
from test import TestCase, generate_random_vector
import struct
class VRANDMEMBERPingPongRegressionTest(TestCase):
    """Regression test for VRANDMEMBER on a vector set with two elements."""

    def getname(self):
        """Return the label used to identify this test."""
        return "[regression] VRANDMEMBER ping-pong"

    def test(self):
        """
        Sample a two-element vector set repeatedly with VRANDMEMBER and
        require that both elements are observed at least once, i.e. the
        command is not stuck on a single member (the "ping-pong" issue).
        """
        # Make sure no data from a previous run is left under the key.
        self.redis.delete(self.test_key)

        dim = 4

        # Insert exactly two elements, each with its own random vector.
        for element_name in ("vec1", "vec2"):
            element_values = generate_random_vector(dim)
            self.redis.execute_command(
                'VADD', self.test_key, 'VALUES', dim, *element_values, element_name
            )

        # Issue VRANDMEMBER many times, keeping only the distinct replies.
        iterations = 100
        unique_results = {
            self.redis.execute_command('VRANDMEMBER', self.test_key).decode()
            for _ in range(iterations)
        }

        # Seeing both names proves the selection is not pinned to one member.
        assert len(unique_results) == 2, f"Ping-pong test failed: should have returned 2 unique members, but got {len(unique_results)}."