From b528788f6547e73afead8bd37cbc0c0bb373e871 Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Fri, 18 Jul 2025 12:19:14 +0200 Subject: [PATCH] Fix vrand ping pong (#14183) VRANDMEMBER had a bug when exactly two elements where present in the vector set: we selected a fixed number of random paths to take, and this will lead always to the same element. This PR should be kindly back-ported to Redis 8.x. --- modules/vector-sets/hnsw.c | 7 ++++ modules/vector-sets/tests/persistence.py | 4 +-- modules/vector-sets/tests/vrand-ping-pong.py | 35 ++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 modules/vector-sets/tests/vrand-ping-pong.py diff --git a/modules/vector-sets/hnsw.c b/modules/vector-sets/hnsw.c index 701c1f0d22..cae43e305b 100644 --- a/modules/vector-sets/hnsw.c +++ b/modules/vector-sets/hnsw.c @@ -1975,6 +1975,13 @@ hnswNode *hnsw_random_node(HNSW *index, int slot) { double logN = log2(index->node_count + 1); uint32_t num_walks = (uint32_t)(logN * c); + /* Avoid the ping-pong effect: imagine there are just two nodes and + * the number of walks selected is even. We will select always the + * first element of the graph; conversely, if it is odd, we will always + * select the other element. One way to add more selection randomness is + * to randomly add '1' or '0' to the number of walks to perform. */ + num_walks += rand() & 1; + // Perform random walk at level 0. for (uint32_t i = 0; i < num_walks; i++) { if (current->layers[0].num_links == 0) return current; diff --git a/modules/vector-sets/tests/persistence.py b/modules/vector-sets/tests/persistence.py index 948b90404a..79730f4881 100644 --- a/modules/vector-sets/tests/persistence.py +++ b/modules/vector-sets/tests/persistence.py @@ -82,5 +82,5 @@ class HNSWPersistence(TestCase): f"Projected vectors: Score mismatch for {key}: " + \ f"before={initial_projected[key]:.6f}, after={reloaded_projected[key]:.6f}" - self.redis.del(f"{self.test_key}:normal") - self.redis.del(f"{self.test_key}:projected") + self.redis.delete(f"{self.test_key}:normal") + self.redis.delete(f"{self.test_key}:projected") diff --git a/modules/vector-sets/tests/vrand-ping-pong.py b/modules/vector-sets/tests/vrand-ping-pong.py new file mode 100644 index 0000000000..99d2e9a1bb --- /dev/null +++ b/modules/vector-sets/tests/vrand-ping-pong.py @@ -0,0 +1,35 @@ +from test import TestCase, generate_random_vector +import struct + +class VRANDMEMBERPingPongRegressionTest(TestCase): + def getname(self): + return "[regression] VRANDMEMBER ping-pong" + + def test(self): + """ + This test ensures that when only two vectors exist, VRANDMEMBER + does not get stuck returning only one of them due to the "ping-pong" issue. + """ + self.redis.delete(self.test_key) # Clean up before test + dim = 4 + + # Add exactly two vectors + vec1_name = "vec1" + vec1_data = generate_random_vector(dim) + self.redis.execute_command('VADD', self.test_key, 'VALUES', dim, *vec1_data, vec1_name) + + vec2_name = "vec2" + vec2_data = generate_random_vector(dim) + self.redis.execute_command('VADD', self.test_key, 'VALUES', dim, *vec2_data, vec2_name) + + # Call VRANDMEMBER many times and check for distribution + iterations = 100 + results = [] + for _ in range(iterations): + member = self.redis.execute_command('VRANDMEMBER', self.test_key) + results.append(member.decode()) + + # Verify that both members were returned, proving it's not stuck + unique_results = set(results) + + assert len(unique_results) == 2, f"Ping-pong test failed: should have returned 2 unique members, but got {len(unique_results)}."