mirror of https://github.com/redis/redis.git
Fix short read issue that causes exit() on replica (#14085)
CI / test-ubuntu-latest (push) Has been cancelled
Details
CI / test-sanitizer-address (push) Has been cancelled
Details
CI / build-debian-old (push) Has been cancelled
Details
CI / build-macos-latest (push) Has been cancelled
Details
CI / build-32bit (push) Has been cancelled
Details
CI / build-libc-malloc (push) Has been cancelled
Details
CI / build-centos-jemalloc (push) Has been cancelled
Details
CI / build-old-chain-jemalloc (push) Has been cancelled
Details
Codecov / code-coverage (push) Has been cancelled
Details
External Server Tests / test-external-standalone (push) Has been cancelled
Details
External Server Tests / test-external-cluster (push) Has been cancelled
Details
External Server Tests / test-external-nodebug (push) Has been cancelled
Details
Spellcheck / Spellcheck (push) Has been cancelled
Details
CI / test-ubuntu-latest (push) Has been cancelled
Details
CI / test-sanitizer-address (push) Has been cancelled
Details
CI / build-debian-old (push) Has been cancelled
Details
CI / build-macos-latest (push) Has been cancelled
Details
CI / build-32bit (push) Has been cancelled
Details
CI / build-libc-malloc (push) Has been cancelled
Details
CI / build-centos-jemalloc (push) Has been cancelled
Details
CI / build-old-chain-jemalloc (push) Has been cancelled
Details
Codecov / code-coverage (push) Has been cancelled
Details
External Server Tests / test-external-standalone (push) Has been cancelled
Details
External Server Tests / test-external-cluster (push) Has been cancelled
Details
External Server Tests / test-external-nodebug (push) Has been cancelled
Details
Spellcheck / Spellcheck (push) Has been cancelled
Details
When `repl-diskless-load` is enabled on a replica, and it is in the
process of loading an RDB file, a broken connection detected by the main
channel may trigger a call to rioAbort(). This sets a flag to cause the
rdb channel to fail on the next rioRead() call, allowing it to perform
necessary cleanup.
However, there are specific scenarios where the error is checked using
rioGetReadError(), which does not account for the RIO_FLAG_ABORT flag (see
[source](https://github.com/redis/redis/blob/79b37ff535/src/rdb.c#L3098)).
As a result, the error goes undetected. The code then proceeds to
validate a module type, fails to find a match, and calls
rdbReportCorruptRDB() which logs the following error and exits the
process:
```
The RDB file contains module data I can't load: no matching module type '_________'
```
To fix this issue, the RIO_FLAG_ABORT flag has been removed. Now, rioAbort()
sets both the read and write error flags, so that subsequent operations and
error checks properly detect the failure.
Additional small keys were added to the short read test; with them, the
test reproduces the issue. The problematic line is hit once per key, so my
guess is that with many smaller keys, the likelihood of the connection
being killed at just the right moment increases.
This commit is contained in:
parent
161326d332
commit
7f60945bc6
11
src/rio.h
11
src/rio.h
|
@ -23,7 +23,6 @@
|
|||
|
||||
#define RIO_FLAG_READ_ERROR (1<<0)
|
||||
#define RIO_FLAG_WRITE_ERROR (1<<1)
|
||||
#define RIO_FLAG_ABORT (1<<2)
|
||||
|
||||
#define RIO_TYPE_FILE (1<<0)
|
||||
#define RIO_TYPE_BUFFER (1<<1)
|
||||
|
@ -103,7 +102,7 @@ typedef struct _rio rio;
|
|||
* if needed. */
|
||||
|
||||
static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
|
||||
if (r->flags & (RIO_FLAG_WRITE_ERROR | RIO_FLAG_ABORT)) return 0;
|
||||
if (r->flags & (RIO_FLAG_WRITE_ERROR)) return 0;
|
||||
while (len) {
|
||||
size_t bytes_to_write = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
|
||||
if (r->update_cksum) r->update_cksum(r,buf,bytes_to_write);
|
||||
|
@ -119,7 +118,7 @@ static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
|
|||
}
|
||||
|
||||
static inline size_t rioRead(rio *r, void *buf, size_t len) {
|
||||
if (r->flags & (RIO_FLAG_READ_ERROR | RIO_FLAG_ABORT)) return 0;
|
||||
if (r->flags & (RIO_FLAG_READ_ERROR)) return 0;
|
||||
while (len) {
|
||||
size_t bytes_to_read = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
|
||||
if (r->read(r,buf,bytes_to_read) == 0) {
|
||||
|
@ -142,8 +141,10 @@ static inline int rioFlush(rio *r) {
|
|||
return r->flush(r);
|
||||
}
|
||||
|
||||
/* Abort RIO asynchronously by setting read and write error flags. Subsequent
|
||||
* rioRead()/rioWrite() calls will fail, letting the caller terminate safely. */
|
||||
static inline void rioAbort(rio *r) {
|
||||
r->flags |= RIO_FLAG_ABORT;
|
||||
r->flags |= (RIO_FLAG_READ_ERROR | RIO_FLAG_WRITE_ERROR);
|
||||
}
|
||||
|
||||
/* This function allows to know if there was a read error in any past
|
||||
|
@ -159,7 +160,7 @@ static inline int rioGetWriteError(rio *r) {
|
|||
}
|
||||
|
||||
static inline void rioClearErrors(rio *r) {
|
||||
r->flags &= ~(RIO_FLAG_READ_ERROR|RIO_FLAG_WRITE_ERROR|RIO_FLAG_ABORT);
|
||||
r->flags &= ~(RIO_FLAG_READ_ERROR|RIO_FLAG_WRITE_ERROR);
|
||||
}
|
||||
|
||||
void rioInitWithFile(rio *r, FILE *fp);
|
||||
|
|
|
@ -116,6 +116,11 @@ tags "modules" {
|
|||
$master config set dynamic-hz no
|
||||
$replica config set dynamic-hz no
|
||||
set start [clock clicks -milliseconds]
|
||||
# Generate small keys
|
||||
for {set k 0} {$k < 20000} {incr k} {
|
||||
r testrdb.set.key keysmall$k [string repeat A [expr {int(rand()*100)}]]
|
||||
}
|
||||
# Generate larger keys
|
||||
for {set k 0} {$k < 30} {incr k} {
|
||||
r testrdb.set.key key$k [string repeat A [expr {int(rand()*1000000)}]]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue