mirror of https://github.com/redis/redis.git
Replica fail and retry the PSYNC if the master is unresponsive (#10726)
We observed from our replication testing that when the master becomes unresponsive, or the replication connection is broken during PSYNC so the replica doesn't get a response from the master, it was not able to recognize that condition as a failure and instead moved into the full-sync code path. This fix makes the replica fail and retry the PSYNC with the master in such scenarios.
This commit is contained in:
parent
a7d6ca9770
commit
837c063baf
|
|
@ -2223,8 +2223,8 @@ char *receiveSynchronousResponse(connection *conn) {
|
|||
/* Read the reply from the server. */
|
||||
if (connSyncReadLine(conn,buf,sizeof(buf),server.repl_syncio_timeout*1000) == -1)
|
||||
{
|
||||
return sdscatprintf(sdsempty(),"-Reading from master: %s",
|
||||
strerror(errno));
|
||||
serverLog(LL_WARNING, "Failed to read response from the server: %s", strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
server.repl_transfer_lastio = server.unixtime;
|
||||
return sdsnew(buf);
|
||||
|
|
@ -2407,6 +2407,13 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
|
|||
|
||||
/* Reading half */
|
||||
reply = receiveSynchronousResponse(conn);
|
||||
/* Master did not reply to PSYNC */
|
||||
if (reply == NULL) {
|
||||
connSetReadHandler(conn, NULL);
|
||||
serverLog(LL_WARNING, "Master did not reply to PSYNC, will try later");
|
||||
return PSYNC_TRY_LATER;
|
||||
}
|
||||
|
||||
if (sdslen(reply) == 0) {
|
||||
/* The master may send empty newlines after it receives PSYNC
|
||||
* and before to reply, just to keep the connection alive. */
|
||||
|
|
@ -2566,6 +2573,9 @@ void syncWithMaster(connection *conn) {
|
|||
if (server.repl_state == REPL_STATE_RECEIVE_PING_REPLY) {
|
||||
err = receiveSynchronousResponse(conn);
|
||||
|
||||
/* The master did not reply */
|
||||
if (err == NULL) goto no_response_error;
|
||||
|
||||
/* We accept only two replies as valid, a positive +PONG reply
|
||||
* (we just check for "+") or an authentication error.
|
||||
* Note that older versions of Redis replied with "operation not
|
||||
|
|
@ -2652,6 +2662,7 @@ void syncWithMaster(connection *conn) {
|
|||
/* Receive AUTH reply. */
|
||||
if (server.repl_state == REPL_STATE_RECEIVE_AUTH_REPLY) {
|
||||
err = receiveSynchronousResponse(conn);
|
||||
if (err == NULL) goto no_response_error;
|
||||
if (err[0] == '-') {
|
||||
serverLog(LL_WARNING,"Unable to AUTH to MASTER: %s",err);
|
||||
sdsfree(err);
|
||||
|
|
@ -2666,6 +2677,7 @@ void syncWithMaster(connection *conn) {
|
|||
/* Receive REPLCONF listening-port reply. */
|
||||
if (server.repl_state == REPL_STATE_RECEIVE_PORT_REPLY) {
|
||||
err = receiveSynchronousResponse(conn);
|
||||
if (err == NULL) goto no_response_error;
|
||||
/* Ignore the error if any, not all the Redis versions support
|
||||
* REPLCONF listening-port. */
|
||||
if (err[0] == '-') {
|
||||
|
|
@ -2683,6 +2695,7 @@ void syncWithMaster(connection *conn) {
|
|||
/* Receive REPLCONF ip-address reply. */
|
||||
if (server.repl_state == REPL_STATE_RECEIVE_IP_REPLY) {
|
||||
err = receiveSynchronousResponse(conn);
|
||||
if (err == NULL) goto no_response_error;
|
||||
/* Ignore the error if any, not all the Redis versions support
|
||||
* REPLCONF ip-address. */
|
||||
if (err[0] == '-') {
|
||||
|
|
@ -2697,6 +2710,7 @@ void syncWithMaster(connection *conn) {
|
|||
/* Receive CAPA reply. */
|
||||
if (server.repl_state == REPL_STATE_RECEIVE_CAPA_REPLY) {
|
||||
err = receiveSynchronousResponse(conn);
|
||||
if (err == NULL) goto no_response_error;
|
||||
/* Ignore the error if any, not all the Redis versions support
|
||||
* REPLCONF capa. */
|
||||
if (err[0] == '-') {
|
||||
|
|
@ -2810,6 +2824,10 @@ void syncWithMaster(connection *conn) {
|
|||
server.repl_transfer_lastio = server.unixtime;
|
||||
return;
|
||||
|
||||
no_response_error: /* Handle receiveSynchronousResponse() error when master has no reply */
|
||||
serverLog(LL_WARNING, "Master did not respond to command during SYNC handshake");
|
||||
/* Fall through to regular error handling */
|
||||
|
||||
error:
|
||||
if (dfd != -1) close(dfd);
|
||||
connClose(conn);
|
||||
|
|
|
|||
Loading…
Reference in New Issue