mirror of https://github.com/redis/redis.git
				
				
				
			Make cluster config file saving atomic and fsync acl (#10924)
As an outstanding part mentioned in #10737, we could just make the cluster config file and ACL file saving done with a more safe and atomic pattern (write to temp file, fsync, rename, fsync dir). The cluster config file uses an in-place overwrite and truncation (which was also used by the main config file before #7824). The ACL file is using the temp file and rename approach, but was missing an fsync. Co-authored-by: 朱天 <zhutian03@meituan.com>
This commit is contained in:
		
							parent
							
								
									56828bab59
								
							
						
					
					
						commit
						cc2848132f
					
				
							
								
								
									
										22
									
								
								src/acl.c
								
								
								
								
							
							
						
						
									
										22
									
								
								src/acl.c
								
								
								
								
							|  | @ -2264,7 +2264,7 @@ int ACLSaveToFile(const char *filename) { | |||
|     /* Create a temp file with the new content. */ | ||||
|     tmpfilename = sdsnew(filename); | ||||
|     tmpfilename = sdscatfmt(tmpfilename,".tmp-%i-%I", | ||||
|         (int)getpid(),(int)mstime()); | ||||
|         (int) getpid(),mstime()); | ||||
|     if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) { | ||||
|         serverLog(LL_WARNING,"Opening temp ACL file for ACL SAVE: %s", | ||||
|             strerror(errno)); | ||||
|  | @ -2272,8 +2272,19 @@ int ACLSaveToFile(const char *filename) { | |||
|     } | ||||
| 
 | ||||
|     /* Write it. */ | ||||
|     if (write(fd,acl,sdslen(acl)) != (ssize_t)sdslen(acl)) { | ||||
|         serverLog(LL_WARNING,"Writing ACL file for ACL SAVE: %s", | ||||
|     size_t offset = 0; | ||||
|     while (offset < sdslen(acl)) { | ||||
|         ssize_t written_bytes = write(fd,acl + offset,sdslen(acl) - offset); | ||||
|         if (written_bytes <= 0) { | ||||
|             if (errno == EINTR) continue; | ||||
|             serverLog(LL_WARNING,"Writing ACL file for ACL SAVE: %s", | ||||
|                 strerror(errno)); | ||||
|             goto cleanup; | ||||
|         } | ||||
|         offset += written_bytes; | ||||
|     } | ||||
|     if (redis_fsync(fd) == -1) { | ||||
|         serverLog(LL_WARNING,"Syncing ACL file for ACL SAVE: %s", | ||||
|             strerror(errno)); | ||||
|         goto cleanup; | ||||
|     } | ||||
|  | @ -2285,6 +2296,11 @@ int ACLSaveToFile(const char *filename) { | |||
|             strerror(errno)); | ||||
|         goto cleanup; | ||||
|     } | ||||
|     if (fsyncFileDir(filename) == -1) { | ||||
|         serverLog(LL_WARNING,"Syncing ACL directory for ACL SAVE: %s", | ||||
|             strerror(errno)); | ||||
|         goto cleanup; | ||||
|     } | ||||
|     sdsfree(tmpfilename); tmpfilename = NULL; | ||||
|     retval = C_OK; /* If we reached this point, everything is fine. */ | ||||
| 
 | ||||
|  |  | |||
|  | @ -406,10 +406,11 @@ fmterr: | |||
|  * bigger we pad our payload with newlines that are anyway ignored and truncate | ||||
|  * the file afterward. */ | ||||
| int clusterSaveConfig(int do_fsync) { | ||||
|     sds ci; | ||||
|     size_t content_size; | ||||
|     struct stat sb; | ||||
|     int fd; | ||||
|     sds ci,tmpfilename; | ||||
|     size_t content_size,offset = 0; | ||||
|     ssize_t written_bytes; | ||||
|     int fd = -1; | ||||
|     int retval = C_ERR; | ||||
| 
 | ||||
|     server.cluster->todo_before_sleep &= ~CLUSTER_TODO_SAVE_CONFIG; | ||||
| 
 | ||||
|  | @ -421,36 +422,52 @@ int clusterSaveConfig(int do_fsync) { | |||
|         (unsigned long long) server.cluster->lastVoteEpoch); | ||||
|     content_size = sdslen(ci); | ||||
| 
 | ||||
|     if ((fd = open(server.cluster_configfile,O_WRONLY|O_CREAT,0644)) | ||||
|         == -1) goto err; | ||||
| 
 | ||||
|     if (redis_fstat(fd,&sb) == -1) goto err; | ||||
| 
 | ||||
|     /* Pad the new payload if the existing file length is greater. */ | ||||
|     if (sb.st_size > (off_t)content_size) { | ||||
|         ci = sdsgrowzero(ci,sb.st_size); | ||||
|         memset(ci+content_size,'\n',sb.st_size-content_size); | ||||
|     /* Create a temp file with the new content. */ | ||||
|     tmpfilename = sdscatfmt(sdsempty(),"%s.tmp-%i-%I", | ||||
|         server.cluster_configfile,(int) getpid(),mstime()); | ||||
|     if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) { | ||||
|         serverLog(LL_WARNING,"Could not open temp cluster config file: %s",strerror(errno)); | ||||
|         goto cleanup; | ||||
|     } | ||||
|      | ||||
|     if (write(fd,ci,sdslen(ci)) != (ssize_t)sdslen(ci)) goto err; | ||||
| 
 | ||||
|     while (offset < content_size) { | ||||
|         written_bytes = write(fd,ci + offset,content_size - offset); | ||||
|         if (written_bytes <= 0) { | ||||
|             if (errno == EINTR) continue; | ||||
|             serverLog(LL_WARNING,"Failed after writing (%zd) bytes to tmp cluster config file: %s", | ||||
|                 offset,strerror(errno)); | ||||
|             goto cleanup; | ||||
|         } | ||||
|         offset += written_bytes; | ||||
|     } | ||||
| 
 | ||||
|     if (do_fsync) { | ||||
|         server.cluster->todo_before_sleep &= ~CLUSTER_TODO_FSYNC_CONFIG; | ||||
|         if (fsync(fd) == -1) goto err; | ||||
|         if (redis_fsync(fd) == -1) { | ||||
|             serverLog(LL_WARNING,"Could not sync tmp cluster config file: %s",strerror(errno)); | ||||
|             goto cleanup; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /* Truncate the file if needed to remove the final \n padding that
 | ||||
|      * is just garbage. */ | ||||
|     if (content_size != sdslen(ci) && ftruncate(fd,content_size) == -1) { | ||||
|         /* ftruncate() failing is not a critical error. */ | ||||
|     if (rename(tmpfilename, server.cluster_configfile) == -1) { | ||||
|         serverLog(LL_WARNING,"Could not rename tmp cluster config file: %s",strerror(errno)); | ||||
|         goto cleanup; | ||||
|     } | ||||
|     close(fd); | ||||
|     sdsfree(ci); | ||||
|     return 0; | ||||
| 
 | ||||
| err: | ||||
|     if (do_fsync) { | ||||
|         if (fsyncFileDir(server.cluster_configfile) == -1) { | ||||
|             serverLog(LL_WARNING,"Could not sync cluster config file dir: %s",strerror(errno)); | ||||
|             goto cleanup; | ||||
|         } | ||||
|     } | ||||
|     retval = C_OK; /* If we reached this point, everything is fine. */ | ||||
| 
 | ||||
| cleanup: | ||||
|     if (fd != -1) close(fd); | ||||
|     if (retval) unlink(tmpfilename); | ||||
|     sdsfree(tmpfilename); | ||||
|     sdsfree(ci); | ||||
|     return -1; | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| void clusterSaveConfigOrDie(int do_fsync) { | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue