mirror of https://github.com/apache/kafka.git
				
				
				
KAFKA-15078; KRaft leader replies with snapshot for offset 0 (#13845)
If the follower has an empty log and fetches with offset 0, it is more efficient for the leader to reply with a snapshot id (redirecting the follower to FETCH_SNAPSHOT) than for the follower to continue fetching from the log segments.

Reviewers: David Arthur <mumrah@gmail.com>, dengziming <dengziming1993@gmail.com>
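The leader-side change below boils down to a single branch in the FETCH handling path. As a quick orientation before the diff, here is a minimal, self-contained sketch of that decision using simplified, illustrative names (FetchOffsetZeroSketch, Decision, and latestSnapshotEndOffset are not real Kafka identifiers; the actual code works with ValidOffsetAndEpoch and the replicated log):

import java.util.Optional;

// Illustrative sketch of the leader-side decision introduced by this commit.
// Simplified types; not the actual KafkaRaftClient code.
public final class FetchOffsetZeroSketch {

    enum Decision { SEND_LOG_RECORDS, REDIRECT_TO_SNAPSHOT }

    // If the follower fetches at offset 0 and the leader has a snapshot, redirect the
    // follower to FETCH_SNAPSHOT; otherwise fall back to normal offset validation and
    // serving records from the log segments.
    static Decision decide(long fetchOffset, Optional<Long> latestSnapshotEndOffset) {
        if (fetchOffset == 0 && latestSnapshotEndOffset.isPresent()) {
            return Decision.REDIRECT_TO_SNAPSHOT;
        }
        return Decision.SEND_LOG_RECORDS;
    }

    public static void main(String[] args) {
        // Follower with an empty log fetches at offset 0 and a snapshot exists: redirect.
        System.out.println(decide(0, Optional.of(100L)));   // REDIRECT_TO_SNAPSHOT
        // No snapshot yet: serve from the log as before.
        System.out.println(decide(0, Optional.empty()));    // SEND_LOG_RECORDS
        // Non-zero fetch offset: behavior unchanged.
        System.out.println(decide(50, Optional.of(100L)));  // SEND_LOG_RECORDS
    }
}

Only the offset-0 case changes; any other fetch offset still goes through the existing log.validateOffsetAndEpoch() path, as the diff shows.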
parent 2f71708955
commit 3a246b1aba
KafkaRaftClient.java
@@ -1017,7 +1017,16 @@ public class KafkaRaftClient<T> implements RaftClient<T> {
             long fetchOffset = request.fetchOffset();
             int lastFetchedEpoch = request.lastFetchedEpoch();
             LeaderState<T> state = quorum.leaderStateOrThrow();
-            ValidOffsetAndEpoch validOffsetAndEpoch = log.validateOffsetAndEpoch(fetchOffset, lastFetchedEpoch);
+
+            Optional<OffsetAndEpoch> latestSnapshotId = log.latestSnapshotId();
+            final ValidOffsetAndEpoch validOffsetAndEpoch;
+            if (fetchOffset == 0 && latestSnapshotId.isPresent()) {
+                // If the follower has an empty log and a snapshot exists, it is always more efficient
+                // to reply with a snapshot id (FETCH_SNAPSHOT) instead of fetching from the log segments.
+                validOffsetAndEpoch = ValidOffsetAndEpoch.snapshot(latestSnapshotId.get());
+            } else {
+                validOffsetAndEpoch = log.validateOffsetAndEpoch(fetchOffset, lastFetchedEpoch);
+            }

             final Records records;
             if (validOffsetAndEpoch.kind() == ValidOffsetAndEpoch.Kind.VALID) {
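The hunk above only chooses the SNAPSHOT kind; the code that turns it into a response lies outside this hunk. Below is a hedged sketch of how that result is presumably surfaced to the follower, assuming the generated FetchResponseData message classes and their chained setters; the test further down confirms the snapshotId().endOffset() and snapshotId().epoch() accessors, but the exact response-building code is not shown here.

import org.apache.kafka.common.message.FetchResponseData;

// Hedged sketch: not the actual KafkaRaftClient response-building code. It shows how a
// SNAPSHOT validation result is expected to be exposed to the follower, matching the
// accessors the test below asserts on.
public final class SnapshotRedirectSketch {

    // Fill the partition's snapshotId so the follower can redirect to FETCH_SNAPSHOT.
    static FetchResponseData.PartitionData snapshotRedirect(long endOffset, int epoch) {
        FetchResponseData.PartitionData partitionData = new FetchResponseData.PartitionData();
        partitionData.snapshotId()
            .setEndOffset(endOffset)
            .setEpoch(epoch);
        return partitionData;
    }

    public static void main(String[] args) {
        FetchResponseData.PartitionData data = snapshotRedirect(100L, 3);
        System.out.println("snapshotId = (" + data.snapshotId().endOffset() + ", " + data.snapshotId().epoch() + ")");
    }
}

A follower that sees a populated snapshot id in the FETCH response is expected to switch to FETCH_SNAPSHOT and rebuild its state from the snapshot before resuming regular fetches.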
KafkaRaftClientSnapshotTest.java
@@ -303,7 +303,53 @@ final public class KafkaRaftClientSnapshotTest {
         context.client.poll();

         // Send Fetch request less than start offset
-        context.deliverRequest(context.fetchRequest(epoch, otherNodeId, 0, epoch, 0));
+        context.deliverRequest(context.fetchRequest(epoch, otherNodeId, snapshotId.offset() - 2, snapshotId.epoch(), 0));
+        context.pollUntilResponse();
+        FetchResponseData.PartitionData partitionResponse = context.assertSentFetchPartitionResponse();
+        assertEquals(Errors.NONE, Errors.forCode(partitionResponse.errorCode()));
+        assertEquals(epoch, partitionResponse.currentLeader().leaderEpoch());
+        assertEquals(localId, partitionResponse.currentLeader().leaderId());
+        assertEquals(snapshotId.epoch(), partitionResponse.snapshotId().epoch());
+        assertEquals(snapshotId.offset(), partitionResponse.snapshotId().endOffset());
+    }
+
+    @Test
+    public void testFetchRequestOffsetAtZero() throws Exception {
+        // When the follower sends a FETCH request at offset 0, reply with snapshot id if it exists
+        int localId = 0;
+        int otherNodeId = localId + 1;
+        Set<Integer> voters = Utils.mkSet(localId, otherNodeId);
+
+        RaftClientTestContext context = new RaftClientTestContext.Builder(localId, voters)
+            .withAppendLingerMs(1)
+            .build();
+
+        context.becomeLeader();
+        int epoch = context.currentEpoch();
+
+        List<String> appendRecords = Arrays.asList("a", "b", "c");
+        context.client.scheduleAppend(epoch, appendRecords);
+        context.time.sleep(context.appendLingerMs());
+        context.client.poll();
+
+        long localLogEndOffset = context.log.endOffset().offset;
+        assertTrue(
+            appendRecords.size() <= localLogEndOffset,
+            String.format("Record length = %s, log end offset = %s", appendRecords.size(), localLogEndOffset)
+        );
+
+        // Advance the highWatermark
+        context.advanceLocalLeaderHighWatermarkToLogEndOffset();
+
+        // Generate a snapshot at the LEO
+        OffsetAndEpoch snapshotId = new OffsetAndEpoch(localLogEndOffset, epoch);
+        try (SnapshotWriter<String> snapshot = context.client.createSnapshot(snapshotId, 0).get()) {
+            assertEquals(snapshotId, snapshot.snapshotId());
+            snapshot.freeze();
+        }
+
+        // Send Fetch request for offset 0
+        context.deliverRequest(context.fetchRequest(epoch, otherNodeId, 0, 0, 0));
         context.pollUntilResponse();
         FetchResponseData.PartitionData partitionResponse = context.assertSentFetchPartitionResponse();
         assertEquals(Errors.NONE, Errors.forCode(partitionResponse.errorCode()));