// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"context"
	"errors"
	"io"
	"sync"
	"sync/atomic"

	"github.com/minio/minio/internal/logger"
)

// Reads in parallel from readers.
type parallelReader struct {
	readers       []io.ReaderAt
	orgReaders    []io.ReaderAt
	dataBlocks    int
	offset        int64
	shardSize     int64
	shardFileSize int64
	buf           [][]byte
	readerToBuf   []int
}
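
// Note on the fields above: readers may be reordered by preferReaders, while
// orgReaders always keeps the original drive order and is what gets nil'ed to
// report failed drives upstream. readerToBuf[i] is the buffer slot that the
// shard read from readers[i] must land in, so reordering readers never changes
// where a shard ends up in buf.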

// newParallelReader returns parallelReader.
func newParallelReader(readers []io.ReaderAt, e Erasure, offset, totalLength int64) *parallelReader {
	r2b := make([]int, len(readers))
	for i := range r2b {
		r2b[i] = i
	}
	return &parallelReader{
		readers:       readers,
		orgReaders:    readers,
		dataBlocks:    e.dataBlocks,
		offset:        (offset / e.blockSize) * e.ShardSize(),
		shardSize:     e.ShardSize(),
		shardFileSize: e.ShardFileSize(totalLength),
		buf:           make([][]byte, len(readers)),
		readerToBuf:   r2b,
	}
}
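
// Illustrative note on the offset computed above: it is a per-drive shard
// offset, not an object offset. Each full erasure block of e.blockSize bytes
// occupies one shard of e.ShardSize() bytes on every drive, so skipping
// (offset / e.blockSize) whole blocks skips that many shards per drive.
// For example, assuming blockSize = 1 MiB, 4 data blocks and therefore a
// 256 KiB shard size, an object offset of 2.5 MiB falls inside block 2 and
// maps to a shard offset of 2 * 256 KiB = 512 KiB on each drive.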

// preferReaders can mark readers as preferred.
// These will be chosen before others.
func (p *parallelReader) preferReaders(prefer []bool) {
	if len(prefer) != len(p.orgReaders) {
		return
	}
	// Copy so we don't change our input.
	tmp := make([]io.ReaderAt, len(p.orgReaders))
	copy(tmp, p.orgReaders)
	p.readers = tmp
	// next is the next non-preferred index.
	next := 0
	for i, ok := range prefer {
		if !ok || p.readers[i] == nil {
			continue
		}
		if i == next {
			next++
			continue
		}
		// Move reader with index i to index next.
		// Do this by swapping next and i
		p.readers[next], p.readers[i] = p.readers[i], p.readers[next]
		p.readerToBuf[next] = i
		p.readerToBuf[i] = next
		next++
	}
}
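
// Illustrative example: with four readers [r0 r1 r2 r3] and
// prefer = [false false true false], the loop swaps r2 to the front, leaving
// p.readers = [r2 r1 r0 r3] and p.readerToBuf = [2 1 0 3]. The shard read from
// r2 still lands in buffer slot 2, so decode order is preserved while the
// preferred (typically local) drive is tried first.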

// Returns if buf can be erasure decoded.
func (p *parallelReader) canDecode(buf [][]byte) bool {
	bufCount := 0
	for _, b := range buf {
		if len(b) > 0 {
			bufCount++
		}
	}
	return bufCount >= p.dataBlocks
}
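
// For example, with 4 data and 4 parity shards, canDecode reports true as soon
// as any 4 of the 8 buffers are non-empty, since Reed-Solomon only needs
// dataBlocks shards to reconstruct the data.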

// Read reads from readers in parallel. Returns p.dataBlocks number of bufs.
func (p *parallelReader) Read(dst [][]byte) ([][]byte, error) {
	newBuf := dst
	if len(dst) != len(p.readers) {
		newBuf = make([][]byte, len(p.readers))
	} else {
		for i := range newBuf {
			newBuf[i] = newBuf[i][:0]
		}
	}
	var newBufLK sync.RWMutex

	if p.offset+p.shardSize > p.shardFileSize {
		p.shardSize = p.shardFileSize - p.offset
	}
	if p.shardSize == 0 {
		return newBuf, nil
	}

	readTriggerCh := make(chan bool, len(p.readers))
	defer close(readTriggerCh) // close the channel upon return

	for i := 0; i < p.dataBlocks; i++ {
		// Setup read triggers for p.dataBlocks number of reads so that it reads in parallel.
		readTriggerCh <- true
	}

	bitrotHeal := int32(0)       // Atomic bool flag.
	missingPartsHeal := int32(0) // Atomic bool flag.
	readerIndex := 0
	var wg sync.WaitGroup
	// if readTrigger is true, it implies next disk.ReadAt() should be tried
	// if readTrigger is false, it implies previous disk.ReadAt() was successful and there is no need
	// to try reading the next disk.
	for readTrigger := range readTriggerCh {
		newBufLK.RLock()
		canDecode := p.canDecode(newBuf)
		newBufLK.RUnlock()
		if canDecode {
			break
		}
		if readerIndex == len(p.readers) {
			break
		}
		if !readTrigger {
			continue
		}
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			rr := p.readers[i]
			if rr == nil {
				// Since reader is nil, trigger another read.
				readTriggerCh <- true
				return
			}
			bufIdx := p.readerToBuf[i]
			if p.buf[bufIdx] == nil {
				// Reading first time on this disk, hence the buffer needs to be allocated.
				// Subsequent reads will re-use this buffer.
				p.buf[bufIdx] = make([]byte, p.shardSize)
			}
			// For the last shard, the shardsize might be less than previous shard sizes.
			// Hence the following statement ensures that the buffer size is reset to the right size.
			p.buf[bufIdx] = p.buf[bufIdx][:p.shardSize]
			n, err := rr.ReadAt(p.buf[bufIdx], p.offset)
			if err != nil {
				if errors.Is(err, errFileNotFound) {
					atomic.StoreInt32(&missingPartsHeal, 1)
				} else if errors.Is(err, errFileCorrupt) {
					atomic.StoreInt32(&bitrotHeal, 1)
				}

				// This will be communicated upstream.
				p.orgReaders[bufIdx] = nil
				p.readers[i] = nil

				// Since ReadAt returned error, trigger another read.
				readTriggerCh <- true
				return
			}
			newBufLK.Lock()
			newBuf[bufIdx] = p.buf[bufIdx][:n]
			newBufLK.Unlock()
			// Since ReadAt returned success, there is no need to trigger another read.
			readTriggerCh <- false
		}(readerIndex)
		readerIndex++
	}
	wg.Wait()
	if p.canDecode(newBuf) {
		p.offset += p.shardSize
		if atomic.LoadInt32(&missingPartsHeal) == 1 {
			return newBuf, errFileNotFound
		} else if atomic.LoadInt32(&bitrotHeal) == 1 {
			return newBuf, errFileCorrupt
		}
		return newBuf, nil
	}

	// If we cannot decode, just return read quorum error.
	return nil, errErasureReadQuorum
}
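
// How the trigger channel above behaves, sketched with assumed numbers: with
// 4 data and 2 parity drives, Read seeds readTriggerCh with 4 "true" values,
// so exactly 4 goroutines start reading. Every successful ReadAt sends "false"
// (no replacement read is needed), while a failure or nil reader sends "true",
// which wakes the loop and starts a goroutine on the next untried reader.
// The loop stops once canDecode reports enough shards or every reader has been
// tried; wg.Wait() then ensures all in-flight reads (and their trigger sends)
// have finished before the buffered channel is closed on return.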

// Decode reads from readers, reconstructs data if needed and writes the data to the writer.
// A set of preferred drives can be supplied. In that case they will be used and the data reconstructed.
func (e Erasure) Decode(ctx context.Context, writer io.Writer, readers []io.ReaderAt, offset, length, totalLength int64, prefer []bool) (written int64, derr error) {
	if offset < 0 || length < 0 {
		logger.LogIf(ctx, errInvalidArgument)
		return -1, errInvalidArgument
	}
	if offset+length > totalLength {
		logger.LogIf(ctx, errInvalidArgument)
		return -1, errInvalidArgument
	}

	if length == 0 {
		return 0, nil
	}

	reader := newParallelReader(readers, e, offset, totalLength)
	if len(prefer) == len(readers) {
		reader.preferReaders(prefer)
	}

	startBlock := offset / e.blockSize
	endBlock := (offset + length) / e.blockSize

	var bytesWritten int64
	var bufs [][]byte
	for block := startBlock; block <= endBlock; block++ {
		var blockOffset, blockLength int64
		switch {
		case startBlock == endBlock:
			blockOffset = offset % e.blockSize
			blockLength = length
		case block == startBlock:
			blockOffset = offset % e.blockSize
			blockLength = e.blockSize - blockOffset
		case block == endBlock:
			blockOffset = 0
			blockLength = (offset + length) % e.blockSize
		default:
			blockOffset = 0
			blockLength = e.blockSize
		}
		if blockLength == 0 {
			break
		}

		var err error
		bufs, err = reader.Read(bufs)
		if len(bufs) > 0 {
			// Set only if there is enough data for reconstruction,
			// and only for expected errors; also set once.
			if errors.Is(err, errFileNotFound) || errors.Is(err, errFileCorrupt) {
				if derr == nil {
					derr = err
				}
			}
		} else if err != nil {
			// For all errors that cannot be reconstructed fail the read operation.
			return -1, err
		}

		if err = e.DecodeDataBlocks(bufs); err != nil {
			logger.LogIf(ctx, err)
			return -1, err
		}

		n, err := writeDataBlocks(ctx, writer, bufs, e.dataBlocks, blockOffset, blockLength)
		if err != nil {
			return -1, err
		}

		bytesWritten += n
	}

	if bytesWritten != length {
		logger.LogIf(ctx, errLessData)
		return bytesWritten, errLessData
	}

	return bytesWritten, derr
}
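
// A minimal usage sketch (illustrative; it assumes an Erasure value e, a
// context ctx, the object size, and a hypothetical getReaders helper that
// returns one bitrot-verified io.ReaderAt per drive, nil for missing drives):
//
//	var out bytes.Buffer
//	readers := getReaders()
//	written, err := e.Decode(ctx, &out, readers, 0, objectSize, objectSize, nil)
//	if errors.Is(err, errFileNotFound) || errors.Is(err, errFileCorrupt) {
//		// The data was still fully reconstructed and written; the error only
//		// signals that some shards should be healed.
//	}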

// Heal reads from readers, reconstructs shards and writes the data to the writers.
func (e Erasure) Heal(ctx context.Context, writers []io.Writer, readers []io.ReaderAt, totalLength int64) (derr error) {
	if len(writers) != e.parityBlocks+e.dataBlocks {
		return errInvalidArgument
	}

	reader := newParallelReader(readers, e, 0, totalLength)

	startBlock := int64(0)
	endBlock := totalLength / e.blockSize
	if totalLength%e.blockSize != 0 {
		endBlock++
	}

	var bufs [][]byte
	for block := startBlock; block < endBlock; block++ {
		var err error
		bufs, err = reader.Read(bufs)
		if len(bufs) > 0 {
			if errors.Is(err, errFileNotFound) || errors.Is(err, errFileCorrupt) {
				if derr == nil {
					derr = err
				}
			}
		} else if err != nil {
			return err
		}

		if err = e.DecodeDataAndParityBlocks(ctx, bufs); err != nil {
			logger.LogIf(ctx, err)
			return err
		}

		w := parallelWriter{
			writers:     writers,
			writeQuorum: 1,
			errs:        make([]error, len(writers)),
		}

		if err = w.Write(ctx, bufs); err != nil {
			logger.LogIf(ctx, err)
			return err
		}
	}

	return derr
}