| 
									
										
										
										
											2021-04-19 03:41:13 +08:00
										 |  |  | // Copyright (c) 2015-2021 MinIO, Inc.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This file is part of MinIO Object Storage stack
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or
 | 
					
						
							|  |  |  | // (at your option) any later version.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU Affero General Public License for more details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License
 | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							| 
									
										
										
										
											2016-07-19 14:56:16 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-08-19 07:23:42 +08:00
										 |  |  | package cmd | 
					
						
							| 
									
										
										
										
											2016-07-19 14:56:16 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bytes" | 
					
						
							| 
									
										
										
										
											2018-04-06 06:04:40 +08:00
										 |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2016-07-19 14:56:16 +08:00
										 |  |  | 	"crypto/rand" | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 	"io" | 
					
						
							| 
									
										
										
										
											2016-07-19 14:56:16 +08:00
										 |  |  | 	"testing" | 
					
						
							| 
									
										
										
										
											2016-07-28 17:20:34 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-24 11:33:31 +08:00
										 |  |  | 	"github.com/dustin/go-humanize" | 
					
						
							| 
									
										
										
										
											2016-07-19 14:56:16 +08:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | type badDisk struct{ StorageAPI } | 
					
						
							| 
									
										
										
										
											2016-07-19 14:56:16 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-14 12:42:50 +08:00
										 |  |  | func (a badDisk) String() string { | 
					
						
							|  |  |  | 	return "bad-disk" | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-05 00:45:06 +08:00
										 |  |  | func (a badDisk) AppendFile(ctx context.Context, volume string, path string, buf []byte) error { | 
					
						
							| 
									
										
										
										
											2016-07-19 14:56:16 +08:00
										 |  |  | 	return errFaultyDisk | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-05 00:45:06 +08:00
										 |  |  | func (a badDisk) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) { | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 	return nil, errFaultyDisk | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-05 00:45:06 +08:00
										 |  |  | func (a badDisk) CreateFile(ctx context.Context, volume, path string, size int64, reader io.Reader) error { | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 	return errFaultyDisk | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-01-14 05:09:10 +08:00
										 |  |  | func (badDisk) Hostname() string { | 
					
						
							|  |  |  | 	return "" | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | const oneMiByte = 1 * humanize.MiByte | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | var erasureEncodeTests = []struct { | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 	dataBlocks                   int | 
					
						
							|  |  |  | 	onDisks, offDisks            int | 
					
						
							|  |  |  | 	blocksize, data              int64 | 
					
						
							|  |  |  | 	offset                       int | 
					
						
							|  |  |  | 	algorithm                    BitrotAlgorithm | 
					
						
							|  |  |  | 	shouldFail, shouldFailQuorum bool | 
					
						
							|  |  |  | }{ | 
					
						
							| 
									
										
											  
											
												[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
											
										 
											2021-03-07 06:09:34 +08:00
										 |  |  | 	{dataBlocks: 2, onDisks: 4, offDisks: 0, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 0, algorithm: BLAKE2b512, shouldFail: false, shouldFailQuorum: false},                             // 0
 | 
					
						
							|  |  |  | 	{dataBlocks: 3, onDisks: 6, offDisks: 0, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 1, algorithm: SHA256, shouldFail: false, shouldFailQuorum: false},                                 // 1
 | 
					
						
							|  |  |  | 	{dataBlocks: 4, onDisks: 8, offDisks: 2, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 2, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: false},                 // 2
 | 
					
						
							|  |  |  | 	{dataBlocks: 5, onDisks: 10, offDisks: 3, blocksize: int64(blockSizeV2), data: oneMiByte, offset: oneMiByte, algorithm: BLAKE2b512, shouldFail: false, shouldFailQuorum: false},                    // 3
 | 
					
						
							|  |  |  | 	{dataBlocks: 6, onDisks: 12, offDisks: 4, blocksize: int64(blockSizeV2), data: oneMiByte, offset: oneMiByte, algorithm: BLAKE2b512, shouldFail: false, shouldFailQuorum: false},                    // 4
 | 
					
						
							|  |  |  | 	{dataBlocks: 7, onDisks: 14, offDisks: 5, blocksize: int64(blockSizeV2), data: 0, offset: 0, shouldFail: false, algorithm: SHA256, shouldFailQuorum: false},                                        // 5
 | 
					
						
							|  |  |  | 	{dataBlocks: 8, onDisks: 16, offDisks: 7, blocksize: int64(blockSizeV2), data: 0, offset: 0, shouldFail: false, algorithm: DefaultBitrotAlgorithm, shouldFailQuorum: false},                        // 6
 | 
					
						
							|  |  |  | 	{dataBlocks: 2, onDisks: 4, offDisks: 2, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 0, algorithm: BLAKE2b512, shouldFail: false, shouldFailQuorum: true},                              // 7
 | 
					
						
							|  |  |  | 	{dataBlocks: 4, onDisks: 8, offDisks: 4, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 0, algorithm: SHA256, shouldFail: false, shouldFailQuorum: true},                                  // 8
 | 
					
						
							|  |  |  | 	{dataBlocks: 7, onDisks: 14, offDisks: 7, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 0, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: true},                 // 9
 | 
					
						
							|  |  |  | 	{dataBlocks: 8, onDisks: 16, offDisks: 8, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 0, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: true},                 // 10
 | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 	{dataBlocks: 5, onDisks: 10, offDisks: 3, blocksize: int64(oneMiByte), data: oneMiByte, offset: 0, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: false},                  // 11
 | 
					
						
							| 
									
										
											  
											
												[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
											
										 
											2021-03-07 06:09:34 +08:00
										 |  |  | 	{dataBlocks: 3, onDisks: 6, offDisks: 1, blocksize: int64(blockSizeV2), data: oneMiByte, offset: oneMiByte / 2, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: false},     // 12
 | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 	{dataBlocks: 2, onDisks: 4, offDisks: 0, blocksize: int64(oneMiByte / 2), data: oneMiByte, offset: oneMiByte/2 + 1, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: false}, // 13
 | 
					
						
							|  |  |  | 	{dataBlocks: 4, onDisks: 8, offDisks: 0, blocksize: int64(oneMiByte - 1), data: oneMiByte, offset: oneMiByte - 1, algorithm: BLAKE2b512, shouldFail: false, shouldFailQuorum: false},               // 14
 | 
					
						
							| 
									
										
											  
											
												[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
											
										 
											2021-03-07 06:09:34 +08:00
										 |  |  | 	{dataBlocks: 8, onDisks: 12, offDisks: 2, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 2, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: false},                // 15
 | 
					
						
							|  |  |  | 	{dataBlocks: 8, onDisks: 10, offDisks: 1, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 0, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: false},                // 16
 | 
					
						
							|  |  |  | 	{dataBlocks: 10, onDisks: 14, offDisks: 0, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 17, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: false},              // 17
 | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 	{dataBlocks: 2, onDisks: 6, offDisks: 2, blocksize: int64(oneMiByte), data: oneMiByte, offset: oneMiByte / 2, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: false},       // 18
 | 
					
						
							| 
									
										
											  
											
												[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
											
										 
											2021-03-07 06:09:34 +08:00
										 |  |  | 	{dataBlocks: 10, onDisks: 16, offDisks: 8, blocksize: int64(blockSizeV2), data: oneMiByte, offset: 0, algorithm: DefaultBitrotAlgorithm, shouldFail: false, shouldFailQuorum: true},                // 19
 | 
					
						
							| 
									
										
										
										
											2016-07-19 14:56:16 +08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2016-07-26 11:36:41 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | func TestErasureEncode(t *testing.T) { | 
					
						
							|  |  |  | 	for i, test := range erasureEncodeTests { | 
					
						
							| 
									
										
										
										
											2022-07-26 03:37:26 +08:00
										 |  |  | 		setup, err := newErasureTestSetup(t, test.dataBlocks, test.onDisks-test.dataBlocks, test.blocksize) | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			t.Fatalf("Test %d: failed to create test setup: %v", i, err) | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 		disks := setup.disks | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 		erasure, err := NewErasure(context.Background(), test.dataBlocks, test.onDisks-test.dataBlocks, test.blocksize) | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			t.Fatalf("Test %d: failed to create ErasureStorage: %v", i, err) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		buffer := make([]byte, test.blocksize, 2*test.blocksize) | 
					
						
							| 
									
										
										
										
											2016-07-26 11:36:41 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 		data := make([]byte, test.data) | 
					
						
							|  |  |  | 		if _, err = io.ReadFull(rand.Reader, data); err != nil { | 
					
						
							|  |  |  | 			t.Fatalf("Test %d: failed to generate random test data: %v", i, err) | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 		writers := make([]io.Writer, len(disks)) | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 		for i, disk := range disks { | 
					
						
							|  |  |  | 			if disk == OfflineDisk { | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2021-05-17 23:32:28 +08:00
										 |  |  | 			writers[i] = newBitrotWriter(disk, "testbucket", "object", erasure.ShardFileSize(int64(len(data[test.offset:]))), test.algorithm, erasure.ShardSize()) | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 		n, err := erasure.Encode(context.Background(), bytes.NewReader(data[test.offset:]), writers, buffer, erasure.dataBlocks+1) | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 		closeBitrotWriters(writers) | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 		if err != nil && !test.shouldFail { | 
					
						
							|  |  |  | 			t.Errorf("Test %d: should pass but failed with: %v", i, err) | 
					
						
							| 
									
										
										
										
											2016-07-26 11:36:41 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 		if err == nil && test.shouldFail { | 
					
						
							|  |  |  | 			t.Errorf("Test %d: should fail but it passed", i) | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 		for i, w := range writers { | 
					
						
							|  |  |  | 			if w == nil { | 
					
						
							|  |  |  | 				disks[i] = OfflineDisk | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 		if err == nil { | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 			if length := int64(len(data[test.offset:])); n != length { | 
					
						
							|  |  |  | 				t.Errorf("Test %d: invalid number of bytes written: got: #%d want #%d", i, n, length) | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 			writers := make([]io.Writer, len(disks)) | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 			for i, disk := range disks { | 
					
						
							|  |  |  | 				if disk == nil { | 
					
						
							|  |  |  | 					continue | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2021-05-17 23:32:28 +08:00
										 |  |  | 				writers[i] = newBitrotWriter(disk, "testbucket", "object2", erasure.ShardFileSize(int64(len(data[test.offset:]))), test.algorithm, erasure.ShardSize()) | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 			for j := range disks[:test.offDisks] { | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 				switch w := writers[j].(type) { | 
					
						
							|  |  |  | 				case *wholeBitrotWriter: | 
					
						
							|  |  |  | 					w.disk = badDisk{nil} | 
					
						
							|  |  |  | 				case *streamingBitrotWriter: | 
					
						
							| 
									
										
										
										
											2021-03-30 08:00:55 +08:00
										 |  |  | 					w.closeWithErr(errFaultyDisk) | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 				} | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 			if test.offDisks > 0 { | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 				writers[0] = nil | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 			n, err = erasure.Encode(context.Background(), bytes.NewReader(data[test.offset:]), writers, buffer, erasure.dataBlocks+1) | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 			closeBitrotWriters(writers) | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 			if err != nil && !test.shouldFailQuorum { | 
					
						
							|  |  |  | 				t.Errorf("Test %d: should pass but failed with: %v", i, err) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if err == nil && test.shouldFailQuorum { | 
					
						
							|  |  |  | 				t.Errorf("Test %d: should fail but it passed", i) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if err == nil { | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 				if length := int64(len(data[test.offset:])); n != length { | 
					
						
							|  |  |  | 					t.Errorf("Test %d: invalid number of bytes written: got: #%d want #%d", i, n, length) | 
					
						
							| 
									
										
										
										
											2017-08-15 09:08:42 +08:00
										 |  |  | 				} | 
					
						
							| 
									
										
										
										
											2016-07-26 11:36:41 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | // Benchmarks
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | func benchmarkErasureEncode(data, parity, dataDown, parityDown int, size int64, b *testing.B) { | 
					
						
							| 
									
										
										
										
											2022-07-26 03:37:26 +08:00
										 |  |  | 	setup, err := newErasureTestSetup(b, data, parity, blockSizeV2) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		b.Fatalf("failed to create test setup: %v", err) | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
											  
											
												[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
											
										 
											2021-03-07 06:09:34 +08:00
										 |  |  | 	erasure, err := NewErasure(context.Background(), data, parity, blockSizeV2) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		b.Fatalf("failed to create ErasureStorage: %v", err) | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 	disks := setup.disks | 
					
						
							| 
									
										
											  
											
												[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
											
										 
											2021-03-07 06:09:34 +08:00
										 |  |  | 	buffer := make([]byte, blockSizeV2, 2*blockSizeV2) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	content := make([]byte, size) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for i := 0; i < dataDown; i++ { | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 		disks[i] = OfflineDisk | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	for i := data; i < data+parityDown; i++ { | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 		disks[i] = OfflineDisk | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	b.ResetTimer() | 
					
						
							|  |  |  | 	b.SetBytes(size) | 
					
						
							|  |  |  | 	b.ReportAllocs() | 
					
						
							|  |  |  | 	for i := 0; i < b.N; i++ { | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 		writers := make([]io.Writer, len(disks)) | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 		for i, disk := range disks { | 
					
						
							|  |  |  | 			if disk == OfflineDisk { | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2022-07-12 00:15:54 +08:00
										 |  |  | 			disk.Delete(context.Background(), "testbucket", "object", DeleteOptions{ | 
					
						
							|  |  |  | 				Recursive: false, | 
					
						
							| 
									
										
										
										
											2023-11-29 14:35:16 +08:00
										 |  |  | 				Immediate: false, | 
					
						
							| 
									
										
										
										
											2022-07-12 00:15:54 +08:00
										 |  |  | 			}) | 
					
						
							| 
									
										
										
										
											2021-05-17 23:32:28 +08:00
										 |  |  | 			writers[i] = newBitrotWriter(disk, "testbucket", "object", erasure.ShardFileSize(size), DefaultBitrotAlgorithm, erasure.ShardSize()) | 
					
						
							| 
									
										
										
										
											2018-08-07 06:14:08 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 		_, err := erasure.Encode(context.Background(), bytes.NewReader(content), writers, buffer, erasure.dataBlocks+1) | 
					
						
							| 
									
										
										
										
											2019-01-17 20:58:18 +08:00
										 |  |  | 		closeBitrotWriters(writers) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			panic(err) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | func BenchmarkErasureEncodeQuick(b *testing.B) { | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	const size = 12 * 1024 * 1024 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 	b.Run(" 00|00 ", func(b *testing.B) { benchmarkErasureEncode(2, 2, 0, 0, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" 00|X0 ", func(b *testing.B) { benchmarkErasureEncode(2, 2, 0, 1, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" X0|00 ", func(b *testing.B) { benchmarkErasureEncode(2, 2, 1, 0, size, b) }) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | func BenchmarkErasureEncode_4_64KB(b *testing.B) { | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	const size = 64 * 1024 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 	b.Run(" 00|00 ", func(b *testing.B) { benchmarkErasureEncode(2, 2, 0, 0, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" 00|X0 ", func(b *testing.B) { benchmarkErasureEncode(2, 2, 0, 1, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" X0|00 ", func(b *testing.B) { benchmarkErasureEncode(2, 2, 1, 0, size, b) }) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | func BenchmarkErasureEncode_8_20MB(b *testing.B) { | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	const size = 20 * 1024 * 1024 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 	b.Run(" 0000|0000 ", func(b *testing.B) { benchmarkErasureEncode(4, 4, 0, 0, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" 0000|X000 ", func(b *testing.B) { benchmarkErasureEncode(4, 4, 0, 1, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" X000|0000 ", func(b *testing.B) { benchmarkErasureEncode(4, 4, 1, 0, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" 0000|XXX0 ", func(b *testing.B) { benchmarkErasureEncode(4, 4, 0, 3, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" XXX0|0000 ", func(b *testing.B) { benchmarkErasureEncode(4, 4, 3, 0, size, b) }) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | func BenchmarkErasureEncode_12_30MB(b *testing.B) { | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	const size = 30 * 1024 * 1024 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 	b.Run(" 000000|000000 ", func(b *testing.B) { benchmarkErasureEncode(6, 6, 0, 0, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" 000000|X00000 ", func(b *testing.B) { benchmarkErasureEncode(6, 6, 0, 1, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" X00000|000000 ", func(b *testing.B) { benchmarkErasureEncode(6, 6, 1, 0, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" 000000|XXXXX0 ", func(b *testing.B) { benchmarkErasureEncode(6, 6, 0, 5, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" XXXXX0|000000 ", func(b *testing.B) { benchmarkErasureEncode(6, 6, 5, 0, size, b) }) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | func BenchmarkErasureEncode_16_40MB(b *testing.B) { | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | 	const size = 40 * 1024 * 1024 | 
					
						
							| 
									
										
										
										
											2018-08-24 14:35:37 +08:00
										 |  |  | 	b.Run(" 00000000|00000000 ", func(b *testing.B) { benchmarkErasureEncode(8, 8, 0, 0, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" 00000000|X0000000 ", func(b *testing.B) { benchmarkErasureEncode(8, 8, 0, 1, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" X0000000|00000000 ", func(b *testing.B) { benchmarkErasureEncode(8, 8, 1, 0, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" 00000000|XXXXXXX0 ", func(b *testing.B) { benchmarkErasureEncode(8, 8, 0, 7, size, b) }) | 
					
						
							|  |  |  | 	b.Run(" XXXXXXX0|00000000 ", func(b *testing.B) { benchmarkErasureEncode(8, 8, 7, 0, size, b) }) | 
					
						
							| 
									
										
										
										
											2017-11-18 06:57:04 +08:00
										 |  |  | } |