| 
									
										
										
										
											2021-04-19 03:41:13 +08:00
										 |  |  | // Copyright (c) 2015-2021 MinIO, Inc.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This file is part of MinIO Object Storage stack
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or
 | 
					
						
							|  |  |  | // (at your option) any later version.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU Affero General Public License for more details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License
 | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | package s3select | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bytes" | 
					
						
							|  |  |  | 	"encoding/csv" | 
					
						
							|  |  |  | 	"math/rand" | 
					
						
							|  |  |  | 	"net/http" | 
					
						
							|  |  |  | 	"strconv" | 
					
						
							|  |  |  | 	"testing" | 
					
						
							|  |  |  | 	"time" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	humanize "github.com/dustin/go-humanize" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newRandString(length int) string { | 
					
						
							| 
									
										
										
										
											2020-09-23 06:34:27 +08:00
										 |  |  | 	randSrc := rand.New(rand.NewSource(time.Now().UnixNano())) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	b := make([]byte, length) | 
					
						
							|  |  |  | 	for i := range b { | 
					
						
							|  |  |  | 		b[i] = charset[randSrc.Intn(len(charset))] | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return string(b) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func genSampleCSVData(count int) []byte { | 
					
						
							|  |  |  | 	buf := &bytes.Buffer{} | 
					
						
							|  |  |  | 	csvWriter := csv.NewWriter(buf) | 
					
						
							|  |  |  | 	csvWriter.Write([]string{"id", "name", "age", "city"}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for i := 0; i < count; i++ { | 
					
						
							|  |  |  | 		csvWriter.Write([]string{ | 
					
						
							|  |  |  | 			strconv.Itoa(i), | 
					
						
							|  |  |  | 			newRandString(10), | 
					
						
							|  |  |  | 			newRandString(5), | 
					
						
							|  |  |  | 			newRandString(10), | 
					
						
							|  |  |  | 		}) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	csvWriter.Flush() | 
					
						
							|  |  |  | 	return buf.Bytes() | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-03 01:15:06 +08:00
										 |  |  | type nullResponseWriter struct{} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | func (w *nullResponseWriter) Header() http.Header { | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (w *nullResponseWriter) Write(p []byte) (int, error) { | 
					
						
							|  |  |  | 	return len(p), nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (w *nullResponseWriter) WriteHeader(statusCode int) { | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (w *nullResponseWriter) Flush() { | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func benchmarkSelect(b *testing.B, count int, query string) { | 
					
						
							| 
									
										
										
										
											2022-01-03 01:15:06 +08:00
										 |  |  | 	requestXML := []byte(` | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | <?xml version="1.0" encoding="UTF-8"?> | 
					
						
							|  |  |  | <SelectObjectContentRequest> | 
					
						
							|  |  |  |     <Expression>` + query + `</Expression> | 
					
						
							|  |  |  |     <ExpressionType>SQL</ExpressionType> | 
					
						
							|  |  |  |     <InputSerialization> | 
					
						
							|  |  |  |         <CompressionType>NONE</CompressionType> | 
					
						
							|  |  |  |         <CSV> | 
					
						
							|  |  |  |             <FileHeaderInfo>USE</FileHeaderInfo> | 
					
						
							|  |  |  |         </CSV> | 
					
						
							|  |  |  |     </InputSerialization> | 
					
						
							|  |  |  |     <OutputSerialization> | 
					
						
							|  |  |  |         <CSV> | 
					
						
							|  |  |  |         </CSV> | 
					
						
							|  |  |  |     </OutputSerialization> | 
					
						
							|  |  |  |     <RequestProgress> | 
					
						
							|  |  |  |         <Enabled>FALSE</Enabled> | 
					
						
							|  |  |  |     </RequestProgress> | 
					
						
							|  |  |  | </SelectObjectContentRequest> | 
					
						
							|  |  |  | `) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	csvData := genSampleCSVData(count) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	b.ResetTimer() | 
					
						
							|  |  |  | 	b.ReportAllocs() | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 	b.SetBytes(int64(count)) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 	b.RunParallel(func(pb *testing.PB) { | 
					
						
							|  |  |  | 		for pb.Next() { | 
					
						
							|  |  |  | 			s3Select, err := NewS3Select(bytes.NewReader(requestXML)) | 
					
						
							|  |  |  | 			if err != nil { | 
					
						
							|  |  |  | 				b.Fatal(err) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 			if err = s3Select.Open(newBytesRSC(csvData)); err != nil { | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 				b.Fatal(err) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			s3Select.Evaluate(&nullResponseWriter{}) | 
					
						
							|  |  |  | 			s3Select.Close() | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 	}) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func benchmarkSelectAll(b *testing.B, count int) { | 
					
						
							|  |  |  | 	benchmarkSelect(b, count, "select * from S3Object") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkSelectAll_100K - benchmark * function with 100k records.
 | 
					
						
							|  |  |  | func BenchmarkSelectAll_100K(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkSelectAll(b, 100*humanize.KiByte) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkSelectAll_1M - benchmark * function with 1m records.
 | 
					
						
							|  |  |  | func BenchmarkSelectAll_1M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkSelectAll(b, 1*humanize.MiByte) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkSelectAll_2M - benchmark * function with 2m records.
 | 
					
						
							|  |  |  | func BenchmarkSelectAll_2M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkSelectAll(b, 2*humanize.MiByte) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkSelectAll_10M - benchmark * function with 10m records.
 | 
					
						
							|  |  |  | func BenchmarkSelectAll_10M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkSelectAll(b, 10*humanize.MiByte) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | func benchmarkSingleCol(b *testing.B, count int) { | 
					
						
							|  |  |  | 	benchmarkSelect(b, count, "select id from S3Object") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkSingleRow_100K - benchmark SELECT column function with 100k records.
 | 
					
						
							|  |  |  | func BenchmarkSingleCol_100K(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkSingleCol(b, 1e5) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkSelectAll_1M - benchmark * function with 1m records.
 | 
					
						
							|  |  |  | func BenchmarkSingleCol_1M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkSingleCol(b, 1e6) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkSelectAll_2M - benchmark * function with 2m records.
 | 
					
						
							|  |  |  | func BenchmarkSingleCol_2M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkSingleCol(b, 2e6) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkSelectAll_10M - benchmark * function with 10m records.
 | 
					
						
							|  |  |  | func BenchmarkSingleCol_10M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkSingleCol(b, 1e7) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | func benchmarkAggregateCount(b *testing.B, count int) { | 
					
						
							|  |  |  | 	benchmarkSelect(b, count, "select count(*) from S3Object") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkAggregateCount_100K - benchmark count(*) function with 100k records.
 | 
					
						
							|  |  |  | func BenchmarkAggregateCount_100K(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkAggregateCount(b, 100*humanize.KiByte) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkAggregateCount_1M - benchmark count(*) function with 1m records.
 | 
					
						
							|  |  |  | func BenchmarkAggregateCount_1M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkAggregateCount(b, 1*humanize.MiByte) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkAggregateCount_2M - benchmark count(*) function with 2m records.
 | 
					
						
							|  |  |  | func BenchmarkAggregateCount_2M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkAggregateCount(b, 2*humanize.MiByte) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // BenchmarkAggregateCount_10M - benchmark count(*) function with 10m records.
 | 
					
						
							|  |  |  | func BenchmarkAggregateCount_10M(b *testing.B) { | 
					
						
							|  |  |  | 	benchmarkAggregateCount(b, 10*humanize.MiByte) | 
					
						
							|  |  |  | } |