| 
									
										
										
										
											2021-04-19 03:41:13 +08:00
										 |  |  | // Copyright (c) 2015-2021 MinIO, Inc.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This file is part of MinIO Object Storage stack
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or
 | 
					
						
							|  |  |  | // (at your option) any later version.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU Affero General Public License for more details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License
 | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | package s3select | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 	"bufio" | 
					
						
							|  |  |  | 	"bytes" | 
					
						
							| 
									
										
										
										
											2020-03-06 11:34:04 +08:00
										 |  |  | 	"compress/bzip2" | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	"encoding/xml" | 
					
						
							| 
									
										
										
										
											2020-03-06 11:34:04 +08:00
										 |  |  | 	"errors" | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	"fmt" | 
					
						
							|  |  |  | 	"io" | 
					
						
							|  |  |  | 	"net/http" | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 	"strings" | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 	"sync" | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-07 00:09:53 +08:00
										 |  |  | 	"github.com/klauspost/compress/s2" | 
					
						
							|  |  |  | 	"github.com/klauspost/compress/zstd" | 
					
						
							|  |  |  | 	gzip "github.com/klauspost/pgzip" | 
					
						
							| 
									
										
										
										
											2023-07-20 22:52:49 +08:00
										 |  |  | 	"github.com/minio/minio/internal/config" | 
					
						
							| 
									
										
										
										
											2023-11-07 06:26:08 +08:00
										 |  |  | 	xioutil "github.com/minio/minio/internal/ioutil" | 
					
						
							| 
									
										
										
										
											2021-06-02 05:59:40 +08:00
										 |  |  | 	"github.com/minio/minio/internal/s3select/csv" | 
					
						
							|  |  |  | 	"github.com/minio/minio/internal/s3select/json" | 
					
						
							|  |  |  | 	"github.com/minio/minio/internal/s3select/parquet" | 
					
						
							|  |  |  | 	"github.com/minio/minio/internal/s3select/simdj" | 
					
						
							|  |  |  | 	"github.com/minio/minio/internal/s3select/sql" | 
					
						
							| 
									
										
										
										
											2023-09-05 03:57:37 +08:00
										 |  |  | 	"github.com/minio/pkg/v2/env" | 
					
						
							| 
									
										
										
										
											2020-02-14 06:03:52 +08:00
										 |  |  | 	"github.com/minio/simdjson-go" | 
					
						
							| 
									
										
										
										
											2021-09-07 00:09:53 +08:00
										 |  |  | 	"github.com/pierrec/lz4" | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | type recordReader interface { | 
					
						
							| 
									
										
										
										
											2019-09-14 05:18:35 +08:00
										 |  |  | 	// Read a record.
 | 
					
						
							|  |  |  | 	// dst is optional but will be used if valid.
 | 
					
						
							|  |  |  | 	Read(dst sql.Record) (sql.Record, error) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	Close() error | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | const ( | 
					
						
							|  |  |  | 	csvFormat     = "csv" | 
					
						
							|  |  |  | 	jsonFormat    = "json" | 
					
						
							|  |  |  | 	parquetFormat = "parquet" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // CompressionType - represents value inside <CompressionType/> in request XML.
 | 
					
						
							|  |  |  | type CompressionType string | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							|  |  |  | 	noneType  CompressionType = "none" | 
					
						
							| 
									
										
										
										
											2021-09-07 00:09:53 +08:00
										 |  |  | 	gzipType  CompressionType = "GZIP" | 
					
						
							|  |  |  | 	bzip2Type CompressionType = "BZIP2" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	zstdType   CompressionType = "ZSTD" | 
					
						
							|  |  |  | 	lz4Type    CompressionType = "LZ4" | 
					
						
							|  |  |  | 	s2Type     CompressionType = "S2" | 
					
						
							|  |  |  | 	snappyType CompressionType = "SNAPPY" | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-29 12:00:18 +08:00
										 |  |  | const ( | 
					
						
							|  |  |  | 	maxRecordSize = 1 << 20 // 1 MiB
 | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-20 22:52:49 +08:00
										 |  |  | var parquetSupport bool | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func init() { | 
					
						
							|  |  |  | 	parquetSupport = env.Get("MINIO_API_SELECT_PARQUET", config.EnableOff) == config.EnableOn | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | var bufPool = sync.Pool{ | 
					
						
							|  |  |  | 	New: func() interface{} { | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 		// make a buffer with a reasonable capacity.
 | 
					
						
							|  |  |  | 		return bytes.NewBuffer(make([]byte, 0, maxRecordSize)) | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 	}, | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var bufioWriterPool = sync.Pool{ | 
					
						
							|  |  |  | 	New: func() interface{} { | 
					
						
							| 
									
										
										
										
											2022-09-20 02:05:16 +08:00
										 |  |  | 		// io.Discard is just used to create the writer. Actual destination
 | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 		// writer is set later by Reset() before using it.
 | 
					
						
							| 
									
										
										
										
											2023-11-07 06:26:08 +08:00
										 |  |  | 		return bufio.NewWriter(xioutil.Discard) | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 	}, | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // UnmarshalXML - decodes XML data.
 | 
					
						
							|  |  |  | func (c *CompressionType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | 
					
						
							|  |  |  | 	var s string | 
					
						
							|  |  |  | 	if err := d.DecodeElement(&s, &start); err != nil { | 
					
						
							|  |  |  | 		return errMalformedXML(err) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-07 00:09:53 +08:00
										 |  |  | 	parsedType := CompressionType(strings.ToUpper(s)) | 
					
						
							|  |  |  | 	if s == "" || parsedType == "NONE" { | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		parsedType = noneType | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	switch parsedType { | 
					
						
							| 
									
										
										
										
											2021-09-07 00:09:53 +08:00
										 |  |  | 	case noneType, gzipType, bzip2Type, snappyType, s2Type, zstdType, lz4Type: | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	default: | 
					
						
							|  |  |  | 		return errInvalidCompressionFormat(fmt.Errorf("invalid compression format '%v'", s)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	*c = parsedType | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // InputSerialization - represents elements inside <InputSerialization/> in request XML.
 | 
					
						
							|  |  |  | type InputSerialization struct { | 
					
						
							|  |  |  | 	CompressionType CompressionType    `xml:"CompressionType"` | 
					
						
							|  |  |  | 	CSVArgs         csv.ReaderArgs     `xml:"CSV"` | 
					
						
							|  |  |  | 	JSONArgs        json.ReaderArgs    `xml:"JSON"` | 
					
						
							|  |  |  | 	ParquetArgs     parquet.ReaderArgs `xml:"Parquet"` | 
					
						
							|  |  |  | 	unmarshaled     bool | 
					
						
							|  |  |  | 	format          string | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // IsEmpty - returns whether input serialization is empty or not.
 | 
					
						
							|  |  |  | func (input *InputSerialization) IsEmpty() bool { | 
					
						
							|  |  |  | 	return !input.unmarshaled | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // UnmarshalXML - decodes XML data.
 | 
					
						
							|  |  |  | func (input *InputSerialization) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | 
					
						
							|  |  |  | 	// Make subtype to avoid recursive UnmarshalXML().
 | 
					
						
							|  |  |  | 	type subInputSerialization InputSerialization | 
					
						
							|  |  |  | 	parsedInput := subInputSerialization{} | 
					
						
							|  |  |  | 	if err := d.DecodeElement(&parsedInput, &start); err != nil { | 
					
						
							|  |  |  | 		return errMalformedXML(err) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-02-07 05:25:52 +08:00
										 |  |  | 	// If no compression is specified, set to noneType
 | 
					
						
							| 
									
										
										
										
											2021-09-07 00:09:53 +08:00
										 |  |  | 	if parsedInput.CompressionType == "" { | 
					
						
							| 
									
										
										
										
											2019-02-07 05:25:52 +08:00
										 |  |  | 		parsedInput.CompressionType = noneType | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	found := 0 | 
					
						
							|  |  |  | 	if !parsedInput.CSVArgs.IsEmpty() { | 
					
						
							|  |  |  | 		parsedInput.format = csvFormat | 
					
						
							|  |  |  | 		found++ | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if !parsedInput.JSONArgs.IsEmpty() { | 
					
						
							|  |  |  | 		parsedInput.format = jsonFormat | 
					
						
							|  |  |  | 		found++ | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if !parsedInput.ParquetArgs.IsEmpty() { | 
					
						
							| 
									
										
										
										
											2019-01-29 09:59:48 +08:00
										 |  |  | 		if parsedInput.CompressionType != "" && parsedInput.CompressionType != noneType { | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 			return errInvalidRequestParameter(fmt.Errorf("CompressionType must be NONE for Parquet format")) | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		parsedInput.format = parquetFormat | 
					
						
							|  |  |  | 		found++ | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if found != 1 { | 
					
						
							|  |  |  | 		return errInvalidDataSource(nil) | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	*input = InputSerialization(parsedInput) | 
					
						
							|  |  |  | 	input.unmarshaled = true | 
					
						
							|  |  |  | 	return nil | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // OutputSerialization - represents elements inside <OutputSerialization/> in request XML.
 | 
					
						
							|  |  |  | type OutputSerialization struct { | 
					
						
							|  |  |  | 	CSVArgs     csv.WriterArgs  `xml:"CSV"` | 
					
						
							|  |  |  | 	JSONArgs    json.WriterArgs `xml:"JSON"` | 
					
						
							|  |  |  | 	unmarshaled bool | 
					
						
							|  |  |  | 	format      string | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // IsEmpty - returns whether output serialization is empty or not.
 | 
					
						
							|  |  |  | func (output *OutputSerialization) IsEmpty() bool { | 
					
						
							|  |  |  | 	return !output.unmarshaled | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // UnmarshalXML - decodes XML data.
 | 
					
						
							|  |  |  | func (output *OutputSerialization) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | 
					
						
							|  |  |  | 	// Make subtype to avoid recursive UnmarshalXML().
 | 
					
						
							|  |  |  | 	type subOutputSerialization OutputSerialization | 
					
						
							|  |  |  | 	parsedOutput := subOutputSerialization{} | 
					
						
							|  |  |  | 	if err := d.DecodeElement(&parsedOutput, &start); err != nil { | 
					
						
							|  |  |  | 		return errMalformedXML(err) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	found := 0 | 
					
						
							|  |  |  | 	if !parsedOutput.CSVArgs.IsEmpty() { | 
					
						
							|  |  |  | 		parsedOutput.format = csvFormat | 
					
						
							|  |  |  | 		found++ | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if !parsedOutput.JSONArgs.IsEmpty() { | 
					
						
							|  |  |  | 		parsedOutput.format = jsonFormat | 
					
						
							|  |  |  | 		found++ | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if found != 1 { | 
					
						
							|  |  |  | 		return errObjectSerializationConflict(fmt.Errorf("either CSV or JSON should be present in OutputSerialization")) | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2018-10-23 03:12:22 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	*output = OutputSerialization(parsedOutput) | 
					
						
							|  |  |  | 	output.unmarshaled = true | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // RequestProgress - represents elements inside <RequestProgress/> in request XML.
 | 
					
						
							|  |  |  | type RequestProgress struct { | 
					
						
							|  |  |  | 	Enabled bool `xml:"Enabled"` | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-03-15 00:48:36 +08:00
										 |  |  | // ScanRange represents the ScanRange parameter.
 | 
					
						
							|  |  |  | type ScanRange struct { | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 	// Start is the byte offset to read from (from the start of the file).
 | 
					
						
							| 
									
										
										
										
											2022-03-15 00:48:36 +08:00
										 |  |  | 	Start *uint64 `xml:"Start"` | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 	// End is the offset of the last byte that should be returned when Start
 | 
					
						
							|  |  |  | 	// is set, otherwise it is the offset from EOF to start reading.
 | 
					
						
							| 
									
										
										
										
											2022-03-15 00:48:36 +08:00
										 |  |  | 	End *uint64 `xml:"End"` | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Validate if the scan range is valid.
 | 
					
						
							|  |  |  | func (s *ScanRange) Validate() error { | 
					
						
							|  |  |  | 	if s == nil { | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if s.Start == nil && s.End == nil { | 
					
						
							|  |  |  | 		// This parameter is optional, but when specified, it must not be empty.
 | 
					
						
							|  |  |  | 		// Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html#AmazonS3-SelectObjectContent-request-ScanRange
 | 
					
						
							|  |  |  | 		return errors.New("ScanRange: No Start or End specified") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if s.Start == nil || s.End == nil { | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if *s.Start > *s.End { | 
					
						
							|  |  |  | 		return errors.New("ScanRange: Start cannot be after end") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // StartLen returns start offset plus length from range.
 | 
					
						
							|  |  |  | func (s *ScanRange) StartLen() (start, length int64, err error) { | 
					
						
							|  |  |  | 	if s == nil { | 
					
						
							|  |  |  | 		return 0, -1, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	err = s.Validate() | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return 0, 0, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if s.End == nil && s.Start == nil { | 
					
						
							|  |  |  | 		// Not valid, but should be caught above.
 | 
					
						
							|  |  |  | 		return 0, -1, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if s.End == nil { | 
					
						
							|  |  |  | 		start := int64(*s.Start) | 
					
						
							|  |  |  | 		if start < 0 { | 
					
						
							|  |  |  | 			return 0, 0, errors.New("ScanRange: Start after EOF") | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return start, -1, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if s.Start == nil { | 
					
						
							|  |  |  | 		// Suffix length
 | 
					
						
							|  |  |  | 		end := int64(*s.End) | 
					
						
							|  |  |  | 		if end < 0 { | 
					
						
							|  |  |  | 			return 0, 0, errors.New("ScanRange: End bigger than file") | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// Suffix length
 | 
					
						
							|  |  |  | 		return -end, -1, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	start = int64(*s.Start) | 
					
						
							|  |  |  | 	end := int64(*s.End) | 
					
						
							|  |  |  | 	return start, end - start + 1, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // S3Select - filters the contents on a simple structured query language (SQL) statement. It
 | 
					
						
							| 
									
										
										
										
											2019-02-07 05:25:52 +08:00
										 |  |  | // represents elements inside <SelectRequest/> in request XML specified in detail at
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html.
 | 
					
						
							|  |  |  | type S3Select struct { | 
					
						
							| 
									
										
										
										
											2019-02-07 05:25:52 +08:00
										 |  |  | 	XMLName        xml.Name            `xml:"SelectRequest"` | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	Expression     string              `xml:"Expression"` | 
					
						
							|  |  |  | 	ExpressionType string              `xml:"ExpressionType"` | 
					
						
							|  |  |  | 	Input          InputSerialization  `xml:"InputSerialization"` | 
					
						
							|  |  |  | 	Output         OutputSerialization `xml:"OutputSerialization"` | 
					
						
							|  |  |  | 	Progress       RequestProgress     `xml:"RequestProgress"` | 
					
						
							| 
									
										
										
										
											2022-03-15 00:48:36 +08:00
										 |  |  | 	ScanRange      *ScanRange          `xml:"ScanRange"` | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-29 09:59:48 +08:00
										 |  |  | 	statement      *sql.SelectStatement | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	progressReader *progressReader | 
					
						
							|  |  |  | 	recordReader   recordReader | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-03 01:15:06 +08:00
										 |  |  | var legacyXMLName = "SelectObjectContentRequest" | 
					
						
							| 
									
										
										
										
											2019-02-07 05:25:52 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // UnmarshalXML - decodes XML data.
 | 
					
						
							|  |  |  | func (s3Select *S3Select) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | 
					
						
							| 
									
										
										
										
											2019-02-07 05:25:52 +08:00
										 |  |  | 	// S3 also supports the older SelectObjectContentRequest tag,
 | 
					
						
							|  |  |  | 	// though it is no longer found in documentation. This is
 | 
					
						
							|  |  |  | 	// checked and renamed below to allow older clients to also
 | 
					
						
							|  |  |  | 	// work.
 | 
					
						
							|  |  |  | 	if start.Name.Local == legacyXMLName { | 
					
						
							|  |  |  | 		start.Name = xml.Name{Space: "", Local: "SelectRequest"} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	// Make subtype to avoid recursive UnmarshalXML().
 | 
					
						
							|  |  |  | 	type subS3Select S3Select | 
					
						
							|  |  |  | 	parsedS3Select := subS3Select{} | 
					
						
							|  |  |  | 	if err := d.DecodeElement(&parsedS3Select, &start); err != nil { | 
					
						
							|  |  |  | 		if _, ok := err.(*s3Error); ok { | 
					
						
							|  |  |  | 			return err | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		return errMalformedXML(err) | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2022-03-15 00:48:36 +08:00
										 |  |  | 	if err := parsedS3Select.ScanRange.Validate(); err != nil { | 
					
						
							|  |  |  | 		return errInvalidScanRangeParameter(err) | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	parsedS3Select.ExpressionType = strings.ToLower(parsedS3Select.ExpressionType) | 
					
						
							|  |  |  | 	if parsedS3Select.ExpressionType != "sql" { | 
					
						
							|  |  |  | 		return errInvalidExpressionType(fmt.Errorf("invalid expression type '%v'", parsedS3Select.ExpressionType)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if parsedS3Select.Input.IsEmpty() { | 
					
						
							|  |  |  | 		return errMissingRequiredParameter(fmt.Errorf("InputSerialization must be provided")) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if parsedS3Select.Output.IsEmpty() { | 
					
						
							|  |  |  | 		return errMissingRequiredParameter(fmt.Errorf("OutputSerialization must be provided")) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-29 09:59:48 +08:00
										 |  |  | 	statement, err := sql.ParseSelectStatement(parsedS3Select.Expression) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-29 09:59:48 +08:00
										 |  |  | 	parsedS3Select.statement = &statement | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	*s3Select = S3Select(parsedS3Select) | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (s3Select *S3Select) outputRecord() sql.Record { | 
					
						
							|  |  |  | 	switch s3Select.Output.format { | 
					
						
							|  |  |  | 	case csvFormat: | 
					
						
							|  |  |  | 		return csv.NewRecord() | 
					
						
							|  |  |  | 	case jsonFormat: | 
					
						
							| 
									
										
										
										
											2019-03-10 00:13:37 +08:00
										 |  |  | 		return json.NewRecord(sql.SelectFmtJSON) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	panic(fmt.Errorf("unknown output format '%v'", s3Select.Output.format)) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (s3Select *S3Select) getProgress() (bytesScanned, bytesProcessed int64) { | 
					
						
							|  |  |  | 	if s3Select.progressReader != nil { | 
					
						
							|  |  |  | 		return s3Select.progressReader.Stats() | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return -1, -1 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Open - opens S3 object by using callback for SQL selection query.
 | 
					
						
							|  |  |  | // Currently CSV, JSON and Apache Parquet formats are supported.
 | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | func (s3Select *S3Select) Open(rsc io.ReadSeekCloser) error { | 
					
						
							|  |  |  | 	offset, length, err := s3Select.ScanRange.StartLen() | 
					
						
							| 
									
										
										
										
											2022-03-15 00:48:36 +08:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 	seekDirection := io.SeekStart | 
					
						
							|  |  |  | 	if offset < 0 { | 
					
						
							|  |  |  | 		seekDirection = io.SeekEnd | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	switch s3Select.Input.format { | 
					
						
							|  |  |  | 	case csvFormat: | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 		_, err = rsc.Seek(offset, seekDirection) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return err | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 		var rc io.ReadCloser = rsc | 
					
						
							|  |  |  | 		if length != -1 { | 
					
						
							|  |  |  | 			rc = newLimitedReadCloser(rsc, length) | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		s3Select.progressReader, err = newProgressReader(rc, s3Select.Input.CompressionType) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 			rsc.Close() | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 			return err | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		s3Select.recordReader, err = csv.NewReader(s3Select.progressReader, &s3Select.Input.CSVArgs) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2022-03-24 11:58:53 +08:00
										 |  |  | 			// Close all reader resources opened so far.
 | 
					
						
							|  |  |  | 			s3Select.progressReader.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-06 11:34:04 +08:00
										 |  |  | 			var stErr bzip2.StructuralError | 
					
						
							|  |  |  | 			if errors.As(err, &stErr) { | 
					
						
							| 
									
										
										
										
											2021-09-07 00:09:53 +08:00
										 |  |  | 				return errInvalidCompression(err, s3Select.Input.CompressionType) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			// Test these compressor errors
 | 
					
						
							|  |  |  | 			errs := []error{ | 
					
						
							|  |  |  | 				gzip.ErrHeader, gzip.ErrChecksum, | 
					
						
							|  |  |  | 				s2.ErrCorrupt, s2.ErrUnsupported, s2.ErrCRC, | 
					
						
							|  |  |  | 				zstd.ErrBlockTooSmall, zstd.ErrMagicMismatch, zstd.ErrWindowSizeExceeded, zstd.ErrUnknownDictionary, zstd.ErrWindowSizeTooSmall, | 
					
						
							|  |  |  | 				lz4.ErrInvalid, lz4.ErrBlockDependency, | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			for _, e := range errs { | 
					
						
							|  |  |  | 				if errors.Is(err, e) { | 
					
						
							|  |  |  | 					return errInvalidCompression(err, s3Select.Input.CompressionType) | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2020-03-06 11:34:04 +08:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 			return err | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		return nil | 
					
						
							|  |  |  | 	case jsonFormat: | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 		_, err = rsc.Seek(offset, seekDirection) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return err | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 		var rc io.ReadCloser = rsc | 
					
						
							|  |  |  | 		if length != -1 { | 
					
						
							|  |  |  | 			rc = newLimitedReadCloser(rsc, length) | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		s3Select.progressReader, err = newProgressReader(rc, s3Select.Input.CompressionType) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 			rsc.Close() | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 			return err | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-09 22:55:31 +08:00
										 |  |  | 		if strings.EqualFold(s3Select.Input.JSONArgs.ContentType, "lines") { | 
					
						
							| 
									
										
										
										
											2020-02-14 06:03:52 +08:00
										 |  |  | 			if simdjson.SupportedCPU() { | 
					
						
							|  |  |  | 				s3Select.recordReader = simdj.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs) | 
					
						
							|  |  |  | 			} else { | 
					
						
							|  |  |  | 				s3Select.recordReader = json.NewPReader(s3Select.progressReader, &s3Select.Input.JSONArgs) | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2019-12-09 22:55:31 +08:00
										 |  |  | 		} else { | 
					
						
							|  |  |  | 			s3Select.recordReader = json.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs) | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2021-04-08 04:29:27 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		return nil | 
					
						
							|  |  |  | 	case parquetFormat: | 
					
						
							| 
									
										
										
										
											2023-07-20 22:52:49 +08:00
										 |  |  | 		if !parquetSupport { | 
					
						
							| 
									
										
										
										
											2020-08-19 01:23:28 +08:00
										 |  |  | 			return errors.New("parquet format parsing not enabled on server") | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 		if offset != 0 || length != -1 { | 
					
						
							| 
									
										
										
										
											2022-03-15 00:48:36 +08:00
										 |  |  | 			// Offsets do not make sense in parquet files.
 | 
					
						
							|  |  |  | 			return errors.New("parquet format does not support offsets") | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		var err error | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 		s3Select.recordReader, err = parquet.NewParquetReader(rsc, &s3Select.Input.ParquetArgs) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		return err | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-01 00:42:42 +08:00
										 |  |  | 	return fmt.Errorf("unknown input format '%v'", s3Select.Input.format) | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | func (s3Select *S3Select) marshal(buf *bytes.Buffer, record sql.Record) error { | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	switch s3Select.Output.format { | 
					
						
							|  |  |  | 	case csvFormat: | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 		// Use bufio Writer to prevent csv.Writer from allocating a new buffer.
 | 
					
						
							|  |  |  | 		bufioWriter := bufioWriterPool.Get().(*bufio.Writer) | 
					
						
							|  |  |  | 		defer func() { | 
					
						
							| 
									
										
										
										
											2023-11-07 06:26:08 +08:00
										 |  |  | 			bufioWriter.Reset(xioutil.Discard) | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 			bufioWriterPool.Put(bufioWriter) | 
					
						
							|  |  |  | 		}() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		bufioWriter.Reset(buf) | 
					
						
							| 
									
										
										
										
											2020-04-02 06:39:34 +08:00
										 |  |  | 		opts := sql.WriteCSVOpts{ | 
					
						
							|  |  |  | 			FieldDelimiter: []rune(s3Select.Output.CSVArgs.FieldDelimiter)[0], | 
					
						
							|  |  |  | 			Quote:          []rune(s3Select.Output.CSVArgs.QuoteCharacter)[0], | 
					
						
							|  |  |  | 			QuoteEscape:    []rune(s3Select.Output.CSVArgs.QuoteEscapeCharacter)[0], | 
					
						
							| 
									
										
										
										
											2021-11-11 00:12:50 +08:00
										 |  |  | 			AlwaysQuote:    strings.EqualFold(s3Select.Output.CSVArgs.QuoteFields, "always"), | 
					
						
							| 
									
										
										
										
											2020-04-02 06:39:34 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		err := record.WriteCSV(bufioWriter, opts) | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 			return err | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 		err = bufioWriter.Flush() | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return err | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2020-02-14 06:03:52 +08:00
										 |  |  | 		if buf.Bytes()[buf.Len()-1] == '\n' { | 
					
						
							|  |  |  | 			buf.Truncate(buf.Len() - 1) | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 		buf.WriteString(s3Select.Output.CSVArgs.RecordDelimiter) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	case jsonFormat: | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 		err := record.WriteJSON(buf) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 			return err | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2020-02-14 06:03:52 +08:00
										 |  |  | 		// Trim trailing newline from non-simd output
 | 
					
						
							|  |  |  | 		if buf.Bytes()[buf.Len()-1] == '\n' { | 
					
						
							|  |  |  | 			buf.Truncate(buf.Len() - 1) | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 		buf.WriteString(s3Select.Output.JSONArgs.RecordDelimiter) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	panic(fmt.Errorf("unknown output format '%v'", s3Select.Output.format)) | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // Evaluate - filters and sends records read from opened reader as per select statement to http response writer.
 | 
					
						
							|  |  |  | func (s3Select *S3Select) Evaluate(w http.ResponseWriter) { | 
					
						
							|  |  |  | 	getProgressFunc := s3Select.getProgress | 
					
						
							|  |  |  | 	if !s3Select.Progress.Enabled { | 
					
						
							|  |  |  | 		getProgressFunc = nil | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	writer := newMessageWriter(w, getProgressFunc) | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 	var outputQueue []sql.Record | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Create queue based on the type.
 | 
					
						
							|  |  |  | 	if s3Select.statement.IsAggregated() { | 
					
						
							|  |  |  | 		outputQueue = make([]sql.Record, 0, 1) | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		outputQueue = make([]sql.Record, 0, 100) | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	var err error | 
					
						
							|  |  |  | 	sendRecord := func() bool { | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 		buf := bufPool.Get().(*bytes.Buffer) | 
					
						
							|  |  |  | 		buf.Reset() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 		for _, outputRecord := range outputQueue { | 
					
						
							|  |  |  | 			if outputRecord == nil { | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			before := buf.Len() | 
					
						
							|  |  |  | 			if err = s3Select.marshal(buf, outputRecord); err != nil { | 
					
						
							|  |  |  | 				bufPool.Put(buf) | 
					
						
							|  |  |  | 				return false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if buf.Len()-before > maxRecordSize { | 
					
						
							|  |  |  | 				writer.FinishWithError("OverMaxRecordSize", "The length of a record in the input or result is greater than maxCharsPerRecord of 1 MB.") | 
					
						
							|  |  |  | 				bufPool.Put(buf) | 
					
						
							|  |  |  | 				return false | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2019-01-29 12:00:18 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 		if err = writer.SendRecord(buf); err != nil { | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 			// FIXME: log this error.
 | 
					
						
							|  |  |  | 			err = nil | 
					
						
							| 
									
										
										
										
											2019-08-31 15:07:40 +08:00
										 |  |  | 			bufPool.Put(buf) | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 			return false | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 		outputQueue = outputQueue[:0] | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 		return true | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-14 05:18:35 +08:00
										 |  |  | 	var rec sql.Record | 
					
						
							| 
									
										
										
										
											2020-03-11 13:34:58 +08:00
										 |  |  | OuterLoop: | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 	for { | 
					
						
							| 
									
										
										
										
											2019-01-29 09:59:48 +08:00
										 |  |  | 		if s3Select.statement.LimitReached() { | 
					
						
							| 
									
										
										
										
											2019-09-21 02:00:17 +08:00
										 |  |  | 			if !sendRecord() { | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2019-02-06 18:33:05 +08:00
										 |  |  | 			if err = writer.Finish(s3Select.getProgress()); err != nil { | 
					
						
							| 
									
										
										
										
											2019-01-29 09:59:48 +08:00
										 |  |  | 				// FIXME: log this error.
 | 
					
						
							|  |  |  | 				err = nil | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			break | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-14 05:18:35 +08:00
										 |  |  | 		if rec, err = s3Select.recordReader.Read(rec); err != nil { | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 			if err != io.EOF { | 
					
						
							|  |  |  | 				break | 
					
						
							| 
									
										
										
										
											2018-10-23 03:12:22 +08:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 			if s3Select.statement.IsAggregated() { | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 				outputRecord := s3Select.outputRecord() | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 				if err = s3Select.statement.AggregateResult(outputRecord); err != nil { | 
					
						
							|  |  |  | 					break | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 				} | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 				outputQueue = append(outputQueue, outputRecord) | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2018-10-23 03:12:22 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 			if !sendRecord() { | 
					
						
							|  |  |  | 				break | 
					
						
							| 
									
										
										
										
											2018-12-08 06:55:32 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-02-06 18:33:05 +08:00
										 |  |  | 			if err = writer.Finish(s3Select.getProgress()); err != nil { | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 				// FIXME: log this error.
 | 
					
						
							|  |  |  | 				err = nil | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			break | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2018-10-23 03:12:22 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-11 13:34:58 +08:00
										 |  |  | 		var inputRecords []*sql.Record | 
					
						
							|  |  |  | 		if inputRecords, err = s3Select.statement.EvalFrom(s3Select.Input.format, rec); err != nil { | 
					
						
							| 
									
										
										
										
											2019-03-10 00:13:37 +08:00
										 |  |  | 			break | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-11 13:34:58 +08:00
										 |  |  | 		for _, inputRecord := range inputRecords { | 
					
						
							|  |  |  | 			if s3Select.statement.IsAggregated() { | 
					
						
							|  |  |  | 				if err = s3Select.statement.AggregateRow(*inputRecord); err != nil { | 
					
						
							|  |  |  | 					break OuterLoop | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 			} else { | 
					
						
							| 
									
										
										
										
											2020-03-11 13:34:58 +08:00
										 |  |  | 				var outputRecord sql.Record | 
					
						
							|  |  |  | 				// We will attempt to reuse the records in the table.
 | 
					
						
							|  |  |  | 				// The type of these should not change.
 | 
					
						
							|  |  |  | 				// The queue should always have at least one entry left for this to work.
 | 
					
						
							|  |  |  | 				outputQueue = outputQueue[:len(outputQueue)+1] | 
					
						
							|  |  |  | 				if t := outputQueue[len(outputQueue)-1]; t != nil { | 
					
						
							|  |  |  | 					// If the output record is already set, we reuse it.
 | 
					
						
							|  |  |  | 					outputRecord = t | 
					
						
							|  |  |  | 					outputRecord.Reset() | 
					
						
							|  |  |  | 				} else { | 
					
						
							|  |  |  | 					// Create new one
 | 
					
						
							|  |  |  | 					outputRecord = s3Select.outputRecord() | 
					
						
							|  |  |  | 					outputQueue[len(outputQueue)-1] = outputRecord | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				outputRecord, err = s3Select.statement.Eval(*inputRecord, outputRecord) | 
					
						
							|  |  |  | 				if outputRecord == nil || err != nil { | 
					
						
							|  |  |  | 					// This should not be written.
 | 
					
						
							|  |  |  | 					// Remove it from the queue.
 | 
					
						
							|  |  |  | 					outputQueue = outputQueue[:len(outputQueue)-1] | 
					
						
							|  |  |  | 					if err != nil { | 
					
						
							|  |  |  | 						break OuterLoop | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					continue | 
					
						
							| 
									
										
										
										
											2019-09-18 08:21:23 +08:00
										 |  |  | 				} | 
					
						
							| 
									
										
										
										
											2019-09-17 08:26:27 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-11 13:34:58 +08:00
										 |  |  | 				outputQueue[len(outputQueue)-1] = outputRecord | 
					
						
							| 
									
										
										
										
											2021-11-03 10:14:46 +08:00
										 |  |  | 				if s3Select.statement.LimitReached() { | 
					
						
							|  |  |  | 					if !sendRecord() { | 
					
						
							|  |  |  | 						break | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					if err = writer.Finish(s3Select.getProgress()); err != nil { | 
					
						
							|  |  |  | 						// FIXME: log this error.
 | 
					
						
							|  |  |  | 						err = nil | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					return | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-11 13:34:58 +08:00
										 |  |  | 				if len(outputQueue) < cap(outputQueue) { | 
					
						
							|  |  |  | 					continue | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2018-11-15 07:55:10 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-11 13:34:58 +08:00
										 |  |  | 				if !sendRecord() { | 
					
						
							|  |  |  | 					break OuterLoop | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	if err != nil { | 
					
						
							| 
									
										
										
										
											2019-02-13 20:59:36 +08:00
										 |  |  | 		_ = writer.FinishWithError("InternalError", err.Error()) | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // Close - closes opened S3 object.
 | 
					
						
							|  |  |  | func (s3Select *S3Select) Close() error { | 
					
						
							| 
									
										
										
										
											2021-12-01 00:42:42 +08:00
										 |  |  | 	if s3Select.recordReader == nil { | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	return s3Select.recordReader.Close() | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | // NewS3Select - creates new S3Select by given request XML reader.
 | 
					
						
							|  |  |  | func NewS3Select(r io.Reader) (*S3Select, error) { | 
					
						
							|  |  |  | 	s3Select := &S3Select{} | 
					
						
							|  |  |  | 	if err := xml.NewDecoder(r).Decode(s3Select); err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-09 08:53:04 +08:00
										 |  |  | 	return s3Select, nil | 
					
						
							| 
									
										
										
										
											2018-08-15 18:30:19 +08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | //////////////////
 | 
					
						
							|  |  |  | // Helpers
 | 
					
						
							|  |  |  | /////////////////
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // limitedReadCloser is like io.LimitedReader, but also implements io.Closer.
 | 
					
						
							|  |  |  | type limitedReadCloser struct { | 
					
						
							|  |  |  | 	io.LimitedReader | 
					
						
							|  |  |  | 	io.Closer | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newLimitedReadCloser(r io.ReadCloser, n int64) *limitedReadCloser { | 
					
						
							|  |  |  | 	return &limitedReadCloser{ | 
					
						
							|  |  |  | 		LimitedReader: io.LimitedReader{R: r, N: n}, | 
					
						
							|  |  |  | 		Closer:        r, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // ObjectSegmentReaderFn is a function that returns a reader for a contiguous
 | 
					
						
							|  |  |  | // suffix segment of an object starting at the given (non-negative) offset.
 | 
					
						
							|  |  |  | type ObjectSegmentReaderFn func(offset int64) (io.ReadCloser, error) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // ObjectReadSeekCloser implements ReadSeekCloser interface for reading objects.
 | 
					
						
							|  |  |  | // It uses a function that returns a io.ReadCloser for the object.
 | 
					
						
							|  |  |  | type ObjectReadSeekCloser struct { | 
					
						
							|  |  |  | 	segmentReader ObjectSegmentReaderFn | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	size   int64 // actual object size regardless of compression/encryption
 | 
					
						
							|  |  |  | 	offset int64 | 
					
						
							|  |  |  | 	reader io.ReadCloser | 
					
						
							| 
									
										
										
										
											2023-02-17 17:44:40 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	// reader can be closed idempotently multiple times
 | 
					
						
							|  |  |  | 	closerOnce sync.Once | 
					
						
							|  |  |  | 	// Error storing reader.Close()
 | 
					
						
							|  |  |  | 	closerErr error | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // NewObjectReadSeekCloser creates a new ObjectReadSeekCloser.
 | 
					
						
							|  |  |  | func NewObjectReadSeekCloser(segmentReader ObjectSegmentReaderFn, actualSize int64) *ObjectReadSeekCloser { | 
					
						
							|  |  |  | 	return &ObjectReadSeekCloser{ | 
					
						
							|  |  |  | 		segmentReader: segmentReader, | 
					
						
							|  |  |  | 		size:          actualSize, | 
					
						
							|  |  |  | 		offset:        0, | 
					
						
							|  |  |  | 		reader:        nil, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Seek call to implement io.Seeker
 | 
					
						
							|  |  |  | func (rsc *ObjectReadSeekCloser) Seek(offset int64, whence int) (int64, error) { | 
					
						
							|  |  |  | 	// fmt.Printf("actual: %v offset: %v (%v) whence: %v\n", rsc.size, offset, rsc.offset, whence)
 | 
					
						
							|  |  |  | 	switch whence { | 
					
						
							|  |  |  | 	case io.SeekStart: | 
					
						
							|  |  |  | 		rsc.offset = offset | 
					
						
							|  |  |  | 	case io.SeekCurrent: | 
					
						
							|  |  |  | 		rsc.offset += offset | 
					
						
							|  |  |  | 	case io.SeekEnd: | 
					
						
							|  |  |  | 		rsc.offset = rsc.size + offset | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if rsc.offset < 0 { | 
					
						
							|  |  |  | 		return rsc.offset, errors.New("seek to invalid negative offset") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if rsc.offset >= rsc.size { | 
					
						
							|  |  |  | 		return rsc.offset, errors.New("seek past end of object") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if rsc.reader != nil { | 
					
						
							|  |  |  | 		_ = rsc.reader.Close() | 
					
						
							|  |  |  | 		rsc.reader = nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return rsc.offset, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Read call to implement io.Reader
 | 
					
						
							|  |  |  | func (rsc *ObjectReadSeekCloser) Read(p []byte) (n int, err error) { | 
					
						
							|  |  |  | 	if rsc.reader == nil { | 
					
						
							|  |  |  | 		rsc.reader, err = rsc.segmentReader(rsc.offset) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return 0, err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return rsc.reader.Read(p) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Close call to implement io.Closer. Calling Read/Seek after Close reopens the
 | 
					
						
							|  |  |  | // object for reading and a subsequent Close call is required to ensure
 | 
					
						
							|  |  |  | // resources are freed.
 | 
					
						
							|  |  |  | func (rsc *ObjectReadSeekCloser) Close() error { | 
					
						
							| 
									
										
										
										
											2023-02-17 17:44:40 +08:00
										 |  |  | 	rsc.closerOnce.Do(func() { | 
					
						
							|  |  |  | 		if rsc.reader != nil { | 
					
						
							|  |  |  | 			rsc.closerErr = rsc.reader.Close() | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | 			rsc.reader = nil | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2023-02-17 17:44:40 +08:00
										 |  |  | 	}) | 
					
						
							|  |  |  | 	return rsc.closerErr | 
					
						
							| 
									
										
										
										
											2022-04-14 21:54:47 +08:00
										 |  |  | } |