mirror of https://github.com/minio/minio.git
				
				
				
			select: Add ScanRange to CSV&JSON (#14546)
Implements https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html#AmazonS3-SelectObjectContent-request-ScanRange Fixes #14539
This commit is contained in:
		
							parent
							
								
									9c846106fa
								
							
						
					
					
						commit
						c07af89e48
					
				|  | @ -206,6 +206,9 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r | ||||||
| 			Start:          offset, | 			Start:          offset, | ||||||
| 			End:            offset + length, | 			End:            offset + length, | ||||||
| 		} | 		} | ||||||
|  | 		if length == -1 { | ||||||
|  | 			rs.End = -1 | ||||||
|  | 		} | ||||||
| 
 | 
 | ||||||
| 		return getObjectNInfo(ctx, bucket, object, rs, r.Header, readLock, opts) | 		return getObjectNInfo(ctx, bucket, object, rs, r.Header, readLock, opts) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -101,6 +101,15 @@ func errInvalidRequestParameter(err error) *s3Error { | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | func errInvalidScanRangeParameter(err error) *s3Error { | ||||||
|  | 	return &s3Error{ | ||||||
|  | 		code:       "InvalidRequestParameter", | ||||||
|  | 		message:    "The value of a parameter in ScanRange element is invalid. Check the service API documentation and try again.", | ||||||
|  | 		statusCode: 400, | ||||||
|  | 		cause:      err, | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func errObjectSerializationConflict(err error) *s3Error { | func errObjectSerializationConflict(err error) *s3Error { | ||||||
| 	return &s3Error{ | 	return &s3Error{ | ||||||
| 		code:       "ObjectSerializationConflict", | 		code:       "ObjectSerializationConflict", | ||||||
|  |  | ||||||
|  | @ -212,6 +212,69 @@ type RequestProgress struct { | ||||||
| 	Enabled bool `xml:"Enabled"` | 	Enabled bool `xml:"Enabled"` | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // ScanRange represents the ScanRange parameter.
 | ||||||
|  | type ScanRange struct { | ||||||
|  | 	// Start if byte offset form the start off the file.
 | ||||||
|  | 	Start *uint64 `xml:"Start"` | ||||||
|  | 	// End is the last byte that should be returned, if Start is set,
 | ||||||
|  | 	// or the offset from EOF to start reading if start is not present.
 | ||||||
|  | 	End *uint64 `xml:"End"` | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Validate if the scan range is valid.
 | ||||||
|  | func (s *ScanRange) Validate() error { | ||||||
|  | 	if s == nil { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	if s.Start == nil && s.End == nil { | ||||||
|  | 		// This parameter is optional, but when specified, it must not be empty.
 | ||||||
|  | 		// Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html#AmazonS3-SelectObjectContent-request-ScanRange
 | ||||||
|  | 		return errors.New("ScanRange: No Start or End specified") | ||||||
|  | 	} | ||||||
|  | 	if s.Start == nil || s.End == nil { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	if *s.Start > *s.End { | ||||||
|  | 		return errors.New("ScanRange: Start cannot be after end") | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // StartLen returns start offset plus length from range.
 | ||||||
|  | func (s *ScanRange) StartLen() (start, length int64, err error) { | ||||||
|  | 	if s == nil { | ||||||
|  | 		return 0, -1, nil | ||||||
|  | 	} | ||||||
|  | 	err = s.Validate() | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, 0, err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if s.End == nil && s.Start == nil { | ||||||
|  | 		// Not valid, but should be caught above.
 | ||||||
|  | 		return 0, -1, nil | ||||||
|  | 	} | ||||||
|  | 	if s.End == nil { | ||||||
|  | 		start := int64(*s.Start) | ||||||
|  | 		if start < 0 { | ||||||
|  | 			return 0, 0, errors.New("ScanRange: Start after EOF") | ||||||
|  | 		} | ||||||
|  | 		return start, -1, nil | ||||||
|  | 	} | ||||||
|  | 	if s.Start == nil { | ||||||
|  | 		// Suffix length
 | ||||||
|  | 		end := int64(*s.End) | ||||||
|  | 		if end < 0 { | ||||||
|  | 			return 0, 0, errors.New("ScanRange: End bigger than file") | ||||||
|  | 		} | ||||||
|  | 		// Suffix length
 | ||||||
|  | 		return -end, -1, nil | ||||||
|  | 	} | ||||||
|  | 	start = int64(*s.Start) | ||||||
|  | 	end := int64(*s.End) | ||||||
|  | 	return start, end - start + 1, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // S3Select - filters the contents on a simple structured query language (SQL) statement. It
 | // S3Select - filters the contents on a simple structured query language (SQL) statement. It
 | ||||||
| // represents elements inside <SelectRequest/> in request XML specified in detail at
 | // represents elements inside <SelectRequest/> in request XML specified in detail at
 | ||||||
| // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html.
 | // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html.
 | ||||||
|  | @ -222,6 +285,7 @@ type S3Select struct { | ||||||
| 	Input          InputSerialization  `xml:"InputSerialization"` | 	Input          InputSerialization  `xml:"InputSerialization"` | ||||||
| 	Output         OutputSerialization `xml:"OutputSerialization"` | 	Output         OutputSerialization `xml:"OutputSerialization"` | ||||||
| 	Progress       RequestProgress     `xml:"RequestProgress"` | 	Progress       RequestProgress     `xml:"RequestProgress"` | ||||||
|  | 	ScanRange      *ScanRange          `xml:"ScanRange"` | ||||||
| 
 | 
 | ||||||
| 	statement      *sql.SelectStatement | 	statement      *sql.SelectStatement | ||||||
| 	progressReader *progressReader | 	progressReader *progressReader | ||||||
|  | @ -251,7 +315,9 @@ func (s3Select *S3Select) UnmarshalXML(d *xml.Decoder, start xml.StartElement) e | ||||||
| 
 | 
 | ||||||
| 		return errMalformedXML(err) | 		return errMalformedXML(err) | ||||||
| 	} | 	} | ||||||
| 
 | 	if err := parsedS3Select.ScanRange.Validate(); err != nil { | ||||||
|  | 		return errInvalidScanRangeParameter(err) | ||||||
|  | 	} | ||||||
| 	parsedS3Select.ExpressionType = strings.ToLower(parsedS3Select.ExpressionType) | 	parsedS3Select.ExpressionType = strings.ToLower(parsedS3Select.ExpressionType) | ||||||
| 	if parsedS3Select.ExpressionType != "sql" { | 	if parsedS3Select.ExpressionType != "sql" { | ||||||
| 		return errInvalidExpressionType(fmt.Errorf("invalid expression type '%v'", parsedS3Select.ExpressionType)) | 		return errInvalidExpressionType(fmt.Errorf("invalid expression type '%v'", parsedS3Select.ExpressionType)) | ||||||
|  | @ -298,9 +364,13 @@ func (s3Select *S3Select) getProgress() (bytesScanned, bytesProcessed int64) { | ||||||
| // Open - opens S3 object by using callback for SQL selection query.
 | // Open - opens S3 object by using callback for SQL selection query.
 | ||||||
| // Currently CSV, JSON and Apache Parquet formats are supported.
 | // Currently CSV, JSON and Apache Parquet formats are supported.
 | ||||||
| func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadCloser, error)) error { | func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadCloser, error)) error { | ||||||
|  | 	offset, end, err := s3Select.ScanRange.StartLen() | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
| 	switch s3Select.Input.format { | 	switch s3Select.Input.format { | ||||||
| 	case csvFormat: | 	case csvFormat: | ||||||
| 		rc, err := getReader(0, -1) | 		rc, err := getReader(offset, end) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
|  | @ -335,7 +405,7 @@ func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadClos | ||||||
| 		s3Select.close = rc.Close | 		s3Select.close = rc.Close | ||||||
| 		return nil | 		return nil | ||||||
| 	case jsonFormat: | 	case jsonFormat: | ||||||
| 		rc, err := getReader(0, -1) | 		rc, err := getReader(offset, end) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
|  | @ -362,6 +432,10 @@ func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadClos | ||||||
| 		if !strings.EqualFold(os.Getenv("MINIO_API_SELECT_PARQUET"), "on") { | 		if !strings.EqualFold(os.Getenv("MINIO_API_SELECT_PARQUET"), "on") { | ||||||
| 			return errors.New("parquet format parsing not enabled on server") | 			return errors.New("parquet format parsing not enabled on server") | ||||||
| 		} | 		} | ||||||
|  | 		if offset != 0 || end != -1 { | ||||||
|  | 			// Offsets do not make sense in parquet files.
 | ||||||
|  | 			return errors.New("parquet format does not support offsets") | ||||||
|  | 		} | ||||||
| 		var err error | 		var err error | ||||||
| 		s3Select.recordReader, err = parquet.NewReader(getReader, &s3Select.Input.ParquetArgs) | 		s3Select.recordReader, err = parquet.NewReader(getReader, &s3Select.Input.ParquetArgs) | ||||||
| 		return err | 		return err | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ package s3select | ||||||
| import ( | import ( | ||||||
| 	"bytes" | 	"bytes" | ||||||
| 	"encoding/xml" | 	"encoding/xml" | ||||||
|  | 	"errors" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"io" | 	"io" | ||||||
| 	"io/ioutil" | 	"io/ioutil" | ||||||
|  | @ -1374,6 +1375,314 @@ func TestJSONInput(t *testing.T) { | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | func TestCSVRanges(t *testing.T) { | ||||||
|  | 	testInput := []byte(`id,time,num,num2,text | ||||||
|  | 1,2010-01-01T,7867786,4565.908123,"a text, with comma" | ||||||
|  | 2,2017-01-02T03:04Z,-5, 0.765111, | ||||||
|  | `) | ||||||
|  | 	testTable := []struct { | ||||||
|  | 		name       string | ||||||
|  | 		query      string | ||||||
|  | 		input      []byte | ||||||
|  | 		requestXML []byte // override request XML
 | ||||||
|  | 		wantResult string | ||||||
|  | 		wantErr    bool | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			name:  "select-all", | ||||||
|  | 			input: testInput, | ||||||
|  | 			query: ``, | ||||||
|  | 			// Since we are doing offset, no headers are used.
 | ||||||
|  | 			wantResult: `{"_1":"2","_2":"2017-01-02T03:04Z","_3":"-5","_4":" 0.765111","_5":""}`, | ||||||
|  | 			requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  | <SelectObjectContentRequest> | ||||||
|  |     <Expression>SELECT * from s3object AS s</Expression> | ||||||
|  |     <ExpressionType>SQL</ExpressionType> | ||||||
|  |     <InputSerialization> | ||||||
|  |         <CompressionType>NONE</CompressionType> | ||||||
|  |         <CSV> | ||||||
|  |         <FileHeaderInfo>NONE</FileHeaderInfo> | ||||||
|  | 	    <QuoteCharacter>"</QuoteCharacter> | ||||||
|  |         </CSV> | ||||||
|  |     </InputSerialization> | ||||||
|  |     <OutputSerialization> | ||||||
|  |         <JSON> | ||||||
|  |         </JSON> | ||||||
|  |     </OutputSerialization> | ||||||
|  |     <RequestProgress> | ||||||
|  |         <Enabled>FALSE</Enabled> | ||||||
|  |     </RequestProgress> | ||||||
|  | 	<ScanRange><Start>76</Start><End>109</End></ScanRange> | ||||||
|  | </SelectObjectContentRequest>`), | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:  "select-remain", | ||||||
|  | 			input: testInput, | ||||||
|  | 			// Since we are doing offset, no headers are used.
 | ||||||
|  | 			wantResult: `{"_1":"2","_2":"2017-01-02T03:04Z","_3":"-5","_4":" 0.765111","_5":""}`, | ||||||
|  | 			requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  | <SelectObjectContentRequest> | ||||||
|  |     <Expression>SELECT * from s3object AS s</Expression> | ||||||
|  |     <ExpressionType>SQL</ExpressionType> | ||||||
|  |     <InputSerialization> | ||||||
|  |         <CompressionType>NONE</CompressionType> | ||||||
|  |         <CSV> | ||||||
|  |         <FileHeaderInfo>NONE</FileHeaderInfo> | ||||||
|  | 	    <QuoteCharacter>"</QuoteCharacter> | ||||||
|  |         </CSV> | ||||||
|  |     </InputSerialization> | ||||||
|  |     <OutputSerialization> | ||||||
|  |         <JSON> | ||||||
|  |         </JSON> | ||||||
|  |     </OutputSerialization> | ||||||
|  |     <RequestProgress> | ||||||
|  |         <Enabled>FALSE</Enabled> | ||||||
|  |     </RequestProgress> | ||||||
|  | 	<ScanRange><Start>76</Start></ScanRange> | ||||||
|  | </SelectObjectContentRequest>`), | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:  "select-end-bytes", | ||||||
|  | 			input: testInput, | ||||||
|  | 			query: ``, | ||||||
|  | 			// Since we are doing offset, no headers are used.
 | ||||||
|  | 			wantResult: `{"_1":"2","_2":"2017-01-02T03:04Z","_3":"-5","_4":" 0.765111","_5":""}`, | ||||||
|  | 			requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  | <SelectObjectContentRequest> | ||||||
|  |     <Expression>SELECT * from s3object AS s</Expression> | ||||||
|  |     <ExpressionType>SQL</ExpressionType> | ||||||
|  |     <InputSerialization> | ||||||
|  |         <CompressionType>NONE</CompressionType> | ||||||
|  |         <CSV> | ||||||
|  |         <FileHeaderInfo>NONE</FileHeaderInfo> | ||||||
|  | 	    <QuoteCharacter>"</QuoteCharacter> | ||||||
|  |         </CSV> | ||||||
|  |     </InputSerialization> | ||||||
|  |     <OutputSerialization> | ||||||
|  |         <JSON> | ||||||
|  |         </JSON> | ||||||
|  |     </OutputSerialization> | ||||||
|  |     <RequestProgress> | ||||||
|  |         <Enabled>FALSE</Enabled> | ||||||
|  |     </RequestProgress> | ||||||
|  | 	<ScanRange><End>35</End></ScanRange> | ||||||
|  | </SelectObjectContentRequest>`), | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:  "select-middle", | ||||||
|  | 			input: testInput, | ||||||
|  | 			// Since we are doing offset, no headers are used.
 | ||||||
|  | 			wantResult: `{"_1":"a text, with comma"}`, | ||||||
|  | 			requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  | <SelectObjectContentRequest> | ||||||
|  |     <Expression>SELECT * from s3object AS s</Expression> | ||||||
|  |     <ExpressionType>SQL</ExpressionType> | ||||||
|  |     <InputSerialization> | ||||||
|  |         <CompressionType>NONE</CompressionType> | ||||||
|  |         <CSV> | ||||||
|  |         <FileHeaderInfo>NONE</FileHeaderInfo> | ||||||
|  | 	    <QuoteCharacter>"</QuoteCharacter> | ||||||
|  |         </CSV> | ||||||
|  |     </InputSerialization> | ||||||
|  |     <OutputSerialization> | ||||||
|  |         <JSON> | ||||||
|  |         </JSON> | ||||||
|  |     </OutputSerialization> | ||||||
|  |     <RequestProgress> | ||||||
|  |         <Enabled>FALSE</Enabled> | ||||||
|  |     </RequestProgress> | ||||||
|  | 	<ScanRange><Start>56</Start><End>76</End></ScanRange> | ||||||
|  | </SelectObjectContentRequest>`), | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:  "error-end-before-start", | ||||||
|  | 			input: testInput, | ||||||
|  | 			// Since we are doing offset, no headers are used.
 | ||||||
|  | 			wantResult: ``, | ||||||
|  | 			wantErr:    true, | ||||||
|  | 			requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  | <SelectObjectContentRequest> | ||||||
|  |     <Expression>SELECT * from s3object AS s</Expression> | ||||||
|  |     <ExpressionType>SQL</ExpressionType> | ||||||
|  |     <InputSerialization> | ||||||
|  |         <CompressionType>NONE</CompressionType> | ||||||
|  |         <CSV> | ||||||
|  |         <FileHeaderInfo>NONE</FileHeaderInfo> | ||||||
|  | 	    <QuoteCharacter>"</QuoteCharacter> | ||||||
|  |         </CSV> | ||||||
|  |     </InputSerialization> | ||||||
|  |     <OutputSerialization> | ||||||
|  |         <JSON> | ||||||
|  |         </JSON> | ||||||
|  |     </OutputSerialization> | ||||||
|  |     <RequestProgress> | ||||||
|  |         <Enabled>FALSE</Enabled> | ||||||
|  |     </RequestProgress> | ||||||
|  | 	<ScanRange><Start>56</Start><End>26</End></ScanRange> | ||||||
|  | </SelectObjectContentRequest>`), | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:  "error-empty", | ||||||
|  | 			input: testInput, | ||||||
|  | 			// Since we are doing offset, no headers are used.
 | ||||||
|  | 			wantResult: ``, | ||||||
|  | 			wantErr:    true, | ||||||
|  | 			requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  | <SelectObjectContentRequest> | ||||||
|  |     <Expression>SELECT * from s3object AS s</Expression> | ||||||
|  |     <ExpressionType>SQL</ExpressionType> | ||||||
|  |     <InputSerialization> | ||||||
|  |         <CompressionType>NONE</CompressionType> | ||||||
|  |         <CSV> | ||||||
|  |         <FileHeaderInfo>NONE</FileHeaderInfo> | ||||||
|  | 	    <QuoteCharacter>"</QuoteCharacter> | ||||||
|  |         </CSV> | ||||||
|  |     </InputSerialization> | ||||||
|  |     <OutputSerialization> | ||||||
|  |         <JSON> | ||||||
|  |         </JSON> | ||||||
|  |     </OutputSerialization> | ||||||
|  |     <RequestProgress> | ||||||
|  |         <Enabled>FALSE</Enabled> | ||||||
|  |     </RequestProgress> | ||||||
|  | 	<ScanRange></ScanRange> | ||||||
|  | </SelectObjectContentRequest>`), | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:  "error-after-eof", | ||||||
|  | 			input: testInput, | ||||||
|  | 			// Since we are doing offset, no headers are used.
 | ||||||
|  | 			wantResult: ``, | ||||||
|  | 			wantErr:    true, | ||||||
|  | 			requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  | <SelectObjectContentRequest> | ||||||
|  |     <Expression>SELECT * from s3object AS s</Expression> | ||||||
|  |     <ExpressionType>SQL</ExpressionType> | ||||||
|  |     <InputSerialization> | ||||||
|  |         <CompressionType>NONE</CompressionType> | ||||||
|  |         <CSV> | ||||||
|  |         <FileHeaderInfo>NONE</FileHeaderInfo> | ||||||
|  | 	    <QuoteCharacter>"</QuoteCharacter> | ||||||
|  |         </CSV> | ||||||
|  |     </InputSerialization> | ||||||
|  |     <OutputSerialization> | ||||||
|  |         <JSON> | ||||||
|  |         </JSON> | ||||||
|  |     </OutputSerialization> | ||||||
|  |     <RequestProgress> | ||||||
|  |         <Enabled>FALSE</Enabled> | ||||||
|  |     </RequestProgress> | ||||||
|  | 	<ScanRange><Start>2600000</Start></ScanRange> | ||||||
|  | </SelectObjectContentRequest>`), | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:  "error-after-eof", | ||||||
|  | 			input: testInput, | ||||||
|  | 			// Since we are doing offset, no headers are used.
 | ||||||
|  | 			wantResult: ``, | ||||||
|  | 			wantErr:    true, | ||||||
|  | 			requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  | <SelectObjectContentRequest> | ||||||
|  |     <Expression>SELECT * from s3object AS s</Expression> | ||||||
|  |     <ExpressionType>SQL</ExpressionType> | ||||||
|  |     <InputSerialization> | ||||||
|  |         <CompressionType>NONE</CompressionType> | ||||||
|  |         <CSV> | ||||||
|  |         <FileHeaderInfo>NONE</FileHeaderInfo> | ||||||
|  | 	    <QuoteCharacter>"</QuoteCharacter> | ||||||
|  |         </CSV> | ||||||
|  |     </InputSerialization> | ||||||
|  |     <OutputSerialization> | ||||||
|  |         <JSON> | ||||||
|  |         </JSON> | ||||||
|  |     </OutputSerialization> | ||||||
|  |     <RequestProgress> | ||||||
|  |         <Enabled>FALSE</Enabled> | ||||||
|  |     </RequestProgress> | ||||||
|  | 	<ScanRange><Start>2600000</Start><End>2600001</End></ScanRange> | ||||||
|  | </SelectObjectContentRequest>`), | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for _, testCase := range testTable { | ||||||
|  | 		t.Run(testCase.name, func(t *testing.T) { | ||||||
|  | 			testReq := testCase.requestXML | ||||||
|  | 			s3Select, err := NewS3Select(bytes.NewReader(testReq)) | ||||||
|  | 			if err != nil { | ||||||
|  | 				if !testCase.wantErr { | ||||||
|  | 					t.Fatal(err) | ||||||
|  | 				} | ||||||
|  | 				t.Logf("got expected error: %v", err) | ||||||
|  | 				return | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) { | ||||||
|  | 				in := testCase.input | ||||||
|  | 				if offset != 0 || length != -1 { | ||||||
|  | 					// Copy from SelectObjectContentHandler
 | ||||||
|  | 					isSuffixLength := false | ||||||
|  | 					if offset < 0 { | ||||||
|  | 						isSuffixLength = true | ||||||
|  | 					} | ||||||
|  | 
 | ||||||
|  | 					if length > 0 { | ||||||
|  | 						length-- | ||||||
|  | 					} | ||||||
|  | 
 | ||||||
|  | 					rs := &httpRangeSpec{ | ||||||
|  | 						IsSuffixLength: isSuffixLength, | ||||||
|  | 						Start:          offset, | ||||||
|  | 						End:            offset + length, | ||||||
|  | 					} | ||||||
|  | 					if length == -1 { | ||||||
|  | 						rs.End = -1 | ||||||
|  | 					} | ||||||
|  | 					t.Log("input, offset:", offset, "length:", length, "size:", len(in)) | ||||||
|  | 					offset, length, err = rs.GetOffsetLength(int64(len(in))) | ||||||
|  | 					if err != nil { | ||||||
|  | 						return nil, err | ||||||
|  | 					} | ||||||
|  | 					t.Log("rs:", *rs, "offset:", offset, "length:", length) | ||||||
|  | 					in = in[offset : offset+length] | ||||||
|  | 				} | ||||||
|  | 				return ioutil.NopCloser(bytes.NewBuffer(in)), nil | ||||||
|  | 			}); err != nil { | ||||||
|  | 				if !testCase.wantErr { | ||||||
|  | 					t.Fatal(err) | ||||||
|  | 				} | ||||||
|  | 				t.Logf("got expected error: %v", err) | ||||||
|  | 				return | ||||||
|  | 			} else if testCase.wantErr { | ||||||
|  | 				t.Error("did not get expected error") | ||||||
|  | 				return | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			w := &testResponseWriter{} | ||||||
|  | 			s3Select.Evaluate(w) | ||||||
|  | 			s3Select.Close() | ||||||
|  | 			resp := http.Response{ | ||||||
|  | 				StatusCode:    http.StatusOK, | ||||||
|  | 				Body:          ioutil.NopCloser(bytes.NewReader(w.response)), | ||||||
|  | 				ContentLength: int64(len(w.response)), | ||||||
|  | 			} | ||||||
|  | 			res, err := minio.NewSelectResults(&resp, "testbucket") | ||||||
|  | 			if err != nil { | ||||||
|  | 				t.Error(err) | ||||||
|  | 				return | ||||||
|  | 			} | ||||||
|  | 			got, err := ioutil.ReadAll(res) | ||||||
|  | 			if err != nil { | ||||||
|  | 				t.Error(err) | ||||||
|  | 				return | ||||||
|  | 			} | ||||||
|  | 			gotS := strings.TrimSpace(string(got)) | ||||||
|  | 			if !reflect.DeepEqual(gotS, testCase.wantResult) { | ||||||
|  | 				t.Errorf("received response does not match with expected reply. Query: %s\ngot: %s\nwant:%s", testCase.query, gotS, testCase.wantResult) | ||||||
|  | 			} | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func TestParquetInput(t *testing.T) { | func TestParquetInput(t *testing.T) { | ||||||
| 	os.Setenv("MINIO_API_SELECT_PARQUET", "on") | 	os.Setenv("MINIO_API_SELECT_PARQUET", "on") | ||||||
| 	defer os.Setenv("MINIO_API_SELECT_PARQUET", "off") | 	defer os.Setenv("MINIO_API_SELECT_PARQUET", "off") | ||||||
|  | @ -1728,3 +2037,76 @@ func TestParquetInputSchemaCSV(t *testing.T) { | ||||||
| 		}) | 		}) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | // httpRangeSpec represents a range specification as supported by S3 GET
 | ||||||
|  | // object request.
 | ||||||
|  | //
 | ||||||
|  | // Case 1: Not present -> represented by a nil RangeSpec
 | ||||||
|  | // Case 2: bytes=1-10 (absolute start and end offsets) -> RangeSpec{false, 1, 10}
 | ||||||
|  | // Case 3: bytes=10- (absolute start offset with end offset unspecified) -> RangeSpec{false, 10, -1}
 | ||||||
|  | // Case 4: bytes=-30 (suffix length specification) -> RangeSpec{true, -30, -1}
 | ||||||
|  | type httpRangeSpec struct { | ||||||
|  | 	// Does the range spec refer to a suffix of the object?
 | ||||||
|  | 	IsSuffixLength bool | ||||||
|  | 
 | ||||||
|  | 	// Start and end offset specified in range spec
 | ||||||
|  | 	Start, End int64 | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (h *httpRangeSpec) GetLength(resourceSize int64) (rangeLength int64, err error) { | ||||||
|  | 	switch { | ||||||
|  | 	case resourceSize < 0: | ||||||
|  | 		return 0, errors.New("Resource size cannot be negative") | ||||||
|  | 
 | ||||||
|  | 	case h == nil: | ||||||
|  | 		rangeLength = resourceSize | ||||||
|  | 
 | ||||||
|  | 	case h.IsSuffixLength: | ||||||
|  | 		specifiedLen := -h.Start | ||||||
|  | 		rangeLength = specifiedLen | ||||||
|  | 		if specifiedLen > resourceSize { | ||||||
|  | 			rangeLength = resourceSize | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 	case h.Start >= resourceSize: | ||||||
|  | 		return 0, errors.New("errInvalidRange") | ||||||
|  | 
 | ||||||
|  | 	case h.End > -1: | ||||||
|  | 		end := h.End | ||||||
|  | 		if resourceSize <= end { | ||||||
|  | 			end = resourceSize - 1 | ||||||
|  | 		} | ||||||
|  | 		rangeLength = end - h.Start + 1 | ||||||
|  | 
 | ||||||
|  | 	case h.End == -1: | ||||||
|  | 		rangeLength = resourceSize - h.Start | ||||||
|  | 
 | ||||||
|  | 	default: | ||||||
|  | 		return 0, errors.New("Unexpected range specification case") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return rangeLength, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // GetOffsetLength computes the start offset and length of the range
 | ||||||
|  | // given the size of the resource
 | ||||||
|  | func (h *httpRangeSpec) GetOffsetLength(resourceSize int64) (start, length int64, err error) { | ||||||
|  | 	if h == nil { | ||||||
|  | 		// No range specified, implies whole object.
 | ||||||
|  | 		return 0, resourceSize, nil | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	length, err = h.GetLength(resourceSize) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, 0, err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	start = h.Start | ||||||
|  | 	if h.IsSuffixLength { | ||||||
|  | 		start = resourceSize + h.Start | ||||||
|  | 		if start < 0 { | ||||||
|  | 			start = 0 | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return start, length, nil | ||||||
|  | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue