| 
									
										
										
										
											2021-04-19 03:41:13 +08:00
										 |  |  | // Copyright (c) 2015-2021 MinIO, Inc.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This file is part of MinIO Object Storage stack
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or
 | 
					
						
							|  |  |  | // (at your option) any later version.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU Affero General Public License for more details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License
 | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | package cmd | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"archive/tar" | 
					
						
							|  |  |  | 	"bufio" | 
					
						
							|  |  |  | 	"bytes" | 
					
						
							| 
									
										
										
										
											2021-10-15 02:11:07 +08:00
										 |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 	"errors" | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 	"fmt" | 
					
						
							|  |  |  | 	"io" | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 	"io/fs" | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 	"os" | 
					
						
							|  |  |  | 	"path" | 
					
						
							| 
									
										
										
										
											2021-10-15 02:11:07 +08:00
										 |  |  | 	"runtime" | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 	"sync" | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-15 02:11:07 +08:00
										 |  |  | 	"github.com/cosnicolaou/pbzip2" | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 	"github.com/klauspost/compress/s2" | 
					
						
							|  |  |  | 	"github.com/klauspost/compress/zstd" | 
					
						
							|  |  |  | 	gzip "github.com/klauspost/pgzip" | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 	"github.com/minio/minio/internal/logger" | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 	"github.com/pierrec/lz4" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-18 23:44:36 +08:00
										 |  |  | // Max bzip2 concurrency across calls. 50% of GOMAXPROCS.
 | 
					
						
							|  |  |  | var bz2Limiter = pbzip2.CreateConcurrencyPool((runtime.GOMAXPROCS(0) + 1) / 2) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | func detect(r *bufio.Reader) format { | 
					
						
							|  |  |  | 	z, err := r.Peek(4) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return formatUnknown | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for _, f := range magicHeaders { | 
					
						
							|  |  |  | 		if bytes.Equal(f.header, z[:len(f.header)]) { | 
					
						
							|  |  |  | 			return f.f | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return formatUnknown | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | //go:generate stringer -type=format -trimprefix=format $GOFILE
 | 
					
						
							|  |  |  | type format int | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							|  |  |  | 	formatUnknown format = iota | 
					
						
							|  |  |  | 	formatGzip | 
					
						
							|  |  |  | 	formatZstd | 
					
						
							|  |  |  | 	formatLZ4 | 
					
						
							|  |  |  | 	formatS2 | 
					
						
							|  |  |  | 	formatBZ2 | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var magicHeaders = []struct { | 
					
						
							|  |  |  | 	header []byte | 
					
						
							|  |  |  | 	f      format | 
					
						
							|  |  |  | }{ | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		header: []byte{0x1f, 0x8b, 8}, | 
					
						
							|  |  |  | 		f:      formatGzip, | 
					
						
							|  |  |  | 	}, | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		// Zstd default header.
 | 
					
						
							|  |  |  | 		header: []byte{0x28, 0xb5, 0x2f, 0xfd}, | 
					
						
							|  |  |  | 		f:      formatZstd, | 
					
						
							|  |  |  | 	}, | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		// Zstd skippable frame header.
 | 
					
						
							|  |  |  | 		header: []byte{0x2a, 0x4d, 0x18}, | 
					
						
							|  |  |  | 		f:      formatZstd, | 
					
						
							|  |  |  | 	}, | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		// LZ4
 | 
					
						
							|  |  |  | 		header: []byte{0x4, 0x22, 0x4d, 0x18}, | 
					
						
							|  |  |  | 		f:      formatLZ4, | 
					
						
							|  |  |  | 	}, | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		// Snappy/S2 stream
 | 
					
						
							|  |  |  | 		header: []byte{0xff, 0x06, 0x00, 0x00}, | 
					
						
							|  |  |  | 		f:      formatS2, | 
					
						
							|  |  |  | 	}, | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		header: []byte{0x42, 0x5a, 'h'}, | 
					
						
							|  |  |  | 		f:      formatBZ2, | 
					
						
							|  |  |  | 	}, | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | type untarOptions struct { | 
					
						
							|  |  |  | 	ignoreDirs bool | 
					
						
							|  |  |  | 	ignoreErrs bool | 
					
						
							|  |  |  | 	prefixAll  string | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // disconnectReader will ensure that no reads can take place on
 | 
					
						
							|  |  |  | // the upstream reader after close has been called.
 | 
					
						
							|  |  |  | type disconnectReader struct { | 
					
						
							|  |  |  | 	r  io.Reader | 
					
						
							|  |  |  | 	mu sync.Mutex | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (d *disconnectReader) Read(p []byte) (n int, err error) { | 
					
						
							|  |  |  | 	d.mu.Lock() | 
					
						
							|  |  |  | 	defer d.mu.Unlock() | 
					
						
							|  |  |  | 	if d.r != nil { | 
					
						
							|  |  |  | 		return d.r.Read(p) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return 0, errors.New("reader closed") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (d *disconnectReader) Close() error { | 
					
						
							|  |  |  | 	d.mu.Lock() | 
					
						
							|  |  |  | 	d.r = nil | 
					
						
							|  |  |  | 	d.mu.Unlock() | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func untar(ctx context.Context, r io.Reader, putObject func(reader io.Reader, info os.FileInfo, name string) error, o untarOptions) error { | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 	bf := bufio.NewReader(r) | 
					
						
							|  |  |  | 	switch f := detect(bf); f { | 
					
						
							|  |  |  | 	case formatGzip: | 
					
						
							|  |  |  | 		gz, err := gzip.NewReader(bf) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		defer gz.Close() | 
					
						
							|  |  |  | 		r = gz | 
					
						
							|  |  |  | 	case formatS2: | 
					
						
							|  |  |  | 		r = s2.NewReader(bf) | 
					
						
							|  |  |  | 	case formatZstd: | 
					
						
							|  |  |  | 		dec, err := zstd.NewReader(bf) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		defer dec.Close() | 
					
						
							|  |  |  | 		r = dec | 
					
						
							|  |  |  | 	case formatBZ2: | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 		ctx, cancel := context.WithCancel(ctx) | 
					
						
							| 
									
										
										
										
											2021-10-15 02:11:07 +08:00
										 |  |  | 		defer cancel() | 
					
						
							| 
									
										
										
										
											2021-10-18 23:44:36 +08:00
										 |  |  | 		r = pbzip2.NewReader(ctx, bf, pbzip2.DecompressionOptions( | 
					
						
							|  |  |  | 			pbzip2.BZConcurrency((runtime.GOMAXPROCS(0)+1)/2), | 
					
						
							|  |  |  | 			pbzip2.BZConcurrencyPool(bz2Limiter))) | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 	case formatLZ4: | 
					
						
							|  |  |  | 		r = lz4.NewReader(bf) | 
					
						
							|  |  |  | 	case formatUnknown: | 
					
						
							|  |  |  | 		r = bf | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 		return fmt.Errorf("Unsupported format %s", f) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	tarReader := tar.NewReader(r) | 
					
						
							| 
									
										
										
										
											2021-12-07 01:45:23 +08:00
										 |  |  | 	n := 0 | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 	asyncWriters := make(chan struct{}, 16) | 
					
						
							|  |  |  | 	var wg sync.WaitGroup | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	var asyncErr error | 
					
						
							|  |  |  | 	var asyncErrMu sync.Mutex | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 	for { | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 		if !o.ignoreErrs { | 
					
						
							|  |  |  | 			asyncErrMu.Lock() | 
					
						
							|  |  |  | 			err := asyncErr | 
					
						
							|  |  |  | 			asyncErrMu.Unlock() | 
					
						
							|  |  |  | 			if err != nil { | 
					
						
							|  |  |  | 				return err | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 		header, err := tarReader.Next() | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 		switch { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// if no more files are found return
 | 
					
						
							|  |  |  | 		case err == io.EOF: | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 			wg.Wait() | 
					
						
							|  |  |  | 			return asyncErr | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		// return any other error
 | 
					
						
							|  |  |  | 		case err != nil: | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 			wg.Wait() | 
					
						
							| 
									
										
										
										
											2021-12-07 01:45:23 +08:00
										 |  |  | 			extra := "" | 
					
						
							|  |  |  | 			if n > 0 { | 
					
						
							|  |  |  | 				extra = fmt.Sprintf(" after %d successful object(s)", n) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			return fmt.Errorf("tar file error: %w%s", err, extra) | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		// if the header is nil, just skip it (not sure how this happens)
 | 
					
						
							|  |  |  | 		case header == nil: | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		name := header.Name | 
					
						
							|  |  |  | 		if name == slashSeparator { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		switch header.Typeflag { | 
					
						
							|  |  |  | 		case tar.TypeDir: // = directory
 | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 			if o.ignoreDirs { | 
					
						
							|  |  |  | 				continue | 
					
						
							| 
									
										
										
										
											2021-12-07 01:45:23 +08:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 			name = trimLeadingSlash(pathJoin(name, slashSeparator)) | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 		case tar.TypeReg, tar.TypeChar, tar.TypeBlock, tar.TypeFifo, tar.TypeGNUSparse: // = regular
 | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 			name = trimLeadingSlash(path.Clean(name)) | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 		default: | 
					
						
							|  |  |  | 			// ignore symlink'ed
 | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2022-10-19 04:50:21 +08:00
										 |  |  | 		if o.prefixAll != "" { | 
					
						
							|  |  |  | 			name = pathJoin(o.prefixAll, name) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Do small files async
 | 
					
						
							|  |  |  | 		n++ | 
					
						
							|  |  |  | 		if header.Size <= smallFileThreshold { | 
					
						
							|  |  |  | 			asyncWriters <- struct{}{} | 
					
						
							|  |  |  | 			b := poolBuf128k.Get().([]byte) | 
					
						
							|  |  |  | 			if cap(b) < int(header.Size) { | 
					
						
							|  |  |  | 				b = make([]byte, smallFileThreshold) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			b = b[:header.Size] | 
					
						
							|  |  |  | 			if _, err := io.ReadFull(tarReader, b); err != nil { | 
					
						
							|  |  |  | 				return err | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			wg.Add(1) | 
					
						
							|  |  |  | 			go func(name string, fi fs.FileInfo, b []byte) { | 
					
						
							|  |  |  | 				rc := disconnectReader{r: bytes.NewReader(b)} | 
					
						
							|  |  |  | 				defer func() { | 
					
						
							|  |  |  | 					rc.Close() | 
					
						
							|  |  |  | 					<-asyncWriters | 
					
						
							|  |  |  | 					wg.Done() | 
					
						
							|  |  |  | 					poolBuf128k.Put(b) | 
					
						
							|  |  |  | 				}() | 
					
						
							|  |  |  | 				if err := putObject(&rc, fi, name); err != nil { | 
					
						
							|  |  |  | 					if o.ignoreErrs { | 
					
						
							|  |  |  | 						logger.LogIf(ctx, err) | 
					
						
							|  |  |  | 						return | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					asyncErrMu.Lock() | 
					
						
							|  |  |  | 					if asyncErr == nil { | 
					
						
							|  |  |  | 						asyncErr = err | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					asyncErrMu.Unlock() | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			}(name, header.FileInfo(), b) | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Sync upload.
 | 
					
						
							|  |  |  | 		rc := disconnectReader{r: tarReader} | 
					
						
							|  |  |  | 		if err := putObject(&rc, header.FileInfo(), name); err != nil { | 
					
						
							|  |  |  | 			rc.Close() | 
					
						
							|  |  |  | 			if o.ignoreErrs { | 
					
						
							|  |  |  | 				logger.LogIf(ctx, err) | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			return err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		rc.Close() | 
					
						
							| 
									
										
										
										
											2021-03-27 08:15:09 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | } |