mirror of https://github.com/minio/minio.git
				
				
				
			move parquet-go to github.com/minio/parquet-go repo
This commit is contained in:
		
							parent
							
								
									6c8fddb70f
								
							
						
					
					
						commit
						e948e7cdf6
					
				
							
								
								
									
										208
									
								
								CREDITS
								
								
								
								
							
							
						
						
									
										208
									
								
								CREDITS
								
								
								
								
							|  | @ -10733,6 +10733,214 @@ https://github.com/minio/minio-go/v7 | |||
| 
 | ||||
| ================================================================ | ||||
| 
 | ||||
| github.com/minio/parquet-go | ||||
| https://github.com/minio/parquet-go | ||||
| ---------------------------------------------------------------- | ||||
| 
 | ||||
|                                  Apache License | ||||
|                            Version 2.0, January 2004 | ||||
|                         http://www.apache.org/licenses/ | ||||
| 
 | ||||
|    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | ||||
| 
 | ||||
|    1. Definitions. | ||||
| 
 | ||||
|       "License" shall mean the terms and conditions for use, reproduction, | ||||
|       and distribution as defined by Sections 1 through 9 of this document. | ||||
| 
 | ||||
|       "Licensor" shall mean the copyright owner or entity authorized by | ||||
|       the copyright owner that is granting the License. | ||||
| 
 | ||||
|       "Legal Entity" shall mean the union of the acting entity and all | ||||
|       other entities that control, are controlled by, or are under common | ||||
|       control with that entity. For the purposes of this definition, | ||||
|       "control" means (i) the power, direct or indirect, to cause the | ||||
|       direction or management of such entity, whether by contract or | ||||
|       otherwise, or (ii) ownership of fifty percent (50%) or more of the | ||||
|       outstanding shares, or (iii) beneficial ownership of such entity. | ||||
| 
 | ||||
|       "You" (or "Your") shall mean an individual or Legal Entity | ||||
|       exercising permissions granted by this License. | ||||
| 
 | ||||
|       "Source" form shall mean the preferred form for making modifications, | ||||
|       including but not limited to software source code, documentation | ||||
|       source, and configuration files. | ||||
| 
 | ||||
|       "Object" form shall mean any form resulting from mechanical | ||||
|       transformation or translation of a Source form, including but | ||||
|       not limited to compiled object code, generated documentation, | ||||
|       and conversions to other media types. | ||||
| 
 | ||||
|       "Work" shall mean the work of authorship, whether in Source or | ||||
|       Object form, made available under the License, as indicated by a | ||||
|       copyright notice that is included in or attached to the work | ||||
|       (an example is provided in the Appendix below). | ||||
| 
 | ||||
|       "Derivative Works" shall mean any work, whether in Source or Object | ||||
|       form, that is based on (or derived from) the Work and for which the | ||||
|       editorial revisions, annotations, elaborations, or other modifications | ||||
|       represent, as a whole, an original work of authorship. For the purposes | ||||
|       of this License, Derivative Works shall not include works that remain | ||||
|       separable from, or merely link (or bind by name) to the interfaces of, | ||||
|       the Work and Derivative Works thereof. | ||||
| 
 | ||||
|       "Contribution" shall mean any work of authorship, including | ||||
|       the original version of the Work and any modifications or additions | ||||
|       to that Work or Derivative Works thereof, that is intentionally | ||||
|       submitted to Licensor for inclusion in the Work by the copyright owner | ||||
|       or by an individual or Legal Entity authorized to submit on behalf of | ||||
|       the copyright owner. For the purposes of this definition, "submitted" | ||||
|       means any form of electronic, verbal, or written communication sent | ||||
|       to the Licensor or its representatives, including but not limited to | ||||
|       communication on electronic mailing lists, source code control systems, | ||||
|       and issue tracking systems that are managed by, or on behalf of, the | ||||
|       Licensor for the purpose of discussing and improving the Work, but | ||||
|       excluding communication that is conspicuously marked or otherwise | ||||
|       designated in writing by the copyright owner as "Not a Contribution." | ||||
| 
 | ||||
|       "Contributor" shall mean Licensor and any individual or Legal Entity | ||||
|       on behalf of whom a Contribution has been received by Licensor and | ||||
|       subsequently incorporated within the Work. | ||||
| 
 | ||||
|    2. Grant of Copyright License. Subject to the terms and conditions of | ||||
|       this License, each Contributor hereby grants to You a perpetual, | ||||
|       worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||||
|       copyright license to reproduce, prepare Derivative Works of, | ||||
|       publicly display, publicly perform, sublicense, and distribute the | ||||
|       Work and such Derivative Works in Source or Object form. | ||||
| 
 | ||||
|    3. Grant of Patent License. Subject to the terms and conditions of | ||||
|       this License, each Contributor hereby grants to You a perpetual, | ||||
|       worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||||
|       (except as stated in this section) patent license to make, have made, | ||||
|       use, offer to sell, sell, import, and otherwise transfer the Work, | ||||
|       where such license applies only to those patent claims licensable | ||||
|       by such Contributor that are necessarily infringed by their | ||||
|       Contribution(s) alone or by combination of their Contribution(s) | ||||
|       with the Work to which such Contribution(s) was submitted. If You | ||||
|       institute patent litigation against any entity (including a | ||||
|       cross-claim or counterclaim in a lawsuit) alleging that the Work | ||||
|       or a Contribution incorporated within the Work constitutes direct | ||||
|       or contributory patent infringement, then any patent licenses | ||||
|       granted to You under this License for that Work shall terminate | ||||
|       as of the date such litigation is filed. | ||||
| 
 | ||||
|    4. Redistribution. You may reproduce and distribute copies of the | ||||
|       Work or Derivative Works thereof in any medium, with or without | ||||
|       modifications, and in Source or Object form, provided that You | ||||
|       meet the following conditions: | ||||
| 
 | ||||
|       (a) You must give any other recipients of the Work or | ||||
|           Derivative Works a copy of this License; and | ||||
| 
 | ||||
|       (b) You must cause any modified files to carry prominent notices | ||||
|           stating that You changed the files; and | ||||
| 
 | ||||
|       (c) You must retain, in the Source form of any Derivative Works | ||||
|           that You distribute, all copyright, patent, trademark, and | ||||
|           attribution notices from the Source form of the Work, | ||||
|           excluding those notices that do not pertain to any part of | ||||
|           the Derivative Works; and | ||||
| 
 | ||||
|       (d) If the Work includes a "NOTICE" text file as part of its | ||||
|           distribution, then any Derivative Works that You distribute must | ||||
|           include a readable copy of the attribution notices contained | ||||
|           within such NOTICE file, excluding those notices that do not | ||||
|           pertain to any part of the Derivative Works, in at least one | ||||
|           of the following places: within a NOTICE text file distributed | ||||
|           as part of the Derivative Works; within the Source form or | ||||
|           documentation, if provided along with the Derivative Works; or, | ||||
|           within a display generated by the Derivative Works, if and | ||||
|           wherever such third-party notices normally appear. The contents | ||||
|           of the NOTICE file are for informational purposes only and | ||||
|           do not modify the License. You may add Your own attribution | ||||
|           notices within Derivative Works that You distribute, alongside | ||||
|           or as an addendum to the NOTICE text from the Work, provided | ||||
|           that such additional attribution notices cannot be construed | ||||
|           as modifying the License. | ||||
| 
 | ||||
|       You may add Your own copyright statement to Your modifications and | ||||
|       may provide additional or different license terms and conditions | ||||
|       for use, reproduction, or distribution of Your modifications, or | ||||
|       for any such Derivative Works as a whole, provided Your use, | ||||
|       reproduction, and distribution of the Work otherwise complies with | ||||
|       the conditions stated in this License. | ||||
| 
 | ||||
|    5. Submission of Contributions. Unless You explicitly state otherwise, | ||||
|       any Contribution intentionally submitted for inclusion in the Work | ||||
|       by You to the Licensor shall be under the terms and conditions of | ||||
|       this License, without any additional terms or conditions. | ||||
|       Notwithstanding the above, nothing herein shall supersede or modify | ||||
|       the terms of any separate license agreement you may have executed | ||||
|       with Licensor regarding such Contributions. | ||||
| 
 | ||||
|    6. Trademarks. This License does not grant permission to use the trade | ||||
|       names, trademarks, service marks, or product names of the Licensor, | ||||
|       except as required for reasonable and customary use in describing the | ||||
|       origin of the Work and reproducing the content of the NOTICE file. | ||||
| 
 | ||||
|    7. Disclaimer of Warranty. Unless required by applicable law or | ||||
|       agreed to in writing, Licensor provides the Work (and each | ||||
|       Contributor provides its Contributions) on an "AS IS" BASIS, | ||||
|       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||||
|       implied, including, without limitation, any warranties or conditions | ||||
|       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | ||||
|       PARTICULAR PURPOSE. You are solely responsible for determining the | ||||
|       appropriateness of using or redistributing the Work and assume any | ||||
|       risks associated with Your exercise of permissions under this License. | ||||
| 
 | ||||
|    8. Limitation of Liability. In no event and under no legal theory, | ||||
|       whether in tort (including negligence), contract, or otherwise, | ||||
|       unless required by applicable law (such as deliberate and grossly | ||||
|       negligent acts) or agreed to in writing, shall any Contributor be | ||||
|       liable to You for damages, including any direct, indirect, special, | ||||
|       incidental, or consequential damages of any character arising as a | ||||
|       result of this License or out of the use or inability to use the | ||||
|       Work (including but not limited to damages for loss of goodwill, | ||||
|       work stoppage, computer failure or malfunction, or any and all | ||||
|       other commercial damages or losses), even if such Contributor | ||||
|       has been advised of the possibility of such damages. | ||||
| 
 | ||||
|    9. Accepting Warranty or Additional Liability. While redistributing | ||||
|       the Work or Derivative Works thereof, You may choose to offer, | ||||
|       and charge a fee for, acceptance of support, warranty, indemnity, | ||||
|       or other liability obligations and/or rights consistent with this | ||||
|       License. However, in accepting such obligations, You may act only | ||||
|       on Your own behalf and on Your sole responsibility, not on behalf | ||||
|       of any other Contributor, and only if You agree to indemnify, | ||||
|       defend, and hold each Contributor harmless for any liability | ||||
|       incurred by, or claims asserted against, such Contributor by reason | ||||
|       of your accepting any such warranty or additional liability. | ||||
| 
 | ||||
|    END OF TERMS AND CONDITIONS | ||||
| 
 | ||||
|    APPENDIX: How to apply the Apache License to your work. | ||||
| 
 | ||||
|       To apply the Apache License to your work, attach the following | ||||
|       boilerplate notice, with the fields enclosed by brackets "[]" | ||||
|       replaced with your own identifying information. (Don't include | ||||
|       the brackets!)  The text should be enclosed in the appropriate | ||||
|       comment syntax for the file format. We also recommend that a | ||||
|       file or class name and description of purpose be included on the | ||||
|       same "printed page" as the copyright notice for easier | ||||
|       identification within third-party archives. | ||||
| 
 | ||||
|    Copyright [yyyy] [name of copyright owner] | ||||
| 
 | ||||
|    Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|    you may not use this file except in compliance with the License. | ||||
|    You may obtain a copy of the License at | ||||
| 
 | ||||
|        http://www.apache.org/licenses/LICENSE-2.0 | ||||
| 
 | ||||
|    Unless required by applicable law or agreed to in writing, software | ||||
|    distributed under the License is distributed on an "AS IS" BASIS, | ||||
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|    See the License for the specific language governing permissions and | ||||
|    limitations under the License. | ||||
| 
 | ||||
| ================================================================ | ||||
| 
 | ||||
| github.com/minio/rpc | ||||
| https://github.com/minio/rpc | ||||
| ---------------------------------------------------------------- | ||||
|  |  | |||
							
								
								
									
										7
									
								
								go.mod
								
								
								
								
							
							
						
						
									
										7
									
								
								go.mod
								
								
								
								
							|  | @ -4,7 +4,6 @@ go 1.16 | |||
| 
 | ||||
| require ( | ||||
| 	cloud.google.com/go/storage v1.8.0 | ||||
| 	git.apache.org/thrift.git v0.13.0 | ||||
| 	github.com/Azure/azure-pipeline-go v0.2.2 | ||||
| 	github.com/Azure/azure-storage-blob-go v0.10.0 | ||||
| 	github.com/Azure/go-autorest/autorest/adal v0.9.1 // indirect | ||||
|  | @ -30,7 +29,6 @@ require ( | |||
| 	github.com/fatih/structs v1.1.0 | ||||
| 	github.com/go-ldap/ldap/v3 v3.2.4 | ||||
| 	github.com/go-sql-driver/mysql v1.5.0 | ||||
| 	github.com/golang/snappy v0.0.3 | ||||
| 	github.com/gomodule/redigo v2.0.0+incompatible | ||||
| 	github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible // indirect | ||||
| 	github.com/google/uuid v1.1.2 | ||||
|  | @ -55,6 +53,7 @@ require ( | |||
| 	github.com/minio/highwayhash v1.0.2 | ||||
| 	github.com/minio/md5-simd v1.1.1 // indirect | ||||
| 	github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78 | ||||
| 	github.com/minio/parquet-go v1.0.0 | ||||
| 	github.com/minio/rpc v1.0.0 | ||||
| 	github.com/minio/selfupdate v0.3.1 | ||||
| 	github.com/minio/sha256-simd v1.0.0 | ||||
|  | @ -71,7 +70,7 @@ require ( | |||
| 	github.com/nsqio/go-nsq v1.0.8 | ||||
| 	github.com/olivere/elastic/v7 v7.0.22 | ||||
| 	github.com/philhofer/fwd v1.1.1 | ||||
| 	github.com/pierrec/lz4 v2.5.2+incompatible | ||||
| 	github.com/pierrec/lz4 v2.6.0+incompatible | ||||
| 	github.com/pkg/errors v0.9.1 | ||||
| 	github.com/prometheus/client_golang v1.8.0 | ||||
| 	github.com/prometheus/client_model v0.2.0 | ||||
|  | @ -83,8 +82,6 @@ require ( | |||
| 	github.com/spaolacci/murmur3 v1.1.0 // indirect | ||||
| 	github.com/spf13/pflag v1.0.5 // indirect | ||||
| 	github.com/streadway/amqp v1.0.0 | ||||
| 	github.com/tidwall/gjson v1.6.8 | ||||
| 	github.com/tidwall/sjson v1.0.4 | ||||
| 	github.com/tinylib/msgp v1.1.3 | ||||
| 	github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a | ||||
| 	github.com/willf/bitset v1.1.11 // indirect | ||||
|  |  | |||
							
								
								
									
										25
									
								
								go.sum
								
								
								
								
							
							
						
						
									
										25
									
								
								go.sum
								
								
								
								
							|  | @ -180,8 +180,9 @@ github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8 | |||
| github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= | ||||
| github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= | ||||
| github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= | ||||
| github.com/frankban/quicktest v1.10.2 h1:19ARM85nVi4xH7xPXuc5eM/udya5ieh7b/Sv+d844Tk= | ||||
| github.com/frankban/quicktest v1.10.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= | ||||
| github.com/frankban/quicktest v1.12.1 h1:P6vQcHwZYgVGIpUzKB5DXzkEeYJppJOStPLuh9aB89c= | ||||
| github.com/frankban/quicktest v1.12.1/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= | ||||
| github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= | ||||
| github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= | ||||
| github.com/go-asn1-ber/asn1-ber v1.5.1 h1:pDbRAunXzIUXfx4CB2QJFv5IuPiuoW+sWvr/Us009o8= | ||||
|  | @ -251,8 +252,9 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw | |||
| github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= | ||||
| github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= | ||||
| github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= | ||||
| github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= | ||||
| github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= | ||||
| github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= | ||||
| github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= | ||||
| github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= | ||||
| github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= | ||||
| github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible h1:xmapqc1AyLoB+ddYT6r04bD9lIjlOqGaREovi0SzFaE= | ||||
|  | @ -443,8 +445,11 @@ github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLT | |||
| github.com/minio/md5-simd v1.1.0/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= | ||||
| github.com/minio/md5-simd v1.1.1 h1:9ojcLbuZ4gXbB2sX53MKn8JUZ0sB/2wfwsEcRw+I08U= | ||||
| github.com/minio/md5-simd v1.1.1/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= | ||||
| github.com/minio/minio-go/v7 v7.0.10/go.mod h1:td4gW1ldOsj1PbSNS+WYK43j+P1XVhX/8W8awaYlBFo= | ||||
| github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78 h1:v7OMbUnWkyRlO2MZ5AuYioELhwXF/BgZEznrQ1drBEM= | ||||
| github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78/go.mod h1:mTh2uJuAbEqdhMVl6CMIIZLUeiMiWtJR4JB8/5g2skw= | ||||
| github.com/minio/parquet-go v1.0.0 h1:fcWsEvub04Nsl/4hiRBDWlbqd6jhacQieV07a+nhiIk= | ||||
| github.com/minio/parquet-go v1.0.0/go.mod h1:aQlkSOfOq2AtQKkuou3mosNVMwNokd+faTacxxk/oHA= | ||||
| github.com/minio/rpc v1.0.0 h1:tJCHyLfQF6k6HlMQFpKy2FO/7lc2WP8gLDGMZp18E70= | ||||
| github.com/minio/rpc v1.0.0/go.mod h1:b9xqF7J0xeMXr0cM4pnBlP7Te7PDsG5JrRxl5dG6Ldk= | ||||
| github.com/minio/selfupdate v0.3.1 h1:BWEFSNnrZVMUWXbXIgLDNDjbejkmpAmZvy/nCz1HlEs= | ||||
|  | @ -531,8 +536,9 @@ github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ= | |||
| github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= | ||||
| github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= | ||||
| github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= | ||||
| github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= | ||||
| github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= | ||||
| github.com/pierrec/lz4 v2.6.0+incompatible h1:Ix9yFKn1nSPBLFl/yZknTp8TU5G4Ps0JDmguYK6iH1A= | ||||
| github.com/pierrec/lz4 v2.6.0+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= | ||||
| github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= | ||||
| github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= | ||||
| github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= | ||||
|  | @ -626,14 +632,15 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P | |||
| github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= | ||||
| github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= | ||||
| github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= | ||||
| github.com/tidwall/gjson v1.6.8 h1:CTmXMClGYPAmln7652e69B7OLXfTi5ABcPPwjIWUv7w= | ||||
| github.com/tidwall/gjson v1.6.8/go.mod h1:zeFuBCIqD4sN/gmqBzZ4j7Jd6UcA2Fc56x7QFsv+8fI= | ||||
| github.com/tidwall/gjson v1.7.4/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= | ||||
| github.com/tidwall/gjson v1.7.5 h1:zmAN/xmX7OtpAkv4Ovfso60r/BiCi5IErCDYGNJu+uc= | ||||
| github.com/tidwall/gjson v1.7.5/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= | ||||
| github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE= | ||||
| github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= | ||||
| github.com/tidwall/pretty v1.0.2 h1:Z7S3cePv9Jwm1KwS0513MRaoUe3S01WPbLNV40pwWZU= | ||||
| github.com/tidwall/pretty v1.0.2/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= | ||||
| github.com/tidwall/sjson v1.0.4 h1:UcdIRXff12Lpnu3OLtZvnc03g4vH2suXDXhBwBqmzYg= | ||||
| github.com/tidwall/sjson v1.0.4/go.mod h1:bURseu1nuBkFpIES5cz6zBtjmYeOQmEESshn7VpF15Y= | ||||
| github.com/tidwall/pretty v1.1.0 h1:K3hMW5epkdAVwibsQEfR/7Zj0Qgt4DxtNumTq/VloO8= | ||||
| github.com/tidwall/pretty v1.1.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= | ||||
| github.com/tidwall/sjson v1.1.6 h1:8fDdlahON04OZBlTQCIatW8FstSFJz8oxidj5h0rmSQ= | ||||
| github.com/tidwall/sjson v1.1.6/go.mod h1:KN3FZ7odvXIHPbJdhNorK/M9lWweVUbXsXXhrJ/kGOA= | ||||
| github.com/tinylib/msgp v1.1.3 h1:3giwAkmtaEDLSV0MdO1lDLuPgklgPzmk8H9+So2BVfA= | ||||
| github.com/tinylib/msgp v1.1.3/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= | ||||
| github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8 h1:ndzgwNDnKIqyCvHTXaCqh9KlOWKvBry6nuXMJmonVsE= | ||||
|  |  | |||
|  | @ -1,661 +0,0 @@ | |||
|                     GNU AFFERO GENERAL PUBLIC LICENSE | ||||
|                        Version 3, 19 November 2007 | ||||
| 
 | ||||
|  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> | ||||
|  Everyone is permitted to copy and distribute verbatim copies | ||||
|  of this license document, but changing it is not allowed. | ||||
| 
 | ||||
|                             Preamble | ||||
| 
 | ||||
|   The GNU Affero General Public License is a free, copyleft license for | ||||
| software and other kinds of works, specifically designed to ensure | ||||
| cooperation with the community in the case of network server software. | ||||
| 
 | ||||
|   The licenses for most software and other practical works are designed | ||||
| to take away your freedom to share and change the works.  By contrast, | ||||
| our General Public Licenses are intended to guarantee your freedom to | ||||
| share and change all versions of a program--to make sure it remains free | ||||
| software for all its users. | ||||
| 
 | ||||
|   When we speak of free software, we are referring to freedom, not | ||||
| price.  Our General Public Licenses are designed to make sure that you | ||||
| have the freedom to distribute copies of free software (and charge for | ||||
| them if you wish), that you receive source code or can get it if you | ||||
| want it, that you can change the software or use pieces of it in new | ||||
| free programs, and that you know you can do these things. | ||||
| 
 | ||||
|   Developers that use our General Public Licenses protect your rights | ||||
| with two steps: (1) assert copyright on the software, and (2) offer | ||||
| you this License which gives you legal permission to copy, distribute | ||||
| and/or modify the software. | ||||
| 
 | ||||
|   A secondary benefit of defending all users' freedom is that | ||||
| improvements made in alternate versions of the program, if they | ||||
| receive widespread use, become available for other developers to | ||||
| incorporate.  Many developers of free software are heartened and | ||||
| encouraged by the resulting cooperation.  However, in the case of | ||||
| software used on network servers, this result may fail to come about. | ||||
| The GNU General Public License permits making a modified version and | ||||
| letting the public access it on a server without ever releasing its | ||||
| source code to the public. | ||||
| 
 | ||||
|   The GNU Affero General Public License is designed specifically to | ||||
| ensure that, in such cases, the modified source code becomes available | ||||
| to the community.  It requires the operator of a network server to | ||||
| provide the source code of the modified version running there to the | ||||
| users of that server.  Therefore, public use of a modified version, on | ||||
| a publicly accessible server, gives the public access to the source | ||||
| code of the modified version. | ||||
| 
 | ||||
|   An older license, called the Affero General Public License and | ||||
| published by Affero, was designed to accomplish similar goals.  This is | ||||
| a different license, not a version of the Affero GPL, but Affero has | ||||
| released a new version of the Affero GPL which permits relicensing under | ||||
| this license. | ||||
| 
 | ||||
|   The precise terms and conditions for copying, distribution and | ||||
| modification follow. | ||||
| 
 | ||||
|                        TERMS AND CONDITIONS | ||||
| 
 | ||||
|   0. Definitions. | ||||
| 
 | ||||
|   "This License" refers to version 3 of the GNU Affero General Public License. | ||||
| 
 | ||||
|   "Copyright" also means copyright-like laws that apply to other kinds of | ||||
| works, such as semiconductor masks. | ||||
| 
 | ||||
|   "The Program" refers to any copyrightable work licensed under this | ||||
| License.  Each licensee is addressed as "you".  "Licensees" and | ||||
| "recipients" may be individuals or organizations. | ||||
| 
 | ||||
|   To "modify" a work means to copy from or adapt all or part of the work | ||||
| in a fashion requiring copyright permission, other than the making of an | ||||
| exact copy.  The resulting work is called a "modified version" of the | ||||
| earlier work or a work "based on" the earlier work. | ||||
| 
 | ||||
|   A "covered work" means either the unmodified Program or a work based | ||||
| on the Program. | ||||
| 
 | ||||
|   To "propagate" a work means to do anything with it that, without | ||||
| permission, would make you directly or secondarily liable for | ||||
| infringement under applicable copyright law, except executing it on a | ||||
| computer or modifying a private copy.  Propagation includes copying, | ||||
| distribution (with or without modification), making available to the | ||||
| public, and in some countries other activities as well. | ||||
| 
 | ||||
|   To "convey" a work means any kind of propagation that enables other | ||||
| parties to make or receive copies.  Mere interaction with a user through | ||||
| a computer network, with no transfer of a copy, is not conveying. | ||||
| 
 | ||||
|   An interactive user interface displays "Appropriate Legal Notices" | ||||
| to the extent that it includes a convenient and prominently visible | ||||
| feature that (1) displays an appropriate copyright notice, and (2) | ||||
| tells the user that there is no warranty for the work (except to the | ||||
| extent that warranties are provided), that licensees may convey the | ||||
| work under this License, and how to view a copy of this License.  If | ||||
| the interface presents a list of user commands or options, such as a | ||||
| menu, a prominent item in the list meets this criterion. | ||||
| 
 | ||||
|   1. Source Code. | ||||
| 
 | ||||
|   The "source code" for a work means the preferred form of the work | ||||
| for making modifications to it.  "Object code" means any non-source | ||||
| form of a work. | ||||
| 
 | ||||
|   A "Standard Interface" means an interface that either is an official | ||||
| standard defined by a recognized standards body, or, in the case of | ||||
| interfaces specified for a particular programming language, one that | ||||
| is widely used among developers working in that language. | ||||
| 
 | ||||
|   The "System Libraries" of an executable work include anything, other | ||||
| than the work as a whole, that (a) is included in the normal form of | ||||
| packaging a Major Component, but which is not part of that Major | ||||
| Component, and (b) serves only to enable use of the work with that | ||||
| Major Component, or to implement a Standard Interface for which an | ||||
| implementation is available to the public in source code form.  A | ||||
| "Major Component", in this context, means a major essential component | ||||
| (kernel, window system, and so on) of the specific operating system | ||||
| (if any) on which the executable work runs, or a compiler used to | ||||
| produce the work, or an object code interpreter used to run it. | ||||
| 
 | ||||
|   The "Corresponding Source" for a work in object code form means all | ||||
| the source code needed to generate, install, and (for an executable | ||||
| work) run the object code and to modify the work, including scripts to | ||||
| control those activities.  However, it does not include the work's | ||||
| System Libraries, or general-purpose tools or generally available free | ||||
| programs which are used unmodified in performing those activities but | ||||
| which are not part of the work.  For example, Corresponding Source | ||||
| includes interface definition files associated with source files for | ||||
| the work, and the source code for shared libraries and dynamically | ||||
| linked subprograms that the work is specifically designed to require, | ||||
| such as by intimate data communication or control flow between those | ||||
| subprograms and other parts of the work. | ||||
| 
 | ||||
|   The Corresponding Source need not include anything that users | ||||
| can regenerate automatically from other parts of the Corresponding | ||||
| Source. | ||||
| 
 | ||||
|   The Corresponding Source for a work in source code form is that | ||||
| same work. | ||||
| 
 | ||||
|   2. Basic Permissions. | ||||
| 
 | ||||
|   All rights granted under this License are granted for the term of | ||||
| copyright on the Program, and are irrevocable provided the stated | ||||
| conditions are met.  This License explicitly affirms your unlimited | ||||
| permission to run the unmodified Program.  The output from running a | ||||
| covered work is covered by this License only if the output, given its | ||||
| content, constitutes a covered work.  This License acknowledges your | ||||
| rights of fair use or other equivalent, as provided by copyright law. | ||||
| 
 | ||||
|   You may make, run and propagate covered works that you do not | ||||
| convey, without conditions so long as your license otherwise remains | ||||
| in force.  You may convey covered works to others for the sole purpose | ||||
| of having them make modifications exclusively for you, or provide you | ||||
| with facilities for running those works, provided that you comply with | ||||
| the terms of this License in conveying all material for which you do | ||||
| not control copyright.  Those thus making or running the covered works | ||||
| for you must do so exclusively on your behalf, under your direction | ||||
| and control, on terms that prohibit them from making any copies of | ||||
| your copyrighted material outside their relationship with you. | ||||
| 
 | ||||
|   Conveying under any other circumstances is permitted solely under | ||||
| the conditions stated below.  Sublicensing is not allowed; section 10 | ||||
| makes it unnecessary. | ||||
| 
 | ||||
|   3. Protecting Users' Legal Rights From Anti-Circumvention Law. | ||||
| 
 | ||||
|   No covered work shall be deemed part of an effective technological | ||||
| measure under any applicable law fulfilling obligations under article | ||||
| 11 of the WIPO copyright treaty adopted on 20 December 1996, or | ||||
| similar laws prohibiting or restricting circumvention of such | ||||
| measures. | ||||
| 
 | ||||
|   When you convey a covered work, you waive any legal power to forbid | ||||
| circumvention of technological measures to the extent such circumvention | ||||
| is effected by exercising rights under this License with respect to | ||||
| the covered work, and you disclaim any intention to limit operation or | ||||
| modification of the work as a means of enforcing, against the work's | ||||
| users, your or third parties' legal rights to forbid circumvention of | ||||
| technological measures. | ||||
| 
 | ||||
|   4. Conveying Verbatim Copies. | ||||
| 
 | ||||
|   You may convey verbatim copies of the Program's source code as you | ||||
| receive it, in any medium, provided that you conspicuously and | ||||
| appropriately publish on each copy an appropriate copyright notice; | ||||
| keep intact all notices stating that this License and any | ||||
| non-permissive terms added in accord with section 7 apply to the code; | ||||
| keep intact all notices of the absence of any warranty; and give all | ||||
| recipients a copy of this License along with the Program. | ||||
| 
 | ||||
|   You may charge any price or no price for each copy that you convey, | ||||
| and you may offer support or warranty protection for a fee. | ||||
| 
 | ||||
|   5. Conveying Modified Source Versions. | ||||
| 
 | ||||
|   You may convey a work based on the Program, or the modifications to | ||||
| produce it from the Program, in the form of source code under the | ||||
| terms of section 4, provided that you also meet all of these conditions: | ||||
| 
 | ||||
|     a) The work must carry prominent notices stating that you modified | ||||
|     it, and giving a relevant date. | ||||
| 
 | ||||
|     b) The work must carry prominent notices stating that it is | ||||
|     released under this License and any conditions added under section | ||||
|     7.  This requirement modifies the requirement in section 4 to | ||||
|     "keep intact all notices". | ||||
| 
 | ||||
|     c) You must license the entire work, as a whole, under this | ||||
|     License to anyone who comes into possession of a copy.  This | ||||
|     License will therefore apply, along with any applicable section 7 | ||||
|     additional terms, to the whole of the work, and all its parts, | ||||
|     regardless of how they are packaged.  This License gives no | ||||
|     permission to license the work in any other way, but it does not | ||||
|     invalidate such permission if you have separately received it. | ||||
| 
 | ||||
|     d) If the work has interactive user interfaces, each must display | ||||
|     Appropriate Legal Notices; however, if the Program has interactive | ||||
|     interfaces that do not display Appropriate Legal Notices, your | ||||
|     work need not make them do so. | ||||
| 
 | ||||
|   A compilation of a covered work with other separate and independent | ||||
| works, which are not by their nature extensions of the covered work, | ||||
| and which are not combined with it such as to form a larger program, | ||||
| in or on a volume of a storage or distribution medium, is called an | ||||
| "aggregate" if the compilation and its resulting copyright are not | ||||
| used to limit the access or legal rights of the compilation's users | ||||
| beyond what the individual works permit.  Inclusion of a covered work | ||||
| in an aggregate does not cause this License to apply to the other | ||||
| parts of the aggregate. | ||||
| 
 | ||||
|   6. Conveying Non-Source Forms. | ||||
| 
 | ||||
|   You may convey a covered work in object code form under the terms | ||||
| of sections 4 and 5, provided that you also convey the | ||||
| machine-readable Corresponding Source under the terms of this License, | ||||
| in one of these ways: | ||||
| 
 | ||||
|     a) Convey the object code in, or embodied in, a physical product | ||||
|     (including a physical distribution medium), accompanied by the | ||||
|     Corresponding Source fixed on a durable physical medium | ||||
|     customarily used for software interchange. | ||||
| 
 | ||||
|     b) Convey the object code in, or embodied in, a physical product | ||||
|     (including a physical distribution medium), accompanied by a | ||||
|     written offer, valid for at least three years and valid for as | ||||
|     long as you offer spare parts or customer support for that product | ||||
|     model, to give anyone who possesses the object code either (1) a | ||||
|     copy of the Corresponding Source for all the software in the | ||||
|     product that is covered by this License, on a durable physical | ||||
|     medium customarily used for software interchange, for a price no | ||||
|     more than your reasonable cost of physically performing this | ||||
|     conveying of source, or (2) access to copy the | ||||
|     Corresponding Source from a network server at no charge. | ||||
| 
 | ||||
|     c) Convey individual copies of the object code with a copy of the | ||||
|     written offer to provide the Corresponding Source.  This | ||||
|     alternative is allowed only occasionally and noncommercially, and | ||||
|     only if you received the object code with such an offer, in accord | ||||
|     with subsection 6b. | ||||
| 
 | ||||
|     d) Convey the object code by offering access from a designated | ||||
|     place (gratis or for a charge), and offer equivalent access to the | ||||
|     Corresponding Source in the same way through the same place at no | ||||
|     further charge.  You need not require recipients to copy the | ||||
|     Corresponding Source along with the object code.  If the place to | ||||
|     copy the object code is a network server, the Corresponding Source | ||||
|     may be on a different server (operated by you or a third party) | ||||
|     that supports equivalent copying facilities, provided you maintain | ||||
|     clear directions next to the object code saying where to find the | ||||
|     Corresponding Source.  Regardless of what server hosts the | ||||
|     Corresponding Source, you remain obligated to ensure that it is | ||||
|     available for as long as needed to satisfy these requirements. | ||||
| 
 | ||||
|     e) Convey the object code using peer-to-peer transmission, provided | ||||
|     you inform other peers where the object code and Corresponding | ||||
|     Source of the work are being offered to the general public at no | ||||
|     charge under subsection 6d. | ||||
| 
 | ||||
|   A separable portion of the object code, whose source code is excluded | ||||
| from the Corresponding Source as a System Library, need not be | ||||
| included in conveying the object code work. | ||||
| 
 | ||||
|   A "User Product" is either (1) a "consumer product", which means any | ||||
| tangible personal property which is normally used for personal, family, | ||||
| or household purposes, or (2) anything designed or sold for incorporation | ||||
| into a dwelling.  In determining whether a product is a consumer product, | ||||
| doubtful cases shall be resolved in favor of coverage.  For a particular | ||||
| product received by a particular user, "normally used" refers to a | ||||
| typical or common use of that class of product, regardless of the status | ||||
| of the particular user or of the way in which the particular user | ||||
| actually uses, or expects or is expected to use, the product.  A product | ||||
| is a consumer product regardless of whether the product has substantial | ||||
| commercial, industrial or non-consumer uses, unless such uses represent | ||||
| the only significant mode of use of the product. | ||||
| 
 | ||||
|   "Installation Information" for a User Product means any methods, | ||||
| procedures, authorization keys, or other information required to install | ||||
| and execute modified versions of a covered work in that User Product from | ||||
| a modified version of its Corresponding Source.  The information must | ||||
| suffice to ensure that the continued functioning of the modified object | ||||
| code is in no case prevented or interfered with solely because | ||||
| modification has been made. | ||||
| 
 | ||||
|   If you convey an object code work under this section in, or with, or | ||||
| specifically for use in, a User Product, and the conveying occurs as | ||||
| part of a transaction in which the right of possession and use of the | ||||
| User Product is transferred to the recipient in perpetuity or for a | ||||
| fixed term (regardless of how the transaction is characterized), the | ||||
| Corresponding Source conveyed under this section must be accompanied | ||||
| by the Installation Information.  But this requirement does not apply | ||||
| if neither you nor any third party retains the ability to install | ||||
| modified object code on the User Product (for example, the work has | ||||
| been installed in ROM). | ||||
| 
 | ||||
|   The requirement to provide Installation Information does not include a | ||||
| requirement to continue to provide support service, warranty, or updates | ||||
| for a work that has been modified or installed by the recipient, or for | ||||
| the User Product in which it has been modified or installed.  Access to a | ||||
| network may be denied when the modification itself materially and | ||||
| adversely affects the operation of the network or violates the rules and | ||||
| protocols for communication across the network. | ||||
| 
 | ||||
|   Corresponding Source conveyed, and Installation Information provided, | ||||
| in accord with this section must be in a format that is publicly | ||||
| documented (and with an implementation available to the public in | ||||
| source code form), and must require no special password or key for | ||||
| unpacking, reading or copying. | ||||
| 
 | ||||
|   7. Additional Terms. | ||||
| 
 | ||||
|   "Additional permissions" are terms that supplement the terms of this | ||||
| License by making exceptions from one or more of its conditions. | ||||
| Additional permissions that are applicable to the entire Program shall | ||||
| be treated as though they were included in this License, to the extent | ||||
| that they are valid under applicable law.  If additional permissions | ||||
| apply only to part of the Program, that part may be used separately | ||||
| under those permissions, but the entire Program remains governed by | ||||
| this License without regard to the additional permissions. | ||||
| 
 | ||||
|   When you convey a copy of a covered work, you may at your option | ||||
| remove any additional permissions from that copy, or from any part of | ||||
| it.  (Additional permissions may be written to require their own | ||||
| removal in certain cases when you modify the work.)  You may place | ||||
| additional permissions on material, added by you to a covered work, | ||||
| for which you have or can give appropriate copyright permission. | ||||
| 
 | ||||
|   Notwithstanding any other provision of this License, for material you | ||||
| add to a covered work, you may (if authorized by the copyright holders of | ||||
| that material) supplement the terms of this License with terms: | ||||
| 
 | ||||
|     a) Disclaiming warranty or limiting liability differently from the | ||||
|     terms of sections 15 and 16 of this License; or | ||||
| 
 | ||||
|     b) Requiring preservation of specified reasonable legal notices or | ||||
|     author attributions in that material or in the Appropriate Legal | ||||
|     Notices displayed by works containing it; or | ||||
| 
 | ||||
|     c) Prohibiting misrepresentation of the origin of that material, or | ||||
|     requiring that modified versions of such material be marked in | ||||
|     reasonable ways as different from the original version; or | ||||
| 
 | ||||
|     d) Limiting the use for publicity purposes of names of licensors or | ||||
|     authors of the material; or | ||||
| 
 | ||||
|     e) Declining to grant rights under trademark law for use of some | ||||
|     trade names, trademarks, or service marks; or | ||||
| 
 | ||||
|     f) Requiring indemnification of licensors and authors of that | ||||
|     material by anyone who conveys the material (or modified versions of | ||||
|     it) with contractual assumptions of liability to the recipient, for | ||||
|     any liability that these contractual assumptions directly impose on | ||||
|     those licensors and authors. | ||||
| 
 | ||||
|   All other non-permissive additional terms are considered "further | ||||
| restrictions" within the meaning of section 10.  If the Program as you | ||||
| received it, or any part of it, contains a notice stating that it is | ||||
| governed by this License along with a term that is a further | ||||
| restriction, you may remove that term.  If a license document contains | ||||
| a further restriction but permits relicensing or conveying under this | ||||
| License, you may add to a covered work material governed by the terms | ||||
| of that license document, provided that the further restriction does | ||||
| not survive such relicensing or conveying. | ||||
| 
 | ||||
|   If you add terms to a covered work in accord with this section, you | ||||
| must place, in the relevant source files, a statement of the | ||||
| additional terms that apply to those files, or a notice indicating | ||||
| where to find the applicable terms. | ||||
| 
 | ||||
|   Additional terms, permissive or non-permissive, may be stated in the | ||||
| form of a separately written license, or stated as exceptions; | ||||
| the above requirements apply either way. | ||||
| 
 | ||||
|   8. Termination. | ||||
| 
 | ||||
|   You may not propagate or modify a covered work except as expressly | ||||
| provided under this License.  Any attempt otherwise to propagate or | ||||
| modify it is void, and will automatically terminate your rights under | ||||
| this License (including any patent licenses granted under the third | ||||
| paragraph of section 11). | ||||
| 
 | ||||
|   However, if you cease all violation of this License, then your | ||||
| license from a particular copyright holder is reinstated (a) | ||||
| provisionally, unless and until the copyright holder explicitly and | ||||
| finally terminates your license, and (b) permanently, if the copyright | ||||
| holder fails to notify you of the violation by some reasonable means | ||||
| prior to 60 days after the cessation. | ||||
| 
 | ||||
|   Moreover, your license from a particular copyright holder is | ||||
| reinstated permanently if the copyright holder notifies you of the | ||||
| violation by some reasonable means, this is the first time you have | ||||
| received notice of violation of this License (for any work) from that | ||||
| copyright holder, and you cure the violation prior to 30 days after | ||||
| your receipt of the notice. | ||||
| 
 | ||||
|   Termination of your rights under this section does not terminate the | ||||
| licenses of parties who have received copies or rights from you under | ||||
| this License.  If your rights have been terminated and not permanently | ||||
| reinstated, you do not qualify to receive new licenses for the same | ||||
| material under section 10. | ||||
| 
 | ||||
|   9. Acceptance Not Required for Having Copies. | ||||
| 
 | ||||
|   You are not required to accept this License in order to receive or | ||||
| run a copy of the Program.  Ancillary propagation of a covered work | ||||
| occurring solely as a consequence of using peer-to-peer transmission | ||||
| to receive a copy likewise does not require acceptance.  However, | ||||
| nothing other than this License grants you permission to propagate or | ||||
| modify any covered work.  These actions infringe copyright if you do | ||||
| not accept this License.  Therefore, by modifying or propagating a | ||||
| covered work, you indicate your acceptance of this License to do so. | ||||
| 
 | ||||
|   10. Automatic Licensing of Downstream Recipients. | ||||
| 
 | ||||
|   Each time you convey a covered work, the recipient automatically | ||||
| receives a license from the original licensors, to run, modify and | ||||
| propagate that work, subject to this License.  You are not responsible | ||||
| for enforcing compliance by third parties with this License. | ||||
| 
 | ||||
|   An "entity transaction" is a transaction transferring control of an | ||||
| organization, or substantially all assets of one, or subdividing an | ||||
| organization, or merging organizations.  If propagation of a covered | ||||
| work results from an entity transaction, each party to that | ||||
| transaction who receives a copy of the work also receives whatever | ||||
| licenses to the work the party's predecessor in interest had or could | ||||
| give under the previous paragraph, plus a right to possession of the | ||||
| Corresponding Source of the work from the predecessor in interest, if | ||||
| the predecessor has it or can get it with reasonable efforts. | ||||
| 
 | ||||
|   You may not impose any further restrictions on the exercise of the | ||||
| rights granted or affirmed under this License.  For example, you may | ||||
| not impose a license fee, royalty, or other charge for exercise of | ||||
| rights granted under this License, and you may not initiate litigation | ||||
| (including a cross-claim or counterclaim in a lawsuit) alleging that | ||||
| any patent claim is infringed by making, using, selling, offering for | ||||
| sale, or importing the Program or any portion of it. | ||||
| 
 | ||||
|   11. Patents. | ||||
| 
 | ||||
|   A "contributor" is a copyright holder who authorizes use under this | ||||
| License of the Program or a work on which the Program is based.  The | ||||
| work thus licensed is called the contributor's "contributor version". | ||||
| 
 | ||||
|   A contributor's "essential patent claims" are all patent claims | ||||
| owned or controlled by the contributor, whether already acquired or | ||||
| hereafter acquired, that would be infringed by some manner, permitted | ||||
| by this License, of making, using, or selling its contributor version, | ||||
| but do not include claims that would be infringed only as a | ||||
| consequence of further modification of the contributor version.  For | ||||
| purposes of this definition, "control" includes the right to grant | ||||
| patent sublicenses in a manner consistent with the requirements of | ||||
| this License. | ||||
| 
 | ||||
|   Each contributor grants you a non-exclusive, worldwide, royalty-free | ||||
| patent license under the contributor's essential patent claims, to | ||||
| make, use, sell, offer for sale, import and otherwise run, modify and | ||||
| propagate the contents of its contributor version. | ||||
| 
 | ||||
|   In the following three paragraphs, a "patent license" is any express | ||||
| agreement or commitment, however denominated, not to enforce a patent | ||||
| (such as an express permission to practice a patent or covenant not to | ||||
| sue for patent infringement).  To "grant" such a patent license to a | ||||
| party means to make such an agreement or commitment not to enforce a | ||||
| patent against the party. | ||||
| 
 | ||||
|   If you convey a covered work, knowingly relying on a patent license, | ||||
| and the Corresponding Source of the work is not available for anyone | ||||
| to copy, free of charge and under the terms of this License, through a | ||||
| publicly available network server or other readily accessible means, | ||||
| then you must either (1) cause the Corresponding Source to be so | ||||
| available, or (2) arrange to deprive yourself of the benefit of the | ||||
| patent license for this particular work, or (3) arrange, in a manner | ||||
| consistent with the requirements of this License, to extend the patent | ||||
| license to downstream recipients.  "Knowingly relying" means you have | ||||
| actual knowledge that, but for the patent license, your conveying the | ||||
| covered work in a country, or your recipient's use of the covered work | ||||
| in a country, would infringe one or more identifiable patents in that | ||||
| country that you have reason to believe are valid. | ||||
| 
 | ||||
|   If, pursuant to or in connection with a single transaction or | ||||
| arrangement, you convey, or propagate by procuring conveyance of, a | ||||
| covered work, and grant a patent license to some of the parties | ||||
| receiving the covered work authorizing them to use, propagate, modify | ||||
| or convey a specific copy of the covered work, then the patent license | ||||
| you grant is automatically extended to all recipients of the covered | ||||
| work and works based on it. | ||||
| 
 | ||||
|   A patent license is "discriminatory" if it does not include within | ||||
| the scope of its coverage, prohibits the exercise of, or is | ||||
| conditioned on the non-exercise of one or more of the rights that are | ||||
| specifically granted under this License.  You may not convey a covered | ||||
| work if you are a party to an arrangement with a third party that is | ||||
| in the business of distributing software, under which you make payment | ||||
| to the third party based on the extent of your activity of conveying | ||||
| the work, and under which the third party grants, to any of the | ||||
| parties who would receive the covered work from you, a discriminatory | ||||
| patent license (a) in connection with copies of the covered work | ||||
| conveyed by you (or copies made from those copies), or (b) primarily | ||||
| for and in connection with specific products or compilations that | ||||
| contain the covered work, unless you entered into that arrangement, | ||||
| or that patent license was granted, prior to 28 March 2007. | ||||
| 
 | ||||
|   Nothing in this License shall be construed as excluding or limiting | ||||
| any implied license or other defenses to infringement that may | ||||
| otherwise be available to you under applicable patent law. | ||||
| 
 | ||||
|   12. No Surrender of Others' Freedom. | ||||
| 
 | ||||
|   If conditions are imposed on you (whether by court order, agreement or | ||||
| otherwise) that contradict the conditions of this License, they do not | ||||
| excuse you from the conditions of this License.  If you cannot convey a | ||||
| covered work so as to satisfy simultaneously your obligations under this | ||||
| License and any other pertinent obligations, then as a consequence you may | ||||
| not convey it at all.  For example, if you agree to terms that obligate you | ||||
| to collect a royalty for further conveying from those to whom you convey | ||||
| the Program, the only way you could satisfy both those terms and this | ||||
| License would be to refrain entirely from conveying the Program. | ||||
| 
 | ||||
|   13. Remote Network Interaction; Use with the GNU General Public License. | ||||
| 
 | ||||
|   Notwithstanding any other provision of this License, if you modify the | ||||
| Program, your modified version must prominently offer all users | ||||
| interacting with it remotely through a computer network (if your version | ||||
| supports such interaction) an opportunity to receive the Corresponding | ||||
| Source of your version by providing access to the Corresponding Source | ||||
| from a network server at no charge, through some standard or customary | ||||
| means of facilitating copying of software.  This Corresponding Source | ||||
| shall include the Corresponding Source for any work covered by version 3 | ||||
| of the GNU General Public License that is incorporated pursuant to the | ||||
| following paragraph. | ||||
| 
 | ||||
|   Notwithstanding any other provision of this License, you have | ||||
| permission to link or combine any covered work with a work licensed | ||||
| under version 3 of the GNU General Public License into a single | ||||
| combined work, and to convey the resulting work.  The terms of this | ||||
| License will continue to apply to the part which is the covered work, | ||||
| but the work with which it is combined will remain governed by version | ||||
| 3 of the GNU General Public License. | ||||
| 
 | ||||
|   14. Revised Versions of this License. | ||||
| 
 | ||||
|   The Free Software Foundation may publish revised and/or new versions of | ||||
| the GNU Affero General Public License from time to time.  Such new versions | ||||
| will be similar in spirit to the present version, but may differ in detail to | ||||
| address new problems or concerns. | ||||
| 
 | ||||
|   Each version is given a distinguishing version number.  If the | ||||
| Program specifies that a certain numbered version of the GNU Affero General | ||||
| Public License "or any later version" applies to it, you have the | ||||
| option of following the terms and conditions either of that numbered | ||||
| version or of any later version published by the Free Software | ||||
| Foundation.  If the Program does not specify a version number of the | ||||
| GNU Affero General Public License, you may choose any version ever published | ||||
| by the Free Software Foundation. | ||||
| 
 | ||||
|   If the Program specifies that a proxy can decide which future | ||||
| versions of the GNU Affero General Public License can be used, that proxy's | ||||
| public statement of acceptance of a version permanently authorizes you | ||||
| to choose that version for the Program. | ||||
| 
 | ||||
|   Later license versions may give you additional or different | ||||
| permissions.  However, no additional obligations are imposed on any | ||||
| author or copyright holder as a result of your choosing to follow a | ||||
| later version. | ||||
| 
 | ||||
|   15. Disclaimer of Warranty. | ||||
| 
 | ||||
|   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY | ||||
| APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT | ||||
| HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY | ||||
| OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, | ||||
| THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||||
| PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM | ||||
| IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF | ||||
| ALL NECESSARY SERVICING, REPAIR OR CORRECTION. | ||||
| 
 | ||||
|   16. Limitation of Liability. | ||||
| 
 | ||||
|   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING | ||||
| WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS | ||||
| THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY | ||||
| GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE | ||||
| USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF | ||||
| DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD | ||||
| PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), | ||||
| EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF | ||||
| SUCH DAMAGES. | ||||
| 
 | ||||
|   17. Interpretation of Sections 15 and 16. | ||||
| 
 | ||||
|   If the disclaimer of warranty and limitation of liability provided | ||||
| above cannot be given local legal effect according to their terms, | ||||
| reviewing courts shall apply local law that most closely approximates | ||||
| an absolute waiver of all civil liability in connection with the | ||||
| Program, unless a warranty or assumption of liability accompanies a | ||||
| copy of the Program in return for a fee. | ||||
| 
 | ||||
|                      END OF TERMS AND CONDITIONS | ||||
| 
 | ||||
|             How to Apply These Terms to Your New Programs | ||||
| 
 | ||||
|   If you develop a new program, and you want it to be of the greatest | ||||
| possible use to the public, the best way to achieve this is to make it | ||||
| free software which everyone can redistribute and change under these terms. | ||||
| 
 | ||||
|   To do so, attach the following notices to the program.  It is safest | ||||
| to attach them to the start of each source file to most effectively | ||||
| state the exclusion of warranty; and each file should have at least | ||||
| the "copyright" line and a pointer to where the full notice is found. | ||||
| 
 | ||||
|     <one line to give the program's name and a brief idea of what it does.> | ||||
|     Copyright (C) <year>  <name of author> | ||||
| 
 | ||||
|     This program is free software: you can redistribute it and/or modify | ||||
|     it under the terms of the GNU Affero General Public License as published by | ||||
|     the Free Software Foundation, either version 3 of the License, or | ||||
|     (at your option) any later version. | ||||
| 
 | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU Affero General Public License for more details. | ||||
| 
 | ||||
|     You should have received a copy of the GNU Affero General Public License | ||||
|     along with this program.  If not, see <https://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| Also add information on how to contact you by electronic and paper mail. | ||||
| 
 | ||||
|   If your software can interact with users remotely through a computer | ||||
| network, you should also make sure that it provides a way for users to | ||||
| get its source.  For example, if your program is a web application, its | ||||
| interface could display a "Source" link that leads users to an archive | ||||
| of the code.  There are many ways you could offer source, and different | ||||
| solutions will be better for different programs; see section 13 for the | ||||
| specific requirements. | ||||
| 
 | ||||
|   You should also get your employer (if you work as a programmer) or school, | ||||
| if any, to sign a "copyright disclaimer" for the program, if necessary. | ||||
| For more information on this, and how to apply and follow the GNU AGPL, see | ||||
| <https://www.gnu.org/licenses/>. | ||||
|  | @ -1,36 +0,0 @@ | |||
| GOPATH := $(shell go env GOPATH) | ||||
| 
 | ||||
| all: check | ||||
| 
 | ||||
| getdeps: | ||||
| 	@if [ ! -f ${GOPATH}/bin/golint ]; then echo "Installing golint" && go get -u golang.org/x/lint/golint; fi | ||||
| 	@if [ ! -f ${GOPATH}/bin/gocyclo ]; then echo "Installing gocyclo" && go get -u github.com/fzipp/gocyclo; fi | ||||
| 	@if [ ! -f ${GOPATH}/bin/misspell ]; then echo "Installing misspell" && go get -u github.com/client9/misspell/cmd/misspell; fi | ||||
| 	@if [ ! -f ${GOPATH}/bin/ineffassign ]; then echo "Installing ineffassign" && go get -u github.com/gordonklaus/ineffassign; fi | ||||
| 
 | ||||
| vet: | ||||
| 	@echo "Running $@" | ||||
| 	@go vet *.go | ||||
| 
 | ||||
| fmt: | ||||
| 	@echo "Running $@" | ||||
| 	@gofmt -d *.go | ||||
| 
 | ||||
| lint: | ||||
| 	@echo "Running $@" | ||||
| 	@${GOPATH}/bin/golint -set_exit_status | ||||
| 
 | ||||
| cyclo: | ||||
| 	@echo "Running $@" | ||||
| 	@${GOPATH}/bin/gocyclo -over 200 . | ||||
| 
 | ||||
| spelling: | ||||
| 	@${GOPATH}/bin/misspell -locale US -error *.go README.md | ||||
| 
 | ||||
| ineffassign: | ||||
| 	@echo "Running $@" | ||||
| 	@${GOPATH}/bin/ineffassign . | ||||
| 
 | ||||
| check: getdeps vet fmt lint cyclo spelling ineffassign | ||||
| 	@echo "Running unit tests" | ||||
| 	@go test -tags kqueue ./... | ||||
|  | @ -1,3 +0,0 @@ | |||
| # parquet-go | ||||
| 
 | ||||
| Modified version of https://github.com/xitongsys/parquet-go | ||||
|  | @ -1,170 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"io" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"git.apache.org/thrift.git/lib/go/thrift" | ||||
| 	"github.com/minio/minio-go/v7/pkg/set" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
| func getColumns( | ||||
| 	rowGroup *parquet.RowGroup, | ||||
| 	columnNames set.StringSet, | ||||
| 	schemaElements []*parquet.SchemaElement, | ||||
| 	getReaderFunc GetReaderFunc, | ||||
| ) (nameColumnMap map[string]*column, err error) { | ||||
| 	nameIndexMap := make(map[string]int) | ||||
| 	for colIndex, columnChunk := range rowGroup.GetColumns() { | ||||
| 		meta := columnChunk.GetMetaData() | ||||
| 		if meta == nil { | ||||
| 			return nil, errors.New("parquet: column metadata missing") | ||||
| 		} | ||||
| 		columnName := strings.Join(meta.GetPathInSchema(), ".") | ||||
| 		if columnNames != nil && !columnNames.Contains(columnName) { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		// Ignore column spanning into another file.
 | ||||
| 		if columnChunk.GetFilePath() != "" { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		offset := meta.GetDataPageOffset() | ||||
| 		if meta.DictionaryPageOffset != nil { | ||||
| 			offset = meta.GetDictionaryPageOffset() | ||||
| 		} | ||||
| 
 | ||||
| 		size := meta.GetTotalCompressedSize() | ||||
| 		if size < 0 { | ||||
| 			return nil, errors.New("parquet: negative compressed size") | ||||
| 		} | ||||
| 		rc, err := getReaderFunc(offset, size) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		thriftReader := thrift.NewTBufferedTransport(thrift.NewStreamTransportR(rc), int(size)) | ||||
| 
 | ||||
| 		if nameColumnMap == nil { | ||||
| 			nameColumnMap = make(map[string]*column) | ||||
| 		} | ||||
| 		var se *parquet.SchemaElement | ||||
| 		for _, schema := range schemaElements { | ||||
| 			if schema != nil && schema.Name == columnName { | ||||
| 				se = schema | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		nameColumnMap[columnName] = &column{ | ||||
| 			name:           columnName, | ||||
| 			metadata:       meta, | ||||
| 			schema:         se, | ||||
| 			schemaElements: schemaElements, | ||||
| 			rc:             rc, | ||||
| 			thriftReader:   thriftReader, | ||||
| 			valueType:      meta.GetType(), | ||||
| 		} | ||||
| 
 | ||||
| 		// First element of []*parquet.SchemaElement from parquet file metadata is 'schema'
 | ||||
| 		// which is always skipped, hence colIndex + 1 is valid.
 | ||||
| 		nameIndexMap[columnName] = colIndex + 1 | ||||
| 	} | ||||
| 
 | ||||
| 	for name := range nameColumnMap { | ||||
| 		nameColumnMap[name].nameIndexMap = nameIndexMap | ||||
| 	} | ||||
| 
 | ||||
| 	return nameColumnMap, nil | ||||
| } | ||||
| 
 | ||||
| type column struct { | ||||
| 	name           string | ||||
| 	endOfValues    bool | ||||
| 	valueIndex     int | ||||
| 	valueType      parquet.Type | ||||
| 	metadata       *parquet.ColumnMetaData | ||||
| 	schema         *parquet.SchemaElement | ||||
| 	schemaElements []*parquet.SchemaElement | ||||
| 	nameIndexMap   map[string]int | ||||
| 	dictPage       *page | ||||
| 	dataTable      *table | ||||
| 	rc             io.ReadCloser | ||||
| 	thriftReader   *thrift.TBufferedTransport | ||||
| } | ||||
| 
 | ||||
| func (column *column) close() (err error) { | ||||
| 	if column.rc != nil { | ||||
| 		err = column.rc.Close() | ||||
| 		column.rc = nil | ||||
| 	} | ||||
| 
 | ||||
| 	return err | ||||
| } | ||||
| 
 | ||||
| func (column *column) readPage() { | ||||
| 	page, _, _, err := readPage( | ||||
| 		column.thriftReader, | ||||
| 		column.metadata, | ||||
| 		column.nameIndexMap, | ||||
| 		column.schemaElements, | ||||
| 	) | ||||
| 
 | ||||
| 	if err != nil { | ||||
| 		column.endOfValues = true | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	if page.Header.GetType() == parquet.PageType_DICTIONARY_PAGE { | ||||
| 		column.dictPage = page | ||||
| 		column.readPage() | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	page.decode(column.dictPage) | ||||
| 
 | ||||
| 	if column.dataTable == nil { | ||||
| 		column.dataTable = newTableFromTable(page.DataTable) | ||||
| 	} | ||||
| 
 | ||||
| 	column.dataTable.Merge(page.DataTable) | ||||
| } | ||||
| 
 | ||||
| func (column *column) read() (value interface{}, valueType parquet.Type, cnv *parquet.SchemaElement) { | ||||
| 	if column.dataTable == nil { | ||||
| 		column.readPage() | ||||
| 		column.valueIndex = 0 | ||||
| 	} | ||||
| 
 | ||||
| 	if column.endOfValues { | ||||
| 		return nil, column.metadata.GetType(), column.schema | ||||
| 	} | ||||
| 
 | ||||
| 	value = column.dataTable.Values[column.valueIndex] | ||||
| 	column.valueIndex++ | ||||
| 	if len(column.dataTable.Values) == column.valueIndex { | ||||
| 		column.dataTable = nil | ||||
| 	} | ||||
| 
 | ||||
| 	return value, column.metadata.GetType(), column.schema | ||||
| } | ||||
|  | @ -1,96 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
| func valuesToInterfaces(values interface{}, valueType parquet.Type) (tableValues []interface{}) { | ||||
| 	switch valueType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		for _, v := range values.([]bool) { | ||||
| 			tableValues = append(tableValues, v) | ||||
| 		} | ||||
| 	case parquet.Type_INT32: | ||||
| 		for _, v := range values.([]int32) { | ||||
| 			tableValues = append(tableValues, v) | ||||
| 		} | ||||
| 	case parquet.Type_INT64: | ||||
| 		for _, v := range values.([]int64) { | ||||
| 			tableValues = append(tableValues, v) | ||||
| 		} | ||||
| 	case parquet.Type_FLOAT: | ||||
| 		for _, v := range values.([]float32) { | ||||
| 			tableValues = append(tableValues, v) | ||||
| 		} | ||||
| 	case parquet.Type_DOUBLE: | ||||
| 		for _, v := range values.([]float64) { | ||||
| 			tableValues = append(tableValues, v) | ||||
| 		} | ||||
| 	case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: | ||||
| 		for _, v := range values.([][]byte) { | ||||
| 			tableValues = append(tableValues, v) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return tableValues | ||||
| } | ||||
| 
 | ||||
| func interfacesToValues(values []interface{}, valueType parquet.Type) interface{} { | ||||
| 	switch valueType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		bs := make([]bool, len(values)) | ||||
| 		for i := range values { | ||||
| 			bs[i] = values[i].(bool) | ||||
| 		} | ||||
| 		return bs | ||||
| 	case parquet.Type_INT32: | ||||
| 		i32s := make([]int32, len(values)) | ||||
| 		for i := range values { | ||||
| 			i32s[i] = values[i].(int32) | ||||
| 		} | ||||
| 		return i32s | ||||
| 	case parquet.Type_INT64: | ||||
| 		i64s := make([]int64, len(values)) | ||||
| 		for i := range values { | ||||
| 			i64s[i] = values[i].(int64) | ||||
| 		} | ||||
| 		return i64s | ||||
| 	case parquet.Type_FLOAT: | ||||
| 		f32s := make([]float32, len(values)) | ||||
| 		for i := range values { | ||||
| 			f32s[i] = values[i].(float32) | ||||
| 		} | ||||
| 		return f32s | ||||
| 	case parquet.Type_DOUBLE: | ||||
| 		f64s := make([]float64, len(values)) | ||||
| 		for i := range values { | ||||
| 			f64s[i] = values[i].(float64) | ||||
| 		} | ||||
| 		return f64s | ||||
| 	case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: | ||||
| 		array := make([][]byte, len(values)) | ||||
| 		for i := range values { | ||||
| 			array[i] = values[i].([]byte) | ||||
| 		} | ||||
| 		return array | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,161 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package common | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"compress/gzip" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 
 | ||||
| 	"github.com/golang/snappy" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/pierrec/lz4" | ||||
| ) | ||||
| 
 | ||||
| // ToSliceValue converts values to a slice value.
 | ||||
| func ToSliceValue(values []interface{}, parquetType parquet.Type) interface{} { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		bs := make([]bool, len(values)) | ||||
| 		for i := range values { | ||||
| 			bs[i] = values[i].(bool) | ||||
| 		} | ||||
| 		return bs | ||||
| 	case parquet.Type_INT32: | ||||
| 		i32s := make([]int32, len(values)) | ||||
| 		for i := range values { | ||||
| 			i32s[i] = values[i].(int32) | ||||
| 		} | ||||
| 		return i32s | ||||
| 	case parquet.Type_INT64: | ||||
| 		i64s := make([]int64, len(values)) | ||||
| 		for i := range values { | ||||
| 			i64s[i] = values[i].(int64) | ||||
| 		} | ||||
| 		return i64s | ||||
| 	case parquet.Type_FLOAT: | ||||
| 		f32s := make([]float32, len(values)) | ||||
| 		for i := range values { | ||||
| 			f32s[i] = values[i].(float32) | ||||
| 		} | ||||
| 		return f32s | ||||
| 	case parquet.Type_DOUBLE: | ||||
| 		f64s := make([]float64, len(values)) | ||||
| 		for i := range values { | ||||
| 			f64s[i] = values[i].(float64) | ||||
| 		} | ||||
| 		return f64s | ||||
| 	case parquet.Type_BYTE_ARRAY: | ||||
| 		array := make([][]byte, len(values)) | ||||
| 		for i := range values { | ||||
| 			array[i] = values[i].([]byte) | ||||
| 		} | ||||
| 		return array | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // BitWidth returns bits count required to accommodate given value.
 | ||||
| func BitWidth(ui64 uint64) (width int32) { | ||||
| 	for ; ui64 != 0; ui64 >>= 1 { | ||||
| 		width++ | ||||
| 	} | ||||
| 
 | ||||
| 	return width | ||||
| } | ||||
| 
 | ||||
| // Compress compresses given data.
 | ||||
| func Compress(compressionType parquet.CompressionCodec, data []byte) ([]byte, error) { | ||||
| 	switch compressionType { | ||||
| 	case parquet.CompressionCodec_UNCOMPRESSED: | ||||
| 		return data, nil | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_SNAPPY: | ||||
| 		return snappy.Encode(nil, data), nil | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_GZIP: | ||||
| 		buf := new(bytes.Buffer) | ||||
| 		writer := gzip.NewWriter(buf) | ||||
| 		n, err := writer.Write(data) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if n != len(data) { | ||||
| 			return nil, fmt.Errorf("short writes") | ||||
| 		} | ||||
| 
 | ||||
| 		if err = writer.Flush(); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		if err = writer.Close(); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		return buf.Bytes(), nil | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_LZ4: | ||||
| 		buf := new(bytes.Buffer) | ||||
| 		writer := lz4.NewWriter(buf) | ||||
| 		n, err := writer.Write(data) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if n != len(data) { | ||||
| 			return nil, fmt.Errorf("short writes") | ||||
| 		} | ||||
| 
 | ||||
| 		if err = writer.Flush(); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		if err = writer.Close(); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		return buf.Bytes(), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("unsupported compression codec %v", compressionType) | ||||
| } | ||||
| 
 | ||||
| // Uncompress uncompresses given data.
 | ||||
| func Uncompress(compressionType parquet.CompressionCodec, data []byte) ([]byte, error) { | ||||
| 	switch compressionType { | ||||
| 	case parquet.CompressionCodec_UNCOMPRESSED: | ||||
| 		return data, nil | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_SNAPPY: | ||||
| 		return snappy.Decode(nil, data) | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_GZIP: | ||||
| 		reader, err := gzip.NewReader(bytes.NewReader(data)) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		defer reader.Close() | ||||
| 		return ioutil.ReadAll(reader) | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_LZ4: | ||||
| 		return ioutil.ReadAll(lz4.NewReader(bytes.NewReader(data))) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("unsupported compression codec %v", compressionType) | ||||
| } | ||||
|  | @ -1,128 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"sync" | ||||
| 
 | ||||
| 	"github.com/golang/snappy" | ||||
| 	"github.com/klauspost/compress/gzip" | ||||
| 	"github.com/klauspost/compress/zstd" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/pierrec/lz4" | ||||
| ) | ||||
| 
 | ||||
| type compressionCodec parquet.CompressionCodec | ||||
| 
 | ||||
| var zstdOnce sync.Once | ||||
| var zstdEnc *zstd.Encoder | ||||
| var zstdDec *zstd.Decoder | ||||
| 
 | ||||
| func initZstd() { | ||||
| 	zstdOnce.Do(func() { | ||||
| 		zstdEnc, _ = zstd.NewWriter(nil, zstd.WithZeroFrames(true)) | ||||
| 		zstdDec, _ = zstd.NewReader(nil) | ||||
| 	}) | ||||
| } | ||||
| 
 | ||||
| func (c compressionCodec) compress(buf []byte) ([]byte, error) { | ||||
| 	switch parquet.CompressionCodec(c) { | ||||
| 	case parquet.CompressionCodec_UNCOMPRESSED: | ||||
| 		return buf, nil | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_SNAPPY: | ||||
| 		return snappy.Encode(nil, buf), nil | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_GZIP: | ||||
| 		byteBuf := new(bytes.Buffer) | ||||
| 		writer := gzip.NewWriter(byteBuf) | ||||
| 		n, err := writer.Write(buf) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if n != len(buf) { | ||||
| 			return nil, fmt.Errorf("short writes") | ||||
| 		} | ||||
| 
 | ||||
| 		if err = writer.Flush(); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		if err = writer.Close(); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		return byteBuf.Bytes(), nil | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_LZ4: | ||||
| 		byteBuf := new(bytes.Buffer) | ||||
| 		writer := lz4.NewWriter(byteBuf) | ||||
| 		n, err := writer.Write(buf) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if n != len(buf) { | ||||
| 			return nil, fmt.Errorf("short writes") | ||||
| 		} | ||||
| 
 | ||||
| 		if err = writer.Flush(); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		if err = writer.Close(); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		return byteBuf.Bytes(), nil | ||||
| 	case parquet.CompressionCodec_ZSTD: | ||||
| 		initZstd() | ||||
| 		return zstdEnc.EncodeAll(buf, nil), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("invalid compression codec %v", c) | ||||
| } | ||||
| 
 | ||||
| func (c compressionCodec) uncompress(buf []byte) ([]byte, error) { | ||||
| 	switch parquet.CompressionCodec(c) { | ||||
| 	case parquet.CompressionCodec_UNCOMPRESSED: | ||||
| 		return buf, nil | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_SNAPPY: | ||||
| 		return snappy.Decode(nil, buf) | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_GZIP: | ||||
| 		reader, err := gzip.NewReader(bytes.NewReader(buf)) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		defer reader.Close() | ||||
| 		return ioutil.ReadAll(reader) | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_LZ4: | ||||
| 		return ioutil.ReadAll(lz4.NewReader(bytes.NewReader(buf))) | ||||
| 
 | ||||
| 	case parquet.CompressionCodec_ZSTD: | ||||
| 		initZstd() | ||||
| 		return zstdDec.DecodeAll(buf, nil) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("invalid compression codec %v", c) | ||||
| } | ||||
|  | @ -1,619 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| func TestPopulateGroupList(t *testing.T) { | ||||
| 	requiredList1 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList1.Set("group", requiredGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("group.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("group.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("group.list.element.col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := requiredList1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredList2 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList2.Set("group", requiredGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("group.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("group.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("group.list.element.col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := requiredList2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredList3 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList3.Set("group", requiredGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList3.Set("group.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList3.Set("group.list.element", optionalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList3.Set("group.list.element.col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := requiredList3.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredList4 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList4.Set("group", requiredGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList4.Set("group.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList4.Set("group.list.element", optionalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList4.Set("group.list.element.col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := requiredList4.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList1 := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList1.Set("group", optionalGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("group.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("group.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("group.list.element.col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := optionalList1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList2 := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList2.Set("group", optionalGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("group.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("group.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("group.list.element.col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := optionalList2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList3 := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList3.Set("group", optionalGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList3.Set("group.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList3.Set("group.list.element", optionalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList3.Set("group.list.element.col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := optionalList3.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList4 := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList4.Set("group", optionalGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList4.Set("group.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList4.Set("group.list.element", optionalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList4.Set("group.list.element.col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := optionalList4.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	result1 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result2 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20}, | ||||
| 			definitionLevels: []int64{1, 1}, | ||||
| 			repetitionLevels: []int64{0, 1}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v20, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result3 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result4 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result5 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20}, | ||||
| 			definitionLevels: []int64{2, 2}, | ||||
| 			repetitionLevels: []int64{0, 1}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v20, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result6 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result7 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{3}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result8 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20}, | ||||
| 			definitionLevels: []int64{3, 3}, | ||||
| 			repetitionLevels: []int64{0, 1}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v20, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result9 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result10 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{3}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result11 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{4}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result12 := map[string]*Column{ | ||||
| 		"group.list.element.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20}, | ||||
| 			definitionLevels: []int64{4, 4}, | ||||
| 			repetitionLevels: []int64{0, 1}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v20, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	testCases := []struct { | ||||
| 		schemaTree     *schema.Tree | ||||
| 		data           string | ||||
| 		expectedResult map[string]*Column | ||||
| 		expectErr      bool | ||||
| 	}{ | ||||
| 		{requiredList1, `{}`, nil, true},                         // err: group: nil value for required field
 | ||||
| 		{requiredList1, `{"group": null}`, nil, true},            // err: group: nil value for required field
 | ||||
| 		{requiredList1, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field
 | ||||
| 		{requiredList1, `{"group": [{"col": 10}]}`, result1, false}, | ||||
| 		{requiredList1, `{"group": [{"col": 10}, {"col": 20}]}`, result2, false}, | ||||
| 		{requiredList2, `{}`, nil, true},              // err: group: nil value for required field
 | ||||
| 		{requiredList2, `{"group": null}`, nil, true}, // err: group: nil value for required field
 | ||||
| 		{requiredList2, `{"group": [{"col": null}]}`, result3, false}, | ||||
| 		{requiredList2, `{"group": [{"col": 10}]}`, result4, false}, | ||||
| 		{requiredList2, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false}, | ||||
| 		{requiredList3, `{}`, nil, true},                         // err: group: nil value for required field
 | ||||
| 		{requiredList3, `{"group": null}`, nil, true},            // err: group: nil value for required field
 | ||||
| 		{requiredList3, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field
 | ||||
| 		{requiredList3, `{"group": [{"col": 10}]}`, result4, false}, | ||||
| 		{requiredList3, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false}, | ||||
| 		{requiredList4, `{}`, nil, true},              // err: group: nil value for required field
 | ||||
| 		{requiredList4, `{"group": null}`, nil, true}, // err: group: nil value for required field
 | ||||
| 		{requiredList4, `{"group": [{"col": null}]}`, result6, false}, | ||||
| 		{requiredList4, `{"group": [{"col": 10}]}`, result7, false}, | ||||
| 		{requiredList4, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false}, | ||||
| 		{optionalList1, `{}`, result9, false}, | ||||
| 		{optionalList1, `{"group": null}`, result9, false}, | ||||
| 		{optionalList1, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field
 | ||||
| 		{optionalList1, `{"group": [{"col": 10}]}`, result4, false}, | ||||
| 		{optionalList1, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false}, | ||||
| 		{optionalList2, `{}`, result9, false}, | ||||
| 		{optionalList2, `{"group": null}`, result9, false}, | ||||
| 		{optionalList2, `{"group": [{"col": null}]}`, result6, false}, | ||||
| 		{optionalList2, `{"group": [{"col": 10}]}`, result7, false}, | ||||
| 		{optionalList2, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false}, | ||||
| 		{optionalList3, `{}`, result9, false}, | ||||
| 		{optionalList3, `{"group": null}`, result9, false}, | ||||
| 		{optionalList3, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field
 | ||||
| 		{optionalList3, `{"group": [{"col": 10}]}`, result7, false}, | ||||
| 		{optionalList3, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false}, | ||||
| 		{optionalList4, `{}`, result9, false}, | ||||
| 		{optionalList4, `{"group": null}`, result9, false}, | ||||
| 		{optionalList4, `{"group": [{"col": null}]}`, result10, false}, | ||||
| 		{optionalList4, `{"group": [{"col": 10}]}`, result11, false}, | ||||
| 		{optionalList4, `{"group": [{"col": 10}, {"col": 20}]}`, result12, false}, | ||||
| 	} | ||||
| 
 | ||||
| 	for i, testCase := range testCases { | ||||
| 		result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) | ||||
| 		expectErr := (err != nil) | ||||
| 
 | ||||
| 		if testCase.expectErr != expectErr { | ||||
| 			t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) | ||||
| 		} | ||||
| 
 | ||||
| 		if !testCase.expectErr { | ||||
| 			if !reflect.DeepEqual(result, testCase.expectedResult) { | ||||
| 				t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,238 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| func TestPopulateGroupType(t *testing.T) { | ||||
| 	requiredGroup1 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredGroup1.Set("group", requiredGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredGroup1.Set("group.col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := requiredGroup1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredGroup2 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredGroup2.Set("group", requiredGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredGroup2.Set("group.col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := requiredGroup2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalGroup1 := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalGroup1.Set("group", optionalGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalGroup1.Set("group.col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := optionalGroup1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalGroup2 := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalGroup2.Set("group", optionalGroup); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalGroup2.Set("group.col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err := optionalGroup2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	result1 := map[string]*Column{ | ||||
| 		"group.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result2 := map[string]*Column{ | ||||
| 		"group.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result3 := map[string]*Column{ | ||||
| 		"group.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result4 := map[string]*Column{ | ||||
| 		"group.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result5 := map[string]*Column{ | ||||
| 		"group.col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	testCases := []struct { | ||||
| 		schemaTree     *schema.Tree | ||||
| 		data           string | ||||
| 		expectedResult map[string]*Column | ||||
| 		expectErr      bool | ||||
| 	}{ | ||||
| 		{requiredGroup1, `{}`, nil, true},                       // err: group: nil value for required field
 | ||||
| 		{requiredGroup1, `{"group": null}`, nil, true},          // err: group: nil value for required field
 | ||||
| 		{requiredGroup1, `{"group": {"col": null}}`, nil, true}, // err: group.col: nil value for required field
 | ||||
| 		{requiredGroup1, `{"group": {"col": 10}}`, result1, false}, | ||||
| 		{requiredGroup2, `{}`, nil, true},              // err: group: nil value for required field
 | ||||
| 		{requiredGroup2, `{"group": null}`, nil, true}, // err: group: nil value for required field
 | ||||
| 		{requiredGroup2, `{"group": {"col": null}}`, result2, false}, | ||||
| 		{requiredGroup2, `{"group": {"col": 10}}`, result3, false}, | ||||
| 		{optionalGroup1, `{}`, result2, false}, | ||||
| 		{optionalGroup1, `{"group": null}`, result2, false}, | ||||
| 		{optionalGroup1, `{"group": {"col": null}}`, nil, true}, // err: group.col: nil value for required field
 | ||||
| 		{optionalGroup1, `{"group": {"col": 10}}`, result3, false}, | ||||
| 		{optionalGroup2, `{}`, result2, false}, | ||||
| 		{optionalGroup2, `{"group": null}`, result2, false}, | ||||
| 		{optionalGroup2, `{"group": {"col": null}}`, result4, false}, | ||||
| 		{optionalGroup2, `{"group": {"col": 10}}`, result5, false}, | ||||
| 	} | ||||
| 
 | ||||
| 	for i, testCase := range testCases { | ||||
| 		result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) | ||||
| 		expectErr := (err != nil) | ||||
| 
 | ||||
| 		if testCase.expectErr != expectErr { | ||||
| 			t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) | ||||
| 		} | ||||
| 
 | ||||
| 		if !testCase.expectErr { | ||||
| 			if !reflect.DeepEqual(result, testCase.expectedResult) { | ||||
| 				t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,699 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| func TestPopulateListOfList(t *testing.T) { | ||||
| 	requiredList1 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList1.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("col.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("col.list.element.list", subList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("col.list.element.list.element", requiredSubElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredList1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredList2 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList2.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("col.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("col.list.element.list", subList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("col.list.element.list.element", optionalSubElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredList2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredList3 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList3.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList3.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList3.Set("col.list.element", optioonalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList3.Set("col.list.element.list", subList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList3.Set("col.list.element.list.element", requiredSubElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredList3.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredList4 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList4.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList4.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList4.Set("col.list.element", optioonalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList4.Set("col.list.element.list", subList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList4.Set("col.list.element.list.element", optionalSubElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredList4.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList1 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList1.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("col.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("col.list.element.list", subList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("col.list.element.list.element", requiredSubElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalList1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList2 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList2.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("col.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("col.list.element.list", subList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("col.list.element.list.element", optionalSubElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalList2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList3 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList3.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList3.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList3.Set("col.list.element", optioonalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList3.Set("col.list.element.list", subList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList3.Set("col.list.element.list.element", requiredSubElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalList3.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList4 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList4.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList4.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList4.Set("col.list.element", optioonalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList4.Set("col.list.element.list", subList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList4.Set("col.list.element.list.element", optionalSubElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalList4.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	result1 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result2 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20, v30, v10, v20, v10, v30}, | ||||
| 			definitionLevels: []int64{2, 2, 2, 2, 2, 2, 2}, | ||||
| 			repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v30, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result3 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result4 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{3}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result5 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20, v30, v10, v20, v10, v30}, | ||||
| 			definitionLevels: []int64{3, 3, 3, 3, 3, 3, 3}, | ||||
| 			repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v30, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result6 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{3}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result7 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{4}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result8 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20, v30, v10, v20, v10, v30}, | ||||
| 			definitionLevels: []int64{4, 4, 4, 4, 4, 4, 4}, | ||||
| 			repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v30, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result9 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result10 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{4}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result11 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{5}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result12 := map[string]*Column{ | ||||
| 		"col.list.element.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20, v30, v10, v20, v10, v30}, | ||||
| 			definitionLevels: []int64{5, 5, 5, 5, 5, 5, 5}, | ||||
| 			repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v30, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	testCases := []struct { | ||||
| 		schemaTree     *schema.Tree | ||||
| 		data           string | ||||
| 		expectedResult map[string]*Column | ||||
| 		expectErr      bool | ||||
| 	}{ | ||||
| 		{requiredList1, `{}`, nil, true},                // err: col: nil value for required field
 | ||||
| 		{requiredList1, `{"col": null}`, nil, true},     // err: col: nil value for required field
 | ||||
| 		{requiredList1, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field
 | ||||
| 		{requiredList1, `{"col": [[10]]}`, result1, false}, | ||||
| 		{requiredList1, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result2, false}, | ||||
| 		{requiredList2, `{}`, nil, true},            // err: col: nil value for required field
 | ||||
| 		{requiredList2, `{"col": null}`, nil, true}, // err: col: nil value for required field
 | ||||
| 		{requiredList2, `{"col": [[null]]}`, result3, false}, | ||||
| 		{requiredList2, `{"col": [[10]]}`, result4, false}, | ||||
| 		{requiredList2, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result5, false}, | ||||
| 		{requiredList3, `{}`, nil, true},                // err: col: nil value for required field
 | ||||
| 		{requiredList3, `{"col": null}`, nil, true},     // err: col: nil value for required field
 | ||||
| 		{requiredList3, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field
 | ||||
| 		{requiredList3, `{"col": [[10]]}`, result4, false}, | ||||
| 		{requiredList3, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result5, false}, | ||||
| 		{requiredList4, `{}`, nil, true},            // err: col: nil value for required field
 | ||||
| 		{requiredList4, `{"col": null}`, nil, true}, // err: col: nil value for required field
 | ||||
| 		{requiredList4, `{"col": [[null]]}`, result6, false}, | ||||
| 		{requiredList4, `{"col": [[10]]}`, result7, false}, | ||||
| 		{requiredList4, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false}, | ||||
| 		{optionalList1, `{}`, result9, false}, | ||||
| 		{optionalList1, `{"col": null}`, result9, false}, | ||||
| 		{optionalList1, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field
 | ||||
| 		{optionalList1, `{"col": [[10]]}`, result4, false}, | ||||
| 		{optionalList1, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result5, false}, | ||||
| 		{optionalList2, `{}`, result9, false}, | ||||
| 		{optionalList2, `{"col": null}`, result9, false}, | ||||
| 		{optionalList2, `{"col": [[null]]}`, result6, false}, | ||||
| 		{optionalList2, `{"col": [[10]]}`, result7, false}, | ||||
| 		{optionalList2, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false}, | ||||
| 		{optionalList3, `{}`, result9, false}, | ||||
| 		{optionalList3, `{"col": null}`, result9, false}, | ||||
| 		{optionalList3, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field
 | ||||
| 		{optionalList3, `{"col": [[10]]}`, result7, false}, | ||||
| 		{optionalList3, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false}, | ||||
| 		{optionalList4, `{}`, result9, false}, | ||||
| 		{optionalList4, `{"col": null}`, result9, false}, | ||||
| 		{optionalList4, `{"col": [[null]]}`, result10, false}, | ||||
| 		{optionalList4, `{"col": [[10]]}`, result11, false}, | ||||
| 		{optionalList4, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result12, false}, | ||||
| 	} | ||||
| 
 | ||||
| 	for i, testCase := range testCases { | ||||
| 		result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) | ||||
| 		expectErr := (err != nil) | ||||
| 
 | ||||
| 		if testCase.expectErr != expectErr { | ||||
| 			t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) | ||||
| 		} | ||||
| 
 | ||||
| 		if !testCase.expectErr { | ||||
| 			if !reflect.DeepEqual(result, testCase.expectedResult) { | ||||
| 				t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,371 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| func TestPopulateMap(t *testing.T) { | ||||
| 	t.Skip("Broken") | ||||
| 	requiredMap1 := schema.NewTree() | ||||
| 	{ | ||||
| 		mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredValue, err := schema.NewElement("value", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredMap1.Set("map", mapElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredMap1.Set("map.key_value", keyValue); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredMap1.Set("map.key_value.key", requiredKey); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredMap1.Set("map.key_value.value", requiredValue); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredMap1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredMap2 := schema.NewTree() | ||||
| 	{ | ||||
| 		mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalValue, err := schema.NewElement("value", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredMap2.Set("map", mapElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredMap2.Set("map.key_value", keyValue); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredMap2.Set("map.key_value.key", requiredKey); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredMap2.Set("map.key_value.value", optionalValue); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredMap2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalMap1 := schema.NewTree() | ||||
| 	{ | ||||
| 		mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredValue, err := schema.NewElement("value", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalMap1.Set("map", mapElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalMap1.Set("map.key_value", keyValue); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalMap1.Set("map.key_value.key", requiredKey); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalMap1.Set("map.key_value.value", requiredValue); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalMap1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalMap2 := schema.NewTree() | ||||
| 	{ | ||||
| 		mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalValue, err := schema.NewElement("value", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalMap2.Set("map", mapElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalMap2.Set("map.key_value", keyValue); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalMap2.Set("map.key_value.key", requiredKey); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalMap2.Set("map.key_value.value", optionalValue); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalMap2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	result1 := map[string]*Column{ | ||||
| 		"map.key_value.key": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{ten}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"map.key_value.value": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result2 := map[string]*Column{ | ||||
| 		"map.key_value.key": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{ten}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"map.key_value.value": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result3 := map[string]*Column{ | ||||
| 		"map.key_value.key": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{ten}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"map.key_value.value": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result4 := map[string]*Column{ | ||||
| 		"map.key_value.key": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result5 := map[string]*Column{ | ||||
| 		"map.key_value.key": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{ten}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"map.key_value.value": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result6 := map[string]*Column{ | ||||
| 		"map.key_value.key": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{ten}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"map.key_value.value": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result7 := map[string]*Column{ | ||||
| 		"map.key_value.key": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{ten}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"map.key_value.value": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{3}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	testCases := []struct { | ||||
| 		schemaTree     *schema.Tree | ||||
| 		data           string | ||||
| 		expectedResult map[string]*Column | ||||
| 		expectErr      bool | ||||
| 	}{ | ||||
| 		{requiredMap1, `{}`, nil, true},                     // err: map: nil value for required field
 | ||||
| 		{requiredMap1, `{"map": null}`, nil, true},          // err: map: nil value for required field
 | ||||
| 		{requiredMap1, `{"map": {"ten": null}}`, nil, true}, // err: map.key_value.value: nil value for required field
 | ||||
| 		{requiredMap1, `{"map": {"ten": 10}}`, result1, false}, | ||||
| 		{requiredMap2, `{}`, nil, true},            // err: map: nil value for required field
 | ||||
| 		{requiredMap2, `{"map": null}`, nil, true}, // err: map: nil value for required field
 | ||||
| 		{requiredMap2, `{"map": {"ten": null}}`, result2, false}, | ||||
| 		{requiredMap2, `{"map": {"ten": 10}}`, result3, false}, | ||||
| 		{optionalMap1, `{}`, result4, false}, | ||||
| 		{optionalMap1, `{"map": null}`, result4, false}, | ||||
| 		{optionalMap1, `{"map": {"ten": null}}`, nil, true}, // err: map.key_value.value: nil value for required field
 | ||||
| 		{optionalMap1, `{"map": {"ten": 10}}`, result5, false}, | ||||
| 		{optionalMap2, `{}`, result4, false}, | ||||
| 		{optionalMap2, `{"map": null}`, result4, false}, | ||||
| 		{optionalMap2, `{"map": {"ten": null}}`, result6, false}, | ||||
| 		{optionalMap2, `{"map": {"ten": 10}}`, result7, false}, | ||||
| 	} | ||||
| 
 | ||||
| 	for i, testCase := range testCases { | ||||
| 		result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) | ||||
| 		expectErr := (err != nil) | ||||
| 
 | ||||
| 		if testCase.expectErr != expectErr { | ||||
| 			t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) | ||||
| 		} | ||||
| 
 | ||||
| 		if !testCase.expectErr { | ||||
| 			if !reflect.DeepEqual(result, testCase.expectedResult) { | ||||
| 				t.Errorf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,331 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| func TestPopulatePrimitiveList(t *testing.T) { | ||||
| 	requiredList1 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList1.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList1.Set("col.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredList1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	requiredList2 := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredList2.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = requiredList2.Set("col.list.element", optionalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredList2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList1 := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList1.Set("col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList1.Set("col.list.element", requiredElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalList1.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalList2 := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalList2.Set("col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("col.list", list); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = optionalList2.Set("col.list.element", optionalElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalList2.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	result1 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result2 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20, v30}, | ||||
| 			definitionLevels: []int64{1, 1, 1}, | ||||
| 			repetitionLevels: []int64{0, 1, 1}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v30, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result3 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result4 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result5 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20, v30}, | ||||
| 			definitionLevels: []int64{2, 2, 2}, | ||||
| 			repetitionLevels: []int64{0, 1, 1}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v30, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result6 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result7 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result8 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{3}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result9 := map[string]*Column{ | ||||
| 		"col.list.element": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10, v20, v30}, | ||||
| 			definitionLevels: []int64{3, 3, 3}, | ||||
| 			repetitionLevels: []int64{0, 1, 1}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      5, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v30, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	testCases := []struct { | ||||
| 		schemaTree     *schema.Tree | ||||
| 		data           string | ||||
| 		expectedResult map[string]*Column | ||||
| 		expectErr      bool | ||||
| 	}{ | ||||
| 		{requiredList1, `{}`, nil, true},              // err: col: nil value for required field
 | ||||
| 		{requiredList1, `{"col": null}`, nil, true},   // err: col: nil value for required field
 | ||||
| 		{requiredList1, `{"col": [null]}`, nil, true}, // err: col.list.element: nil value for required field
 | ||||
| 		{requiredList1, `{"col": [10]}`, result1, false}, | ||||
| 		{requiredList1, `{"col": [10, 20, 30]}`, result2, false}, | ||||
| 		{requiredList2, `{}`, nil, true},            // err: col: nil value for required field
 | ||||
| 		{requiredList2, `{"col": null}`, nil, true}, // err: col: nil value for required field
 | ||||
| 		{requiredList2, `{"col": [null]}`, result3, false}, | ||||
| 		{requiredList2, `{"col": [10]}`, result4, false}, | ||||
| 		{requiredList2, `{"col": [10, 20, 30]}`, result5, false}, | ||||
| 		{optionalList1, `{}`, result6, false}, | ||||
| 		{optionalList1, `{"col": null}`, result6, false}, | ||||
| 		{optionalList1, `{"col": [null]}`, nil, true}, // err: col.list.element: nil value for required field
 | ||||
| 		{optionalList1, `{"col": [10]}`, result4, false}, | ||||
| 		{optionalList1, `{"col": [10, 20, 30]}`, result5, false}, | ||||
| 		{optionalList2, `{}`, result6, false}, | ||||
| 		{optionalList2, `{"col": null}`, result6, false}, | ||||
| 		{optionalList2, `{"col": [null]}`, result7, false}, | ||||
| 		{optionalList2, `{"col": [10]}`, result8, false}, | ||||
| 		{optionalList2, `{"col": [10, 20, 30]}`, result9, false}, | ||||
| 	} | ||||
| 
 | ||||
| 	for i, testCase := range testCases { | ||||
| 		result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) | ||||
| 		expectErr := (err != nil) | ||||
| 
 | ||||
| 		if testCase.expectErr != expectErr { | ||||
| 			t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) | ||||
| 		} | ||||
| 
 | ||||
| 		if !testCase.expectErr { | ||||
| 			if !reflect.DeepEqual(result, testCase.expectedResult) { | ||||
| 				t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,129 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| func TestPopulatePrimitiveType(t *testing.T) { | ||||
| 	requiredField := schema.NewTree() | ||||
| 	{ | ||||
| 		requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = requiredField.Set("col", requiredCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = requiredField.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	optionalField := schema.NewTree() | ||||
| 	{ | ||||
| 		optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = optionalField.Set("col", optionalCol); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if _, _, err = optionalField.ToParquetSchema(); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	result1 := map[string]*Column{ | ||||
| 		"col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result2 := map[string]*Column{ | ||||
| 		"col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	result3 := map[string]*Column{ | ||||
| 		"col": { | ||||
| 			parquetType:      parquet.Type_INT32, | ||||
| 			values:           []interface{}{v10}, | ||||
| 			definitionLevels: []int64{1}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 			rowCount:         1, | ||||
| 			maxBitWidth:      4, | ||||
| 			minValue:         v10, | ||||
| 			maxValue:         v10, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	testCases := []struct { | ||||
| 		schemaTree     *schema.Tree | ||||
| 		data           string | ||||
| 		expectedResult map[string]*Column | ||||
| 		expectErr      bool | ||||
| 	}{ | ||||
| 		{requiredField, `{}`, nil, true}, | ||||
| 		{requiredField, `{"col": null}`, nil, true}, // err: col: nil value for required field
 | ||||
| 		{requiredField, `{"col": 10}`, result1, false}, | ||||
| 		{optionalField, `{}`, result2, false}, | ||||
| 		{optionalField, `{"col": null}`, result2, false}, | ||||
| 		{optionalField, `{"col": 10}`, result3, false}, | ||||
| 	} | ||||
| 
 | ||||
| 	for i, testCase := range testCases { | ||||
| 		result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) | ||||
| 		expectErr := (err != nil) | ||||
| 
 | ||||
| 		if testCase.expectErr != expectErr { | ||||
| 			t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) | ||||
| 		} | ||||
| 
 | ||||
| 		if !testCase.expectErr { | ||||
| 			if !reflect.DeepEqual(result, testCase.expectedResult) { | ||||
| 				t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,681 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"git.apache.org/thrift.git/lib/go/thrift" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/common" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/encoding" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| 	"github.com/tidwall/gjson" | ||||
| 	"github.com/tidwall/sjson" | ||||
| ) | ||||
| 
 | ||||
| func getDefaultEncoding(parquetType parquet.Type) parquet.Encoding { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		return parquet.Encoding_PLAIN | ||||
| 	case parquet.Type_INT32, parquet.Type_INT64, parquet.Type_FLOAT, parquet.Type_DOUBLE: | ||||
| 		return parquet.Encoding_RLE_DICTIONARY | ||||
| 	case parquet.Type_BYTE_ARRAY: | ||||
| 		return parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY | ||||
| 	} | ||||
| 
 | ||||
| 	return parquet.Encoding_PLAIN | ||||
| } | ||||
| 
 | ||||
| func getFirstValueElement(tree *schema.Tree) (valueElement *schema.Element) { | ||||
| 	tree.Range(func(name string, element *schema.Element) bool { | ||||
| 		if element.Children == nil { | ||||
| 			valueElement = element | ||||
| 		} else { | ||||
| 			valueElement = getFirstValueElement(element.Children) | ||||
| 		} | ||||
| 
 | ||||
| 		return false | ||||
| 	}) | ||||
| 
 | ||||
| 	return valueElement | ||||
| } | ||||
| 
 | ||||
| func populate(columnDataMap map[string]*Column, input *jsonValue, tree *schema.Tree, firstValueRL int64) (map[string]*Column, error) { | ||||
| 	var err error | ||||
| 
 | ||||
| 	pos := 0 | ||||
| 	handleElement := func(name string, element *schema.Element) bool { | ||||
| 		pos++ | ||||
| 
 | ||||
| 		dataPath := element.PathInTree | ||||
| 
 | ||||
| 		if *element.RepetitionType == parquet.FieldRepetitionType_REPEATED { | ||||
| 			panic(fmt.Errorf("%v: repetition type must be REQUIRED or OPTIONAL type", dataPath)) | ||||
| 		} | ||||
| 
 | ||||
| 		inputValue := input.Get(name) | ||||
| 		if *element.RepetitionType == parquet.FieldRepetitionType_REQUIRED && inputValue.IsNull() { | ||||
| 			err = fmt.Errorf("%v: nil value for required field", dataPath) | ||||
| 			return false | ||||
| 		} | ||||
| 
 | ||||
| 		add := func(element *schema.Element, value interface{}, DL, RL int64) { | ||||
| 			columnData := columnDataMap[element.PathInSchema] | ||||
| 			if columnData == nil { | ||||
| 				columnData = NewColumn(*element.Type) | ||||
| 			} | ||||
| 			columnData.add(value, DL, RL) | ||||
| 			columnDataMap[element.PathInSchema] = columnData | ||||
| 		} | ||||
| 
 | ||||
| 		// Handle primitive type element.
 | ||||
| 		if element.Type != nil { | ||||
| 			var value interface{} | ||||
| 			if value, err = inputValue.GetValue(*element.Type, element.ConvertedType); err != nil { | ||||
| 				return false | ||||
| 			} | ||||
| 
 | ||||
| 			DL := element.MaxDefinitionLevel | ||||
| 			if value == nil && DL > 0 { | ||||
| 				DL-- | ||||
| 			} | ||||
| 
 | ||||
| 			RL := element.MaxRepetitionLevel | ||||
| 			if pos == 1 { | ||||
| 				RL = firstValueRL | ||||
| 			} | ||||
| 
 | ||||
| 			add(element, value, DL, RL) | ||||
| 			return true | ||||
| 		} | ||||
| 
 | ||||
| 		addNull := func() { | ||||
| 			valueElement := getFirstValueElement(element.Children) | ||||
| 
 | ||||
| 			DL := element.MaxDefinitionLevel | ||||
| 			if DL > 0 { | ||||
| 				DL-- | ||||
| 			} | ||||
| 
 | ||||
| 			RL := element.MaxRepetitionLevel | ||||
| 			if RL > 0 { | ||||
| 				RL-- | ||||
| 			} | ||||
| 
 | ||||
| 			add(valueElement, nil, DL, RL) | ||||
| 		} | ||||
| 
 | ||||
| 		// Handle group type element.
 | ||||
| 		if element.ConvertedType == nil { | ||||
| 			if inputValue.IsNull() { | ||||
| 				addNull() | ||||
| 				return true | ||||
| 			} | ||||
| 
 | ||||
| 			columnDataMap, err = populate(columnDataMap, inputValue, element.Children, firstValueRL) | ||||
| 			return (err == nil) | ||||
| 		} | ||||
| 
 | ||||
| 		// Handle list type element.
 | ||||
| 		if *element.ConvertedType == parquet.ConvertedType_LIST { | ||||
| 			if inputValue.IsNull() { | ||||
| 				addNull() | ||||
| 				return true | ||||
| 			} | ||||
| 
 | ||||
| 			var results []gjson.Result | ||||
| 			if results, err = inputValue.GetArray(); err != nil { | ||||
| 				return false | ||||
| 			} | ||||
| 
 | ||||
| 			listElement, _ := element.Children.Get("list") | ||||
| 			valueElement, _ := listElement.Children.Get("element") | ||||
| 			for i := range results { | ||||
| 				rl := valueElement.MaxRepetitionLevel | ||||
| 				if i == 0 { | ||||
| 					rl = firstValueRL | ||||
| 				} | ||||
| 
 | ||||
| 				var jsonData []byte | ||||
| 				if jsonData, err = sjson.SetBytes([]byte{}, "element", results[i].Value()); err != nil { | ||||
| 					return false | ||||
| 				} | ||||
| 
 | ||||
| 				var jv *jsonValue | ||||
| 				if jv, err = bytesToJSONValue(jsonData); err != nil { | ||||
| 					return false | ||||
| 				} | ||||
| 
 | ||||
| 				if columnDataMap, err = populate(columnDataMap, jv, listElement.Children, rl); err != nil { | ||||
| 					return false | ||||
| 				} | ||||
| 			} | ||||
| 			return true | ||||
| 		} | ||||
| 
 | ||||
| 		if *element.ConvertedType == parquet.ConvertedType_MAP { | ||||
| 			if inputValue.IsNull() { | ||||
| 				addNull() | ||||
| 				return true | ||||
| 			} | ||||
| 
 | ||||
| 			keyValueElement, _ := element.Children.Get("key_value") | ||||
| 			var rerr error | ||||
| 			err = inputValue.Range(func(key, value gjson.Result) bool { | ||||
| 				if !key.Exists() || key.Type == gjson.Null { | ||||
| 					rerr = fmt.Errorf("%v.key_value.key: not found or null", dataPath) | ||||
| 					return false | ||||
| 				} | ||||
| 
 | ||||
| 				var jsonData []byte | ||||
| 				if jsonData, rerr = sjson.SetBytes([]byte{}, "key", key.Value()); rerr != nil { | ||||
| 					return false | ||||
| 				} | ||||
| 
 | ||||
| 				if jsonData, rerr = sjson.SetBytes(jsonData, "value", value.Value()); rerr != nil { | ||||
| 					return false | ||||
| 				} | ||||
| 
 | ||||
| 				var jv *jsonValue | ||||
| 				if jv, rerr = bytesToJSONValue(jsonData); rerr != nil { | ||||
| 					return false | ||||
| 				} | ||||
| 
 | ||||
| 				if columnDataMap, rerr = populate(columnDataMap, jv, keyValueElement.Children, firstValueRL); rerr != nil { | ||||
| 					return false | ||||
| 				} | ||||
| 
 | ||||
| 				return true | ||||
| 			}) | ||||
| 
 | ||||
| 			if err != nil { | ||||
| 				return false | ||||
| 			} | ||||
| 
 | ||||
| 			err = rerr | ||||
| 			return (err == nil) | ||||
| 		} | ||||
| 
 | ||||
| 		err = fmt.Errorf("%v: unsupported converted type %v in %v field type", dataPath, *element.ConvertedType, *element.RepetitionType) | ||||
| 		return false | ||||
| 	} | ||||
| 
 | ||||
| 	tree.Range(handleElement) | ||||
| 	return columnDataMap, err | ||||
| } | ||||
| 
 | ||||
| // Column - denotes values of a column.
 | ||||
| type Column struct { | ||||
| 	parquetType      parquet.Type  // value type.
 | ||||
| 	values           []interface{} // must be a slice of parquet typed values.
 | ||||
| 	definitionLevels []int64       // exactly same length of values.
 | ||||
| 	repetitionLevels []int64       // exactly same length of values.
 | ||||
| 	rowCount         int32 | ||||
| 	maxBitWidth      int32 | ||||
| 	minValue         interface{} | ||||
| 	maxValue         interface{} | ||||
| } | ||||
| 
 | ||||
| func (column *Column) updateMinMaxValue(value interface{}) { | ||||
| 	if column.minValue == nil && column.maxValue == nil { | ||||
| 		column.minValue = value | ||||
| 		column.maxValue = value | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	switch column.parquetType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		if column.minValue.(bool) && !value.(bool) { | ||||
| 			column.minValue = value | ||||
| 		} | ||||
| 
 | ||||
| 		if !column.maxValue.(bool) && value.(bool) { | ||||
| 			column.maxValue = value | ||||
| 		} | ||||
| 
 | ||||
| 	case parquet.Type_INT32: | ||||
| 		if column.minValue.(int32) > value.(int32) { | ||||
| 			column.minValue = value | ||||
| 		} | ||||
| 
 | ||||
| 		if column.maxValue.(int32) < value.(int32) { | ||||
| 			column.maxValue = value | ||||
| 		} | ||||
| 
 | ||||
| 	case parquet.Type_INT64: | ||||
| 		if column.minValue.(int64) > value.(int64) { | ||||
| 			column.minValue = value | ||||
| 		} | ||||
| 
 | ||||
| 		if column.maxValue.(int64) < value.(int64) { | ||||
| 			column.maxValue = value | ||||
| 		} | ||||
| 
 | ||||
| 	case parquet.Type_FLOAT: | ||||
| 		if column.minValue.(float32) > value.(float32) { | ||||
| 			column.minValue = value | ||||
| 		} | ||||
| 
 | ||||
| 		if column.maxValue.(float32) < value.(float32) { | ||||
| 			column.maxValue = value | ||||
| 		} | ||||
| 
 | ||||
| 	case parquet.Type_DOUBLE: | ||||
| 		if column.minValue.(float64) > value.(float64) { | ||||
| 			column.minValue = value | ||||
| 		} | ||||
| 
 | ||||
| 		if column.maxValue.(float64) < value.(float64) { | ||||
| 			column.maxValue = value | ||||
| 		} | ||||
| 
 | ||||
| 	case parquet.Type_BYTE_ARRAY: | ||||
| 		if bytes.Compare(column.minValue.([]byte), value.([]byte)) > 0 { | ||||
| 			column.minValue = value | ||||
| 		} | ||||
| 
 | ||||
| 		if bytes.Compare(column.minValue.([]byte), value.([]byte)) < 0 { | ||||
| 			column.maxValue = value | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (column *Column) updateStats(value interface{}, DL, RL int64) { | ||||
| 	if RL == 0 { | ||||
| 		column.rowCount++ | ||||
| 	} | ||||
| 
 | ||||
| 	if value == nil { | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	var bitWidth int32 | ||||
| 	switch column.parquetType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		bitWidth = 1 | ||||
| 	case parquet.Type_INT32: | ||||
| 		bitWidth = common.BitWidth(uint64(value.(int32))) | ||||
| 	case parquet.Type_INT64: | ||||
| 		bitWidth = common.BitWidth(uint64(value.(int64))) | ||||
| 	case parquet.Type_FLOAT: | ||||
| 		bitWidth = 32 | ||||
| 	case parquet.Type_DOUBLE: | ||||
| 		bitWidth = 64 | ||||
| 	case parquet.Type_BYTE_ARRAY: | ||||
| 		bitWidth = int32(len(value.([]byte))) | ||||
| 	} | ||||
| 	if column.maxBitWidth < bitWidth { | ||||
| 		column.maxBitWidth = bitWidth | ||||
| 	} | ||||
| 
 | ||||
| 	column.updateMinMaxValue(value) | ||||
| } | ||||
| 
 | ||||
| func (column *Column) add(value interface{}, DL, RL int64) { | ||||
| 	column.values = append(column.values, value) | ||||
| 	column.definitionLevels = append(column.definitionLevels, DL) | ||||
| 	column.repetitionLevels = append(column.repetitionLevels, RL) | ||||
| 	column.updateStats(value, DL, RL) | ||||
| } | ||||
| 
 | ||||
| // AddNull - adds nil value.
 | ||||
| func (column *Column) AddNull(DL, RL int64) { | ||||
| 	column.add(nil, DL, RL) | ||||
| } | ||||
| 
 | ||||
| // AddBoolean - adds boolean value.
 | ||||
| func (column *Column) AddBoolean(value bool, DL, RL int64) { | ||||
| 	if column.parquetType != parquet.Type_BOOLEAN { | ||||
| 		panic(fmt.Errorf("expected %v value", column.parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	column.add(value, DL, RL) | ||||
| } | ||||
| 
 | ||||
| // AddInt32 - adds int32 value.
 | ||||
| func (column *Column) AddInt32(value int32, DL, RL int64) { | ||||
| 	if column.parquetType != parquet.Type_INT32 { | ||||
| 		panic(fmt.Errorf("expected %v value", column.parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	column.add(value, DL, RL) | ||||
| } | ||||
| 
 | ||||
| // AddInt64 - adds int64 value.
 | ||||
| func (column *Column) AddInt64(value int64, DL, RL int64) { | ||||
| 	if column.parquetType != parquet.Type_INT64 { | ||||
| 		panic(fmt.Errorf("expected %v value", column.parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	column.add(value, DL, RL) | ||||
| } | ||||
| 
 | ||||
| // AddFloat - adds float32 value.
 | ||||
| func (column *Column) AddFloat(value float32, DL, RL int64) { | ||||
| 	if column.parquetType != parquet.Type_FLOAT { | ||||
| 		panic(fmt.Errorf("expected %v value", column.parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	column.add(value, DL, RL) | ||||
| } | ||||
| 
 | ||||
| // AddDouble - adds float64 value.
 | ||||
| func (column *Column) AddDouble(value float64, DL, RL int64) { | ||||
| 	if column.parquetType != parquet.Type_DOUBLE { | ||||
| 		panic(fmt.Errorf("expected %v value", column.parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	column.add(value, DL, RL) | ||||
| } | ||||
| 
 | ||||
| // AddByteArray - adds byte array value.
 | ||||
| func (column *Column) AddByteArray(value []byte, DL, RL int64) { | ||||
| 	if column.parquetType != parquet.Type_BYTE_ARRAY { | ||||
| 		panic(fmt.Errorf("expected %v value", column.parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	column.add(value, DL, RL) | ||||
| } | ||||
| 
 | ||||
| // Merge - merges columns.
 | ||||
| func (column *Column) Merge(column2 *Column) { | ||||
| 	if column.parquetType != column2.parquetType { | ||||
| 		panic(fmt.Errorf("merge differs in parquet type")) | ||||
| 	} | ||||
| 
 | ||||
| 	column.values = append(column.values, column2.values...) | ||||
| 	column.definitionLevels = append(column.definitionLevels, column2.definitionLevels...) | ||||
| 	column.repetitionLevels = append(column.repetitionLevels, column2.repetitionLevels...) | ||||
| 
 | ||||
| 	column.rowCount += column2.rowCount | ||||
| 	if column.maxBitWidth < column2.maxBitWidth { | ||||
| 		column.maxBitWidth = column2.maxBitWidth | ||||
| 	} | ||||
| 
 | ||||
| 	column.updateMinMaxValue(column2.minValue) | ||||
| 	column.updateMinMaxValue(column2.maxValue) | ||||
| } | ||||
| 
 | ||||
| func (column *Column) String() string { | ||||
| 	var strs []string | ||||
| 	strs = append(strs, fmt.Sprintf("parquetType: %v", column.parquetType)) | ||||
| 	strs = append(strs, fmt.Sprintf("values: %v", column.values)) | ||||
| 	strs = append(strs, fmt.Sprintf("definitionLevels: %v", column.definitionLevels)) | ||||
| 	strs = append(strs, fmt.Sprintf("repetitionLevels: %v", column.repetitionLevels)) | ||||
| 	strs = append(strs, fmt.Sprintf("rowCount: %v", column.rowCount)) | ||||
| 	strs = append(strs, fmt.Sprintf("maxBitWidth: %v", column.maxBitWidth)) | ||||
| 	strs = append(strs, fmt.Sprintf("minValue: %v", column.minValue)) | ||||
| 	strs = append(strs, fmt.Sprintf("maxValue: %v", column.maxValue)) | ||||
| 	return "{" + strings.Join(strs, ", ") + "}" | ||||
| } | ||||
| 
 | ||||
| func (column *Column) encodeValue(value interface{}, element *schema.Element) []byte { | ||||
| 	if value == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	valueData := encoding.PlainEncode(common.ToSliceValue([]interface{}{value}, column.parquetType), column.parquetType) | ||||
| 	if column.parquetType == parquet.Type_BYTE_ARRAY && element.ConvertedType != nil { | ||||
| 		switch *element.ConvertedType { | ||||
| 		case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL: | ||||
| 			valueData = valueData[4:] | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return valueData | ||||
| } | ||||
| 
 | ||||
| func (column *Column) toDataPageV2(element *schema.Element, parquetEncoding parquet.Encoding) *ColumnChunk { | ||||
| 	var definedValues []interface{} | ||||
| 	for _, value := range column.values { | ||||
| 		if value != nil { | ||||
| 			definedValues = append(definedValues, value) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	var encodedData []byte | ||||
| 	switch parquetEncoding { | ||||
| 	case parquet.Encoding_PLAIN: | ||||
| 		encodedData = encoding.PlainEncode(common.ToSliceValue(definedValues, column.parquetType), column.parquetType) | ||||
| 
 | ||||
| 	case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: | ||||
| 		var bytesSlices [][]byte | ||||
| 		for _, value := range column.values { | ||||
| 			bytesSlices = append(bytesSlices, value.([]byte)) | ||||
| 		} | ||||
| 		encodedData = encoding.DeltaLengthByteArrayEncode(bytesSlices) | ||||
| 	} | ||||
| 
 | ||||
| 	compressionType := parquet.CompressionCodec_SNAPPY | ||||
| 	if element.CompressionType != nil { | ||||
| 		compressionType = *element.CompressionType | ||||
| 	} | ||||
| 
 | ||||
| 	compressedData, err := common.Compress(compressionType, encodedData) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 
 | ||||
| 	DLData := encoding.RLEBitPackedHybridEncode( | ||||
| 		column.definitionLevels, | ||||
| 		common.BitWidth(uint64(element.MaxDefinitionLevel)), | ||||
| 		parquet.Type_INT64, | ||||
| 	) | ||||
| 
 | ||||
| 	RLData := encoding.RLEBitPackedHybridEncode( | ||||
| 		column.repetitionLevels, | ||||
| 		common.BitWidth(uint64(element.MaxRepetitionLevel)), | ||||
| 		parquet.Type_INT64, | ||||
| 	) | ||||
| 
 | ||||
| 	pageHeader := parquet.NewPageHeader() | ||||
| 	pageHeader.Type = parquet.PageType_DATA_PAGE_V2 | ||||
| 	pageHeader.CompressedPageSize = int32(len(compressedData) + len(DLData) + len(RLData)) | ||||
| 	pageHeader.UncompressedPageSize = int32(len(encodedData) + len(DLData) + len(RLData)) | ||||
| 	pageHeader.DataPageHeaderV2 = parquet.NewDataPageHeaderV2() | ||||
| 	pageHeader.DataPageHeaderV2.NumValues = int32(len(column.values)) | ||||
| 	pageHeader.DataPageHeaderV2.NumNulls = int32(len(column.values) - len(definedValues)) | ||||
| 	pageHeader.DataPageHeaderV2.NumRows = column.rowCount | ||||
| 	pageHeader.DataPageHeaderV2.Encoding = parquetEncoding | ||||
| 	pageHeader.DataPageHeaderV2.DefinitionLevelsByteLength = int32(len(DLData)) | ||||
| 	pageHeader.DataPageHeaderV2.RepetitionLevelsByteLength = int32(len(RLData)) | ||||
| 	pageHeader.DataPageHeaderV2.IsCompressed = true | ||||
| 	pageHeader.DataPageHeaderV2.Statistics = parquet.NewStatistics() | ||||
| 	pageHeader.DataPageHeaderV2.Statistics.Min = column.encodeValue(column.minValue, element) | ||||
| 	pageHeader.DataPageHeaderV2.Statistics.Max = column.encodeValue(column.maxValue, element) | ||||
| 
 | ||||
| 	ts := thrift.NewTSerializer() | ||||
| 	ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) | ||||
| 	rawData, err := ts.Write(context.TODO(), pageHeader) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 	rawData = append(rawData, RLData...) | ||||
| 	rawData = append(rawData, DLData...) | ||||
| 	rawData = append(rawData, compressedData...) | ||||
| 
 | ||||
| 	metadata := parquet.NewColumnMetaData() | ||||
| 	metadata.Type = column.parquetType | ||||
| 	metadata.Encodings = []parquet.Encoding{ | ||||
| 		parquet.Encoding_PLAIN, | ||||
| 		parquet.Encoding_RLE, | ||||
| 		parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY, | ||||
| 	} | ||||
| 	metadata.Codec = compressionType | ||||
| 	metadata.NumValues = int64(pageHeader.DataPageHeaderV2.NumValues) | ||||
| 	metadata.TotalCompressedSize = int64(len(rawData)) | ||||
| 	metadata.TotalUncompressedSize = int64(pageHeader.UncompressedPageSize) + int64(len(rawData)) - int64(pageHeader.CompressedPageSize) | ||||
| 	metadata.PathInSchema = strings.Split(element.PathInSchema, ".") | ||||
| 	metadata.Statistics = parquet.NewStatistics() | ||||
| 	metadata.Statistics.Min = pageHeader.DataPageHeaderV2.Statistics.Min | ||||
| 	metadata.Statistics.Max = pageHeader.DataPageHeaderV2.Statistics.Max | ||||
| 
 | ||||
| 	chunk := new(ColumnChunk) | ||||
| 	chunk.ColumnChunk.MetaData = metadata | ||||
| 	chunk.dataPageLen = int64(len(rawData)) | ||||
| 	chunk.dataLen = int64(len(rawData)) | ||||
| 	chunk.data = rawData | ||||
| 
 | ||||
| 	return chunk | ||||
| } | ||||
| 
 | ||||
| func (column *Column) toRLEDictPage(element *schema.Element) *ColumnChunk { | ||||
| 	dictPageData, dataPageData, dictValueCount, indexBitWidth := encoding.RLEDictEncode(column.values, column.parquetType, column.maxBitWidth) | ||||
| 
 | ||||
| 	compressionType := parquet.CompressionCodec_SNAPPY | ||||
| 	if element.CompressionType != nil { | ||||
| 		compressionType = *element.CompressionType | ||||
| 	} | ||||
| 
 | ||||
| 	compressedData, err := common.Compress(compressionType, dictPageData) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 
 | ||||
| 	dictPageHeader := parquet.NewPageHeader() | ||||
| 	dictPageHeader.Type = parquet.PageType_DICTIONARY_PAGE | ||||
| 	dictPageHeader.CompressedPageSize = int32(len(compressedData)) | ||||
| 	dictPageHeader.UncompressedPageSize = int32(len(dictPageData)) | ||||
| 	dictPageHeader.DictionaryPageHeader = parquet.NewDictionaryPageHeader() | ||||
| 	dictPageHeader.DictionaryPageHeader.NumValues = dictValueCount | ||||
| 	dictPageHeader.DictionaryPageHeader.Encoding = parquet.Encoding_PLAIN | ||||
| 
 | ||||
| 	ts := thrift.NewTSerializer() | ||||
| 	ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) | ||||
| 	dictPageRawData, err := ts.Write(context.TODO(), dictPageHeader) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 	dictPageRawData = append(dictPageRawData, compressedData...) | ||||
| 
 | ||||
| 	RLData := encoding.RLEBitPackedHybridEncode( | ||||
| 		column.repetitionLevels, | ||||
| 		common.BitWidth(uint64(element.MaxRepetitionLevel)), | ||||
| 		parquet.Type_INT64, | ||||
| 	) | ||||
| 	encodedData := RLData | ||||
| 
 | ||||
| 	DLData := encoding.RLEBitPackedHybridEncode( | ||||
| 		column.definitionLevels, | ||||
| 		common.BitWidth(uint64(element.MaxDefinitionLevel)), | ||||
| 		parquet.Type_INT64, | ||||
| 	) | ||||
| 	encodedData = append(encodedData, DLData...) | ||||
| 
 | ||||
| 	encodedData = append(encodedData, indexBitWidth) | ||||
| 	encodedData = append(encodedData, dataPageData...) | ||||
| 
 | ||||
| 	compressedData, err = common.Compress(compressionType, encodedData) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 
 | ||||
| 	dataPageHeader := parquet.NewPageHeader() | ||||
| 	dataPageHeader.Type = parquet.PageType_DATA_PAGE | ||||
| 	dataPageHeader.CompressedPageSize = int32(len(compressedData)) | ||||
| 	dataPageHeader.UncompressedPageSize = int32(len(encodedData)) | ||||
| 	dataPageHeader.DataPageHeader = parquet.NewDataPageHeader() | ||||
| 	dataPageHeader.DataPageHeader.NumValues = int32(len(column.values)) | ||||
| 	dataPageHeader.DataPageHeader.DefinitionLevelEncoding = parquet.Encoding_RLE | ||||
| 	dataPageHeader.DataPageHeader.RepetitionLevelEncoding = parquet.Encoding_RLE | ||||
| 	dataPageHeader.DataPageHeader.Encoding = parquet.Encoding_RLE_DICTIONARY | ||||
| 
 | ||||
| 	ts = thrift.NewTSerializer() | ||||
| 	ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) | ||||
| 	dataPageRawData, err := ts.Write(context.TODO(), dataPageHeader) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 	dataPageRawData = append(dataPageRawData, compressedData...) | ||||
| 
 | ||||
| 	metadata := parquet.NewColumnMetaData() | ||||
| 	metadata.Type = column.parquetType | ||||
| 	metadata.Encodings = []parquet.Encoding{ | ||||
| 		parquet.Encoding_PLAIN, | ||||
| 		parquet.Encoding_RLE, | ||||
| 		parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY, | ||||
| 		parquet.Encoding_RLE_DICTIONARY, | ||||
| 	} | ||||
| 	metadata.Codec = compressionType | ||||
| 	metadata.NumValues = int64(dataPageHeader.DataPageHeader.NumValues) | ||||
| 	metadata.TotalCompressedSize = int64(len(dictPageRawData)) + int64(len(dataPageRawData)) | ||||
| 	uncompressedSize := int64(dictPageHeader.UncompressedPageSize) + int64(len(dictPageData)) - int64(dictPageHeader.CompressedPageSize) | ||||
| 	uncompressedSize += int64(dataPageHeader.UncompressedPageSize) + int64(len(dataPageData)) - int64(dataPageHeader.CompressedPageSize) | ||||
| 	metadata.TotalUncompressedSize = uncompressedSize | ||||
| 	metadata.PathInSchema = strings.Split(element.PathInSchema, ".") | ||||
| 	metadata.Statistics = parquet.NewStatistics() | ||||
| 	metadata.Statistics.Min = column.encodeValue(column.minValue, element) | ||||
| 	metadata.Statistics.Max = column.encodeValue(column.maxValue, element) | ||||
| 
 | ||||
| 	chunk := new(ColumnChunk) | ||||
| 	chunk.ColumnChunk.MetaData = metadata | ||||
| 	chunk.isDictPage = true | ||||
| 	chunk.dictPageLen = int64(len(dictPageRawData)) | ||||
| 	chunk.dataPageLen = int64(len(dataPageRawData)) | ||||
| 	chunk.dataLen = chunk.dictPageLen + chunk.dataPageLen | ||||
| 	chunk.data = append(dictPageRawData, dataPageRawData...) | ||||
| 
 | ||||
| 	return chunk | ||||
| } | ||||
| 
 | ||||
| // Encode an element.
 | ||||
| func (column *Column) Encode(element *schema.Element) *ColumnChunk { | ||||
| 	parquetEncoding := getDefaultEncoding(column.parquetType) | ||||
| 	if element.Encoding != nil { | ||||
| 		parquetEncoding = *element.Encoding | ||||
| 	} | ||||
| 
 | ||||
| 	switch parquetEncoding { | ||||
| 	case parquet.Encoding_PLAIN, parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: | ||||
| 		return column.toDataPageV2(element, parquetEncoding) | ||||
| 	} | ||||
| 
 | ||||
| 	return column.toRLEDictPage(element) | ||||
| } | ||||
| 
 | ||||
| // NewColumn - creates new column data
 | ||||
| func NewColumn(parquetType parquet.Type) *Column { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_BOOLEAN, parquet.Type_INT32, parquet.Type_INT64, parquet.Type_FLOAT, parquet.Type_DOUBLE, parquet.Type_BYTE_ARRAY: | ||||
| 	default: | ||||
| 		panic(fmt.Errorf("unsupported parquet type %v", parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	return &Column{ | ||||
| 		parquetType: parquetType, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // UnmarshalJSON - decodes JSON data into map of Column.
 | ||||
| func UnmarshalJSON(data []byte, tree *schema.Tree) (map[string]*Column, error) { | ||||
| 	if !tree.ReadOnly() { | ||||
| 		return nil, fmt.Errorf("tree must be read only") | ||||
| 	} | ||||
| 
 | ||||
| 	inputValue, err := bytesToJSONValue(data) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	columnDataMap := make(map[string]*Column) | ||||
| 	return populate(columnDataMap, inputValue, tree, 0) | ||||
| } | ||||
|  | @ -1,370 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	v10    = int32(10) | ||||
| 	v20    = int32(20) | ||||
| 	v30    = int32(30) | ||||
| 	ten    = []byte("ten") | ||||
| 	foo    = []byte("foo") | ||||
| 	bar    = []byte("bar") | ||||
| 	phone1 = []byte("1-234-567-8901") | ||||
| 	phone2 = []byte("1-234-567-1098") | ||||
| 	phone3 = []byte("1-111-222-3333") | ||||
| ) | ||||
| 
 | ||||
| func TestAddressBookExample(t *testing.T) { | ||||
| 	// message AddressBook {
 | ||||
| 	//   required string owner;
 | ||||
| 	//   repeated string ownerPhoneNumbers;
 | ||||
| 	//   repeated group contacts {
 | ||||
| 	//     required string name;
 | ||||
| 	//     optional string phoneNumber;
 | ||||
| 	//   }
 | ||||
| 	// }
 | ||||
| 	t.Skip("Broken") | ||||
| 
 | ||||
| 	addressBook := schema.NewTree() | ||||
| 	{ | ||||
| 		owner, err := schema.NewElement("owner", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		ownerPhoneNumbers, err := schema.NewElement("ownerPhoneNumbers", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		ownerPhoneNumbersList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		ownerPhoneNumbersElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		contacts, err := schema.NewElement("contacts", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		contactsList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		contactsElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			nil, nil, | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		contactName, err := schema.NewElement("name", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		contactPhoneNumber, err := schema.NewElement("phoneNumber", parquet.FieldRepetitionType_OPTIONAL, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = addressBook.Set("owner", owner); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = addressBook.Set("ownerPhoneNumbers", ownerPhoneNumbers); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = addressBook.Set("ownerPhoneNumbers.list", ownerPhoneNumbersList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = addressBook.Set("ownerPhoneNumbers.list.element", ownerPhoneNumbersElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err = addressBook.Set("contacts", contacts); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = addressBook.Set("contacts.list", contactsList); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = addressBook.Set("contacts.list.element", contactsElement); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = addressBook.Set("contacts.list.element.name", contactName); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err = addressBook.Set("contacts.list.element.phoneNumber", contactPhoneNumber); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if _, _, err := addressBook.ToParquetSchema(); err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	case2Data := `{ | ||||
|     "owner": "foo" | ||||
| }` | ||||
| 	result2 := map[string]*Column{ | ||||
| 		"owner": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{foo}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"ownerPhoneNumbers.list.element": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"contacts.list.element.name": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	case3Data := `{ | ||||
|     "owner": "foo", | ||||
|     "ownerPhoneNumbers": [ | ||||
|         "1-234-567-8901" | ||||
|     ] | ||||
| } | ||||
| ` | ||||
| 	result3 := map[string]*Column{ | ||||
| 		"owner": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{foo}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"ownerPhoneNumbers.list.element": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{phone1}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"contacts.list.element.name": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	case4Data := `{ | ||||
|     "owner": "foo", | ||||
|     "ownerPhoneNumbers": [ | ||||
|         "1-234-567-8901", | ||||
|         "1-234-567-1098" | ||||
|     ] | ||||
| } | ||||
| ` | ||||
| 	result4 := map[string]*Column{ | ||||
| 		"owner": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{foo}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"ownerPhoneNumbers.list.element": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{phone1, phone2}, | ||||
| 			definitionLevels: []int64{2, 2}, | ||||
| 			repetitionLevels: []int64{0, 1}, | ||||
| 		}, | ||||
| 		"contacts.list.element.name": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	case5Data := `{ | ||||
|     "contacts": [ | ||||
|         { | ||||
|             "name": "bar" | ||||
|         } | ||||
|     ], | ||||
|     "owner": "foo" | ||||
| }` | ||||
| 	result5 := map[string]*Column{ | ||||
| 		"owner": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{foo}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"ownerPhoneNumbers.list.element": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"contacts.list.element.name": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{bar}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"contacts.list.element.phoneNumber": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	case6Data := `{ | ||||
|     "contacts": [ | ||||
|         { | ||||
|             "name": "bar", | ||||
|             "phoneNumber": "1-111-222-3333" | ||||
|         } | ||||
|     ], | ||||
|     "owner": "foo" | ||||
| }` | ||||
| 	result6 := map[string]*Column{ | ||||
| 		"owner": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{foo}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"ownerPhoneNumbers.list.element": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{nil}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"contacts.list.element.name": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{bar}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"contacts.list.element.phoneNumber": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{phone3}, | ||||
| 			definitionLevels: []int64{3}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	case7Data := `{ | ||||
|     "contacts": [ | ||||
|         { | ||||
|             "name": "bar", | ||||
|             "phoneNumber": "1-111-222-3333" | ||||
|         } | ||||
|     ], | ||||
|     "owner": "foo", | ||||
|     "ownerPhoneNumbers": [ | ||||
|         "1-234-567-8901", | ||||
|         "1-234-567-1098" | ||||
|     ] | ||||
| }` | ||||
| 	result7 := map[string]*Column{ | ||||
| 		"owner": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{foo}, | ||||
| 			definitionLevels: []int64{0}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"ownerPhoneNumbers.list.element": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{phone1, phone2}, | ||||
| 			definitionLevels: []int64{2, 2}, | ||||
| 			repetitionLevels: []int64{0, 1}, | ||||
| 		}, | ||||
| 		"contacts.list.element.name": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{bar}, | ||||
| 			definitionLevels: []int64{2}, | ||||
| 			repetitionLevels: []int64{0}, | ||||
| 		}, | ||||
| 		"contacts.list.element.phoneNumber": { | ||||
| 			parquetType:      parquet.Type_BYTE_ARRAY, | ||||
| 			values:           []interface{}{phone3}, | ||||
| 			definitionLevels: []int64{3}, | ||||
| 			repetitionLevels: []int64{1}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	testCases := []struct { | ||||
| 		data           string | ||||
| 		expectedResult map[string]*Column | ||||
| 		expectErr      bool | ||||
| 	}{ | ||||
| 		{`{}`, nil, true}, // err: owner: nil value for required field
 | ||||
| 		{case2Data, result2, false}, | ||||
| 		{case3Data, result3, false}, | ||||
| 		{case4Data, result4, false}, | ||||
| 		{case5Data, result5, false}, | ||||
| 		{case6Data, result6, false}, | ||||
| 		{case7Data, result7, false}, | ||||
| 	} | ||||
| 
 | ||||
| 	for i, testCase := range testCases { | ||||
| 		result, err := UnmarshalJSON([]byte(testCase.data), addressBook) | ||||
| 		expectErr := (err != nil) | ||||
| 
 | ||||
| 		if testCase.expectErr != expectErr { | ||||
| 			t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) | ||||
| 		} | ||||
| 
 | ||||
| 		if !testCase.expectErr { | ||||
| 			if !reflect.DeepEqual(result, testCase.expectedResult) { | ||||
| 				t.Errorf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,66 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
| // ColumnChunk ...
 | ||||
| type ColumnChunk struct { | ||||
| 	parquet.ColumnChunk | ||||
| 	isDictPage  bool | ||||
| 	dictPageLen int64 | ||||
| 	dataPageLen int64 | ||||
| 	dataLen     int64 | ||||
| 	data        []byte | ||||
| } | ||||
| 
 | ||||
| // Data returns the data.
 | ||||
| func (chunk *ColumnChunk) Data() []byte { | ||||
| 	return chunk.data | ||||
| } | ||||
| 
 | ||||
| // DataLen returns the length of the data.
 | ||||
| func (chunk *ColumnChunk) DataLen() int64 { | ||||
| 	return chunk.dataLen | ||||
| } | ||||
| 
 | ||||
| // NewRowGroup creates a new row group.
 | ||||
| func NewRowGroup(chunks []*ColumnChunk, numRows, offset int64) *parquet.RowGroup { | ||||
| 	rows := parquet.NewRowGroup() | ||||
| 	rows.NumRows = numRows | ||||
| 
 | ||||
| 	for _, chunk := range chunks { | ||||
| 		rows.Columns = append(rows.Columns, &chunk.ColumnChunk) | ||||
| 		rows.TotalByteSize += chunk.dataLen | ||||
| 
 | ||||
| 		chunk.ColumnChunk.FileOffset = offset | ||||
| 
 | ||||
| 		if chunk.isDictPage { | ||||
| 			dictPageOffset := offset | ||||
| 			chunk.ColumnChunk.MetaData.DictionaryPageOffset = &dictPageOffset | ||||
| 			offset += chunk.dictPageLen | ||||
| 		} | ||||
| 
 | ||||
| 		chunk.ColumnChunk.MetaData.DataPageOffset = offset | ||||
| 		offset += chunk.dataPageLen | ||||
| 	} | ||||
| 
 | ||||
| 	return rows | ||||
| } | ||||
|  | @ -1,108 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/tidwall/gjson" | ||||
| ) | ||||
| 
 | ||||
| type jsonValue struct { | ||||
| 	result *gjson.Result | ||||
| 	path   *string | ||||
| } | ||||
| 
 | ||||
| func (v *jsonValue) String() string { | ||||
| 	if v.result == nil { | ||||
| 		return "<nil>" | ||||
| 	} | ||||
| 
 | ||||
| 	return fmt.Sprintf("%v", *v.result) | ||||
| } | ||||
| 
 | ||||
| func (v *jsonValue) IsNull() bool { | ||||
| 	return v.result == nil || v.result.Type == gjson.Null | ||||
| } | ||||
| 
 | ||||
| func (v *jsonValue) Get(path string) *jsonValue { | ||||
| 	if v.path != nil { | ||||
| 		var result *gjson.Result | ||||
| 		if *v.path == path { | ||||
| 			result = v.result | ||||
| 		} | ||||
| 
 | ||||
| 		return resultToJSONValue(result) | ||||
| 	} | ||||
| 
 | ||||
| 	if v.result == nil { | ||||
| 		return resultToJSONValue(nil) | ||||
| 	} | ||||
| 
 | ||||
| 	result := v.result.Get(path) | ||||
| 	if !result.Exists() { | ||||
| 		return resultToJSONValue(nil) | ||||
| 	} | ||||
| 
 | ||||
| 	return resultToJSONValue(&result) | ||||
| } | ||||
| 
 | ||||
| func (v *jsonValue) GetValue(parquetType parquet.Type, convertedType *parquet.ConvertedType) (interface{}, error) { | ||||
| 	if v.result == nil { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	return resultToParquetValue(*v.result, parquetType, convertedType) | ||||
| } | ||||
| 
 | ||||
| func (v *jsonValue) GetArray() ([]gjson.Result, error) { | ||||
| 	if v.result == nil { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	return resultToArray(*v.result) | ||||
| } | ||||
| 
 | ||||
| func (v *jsonValue) Range(iterator func(key, value gjson.Result) bool) error { | ||||
| 	if v.result == nil || v.result.Type == gjson.Null { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	if v.result.Type != gjson.JSON || !v.result.IsObject() { | ||||
| 		return fmt.Errorf("result is not Map but %v", v.result.Type) | ||||
| 	} | ||||
| 
 | ||||
| 	v.result.ForEach(iterator) | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func resultToJSONValue(result *gjson.Result) *jsonValue { | ||||
| 	return &jsonValue{ | ||||
| 		result: result, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func bytesToJSONValue(data []byte) (*jsonValue, error) { | ||||
| 	if !gjson.ValidBytes(data) { | ||||
| 		return nil, fmt.Errorf("invalid JSON data") | ||||
| 	} | ||||
| 
 | ||||
| 	result := gjson.ParseBytes(data) | ||||
| 	return resultToJSONValue(&result), nil | ||||
| } | ||||
|  | @ -1,361 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package data | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/tidwall/gjson" | ||||
| ) | ||||
| 
 | ||||
| func resultToBool(result gjson.Result) (value interface{}, err error) { | ||||
| 	switch result.Type { | ||||
| 	case gjson.False, gjson.True: | ||||
| 		return result.Bool(), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("result is not Bool but %v", result.Type) | ||||
| } | ||||
| 
 | ||||
| func resultToInt32(result gjson.Result) (value interface{}, err error) { | ||||
| 	if value, err = resultToInt64(result); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if value.(int64) < math.MinInt32 || value.(int64) > math.MaxInt32 { | ||||
| 		return nil, fmt.Errorf("int32 overflow") | ||||
| 	} | ||||
| 
 | ||||
| 	return int32(value.(int64)), nil | ||||
| } | ||||
| 
 | ||||
| func resultToInt64(result gjson.Result) (value interface{}, err error) { | ||||
| 	if result.Type == gjson.Number { | ||||
| 		return result.Int(), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("result is not Number but %v", result.Type) | ||||
| } | ||||
| 
 | ||||
| func resultToFloat(result gjson.Result) (value interface{}, err error) { | ||||
| 	if result.Type == gjson.Number { | ||||
| 		return float32(result.Float()), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("result is not float32 but %v", result.Type) | ||||
| } | ||||
| 
 | ||||
| func resultToDouble(result gjson.Result) (value interface{}, err error) { | ||||
| 	if result.Type == gjson.Number { | ||||
| 		return result.Float(), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("result is not float64 but %v", result.Type) | ||||
| } | ||||
| 
 | ||||
| func resultToBytes(result gjson.Result) (interface{}, error) { | ||||
| 	if result.Type != gjson.JSON || !result.IsArray() { | ||||
| 		return nil, fmt.Errorf("result is not byte array but %v", result.Type) | ||||
| 	} | ||||
| 
 | ||||
| 	data := []byte{} | ||||
| 	for i, r := range result.Array() { | ||||
| 		if r.Type != gjson.Number { | ||||
| 			return nil, fmt.Errorf("result[%v] is not byte but %v", i, r.Type) | ||||
| 		} | ||||
| 
 | ||||
| 		value := r.Uint() | ||||
| 		if value > math.MaxUint8 { | ||||
| 			return nil, fmt.Errorf("byte overflow in result[%v]", i) | ||||
| 		} | ||||
| 
 | ||||
| 		data = append(data, byte(value)) | ||||
| 	} | ||||
| 
 | ||||
| 	return data, nil | ||||
| } | ||||
| 
 | ||||
| func resultToString(result gjson.Result) (value interface{}, err error) { | ||||
| 	if result.Type == gjson.String { | ||||
| 		return result.String(), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("result is not String but %v", result.Type) | ||||
| } | ||||
| 
 | ||||
| func resultToUint8(result gjson.Result) (value interface{}, err error) { | ||||
| 	if value, err = resultToUint64(result); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if value.(uint64) > math.MaxUint8 { | ||||
| 		return nil, fmt.Errorf("uint8 overflow") | ||||
| 	} | ||||
| 
 | ||||
| 	return uint8(value.(uint64)), nil | ||||
| } | ||||
| 
 | ||||
| func resultToUint16(result gjson.Result) (value interface{}, err error) { | ||||
| 	if value, err = resultToUint64(result); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if value.(uint64) > math.MaxUint16 { | ||||
| 		return nil, fmt.Errorf("uint16 overflow") | ||||
| 	} | ||||
| 
 | ||||
| 	return uint16(value.(uint64)), nil | ||||
| } | ||||
| 
 | ||||
| func resultToUint32(result gjson.Result) (value interface{}, err error) { | ||||
| 	if value, err = resultToUint64(result); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if value.(uint64) > math.MaxUint32 { | ||||
| 		return nil, fmt.Errorf("uint32 overflow") | ||||
| 	} | ||||
| 
 | ||||
| 	return uint32(value.(uint64)), nil | ||||
| } | ||||
| 
 | ||||
| func resultToUint64(result gjson.Result) (value interface{}, err error) { | ||||
| 	if result.Type == gjson.Number { | ||||
| 		return result.Uint(), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("result is not Number but %v", result.Type) | ||||
| } | ||||
| 
 | ||||
| func resultToInt8(result gjson.Result) (value interface{}, err error) { | ||||
| 	if value, err = resultToInt64(result); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if value.(int64) < math.MinInt8 || value.(int64) > math.MaxInt8 { | ||||
| 		return nil, fmt.Errorf("int8 overflow") | ||||
| 	} | ||||
| 
 | ||||
| 	return int8(value.(int64)), nil | ||||
| } | ||||
| 
 | ||||
| func resultToInt16(result gjson.Result) (value interface{}, err error) { | ||||
| 	if value, err = resultToInt64(result); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if value.(int64) < math.MinInt16 || value.(int64) > math.MaxInt16 { | ||||
| 		return nil, fmt.Errorf("int16 overflow") | ||||
| 	} | ||||
| 
 | ||||
| 	return int16(value.(int64)), nil | ||||
| } | ||||
| 
 | ||||
| func stringToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: | ||||
| 		return []byte(value.(string)), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("string cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func uint8ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		return int32(value.(uint8)), nil | ||||
| 	case parquet.Type_INT64: | ||||
| 		return int64(value.(uint8)), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("uint8 cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func uint16ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		return int32(value.(uint16)), nil | ||||
| 	case parquet.Type_INT64: | ||||
| 		return int64(value.(uint16)), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("uint16 cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func uint32ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		return int32(value.(uint32)), nil | ||||
| 	case parquet.Type_INT64: | ||||
| 		return int64(value.(uint32)), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("uint32 cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func uint64ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		return int32(value.(uint64)), nil | ||||
| 	case parquet.Type_INT64: | ||||
| 		return int64(value.(uint64)), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("uint64 cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func int8ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		return int32(value.(int8)), nil | ||||
| 	case parquet.Type_INT64: | ||||
| 		return int64(value.(int8)), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("int8 cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func int16ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		return int32(value.(int16)), nil | ||||
| 	case parquet.Type_INT64: | ||||
| 		return int64(value.(int16)), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("int16 cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func int32ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		return value.(int32), nil | ||||
| 	case parquet.Type_INT64: | ||||
| 		return int64(value.(int32)), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("int32 cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func int64ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		return int32(value.(int64)), nil | ||||
| 	case parquet.Type_INT64: | ||||
| 		return value.(int64), nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("int64 cannot be converted to parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
// resultToParquetValueByConvertedValue converts a gjson result according to
// the parquet converted (logical) type, then encodes/widens it for the given
// physical type. JSON null maps to a nil value with no error.
func resultToParquetValueByConvertedValue(result gjson.Result, convertedType parquet.ConvertedType, parquetType parquet.Type) (value interface{}, err error) {
	if result.Type == gjson.Null {
		return nil, nil
	}

	// Two-step conversion: JSON result -> Go value of the logical type,
	// then Go value -> representation for the physical type.
	switch convertedType {
	case parquet.ConvertedType_UTF8:
		if value, err = resultToString(result); err != nil {
			return nil, err
		}
		return stringToParquetValue(value, parquetType)
	case parquet.ConvertedType_UINT_8:
		if value, err = resultToUint8(result); err != nil {
			return nil, err
		}
		return uint8ToParquetValue(value, parquetType)
	case parquet.ConvertedType_UINT_16:
		if value, err = resultToUint16(result); err != nil {
			return nil, err
		}
		return uint16ToParquetValue(value, parquetType)
	case parquet.ConvertedType_UINT_32:
		if value, err = resultToUint32(result); err != nil {
			return nil, err
		}
		return uint32ToParquetValue(value, parquetType)
	case parquet.ConvertedType_UINT_64:
		if value, err = resultToUint64(result); err != nil {
			return nil, err
		}
		return uint64ToParquetValue(value, parquetType)
	case parquet.ConvertedType_INT_8:
		if value, err = resultToInt8(result); err != nil {
			return nil, err
		}
		return int8ToParquetValue(value, parquetType)
	case parquet.ConvertedType_INT_16:
		if value, err = resultToInt16(result); err != nil {
			return nil, err
		}
		return int16ToParquetValue(value, parquetType)
	case parquet.ConvertedType_INT_32:
		if value, err = resultToInt32(result); err != nil {
			return nil, err
		}
		return int32ToParquetValue(value, parquetType)
	case parquet.ConvertedType_INT_64:
		if value, err = resultToInt64(result); err != nil {
			return nil, err
		}
		return int64ToParquetValue(value, parquetType)
	}

	// Other converted types (DATE, DECIMAL, ...) are not handled here.
	return nil, fmt.Errorf("unsupported converted type %v", convertedType)
}
| 
 | ||||
| func resultToParquetValue(result gjson.Result, parquetType parquet.Type, convertedType *parquet.ConvertedType) (interface{}, error) { | ||||
| 	if convertedType != nil { | ||||
| 		return resultToParquetValueByConvertedValue(result, *convertedType, parquetType) | ||||
| 	} | ||||
| 
 | ||||
| 	if result.Type == gjson.Null { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		return resultToBool(result) | ||||
| 	case parquet.Type_INT32: | ||||
| 		return resultToInt32(result) | ||||
| 	case parquet.Type_INT64: | ||||
| 		return resultToInt64(result) | ||||
| 	case parquet.Type_FLOAT: | ||||
| 		return resultToFloat(result) | ||||
| 	case parquet.Type_DOUBLE: | ||||
| 		return resultToDouble(result) | ||||
| 	case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: | ||||
| 		return resultToBytes(result) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("unknown parquet type %v", parquetType) | ||||
| } | ||||
| 
 | ||||
| func resultToArray(result gjson.Result) ([]gjson.Result, error) { | ||||
| 	if result.Type == gjson.Null { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	if result.Type != gjson.JSON || !result.IsArray() { | ||||
| 		return nil, fmt.Errorf("result is not Array but %v", result.Type) | ||||
| 	} | ||||
| 
 | ||||
| 	return result.Array(), nil | ||||
| } | ||||
|  | @ -1,514 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math"

	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
)
| 
 | ||||
// i64sToi32s truncates each element of i64s to int32.
func i64sToi32s(i64s []int64) (i32s []int32) {
	i32s = make([]int32, len(i64s))
	for i, v := range i64s {
		i32s[i] = int32(v)
	}
	return i32s
}
| 
 | ||||
| func readBitPacked(reader *bytes.Reader, header, bitWidth uint64) (result []int64, err error) { | ||||
| 	count := header * 8 | ||||
| 
 | ||||
| 	if count == 0 { | ||||
| 		return result, nil | ||||
| 	} | ||||
| 
 | ||||
| 	if bitWidth == 0 { | ||||
| 		return make([]int64, count), nil | ||||
| 	} | ||||
| 
 | ||||
| 	data := make([]byte, header*bitWidth) | ||||
| 	if _, err = reader.Read(data); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	var val, used, left, b uint64 | ||||
| 
 | ||||
| 	valNeedBits := bitWidth | ||||
| 	i := -1 | ||||
| 	for { | ||||
| 		if left <= 0 { | ||||
| 			i++ | ||||
| 			if i >= len(data) { | ||||
| 				break | ||||
| 			} | ||||
| 
 | ||||
| 			b = uint64(data[i]) | ||||
| 			left = 8 | ||||
| 			used = 0 | ||||
| 		} | ||||
| 
 | ||||
| 		if left >= valNeedBits { | ||||
| 			val |= ((b >> used) & ((1 << valNeedBits) - 1)) << (bitWidth - valNeedBits) | ||||
| 			result = append(result, int64(val)) | ||||
| 			val = 0 | ||||
| 			left -= valNeedBits | ||||
| 			used += valNeedBits | ||||
| 			valNeedBits = bitWidth | ||||
| 		} else { | ||||
| 			val |= (b >> used) << (bitWidth - valNeedBits) | ||||
| 			valNeedBits -= left | ||||
| 			left = 0 | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readBools(reader *bytes.Reader, count uint64) (result []bool, err error) { | ||||
| 	i64s, err := readBitPacked(reader, count, 1) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	var i uint64 | ||||
| 	for i = 0; i < count; i++ { | ||||
| 		result = append(result, i64s[i] > 0) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readInt32s(reader *bytes.Reader, count uint64) (result []int32, err error) { | ||||
| 	buf := make([]byte, 4) | ||||
| 
 | ||||
| 	var i uint64 | ||||
| 	for i = 0; i < count; i++ { | ||||
| 		if _, err = reader.Read(buf); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, int32(bytesToUint32(buf))) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readInt64s(reader *bytes.Reader, count uint64) (result []int64, err error) { | ||||
| 	buf := make([]byte, 8) | ||||
| 
 | ||||
| 	var i uint64 | ||||
| 	for i = 0; i < count; i++ { | ||||
| 		if _, err = reader.Read(buf); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, int64(bytesToUint64(buf))) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readInt96s(reader *bytes.Reader, count uint64) (result [][]byte, err error) { | ||||
| 	var i uint64 | ||||
| 	for i = 0; i < count; i++ { | ||||
| 		buf := make([]byte, 12) | ||||
| 
 | ||||
| 		if _, err = reader.Read(buf); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, buf) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readFloats(reader *bytes.Reader, count uint64) (result []float32, err error) { | ||||
| 	buf := make([]byte, 4) | ||||
| 
 | ||||
| 	var i uint64 | ||||
| 	for i = 0; i < count; i++ { | ||||
| 		if _, err = reader.Read(buf); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, math.Float32frombits(bytesToUint32(buf))) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readDoubles(reader *bytes.Reader, count uint64) (result []float64, err error) { | ||||
| 	buf := make([]byte, 8) | ||||
| 
 | ||||
| 	var i uint64 | ||||
| 	for i = 0; i < count; i++ { | ||||
| 		if _, err = reader.Read(buf); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, math.Float64frombits(bytesToUint64(buf))) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readByteArrays(reader *bytes.Reader, count uint64) (result [][]byte, err error) { | ||||
| 	buf := make([]byte, 4) | ||||
| 	var length uint32 | ||||
| 	var data []byte | ||||
| 
 | ||||
| 	var i uint64 | ||||
| 	for i = 0; i < count; i++ { | ||||
| 		if _, err = reader.Read(buf); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		length = bytesToUint32(buf) | ||||
| 		data = make([]byte, length) | ||||
| 		if length > 0 { | ||||
| 			if _, err = reader.Read(data); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, data) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readFixedLenByteArrays(reader *bytes.Reader, count, length uint64) (result [][]byte, err error) { | ||||
| 	var i uint64 | ||||
| 	for i = 0; i < count; i++ { | ||||
| 		data := make([]byte, length) | ||||
| 		if _, err = reader.Read(data); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, data) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readValues(reader *bytes.Reader, dataType parquet.Type, count, length uint64) (interface{}, error) { | ||||
| 	switch dataType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		return readBools(reader, count) | ||||
| 	case parquet.Type_INT32: | ||||
| 		return readInt32s(reader, count) | ||||
| 	case parquet.Type_INT64: | ||||
| 		return readInt64s(reader, count) | ||||
| 	case parquet.Type_INT96: | ||||
| 		return readInt96s(reader, count) | ||||
| 	case parquet.Type_FLOAT: | ||||
| 		return readFloats(reader, count) | ||||
| 	case parquet.Type_DOUBLE: | ||||
| 		return readDoubles(reader, count) | ||||
| 	case parquet.Type_BYTE_ARRAY: | ||||
| 		return readByteArrays(reader, count) | ||||
| 	case parquet.Type_FIXED_LEN_BYTE_ARRAY: | ||||
| 		return readFixedLenByteArrays(reader, count, length) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, fmt.Errorf("unknown parquet type %v", dataType) | ||||
| } | ||||
| 
 | ||||
| func readUnsignedVarInt(reader *bytes.Reader) (v uint64, err error) { | ||||
| 	var b byte | ||||
| 	var shift uint64 | ||||
| 
 | ||||
| 	for { | ||||
| 		if b, err = reader.ReadByte(); err != nil { | ||||
| 			return 0, err | ||||
| 		} | ||||
| 
 | ||||
| 		if v |= ((uint64(b) & 0x7F) << shift); b&0x80 == 0 { | ||||
| 			break | ||||
| 		} | ||||
| 
 | ||||
| 		shift += 7 | ||||
| 	} | ||||
| 
 | ||||
| 	return v, nil | ||||
| } | ||||
| 
 | ||||
| func readRLE(reader *bytes.Reader, header, bitWidth uint64) (result []int64, err error) { | ||||
| 	width := (bitWidth + 7) / 8 | ||||
| 	data := make([]byte, width) | ||||
| 	if width > 0 { | ||||
| 		if _, err = reader.Read(data); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if width < 4 { | ||||
| 		data = append(data, make([]byte, 4-width)...) | ||||
| 	} | ||||
| 
 | ||||
| 	val := int64(bytesToUint32(data)) | ||||
| 	count := header >> 1 | ||||
| 	if count > math.MaxInt64/8 { | ||||
| 		// 8 bytes/element.
 | ||||
| 		return nil, errors.New("parquet: size too large") | ||||
| 	} | ||||
| 	result = make([]int64, count) | ||||
| 	for i := range result { | ||||
| 		result[i] = val | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readRLEBitPackedHybrid(reader *bytes.Reader, length, bitWidth uint64) (result []int64, err error) { | ||||
| 	if length <= 0 { | ||||
| 		var i32s []int32 | ||||
| 		i32s, err = readInt32s(reader, 1) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if i32s[0] < 0 { | ||||
| 			return nil, errors.New("parquet: negative RLEBitPackedHybrid length") | ||||
| 		} | ||||
| 		length = uint64(i32s[0]) | ||||
| 	} | ||||
| 
 | ||||
| 	buf := make([]byte, length) | ||||
| 	if _, err = reader.Read(buf); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	reader = bytes.NewReader(buf) | ||||
| 	for reader.Len() > 0 { | ||||
| 		header, err := readUnsignedVarInt(reader) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		var i64s []int64 | ||||
| 		if header&1 == 0 { | ||||
| 			i64s, err = readRLE(reader, header, bitWidth) | ||||
| 		} else { | ||||
| 			i64s, err = readBitPacked(reader, header>>1, bitWidth) | ||||
| 		} | ||||
| 
 | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, i64s...) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
// readDeltaBinaryPackedInt decodes DELTA_BINARY_PACKED int64 values.
// Stream layout: <block size in values> <miniblocks per block>
// <total value count> <first value, zigzag> followed by blocks, each with a
// zigzag minimum delta, one bit width per miniblock, and the bit-packed
// deltas themselves.
func readDeltaBinaryPackedInt(reader *bytes.Reader) (result []int64, err error) {
	blockSize, err := readUnsignedVarInt(reader)
	if err != nil {
		return nil, err
	}

	numMiniblocksInBlock, err := readUnsignedVarInt(reader)
	if err != nil {
		return nil, err
	}

	numValues, err := readUnsignedVarInt(reader)
	if err != nil {
		return nil, err
	}

	firstValueZigZag, err := readUnsignedVarInt(reader)
	if err != nil {
		return nil, err
	}

	// Zigzag decode: (n >> 1) ^ -(n & 1) maps the unsigned varint back to
	// a signed value.
	v := int64(firstValueZigZag>>1) ^ (-int64(firstValueZigZag & 1))
	result = append(result, v)
	if numMiniblocksInBlock == 0 {
		// Guards the division below.
		return nil, errors.New("parquet: zero mini blocks in block")
	}
	numValuesInMiniBlock := blockSize / numMiniblocksInBlock

	bitWidths := make([]uint64, numMiniblocksInBlock)
	for uint64(len(result)) < numValues {
		// Each block starts with its minimum delta (zigzag encoded)...
		minDeltaZigZag, err := readUnsignedVarInt(reader)
		if err != nil {
			return nil, err
		}

		// ...followed by one bit-width byte per miniblock.
		for i := 0; uint64(i) < numMiniblocksInBlock; i++ {
			b, err := reader.ReadByte()
			if err != nil {
				return nil, err
			}
			bitWidths[i] = uint64(b)
		}

		minDelta := int64(minDeltaZigZag>>1) ^ (-int64(minDeltaZigZag & 1))
		for i := 0; uint64(i) < numMiniblocksInBlock; i++ {
			i64s, err := readBitPacked(reader, numValuesInMiniBlock/8, bitWidths[i])
			if err != nil {
				return nil, err
			}

			// Stored deltas are relative to minDelta; accumulate into v to
			// reconstruct the running values.
			for j := range i64s {
				v += i64s[j] + minDelta
				result = append(result, v)
			}
		}
	}

	// NOTE(review): assumes the loop appended at least numValues entries;
	// a malformed stream with empty miniblocks would error out of
	// readUnsignedVarInt at EOF rather than terminate here — confirm.
	return result[:numValues], nil
}
| 
 | ||||
| func readDeltaLengthByteArrays(reader *bytes.Reader) (result [][]byte, err error) { | ||||
| 	i64s, err := readDeltaBinaryPackedInt(reader) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	for i := 0; i < len(i64s); i++ { | ||||
| 		arrays, err := readFixedLenByteArrays(reader, 1, uint64(i64s[i])) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		result = append(result, arrays[0]) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func readDeltaByteArrays(reader *bytes.Reader) (result [][]byte, err error) { | ||||
| 	i64s, err := readDeltaBinaryPackedInt(reader) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	suffixes, err := readDeltaLengthByteArrays(reader) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	result = append(result, suffixes[0]) | ||||
| 	for i := 1; i < len(i64s); i++ { | ||||
| 		prefixLength := i64s[i] | ||||
| 		val := append([]byte{}, result[i-1][:prefixLength]...) | ||||
| 		val = append(val, suffixes[i]...) | ||||
| 		result = append(result, val) | ||||
| 	} | ||||
| 
 | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
// readDataPageValues decodes a data page's values according to encoding,
// returning both the decoded values and the physical type they were decoded
// as (dictionary/RLE/delta paths return INT64 or INT32 indices/values even
// when dataType differs).
func readDataPageValues(
	bytesReader *bytes.Reader,
	encoding parquet.Encoding,
	dataType parquet.Type,
	convertedType parquet.ConvertedType,
	count, bitWidth uint64,
) (result interface{}, resultDataType parquet.Type, err error) {
	switch encoding {
	case parquet.Encoding_PLAIN:
		// bitWidth doubles as the fixed length for FIXED_LEN_BYTE_ARRAY here.
		result, err = readValues(bytesReader, dataType, count, bitWidth)
		return result, dataType, err

	case parquet.Encoding_PLAIN_DICTIONARY:
		// First byte is the bit width of the dictionary indices.
		b, err := bytesReader.ReadByte()
		if err != nil {
			return nil, -1, err
		}

		i64s, err := readRLEBitPackedHybrid(bytesReader, uint64(bytesReader.Len()), uint64(b))
		if err != nil {
			return nil, -1, err
		}
		if len(i64s) < int(count) || count > math.MaxInt64/8 {
			return nil, -1, errors.New("parquet: value out of range")
		}
		return i64s[:count], parquet.Type_INT64, nil

	case parquet.Encoding_RLE:
		// Length 0 makes readRLEBitPackedHybrid read its own length prefix.
		i64s, err := readRLEBitPackedHybrid(bytesReader, 0, bitWidth)
		if err != nil {
			return nil, -1, err
		}

		if len(i64s) < int(count) || count > math.MaxInt64/8 {
			return nil, -1, errors.New("parquet: value out of range")
		}
		i64s = i64s[:count]

		if dataType == parquet.Type_INT32 {
			return i64sToi32s(i64s), parquet.Type_INT32, nil
		}

		return i64s, parquet.Type_INT64, nil

	case parquet.Encoding_BIT_PACKED:
		// Deprecated by the parquet format; never supported here.
		return nil, -1, fmt.Errorf("deprecated parquet encoding %v", parquet.Encoding_BIT_PACKED)

	case parquet.Encoding_DELTA_BINARY_PACKED:
		i64s, err := readDeltaBinaryPackedInt(bytesReader)
		if err != nil {
			return nil, -1, err
		}

		if len(i64s) < int(count) || count > math.MaxInt64/8 {
			return nil, -1, errors.New("parquet: value out of range")
		}
		i64s = i64s[:count]

		if dataType == parquet.Type_INT32 {
			return i64sToi32s(i64s), parquet.Type_INT32, nil
		}

		return i64s, parquet.Type_INT64, nil

	case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY:
		byteSlices, err := readDeltaLengthByteArrays(bytesReader)
		if err != nil {
			return nil, -1, err
		}
		// 24 bytes: slice-header size per element.
		if len(byteSlices) < int(count) || count > math.MaxInt64/24 {
			return nil, -1, errors.New("parquet: value out of range")
		}

		return byteSlices[:count], parquet.Type_FIXED_LEN_BYTE_ARRAY, nil

	case parquet.Encoding_DELTA_BYTE_ARRAY:
		byteSlices, err := readDeltaByteArrays(bytesReader)
		if err != nil {
			return nil, -1, err
		}
		if len(byteSlices) < int(count) || count > math.MaxInt64/24 {
			return nil, -1, errors.New("parquet: value out of range")
		}

		return byteSlices[:count], parquet.Type_FIXED_LEN_BYTE_ARRAY, nil
	}

	return nil, -1, fmt.Errorf("unsupported parquet encoding %v", encoding)
}
|  | @ -1,451 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
// boolsToBytes bit-packs bs into bytes, one bit per value starting at the
// LSB, with the final byte zero-padded.
func boolsToBytes(bs []bool) []byte {
	result := make([]byte, (len(bs)+7)/8)
	for i, set := range bs {
		if set {
			result[i/8] |= 1 << uint32(i%8)
		}
	}

	return result
}
| 
 | ||||
// int32sToBytes serializes i32s as consecutive little-endian 4-byte values.
func int32sToBytes(i32s []int32) []byte {
	buf := make([]byte, 4*len(i32s))
	for i := range i32s {
		binary.LittleEndian.PutUint32(buf[4*i:], uint32(i32s[i]))
	}
	return buf
}

// int64sToBytes serializes i64s as consecutive little-endian 8-byte values.
func int64sToBytes(i64s []int64) []byte {
	buf := make([]byte, 8*len(i64s))
	for i := range i64s {
		binary.LittleEndian.PutUint64(buf[8*i:], uint64(i64s[i]))
	}
	return buf
}

// float32sToBytes serializes f32s as little-endian IEEE-754 4-byte values.
func float32sToBytes(f32s []float32) []byte {
	buf := make([]byte, 4*len(f32s))
	for i := range f32s {
		binary.LittleEndian.PutUint32(buf[4*i:], math.Float32bits(f32s[i]))
	}
	return buf
}

// float64sToBytes serializes f64s as little-endian IEEE-754 8-byte values.
func float64sToBytes(f64s []float64) []byte {
	buf := make([]byte, 8*len(f64s))
	for i := range f64s {
		binary.LittleEndian.PutUint64(buf[8*i:], math.Float64bits(f64s[i]))
	}
	return buf
}
| 
 | ||||
// byteSlicesToBytes serializes BYTE_ARRAY values: each slice is prefixed
// with its little-endian uint32 length, then its bytes.
func byteSlicesToBytes(byteSlices [][]byte) []byte {
	var buf bytes.Buffer
	lenPrefix := make([]byte, 4)
	for _, s := range byteSlices {
		binary.LittleEndian.PutUint32(lenPrefix, uint32(len(s)))
		// bytes.Buffer writes cannot fail short of running out of memory.
		buf.Write(lenPrefix)
		buf.Write(s)
	}

	return buf.Bytes()
}
| 
 | ||||
// byteArraysToBytes concatenates fixed-length byte arrays without length
// prefixes, panicking if the arrays are not all the same length.
func byteArraysToBytes(arrayList [][]byte) []byte {
	var buf bytes.Buffer
	arrayLen := -1
	for _, array := range arrayList {
		if arrayLen >= 0 && len(array) != arrayLen {
			panic(errors.New("array list does not have same length"))
		}

		arrayLen = len(array)
		// bytes.Buffer writes cannot fail short of running out of memory.
		buf.Write(array)
	}

	return buf.Bytes()
}
| 
 | ||||
// int96sToBytes serializes INT96 values (each supplied as a raw byte
// array) by concatenating them; byteArraysToBytes enforces that all
// entries share the same length.
func int96sToBytes(i96s [][]byte) []byte {
	return byteArraysToBytes(i96s)
}
| 
 | ||||
// valuesToBytes serializes a typed slice into its parquet PLAIN
// representation for dataType. The dynamic type of values must match
// dataType (e.g. []int32 for Type_INT32); a mismatch panics via the
// type assertion. An unrecognized dataType yields an empty slice.
func valuesToBytes(values interface{}, dataType parquet.Type) []byte {
	switch dataType {
	case parquet.Type_BOOLEAN:
		return boolsToBytes(values.([]bool))
	case parquet.Type_INT32:
		return int32sToBytes(values.([]int32))
	case parquet.Type_INT64:
		return int64sToBytes(values.([]int64))
	case parquet.Type_INT96:
		return int96sToBytes(values.([][]byte))
	case parquet.Type_FLOAT:
		return float32sToBytes(values.([]float32))
	case parquet.Type_DOUBLE:
		return float64sToBytes(values.([]float64))
	case parquet.Type_BYTE_ARRAY:
		return byteSlicesToBytes(values.([][]byte))
	case parquet.Type_FIXED_LEN_BYTE_ARRAY:
		return byteArraysToBytes(values.([][]byte))
	}

	return []byte{}
}
| 
 | ||||
// valueToBytes serializes a single value by wrapping it in a
// one-element slice of the matching type and delegating to
// valuesToBytes. For an unrecognized dataType, values stays nil and
// valuesToBytes returns an empty slice.
func valueToBytes(value interface{}, dataType parquet.Type) []byte {
	var values interface{}
	switch dataType {
	case parquet.Type_BOOLEAN:
		values = []bool{value.(bool)}
	case parquet.Type_INT32:
		values = []int32{value.(int32)}
	case parquet.Type_INT64:
		values = []int64{value.(int64)}
	case parquet.Type_INT96:
		values = [][]byte{value.([]byte)}
	case parquet.Type_FLOAT:
		values = []float32{value.(float32)}
	case parquet.Type_DOUBLE:
		values = []float64{value.(float64)}
	case parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY:
		values = [][]byte{value.([]byte)}
	}

	return valuesToBytes(values, dataType)
}
| 
 | ||||
| func unsignedVarIntToBytes(ui64 uint64) []byte { | ||||
| 	size := (getBitWidth(ui64) + 6) / 7 | ||||
| 	if size == 0 { | ||||
| 		return []byte{0} | ||||
| 	} | ||||
| 
 | ||||
| 	buf := make([]byte, size) | ||||
| 	for i := uint64(0); i < size; i++ { | ||||
| 		buf[i] = byte(ui64&0x7F) | 0x80 | ||||
| 		ui64 >>= 7 | ||||
| 	} | ||||
| 	buf[size-1] &= 0x7F | ||||
| 
 | ||||
| 	return buf | ||||
| } | ||||
| 
 | ||||
// valuesToRLEBytes run-length-encodes values: each maximal run of
// equal values is emitted as an unsigned-varint header
// (runLength << 1, the low bit 0 marking an RLE run) followed by the
// run's value truncated to ceil(bitWidth/8) little-endian bytes.
// valuesToInterfaces (defined elsewhere in this package) flattens the
// typed slice into comparable values.
func valuesToRLEBytes(values interface{}, bitWidth int32, valueType parquet.Type) []byte {
	vals := valuesToInterfaces(values, valueType)
	result := []byte{}
	j := 0
	for i := 0; i < len(vals); i = j {
		// Advance j past the run of values equal to vals[i].
		for j = i + 1; j < len(vals) && vals[i] == vals[j]; j++ {
		}
		headerBytes := unsignedVarIntToBytes(uint64((j - i) << 1))
		result = append(result, headerBytes...)

		valBytes := valueToBytes(vals[i], valueType)
		byteCount := (bitWidth + 7) / 8
		result = append(result, valBytes[:byteCount]...)
	}

	return result
}
| 
 | ||||
| func valuesToRLEBitPackedHybridBytes(values interface{}, bitWidth int32, dataType parquet.Type) []byte { | ||||
| 	rleBytes := valuesToRLEBytes(values, bitWidth, dataType) | ||||
| 	lenBytes := valueToBytes(int32(len(rleBytes)), parquet.Type_INT32) | ||||
| 	return append(lenBytes, rleBytes...) | ||||
| } | ||||
| 
 | ||||
// valuesToBitPackedBytes bit-packs values (BOOLEAN, INT32 or INT64)
// using bitWidth bits per value, filling each output byte LSB-first.
// When withHeader is true, the parquet bit-packed run header
// ((count/8 << 1) | 1) is prepended as an unsigned varint.
// It panics for any other data type and returns nil for empty input.
// NOTE(review): a trailing partially-filled byte is never flushed;
// callers appear to pass value counts that fill whole bytes — confirm
// before reuse.
func valuesToBitPackedBytes(values interface{}, bitWidth int64, withHeader bool, dataType parquet.Type) []byte {
	// Normalize all supported input types to []int64.
	var i64s []int64
	switch dataType {
	case parquet.Type_BOOLEAN:
		bs := values.([]bool)
		i64s = make([]int64, len(bs))
		for i := range bs {
			if bs[i] {
				i64s[i] = 1
			}
		}
	case parquet.Type_INT32:
		i32s := values.([]int32)
		i64s = make([]int64, len(i32s))
		for i := range i32s {
			i64s[i] = int64(i32s[i])
		}
	case parquet.Type_INT64:
		i64s = values.([]int64)
	default:
		panic(fmt.Errorf("data type %v is not supported for bit packing", dataType))
	}

	if len(i64s) == 0 {
		return nil
	}

	// valueByte accumulates output bits; bitsSet counts bits of the
	// current value already consumed; bitsNeeded counts free bits left
	// in valueByte; bitsToSet counts bits of the current value still
	// to be written.
	var valueByte byte
	bitsSet := uint64(0)
	bitsNeeded := uint64(8)
	bitsToSet := uint64(bitWidth)
	value := i64s[0]

	valueBytes := []byte{}
	for i := 0; i < len(i64s); {
		if bitsToSet >= bitsNeeded {
			// The current value can fill the rest of valueByte: emit it.
			valueByte |= byte(((value >> bitsSet) & ((1 << bitsNeeded) - 1)) << (8 - bitsNeeded))
			valueBytes = append(valueBytes, valueByte)
			bitsToSet -= bitsNeeded
			bitsSet += bitsNeeded

			bitsNeeded = 8
			valueByte = 0

			// Value exhausted: move to the next one (bitsToSet is
			// unsigned, so <= 0 means == 0 here).
			if bitsToSet <= 0 && (i+1) < len(i64s) {
				i++
				value = i64s[i]
				bitsToSet = uint64(bitWidth)
				bitsSet = 0
			}
		} else {
			// The value's remaining bits fit inside valueByte; pack
			// them and advance to the next value.
			valueByte |= byte((value >> bitsSet) << (8 - bitsNeeded))
			i++

			if i < len(i64s) {
				value = i64s[i]
			}

			bitsNeeded -= bitsToSet
			bitsToSet = uint64(bitWidth)
			bitsSet = 0
		}
	}

	if withHeader {
		header := uint64(((len(i64s) / 8) << 1) | 1)
		headerBytes := unsignedVarIntToBytes(header)
		return append(headerBytes, valueBytes...)
	}

	return valueBytes
}
| 
 | ||||
// DELTA_BINARY_PACKED layout parameters: each block holds 128 deltas,
// split into 4 sub-blocks ("miniblocks") of 32 values each.
const (
	blockSize     = 128
	subBlockSize  = 32
	subBlockCount = blockSize / subBlockSize
)

// Pre-encoded varint forms of the block size and sub-block count,
// reused at the start of every delta-encoded header.
var (
	blockSizeBytes     = unsignedVarIntToBytes(blockSize)
	subBlockCountBytes = unsignedVarIntToBytes(subBlockCount)
)
| 
 | ||||
// int32ToDeltaBytes encodes i32s with parquet DELTA_BINARY_PACKED:
// header (block size, sub-block count, value count, zig-zag varint of
// the first value), then for each 128-value block the zig-zag varint
// of the minimum delta, the four per-sub-block bit widths, and the
// bit-packed (delta - minDelta) values.
// NOTE(review): indexes i32s[0], so this panics on empty input —
// callers must guarantee at least one value.
func int32ToDeltaBytes(i32s []int32) []byte {
	// Zig-zag encoding keeps small negative numbers small.
	getValue := func(i32 int32) uint64 {
		return uint64((i32 >> 31) ^ (i32 << 1))
	}

	result := append([]byte{}, blockSizeBytes...)
	result = append(result, subBlockCountBytes...)
	result = append(result, unsignedVarIntToBytes(uint64(len(i32s)))...)
	result = append(result, unsignedVarIntToBytes(getValue(i32s[0]))...)

	for i := 1; i < len(i32s); {
		block := []int32{}
		minDelta := int32(0x7FFFFFFF)

		// Collect up to blockSize consecutive deltas and their minimum.
		for ; i < len(i32s) && len(block) < blockSize; i++ {
			delta := i32s[i] - i32s[i-1]
			block = append(block, delta)
			if delta < minDelta {
				minDelta = delta
			}
		}

		// Pad a short final block with minDelta (encodes as zero below).
		for len(block) < blockSize {
			block = append(block, minDelta)
		}

		// Re-base each sub-block on minDelta and record the bit width
		// needed for its largest value.
		bitWidths := make([]byte, subBlockCount)
		for j := 0; j < subBlockCount; j++ {
			maxValue := int32(0)
			for k := j * subBlockSize; k < (j+1)*subBlockSize; k++ {
				block[k] -= minDelta
				if block[k] > maxValue {
					maxValue = block[k]
				}
			}

			bitWidths[j] = byte(getBitWidth(uint64(maxValue)))
		}

		minDeltaZigZag := getValue(minDelta)
		result = append(result, unsignedVarIntToBytes(minDeltaZigZag)...)
		result = append(result, bitWidths...)

		for j := 0; j < subBlockCount; j++ {
			bitPacked := valuesToBitPackedBytes(
				block[j*subBlockSize:(j+1)*subBlockSize],
				int64(bitWidths[j]),
				false,
				parquet.Type_INT32,
			)
			result = append(result, bitPacked...)
		}
	}

	return result
}
| 
 | ||||
// int64ToDeltaBytes is the INT64 counterpart of int32ToDeltaBytes:
// parquet DELTA_BINARY_PACKED encoding of i64s with the same block /
// sub-block layout.
// NOTE(review): indexes i64s[0], so this panics on empty input —
// callers must guarantee at least one value.
func int64ToDeltaBytes(i64s []int64) []byte {
	// Zig-zag encoding keeps small negative numbers small.
	getValue := func(i64 int64) uint64 {
		return uint64((i64 >> 63) ^ (i64 << 1))
	}

	result := append([]byte{}, blockSizeBytes...)
	result = append(result, subBlockCountBytes...)
	result = append(result, unsignedVarIntToBytes(uint64(len(i64s)))...)
	result = append(result, unsignedVarIntToBytes(getValue(i64s[0]))...)

	for i := 1; i < len(i64s); {
		block := []int64{}
		minDelta := int64(0x7FFFFFFFFFFFFFFF)

		// Collect up to blockSize consecutive deltas and their minimum.
		for ; i < len(i64s) && len(block) < blockSize; i++ {
			delta := i64s[i] - i64s[i-1]
			block = append(block, delta)
			if delta < minDelta {
				minDelta = delta
			}
		}

		// Pad a short final block with minDelta (encodes as zero below).
		for len(block) < blockSize {
			block = append(block, minDelta)
		}

		// Re-base each sub-block on minDelta and record the bit width
		// needed for its largest value.
		bitWidths := make([]byte, subBlockCount)
		for j := 0; j < subBlockCount; j++ {
			maxValue := int64(0)
			for k := j * subBlockSize; k < (j+1)*subBlockSize; k++ {
				block[k] -= minDelta
				if block[k] > maxValue {
					maxValue = block[k]
				}
			}

			bitWidths[j] = byte(getBitWidth(uint64(maxValue)))
		}

		minDeltaZigZag := getValue(minDelta)
		result = append(result, unsignedVarIntToBytes(minDeltaZigZag)...)
		result = append(result, bitWidths...)

		for j := 0; j < subBlockCount; j++ {
			bitPacked := valuesToBitPackedBytes(
				block[j*subBlockSize:(j+1)*subBlockSize],
				int64(bitWidths[j]),
				false,
				parquet.Type_INT64,
			)
			result = append(result, bitPacked...)
		}
	}

	return result
}
| 
 | ||||
// valuesToDeltaBytes delta-encodes values for the supported integer
// types (INT32, INT64); any other dataType returns nil.
func valuesToDeltaBytes(values interface{}, dataType parquet.Type) []byte {
	switch dataType {
	case parquet.Type_INT32:
		return int32ToDeltaBytes(values.([]int32))
	case parquet.Type_INT64:
		return int64ToDeltaBytes(values.([]int64))
	}

	return nil
}
| 
 | ||||
| func stringsToDeltaLengthByteArrayBytes(strs []string) []byte { | ||||
| 	lengths := make([]int32, len(strs)) | ||||
| 	for i, s := range strs { | ||||
| 		lengths[i] = int32(len(s)) | ||||
| 	} | ||||
| 
 | ||||
| 	result := int32ToDeltaBytes(lengths) | ||||
| 	for _, s := range strs { | ||||
| 		result = append(result, []byte(s)...) | ||||
| 	} | ||||
| 
 | ||||
| 	return result | ||||
| } | ||||
| 
 | ||||
| func stringsToDeltaByteArrayBytes(strs []string) []byte { | ||||
| 	prefixLengths := make([]int32, len(strs)) | ||||
| 	suffixes := make([]string, len(strs)) | ||||
| 
 | ||||
| 	var i, j int | ||||
| 	for i = 1; i < len(strs); i++ { | ||||
| 		for j = 0; j < len(strs[i-1]) && j < len(strs[i]); j++ { | ||||
| 			if strs[i-1][j] != strs[i][j] { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		prefixLengths[i] = int32(j) | ||||
| 		suffixes[i] = strs[i][j:] | ||||
| 	} | ||||
| 
 | ||||
| 	result := int32ToDeltaBytes(prefixLengths) | ||||
| 	return append(result, stringsToDeltaLengthByteArrayBytes(suffixes)...) | ||||
| } | ||||
| 
 | ||||
// encodeValues encodes values per the requested parquet encoding.
// RLE, DELTA_BINARY_PACKED, DELTA_BYTE_ARRAY and
// DELTA_LENGTH_BYTE_ARRAY are handled explicitly (the delta byte-array
// encodings require values to be []string); any other encoding falls
// through to the type's PLAIN representation. bitWidth is used only by
// the RLE hybrid encoding.
func encodeValues(values interface{}, dataType parquet.Type, encoding parquet.Encoding, bitWidth int32) []byte {
	switch encoding {
	case parquet.Encoding_RLE:
		return valuesToRLEBitPackedHybridBytes(values, bitWidth, dataType)
	case parquet.Encoding_DELTA_BINARY_PACKED:
		return valuesToDeltaBytes(values, dataType)
	case parquet.Encoding_DELTA_BYTE_ARRAY:
		return stringsToDeltaByteArrayBytes(values.([]string))
	case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY:
		return stringsToDeltaLengthByteArrayBytes(values.([]string))
	}

	return valuesToBytes(values, dataType)
}
|  | @ -1,190 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"math" | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
// TestBoolsToBytes verifies LSB-first bit packing of bool slices,
// including nil/empty inputs (which must yield a non-nil empty slice).
func TestBoolsToBytes(t *testing.T) {
	testCases := []struct {
		bs             []bool
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]bool{}, []byte{}},
		{[]bool{true}, []byte{1}},
		{[]bool{false}, []byte{0}},
		{[]bool{true, true}, []byte{3}},
		{[]bool{false, false}, []byte{0}},
		{[]bool{false, true}, []byte{2}},
		{[]bool{true, false}, []byte{1}},
		{[]bool{false, false, false, false, false, false, false, true, true}, []byte{128, 1}},
	}

	for i, testCase := range testCases {
		result := boolsToBytes(testCase.bs)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestInt32sToBytes verifies 4-byte little-endian serialization of
// int32 slices, covering boundaries (MinInt32/MaxInt32) and negatives.
func TestInt32sToBytes(t *testing.T) {
	testCases := []struct {
		i32s           []int32
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]int32{}, []byte{}},
		{[]int32{1}, []byte{1, 0, 0, 0}},
		{[]int32{-1}, []byte{255, 255, 255, 255}},
		{[]int32{256}, []byte{0, 1, 0, 0}},
		{[]int32{math.MinInt32}, []byte{0, 0, 0, 128}},
		{[]int32{math.MaxInt32}, []byte{255, 255, 255, 127}},
		{[]int32{257, -2}, []byte{1, 1, 0, 0, 254, 255, 255, 255}},
	}

	for i, testCase := range testCases {
		result := int32sToBytes(testCase.i32s)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestInt64sToBytes verifies 8-byte little-endian serialization of
// int64 slices, covering boundaries (MinInt64/MaxInt64) and negatives.
func TestInt64sToBytes(t *testing.T) {
	testCases := []struct {
		i64s           []int64
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]int64{}, []byte{}},
		{[]int64{1}, []byte{1, 0, 0, 0, 0, 0, 0, 0}},
		{[]int64{-1}, []byte{255, 255, 255, 255, 255, 255, 255, 255}},
		{[]int64{256}, []byte{0, 1, 0, 0, 0, 0, 0, 0}},
		{[]int64{math.MinInt64}, []byte{0, 0, 0, 0, 0, 0, 0, 128}},
		{[]int64{math.MaxInt64}, []byte{255, 255, 255, 255, 255, 255, 255, 127}},
		{[]int64{257, -2}, []byte{1, 1, 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255}},
	}

	for i, testCase := range testCases {
		result := int64sToBytes(testCase.i64s)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestFloat32sToBytes verifies IEEE-754 little-endian serialization of
// float32 slices, including negative, fractional and pi values.
func TestFloat32sToBytes(t *testing.T) {
	testCases := []struct {
		f32s           []float32
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]float32{}, []byte{}},
		{[]float32{1}, []byte{0, 0, 128, 63}},
		{[]float32{1.0}, []byte{0, 0, 128, 63}},
		{[]float32{-1}, []byte{0, 0, 128, 191}},
		{[]float32{-1.0}, []byte{0, 0, 128, 191}},
		{[]float32{256}, []byte{0, 0, 128, 67}},
		{[]float32{1.1}, []byte{205, 204, 140, 63}},
		{[]float32{-1.1}, []byte{205, 204, 140, 191}},
		{[]float32{math.Pi}, []byte{219, 15, 73, 64}},
		{[]float32{257, -2}, []byte{0, 128, 128, 67, 0, 0, 0, 192}},
		{[]float32{257.1, -2.1}, []byte{205, 140, 128, 67, 102, 102, 6, 192}},
	}

	for i, testCase := range testCases {
		result := float32sToBytes(testCase.f32s)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestFloat64sToBytes verifies IEEE-754 little-endian serialization of
// float64 slices, including negative, fractional and pi values.
func TestFloat64sToBytes(t *testing.T) {
	testCases := []struct {
		f64s           []float64
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]float64{}, []byte{}},
		{[]float64{1}, []byte{0, 0, 0, 0, 0, 0, 240, 63}},
		{[]float64{1.0}, []byte{0, 0, 0, 0, 0, 0, 240, 63}},
		{[]float64{-1}, []byte{0, 0, 0, 0, 0, 0, 240, 191}},
		{[]float64{-1.0}, []byte{0, 0, 0, 0, 0, 0, 240, 191}},
		{[]float64{256}, []byte{0, 0, 0, 0, 0, 0, 112, 64}},
		{[]float64{1.1}, []byte{154, 153, 153, 153, 153, 153, 241, 63}},
		{[]float64{-1.1}, []byte{154, 153, 153, 153, 153, 153, 241, 191}},
		{[]float64{math.Pi}, []byte{24, 45, 68, 84, 251, 33, 9, 64}},
		{[]float64{257, -2}, []byte{0, 0, 0, 0, 0, 16, 112, 64, 0, 0, 0, 0, 0, 0, 0, 192}},
		{[]float64{257.1, -2.1}, []byte{154, 153, 153, 153, 153, 17, 112, 64, 205, 204, 204, 204, 204, 204, 0, 192}},
	}

	for i, testCase := range testCases {
		result := float64sToBytes(testCase.f64s)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestUnsignedVarIntToBytes verifies unsigned LEB128 varint encoding
// at the continuation-bit boundaries (0x7F/0x80) and MaxUint64.
func TestUnsignedVarIntToBytes(t *testing.T) {
	testCases := []struct {
		ui64           uint64
		expectedResult []byte
	}{
		{0, []byte{0}},
		{1, []byte{1}},
		{0x7F, []byte{127}},
		{0x80, []byte{128, 1}},
		{uint64(math.MaxUint64), []byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 1}},
	}

	for i, testCase := range testCases {
		result := unsignedVarIntToBytes(testCase.ui64)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestValuesToRLEBytes verifies run-length encoding of int32 slices:
// each run becomes a varint header (runLength << 1) followed by the
// run value truncated to ceil(bitWidth/8) bytes.
func TestValuesToRLEBytes(t *testing.T) {
	testCases := []struct {
		values         interface{}
		bitWidth       int32
		dataType       parquet.Type
		expectedResult []byte
	}{
		{[]int32{3, 5, 7}, 1, parquet.Type_INT32, []byte{2, 3, 2, 5, 2, 7}},
		{[]int32{3, 3, 3}, 1, parquet.Type_INT32, []byte{6, 3}},
		{[]int32{2, 2, 3, 3, 3}, 1, parquet.Type_INT32, []byte{4, 2, 6, 3}},
	}

	for i, testCase := range testCases {
		result := valuesToRLEBytes(testCase.values, testCase.bitWidth, testCase.dataType)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
|  | @ -1,39 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package encoding | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/common" | ||||
| ) | ||||
| 
 | ||||
| // Refer https://en.wikipedia.org/wiki/LEB128#Unsigned_LEB128
 | ||||
| func varIntEncode(ui64 uint64) []byte { | ||||
| 	if ui64 == 0 { | ||||
| 		return []byte{0} | ||||
| 	} | ||||
| 
 | ||||
| 	length := int(common.BitWidth(ui64)+6) / 7 | ||||
| 	data := make([]byte, length) | ||||
| 	for i := 0; i < length; i++ { | ||||
| 		data[i] = byte(ui64&0x7F) | 0x80 | ||||
| 		ui64 >>= 7 | ||||
| 	} | ||||
| 	data[length-1] &= 0x7F | ||||
| 
 | ||||
| 	return data | ||||
| } | ||||
|  | @ -1,44 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package encoding | ||||
| 
 | ||||
| import ( | ||||
| 	"math" | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
// TestVarIntToBytes verifies unsigned LEB128 varint encoding at the
// continuation-bit boundaries (0x7F/0x80) and MaxUint64.
func TestVarIntToBytes(t *testing.T) {
	testCases := []struct {
		ui64           uint64
		expectedResult []byte
	}{
		{0, []byte{0}},
		{1, []byte{1}},
		{0x7F, []byte{127}},
		{0x80, []byte{128, 1}},
		{uint64(math.MaxUint64), []byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 1}},
	}

	for i, testCase := range testCases {
		result := varIntEncode(testCase.ui64)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
|  | @ -1,297 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package encoding | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/common" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
// DELTA_BINARY_PACKED layout parameters: each block holds 128 deltas,
// split into 4 mini-blocks of 32 values each.
const (
	blockSize      = 128
	miniBlockSize  = 32
	miniBlockCount = blockSize / miniBlockSize
)
| 
 | ||||
| var deltaEncodeHeaderBytes []byte | ||||
| 
 | ||||
| func init() { | ||||
| 	deltaEncodeHeaderBytes = varIntEncode(blockSize) | ||||
| 	deltaEncodeHeaderBytes = append(deltaEncodeHeaderBytes, varIntEncode(miniBlockCount)...) | ||||
| } | ||||
| 
 | ||||
| // Supported Types: BOOLEAN, INT32, INT64
 | ||||
| func bitPackedEncode(values interface{}, bitWidth uint64, withHeader bool, parquetType parquet.Type) []byte { | ||||
| 	var i64s []int64 | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		bs, ok := values.([]bool) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of bool")) | ||||
| 		} | ||||
| 
 | ||||
| 		i64s = make([]int64, len(bs)) | ||||
| 		for i := range bs { | ||||
| 			if bs[i] { | ||||
| 				i64s[i] = 1 | ||||
| 			} | ||||
| 		} | ||||
| 	case parquet.Type_INT32: | ||||
| 		i32s, ok := values.([]int32) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of int32")) | ||||
| 		} | ||||
| 
 | ||||
| 		for i := range i32s { | ||||
| 			i64s[i] = int64(i32s[i]) | ||||
| 		} | ||||
| 	case parquet.Type_INT64: | ||||
| 		var ok bool | ||||
| 		i64s, ok = values.([]int64) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of int64")) | ||||
| 		} | ||||
| 	default: | ||||
| 		panic(fmt.Errorf("%v parquet type unsupported", parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	if len(i64s) == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	var valueByte byte | ||||
| 	bitsSet := uint64(0) | ||||
| 	bitsNeeded := uint64(8) | ||||
| 	bitsToSet := bitWidth | ||||
| 	value := i64s[0] | ||||
| 
 | ||||
| 	valueBytes := []byte{} | ||||
| 	for i := 0; i < len(i64s); { | ||||
| 		if bitsToSet >= bitsNeeded { | ||||
| 			valueByte |= byte(((value >> bitsSet) & ((1 << bitsNeeded) - 1)) << (8 - bitsNeeded)) | ||||
| 			valueBytes = append(valueBytes, valueByte) | ||||
| 			bitsToSet -= bitsNeeded | ||||
| 			bitsSet += bitsNeeded | ||||
| 
 | ||||
| 			bitsNeeded = 8 | ||||
| 			valueByte = 0 | ||||
| 
 | ||||
| 			if bitsToSet <= 0 && (i+1) < len(i64s) { | ||||
| 				i++ | ||||
| 				value = i64s[i] | ||||
| 				bitsToSet = bitWidth | ||||
| 				bitsSet = 0 | ||||
| 			} | ||||
| 		} else { | ||||
| 			valueByte |= byte((value >> bitsSet) << (8 - bitsNeeded)) | ||||
| 			i++ | ||||
| 
 | ||||
| 			if i < len(i64s) { | ||||
| 				value = i64s[i] | ||||
| 			} | ||||
| 
 | ||||
| 			bitsNeeded -= bitsToSet | ||||
| 			bitsToSet = bitWidth | ||||
| 			bitsSet = 0 | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if withHeader { | ||||
| 		header := uint64(((len(i64s) / 8) << 1) | 1) | ||||
| 		headerBytes := varIntEncode(header) | ||||
| 		return append(headerBytes, valueBytes...) | ||||
| 	} | ||||
| 
 | ||||
| 	return valueBytes | ||||
| } | ||||
| 
 | ||||
// deltaEncodeInt32s applies parquet DELTA_BINARY_PACKED to i32s:
// the fixed header bytes, the varint value count, the zig-zag varint
// of the first value, then for each 128-value block the zig-zag
// varint of the minimum delta, the four per-mini-block bit widths,
// and the bit-packed (delta - minDelta) values.
// NOTE(review): indexes i32s[0], so this panics on empty input —
// callers must pass at least one value.
func deltaEncodeInt32s(i32s []int32) (data []byte) {
	// Zig-zag encoding keeps small negative numbers small.
	getValue := func(i32 int32) uint64 {
		return uint64((i32 >> 31) ^ (i32 << 1))
	}

	data = append(data, deltaEncodeHeaderBytes...)
	data = append(data, varIntEncode(uint64(len(i32s)))...)
	data = append(data, varIntEncode(getValue(i32s[0]))...)

	for i := 1; i < len(i32s); {
		block := []int32{}
		minDelta := int32(0x7FFFFFFF)

		// Collect up to blockSize consecutive deltas and their minimum.
		for ; i < len(i32s) && len(block) < blockSize; i++ {
			delta := i32s[i] - i32s[i-1]
			block = append(block, delta)
			if delta < minDelta {
				minDelta = delta
			}
		}

		// Pad a short final block with minDelta (encodes as zero below).
		for len(block) < blockSize {
			block = append(block, minDelta)
		}

		// Re-base each mini-block on minDelta and record the bit width
		// needed for its largest value.
		bitWidths := make([]byte, miniBlockCount)
		for j := 0; j < miniBlockCount; j++ {
			maxValue := int32(0)
			for k := j * miniBlockSize; k < (j+1)*miniBlockSize; k++ {
				block[k] -= minDelta
				if block[k] > maxValue {
					maxValue = block[k]
				}
			}

			bitWidths[j] = byte(common.BitWidth(uint64(maxValue)))
		}

		minDeltaZigZag := getValue(minDelta)
		data = append(data, varIntEncode(minDeltaZigZag)...)
		data = append(data, bitWidths...)

		for j := 0; j < miniBlockCount; j++ {
			bitPacked := bitPackedEncode(
				block[j*miniBlockSize:(j+1)*miniBlockSize],
				uint64(bitWidths[j]),
				false,
				parquet.Type_INT32,
			)
			data = append(data, bitPacked...)
		}
	}

	return data
}
| 
 | ||||
// deltaEncodeInt64s is the INT64 counterpart of deltaEncodeInt32s:
// parquet DELTA_BINARY_PACKED encoding of i64s with the same block /
// mini-block layout.
// NOTE(review): indexes i64s[0], so this panics on empty input —
// callers must pass at least one value.
func deltaEncodeInt64s(i64s []int64) (data []byte) {
	// Zig-zag encoding keeps small negative numbers small.
	getValue := func(i64 int64) uint64 {
		return uint64((i64 >> 63) ^ (i64 << 1))
	}

	data = append(data, deltaEncodeHeaderBytes...)
	data = append(data, varIntEncode(uint64(len(i64s)))...)
	data = append(data, varIntEncode(getValue(i64s[0]))...)

	for i := 1; i < len(i64s); {
		block := []int64{}
		minDelta := int64(0x7FFFFFFFFFFFFFFF)

		// Collect up to blockSize consecutive deltas and their minimum.
		for ; i < len(i64s) && len(block) < blockSize; i++ {
			delta := i64s[i] - i64s[i-1]
			block = append(block, delta)
			if delta < minDelta {
				minDelta = delta
			}
		}

		// Pad a short final block with minDelta (encodes as zero below).
		for len(block) < blockSize {
			block = append(block, minDelta)
		}

		// Re-base each mini-block on minDelta and record the bit width
		// needed for its largest value.
		bitWidths := make([]byte, miniBlockCount)
		for j := 0; j < miniBlockCount; j++ {
			maxValue := int64(0)
			for k := j * miniBlockSize; k < (j+1)*miniBlockSize; k++ {
				block[k] -= minDelta
				if block[k] > maxValue {
					maxValue = block[k]
				}
			}

			bitWidths[j] = byte(common.BitWidth(uint64(maxValue)))
		}

		minDeltaZigZag := getValue(minDelta)
		data = append(data, varIntEncode(minDeltaZigZag)...)
		data = append(data, bitWidths...)

		for j := 0; j < miniBlockCount; j++ {
			bitPacked := bitPackedEncode(
				block[j*miniBlockSize:(j+1)*miniBlockSize],
				uint64(bitWidths[j]),
				false,
				parquet.Type_INT64,
			)
			data = append(data, bitPacked...)
		}
	}

	return data
}
| 
 | ||||
// DeltaEncode encodes values as specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-encoding-delta_binary_packed--5
//
// The dynamic type of values must match parquetType; it panics on a
// mismatch or an unsupported type.
//
// Supported Types: INT32, INT64.
func DeltaEncode(values interface{}, parquetType parquet.Type) []byte {
	switch parquetType {
	case parquet.Type_INT32:
		i32s, ok := values.([]int32)
		if !ok {
			panic(fmt.Errorf("expected slice of int32"))
		}
		return deltaEncodeInt32s(i32s)
	case parquet.Type_INT64:
		i64s, ok := values.([]int64)
		if !ok {
			panic(fmt.Errorf("expected slice of int64"))
		}
		return deltaEncodeInt64s(i64s)
	}

	panic(fmt.Errorf("%v parquet type unsupported", parquetType))
}
| 
 | ||||
| // DeltaLengthByteArrayEncode encodes bytes slices specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-length-byte-array-delta_length_byte_array--6
 | ||||
| //
 | ||||
| // Supported Types: BYTE_ARRAY
 | ||||
| func DeltaLengthByteArrayEncode(bytesSlices [][]byte) (data []byte) { | ||||
| 	lengths := make([]int32, len(bytesSlices)) | ||||
| 	for i, bytes := range bytesSlices { | ||||
| 		lengths[i] = int32(len(bytes)) | ||||
| 	} | ||||
| 
 | ||||
| 	data = deltaEncodeInt32s(lengths) | ||||
| 	for _, bytes := range bytesSlices { | ||||
| 		data = append(data, []byte(bytes)...) | ||||
| 	} | ||||
| 
 | ||||
| 	return data | ||||
| } | ||||
| 
 | ||||
| // DeltaByteArrayEncode encodes sequence of strings values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-strings-delta_byte_array--7
 | ||||
| //
 | ||||
| // Supported Types: BYTE_ARRAY
 | ||||
| func DeltaByteArrayEncode(bytesSlices [][]byte) (data []byte) { | ||||
| 	prefixLengths := make([]int32, len(bytesSlices)) | ||||
| 	suffixes := make([][]byte, len(bytesSlices)) | ||||
| 
 | ||||
| 	var i, j int | ||||
| 	for i = 1; i < len(bytesSlices); i++ { | ||||
| 		for j = 0; j < len(bytesSlices[i-1]) && j < len(bytesSlices[i]); j++ { | ||||
| 			if bytesSlices[i-1][j] != bytesSlices[i][j] { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		prefixLengths[i] = int32(j) | ||||
| 		suffixes[i] = bytesSlices[i][j:] | ||||
| 	} | ||||
| 
 | ||||
| 	data = deltaEncodeInt32s(prefixLengths) | ||||
| 	return append(data, DeltaLengthByteArrayEncode(suffixes)...) | ||||
| } | ||||
|  | @ -1,141 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package encoding | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"encoding/binary" | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
// plainEncodeBools bit-packs booleans LSB-first, eight per byte, per the
// parquet PLAIN encoding for BOOLEAN.
func plainEncodeBools(bs []bool) []byte {
	packed := make([]byte, (len(bs)+7)/8)

	for idx, b := range bs {
		if b {
			packed[idx>>3] |= 1 << uint(idx&7)
		}
	}

	return packed
}
| 
 | ||||
// plainEncodeInt32s writes each int32 as 4 little-endian bytes (parquet PLAIN).
func plainEncodeInt32s(i32s []int32) []byte {
	out := make([]byte, 4*len(i32s))

	off := 0
	for _, v := range i32s {
		binary.LittleEndian.PutUint32(out[off:], uint32(v))
		off += 4
	}

	return out
}
| 
 | ||||
// plainEncodeInt64s writes each int64 as 8 little-endian bytes (parquet PLAIN).
func plainEncodeInt64s(i64s []int64) []byte {
	out := make([]byte, 8*len(i64s))

	off := 0
	for _, v := range i64s {
		binary.LittleEndian.PutUint64(out[off:], uint64(v))
		off += 8
	}

	return out
}
| 
 | ||||
// plainEncodeFloat32s writes each float32 as its 4-byte little-endian IEEE-754
// bit pattern (parquet PLAIN).
func plainEncodeFloat32s(f32s []float32) []byte {
	out := make([]byte, 4*len(f32s))

	off := 0
	for _, v := range f32s {
		binary.LittleEndian.PutUint32(out[off:], math.Float32bits(v))
		off += 4
	}

	return out
}
| 
 | ||||
// plainEncodeFloat64s writes each float64 as its 8-byte little-endian IEEE-754
// bit pattern (parquet PLAIN).
func plainEncodeFloat64s(f64s []float64) []byte {
	out := make([]byte, 8*len(f64s))

	off := 0
	for _, v := range f64s {
		binary.LittleEndian.PutUint64(out[off:], math.Float64bits(v))
		off += 8
	}

	return out
}
| 
 | ||||
// plainEncodeBytesSlices encodes each byte slice as a 4-byte little-endian
// length followed by the raw bytes (parquet PLAIN for BYTE_ARRAY).
func plainEncodeBytesSlices(bytesSlices [][]byte) []byte {
	var buf bytes.Buffer

	lenBytes := make([]byte, 4)
	for _, s := range bytesSlices {
		binary.LittleEndian.PutUint32(lenBytes, uint32(len(s)))
		buf.Write(lenBytes) // bytes.Buffer writes never fail
		buf.Write(s)
	}

	return buf.Bytes()
}
| 
 | ||||
| // PlainEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0
 | ||||
| //
 | ||||
| // Supported Types: BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BYTE_ARRAY
 | ||||
| func PlainEncode(values interface{}, parquetType parquet.Type) []byte { | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_BOOLEAN: | ||||
| 		bs, ok := values.([]bool) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of bool")) | ||||
| 		} | ||||
| 		return plainEncodeBools(bs) | ||||
| 	case parquet.Type_INT32: | ||||
| 		i32s, ok := values.([]int32) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of int32")) | ||||
| 		} | ||||
| 		return plainEncodeInt32s(i32s) | ||||
| 	case parquet.Type_INT64: | ||||
| 		i64s, ok := values.([]int64) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of int64")) | ||||
| 		} | ||||
| 		return plainEncodeInt64s(i64s) | ||||
| 	case parquet.Type_FLOAT: | ||||
| 		f32s, ok := values.([]float32) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of float32")) | ||||
| 		} | ||||
| 		return plainEncodeFloat32s(f32s) | ||||
| 	case parquet.Type_DOUBLE: | ||||
| 		f64s, ok := values.([]float64) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of float64")) | ||||
| 		} | ||||
| 		return plainEncodeFloat64s(f64s) | ||||
| 	case parquet.Type_BYTE_ARRAY: | ||||
| 		bytesSlices, ok := values.([][]byte) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of byte array")) | ||||
| 		} | ||||
| 		return plainEncodeBytesSlices(bytesSlices) | ||||
| 	} | ||||
| 
 | ||||
| 	panic(fmt.Errorf("%v parquet type unsupported", parquetType)) | ||||
| } | ||||
|  | @ -1,148 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package encoding | ||||
| 
 | ||||
| import ( | ||||
| 	"math" | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
// TestPlainEncodeBools checks LSB-first bit packing against hand-computed
// byte values, covering nil/empty input and output spanning multiple bytes.
func TestPlainEncodeBools(t *testing.T) {
	testCases := []struct {
		bs             []bool
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]bool{}, []byte{}},
		{[]bool{true}, []byte{1}},
		{[]bool{false}, []byte{0}},
		{[]bool{true, true}, []byte{3}},
		{[]bool{false, false}, []byte{0}},
		{[]bool{false, true}, []byte{2}},
		{[]bool{true, false}, []byte{1}},
		{[]bool{false, false, false, false, false, false, false, true, true}, []byte{128, 1}},
	}

	for i, testCase := range testCases {
		result := plainEncodeBools(testCase.bs)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestPlainEncodeInt32s checks 4-byte little-endian encoding, including
// negative values, int32 extremes, and multi-element input.
func TestPlainEncodeInt32s(t *testing.T) {
	testCases := []struct {
		i32s           []int32
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]int32{}, []byte{}},
		{[]int32{1}, []byte{1, 0, 0, 0}},
		{[]int32{-1}, []byte{255, 255, 255, 255}},
		{[]int32{256}, []byte{0, 1, 0, 0}},
		{[]int32{math.MinInt32}, []byte{0, 0, 0, 128}},
		{[]int32{math.MaxInt32}, []byte{255, 255, 255, 127}},
		{[]int32{257, -2}, []byte{1, 1, 0, 0, 254, 255, 255, 255}},
	}

	for i, testCase := range testCases {
		result := plainEncodeInt32s(testCase.i32s)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestPlainEncodeInt64s checks 8-byte little-endian encoding, including
// negative values, int64 extremes, and multi-element input.
func TestPlainEncodeInt64s(t *testing.T) {
	testCases := []struct {
		i64s           []int64
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]int64{}, []byte{}},
		{[]int64{1}, []byte{1, 0, 0, 0, 0, 0, 0, 0}},
		{[]int64{-1}, []byte{255, 255, 255, 255, 255, 255, 255, 255}},
		{[]int64{256}, []byte{0, 1, 0, 0, 0, 0, 0, 0}},
		{[]int64{math.MinInt64}, []byte{0, 0, 0, 0, 0, 0, 0, 128}},
		{[]int64{math.MaxInt64}, []byte{255, 255, 255, 255, 255, 255, 255, 127}},
		{[]int64{257, -2}, []byte{1, 1, 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255}},
	}

	for i, testCase := range testCases {
		result := plainEncodeInt64s(testCase.i64s)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestPlainEncodeFloat32s checks little-endian IEEE-754 single-precision bit
// patterns against hand-computed byte sequences.
func TestPlainEncodeFloat32s(t *testing.T) {
	testCases := []struct {
		f32s           []float32
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]float32{}, []byte{}},
		{[]float32{1}, []byte{0, 0, 128, 63}},
		{[]float32{1.0}, []byte{0, 0, 128, 63}},
		{[]float32{-1}, []byte{0, 0, 128, 191}},
		{[]float32{-1.0}, []byte{0, 0, 128, 191}},
		{[]float32{256}, []byte{0, 0, 128, 67}},
		{[]float32{1.1}, []byte{205, 204, 140, 63}},
		{[]float32{-1.1}, []byte{205, 204, 140, 191}},
		{[]float32{math.Pi}, []byte{219, 15, 73, 64}},
		{[]float32{257, -2}, []byte{0, 128, 128, 67, 0, 0, 0, 192}},
		{[]float32{257.1, -2.1}, []byte{205, 140, 128, 67, 102, 102, 6, 192}},
	}

	for i, testCase := range testCases {
		result := plainEncodeFloat32s(testCase.f32s)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| 
 | ||||
// TestPlainEncodeFloat64s checks little-endian IEEE-754 double-precision bit
// patterns against hand-computed byte sequences.
func TestPlainEncodeFloat64s(t *testing.T) {
	testCases := []struct {
		f64s           []float64
		expectedResult []byte
	}{
		{nil, []byte{}},
		{[]float64{}, []byte{}},
		{[]float64{1}, []byte{0, 0, 0, 0, 0, 0, 240, 63}},
		{[]float64{1.0}, []byte{0, 0, 0, 0, 0, 0, 240, 63}},
		{[]float64{-1}, []byte{0, 0, 0, 0, 0, 0, 240, 191}},
		{[]float64{-1.0}, []byte{0, 0, 0, 0, 0, 0, 240, 191}},
		{[]float64{256}, []byte{0, 0, 0, 0, 0, 0, 112, 64}},
		{[]float64{1.1}, []byte{154, 153, 153, 153, 153, 153, 241, 63}},
		{[]float64{-1.1}, []byte{154, 153, 153, 153, 153, 153, 241, 191}},
		{[]float64{math.Pi}, []byte{24, 45, 68, 84, 251, 33, 9, 64}},
		{[]float64{257, -2}, []byte{0, 0, 0, 0, 0, 16, 112, 64, 0, 0, 0, 0, 0, 0, 0, 192}},
		{[]float64{257.1, -2.1}, []byte{154, 153, 153, 153, 153, 17, 112, 64, 205, 204, 204, 204, 204, 204, 0, 192}},
	}

	for i, testCase := range testCases {
		result := plainEncodeFloat64s(testCase.f64s)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
| } | ||||
|  | @ -1,85 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package encoding | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
| func rleEncodeInt32s(i32s []int32, bitWidth int32) (data []byte) { | ||||
| 	j := 0 | ||||
| 	for i := 0; i < len(i32s); i = j { | ||||
| 		for j = i + 1; j < len(i32s) && i32s[i] == i32s[j]; j++ { | ||||
| 		} | ||||
| 
 | ||||
| 		headerBytes := varIntEncode(uint64((j - i) << 1)) | ||||
| 		data = append(data, headerBytes...) | ||||
| 
 | ||||
| 		valBytes := plainEncodeInt32s([]int32{i32s[i]}) | ||||
| 		byteCount := (bitWidth + 7) / 8 | ||||
| 		data = append(data, valBytes[:byteCount]...) | ||||
| 	} | ||||
| 
 | ||||
| 	return data | ||||
| } | ||||
| 
 | ||||
| func rleEncodeInt64s(i64s []int64, bitWidth int32) (data []byte) { | ||||
| 	j := 0 | ||||
| 	for i := 0; i < len(i64s); i = j { | ||||
| 		for j = i + 1; j < len(i64s) && i64s[i] == i64s[j]; j++ { | ||||
| 		} | ||||
| 
 | ||||
| 		headerBytes := varIntEncode(uint64((j - i) << 1)) | ||||
| 		data = append(data, headerBytes...) | ||||
| 
 | ||||
| 		valBytes := plainEncodeInt64s([]int64{i64s[i]}) | ||||
| 		byteCount := (bitWidth + 7) / 8 | ||||
| 		data = append(data, valBytes[:byteCount]...) | ||||
| 	} | ||||
| 
 | ||||
| 	return data | ||||
| } | ||||
| 
 | ||||
| // RLEBitPackedHybridEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3
 | ||||
| //
 | ||||
| // Supported Types: INT32, INT64
 | ||||
| func RLEBitPackedHybridEncode(values interface{}, bitWidth int32, parquetType parquet.Type) []byte { | ||||
| 	var rleBytes []byte | ||||
| 
 | ||||
| 	switch parquetType { | ||||
| 	case parquet.Type_INT32: | ||||
| 		i32s, ok := values.([]int32) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of int32")) | ||||
| 		} | ||||
| 		rleBytes = rleEncodeInt32s(i32s, bitWidth) | ||||
| 	case parquet.Type_INT64: | ||||
| 		i64s, ok := values.([]int64) | ||||
| 		if !ok { | ||||
| 			panic(fmt.Errorf("expected slice of int64")) | ||||
| 		} | ||||
| 		rleBytes = rleEncodeInt64s(i64s, bitWidth) | ||||
| 	default: | ||||
| 		panic(fmt.Errorf("%v parquet type unsupported", parquetType)) | ||||
| 	} | ||||
| 
 | ||||
| 	lenBytes := plainEncodeInt32s([]int32{int32(len(rleBytes))}) | ||||
| 	return append(lenBytes, rleBytes...) | ||||
| } | ||||
|  | @ -1,45 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package encoding | ||||
| 
 | ||||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
// TestRLEEncodeInt32s checks run-length encoding of int32 runs: each run is a
// varint header (count << 1) followed by the value in ceil(bitWidth/8) bytes.
func TestRLEEncodeInt32s(t *testing.T) {
	testCases := []struct {
		values         []int32
		bitWidth       int32
		dataType       parquet.Type
		expectedResult []byte
	}{
		{[]int32{3, 5, 7}, 1, parquet.Type_INT32, []byte{2, 3, 2, 5, 2, 7}},
		{[]int32{3, 3, 3}, 1, parquet.Type_INT32, []byte{6, 3}},
		{[]int32{2, 2, 3, 3, 3}, 1, parquet.Type_INT32, []byte{4, 2, 6, 3}},
	}

	for i, testCase := range testCases {
		result := rleEncodeInt32s(testCase.values, testCase.bitWidth)
		if !reflect.DeepEqual(result, testCase.expectedResult) {
			t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result)
		}
	}
}
|  | @ -1,61 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package encoding | ||||
| 
 | ||||
| import ( | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/common" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
| // RLEDictEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#dictionary-encoding-plain_dictionary--2-and-rle_dictionary--8 and returns dictionary page data and data page data.
 | ||||
| //
 | ||||
| // Dictionary page data contains PLAIN encodeed slice of uniquely fully defined non-nil values.
 | ||||
| // Data page data contains RLE/Bit-Packed Hybrid encoded indices of fully defined non-nil values.
 | ||||
| //
 | ||||
| // Supported Types: BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BYTE_ARRAY
 | ||||
| func RLEDictEncode(values []interface{}, parquetType parquet.Type, bitWidth int32) (dictPageData, dataPageData []byte, dictValueCount int32, indexBitWidth uint8) { | ||||
| 	var definedValues []interface{} | ||||
| 	var indices []int32 | ||||
| 
 | ||||
| 	valueIndexMap := make(map[interface{}]int32) | ||||
| 	j := 0 | ||||
| 	for i := 0; i < len(values); i = j { | ||||
| 		for j = i; j < len(values); j++ { | ||||
| 			value := values[j] | ||||
| 			if value == nil { | ||||
| 				continue | ||||
| 			} | ||||
| 
 | ||||
| 			index, found := valueIndexMap[value] | ||||
| 			if !found { | ||||
| 				index = int32(len(definedValues)) | ||||
| 				definedValues = append(definedValues, value) | ||||
| 				valueIndexMap[value] = index | ||||
| 			} | ||||
| 
 | ||||
| 			indices = append(indices, index) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	indexBitWidth = uint8(common.BitWidth(uint64(indices[len(indices)-1]))) | ||||
| 
 | ||||
| 	dictPageData = PlainEncode(common.ToSliceValue(definedValues, parquetType), parquetType) | ||||
| 	dataPageData = RLEBitPackedHybridEncode(indices, int32(indexBitWidth), parquet.Type_INT32) | ||||
| 
 | ||||
| 	return dictPageData, dataPageData, int32(len(definedValues)), indexBitWidth | ||||
| } | ||||
|  | @ -1,36 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| ) | ||||
| 
 | ||||
// uint32ToBytes returns v as its 4-byte little-endian representation.
func uint32ToBytes(v uint32) []byte {
	return []byte{byte(v), byte(v >> 8), byte(v >> 16), byte(v >> 24)}
}
| 
 | ||||
// bytesToUint32 decodes the first 4 bytes of buf as a little-endian uint32.
func bytesToUint32(buf []byte) uint32 {
	return uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
}
| 
 | ||||
// bytesToUint64 decodes the first 8 bytes of buf as a little-endian uint64.
func bytesToUint64(buf []byte) uint64 {
	var v uint64
	for i := 7; i >= 0; i-- {
		v = v<<8 | uint64(buf[i])
	}
	return v
}
										
											Binary file not shown.
										
									
								
							|  | @ -1,20 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| var GoUnusedProtection__ int | ||||
|  | @ -1,33 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 
 | ||||
| 	"git.apache.org/thrift.git/lib/go/thrift" | ||||
| ) | ||||
| 
 | ||||
// (needed to ensure safety because of naive import list construction.)
// Blank references keep the thrift/fmt/bytes imports alive even when the
// generated constants below need none of them.
var _ = thrift.ZERO
var _ = fmt.Printf
var _ = bytes.Equal

// init is intentionally empty; the thrift generator always emits one for
// constant registration, and this schema defines no constants.
func init() {
}
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -1,10 +0,0 @@ | |||
#!/bin/bash
#
# Regenerate the Go thrift bindings for the Apache Parquet format.

set -e

# Fetch a pinned revision of the official parquet-format thrift definition,
# regenerate the Go bindings, and gofmt the generated package.
rm -f parquet.thrift
wget -q https://github.com/apache/parquet-format/raw/df6132b94f273521a418a74442085fdd5a0aa009/src/main/thrift/parquet.thrift
thrift --gen go parquet.thrift
gofmt -w -s gen-go/parquet
|  | @ -1,824 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"math"
	"math/bits"
	"strings"

	"git.apache.org/thrift.git/lib/go/thrift"
	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
)
| 
 | ||||
// getBitWidth - returns bits required to place num e.g.
//
//    num | width
//   -----|-------
//     0  |   0
//     1  |   1
//     2  |   2
//     3  |   2
//     4  |   3
//     5  |   3
//    ... |  ...
//    ... |  ...
//
func getBitWidth(num uint64) (width uint64) {
	// bits.Len64 is exactly this computation (0 for 0, otherwise the position
	// of the highest set bit plus one); prefer it over a hand-rolled shift loop.
	return uint64(bits.Len64(num))
}
| 
 | ||||
| // getMaxDefLevel - get maximum definition level.
 | ||||
| func getMaxDefLevel(nameIndexMap map[string]int, schemaElements []*parquet.SchemaElement, path []string) (v int) { | ||||
| 	for i := 1; i <= len(path); i++ { | ||||
| 		name := strings.Join(path[:i], ".") | ||||
| 		if index, ok := nameIndexMap[name]; ok { | ||||
| 			if schemaElements[index].GetRepetitionType() != parquet.FieldRepetitionType_REQUIRED { | ||||
| 				v++ | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return v | ||||
| } | ||||
| 
 | ||||
| // getMaxRepLevel - get maximum repetition level.
 | ||||
| func getMaxRepLevel(nameIndexMap map[string]int, schemaElements []*parquet.SchemaElement, path []string) (v int) { | ||||
| 	for i := 1; i <= len(path); i++ { | ||||
| 		name := strings.Join(path[:i], ".") | ||||
| 		if index, ok := nameIndexMap[name]; ok { | ||||
| 			if schemaElements[index].GetRepetitionType() == parquet.FieldRepetitionType_REPEATED { | ||||
| 				v++ | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return v | ||||
| } | ||||
| 
 | ||||
| func readPageHeader(reader *thrift.TBufferedTransport) (*parquet.PageHeader, error) { | ||||
| 	pageHeader := parquet.NewPageHeader() | ||||
| 	if err := pageHeader.Read(thrift.NewTCompactProtocol(reader)); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return pageHeader, nil | ||||
| } | ||||
| 
 | ||||
| func readPage( | ||||
| 	thriftReader *thrift.TBufferedTransport, | ||||
| 	metadata *parquet.ColumnMetaData, | ||||
| 	columnNameIndexMap map[string]int, | ||||
| 	schemaElements []*parquet.SchemaElement, | ||||
| ) (page *page, definitionLevels, numRows int64, err error) { | ||||
| 
 | ||||
| 	pageHeader, err := readPageHeader(thriftReader) | ||||
| 	if err != nil { | ||||
| 		return nil, 0, 0, err | ||||
| 	} | ||||
| 
 | ||||
| 	read := func() (data []byte, err error) { | ||||
| 		var repLevelsLen, defLevelsLen int32 | ||||
| 		var repLevelsBuf, defLevelsBuf []byte | ||||
| 
 | ||||
| 		if pageHeader.GetType() == parquet.PageType_DATA_PAGE_V2 { | ||||
| 			if pageHeader.DataPageHeaderV2 == nil { | ||||
| 				return nil, errors.New("parquet: Header not set") | ||||
| 			} | ||||
| 			repLevelsLen = pageHeader.DataPageHeaderV2.GetRepetitionLevelsByteLength() | ||||
| 			repLevelsBuf = make([]byte, repLevelsLen) | ||||
| 
 | ||||
| 			n, err := io.ReadFull(thriftReader, repLevelsBuf) | ||||
| 			if err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 			if n != int(repLevelsLen) { | ||||
| 				return nil, fmt.Errorf("expected parquet header repetition levels %d, got %d", repLevelsLen, n) | ||||
| 			} | ||||
| 
 | ||||
| 			defLevelsLen = pageHeader.DataPageHeaderV2.GetDefinitionLevelsByteLength() | ||||
| 			defLevelsBuf = make([]byte, defLevelsLen) | ||||
| 
 | ||||
| 			n, err = io.ReadFull(thriftReader, defLevelsBuf) | ||||
| 			if err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 			if n != int(defLevelsLen) { | ||||
| 				return nil, fmt.Errorf("expected parquet header definition levels %d, got %d", defLevelsLen, n) | ||||
| 			} | ||||
| 		} | ||||
| 		dbLen := pageHeader.GetCompressedPageSize() - repLevelsLen - defLevelsLen | ||||
| 		if dbLen < 0 { | ||||
| 			return nil, errors.New("parquet: negative data length") | ||||
| 		} | ||||
| 
 | ||||
| 		dataBuf := make([]byte, dbLen) | ||||
| 		n, err := io.ReadFull(thriftReader, dataBuf) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if n != int(dbLen) { | ||||
| 			return nil, fmt.Errorf("expected parquet data buffer %d, got %d", dbLen, n) | ||||
| 		} | ||||
| 
 | ||||
| 		if dataBuf, err = compressionCodec(metadata.GetCodec()).uncompress(dataBuf); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		if repLevelsLen == 0 && defLevelsLen == 0 { | ||||
| 			return dataBuf, nil | ||||
| 		} | ||||
| 
 | ||||
| 		if repLevelsLen > 0 { | ||||
| 			data = append(data, uint32ToBytes(uint32(repLevelsLen))...) | ||||
| 			data = append(data, repLevelsBuf...) | ||||
| 		} | ||||
| 
 | ||||
| 		if defLevelsLen > 0 { | ||||
| 			data = append(data, uint32ToBytes(uint32(defLevelsLen))...) | ||||
| 			data = append(data, defLevelsBuf...) | ||||
| 		} | ||||
| 
 | ||||
| 		data = append(data, dataBuf...) | ||||
| 
 | ||||
| 		return data, nil | ||||
| 	} | ||||
| 
 | ||||
| 	buf, err := read() | ||||
| 	if err != nil { | ||||
| 		return nil, 0, 0, err | ||||
| 	} | ||||
| 	if metadata == nil { | ||||
| 		return nil, 0, 0, errors.New("parquet: metadata not set") | ||||
| 	} | ||||
| 	path := append([]string{}, metadata.GetPathInSchema()...) | ||||
| 
 | ||||
| 	bytesReader := bytes.NewReader(buf) | ||||
| 	pageType := pageHeader.GetType() | ||||
| 	switch pageType { | ||||
| 	case parquet.PageType_INDEX_PAGE: | ||||
| 		return nil, 0, 0, fmt.Errorf("page type %v is not supported", parquet.PageType_INDEX_PAGE) | ||||
| 
 | ||||
| 	case parquet.PageType_DICTIONARY_PAGE: | ||||
| 		page = newDictPage() | ||||
| 		page.Header = pageHeader | ||||
| 		table := new(table) | ||||
| 		table.Path = path | ||||
| 		if pageHeader.DictionaryPageHeader == nil { | ||||
| 			return nil, 0, 0, errors.New("parquet: dictionary not set") | ||||
| 		} | ||||
| 		values, err := readValues(bytesReader, metadata.GetType(), | ||||
| 			uint64(pageHeader.DictionaryPageHeader.GetNumValues()), 0) | ||||
| 		if err != nil { | ||||
| 			return nil, 0, 0, err | ||||
| 		} | ||||
| 		table.Values = getTableValues(values, metadata.GetType()) | ||||
| 		page.DataTable = table | ||||
| 
 | ||||
| 		return page, 0, 0, nil | ||||
| 
 | ||||
| 	case parquet.PageType_DATA_PAGE, parquet.PageType_DATA_PAGE_V2: | ||||
| 		name := strings.Join(path, ".") | ||||
| 
 | ||||
| 		page = newDataPage() | ||||
| 		page.Header = pageHeader | ||||
| 
 | ||||
| 		maxDefinitionLevel := getMaxDefLevel(columnNameIndexMap, schemaElements, path) | ||||
| 		maxRepetitionLevel := getMaxRepLevel(columnNameIndexMap, schemaElements, path) | ||||
| 
 | ||||
| 		var numValues uint64 | ||||
| 		var encodingType parquet.Encoding | ||||
| 
 | ||||
| 		if pageHeader.GetType() == parquet.PageType_DATA_PAGE { | ||||
| 			if pageHeader.DataPageHeader == nil { | ||||
| 				return nil, 0, 0, errors.New("parquet: Header not set") | ||||
| 			} | ||||
| 			numValues = uint64(pageHeader.DataPageHeader.GetNumValues()) | ||||
| 			encodingType = pageHeader.DataPageHeader.GetEncoding() | ||||
| 		} else { | ||||
| 			if pageHeader.DataPageHeaderV2 == nil { | ||||
| 				return nil, 0, 0, errors.New("parquet: Header not set") | ||||
| 			} | ||||
| 			numValues = uint64(pageHeader.DataPageHeaderV2.GetNumValues()) | ||||
| 			encodingType = pageHeader.DataPageHeaderV2.GetEncoding() | ||||
| 		} | ||||
| 
 | ||||
| 		var repetitionLevels []int64 | ||||
| 		if maxRepetitionLevel > 0 { | ||||
| 			values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, | ||||
| 				-1, numValues, getBitWidth(uint64(maxRepetitionLevel))) | ||||
| 			if err != nil { | ||||
| 				return nil, 0, 0, err | ||||
| 			} | ||||
| 
 | ||||
| 			if repetitionLevels = values.([]int64); len(repetitionLevels) > int(numValues) && int(numValues) >= 0 { | ||||
| 				repetitionLevels = repetitionLevels[:numValues] | ||||
| 			} | ||||
| 		} else { | ||||
| 			if numValues > math.MaxInt64/8 { | ||||
| 				return nil, 0, 0, errors.New("parquet: numvalues too large") | ||||
| 			} | ||||
| 			repetitionLevels = make([]int64, numValues) | ||||
| 		} | ||||
| 
 | ||||
| 		var definitionLevels []int64 | ||||
| 		if maxDefinitionLevel > 0 { | ||||
| 			values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, | ||||
| 				-1, numValues, getBitWidth(uint64(maxDefinitionLevel))) | ||||
| 			if err != nil { | ||||
| 				return nil, 0, 0, err | ||||
| 			} | ||||
| 			if numValues > math.MaxInt64/8 { | ||||
| 				return nil, 0, 0, errors.New("parquet: numvalues too large") | ||||
| 			} | ||||
| 			if definitionLevels = values.([]int64); len(definitionLevels) > int(numValues) { | ||||
| 				definitionLevels = definitionLevels[:numValues] | ||||
| 			} | ||||
| 		} else { | ||||
| 			if numValues > math.MaxInt64/8 { | ||||
| 				return nil, 0, 0, errors.New("parquet: numvalues too large") | ||||
| 			} | ||||
| 			definitionLevels = make([]int64, numValues) | ||||
| 		} | ||||
| 
 | ||||
| 		var numNulls uint64 | ||||
| 		for i := 0; i < len(definitionLevels); i++ { | ||||
| 			if definitionLevels[i] != int64(maxDefinitionLevel) { | ||||
| 				numNulls++ | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		var convertedType parquet.ConvertedType = -1 | ||||
| 		if schemaElements[columnNameIndexMap[name]].IsSetConvertedType() { | ||||
| 			convertedType = schemaElements[columnNameIndexMap[name]].GetConvertedType() | ||||
| 		} | ||||
| 		values, valueType, err := readDataPageValues(bytesReader, encodingType, metadata.GetType(), | ||||
| 			convertedType, uint64(len(definitionLevels))-numNulls, | ||||
| 			uint64(schemaElements[columnNameIndexMap[name]].GetTypeLength())) | ||||
| 		if err != nil { | ||||
| 			return nil, 0, 0, err | ||||
| 		} | ||||
| 		tableValues := getTableValues(values, valueType) | ||||
| 
 | ||||
| 		table := new(table) | ||||
| 		table.Path = path | ||||
| 		table.RepetitionType = schemaElements[columnNameIndexMap[name]].GetRepetitionType() | ||||
| 		table.MaxRepetitionLevel = int32(maxRepetitionLevel) | ||||
| 		table.MaxDefinitionLevel = int32(maxDefinitionLevel) | ||||
| 		table.Values = make([]interface{}, len(definitionLevels)) | ||||
| 		table.RepetitionLevels = make([]int32, len(definitionLevels)) | ||||
| 		table.DefinitionLevels = make([]int32, len(definitionLevels)) | ||||
| 
 | ||||
| 		j := 0 | ||||
| 		numRows := int64(0) | ||||
| 		for i := 0; i < len(definitionLevels); i++ { | ||||
| 			table.RepetitionLevels[i] = int32(repetitionLevels[i]) | ||||
| 			table.DefinitionLevels[i] = int32(definitionLevels[i]) | ||||
| 			if int(table.DefinitionLevels[i]) == maxDefinitionLevel { | ||||
| 				table.Values[i] = tableValues[j] | ||||
| 				j++ | ||||
| 			} | ||||
| 			if table.RepetitionLevels[i] == 0 { | ||||
| 				numRows++ | ||||
| 			} | ||||
| 		} | ||||
| 		page.DataTable = table | ||||
| 
 | ||||
| 		return page, int64(len(definitionLevels)), numRows, nil | ||||
| 	} | ||||
| 
 | ||||
| 	return nil, 0, 0, fmt.Errorf("unknown page type %v", pageType) | ||||
| } | ||||
| 
 | ||||
// page represents one parquet page (dictionary or data) of a column chunk.
// While reading, the decoded form lives in DataTable; while writing, the
// serialized (header + possibly compressed) bytes live in RawData.
type page struct {
	Header       *parquet.PageHeader      // Thrift page header read from / written to the file
	DataTable    *table                   // Decoded values plus repetition/definition levels
	RawData      []byte                   // Encoded page bytes as stored in the parquet file
	CompressType parquet.CompressionCodec // Compression codec: gzip/snappy/none
	DataType     parquet.Type             // Parquet physical type of the values in the page
	Path         []string                 // Column path in the schema (includes the root)
	MaxVal       interface{}              // Maximum of the values (used for page statistics)
	MinVal       interface{}              // Minimum of the values (used for page statistics)
	PageSize     int32                    // Target page size; newPage sets this to defaultPageSize
}
| 
 | ||||
| func newPage() *page { | ||||
| 	return &page{ | ||||
| 		Header:   parquet.NewPageHeader(), | ||||
| 		PageSize: defaultPageSize, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func newDictPage() *page { | ||||
| 	page := newPage() | ||||
| 	page.Header.DictionaryPageHeader = parquet.NewDictionaryPageHeader() | ||||
| 	return page | ||||
| } | ||||
| 
 | ||||
| func newDataPage() *page { | ||||
| 	page := newPage() | ||||
| 	page.Header.DataPageHeader = parquet.NewDataPageHeader() | ||||
| 	return page | ||||
| } | ||||
| 
 | ||||
| func (page *page) decode(dictPage *page) { | ||||
| 	if dictPage == nil || page == nil || page.Header.DataPageHeader == nil || | ||||
| 		(page.Header.DataPageHeader.Encoding != parquet.Encoding_RLE_DICTIONARY && | ||||
| 			page.Header.DataPageHeader.Encoding != parquet.Encoding_PLAIN_DICTIONARY) { | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	for i := 0; i < len(page.DataTable.Values); i++ { | ||||
| 		if page.DataTable.Values[i] != nil { | ||||
| 			index, ok := page.DataTable.Values[i].(int64) | ||||
| 			if !ok || int(index) >= len(dictPage.DataTable.Values) { | ||||
| 				return | ||||
| 			} | ||||
| 			page.DataTable.Values[i] = dictPage.DataTable.Values[index] | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // Get RepetitionLevels and Definitions from RawData
 | ||||
| func (page *page) getRLDLFromRawData(columnNameIndexMap map[string]int, schemaElements []*parquet.SchemaElement) (numValues int64, numRows int64, err error) { | ||||
| 	bytesReader := bytes.NewReader(page.RawData) | ||||
| 
 | ||||
| 	pageType := page.Header.GetType() | ||||
| 
 | ||||
| 	var buf []byte | ||||
| 	if pageType == parquet.PageType_DATA_PAGE_V2 { | ||||
| 		var repLevelsLen, defLevelsLen int32 | ||||
| 		var repLevelsBuf, defLevelsBuf []byte | ||||
| 		if page.Header.DataPageHeaderV2 == nil { | ||||
| 			return 0, 0, errors.New("parquet: Header not set") | ||||
| 		} | ||||
| 		repLevelsLen = page.Header.DataPageHeaderV2.GetRepetitionLevelsByteLength() | ||||
| 		repLevelsBuf = make([]byte, repLevelsLen) | ||||
| 		if _, err = bytesReader.Read(repLevelsBuf); err != nil { | ||||
| 			return 0, 0, err | ||||
| 		} | ||||
| 
 | ||||
| 		defLevelsLen = page.Header.DataPageHeaderV2.GetDefinitionLevelsByteLength() | ||||
| 		defLevelsBuf = make([]byte, defLevelsLen) | ||||
| 		if _, err = bytesReader.Read(defLevelsBuf); err != nil { | ||||
| 			return 0, 0, err | ||||
| 		} | ||||
| 
 | ||||
| 		dataBuf := make([]byte, len(page.RawData)-int(repLevelsLen)-int(defLevelsLen)) | ||||
| 		if _, err = bytesReader.Read(dataBuf); err != nil { | ||||
| 			return 0, 0, err | ||||
| 		} | ||||
| 
 | ||||
| 		if repLevelsLen == 0 && defLevelsLen == 0 { | ||||
| 			buf = dataBuf | ||||
| 		} else { | ||||
| 			if repLevelsLen > 0 { | ||||
| 				buf = append(buf, uint32ToBytes(uint32(repLevelsLen))...) | ||||
| 				buf = append(buf, repLevelsBuf...) | ||||
| 			} | ||||
| 
 | ||||
| 			if defLevelsLen > 0 { | ||||
| 				buf = append(buf, uint32ToBytes(uint32(defLevelsLen))...) | ||||
| 				buf = append(buf, defLevelsBuf...) | ||||
| 			} | ||||
| 
 | ||||
| 			buf = append(buf, dataBuf...) | ||||
| 		} | ||||
| 	} else { | ||||
| 		if buf, err = compressionCodec(page.CompressType).uncompress(page.RawData); err != nil { | ||||
| 			return 0, 0, err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	bytesReader = bytes.NewReader(buf) | ||||
| 
 | ||||
| 	switch pageType { | ||||
| 	case parquet.PageType_DICTIONARY_PAGE: | ||||
| 		table := new(table) | ||||
| 		table.Path = page.Path | ||||
| 		page.DataTable = table | ||||
| 		return 0, 0, nil | ||||
| 
 | ||||
| 	case parquet.PageType_DATA_PAGE, parquet.PageType_DATA_PAGE_V2: | ||||
| 		var numValues uint64 | ||||
| 		if pageType == parquet.PageType_DATA_PAGE { | ||||
| 			if page.Header.DataPageHeader == nil { | ||||
| 				return 0, 0, errors.New("parquet: Header not set") | ||||
| 			} | ||||
| 			numValues = uint64(page.Header.DataPageHeader.GetNumValues()) | ||||
| 		} else { | ||||
| 			if page.Header.DataPageHeaderV2 == nil { | ||||
| 				return 0, 0, errors.New("parquet: Header not set") | ||||
| 			} | ||||
| 			numValues = uint64(page.Header.DataPageHeaderV2.GetNumValues()) | ||||
| 		} | ||||
| 
 | ||||
| 		maxDefinitionLevel := getMaxDefLevel(columnNameIndexMap, schemaElements, page.Path) | ||||
| 		maxRepetitionLevel := getMaxRepLevel(columnNameIndexMap, schemaElements, page.Path) | ||||
| 
 | ||||
| 		var repetitionLevels []int64 | ||||
| 		if maxRepetitionLevel > 0 { | ||||
| 			values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, | ||||
| 				-1, numValues, getBitWidth(uint64(maxRepetitionLevel))) | ||||
| 			if err != nil { | ||||
| 				return 0, 0, err | ||||
| 			} | ||||
| 
 | ||||
| 			if repetitionLevels = values.([]int64); uint64(len(repetitionLevels)) > numValues { | ||||
| 				repetitionLevels = repetitionLevels[:numValues] | ||||
| 			} | ||||
| 		} else { | ||||
| 			repetitionLevels = make([]int64, numValues) | ||||
| 		} | ||||
| 
 | ||||
| 		var definitionLevels []int64 | ||||
| 		if maxDefinitionLevel > 0 { | ||||
| 			values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, | ||||
| 				-1, numValues, getBitWidth(uint64(maxDefinitionLevel))) | ||||
| 			if err != nil { | ||||
| 				return 0, 0, err | ||||
| 			} | ||||
| 			if definitionLevels = values.([]int64); uint64(len(definitionLevels)) > numValues { | ||||
| 				definitionLevels = definitionLevels[:numValues] | ||||
| 			} | ||||
| 		} else { | ||||
| 			definitionLevels = make([]int64, numValues) | ||||
| 		} | ||||
| 
 | ||||
| 		table := new(table) | ||||
| 		table.Path = page.Path | ||||
| 		name := strings.Join(page.Path, ".") | ||||
| 		table.RepetitionType = schemaElements[columnNameIndexMap[name]].GetRepetitionType() | ||||
| 		table.MaxRepetitionLevel = int32(maxRepetitionLevel) | ||||
| 		table.MaxDefinitionLevel = int32(maxDefinitionLevel) | ||||
| 		table.Values = make([]interface{}, len(definitionLevels)) | ||||
| 		table.RepetitionLevels = make([]int32, len(definitionLevels)) | ||||
| 		table.DefinitionLevels = make([]int32, len(definitionLevels)) | ||||
| 
 | ||||
| 		numRows := int64(0) | ||||
| 		for i := 0; i < len(definitionLevels); i++ { | ||||
| 			table.RepetitionLevels[i] = int32(repetitionLevels[i]) | ||||
| 			table.DefinitionLevels[i] = int32(definitionLevels[i]) | ||||
| 			if table.RepetitionLevels[i] == 0 { | ||||
| 				numRows++ | ||||
| 			} | ||||
| 		} | ||||
| 		page.DataTable = table | ||||
| 		page.RawData = buf[len(buf)-bytesReader.Len():] | ||||
| 
 | ||||
| 		return int64(numValues), numRows, nil | ||||
| 	} | ||||
| 
 | ||||
| 	return 0, 0, fmt.Errorf("Unsupported page type %v", pageType) | ||||
| } | ||||
| 
 | ||||
| func (page *page) getValueFromRawData(columnNameIndexMap map[string]int, schemaElements []*parquet.SchemaElement) (err error) { | ||||
| 	pageType := page.Header.GetType() | ||||
| 	switch pageType { | ||||
| 	case parquet.PageType_DICTIONARY_PAGE: | ||||
| 		bytesReader := bytes.NewReader(page.RawData) | ||||
| 		var values interface{} | ||||
| 		if page.Header.DictionaryPageHeader == nil { | ||||
| 			return errors.New("parquet: dictionary not set") | ||||
| 		} | ||||
| 		values, err = readValues(bytesReader, page.DataType, | ||||
| 			uint64(page.Header.DictionaryPageHeader.GetNumValues()), 0) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		page.DataTable.Values = getTableValues(values, page.DataType) | ||||
| 		return nil | ||||
| 
 | ||||
| 	case parquet.PageType_DATA_PAGE_V2: | ||||
| 		if page.RawData, err = compressionCodec(page.CompressType).uncompress(page.RawData); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		fallthrough | ||||
| 	case parquet.PageType_DATA_PAGE: | ||||
| 		encodingType := page.Header.DataPageHeader.GetEncoding() | ||||
| 		bytesReader := bytes.NewReader(page.RawData) | ||||
| 
 | ||||
| 		var numNulls uint64 | ||||
| 		for i := 0; i < len(page.DataTable.DefinitionLevels); i++ { | ||||
| 			if page.DataTable.DefinitionLevels[i] != page.DataTable.MaxDefinitionLevel { | ||||
| 				numNulls++ | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		name := strings.Join(page.DataTable.Path, ".") | ||||
| 		var convertedType parquet.ConvertedType = -1 | ||||
| 
 | ||||
| 		if schemaElements[columnNameIndexMap[name]].IsSetConvertedType() { | ||||
| 			convertedType = schemaElements[columnNameIndexMap[name]].GetConvertedType() | ||||
| 		} | ||||
| 
 | ||||
| 		values, _, err := readDataPageValues(bytesReader, encodingType, page.DataType, | ||||
| 			convertedType, uint64(len(page.DataTable.DefinitionLevels))-numNulls, | ||||
| 			uint64(schemaElements[columnNameIndexMap[name]].GetTypeLength())) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		tableValues := getTableValues(values, page.DataType) | ||||
| 
 | ||||
| 		j := 0 | ||||
| 		for i := 0; i < len(page.DataTable.DefinitionLevels); i++ { | ||||
| 			if page.DataTable.DefinitionLevels[i] == page.DataTable.MaxDefinitionLevel { | ||||
| 				page.DataTable.Values[i] = tableValues[j] | ||||
| 				j++ | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		page.RawData = []byte{} | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	return fmt.Errorf("unsupported page type %v", pageType) | ||||
| } | ||||
| 
 | ||||
// toDataPage serializes the page as a v1 data page: RLE-encoded repetition
// and definition levels followed by the encoded non-null values, all
// compressed together with compressType. It rebuilds page.Header with the
// page sizes and (optional) min/max statistics, stores the thrift-encoded
// header plus compressed body in page.RawData, and returns those bytes.
// Panics if compression or thrift serialization fails (file-wide style).
func (page *page) toDataPage(compressType parquet.CompressionCodec) []byte {
	// Only non-null slots (definition level == max) carry stored values.
	values := []interface{}{}
	for i := range page.DataTable.DefinitionLevels {
		if page.DataTable.DefinitionLevels[i] == page.DataTable.MaxDefinitionLevel {
			values = append(values, page.DataTable.Values[i])
		}
	}
	valuesBytes := encodeValues(interfacesToValues(values, page.DataTable.Type), page.DataType, page.DataTable.Encoding, page.DataTable.BitWidth)

	// Definition levels, RLE/bit-packed-hybrid encoded; omitted entirely
	// for required (max level 0) columns.
	var defLevelBytes []byte
	if page.DataTable.MaxDefinitionLevel > 0 {
		defLevels := make([]int64, len(page.DataTable.DefinitionLevels))
		for i := range page.DataTable.DefinitionLevels {
			defLevels[i] = int64(page.DataTable.DefinitionLevels[i])
		}
		defLevelBytes = valuesToRLEBitPackedHybridBytes(
			defLevels,
			int32(getBitWidth(uint64(page.DataTable.MaxDefinitionLevel))),
			parquet.Type_INT64,
		)
	}

	// Repetition levels, same treatment; only present for nested columns.
	var repLevelBytes []byte
	if page.DataTable.MaxRepetitionLevel > 0 {
		repLevels := make([]int64, len(page.DataTable.DefinitionLevels))
		for i := range page.DataTable.DefinitionLevels {
			repLevels[i] = int64(page.DataTable.RepetitionLevels[i])
		}
		repLevelBytes = valuesToRLEBitPackedHybridBytes(
			repLevels,
			int32(getBitWidth(uint64(page.DataTable.MaxRepetitionLevel))),
			parquet.Type_INT64,
		)
	}

	// v1 layout: rep levels, then def levels, then values — compressed as one unit.
	data := repLevelBytes
	data = append(data, defLevelBytes...)
	data = append(data, valuesBytes...)

	compressedData, err := compressionCodec(compressType).compress(data)
	if err != nil {
		panic(err)
	}

	page.Header = parquet.NewPageHeader()
	page.Header.Type = parquet.PageType_DATA_PAGE
	page.Header.CompressedPageSize = int32(len(compressedData))
	page.Header.UncompressedPageSize = int32(len(data))
	page.Header.DataPageHeader = parquet.NewDataPageHeader()
	page.Header.DataPageHeader.NumValues = int32(len(page.DataTable.DefinitionLevels))
	page.Header.DataPageHeader.DefinitionLevelEncoding = parquet.Encoding_RLE
	page.Header.DataPageHeader.RepetitionLevelEncoding = parquet.Encoding_RLE
	page.Header.DataPageHeader.Encoding = page.DataTable.Encoding
	page.Header.DataPageHeader.Statistics = parquet.NewStatistics()
	if page.MaxVal != nil {
		tmpBuf := valueToBytes(page.MaxVal, page.DataType)
		if page.DataType == parquet.Type_BYTE_ARRAY {
			switch page.DataTable.ConvertedType {
			case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL:
				// Drop the 4-byte length prefix: statistics store raw bytes.
				tmpBuf = tmpBuf[4:]
			}
		}
		page.Header.DataPageHeader.Statistics.Max = tmpBuf
	}
	if page.MinVal != nil {
		tmpBuf := valueToBytes(page.MinVal, page.DataType)
		if page.DataType == parquet.Type_BYTE_ARRAY {
			switch page.DataTable.ConvertedType {
			case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL:
				tmpBuf = tmpBuf[4:]
			}
		}
		page.Header.DataPageHeader.Statistics.Min = tmpBuf
	}

	// Thrift-compact-encode the header and prepend it to the page body.
	ts := thrift.NewTSerializer()
	ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport)
	pageHeaderBytes, err := ts.Write(context.TODO(), page.Header)
	if err != nil {
		panic(err)
	}

	page.RawData = append(pageHeaderBytes, compressedData...)
	return page.RawData
}
| 
 | ||||
// toDataPageV2 serializes the page as a v2 data page: the repetition and
// definition levels are written uncompressed (plain RLE, no length prefix)
// ahead of the compressed value section, per the v2 page layout. It rebuilds
// page.Header (sizes, null/row counts, optional min/max statistics), stores
// header + levels + compressed values in page.RawData, and returns the bytes.
// Panics if compression or thrift serialization fails (file-wide style).
func (page *page) toDataPageV2(compressType parquet.CompressionCodec) []byte {
	// Only non-null slots (definition level == max) carry stored values.
	values := []interface{}{}
	for i := range page.DataTable.DefinitionLevels {
		if page.DataTable.DefinitionLevels[i] == page.DataTable.MaxDefinitionLevel {
			values = append(values, page.DataTable.Values[i])
		}
	}
	// NOTE(review): unlike toDataPage, values are not passed through
	// interfacesToValues before encoding — confirm this asymmetry is intended.
	valuesBytes := encodeValues(values, page.DataType, page.DataTable.Encoding, page.DataTable.BitWidth)

	var defLevelBytes []byte
	if page.DataTable.MaxDefinitionLevel > 0 {
		defLevels := make([]int64, len(page.DataTable.DefinitionLevels))
		for i := range page.DataTable.DefinitionLevels {
			defLevels[i] = int64(page.DataTable.DefinitionLevels[i])
		}
		defLevelBytes = valuesToRLEBytes(
			defLevels,
			int32(getBitWidth(uint64(page.DataTable.MaxDefinitionLevel))),
			parquet.Type_INT64,
		)
	}

	// Repetition levels; a zero level marks a new row, so row counting
	// happens alongside the copy.
	// NOTE(review): numRows stays 0 when MaxRepetitionLevel == 0 (flat
	// columns) — verify readers of DataPageHeaderV2.NumRows handle that.
	var repLevelBytes []byte
	numRows := int32(0)
	if page.DataTable.MaxRepetitionLevel > 0 {
		repLevels := make([]int64, len(page.DataTable.DefinitionLevels))
		for i := range page.DataTable.DefinitionLevels {
			repLevels[i] = int64(page.DataTable.RepetitionLevels[i])
			if page.DataTable.RepetitionLevels[i] == 0 {
				numRows++
			}
		}
		repLevelBytes = valuesToRLEBytes(
			repLevels,
			int32(getBitWidth(uint64(page.DataTable.MaxRepetitionLevel))),
			parquet.Type_INT64,
		)
	}

	// Only the value section is compressed in a v2 page.
	compressedData, err := compressionCodec(compressType).compress(valuesBytes)
	if err != nil {
		panic(err)
	}

	page.Header = parquet.NewPageHeader()
	page.Header.Type = parquet.PageType_DATA_PAGE_V2
	page.Header.CompressedPageSize = int32(len(compressedData) + len(defLevelBytes) + len(repLevelBytes))
	page.Header.UncompressedPageSize = int32(len(valuesBytes) + len(defLevelBytes) + len(repLevelBytes))
	page.Header.DataPageHeaderV2 = parquet.NewDataPageHeaderV2()
	page.Header.DataPageHeaderV2.NumValues = int32(len(page.DataTable.Values))
	page.Header.DataPageHeaderV2.NumNulls = page.Header.DataPageHeaderV2.NumValues - int32(len(values))
	page.Header.DataPageHeaderV2.NumRows = numRows
	page.Header.DataPageHeaderV2.Encoding = page.DataTable.Encoding
	page.Header.DataPageHeaderV2.DefinitionLevelsByteLength = int32(len(defLevelBytes))
	page.Header.DataPageHeaderV2.RepetitionLevelsByteLength = int32(len(repLevelBytes))
	page.Header.DataPageHeaderV2.IsCompressed = true

	page.Header.DataPageHeaderV2.Statistics = parquet.NewStatistics()
	if page.MaxVal != nil {
		tmpBuf := valueToBytes(page.MaxVal, page.DataType)
		if page.DataType == parquet.Type_BYTE_ARRAY {
			switch page.DataTable.ConvertedType {
			case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL:
				// Drop the 4-byte length prefix: statistics store raw bytes.
				tmpBuf = tmpBuf[4:]
			}
		}
		page.Header.DataPageHeaderV2.Statistics.Max = tmpBuf
	}
	if page.MinVal != nil {
		tmpBuf := valueToBytes(page.MinVal, page.DataType)
		if page.DataType == parquet.Type_BYTE_ARRAY {
			switch page.DataTable.ConvertedType {
			case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL:
				tmpBuf = tmpBuf[4:]
			}
		}
		page.Header.DataPageHeaderV2.Statistics.Min = tmpBuf
	}

	// Thrift-compact-encode the header; v2 layout is header, rep levels,
	// def levels, compressed values.
	ts := thrift.NewTSerializer()
	ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport)
	pageHeaderBytes, err := ts.Write(context.TODO(), page.Header)
	if err != nil {
		panic(err)
	}

	page.RawData = append(pageHeaderBytes, repLevelBytes...)
	page.RawData = append(page.RawData, defLevelBytes...)
	page.RawData = append(page.RawData, compressedData...)

	return page.RawData
}
| 
 | ||||
| func (page *page) toDictPage(compressType parquet.CompressionCodec, dataType parquet.Type) []byte { | ||||
| 	valuesBytes := valuesToBytes(page.DataTable.Values, dataType) | ||||
| 	compressedData, err := compressionCodec(compressType).compress(valuesBytes) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 
 | ||||
| 	page.Header = parquet.NewPageHeader() | ||||
| 	page.Header.Type = parquet.PageType_DICTIONARY_PAGE | ||||
| 	page.Header.CompressedPageSize = int32(len(compressedData)) | ||||
| 	page.Header.UncompressedPageSize = int32(len(valuesBytes)) | ||||
| 	page.Header.DictionaryPageHeader = parquet.NewDictionaryPageHeader() | ||||
| 	page.Header.DictionaryPageHeader.NumValues = int32(len(page.DataTable.Values)) | ||||
| 	page.Header.DictionaryPageHeader.Encoding = parquet.Encoding_PLAIN | ||||
| 
 | ||||
| 	ts := thrift.NewTSerializer() | ||||
| 	ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) | ||||
| 	pageHeaderBytes, err := ts.Write(context.TODO(), page.Header) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 
 | ||||
| 	page.RawData = append(pageHeaderBytes, compressedData...) | ||||
| 	return page.RawData | ||||
| } | ||||
| 
 | ||||
// toDictDataPage serializes the page as a v1 data page whose values are
// dictionary indices: a one-byte bit width followed by RLE-encoded INT32
// indices, preceded by RLE/bit-packed-hybrid levels, all compressed as one
// unit. It rebuilds page.Header with PLAIN_DICTIONARY encoding, stores
// header + compressed body in page.RawData, and returns those bytes.
// Panics if compression or thrift serialization fails (file-wide style).
func (page *page) toDictDataPage(compressType parquet.CompressionCodec, bitWidth int32) []byte {
	// Leading byte is the index bit width, as the dictionary-index decoder expects.
	valuesBytes := append([]byte{byte(bitWidth)}, valuesToRLEBytes(page.DataTable.Values, bitWidth, parquet.Type_INT32)...)

	// Definition levels; omitted entirely for required (max level 0) columns.
	var defLevelBytes []byte
	if page.DataTable.MaxDefinitionLevel > 0 {
		defLevels := make([]int64, len(page.DataTable.DefinitionLevels))
		for i := range page.DataTable.DefinitionLevels {
			defLevels[i] = int64(page.DataTable.DefinitionLevels[i])
		}
		defLevelBytes = valuesToRLEBitPackedHybridBytes(
			defLevels,
			int32(getBitWidth(uint64(page.DataTable.MaxDefinitionLevel))),
			parquet.Type_INT64,
		)
	}

	// Repetition levels; only present for nested columns.
	var repLevelBytes []byte
	if page.DataTable.MaxRepetitionLevel > 0 {
		repLevels := make([]int64, len(page.DataTable.DefinitionLevels))
		for i := range page.DataTable.DefinitionLevels {
			repLevels[i] = int64(page.DataTable.RepetitionLevels[i])
		}
		repLevelBytes = valuesToRLEBitPackedHybridBytes(
			repLevels,
			int32(getBitWidth(uint64(page.DataTable.MaxRepetitionLevel))),
			parquet.Type_INT64,
		)
	}

	// v1 layout: rep levels, then def levels, then values — compressed together.
	data := append(repLevelBytes, defLevelBytes...)
	data = append(data, valuesBytes...)

	compressedData, err := compressionCodec(compressType).compress(data)
	if err != nil {
		panic(err)
	}

	page.Header = parquet.NewPageHeader()
	page.Header.Type = parquet.PageType_DATA_PAGE
	page.Header.CompressedPageSize = int32(len(compressedData))
	page.Header.UncompressedPageSize = int32(len(data))
	page.Header.DataPageHeader = parquet.NewDataPageHeader()
	page.Header.DataPageHeader.NumValues = int32(len(page.DataTable.DefinitionLevels))
	page.Header.DataPageHeader.DefinitionLevelEncoding = parquet.Encoding_RLE
	page.Header.DataPageHeader.RepetitionLevelEncoding = parquet.Encoding_RLE
	page.Header.DataPageHeader.Encoding = parquet.Encoding_PLAIN_DICTIONARY

	// Thrift-compact-encode the header and prepend it to the page body.
	ts := thrift.NewTSerializer()
	ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport)
	pageHeaderBytes, err := ts.Write(context.TODO(), page.Header)
	if err != nil {
		panic(err)
	}

	page.RawData = append(pageHeaderBytes, compressedData...)
	return page.RawData
}
|  | @ -1,881 +0,0 @@ | |||
| /** | ||||
|  * Licensed to the Apache Software Foundation (ASF) under one | ||||
|  * or more contributor license agreements.  See the NOTICE file | ||||
|  * distributed with this work for additional information | ||||
|  * regarding copyright ownership.  The ASF licenses this file | ||||
|  * to you under the Apache License, Version 2.0 (the | ||||
|  * "License"); you may not use this file except in compliance | ||||
|  * with the License.  You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, | ||||
|  * software distributed under the License is distributed on an | ||||
|  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||||
|  * KIND, either express or implied.  See the License for the | ||||
|  * specific language governing permissions and limitations | ||||
|  * under the License. | ||||
|  */ | ||||
| 
 | ||||
| /** | ||||
|  * File format description for the parquet file format | ||||
|  */ | ||||
| namespace cpp parquet | ||||
| namespace java org.apache.parquet.format | ||||
| 
 | ||||
| /** | ||||
|  * Types supported by Parquet.  These types are intended to be used in combination | ||||
|  * with the encodings to control the on disk storage format. | ||||
|  * For example INT16 is not included as a type since a good encoding of INT32 | ||||
|  * would handle this. | ||||
|  */ | ||||
| enum Type { | ||||
|   BOOLEAN = 0; | ||||
|   INT32 = 1; | ||||
|   INT64 = 2; | ||||
|   INT96 = 3;  // deprecated, only used by legacy implementations. | ||||
|   FLOAT = 4; | ||||
|   DOUBLE = 5; | ||||
|   BYTE_ARRAY = 6; | ||||
|   FIXED_LEN_BYTE_ARRAY = 7; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Common types used by frameworks (e.g. hive, pig) using parquet.  This helps map | ||||
|  * between types in those frameworks to the base types in parquet.  This is only | ||||
|  * metadata and not needed to read or write the data. | ||||
|  */ | ||||
| enum ConvertedType { | ||||
|   /** a BYTE_ARRAY actually contains UTF8 encoded chars */ | ||||
|   UTF8 = 0; | ||||
| 
 | ||||
|   /** a map is converted as an optional field containing a repeated key/value pair */ | ||||
|   MAP = 1; | ||||
| 
 | ||||
|   /** a key/value pair is converted into a group of two fields */ | ||||
|   MAP_KEY_VALUE = 2; | ||||
| 
 | ||||
|   /** a list is converted into an optional field containing a repeated field for its | ||||
|    * values */ | ||||
|   LIST = 3; | ||||
| 
 | ||||
|   /** an enum is converted into a binary field */ | ||||
|   ENUM = 4; | ||||
| 
 | ||||
|   /** | ||||
|    * A decimal value. | ||||
|    * | ||||
|    * This may be used to annotate binary or fixed primitive types. The | ||||
|    * underlying byte array stores the unscaled value encoded as two's | ||||
|    * complement using big-endian byte order (the most significant byte is the | ||||
|    * zeroth element). The value of the decimal is the value * 10^{-scale}. | ||||
|    * | ||||
|    * This must be accompanied by a (maximum) precision and a scale in the | ||||
|    * SchemaElement. The precision specifies the number of digits in the decimal | ||||
|    * and the scale stores the location of the decimal point. For example 1.23 | ||||
|    * would have precision 3 (3 total digits) and scale 2 (the decimal point is | ||||
|    * 2 digits over). | ||||
|    */ | ||||
|   DECIMAL = 5; | ||||
| 
 | ||||
|   /** | ||||
|    * A Date | ||||
|    * | ||||
|    * Stored as days since Unix epoch, encoded as the INT32 physical type. | ||||
|    * | ||||
|    */ | ||||
|   DATE = 6; | ||||
| 
 | ||||
|   /** | ||||
|    * A time | ||||
|    * | ||||
|    * The total number of milliseconds since midnight.  The value is stored | ||||
|    * as an INT32 physical type. | ||||
|    */ | ||||
|   TIME_MILLIS = 7; | ||||
| 
 | ||||
|   /** | ||||
|    * A time. | ||||
|    * | ||||
|    * The total number of microseconds since midnight.  The value is stored as | ||||
|    * an INT64 physical type. | ||||
|    */ | ||||
|   TIME_MICROS = 8; | ||||
| 
 | ||||
|   /** | ||||
|    * A date/time combination | ||||
|    * | ||||
|    * Date and time recorded as milliseconds since the Unix epoch.  Recorded as | ||||
|    * a physical type of INT64. | ||||
|    */ | ||||
|   TIMESTAMP_MILLIS = 9; | ||||
| 
 | ||||
|   /** | ||||
|    * A date/time combination | ||||
|    * | ||||
|    * Date and time recorded as microseconds since the Unix epoch.  The value is | ||||
|    * stored as an INT64 physical type. | ||||
|    */ | ||||
|   TIMESTAMP_MICROS = 10; | ||||
| 
 | ||||
| 
 | ||||
|   /** | ||||
|    * An unsigned integer value. | ||||
|    * | ||||
|    * The number describes the maximum number of meainful data bits in | ||||
|    * the stored value. 8, 16 and 32 bit values are stored using the | ||||
|    * INT32 physical type.  64 bit values are stored using the INT64 | ||||
|    * physical type. | ||||
|    * | ||||
|    */ | ||||
|   UINT_8 = 11; | ||||
|   UINT_16 = 12; | ||||
|   UINT_32 = 13; | ||||
|   UINT_64 = 14; | ||||
| 
 | ||||
|   /** | ||||
|    * A signed integer value. | ||||
|    * | ||||
|    * The number describes the maximum number of meainful data bits in | ||||
|    * the stored value. 8, 16 and 32 bit values are stored using the | ||||
|    * INT32 physical type.  64 bit values are stored using the INT64 | ||||
|    * physical type. | ||||
|    * | ||||
|    */ | ||||
|   INT_8 = 15; | ||||
|   INT_16 = 16; | ||||
|   INT_32 = 17; | ||||
|   INT_64 = 18; | ||||
| 
 | ||||
|   /** | ||||
|    * An embedded JSON document | ||||
|    * | ||||
|    * A JSON document embedded within a single UTF8 column. | ||||
|    */ | ||||
|   JSON = 19; | ||||
| 
 | ||||
|   /** | ||||
|    * An embedded BSON document | ||||
|    * | ||||
|    * A BSON document embedded within a single BINARY column. | ||||
|    */ | ||||
|   BSON = 20; | ||||
| 
 | ||||
|   /** | ||||
|    * An interval of time | ||||
|    * | ||||
|    * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 | ||||
|    * This data is composed of three separate little endian unsigned | ||||
|    * integers.  Each stores a component of a duration of time.  The first | ||||
|    * integer identifies the number of months associated with the duration, | ||||
|    * the second identifies the number of days associated with the duration | ||||
|    * and the third identifies the number of milliseconds associated with | ||||
|    * the provided duration.  This duration of time is independent of any | ||||
|    * particular timezone or date. | ||||
|    */ | ||||
|   INTERVAL = 21; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Representation of Schemas | ||||
|  */ | ||||
| enum FieldRepetitionType { | ||||
|   /** This field is required (can not be null) and each record has exactly 1 value. */ | ||||
|   REQUIRED = 0; | ||||
| 
 | ||||
|   /** The field is optional (can be null) and each record has 0 or 1 values. */ | ||||
|   OPTIONAL = 1; | ||||
| 
 | ||||
|   /** The field is repeated and can contain 0 or more values */ | ||||
|   REPEATED = 2; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Statistics per row group and per page | ||||
|  * All fields are optional. | ||||
|  */ | ||||
| struct Statistics { | ||||
|    /** | ||||
|     * DEPRECATED: min and max value of the column. Use min_value and max_value. | ||||
|     * | ||||
|     * Values are encoded using PLAIN encoding, except that variable-length byte | ||||
|     * arrays do not include a length prefix. | ||||
|     * | ||||
|     * These fields encode min and max values determined by signed comparison | ||||
|     * only. New files should use the correct order for a column's logical type | ||||
|     * and store the values in the min_value and max_value fields. | ||||
|     * | ||||
|     * To support older readers, these may be set when the column order is | ||||
|     * signed. | ||||
|     */ | ||||
|    1: optional binary max; | ||||
|    2: optional binary min; | ||||
|    /** count of null value in the column */ | ||||
|    3: optional i64 null_count; | ||||
|    /** count of distinct values occurring */ | ||||
|    4: optional i64 distinct_count; | ||||
|    /** | ||||
|     * Min and max values for the column, determined by its ColumnOrder. | ||||
|     * | ||||
|     * Values are encoded using PLAIN encoding, except that variable-length byte | ||||
|     * arrays do not include a length prefix. | ||||
|     */ | ||||
|    5: optional binary max_value; | ||||
|    6: optional binary min_value; | ||||
| } | ||||
| 
 | ||||
| /** Empty structs to use as logical type annotations */ | ||||
| struct StringType {}  // allowed for BINARY, must be encoded with UTF-8 | ||||
| struct UUIDType {}    // allowed for FIXED[16], must encoded raw UUID bytes | ||||
| struct MapType {}     // see LogicalTypes.md | ||||
| struct ListType {}    // see LogicalTypes.md | ||||
| struct EnumType {}    // allowed for BINARY, must be encoded with UTF-8 | ||||
| struct DateType {}    // allowed for INT32 | ||||
| 
 | ||||
| /** | ||||
|  * Logical type to annotate a column that is always null. | ||||
|  * | ||||
|  * Sometimes when discovering the schema of existing data, values are always | ||||
|  * null and the physical type can't be determined. This annotation signals | ||||
|  * the case where the physical type was guessed from all null values. | ||||
|  */ | ||||
| struct NullType {}    // allowed for any physical type, only null values stored | ||||
| 
 | ||||
| /** | ||||
|  * Decimal logical type annotation | ||||
|  * | ||||
|  * To maintain forward-compatibility in v1, implementations using this logical | ||||
|  * type must also set scale and precision on the annotated SchemaElement. | ||||
|  * | ||||
|  * Allowed for physical types: INT32, INT64, FIXED, and BINARY | ||||
|  */ | ||||
| struct DecimalType { | ||||
|   1: required i32 scale | ||||
|   2: required i32 precision | ||||
| } | ||||
| 
 | ||||
| /** Time units for logical types */ | ||||
| struct MilliSeconds {} | ||||
| struct MicroSeconds {} | ||||
| struct NanoSeconds {} | ||||
| union TimeUnit { | ||||
|   1: MilliSeconds MILLIS | ||||
|   2: MicroSeconds MICROS | ||||
|   3: NanoSeconds NANOS | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Timestamp logical type annotation | ||||
|  * | ||||
|  * Allowed for physical types: INT64 | ||||
|  */ | ||||
| struct TimestampType { | ||||
|   1: required bool isAdjustedToUTC | ||||
|   2: required TimeUnit unit | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Time logical type annotation | ||||
|  * | ||||
|  * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) | ||||
|  */ | ||||
| struct TimeType { | ||||
|   1: required bool isAdjustedToUTC | ||||
|   2: required TimeUnit unit | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Integer logical type annotation | ||||
|  * | ||||
|  * bitWidth must be 8, 16, 32, or 64. | ||||
|  * | ||||
|  * Allowed for physical types: INT32, INT64 | ||||
|  */ | ||||
| struct IntType { | ||||
|   1: required byte bitWidth | ||||
|   2: required bool isSigned | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Embedded JSON logical type annotation | ||||
|  * | ||||
|  * Allowed for physical types: BINARY | ||||
|  */ | ||||
| struct JsonType { | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Embedded BSON logical type annotation | ||||
|  * | ||||
|  * Allowed for physical types: BINARY | ||||
|  */ | ||||
| struct BsonType { | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * LogicalType annotations to replace ConvertedType. | ||||
|  * | ||||
|  * To maintain compatibility, implementations using LogicalType for a | ||||
|  * SchemaElement must also set the corresponding ConvertedType from the | ||||
|  * following table. | ||||
|  */ | ||||
| union LogicalType { | ||||
|   1:  StringType STRING       // use ConvertedType UTF8 | ||||
|   2:  MapType MAP             // use ConvertedType MAP | ||||
|   3:  ListType LIST           // use ConvertedType LIST | ||||
|   4:  EnumType ENUM           // use ConvertedType ENUM | ||||
|   5:  DecimalType DECIMAL     // use ConvertedType DECIMAL | ||||
|   6:  DateType DATE           // use ConvertedType DATE | ||||
|   7:  TimeType TIME           // use ConvertedType TIME_MICROS or TIME_MILLIS | ||||
|   8:  TimestampType TIMESTAMP // use ConvertedType TIMESTAMP_MICROS or TIMESTAMP_MILLIS | ||||
|   // 9: reserved for INTERVAL | ||||
|   10: IntType INTEGER         // use ConvertedType INT_* or UINT_* | ||||
|   11: NullType UNKNOWN        // no compatible ConvertedType | ||||
|   12: JsonType JSON           // use ConvertedType JSON | ||||
|   13: BsonType BSON           // use ConvertedType BSON | ||||
|   14: UUIDType UUID | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Represents a element inside a schema definition. | ||||
|  *  - if it is a group (inner node) then type is undefined and num_children is defined | ||||
|  *  - if it is a primitive type (leaf) then type is defined and num_children is undefined | ||||
|  * the nodes are listed in depth first traversal order. | ||||
|  */ | ||||
| struct SchemaElement { | ||||
|   /** Data type for this field. Not set if the current element is a non-leaf node */ | ||||
|   1: optional Type type; | ||||
| 
 | ||||
|   /** If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the vales. | ||||
|    * Otherwise, if specified, this is the maximum bit length to store any of the values. | ||||
|    * (e.g. a low cardinality INT col could have this set to 3).  Note that this is | ||||
|    * in the schema, and therefore fixed for the entire file. | ||||
|    */ | ||||
|   2: optional i32 type_length; | ||||
| 
 | ||||
|   /** repetition of the field. The root of the schema does not have a repetition_type. | ||||
|    * All other nodes must have one */ | ||||
|   3: optional FieldRepetitionType repetition_type; | ||||
| 
 | ||||
|   /** Name of the field in the schema */ | ||||
|   4: required string name; | ||||
| 
 | ||||
|   /** Nested fields.  Since thrift does not support nested fields, | ||||
|    * the nesting is flattened to a single list by a depth-first traversal. | ||||
|    * The children count is used to construct the nested relationship. | ||||
|    * This field is not set when the element is a primitive type | ||||
|    */ | ||||
|   5: optional i32 num_children; | ||||
| 
 | ||||
|   /** When the schema is the result of a conversion from another model | ||||
|    * Used to record the original type to help with cross conversion. | ||||
|    */ | ||||
|   6: optional ConvertedType converted_type; | ||||
| 
 | ||||
|   /** Used when this column contains decimal data. | ||||
|    * See the DECIMAL converted type for more details. | ||||
|    */ | ||||
|   7: optional i32 scale | ||||
|   8: optional i32 precision | ||||
| 
 | ||||
|   /** When the original schema supports field ids, this will save the | ||||
|    * original field id in the parquet schema | ||||
|    */ | ||||
|   9: optional i32 field_id; | ||||
| 
 | ||||
|   /** | ||||
|    * The logical type of this SchemaElement | ||||
|    * | ||||
|    * LogicalType replaces ConvertedType, but ConvertedType is still required | ||||
|    * for some logical types to ensure forward-compatibility in format v1. | ||||
|    */ | ||||
|   10: optional LogicalType logicalType | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Encodings supported by Parquet.  Not all encodings are valid for all types.  These | ||||
|  * enums are also used to specify the encoding of definition and repetition levels. | ||||
|  * See the accompanying doc for the details of the more complicated encodings. | ||||
|  */ | ||||
| enum Encoding { | ||||
|   /** Default encoding. | ||||
|    * BOOLEAN - 1 bit per value. 0 is false; 1 is true. | ||||
|    * INT32 - 4 bytes per value.  Stored as little-endian. | ||||
|    * INT64 - 8 bytes per value.  Stored as little-endian. | ||||
|    * FLOAT - 4 bytes per value.  IEEE. Stored as little-endian. | ||||
|    * DOUBLE - 8 bytes per value.  IEEE. Stored as little-endian. | ||||
|    * BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. | ||||
|    * FIXED_LEN_BYTE_ARRAY - Just the bytes. | ||||
|    */ | ||||
|   PLAIN = 0; | ||||
| 
 | ||||
|   /** Group VarInt encoding for INT32/INT64. | ||||
|    * This encoding is deprecated. It was never used | ||||
|    */ | ||||
|   //  GROUP_VAR_INT = 1; | ||||
| 
 | ||||
|   /** | ||||
|    * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the | ||||
|    * plain type. | ||||
|    * in a data page use RLE_DICTIONARY instead. | ||||
|    * in a Dictionary page use PLAIN instead | ||||
|    */ | ||||
|   PLAIN_DICTIONARY = 2; | ||||
| 
 | ||||
|   /** Group packed run length encoding. Usable for definition/repetition levels | ||||
|    * encoding and Booleans (on one bit: 0 is false; 1 is true.) | ||||
|    */ | ||||
|   RLE = 3; | ||||
| 
 | ||||
|   /** Bit packed encoding.  This can only be used if the data has a known max | ||||
|    * width.  Usable for definition/repetition levels encoding. | ||||
|    */ | ||||
|   BIT_PACKED = 4; | ||||
| 
 | ||||
|   /** Delta encoding for integers. This can be used for int columns and works best | ||||
|    * on sorted data | ||||
|    */ | ||||
|   DELTA_BINARY_PACKED = 5; | ||||
| 
 | ||||
|   /** Encoding for byte arrays to separate the length values and the data. The lengths | ||||
|    * are encoded using DELTA_BINARY_PACKED | ||||
|    */ | ||||
|   DELTA_LENGTH_BYTE_ARRAY = 6; | ||||
| 
 | ||||
|   /** Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. | ||||
|    * Suffixes are stored as delta length byte arrays. | ||||
|    */ | ||||
|   DELTA_BYTE_ARRAY = 7; | ||||
| 
 | ||||
|   /** Dictionary encoding: the ids are encoded using the RLE encoding | ||||
|    */ | ||||
|   RLE_DICTIONARY = 8; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Supported compression algorithms. | ||||
|  * | ||||
|  * Codecs added in 2.4 can be read by readers based on 2.4 and later. | ||||
|  * Codec support may vary between readers based on the format version and | ||||
|  * libraries available at runtime. Gzip, Snappy, and LZ4 codecs are | ||||
|  * widely available, while Zstd and Brotli require additional libraries. | ||||
|  */ | ||||
| enum CompressionCodec { | ||||
|   UNCOMPRESSED = 0; | ||||
|   SNAPPY = 1; | ||||
|   GZIP = 2; | ||||
|   LZO = 3; | ||||
|   BROTLI = 4; // Added in 2.4 | ||||
|   LZ4 = 5;    // Added in 2.4 | ||||
|   ZSTD = 6;   // Added in 2.4 | ||||
| } | ||||
| 
 | ||||
| enum PageType { | ||||
|   DATA_PAGE = 0; | ||||
|   INDEX_PAGE = 1; | ||||
|   DICTIONARY_PAGE = 2; | ||||
|   DATA_PAGE_V2 = 3; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Enum to annotate whether lists of min/max elements inside ColumnIndex | ||||
|  * are ordered and if so, in which direction. | ||||
|  */ | ||||
| enum BoundaryOrder { | ||||
|   UNORDERED = 0; | ||||
|   ASCENDING = 1; | ||||
|   DESCENDING = 2; | ||||
| } | ||||
| 
 | ||||
| /** Data page header */ | ||||
| struct DataPageHeader { | ||||
|   /** Number of values, including NULLs, in this data page. **/ | ||||
|   1: required i32 num_values | ||||
| 
 | ||||
|   /** Encoding used for this data page **/ | ||||
|   2: required Encoding encoding | ||||
| 
 | ||||
|   /** Encoding used for definition levels **/ | ||||
|   3: required Encoding definition_level_encoding; | ||||
| 
 | ||||
|   /** Encoding used for repetition levels **/ | ||||
|   4: required Encoding repetition_level_encoding; | ||||
| 
 | ||||
|   /** Optional statistics for the data in this page**/ | ||||
|   5: optional Statistics statistics; | ||||
| } | ||||
| 
 | ||||
| struct IndexPageHeader { | ||||
|   /** TODO: **/ | ||||
| } | ||||
| 
 | ||||
| struct DictionaryPageHeader { | ||||
|   /** Number of values in the dictionary **/ | ||||
|   1: required i32 num_values; | ||||
| 
 | ||||
|   /** Encoding using this dictionary page **/ | ||||
|   2: required Encoding encoding | ||||
| 
 | ||||
|   /** If true, the entries in the dictionary are sorted in ascending order **/ | ||||
|   3: optional bool is_sorted; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * New page format allowing reading levels without decompressing the data | ||||
|  * Repetition and definition levels are uncompressed | ||||
|  * The remaining section containing the data is compressed if is_compressed is true | ||||
|  **/ | ||||
| struct DataPageHeaderV2 { | ||||
|   /** Number of values, including NULLs, in this data page. **/ | ||||
|   1: required i32 num_values | ||||
|   /** Number of NULL values, in this data page. | ||||
|       Number of non-null = num_values - num_nulls which is also the number of values in the data section **/ | ||||
|   2: required i32 num_nulls | ||||
|   /** Number of rows in this data page. which means pages change on record boundaries (r = 0) **/ | ||||
|   3: required i32 num_rows | ||||
|   /** Encoding used for data in this page **/ | ||||
|   4: required Encoding encoding | ||||
| 
 | ||||
|   // repetition levels and definition levels are always using RLE (without size in it) | ||||
| 
 | ||||
|   /** length of the definition levels */ | ||||
|   5: required i32 definition_levels_byte_length; | ||||
|   /** length of the repetition levels */ | ||||
|   6: required i32 repetition_levels_byte_length; | ||||
| 
 | ||||
|   /**  whether the values are compressed. | ||||
|   Which means the section of the page between | ||||
|   definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) | ||||
|   is compressed with the compression_codec. | ||||
|   If missing it is considered compressed */ | ||||
|   7: optional bool is_compressed = 1; | ||||
| 
 | ||||
|   /** optional statistics for this column chunk */ | ||||
|   8: optional Statistics statistics; | ||||
| } | ||||
| 
 | ||||
| struct PageHeader { | ||||
|   /** the type of the page: indicates which of the *_header fields is set **/ | ||||
|   1: required PageType type | ||||
| 
 | ||||
|   /** Uncompressed page size in bytes (not including this header) **/ | ||||
|   2: required i32 uncompressed_page_size | ||||
| 
 | ||||
|   /** Compressed page size in bytes (not including this header) **/ | ||||
|   3: required i32 compressed_page_size | ||||
| 
 | ||||
|   /** 32bit crc for the data below. This allows for disabling checksumming in HDFS | ||||
|    *  if only a few pages needs to be read | ||||
|    **/ | ||||
|   4: optional i32 crc | ||||
| 
 | ||||
|   // Headers for page specific data.  One only will be set. | ||||
|   5: optional DataPageHeader data_page_header; | ||||
|   6: optional IndexPageHeader index_page_header; | ||||
|   7: optional DictionaryPageHeader dictionary_page_header; | ||||
|   8: optional DataPageHeaderV2 data_page_header_v2; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Wrapper struct to store key values | ||||
|  */ | ||||
|  struct KeyValue { | ||||
|   1: required string key | ||||
|   2: optional string value | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Wrapper struct to specify sort order | ||||
|  */ | ||||
| struct SortingColumn { | ||||
|   /** The column index (in this row group) **/ | ||||
|   1: required i32 column_idx | ||||
| 
 | ||||
|   /** If true, indicates this column is sorted in descending order. **/ | ||||
|   2: required bool descending | ||||
| 
 | ||||
|   /** If true, nulls will come before non-null values, otherwise, | ||||
|    * nulls go at the end. */ | ||||
|   3: required bool nulls_first | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * statistics of a given page type and encoding | ||||
|  */ | ||||
| struct PageEncodingStats { | ||||
| 
 | ||||
|   /** the page type (data/dic/...) **/ | ||||
|   1: required PageType page_type; | ||||
| 
 | ||||
|   /** encoding of the page **/ | ||||
|   2: required Encoding encoding; | ||||
| 
 | ||||
|   /** number of pages of this type with this encoding **/ | ||||
|   3: required i32 count; | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Description for column metadata | ||||
|  */ | ||||
| struct ColumnMetaData { | ||||
|   /** Type of this column **/ | ||||
|   1: required Type type | ||||
| 
 | ||||
|   /** Set of all encodings used for this column. The purpose is to validate | ||||
|    * whether we can decode those pages. **/ | ||||
|   2: required list<Encoding> encodings | ||||
| 
 | ||||
|   /** Path in schema **/ | ||||
|   3: required list<string> path_in_schema | ||||
| 
 | ||||
|   /** Compression codec **/ | ||||
|   4: required CompressionCodec codec | ||||
| 
 | ||||
|   /** Number of values in this column **/ | ||||
|   5: required i64 num_values | ||||
| 
 | ||||
|   /** total byte size of all uncompressed pages in this column chunk (including the headers) **/ | ||||
|   6: required i64 total_uncompressed_size | ||||
| 
 | ||||
|   /** total byte size of all compressed pages in this column chunk (including the headers) **/ | ||||
|   7: required i64 total_compressed_size | ||||
| 
 | ||||
|   /** Optional key/value metadata **/ | ||||
|   8: optional list<KeyValue> key_value_metadata | ||||
| 
 | ||||
|   /** Byte offset from beginning of file to first data page **/ | ||||
|   9: required i64 data_page_offset | ||||
| 
 | ||||
|   /** Byte offset from beginning of file to root index page **/ | ||||
|   10: optional i64 index_page_offset | ||||
| 
 | ||||
|   /** Byte offset from the beginning of file to first (only) dictionary page **/ | ||||
|   11: optional i64 dictionary_page_offset | ||||
| 
 | ||||
|   /** optional statistics for this column chunk */ | ||||
|   12: optional Statistics statistics; | ||||
| 
 | ||||
|   /** Set of all encodings used for pages in this column chunk. | ||||
|    * This information can be used to determine if all data pages are | ||||
|    * dictionary encoded for example **/ | ||||
|   13: optional list<PageEncodingStats> encoding_stats; | ||||
| } | ||||
| 
 | ||||
| struct ColumnChunk { | ||||
|   /** File where column data is stored.  If not set, assumed to be same file as | ||||
|     * metadata.  This path is relative to the current file. | ||||
|     **/ | ||||
|   1: optional string file_path | ||||
| 
 | ||||
|   /** Byte offset in file_path to the ColumnMetaData **/ | ||||
|   2: required i64 file_offset | ||||
| 
 | ||||
|   /** Column metadata for this chunk. This is the same content as what is at | ||||
|    * file_path/file_offset.  Having it here has it replicated in the file | ||||
|    * metadata. | ||||
|    **/ | ||||
|   3: optional ColumnMetaData meta_data | ||||
| 
 | ||||
|   /** File offset of ColumnChunk's OffsetIndex **/ | ||||
|   4: optional i64 offset_index_offset | ||||
| 
 | ||||
|   /** Size of ColumnChunk's OffsetIndex, in bytes **/ | ||||
|   5: optional i32 offset_index_length | ||||
| 
 | ||||
|   /** File offset of ColumnChunk's ColumnIndex **/ | ||||
|   6: optional i64 column_index_offset | ||||
| 
 | ||||
|   /** Size of ColumnChunk's ColumnIndex, in bytes **/ | ||||
|   7: optional i32 column_index_length | ||||
| } | ||||
| 
 | ||||
| struct RowGroup { | ||||
|   /** Metadata for each column chunk in this row group. | ||||
|    * This list must have the same order as the SchemaElement list in FileMetaData. | ||||
|    **/ | ||||
|   1: required list<ColumnChunk> columns | ||||
| 
 | ||||
|   /** Total byte size of all the uncompressed column data in this row group **/ | ||||
|   2: required i64 total_byte_size | ||||
| 
 | ||||
|   /** Number of rows in this row group **/ | ||||
|   3: required i64 num_rows | ||||
| 
 | ||||
|   /** If set, specifies a sort ordering of the rows in this RowGroup. | ||||
|    * The sorting columns can be a subset of all the columns. | ||||
|    */ | ||||
|   4: optional list<SortingColumn> sorting_columns | ||||
| } | ||||
| 
 | ||||
| /** Empty struct to signal the order defined by the physical or logical type */ | ||||
| struct TypeDefinedOrder {} | ||||
| 
 | ||||
| /** | ||||
|  * Union to specify the order used for the min_value and max_value fields for a | ||||
|  * column. This union takes the role of an enhanced enum that allows rich | ||||
|  * elements (which will be needed for a collation-based ordering in the future). | ||||
|  * | ||||
|  * Possible values are: | ||||
|  * * TypeDefinedOrder - the column uses the order defined by its logical or | ||||
|  *                      physical type (if there is no logical type). | ||||
|  * | ||||
|  * If the reader does not support the value of this union, min and max stats | ||||
|  * for this column should be ignored. | ||||
|  */ | ||||
| union ColumnOrder { | ||||
| 
 | ||||
|   /** | ||||
|    * The sort orders for logical types are: | ||||
|    *   UTF8 - unsigned byte-wise comparison | ||||
|    *   INT8 - signed comparison | ||||
|    *   INT16 - signed comparison | ||||
|    *   INT32 - signed comparison | ||||
|    *   INT64 - signed comparison | ||||
|    *   UINT8 - unsigned comparison | ||||
|    *   UINT16 - unsigned comparison | ||||
|    *   UINT32 - unsigned comparison | ||||
|    *   UINT64 - unsigned comparison | ||||
|    *   DECIMAL - signed comparison of the represented value | ||||
|    *   DATE - signed comparison | ||||
|    *   TIME_MILLIS - signed comparison | ||||
|    *   TIME_MICROS - signed comparison | ||||
|    *   TIMESTAMP_MILLIS - signed comparison | ||||
|    *   TIMESTAMP_MICROS - signed comparison | ||||
|    *   INTERVAL - unsigned comparison | ||||
|    *   JSON - unsigned byte-wise comparison | ||||
|    *   BSON - unsigned byte-wise comparison | ||||
|    *   ENUM - unsigned byte-wise comparison | ||||
|    *   LIST - undefined | ||||
|    *   MAP - undefined | ||||
|    * | ||||
|    * In the absence of logical types, the sort order is determined by the physical type: | ||||
|    *   BOOLEAN - false, true | ||||
|    *   INT32 - signed comparison | ||||
|    *   INT64 - signed comparison | ||||
|    *   INT96 (only used for legacy timestamps) - undefined | ||||
|    *   FLOAT - signed comparison of the represented value (*) | ||||
|    *   DOUBLE - signed comparison of the represented value (*) | ||||
|    *   BYTE_ARRAY - unsigned byte-wise comparison | ||||
|    *   FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison | ||||
|    * | ||||
|    * (*) Because the sorting order is not specified properly for floating | ||||
|    *     point values (relations vs. total ordering) the following | ||||
|    *     compatibility rules should be applied when reading statistics: | ||||
|    *     - If the min is a NaN, it should be ignored. | ||||
|    *     - If the max is a NaN, it should be ignored. | ||||
|    *     - If the min is +0, the row group may contain -0 values as well. | ||||
|    *     - If the max is -0, the row group may contain +0 values as well. | ||||
|    *     - When looking for NaN values, min and max should be ignored. | ||||
|    */ | ||||
|   1: TypeDefinedOrder TYPE_ORDER; | ||||
| } | ||||
| 
 | ||||
| struct PageLocation { | ||||
|   /** Offset of the page in the file **/ | ||||
|   1: required i64 offset | ||||
| 
 | ||||
|   /** | ||||
|    * Size of the page, including header. Sum of compressed_page_size and header | ||||
|    * length | ||||
|    */ | ||||
|   2: required i32 compressed_page_size | ||||
| 
 | ||||
|   /** | ||||
|    * Index within the RowGroup of the first row of the page; this means pages | ||||
|    * change on record boundaries (r = 0). | ||||
|    */ | ||||
|   3: required i64 first_row_index | ||||
| } | ||||
| 
 | ||||
| struct OffsetIndex { | ||||
|   /** | ||||
|    * PageLocations, ordered by increasing PageLocation.offset. It is required | ||||
|    * that page_locations[i].first_row_index < page_locations[i+1].first_row_index. | ||||
|    */ | ||||
|   1: required list<PageLocation> page_locations | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Description for ColumnIndex. | ||||
|  * Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i] | ||||
|  */ | ||||
| struct ColumnIndex { | ||||
|   /** | ||||
|    * A list of Boolean values to determine the validity of the corresponding | ||||
|    * min and max values. If true, a page contains only null values, and writers | ||||
|    * have to set the corresponding entries in min_values and max_values to | ||||
|    * byte[0], so that all lists have the same length. If false, the | ||||
|    * corresponding entries in min_values and max_values must be valid. | ||||
|    */ | ||||
|   1: required list<bool> null_pages | ||||
| 
 | ||||
|   /** | ||||
|    * Two lists containing lower and upper bounds for the values of each page. | ||||
|    * These may be the actual minimum and maximum values found on a page, but | ||||
|    * can also be (more compact) values that do not exist on a page. For | ||||
|    * example, instead of storing ""Blart Versenwald III", a writer may set | ||||
|    * min_values[i]="B", max_values[i]="C". Such more compact values must still | ||||
|    * be valid values within the column's logical type. Readers must make sure | ||||
|    * that list entries are populated before using them by inspecting null_pages. | ||||
|    */ | ||||
|   2: required list<binary> min_values | ||||
|   3: required list<binary> max_values | ||||
| 
 | ||||
|   /** | ||||
|    * Stores whether both min_values and max_values are orderd and if so, in | ||||
|    * which direction. This allows readers to perform binary searches in both | ||||
|    * lists. Readers cannot assume that max_values[i] <= min_values[i+1], even | ||||
|    * if the lists are ordered. | ||||
|    */ | ||||
|   4: required BoundaryOrder boundary_order | ||||
| 
 | ||||
|   /** A list containing the number of null values for each page **/ | ||||
|   5: optional list<i64> null_counts | ||||
| } | ||||
| 
 | ||||
/**
 * Description for file metadata
 */
struct FileMetaData {
  /** Version of this file **/
  1: required i32 version

  /** Parquet schema for this file.  This schema contains metadata for all the columns.
   * The schema is represented as a tree with a single root.  The nodes of the tree
   * are flattened to a list by doing a depth-first traversal.
   * The column metadata contains the path in the schema for that column which can be
   * used to map columns to nodes in the schema.
   * The first element is the root **/
  2: required list<SchemaElement> schema;

  /** Number of rows in this file **/
  3: required i64 num_rows

  /** Row groups in this file **/
  4: required list<RowGroup> row_groups

  /** Optional key/value metadata **/
  5: optional list<KeyValue> key_value_metadata

  /** String for application that wrote this file.  This should be in the format
   * <Application> version <App Version> (build <App Build Hash>).
   * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
   **/
  6: optional string created_by

  /**
   * Sort order used for the min_value and max_value fields of each column in
   * this file. Each sort order corresponds to one column, determined by its
   * position in the list, matching the position of the column in the schema.
   *
   * Without column_orders, the meaning of the min_value and max_value fields is
   * undefined. To ensure well-defined behavior, if min_value and max_value are
   * written to a Parquet file, column_orders must be written as well.
   *
   * The obsolete min and max fields are always sorted by signed comparison
   * regardless of column_orders.
   */
  7: optional list<ColumnOrder> column_orders;
}
| 
 | ||||
|  | @ -1,169 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"encoding/json" | ||||
| 	"io" | ||||
| 
 | ||||
| 	"git.apache.org/thrift.git/lib/go/thrift" | ||||
| 	"github.com/minio/minio-go/v7/pkg/set" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
// GetReaderFunc - function type returning io.ReadCloser for requested offset/length.
type GetReaderFunc func(offset, length int64) (io.ReadCloser, error)

// footerSize returns the length of the thrift-encoded footer. It is stored
// as a 4-byte little-endian integer immediately before the trailing 4-byte
// magic, i.e. at offset -8 from the end of the file.
func footerSize(getReaderFunc GetReaderFunc) (int64, error) {
	rc, err := getReaderFunc(-8, 4)
	if err != nil {
		return 0, err
	}
	defer rc.Close()

	lenBuf := make([]byte, 4)
	if _, err := io.ReadFull(rc, lenBuf); err != nil {
		return 0, err
	}

	return int64(binary.LittleEndian.Uint32(lenBuf)), nil
}
| 
 | ||||
| func fileMetadata(getReaderFunc GetReaderFunc) (*parquet.FileMetaData, error) { | ||||
| 	size, err := footerSize(getReaderFunc) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	rc, err := getReaderFunc(-(8 + size), size) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer rc.Close() | ||||
| 
 | ||||
| 	fileMeta := parquet.NewFileMetaData() | ||||
| 
 | ||||
| 	pf := thrift.NewTCompactProtocolFactory() | ||||
| 	protocol := pf.GetProtocol(thrift.NewStreamTransportR(rc)) | ||||
| 	err = fileMeta.Read(protocol) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return fileMeta, nil | ||||
| } | ||||
| 
 | ||||
// Value - denotes column value
type Value struct {
	Value  interface{}            // decoded value; nil when the column value is NULL
	Type   parquet.Type           // physical parquet type of the column
	Schema *parquet.SchemaElement // schema element describing the column
}

// MarshalJSON - encodes to JSON data
// Only the decoded value is emitted; Type and Schema are metadata.
func (value Value) MarshalJSON() (data []byte, err error) {
	return json.Marshal(value.Value)
}
| 
 | ||||
// Reader - denotes parquet file.
type Reader struct {
	getReaderFunc  GetReaderFunc            // fetches arbitrary byte ranges of the underlying file
	schemaElements []*parquet.SchemaElement // flattened schema read from the file footer
	rowGroups      []*parquet.RowGroup      // all row groups in the file
	rowGroupIndex  int                      // index of the row group currently being read

	nameList    []string           // column names in schema order
	columnNames set.StringSet      // columns requested by the caller
	columns     map[string]*column // open column readers for the current row group; nil until Read opens them
	rowIndex    int64              // next row to read within the current row group
}
| 
 | ||||
| // NewReader - creates new parquet reader. Reader calls getReaderFunc to get required data range for given columnNames. If columnNames is empty, all columns are used.
 | ||||
| func NewReader(getReaderFunc GetReaderFunc, columnNames set.StringSet) (*Reader, error) { | ||||
| 	fileMeta, err := fileMetadata(getReaderFunc) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	nameList := []string{} | ||||
| 	schemaElements := fileMeta.GetSchema() | ||||
| 	for _, element := range schemaElements { | ||||
| 		nameList = append(nameList, element.Name) | ||||
| 	} | ||||
| 
 | ||||
| 	return &Reader{ | ||||
| 		getReaderFunc:  getReaderFunc, | ||||
| 		rowGroups:      fileMeta.GetRowGroups(), | ||||
| 		schemaElements: schemaElements, | ||||
| 		nameList:       nameList, | ||||
| 		columnNames:    columnNames, | ||||
| 	}, nil | ||||
| } | ||||
| 
 | ||||
| // Read - reads single record.
 | ||||
| func (reader *Reader) Read() (record *Record, err error) { | ||||
| 	if reader.rowGroupIndex >= len(reader.rowGroups) { | ||||
| 		return nil, io.EOF | ||||
| 	} | ||||
| 
 | ||||
| 	if reader.columns == nil { | ||||
| 		reader.columns, err = getColumns( | ||||
| 			reader.rowGroups[reader.rowGroupIndex], | ||||
| 			reader.columnNames, | ||||
| 			reader.schemaElements, | ||||
| 			reader.getReaderFunc, | ||||
| 		) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		reader.rowIndex = 0 | ||||
| 	} | ||||
| 
 | ||||
| 	if reader.rowIndex >= reader.rowGroups[reader.rowGroupIndex].GetNumRows() { | ||||
| 		reader.rowGroupIndex++ | ||||
| 		reader.Close() | ||||
| 		return reader.Read() | ||||
| 	} | ||||
| 
 | ||||
| 	record = newRecord(reader.nameList) | ||||
| 	for name := range reader.columns { | ||||
| 		col := reader.columns[name] | ||||
| 		value, valueType, schema := col.read() | ||||
| 		record.set(name, Value{Value: value, Type: valueType, Schema: schema}) | ||||
| 	} | ||||
| 
 | ||||
| 	reader.rowIndex++ | ||||
| 
 | ||||
| 	return record, nil | ||||
| } | ||||
| 
 | ||||
| // Close - closes underneath readers.
 | ||||
| func (reader *Reader) Close() (err error) { | ||||
| 	for _, column := range reader.columns { | ||||
| 		column.close() | ||||
| 	} | ||||
| 
 | ||||
| 	reader.columns = nil | ||||
| 	reader.rowIndex = 0 | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
|  | @ -1,91 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio-go/v7/pkg/set" | ||||
| ) | ||||
| 
 | ||||
// getReader opens name and returns it positioned at offset, ready to serve
// up to length bytes. A negative offset is interpreted relative to the end
// of the file (mirroring the GetReaderFunc contract used by the reader).
// The file handle is closed on every error path so it cannot leak; on
// success the caller owns the returned io.ReadCloser.
func getReader(name string, offset int64, length int64) (io.ReadCloser, error) {
	file, err := os.Open(name)
	if err != nil {
		return nil, err
	}

	fi, err := file.Stat()
	if err != nil {
		file.Close() // avoid leaking the handle on Stat failure
		return nil, err
	}

	if offset < 0 {
		offset = fi.Size() + offset
	}

	if _, err = file.Seek(offset, io.SeekStart); err != nil {
		file.Close() // avoid leaking the handle on Seek failure
		return nil, err
	}

	return file, nil
}
| 
 | ||||
// TestReader exercises Reader end-to-end against the checked-in
// example.parquet fixture, comparing the String() form of each record
// against golden values.
func TestReader(t *testing.T) {
	name := "example.parquet"
	reader, err := NewReader(
		func(offset, length int64) (io.ReadCloser, error) {
			return getReader(name, offset, length)
		},
		set.CreateStringSet("one", "two", "three"),
	)
	if err != nil {
		t.Fatal(err)
	}

	// Golden String() output for the three rows in the fixture.
	expectedRecords := []string{
		`map[one:{-1 DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{true BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[102 111 111] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
		`map[one:{<nil> DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{false BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[98 97 114] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
		`map[one:{2.5 DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{true BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[98 97 122] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
	}

	// Read until EOF, comparing each record with its golden value.
	i := 0
	for {
		record, err := reader.Read()
		if err != nil {
			if err != io.EOF {
				t.Error(err)
			}

			break
		}

		if i == len(expectedRecords) {
			t.Errorf("read more than expected record count %v", len(expectedRecords))
		}

		if record.String() != expectedRecords[i] {
			t.Errorf("record%v: expected: %v, got: %v", i+1, expectedRecords[i], record.String())
		}

		i++
	}

	reader.Close()
}
|  | @ -1,71 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
| ) | ||||
| 
 | ||||
// Record - ordered parquet record.
type Record struct {
	nameList     []string         // column names in schema order; fixes iteration order in Range/String
	nameValueMap map[string]Value // column name -> decoded value
}
| 
 | ||||
| // String - returns string representation of this record.
 | ||||
| func (r *Record) String() string { | ||||
| 	values := []string{} | ||||
| 	r.Range(func(name string, value Value) bool { | ||||
| 		values = append(values, fmt.Sprintf("%v:%v", name, value)) | ||||
| 		return true | ||||
| 	}) | ||||
| 
 | ||||
| 	return "map[" + strings.Join(values, " ") + "]" | ||||
| } | ||||
| 
 | ||||
| func (r *Record) set(name string, value Value) { | ||||
| 	r.nameValueMap[name] = value | ||||
| } | ||||
| 
 | ||||
| // Get - returns Value of name.
 | ||||
| func (r *Record) Get(name string) (Value, bool) { | ||||
| 	value, ok := r.nameValueMap[name] | ||||
| 	return value, ok | ||||
| } | ||||
| 
 | ||||
| // Range - calls f sequentially for each name and value present in the record. If f returns false, range stops the iteration.
 | ||||
| func (r *Record) Range(f func(name string, value Value) bool) { | ||||
| 	for _, name := range r.nameList { | ||||
| 		value, ok := r.nameValueMap[name] | ||||
| 		if !ok { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		if !f(name, value) { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func newRecord(nameList []string) *Record { | ||||
| 	return &Record{ | ||||
| 		nameList:     nameList, | ||||
| 		nameValueMap: make(map[string]Value), | ||||
| 	} | ||||
| } | ||||
|  | @ -1,127 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package schema | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"regexp" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
// nameRegexp matches valid schema element names: ASCII letters, digits and underscores only.
var nameRegexp = regexp.MustCompile("^[a-zA-Z0-9_]+$")

// validataPathSegments checks that every segment of a dotted path is a valid
// element name; the error message reports the full joined path.
func validataPathSegments(pathSegments []string) error {
	for _, segment := range pathSegments {
		if nameRegexp.MatchString(segment) {
			continue
		}
		return fmt.Errorf("unsupported name %v", strings.Join(pathSegments, "."))
	}

	return nil
}
| 
 | ||||
// Element - represents schema element and its children. Any element must have Name and RepetitionType fields set.
type Element struct {
	parquet.SchemaElement
	numChildren        int32                     // backing store pointed to by SchemaElement.NumChildren
	Encoding           *parquet.Encoding         // Optional; defaults is computed.
	CompressionType    *parquet.CompressionCodec // Optional; defaults to SNAPPY.
	Children           *Tree                     // child elements for group types; nil for leaves
	MaxDefinitionLevel int64                     // computed by updateMaxDLRL during ToParquetSchema
	MaxRepetitionLevel int64                     // computed by updateMaxDLRL during ToParquetSchema
	PathInTree         string                    // dotted path within the tree; set by toParquetSchema
	PathInSchema       string                    // dotted path within the parquet schema; set by toParquetSchema
}
| 
 | ||||
| // String - stringify this element.
 | ||||
| func (element *Element) String() string { | ||||
| 	var s []string | ||||
| 	s = append(s, "Name:"+element.Name) | ||||
| 	s = append(s, "RepetitionType:"+element.RepetitionType.String()) | ||||
| 	if element.Type != nil { | ||||
| 		s = append(s, "Type:"+element.Type.String()) | ||||
| 	} | ||||
| 	if element.ConvertedType != nil { | ||||
| 		s = append(s, "ConvertedType:"+element.ConvertedType.String()) | ||||
| 	} | ||||
| 	if element.Encoding != nil { | ||||
| 		s = append(s, "Encoding:"+element.Encoding.String()) | ||||
| 	} | ||||
| 	if element.CompressionType != nil { | ||||
| 		s = append(s, "CompressionType:"+element.CompressionType.String()) | ||||
| 	} | ||||
| 	if element.Children != nil && element.Children.Length() > 0 { | ||||
| 		s = append(s, "Children:"+element.Children.String()) | ||||
| 	} | ||||
| 	s = append(s, fmt.Sprintf("MaxDefinitionLevel:%v", element.MaxDefinitionLevel)) | ||||
| 	s = append(s, fmt.Sprintf("MaxRepetitionLevel:%v", element.MaxRepetitionLevel)) | ||||
| 	if element.PathInTree != "" { | ||||
| 		s = append(s, "PathInTree:"+element.PathInTree) | ||||
| 	} | ||||
| 	if element.PathInSchema != "" { | ||||
| 		s = append(s, "PathInSchema:"+element.PathInSchema) | ||||
| 	} | ||||
| 
 | ||||
| 	return "{" + strings.Join(s, ", ") + "}" | ||||
| } | ||||
| 
 | ||||
| // NewElement - creates new element.
 | ||||
| func NewElement(name string, repetitionType parquet.FieldRepetitionType, | ||||
| 	elementType *parquet.Type, convertedType *parquet.ConvertedType, | ||||
| 	encoding *parquet.Encoding, compressionType *parquet.CompressionCodec, | ||||
| 	children *Tree) (*Element, error) { | ||||
| 
 | ||||
| 	if !nameRegexp.MatchString(name) { | ||||
| 		return nil, fmt.Errorf("unsupported name %v", name) | ||||
| 	} | ||||
| 
 | ||||
| 	switch repetitionType { | ||||
| 	case parquet.FieldRepetitionType_REQUIRED, parquet.FieldRepetitionType_OPTIONAL, parquet.FieldRepetitionType_REPEATED: | ||||
| 	default: | ||||
| 		return nil, fmt.Errorf("unknown repetition type %v", repetitionType) | ||||
| 	} | ||||
| 
 | ||||
| 	if repetitionType == parquet.FieldRepetitionType_REPEATED && (elementType != nil || convertedType != nil) { | ||||
| 		return nil, fmt.Errorf("repetition type REPEATED should be used in group element") | ||||
| 	} | ||||
| 
 | ||||
| 	if children != nil && children.Length() != 0 { | ||||
| 		if elementType != nil { | ||||
| 			return nil, fmt.Errorf("type should be nil for group element") | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	element := Element{ | ||||
| 		Encoding:        encoding, | ||||
| 		CompressionType: compressionType, | ||||
| 		Children:        children, | ||||
| 	} | ||||
| 
 | ||||
| 	element.Name = name | ||||
| 	element.RepetitionType = &repetitionType | ||||
| 	element.Type = elementType | ||||
| 	element.ConvertedType = convertedType | ||||
| 	element.NumChildren = &element.numChildren | ||||
| 	if element.Children != nil { | ||||
| 		element.numChildren = int32(element.Children.Length()) | ||||
| 	} | ||||
| 
 | ||||
| 	return &element, nil | ||||
| } | ||||
|  | @ -1,389 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package schema | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
| func updateMaxDLRL(schemaMap map[string]*Element, maxDL, maxRL int64) { | ||||
| 	for _, element := range schemaMap { | ||||
| 		element.MaxDefinitionLevel = maxDL | ||||
| 		element.MaxRepetitionLevel = maxRL | ||||
| 		if *element.RepetitionType != parquet.FieldRepetitionType_REQUIRED { | ||||
| 			element.MaxDefinitionLevel++ | ||||
| 			if *element.RepetitionType == parquet.FieldRepetitionType_REPEATED { | ||||
| 				element.MaxRepetitionLevel++ | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if element.Children != nil { | ||||
| 			updateMaxDLRL(element.Children.schemaMap, element.MaxDefinitionLevel, element.MaxRepetitionLevel) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
// toParquetSchema walks tree depth-first in insertion order (via Range) and
// flattens it into schemaList. Elements that carry a physical Type (leaf
// columns) are additionally collected into valueElements. treePrefix and
// schemaPrefix are the dotted paths of the parent element in the tree and
// in the parquet schema respectively; both are "" at the root call.
// LIST and MAP converted types are validated against the supported layouts
// documented inline before descending into their children.
func toParquetSchema(tree *Tree, treePrefix string, schemaPrefix string, schemaList *[]*parquet.SchemaElement, valueElements *[]*Element) (err error) {
	tree.Range(func(name string, element *Element) bool {
		pathInTree := name
		if treePrefix != "" {
			pathInTree = treePrefix + "." + name
		}

		// An element with no type information at all must be a group with children.
		if element.Type == nil && element.ConvertedType == nil && element.Children == nil {
			err = fmt.Errorf("%v: group element must have children", pathInTree)
			return false
		}

		if element.ConvertedType != nil {
			switch *element.ConvertedType {
			case parquet.ConvertedType_LIST:
				// Supported structure.
				// <REQUIRED|OPTIONAL> group <name> (LIST) {
				//   REPEATED group list {
				//     <REQUIRED|OPTIONAL> <element-type> element;
				//   }
				// }

				if element.Type != nil {
					err = fmt.Errorf("%v: type must be nil for LIST ConvertedType", pathInTree)
					return false
				}

				if element.Children == nil || element.Children.Length() != 1 {
					err = fmt.Errorf("%v: children must have one element only for LIST ConvertedType", pathInTree)
					return false
				}

				listElement, ok := element.Children.Get("list")
				if !ok {
					err = fmt.Errorf("%v: missing group element 'list' for LIST ConvertedType", pathInTree)
					return false
				}

				if listElement.Name != "list" {
					err = fmt.Errorf("%v.list: name must be 'list'", pathInTree)
					return false
				}

				if *listElement.RepetitionType != parquet.FieldRepetitionType_REPEATED {
					err = fmt.Errorf("%v.list: repetition type must be REPEATED type", pathInTree)
					return false
				}

				if listElement.Type != nil || listElement.ConvertedType != nil {
					err = fmt.Errorf("%v.list: type and converted type must be nil", pathInTree)
					return false
				}

				if listElement.Children == nil || listElement.Children.Length() != 1 {
					err = fmt.Errorf("%v.list.element: not found", pathInTree)
					return false
				}

				valueElement, ok := listElement.Children.Get("element")
				if !ok {
					err = fmt.Errorf("%v.list.element: not found", pathInTree)
					return false
				}

				if valueElement.Name != "element" {
					err = fmt.Errorf("%v.list.element: name must be 'element'", pathInTree)
					return false
				}

			case parquet.ConvertedType_MAP:
				// Supported structure:
				// <REQUIRED|OPTIONAL> group <name> (MAP) {
				//   REPEATED group key_value {
				//     REQUIRED <key-type> key;
				//     <REQUIRED|OPTIONAL> <value-type> value;
				//   }
				// }

				if element.Type != nil {
					err = fmt.Errorf("%v: type must be nil for MAP ConvertedType", pathInTree)
					return false
				}

				if element.Children == nil || element.Children.Length() != 1 {
					err = fmt.Errorf("%v: children must have one element only for MAP ConvertedType", pathInTree)
					return false
				}

				keyValueElement, ok := element.Children.Get("key_value")
				if !ok {
					err = fmt.Errorf("%v: missing group element 'key_value' for MAP ConvertedType", pathInTree)
					return false
				}

				if keyValueElement.Name != "key_value" {
					err = fmt.Errorf("%v.key_value: name must be 'key_value'", pathInTree)
					return false
				}

				if *keyValueElement.RepetitionType != parquet.FieldRepetitionType_REPEATED {
					err = fmt.Errorf("%v.key_value: repetition type must be REPEATED type", pathInTree)
					return false
				}

				// 'value' is optional, so one or two children are accepted.
				if keyValueElement.Children == nil || keyValueElement.Children.Length() < 1 || keyValueElement.Children.Length() > 2 {
					err = fmt.Errorf("%v.key_value: children must have 'key' and optionally 'value' elements for MAP ConvertedType", pathInTree)
					return false
				}

				keyElement, ok := keyValueElement.Children.Get("key")
				if !ok {
					err = fmt.Errorf("%v.key_value: missing 'key' element for MAP ConvertedType", pathInTree)
					return false
				}

				if keyElement.Name != "key" {
					err = fmt.Errorf("%v.key_value.key: name must be 'key'", pathInTree)
					return false
				}

				if *keyElement.RepetitionType != parquet.FieldRepetitionType_REQUIRED {
					err = fmt.Errorf("%v.key_value: repetition type must be REQUIRED type", pathInTree)
					return false
				}

				if keyValueElement.Children.Length() == 2 {
					valueElement, ok := keyValueElement.Children.Get("value")
					if !ok {
						err = fmt.Errorf("%v.key_value: second element must be 'value' element for MAP ConvertedType", pathInTree)
						return false
					}

					if valueElement.Name != "value" {
						err = fmt.Errorf("%v.key_value.value: name must be 'value'", pathInTree)
						return false
					}
				}

			case parquet.ConvertedType_UTF8, parquet.ConvertedType_UINT_8, parquet.ConvertedType_UINT_16:
				fallthrough
			case parquet.ConvertedType_UINT_32, parquet.ConvertedType_UINT_64, parquet.ConvertedType_INT_8:
				fallthrough
			case parquet.ConvertedType_INT_16, parquet.ConvertedType_INT_32, parquet.ConvertedType_INT_64:
				// Scalar converted types annotate a physical type; Type is mandatory.
				if element.Type == nil {
					err = fmt.Errorf("%v: ConvertedType %v must have Type value", pathInTree, element.ConvertedType)
					return false
				}

			default:
				err = fmt.Errorf("%v: unsupported ConvertedType %v", pathInTree, element.ConvertedType)
				return false
			}
		}

		// Record both path views on the element for later lookup.
		element.PathInTree = pathInTree
		element.PathInSchema = element.Name
		if schemaPrefix != "" {
			element.PathInSchema = schemaPrefix + "." + element.Name
		}

		// Leaf columns (those with a physical Type) hold values.
		if element.Type != nil {
			*valueElements = append(*valueElements, element)
		}

		*schemaList = append(*schemaList, &element.SchemaElement)
		if element.Children != nil {
			element.numChildren = int32(element.Children.Length())
			err = toParquetSchema(element.Children, element.PathInTree, element.PathInSchema, schemaList, valueElements)
		}

		// Stop the Range as soon as any validation or recursion failed.
		return (err == nil)
	})

	return err
}
| 
 | ||||
| // Tree - represents tree of schema.  Tree preserves order in which elements are added.
 | ||||
| type Tree struct { | ||||
| 	schemaMap map[string]*Element | ||||
| 	keys      []string | ||||
| 	readOnly  bool | ||||
| } | ||||
| 
 | ||||
| // String - stringify this tree.
 | ||||
| func (tree *Tree) String() string { | ||||
| 	var s []string | ||||
| 	tree.Range(func(name string, element *Element) bool { | ||||
| 		s = append(s, fmt.Sprintf("%v: %v", name, element)) | ||||
| 		return true | ||||
| 	}) | ||||
| 
 | ||||
| 	return "{" + strings.Join(s, ", ") + "}" | ||||
| } | ||||
| 
 | ||||
| // Length - returns length of tree.
 | ||||
| func (tree *Tree) Length() int { | ||||
| 	return len(tree.keys) | ||||
| } | ||||
| 
 | ||||
// travel walks pathSegments down the tree. On return:
//   - pathSegmentIndex/pathSegment refer to the last segment examined,
//   - currElement is the deepest element matched (nil if nothing matched),
//   - parentTree is the tree owning pathSegment's slot,
//   - found reports whether the full path matched.
// Results are carried by the named return values (bare return below).
func (tree *Tree) travel(pathSegments []string) (pathSegmentIndex int, pathSegment string, currElement *Element, parentTree *Tree, found bool) {
	parentTree = tree
	for pathSegmentIndex, pathSegment = range pathSegments {
		if tree == nil {
			// Previous element was a leaf (no Children); the path cannot continue.
			found = false
			break
		}

		var tmpCurrElement *Element
		if tmpCurrElement, found = tree.schemaMap[pathSegment]; !found {
			break
		}
		currElement = tmpCurrElement

		// Descend: remember the owner of the segment just matched.
		parentTree = tree
		tree = currElement.Children
	}

	return
}
| 
 | ||||
| // ReadOnly - returns whether this tree is read only or not.
 | ||||
| func (tree *Tree) ReadOnly() bool { | ||||
| 	return tree.readOnly | ||||
| } | ||||
| 
 | ||||
| // Get - returns the element stored for name.
 | ||||
| func (tree *Tree) Get(name string) (element *Element, ok bool) { | ||||
| 	pathSegments := strings.Split(name, ".") | ||||
| 	for _, pathSegment := range pathSegments { | ||||
| 		if tree == nil { | ||||
| 			element = nil | ||||
| 			ok = false | ||||
| 			break | ||||
| 		} | ||||
| 
 | ||||
| 		if element, ok = tree.schemaMap[pathSegment]; !ok { | ||||
| 			break | ||||
| 		} | ||||
| 
 | ||||
| 		tree = element.Children | ||||
| 	} | ||||
| 
 | ||||
| 	return element, ok | ||||
| } | ||||
| 
 | ||||
// Set - adds or sets element to name.
// name is a dotted path; all intermediate segments must already exist
// (only the final segment may be created). Setting an existing name
// replaces its element in place, preserving insertion order.
func (tree *Tree) Set(name string, element *Element) error {
	if tree.readOnly {
		return fmt.Errorf("read only tree")
	}

	pathSegments := strings.Split(name, ".")
	if err := validataPathSegments(pathSegments); err != nil {
		return err
	}

	// travel stops either at the full path (found) or at the first missing
	// segment; parentTree owns the slot for the last visited segment.
	i, pathSegment, currElement, parentTree, found := tree.travel(pathSegments)

	if !found {
		if i != len(pathSegments)-1 {
			// The missing segment is not the last one: its parent group does not exist.
			return fmt.Errorf("parent %v does not exist", strings.Join(pathSegments[:i+1], "."))
		}

		if currElement == nil {
			// Nothing matched at all: insert at the top level of this tree.
			parentTree = tree
		} else {
			// currElement is the deepest existing ancestor; it must be a group element.
			if currElement.Type != nil {
				return fmt.Errorf("parent %v is not group element", strings.Join(pathSegments[:i], "."))
			}

			if currElement.Children == nil {
				currElement.Children = NewTree()
			}
			parentTree = currElement.Children
		}

		// New key: record insertion order.
		parentTree.keys = append(parentTree.keys, pathSegment)
	}

	parentTree.schemaMap[pathSegment] = element
	return nil
}
| 
 | ||||
| // Delete - deletes name and its element.
 | ||||
| func (tree *Tree) Delete(name string) { | ||||
| 	if tree.readOnly { | ||||
| 		panic(fmt.Errorf("read only tree")) | ||||
| 	} | ||||
| 
 | ||||
| 	pathSegments := strings.Split(name, ".") | ||||
| 
 | ||||
| 	_, pathSegment, _, parentTree, found := tree.travel(pathSegments) | ||||
| 
 | ||||
| 	if found { | ||||
| 		for i := range parentTree.keys { | ||||
| 			if parentTree.keys[i] == pathSegment { | ||||
| 				copy(parentTree.keys[i:], parentTree.keys[i+1:]) | ||||
| 				parentTree.keys = parentTree.keys[:len(parentTree.keys)-1] | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		delete(parentTree.schemaMap, pathSegment) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // Range - calls f sequentially for each name and its element. If f returns false, range stops the iteration.
 | ||||
| func (tree *Tree) Range(f func(name string, element *Element) bool) { | ||||
| 	for _, name := range tree.keys { | ||||
| 		if !f(name, tree.schemaMap[name]) { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // ToParquetSchema - returns list of parquet SchemaElement and list of elements those stores values.
 | ||||
| func (tree *Tree) ToParquetSchema() (schemaList []*parquet.SchemaElement, valueElements []*Element, err error) { | ||||
| 	if tree.readOnly { | ||||
| 		return nil, nil, fmt.Errorf("read only tree") | ||||
| 	} | ||||
| 
 | ||||
| 	updateMaxDLRL(tree.schemaMap, 0, 0) | ||||
| 
 | ||||
| 	var schemaElements []*parquet.SchemaElement | ||||
| 	if err = toParquetSchema(tree, "", "", &schemaElements, &valueElements); err != nil { | ||||
| 		return nil, nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	tree.readOnly = true | ||||
| 
 | ||||
| 	numChildren := int32(len(tree.keys)) | ||||
| 	schemaList = append(schemaList, &parquet.SchemaElement{ | ||||
| 		Name:           "schema", | ||||
| 		RepetitionType: parquet.FieldRepetitionTypePtr(parquet.FieldRepetitionType_REQUIRED), | ||||
| 		NumChildren:    &numChildren, | ||||
| 	}) | ||||
| 	schemaList = append(schemaList, schemaElements...) | ||||
| 	return schemaList, valueElements, nil | ||||
| } | ||||
| 
 | ||||
| // NewTree - creates new schema tree.
 | ||||
| func NewTree() *Tree { | ||||
| 	return &Tree{ | ||||
| 		schemaMap: make(map[string]*Element), | ||||
| 	} | ||||
| } | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -1,101 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 
 | ||||
| func getTableValues(values interface{}, valueType parquet.Type) (tableValues []interface{}) { | ||||
| 	return valuesToInterfaces(values, valueType) | ||||
| } | ||||
| 
 | ||||
| type table struct { | ||||
| 	RepetitionType     parquet.FieldRepetitionType | ||||
| 	Type               parquet.Type | ||||
| 	MaxDefinitionLevel int32 | ||||
| 	MaxRepetitionLevel int32 | ||||
| 	Path               []string      // Path of this column
 | ||||
| 	Values             []interface{} // Parquet values
 | ||||
| 	DefinitionLevels   []int32       // Definition Levels slice
 | ||||
| 	RepetitionLevels   []int32       // Repetition Levels slice
 | ||||
| 	ConvertedType      parquet.ConvertedType | ||||
| 	Encoding           parquet.Encoding | ||||
| 	BitWidth           int32 | ||||
| } | ||||
| 
 | ||||
| func newTableFromTable(srcTable *table) *table { | ||||
| 	if srcTable == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	return &table{ | ||||
| 		Type: srcTable.Type, | ||||
| 		Path: append([]string{}, srcTable.Path...), | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (table *table) Merge(tables ...*table) { | ||||
| 	for i := 0; i < len(tables); i++ { | ||||
| 		if tables[i] == nil { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		table.Values = append(table.Values, tables[i].Values...) | ||||
| 		table.RepetitionLevels = append(table.RepetitionLevels, tables[i].RepetitionLevels...) | ||||
| 		table.DefinitionLevels = append(table.DefinitionLevels, tables[i].DefinitionLevels...) | ||||
| 
 | ||||
| 		if table.MaxDefinitionLevel < tables[i].MaxDefinitionLevel { | ||||
| 			table.MaxDefinitionLevel = tables[i].MaxDefinitionLevel | ||||
| 		} | ||||
| 
 | ||||
| 		if table.MaxRepetitionLevel < tables[i].MaxRepetitionLevel { | ||||
| 			table.MaxRepetitionLevel = tables[i].MaxRepetitionLevel | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (table *table) Pop(numRows int64) *table { | ||||
| 	result := newTableFromTable(table) | ||||
| 	var i, num int64 | ||||
| 	for i = int64(0); i < int64(len(table.Values)); i++ { | ||||
| 		if table.RepetitionLevels[i] == 0 { | ||||
| 			if num >= numRows { | ||||
| 				break | ||||
| 			} | ||||
| 
 | ||||
| 			num++ | ||||
| 		} | ||||
| 
 | ||||
| 		if result.MaxRepetitionLevel < table.RepetitionLevels[i] { | ||||
| 			result.MaxRepetitionLevel = table.RepetitionLevels[i] | ||||
| 		} | ||||
| 
 | ||||
| 		if result.MaxDefinitionLevel < table.DefinitionLevels[i] { | ||||
| 			result.MaxDefinitionLevel = table.DefinitionLevels[i] | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	result.RepetitionLevels = table.RepetitionLevels[:i] | ||||
| 	result.DefinitionLevels = table.DefinitionLevels[:i] | ||||
| 	result.Values = table.Values[:i] | ||||
| 
 | ||||
| 	table.RepetitionLevels = table.RepetitionLevels[i:] | ||||
| 	table.DefinitionLevels = table.DefinitionLevels[i:] | ||||
| 	table.Values = table.Values[i:] | ||||
| 
 | ||||
| 	return result | ||||
| } | ||||
										
											Binary file not shown.
										
									
								
							|  | @ -1,147 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/csv" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/minio/minio-go/v7/pkg/set" | ||||
| 	parquet "github.com/minio/minio/pkg/s3select/internal/parquet-go" | ||||
| ) | ||||
| 
 | ||||
| func getReader(name string, offset int64, length int64) (io.ReadCloser, error) { | ||||
| 	file, err := os.Open(name) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	fi, err := file.Stat() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if offset < 0 { | ||||
| 		offset = fi.Size() + offset | ||||
| 	} | ||||
| 
 | ||||
| 	if _, err = file.Seek(offset, io.SeekStart); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return file, nil | ||||
| } | ||||
| 
 | ||||
| func printUsage() { | ||||
| 	progName := path.Base(os.Args[0]) | ||||
| 	fmt.Printf("usage: %v PARQUET-FILE [COLUMN...]\n", progName) | ||||
| 	fmt.Println() | ||||
| 	fmt.Printf("examples:\n") | ||||
| 	fmt.Printf("# Convert all columns to CSV\n") | ||||
| 	fmt.Printf("$ %v example.parquet\n", progName) | ||||
| 	fmt.Println() | ||||
| 	fmt.Printf("# Convert specific columns to CSV\n") | ||||
| 	fmt.Printf("$ %v example.par firstname dob\n", progName) | ||||
| 	fmt.Println() | ||||
| } | ||||
| 
 | ||||
| func main() { | ||||
| 	if len(os.Args) < 2 { | ||||
| 		printUsage() | ||||
| 		os.Exit(-1) | ||||
| 	} | ||||
| 
 | ||||
| 	name := os.Args[1] | ||||
| 	ext := path.Ext(name) | ||||
| 	csvFilename := name + ".csv" | ||||
| 	if ext == ".parquet" || ext == ".par" { | ||||
| 		csvFilename = strings.TrimSuffix(name, ext) + ".csv" | ||||
| 	} | ||||
| 
 | ||||
| 	columns := set.CreateStringSet(os.Args[2:]...) | ||||
| 	if len(columns) == 0 { | ||||
| 		columns = nil | ||||
| 	} | ||||
| 
 | ||||
| 	file, err := parquet.NewReader( | ||||
| 		func(offset, length int64) (io.ReadCloser, error) { | ||||
| 			return getReader(name, offset, length) | ||||
| 		}, | ||||
| 		columns, | ||||
| 	) | ||||
| 	if err != nil { | ||||
| 		fmt.Printf("%v: %v\n", name, err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	defer file.Close() | ||||
| 
 | ||||
| 	csvFile, err := os.OpenFile(csvFilename, os.O_RDWR|os.O_CREATE, 0755) | ||||
| 	if err != nil { | ||||
| 		fmt.Printf("%v: %v\n", csvFilename, err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	defer csvFile.Close() | ||||
| 
 | ||||
| 	csvWriter := csv.NewWriter(csvFile) | ||||
| 	defer csvWriter.Flush() | ||||
| 
 | ||||
| 	headerWritten := false | ||||
| 	for { | ||||
| 		record, err := file.Read() | ||||
| 		if err != nil { | ||||
| 			if err != io.EOF { | ||||
| 				fmt.Printf("%v: %v\n", name, err) | ||||
| 				os.Exit(1) | ||||
| 			} | ||||
| 
 | ||||
| 			break | ||||
| 		} | ||||
| 
 | ||||
| 		if !headerWritten { | ||||
| 			var csvRecord []string | ||||
| 			record.Range(func(name string, value parquet.Value) bool { | ||||
| 				csvRecord = append(csvRecord, name) | ||||
| 				return true | ||||
| 			}) | ||||
| 
 | ||||
| 			if err = csvWriter.Write(csvRecord); err != nil { | ||||
| 				fmt.Printf("%v: %v\n", csvFilename, err) | ||||
| 				os.Exit(1) | ||||
| 			} | ||||
| 
 | ||||
| 			headerWritten = true | ||||
| 		} | ||||
| 
 | ||||
| 		var csvRecord []string | ||||
| 		record.Range(func(name string, value parquet.Value) bool { | ||||
| 			csvRecord = append(csvRecord, fmt.Sprintf("%v", value.Value)) | ||||
| 			return true | ||||
| 		}) | ||||
| 
 | ||||
| 		if err = csvWriter.Write(csvRecord); err != nil { | ||||
| 			fmt.Printf("%v: %v\n", csvFilename, err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,129 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/minio/minio-go/v7/pkg/set" | ||||
| 	parquet "github.com/minio/minio/pkg/s3select/internal/parquet-go" | ||||
| ) | ||||
| 
 | ||||
| func getReader(name string, offset int64, length int64) (io.ReadCloser, error) { | ||||
| 	file, err := os.Open(name) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	fi, err := file.Stat() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if offset < 0 { | ||||
| 		offset = fi.Size() + offset | ||||
| 	} | ||||
| 
 | ||||
| 	if _, err = file.Seek(offset, io.SeekStart); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return file, nil | ||||
| } | ||||
| 
 | ||||
| func printUsage() { | ||||
| 	progName := path.Base(os.Args[0]) | ||||
| 	fmt.Printf("Usage: %v PARQUET-FILE [COLUMN...]\n", progName) | ||||
| 	fmt.Println() | ||||
| 	fmt.Printf("Examples:\n") | ||||
| 	fmt.Printf("# Convert all columns to JSON\n") | ||||
| 	fmt.Printf("$ %v example.parquet\n", progName) | ||||
| 	fmt.Println() | ||||
| 	fmt.Printf("# Convert specific columns to JSON\n") | ||||
| 	fmt.Printf("$ %v example.par firstname dob\n", progName) | ||||
| 	fmt.Println() | ||||
| } | ||||
| 
 | ||||
| func main() { | ||||
| 	if len(os.Args) < 2 { | ||||
| 		printUsage() | ||||
| 		os.Exit(-1) | ||||
| 	} | ||||
| 
 | ||||
| 	name := os.Args[1] | ||||
| 	ext := path.Ext(name) | ||||
| 	jsonFilename := name + ".json" | ||||
| 	if ext == ".parquet" || ext == ".par" { | ||||
| 		jsonFilename = strings.TrimSuffix(name, ext) + ".json" | ||||
| 	} | ||||
| 
 | ||||
| 	columns := set.CreateStringSet(os.Args[2:]...) | ||||
| 	if len(columns) == 0 { | ||||
| 		columns = nil | ||||
| 	} | ||||
| 
 | ||||
| 	file, err := parquet.NewReader( | ||||
| 		func(offset, length int64) (io.ReadCloser, error) { | ||||
| 			return getReader(name, offset, length) | ||||
| 		}, | ||||
| 		columns, | ||||
| 	) | ||||
| 	if err != nil { | ||||
| 		fmt.Printf("%v: %v\n", name, err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	defer file.Close() | ||||
| 
 | ||||
| 	jsonFile, err := os.OpenFile(jsonFilename, os.O_RDWR|os.O_CREATE, 0755) | ||||
| 	if err != nil { | ||||
| 		fmt.Printf("%v: %v\n", jsonFilename, err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	defer jsonFile.Close() | ||||
| 
 | ||||
| 	for { | ||||
| 		record, err := file.Read() | ||||
| 		if err != nil { | ||||
| 			if err != io.EOF { | ||||
| 				fmt.Printf("%v: %v\n", name, err) | ||||
| 				os.Exit(1) | ||||
| 			} | ||||
| 
 | ||||
| 			break | ||||
| 		} | ||||
| 
 | ||||
| 		data, err := json.Marshal(record) | ||||
| 		if err != nil { | ||||
| 			fmt.Printf("%v: %v\n", name, err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 		data = append(data, byte('\n')) | ||||
| 
 | ||||
| 		if _, err = jsonFile.Write(data); err != nil { | ||||
| 			fmt.Printf("%v: %v\n", jsonFilename, err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,192 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"encoding/binary" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 
 | ||||
| 	"git.apache.org/thrift.git/lib/go/thrift" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/data" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	defaultPageSize     = 8 * 1024          // 8 KiB
 | ||||
| 	defaultRowGroupSize = 128 * 1024 * 1024 // 128 MiB
 | ||||
| ) | ||||
| 
 | ||||
| // Writer - represents parquet writer.
 | ||||
| type Writer struct { | ||||
| 	PageSize        int64 | ||||
| 	RowGroupSize    int64 | ||||
| 	CompressionType parquet.CompressionCodec | ||||
| 
 | ||||
| 	writeCloser   io.WriteCloser | ||||
| 	numRows       int64 | ||||
| 	offset        int64 | ||||
| 	footer        *parquet.FileMetaData | ||||
| 	schemaTree    *schema.Tree | ||||
| 	valueElements []*schema.Element | ||||
| 	columnDataMap map[string]*data.Column | ||||
| 	rowGroupCount int | ||||
| } | ||||
| 
 | ||||
| func (writer *Writer) writeData() (err error) { | ||||
| 	if writer.numRows == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	var chunks []*data.ColumnChunk | ||||
| 	for _, element := range writer.valueElements { | ||||
| 		name := element.PathInTree | ||||
| 		columnData, found := writer.columnDataMap[name] | ||||
| 		if !found { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		columnChunk := columnData.Encode(element) | ||||
| 		chunks = append(chunks, columnChunk) | ||||
| 	} | ||||
| 
 | ||||
| 	rowGroup := data.NewRowGroup(chunks, writer.numRows, writer.offset) | ||||
| 
 | ||||
| 	for _, chunk := range chunks { | ||||
| 		if _, err = writer.writeCloser.Write(chunk.Data()); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 
 | ||||
| 		writer.offset += chunk.DataLen() | ||||
| 	} | ||||
| 
 | ||||
| 	writer.footer.RowGroups = append(writer.footer.RowGroups, rowGroup) | ||||
| 	writer.footer.NumRows += writer.numRows | ||||
| 
 | ||||
| 	writer.numRows = 0 | ||||
| 	writer.columnDataMap = nil | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // WriteJSON - writes a record represented in JSON.
 | ||||
| func (writer *Writer) WriteJSON(recordData []byte) (err error) { | ||||
| 	columnDataMap, err := data.UnmarshalJSON(recordData, writer.schemaTree) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return writer.Write(columnDataMap) | ||||
| } | ||||
| 
 | ||||
| // Write - writes a record represented in map.
 | ||||
| func (writer *Writer) Write(record map[string]*data.Column) (err error) { | ||||
| 	if writer.columnDataMap == nil { | ||||
| 		writer.columnDataMap = record | ||||
| 	} else { | ||||
| 		for name, columnData := range record { | ||||
| 			var found bool | ||||
| 			var element *schema.Element | ||||
| 			for _, element = range writer.valueElements { | ||||
| 				if element.PathInTree == name { | ||||
| 					found = true | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 
 | ||||
| 			if !found { | ||||
| 				return fmt.Errorf("%v is not value column", name) | ||||
| 			} | ||||
| 
 | ||||
| 			writer.columnDataMap[name].Merge(columnData) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	writer.numRows++ | ||||
| 	if writer.numRows == int64(writer.rowGroupCount) { | ||||
| 		return writer.writeData() | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (writer *Writer) finalize() (err error) { | ||||
| 	if err = writer.writeData(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	ts := thrift.NewTSerializer() | ||||
| 	ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) | ||||
| 	footerBuf, err := ts.Write(context.TODO(), writer.footer) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	if _, err = writer.writeCloser.Write(footerBuf); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	footerSizeBuf := make([]byte, 4) | ||||
| 	binary.LittleEndian.PutUint32(footerSizeBuf, uint32(len(footerBuf))) | ||||
| 
 | ||||
| 	if _, err = writer.writeCloser.Write(footerSizeBuf); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	_, err = writer.writeCloser.Write([]byte("PAR1")) | ||||
| 	return err | ||||
| } | ||||
| 
 | ||||
| // Close - finalizes and closes writer. If any pending records are available, they are written here.
 | ||||
| func (writer *Writer) Close() (err error) { | ||||
| 	if err = writer.finalize(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	return writer.writeCloser.Close() | ||||
| } | ||||
| 
 | ||||
| // NewWriter - creates new parquet writer. Binary data of rowGroupCount records are written to writeCloser.
 | ||||
| func NewWriter(writeCloser io.WriteCloser, schemaTree *schema.Tree, rowGroupCount int) (*Writer, error) { | ||||
| 	if _, err := writeCloser.Write([]byte("PAR1")); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	schemaList, valueElements, err := schemaTree.ToParquetSchema() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	footer := parquet.NewFileMetaData() | ||||
| 	footer.Version = 1 | ||||
| 	footer.Schema = schemaList | ||||
| 
 | ||||
| 	return &Writer{ | ||||
| 		PageSize:        defaultPageSize, | ||||
| 		RowGroupSize:    defaultRowGroupSize, | ||||
| 		CompressionType: parquet.CompressionCodec_SNAPPY, | ||||
| 
 | ||||
| 		writeCloser:   writeCloser, | ||||
| 		offset:        4, | ||||
| 		footer:        footer, | ||||
| 		schemaTree:    schemaTree, | ||||
| 		valueElements: valueElements, | ||||
| 		rowGroupCount: rowGroupCount, | ||||
| 	}, nil | ||||
| } | ||||
|  | @ -1,153 +0,0 @@ | |||
| // Copyright (c) 2015-2021 MinIO, Inc.
 | ||||
| //
 | ||||
| // This file is part of MinIO Object Storage stack
 | ||||
| //
 | ||||
| // This program is free software: you can redistribute it and/or modify
 | ||||
| // it under the terms of the GNU Affero General Public License as published by
 | ||||
| // the Free Software Foundation, either version 3 of the License, or
 | ||||
| // (at your option) any later version.
 | ||||
| //
 | ||||
| // This program is distributed in the hope that it will be useful
 | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| // GNU Affero General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Affero General Public License
 | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| package parquet | ||||
| 
 | ||||
| import ( | ||||
| 	"os" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/data" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" | ||||
| ) | ||||
| 
 | ||||
| func TestWriterWrite(t *testing.T) { | ||||
| 	schemaTree := schema.NewTree() | ||||
| 	{ | ||||
| 		one, err := schema.NewElement("one", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_16), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		two, err := schema.NewElement("two", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		three, err := schema.NewElement("three", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BOOLEAN), nil, nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err := schemaTree.Set("one", one); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err := schemaTree.Set("two", two); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err := schemaTree.Set("three", three); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	file, err := os.Create("test.parquet") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	writer, err := NewWriter(file, schemaTree, 100) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	oneColumn := data.NewColumn(parquet.Type_INT32) | ||||
| 	oneColumn.AddInt32(100, 0, 0) | ||||
| 
 | ||||
| 	twoColumn := data.NewColumn(parquet.Type_BYTE_ARRAY) | ||||
| 	twoColumn.AddByteArray([]byte("foo"), 0, 0) | ||||
| 
 | ||||
| 	threeColumn := data.NewColumn(parquet.Type_BOOLEAN) | ||||
| 	threeColumn.AddBoolean(true, 0, 0) | ||||
| 
 | ||||
| 	record := map[string]*data.Column{ | ||||
| 		"one":   oneColumn, | ||||
| 		"two":   twoColumn, | ||||
| 		"three": threeColumn, | ||||
| 	} | ||||
| 
 | ||||
| 	err = writer.Write(record) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	err = writer.Close() | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestWriterWriteJSON(t *testing.T) { | ||||
| 	schemaTree := schema.NewTree() | ||||
| 	{ | ||||
| 		one, err := schema.NewElement("one", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_16), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		two, err := schema.NewElement("two", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), | ||||
| 			nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		three, err := schema.NewElement("three", parquet.FieldRepetitionType_REQUIRED, | ||||
| 			parquet.TypePtr(parquet.Type_BOOLEAN), nil, nil, nil, nil) | ||||
| 		if err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if err := schemaTree.Set("one", one); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err := schemaTree.Set("two", two); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 		if err := schemaTree.Set("three", three); err != nil { | ||||
| 			t.Fatal(err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	file, err := os.Create("test.parquet") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	writer, err := NewWriter(file, schemaTree, 100) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	record := `{"one": 100, "two": "foo", "three": true}` | ||||
| 	err = writer.WriteJSON([]byte(record)) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	err = writer.Close() | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| } | ||||
|  | @ -23,10 +23,10 @@ import ( | |||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/bcicen/jstream" | ||||
| 	parquetgo "github.com/minio/minio/pkg/s3select/internal/parquet-go" | ||||
| 	parquetgen "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" | ||||
| 	jsonfmt "github.com/minio/minio/pkg/s3select/json" | ||||
| 	"github.com/minio/minio/pkg/s3select/sql" | ||||
| 	parquetgo "github.com/minio/parquet-go" | ||||
| 	parquetgen "github.com/minio/parquet-go/gen-go/parquet" | ||||
| ) | ||||
| 
 | ||||
| // Reader - Parquet record reader for S3Select.
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue