diff --git a/CREDITS b/CREDITS index 40f722b28..efe21db99 100644 --- a/CREDITS +++ b/CREDITS @@ -10733,6 +10733,214 @@ https://github.com/minio/minio-go/v7 ================================================================ +github.com/minio/parquet-go +https://github.com/minio/parquet-go +---------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +================================================================ + github.com/minio/rpc https://github.com/minio/rpc ---------------------------------------------------------------- diff --git a/go.mod b/go.mod index 3daf0c8b3..dd57d89b1 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,6 @@ go 1.16 require ( cloud.google.com/go/storage v1.8.0 - git.apache.org/thrift.git v0.13.0 github.com/Azure/azure-pipeline-go v0.2.2 github.com/Azure/azure-storage-blob-go v0.10.0 github.com/Azure/go-autorest/autorest/adal v0.9.1 // indirect @@ -30,7 +29,6 @@ require ( github.com/fatih/structs v1.1.0 github.com/go-ldap/ldap/v3 v3.2.4 github.com/go-sql-driver/mysql v1.5.0 - github.com/golang/snappy v0.0.3 github.com/gomodule/redigo v2.0.0+incompatible github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible // indirect github.com/google/uuid v1.1.2 @@ -55,6 +53,7 @@ require ( github.com/minio/highwayhash v1.0.2 github.com/minio/md5-simd v1.1.1 // indirect github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78 + github.com/minio/parquet-go v1.0.0 github.com/minio/rpc v1.0.0 github.com/minio/selfupdate v0.3.1 github.com/minio/sha256-simd v1.0.0 @@ -71,7 +70,7 @@ require ( github.com/nsqio/go-nsq v1.0.8 github.com/olivere/elastic/v7 v7.0.22 github.com/philhofer/fwd v1.1.1 - github.com/pierrec/lz4 v2.5.2+incompatible + github.com/pierrec/lz4 v2.6.0+incompatible 
github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.8.0 github.com/prometheus/client_model v0.2.0 @@ -83,8 +82,6 @@ require ( github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/streadway/amqp v1.0.0 - github.com/tidwall/gjson v1.6.8 - github.com/tidwall/sjson v1.0.4 github.com/tinylib/msgp v1.1.3 github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a github.com/willf/bitset v1.1.11 // indirect diff --git a/go.sum b/go.sum index 85498363e..3a439e5f0 100644 --- a/go.sum +++ b/go.sum @@ -180,8 +180,9 @@ github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8 github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= -github.com/frankban/quicktest v1.10.2 h1:19ARM85nVi4xH7xPXuc5eM/udya5ieh7b/Sv+d844Tk= github.com/frankban/quicktest v1.10.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= +github.com/frankban/quicktest v1.12.1 h1:P6vQcHwZYgVGIpUzKB5DXzkEeYJppJOStPLuh9aB89c= +github.com/frankban/quicktest v1.12.1/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-asn1-ber/asn1-ber v1.5.1 h1:pDbRAunXzIUXfx4CB2QJFv5IuPiuoW+sWvr/Us009o8= @@ -251,8 +252,9 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp 
v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible h1:xmapqc1AyLoB+ddYT6r04bD9lIjlOqGaREovi0SzFaE= @@ -443,8 +445,11 @@ github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLT github.com/minio/md5-simd v1.1.0/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= github.com/minio/md5-simd v1.1.1 h1:9ojcLbuZ4gXbB2sX53MKn8JUZ0sB/2wfwsEcRw+I08U= github.com/minio/md5-simd v1.1.1/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= +github.com/minio/minio-go/v7 v7.0.10/go.mod h1:td4gW1ldOsj1PbSNS+WYK43j+P1XVhX/8W8awaYlBFo= github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78 h1:v7OMbUnWkyRlO2MZ5AuYioELhwXF/BgZEznrQ1drBEM= github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78/go.mod h1:mTh2uJuAbEqdhMVl6CMIIZLUeiMiWtJR4JB8/5g2skw= +github.com/minio/parquet-go v1.0.0 h1:fcWsEvub04Nsl/4hiRBDWlbqd6jhacQieV07a+nhiIk= +github.com/minio/parquet-go v1.0.0/go.mod h1:aQlkSOfOq2AtQKkuou3mosNVMwNokd+faTacxxk/oHA= github.com/minio/rpc v1.0.0 h1:tJCHyLfQF6k6HlMQFpKy2FO/7lc2WP8gLDGMZp18E70= github.com/minio/rpc v1.0.0/go.mod h1:b9xqF7J0xeMXr0cM4pnBlP7Te7PDsG5JrRxl5dG6Ldk= github.com/minio/selfupdate v0.3.1 h1:BWEFSNnrZVMUWXbXIgLDNDjbejkmpAmZvy/nCz1HlEs= @@ -531,8 +536,9 @@ github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ= github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod 
h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pierrec/lz4 v2.6.0+incompatible h1:Ix9yFKn1nSPBLFl/yZknTp8TU5G4Ps0JDmguYK6iH1A= +github.com/pierrec/lz4 v2.6.0+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -626,14 +632,15 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/tidwall/gjson v1.6.8 h1:CTmXMClGYPAmln7652e69B7OLXfTi5ABcPPwjIWUv7w= -github.com/tidwall/gjson v1.6.8/go.mod h1:zeFuBCIqD4sN/gmqBzZ4j7Jd6UcA2Fc56x7QFsv+8fI= +github.com/tidwall/gjson v1.7.4/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= +github.com/tidwall/gjson v1.7.5 h1:zmAN/xmX7OtpAkv4Ovfso60r/BiCi5IErCDYGNJu+uc= +github.com/tidwall/gjson v1.7.5/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE= github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= -github.com/tidwall/pretty v1.0.2 h1:Z7S3cePv9Jwm1KwS0513MRaoUe3S01WPbLNV40pwWZU= -github.com/tidwall/pretty v1.0.2/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= -github.com/tidwall/sjson v1.0.4 h1:UcdIRXff12Lpnu3OLtZvnc03g4vH2suXDXhBwBqmzYg= 
-github.com/tidwall/sjson v1.0.4/go.mod h1:bURseu1nuBkFpIES5cz6zBtjmYeOQmEESshn7VpF15Y= +github.com/tidwall/pretty v1.1.0 h1:K3hMW5epkdAVwibsQEfR/7Zj0Qgt4DxtNumTq/VloO8= +github.com/tidwall/pretty v1.1.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= +github.com/tidwall/sjson v1.1.6 h1:8fDdlahON04OZBlTQCIatW8FstSFJz8oxidj5h0rmSQ= +github.com/tidwall/sjson v1.1.6/go.mod h1:KN3FZ7odvXIHPbJdhNorK/M9lWweVUbXsXXhrJ/kGOA= github.com/tinylib/msgp v1.1.3 h1:3giwAkmtaEDLSV0MdO1lDLuPgklgPzmk8H9+So2BVfA= github.com/tinylib/msgp v1.1.3/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8 h1:ndzgwNDnKIqyCvHTXaCqh9KlOWKvBry6nuXMJmonVsE= diff --git a/pkg/s3select/internal/parquet-go/LICENSE b/pkg/s3select/internal/parquet-go/LICENSE deleted file mode 100644 index be3f7b28e..000000000 --- a/pkg/s3select/internal/parquet-go/LICENSE +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. 
- - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. 
- - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. 
- - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. 
- - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. 
This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. 
- - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code.
There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -<https://www.gnu.org/licenses/>. diff --git a/pkg/s3select/internal/parquet-go/Makefile b/pkg/s3select/internal/parquet-go/Makefile deleted file mode 100644 index 3f9a95084..000000000 --- a/pkg/s3select/internal/parquet-go/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -GOPATH := $(shell go env GOPATH) - -all: check - -getdeps: - @if [ ! -f ${GOPATH}/bin/golint ]; then echo "Installing golint" && go get -u golang.org/x/lint/golint; fi - @if [ ! -f ${GOPATH}/bin/gocyclo ]; then echo "Installing gocyclo" && go get -u github.com/fzipp/gocyclo; fi - @if [ ! -f ${GOPATH}/bin/misspell ]; then echo "Installing misspell" && go get -u github.com/client9/misspell/cmd/misspell; fi - @if [ ! -f ${GOPATH}/bin/ineffassign ]; then echo "Installing ineffassign" && go get -u github.com/gordonklaus/ineffassign; fi - -vet: - @echo "Running $@" - @go vet *.go - -fmt: - @echo "Running $@" - @gofmt -d *.go - -lint: - @echo "Running $@" - @${GOPATH}/bin/golint -set_exit_status - -cyclo: - @echo "Running $@" - @${GOPATH}/bin/gocyclo -over 200 . - -spelling: - @${GOPATH}/bin/misspell -locale US -error *.go README.md - -ineffassign: - @echo "Running $@" - @${GOPATH}/bin/ineffassign . - -check: getdeps vet fmt lint cyclo spelling ineffassign - @echo "Running unit tests" - @go test -tags kqueue ./...
diff --git a/pkg/s3select/internal/parquet-go/README.md b/pkg/s3select/internal/parquet-go/README.md deleted file mode 100644 index f38f9c78e..000000000 --- a/pkg/s3select/internal/parquet-go/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# parquet-go - -Modified version of https://github.com/xitongsys/parquet-go diff --git a/pkg/s3select/internal/parquet-go/column.go b/pkg/s3select/internal/parquet-go/column.go deleted file mode 100644 index 5e8f8db2f..000000000 --- a/pkg/s3select/internal/parquet-go/column.go +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>.
- -package parquet - -import ( - "errors" - "io" - "strings" - - "git.apache.org/thrift.git/lib/go/thrift" - "github.com/minio/minio-go/v7/pkg/set" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func getColumns( - rowGroup *parquet.RowGroup, - columnNames set.StringSet, - schemaElements []*parquet.SchemaElement, - getReaderFunc GetReaderFunc, -) (nameColumnMap map[string]*column, err error) { - nameIndexMap := make(map[string]int) - for colIndex, columnChunk := range rowGroup.GetColumns() { - meta := columnChunk.GetMetaData() - if meta == nil { - return nil, errors.New("parquet: column metadata missing") - } - columnName := strings.Join(meta.GetPathInSchema(), ".") - if columnNames != nil && !columnNames.Contains(columnName) { - continue - } - - // Ignore column spanning into another file. - if columnChunk.GetFilePath() != "" { - continue - } - - offset := meta.GetDataPageOffset() - if meta.DictionaryPageOffset != nil { - offset = meta.GetDictionaryPageOffset() - } - - size := meta.GetTotalCompressedSize() - if size < 0 { - return nil, errors.New("parquet: negative compressed size") - } - rc, err := getReaderFunc(offset, size) - if err != nil { - return nil, err - } - - thriftReader := thrift.NewTBufferedTransport(thrift.NewStreamTransportR(rc), int(size)) - - if nameColumnMap == nil { - nameColumnMap = make(map[string]*column) - } - var se *parquet.SchemaElement - for _, schema := range schemaElements { - if schema != nil && schema.Name == columnName { - se = schema - break - } - } - - nameColumnMap[columnName] = &column{ - name: columnName, - metadata: meta, - schema: se, - schemaElements: schemaElements, - rc: rc, - thriftReader: thriftReader, - valueType: meta.GetType(), - } - - // First element of []*parquet.SchemaElement from parquet file metadata is 'schema' - // which is always skipped, hence colIndex + 1 is valid. 
- nameIndexMap[columnName] = colIndex + 1 - } - - for name := range nameColumnMap { - nameColumnMap[name].nameIndexMap = nameIndexMap - } - - return nameColumnMap, nil -} - -type column struct { - name string - endOfValues bool - valueIndex int - valueType parquet.Type - metadata *parquet.ColumnMetaData - schema *parquet.SchemaElement - schemaElements []*parquet.SchemaElement - nameIndexMap map[string]int - dictPage *page - dataTable *table - rc io.ReadCloser - thriftReader *thrift.TBufferedTransport -} - -func (column *column) close() (err error) { - if column.rc != nil { - err = column.rc.Close() - column.rc = nil - } - - return err -} - -func (column *column) readPage() { - page, _, _, err := readPage( - column.thriftReader, - column.metadata, - column.nameIndexMap, - column.schemaElements, - ) - - if err != nil { - column.endOfValues = true - return - } - - if page.Header.GetType() == parquet.PageType_DICTIONARY_PAGE { - column.dictPage = page - column.readPage() - return - } - - page.decode(column.dictPage) - - if column.dataTable == nil { - column.dataTable = newTableFromTable(page.DataTable) - } - - column.dataTable.Merge(page.DataTable) -} - -func (column *column) read() (value interface{}, valueType parquet.Type, cnv *parquet.SchemaElement) { - if column.dataTable == nil { - column.readPage() - column.valueIndex = 0 - } - - if column.endOfValues { - return nil, column.metadata.GetType(), column.schema - } - - value = column.dataTable.Values[column.valueIndex] - column.valueIndex++ - if len(column.dataTable.Values) == column.valueIndex { - column.dataTable = nil - } - - return value, column.metadata.GetType(), column.schema -} diff --git a/pkg/s3select/internal/parquet-go/common.go b/pkg/s3select/internal/parquet-go/common.go deleted file mode 100644 index 947a2eb8d..000000000 --- a/pkg/s3select/internal/parquet-go/common.go +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. 
-// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. - -package parquet - -import ( - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func valuesToInterfaces(values interface{}, valueType parquet.Type) (tableValues []interface{}) { - switch valueType { - case parquet.Type_BOOLEAN: - for _, v := range values.([]bool) { - tableValues = append(tableValues, v) - } - case parquet.Type_INT32: - for _, v := range values.([]int32) { - tableValues = append(tableValues, v) - } - case parquet.Type_INT64: - for _, v := range values.([]int64) { - tableValues = append(tableValues, v) - } - case parquet.Type_FLOAT: - for _, v := range values.([]float32) { - tableValues = append(tableValues, v) - } - case parquet.Type_DOUBLE: - for _, v := range values.([]float64) { - tableValues = append(tableValues, v) - } - case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: - for _, v := range values.([][]byte) { - tableValues = append(tableValues, v) - } - } - - return tableValues -} - -func interfacesToValues(values []interface{}, valueType parquet.Type) interface{} { - switch valueType { - case parquet.Type_BOOLEAN: - bs := make([]bool, len(values)) - for i := range values { - bs[i] = values[i].(bool) - } - return bs - case parquet.Type_INT32: - i32s := make([]int32, len(values)) - for i
:= range values { - i32s[i] = values[i].(int32) - } - return i32s - case parquet.Type_INT64: - i64s := make([]int64, len(values)) - for i := range values { - i64s[i] = values[i].(int64) - } - return i64s - case parquet.Type_FLOAT: - f32s := make([]float32, len(values)) - for i := range values { - f32s[i] = values[i].(float32) - } - return f32s - case parquet.Type_DOUBLE: - f64s := make([]float64, len(values)) - for i := range values { - f64s[i] = values[i].(float64) - } - return f64s - case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: - array := make([][]byte, len(values)) - for i := range values { - array[i] = values[i].([]byte) - } - return array - } - - return nil -} diff --git a/pkg/s3select/internal/parquet-go/common/common.go b/pkg/s3select/internal/parquet-go/common/common.go deleted file mode 100644 index cdaab534c..000000000 --- a/pkg/s3select/internal/parquet-go/common/common.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. - -package common - -import ( - "bytes" - "compress/gzip" - "fmt" - "io/ioutil" - - "github.com/golang/snappy" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/pierrec/lz4" -) - -// ToSliceValue converts values to a slice value.
-func ToSliceValue(values []interface{}, parquetType parquet.Type) interface{} { - switch parquetType { - case parquet.Type_BOOLEAN: - bs := make([]bool, len(values)) - for i := range values { - bs[i] = values[i].(bool) - } - return bs - case parquet.Type_INT32: - i32s := make([]int32, len(values)) - for i := range values { - i32s[i] = values[i].(int32) - } - return i32s - case parquet.Type_INT64: - i64s := make([]int64, len(values)) - for i := range values { - i64s[i] = values[i].(int64) - } - return i64s - case parquet.Type_FLOAT: - f32s := make([]float32, len(values)) - for i := range values { - f32s[i] = values[i].(float32) - } - return f32s - case parquet.Type_DOUBLE: - f64s := make([]float64, len(values)) - for i := range values { - f64s[i] = values[i].(float64) - } - return f64s - case parquet.Type_BYTE_ARRAY: - array := make([][]byte, len(values)) - for i := range values { - array[i] = values[i].([]byte) - } - return array - } - - return nil -} - -// BitWidth returns bits count required to accommodate given value. -func BitWidth(ui64 uint64) (width int32) { - for ; ui64 != 0; ui64 >>= 1 { - width++ - } - - return width -} - -// Compress compresses given data. 
-func Compress(compressionType parquet.CompressionCodec, data []byte) ([]byte, error) { - switch compressionType { - case parquet.CompressionCodec_UNCOMPRESSED: - return data, nil - - case parquet.CompressionCodec_SNAPPY: - return snappy.Encode(nil, data), nil - - case parquet.CompressionCodec_GZIP: - buf := new(bytes.Buffer) - writer := gzip.NewWriter(buf) - n, err := writer.Write(data) - if err != nil { - return nil, err - } - if n != len(data) { - return nil, fmt.Errorf("short writes") - } - - if err = writer.Flush(); err != nil { - return nil, err - } - - if err = writer.Close(); err != nil { - return nil, err - } - - return buf.Bytes(), nil - - case parquet.CompressionCodec_LZ4: - buf := new(bytes.Buffer) - writer := lz4.NewWriter(buf) - n, err := writer.Write(data) - if err != nil { - return nil, err - } - if n != len(data) { - return nil, fmt.Errorf("short writes") - } - - if err = writer.Flush(); err != nil { - return nil, err - } - - if err = writer.Close(); err != nil { - return nil, err - } - - return buf.Bytes(), nil - } - - return nil, fmt.Errorf("unsupported compression codec %v", compressionType) -} - -// Uncompress uncompresses given data. 
-func Uncompress(compressionType parquet.CompressionCodec, data []byte) ([]byte, error) { - switch compressionType { - case parquet.CompressionCodec_UNCOMPRESSED: - return data, nil - - case parquet.CompressionCodec_SNAPPY: - return snappy.Decode(nil, data) - - case parquet.CompressionCodec_GZIP: - reader, err := gzip.NewReader(bytes.NewReader(data)) - if err != nil { - return nil, err - } - defer reader.Close() - return ioutil.ReadAll(reader) - - case parquet.CompressionCodec_LZ4: - return ioutil.ReadAll(lz4.NewReader(bytes.NewReader(data))) - } - - return nil, fmt.Errorf("unsupported compression codec %v", compressionType) -} diff --git a/pkg/s3select/internal/parquet-go/compression.go b/pkg/s3select/internal/parquet-go/compression.go deleted file mode 100644 index a364cec2f..000000000 --- a/pkg/s3select/internal/parquet-go/compression.go +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package parquet - -import ( - "bytes" - "fmt" - "io/ioutil" - "sync" - - "github.com/golang/snappy" - "github.com/klauspost/compress/gzip" - "github.com/klauspost/compress/zstd" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/pierrec/lz4" -) - -type compressionCodec parquet.CompressionCodec - -var zstdOnce sync.Once -var zstdEnc *zstd.Encoder -var zstdDec *zstd.Decoder - -func initZstd() { - zstdOnce.Do(func() { - zstdEnc, _ = zstd.NewWriter(nil, zstd.WithZeroFrames(true)) - zstdDec, _ = zstd.NewReader(nil) - }) -} - -func (c compressionCodec) compress(buf []byte) ([]byte, error) { - switch parquet.CompressionCodec(c) { - case parquet.CompressionCodec_UNCOMPRESSED: - return buf, nil - - case parquet.CompressionCodec_SNAPPY: - return snappy.Encode(nil, buf), nil - - case parquet.CompressionCodec_GZIP: - byteBuf := new(bytes.Buffer) - writer := gzip.NewWriter(byteBuf) - n, err := writer.Write(buf) - if err != nil { - return nil, err - } - if n != len(buf) { - return nil, fmt.Errorf("short writes") - } - - if err = writer.Flush(); err != nil { - return nil, err - } - - if err = writer.Close(); err != nil { - return nil, err - } - - return byteBuf.Bytes(), nil - - case parquet.CompressionCodec_LZ4: - byteBuf := new(bytes.Buffer) - writer := lz4.NewWriter(byteBuf) - n, err := writer.Write(buf) - if err != nil { - return nil, err - } - if n != len(buf) { - return nil, fmt.Errorf("short writes") - } - - if err = writer.Flush(); err != nil { - return nil, err - } - - if err = writer.Close(); err != nil { - return nil, err - } - - return byteBuf.Bytes(), nil - case parquet.CompressionCodec_ZSTD: - initZstd() - return zstdEnc.EncodeAll(buf, nil), nil - } - - return nil, fmt.Errorf("invalid compression codec %v", c) -} - -func (c compressionCodec) uncompress(buf []byte) ([]byte, error) { - switch parquet.CompressionCodec(c) { - case parquet.CompressionCodec_UNCOMPRESSED: - return buf, nil - - case parquet.CompressionCodec_SNAPPY: - 
return snappy.Decode(nil, buf) - - case parquet.CompressionCodec_GZIP: - reader, err := gzip.NewReader(bytes.NewReader(buf)) - if err != nil { - return nil, err - } - defer reader.Close() - return ioutil.ReadAll(reader) - - case parquet.CompressionCodec_LZ4: - return ioutil.ReadAll(lz4.NewReader(bytes.NewReader(buf))) - - case parquet.CompressionCodec_ZSTD: - initZstd() - return zstdDec.DecodeAll(buf, nil) - } - - return nil, fmt.Errorf("invalid compression codec %v", c) -} diff --git a/pkg/s3select/internal/parquet-go/data/column-grouplist_test.go b/pkg/s3select/internal/parquet-go/data/column-grouplist_test.go deleted file mode 100644 index f1d88ad15..000000000 --- a/pkg/s3select/internal/parquet-go/data/column-grouplist_test.go +++ /dev/null @@ -1,619 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package data - -import ( - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -func TestPopulateGroupList(t *testing.T) { - requiredList1 := schema.NewTree() - { - requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList1.Set("group", requiredGroup); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("group.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("group.list.element", requiredElement); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("group.list.element.col", requiredCol); err != nil { - t.Fatal(err) - } - - if _, _, err := requiredList1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredList2 := schema.NewTree() - { - requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - 
- optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList2.Set("group", requiredGroup); err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("group.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("group.list.element", requiredElement); err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("group.list.element.col", optionalCol); err != nil { - t.Fatal(err) - } - - if _, _, err := requiredList2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredList3 := schema.NewTree() - { - requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList3.Set("group", requiredGroup); err != nil { - t.Fatal(err) - } - if err = requiredList3.Set("group.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList3.Set("group.list.element", optionalElement); err != nil { - t.Fatal(err) - } - if err = requiredList3.Set("group.list.element.col", requiredCol); err != nil { - t.Fatal(err) - } - - if _, _, err := requiredList3.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredList4 := schema.NewTree() - { - requiredGroup, err := schema.NewElement("group", 
parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList4.Set("group", requiredGroup); err != nil { - t.Fatal(err) - } - if err = requiredList4.Set("group.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList4.Set("group.list.element", optionalElement); err != nil { - t.Fatal(err) - } - if err = requiredList4.Set("group.list.element.col", optionalCol); err != nil { - t.Fatal(err) - } - - if _, _, err := requiredList4.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList1 := schema.NewTree() - { - optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList1.Set("group", optionalGroup); err != nil { - t.Fatal(err) - } - if err = 
optionalList1.Set("group.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList1.Set("group.list.element", requiredElement); err != nil { - t.Fatal(err) - } - if err = optionalList1.Set("group.list.element.col", requiredCol); err != nil { - t.Fatal(err) - } - - if _, _, err := optionalList1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList2 := schema.NewTree() - { - optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList2.Set("group", optionalGroup); err != nil { - t.Fatal(err) - } - if err = optionalList2.Set("group.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList2.Set("group.list.element", requiredElement); err != nil { - t.Fatal(err) - } - if err = optionalList2.Set("group.list.element.col", optionalCol); err != nil { - t.Fatal(err) - } - - if _, _, err := optionalList2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList3 := schema.NewTree() - { - optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - 
t.Fatal(err) - } - - optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList3.Set("group", optionalGroup); err != nil { - t.Fatal(err) - } - if err = optionalList3.Set("group.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList3.Set("group.list.element", optionalElement); err != nil { - t.Fatal(err) - } - if err = optionalList3.Set("group.list.element.col", requiredCol); err != nil { - t.Fatal(err) - } - - if _, _, err := optionalList3.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList4 := schema.NewTree() - { - optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList4.Set("group", optionalGroup); err != nil { - t.Fatal(err) - } - if err = optionalList4.Set("group.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList4.Set("group.list.element", optionalElement); err != nil { - t.Fatal(err) - } - if err = optionalList4.Set("group.list.element.col", optionalCol); err != nil { - t.Fatal(err) - } - - if _, _, 
err := optionalList4.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - result1 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result2 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20}, - definitionLevels: []int64{1, 1}, - repetitionLevels: []int64{0, 1}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v20, - }, - } - - result3 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result4 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result5 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20}, - definitionLevels: []int64{2, 2}, - repetitionLevels: []int64{0, 1}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v20, - }, - } - - result6 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result7 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{3}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result8 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: 
[]interface{}{v10, v20}, - definitionLevels: []int64{3, 3}, - repetitionLevels: []int64{0, 1}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v20, - }, - } - - result9 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result10 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{3}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result11 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{4}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result12 := map[string]*Column{ - "group.list.element.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20}, - definitionLevels: []int64{4, 4}, - repetitionLevels: []int64{0, 1}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v20, - }, - } - - testCases := []struct { - schemaTree *schema.Tree - data string - expectedResult map[string]*Column - expectErr bool - }{ - {requiredList1, `{}`, nil, true}, // err: group: nil value for required field - {requiredList1, `{"group": null}`, nil, true}, // err: group: nil value for required field - {requiredList1, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field - {requiredList1, `{"group": [{"col": 10}]}`, result1, false}, - {requiredList1, `{"group": [{"col": 10}, {"col": 20}]}`, result2, false}, - {requiredList2, `{}`, nil, true}, // err: group: nil value for required field - {requiredList2, `{"group": null}`, nil, true}, // err: group: nil value for required field - {requiredList2, `{"group": [{"col": null}]}`, result3, false}, - {requiredList2, `{"group": [{"col": 
10}]}`, result4, false}, - {requiredList2, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false}, - {requiredList3, `{}`, nil, true}, // err: group: nil value for required field - {requiredList3, `{"group": null}`, nil, true}, // err: group: nil value for required field - {requiredList3, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field - {requiredList3, `{"group": [{"col": 10}]}`, result4, false}, - {requiredList3, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false}, - {requiredList4, `{}`, nil, true}, // err: group: nil value for required field - {requiredList4, `{"group": null}`, nil, true}, // err: group: nil value for required field - {requiredList4, `{"group": [{"col": null}]}`, result6, false}, - {requiredList4, `{"group": [{"col": 10}]}`, result7, false}, - {requiredList4, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false}, - {optionalList1, `{}`, result9, false}, - {optionalList1, `{"group": null}`, result9, false}, - {optionalList1, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field - {optionalList1, `{"group": [{"col": 10}]}`, result4, false}, - {optionalList1, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false}, - {optionalList2, `{}`, result9, false}, - {optionalList2, `{"group": null}`, result9, false}, - {optionalList2, `{"group": [{"col": null}]}`, result6, false}, - {optionalList2, `{"group": [{"col": 10}]}`, result7, false}, - {optionalList2, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false}, - {optionalList3, `{}`, result9, false}, - {optionalList3, `{"group": null}`, result9, false}, - {optionalList3, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field - {optionalList3, `{"group": [{"col": 10}]}`, result7, false}, - {optionalList3, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false}, - {optionalList4, `{}`, result9, false}, - {optionalList4, `{"group": null}`, 
result9, false}, - {optionalList4, `{"group": [{"col": null}]}`, result10, false}, - {optionalList4, `{"group": [{"col": 10}]}`, result11, false}, - {optionalList4, `{"group": [{"col": 10}, {"col": 20}]}`, result12, false}, - } - - for i, testCase := range testCases { - result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) - expectErr := (err != nil) - - if testCase.expectErr != expectErr { - t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) - } - - if !testCase.expectErr { - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } - } -} diff --git a/pkg/s3select/internal/parquet-go/data/column-grouptype_test.go b/pkg/s3select/internal/parquet-go/data/column-grouptype_test.go deleted file mode 100644 index 9b5af52f3..000000000 --- a/pkg/s3select/internal/parquet-go/data/column-grouptype_test.go +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package data - -import ( - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -func TestPopulateGroupType(t *testing.T) { - requiredGroup1 := schema.NewTree() - { - requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredGroup1.Set("group", requiredGroup); err != nil { - t.Fatal(err) - } - if err = requiredGroup1.Set("group.col", requiredCol); err != nil { - t.Fatal(err) - } - - if _, _, err := requiredGroup1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredGroup2 := schema.NewTree() - { - requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredGroup2.Set("group", requiredGroup); err != nil { - t.Fatal(err) - } - if err = requiredGroup2.Set("group.col", optionalCol); err != nil { - t.Fatal(err) - } - - if _, _, err := requiredGroup2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalGroup1 := schema.NewTree() - { - optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = 
optionalGroup1.Set("group", optionalGroup); err != nil { - t.Fatal(err) - } - if err = optionalGroup1.Set("group.col", requiredCol); err != nil { - t.Fatal(err) - } - - if _, _, err := optionalGroup1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalGroup2 := schema.NewTree() - { - optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalGroup2.Set("group", optionalGroup); err != nil { - t.Fatal(err) - } - if err = optionalGroup2.Set("group.col", optionalCol); err != nil { - t.Fatal(err) - } - - if _, _, err := optionalGroup2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - result1 := map[string]*Column{ - "group.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result2 := map[string]*Column{ - "group.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result3 := map[string]*Column{ - "group.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result4 := map[string]*Column{ - "group.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result5 := map[string]*Column{ - "group.col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, 
- definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - testCases := []struct { - schemaTree *schema.Tree - data string - expectedResult map[string]*Column - expectErr bool - }{ - {requiredGroup1, `{}`, nil, true}, // err: group: nil value for required field - {requiredGroup1, `{"group": null}`, nil, true}, // err: group: nil value for required field - {requiredGroup1, `{"group": {"col": null}}`, nil, true}, // err: group.col: nil value for required field - {requiredGroup1, `{"group": {"col": 10}}`, result1, false}, - {requiredGroup2, `{}`, nil, true}, // err: group: nil value for required field - {requiredGroup2, `{"group": null}`, nil, true}, // err: group: nil value for required field - {requiredGroup2, `{"group": {"col": null}}`, result2, false}, - {requiredGroup2, `{"group": {"col": 10}}`, result3, false}, - {optionalGroup1, `{}`, result2, false}, - {optionalGroup1, `{"group": null}`, result2, false}, - {optionalGroup1, `{"group": {"col": null}}`, nil, true}, // err: group.col: nil value for required field - {optionalGroup1, `{"group": {"col": 10}}`, result3, false}, - {optionalGroup2, `{}`, result2, false}, - {optionalGroup2, `{"group": null}`, result2, false}, - {optionalGroup2, `{"group": {"col": null}}`, result4, false}, - {optionalGroup2, `{"group": {"col": 10}}`, result5, false}, - } - - for i, testCase := range testCases { - result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) - expectErr := (err != nil) - - if testCase.expectErr != expectErr { - t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) - } - - if !testCase.expectErr { - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } - } -} diff --git a/pkg/s3select/internal/parquet-go/data/column-listoflist_test.go 
b/pkg/s3select/internal/parquet-go/data/column-listoflist_test.go deleted file mode 100644 index e279c3d66..000000000 --- a/pkg/s3select/internal/parquet-go/data/column-listoflist_test.go +++ /dev/null @@ -1,699 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package data - -import ( - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -func TestPopulateListOfList(t *testing.T) { - requiredList1 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredSubElement, err := 
schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList1.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("col.list.element", requiredElement); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("col.list.element.list", subList); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("col.list.element.list.element", requiredSubElement); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredList1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredList2 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList2.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("col.list.element", requiredElement); err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("col.list.element.list", subList); 
err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("col.list.element.list.element", optionalSubElement); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredList2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredList3 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList3.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = requiredList3.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList3.Set("col.list.element", optioonalElement); err != nil { - t.Fatal(err) - } - if err = requiredList3.Set("col.list.element.list", subList); err != nil { - t.Fatal(err) - } - if err = requiredList3.Set("col.list.element.list.element", requiredSubElement); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredList3.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredList4 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err 
:= schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList4.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = requiredList4.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList4.Set("col.list.element", optioonalElement); err != nil { - t.Fatal(err) - } - if err = requiredList4.Set("col.list.element.list", subList); err != nil { - t.Fatal(err) - } - if err = requiredList4.Set("col.list.element.list.element", optionalSubElement); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredList4.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList1 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - 
} - - requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList1.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = optionalList1.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList1.Set("col.list.element", requiredElement); err != nil { - t.Fatal(err) - } - if err = optionalList1.Set("col.list.element.list", subList); err != nil { - t.Fatal(err) - } - if err = optionalList1.Set("col.list.element.list.element", requiredSubElement); err != nil { - t.Fatal(err) - } - - if _, _, err = optionalList1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList2 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList2.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = optionalList2.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList2.Set("col.list.element", requiredElement); err != nil { - t.Fatal(err) - } - if err = 
optionalList2.Set("col.list.element.list", subList); err != nil { - t.Fatal(err) - } - if err = optionalList2.Set("col.list.element.list.element", optionalSubElement); err != nil { - t.Fatal(err) - } - - if _, _, err = optionalList2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList3 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList3.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = optionalList3.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList3.Set("col.list.element", optioonalElement); err != nil { - t.Fatal(err) - } - if err = optionalList3.Set("col.list.element.list", subList); err != nil { - t.Fatal(err) - } - if err = optionalList3.Set("col.list.element.list.element", requiredSubElement); err != nil { - t.Fatal(err) - } - - if _, _, err = optionalList3.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList4 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, 
nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList4.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = optionalList4.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList4.Set("col.list.element", optioonalElement); err != nil { - t.Fatal(err) - } - if err = optionalList4.Set("col.list.element.list", subList); err != nil { - t.Fatal(err) - } - if err = optionalList4.Set("col.list.element.list.element", optionalSubElement); err != nil { - t.Fatal(err) - } - - if _, _, err = optionalList4.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - result1 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result2 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20, v30, v10, v20, v10, v30}, - definitionLevels: []int64{2, 2, 2, 2, 2, 2, 2}, - repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v30, - }, - } - - result3 := map[string]*Column{ - "col.list.element.list.element": 
{ - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result4 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{3}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result5 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20, v30, v10, v20, v10, v30}, - definitionLevels: []int64{3, 3, 3, 3, 3, 3, 3}, - repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v30, - }, - } - - result6 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{3}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result7 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{4}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result8 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20, v30, v10, v20, v10, v30}, - definitionLevels: []int64{4, 4, 4, 4, 4, 4, 4}, - repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v30, - }, - } - - result9 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result10 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - 
definitionLevels: []int64{4}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result11 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{5}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result12 := map[string]*Column{ - "col.list.element.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20, v30, v10, v20, v10, v30}, - definitionLevels: []int64{5, 5, 5, 5, 5, 5, 5}, - repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v30, - }, - } - - testCases := []struct { - schemaTree *schema.Tree - data string - expectedResult map[string]*Column - expectErr bool - }{ - {requiredList1, `{}`, nil, true}, // err: col: nil value for required field - {requiredList1, `{"col": null}`, nil, true}, // err: col: nil value for required field - {requiredList1, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field - {requiredList1, `{"col": [[10]]}`, result1, false}, - {requiredList1, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result2, false}, - {requiredList2, `{}`, nil, true}, // err: col: nil value for required field - {requiredList2, `{"col": null}`, nil, true}, // err: col: nil value for required field - {requiredList2, `{"col": [[null]]}`, result3, false}, - {requiredList2, `{"col": [[10]]}`, result4, false}, - {requiredList2, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result5, false}, - {requiredList3, `{}`, nil, true}, // err: col: nil value for required field - {requiredList3, `{"col": null}`, nil, true}, // err: col: nil value for required field - {requiredList3, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field - {requiredList3, `{"col": [[10]]}`, result4, false}, - {requiredList3, `{"col": [[10, 20], [30, 
10], [20, 10, 30]]}`, result5, false}, - {requiredList4, `{}`, nil, true}, // err: col: nil value for required field - {requiredList4, `{"col": null}`, nil, true}, // err: col: nil value for required field - {requiredList4, `{"col": [[null]]}`, result6, false}, - {requiredList4, `{"col": [[10]]}`, result7, false}, - {requiredList4, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false}, - {optionalList1, `{}`, result9, false}, - {optionalList1, `{"col": null}`, result9, false}, - {optionalList1, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field - {optionalList1, `{"col": [[10]]}`, result4, false}, - {optionalList1, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result5, false}, - {optionalList2, `{}`, result9, false}, - {optionalList2, `{"col": null}`, result9, false}, - {optionalList2, `{"col": [[null]]}`, result6, false}, - {optionalList2, `{"col": [[10]]}`, result7, false}, - {optionalList2, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false}, - {optionalList3, `{}`, result9, false}, - {optionalList3, `{"col": null}`, result9, false}, - {optionalList3, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field - {optionalList3, `{"col": [[10]]}`, result7, false}, - {optionalList3, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false}, - {optionalList4, `{}`, result9, false}, - {optionalList4, `{"col": null}`, result9, false}, - {optionalList4, `{"col": [[null]]}`, result10, false}, - {optionalList4, `{"col": [[10]]}`, result11, false}, - {optionalList4, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result12, false}, - } - - for i, testCase := range testCases { - result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) - expectErr := (err != nil) - - if testCase.expectErr != expectErr { - t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) - } - - if !testCase.expectErr { - if 
!reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } - } -} diff --git a/pkg/s3select/internal/parquet-go/data/column-map_test.go b/pkg/s3select/internal/parquet-go/data/column-map_test.go deleted file mode 100644 index 9d9dbfbc7..000000000 --- a/pkg/s3select/internal/parquet-go/data/column-map_test.go +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package data - -import ( - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -func TestPopulateMap(t *testing.T) { - t.Skip("Broken") - requiredMap1 := schema.NewTree() - { - mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredValue, err := schema.NewElement("value", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredMap1.Set("map", mapElement); err != nil { - t.Fatal(err) - } - - if err = requiredMap1.Set("map.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err = requiredMap1.Set("map.key_value.key", requiredKey); err != nil { - t.Fatal(err) - } - - if err = requiredMap1.Set("map.key_value.value", requiredValue); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredMap1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredMap2 := schema.NewTree() - { - mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredKey, err := schema.NewElement("key", 
parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalValue, err := schema.NewElement("value", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredMap2.Set("map", mapElement); err != nil { - t.Fatal(err) - } - - if err = requiredMap2.Set("map.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err = requiredMap2.Set("map.key_value.key", requiredKey); err != nil { - t.Fatal(err) - } - - if err = requiredMap2.Set("map.key_value.value", optionalValue); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredMap2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalMap1 := schema.NewTree() - { - mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredValue, err := schema.NewElement("value", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalMap1.Set("map", mapElement); err != nil { - t.Fatal(err) - } - - if err = optionalMap1.Set("map.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err = optionalMap1.Set("map.key_value.key", requiredKey); err != nil { - t.Fatal(err) - } - - if err = optionalMap1.Set("map.key_value.value", requiredValue); 
err != nil { - t.Fatal(err) - } - - if _, _, err = optionalMap1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalMap2 := schema.NewTree() - { - mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalValue, err := schema.NewElement("value", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalMap2.Set("map", mapElement); err != nil { - t.Fatal(err) - } - - if err = optionalMap2.Set("map.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err = optionalMap2.Set("map.key_value.key", requiredKey); err != nil { - t.Fatal(err) - } - - if err = optionalMap2.Set("map.key_value.value", optionalValue); err != nil { - t.Fatal(err) - } - - if _, _, err = optionalMap2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - result1 := map[string]*Column{ - "map.key_value.key": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{ten}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - }, - "map.key_value.value": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{1}, - }, - } - - result2 := map[string]*Column{ - "map.key_value.key": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{ten}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - }, - "map.key_value.value": { - 
parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{1}, - }, - } - - result3 := map[string]*Column{ - "map.key_value.key": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{ten}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - }, - "map.key_value.value": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{1}, - }, - } - - result4 := map[string]*Column{ - "map.key_value.key": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - } - - result5 := map[string]*Column{ - "map.key_value.key": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{ten}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - }, - "map.key_value.value": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{1}, - }, - } - - result6 := map[string]*Column{ - "map.key_value.key": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{ten}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - }, - "map.key_value.value": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{1}, - }, - } - - result7 := map[string]*Column{ - "map.key_value.key": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{ten}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - }, - "map.key_value.value": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{3}, - repetitionLevels: []int64{1}, - }, - } - - testCases := []struct { - schemaTree *schema.Tree - data string - expectedResult map[string]*Column - expectErr bool - }{ - {requiredMap1, `{}`, nil, true}, // err: map: nil value for required field - 
{requiredMap1, `{"map": null}`, nil, true}, // err: map: nil value for required field - {requiredMap1, `{"map": {"ten": null}}`, nil, true}, // err: map.key_value.value: nil value for required field - {requiredMap1, `{"map": {"ten": 10}}`, result1, false}, - {requiredMap2, `{}`, nil, true}, // err: map: nil value for required field - {requiredMap2, `{"map": null}`, nil, true}, // err: map: nil value for required field - {requiredMap2, `{"map": {"ten": null}}`, result2, false}, - {requiredMap2, `{"map": {"ten": 10}}`, result3, false}, - {optionalMap1, `{}`, result4, false}, - {optionalMap1, `{"map": null}`, result4, false}, - {optionalMap1, `{"map": {"ten": null}}`, nil, true}, // err: map.key_value.value: nil value for required field - {optionalMap1, `{"map": {"ten": 10}}`, result5, false}, - {optionalMap2, `{}`, result4, false}, - {optionalMap2, `{"map": null}`, result4, false}, - {optionalMap2, `{"map": {"ten": null}}`, result6, false}, - {optionalMap2, `{"map": {"ten": 10}}`, result7, false}, - } - - for i, testCase := range testCases { - result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) - expectErr := (err != nil) - - if testCase.expectErr != expectErr { - t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) - } - - if !testCase.expectErr { - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Errorf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } - } -} diff --git a/pkg/s3select/internal/parquet-go/data/column-primitivelist_test.go b/pkg/s3select/internal/parquet-go/data/column-primitivelist_test.go deleted file mode 100644 index a7e320535..000000000 --- a/pkg/s3select/internal/parquet-go/data/column-primitivelist_test.go +++ /dev/null @@ -1,331 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. 
-// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package data - -import ( - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -func TestPopulatePrimitiveList(t *testing.T) { - requiredList1 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList1.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList1.Set("col.list.element", requiredElement); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredList1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - requiredList2 := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", 
parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredList2.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = requiredList2.Set("col.list.element", optionalElement); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredList2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList1 := schema.NewTree() - { - optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList1.Set("col", optionalCol); err != nil { - t.Fatal(err) - } - if err = optionalList1.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList1.Set("col.list.element", requiredElement); err != nil { - t.Fatal(err) - } - - if _, _, err = optionalList1.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalList2 := schema.NewTree() - { - optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - nil, 
parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalList2.Set("col", optionalCol); err != nil { - t.Fatal(err) - } - if err = optionalList2.Set("col.list", list); err != nil { - t.Fatal(err) - } - if err = optionalList2.Set("col.list.element", optionalElement); err != nil { - t.Fatal(err) - } - - if _, _, err = optionalList2.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - result1 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result2 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20, v30}, - definitionLevels: []int64{1, 1, 1}, - repetitionLevels: []int64{0, 1, 1}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v30, - }, - } - - result3 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result4 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result5 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, 
v20, v30}, - definitionLevels: []int64{2, 2, 2}, - repetitionLevels: []int64{0, 1, 1}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v30, - }, - } - - result6 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result7 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result8 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{3}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result9 := map[string]*Column{ - "col.list.element": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10, v20, v30}, - definitionLevels: []int64{3, 3, 3}, - repetitionLevels: []int64{0, 1, 1}, - rowCount: 1, - maxBitWidth: 5, - minValue: v10, - maxValue: v30, - }, - } - - testCases := []struct { - schemaTree *schema.Tree - data string - expectedResult map[string]*Column - expectErr bool - }{ - {requiredList1, `{}`, nil, true}, // err: col: nil value for required field - {requiredList1, `{"col": null}`, nil, true}, // err: col: nil value for required field - {requiredList1, `{"col": [null]}`, nil, true}, // err: col.list.element: nil value for required field - {requiredList1, `{"col": [10]}`, result1, false}, - {requiredList1, `{"col": [10, 20, 30]}`, result2, false}, - {requiredList2, `{}`, nil, true}, // err: col: nil value for required field - {requiredList2, `{"col": null}`, nil, true}, // err: col: nil value for required field - {requiredList2, `{"col": [null]}`, result3, false}, - {requiredList2, `{"col": [10]}`, result4, false}, - {requiredList2, `{"col": [10, 20, 30]}`, result5, false}, - {optionalList1, 
`{}`, result6, false}, - {optionalList1, `{"col": null}`, result6, false}, - {optionalList1, `{"col": [null]}`, nil, true}, // err: col.list.element: nil value for required field - {optionalList1, `{"col": [10]}`, result4, false}, - {optionalList1, `{"col": [10, 20, 30]}`, result5, false}, - {optionalList2, `{}`, result6, false}, - {optionalList2, `{"col": null}`, result6, false}, - {optionalList2, `{"col": [null]}`, result7, false}, - {optionalList2, `{"col": [10]}`, result8, false}, - {optionalList2, `{"col": [10, 20, 30]}`, result9, false}, - } - - for i, testCase := range testCases { - result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) - expectErr := (err != nil) - - if testCase.expectErr != expectErr { - t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) - } - - if !testCase.expectErr { - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } - } -} diff --git a/pkg/s3select/internal/parquet-go/data/column-primitivetype_test.go b/pkg/s3select/internal/parquet-go/data/column-primitivetype_test.go deleted file mode 100644 index 75a97802a..000000000 --- a/pkg/s3select/internal/parquet-go/data/column-primitivetype_test.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. 
-// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package data - -import ( - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -func TestPopulatePrimitiveType(t *testing.T) { - requiredField := schema.NewTree() - { - requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = requiredField.Set("col", requiredCol); err != nil { - t.Fatal(err) - } - - if _, _, err = requiredField.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - optionalField := schema.NewTree() - { - optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err = optionalField.Set("col", optionalCol); err != nil { - t.Fatal(err) - } - - if _, _, err = optionalField.ToParquetSchema(); err != nil { - t.Fatal(err) - } - } - - result1 := map[string]*Column{ - "col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - result2 := map[string]*Column{ - "col": { - parquetType: parquet.Type_INT32, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - rowCount: 1, - }, - } - - result3 := map[string]*Column{ - "col": { - parquetType: parquet.Type_INT32, - values: []interface{}{v10}, - definitionLevels: []int64{1}, - repetitionLevels: []int64{0}, - rowCount: 1, - maxBitWidth: 4, - minValue: v10, - maxValue: v10, - }, - } - - testCases := []struct { - schemaTree *schema.Tree - data string - 
expectedResult map[string]*Column - expectErr bool - }{ - {requiredField, `{}`, nil, true}, - {requiredField, `{"col": null}`, nil, true}, // err: col: nil value for required field - {requiredField, `{"col": 10}`, result1, false}, - {optionalField, `{}`, result2, false}, - {optionalField, `{"col": null}`, result2, false}, - {optionalField, `{"col": 10}`, result3, false}, - } - - for i, testCase := range testCases { - result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree) - expectErr := (err != nil) - - if testCase.expectErr != expectErr { - t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) - } - - if !testCase.expectErr { - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } - } -} diff --git a/pkg/s3select/internal/parquet-go/data/column.go b/pkg/s3select/internal/parquet-go/data/column.go deleted file mode 100644 index e302c7cf2..000000000 --- a/pkg/s3select/internal/parquet-go/data/column.go +++ /dev/null @@ -1,681 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package data - -import ( - "bytes" - "context" - "fmt" - "strings" - - "git.apache.org/thrift.git/lib/go/thrift" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/common" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/encoding" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -func getDefaultEncoding(parquetType parquet.Type) parquet.Encoding { - switch parquetType { - case parquet.Type_BOOLEAN: - return parquet.Encoding_PLAIN - case parquet.Type_INT32, parquet.Type_INT64, parquet.Type_FLOAT, parquet.Type_DOUBLE: - return parquet.Encoding_RLE_DICTIONARY - case parquet.Type_BYTE_ARRAY: - return parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY - } - - return parquet.Encoding_PLAIN -} - -func getFirstValueElement(tree *schema.Tree) (valueElement *schema.Element) { - tree.Range(func(name string, element *schema.Element) bool { - if element.Children == nil { - valueElement = element - } else { - valueElement = getFirstValueElement(element.Children) - } - - return false - }) - - return valueElement -} - -func populate(columnDataMap map[string]*Column, input *jsonValue, tree *schema.Tree, firstValueRL int64) (map[string]*Column, error) { - var err error - - pos := 0 - handleElement := func(name string, element *schema.Element) bool { - pos++ - - dataPath := element.PathInTree - - if *element.RepetitionType == parquet.FieldRepetitionType_REPEATED { - panic(fmt.Errorf("%v: repetition type must be REQUIRED or OPTIONAL type", dataPath)) - } - - inputValue := input.Get(name) - if *element.RepetitionType == parquet.FieldRepetitionType_REQUIRED && inputValue.IsNull() { - err = fmt.Errorf("%v: nil value for required field", dataPath) - return false - } - - add := func(element *schema.Element, value interface{}, DL, RL int64) { - columnData := columnDataMap[element.PathInSchema] - if columnData == nil { - 
columnData = NewColumn(*element.Type) - } - columnData.add(value, DL, RL) - columnDataMap[element.PathInSchema] = columnData - } - - // Handle primitive type element. - if element.Type != nil { - var value interface{} - if value, err = inputValue.GetValue(*element.Type, element.ConvertedType); err != nil { - return false - } - - DL := element.MaxDefinitionLevel - if value == nil && DL > 0 { - DL-- - } - - RL := element.MaxRepetitionLevel - if pos == 1 { - RL = firstValueRL - } - - add(element, value, DL, RL) - return true - } - - addNull := func() { - valueElement := getFirstValueElement(element.Children) - - DL := element.MaxDefinitionLevel - if DL > 0 { - DL-- - } - - RL := element.MaxRepetitionLevel - if RL > 0 { - RL-- - } - - add(valueElement, nil, DL, RL) - } - - // Handle group type element. - if element.ConvertedType == nil { - if inputValue.IsNull() { - addNull() - return true - } - - columnDataMap, err = populate(columnDataMap, inputValue, element.Children, firstValueRL) - return (err == nil) - } - - // Handle list type element. 
- if *element.ConvertedType == parquet.ConvertedType_LIST { - if inputValue.IsNull() { - addNull() - return true - } - - var results []gjson.Result - if results, err = inputValue.GetArray(); err != nil { - return false - } - - listElement, _ := element.Children.Get("list") - valueElement, _ := listElement.Children.Get("element") - for i := range results { - rl := valueElement.MaxRepetitionLevel - if i == 0 { - rl = firstValueRL - } - - var jsonData []byte - if jsonData, err = sjson.SetBytes([]byte{}, "element", results[i].Value()); err != nil { - return false - } - - var jv *jsonValue - if jv, err = bytesToJSONValue(jsonData); err != nil { - return false - } - - if columnDataMap, err = populate(columnDataMap, jv, listElement.Children, rl); err != nil { - return false - } - } - return true - } - - if *element.ConvertedType == parquet.ConvertedType_MAP { - if inputValue.IsNull() { - addNull() - return true - } - - keyValueElement, _ := element.Children.Get("key_value") - var rerr error - err = inputValue.Range(func(key, value gjson.Result) bool { - if !key.Exists() || key.Type == gjson.Null { - rerr = fmt.Errorf("%v.key_value.key: not found or null", dataPath) - return false - } - - var jsonData []byte - if jsonData, rerr = sjson.SetBytes([]byte{}, "key", key.Value()); rerr != nil { - return false - } - - if jsonData, rerr = sjson.SetBytes(jsonData, "value", value.Value()); rerr != nil { - return false - } - - var jv *jsonValue - if jv, rerr = bytesToJSONValue(jsonData); rerr != nil { - return false - } - - if columnDataMap, rerr = populate(columnDataMap, jv, keyValueElement.Children, firstValueRL); rerr != nil { - return false - } - - return true - }) - - if err != nil { - return false - } - - err = rerr - return (err == nil) - } - - err = fmt.Errorf("%v: unsupported converted type %v in %v field type", dataPath, *element.ConvertedType, *element.RepetitionType) - return false - } - - tree.Range(handleElement) - return columnDataMap, err -} - -// Column - denotes 
values of a column. -type Column struct { - parquetType parquet.Type // value type. - values []interface{} // must be a slice of parquet typed values. - definitionLevels []int64 // exactly same length of values. - repetitionLevels []int64 // exactly same length of values. - rowCount int32 - maxBitWidth int32 - minValue interface{} - maxValue interface{} -} - -func (column *Column) updateMinMaxValue(value interface{}) { - if column.minValue == nil && column.maxValue == nil { - column.minValue = value - column.maxValue = value - return - } - - switch column.parquetType { - case parquet.Type_BOOLEAN: - if column.minValue.(bool) && !value.(bool) { - column.minValue = value - } - - if !column.maxValue.(bool) && value.(bool) { - column.maxValue = value - } - - case parquet.Type_INT32: - if column.minValue.(int32) > value.(int32) { - column.minValue = value - } - - if column.maxValue.(int32) < value.(int32) { - column.maxValue = value - } - - case parquet.Type_INT64: - if column.minValue.(int64) > value.(int64) { - column.minValue = value - } - - if column.maxValue.(int64) < value.(int64) { - column.maxValue = value - } - - case parquet.Type_FLOAT: - if column.minValue.(float32) > value.(float32) { - column.minValue = value - } - - if column.maxValue.(float32) < value.(float32) { - column.maxValue = value - } - - case parquet.Type_DOUBLE: - if column.minValue.(float64) > value.(float64) { - column.minValue = value - } - - if column.maxValue.(float64) < value.(float64) { - column.maxValue = value - } - - case parquet.Type_BYTE_ARRAY: - if bytes.Compare(column.minValue.([]byte), value.([]byte)) > 0 { - column.minValue = value - } - - if bytes.Compare(column.minValue.([]byte), value.([]byte)) < 0 { - column.maxValue = value - } - } -} - -func (column *Column) updateStats(value interface{}, DL, RL int64) { - if RL == 0 { - column.rowCount++ - } - - if value == nil { - return - } - - var bitWidth int32 - switch column.parquetType { - case parquet.Type_BOOLEAN: - bitWidth = 1 - 
case parquet.Type_INT32: - bitWidth = common.BitWidth(uint64(value.(int32))) - case parquet.Type_INT64: - bitWidth = common.BitWidth(uint64(value.(int64))) - case parquet.Type_FLOAT: - bitWidth = 32 - case parquet.Type_DOUBLE: - bitWidth = 64 - case parquet.Type_BYTE_ARRAY: - bitWidth = int32(len(value.([]byte))) - } - if column.maxBitWidth < bitWidth { - column.maxBitWidth = bitWidth - } - - column.updateMinMaxValue(value) -} - -func (column *Column) add(value interface{}, DL, RL int64) { - column.values = append(column.values, value) - column.definitionLevels = append(column.definitionLevels, DL) - column.repetitionLevels = append(column.repetitionLevels, RL) - column.updateStats(value, DL, RL) -} - -// AddNull - adds nil value. -func (column *Column) AddNull(DL, RL int64) { - column.add(nil, DL, RL) -} - -// AddBoolean - adds boolean value. -func (column *Column) AddBoolean(value bool, DL, RL int64) { - if column.parquetType != parquet.Type_BOOLEAN { - panic(fmt.Errorf("expected %v value", column.parquetType)) - } - - column.add(value, DL, RL) -} - -// AddInt32 - adds int32 value. -func (column *Column) AddInt32(value int32, DL, RL int64) { - if column.parquetType != parquet.Type_INT32 { - panic(fmt.Errorf("expected %v value", column.parquetType)) - } - - column.add(value, DL, RL) -} - -// AddInt64 - adds int64 value. -func (column *Column) AddInt64(value int64, DL, RL int64) { - if column.parquetType != parquet.Type_INT64 { - panic(fmt.Errorf("expected %v value", column.parquetType)) - } - - column.add(value, DL, RL) -} - -// AddFloat - adds float32 value. -func (column *Column) AddFloat(value float32, DL, RL int64) { - if column.parquetType != parquet.Type_FLOAT { - panic(fmt.Errorf("expected %v value", column.parquetType)) - } - - column.add(value, DL, RL) -} - -// AddDouble - adds float64 value. 
-func (column *Column) AddDouble(value float64, DL, RL int64) { - if column.parquetType != parquet.Type_DOUBLE { - panic(fmt.Errorf("expected %v value", column.parquetType)) - } - - column.add(value, DL, RL) -} - -// AddByteArray - adds byte array value. -func (column *Column) AddByteArray(value []byte, DL, RL int64) { - if column.parquetType != parquet.Type_BYTE_ARRAY { - panic(fmt.Errorf("expected %v value", column.parquetType)) - } - - column.add(value, DL, RL) -} - -// Merge - merges columns. -func (column *Column) Merge(column2 *Column) { - if column.parquetType != column2.parquetType { - panic(fmt.Errorf("merge differs in parquet type")) - } - - column.values = append(column.values, column2.values...) - column.definitionLevels = append(column.definitionLevels, column2.definitionLevels...) - column.repetitionLevels = append(column.repetitionLevels, column2.repetitionLevels...) - - column.rowCount += column2.rowCount - if column.maxBitWidth < column2.maxBitWidth { - column.maxBitWidth = column2.maxBitWidth - } - - column.updateMinMaxValue(column2.minValue) - column.updateMinMaxValue(column2.maxValue) -} - -func (column *Column) String() string { - var strs []string - strs = append(strs, fmt.Sprintf("parquetType: %v", column.parquetType)) - strs = append(strs, fmt.Sprintf("values: %v", column.values)) - strs = append(strs, fmt.Sprintf("definitionLevels: %v", column.definitionLevels)) - strs = append(strs, fmt.Sprintf("repetitionLevels: %v", column.repetitionLevels)) - strs = append(strs, fmt.Sprintf("rowCount: %v", column.rowCount)) - strs = append(strs, fmt.Sprintf("maxBitWidth: %v", column.maxBitWidth)) - strs = append(strs, fmt.Sprintf("minValue: %v", column.minValue)) - strs = append(strs, fmt.Sprintf("maxValue: %v", column.maxValue)) - return "{" + strings.Join(strs, ", ") + "}" -} - -func (column *Column) encodeValue(value interface{}, element *schema.Element) []byte { - if value == nil { - return nil - } - - valueData := 
encoding.PlainEncode(common.ToSliceValue([]interface{}{value}, column.parquetType), column.parquetType) - if column.parquetType == parquet.Type_BYTE_ARRAY && element.ConvertedType != nil { - switch *element.ConvertedType { - case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL: - valueData = valueData[4:] - } - } - - return valueData -} - -func (column *Column) toDataPageV2(element *schema.Element, parquetEncoding parquet.Encoding) *ColumnChunk { - var definedValues []interface{} - for _, value := range column.values { - if value != nil { - definedValues = append(definedValues, value) - } - } - - var encodedData []byte - switch parquetEncoding { - case parquet.Encoding_PLAIN: - encodedData = encoding.PlainEncode(common.ToSliceValue(definedValues, column.parquetType), column.parquetType) - - case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: - var bytesSlices [][]byte - for _, value := range column.values { - bytesSlices = append(bytesSlices, value.([]byte)) - } - encodedData = encoding.DeltaLengthByteArrayEncode(bytesSlices) - } - - compressionType := parquet.CompressionCodec_SNAPPY - if element.CompressionType != nil { - compressionType = *element.CompressionType - } - - compressedData, err := common.Compress(compressionType, encodedData) - if err != nil { - panic(err) - } - - DLData := encoding.RLEBitPackedHybridEncode( - column.definitionLevels, - common.BitWidth(uint64(element.MaxDefinitionLevel)), - parquet.Type_INT64, - ) - - RLData := encoding.RLEBitPackedHybridEncode( - column.repetitionLevels, - common.BitWidth(uint64(element.MaxRepetitionLevel)), - parquet.Type_INT64, - ) - - pageHeader := parquet.NewPageHeader() - pageHeader.Type = parquet.PageType_DATA_PAGE_V2 - pageHeader.CompressedPageSize = int32(len(compressedData) + len(DLData) + len(RLData)) - pageHeader.UncompressedPageSize = int32(len(encodedData) + len(DLData) + len(RLData)) - pageHeader.DataPageHeaderV2 = parquet.NewDataPageHeaderV2() - pageHeader.DataPageHeaderV2.NumValues = 
int32(len(column.values)) - pageHeader.DataPageHeaderV2.NumNulls = int32(len(column.values) - len(definedValues)) - pageHeader.DataPageHeaderV2.NumRows = column.rowCount - pageHeader.DataPageHeaderV2.Encoding = parquetEncoding - pageHeader.DataPageHeaderV2.DefinitionLevelsByteLength = int32(len(DLData)) - pageHeader.DataPageHeaderV2.RepetitionLevelsByteLength = int32(len(RLData)) - pageHeader.DataPageHeaderV2.IsCompressed = true - pageHeader.DataPageHeaderV2.Statistics = parquet.NewStatistics() - pageHeader.DataPageHeaderV2.Statistics.Min = column.encodeValue(column.minValue, element) - pageHeader.DataPageHeaderV2.Statistics.Max = column.encodeValue(column.maxValue, element) - - ts := thrift.NewTSerializer() - ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) - rawData, err := ts.Write(context.TODO(), pageHeader) - if err != nil { - panic(err) - } - rawData = append(rawData, RLData...) - rawData = append(rawData, DLData...) - rawData = append(rawData, compressedData...) 
- - metadata := parquet.NewColumnMetaData() - metadata.Type = column.parquetType - metadata.Encodings = []parquet.Encoding{ - parquet.Encoding_PLAIN, - parquet.Encoding_RLE, - parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY, - } - metadata.Codec = compressionType - metadata.NumValues = int64(pageHeader.DataPageHeaderV2.NumValues) - metadata.TotalCompressedSize = int64(len(rawData)) - metadata.TotalUncompressedSize = int64(pageHeader.UncompressedPageSize) + int64(len(rawData)) - int64(pageHeader.CompressedPageSize) - metadata.PathInSchema = strings.Split(element.PathInSchema, ".") - metadata.Statistics = parquet.NewStatistics() - metadata.Statistics.Min = pageHeader.DataPageHeaderV2.Statistics.Min - metadata.Statistics.Max = pageHeader.DataPageHeaderV2.Statistics.Max - - chunk := new(ColumnChunk) - chunk.ColumnChunk.MetaData = metadata - chunk.dataPageLen = int64(len(rawData)) - chunk.dataLen = int64(len(rawData)) - chunk.data = rawData - - return chunk -} - -func (column *Column) toRLEDictPage(element *schema.Element) *ColumnChunk { - dictPageData, dataPageData, dictValueCount, indexBitWidth := encoding.RLEDictEncode(column.values, column.parquetType, column.maxBitWidth) - - compressionType := parquet.CompressionCodec_SNAPPY - if element.CompressionType != nil { - compressionType = *element.CompressionType - } - - compressedData, err := common.Compress(compressionType, dictPageData) - if err != nil { - panic(err) - } - - dictPageHeader := parquet.NewPageHeader() - dictPageHeader.Type = parquet.PageType_DICTIONARY_PAGE - dictPageHeader.CompressedPageSize = int32(len(compressedData)) - dictPageHeader.UncompressedPageSize = int32(len(dictPageData)) - dictPageHeader.DictionaryPageHeader = parquet.NewDictionaryPageHeader() - dictPageHeader.DictionaryPageHeader.NumValues = dictValueCount - dictPageHeader.DictionaryPageHeader.Encoding = parquet.Encoding_PLAIN - - ts := thrift.NewTSerializer() - ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) - 
dictPageRawData, err := ts.Write(context.TODO(), dictPageHeader) - if err != nil { - panic(err) - } - dictPageRawData = append(dictPageRawData, compressedData...) - - RLData := encoding.RLEBitPackedHybridEncode( - column.repetitionLevels, - common.BitWidth(uint64(element.MaxRepetitionLevel)), - parquet.Type_INT64, - ) - encodedData := RLData - - DLData := encoding.RLEBitPackedHybridEncode( - column.definitionLevels, - common.BitWidth(uint64(element.MaxDefinitionLevel)), - parquet.Type_INT64, - ) - encodedData = append(encodedData, DLData...) - - encodedData = append(encodedData, indexBitWidth) - encodedData = append(encodedData, dataPageData...) - - compressedData, err = common.Compress(compressionType, encodedData) - if err != nil { - panic(err) - } - - dataPageHeader := parquet.NewPageHeader() - dataPageHeader.Type = parquet.PageType_DATA_PAGE - dataPageHeader.CompressedPageSize = int32(len(compressedData)) - dataPageHeader.UncompressedPageSize = int32(len(encodedData)) - dataPageHeader.DataPageHeader = parquet.NewDataPageHeader() - dataPageHeader.DataPageHeader.NumValues = int32(len(column.values)) - dataPageHeader.DataPageHeader.DefinitionLevelEncoding = parquet.Encoding_RLE - dataPageHeader.DataPageHeader.RepetitionLevelEncoding = parquet.Encoding_RLE - dataPageHeader.DataPageHeader.Encoding = parquet.Encoding_RLE_DICTIONARY - - ts = thrift.NewTSerializer() - ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) - dataPageRawData, err := ts.Write(context.TODO(), dataPageHeader) - if err != nil { - panic(err) - } - dataPageRawData = append(dataPageRawData, compressedData...) 
- - metadata := parquet.NewColumnMetaData() - metadata.Type = column.parquetType - metadata.Encodings = []parquet.Encoding{ - parquet.Encoding_PLAIN, - parquet.Encoding_RLE, - parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY, - parquet.Encoding_RLE_DICTIONARY, - } - metadata.Codec = compressionType - metadata.NumValues = int64(dataPageHeader.DataPageHeader.NumValues) - metadata.TotalCompressedSize = int64(len(dictPageRawData)) + int64(len(dataPageRawData)) - uncompressedSize := int64(dictPageHeader.UncompressedPageSize) + int64(len(dictPageData)) - int64(dictPageHeader.CompressedPageSize) - uncompressedSize += int64(dataPageHeader.UncompressedPageSize) + int64(len(dataPageData)) - int64(dataPageHeader.CompressedPageSize) - metadata.TotalUncompressedSize = uncompressedSize - metadata.PathInSchema = strings.Split(element.PathInSchema, ".") - metadata.Statistics = parquet.NewStatistics() - metadata.Statistics.Min = column.encodeValue(column.minValue, element) - metadata.Statistics.Max = column.encodeValue(column.maxValue, element) - - chunk := new(ColumnChunk) - chunk.ColumnChunk.MetaData = metadata - chunk.isDictPage = true - chunk.dictPageLen = int64(len(dictPageRawData)) - chunk.dataPageLen = int64(len(dataPageRawData)) - chunk.dataLen = chunk.dictPageLen + chunk.dataPageLen - chunk.data = append(dictPageRawData, dataPageRawData...) - - return chunk -} - -// Encode an element. 
-func (column *Column) Encode(element *schema.Element) *ColumnChunk { - parquetEncoding := getDefaultEncoding(column.parquetType) - if element.Encoding != nil { - parquetEncoding = *element.Encoding - } - - switch parquetEncoding { - case parquet.Encoding_PLAIN, parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: - return column.toDataPageV2(element, parquetEncoding) - } - - return column.toRLEDictPage(element) -} - -// NewColumn - creates new column data -func NewColumn(parquetType parquet.Type) *Column { - switch parquetType { - case parquet.Type_BOOLEAN, parquet.Type_INT32, parquet.Type_INT64, parquet.Type_FLOAT, parquet.Type_DOUBLE, parquet.Type_BYTE_ARRAY: - default: - panic(fmt.Errorf("unsupported parquet type %v", parquetType)) - } - - return &Column{ - parquetType: parquetType, - } -} - -// UnmarshalJSON - decodes JSON data into map of Column. -func UnmarshalJSON(data []byte, tree *schema.Tree) (map[string]*Column, error) { - if !tree.ReadOnly() { - return nil, fmt.Errorf("tree must be read only") - } - - inputValue, err := bytesToJSONValue(data) - if err != nil { - return nil, err - } - - columnDataMap := make(map[string]*Column) - return populate(columnDataMap, inputValue, tree, 0) -} diff --git a/pkg/s3select/internal/parquet-go/data/column_test.go b/pkg/s3select/internal/parquet-go/data/column_test.go deleted file mode 100644 index 0628cac98..000000000 --- a/pkg/s3select/internal/parquet-go/data/column_test.go +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package data - -import ( - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -var ( - v10 = int32(10) - v20 = int32(20) - v30 = int32(30) - ten = []byte("ten") - foo = []byte("foo") - bar = []byte("bar") - phone1 = []byte("1-234-567-8901") - phone2 = []byte("1-234-567-1098") - phone3 = []byte("1-111-222-3333") -) - -func TestAddressBookExample(t *testing.T) { - // message AddressBook { - // required string owner; - // repeated string ownerPhoneNumbers; - // repeated group contacts { - // required string name; - // optional string phoneNumber; - // } - // } - t.Skip("Broken") - - addressBook := schema.NewTree() - { - owner, err := schema.NewElement("owner", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - ownerPhoneNumbers, err := schema.NewElement("ownerPhoneNumbers", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - ownerPhoneNumbersList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - ownerPhoneNumbersElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) 
- } - - contacts, err := schema.NewElement("contacts", parquet.FieldRepetitionType_OPTIONAL, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - contactsList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - contactsElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - contactName, err := schema.NewElement("name", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - contactPhoneNumber, err := schema.NewElement("phoneNumber", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - if err = addressBook.Set("owner", owner); err != nil { - t.Fatal(err) - } - - if err = addressBook.Set("ownerPhoneNumbers", ownerPhoneNumbers); err != nil { - t.Fatal(err) - } - if err = addressBook.Set("ownerPhoneNumbers.list", ownerPhoneNumbersList); err != nil { - t.Fatal(err) - } - if err = addressBook.Set("ownerPhoneNumbers.list.element", ownerPhoneNumbersElement); err != nil { - t.Fatal(err) - } - - if err = addressBook.Set("contacts", contacts); err != nil { - t.Fatal(err) - } - if err = addressBook.Set("contacts.list", contactsList); err != nil { - t.Fatal(err) - } - if err = addressBook.Set("contacts.list.element", contactsElement); err != nil { - t.Fatal(err) - } - if err = addressBook.Set("contacts.list.element.name", contactName); err != nil { - t.Fatal(err) - } - if err = addressBook.Set("contacts.list.element.phoneNumber", contactPhoneNumber); err != nil { - t.Fatal(err) - } - } - - if _, _, err := addressBook.ToParquetSchema(); err != nil { - 
t.Fatal(err) - } - - case2Data := `{ - "owner": "foo" -}` - result2 := map[string]*Column{ - "owner": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{foo}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "ownerPhoneNumbers.list.element": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "contacts.list.element.name": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - } - - case3Data := `{ - "owner": "foo", - "ownerPhoneNumbers": [ - "1-234-567-8901" - ] -} -` - result3 := map[string]*Column{ - "owner": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{foo}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "ownerPhoneNumbers.list.element": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{phone1}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - }, - "contacts.list.element.name": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - } - - case4Data := `{ - "owner": "foo", - "ownerPhoneNumbers": [ - "1-234-567-8901", - "1-234-567-1098" - ] -} -` - result4 := map[string]*Column{ - "owner": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{foo}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "ownerPhoneNumbers.list.element": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{phone1, phone2}, - definitionLevels: []int64{2, 2}, - repetitionLevels: []int64{0, 1}, - }, - "contacts.list.element.name": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - } - - case5Data := `{ - "contacts": [ - { - "name": "bar" - } - ], - "owner": "foo" -}` - result5 := 
map[string]*Column{ - "owner": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{foo}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "ownerPhoneNumbers.list.element": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "contacts.list.element.name": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{bar}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - }, - "contacts.list.element.phoneNumber": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{nil}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{1}, - }, - } - - case6Data := `{ - "contacts": [ - { - "name": "bar", - "phoneNumber": "1-111-222-3333" - } - ], - "owner": "foo" -}` - result6 := map[string]*Column{ - "owner": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{foo}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "ownerPhoneNumbers.list.element": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{nil}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "contacts.list.element.name": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{bar}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - }, - "contacts.list.element.phoneNumber": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{phone3}, - definitionLevels: []int64{3}, - repetitionLevels: []int64{1}, - }, - } - - case7Data := `{ - "contacts": [ - { - "name": "bar", - "phoneNumber": "1-111-222-3333" - } - ], - "owner": "foo", - "ownerPhoneNumbers": [ - "1-234-567-8901", - "1-234-567-1098" - ] -}` - result7 := map[string]*Column{ - "owner": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{foo}, - definitionLevels: []int64{0}, - repetitionLevels: []int64{0}, - }, - "ownerPhoneNumbers.list.element": { - parquetType: 
parquet.Type_BYTE_ARRAY, - values: []interface{}{phone1, phone2}, - definitionLevels: []int64{2, 2}, - repetitionLevels: []int64{0, 1}, - }, - "contacts.list.element.name": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{bar}, - definitionLevels: []int64{2}, - repetitionLevels: []int64{0}, - }, - "contacts.list.element.phoneNumber": { - parquetType: parquet.Type_BYTE_ARRAY, - values: []interface{}{phone3}, - definitionLevels: []int64{3}, - repetitionLevels: []int64{1}, - }, - } - - testCases := []struct { - data string - expectedResult map[string]*Column - expectErr bool - }{ - {`{}`, nil, true}, // err: owner: nil value for required field - {case2Data, result2, false}, - {case3Data, result3, false}, - {case4Data, result4, false}, - {case5Data, result5, false}, - {case6Data, result6, false}, - {case7Data, result7, false}, - } - - for i, testCase := range testCases { - result, err := UnmarshalJSON([]byte(testCase.data), addressBook) - expectErr := (err != nil) - - if testCase.expectErr != expectErr { - t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr) - } - - if !testCase.expectErr { - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Errorf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } - } -} diff --git a/pkg/s3select/internal/parquet-go/data/data.go b/pkg/s3select/internal/parquet-go/data/data.go deleted file mode 100644 index 8e7ef91f1..000000000 --- a/pkg/s3select/internal/parquet-go/data/data.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package data - -import ( - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -// ColumnChunk ... -type ColumnChunk struct { - parquet.ColumnChunk - isDictPage bool - dictPageLen int64 - dataPageLen int64 - dataLen int64 - data []byte -} - -// Data returns the data. -func (chunk *ColumnChunk) Data() []byte { - return chunk.data -} - -// DataLen returns the length of the data. -func (chunk *ColumnChunk) DataLen() int64 { - return chunk.dataLen -} - -// NewRowGroup creates a new row group. -func NewRowGroup(chunks []*ColumnChunk, numRows, offset int64) *parquet.RowGroup { - rows := parquet.NewRowGroup() - rows.NumRows = numRows - - for _, chunk := range chunks { - rows.Columns = append(rows.Columns, &chunk.ColumnChunk) - rows.TotalByteSize += chunk.dataLen - - chunk.ColumnChunk.FileOffset = offset - - if chunk.isDictPage { - dictPageOffset := offset - chunk.ColumnChunk.MetaData.DictionaryPageOffset = &dictPageOffset - offset += chunk.dictPageLen - } - - chunk.ColumnChunk.MetaData.DataPageOffset = offset - offset += chunk.dataPageLen - } - - return rows -} diff --git a/pkg/s3select/internal/parquet-go/data/jsonvalue.go b/pkg/s3select/internal/parquet-go/data/jsonvalue.go deleted file mode 100644 index 448a05afd..000000000 --- a/pkg/s3select/internal/parquet-go/data/jsonvalue.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. 
-// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package data - -import ( - "fmt" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/tidwall/gjson" -) - -type jsonValue struct { - result *gjson.Result - path *string -} - -func (v *jsonValue) String() string { - if v.result == nil { - return "" - } - - return fmt.Sprintf("%v", *v.result) -} - -func (v *jsonValue) IsNull() bool { - return v.result == nil || v.result.Type == gjson.Null -} - -func (v *jsonValue) Get(path string) *jsonValue { - if v.path != nil { - var result *gjson.Result - if *v.path == path { - result = v.result - } - - return resultToJSONValue(result) - } - - if v.result == nil { - return resultToJSONValue(nil) - } - - result := v.result.Get(path) - if !result.Exists() { - return resultToJSONValue(nil) - } - - return resultToJSONValue(&result) -} - -func (v *jsonValue) GetValue(parquetType parquet.Type, convertedType *parquet.ConvertedType) (interface{}, error) { - if v.result == nil { - return nil, nil - } - - return resultToParquetValue(*v.result, parquetType, convertedType) -} - -func (v *jsonValue) GetArray() ([]gjson.Result, error) { - if v.result == nil { - return nil, nil - } - - return resultToArray(*v.result) -} - -func (v *jsonValue) Range(iterator func(key, value gjson.Result) bool) error { - if 
v.result == nil || v.result.Type == gjson.Null { - return nil - } - - if v.result.Type != gjson.JSON || !v.result.IsObject() { - return fmt.Errorf("result is not Map but %v", v.result.Type) - } - - v.result.ForEach(iterator) - return nil -} - -func resultToJSONValue(result *gjson.Result) *jsonValue { - return &jsonValue{ - result: result, - } -} - -func bytesToJSONValue(data []byte) (*jsonValue, error) { - if !gjson.ValidBytes(data) { - return nil, fmt.Errorf("invalid JSON data") - } - - result := gjson.ParseBytes(data) - return resultToJSONValue(&result), nil -} diff --git a/pkg/s3select/internal/parquet-go/data/result.go b/pkg/s3select/internal/parquet-go/data/result.go deleted file mode 100644 index 3cdab35af..000000000 --- a/pkg/s3select/internal/parquet-go/data/result.go +++ /dev/null @@ -1,361 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package data - -import ( - "fmt" - "math" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/tidwall/gjson" -) - -func resultToBool(result gjson.Result) (value interface{}, err error) { - switch result.Type { - case gjson.False, gjson.True: - return result.Bool(), nil - } - - return nil, fmt.Errorf("result is not Bool but %v", result.Type) -} - -func resultToInt32(result gjson.Result) (value interface{}, err error) { - if value, err = resultToInt64(result); err != nil { - return nil, err - } - - if value.(int64) < math.MinInt32 || value.(int64) > math.MaxInt32 { - return nil, fmt.Errorf("int32 overflow") - } - - return int32(value.(int64)), nil -} - -func resultToInt64(result gjson.Result) (value interface{}, err error) { - if result.Type == gjson.Number { - return result.Int(), nil - } - - return nil, fmt.Errorf("result is not Number but %v", result.Type) -} - -func resultToFloat(result gjson.Result) (value interface{}, err error) { - if result.Type == gjson.Number { - return float32(result.Float()), nil - } - - return nil, fmt.Errorf("result is not float32 but %v", result.Type) -} - -func resultToDouble(result gjson.Result) (value interface{}, err error) { - if result.Type == gjson.Number { - return result.Float(), nil - } - - return nil, fmt.Errorf("result is not float64 but %v", result.Type) -} - -func resultToBytes(result gjson.Result) (interface{}, error) { - if result.Type != gjson.JSON || !result.IsArray() { - return nil, fmt.Errorf("result is not byte array but %v", result.Type) - } - - data := []byte{} - for i, r := range result.Array() { - if r.Type != gjson.Number { - return nil, fmt.Errorf("result[%v] is not byte but %v", i, r.Type) - } - - value := r.Uint() - if value > math.MaxUint8 { - return nil, fmt.Errorf("byte overflow in result[%v]", i) - } - - data = append(data, byte(value)) - } - - return data, nil -} - -func resultToString(result gjson.Result) (value interface{}, err error) { - if result.Type == 
gjson.String { - return result.String(), nil - } - - return nil, fmt.Errorf("result is not String but %v", result.Type) -} - -func resultToUint8(result gjson.Result) (value interface{}, err error) { - if value, err = resultToUint64(result); err != nil { - return nil, err - } - - if value.(uint64) > math.MaxUint8 { - return nil, fmt.Errorf("uint8 overflow") - } - - return uint8(value.(uint64)), nil -} - -func resultToUint16(result gjson.Result) (value interface{}, err error) { - if value, err = resultToUint64(result); err != nil { - return nil, err - } - - if value.(uint64) > math.MaxUint16 { - return nil, fmt.Errorf("uint16 overflow") - } - - return uint16(value.(uint64)), nil -} - -func resultToUint32(result gjson.Result) (value interface{}, err error) { - if value, err = resultToUint64(result); err != nil { - return nil, err - } - - if value.(uint64) > math.MaxUint32 { - return nil, fmt.Errorf("uint32 overflow") - } - - return uint32(value.(uint64)), nil -} - -func resultToUint64(result gjson.Result) (value interface{}, err error) { - if result.Type == gjson.Number { - return result.Uint(), nil - } - - return nil, fmt.Errorf("result is not Number but %v", result.Type) -} - -func resultToInt8(result gjson.Result) (value interface{}, err error) { - if value, err = resultToInt64(result); err != nil { - return nil, err - } - - if value.(int64) < math.MinInt8 || value.(int64) > math.MaxInt8 { - return nil, fmt.Errorf("int8 overflow") - } - - return int8(value.(int64)), nil -} - -func resultToInt16(result gjson.Result) (value interface{}, err error) { - if value, err = resultToInt64(result); err != nil { - return nil, err - } - - if value.(int64) < math.MinInt16 || value.(int64) > math.MaxInt16 { - return nil, fmt.Errorf("int16 overflow") - } - - return int16(value.(int64)), nil -} - -func stringToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, 
parquet.Type_FIXED_LEN_BYTE_ARRAY: - return []byte(value.(string)), nil - } - - return nil, fmt.Errorf("string cannot be converted to parquet type %v", parquetType) -} - -func uint8ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case parquet.Type_INT32: - return int32(value.(uint8)), nil - case parquet.Type_INT64: - return int64(value.(uint8)), nil - } - - return nil, fmt.Errorf("uint8 cannot be converted to parquet type %v", parquetType) -} - -func uint16ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case parquet.Type_INT32: - return int32(value.(uint16)), nil - case parquet.Type_INT64: - return int64(value.(uint16)), nil - } - - return nil, fmt.Errorf("uint16 cannot be converted to parquet type %v", parquetType) -} - -func uint32ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case parquet.Type_INT32: - return int32(value.(uint32)), nil - case parquet.Type_INT64: - return int64(value.(uint32)), nil - } - - return nil, fmt.Errorf("uint32 cannot be converted to parquet type %v", parquetType) -} - -func uint64ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case parquet.Type_INT32: - return int32(value.(uint64)), nil - case parquet.Type_INT64: - return int64(value.(uint64)), nil - } - - return nil, fmt.Errorf("uint64 cannot be converted to parquet type %v", parquetType) -} - -func int8ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case parquet.Type_INT32: - return int32(value.(int8)), nil - case parquet.Type_INT64: - return int64(value.(int8)), nil - } - - return nil, fmt.Errorf("int8 cannot be converted to parquet type %v", parquetType) -} - -func int16ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case 
parquet.Type_INT32: - return int32(value.(int16)), nil - case parquet.Type_INT64: - return int64(value.(int16)), nil - } - - return nil, fmt.Errorf("int16 cannot be converted to parquet type %v", parquetType) -} - -func int32ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case parquet.Type_INT32: - return value.(int32), nil - case parquet.Type_INT64: - return int64(value.(int32)), nil - } - - return nil, fmt.Errorf("int32 cannot be converted to parquet type %v", parquetType) -} - -func int64ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) { - switch parquetType { - case parquet.Type_INT32: - return int32(value.(int64)), nil - case parquet.Type_INT64: - return value.(int64), nil - } - - return nil, fmt.Errorf("int64 cannot be converted to parquet type %v", parquetType) -} - -func resultToParquetValueByConvertedValue(result gjson.Result, convertedType parquet.ConvertedType, parquetType parquet.Type) (value interface{}, err error) { - if result.Type == gjson.Null { - return nil, nil - } - - switch convertedType { - case parquet.ConvertedType_UTF8: - if value, err = resultToString(result); err != nil { - return nil, err - } - return stringToParquetValue(value, parquetType) - case parquet.ConvertedType_UINT_8: - if value, err = resultToUint8(result); err != nil { - return nil, err - } - return uint8ToParquetValue(value, parquetType) - case parquet.ConvertedType_UINT_16: - if value, err = resultToUint16(result); err != nil { - return nil, err - } - return uint16ToParquetValue(value, parquetType) - case parquet.ConvertedType_UINT_32: - if value, err = resultToUint32(result); err != nil { - return nil, err - } - return uint32ToParquetValue(value, parquetType) - case parquet.ConvertedType_UINT_64: - if value, err = resultToUint64(result); err != nil { - return nil, err - } - return uint64ToParquetValue(value, parquetType) - case parquet.ConvertedType_INT_8: - if value, err = 
resultToInt8(result); err != nil { - return nil, err - } - return int8ToParquetValue(value, parquetType) - case parquet.ConvertedType_INT_16: - if value, err = resultToInt16(result); err != nil { - return nil, err - } - return int16ToParquetValue(value, parquetType) - case parquet.ConvertedType_INT_32: - if value, err = resultToInt32(result); err != nil { - return nil, err - } - return int32ToParquetValue(value, parquetType) - case parquet.ConvertedType_INT_64: - if value, err = resultToInt64(result); err != nil { - return nil, err - } - return int64ToParquetValue(value, parquetType) - } - - return nil, fmt.Errorf("unsupported converted type %v", convertedType) -} - -func resultToParquetValue(result gjson.Result, parquetType parquet.Type, convertedType *parquet.ConvertedType) (interface{}, error) { - if convertedType != nil { - return resultToParquetValueByConvertedValue(result, *convertedType, parquetType) - } - - if result.Type == gjson.Null { - return nil, nil - } - - switch parquetType { - case parquet.Type_BOOLEAN: - return resultToBool(result) - case parquet.Type_INT32: - return resultToInt32(result) - case parquet.Type_INT64: - return resultToInt64(result) - case parquet.Type_FLOAT: - return resultToFloat(result) - case parquet.Type_DOUBLE: - return resultToDouble(result) - case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: - return resultToBytes(result) - } - - return nil, fmt.Errorf("unknown parquet type %v", parquetType) -} - -func resultToArray(result gjson.Result) ([]gjson.Result, error) { - if result.Type == gjson.Null { - return nil, nil - } - - if result.Type != gjson.JSON || !result.IsArray() { - return nil, fmt.Errorf("result is not Array but %v", result.Type) - } - - return result.Array(), nil -} diff --git a/pkg/s3select/internal/parquet-go/decode.go b/pkg/s3select/internal/parquet-go/decode.go deleted file mode 100644 index f55aa6df2..000000000 --- a/pkg/s3select/internal/parquet-go/decode.go +++ /dev/null @@ 
-1,514 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "bytes" - "errors" - "fmt" - "math" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func i64sToi32s(i64s []int64) (i32s []int32) { - i32s = make([]int32, len(i64s)) - for i := range i64s { - i32s[i] = int32(i64s[i]) - } - - return i32s -} - -func readBitPacked(reader *bytes.Reader, header, bitWidth uint64) (result []int64, err error) { - count := header * 8 - - if count == 0 { - return result, nil - } - - if bitWidth == 0 { - return make([]int64, count), nil - } - - data := make([]byte, header*bitWidth) - if _, err = reader.Read(data); err != nil { - return nil, err - } - - var val, used, left, b uint64 - - valNeedBits := bitWidth - i := -1 - for { - if left <= 0 { - i++ - if i >= len(data) { - break - } - - b = uint64(data[i]) - left = 8 - used = 0 - } - - if left >= valNeedBits { - val |= ((b >> used) & ((1 << valNeedBits) - 1)) << (bitWidth - valNeedBits) - result = append(result, int64(val)) - val = 0 - left -= valNeedBits - used += valNeedBits - valNeedBits = bitWidth - } else { - val |= (b >> used) << (bitWidth - valNeedBits) - valNeedBits -= left - left = 0 - } - } - - return result, nil -} - -func readBools(reader *bytes.Reader, count uint64) 
(result []bool, err error) { - i64s, err := readBitPacked(reader, count, 1) - if err != nil { - return nil, err - } - - var i uint64 - for i = 0; i < count; i++ { - result = append(result, i64s[i] > 0) - } - - return result, nil -} - -func readInt32s(reader *bytes.Reader, count uint64) (result []int32, err error) { - buf := make([]byte, 4) - - var i uint64 - for i = 0; i < count; i++ { - if _, err = reader.Read(buf); err != nil { - return nil, err - } - - result = append(result, int32(bytesToUint32(buf))) - } - - return result, nil -} - -func readInt64s(reader *bytes.Reader, count uint64) (result []int64, err error) { - buf := make([]byte, 8) - - var i uint64 - for i = 0; i < count; i++ { - if _, err = reader.Read(buf); err != nil { - return nil, err - } - - result = append(result, int64(bytesToUint64(buf))) - } - - return result, nil -} - -func readInt96s(reader *bytes.Reader, count uint64) (result [][]byte, err error) { - var i uint64 - for i = 0; i < count; i++ { - buf := make([]byte, 12) - - if _, err = reader.Read(buf); err != nil { - return nil, err - } - - result = append(result, buf) - } - - return result, nil -} - -func readFloats(reader *bytes.Reader, count uint64) (result []float32, err error) { - buf := make([]byte, 4) - - var i uint64 - for i = 0; i < count; i++ { - if _, err = reader.Read(buf); err != nil { - return nil, err - } - - result = append(result, math.Float32frombits(bytesToUint32(buf))) - } - - return result, nil -} - -func readDoubles(reader *bytes.Reader, count uint64) (result []float64, err error) { - buf := make([]byte, 8) - - var i uint64 - for i = 0; i < count; i++ { - if _, err = reader.Read(buf); err != nil { - return nil, err - } - - result = append(result, math.Float64frombits(bytesToUint64(buf))) - } - - return result, nil -} - -func readByteArrays(reader *bytes.Reader, count uint64) (result [][]byte, err error) { - buf := make([]byte, 4) - var length uint32 - var data []byte - - var i uint64 - for i = 0; i < count; i++ { - if _, 
err = reader.Read(buf); err != nil { - return nil, err - } - - length = bytesToUint32(buf) - data = make([]byte, length) - if length > 0 { - if _, err = reader.Read(data); err != nil { - return nil, err - } - } - - result = append(result, data) - } - - return result, nil -} - -func readFixedLenByteArrays(reader *bytes.Reader, count, length uint64) (result [][]byte, err error) { - var i uint64 - for i = 0; i < count; i++ { - data := make([]byte, length) - if _, err = reader.Read(data); err != nil { - return nil, err - } - - result = append(result, data) - } - - return result, nil -} - -func readValues(reader *bytes.Reader, dataType parquet.Type, count, length uint64) (interface{}, error) { - switch dataType { - case parquet.Type_BOOLEAN: - return readBools(reader, count) - case parquet.Type_INT32: - return readInt32s(reader, count) - case parquet.Type_INT64: - return readInt64s(reader, count) - case parquet.Type_INT96: - return readInt96s(reader, count) - case parquet.Type_FLOAT: - return readFloats(reader, count) - case parquet.Type_DOUBLE: - return readDoubles(reader, count) - case parquet.Type_BYTE_ARRAY: - return readByteArrays(reader, count) - case parquet.Type_FIXED_LEN_BYTE_ARRAY: - return readFixedLenByteArrays(reader, count, length) - } - - return nil, fmt.Errorf("unknown parquet type %v", dataType) -} - -func readUnsignedVarInt(reader *bytes.Reader) (v uint64, err error) { - var b byte - var shift uint64 - - for { - if b, err = reader.ReadByte(); err != nil { - return 0, err - } - - if v |= ((uint64(b) & 0x7F) << shift); b&0x80 == 0 { - break - } - - shift += 7 - } - - return v, nil -} - -func readRLE(reader *bytes.Reader, header, bitWidth uint64) (result []int64, err error) { - width := (bitWidth + 7) / 8 - data := make([]byte, width) - if width > 0 { - if _, err = reader.Read(data); err != nil { - return nil, err - } - } - - if width < 4 { - data = append(data, make([]byte, 4-width)...) 
- } - - val := int64(bytesToUint32(data)) - count := header >> 1 - if count > math.MaxInt64/8 { - // 8 bytes/element. - return nil, errors.New("parquet: size too large") - } - result = make([]int64, count) - for i := range result { - result[i] = val - } - - return result, nil -} - -func readRLEBitPackedHybrid(reader *bytes.Reader, length, bitWidth uint64) (result []int64, err error) { - if length <= 0 { - var i32s []int32 - i32s, err = readInt32s(reader, 1) - if err != nil { - return nil, err - } - if i32s[0] < 0 { - return nil, errors.New("parquet: negative RLEBitPackedHybrid length") - } - length = uint64(i32s[0]) - } - - buf := make([]byte, length) - if _, err = reader.Read(buf); err != nil { - return nil, err - } - - reader = bytes.NewReader(buf) - for reader.Len() > 0 { - header, err := readUnsignedVarInt(reader) - if err != nil { - return nil, err - } - - var i64s []int64 - if header&1 == 0 { - i64s, err = readRLE(reader, header, bitWidth) - } else { - i64s, err = readBitPacked(reader, header>>1, bitWidth) - } - - if err != nil { - return nil, err - } - - result = append(result, i64s...) 
- } - - return result, nil -} - -func readDeltaBinaryPackedInt(reader *bytes.Reader) (result []int64, err error) { - blockSize, err := readUnsignedVarInt(reader) - if err != nil { - return nil, err - } - - numMiniblocksInBlock, err := readUnsignedVarInt(reader) - if err != nil { - return nil, err - } - - numValues, err := readUnsignedVarInt(reader) - if err != nil { - return nil, err - } - - firstValueZigZag, err := readUnsignedVarInt(reader) - if err != nil { - return nil, err - } - - v := int64(firstValueZigZag>>1) ^ (-int64(firstValueZigZag & 1)) - result = append(result, v) - if numMiniblocksInBlock == 0 { - return nil, errors.New("parquet: zero mini blocks in block") - } - numValuesInMiniBlock := blockSize / numMiniblocksInBlock - - bitWidths := make([]uint64, numMiniblocksInBlock) - for uint64(len(result)) < numValues { - minDeltaZigZag, err := readUnsignedVarInt(reader) - if err != nil { - return nil, err - } - - for i := 0; uint64(i) < numMiniblocksInBlock; i++ { - b, err := reader.ReadByte() - if err != nil { - return nil, err - } - bitWidths[i] = uint64(b) - } - - minDelta := int64(minDeltaZigZag>>1) ^ (-int64(minDeltaZigZag & 1)) - for i := 0; uint64(i) < numMiniblocksInBlock; i++ { - i64s, err := readBitPacked(reader, numValuesInMiniBlock/8, bitWidths[i]) - if err != nil { - return nil, err - } - - for j := range i64s { - v += i64s[j] + minDelta - result = append(result, v) - } - } - } - - return result[:numValues], nil -} - -func readDeltaLengthByteArrays(reader *bytes.Reader) (result [][]byte, err error) { - i64s, err := readDeltaBinaryPackedInt(reader) - if err != nil { - return nil, err - } - - for i := 0; i < len(i64s); i++ { - arrays, err := readFixedLenByteArrays(reader, 1, uint64(i64s[i])) - if err != nil { - return nil, err - } - - result = append(result, arrays[0]) - } - - return result, nil -} - -func readDeltaByteArrays(reader *bytes.Reader) (result [][]byte, err error) { - i64s, err := readDeltaBinaryPackedInt(reader) - if err != nil { - 
return nil, err - } - - suffixes, err := readDeltaLengthByteArrays(reader) - if err != nil { - return nil, err - } - - result = append(result, suffixes[0]) - for i := 1; i < len(i64s); i++ { - prefixLength := i64s[i] - val := append([]byte{}, result[i-1][:prefixLength]...) - val = append(val, suffixes[i]...) - result = append(result, val) - } - - return result, nil -} - -func readDataPageValues( - bytesReader *bytes.Reader, - encoding parquet.Encoding, - dataType parquet.Type, - convertedType parquet.ConvertedType, - count, bitWidth uint64, -) (result interface{}, resultDataType parquet.Type, err error) { - switch encoding { - case parquet.Encoding_PLAIN: - result, err = readValues(bytesReader, dataType, count, bitWidth) - return result, dataType, err - - case parquet.Encoding_PLAIN_DICTIONARY: - b, err := bytesReader.ReadByte() - if err != nil { - return nil, -1, err - } - - i64s, err := readRLEBitPackedHybrid(bytesReader, uint64(bytesReader.Len()), uint64(b)) - if err != nil { - return nil, -1, err - } - if len(i64s) < int(count) || count > math.MaxInt64/8 { - return nil, -1, errors.New("parquet: value out of range") - } - return i64s[:count], parquet.Type_INT64, nil - - case parquet.Encoding_RLE: - i64s, err := readRLEBitPackedHybrid(bytesReader, 0, bitWidth) - if err != nil { - return nil, -1, err - } - - if len(i64s) < int(count) || count > math.MaxInt64/8 { - return nil, -1, errors.New("parquet: value out of range") - } - i64s = i64s[:count] - - if dataType == parquet.Type_INT32 { - return i64sToi32s(i64s), parquet.Type_INT32, nil - } - - return i64s, parquet.Type_INT64, nil - - case parquet.Encoding_BIT_PACKED: - return nil, -1, fmt.Errorf("deprecated parquet encoding %v", parquet.Encoding_BIT_PACKED) - - case parquet.Encoding_DELTA_BINARY_PACKED: - i64s, err := readDeltaBinaryPackedInt(bytesReader) - if err != nil { - return nil, -1, err - } - - if len(i64s) < int(count) || count > math.MaxInt64/8 { - return nil, -1, errors.New("parquet: value out of 
range") - } - i64s = i64s[:count] - - if dataType == parquet.Type_INT32 { - return i64sToi32s(i64s), parquet.Type_INT32, nil - } - - return i64s, parquet.Type_INT64, nil - - case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: - byteSlices, err := readDeltaLengthByteArrays(bytesReader) - if err != nil { - return nil, -1, err - } - if len(byteSlices) < int(count) || count > math.MaxInt64/24 { - return nil, -1, errors.New("parquet: value out of range") - } - - return byteSlices[:count], parquet.Type_FIXED_LEN_BYTE_ARRAY, nil - - case parquet.Encoding_DELTA_BYTE_ARRAY: - byteSlices, err := readDeltaByteArrays(bytesReader) - if err != nil { - return nil, -1, err - } - if len(byteSlices) < int(count) || count > math.MaxInt64/24 { - return nil, -1, errors.New("parquet: value out of range") - } - - return byteSlices[:count], parquet.Type_FIXED_LEN_BYTE_ARRAY, nil - } - - return nil, -1, fmt.Errorf("unsupported parquet encoding %v", encoding) -} diff --git a/pkg/s3select/internal/parquet-go/encode.go b/pkg/s3select/internal/parquet-go/encode.go deleted file mode 100644 index 0c7427a42..000000000 --- a/pkg/s3select/internal/parquet-go/encode.go +++ /dev/null @@ -1,451 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package parquet - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "math" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func boolsToBytes(bs []bool) []byte { - size := (len(bs) + 7) / 8 - result := make([]byte, size) - for i := range bs { - if bs[i] { - result[i/8] |= 1 << uint32(i%8) - } - } - - return result -} - -func int32sToBytes(i32s []int32) []byte { - buf := make([]byte, 4*len(i32s)) - for i, i32 := range i32s { - binary.LittleEndian.PutUint32(buf[i*4:], uint32(i32)) - } - return buf -} - -func int64sToBytes(i64s []int64) []byte { - buf := make([]byte, 8*len(i64s)) - for i, i64 := range i64s { - binary.LittleEndian.PutUint64(buf[i*8:], uint64(i64)) - } - return buf -} - -func float32sToBytes(f32s []float32) []byte { - buf := make([]byte, 4*len(f32s)) - for i, f32 := range f32s { - binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(f32)) - } - return buf -} - -func float64sToBytes(f64s []float64) []byte { - buf := make([]byte, 8*len(f64s)) - for i, f64 := range f64s { - binary.LittleEndian.PutUint64(buf[i*8:], math.Float64bits(f64)) - } - return buf -} - -func byteSlicesToBytes(byteSlices [][]byte) []byte { - buf := new(bytes.Buffer) - for _, s := range byteSlices { - if err := binary.Write(buf, binary.LittleEndian, uint32(len(s))); err != nil { - panic(err) - } - - if _, err := buf.Write(s); err != nil { - panic(err) - } - } - - return buf.Bytes() -} - -func byteArraysToBytes(arrayList [][]byte) []byte { - buf := new(bytes.Buffer) - arrayLen := -1 - for _, array := range arrayList { - if arrayLen != -1 && len(array) != arrayLen { - panic(errors.New("array list does not have same length")) - } - - arrayLen = len(array) - if _, err := buf.Write(array); err != nil { - panic(err) - } - } - - return buf.Bytes() -} - -func int96sToBytes(i96s [][]byte) []byte { - return byteArraysToBytes(i96s) -} - -func valuesToBytes(values interface{}, dataType parquet.Type) []byte { - switch dataType { - case 
parquet.Type_BOOLEAN: - return boolsToBytes(values.([]bool)) - case parquet.Type_INT32: - return int32sToBytes(values.([]int32)) - case parquet.Type_INT64: - return int64sToBytes(values.([]int64)) - case parquet.Type_INT96: - return int96sToBytes(values.([][]byte)) - case parquet.Type_FLOAT: - return float32sToBytes(values.([]float32)) - case parquet.Type_DOUBLE: - return float64sToBytes(values.([]float64)) - case parquet.Type_BYTE_ARRAY: - return byteSlicesToBytes(values.([][]byte)) - case parquet.Type_FIXED_LEN_BYTE_ARRAY: - return byteArraysToBytes(values.([][]byte)) - } - - return []byte{} -} - -func valueToBytes(value interface{}, dataType parquet.Type) []byte { - var values interface{} - switch dataType { - case parquet.Type_BOOLEAN: - values = []bool{value.(bool)} - case parquet.Type_INT32: - values = []int32{value.(int32)} - case parquet.Type_INT64: - values = []int64{value.(int64)} - case parquet.Type_INT96: - values = [][]byte{value.([]byte)} - case parquet.Type_FLOAT: - values = []float32{value.(float32)} - case parquet.Type_DOUBLE: - values = []float64{value.(float64)} - case parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: - values = [][]byte{value.([]byte)} - } - - return valuesToBytes(values, dataType) -} - -func unsignedVarIntToBytes(ui64 uint64) []byte { - size := (getBitWidth(ui64) + 6) / 7 - if size == 0 { - return []byte{0} - } - - buf := make([]byte, size) - for i := uint64(0); i < size; i++ { - buf[i] = byte(ui64&0x7F) | 0x80 - ui64 >>= 7 - } - buf[size-1] &= 0x7F - - return buf -} - -func valuesToRLEBytes(values interface{}, bitWidth int32, valueType parquet.Type) []byte { - vals := valuesToInterfaces(values, valueType) - result := []byte{} - j := 0 - for i := 0; i < len(vals); i = j { - for j = i + 1; j < len(vals) && vals[i] == vals[j]; j++ { - } - headerBytes := unsignedVarIntToBytes(uint64((j - i) << 1)) - result = append(result, headerBytes...) 
- - valBytes := valueToBytes(vals[i], valueType) - byteCount := (bitWidth + 7) / 8 - result = append(result, valBytes[:byteCount]...) - } - - return result -} - -func valuesToRLEBitPackedHybridBytes(values interface{}, bitWidth int32, dataType parquet.Type) []byte { - rleBytes := valuesToRLEBytes(values, bitWidth, dataType) - lenBytes := valueToBytes(int32(len(rleBytes)), parquet.Type_INT32) - return append(lenBytes, rleBytes...) -} - -func valuesToBitPackedBytes(values interface{}, bitWidth int64, withHeader bool, dataType parquet.Type) []byte { - var i64s []int64 - switch dataType { - case parquet.Type_BOOLEAN: - bs := values.([]bool) - i64s = make([]int64, len(bs)) - for i := range bs { - if bs[i] { - i64s[i] = 1 - } - } - case parquet.Type_INT32: - i32s := values.([]int32) - i64s = make([]int64, len(i32s)) - for i := range i32s { - i64s[i] = int64(i32s[i]) - } - case parquet.Type_INT64: - i64s = values.([]int64) - default: - panic(fmt.Errorf("data type %v is not supported for bit packing", dataType)) - } - - if len(i64s) == 0 { - return nil - } - - var valueByte byte - bitsSet := uint64(0) - bitsNeeded := uint64(8) - bitsToSet := uint64(bitWidth) - value := i64s[0] - - valueBytes := []byte{} - for i := 0; i < len(i64s); { - if bitsToSet >= bitsNeeded { - valueByte |= byte(((value >> bitsSet) & ((1 << bitsNeeded) - 1)) << (8 - bitsNeeded)) - valueBytes = append(valueBytes, valueByte) - bitsToSet -= bitsNeeded - bitsSet += bitsNeeded - - bitsNeeded = 8 - valueByte = 0 - - if bitsToSet <= 0 && (i+1) < len(i64s) { - i++ - value = i64s[i] - bitsToSet = uint64(bitWidth) - bitsSet = 0 - } - } else { - valueByte |= byte((value >> bitsSet) << (8 - bitsNeeded)) - i++ - - if i < len(i64s) { - value = i64s[i] - } - - bitsNeeded -= bitsToSet - bitsToSet = uint64(bitWidth) - bitsSet = 0 - } - } - - if withHeader { - header := uint64(((len(i64s) / 8) << 1) | 1) - headerBytes := unsignedVarIntToBytes(header) - return append(headerBytes, valueBytes...) 
- } - - return valueBytes -} - -const ( - blockSize = 128 - subBlockSize = 32 - subBlockCount = blockSize / subBlockSize -) - -var ( - blockSizeBytes = unsignedVarIntToBytes(blockSize) - subBlockCountBytes = unsignedVarIntToBytes(subBlockCount) -) - -func int32ToDeltaBytes(i32s []int32) []byte { - getValue := func(i32 int32) uint64 { - return uint64((i32 >> 31) ^ (i32 << 1)) - } - - result := append([]byte{}, blockSizeBytes...) - result = append(result, subBlockCountBytes...) - result = append(result, unsignedVarIntToBytes(uint64(len(i32s)))...) - result = append(result, unsignedVarIntToBytes(getValue(i32s[0]))...) - - for i := 1; i < len(i32s); { - block := []int32{} - minDelta := int32(0x7FFFFFFF) - - for ; i < len(i32s) && len(block) < blockSize; i++ { - delta := i32s[i] - i32s[i-1] - block = append(block, delta) - if delta < minDelta { - minDelta = delta - } - } - - for len(block) < blockSize { - block = append(block, minDelta) - } - - bitWidths := make([]byte, subBlockCount) - for j := 0; j < subBlockCount; j++ { - maxValue := int32(0) - for k := j * subBlockSize; k < (j+1)*subBlockSize; k++ { - block[k] -= minDelta - if block[k] > maxValue { - maxValue = block[k] - } - } - - bitWidths[j] = byte(getBitWidth(uint64(maxValue))) - } - - minDeltaZigZag := getValue(minDelta) - result = append(result, unsignedVarIntToBytes(minDeltaZigZag)...) - result = append(result, bitWidths...) - - for j := 0; j < subBlockCount; j++ { - bitPacked := valuesToBitPackedBytes( - block[j*subBlockSize:(j+1)*subBlockSize], - int64(bitWidths[j]), - false, - parquet.Type_INT32, - ) - result = append(result, bitPacked...) - } - } - - return result -} - -func int64ToDeltaBytes(i64s []int64) []byte { - getValue := func(i64 int64) uint64 { - return uint64((i64 >> 63) ^ (i64 << 1)) - } - - result := append([]byte{}, blockSizeBytes...) - result = append(result, subBlockCountBytes...) - result = append(result, unsignedVarIntToBytes(uint64(len(i64s)))...) 
- result = append(result, unsignedVarIntToBytes(getValue(i64s[0]))...) - - for i := 1; i < len(i64s); { - block := []int64{} - minDelta := int64(0x7FFFFFFFFFFFFFFF) - - for ; i < len(i64s) && len(block) < blockSize; i++ { - delta := i64s[i] - i64s[i-1] - block = append(block, delta) - if delta < minDelta { - minDelta = delta - } - } - - for len(block) < blockSize { - block = append(block, minDelta) - } - - bitWidths := make([]byte, subBlockCount) - for j := 0; j < subBlockCount; j++ { - maxValue := int64(0) - for k := j * subBlockSize; k < (j+1)*subBlockSize; k++ { - block[k] -= minDelta - if block[k] > maxValue { - maxValue = block[k] - } - } - - bitWidths[j] = byte(getBitWidth(uint64(maxValue))) - } - - minDeltaZigZag := getValue(minDelta) - result = append(result, unsignedVarIntToBytes(minDeltaZigZag)...) - result = append(result, bitWidths...) - - for j := 0; j < subBlockCount; j++ { - bitPacked := valuesToBitPackedBytes( - block[j*subBlockSize:(j+1)*subBlockSize], - int64(bitWidths[j]), - false, - parquet.Type_INT64, - ) - result = append(result, bitPacked...) - } - } - - return result -} - -func valuesToDeltaBytes(values interface{}, dataType parquet.Type) []byte { - switch dataType { - case parquet.Type_INT32: - return int32ToDeltaBytes(values.([]int32)) - case parquet.Type_INT64: - return int64ToDeltaBytes(values.([]int64)) - } - - return nil -} - -func stringsToDeltaLengthByteArrayBytes(strs []string) []byte { - lengths := make([]int32, len(strs)) - for i, s := range strs { - lengths[i] = int32(len(s)) - } - - result := int32ToDeltaBytes(lengths) - for _, s := range strs { - result = append(result, []byte(s)...) 
- } - - return result -} - -func stringsToDeltaByteArrayBytes(strs []string) []byte { - prefixLengths := make([]int32, len(strs)) - suffixes := make([]string, len(strs)) - - var i, j int - for i = 1; i < len(strs); i++ { - for j = 0; j < len(strs[i-1]) && j < len(strs[i]); j++ { - if strs[i-1][j] != strs[i][j] { - break - } - } - - prefixLengths[i] = int32(j) - suffixes[i] = strs[i][j:] - } - - result := int32ToDeltaBytes(prefixLengths) - return append(result, stringsToDeltaLengthByteArrayBytes(suffixes)...) -} - -func encodeValues(values interface{}, dataType parquet.Type, encoding parquet.Encoding, bitWidth int32) []byte { - switch encoding { - case parquet.Encoding_RLE: - return valuesToRLEBitPackedHybridBytes(values, bitWidth, dataType) - case parquet.Encoding_DELTA_BINARY_PACKED: - return valuesToDeltaBytes(values, dataType) - case parquet.Encoding_DELTA_BYTE_ARRAY: - return stringsToDeltaByteArrayBytes(values.([]string)) - case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: - return stringsToDeltaLengthByteArrayBytes(values.([]string)) - } - - return valuesToBytes(values, dataType) -} diff --git a/pkg/s3select/internal/parquet-go/encode_test.go b/pkg/s3select/internal/parquet-go/encode_test.go deleted file mode 100644 index 6f10af6f6..000000000 --- a/pkg/s3select/internal/parquet-go/encode_test.go +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. 
-// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "math" - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func TestBoolsToBytes(t *testing.T) { - testCases := []struct { - bs []bool - expectedResult []byte - }{ - {nil, []byte{}}, - {[]bool{}, []byte{}}, - {[]bool{true}, []byte{1}}, - {[]bool{false}, []byte{0}}, - {[]bool{true, true}, []byte{3}}, - {[]bool{false, false}, []byte{0}}, - {[]bool{false, true}, []byte{2}}, - {[]bool{true, false}, []byte{1}}, - {[]bool{false, false, false, false, false, false, false, true, true}, []byte{128, 1}}, - } - - for i, testCase := range testCases { - result := boolsToBytes(testCase.bs) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestInt32sToBytes(t *testing.T) { - testCases := []struct { - i32s []int32 - expectedResult []byte - }{ - {nil, []byte{}}, - {[]int32{}, []byte{}}, - {[]int32{1}, []byte{1, 0, 0, 0}}, - {[]int32{-1}, []byte{255, 255, 255, 255}}, - {[]int32{256}, []byte{0, 1, 0, 0}}, - {[]int32{math.MinInt32}, []byte{0, 0, 0, 128}}, - {[]int32{math.MaxInt32}, []byte{255, 255, 255, 127}}, - {[]int32{257, -2}, []byte{1, 1, 0, 0, 254, 255, 255, 255}}, - } - - for i, testCase := range testCases { - result := int32sToBytes(testCase.i32s) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestInt64sToBytes(t *testing.T) { - testCases := []struct { - i64s []int64 - expectedResult []byte - }{ - {nil, []byte{}}, - {[]int64{}, []byte{}}, - {[]int64{1}, []byte{1, 0, 0, 0, 0, 0, 0, 0}}, - {[]int64{-1}, []byte{255, 255, 255, 255, 255, 255, 255, 255}}, - {[]int64{256}, []byte{0, 1, 0, 0, 0, 0, 0, 0}}, - {[]int64{math.MinInt64}, []byte{0, 0, 0, 0, 0, 0, 
0, 128}}, - {[]int64{math.MaxInt64}, []byte{255, 255, 255, 255, 255, 255, 255, 127}}, - {[]int64{257, -2}, []byte{1, 1, 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255}}, - } - - for i, testCase := range testCases { - result := int64sToBytes(testCase.i64s) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestFloat32sToBytes(t *testing.T) { - testCases := []struct { - f32s []float32 - expectedResult []byte - }{ - {nil, []byte{}}, - {[]float32{}, []byte{}}, - {[]float32{1}, []byte{0, 0, 128, 63}}, - {[]float32{1.0}, []byte{0, 0, 128, 63}}, - {[]float32{-1}, []byte{0, 0, 128, 191}}, - {[]float32{-1.0}, []byte{0, 0, 128, 191}}, - {[]float32{256}, []byte{0, 0, 128, 67}}, - {[]float32{1.1}, []byte{205, 204, 140, 63}}, - {[]float32{-1.1}, []byte{205, 204, 140, 191}}, - {[]float32{math.Pi}, []byte{219, 15, 73, 64}}, - {[]float32{257, -2}, []byte{0, 128, 128, 67, 0, 0, 0, 192}}, - {[]float32{257.1, -2.1}, []byte{205, 140, 128, 67, 102, 102, 6, 192}}, - } - - for i, testCase := range testCases { - result := float32sToBytes(testCase.f32s) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestFloat64sToBytes(t *testing.T) { - testCases := []struct { - f64s []float64 - expectedResult []byte - }{ - {nil, []byte{}}, - {[]float64{}, []byte{}}, - {[]float64{1}, []byte{0, 0, 0, 0, 0, 0, 240, 63}}, - {[]float64{1.0}, []byte{0, 0, 0, 0, 0, 0, 240, 63}}, - {[]float64{-1}, []byte{0, 0, 0, 0, 0, 0, 240, 191}}, - {[]float64{-1.0}, []byte{0, 0, 0, 0, 0, 0, 240, 191}}, - {[]float64{256}, []byte{0, 0, 0, 0, 0, 0, 112, 64}}, - {[]float64{1.1}, []byte{154, 153, 153, 153, 153, 153, 241, 63}}, - {[]float64{-1.1}, []byte{154, 153, 153, 153, 153, 153, 241, 191}}, - {[]float64{math.Pi}, []byte{24, 45, 68, 84, 251, 33, 9, 64}}, - {[]float64{257, -2}, []byte{0, 0, 0, 0, 0, 
16, 112, 64, 0, 0, 0, 0, 0, 0, 0, 192}}, - {[]float64{257.1, -2.1}, []byte{154, 153, 153, 153, 153, 17, 112, 64, 205, 204, 204, 204, 204, 204, 0, 192}}, - } - - for i, testCase := range testCases { - result := float64sToBytes(testCase.f64s) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestUnsignedVarIntToBytes(t *testing.T) { - testCases := []struct { - ui64 uint64 - expectedResult []byte - }{ - {0, []byte{0}}, - {1, []byte{1}}, - {0x7F, []byte{127}}, - {0x80, []byte{128, 1}}, - {uint64(math.MaxUint64), []byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 1}}, - } - - for i, testCase := range testCases { - result := unsignedVarIntToBytes(testCase.ui64) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestValuesToRLEBytes(t *testing.T) { - testCases := []struct { - values interface{} - bitWidth int32 - dataType parquet.Type - expectedResult []byte - }{ - {[]int32{3, 5, 7}, 1, parquet.Type_INT32, []byte{2, 3, 2, 5, 2, 7}}, - {[]int32{3, 3, 3}, 1, parquet.Type_INT32, []byte{6, 3}}, - {[]int32{2, 2, 3, 3, 3}, 1, parquet.Type_INT32, []byte{4, 2, 6, 3}}, - } - - for i, testCase := range testCases { - result := valuesToRLEBytes(testCase.values, testCase.bitWidth, testCase.dataType) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} diff --git a/pkg/s3select/internal/parquet-go/encoding/common.go b/pkg/s3select/internal/parquet-go/encoding/common.go deleted file mode 100644 index 6ce01a838..000000000 --- a/pkg/s3select/internal/parquet-go/encoding/common.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. 
-// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package encoding - -import ( - "github.com/minio/minio/pkg/s3select/internal/parquet-go/common" -) - -// Refer https://en.wikipedia.org/wiki/LEB128#Unsigned_LEB128 -func varIntEncode(ui64 uint64) []byte { - if ui64 == 0 { - return []byte{0} - } - - length := int(common.BitWidth(ui64)+6) / 7 - data := make([]byte, length) - for i := 0; i < length; i++ { - data[i] = byte(ui64&0x7F) | 0x80 - ui64 >>= 7 - } - data[length-1] &= 0x7F - - return data -} diff --git a/pkg/s3select/internal/parquet-go/encoding/common_test.go b/pkg/s3select/internal/parquet-go/encoding/common_test.go deleted file mode 100644 index 715636756..000000000 --- a/pkg/s3select/internal/parquet-go/encoding/common_test.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package encoding - -import ( - "math" - "reflect" - "testing" -) - -func TestVarIntToBytes(t *testing.T) { - testCases := []struct { - ui64 uint64 - expectedResult []byte - }{ - {0, []byte{0}}, - {1, []byte{1}}, - {0x7F, []byte{127}}, - {0x80, []byte{128, 1}}, - {uint64(math.MaxUint64), []byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 1}}, - } - - for i, testCase := range testCases { - result := varIntEncode(testCase.ui64) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} diff --git a/pkg/s3select/internal/parquet-go/encoding/delta-encode.go b/pkg/s3select/internal/parquet-go/encoding/delta-encode.go deleted file mode 100644 index df3106ae8..000000000 --- a/pkg/s3select/internal/parquet-go/encoding/delta-encode.go +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package encoding - -import ( - "fmt" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/common" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -const ( - blockSize = 128 - miniBlockSize = 32 - miniBlockCount = blockSize / miniBlockSize -) - -var deltaEncodeHeaderBytes []byte - -func init() { - deltaEncodeHeaderBytes = varIntEncode(blockSize) - deltaEncodeHeaderBytes = append(deltaEncodeHeaderBytes, varIntEncode(miniBlockCount)...) -} - -// Supported Types: BOOLEAN, INT32, INT64 -func bitPackedEncode(values interface{}, bitWidth uint64, withHeader bool, parquetType parquet.Type) []byte { - var i64s []int64 - switch parquetType { - case parquet.Type_BOOLEAN: - bs, ok := values.([]bool) - if !ok { - panic(fmt.Errorf("expected slice of bool")) - } - - i64s = make([]int64, len(bs)) - for i := range bs { - if bs[i] { - i64s[i] = 1 - } - } - case parquet.Type_INT32: - i32s, ok := values.([]int32) - if !ok { - panic(fmt.Errorf("expected slice of int32")) - } - - for i := range i32s { - i64s[i] = int64(i32s[i]) - } - case parquet.Type_INT64: - var ok bool - i64s, ok = values.([]int64) - if !ok { - panic(fmt.Errorf("expected slice of int64")) - } - default: - panic(fmt.Errorf("%v parquet type unsupported", parquetType)) - } - - if len(i64s) == 0 { - return nil - } - - var valueByte byte - bitsSet := uint64(0) - bitsNeeded := uint64(8) - bitsToSet := bitWidth - value := i64s[0] - - valueBytes := []byte{} - for i := 0; i < len(i64s); { - if bitsToSet >= bitsNeeded { - valueByte |= byte(((value >> bitsSet) & ((1 << bitsNeeded) - 1)) << (8 - bitsNeeded)) - valueBytes = append(valueBytes, valueByte) - bitsToSet -= bitsNeeded - bitsSet += bitsNeeded - - bitsNeeded = 8 - valueByte = 0 - - if bitsToSet <= 0 && (i+1) < len(i64s) { - i++ - value = i64s[i] - bitsToSet = bitWidth - bitsSet = 0 - } - } else { - valueByte |= byte((value >> bitsSet) << (8 - bitsNeeded)) - i++ - - if i < len(i64s) { - value = i64s[i] - } - - bitsNeeded -= 
bitsToSet - bitsToSet = bitWidth - bitsSet = 0 - } - } - - if withHeader { - header := uint64(((len(i64s) / 8) << 1) | 1) - headerBytes := varIntEncode(header) - return append(headerBytes, valueBytes...) - } - - return valueBytes -} - -func deltaEncodeInt32s(i32s []int32) (data []byte) { - getValue := func(i32 int32) uint64 { - return uint64((i32 >> 31) ^ (i32 << 1)) - } - - data = append(data, deltaEncodeHeaderBytes...) - data = append(data, varIntEncode(uint64(len(i32s)))...) - data = append(data, varIntEncode(getValue(i32s[0]))...) - - for i := 1; i < len(i32s); { - block := []int32{} - minDelta := int32(0x7FFFFFFF) - - for ; i < len(i32s) && len(block) < blockSize; i++ { - delta := i32s[i] - i32s[i-1] - block = append(block, delta) - if delta < minDelta { - minDelta = delta - } - } - - for len(block) < blockSize { - block = append(block, minDelta) - } - - bitWidths := make([]byte, miniBlockCount) - for j := 0; j < miniBlockCount; j++ { - maxValue := int32(0) - for k := j * miniBlockSize; k < (j+1)*miniBlockSize; k++ { - block[k] -= minDelta - if block[k] > maxValue { - maxValue = block[k] - } - } - - bitWidths[j] = byte(common.BitWidth(uint64(maxValue))) - } - - minDeltaZigZag := getValue(minDelta) - data = append(data, varIntEncode(minDeltaZigZag)...) - data = append(data, bitWidths...) - - for j := 0; j < miniBlockCount; j++ { - bitPacked := bitPackedEncode( - block[j*miniBlockSize:(j+1)*miniBlockSize], - uint64(bitWidths[j]), - false, - parquet.Type_INT32, - ) - data = append(data, bitPacked...) - } - } - - return data -} - -func deltaEncodeInt64s(i64s []int64) (data []byte) { - getValue := func(i64 int64) uint64 { - return uint64((i64 >> 63) ^ (i64 << 1)) - } - - data = append(data, deltaEncodeHeaderBytes...) - data = append(data, varIntEncode(uint64(len(i64s)))...) - data = append(data, varIntEncode(getValue(i64s[0]))...) 
- - for i := 1; i < len(i64s); { - block := []int64{} - minDelta := int64(0x7FFFFFFFFFFFFFFF) - - for ; i < len(i64s) && len(block) < blockSize; i++ { - delta := i64s[i] - i64s[i-1] - block = append(block, delta) - if delta < minDelta { - minDelta = delta - } - } - - for len(block) < blockSize { - block = append(block, minDelta) - } - - bitWidths := make([]byte, miniBlockCount) - for j := 0; j < miniBlockCount; j++ { - maxValue := int64(0) - for k := j * miniBlockSize; k < (j+1)*miniBlockSize; k++ { - block[k] -= minDelta - if block[k] > maxValue { - maxValue = block[k] - } - } - - bitWidths[j] = byte(common.BitWidth(uint64(maxValue))) - } - - minDeltaZigZag := getValue(minDelta) - data = append(data, varIntEncode(minDeltaZigZag)...) - data = append(data, bitWidths...) - - for j := 0; j < miniBlockCount; j++ { - bitPacked := bitPackedEncode( - block[j*miniBlockSize:(j+1)*miniBlockSize], - uint64(bitWidths[j]), - false, - parquet.Type_INT64, - ) - data = append(data, bitPacked...) - } - } - - return data -} - -// DeltaEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-encoding-delta_binary_packed--5 -// -// Supported Types: INT32, INT64. 
-func DeltaEncode(values interface{}, parquetType parquet.Type) []byte { - switch parquetType { - case parquet.Type_INT32: - i32s, ok := values.([]int32) - if !ok { - panic(fmt.Errorf("expected slice of int32")) - } - return deltaEncodeInt32s(i32s) - case parquet.Type_INT64: - i64s, ok := values.([]int64) - if !ok { - panic(fmt.Errorf("expected slice of int64")) - } - return deltaEncodeInt64s(i64s) - } - - panic(fmt.Errorf("%v parquet type unsupported", parquetType)) -} - -// DeltaLengthByteArrayEncode encodes bytes slices specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-length-byte-array-delta_length_byte_array--6 -// -// Supported Types: BYTE_ARRAY -func DeltaLengthByteArrayEncode(bytesSlices [][]byte) (data []byte) { - lengths := make([]int32, len(bytesSlices)) - for i, bytes := range bytesSlices { - lengths[i] = int32(len(bytes)) - } - - data = deltaEncodeInt32s(lengths) - for _, bytes := range bytesSlices { - data = append(data, []byte(bytes)...) - } - - return data -} - -// DeltaByteArrayEncode encodes sequence of strings values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-strings-delta_byte_array--7 -// -// Supported Types: BYTE_ARRAY -func DeltaByteArrayEncode(bytesSlices [][]byte) (data []byte) { - prefixLengths := make([]int32, len(bytesSlices)) - suffixes := make([][]byte, len(bytesSlices)) - - var i, j int - for i = 1; i < len(bytesSlices); i++ { - for j = 0; j < len(bytesSlices[i-1]) && j < len(bytesSlices[i]); j++ { - if bytesSlices[i-1][j] != bytesSlices[i][j] { - break - } - } - - prefixLengths[i] = int32(j) - suffixes[i] = bytesSlices[i][j:] - } - - data = deltaEncodeInt32s(prefixLengths) - return append(data, DeltaLengthByteArrayEncode(suffixes)...) 
-} diff --git a/pkg/s3select/internal/parquet-go/encoding/plain-encode.go b/pkg/s3select/internal/parquet-go/encoding/plain-encode.go deleted file mode 100644 index 028d53a09..000000000 --- a/pkg/s3select/internal/parquet-go/encoding/plain-encode.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package encoding - -import ( - "bytes" - "encoding/binary" - "fmt" - "math" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func plainEncodeBools(bs []bool) []byte { - data := make([]byte, (len(bs)+7)/8) - - for i := range bs { - if bs[i] { - data[i/8] |= 1 << uint(i%8) - } - } - - return data -} - -func plainEncodeInt32s(i32s []int32) []byte { - data := make([]byte, len(i32s)*4) - - for i, i32 := range i32s { - binary.LittleEndian.PutUint32(data[i*4:], uint32(i32)) - } - - return data -} - -func plainEncodeInt64s(i64s []int64) []byte { - data := make([]byte, len(i64s)*8) - - for i, i64 := range i64s { - binary.LittleEndian.PutUint64(data[i*8:], uint64(i64)) - } - - return data -} - -func plainEncodeFloat32s(f32s []float32) []byte { - data := make([]byte, len(f32s)*4) - - for i, f32 := range f32s { - binary.LittleEndian.PutUint32(data[i*4:], math.Float32bits(f32)) - } - - return data -} - -func plainEncodeFloat64s(f64s []float64) []byte { - data := make([]byte, len(f64s)*8) - - for i, f64 := range f64s { - binary.LittleEndian.PutUint64(data[i*8:], math.Float64bits(f64)) - } - - return data -} - -func plainEncodeBytesSlices(bytesSlices [][]byte) []byte { - buf := new(bytes.Buffer) - - for _, s := range bytesSlices { - if err := binary.Write(buf, binary.LittleEndian, uint32(len(s))); err != nil { - panic(err) - } - - if _, err := buf.Write(s); err != nil { - panic(err) - } - } - - return buf.Bytes() -} - -// PlainEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0 -// -// Supported Types: BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BYTE_ARRAY -func PlainEncode(values interface{}, parquetType parquet.Type) []byte { - switch parquetType { - case parquet.Type_BOOLEAN: - bs, ok := values.([]bool) - if !ok { - panic(fmt.Errorf("expected slice of bool")) - } - return plainEncodeBools(bs) - case parquet.Type_INT32: - i32s, ok := values.([]int32) - if !ok { - 
panic(fmt.Errorf("expected slice of int32")) - } - return plainEncodeInt32s(i32s) - case parquet.Type_INT64: - i64s, ok := values.([]int64) - if !ok { - panic(fmt.Errorf("expected slice of int64")) - } - return plainEncodeInt64s(i64s) - case parquet.Type_FLOAT: - f32s, ok := values.([]float32) - if !ok { - panic(fmt.Errorf("expected slice of float32")) - } - return plainEncodeFloat32s(f32s) - case parquet.Type_DOUBLE: - f64s, ok := values.([]float64) - if !ok { - panic(fmt.Errorf("expected slice of float64")) - } - return plainEncodeFloat64s(f64s) - case parquet.Type_BYTE_ARRAY: - bytesSlices, ok := values.([][]byte) - if !ok { - panic(fmt.Errorf("expected slice of byte array")) - } - return plainEncodeBytesSlices(bytesSlices) - } - - panic(fmt.Errorf("%v parquet type unsupported", parquetType)) -} diff --git a/pkg/s3select/internal/parquet-go/encoding/plain-encode_test.go b/pkg/s3select/internal/parquet-go/encoding/plain-encode_test.go deleted file mode 100644 index 336a54dd5..000000000 --- a/pkg/s3select/internal/parquet-go/encoding/plain-encode_test.go +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package encoding - -import ( - "math" - "reflect" - "testing" -) - -func TestPlainEncodeBools(t *testing.T) { - testCases := []struct { - bs []bool - expectedResult []byte - }{ - {nil, []byte{}}, - {[]bool{}, []byte{}}, - {[]bool{true}, []byte{1}}, - {[]bool{false}, []byte{0}}, - {[]bool{true, true}, []byte{3}}, - {[]bool{false, false}, []byte{0}}, - {[]bool{false, true}, []byte{2}}, - {[]bool{true, false}, []byte{1}}, - {[]bool{false, false, false, false, false, false, false, true, true}, []byte{128, 1}}, - } - - for i, testCase := range testCases { - result := plainEncodeBools(testCase.bs) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestPlainEncodeInt32s(t *testing.T) { - testCases := []struct { - i32s []int32 - expectedResult []byte - }{ - {nil, []byte{}}, - {[]int32{}, []byte{}}, - {[]int32{1}, []byte{1, 0, 0, 0}}, - {[]int32{-1}, []byte{255, 255, 255, 255}}, - {[]int32{256}, []byte{0, 1, 0, 0}}, - {[]int32{math.MinInt32}, []byte{0, 0, 0, 128}}, - {[]int32{math.MaxInt32}, []byte{255, 255, 255, 127}}, - {[]int32{257, -2}, []byte{1, 1, 0, 0, 254, 255, 255, 255}}, - } - - for i, testCase := range testCases { - result := plainEncodeInt32s(testCase.i32s) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestPlainEncodeInt64s(t *testing.T) { - testCases := []struct { - i64s []int64 - expectedResult []byte - }{ - {nil, []byte{}}, - {[]int64{}, []byte{}}, - {[]int64{1}, []byte{1, 0, 0, 0, 0, 0, 0, 0}}, - {[]int64{-1}, []byte{255, 255, 255, 255, 255, 255, 255, 255}}, - {[]int64{256}, []byte{0, 1, 0, 0, 0, 0, 0, 0}}, - {[]int64{math.MinInt64}, []byte{0, 0, 0, 0, 0, 0, 0, 128}}, - {[]int64{math.MaxInt64}, []byte{255, 255, 255, 255, 255, 255, 255, 127}}, - {[]int64{257, -2}, []byte{1, 1, 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255}}, 
- } - - for i, testCase := range testCases { - result := plainEncodeInt64s(testCase.i64s) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestPlainEncodeFloat32s(t *testing.T) { - testCases := []struct { - f32s []float32 - expectedResult []byte - }{ - {nil, []byte{}}, - {[]float32{}, []byte{}}, - {[]float32{1}, []byte{0, 0, 128, 63}}, - {[]float32{1.0}, []byte{0, 0, 128, 63}}, - {[]float32{-1}, []byte{0, 0, 128, 191}}, - {[]float32{-1.0}, []byte{0, 0, 128, 191}}, - {[]float32{256}, []byte{0, 0, 128, 67}}, - {[]float32{1.1}, []byte{205, 204, 140, 63}}, - {[]float32{-1.1}, []byte{205, 204, 140, 191}}, - {[]float32{math.Pi}, []byte{219, 15, 73, 64}}, - {[]float32{257, -2}, []byte{0, 128, 128, 67, 0, 0, 0, 192}}, - {[]float32{257.1, -2.1}, []byte{205, 140, 128, 67, 102, 102, 6, 192}}, - } - - for i, testCase := range testCases { - result := plainEncodeFloat32s(testCase.f32s) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} - -func TestPlainEncodeFloat64s(t *testing.T) { - testCases := []struct { - f64s []float64 - expectedResult []byte - }{ - {nil, []byte{}}, - {[]float64{}, []byte{}}, - {[]float64{1}, []byte{0, 0, 0, 0, 0, 0, 240, 63}}, - {[]float64{1.0}, []byte{0, 0, 0, 0, 0, 0, 240, 63}}, - {[]float64{-1}, []byte{0, 0, 0, 0, 0, 0, 240, 191}}, - {[]float64{-1.0}, []byte{0, 0, 0, 0, 0, 0, 240, 191}}, - {[]float64{256}, []byte{0, 0, 0, 0, 0, 0, 112, 64}}, - {[]float64{1.1}, []byte{154, 153, 153, 153, 153, 153, 241, 63}}, - {[]float64{-1.1}, []byte{154, 153, 153, 153, 153, 153, 241, 191}}, - {[]float64{math.Pi}, []byte{24, 45, 68, 84, 251, 33, 9, 64}}, - {[]float64{257, -2}, []byte{0, 0, 0, 0, 0, 16, 112, 64, 0, 0, 0, 0, 0, 0, 0, 192}}, - {[]float64{257.1, -2.1}, []byte{154, 153, 153, 153, 153, 17, 112, 64, 205, 204, 204, 204, 204, 204, 0, 192}}, - } - - 
for i, testCase := range testCases { - result := plainEncodeFloat64s(testCase.f64s) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} diff --git a/pkg/s3select/internal/parquet-go/encoding/rle-encode.go b/pkg/s3select/internal/parquet-go/encoding/rle-encode.go deleted file mode 100644 index 609fbd1b7..000000000 --- a/pkg/s3select/internal/parquet-go/encoding/rle-encode.go +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package encoding - -import ( - "fmt" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func rleEncodeInt32s(i32s []int32, bitWidth int32) (data []byte) { - j := 0 - for i := 0; i < len(i32s); i = j { - for j = i + 1; j < len(i32s) && i32s[i] == i32s[j]; j++ { - } - - headerBytes := varIntEncode(uint64((j - i) << 1)) - data = append(data, headerBytes...) - - valBytes := plainEncodeInt32s([]int32{i32s[i]}) - byteCount := (bitWidth + 7) / 8 - data = append(data, valBytes[:byteCount]...) 
- } - - return data -} - -func rleEncodeInt64s(i64s []int64, bitWidth int32) (data []byte) { - j := 0 - for i := 0; i < len(i64s); i = j { - for j = i + 1; j < len(i64s) && i64s[i] == i64s[j]; j++ { - } - - headerBytes := varIntEncode(uint64((j - i) << 1)) - data = append(data, headerBytes...) - - valBytes := plainEncodeInt64s([]int64{i64s[i]}) - byteCount := (bitWidth + 7) / 8 - data = append(data, valBytes[:byteCount]...) - } - - return data -} - -// RLEBitPackedHybridEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3 -// -// Supported Types: INT32, INT64 -func RLEBitPackedHybridEncode(values interface{}, bitWidth int32, parquetType parquet.Type) []byte { - var rleBytes []byte - - switch parquetType { - case parquet.Type_INT32: - i32s, ok := values.([]int32) - if !ok { - panic(fmt.Errorf("expected slice of int32")) - } - rleBytes = rleEncodeInt32s(i32s, bitWidth) - case parquet.Type_INT64: - i64s, ok := values.([]int64) - if !ok { - panic(fmt.Errorf("expected slice of int64")) - } - rleBytes = rleEncodeInt64s(i64s, bitWidth) - default: - panic(fmt.Errorf("%v parquet type unsupported", parquetType)) - } - - lenBytes := plainEncodeInt32s([]int32{int32(len(rleBytes))}) - return append(lenBytes, rleBytes...) -} diff --git a/pkg/s3select/internal/parquet-go/encoding/rle-encode_test.go b/pkg/s3select/internal/parquet-go/encoding/rle-encode_test.go deleted file mode 100644 index b87b9f951..000000000 --- a/pkg/s3select/internal/parquet-go/encoding/rle-encode_test.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package encoding - -import ( - "reflect" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func TestRLEEncodeInt32s(t *testing.T) { - testCases := []struct { - values []int32 - bitWidth int32 - dataType parquet.Type - expectedResult []byte - }{ - {[]int32{3, 5, 7}, 1, parquet.Type_INT32, []byte{2, 3, 2, 5, 2, 7}}, - {[]int32{3, 3, 3}, 1, parquet.Type_INT32, []byte{6, 3}}, - {[]int32{2, 2, 3, 3, 3}, 1, parquet.Type_INT32, []byte{4, 2, 6, 3}}, - } - - for i, testCase := range testCases { - result := rleEncodeInt32s(testCase.values, testCase.bitWidth) - if !reflect.DeepEqual(result, testCase.expectedResult) { - t.Fatalf("case %v: expected: %v, got: %v", i+1, testCase.expectedResult, result) - } - } -} diff --git a/pkg/s3select/internal/parquet-go/encoding/rledict-encode.go b/pkg/s3select/internal/parquet-go/encoding/rledict-encode.go deleted file mode 100644 index 30cd0ab72..000000000 --- a/pkg/s3select/internal/parquet-go/encoding/rledict-encode.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package encoding - -import ( - "github.com/minio/minio/pkg/s3select/internal/parquet-go/common" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -// RLEDictEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#dictionary-encoding-plain_dictionary--2-and-rle_dictionary--8 and returns dictionary page data and data page data. -// -// Dictionary page data contains PLAIN encodeed slice of uniquely fully defined non-nil values. -// Data page data contains RLE/Bit-Packed Hybrid encoded indices of fully defined non-nil values. -// -// Supported Types: BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BYTE_ARRAY -func RLEDictEncode(values []interface{}, parquetType parquet.Type, bitWidth int32) (dictPageData, dataPageData []byte, dictValueCount int32, indexBitWidth uint8) { - var definedValues []interface{} - var indices []int32 - - valueIndexMap := make(map[interface{}]int32) - j := 0 - for i := 0; i < len(values); i = j { - for j = i; j < len(values); j++ { - value := values[j] - if value == nil { - continue - } - - index, found := valueIndexMap[value] - if !found { - index = int32(len(definedValues)) - definedValues = append(definedValues, value) - valueIndexMap[value] = index - } - - indices = append(indices, index) - } - } - - indexBitWidth = uint8(common.BitWidth(uint64(indices[len(indices)-1]))) - - dictPageData = PlainEncode(common.ToSliceValue(definedValues, parquetType), parquetType) - dataPageData = RLEBitPackedHybridEncode(indices, int32(indexBitWidth), parquet.Type_INT32) - - return dictPageData, dataPageData, int32(len(definedValues)), indexBitWidth -} diff --git a/pkg/s3select/internal/parquet-go/endian.go b/pkg/s3select/internal/parquet-go/endian.go deleted file mode 100644 index 8f02c537b..000000000 --- 
a/pkg/s3select/internal/parquet-go/endian.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "encoding/binary" -) - -func uint32ToBytes(v uint32) []byte { - buf := make([]byte, 4) - binary.LittleEndian.PutUint32(buf, v) - return buf -} - -func bytesToUint32(buf []byte) uint32 { - return binary.LittleEndian.Uint32(buf) -} - -func bytesToUint64(buf []byte) uint64 { - return binary.LittleEndian.Uint64(buf) -} diff --git a/pkg/s3select/internal/parquet-go/example.parquet b/pkg/s3select/internal/parquet-go/example.parquet deleted file mode 100644 index 05cd61aea..000000000 Binary files a/pkg/s3select/internal/parquet-go/example.parquet and /dev/null differ diff --git a/pkg/s3select/internal/parquet-go/gen-go/parquet/GoUnusedProtection__.go b/pkg/s3select/internal/parquet-go/gen-go/parquet/GoUnusedProtection__.go deleted file mode 100644 index ac735f76a..000000000 --- a/pkg/s3select/internal/parquet-go/gen-go/parquet/GoUnusedProtection__.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. 
-// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -var GoUnusedProtection__ int diff --git a/pkg/s3select/internal/parquet-go/gen-go/parquet/parquet-consts.go b/pkg/s3select/internal/parquet-go/gen-go/parquet/parquet-consts.go deleted file mode 100644 index 89f3025fa..000000000 --- a/pkg/s3select/internal/parquet-go/gen-go/parquet/parquet-consts.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "bytes" - "fmt" - - "git.apache.org/thrift.git/lib/go/thrift" -) - -// (needed to ensure safety because of naive import list construction.) 
-var _ = thrift.ZERO -var _ = fmt.Printf -var _ = bytes.Equal - -func init() { -} diff --git a/pkg/s3select/internal/parquet-go/gen-go/parquet/parquet.go b/pkg/s3select/internal/parquet-go/gen-go/parquet/parquet.go deleted file mode 100644 index bb456f3f4..000000000 --- a/pkg/s3select/internal/parquet-go/gen-go/parquet/parquet.go +++ /dev/null @@ -1,8206 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "bytes" - "database/sql/driver" - "errors" - "fmt" - - "git.apache.org/thrift.git/lib/go/thrift" -) - -// (needed to ensure safety because of naive import list construction.) -var _ = thrift.ZERO -var _ = fmt.Printf -var _ = bytes.Equal - -//Types supported by Parquet. These types are intended to be used in combination -//with the encodings to control the on disk storage format. -//For example INT16 is not included as a type since a good encoding of INT32 -//would handle this. 
-type Type int64 - -const ( - Type_BOOLEAN Type = 0 - Type_INT32 Type = 1 - Type_INT64 Type = 2 - Type_INT96 Type = 3 - Type_FLOAT Type = 4 - Type_DOUBLE Type = 5 - Type_BYTE_ARRAY Type = 6 - Type_FIXED_LEN_BYTE_ARRAY Type = 7 -) - -func (p Type) String() string { - switch p { - case Type_BOOLEAN: - return "BOOLEAN" - case Type_INT32: - return "INT32" - case Type_INT64: - return "INT64" - case Type_INT96: - return "INT96" - case Type_FLOAT: - return "FLOAT" - case Type_DOUBLE: - return "DOUBLE" - case Type_BYTE_ARRAY: - return "BYTE_ARRAY" - case Type_FIXED_LEN_BYTE_ARRAY: - return "FIXED_LEN_BYTE_ARRAY" - } - return "" -} - -func TypeFromString(s string) (Type, error) { - switch s { - case "BOOLEAN": - return Type_BOOLEAN, nil - case "INT32": - return Type_INT32, nil - case "INT64": - return Type_INT64, nil - case "INT96": - return Type_INT96, nil - case "FLOAT": - return Type_FLOAT, nil - case "DOUBLE": - return Type_DOUBLE, nil - case "BYTE_ARRAY": - return Type_BYTE_ARRAY, nil - case "FIXED_LEN_BYTE_ARRAY": - return Type_FIXED_LEN_BYTE_ARRAY, nil - } - return Type(0), fmt.Errorf("not a valid Type string") -} - -func TypePtr(v Type) *Type { return &v } - -func (p Type) MarshalText() ([]byte, error) { - return []byte(p.String()), nil -} - -func (p *Type) UnmarshalText(text []byte) error { - q, err := TypeFromString(string(text)) - if err != nil { - return err - } - *p = q - return nil -} - -func (p *Type) Scan(value interface{}) error { - v, ok := value.(int64) - if !ok { - return errors.New("Scan value is not int64") - } - *p = Type(v) - return nil -} - -func (p *Type) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } - return int64(*p), nil -} - -//Common types used by frameworks(e.g. hive, pig) using parquet. This helps map -//between types in those frameworks to the base types in parquet. This is only -//metadata and not needed to read or write the data. 
-type ConvertedType int64 - -const ( - ConvertedType_UTF8 ConvertedType = 0 - ConvertedType_MAP ConvertedType = 1 - ConvertedType_MAP_KEY_VALUE ConvertedType = 2 - ConvertedType_LIST ConvertedType = 3 - ConvertedType_ENUM ConvertedType = 4 - ConvertedType_DECIMAL ConvertedType = 5 - ConvertedType_DATE ConvertedType = 6 - ConvertedType_TIME_MILLIS ConvertedType = 7 - ConvertedType_TIME_MICROS ConvertedType = 8 - ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9 - ConvertedType_TIMESTAMP_MICROS ConvertedType = 10 - ConvertedType_UINT_8 ConvertedType = 11 - ConvertedType_UINT_16 ConvertedType = 12 - ConvertedType_UINT_32 ConvertedType = 13 - ConvertedType_UINT_64 ConvertedType = 14 - ConvertedType_INT_8 ConvertedType = 15 - ConvertedType_INT_16 ConvertedType = 16 - ConvertedType_INT_32 ConvertedType = 17 - ConvertedType_INT_64 ConvertedType = 18 - ConvertedType_JSON ConvertedType = 19 - ConvertedType_BSON ConvertedType = 20 - ConvertedType_INTERVAL ConvertedType = 21 -) - -func (p ConvertedType) String() string { - switch p { - case ConvertedType_UTF8: - return "UTF8" - case ConvertedType_MAP: - return "MAP" - case ConvertedType_MAP_KEY_VALUE: - return "MAP_KEY_VALUE" - case ConvertedType_LIST: - return "LIST" - case ConvertedType_ENUM: - return "ENUM" - case ConvertedType_DECIMAL: - return "DECIMAL" - case ConvertedType_DATE: - return "DATE" - case ConvertedType_TIME_MILLIS: - return "TIME_MILLIS" - case ConvertedType_TIME_MICROS: - return "TIME_MICROS" - case ConvertedType_TIMESTAMP_MILLIS: - return "TIMESTAMP_MILLIS" - case ConvertedType_TIMESTAMP_MICROS: - return "TIMESTAMP_MICROS" - case ConvertedType_UINT_8: - return "UINT_8" - case ConvertedType_UINT_16: - return "UINT_16" - case ConvertedType_UINT_32: - return "UINT_32" - case ConvertedType_UINT_64: - return "UINT_64" - case ConvertedType_INT_8: - return "INT_8" - case ConvertedType_INT_16: - return "INT_16" - case ConvertedType_INT_32: - return "INT_32" - case ConvertedType_INT_64: - return "INT_64" - case 
ConvertedType_JSON: - return "JSON" - case ConvertedType_BSON: - return "BSON" - case ConvertedType_INTERVAL: - return "INTERVAL" - } - return "" -} - -func ConvertedTypeFromString(s string) (ConvertedType, error) { - switch s { - case "UTF8": - return ConvertedType_UTF8, nil - case "MAP": - return ConvertedType_MAP, nil - case "MAP_KEY_VALUE": - return ConvertedType_MAP_KEY_VALUE, nil - case "LIST": - return ConvertedType_LIST, nil - case "ENUM": - return ConvertedType_ENUM, nil - case "DECIMAL": - return ConvertedType_DECIMAL, nil - case "DATE": - return ConvertedType_DATE, nil - case "TIME_MILLIS": - return ConvertedType_TIME_MILLIS, nil - case "TIME_MICROS": - return ConvertedType_TIME_MICROS, nil - case "TIMESTAMP_MILLIS": - return ConvertedType_TIMESTAMP_MILLIS, nil - case "TIMESTAMP_MICROS": - return ConvertedType_TIMESTAMP_MICROS, nil - case "UINT_8": - return ConvertedType_UINT_8, nil - case "UINT_16": - return ConvertedType_UINT_16, nil - case "UINT_32": - return ConvertedType_UINT_32, nil - case "UINT_64": - return ConvertedType_UINT_64, nil - case "INT_8": - return ConvertedType_INT_8, nil - case "INT_16": - return ConvertedType_INT_16, nil - case "INT_32": - return ConvertedType_INT_32, nil - case "INT_64": - return ConvertedType_INT_64, nil - case "JSON": - return ConvertedType_JSON, nil - case "BSON": - return ConvertedType_BSON, nil - case "INTERVAL": - return ConvertedType_INTERVAL, nil - } - return ConvertedType(0), fmt.Errorf("not a valid ConvertedType string") -} - -func ConvertedTypePtr(v ConvertedType) *ConvertedType { return &v } - -func (p ConvertedType) MarshalText() ([]byte, error) { - return []byte(p.String()), nil -} - -func (p *ConvertedType) UnmarshalText(text []byte) error { - q, err := ConvertedTypeFromString(string(text)) - if err != nil { - return err - } - *p = q - return nil -} - -func (p *ConvertedType) Scan(value interface{}) error { - v, ok := value.(int64) - if !ok { - return errors.New("Scan value is not int64") - } - *p = 
ConvertedType(v) - return nil -} - -func (p *ConvertedType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } - return int64(*p), nil -} - -//Representation of Schemas -type FieldRepetitionType int64 - -const ( - FieldRepetitionType_REQUIRED FieldRepetitionType = 0 - FieldRepetitionType_OPTIONAL FieldRepetitionType = 1 - FieldRepetitionType_REPEATED FieldRepetitionType = 2 -) - -func (p FieldRepetitionType) String() string { - switch p { - case FieldRepetitionType_REQUIRED: - return "REQUIRED" - case FieldRepetitionType_OPTIONAL: - return "OPTIONAL" - case FieldRepetitionType_REPEATED: - return "REPEATED" - } - return "" -} - -func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error) { - switch s { - case "REQUIRED": - return FieldRepetitionType_REQUIRED, nil - case "OPTIONAL": - return FieldRepetitionType_OPTIONAL, nil - case "REPEATED": - return FieldRepetitionType_REPEATED, nil - } - return FieldRepetitionType(0), fmt.Errorf("not a valid FieldRepetitionType string") -} - -func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType { return &v } - -func (p FieldRepetitionType) MarshalText() ([]byte, error) { - return []byte(p.String()), nil -} - -func (p *FieldRepetitionType) UnmarshalText(text []byte) error { - q, err := FieldRepetitionTypeFromString(string(text)) - if err != nil { - return err - } - *p = q - return nil -} - -func (p *FieldRepetitionType) Scan(value interface{}) error { - v, ok := value.(int64) - if !ok { - return errors.New("Scan value is not int64") - } - *p = FieldRepetitionType(v) - return nil -} - -func (p *FieldRepetitionType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } - return int64(*p), nil -} - -//Encodings supported by Parquet. Not all encodings are valid for all types. These -//enums are also used to specify the encoding of definition and repetition levels. -//See the accompanying doc for the details of the more complicated encodings. 
-type Encoding int64 - -const ( - Encoding_PLAIN Encoding = 0 - Encoding_PLAIN_DICTIONARY Encoding = 2 - Encoding_RLE Encoding = 3 - Encoding_BIT_PACKED Encoding = 4 - Encoding_DELTA_BINARY_PACKED Encoding = 5 - Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6 - Encoding_DELTA_BYTE_ARRAY Encoding = 7 - Encoding_RLE_DICTIONARY Encoding = 8 -) - -func (p Encoding) String() string { - switch p { - case Encoding_PLAIN: - return "PLAIN" - case Encoding_PLAIN_DICTIONARY: - return "PLAIN_DICTIONARY" - case Encoding_RLE: - return "RLE" - case Encoding_BIT_PACKED: - return "BIT_PACKED" - case Encoding_DELTA_BINARY_PACKED: - return "DELTA_BINARY_PACKED" - case Encoding_DELTA_LENGTH_BYTE_ARRAY: - return "DELTA_LENGTH_BYTE_ARRAY" - case Encoding_DELTA_BYTE_ARRAY: - return "DELTA_BYTE_ARRAY" - case Encoding_RLE_DICTIONARY: - return "RLE_DICTIONARY" - } - return "" -} - -func EncodingFromString(s string) (Encoding, error) { - switch s { - case "PLAIN": - return Encoding_PLAIN, nil - case "PLAIN_DICTIONARY": - return Encoding_PLAIN_DICTIONARY, nil - case "RLE": - return Encoding_RLE, nil - case "BIT_PACKED": - return Encoding_BIT_PACKED, nil - case "DELTA_BINARY_PACKED": - return Encoding_DELTA_BINARY_PACKED, nil - case "DELTA_LENGTH_BYTE_ARRAY": - return Encoding_DELTA_LENGTH_BYTE_ARRAY, nil - case "DELTA_BYTE_ARRAY": - return Encoding_DELTA_BYTE_ARRAY, nil - case "RLE_DICTIONARY": - return Encoding_RLE_DICTIONARY, nil - } - return Encoding(0), fmt.Errorf("not a valid Encoding string") -} - -func EncodingPtr(v Encoding) *Encoding { return &v } - -func (p Encoding) MarshalText() ([]byte, error) { - return []byte(p.String()), nil -} - -func (p *Encoding) UnmarshalText(text []byte) error { - q, err := EncodingFromString(string(text)) - if err != nil { - return err - } - *p = q - return nil -} - -func (p *Encoding) Scan(value interface{}) error { - v, ok := value.(int64) - if !ok { - return errors.New("Scan value is not int64") - } - *p = Encoding(v) - return nil -} - -func (p 
*Encoding) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } - return int64(*p), nil -} - -//Supported compression algorithms. -// -//Codecs added in 2.4 can be read by readers based on 2.4 and later. -//Codec support may vary between readers based on the format version and -//libraries available at runtime. Gzip, Snappy, and LZ4 codecs are -//widely available, while Zstd and Brotli require additional libraries. -type CompressionCodec int64 - -const ( - CompressionCodec_UNCOMPRESSED CompressionCodec = 0 - CompressionCodec_SNAPPY CompressionCodec = 1 - CompressionCodec_GZIP CompressionCodec = 2 - CompressionCodec_LZO CompressionCodec = 3 - CompressionCodec_BROTLI CompressionCodec = 4 - CompressionCodec_LZ4 CompressionCodec = 5 - CompressionCodec_ZSTD CompressionCodec = 6 -) - -func (p CompressionCodec) String() string { - switch p { - case CompressionCodec_UNCOMPRESSED: - return "UNCOMPRESSED" - case CompressionCodec_SNAPPY: - return "SNAPPY" - case CompressionCodec_GZIP: - return "GZIP" - case CompressionCodec_LZO: - return "LZO" - case CompressionCodec_BROTLI: - return "BROTLI" - case CompressionCodec_LZ4: - return "LZ4" - case CompressionCodec_ZSTD: - return "ZSTD" - } - return "" -} - -func CompressionCodecFromString(s string) (CompressionCodec, error) { - switch s { - case "UNCOMPRESSED": - return CompressionCodec_UNCOMPRESSED, nil - case "SNAPPY": - return CompressionCodec_SNAPPY, nil - case "GZIP": - return CompressionCodec_GZIP, nil - case "LZO": - return CompressionCodec_LZO, nil - case "BROTLI": - return CompressionCodec_BROTLI, nil - case "LZ4": - return CompressionCodec_LZ4, nil - case "ZSTD": - return CompressionCodec_ZSTD, nil - } - return CompressionCodec(0), fmt.Errorf("not a valid CompressionCodec string") -} - -func CompressionCodecPtr(v CompressionCodec) *CompressionCodec { return &v } - -func (p CompressionCodec) MarshalText() ([]byte, error) { - return []byte(p.String()), nil -} - -func (p *CompressionCodec) 
UnmarshalText(text []byte) error { - q, err := CompressionCodecFromString(string(text)) - if err != nil { - return err - } - *p = q - return nil -} - -func (p *CompressionCodec) Scan(value interface{}) error { - v, ok := value.(int64) - if !ok { - return errors.New("Scan value is not int64") - } - *p = CompressionCodec(v) - return nil -} - -func (p *CompressionCodec) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } - return int64(*p), nil -} - -type PageType int64 - -const ( - PageType_DATA_PAGE PageType = 0 - PageType_INDEX_PAGE PageType = 1 - PageType_DICTIONARY_PAGE PageType = 2 - PageType_DATA_PAGE_V2 PageType = 3 -) - -func (p PageType) String() string { - switch p { - case PageType_DATA_PAGE: - return "DATA_PAGE" - case PageType_INDEX_PAGE: - return "INDEX_PAGE" - case PageType_DICTIONARY_PAGE: - return "DICTIONARY_PAGE" - case PageType_DATA_PAGE_V2: - return "DATA_PAGE_V2" - } - return "" -} - -func PageTypeFromString(s string) (PageType, error) { - switch s { - case "DATA_PAGE": - return PageType_DATA_PAGE, nil - case "INDEX_PAGE": - return PageType_INDEX_PAGE, nil - case "DICTIONARY_PAGE": - return PageType_DICTIONARY_PAGE, nil - case "DATA_PAGE_V2": - return PageType_DATA_PAGE_V2, nil - } - return PageType(0), fmt.Errorf("not a valid PageType string") -} - -func PageTypePtr(v PageType) *PageType { return &v } - -func (p PageType) MarshalText() ([]byte, error) { - return []byte(p.String()), nil -} - -func (p *PageType) UnmarshalText(text []byte) error { - q, err := PageTypeFromString(string(text)) - if err != nil { - return err - } - *p = q - return nil -} - -func (p *PageType) Scan(value interface{}) error { - v, ok := value.(int64) - if !ok { - return errors.New("Scan value is not int64") - } - *p = PageType(v) - return nil -} - -func (p *PageType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } - return int64(*p), nil -} - -//Enum to annotate whether lists of min/max elements inside ColumnIndex -//are ordered 
and if so, in which direction. -type BoundaryOrder int64 - -const ( - BoundaryOrder_UNORDERED BoundaryOrder = 0 - BoundaryOrder_ASCENDING BoundaryOrder = 1 - BoundaryOrder_DESCENDING BoundaryOrder = 2 -) - -func (p BoundaryOrder) String() string { - switch p { - case BoundaryOrder_UNORDERED: - return "UNORDERED" - case BoundaryOrder_ASCENDING: - return "ASCENDING" - case BoundaryOrder_DESCENDING: - return "DESCENDING" - } - return "" -} - -func BoundaryOrderFromString(s string) (BoundaryOrder, error) { - switch s { - case "UNORDERED": - return BoundaryOrder_UNORDERED, nil - case "ASCENDING": - return BoundaryOrder_ASCENDING, nil - case "DESCENDING": - return BoundaryOrder_DESCENDING, nil - } - return BoundaryOrder(0), fmt.Errorf("not a valid BoundaryOrder string") -} - -func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder { return &v } - -func (p BoundaryOrder) MarshalText() ([]byte, error) { - return []byte(p.String()), nil -} - -func (p *BoundaryOrder) UnmarshalText(text []byte) error { - q, err := BoundaryOrderFromString(string(text)) - if err != nil { - return err - } - *p = q - return nil -} - -func (p *BoundaryOrder) Scan(value interface{}) error { - v, ok := value.(int64) - if !ok { - return errors.New("Scan value is not int64") - } - *p = BoundaryOrder(v) - return nil -} - -func (p *BoundaryOrder) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } - return int64(*p), nil -} - -// Statistics per row group and per page -// All fields are optional. -// -// Attributes: -// - Max: DEPRECATED: min and max value of the column. Use min_value and max_value. -// -// Values are encoded using PLAIN encoding, except that variable-length byte -// arrays do not include a length prefix. -// -// These fields encode min and max values determined by signed comparison -// only. New files should use the correct order for a column's logical type -// and store the values in the min_value and max_value fields. 
-// -// To support older readers, these may be set when the column order is -// signed. -// - Min -// - NullCount: count of null value in the column -// - DistinctCount: count of distinct values occurring -// - MaxValue: Min and max values for the column, determined by its ColumnOrder. -// -// Values are encoded using PLAIN encoding, except that variable-length byte -// arrays do not include a length prefix. -// - MinValue -type Statistics struct { - Max []byte `thrift:"max,1" db:"max" json:"max,omitempty"` - Min []byte `thrift:"min,2" db:"min" json:"min,omitempty"` - NullCount *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"` - DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"` - MaxValue []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"` - MinValue []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"` -} - -func NewStatistics() *Statistics { - return &Statistics{} -} - -var Statistics_Max_DEFAULT []byte - -func (p *Statistics) GetMax() []byte { - return p.Max -} - -var Statistics_Min_DEFAULT []byte - -func (p *Statistics) GetMin() []byte { - return p.Min -} - -var Statistics_NullCount_DEFAULT int64 - -func (p *Statistics) GetNullCount() int64 { - if !p.IsSetNullCount() { - return Statistics_NullCount_DEFAULT - } - return *p.NullCount -} - -var Statistics_DistinctCount_DEFAULT int64 - -func (p *Statistics) GetDistinctCount() int64 { - if !p.IsSetDistinctCount() { - return Statistics_DistinctCount_DEFAULT - } - return *p.DistinctCount -} - -var Statistics_MaxValue_DEFAULT []byte - -func (p *Statistics) GetMaxValue() []byte { - return p.MaxValue -} - -var Statistics_MinValue_DEFAULT []byte - -func (p *Statistics) GetMinValue() []byte { - return p.MinValue -} -func (p *Statistics) IsSetMax() bool { - return p.Max != nil -} - -func (p *Statistics) IsSetMin() bool { - return p.Min != nil -} - -func (p *Statistics) IsSetNullCount() bool { - return 
p.NullCount != nil -} - -func (p *Statistics) IsSetDistinctCount() bool { - return p.DistinctCount != nil -} - -func (p *Statistics) IsSetMaxValue() bool { - return p.MaxValue != nil -} - -func (p *Statistics) IsSetMinValue() bool { - return p.MinValue != nil -} - -func (p *Statistics) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - case 6: - if err := p.ReadField6(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *Statistics) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.Max = v - } - return nil -} - -func (p *Statistics) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.Min = v - } - return nil -} - -func (p *Statistics) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return 
thrift.PrependError("error reading field 3: ", err) - } else { - p.NullCount = &v - } - return nil -} - -func (p *Statistics) ReadField4(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 4: ", err) - } else { - p.DistinctCount = &v - } - return nil -} - -func (p *Statistics) ReadField5(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(); err != nil { - return thrift.PrependError("error reading field 5: ", err) - } else { - p.MaxValue = v - } - return nil -} - -func (p *Statistics) ReadField6(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(); err != nil { - return thrift.PrependError("error reading field 6: ", err) - } else { - p.MinValue = v - } - return nil -} - -func (p *Statistics) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("Statistics"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - if err := p.writeField6(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *Statistics) writeField1(oprot thrift.TProtocol) (err error) { - if p.IsSetMax() { - if err := oprot.WriteFieldBegin("max", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:max: ", p), err) - } - if err := oprot.WriteBinary(p.Max); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.max 
(1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:max: ", p), err) - } - } - return err -} - -func (p *Statistics) writeField2(oprot thrift.TProtocol) (err error) { - if p.IsSetMin() { - if err := oprot.WriteFieldBegin("min", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min: ", p), err) - } - if err := oprot.WriteBinary(p.Min); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.min (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min: ", p), err) - } - } - return err -} - -func (p *Statistics) writeField3(oprot thrift.TProtocol) (err error) { - if p.IsSetNullCount() { - if err := oprot.WriteFieldBegin("null_count", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:null_count: ", p), err) - } - if err := oprot.WriteI64(*p.NullCount); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.null_count (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:null_count: ", p), err) - } - } - return err -} - -func (p *Statistics) writeField4(oprot thrift.TProtocol) (err error) { - if p.IsSetDistinctCount() { - if err := oprot.WriteFieldBegin("distinct_count", thrift.I64, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:distinct_count: ", p), err) - } - if err := oprot.WriteI64(*p.DistinctCount); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.distinct_count (4) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:distinct_count: ", p), err) - } - } - return err -} - -func (p *Statistics) 
writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetMaxValue() { - if err := oprot.WriteFieldBegin("max_value", thrift.STRING, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:max_value: ", p), err) - } - if err := oprot.WriteBinary(p.MaxValue); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.max_value (5) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:max_value: ", p), err) - } - } - return err -} - -func (p *Statistics) writeField6(oprot thrift.TProtocol) (err error) { - if p.IsSetMinValue() { - if err := oprot.WriteFieldBegin("min_value", thrift.STRING, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:min_value: ", p), err) - } - if err := oprot.WriteBinary(p.MinValue); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.min_value (6) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:min_value: ", p), err) - } - } - return err -} - -func (p *Statistics) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("Statistics(%+v)", *p) -} - -// Empty structs to use as logical type annotations -type StringType struct { -} - -func NewStringType() *StringType { - return &StringType{} -} - -func (p *StringType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := 
iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *StringType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("StringType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *StringType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("StringType(%+v)", *p) -} - -type UUIDType struct { -} - -func NewUUIDType() *UUIDType { - return &UUIDType{} -} - -func (p *UUIDType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *UUIDType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("UUIDType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p 
*UUIDType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("UUIDType(%+v)", *p) -} - -type MapType struct { -} - -func NewMapType() *MapType { - return &MapType{} -} - -func (p *MapType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *MapType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("MapType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *MapType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("MapType(%+v)", *p) -} - -type ListType struct { -} - -func NewListType() *ListType { - return &ListType{} -} - -func (p *ListType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); 
err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *ListType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("ListType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *ListType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ListType(%+v)", *p) -} - -type EnumType struct { -} - -func NewEnumType() *EnumType { - return &EnumType{} -} - -func (p *EnumType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *EnumType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("EnumType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err 
!= nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *EnumType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("EnumType(%+v)", *p) -} - -type DateType struct { -} - -func NewDateType() *DateType { - return &DateType{} -} - -func (p *DateType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *DateType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("DateType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *DateType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DateType(%+v)", *p) -} - -// Logical type to annotate a column that is always null. -// -// Sometimes when discovering the schema of existing data, values are always -// null and the physical type can't be determined. This annotation signals -// the case where the physical type was guessed from all null values. 
-type NullType struct { -} - -func NewNullType() *NullType { - return &NullType{} -} - -func (p *NullType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *NullType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("NullType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *NullType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("NullType(%+v)", *p) -} - -// Decimal logical type annotation -// -// To maintain forward-compatibility in v1, implementations using this logical -// type must also set scale and precision on the annotated SchemaElement. 
-// -// Allowed for physical types: INT32, INT64, FIXED, and BINARY -// -// Attributes: -// - Scale -// - Precision -type DecimalType struct { - Scale int32 `thrift:"scale,1,required" db:"scale" json:"scale"` - Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"` -} - -func NewDecimalType() *DecimalType { - return &DecimalType{} -} - -func (p *DecimalType) GetScale() int32 { - return p.Scale -} - -func (p *DecimalType) GetPrecision() int32 { - return p.Precision -} -func (p *DecimalType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetScale bool = false - var issetPrecision bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetScale = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetPrecision = true - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetScale { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Scale is not set")) - } - if !issetPrecision { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Precision is not set")) - } - return nil -} - -func (p *DecimalType) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.Scale = v - } - return nil -} - -func (p 
*DecimalType) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.Precision = v - } - return nil -} - -func (p *DecimalType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("DecimalType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *DecimalType) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("scale", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:scale: ", p), err) - } - if err := oprot.WriteI32(p.Scale); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.scale (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:scale: ", p), err) - } - return err -} - -func (p *DecimalType) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("precision", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:precision: ", p), err) - } - if err := oprot.WriteI32(p.Precision); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.precision (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:precision: ", p), err) - } - return err -} - -func (p *DecimalType) String() string { - if p == nil { - return "" - } - return 
fmt.Sprintf("DecimalType(%+v)", *p) -} - -// Time units for logical types -type MilliSeconds struct { -} - -func NewMilliSeconds() *MilliSeconds { - return &MilliSeconds{} -} - -func (p *MilliSeconds) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *MilliSeconds) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("MilliSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *MilliSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("MilliSeconds(%+v)", *p) -} - -type MicroSeconds struct { -} - -func NewMicroSeconds() *MicroSeconds { - return &MicroSeconds{} -} - -func (p *MicroSeconds) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err 
:= iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *MicroSeconds) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("MicroSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *MicroSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("MicroSeconds(%+v)", *p) -} - -type NanoSeconds struct { -} - -func NewNanoSeconds() *NanoSeconds { - return &NanoSeconds{} -} - -func (p *NanoSeconds) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *NanoSeconds) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("NanoSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field 
stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *NanoSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("NanoSeconds(%+v)", *p) -} - -// Attributes: -// - MILLIS -// - MICROS -// - NANOS -type TimeUnit struct { - MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"` - MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"` - NANOS *NanoSeconds `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"` -} - -func NewTimeUnit() *TimeUnit { - return &TimeUnit{} -} - -var TimeUnit_MILLIS_DEFAULT *MilliSeconds - -func (p *TimeUnit) GetMILLIS() *MilliSeconds { - if !p.IsSetMILLIS() { - return TimeUnit_MILLIS_DEFAULT - } - return p.MILLIS -} - -var TimeUnit_MICROS_DEFAULT *MicroSeconds - -func (p *TimeUnit) GetMICROS() *MicroSeconds { - if !p.IsSetMICROS() { - return TimeUnit_MICROS_DEFAULT - } - return p.MICROS -} - -var TimeUnit_NANOS_DEFAULT *NanoSeconds - -func (p *TimeUnit) GetNANOS() *NanoSeconds { - if !p.IsSetNANOS() { - return TimeUnit_NANOS_DEFAULT - } - return p.NANOS -} -func (p *TimeUnit) CountSetFieldsTimeUnit() int { - count := 0 - if p.IsSetMILLIS() { - count++ - } - if p.IsSetMICROS() { - count++ - } - if p.IsSetNANOS() { - count++ - } - return count - -} - -func (p *TimeUnit) IsSetMILLIS() bool { - return p.MILLIS != nil -} - -func (p *TimeUnit) IsSetMICROS() bool { - return p.MICROS != nil -} - -func (p *TimeUnit) IsSetNANOS() bool { - return p.NANOS != nil -} - -func (p *TimeUnit) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch 
fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *TimeUnit) ReadField1(iprot thrift.TProtocol) error { - p.MILLIS = &MilliSeconds{} - if err := p.MILLIS.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MILLIS), err) - } - return nil -} - -func (p *TimeUnit) ReadField2(iprot thrift.TProtocol) error { - p.MICROS = &MicroSeconds{} - if err := p.MICROS.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MICROS), err) - } - return nil -} - -func (p *TimeUnit) ReadField3(iprot thrift.TProtocol) error { - p.NANOS = &NanoSeconds{} - if err := p.NANOS.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.NANOS), err) - } - return nil -} - -func (p *TimeUnit) Write(oprot thrift.TProtocol) error { - if c := p.CountSetFieldsTimeUnit(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c) - } - if err := oprot.WriteStructBegin("TimeUnit"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return 
thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *TimeUnit) writeField1(oprot thrift.TProtocol) (err error) { - if p.IsSetMILLIS() { - if err := oprot.WriteFieldBegin("MILLIS", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:MILLIS: ", p), err) - } - if err := p.MILLIS.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MILLIS), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:MILLIS: ", p), err) - } - } - return err -} - -func (p *TimeUnit) writeField2(oprot thrift.TProtocol) (err error) { - if p.IsSetMICROS() { - if err := oprot.WriteFieldBegin("MICROS", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MICROS: ", p), err) - } - if err := p.MICROS.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MICROS), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MICROS: ", p), err) - } - } - return err -} - -func (p *TimeUnit) writeField3(oprot thrift.TProtocol) (err error) { - if p.IsSetNANOS() { - if err := oprot.WriteFieldBegin("NANOS", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:NANOS: ", p), err) - } - if err := p.NANOS.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.NANOS), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:NANOS: ", p), err) - } - } - return err -} - -func (p *TimeUnit) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimeUnit(%+v)", *p) -} - -// Timestamp logical type annotation -// -// Allowed for physical types: INT64 -// -// Attributes: -// - 
IsAdjustedToUTC -// - Unit -type TimestampType struct { - IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` - Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` -} - -func NewTimestampType() *TimestampType { - return &TimestampType{} -} - -func (p *TimestampType) GetIsAdjustedToUTC() bool { - return p.IsAdjustedToUTC -} - -var TimestampType_Unit_DEFAULT *TimeUnit - -func (p *TimestampType) GetUnit() *TimeUnit { - if !p.IsSetUnit() { - return TimestampType_Unit_DEFAULT - } - return p.Unit -} -func (p *TimestampType) IsSetUnit() bool { - return p.Unit != nil -} - -func (p *TimestampType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetIsAdjustedToUTC bool = false - var issetUnit bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetIsAdjustedToUTC = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetUnit = true - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetIsAdjustedToUTC { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")) - } - if !issetUnit { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")) - } - return nil -} - -func (p *TimestampType) ReadField1(iprot thrift.TProtocol) error { - if v, 
err := iprot.ReadBool(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.IsAdjustedToUTC = v - } - return nil -} - -func (p *TimestampType) ReadField2(iprot thrift.TProtocol) error { - p.Unit = &TimeUnit{} - if err := p.Unit.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) - } - return nil -} - -func (p *TimestampType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("TimestampType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *TimestampType) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("isAdjustedToUTC", thrift.BOOL, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) - } - if err := oprot.WriteBool(p.IsAdjustedToUTC); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) - } - return err -} - -func (p *TimestampType) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("unit", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) - } - if err := p.Unit.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) - } - if err := 
oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) - } - return err -} - -func (p *TimestampType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimestampType(%+v)", *p) -} - -// Time logical type annotation -// -// Allowed for physical types: INT32 (millis), INT64 (micros, nanos) -// -// Attributes: -// - IsAdjustedToUTC -// - Unit -type TimeType struct { - IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` - Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` -} - -func NewTimeType() *TimeType { - return &TimeType{} -} - -func (p *TimeType) GetIsAdjustedToUTC() bool { - return p.IsAdjustedToUTC -} - -var TimeType_Unit_DEFAULT *TimeUnit - -func (p *TimeType) GetUnit() *TimeUnit { - if !p.IsSetUnit() { - return TimeType_Unit_DEFAULT - } - return p.Unit -} -func (p *TimeType) IsSetUnit() bool { - return p.Unit != nil -} - -func (p *TimeType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetIsAdjustedToUTC bool = false - var issetUnit bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetIsAdjustedToUTC = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetUnit = true - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetIsAdjustedToUTC { - 
return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")) - } - if !issetUnit { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")) - } - return nil -} - -func (p *TimeType) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.IsAdjustedToUTC = v - } - return nil -} - -func (p *TimeType) ReadField2(iprot thrift.TProtocol) error { - p.Unit = &TimeUnit{} - if err := p.Unit.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) - } - return nil -} - -func (p *TimeType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("TimeType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *TimeType) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("isAdjustedToUTC", thrift.BOOL, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) - } - if err := oprot.WriteBool(p.IsAdjustedToUTC); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) - } - return err -} - -func (p *TimeType) writeField2(oprot thrift.TProtocol) (err error) 
{ - if err := oprot.WriteFieldBegin("unit", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) - } - if err := p.Unit.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) - } - return err -} - -func (p *TimeType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimeType(%+v)", *p) -} - -// Integer logical type annotation -// -// bitWidth must be 8, 16, 32, or 64. -// -// Allowed for physical types: INT32, INT64 -// -// Attributes: -// - BitWidth -// - IsSigned -type IntType struct { - BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"` - IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"` -} - -func NewIntType() *IntType { - return &IntType{} -} - -func (p *IntType) GetBitWidth() int8 { - return p.BitWidth -} - -func (p *IntType) GetIsSigned() bool { - return p.IsSigned -} -func (p *IntType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetBitWidth bool = false - var issetIsSigned bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetBitWidth = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetIsSigned = true - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil 
{ - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetBitWidth { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BitWidth is not set")) - } - if !issetIsSigned { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsSigned is not set")) - } - return nil -} - -func (p *IntType) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadByte(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - temp := v - p.BitWidth = temp - } - return nil -} - -func (p *IntType) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.IsSigned = v - } - return nil -} - -func (p *IntType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("IntType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *IntType) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("bitWidth", thrift.BYTE, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:bitWidth: ", p), err) - } - if err := oprot.WriteByte(p.BitWidth); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.bitWidth (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:bitWidth: ", p), err) - } - return err -} - -func 
(p *IntType) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("isSigned", thrift.BOOL, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:isSigned: ", p), err) - } - if err := oprot.WriteBool(p.IsSigned); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.isSigned (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:isSigned: ", p), err) - } - return err -} - -func (p *IntType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("IntType(%+v)", *p) -} - -// Embedded JSON logical type annotation -// -// Allowed for physical types: BINARY -type JsonType struct { -} - -func NewJsonType() *JsonType { - return &JsonType{} -} - -func (p *JsonType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *JsonType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("JsonType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - 
-func (p *JsonType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("JsonType(%+v)", *p) -} - -// Embedded BSON logical type annotation -// -// Allowed for physical types: BINARY -type BsonType struct { -} - -func NewBsonType() *BsonType { - return &BsonType{} -} - -func (p *BsonType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *BsonType) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("BsonType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *BsonType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BsonType(%+v)", *p) -} - -// LogicalType annotations to replace ConvertedType. -// -// To maintain compatibility, implementations using LogicalType for a -// SchemaElement must also set the corresponding ConvertedType from the -// following table. 
-// -// Attributes: -// - STRING -// - MAP -// - LIST -// - ENUM -// - DECIMAL -// - DATE -// - TIME -// - TIMESTAMP -// - INTEGER -// - UNKNOWN -// - JSON -// - BSON -// - UUID -type LogicalType struct { - STRING *StringType `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"` - MAP *MapType `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"` - LIST *ListType `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"` - ENUM *EnumType `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"` - DECIMAL *DecimalType `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"` - DATE *DateType `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"` - TIME *TimeType `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"` - TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"` - // unused field # 9 - INTEGER *IntType `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"` - UNKNOWN *NullType `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"` - JSON *JsonType `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"` - BSON *BsonType `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"` - UUID *UUIDType `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"` -} - -func NewLogicalType() *LogicalType { - return &LogicalType{} -} - -var LogicalType_STRING_DEFAULT *StringType - -func (p *LogicalType) GetSTRING() *StringType { - if !p.IsSetSTRING() { - return LogicalType_STRING_DEFAULT - } - return p.STRING -} - -var LogicalType_MAP_DEFAULT *MapType - -func (p *LogicalType) GetMAP() *MapType { - if !p.IsSetMAP() { - return LogicalType_MAP_DEFAULT - } - return p.MAP -} - -var LogicalType_LIST_DEFAULT *ListType - -func (p *LogicalType) GetLIST() *ListType { - if !p.IsSetLIST() { - return LogicalType_LIST_DEFAULT - } - return p.LIST -} - -var LogicalType_ENUM_DEFAULT *EnumType - -func (p *LogicalType) GetENUM() *EnumType { - if !p.IsSetENUM() { - return LogicalType_ENUM_DEFAULT - } - return p.ENUM -} - -var LogicalType_DECIMAL_DEFAULT *DecimalType - -func (p *LogicalType) 
GetDECIMAL() *DecimalType { - if !p.IsSetDECIMAL() { - return LogicalType_DECIMAL_DEFAULT - } - return p.DECIMAL -} - -var LogicalType_DATE_DEFAULT *DateType - -func (p *LogicalType) GetDATE() *DateType { - if !p.IsSetDATE() { - return LogicalType_DATE_DEFAULT - } - return p.DATE -} - -var LogicalType_TIME_DEFAULT *TimeType - -func (p *LogicalType) GetTIME() *TimeType { - if !p.IsSetTIME() { - return LogicalType_TIME_DEFAULT - } - return p.TIME -} - -var LogicalType_TIMESTAMP_DEFAULT *TimestampType - -func (p *LogicalType) GetTIMESTAMP() *TimestampType { - if !p.IsSetTIMESTAMP() { - return LogicalType_TIMESTAMP_DEFAULT - } - return p.TIMESTAMP -} - -var LogicalType_INTEGER_DEFAULT *IntType - -func (p *LogicalType) GetINTEGER() *IntType { - if !p.IsSetINTEGER() { - return LogicalType_INTEGER_DEFAULT - } - return p.INTEGER -} - -var LogicalType_UNKNOWN_DEFAULT *NullType - -func (p *LogicalType) GetUNKNOWN() *NullType { - if !p.IsSetUNKNOWN() { - return LogicalType_UNKNOWN_DEFAULT - } - return p.UNKNOWN -} - -var LogicalType_JSON_DEFAULT *JsonType - -func (p *LogicalType) GetJSON() *JsonType { - if !p.IsSetJSON() { - return LogicalType_JSON_DEFAULT - } - return p.JSON -} - -var LogicalType_BSON_DEFAULT *BsonType - -func (p *LogicalType) GetBSON() *BsonType { - if !p.IsSetBSON() { - return LogicalType_BSON_DEFAULT - } - return p.BSON -} - -var LogicalType_UUID_DEFAULT *UUIDType - -func (p *LogicalType) GetUUID() *UUIDType { - if !p.IsSetUUID() { - return LogicalType_UUID_DEFAULT - } - return p.UUID -} -func (p *LogicalType) CountSetFieldsLogicalType() int { - count := 0 - if p.IsSetSTRING() { - count++ - } - if p.IsSetMAP() { - count++ - } - if p.IsSetLIST() { - count++ - } - if p.IsSetENUM() { - count++ - } - if p.IsSetDECIMAL() { - count++ - } - if p.IsSetDATE() { - count++ - } - if p.IsSetTIME() { - count++ - } - if p.IsSetTIMESTAMP() { - count++ - } - if p.IsSetINTEGER() { - count++ - } - if p.IsSetUNKNOWN() { - count++ - } - if p.IsSetJSON() { - count++ - } - if 
p.IsSetBSON() { - count++ - } - if p.IsSetUUID() { - count++ - } - return count - -} - -func (p *LogicalType) IsSetSTRING() bool { - return p.STRING != nil -} - -func (p *LogicalType) IsSetMAP() bool { - return p.MAP != nil -} - -func (p *LogicalType) IsSetLIST() bool { - return p.LIST != nil -} - -func (p *LogicalType) IsSetENUM() bool { - return p.ENUM != nil -} - -func (p *LogicalType) IsSetDECIMAL() bool { - return p.DECIMAL != nil -} - -func (p *LogicalType) IsSetDATE() bool { - return p.DATE != nil -} - -func (p *LogicalType) IsSetTIME() bool { - return p.TIME != nil -} - -func (p *LogicalType) IsSetTIMESTAMP() bool { - return p.TIMESTAMP != nil -} - -func (p *LogicalType) IsSetINTEGER() bool { - return p.INTEGER != nil -} - -func (p *LogicalType) IsSetUNKNOWN() bool { - return p.UNKNOWN != nil -} - -func (p *LogicalType) IsSetJSON() bool { - return p.JSON != nil -} - -func (p *LogicalType) IsSetBSON() bool { - return p.BSON != nil -} - -func (p *LogicalType) IsSetUUID() bool { - return p.UUID != nil -} - -func (p *LogicalType) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - case 6: - if err := p.ReadField6(iprot); err != nil { - return err - } - case 7: - if err := p.ReadField7(iprot); err != nil { - return err - } - case 8: - if err := 
p.ReadField8(iprot); err != nil { - return err - } - case 10: - if err := p.ReadField10(iprot); err != nil { - return err - } - case 11: - if err := p.ReadField11(iprot); err != nil { - return err - } - case 12: - if err := p.ReadField12(iprot); err != nil { - return err - } - case 13: - if err := p.ReadField13(iprot); err != nil { - return err - } - case 14: - if err := p.ReadField14(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *LogicalType) ReadField1(iprot thrift.TProtocol) error { - p.STRING = &StringType{} - if err := p.STRING.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.STRING), err) - } - return nil -} - -func (p *LogicalType) ReadField2(iprot thrift.TProtocol) error { - p.MAP = &MapType{} - if err := p.MAP.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MAP), err) - } - return nil -} - -func (p *LogicalType) ReadField3(iprot thrift.TProtocol) error { - p.LIST = &ListType{} - if err := p.LIST.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LIST), err) - } - return nil -} - -func (p *LogicalType) ReadField4(iprot thrift.TProtocol) error { - p.ENUM = &EnumType{} - if err := p.ENUM.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENUM), err) - } - return nil -} - -func (p *LogicalType) ReadField5(iprot thrift.TProtocol) error { - p.DECIMAL = &DecimalType{} - if err := p.DECIMAL.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DECIMAL), err) - } - return nil -} - -func (p *LogicalType) ReadField6(iprot thrift.TProtocol) 
error { - p.DATE = &DateType{} - if err := p.DATE.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DATE), err) - } - return nil -} - -func (p *LogicalType) ReadField7(iprot thrift.TProtocol) error { - p.TIME = &TimeType{} - if err := p.TIME.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIME), err) - } - return nil -} - -func (p *LogicalType) ReadField8(iprot thrift.TProtocol) error { - p.TIMESTAMP = &TimestampType{} - if err := p.TIMESTAMP.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIMESTAMP), err) - } - return nil -} - -func (p *LogicalType) ReadField10(iprot thrift.TProtocol) error { - p.INTEGER = &IntType{} - if err := p.INTEGER.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.INTEGER), err) - } - return nil -} - -func (p *LogicalType) ReadField11(iprot thrift.TProtocol) error { - p.UNKNOWN = &NullType{} - if err := p.UNKNOWN.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNKNOWN), err) - } - return nil -} - -func (p *LogicalType) ReadField12(iprot thrift.TProtocol) error { - p.JSON = &JsonType{} - if err := p.JSON.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.JSON), err) - } - return nil -} - -func (p *LogicalType) ReadField13(iprot thrift.TProtocol) error { - p.BSON = &BsonType{} - if err := p.BSON.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BSON), err) - } - return nil -} - -func (p *LogicalType) ReadField14(iprot thrift.TProtocol) error { - p.UUID = &UUIDType{} - if err := p.UUID.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UUID), err) - } - return nil -} - -func (p *LogicalType) Write(oprot thrift.TProtocol) error { - if c := 
p.CountSetFieldsLogicalType(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c) - } - if err := oprot.WriteStructBegin("LogicalType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - if err := p.writeField6(oprot); err != nil { - return err - } - if err := p.writeField7(oprot); err != nil { - return err - } - if err := p.writeField8(oprot); err != nil { - return err - } - if err := p.writeField10(oprot); err != nil { - return err - } - if err := p.writeField11(oprot); err != nil { - return err - } - if err := p.writeField12(oprot); err != nil { - return err - } - if err := p.writeField13(oprot); err != nil { - return err - } - if err := p.writeField14(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *LogicalType) writeField1(oprot thrift.TProtocol) (err error) { - if p.IsSetSTRING() { - if err := oprot.WriteFieldBegin("STRING", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:STRING: ", p), err) - } - if err := p.STRING.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.STRING), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:STRING: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField2(oprot 
thrift.TProtocol) (err error) { - if p.IsSetMAP() { - if err := oprot.WriteFieldBegin("MAP", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MAP: ", p), err) - } - if err := p.MAP.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MAP), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MAP: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField3(oprot thrift.TProtocol) (err error) { - if p.IsSetLIST() { - if err := oprot.WriteFieldBegin("LIST", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:LIST: ", p), err) - } - if err := p.LIST.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LIST), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:LIST: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField4(oprot thrift.TProtocol) (err error) { - if p.IsSetENUM() { - if err := oprot.WriteFieldBegin("ENUM", thrift.STRUCT, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:ENUM: ", p), err) - } - if err := p.ENUM.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENUM), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:ENUM: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetDECIMAL() { - if err := oprot.WriteFieldBegin("DECIMAL", thrift.STRUCT, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:DECIMAL: ", p), err) - } - if err := p.DECIMAL.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing 
struct: ", p.DECIMAL), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:DECIMAL: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField6(oprot thrift.TProtocol) (err error) { - if p.IsSetDATE() { - if err := oprot.WriteFieldBegin("DATE", thrift.STRUCT, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:DATE: ", p), err) - } - if err := p.DATE.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DATE), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:DATE: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField7(oprot thrift.TProtocol) (err error) { - if p.IsSetTIME() { - if err := oprot.WriteFieldBegin("TIME", thrift.STRUCT, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:TIME: ", p), err) - } - if err := p.TIME.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIME), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:TIME: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField8(oprot thrift.TProtocol) (err error) { - if p.IsSetTIMESTAMP() { - if err := oprot.WriteFieldBegin("TIMESTAMP", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:TIMESTAMP: ", p), err) - } - if err := p.TIMESTAMP.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIMESTAMP), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:TIMESTAMP: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField10(oprot thrift.TProtocol) (err error) { - if p.IsSetINTEGER() { - if 
err := oprot.WriteFieldBegin("INTEGER", thrift.STRUCT, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:INTEGER: ", p), err) - } - if err := p.INTEGER.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.INTEGER), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:INTEGER: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField11(oprot thrift.TProtocol) (err error) { - if p.IsSetUNKNOWN() { - if err := oprot.WriteFieldBegin("UNKNOWN", thrift.STRUCT, 11); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:UNKNOWN: ", p), err) - } - if err := p.UNKNOWN.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNKNOWN), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 11:UNKNOWN: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField12(oprot thrift.TProtocol) (err error) { - if p.IsSetJSON() { - if err := oprot.WriteFieldBegin("JSON", thrift.STRUCT, 12); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:JSON: ", p), err) - } - if err := p.JSON.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.JSON), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 12:JSON: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField13(oprot thrift.TProtocol) (err error) { - if p.IsSetBSON() { - if err := oprot.WriteFieldBegin("BSON", thrift.STRUCT, 13); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:BSON: ", p), err) - } - if err := p.BSON.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", 
p.BSON), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 13:BSON: ", p), err) - } - } - return err -} - -func (p *LogicalType) writeField14(oprot thrift.TProtocol) (err error) { - if p.IsSetUUID() { - if err := oprot.WriteFieldBegin("UUID", thrift.STRUCT, 14); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:UUID: ", p), err) - } - if err := p.UUID.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UUID), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 14:UUID: ", p), err) - } - } - return err -} - -func (p *LogicalType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("LogicalType(%+v)", *p) -} - -// Represents a element inside a schema definition. -// - if it is a group (inner node) then type is undefined and num_children is defined -// - if it is a primitive type (leaf) then type is defined and num_children is undefined -// the nodes are listed in depth first traversal order. -// -// Attributes: -// - Type: Data type for this field. Not set if the current element is a non-leaf node -// - TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the vales. -// Otherwise, if specified, this is the maximum bit length to store any of the values. -// (e.g. a low cardinality INT col could have this set to 3). Note that this is -// in the schema, and therefore fixed for the entire file. -// - RepetitionType: repetition of the field. The root of the schema does not have a repetition_type. -// All other nodes must have one -// - Name: Name of the field in the schema -// - NumChildren: Nested fields. Since thrift does not support nested fields, -// the nesting is flattened to a single list by a depth-first traversal. -// The children count is used to construct the nested relationship. 
-// This field is not set when the element is a primitive type -// - ConvertedType: When the schema is the result of a conversion from another model -// Used to record the original type to help with cross conversion. -// - Scale: Used when this column contains decimal data. -// See the DECIMAL converted type for more details. -// - Precision -// - FieldID: When the original schema supports field ids, this will save the -// original field id in the parquet schema -// - LogicalType: The logical type of this SchemaElement -// -// LogicalType replaces ConvertedType, but ConvertedType is still required -// for some logical types to ensure forward-compatibility in format v1. -type SchemaElement struct { - Type *Type `thrift:"type,1" db:"type" json:"type,omitempty"` - TypeLength *int32 `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"` - RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"` - Name string `thrift:"name,4,required" db:"name" json:"name"` - NumChildren *int32 `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"` - ConvertedType *ConvertedType `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"` - Scale *int32 `thrift:"scale,7" db:"scale" json:"scale,omitempty"` - Precision *int32 `thrift:"precision,8" db:"precision" json:"precision,omitempty"` - FieldID *int32 `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"` - LogicalType *LogicalType `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"` -} - -func NewSchemaElement() *SchemaElement { - return &SchemaElement{} -} - -var SchemaElement_Type_DEFAULT Type - -func (p *SchemaElement) GetType() Type { - if !p.IsSetType() { - return SchemaElement_Type_DEFAULT - } - return *p.Type -} - -var SchemaElement_TypeLength_DEFAULT int32 - -func (p *SchemaElement) GetTypeLength() int32 { - if !p.IsSetTypeLength() { - return SchemaElement_TypeLength_DEFAULT - } - 
return *p.TypeLength -} - -var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType - -func (p *SchemaElement) GetRepetitionType() FieldRepetitionType { - if !p.IsSetRepetitionType() { - return SchemaElement_RepetitionType_DEFAULT - } - return *p.RepetitionType -} - -func (p *SchemaElement) GetName() string { - return p.Name -} - -var SchemaElement_NumChildren_DEFAULT int32 - -func (p *SchemaElement) GetNumChildren() int32 { - if !p.IsSetNumChildren() { - return SchemaElement_NumChildren_DEFAULT - } - return *p.NumChildren -} - -var SchemaElement_ConvertedType_DEFAULT ConvertedType - -func (p *SchemaElement) GetConvertedType() ConvertedType { - if !p.IsSetConvertedType() { - return SchemaElement_ConvertedType_DEFAULT - } - return *p.ConvertedType -} - -var SchemaElement_Scale_DEFAULT int32 - -func (p *SchemaElement) GetScale() int32 { - if !p.IsSetScale() { - return SchemaElement_Scale_DEFAULT - } - return *p.Scale -} - -var SchemaElement_Precision_DEFAULT int32 - -func (p *SchemaElement) GetPrecision() int32 { - if !p.IsSetPrecision() { - return SchemaElement_Precision_DEFAULT - } - return *p.Precision -} - -var SchemaElement_FieldID_DEFAULT int32 - -func (p *SchemaElement) GetFieldID() int32 { - if !p.IsSetFieldID() { - return SchemaElement_FieldID_DEFAULT - } - return *p.FieldID -} - -var SchemaElement_LogicalType_DEFAULT *LogicalType - -func (p *SchemaElement) GetLogicalType() *LogicalType { - if !p.IsSetLogicalType() { - return SchemaElement_LogicalType_DEFAULT - } - return p.LogicalType -} -func (p *SchemaElement) IsSetType() bool { - return p.Type != nil -} - -func (p *SchemaElement) IsSetTypeLength() bool { - return p.TypeLength != nil -} - -func (p *SchemaElement) IsSetRepetitionType() bool { - return p.RepetitionType != nil -} - -func (p *SchemaElement) IsSetNumChildren() bool { - return p.NumChildren != nil -} - -func (p *SchemaElement) IsSetConvertedType() bool { - return p.ConvertedType != nil -} - -func (p *SchemaElement) IsSetScale() bool { - 
return p.Scale != nil -} - -func (p *SchemaElement) IsSetPrecision() bool { - return p.Precision != nil -} - -func (p *SchemaElement) IsSetFieldID() bool { - return p.FieldID != nil -} - -func (p *SchemaElement) IsSetLogicalType() bool { - return p.LogicalType != nil -} - -func (p *SchemaElement) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetName bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - issetName = true - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - case 6: - if err := p.ReadField6(iprot); err != nil { - return err - } - case 7: - if err := p.ReadField7(iprot); err != nil { - return err - } - case 8: - if err := p.ReadField8(iprot); err != nil { - return err - } - case 9: - if err := p.ReadField9(iprot); err != nil { - return err - } - case 10: - if err := p.ReadField10(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetName { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Name is not set")) - } - return nil -} - -func (p *SchemaElement) ReadField1(iprot thrift.TProtocol) error { - if v, err 
:= iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - temp := Type(v) - p.Type = &temp - } - return nil -} - -func (p *SchemaElement) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.TypeLength = &v - } - return nil -} - -func (p *SchemaElement) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - temp := FieldRepetitionType(v) - p.RepetitionType = &temp - } - return nil -} - -func (p *SchemaElement) ReadField4(iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(); err != nil { - return thrift.PrependError("error reading field 4: ", err) - } else { - p.Name = v - } - return nil -} - -func (p *SchemaElement) ReadField5(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 5: ", err) - } else { - p.NumChildren = &v - } - return nil -} - -func (p *SchemaElement) ReadField6(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 6: ", err) - } else { - temp := ConvertedType(v) - p.ConvertedType = &temp - } - return nil -} - -func (p *SchemaElement) ReadField7(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 7: ", err) - } else { - p.Scale = &v - } - return nil -} - -func (p *SchemaElement) ReadField8(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 8: ", err) - } else { - p.Precision = &v - } - return nil -} - -func (p *SchemaElement) ReadField9(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 9: ", err) - } else { 
- p.FieldID = &v - } - return nil -} - -func (p *SchemaElement) ReadField10(iprot thrift.TProtocol) error { - p.LogicalType = &LogicalType{} - if err := p.LogicalType.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LogicalType), err) - } - return nil -} - -func (p *SchemaElement) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("SchemaElement"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - if err := p.writeField6(oprot); err != nil { - return err - } - if err := p.writeField7(oprot); err != nil { - return err - } - if err := p.writeField8(oprot); err != nil { - return err - } - if err := p.writeField9(oprot); err != nil { - return err - } - if err := p.writeField10(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *SchemaElement) writeField1(oprot thrift.TProtocol) (err error) { - if p.IsSetType() { - if err := oprot.WriteFieldBegin("type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) - } - if err := oprot.WriteI32(int32(*p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) - } - } - return err -} - 
-func (p *SchemaElement) writeField2(oprot thrift.TProtocol) (err error) { - if p.IsSetTypeLength() { - if err := oprot.WriteFieldBegin("type_length", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:type_length: ", p), err) - } - if err := oprot.WriteI32(*p.TypeLength); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type_length (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:type_length: ", p), err) - } - } - return err -} - -func (p *SchemaElement) writeField3(oprot thrift.TProtocol) (err error) { - if p.IsSetRepetitionType() { - if err := oprot.WriteFieldBegin("repetition_type", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:repetition_type: ", p), err) - } - if err := oprot.WriteI32(int32(*p.RepetitionType)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_type (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:repetition_type: ", p), err) - } - } - return err -} - -func (p *SchemaElement) writeField4(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("name", thrift.STRING, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:name: ", p), err) - } - if err := oprot.WriteString(string(p.Name)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.name (4) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:name: ", p), err) - } - return err -} - -func (p *SchemaElement) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetNumChildren() { - if err := oprot.WriteFieldBegin("num_children", thrift.I32, 5); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_children: ", p), err) - } - if err := oprot.WriteI32(*p.NumChildren); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_children (5) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_children: ", p), err) - } - } - return err -} - -func (p *SchemaElement) writeField6(oprot thrift.TProtocol) (err error) { - if p.IsSetConvertedType() { - if err := oprot.WriteFieldBegin("converted_type", thrift.I32, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:converted_type: ", p), err) - } - if err := oprot.WriteI32(int32(*p.ConvertedType)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.converted_type (6) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:converted_type: ", p), err) - } - } - return err -} - -func (p *SchemaElement) writeField7(oprot thrift.TProtocol) (err error) { - if p.IsSetScale() { - if err := oprot.WriteFieldBegin("scale", thrift.I32, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:scale: ", p), err) - } - if err := oprot.WriteI32(*p.Scale); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.scale (7) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:scale: ", p), err) - } - } - return err -} - -func (p *SchemaElement) writeField8(oprot thrift.TProtocol) (err error) { - if p.IsSetPrecision() { - if err := oprot.WriteFieldBegin("precision", thrift.I32, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:precision: ", p), err) - } - if err := oprot.WriteI32(*p.Precision); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.precision (8) 
field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:precision: ", p), err) - } - } - return err -} - -func (p *SchemaElement) writeField9(oprot thrift.TProtocol) (err error) { - if p.IsSetFieldID() { - if err := oprot.WriteFieldBegin("field_id", thrift.I32, 9); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:field_id: ", p), err) - } - if err := oprot.WriteI32(*p.FieldID); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.field_id (9) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 9:field_id: ", p), err) - } - } - return err -} - -func (p *SchemaElement) writeField10(oprot thrift.TProtocol) (err error) { - if p.IsSetLogicalType() { - if err := oprot.WriteFieldBegin("logicalType", thrift.STRUCT, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:logicalType: ", p), err) - } - if err := p.LogicalType.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LogicalType), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:logicalType: ", p), err) - } - } - return err -} - -func (p *SchemaElement) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("SchemaElement(%+v)", *p) -} - -// Data page header -// -// Attributes: -// - NumValues: Number of values, including NULLs, in this data page. 
* -// - Encoding: Encoding used for this data page * -// - DefinitionLevelEncoding: Encoding used for definition levels * -// - RepetitionLevelEncoding: Encoding used for repetition levels * -// - Statistics: Optional statistics for the data in this page* -type DataPageHeader struct { - NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` - Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` - DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"` - RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"` - Statistics *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"` -} - -func NewDataPageHeader() *DataPageHeader { - return &DataPageHeader{} -} - -func (p *DataPageHeader) GetNumValues() int32 { - return p.NumValues -} - -func (p *DataPageHeader) GetEncoding() Encoding { - return p.Encoding -} - -func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding { - return p.DefinitionLevelEncoding -} - -func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding { - return p.RepetitionLevelEncoding -} - -var DataPageHeader_Statistics_DEFAULT *Statistics - -func (p *DataPageHeader) GetStatistics() *Statistics { - if !p.IsSetStatistics() { - return DataPageHeader_Statistics_DEFAULT - } - return p.Statistics -} -func (p *DataPageHeader) IsSetStatistics() bool { - return p.Statistics != nil -} - -func (p *DataPageHeader) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumValues bool = false - var issetEncoding bool = false - var issetDefinitionLevelEncoding bool = false - var issetRepetitionLevelEncoding bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - 
if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetNumValues = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetEncoding = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetDefinitionLevelEncoding = true - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - issetRepetitionLevelEncoding = true - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumValues { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) - } - if !issetEncoding { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) - } - if !issetDefinitionLevelEncoding { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelEncoding is not set")) - } - if !issetRepetitionLevelEncoding { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelEncoding is not set")) - } - return nil -} - -func (p *DataPageHeader) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.NumValues = v - } - return nil -} - -func (p *DataPageHeader) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } 
else { - temp := Encoding(v) - p.Encoding = temp - } - return nil -} - -func (p *DataPageHeader) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - temp := Encoding(v) - p.DefinitionLevelEncoding = temp - } - return nil -} - -func (p *DataPageHeader) ReadField4(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 4: ", err) - } else { - temp := Encoding(v) - p.RepetitionLevelEncoding = temp - } - return nil -} - -func (p *DataPageHeader) ReadField5(iprot thrift.TProtocol) error { - p.Statistics = &Statistics{} - if err := p.Statistics.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) - } - return nil -} - -func (p *DataPageHeader) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("DataPageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *DataPageHeader) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("num_values", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) - } - if err := oprot.WriteI32(p.NumValues); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) - } - return err -} - -func (p *DataPageHeader) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("encoding", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) - } - if err := oprot.WriteI32(int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) - } - return err -} - -func (p *DataPageHeader) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("definition_level_encoding", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:definition_level_encoding: ", p), err) - } - if err := oprot.WriteI32(int32(p.DefinitionLevelEncoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.definition_level_encoding (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:definition_level_encoding: ", p), err) - } - return err -} - -func (p *DataPageHeader) writeField4(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("repetition_level_encoding", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:repetition_level_encoding: ", p), err) - } - if err := oprot.WriteI32(int32(p.RepetitionLevelEncoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_level_encoding (4) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field end error 4:repetition_level_encoding: ", p), err) - } - return err -} - -func (p *DataPageHeader) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin("statistics", thrift.STRUCT, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:statistics: ", p), err) - } - if err := p.Statistics.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:statistics: ", p), err) - } - } - return err -} - -func (p *DataPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DataPageHeader(%+v)", *p) -} - -type IndexPageHeader struct { -} - -func NewIndexPageHeader() *IndexPageHeader { - return &IndexPageHeader{} -} - -func (p *IndexPageHeader) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *IndexPageHeader) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("IndexPageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: 
", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *IndexPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("IndexPageHeader(%+v)", *p) -} - -// TODO: * -// -// Attributes: -// - NumValues: Number of values in the dictionary * -// - Encoding: Encoding using this dictionary page * -// - IsSorted: If true, the entries in the dictionary are sorted in ascending order * -type DictionaryPageHeader struct { - NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` - Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` - IsSorted *bool `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"` -} - -func NewDictionaryPageHeader() *DictionaryPageHeader { - return &DictionaryPageHeader{} -} - -func (p *DictionaryPageHeader) GetNumValues() int32 { - return p.NumValues -} - -func (p *DictionaryPageHeader) GetEncoding() Encoding { - return p.Encoding -} - -var DictionaryPageHeader_IsSorted_DEFAULT bool - -func (p *DictionaryPageHeader) GetIsSorted() bool { - if !p.IsSetIsSorted() { - return DictionaryPageHeader_IsSorted_DEFAULT - } - return *p.IsSorted -} -func (p *DictionaryPageHeader) IsSetIsSorted() bool { - return p.IsSorted != nil -} - -func (p *DictionaryPageHeader) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumValues bool = false - var issetEncoding bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetNumValues = true - case 2: - if err := p.ReadField2(iprot); err != nil { 
- return err - } - issetEncoding = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumValues { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) - } - if !issetEncoding { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) - } - return nil -} - -func (p *DictionaryPageHeader) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.NumValues = v - } - return nil -} - -func (p *DictionaryPageHeader) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - temp := Encoding(v) - p.Encoding = temp - } - return nil -} - -func (p *DictionaryPageHeader) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - p.IsSorted = &v - } - return nil -} - -func (p *DictionaryPageHeader) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("DictionaryPageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := 
oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *DictionaryPageHeader) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("num_values", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) - } - if err := oprot.WriteI32(p.NumValues); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) - } - return err -} - -func (p *DictionaryPageHeader) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("encoding", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) - } - if err := oprot.WriteI32(int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) - } - return err -} - -func (p *DictionaryPageHeader) writeField3(oprot thrift.TProtocol) (err error) { - if p.IsSetIsSorted() { - if err := oprot.WriteFieldBegin("is_sorted", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:is_sorted: ", p), err) - } - if err := oprot.WriteBool(*p.IsSorted); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.is_sorted (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:is_sorted: ", p), err) - } - } - return err -} - -func (p *DictionaryPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DictionaryPageHeader(%+v)", *p) -} - 
-// New page format allowing reading levels without decompressing the data -// Repetition and definition levels are uncompressed -// The remaining section containing the data is compressed if is_compressed is true -// -// -// Attributes: -// - NumValues: Number of values, including NULLs, in this data page. * -// - NumNulls: Number of NULL values, in this data page. -// Number of non-null = num_values - num_nulls which is also the number of values in the data section * -// - NumRows: Number of rows in this data page. which means pages change on record boundaries (r = 0) * -// - Encoding: Encoding used for data in this page * -// - DefinitionLevelsByteLength: length of the definition levels -// - RepetitionLevelsByteLength: length of the repetition levels -// - IsCompressed: whether the values are compressed. -// Which means the section of the page between -// definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) -// is compressed with the compression_codec. 
-// If missing it is considered compressed -// - Statistics: optional statistics for this column chunk -type DataPageHeaderV2 struct { - NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` - NumNulls int32 `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"` - NumRows int32 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` - Encoding Encoding `thrift:"encoding,4,required" db:"encoding" json:"encoding"` - DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"` - RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"` - IsCompressed bool `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed,omitempty"` - Statistics *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"` -} - -func NewDataPageHeaderV2() *DataPageHeaderV2 { - return &DataPageHeaderV2{ - IsCompressed: true, - } -} - -func (p *DataPageHeaderV2) GetNumValues() int32 { - return p.NumValues -} - -func (p *DataPageHeaderV2) GetNumNulls() int32 { - return p.NumNulls -} - -func (p *DataPageHeaderV2) GetNumRows() int32 { - return p.NumRows -} - -func (p *DataPageHeaderV2) GetEncoding() Encoding { - return p.Encoding -} - -func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32 { - return p.DefinitionLevelsByteLength -} - -func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32 { - return p.RepetitionLevelsByteLength -} - -var DataPageHeaderV2_IsCompressed_DEFAULT bool = true - -func (p *DataPageHeaderV2) GetIsCompressed() bool { - return p.IsCompressed -} - -var DataPageHeaderV2_Statistics_DEFAULT *Statistics - -func (p *DataPageHeaderV2) GetStatistics() *Statistics { - if !p.IsSetStatistics() { - return DataPageHeaderV2_Statistics_DEFAULT - } - return p.Statistics -} -func (p *DataPageHeaderV2) 
IsSetIsCompressed() bool { - return p.IsCompressed != DataPageHeaderV2_IsCompressed_DEFAULT -} - -func (p *DataPageHeaderV2) IsSetStatistics() bool { - return p.Statistics != nil -} - -func (p *DataPageHeaderV2) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumValues bool = false - var issetNumNulls bool = false - var issetNumRows bool = false - var issetEncoding bool = false - var issetDefinitionLevelsByteLength bool = false - var issetRepetitionLevelsByteLength bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetNumValues = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetNumNulls = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetNumRows = true - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - issetEncoding = true - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - issetDefinitionLevelsByteLength = true - case 6: - if err := p.ReadField6(iprot); err != nil { - return err - } - issetRepetitionLevelsByteLength = true - case 7: - if err := p.ReadField7(iprot); err != nil { - return err - } - case 8: - if err := p.ReadField8(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumValues { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, 
fmt.Errorf("Required field NumValues is not set")) - } - if !issetNumNulls { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumNulls is not set")) - } - if !issetNumRows { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) - } - if !issetEncoding { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) - } - if !issetDefinitionLevelsByteLength { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelsByteLength is not set")) - } - if !issetRepetitionLevelsByteLength { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelsByteLength is not set")) - } - return nil -} - -func (p *DataPageHeaderV2) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.NumValues = v - } - return nil -} - -func (p *DataPageHeaderV2) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.NumNulls = v - } - return nil -} - -func (p *DataPageHeaderV2) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - p.NumRows = v - } - return nil -} - -func (p *DataPageHeaderV2) ReadField4(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 4: ", err) - } else { - temp := Encoding(v) - p.Encoding = temp - } - return nil -} - -func (p *DataPageHeaderV2) ReadField5(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 5: ", err) - } else { - 
p.DefinitionLevelsByteLength = v - } - return nil -} - -func (p *DataPageHeaderV2) ReadField6(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 6: ", err) - } else { - p.RepetitionLevelsByteLength = v - } - return nil -} - -func (p *DataPageHeaderV2) ReadField7(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(); err != nil { - return thrift.PrependError("error reading field 7: ", err) - } else { - p.IsCompressed = v - } - return nil -} - -func (p *DataPageHeaderV2) ReadField8(iprot thrift.TProtocol) error { - p.Statistics = &Statistics{} - if err := p.Statistics.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) - } - return nil -} - -func (p *DataPageHeaderV2) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("DataPageHeaderV2"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - if err := p.writeField6(oprot); err != nil { - return err - } - if err := p.writeField7(oprot); err != nil { - return err - } - if err := p.writeField8(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *DataPageHeaderV2) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("num_values", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T 
write field begin error 1:num_values: ", p), err) - } - if err := oprot.WriteI32(p.NumValues); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) - } - return err -} - -func (p *DataPageHeaderV2) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("num_nulls", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:num_nulls: ", p), err) - } - if err := oprot.WriteI32(p.NumNulls); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_nulls (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:num_nulls: ", p), err) - } - return err -} - -func (p *DataPageHeaderV2) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("num_rows", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) - } - if err := oprot.WriteI32(p.NumRows); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) - } - return err -} - -func (p *DataPageHeaderV2) writeField4(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("encoding", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:encoding: ", p), err) - } - if err := oprot.WriteI32(int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (4) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end 
error 4:encoding: ", p), err) - } - return err -} - -func (p *DataPageHeaderV2) writeField5(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("definition_levels_byte_length", thrift.I32, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:definition_levels_byte_length: ", p), err) - } - if err := oprot.WriteI32(p.DefinitionLevelsByteLength); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.definition_levels_byte_length (5) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:definition_levels_byte_length: ", p), err) - } - return err -} - -func (p *DataPageHeaderV2) writeField6(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("repetition_levels_byte_length", thrift.I32, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:repetition_levels_byte_length: ", p), err) - } - if err := oprot.WriteI32(p.RepetitionLevelsByteLength); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_levels_byte_length (6) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:repetition_levels_byte_length: ", p), err) - } - return err -} - -func (p *DataPageHeaderV2) writeField7(oprot thrift.TProtocol) (err error) { - if p.IsSetIsCompressed() { - if err := oprot.WriteFieldBegin("is_compressed", thrift.BOOL, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:is_compressed: ", p), err) - } - if err := oprot.WriteBool(p.IsCompressed); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.is_compressed (7) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:is_compressed: ", p), err) - } - } - return err -} - -func (p 
*DataPageHeaderV2) writeField8(oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin("statistics", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:statistics: ", p), err) - } - if err := p.Statistics.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:statistics: ", p), err) - } - } - return err -} - -func (p *DataPageHeaderV2) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DataPageHeaderV2(%+v)", *p) -} - -// Attributes: -// - Type: the type of the page: indicates which of the *_header fields is set * -// - UncompressedPageSize: Uncompressed page size in bytes (not including this header) * -// - CompressedPageSize: Compressed page size in bytes (not including this header) * -// - Crc: 32bit crc for the data below. 
This allows for disabling checksumming in HDFS -// if only a few pages needs to be read -// -// - DataPageHeader -// - IndexPageHeader -// - DictionaryPageHeader -// - DataPageHeaderV2 -type PageHeader struct { - Type PageType `thrift:"type,1,required" db:"type" json:"type"` - UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"` - CompressedPageSize int32 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"` - Crc *int32 `thrift:"crc,4" db:"crc" json:"crc,omitempty"` - DataPageHeader *DataPageHeader `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"` - IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"` - DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"` - DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"` -} - -func NewPageHeader() *PageHeader { - return &PageHeader{} -} - -func (p *PageHeader) GetType() PageType { - return p.Type -} - -func (p *PageHeader) GetUncompressedPageSize() int32 { - return p.UncompressedPageSize -} - -func (p *PageHeader) GetCompressedPageSize() int32 { - return p.CompressedPageSize -} - -var PageHeader_Crc_DEFAULT int32 - -func (p *PageHeader) GetCrc() int32 { - if !p.IsSetCrc() { - return PageHeader_Crc_DEFAULT - } - return *p.Crc -} - -var PageHeader_DataPageHeader_DEFAULT *DataPageHeader - -func (p *PageHeader) GetDataPageHeader() *DataPageHeader { - if !p.IsSetDataPageHeader() { - return PageHeader_DataPageHeader_DEFAULT - } - return p.DataPageHeader -} - -var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader - -func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader { - if !p.IsSetIndexPageHeader() { - return 
PageHeader_IndexPageHeader_DEFAULT - } - return p.IndexPageHeader -} - -var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader - -func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader { - if !p.IsSetDictionaryPageHeader() { - return PageHeader_DictionaryPageHeader_DEFAULT - } - return p.DictionaryPageHeader -} - -var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2 - -func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2 { - if !p.IsSetDataPageHeaderV2() { - return PageHeader_DataPageHeaderV2_DEFAULT - } - return p.DataPageHeaderV2 -} -func (p *PageHeader) IsSetCrc() bool { - return p.Crc != nil -} - -func (p *PageHeader) IsSetDataPageHeader() bool { - return p.DataPageHeader != nil -} - -func (p *PageHeader) IsSetIndexPageHeader() bool { - return p.IndexPageHeader != nil -} - -func (p *PageHeader) IsSetDictionaryPageHeader() bool { - return p.DictionaryPageHeader != nil -} - -func (p *PageHeader) IsSetDataPageHeaderV2() bool { - return p.DataPageHeaderV2 != nil -} - -func (p *PageHeader) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetType bool = false - var issetUncompressedPageSize bool = false - var issetCompressedPageSize bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetType = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetUncompressedPageSize = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetCompressedPageSize = true - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - case 5: - if err := p.ReadField5(iprot); err != nil { 
- return err - } - case 6: - if err := p.ReadField6(iprot); err != nil { - return err - } - case 7: - if err := p.ReadField7(iprot); err != nil { - return err - } - case 8: - if err := p.ReadField8(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetType { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")) - } - if !issetUncompressedPageSize { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field UncompressedPageSize is not set")) - } - if !issetCompressedPageSize { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")) - } - return nil -} - -func (p *PageHeader) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - temp := PageType(v) - p.Type = temp - } - return nil -} - -func (p *PageHeader) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.UncompressedPageSize = v - } - return nil -} - -func (p *PageHeader) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - p.CompressedPageSize = v - } - return nil -} - -func (p *PageHeader) ReadField4(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 4: ", err) - } else { - p.Crc = &v - } - return nil -} - -func (p *PageHeader) ReadField5(iprot thrift.TProtocol) error { - 
p.DataPageHeader = &DataPageHeader{} - if err := p.DataPageHeader.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField6(iprot thrift.TProtocol) error { - p.IndexPageHeader = &IndexPageHeader{} - if err := p.IndexPageHeader.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.IndexPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField7(iprot thrift.TProtocol) error { - p.DictionaryPageHeader = &DictionaryPageHeader{} - if err := p.DictionaryPageHeader.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DictionaryPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField8(iprot thrift.TProtocol) error { - p.DataPageHeaderV2 = &DataPageHeaderV2{ - IsCompressed: true, - } - if err := p.DataPageHeaderV2.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeaderV2), err) - } - return nil -} - -func (p *PageHeader) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("PageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - if err := p.writeField6(oprot); err != nil { - return err - } - if err := p.writeField7(oprot); err != nil { - return err - } - if err := p.writeField8(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { 
- return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *PageHeader) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) - } - if err := oprot.WriteI32(int32(p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) - } - return err -} - -func (p *PageHeader) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("uncompressed_page_size", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:uncompressed_page_size: ", p), err) - } - if err := oprot.WriteI32(p.UncompressedPageSize); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.uncompressed_page_size (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:uncompressed_page_size: ", p), err) - } - return err -} - -func (p *PageHeader) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("compressed_page_size", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:compressed_page_size: ", p), err) - } - if err := oprot.WriteI32(p.CompressedPageSize); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:compressed_page_size: ", p), err) - } - return err -} - -func (p *PageHeader) writeField4(oprot thrift.TProtocol) (err error) { - if p.IsSetCrc() { - if err := oprot.WriteFieldBegin("crc", thrift.I32, 4); 
err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:crc: ", p), err) - } - if err := oprot.WriteI32(*p.Crc); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.crc (4) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:crc: ", p), err) - } - } - return err -} - -func (p *PageHeader) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetDataPageHeader() { - if err := oprot.WriteFieldBegin("data_page_header", thrift.STRUCT, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:data_page_header: ", p), err) - } - if err := p.DataPageHeader.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeader), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:data_page_header: ", p), err) - } - } - return err -} - -func (p *PageHeader) writeField6(oprot thrift.TProtocol) (err error) { - if p.IsSetIndexPageHeader() { - if err := oprot.WriteFieldBegin("index_page_header", thrift.STRUCT, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:index_page_header: ", p), err) - } - if err := p.IndexPageHeader.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.IndexPageHeader), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:index_page_header: ", p), err) - } - } - return err -} - -func (p *PageHeader) writeField7(oprot thrift.TProtocol) (err error) { - if p.IsSetDictionaryPageHeader() { - if err := oprot.WriteFieldBegin("dictionary_page_header", thrift.STRUCT, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:dictionary_page_header: ", p), err) - } - if err := 
p.DictionaryPageHeader.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DictionaryPageHeader), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:dictionary_page_header: ", p), err) - } - } - return err -} - -func (p *PageHeader) writeField8(oprot thrift.TProtocol) (err error) { - if p.IsSetDataPageHeaderV2() { - if err := oprot.WriteFieldBegin("data_page_header_v2", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:data_page_header_v2: ", p), err) - } - if err := p.DataPageHeaderV2.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeaderV2), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:data_page_header_v2: ", p), err) - } - } - return err -} - -func (p *PageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageHeader(%+v)", *p) -} - -// Wrapper struct to store key values -// -// Attributes: -// - Key -// - Value -type KeyValue struct { - Key string `thrift:"key,1,required" db:"key" json:"key"` - Value *string `thrift:"value,2" db:"value" json:"value,omitempty"` -} - -func NewKeyValue() *KeyValue { - return &KeyValue{} -} - -func (p *KeyValue) GetKey() string { - return p.Key -} - -var KeyValue_Value_DEFAULT string - -func (p *KeyValue) GetValue() string { - if !p.IsSetValue() { - return KeyValue_Value_DEFAULT - } - return *p.Value -} -func (p *KeyValue) IsSetValue() bool { - return p.Value != nil -} - -func (p *KeyValue) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetKey bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetKey = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetKey { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Key is not set")) - } - return nil -} - -func (p *KeyValue) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.Key = v - } - return nil -} - -func (p *KeyValue) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.Value = &v - } - return nil -} - -func (p *KeyValue) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("KeyValue"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *KeyValue) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("key", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 
1:key: ", p), err) - } - if err := oprot.WriteString(string(p.Key)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.key (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:key: ", p), err) - } - return err -} - -func (p *KeyValue) writeField2(oprot thrift.TProtocol) (err error) { - if p.IsSetValue() { - if err := oprot.WriteFieldBegin("value", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:value: ", p), err) - } - if err := oprot.WriteString(string(*p.Value)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.value (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:value: ", p), err) - } - } - return err -} - -func (p *KeyValue) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("KeyValue(%+v)", *p) -} - -// Wrapper struct to specify sort order -// -// Attributes: -// - ColumnIdx: The column index (in this row group) * -// - Descending: If true, indicates this column is sorted in descending order. * -// - NullsFirst: If true, nulls will come before non-null values, otherwise, -// nulls go at the end. 
-type SortingColumn struct { - ColumnIdx int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"` - Descending bool `thrift:"descending,2,required" db:"descending" json:"descending"` - NullsFirst bool `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"` -} - -func NewSortingColumn() *SortingColumn { - return &SortingColumn{} -} - -func (p *SortingColumn) GetColumnIdx() int32 { - return p.ColumnIdx -} - -func (p *SortingColumn) GetDescending() bool { - return p.Descending -} - -func (p *SortingColumn) GetNullsFirst() bool { - return p.NullsFirst -} -func (p *SortingColumn) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetColumnIdx bool = false - var issetDescending bool = false - var issetNullsFirst bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetColumnIdx = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetDescending = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetNullsFirst = true - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetColumnIdx { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field ColumnIdx is not set")) - } - if !issetDescending { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Descending is not set")) - } - if 
!issetNullsFirst { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullsFirst is not set")) - } - return nil -} - -func (p *SortingColumn) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.ColumnIdx = v - } - return nil -} - -func (p *SortingColumn) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.Descending = v - } - return nil -} - -func (p *SortingColumn) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - p.NullsFirst = v - } - return nil -} - -func (p *SortingColumn) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("SortingColumn"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *SortingColumn) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("column_idx", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:column_idx: ", p), err) - } - if err := oprot.WriteI32(p.ColumnIdx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.column_idx (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field end error 1:column_idx: ", p), err) - } - return err -} - -func (p *SortingColumn) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("descending", thrift.BOOL, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:descending: ", p), err) - } - if err := oprot.WriteBool(p.Descending); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.descending (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:descending: ", p), err) - } - return err -} - -func (p *SortingColumn) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("nulls_first", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:nulls_first: ", p), err) - } - if err := oprot.WriteBool(p.NullsFirst); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.nulls_first (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:nulls_first: ", p), err) - } - return err -} - -func (p *SortingColumn) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("SortingColumn(%+v)", *p) -} - -// statistics of a given page type and encoding -// -// Attributes: -// - PageType: the page type (data/dic/...) 
* -// - Encoding: encoding of the page * -// - Count: number of pages of this type with this encoding * -type PageEncodingStats struct { - PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"` - Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` - Count int32 `thrift:"count,3,required" db:"count" json:"count"` -} - -func NewPageEncodingStats() *PageEncodingStats { - return &PageEncodingStats{} -} - -func (p *PageEncodingStats) GetPageType() PageType { - return p.PageType -} - -func (p *PageEncodingStats) GetEncoding() Encoding { - return p.Encoding -} - -func (p *PageEncodingStats) GetCount() int32 { - return p.Count -} -func (p *PageEncodingStats) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetPageType bool = false - var issetEncoding bool = false - var issetCount bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetPageType = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetEncoding = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetCount = true - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetPageType { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageType is not set")) - } - if !issetEncoding { - return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) - } - if !issetCount { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Count is not set")) - } - return nil -} - -func (p *PageEncodingStats) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - temp := PageType(v) - p.PageType = temp - } - return nil -} - -func (p *PageEncodingStats) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - temp := Encoding(v) - p.Encoding = temp - } - return nil -} - -func (p *PageEncodingStats) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - p.Count = v - } - return nil -} - -func (p *PageEncodingStats) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("PageEncodingStats"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *PageEncodingStats) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("page_type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_type: ", p), err) - } - if err := oprot.WriteI32(int32(p.PageType)); err != nil { - 
return thrift.PrependError(fmt.Sprintf("%T.page_type (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_type: ", p), err) - } - return err -} - -func (p *PageEncodingStats) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("encoding", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) - } - if err := oprot.WriteI32(int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) - } - return err -} - -func (p *PageEncodingStats) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("count", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:count: ", p), err) - } - if err := oprot.WriteI32(p.Count); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.count (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:count: ", p), err) - } - return err -} - -func (p *PageEncodingStats) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageEncodingStats(%+v)", *p) -} - -// Description for column metadata -// -// Attributes: -// - Type: Type of this column * -// - Encodings: Set of all encodings used for this column. The purpose is to validate -// whether we can decode those pages. 
* -// - PathInSchema: Path in schema * -// - Codec: Compression codec * -// - NumValues: Number of values in this column * -// - TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) * -// - TotalCompressedSize: total byte size of all compressed pages in this column chunk (including the headers) * -// - KeyValueMetadata: Optional key/value metadata * -// - DataPageOffset: Byte offset from beginning of file to first data page * -// - IndexPageOffset: Byte offset from beginning of file to root index page * -// - DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page * -// - Statistics: optional statistics for this column chunk -// - EncodingStats: Set of all encodings used for pages in this column chunk. -// This information can be used to determine if all data pages are -// dictionary encoded for example * -type ColumnMetaData struct { - Type Type `thrift:"type,1,required" db:"type" json:"type"` - Encodings []Encoding `thrift:"encodings,2,required" db:"encodings" json:"encodings"` - PathInSchema []string `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"` - Codec CompressionCodec `thrift:"codec,4,required" db:"codec" json:"codec"` - NumValues int64 `thrift:"num_values,5,required" db:"num_values" json:"num_values"` - TotalUncompressedSize int64 `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"` - TotalCompressedSize int64 `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"` - KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"` - DataPageOffset int64 `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"` - IndexPageOffset *int64 `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"` - DictionaryPageOffset *int64 
`thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"` - Statistics *Statistics `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"` - EncodingStats []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"` -} - -func NewColumnMetaData() *ColumnMetaData { - return &ColumnMetaData{} -} - -func (p *ColumnMetaData) GetType() Type { - return p.Type -} - -func (p *ColumnMetaData) GetEncodings() []Encoding { - return p.Encodings -} - -func (p *ColumnMetaData) GetPathInSchema() []string { - return p.PathInSchema -} - -func (p *ColumnMetaData) GetCodec() CompressionCodec { - return p.Codec -} - -func (p *ColumnMetaData) GetNumValues() int64 { - return p.NumValues -} - -func (p *ColumnMetaData) GetTotalUncompressedSize() int64 { - return p.TotalUncompressedSize -} - -func (p *ColumnMetaData) GetTotalCompressedSize() int64 { - return p.TotalCompressedSize -} - -var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue - -func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue { - return p.KeyValueMetadata -} - -func (p *ColumnMetaData) GetDataPageOffset() int64 { - return p.DataPageOffset -} - -var ColumnMetaData_IndexPageOffset_DEFAULT int64 - -func (p *ColumnMetaData) GetIndexPageOffset() int64 { - if !p.IsSetIndexPageOffset() { - return ColumnMetaData_IndexPageOffset_DEFAULT - } - return *p.IndexPageOffset -} - -var ColumnMetaData_DictionaryPageOffset_DEFAULT int64 - -func (p *ColumnMetaData) GetDictionaryPageOffset() int64 { - if !p.IsSetDictionaryPageOffset() { - return ColumnMetaData_DictionaryPageOffset_DEFAULT - } - return *p.DictionaryPageOffset -} - -var ColumnMetaData_Statistics_DEFAULT *Statistics - -func (p *ColumnMetaData) GetStatistics() *Statistics { - if !p.IsSetStatistics() { - return ColumnMetaData_Statistics_DEFAULT - } - return p.Statistics -} - -var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats - -func (p *ColumnMetaData) 
GetEncodingStats() []*PageEncodingStats { - return p.EncodingStats -} -func (p *ColumnMetaData) IsSetKeyValueMetadata() bool { - return p.KeyValueMetadata != nil -} - -func (p *ColumnMetaData) IsSetIndexPageOffset() bool { - return p.IndexPageOffset != nil -} - -func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool { - return p.DictionaryPageOffset != nil -} - -func (p *ColumnMetaData) IsSetStatistics() bool { - return p.Statistics != nil -} - -func (p *ColumnMetaData) IsSetEncodingStats() bool { - return p.EncodingStats != nil -} - -func (p *ColumnMetaData) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetType bool = false - var issetEncodings bool = false - var issetPathInSchema bool = false - var issetCodec bool = false - var issetNumValues bool = false - var issetTotalUncompressedSize bool = false - var issetTotalCompressedSize bool = false - var issetDataPageOffset bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetType = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetEncodings = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetPathInSchema = true - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - issetCodec = true - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - issetNumValues = true - case 6: - if err := p.ReadField6(iprot); err != nil { - return err - } - issetTotalUncompressedSize = true - case 7: - if err := p.ReadField7(iprot); err != nil { - return err - } - issetTotalCompressedSize = true - case 8: - if err := 
p.ReadField8(iprot); err != nil { - return err - } - case 9: - if err := p.ReadField9(iprot); err != nil { - return err - } - issetDataPageOffset = true - case 10: - if err := p.ReadField10(iprot); err != nil { - return err - } - case 11: - if err := p.ReadField11(iprot); err != nil { - return err - } - case 12: - if err := p.ReadField12(iprot); err != nil { - return err - } - case 13: - if err := p.ReadField13(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetType { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")) - } - if !issetEncodings { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encodings is not set")) - } - if !issetPathInSchema { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set")) - } - if !issetCodec { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Codec is not set")) - } - if !issetNumValues { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) - } - if !issetTotalUncompressedSize { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalUncompressedSize is not set")) - } - if !issetTotalCompressedSize { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalCompressedSize is not set")) - } - if !issetDataPageOffset { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DataPageOffset is not set")) - } - return nil -} - -func (p *ColumnMetaData) ReadField1(iprot 
thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - temp := Type(v) - p.Type = temp - } - return nil -} - -func (p *ColumnMetaData) ReadField2(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]Encoding, 0, size) - p.Encodings = tSlice - for i := 0; i < size; i++ { - var _elem0 Encoding - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 0: ", err) - } else { - temp := Encoding(v) - _elem0 = temp - } - p.Encodings = append(p.Encodings, _elem0) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField3(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]string, 0, size) - p.PathInSchema = tSlice - for i := 0; i < size; i++ { - var _elem1 string - if v, err := iprot.ReadString(); err != nil { - return thrift.PrependError("error reading field 0: ", err) - } else { - _elem1 = v - } - p.PathInSchema = append(p.PathInSchema, _elem1) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField4(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 4: ", err) - } else { - temp := CompressionCodec(v) - p.Codec = temp - } - return nil -} - -func (p *ColumnMetaData) ReadField5(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 5: ", err) - } else { - p.NumValues = v - } - return nil -} - -func (p *ColumnMetaData) ReadField6(iprot 
thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 6: ", err) - } else { - p.TotalUncompressedSize = v - } - return nil -} - -func (p *ColumnMetaData) ReadField7(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 7: ", err) - } else { - p.TotalCompressedSize = v - } - return nil -} - -func (p *ColumnMetaData) ReadField8(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*KeyValue, 0, size) - p.KeyValueMetadata = tSlice - for i := 0; i < size; i++ { - _elem2 := &KeyValue{} - if err := _elem2.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem2), err) - } - p.KeyValueMetadata = append(p.KeyValueMetadata, _elem2) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField9(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 9: ", err) - } else { - p.DataPageOffset = v - } - return nil -} - -func (p *ColumnMetaData) ReadField10(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 10: ", err) - } else { - p.IndexPageOffset = &v - } - return nil -} - -func (p *ColumnMetaData) ReadField11(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 11: ", err) - } else { - p.DictionaryPageOffset = &v - } - return nil -} - -func (p *ColumnMetaData) ReadField12(iprot thrift.TProtocol) error { - p.Statistics = &Statistics{} - if err := p.Statistics.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: 
", p.Statistics), err) - } - return nil -} - -func (p *ColumnMetaData) ReadField13(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*PageEncodingStats, 0, size) - p.EncodingStats = tSlice - for i := 0; i < size; i++ { - _elem3 := &PageEncodingStats{} - if err := _elem3.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem3), err) - } - p.EncodingStats = append(p.EncodingStats, _elem3) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("ColumnMetaData"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - if err := p.writeField6(oprot); err != nil { - return err - } - if err := p.writeField7(oprot); err != nil { - return err - } - if err := p.writeField8(oprot); err != nil { - return err - } - if err := p.writeField9(oprot); err != nil { - return err - } - if err := p.writeField10(oprot); err != nil { - return err - } - if err := p.writeField11(oprot); err != nil { - return err - } - if err := p.writeField12(oprot); err != nil { - return err - } - if err := p.writeField13(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) 
- } - return nil -} - -func (p *ColumnMetaData) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) - } - if err := oprot.WriteI32(int32(p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) - } - return err -} - -func (p *ColumnMetaData) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("encodings", thrift.LIST, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encodings: ", p), err) - } - if err := oprot.WriteListBegin(thrift.I32, len(p.Encodings)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.Encodings { - if err := oprot.WriteI32(int32(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encodings: ", p), err) - } - return err -} - -func (p *ColumnMetaData) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("path_in_schema", thrift.LIST, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:path_in_schema: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRING, len(p.PathInSchema)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.PathInSchema { - if err := oprot.WriteString(string(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:path_in_schema: ", p), err) - } - return err -} - -func (p *ColumnMetaData) writeField4(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("codec", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:codec: ", p), err) - } - if err := oprot.WriteI32(int32(p.Codec)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.codec (4) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:codec: ", p), err) - } - return err -} - -func (p *ColumnMetaData) writeField5(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("num_values", thrift.I64, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_values: ", p), err) - } - if err := oprot.WriteI64(p.NumValues); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (5) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_values: ", p), err) - } - return err -} - -func (p *ColumnMetaData) writeField6(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("total_uncompressed_size", thrift.I64, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_uncompressed_size: ", p), err) - } - if err := oprot.WriteI64(p.TotalUncompressedSize); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_uncompressed_size (6) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end 
error 6:total_uncompressed_size: ", p), err) - } - return err -} - -func (p *ColumnMetaData) writeField7(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("total_compressed_size", thrift.I64, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:total_compressed_size: ", p), err) - } - if err := oprot.WriteI64(p.TotalCompressedSize); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (7) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:total_compressed_size: ", p), err) - } - return err -} - -func (p *ColumnMetaData) writeField8(oprot thrift.TProtocol) (err error) { - if p.IsSetKeyValueMetadata() { - if err := oprot.WriteFieldBegin("key_value_metadata", thrift.LIST, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:key_value_metadata: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.KeyValueMetadata { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:key_value_metadata: ", p), err) - } - } - return err -} - -func (p *ColumnMetaData) writeField9(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("data_page_offset", thrift.I64, 9); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:data_page_offset: ", p), err) - } - if err := oprot.WriteI64(p.DataPageOffset); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.data_page_offset (9) field write 
error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 9:data_page_offset: ", p), err) - } - return err -} - -func (p *ColumnMetaData) writeField10(oprot thrift.TProtocol) (err error) { - if p.IsSetIndexPageOffset() { - if err := oprot.WriteFieldBegin("index_page_offset", thrift.I64, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:index_page_offset: ", p), err) - } - if err := oprot.WriteI64(*p.IndexPageOffset); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.index_page_offset (10) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:index_page_offset: ", p), err) - } - } - return err -} - -func (p *ColumnMetaData) writeField11(oprot thrift.TProtocol) (err error) { - if p.IsSetDictionaryPageOffset() { - if err := oprot.WriteFieldBegin("dictionary_page_offset", thrift.I64, 11); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:dictionary_page_offset: ", p), err) - } - if err := oprot.WriteI64(*p.DictionaryPageOffset); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.dictionary_page_offset (11) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 11:dictionary_page_offset: ", p), err) - } - } - return err -} - -func (p *ColumnMetaData) writeField12(oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin("statistics", thrift.STRUCT, 12); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:statistics: ", p), err) - } - if err := p.Statistics.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field end error 12:statistics: ", p), err) - } - } - return err -} - -func (p *ColumnMetaData) writeField13(oprot thrift.TProtocol) (err error) { - if p.IsSetEncodingStats() { - if err := oprot.WriteFieldBegin("encoding_stats", thrift.LIST, 13); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:encoding_stats: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.EncodingStats)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.EncodingStats { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 13:encoding_stats: ", p), err) - } - } - return err -} - -func (p *ColumnMetaData) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnMetaData(%+v)", *p) -} - -// Attributes: -// - FilePath: File where column data is stored. If not set, assumed to be same file as -// metadata. This path is relative to the current file. -// -// - FileOffset: Byte offset in file_path to the ColumnMetaData * -// - MetaData: Column metadata for this chunk. This is the same content as what is at -// file_path/file_offset. Having it here has it replicated in the file -// metadata. 
-// -// - OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex * -// - OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes * -// - ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex * -// - ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes * -type ColumnChunk struct { - FilePath *string `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"` - FileOffset int64 `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"` - MetaData *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"` - OffsetIndexOffset *int64 `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"` - OffsetIndexLength *int32 `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"` - ColumnIndexOffset *int64 `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"` - ColumnIndexLength *int32 `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"` -} - -func NewColumnChunk() *ColumnChunk { - return &ColumnChunk{} -} - -var ColumnChunk_FilePath_DEFAULT string - -func (p *ColumnChunk) GetFilePath() string { - if !p.IsSetFilePath() { - return ColumnChunk_FilePath_DEFAULT - } - return *p.FilePath -} - -func (p *ColumnChunk) GetFileOffset() int64 { - return p.FileOffset -} - -var ColumnChunk_MetaData_DEFAULT *ColumnMetaData - -func (p *ColumnChunk) GetMetaData() *ColumnMetaData { - if !p.IsSetMetaData() { - return ColumnChunk_MetaData_DEFAULT - } - return p.MetaData -} - -var ColumnChunk_OffsetIndexOffset_DEFAULT int64 - -func (p *ColumnChunk) GetOffsetIndexOffset() int64 { - if !p.IsSetOffsetIndexOffset() { - return ColumnChunk_OffsetIndexOffset_DEFAULT - } - return *p.OffsetIndexOffset -} - -var ColumnChunk_OffsetIndexLength_DEFAULT int32 - -func (p *ColumnChunk) GetOffsetIndexLength() int32 { - if !p.IsSetOffsetIndexLength() { - return 
ColumnChunk_OffsetIndexLength_DEFAULT - } - return *p.OffsetIndexLength -} - -var ColumnChunk_ColumnIndexOffset_DEFAULT int64 - -func (p *ColumnChunk) GetColumnIndexOffset() int64 { - if !p.IsSetColumnIndexOffset() { - return ColumnChunk_ColumnIndexOffset_DEFAULT - } - return *p.ColumnIndexOffset -} - -var ColumnChunk_ColumnIndexLength_DEFAULT int32 - -func (p *ColumnChunk) GetColumnIndexLength() int32 { - if !p.IsSetColumnIndexLength() { - return ColumnChunk_ColumnIndexLength_DEFAULT - } - return *p.ColumnIndexLength -} -func (p *ColumnChunk) IsSetFilePath() bool { - return p.FilePath != nil -} - -func (p *ColumnChunk) IsSetMetaData() bool { - return p.MetaData != nil -} - -func (p *ColumnChunk) IsSetOffsetIndexOffset() bool { - return p.OffsetIndexOffset != nil -} - -func (p *ColumnChunk) IsSetOffsetIndexLength() bool { - return p.OffsetIndexLength != nil -} - -func (p *ColumnChunk) IsSetColumnIndexOffset() bool { - return p.ColumnIndexOffset != nil -} - -func (p *ColumnChunk) IsSetColumnIndexLength() bool { - return p.ColumnIndexLength != nil -} - -func (p *ColumnChunk) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetFileOffset bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetFileOffset = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - case 6: - if err := p.ReadField6(iprot); err != nil { - return err - } - 
case 7: - if err := p.ReadField7(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetFileOffset { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FileOffset is not set")) - } - return nil -} - -func (p *ColumnChunk) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.FilePath = &v - } - return nil -} - -func (p *ColumnChunk) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.FileOffset = v - } - return nil -} - -func (p *ColumnChunk) ReadField3(iprot thrift.TProtocol) error { - p.MetaData = &ColumnMetaData{} - if err := p.MetaData.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MetaData), err) - } - return nil -} - -func (p *ColumnChunk) ReadField4(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 4: ", err) - } else { - p.OffsetIndexOffset = &v - } - return nil -} - -func (p *ColumnChunk) ReadField5(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 5: ", err) - } else { - p.OffsetIndexLength = &v - } - return nil -} - -func (p *ColumnChunk) ReadField6(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 6: ", err) - } else { - p.ColumnIndexOffset = &v - } - return nil -} - -func (p *ColumnChunk) ReadField7(iprot thrift.TProtocol) error { - if v, err 
:= iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 7: ", err) - } else { - p.ColumnIndexLength = &v - } - return nil -} - -func (p *ColumnChunk) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("ColumnChunk"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - if err := p.writeField6(oprot); err != nil { - return err - } - if err := p.writeField7(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *ColumnChunk) writeField1(oprot thrift.TProtocol) (err error) { - if p.IsSetFilePath() { - if err := oprot.WriteFieldBegin("file_path", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:file_path: ", p), err) - } - if err := oprot.WriteString(string(*p.FilePath)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.file_path (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:file_path: ", p), err) - } - } - return err -} - -func (p *ColumnChunk) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("file_offset", thrift.I64, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:file_offset: ", p), err) - } - if err := oprot.WriteI64(p.FileOffset); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.file_offset (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:file_offset: ", p), err) - } - return err -} - -func (p *ColumnChunk) writeField3(oprot thrift.TProtocol) (err error) { - if p.IsSetMetaData() { - if err := oprot.WriteFieldBegin("meta_data", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:meta_data: ", p), err) - } - if err := p.MetaData.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MetaData), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:meta_data: ", p), err) - } - } - return err -} - -func (p *ColumnChunk) writeField4(oprot thrift.TProtocol) (err error) { - if p.IsSetOffsetIndexOffset() { - if err := oprot.WriteFieldBegin("offset_index_offset", thrift.I64, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:offset_index_offset: ", p), err) - } - if err := oprot.WriteI64(*p.OffsetIndexOffset); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.offset_index_offset (4) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:offset_index_offset: ", p), err) - } - } - return err -} - -func (p *ColumnChunk) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetOffsetIndexLength() { - if err := oprot.WriteFieldBegin("offset_index_length", thrift.I32, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:offset_index_length: ", p), err) - } - if err := oprot.WriteI32(*p.OffsetIndexLength); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.offset_index_length (5) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - 
return thrift.PrependError(fmt.Sprintf("%T write field end error 5:offset_index_length: ", p), err) - } - } - return err -} - -func (p *ColumnChunk) writeField6(oprot thrift.TProtocol) (err error) { - if p.IsSetColumnIndexOffset() { - if err := oprot.WriteFieldBegin("column_index_offset", thrift.I64, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:column_index_offset: ", p), err) - } - if err := oprot.WriteI64(*p.ColumnIndexOffset); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.column_index_offset (6) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:column_index_offset: ", p), err) - } - } - return err -} - -func (p *ColumnChunk) writeField7(oprot thrift.TProtocol) (err error) { - if p.IsSetColumnIndexLength() { - if err := oprot.WriteFieldBegin("column_index_length", thrift.I32, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_index_length: ", p), err) - } - if err := oprot.WriteI32(*p.ColumnIndexLength); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.column_index_length (7) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_index_length: ", p), err) - } - } - return err -} - -func (p *ColumnChunk) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnChunk(%+v)", *p) -} - -// Attributes: -// - Columns: Metadata for each column chunk in this row group. -// This list must have the same order as the SchemaElement list in FileMetaData. -// -// - TotalByteSize: Total byte size of all the uncompressed column data in this row group * -// - NumRows: Number of rows in this row group * -// - SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup. -// The sorting columns can be a subset of all the columns. 
-type RowGroup struct { - Columns []*ColumnChunk `thrift:"columns,1,required" db:"columns" json:"columns"` - TotalByteSize int64 `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"` - NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` - SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"` -} - -func NewRowGroup() *RowGroup { - return &RowGroup{} -} - -func (p *RowGroup) GetColumns() []*ColumnChunk { - return p.Columns -} - -func (p *RowGroup) GetTotalByteSize() int64 { - return p.TotalByteSize -} - -func (p *RowGroup) GetNumRows() int64 { - return p.NumRows -} - -var RowGroup_SortingColumns_DEFAULT []*SortingColumn - -func (p *RowGroup) GetSortingColumns() []*SortingColumn { - return p.SortingColumns -} -func (p *RowGroup) IsSetSortingColumns() bool { - return p.SortingColumns != nil -} - -func (p *RowGroup) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetColumns bool = false - var issetTotalByteSize bool = false - var issetNumRows bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetColumns = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetTotalByteSize = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetNumRows = true - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != 
nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetColumns { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Columns is not set")) - } - if !issetTotalByteSize { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalByteSize is not set")) - } - if !issetNumRows { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) - } - return nil -} - -func (p *RowGroup) ReadField1(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*ColumnChunk, 0, size) - p.Columns = tSlice - for i := 0; i < size; i++ { - _elem4 := &ColumnChunk{} - if err := _elem4.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem4), err) - } - p.Columns = append(p.Columns, _elem4) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *RowGroup) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.TotalByteSize = v - } - return nil -} - -func (p *RowGroup) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - p.NumRows = v - } - return nil -} - -func (p *RowGroup) ReadField4(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*SortingColumn, 0, size) - p.SortingColumns = tSlice - for i := 0; i < size; i++ { - _elem5 := &SortingColumn{} - if err := _elem5.Read(iprot); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem5), err) - } - p.SortingColumns = append(p.SortingColumns, _elem5) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *RowGroup) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("RowGroup"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *RowGroup) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("columns", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:columns: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.Columns)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.Columns { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:columns: ", p), err) - } - return err -} - -func (p *RowGroup) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("total_byte_size", thrift.I64, 2); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 2:total_byte_size: ", p), err) - } - if err := oprot.WriteI64(p.TotalByteSize); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_byte_size (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:total_byte_size: ", p), err) - } - return err -} - -func (p *RowGroup) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("num_rows", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) - } - if err := oprot.WriteI64(p.NumRows); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) - } - return err -} - -func (p *RowGroup) writeField4(oprot thrift.TProtocol) (err error) { - if p.IsSetSortingColumns() { - if err := oprot.WriteFieldBegin("sorting_columns", thrift.LIST, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:sorting_columns: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.SortingColumns)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.SortingColumns { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:sorting_columns: ", p), err) - } - } - return err -} - -func (p *RowGroup) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("RowGroup(%+v)", *p) -} - -// Empty struct to 
signal the order defined by the physical or logical type -type TypeDefinedOrder struct { -} - -func NewTypeDefinedOrder() *TypeDefinedOrder { - return &TypeDefinedOrder{} -} - -func (p *TypeDefinedOrder) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *TypeDefinedOrder) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("TypeDefinedOrder"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *TypeDefinedOrder) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TypeDefinedOrder(%+v)", *p) -} - -// Union to specify the order used for the min_value and max_value fields for a -// column. This union takes the role of an enhanced enum that allows rich -// elements (which will be needed for a collation-based ordering in the future). -// -// Possible values are: -// * TypeDefinedOrder - the column uses the order defined by its logical or -// physical type (if there is no logical type). 
-// -// If the reader does not support the value of this union, min and max stats -// for this column should be ignored. -// -// Attributes: -// - TYPE_ORDER: The sort orders for logical types are: -// UTF8 - unsigned byte-wise comparison -// INT8 - signed comparison -// INT16 - signed comparison -// INT32 - signed comparison -// INT64 - signed comparison -// UINT8 - unsigned comparison -// UINT16 - unsigned comparison -// UINT32 - unsigned comparison -// UINT64 - unsigned comparison -// DECIMAL - signed comparison of the represented value -// DATE - signed comparison -// TIME_MILLIS - signed comparison -// TIME_MICROS - signed comparison -// TIMESTAMP_MILLIS - signed comparison -// TIMESTAMP_MICROS - signed comparison -// INTERVAL - unsigned comparison -// JSON - unsigned byte-wise comparison -// BSON - unsigned byte-wise comparison -// ENUM - unsigned byte-wise comparison -// LIST - undefined -// MAP - undefined -// -// In the absence of logical types, the sort order is determined by the physical type: -// BOOLEAN - false, true -// INT32 - signed comparison -// INT64 - signed comparison -// INT96 (only used for legacy timestamps) - undefined -// FLOAT - signed comparison of the represented value (*) -// DOUBLE - signed comparison of the represented value (*) -// BYTE_ARRAY - unsigned byte-wise comparison -// FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison -// -// (*) Because the sorting order is not specified properly for floating -// point values (relations vs. total ordering) the following -// compatibility rules should be applied when reading statistics: -// - If the min is a NaN, it should be ignored. -// - If the max is a NaN, it should be ignored. -// - If the min is +0, the row group may contain -0 values as well. -// - If the max is -0, the row group may contain +0 values as well. -// - When looking for NaN values, min and max should be ignored. 
-type ColumnOrder struct { - TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"` -} - -func NewColumnOrder() *ColumnOrder { - return &ColumnOrder{} -} - -var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder - -func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder { - if !p.IsSetTYPE_ORDER() { - return ColumnOrder_TYPE_ORDER_DEFAULT - } - return p.TYPE_ORDER -} -func (p *ColumnOrder) CountSetFieldsColumnOrder() int { - count := 0 - if p.IsSetTYPE_ORDER() { - count++ - } - return count - -} - -func (p *ColumnOrder) IsSetTYPE_ORDER() bool { - return p.TYPE_ORDER != nil -} - -func (p *ColumnOrder) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *ColumnOrder) ReadField1(iprot thrift.TProtocol) error { - p.TYPE_ORDER = &TypeDefinedOrder{} - if err := p.TYPE_ORDER.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TYPE_ORDER), err) - } - return nil -} - -func (p *ColumnOrder) Write(oprot thrift.TProtocol) error { - if c := p.CountSetFieldsColumnOrder(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c) - } - if err := oprot.WriteStructBegin("ColumnOrder"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T 
write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *ColumnOrder) writeField1(oprot thrift.TProtocol) (err error) { - if p.IsSetTYPE_ORDER() { - if err := oprot.WriteFieldBegin("TYPE_ORDER", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:TYPE_ORDER: ", p), err) - } - if err := p.TYPE_ORDER.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TYPE_ORDER), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:TYPE_ORDER: ", p), err) - } - } - return err -} - -func (p *ColumnOrder) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnOrder(%+v)", *p) -} - -// Attributes: -// - Offset: Offset of the page in the file * -// - CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header -// length -// - FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages -// change on record boundaries (r = 0). 
-type PageLocation struct { - Offset int64 `thrift:"offset,1,required" db:"offset" json:"offset"` - CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"` - FirstRowIndex int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"` -} - -func NewPageLocation() *PageLocation { - return &PageLocation{} -} - -func (p *PageLocation) GetOffset() int64 { - return p.Offset -} - -func (p *PageLocation) GetCompressedPageSize() int32 { - return p.CompressedPageSize -} - -func (p *PageLocation) GetFirstRowIndex() int64 { - return p.FirstRowIndex -} -func (p *PageLocation) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetOffset bool = false - var issetCompressedPageSize bool = false - var issetFirstRowIndex bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetOffset = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetCompressedPageSize = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetFirstRowIndex = true - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetOffset { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Offset is not set")) - } - if !issetCompressedPageSize { - return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")) - } - if !issetFirstRowIndex { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FirstRowIndex is not set")) - } - return nil -} - -func (p *PageLocation) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - p.Offset = v - } - return nil -} - -func (p *PageLocation) ReadField2(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 2: ", err) - } else { - p.CompressedPageSize = v - } - return nil -} - -func (p *PageLocation) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - p.FirstRowIndex = v - } - return nil -} - -func (p *PageLocation) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("PageLocation"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *PageLocation) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("offset", thrift.I64, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:offset: ", p), err) - } - if err := oprot.WriteI64(p.Offset); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.offset (1) 
field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:offset: ", p), err) - } - return err -} - -func (p *PageLocation) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("compressed_page_size", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:compressed_page_size: ", p), err) - } - if err := oprot.WriteI32(p.CompressedPageSize); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (2) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:compressed_page_size: ", p), err) - } - return err -} - -func (p *PageLocation) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("first_row_index", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:first_row_index: ", p), err) - } - if err := oprot.WriteI64(p.FirstRowIndex); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.first_row_index (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:first_row_index: ", p), err) - } - return err -} - -func (p *PageLocation) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageLocation(%+v)", *p) -} - -// Attributes: -// - PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required -// that page_locations[i].first_row_index < page_locations[i+1].first_row_index. 
-type OffsetIndex struct { - PageLocations []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"` -} - -func NewOffsetIndex() *OffsetIndex { - return &OffsetIndex{} -} - -func (p *OffsetIndex) GetPageLocations() []*PageLocation { - return p.PageLocations -} -func (p *OffsetIndex) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetPageLocations bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetPageLocations = true - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetPageLocations { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageLocations is not set")) - } - return nil -} - -func (p *OffsetIndex) ReadField1(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*PageLocation, 0, size) - p.PageLocations = tSlice - for i := 0; i < size; i++ { - _elem6 := &PageLocation{} - if err := _elem6.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem6), err) - } - p.PageLocations = append(p.PageLocations, _elem6) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *OffsetIndex) Write(oprot 
thrift.TProtocol) error { - if err := oprot.WriteStructBegin("OffsetIndex"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *OffsetIndex) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("page_locations", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_locations: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.PageLocations)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.PageLocations { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_locations: ", p), err) - } - return err -} - -func (p *OffsetIndex) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("OffsetIndex(%+v)", *p) -} - -// Description for ColumnIndex. -// Each [i] refers to the page at OffsetIndex.page_locations[i] -// -// Attributes: -// - NullPages: A list of Boolean values to determine the validity of the corresponding -// min and max values. If true, a page contains only null values, and writers -// have to set the corresponding entries in min_values and max_values to -// byte[0], so that all lists have the same length. If false, the -// corresponding entries in min_values and max_values must be valid. 
-// - MinValues: Two lists containing lower and upper bounds for the values of each page. -// These may be the actual minimum and maximum values found on a page, but -// can also be (more compact) values that do not exist on a page. For -// example, instead of storing ""Blart Versenwald III", a writer may set -// min_values[i]="B", max_values[i]="C". Such more compact values must still -// be valid values within the column's logical type. Readers must make sure -// that list entries are populated before using them by inspecting null_pages. -// - MaxValues -// - BoundaryOrder: Stores whether both min_values and max_values are orderd and if so, in -// which direction. This allows readers to perform binary searches in both -// lists. Readers cannot assume that max_values[i] <= min_values[i+1], even -// if the lists are ordered. -// - NullCounts: A list containing the number of null values for each page * -type ColumnIndex struct { - NullPages []bool `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"` - MinValues [][]byte `thrift:"min_values,2,required" db:"min_values" json:"min_values"` - MaxValues [][]byte `thrift:"max_values,3,required" db:"max_values" json:"max_values"` - BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"` - NullCounts []int64 `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"` -} - -func NewColumnIndex() *ColumnIndex { - return &ColumnIndex{} -} - -func (p *ColumnIndex) GetNullPages() []bool { - return p.NullPages -} - -func (p *ColumnIndex) GetMinValues() [][]byte { - return p.MinValues -} - -func (p *ColumnIndex) GetMaxValues() [][]byte { - return p.MaxValues -} - -func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder { - return p.BoundaryOrder -} - -var ColumnIndex_NullCounts_DEFAULT []int64 - -func (p *ColumnIndex) GetNullCounts() []int64 { - return p.NullCounts -} -func (p *ColumnIndex) IsSetNullCounts() bool { - return p.NullCounts != nil -} - -func (p 
*ColumnIndex) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNullPages bool = false - var issetMinValues bool = false - var issetMaxValues bool = false - var issetBoundaryOrder bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetNullPages = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetMinValues = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetMaxValues = true - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - issetBoundaryOrder = true - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNullPages { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullPages is not set")) - } - if !issetMinValues { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MinValues is not set")) - } - if !issetMaxValues { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MaxValues is not set")) - } - if !issetBoundaryOrder { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BoundaryOrder is not set")) - } - return nil -} - -func (p *ColumnIndex) ReadField1(iprot thrift.TProtocol) error { - _, size, err := 
iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]bool, 0, size) - p.NullPages = tSlice - for i := 0; i < size; i++ { - var _elem7 bool - if v, err := iprot.ReadBool(); err != nil { - return thrift.PrependError("error reading field 0: ", err) - } else { - _elem7 = v - } - p.NullPages = append(p.NullPages, _elem7) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField2(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([][]byte, 0, size) - p.MinValues = tSlice - for i := 0; i < size; i++ { - var _elem8 []byte - if v, err := iprot.ReadBinary(); err != nil { - return thrift.PrependError("error reading field 0: ", err) - } else { - _elem8 = v - } - p.MinValues = append(p.MinValues, _elem8) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField3(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([][]byte, 0, size) - p.MaxValues = tSlice - for i := 0; i < size; i++ { - var _elem9 []byte - if v, err := iprot.ReadBinary(); err != nil { - return thrift.PrependError("error reading field 0: ", err) - } else { - _elem9 = v - } - p.MaxValues = append(p.MaxValues, _elem9) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField4(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 4: ", err) - } else { - temp := BoundaryOrder(v) - p.BoundaryOrder = temp - } - 
return nil -} - -func (p *ColumnIndex) ReadField5(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]int64, 0, size) - p.NullCounts = tSlice - for i := 0; i < size; i++ { - var _elem10 int64 - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 0: ", err) - } else { - _elem10 = v - } - p.NullCounts = append(p.NullCounts, _elem10) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("ColumnIndex"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *ColumnIndex) writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("null_pages", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:null_pages: ", p), err) - } - if err := oprot.WriteListBegin(thrift.BOOL, len(p.NullPages)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.NullPages { - if err := oprot.WriteBool(v); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:null_pages: ", p), err) - } - return err -} - -func (p *ColumnIndex) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("min_values", thrift.LIST, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min_values: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRING, len(p.MinValues)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.MinValues { - if err := oprot.WriteBinary(v); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min_values: ", p), err) - } - return err -} - -func (p *ColumnIndex) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("max_values", thrift.LIST, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:max_values: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRING, len(p.MaxValues)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.MaxValues { - if err := oprot.WriteBinary(v); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:max_values: ", p), err) - } - return err -} - -func (p *ColumnIndex) writeField4(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("boundary_order", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:boundary_order: ", p), err) - } - if err := oprot.WriteI32(int32(p.BoundaryOrder)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.boundary_order (4) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:boundary_order: ", p), err) - } - return err -} - -func (p *ColumnIndex) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetNullCounts() { - if err := oprot.WriteFieldBegin("null_counts", thrift.LIST, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:null_counts: ", p), err) - } - if err := oprot.WriteListBegin(thrift.I64, len(p.NullCounts)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.NullCounts { - if err := oprot.WriteI64(v); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:null_counts: ", p), err) - } - } - return err -} - -func (p *ColumnIndex) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnIndex(%+v)", *p) -} - -// Description for file metadata -// -// Attributes: -// - Version: Version of this file * -// - Schema: Parquet schema for this file. This schema contains metadata for all the columns. -// The schema is represented as a tree with a single root. The nodes of the tree -// are flattened to a list by doing a depth-first traversal. -// The column metadata contains the path in the schema for that column which can be -// used to map columns to nodes in the schema. -// The first element is the root * -// - NumRows: Number of rows in this file * -// - RowGroups: Row groups in this file * -// - KeyValueMetadata: Optional key/value metadata * -// - CreatedBy: String for application that wrote this file. This should be in the format -// version (build ). -// e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) -// -// - ColumnOrders: Sort order used for the min_value and max_value fields of each column in -// this file. Each sort order corresponds to one column, determined by its -// position in the list, matching the position of the column in the schema. -// -// Without column_orders, the meaning of the min_value and max_value fields is -// undefined. To ensure well-defined behavior, if min_value and max_value are -// written to a Parquet file, column_orders must be written as well. -// -// The obsolete min and max fields are always sorted by signed comparison -// regardless of column_orders. 
-type FileMetaData struct { - Version int32 `thrift:"version,1,required" db:"version" json:"version"` - Schema []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"` - NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` - RowGroups []*RowGroup `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"` - KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"` - CreatedBy *string `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"` - ColumnOrders []*ColumnOrder `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"` -} - -func NewFileMetaData() *FileMetaData { - return &FileMetaData{} -} - -func (p *FileMetaData) GetVersion() int32 { - return p.Version -} - -func (p *FileMetaData) GetSchema() []*SchemaElement { - return p.Schema -} - -func (p *FileMetaData) GetNumRows() int64 { - return p.NumRows -} - -func (p *FileMetaData) GetRowGroups() []*RowGroup { - return p.RowGroups -} - -var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue - -func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue { - return p.KeyValueMetadata -} - -var FileMetaData_CreatedBy_DEFAULT string - -func (p *FileMetaData) GetCreatedBy() string { - if !p.IsSetCreatedBy() { - return FileMetaData_CreatedBy_DEFAULT - } - return *p.CreatedBy -} - -var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder - -func (p *FileMetaData) GetColumnOrders() []*ColumnOrder { - return p.ColumnOrders -} -func (p *FileMetaData) IsSetKeyValueMetadata() bool { - return p.KeyValueMetadata != nil -} - -func (p *FileMetaData) IsSetCreatedBy() bool { - return p.CreatedBy != nil -} - -func (p *FileMetaData) IsSetColumnOrders() bool { - return p.ColumnOrders != nil -} - -func (p *FileMetaData) Read(iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetVersion bool = 
false - var issetSchema bool = false - var issetNumRows bool = false - var issetRowGroups bool = false - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { - break - } - switch fieldId { - case 1: - if err := p.ReadField1(iprot); err != nil { - return err - } - issetVersion = true - case 2: - if err := p.ReadField2(iprot); err != nil { - return err - } - issetSchema = true - case 3: - if err := p.ReadField3(iprot); err != nil { - return err - } - issetNumRows = true - case 4: - if err := p.ReadField4(iprot); err != nil { - return err - } - issetRowGroups = true - case 5: - if err := p.ReadField5(iprot); err != nil { - return err - } - case 6: - if err := p.ReadField6(iprot); err != nil { - return err - } - case 7: - if err := p.ReadField7(iprot); err != nil { - return err - } - default: - if err := iprot.Skip(fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetVersion { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Version is not set")) - } - if !issetSchema { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Schema is not set")) - } - if !issetNumRows { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) - } - if !issetRowGroups { - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RowGroups is not set")) - } - return nil -} - -func (p *FileMetaData) ReadField1(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(); err != nil { - return thrift.PrependError("error reading field 1: ", err) - } else { - 
p.Version = v - } - return nil -} - -func (p *FileMetaData) ReadField2(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*SchemaElement, 0, size) - p.Schema = tSlice - for i := 0; i < size; i++ { - _elem11 := &SchemaElement{} - if err := _elem11.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem11), err) - } - p.Schema = append(p.Schema, _elem11) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField3(iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(); err != nil { - return thrift.PrependError("error reading field 3: ", err) - } else { - p.NumRows = v - } - return nil -} - -func (p *FileMetaData) ReadField4(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*RowGroup, 0, size) - p.RowGroups = tSlice - for i := 0; i < size; i++ { - _elem12 := &RowGroup{} - if err := _elem12.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem12), err) - } - p.RowGroups = append(p.RowGroups, _elem12) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField5(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*KeyValue, 0, size) - p.KeyValueMetadata = tSlice - for i := 0; i < size; i++ { - _elem13 := &KeyValue{} - if err := _elem13.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem13), err) - } - p.KeyValueMetadata = 
append(p.KeyValueMetadata, _elem13) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField6(iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(); err != nil { - return thrift.PrependError("error reading field 6: ", err) - } else { - p.CreatedBy = &v - } - return nil -} - -func (p *FileMetaData) ReadField7(iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin() - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*ColumnOrder, 0, size) - p.ColumnOrders = tSlice - for i := 0; i < size; i++ { - _elem14 := &ColumnOrder{} - if err := _elem14.Read(iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem14), err) - } - p.ColumnOrders = append(p.ColumnOrders, _elem14) - } - if err := iprot.ReadListEnd(); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) Write(oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin("FileMetaData"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) - } - if p != nil { - if err := p.writeField1(oprot); err != nil { - return err - } - if err := p.writeField2(oprot); err != nil { - return err - } - if err := p.writeField3(oprot); err != nil { - return err - } - if err := p.writeField4(oprot); err != nil { - return err - } - if err := p.writeField5(oprot); err != nil { - return err - } - if err := p.writeField6(oprot); err != nil { - return err - } - if err := p.writeField7(oprot); err != nil { - return err - } - } - if err := oprot.WriteFieldStop(); err != nil { - return thrift.PrependError("write field stop error: ", err) - } - if err := oprot.WriteStructEnd(); err != nil { - return thrift.PrependError("write struct stop error: ", err) - } - return nil -} - -func (p *FileMetaData) 
writeField1(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("version", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:version: ", p), err) - } - if err := oprot.WriteI32(p.Version); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.version (1) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:version: ", p), err) - } - return err -} - -func (p *FileMetaData) writeField2(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("schema", thrift.LIST, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:schema: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.Schema)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.Schema { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:schema: ", p), err) - } - return err -} - -func (p *FileMetaData) writeField3(oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin("num_rows", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) - } - if err := oprot.WriteI64(p.NumRows); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) - } - return err -} - -func (p *FileMetaData) writeField4(oprot thrift.TProtocol) (err error) { - if err := 
oprot.WriteFieldBegin("row_groups", thrift.LIST, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:row_groups: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.RowGroups)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.RowGroups { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:row_groups: ", p), err) - } - return err -} - -func (p *FileMetaData) writeField5(oprot thrift.TProtocol) (err error) { - if p.IsSetKeyValueMetadata() { - if err := oprot.WriteFieldBegin("key_value_metadata", thrift.LIST, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:key_value_metadata: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.KeyValueMetadata { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:key_value_metadata: ", p), err) - } - } - return err -} - -func (p *FileMetaData) writeField6(oprot thrift.TProtocol) (err error) { - if p.IsSetCreatedBy() { - if err := oprot.WriteFieldBegin("created_by", thrift.STRING, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:created_by: ", p), err) - } - if err := oprot.WriteString(string(*p.CreatedBy)); err 
!= nil { - return thrift.PrependError(fmt.Sprintf("%T.created_by (6) field write error: ", p), err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:created_by: ", p), err) - } - } - return err -} - -func (p *FileMetaData) writeField7(oprot thrift.TProtocol) (err error) { - if p.IsSetColumnOrders() { - if err := oprot.WriteFieldBegin("column_orders", thrift.LIST, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_orders: ", p), err) - } - if err := oprot.WriteListBegin(thrift.STRUCT, len(p.ColumnOrders)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.ColumnOrders { - if err := v.Write(oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_orders: ", p), err) - } - } - return err -} - -func (p *FileMetaData) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("FileMetaData(%+v)", *p) -} diff --git a/pkg/s3select/internal/parquet-go/gen-parquet-format-pkg.sh b/pkg/s3select/internal/parquet-go/gen-parquet-format-pkg.sh deleted file mode 100644 index aad245caf..000000000 --- a/pkg/s3select/internal/parquet-go/gen-parquet-format-pkg.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -# -# - -set -e - -rm -f parquet.thrift -wget -q https://github.com/apache/parquet-format/raw/df6132b94f273521a418a74442085fdd5a0aa009/src/main/thrift/parquet.thrift -thrift --gen go parquet.thrift -gofmt -w -s gen-go/parquet diff --git a/pkg/s3select/internal/parquet-go/page.go b/pkg/s3select/internal/parquet-go/page.go deleted file mode 100644 index cd02bd9ee..000000000 --- a/pkg/s3select/internal/parquet-go/page.go +++ 
/dev/null @@ -1,824 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "math" - "strings" - - "git.apache.org/thrift.git/lib/go/thrift" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -// getBitWidth - returns bits required to place num e.g. -// -// num | width -// -----|------- -// 0 | 0 -// 1 | 1 -// 2 | 2 -// 3 | 2 -// 4 | 3 -// 5 | 3 -// ... | ... -// ... | ... -// -func getBitWidth(num uint64) (width uint64) { - for ; num != 0; num >>= 1 { - width++ - } - - return width -} - -// getMaxDefLevel - get maximum definition level. -func getMaxDefLevel(nameIndexMap map[string]int, schemaElements []*parquet.SchemaElement, path []string) (v int) { - for i := 1; i <= len(path); i++ { - name := strings.Join(path[:i], ".") - if index, ok := nameIndexMap[name]; ok { - if schemaElements[index].GetRepetitionType() != parquet.FieldRepetitionType_REQUIRED { - v++ - } - } - } - - return v -} - -// getMaxRepLevel - get maximum repetition level. 
-func getMaxRepLevel(nameIndexMap map[string]int, schemaElements []*parquet.SchemaElement, path []string) (v int) { - for i := 1; i <= len(path); i++ { - name := strings.Join(path[:i], ".") - if index, ok := nameIndexMap[name]; ok { - if schemaElements[index].GetRepetitionType() == parquet.FieldRepetitionType_REPEATED { - v++ - } - } - } - - return v -} - -func readPageHeader(reader *thrift.TBufferedTransport) (*parquet.PageHeader, error) { - pageHeader := parquet.NewPageHeader() - if err := pageHeader.Read(thrift.NewTCompactProtocol(reader)); err != nil { - return nil, err - } - - return pageHeader, nil -} - -func readPage( - thriftReader *thrift.TBufferedTransport, - metadata *parquet.ColumnMetaData, - columnNameIndexMap map[string]int, - schemaElements []*parquet.SchemaElement, -) (page *page, definitionLevels, numRows int64, err error) { - - pageHeader, err := readPageHeader(thriftReader) - if err != nil { - return nil, 0, 0, err - } - - read := func() (data []byte, err error) { - var repLevelsLen, defLevelsLen int32 - var repLevelsBuf, defLevelsBuf []byte - - if pageHeader.GetType() == parquet.PageType_DATA_PAGE_V2 { - if pageHeader.DataPageHeaderV2 == nil { - return nil, errors.New("parquet: Header not set") - } - repLevelsLen = pageHeader.DataPageHeaderV2.GetRepetitionLevelsByteLength() - repLevelsBuf = make([]byte, repLevelsLen) - - n, err := io.ReadFull(thriftReader, repLevelsBuf) - if err != nil { - return nil, err - } - if n != int(repLevelsLen) { - return nil, fmt.Errorf("expected parquet header repetition levels %d, got %d", repLevelsLen, n) - } - - defLevelsLen = pageHeader.DataPageHeaderV2.GetDefinitionLevelsByteLength() - defLevelsBuf = make([]byte, defLevelsLen) - - n, err = io.ReadFull(thriftReader, defLevelsBuf) - if err != nil { - return nil, err - } - if n != int(defLevelsLen) { - return nil, fmt.Errorf("expected parquet header definition levels %d, got %d", defLevelsLen, n) - } - } - dbLen := pageHeader.GetCompressedPageSize() - repLevelsLen - 
defLevelsLen - if dbLen < 0 { - return nil, errors.New("parquet: negative data length") - } - - dataBuf := make([]byte, dbLen) - n, err := io.ReadFull(thriftReader, dataBuf) - if err != nil { - return nil, err - } - if n != int(dbLen) { - return nil, fmt.Errorf("expected parquet data buffer %d, got %d", dbLen, n) - } - - if dataBuf, err = compressionCodec(metadata.GetCodec()).uncompress(dataBuf); err != nil { - return nil, err - } - - if repLevelsLen == 0 && defLevelsLen == 0 { - return dataBuf, nil - } - - if repLevelsLen > 0 { - data = append(data, uint32ToBytes(uint32(repLevelsLen))...) - data = append(data, repLevelsBuf...) - } - - if defLevelsLen > 0 { - data = append(data, uint32ToBytes(uint32(defLevelsLen))...) - data = append(data, defLevelsBuf...) - } - - data = append(data, dataBuf...) - - return data, nil - } - - buf, err := read() - if err != nil { - return nil, 0, 0, err - } - if metadata == nil { - return nil, 0, 0, errors.New("parquet: metadata not set") - } - path := append([]string{}, metadata.GetPathInSchema()...) 
- - bytesReader := bytes.NewReader(buf) - pageType := pageHeader.GetType() - switch pageType { - case parquet.PageType_INDEX_PAGE: - return nil, 0, 0, fmt.Errorf("page type %v is not supported", parquet.PageType_INDEX_PAGE) - - case parquet.PageType_DICTIONARY_PAGE: - page = newDictPage() - page.Header = pageHeader - table := new(table) - table.Path = path - if pageHeader.DictionaryPageHeader == nil { - return nil, 0, 0, errors.New("parquet: dictionary not set") - } - values, err := readValues(bytesReader, metadata.GetType(), - uint64(pageHeader.DictionaryPageHeader.GetNumValues()), 0) - if err != nil { - return nil, 0, 0, err - } - table.Values = getTableValues(values, metadata.GetType()) - page.DataTable = table - - return page, 0, 0, nil - - case parquet.PageType_DATA_PAGE, parquet.PageType_DATA_PAGE_V2: - name := strings.Join(path, ".") - - page = newDataPage() - page.Header = pageHeader - - maxDefinitionLevel := getMaxDefLevel(columnNameIndexMap, schemaElements, path) - maxRepetitionLevel := getMaxRepLevel(columnNameIndexMap, schemaElements, path) - - var numValues uint64 - var encodingType parquet.Encoding - - if pageHeader.GetType() == parquet.PageType_DATA_PAGE { - if pageHeader.DataPageHeader == nil { - return nil, 0, 0, errors.New("parquet: Header not set") - } - numValues = uint64(pageHeader.DataPageHeader.GetNumValues()) - encodingType = pageHeader.DataPageHeader.GetEncoding() - } else { - if pageHeader.DataPageHeaderV2 == nil { - return nil, 0, 0, errors.New("parquet: Header not set") - } - numValues = uint64(pageHeader.DataPageHeaderV2.GetNumValues()) - encodingType = pageHeader.DataPageHeaderV2.GetEncoding() - } - - var repetitionLevels []int64 - if maxRepetitionLevel > 0 { - values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, - -1, numValues, getBitWidth(uint64(maxRepetitionLevel))) - if err != nil { - return nil, 0, 0, err - } - - if repetitionLevels = values.([]int64); len(repetitionLevels) > int(numValues) 
&& int(numValues) >= 0 { - repetitionLevels = repetitionLevels[:numValues] - } - } else { - if numValues > math.MaxInt64/8 { - return nil, 0, 0, errors.New("parquet: numvalues too large") - } - repetitionLevels = make([]int64, numValues) - } - - var definitionLevels []int64 - if maxDefinitionLevel > 0 { - values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, - -1, numValues, getBitWidth(uint64(maxDefinitionLevel))) - if err != nil { - return nil, 0, 0, err - } - if numValues > math.MaxInt64/8 { - return nil, 0, 0, errors.New("parquet: numvalues too large") - } - if definitionLevels = values.([]int64); len(definitionLevels) > int(numValues) { - definitionLevels = definitionLevels[:numValues] - } - } else { - if numValues > math.MaxInt64/8 { - return nil, 0, 0, errors.New("parquet: numvalues too large") - } - definitionLevels = make([]int64, numValues) - } - - var numNulls uint64 - for i := 0; i < len(definitionLevels); i++ { - if definitionLevels[i] != int64(maxDefinitionLevel) { - numNulls++ - } - } - - var convertedType parquet.ConvertedType = -1 - if schemaElements[columnNameIndexMap[name]].IsSetConvertedType() { - convertedType = schemaElements[columnNameIndexMap[name]].GetConvertedType() - } - values, valueType, err := readDataPageValues(bytesReader, encodingType, metadata.GetType(), - convertedType, uint64(len(definitionLevels))-numNulls, - uint64(schemaElements[columnNameIndexMap[name]].GetTypeLength())) - if err != nil { - return nil, 0, 0, err - } - tableValues := getTableValues(values, valueType) - - table := new(table) - table.Path = path - table.RepetitionType = schemaElements[columnNameIndexMap[name]].GetRepetitionType() - table.MaxRepetitionLevel = int32(maxRepetitionLevel) - table.MaxDefinitionLevel = int32(maxDefinitionLevel) - table.Values = make([]interface{}, len(definitionLevels)) - table.RepetitionLevels = make([]int32, len(definitionLevels)) - table.DefinitionLevels = make([]int32, len(definitionLevels)) - 
- j := 0 - numRows := int64(0) - for i := 0; i < len(definitionLevels); i++ { - table.RepetitionLevels[i] = int32(repetitionLevels[i]) - table.DefinitionLevels[i] = int32(definitionLevels[i]) - if int(table.DefinitionLevels[i]) == maxDefinitionLevel { - table.Values[i] = tableValues[j] - j++ - } - if table.RepetitionLevels[i] == 0 { - numRows++ - } - } - page.DataTable = table - - return page, int64(len(definitionLevels)), numRows, nil - } - - return nil, 0, 0, fmt.Errorf("unknown page type %v", pageType) -} - -type page struct { - Header *parquet.PageHeader // Header of a page - DataTable *table // Table to store values - RawData []byte // Compressed data of the page, which is written in parquet file - CompressType parquet.CompressionCodec // Compress type: gzip/snappy/none - DataType parquet.Type // Parquet type of the values in the page - Path []string // Path in schema(include the root) - MaxVal interface{} // Maximum of the values - MinVal interface{} // Minimum of the values - PageSize int32 -} - -func newPage() *page { - return &page{ - Header: parquet.NewPageHeader(), - PageSize: defaultPageSize, - } -} - -func newDictPage() *page { - page := newPage() - page.Header.DictionaryPageHeader = parquet.NewDictionaryPageHeader() - return page -} - -func newDataPage() *page { - page := newPage() - page.Header.DataPageHeader = parquet.NewDataPageHeader() - return page -} - -func (page *page) decode(dictPage *page) { - if dictPage == nil || page == nil || page.Header.DataPageHeader == nil || - (page.Header.DataPageHeader.Encoding != parquet.Encoding_RLE_DICTIONARY && - page.Header.DataPageHeader.Encoding != parquet.Encoding_PLAIN_DICTIONARY) { - return - } - - for i := 0; i < len(page.DataTable.Values); i++ { - if page.DataTable.Values[i] != nil { - index, ok := page.DataTable.Values[i].(int64) - if !ok || int(index) >= len(dictPage.DataTable.Values) { - return - } - page.DataTable.Values[i] = dictPage.DataTable.Values[index] - } - } -} - -// Get RepetitionLevels and 
Definitions from RawData -func (page *page) getRLDLFromRawData(columnNameIndexMap map[string]int, schemaElements []*parquet.SchemaElement) (numValues int64, numRows int64, err error) { - bytesReader := bytes.NewReader(page.RawData) - - pageType := page.Header.GetType() - - var buf []byte - if pageType == parquet.PageType_DATA_PAGE_V2 { - var repLevelsLen, defLevelsLen int32 - var repLevelsBuf, defLevelsBuf []byte - if page.Header.DataPageHeaderV2 == nil { - return 0, 0, errors.New("parquet: Header not set") - } - repLevelsLen = page.Header.DataPageHeaderV2.GetRepetitionLevelsByteLength() - repLevelsBuf = make([]byte, repLevelsLen) - if _, err = bytesReader.Read(repLevelsBuf); err != nil { - return 0, 0, err - } - - defLevelsLen = page.Header.DataPageHeaderV2.GetDefinitionLevelsByteLength() - defLevelsBuf = make([]byte, defLevelsLen) - if _, err = bytesReader.Read(defLevelsBuf); err != nil { - return 0, 0, err - } - - dataBuf := make([]byte, len(page.RawData)-int(repLevelsLen)-int(defLevelsLen)) - if _, err = bytesReader.Read(dataBuf); err != nil { - return 0, 0, err - } - - if repLevelsLen == 0 && defLevelsLen == 0 { - buf = dataBuf - } else { - if repLevelsLen > 0 { - buf = append(buf, uint32ToBytes(uint32(repLevelsLen))...) - buf = append(buf, repLevelsBuf...) - } - - if defLevelsLen > 0 { - buf = append(buf, uint32ToBytes(uint32(defLevelsLen))...) - buf = append(buf, defLevelsBuf...) - } - - buf = append(buf, dataBuf...) 
- } - } else { - if buf, err = compressionCodec(page.CompressType).uncompress(page.RawData); err != nil { - return 0, 0, err - } - } - - bytesReader = bytes.NewReader(buf) - - switch pageType { - case parquet.PageType_DICTIONARY_PAGE: - table := new(table) - table.Path = page.Path - page.DataTable = table - return 0, 0, nil - - case parquet.PageType_DATA_PAGE, parquet.PageType_DATA_PAGE_V2: - var numValues uint64 - if pageType == parquet.PageType_DATA_PAGE { - if page.Header.DataPageHeader == nil { - return 0, 0, errors.New("parquet: Header not set") - } - numValues = uint64(page.Header.DataPageHeader.GetNumValues()) - } else { - if page.Header.DataPageHeaderV2 == nil { - return 0, 0, errors.New("parquet: Header not set") - } - numValues = uint64(page.Header.DataPageHeaderV2.GetNumValues()) - } - - maxDefinitionLevel := getMaxDefLevel(columnNameIndexMap, schemaElements, page.Path) - maxRepetitionLevel := getMaxRepLevel(columnNameIndexMap, schemaElements, page.Path) - - var repetitionLevels []int64 - if maxRepetitionLevel > 0 { - values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, - -1, numValues, getBitWidth(uint64(maxRepetitionLevel))) - if err != nil { - return 0, 0, err - } - - if repetitionLevels = values.([]int64); uint64(len(repetitionLevels)) > numValues { - repetitionLevels = repetitionLevels[:numValues] - } - } else { - repetitionLevels = make([]int64, numValues) - } - - var definitionLevels []int64 - if maxDefinitionLevel > 0 { - values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, - -1, numValues, getBitWidth(uint64(maxDefinitionLevel))) - if err != nil { - return 0, 0, err - } - if definitionLevels = values.([]int64); uint64(len(definitionLevels)) > numValues { - definitionLevels = definitionLevels[:numValues] - } - } else { - definitionLevels = make([]int64, numValues) - } - - table := new(table) - table.Path = page.Path - name := strings.Join(page.Path, ".") - 
table.RepetitionType = schemaElements[columnNameIndexMap[name]].GetRepetitionType() - table.MaxRepetitionLevel = int32(maxRepetitionLevel) - table.MaxDefinitionLevel = int32(maxDefinitionLevel) - table.Values = make([]interface{}, len(definitionLevels)) - table.RepetitionLevels = make([]int32, len(definitionLevels)) - table.DefinitionLevels = make([]int32, len(definitionLevels)) - - numRows := int64(0) - for i := 0; i < len(definitionLevels); i++ { - table.RepetitionLevels[i] = int32(repetitionLevels[i]) - table.DefinitionLevels[i] = int32(definitionLevels[i]) - if table.RepetitionLevels[i] == 0 { - numRows++ - } - } - page.DataTable = table - page.RawData = buf[len(buf)-bytesReader.Len():] - - return int64(numValues), numRows, nil - } - - return 0, 0, fmt.Errorf("Unsupported page type %v", pageType) -} - -func (page *page) getValueFromRawData(columnNameIndexMap map[string]int, schemaElements []*parquet.SchemaElement) (err error) { - pageType := page.Header.GetType() - switch pageType { - case parquet.PageType_DICTIONARY_PAGE: - bytesReader := bytes.NewReader(page.RawData) - var values interface{} - if page.Header.DictionaryPageHeader == nil { - return errors.New("parquet: dictionary not set") - } - values, err = readValues(bytesReader, page.DataType, - uint64(page.Header.DictionaryPageHeader.GetNumValues()), 0) - if err != nil { - return err - } - - page.DataTable.Values = getTableValues(values, page.DataType) - return nil - - case parquet.PageType_DATA_PAGE_V2: - if page.RawData, err = compressionCodec(page.CompressType).uncompress(page.RawData); err != nil { - return err - } - fallthrough - case parquet.PageType_DATA_PAGE: - encodingType := page.Header.DataPageHeader.GetEncoding() - bytesReader := bytes.NewReader(page.RawData) - - var numNulls uint64 - for i := 0; i < len(page.DataTable.DefinitionLevels); i++ { - if page.DataTable.DefinitionLevels[i] != page.DataTable.MaxDefinitionLevel { - numNulls++ - } - } - - name := strings.Join(page.DataTable.Path, ".") - 
var convertedType parquet.ConvertedType = -1 - - if schemaElements[columnNameIndexMap[name]].IsSetConvertedType() { - convertedType = schemaElements[columnNameIndexMap[name]].GetConvertedType() - } - - values, _, err := readDataPageValues(bytesReader, encodingType, page.DataType, - convertedType, uint64(len(page.DataTable.DefinitionLevels))-numNulls, - uint64(schemaElements[columnNameIndexMap[name]].GetTypeLength())) - if err != nil { - return err - } - - tableValues := getTableValues(values, page.DataType) - - j := 0 - for i := 0; i < len(page.DataTable.DefinitionLevels); i++ { - if page.DataTable.DefinitionLevels[i] == page.DataTable.MaxDefinitionLevel { - page.DataTable.Values[i] = tableValues[j] - j++ - } - } - - page.RawData = []byte{} - return nil - } - - return fmt.Errorf("unsupported page type %v", pageType) -} - -func (page *page) toDataPage(compressType parquet.CompressionCodec) []byte { - values := []interface{}{} - for i := range page.DataTable.DefinitionLevels { - if page.DataTable.DefinitionLevels[i] == page.DataTable.MaxDefinitionLevel { - values = append(values, page.DataTable.Values[i]) - } - } - valuesBytes := encodeValues(interfacesToValues(values, page.DataTable.Type), page.DataType, page.DataTable.Encoding, page.DataTable.BitWidth) - - var defLevelBytes []byte - if page.DataTable.MaxDefinitionLevel > 0 { - defLevels := make([]int64, len(page.DataTable.DefinitionLevels)) - for i := range page.DataTable.DefinitionLevels { - defLevels[i] = int64(page.DataTable.DefinitionLevels[i]) - } - defLevelBytes = valuesToRLEBitPackedHybridBytes( - defLevels, - int32(getBitWidth(uint64(page.DataTable.MaxDefinitionLevel))), - parquet.Type_INT64, - ) - } - - var repLevelBytes []byte - if page.DataTable.MaxRepetitionLevel > 0 { - repLevels := make([]int64, len(page.DataTable.DefinitionLevels)) - for i := range page.DataTable.DefinitionLevels { - repLevels[i] = int64(page.DataTable.RepetitionLevels[i]) - } - repLevelBytes = valuesToRLEBitPackedHybridBytes( - 
repLevels, - int32(getBitWidth(uint64(page.DataTable.MaxRepetitionLevel))), - parquet.Type_INT64, - ) - } - - data := repLevelBytes - data = append(data, defLevelBytes...) - data = append(data, valuesBytes...) - - compressedData, err := compressionCodec(compressType).compress(data) - if err != nil { - panic(err) - } - - page.Header = parquet.NewPageHeader() - page.Header.Type = parquet.PageType_DATA_PAGE - page.Header.CompressedPageSize = int32(len(compressedData)) - page.Header.UncompressedPageSize = int32(len(data)) - page.Header.DataPageHeader = parquet.NewDataPageHeader() - page.Header.DataPageHeader.NumValues = int32(len(page.DataTable.DefinitionLevels)) - page.Header.DataPageHeader.DefinitionLevelEncoding = parquet.Encoding_RLE - page.Header.DataPageHeader.RepetitionLevelEncoding = parquet.Encoding_RLE - page.Header.DataPageHeader.Encoding = page.DataTable.Encoding - page.Header.DataPageHeader.Statistics = parquet.NewStatistics() - if page.MaxVal != nil { - tmpBuf := valueToBytes(page.MaxVal, page.DataType) - if page.DataType == parquet.Type_BYTE_ARRAY { - switch page.DataTable.ConvertedType { - case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL: - tmpBuf = tmpBuf[4:] - } - } - page.Header.DataPageHeader.Statistics.Max = tmpBuf - } - if page.MinVal != nil { - tmpBuf := valueToBytes(page.MinVal, page.DataType) - if page.DataType == parquet.Type_BYTE_ARRAY { - switch page.DataTable.ConvertedType { - case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL: - tmpBuf = tmpBuf[4:] - } - } - page.Header.DataPageHeader.Statistics.Min = tmpBuf - } - - ts := thrift.NewTSerializer() - ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) - pageHeaderBytes, err := ts.Write(context.TODO(), page.Header) - if err != nil { - panic(err) - } - - page.RawData = append(pageHeaderBytes, compressedData...) 
- return page.RawData -} - -func (page *page) toDataPageV2(compressType parquet.CompressionCodec) []byte { - values := []interface{}{} - for i := range page.DataTable.DefinitionLevels { - if page.DataTable.DefinitionLevels[i] == page.DataTable.MaxDefinitionLevel { - values = append(values, page.DataTable.Values[i]) - } - } - valuesBytes := encodeValues(values, page.DataType, page.DataTable.Encoding, page.DataTable.BitWidth) - - var defLevelBytes []byte - if page.DataTable.MaxDefinitionLevel > 0 { - defLevels := make([]int64, len(page.DataTable.DefinitionLevels)) - for i := range page.DataTable.DefinitionLevels { - defLevels[i] = int64(page.DataTable.DefinitionLevels[i]) - } - defLevelBytes = valuesToRLEBytes( - defLevels, - int32(getBitWidth(uint64(page.DataTable.MaxDefinitionLevel))), - parquet.Type_INT64, - ) - } - - var repLevelBytes []byte - numRows := int32(0) - if page.DataTable.MaxRepetitionLevel > 0 { - repLevels := make([]int64, len(page.DataTable.DefinitionLevels)) - for i := range page.DataTable.DefinitionLevels { - repLevels[i] = int64(page.DataTable.RepetitionLevels[i]) - if page.DataTable.RepetitionLevels[i] == 0 { - numRows++ - } - } - repLevelBytes = valuesToRLEBytes( - repLevels, - int32(getBitWidth(uint64(page.DataTable.MaxRepetitionLevel))), - parquet.Type_INT64, - ) - } - - compressedData, err := compressionCodec(compressType).compress(valuesBytes) - if err != nil { - panic(err) - } - - page.Header = parquet.NewPageHeader() - page.Header.Type = parquet.PageType_DATA_PAGE_V2 - page.Header.CompressedPageSize = int32(len(compressedData) + len(defLevelBytes) + len(repLevelBytes)) - page.Header.UncompressedPageSize = int32(len(valuesBytes) + len(defLevelBytes) + len(repLevelBytes)) - page.Header.DataPageHeaderV2 = parquet.NewDataPageHeaderV2() - page.Header.DataPageHeaderV2.NumValues = int32(len(page.DataTable.Values)) - page.Header.DataPageHeaderV2.NumNulls = page.Header.DataPageHeaderV2.NumValues - int32(len(values)) - 
page.Header.DataPageHeaderV2.NumRows = numRows - page.Header.DataPageHeaderV2.Encoding = page.DataTable.Encoding - page.Header.DataPageHeaderV2.DefinitionLevelsByteLength = int32(len(defLevelBytes)) - page.Header.DataPageHeaderV2.RepetitionLevelsByteLength = int32(len(repLevelBytes)) - page.Header.DataPageHeaderV2.IsCompressed = true - - page.Header.DataPageHeaderV2.Statistics = parquet.NewStatistics() - if page.MaxVal != nil { - tmpBuf := valueToBytes(page.MaxVal, page.DataType) - if page.DataType == parquet.Type_BYTE_ARRAY { - switch page.DataTable.ConvertedType { - case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL: - tmpBuf = tmpBuf[4:] - } - } - page.Header.DataPageHeaderV2.Statistics.Max = tmpBuf - } - if page.MinVal != nil { - tmpBuf := valueToBytes(page.MinVal, page.DataType) - if page.DataType == parquet.Type_BYTE_ARRAY { - switch page.DataTable.ConvertedType { - case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL: - tmpBuf = tmpBuf[4:] - } - } - page.Header.DataPageHeaderV2.Statistics.Min = tmpBuf - } - - ts := thrift.NewTSerializer() - ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) - pageHeaderBytes, err := ts.Write(context.TODO(), page.Header) - if err != nil { - panic(err) - } - - page.RawData = append(pageHeaderBytes, repLevelBytes...) - page.RawData = append(page.RawData, defLevelBytes...) - page.RawData = append(page.RawData, compressedData...) 
- - return page.RawData -} - -func (page *page) toDictPage(compressType parquet.CompressionCodec, dataType parquet.Type) []byte { - valuesBytes := valuesToBytes(page.DataTable.Values, dataType) - compressedData, err := compressionCodec(compressType).compress(valuesBytes) - if err != nil { - panic(err) - } - - page.Header = parquet.NewPageHeader() - page.Header.Type = parquet.PageType_DICTIONARY_PAGE - page.Header.CompressedPageSize = int32(len(compressedData)) - page.Header.UncompressedPageSize = int32(len(valuesBytes)) - page.Header.DictionaryPageHeader = parquet.NewDictionaryPageHeader() - page.Header.DictionaryPageHeader.NumValues = int32(len(page.DataTable.Values)) - page.Header.DictionaryPageHeader.Encoding = parquet.Encoding_PLAIN - - ts := thrift.NewTSerializer() - ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) - pageHeaderBytes, err := ts.Write(context.TODO(), page.Header) - if err != nil { - panic(err) - } - - page.RawData = append(pageHeaderBytes, compressedData...) - return page.RawData -} - -func (page *page) toDictDataPage(compressType parquet.CompressionCodec, bitWidth int32) []byte { - valuesBytes := append([]byte{byte(bitWidth)}, valuesToRLEBytes(page.DataTable.Values, bitWidth, parquet.Type_INT32)...) 
- - var defLevelBytes []byte - if page.DataTable.MaxDefinitionLevel > 0 { - defLevels := make([]int64, len(page.DataTable.DefinitionLevels)) - for i := range page.DataTable.DefinitionLevels { - defLevels[i] = int64(page.DataTable.DefinitionLevels[i]) - } - defLevelBytes = valuesToRLEBitPackedHybridBytes( - defLevels, - int32(getBitWidth(uint64(page.DataTable.MaxDefinitionLevel))), - parquet.Type_INT64, - ) - } - - var repLevelBytes []byte - if page.DataTable.MaxRepetitionLevel > 0 { - repLevels := make([]int64, len(page.DataTable.DefinitionLevels)) - for i := range page.DataTable.DefinitionLevels { - repLevels[i] = int64(page.DataTable.RepetitionLevels[i]) - } - repLevelBytes = valuesToRLEBitPackedHybridBytes( - repLevels, - int32(getBitWidth(uint64(page.DataTable.MaxRepetitionLevel))), - parquet.Type_INT64, - ) - } - - data := append(repLevelBytes, defLevelBytes...) - data = append(data, valuesBytes...) - - compressedData, err := compressionCodec(compressType).compress(data) - if err != nil { - panic(err) - } - - page.Header = parquet.NewPageHeader() - page.Header.Type = parquet.PageType_DATA_PAGE - page.Header.CompressedPageSize = int32(len(compressedData)) - page.Header.UncompressedPageSize = int32(len(data)) - page.Header.DataPageHeader = parquet.NewDataPageHeader() - page.Header.DataPageHeader.NumValues = int32(len(page.DataTable.DefinitionLevels)) - page.Header.DataPageHeader.DefinitionLevelEncoding = parquet.Encoding_RLE - page.Header.DataPageHeader.RepetitionLevelEncoding = parquet.Encoding_RLE - page.Header.DataPageHeader.Encoding = parquet.Encoding_PLAIN_DICTIONARY - - ts := thrift.NewTSerializer() - ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) - pageHeaderBytes, err := ts.Write(context.TODO(), page.Header) - if err != nil { - panic(err) - } - - page.RawData = append(pageHeaderBytes, compressedData...) 
- return page.RawData -} diff --git a/pkg/s3select/internal/parquet-go/parquet.thrift b/pkg/s3select/internal/parquet-go/parquet.thrift deleted file mode 100644 index edffc21cc..000000000 --- a/pkg/s3select/internal/parquet-go/parquet.thrift +++ /dev/null @@ -1,881 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * File format description for the parquet file format - */ -namespace cpp parquet -namespace java org.apache.parquet.format - -/** - * Types supported by Parquet. These types are intended to be used in combination - * with the encodings to control the on disk storage format. - * For example INT16 is not included as a type since a good encoding of INT32 - * would handle this. - */ -enum Type { - BOOLEAN = 0; - INT32 = 1; - INT64 = 2; - INT96 = 3; // deprecated, only used by legacy implementations. - FLOAT = 4; - DOUBLE = 5; - BYTE_ARRAY = 6; - FIXED_LEN_BYTE_ARRAY = 7; -} - -/** - * Common types used by frameworks(e.g. hive, pig) using parquet. This helps map - * between types in those frameworks to the base types in parquet. This is only - * metadata and not needed to read or write the data. 
- */ -enum ConvertedType { - /** a BYTE_ARRAY actually contains UTF8 encoded chars */ - UTF8 = 0; - - /** a map is converted as an optional field containing a repeated key/value pair */ - MAP = 1; - - /** a key/value pair is converted into a group of two fields */ - MAP_KEY_VALUE = 2; - - /** a list is converted into an optional field containing a repeated field for its - * values */ - LIST = 3; - - /** an enum is converted into a binary field */ - ENUM = 4; - - /** - * A decimal value. - * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's - * complement using big-endian byte order (the most significant byte is the - * zeroth element). The value of the decimal is the value * 10^{-scale}. - * - * This must be accompanied by a (maximum) precision and a scale in the - * SchemaElement. The precision specifies the number of digits in the decimal - * and the scale stores the location of the decimal point. For example 1.23 - * would have precision 3 (3 total digits) and scale 2 (the decimal point is - * 2 digits over). - */ - DECIMAL = 5; - - /** - * A Date - * - * Stored as days since Unix epoch, encoded as the INT32 physical type. - * - */ - DATE = 6; - - /** - * A time - * - * The total number of milliseconds since midnight. The value is stored - * as an INT32 physical type. - */ - TIME_MILLIS = 7; - - /** - * A time. - * - * The total number of microseconds since midnight. The value is stored as - * an INT64 physical type. - */ - TIME_MICROS = 8; - - /** - * A date/time combination - * - * Date and time recorded as milliseconds since the Unix epoch. Recorded as - * a physical type of INT64. - */ - TIMESTAMP_MILLIS = 9; - - /** - * A date/time combination - * - * Date and time recorded as microseconds since the Unix epoch. The value is - * stored as an INT64 physical type. - */ - TIMESTAMP_MICROS = 10; - - - /** - * An unsigned integer value. 
- * - * The number describes the maximum number of meainful data bits in - * the stored value. 8, 16 and 32 bit values are stored using the - * INT32 physical type. 64 bit values are stored using the INT64 - * physical type. - * - */ - UINT_8 = 11; - UINT_16 = 12; - UINT_32 = 13; - UINT_64 = 14; - - /** - * A signed integer value. - * - * The number describes the maximum number of meainful data bits in - * the stored value. 8, 16 and 32 bit values are stored using the - * INT32 physical type. 64 bit values are stored using the INT64 - * physical type. - * - */ - INT_8 = 15; - INT_16 = 16; - INT_32 = 17; - INT_64 = 18; - - /** - * An embedded JSON document - * - * A JSON document embedded within a single UTF8 column. - */ - JSON = 19; - - /** - * An embedded BSON document - * - * A BSON document embedded within a single BINARY column. - */ - BSON = 20; - - /** - * An interval of time - * - * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 - * This data is composed of three separate little endian unsigned - * integers. Each stores a component of a duration of time. The first - * integer identifies the number of months associated with the duration, - * the second identifies the number of days associated with the duration - * and the third identifies the number of milliseconds associated with - * the provided duration. This duration of time is independent of any - * particular timezone or date. - */ - INTERVAL = 21; -} - -/** - * Representation of Schemas - */ -enum FieldRepetitionType { - /** This field is required (can not be null) and each record has exactly 1 value. */ - REQUIRED = 0; - - /** The field is optional (can be null) and each record has 0 or 1 values. */ - OPTIONAL = 1; - - /** The field is repeated and can contain 0 or more values */ - REPEATED = 2; -} - -/** - * Statistics per row group and per page - * All fields are optional. - */ -struct Statistics { - /** - * DEPRECATED: min and max value of the column. 
Use min_value and max_value. - * - * Values are encoded using PLAIN encoding, except that variable-length byte - * arrays do not include a length prefix. - * - * These fields encode min and max values determined by signed comparison - * only. New files should use the correct order for a column's logical type - * and store the values in the min_value and max_value fields. - * - * To support older readers, these may be set when the column order is - * signed. - */ - 1: optional binary max; - 2: optional binary min; - /** count of null value in the column */ - 3: optional i64 null_count; - /** count of distinct values occurring */ - 4: optional i64 distinct_count; - /** - * Min and max values for the column, determined by its ColumnOrder. - * - * Values are encoded using PLAIN encoding, except that variable-length byte - * arrays do not include a length prefix. - */ - 5: optional binary max_value; - 6: optional binary min_value; -} - -/** Empty structs to use as logical type annotations */ -struct StringType {} // allowed for BINARY, must be encoded with UTF-8 -struct UUIDType {} // allowed for FIXED[16], must encoded raw UUID bytes -struct MapType {} // see LogicalTypes.md -struct ListType {} // see LogicalTypes.md -struct EnumType {} // allowed for BINARY, must be encoded with UTF-8 -struct DateType {} // allowed for INT32 - -/** - * Logical type to annotate a column that is always null. - * - * Sometimes when discovering the schema of existing data, values are always - * null and the physical type can't be determined. This annotation signals - * the case where the physical type was guessed from all null values. - */ -struct NullType {} // allowed for any physical type, only null values stored - -/** - * Decimal logical type annotation - * - * To maintain forward-compatibility in v1, implementations using this logical - * type must also set scale and precision on the annotated SchemaElement. 
- * - * Allowed for physical types: INT32, INT64, FIXED, and BINARY - */ -struct DecimalType { - 1: required i32 scale - 2: required i32 precision -} - -/** Time units for logical types */ -struct MilliSeconds {} -struct MicroSeconds {} -struct NanoSeconds {} -union TimeUnit { - 1: MilliSeconds MILLIS - 2: MicroSeconds MICROS - 3: NanoSeconds NANOS -} - -/** - * Timestamp logical type annotation - * - * Allowed for physical types: INT64 - */ -struct TimestampType { - 1: required bool isAdjustedToUTC - 2: required TimeUnit unit -} - -/** - * Time logical type annotation - * - * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) - */ -struct TimeType { - 1: required bool isAdjustedToUTC - 2: required TimeUnit unit -} - -/** - * Integer logical type annotation - * - * bitWidth must be 8, 16, 32, or 64. - * - * Allowed for physical types: INT32, INT64 - */ -struct IntType { - 1: required byte bitWidth - 2: required bool isSigned -} - -/** - * Embedded JSON logical type annotation - * - * Allowed for physical types: BINARY - */ -struct JsonType { -} - -/** - * Embedded BSON logical type annotation - * - * Allowed for physical types: BINARY - */ -struct BsonType { -} - -/** - * LogicalType annotations to replace ConvertedType. - * - * To maintain compatibility, implementations using LogicalType for a - * SchemaElement must also set the corresponding ConvertedType from the - * following table. 
- */ -union LogicalType { - 1: StringType STRING // use ConvertedType UTF8 - 2: MapType MAP // use ConvertedType MAP - 3: ListType LIST // use ConvertedType LIST - 4: EnumType ENUM // use ConvertedType ENUM - 5: DecimalType DECIMAL // use ConvertedType DECIMAL - 6: DateType DATE // use ConvertedType DATE - 7: TimeType TIME // use ConvertedType TIME_MICROS or TIME_MILLIS - 8: TimestampType TIMESTAMP // use ConvertedType TIMESTAMP_MICROS or TIMESTAMP_MILLIS - // 9: reserved for INTERVAL - 10: IntType INTEGER // use ConvertedType INT_* or UINT_* - 11: NullType UNKNOWN // no compatible ConvertedType - 12: JsonType JSON // use ConvertedType JSON - 13: BsonType BSON // use ConvertedType BSON - 14: UUIDType UUID -} - -/** - * Represents a element inside a schema definition. - * - if it is a group (inner node) then type is undefined and num_children is defined - * - if it is a primitive type (leaf) then type is defined and num_children is undefined - * the nodes are listed in depth first traversal order. - */ -struct SchemaElement { - /** Data type for this field. Not set if the current element is a non-leaf node */ - 1: optional Type type; - - /** If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the vales. - * Otherwise, if specified, this is the maximum bit length to store any of the values. - * (e.g. a low cardinality INT col could have this set to 3). Note that this is - * in the schema, and therefore fixed for the entire file. - */ - 2: optional i32 type_length; - - /** repetition of the field. The root of the schema does not have a repetition_type. - * All other nodes must have one */ - 3: optional FieldRepetitionType repetition_type; - - /** Name of the field in the schema */ - 4: required string name; - - /** Nested fields. Since thrift does not support nested fields, - * the nesting is flattened to a single list by a depth-first traversal. - * The children count is used to construct the nested relationship. 
- * This field is not set when the element is a primitive type - */ - 5: optional i32 num_children; - - /** When the schema is the result of a conversion from another model - * Used to record the original type to help with cross conversion. - */ - 6: optional ConvertedType converted_type; - - /** Used when this column contains decimal data. - * See the DECIMAL converted type for more details. - */ - 7: optional i32 scale - 8: optional i32 precision - - /** When the original schema supports field ids, this will save the - * original field id in the parquet schema - */ - 9: optional i32 field_id; - - /** - * The logical type of this SchemaElement - * - * LogicalType replaces ConvertedType, but ConvertedType is still required - * for some logical types to ensure forward-compatibility in format v1. - */ - 10: optional LogicalType logicalType -} - -/** - * Encodings supported by Parquet. Not all encodings are valid for all types. These - * enums are also used to specify the encoding of definition and repetition levels. - * See the accompanying doc for the details of the more complicated encodings. - */ -enum Encoding { - /** Default encoding. - * BOOLEAN - 1 bit per value. 0 is false; 1 is true. - * INT32 - 4 bytes per value. Stored as little-endian. - * INT64 - 8 bytes per value. Stored as little-endian. - * FLOAT - 4 bytes per value. IEEE. Stored as little-endian. - * DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. - * BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. - * FIXED_LEN_BYTE_ARRAY - Just the bytes. - */ - PLAIN = 0; - - /** Group VarInt encoding for INT32/INT64. - * This encoding is deprecated. It was never used - */ - // GROUP_VAR_INT = 1; - - /** - * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the - * plain type. - * in a data page use RLE_DICTIONARY instead. - * in a Dictionary page use PLAIN instead - */ - PLAIN_DICTIONARY = 2; - - /** Group packed run length encoding. 
Usable for definition/repetition levels - * encoding and Booleans (on one bit: 0 is false; 1 is true.) - */ - RLE = 3; - - /** Bit packed encoding. This can only be used if the data has a known max - * width. Usable for definition/repetition levels encoding. - */ - BIT_PACKED = 4; - - /** Delta encoding for integers. This can be used for int columns and works best - * on sorted data - */ - DELTA_BINARY_PACKED = 5; - - /** Encoding for byte arrays to separate the length values and the data. The lengths - * are encoded using DELTA_BINARY_PACKED - */ - DELTA_LENGTH_BYTE_ARRAY = 6; - - /** Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. - * Suffixes are stored as delta length byte arrays. - */ - DELTA_BYTE_ARRAY = 7; - - /** Dictionary encoding: the ids are encoded using the RLE encoding - */ - RLE_DICTIONARY = 8; -} - -/** - * Supported compression algorithms. - * - * Codecs added in 2.4 can be read by readers based on 2.4 and later. - * Codec support may vary between readers based on the format version and - * libraries available at runtime. Gzip, Snappy, and LZ4 codecs are - * widely available, while Zstd and Brotli require additional libraries. - */ -enum CompressionCodec { - UNCOMPRESSED = 0; - SNAPPY = 1; - GZIP = 2; - LZO = 3; - BROTLI = 4; // Added in 2.4 - LZ4 = 5; // Added in 2.4 - ZSTD = 6; // Added in 2.4 -} - -enum PageType { - DATA_PAGE = 0; - INDEX_PAGE = 1; - DICTIONARY_PAGE = 2; - DATA_PAGE_V2 = 3; -} - -/** - * Enum to annotate whether lists of min/max elements inside ColumnIndex - * are ordered and if so, in which direction. - */ -enum BoundaryOrder { - UNORDERED = 0; - ASCENDING = 1; - DESCENDING = 2; -} - -/** Data page header */ -struct DataPageHeader { - /** Number of values, including NULLs, in this data page. 
**/ - 1: required i32 num_values - - /** Encoding used for this data page **/ - 2: required Encoding encoding - - /** Encoding used for definition levels **/ - 3: required Encoding definition_level_encoding; - - /** Encoding used for repetition levels **/ - 4: required Encoding repetition_level_encoding; - - /** Optional statistics for the data in this page**/ - 5: optional Statistics statistics; -} - -struct IndexPageHeader { - /** TODO: **/ -} - -struct DictionaryPageHeader { - /** Number of values in the dictionary **/ - 1: required i32 num_values; - - /** Encoding using this dictionary page **/ - 2: required Encoding encoding - - /** If true, the entries in the dictionary are sorted in ascending order **/ - 3: optional bool is_sorted; -} - -/** - * New page format allowing reading levels without decompressing the data - * Repetition and definition levels are uncompressed - * The remaining section containing the data is compressed if is_compressed is true - **/ -struct DataPageHeaderV2 { - /** Number of values, including NULLs, in this data page. **/ - 1: required i32 num_values - /** Number of NULL values, in this data page. - Number of non-null = num_values - num_nulls which is also the number of values in the data section **/ - 2: required i32 num_nulls - /** Number of rows in this data page. which means pages change on record boundaries (r = 0) **/ - 3: required i32 num_rows - /** Encoding used for data in this page **/ - 4: required Encoding encoding - - // repetition levels and definition levels are always using RLE (without size in it) - - /** length of the definition levels */ - 5: required i32 definition_levels_byte_length; - /** length of the repetition levels */ - 6: required i32 repetition_levels_byte_length; - - /** whether the values are compressed. - Which means the section of the page between - definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) - is compressed with the compression_codec. 
- If missing it is considered compressed */ - 7: optional bool is_compressed = 1; - - /** optional statistics for this column chunk */ - 8: optional Statistics statistics; -} - -struct PageHeader { - /** the type of the page: indicates which of the *_header fields is set **/ - 1: required PageType type - - /** Uncompressed page size in bytes (not including this header) **/ - 2: required i32 uncompressed_page_size - - /** Compressed page size in bytes (not including this header) **/ - 3: required i32 compressed_page_size - - /** 32bit crc for the data below. This allows for disabling checksumming in HDFS - * if only a few pages needs to be read - **/ - 4: optional i32 crc - - // Headers for page specific data. One only will be set. - 5: optional DataPageHeader data_page_header; - 6: optional IndexPageHeader index_page_header; - 7: optional DictionaryPageHeader dictionary_page_header; - 8: optional DataPageHeaderV2 data_page_header_v2; -} - -/** - * Wrapper struct to store key values - */ - struct KeyValue { - 1: required string key - 2: optional string value -} - -/** - * Wrapper struct to specify sort order - */ -struct SortingColumn { - /** The column index (in this row group) **/ - 1: required i32 column_idx - - /** If true, indicates this column is sorted in descending order. **/ - 2: required bool descending - - /** If true, nulls will come before non-null values, otherwise, - * nulls go at the end. */ - 3: required bool nulls_first -} - -/** - * statistics of a given page type and encoding - */ -struct PageEncodingStats { - - /** the page type (data/dic/...) **/ - 1: required PageType page_type; - - /** encoding of the page **/ - 2: required Encoding encoding; - - /** number of pages of this type with this encoding **/ - 3: required i32 count; - -} - -/** - * Description for column metadata - */ -struct ColumnMetaData { - /** Type of this column **/ - 1: required Type type - - /** Set of all encodings used for this column. 
The purpose is to validate - * whether we can decode those pages. **/ - 2: required list encodings - - /** Path in schema **/ - 3: required list path_in_schema - - /** Compression codec **/ - 4: required CompressionCodec codec - - /** Number of values in this column **/ - 5: required i64 num_values - - /** total byte size of all uncompressed pages in this column chunk (including the headers) **/ - 6: required i64 total_uncompressed_size - - /** total byte size of all compressed pages in this column chunk (including the headers) **/ - 7: required i64 total_compressed_size - - /** Optional key/value metadata **/ - 8: optional list key_value_metadata - - /** Byte offset from beginning of file to first data page **/ - 9: required i64 data_page_offset - - /** Byte offset from beginning of file to root index page **/ - 10: optional i64 index_page_offset - - /** Byte offset from the beginning of file to first (only) dictionary page **/ - 11: optional i64 dictionary_page_offset - - /** optional statistics for this column chunk */ - 12: optional Statistics statistics; - - /** Set of all encodings used for pages in this column chunk. - * This information can be used to determine if all data pages are - * dictionary encoded for example **/ - 13: optional list encoding_stats; -} - -struct ColumnChunk { - /** File where column data is stored. If not set, assumed to be same file as - * metadata. This path is relative to the current file. - **/ - 1: optional string file_path - - /** Byte offset in file_path to the ColumnMetaData **/ - 2: required i64 file_offset - - /** Column metadata for this chunk. This is the same content as what is at - * file_path/file_offset. Having it here has it replicated in the file - * metadata. 
- **/ - 3: optional ColumnMetaData meta_data - - /** File offset of ColumnChunk's OffsetIndex **/ - 4: optional i64 offset_index_offset - - /** Size of ColumnChunk's OffsetIndex, in bytes **/ - 5: optional i32 offset_index_length - - /** File offset of ColumnChunk's ColumnIndex **/ - 6: optional i64 column_index_offset - - /** Size of ColumnChunk's ColumnIndex, in bytes **/ - 7: optional i32 column_index_length -} - -struct RowGroup { - /** Metadata for each column chunk in this row group. - * This list must have the same order as the SchemaElement list in FileMetaData. - **/ - 1: required list columns - - /** Total byte size of all the uncompressed column data in this row group **/ - 2: required i64 total_byte_size - - /** Number of rows in this row group **/ - 3: required i64 num_rows - - /** If set, specifies a sort ordering of the rows in this RowGroup. - * The sorting columns can be a subset of all the columns. - */ - 4: optional list sorting_columns -} - -/** Empty struct to signal the order defined by the physical or logical type */ -struct TypeDefinedOrder {} - -/** - * Union to specify the order used for the min_value and max_value fields for a - * column. This union takes the role of an enhanced enum that allows rich - * elements (which will be needed for a collation-based ordering in the future). - * - * Possible values are: - * * TypeDefinedOrder - the column uses the order defined by its logical or - * physical type (if there is no logical type). - * - * If the reader does not support the value of this union, min and max stats - * for this column should be ignored. 
- */ -union ColumnOrder { - - /** - * The sort orders for logical types are: - * UTF8 - unsigned byte-wise comparison - * INT8 - signed comparison - * INT16 - signed comparison - * INT32 - signed comparison - * INT64 - signed comparison - * UINT8 - unsigned comparison - * UINT16 - unsigned comparison - * UINT32 - unsigned comparison - * UINT64 - unsigned comparison - * DECIMAL - signed comparison of the represented value - * DATE - signed comparison - * TIME_MILLIS - signed comparison - * TIME_MICROS - signed comparison - * TIMESTAMP_MILLIS - signed comparison - * TIMESTAMP_MICROS - signed comparison - * INTERVAL - unsigned comparison - * JSON - unsigned byte-wise comparison - * BSON - unsigned byte-wise comparison - * ENUM - unsigned byte-wise comparison - * LIST - undefined - * MAP - undefined - * - * In the absence of logical types, the sort order is determined by the physical type: - * BOOLEAN - false, true - * INT32 - signed comparison - * INT64 - signed comparison - * INT96 (only used for legacy timestamps) - undefined - * FLOAT - signed comparison of the represented value (*) - * DOUBLE - signed comparison of the represented value (*) - * BYTE_ARRAY - unsigned byte-wise comparison - * FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison - * - * (*) Because the sorting order is not specified properly for floating - * point values (relations vs. total ordering) the following - * compatibility rules should be applied when reading statistics: - * - If the min is a NaN, it should be ignored. - * - If the max is a NaN, it should be ignored. - * - If the min is +0, the row group may contain -0 values as well. - * - If the max is -0, the row group may contain +0 values as well. - * - When looking for NaN values, min and max should be ignored. - */ - 1: TypeDefinedOrder TYPE_ORDER; -} - -struct PageLocation { - /** Offset of the page in the file **/ - 1: required i64 offset - - /** - * Size of the page, including header. 
Sum of compressed_page_size and header - * length - */ - 2: required i32 compressed_page_size - - /** - * Index within the RowGroup of the first row of the page; this means pages - * change on record boundaries (r = 0). - */ - 3: required i64 first_row_index -} - -struct OffsetIndex { - /** - * PageLocations, ordered by increasing PageLocation.offset. It is required - * that page_locations[i].first_row_index < page_locations[i+1].first_row_index. - */ - 1: required list page_locations -} - -/** - * Description for ColumnIndex. - * Each [i] refers to the page at OffsetIndex.page_locations[i] - */ -struct ColumnIndex { - /** - * A list of Boolean values to determine the validity of the corresponding - * min and max values. If true, a page contains only null values, and writers - * have to set the corresponding entries in min_values and max_values to - * byte[0], so that all lists have the same length. If false, the - * corresponding entries in min_values and max_values must be valid. - */ - 1: required list null_pages - - /** - * Two lists containing lower and upper bounds for the values of each page. - * These may be the actual minimum and maximum values found on a page, but - * can also be (more compact) values that do not exist on a page. For - * example, instead of storing ""Blart Versenwald III", a writer may set - * min_values[i]="B", max_values[i]="C". Such more compact values must still - * be valid values within the column's logical type. Readers must make sure - * that list entries are populated before using them by inspecting null_pages. - */ - 2: required list min_values - 3: required list max_values - - /** - * Stores whether both min_values and max_values are orderd and if so, in - * which direction. This allows readers to perform binary searches in both - * lists. Readers cannot assume that max_values[i] <= min_values[i+1], even - * if the lists are ordered. 
- */ - 4: required BoundaryOrder boundary_order - - /** A list containing the number of null values for each page **/ - 5: optional list null_counts -} - -/** - * Description for file metadata - */ -struct FileMetaData { - /** Version of this file **/ - 1: required i32 version - - /** Parquet schema for this file. This schema contains metadata for all the columns. - * The schema is represented as a tree with a single root. The nodes of the tree - * are flattened to a list by doing a depth-first traversal. - * The column metadata contains the path in the schema for that column which can be - * used to map columns to nodes in the schema. - * The first element is the root **/ - 2: required list schema; - - /** Number of rows in this file **/ - 3: required i64 num_rows - - /** Row groups in this file **/ - 4: required list row_groups - - /** Optional key/value metadata **/ - 5: optional list key_value_metadata - - /** String for application that wrote this file. This should be in the format - * version (build ). - * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) - **/ - 6: optional string created_by - - /** - * Sort order used for the min_value and max_value fields of each column in - * this file. Each sort order corresponds to one column, determined by its - * position in the list, matching the position of the column in the schema. - * - * Without column_orders, the meaning of the min_value and max_value fields is - * undefined. To ensure well-defined behavior, if min_value and max_value are - * written to a Parquet file, column_orders must be written as well. - * - * The obsolete min and max fields are always sorted by signed comparison - * regardless of column_orders. 
- */ - 7: optional list column_orders; -} - diff --git a/pkg/s3select/internal/parquet-go/reader.go b/pkg/s3select/internal/parquet-go/reader.go deleted file mode 100644 index bde4824a9..000000000 --- a/pkg/s3select/internal/parquet-go/reader.go +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "encoding/binary" - "encoding/json" - "io" - - "git.apache.org/thrift.git/lib/go/thrift" - "github.com/minio/minio-go/v7/pkg/set" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -// GetReaderFunc - function type returning io.ReadCloser for requested offset/length. 
-type GetReaderFunc func(offset, length int64) (io.ReadCloser, error) - -func footerSize(getReaderFunc GetReaderFunc) (size int64, err error) { - rc, err := getReaderFunc(-8, 4) - if err != nil { - return 0, err - } - defer rc.Close() - - buf := make([]byte, 4) - if _, err = io.ReadFull(rc, buf); err != nil { - return 0, err - } - - size = int64(binary.LittleEndian.Uint32(buf)) - - return size, nil -} - -func fileMetadata(getReaderFunc GetReaderFunc) (*parquet.FileMetaData, error) { - size, err := footerSize(getReaderFunc) - if err != nil { - return nil, err - } - - rc, err := getReaderFunc(-(8 + size), size) - if err != nil { - return nil, err - } - defer rc.Close() - - fileMeta := parquet.NewFileMetaData() - - pf := thrift.NewTCompactProtocolFactory() - protocol := pf.GetProtocol(thrift.NewStreamTransportR(rc)) - err = fileMeta.Read(protocol) - if err != nil { - return nil, err - } - - return fileMeta, nil -} - -// Value - denotes column value -type Value struct { - Value interface{} - Type parquet.Type - Schema *parquet.SchemaElement -} - -// MarshalJSON - encodes to JSON data -func (value Value) MarshalJSON() (data []byte, err error) { - return json.Marshal(value.Value) -} - -// Reader - denotes parquet file. -type Reader struct { - getReaderFunc GetReaderFunc - schemaElements []*parquet.SchemaElement - rowGroups []*parquet.RowGroup - rowGroupIndex int - - nameList []string - columnNames set.StringSet - columns map[string]*column - rowIndex int64 -} - -// NewReader - creates new parquet reader. Reader calls getReaderFunc to get required data range for given columnNames. If columnNames is empty, all columns are used. 
-func NewReader(getReaderFunc GetReaderFunc, columnNames set.StringSet) (*Reader, error) { - fileMeta, err := fileMetadata(getReaderFunc) - if err != nil { - return nil, err - } - - nameList := []string{} - schemaElements := fileMeta.GetSchema() - for _, element := range schemaElements { - nameList = append(nameList, element.Name) - } - - return &Reader{ - getReaderFunc: getReaderFunc, - rowGroups: fileMeta.GetRowGroups(), - schemaElements: schemaElements, - nameList: nameList, - columnNames: columnNames, - }, nil -} - -// Read - reads single record. -func (reader *Reader) Read() (record *Record, err error) { - if reader.rowGroupIndex >= len(reader.rowGroups) { - return nil, io.EOF - } - - if reader.columns == nil { - reader.columns, err = getColumns( - reader.rowGroups[reader.rowGroupIndex], - reader.columnNames, - reader.schemaElements, - reader.getReaderFunc, - ) - if err != nil { - return nil, err - } - - reader.rowIndex = 0 - } - - if reader.rowIndex >= reader.rowGroups[reader.rowGroupIndex].GetNumRows() { - reader.rowGroupIndex++ - reader.Close() - return reader.Read() - } - - record = newRecord(reader.nameList) - for name := range reader.columns { - col := reader.columns[name] - value, valueType, schema := col.read() - record.set(name, Value{Value: value, Type: valueType, Schema: schema}) - } - - reader.rowIndex++ - - return record, nil -} - -// Close - closes underneath readers. -func (reader *Reader) Close() (err error) { - for _, column := range reader.columns { - column.close() - } - - reader.columns = nil - reader.rowIndex = 0 - - return nil -} diff --git a/pkg/s3select/internal/parquet-go/reader_test.go b/pkg/s3select/internal/parquet-go/reader_test.go deleted file mode 100644 index f3359b05b..000000000 --- a/pkg/s3select/internal/parquet-go/reader_test.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. 
-// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "io" - "os" - "testing" - - "github.com/minio/minio-go/v7/pkg/set" -) - -func getReader(name string, offset int64, length int64) (io.ReadCloser, error) { - file, err := os.Open(name) - if err != nil { - return nil, err - } - - fi, err := file.Stat() - if err != nil { - return nil, err - } - - if offset < 0 { - offset = fi.Size() + offset - } - - if _, err = file.Seek(offset, io.SeekStart); err != nil { - return nil, err - } - - return file, nil -} - -func TestReader(t *testing.T) { - name := "example.parquet" - reader, err := NewReader( - func(offset, length int64) (io.ReadCloser, error) { - return getReader(name, offset, length) - }, - set.CreateStringSet("one", "two", "three"), - ) - if err != nil { - t.Fatal(err) - } - - expectedRecords := []string{ - `map[one:{-1 DOUBLE SchemaElement({Type:DOUBLE TypeLength: RepetitionType:OPTIONAL Name:one NumChildren: ConvertedType: Scale: Precision: FieldID: LogicalType:})} three:{true BOOLEAN SchemaElement({Type:BOOLEAN TypeLength: RepetitionType:OPTIONAL Name:three NumChildren: ConvertedType: Scale: Precision: FieldID: LogicalType:})} two:{[102 111 111] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength: RepetitionType:OPTIONAL Name:two NumChildren: ConvertedType: Scale: Precision: FieldID: 
LogicalType:})}]`, - `map[one:{ DOUBLE SchemaElement({Type:DOUBLE TypeLength: RepetitionType:OPTIONAL Name:one NumChildren: ConvertedType: Scale: Precision: FieldID: LogicalType:})} three:{false BOOLEAN SchemaElement({Type:BOOLEAN TypeLength: RepetitionType:OPTIONAL Name:three NumChildren: ConvertedType: Scale: Precision: FieldID: LogicalType:})} two:{[98 97 114] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength: RepetitionType:OPTIONAL Name:two NumChildren: ConvertedType: Scale: Precision: FieldID: LogicalType:})}]`, - `map[one:{2.5 DOUBLE SchemaElement({Type:DOUBLE TypeLength: RepetitionType:OPTIONAL Name:one NumChildren: ConvertedType: Scale: Precision: FieldID: LogicalType:})} three:{true BOOLEAN SchemaElement({Type:BOOLEAN TypeLength: RepetitionType:OPTIONAL Name:three NumChildren: ConvertedType: Scale: Precision: FieldID: LogicalType:})} two:{[98 97 122] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength: RepetitionType:OPTIONAL Name:two NumChildren: ConvertedType: Scale: Precision: FieldID: LogicalType:})}]`, - } - - i := 0 - for { - record, err := reader.Read() - if err != nil { - if err != io.EOF { - t.Error(err) - } - - break - } - - if i == len(expectedRecords) { - t.Errorf("read more than expected record count %v", len(expectedRecords)) - } - - if record.String() != expectedRecords[i] { - t.Errorf("record%v: expected: %v, got: %v", i+1, expectedRecords[i], record.String()) - } - - i++ - } - - reader.Close() -} diff --git a/pkg/s3select/internal/parquet-go/record.go b/pkg/s3select/internal/parquet-go/record.go deleted file mode 100644 index b826165e6..000000000 --- a/pkg/s3select/internal/parquet-go/record.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. 
-// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package parquet - -import ( - "fmt" - "strings" -) - -// Record - ordered parquet record. -type Record struct { - nameList []string - nameValueMap map[string]Value -} - -// String - returns string representation of this record. -func (r *Record) String() string { - values := []string{} - r.Range(func(name string, value Value) bool { - values = append(values, fmt.Sprintf("%v:%v", name, value)) - return true - }) - - return "map[" + strings.Join(values, " ") + "]" -} - -func (r *Record) set(name string, value Value) { - r.nameValueMap[name] = value -} - -// Get - returns Value of name. -func (r *Record) Get(name string) (Value, bool) { - value, ok := r.nameValueMap[name] - return value, ok -} - -// Range - calls f sequentially for each name and value present in the record. If f returns false, range stops the iteration. 
-func (r *Record) Range(f func(name string, value Value) bool) { - for _, name := range r.nameList { - value, ok := r.nameValueMap[name] - if !ok { - continue - } - - if !f(name, value) { - break - } - } -} - -func newRecord(nameList []string) *Record { - return &Record{ - nameList: nameList, - nameValueMap: make(map[string]Value), - } -} diff --git a/pkg/s3select/internal/parquet-go/schema/element.go b/pkg/s3select/internal/parquet-go/schema/element.go deleted file mode 100644 index 09f61b8ff..000000000 --- a/pkg/s3select/internal/parquet-go/schema/element.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package schema - -import ( - "fmt" - "regexp" - "strings" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -var nameRegexp = regexp.MustCompile("^[a-zA-Z0-9_]+$") - -func validataPathSegments(pathSegments []string) error { - for _, pathSegment := range pathSegments { - if !nameRegexp.MatchString(pathSegment) { - return fmt.Errorf("unsupported name %v", strings.Join(pathSegments, ".")) - } - } - - return nil -} - -// Element - represents schema element and its children. Any element must have Name and RepetitionType fields set. 
-type Element struct { - parquet.SchemaElement - numChildren int32 - Encoding *parquet.Encoding // Optional; defaults is computed. - CompressionType *parquet.CompressionCodec // Optional; defaults to SNAPPY. - Children *Tree - MaxDefinitionLevel int64 - MaxRepetitionLevel int64 - PathInTree string - PathInSchema string -} - -// String - stringify this element. -func (element *Element) String() string { - var s []string - s = append(s, "Name:"+element.Name) - s = append(s, "RepetitionType:"+element.RepetitionType.String()) - if element.Type != nil { - s = append(s, "Type:"+element.Type.String()) - } - if element.ConvertedType != nil { - s = append(s, "ConvertedType:"+element.ConvertedType.String()) - } - if element.Encoding != nil { - s = append(s, "Encoding:"+element.Encoding.String()) - } - if element.CompressionType != nil { - s = append(s, "CompressionType:"+element.CompressionType.String()) - } - if element.Children != nil && element.Children.Length() > 0 { - s = append(s, "Children:"+element.Children.String()) - } - s = append(s, fmt.Sprintf("MaxDefinitionLevel:%v", element.MaxDefinitionLevel)) - s = append(s, fmt.Sprintf("MaxRepetitionLevel:%v", element.MaxRepetitionLevel)) - if element.PathInTree != "" { - s = append(s, "PathInTree:"+element.PathInTree) - } - if element.PathInSchema != "" { - s = append(s, "PathInSchema:"+element.PathInSchema) - } - - return "{" + strings.Join(s, ", ") + "}" -} - -// NewElement - creates new element. 
-func NewElement(name string, repetitionType parquet.FieldRepetitionType, - elementType *parquet.Type, convertedType *parquet.ConvertedType, - encoding *parquet.Encoding, compressionType *parquet.CompressionCodec, - children *Tree) (*Element, error) { - - if !nameRegexp.MatchString(name) { - return nil, fmt.Errorf("unsupported name %v", name) - } - - switch repetitionType { - case parquet.FieldRepetitionType_REQUIRED, parquet.FieldRepetitionType_OPTIONAL, parquet.FieldRepetitionType_REPEATED: - default: - return nil, fmt.Errorf("unknown repetition type %v", repetitionType) - } - - if repetitionType == parquet.FieldRepetitionType_REPEATED && (elementType != nil || convertedType != nil) { - return nil, fmt.Errorf("repetition type REPEATED should be used in group element") - } - - if children != nil && children.Length() != 0 { - if elementType != nil { - return nil, fmt.Errorf("type should be nil for group element") - } - } - - element := Element{ - Encoding: encoding, - CompressionType: compressionType, - Children: children, - } - - element.Name = name - element.RepetitionType = &repetitionType - element.Type = elementType - element.ConvertedType = convertedType - element.NumChildren = &element.numChildren - if element.Children != nil { - element.numChildren = int32(element.Children.Length()) - } - - return &element, nil -} diff --git a/pkg/s3select/internal/parquet-go/schema/tree.go b/pkg/s3select/internal/parquet-go/schema/tree.go deleted file mode 100644 index bdf2ca325..000000000 --- a/pkg/s3select/internal/parquet-go/schema/tree.go +++ /dev/null @@ -1,389 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package schema - -import ( - "fmt" - "strings" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func updateMaxDLRL(schemaMap map[string]*Element, maxDL, maxRL int64) { - for _, element := range schemaMap { - element.MaxDefinitionLevel = maxDL - element.MaxRepetitionLevel = maxRL - if *element.RepetitionType != parquet.FieldRepetitionType_REQUIRED { - element.MaxDefinitionLevel++ - if *element.RepetitionType == parquet.FieldRepetitionType_REPEATED { - element.MaxRepetitionLevel++ - } - } - - if element.Children != nil { - updateMaxDLRL(element.Children.schemaMap, element.MaxDefinitionLevel, element.MaxRepetitionLevel) - } - } -} - -func toParquetSchema(tree *Tree, treePrefix string, schemaPrefix string, schemaList *[]*parquet.SchemaElement, valueElements *[]*Element) (err error) { - tree.Range(func(name string, element *Element) bool { - pathInTree := name - if treePrefix != "" { - pathInTree = treePrefix + "." + name - } - - if element.Type == nil && element.ConvertedType == nil && element.Children == nil { - err = fmt.Errorf("%v: group element must have children", pathInTree) - return false - } - - if element.ConvertedType != nil { - switch *element.ConvertedType { - case parquet.ConvertedType_LIST: - // Supported structure. 
- // group (LIST) { - // REPEATED group list { - // element; - // } - // } - - if element.Type != nil { - err = fmt.Errorf("%v: type must be nil for LIST ConvertedType", pathInTree) - return false - } - - if element.Children == nil || element.Children.Length() != 1 { - err = fmt.Errorf("%v: children must have one element only for LIST ConvertedType", pathInTree) - return false - } - - listElement, ok := element.Children.Get("list") - if !ok { - err = fmt.Errorf("%v: missing group element 'list' for LIST ConvertedType", pathInTree) - return false - } - - if listElement.Name != "list" { - err = fmt.Errorf("%v.list: name must be 'list'", pathInTree) - return false - } - - if *listElement.RepetitionType != parquet.FieldRepetitionType_REPEATED { - err = fmt.Errorf("%v.list: repetition type must be REPEATED type", pathInTree) - return false - } - - if listElement.Type != nil || listElement.ConvertedType != nil { - err = fmt.Errorf("%v.list: type and converted type must be nil", pathInTree) - return false - } - - if listElement.Children == nil || listElement.Children.Length() != 1 { - err = fmt.Errorf("%v.list.element: not found", pathInTree) - return false - } - - valueElement, ok := listElement.Children.Get("element") - if !ok { - err = fmt.Errorf("%v.list.element: not found", pathInTree) - return false - } - - if valueElement.Name != "element" { - err = fmt.Errorf("%v.list.element: name must be 'element'", pathInTree) - return false - } - - case parquet.ConvertedType_MAP: - // Supported structure: - // group (MAP) { - // REPEATED group key_value { - // REQUIRED key; - // value; - // } - // } - - if element.Type != nil { - err = fmt.Errorf("%v: type must be nil for MAP ConvertedType", pathInTree) - return false - } - - if element.Children == nil || element.Children.Length() != 1 { - err = fmt.Errorf("%v: children must have one element only for MAP ConvertedType", pathInTree) - return false - } - - keyValueElement, ok := element.Children.Get("key_value") - if !ok { - err 
= fmt.Errorf("%v: missing group element 'key_value' for MAP ConvertedType", pathInTree) - return false - } - - if keyValueElement.Name != "key_value" { - err = fmt.Errorf("%v.key_value: name must be 'key_value'", pathInTree) - return false - } - - if *keyValueElement.RepetitionType != parquet.FieldRepetitionType_REPEATED { - err = fmt.Errorf("%v.key_value: repetition type must be REPEATED type", pathInTree) - return false - } - - if keyValueElement.Children == nil || keyValueElement.Children.Length() < 1 || keyValueElement.Children.Length() > 2 { - err = fmt.Errorf("%v.key_value: children must have 'key' and optionally 'value' elements for MAP ConvertedType", pathInTree) - return false - } - - keyElement, ok := keyValueElement.Children.Get("key") - if !ok { - err = fmt.Errorf("%v.key_value: missing 'key' element for MAP ConvertedType", pathInTree) - return false - } - - if keyElement.Name != "key" { - err = fmt.Errorf("%v.key_value.key: name must be 'key'", pathInTree) - return false - } - - if *keyElement.RepetitionType != parquet.FieldRepetitionType_REQUIRED { - err = fmt.Errorf("%v.key_value: repetition type must be REQUIRED type", pathInTree) - return false - } - - if keyValueElement.Children.Length() == 2 { - valueElement, ok := keyValueElement.Children.Get("value") - if !ok { - err = fmt.Errorf("%v.key_value: second element must be 'value' element for MAP ConvertedType", pathInTree) - return false - } - - if valueElement.Name != "value" { - err = fmt.Errorf("%v.key_value.value: name must be 'value'", pathInTree) - return false - } - } - - case parquet.ConvertedType_UTF8, parquet.ConvertedType_UINT_8, parquet.ConvertedType_UINT_16: - fallthrough - case parquet.ConvertedType_UINT_32, parquet.ConvertedType_UINT_64, parquet.ConvertedType_INT_8: - fallthrough - case parquet.ConvertedType_INT_16, parquet.ConvertedType_INT_32, parquet.ConvertedType_INT_64: - if element.Type == nil { - err = fmt.Errorf("%v: ConvertedType %v must have Type value", pathInTree, 
element.ConvertedType) - return false - } - - default: - err = fmt.Errorf("%v: unsupported ConvertedType %v", pathInTree, element.ConvertedType) - return false - } - } - - element.PathInTree = pathInTree - element.PathInSchema = element.Name - if schemaPrefix != "" { - element.PathInSchema = schemaPrefix + "." + element.Name - } - - if element.Type != nil { - *valueElements = append(*valueElements, element) - } - - *schemaList = append(*schemaList, &element.SchemaElement) - if element.Children != nil { - element.numChildren = int32(element.Children.Length()) - err = toParquetSchema(element.Children, element.PathInTree, element.PathInSchema, schemaList, valueElements) - } - - return (err == nil) - }) - - return err -} - -// Tree - represents tree of schema. Tree preserves order in which elements are added. -type Tree struct { - schemaMap map[string]*Element - keys []string - readOnly bool -} - -// String - stringify this tree. -func (tree *Tree) String() string { - var s []string - tree.Range(func(name string, element *Element) bool { - s = append(s, fmt.Sprintf("%v: %v", name, element)) - return true - }) - - return "{" + strings.Join(s, ", ") + "}" -} - -// Length - returns length of tree. -func (tree *Tree) Length() int { - return len(tree.keys) -} - -func (tree *Tree) travel(pathSegments []string) (pathSegmentIndex int, pathSegment string, currElement *Element, parentTree *Tree, found bool) { - parentTree = tree - for pathSegmentIndex, pathSegment = range pathSegments { - if tree == nil { - found = false - break - } - - var tmpCurrElement *Element - if tmpCurrElement, found = tree.schemaMap[pathSegment]; !found { - break - } - currElement = tmpCurrElement - - parentTree = tree - tree = currElement.Children - } - - return -} - -// ReadOnly - returns whether this tree is read only or not. -func (tree *Tree) ReadOnly() bool { - return tree.readOnly -} - -// Get - returns the element stored for name. 
-func (tree *Tree) Get(name string) (element *Element, ok bool) { - pathSegments := strings.Split(name, ".") - for _, pathSegment := range pathSegments { - if tree == nil { - element = nil - ok = false - break - } - - if element, ok = tree.schemaMap[pathSegment]; !ok { - break - } - - tree = element.Children - } - - return element, ok -} - -// Set - adds or sets element to name. -func (tree *Tree) Set(name string, element *Element) error { - if tree.readOnly { - return fmt.Errorf("read only tree") - } - - pathSegments := strings.Split(name, ".") - if err := validataPathSegments(pathSegments); err != nil { - return err - } - - i, pathSegment, currElement, parentTree, found := tree.travel(pathSegments) - - if !found { - if i != len(pathSegments)-1 { - return fmt.Errorf("parent %v does not exist", strings.Join(pathSegments[:i+1], ".")) - } - - if currElement == nil { - parentTree = tree - } else { - if currElement.Type != nil { - return fmt.Errorf("parent %v is not group element", strings.Join(pathSegments[:i], ".")) - } - - if currElement.Children == nil { - currElement.Children = NewTree() - } - parentTree = currElement.Children - } - - parentTree.keys = append(parentTree.keys, pathSegment) - } - - parentTree.schemaMap[pathSegment] = element - return nil -} - -// Delete - deletes name and its element. -func (tree *Tree) Delete(name string) { - if tree.readOnly { - panic(fmt.Errorf("read only tree")) - } - - pathSegments := strings.Split(name, ".") - - _, pathSegment, _, parentTree, found := tree.travel(pathSegments) - - if found { - for i := range parentTree.keys { - if parentTree.keys[i] == pathSegment { - copy(parentTree.keys[i:], parentTree.keys[i+1:]) - parentTree.keys = parentTree.keys[:len(parentTree.keys)-1] - break - } - } - - delete(parentTree.schemaMap, pathSegment) - } -} - -// Range - calls f sequentially for each name and its element. If f returns false, range stops the iteration. 
-func (tree *Tree) Range(f func(name string, element *Element) bool) { - for _, name := range tree.keys { - if !f(name, tree.schemaMap[name]) { - break - } - } -} - -// ToParquetSchema - returns list of parquet SchemaElement and list of elements those stores values. -func (tree *Tree) ToParquetSchema() (schemaList []*parquet.SchemaElement, valueElements []*Element, err error) { - if tree.readOnly { - return nil, nil, fmt.Errorf("read only tree") - } - - updateMaxDLRL(tree.schemaMap, 0, 0) - - var schemaElements []*parquet.SchemaElement - if err = toParquetSchema(tree, "", "", &schemaElements, &valueElements); err != nil { - return nil, nil, err - } - - tree.readOnly = true - - numChildren := int32(len(tree.keys)) - schemaList = append(schemaList, &parquet.SchemaElement{ - Name: "schema", - RepetitionType: parquet.FieldRepetitionTypePtr(parquet.FieldRepetitionType_REQUIRED), - NumChildren: &numChildren, - }) - schemaList = append(schemaList, schemaElements...) - return schemaList, valueElements, nil -} - -// NewTree - creates new schema tree. -func NewTree() *Tree { - return &Tree{ - schemaMap: make(map[string]*Element), - } -} diff --git a/pkg/s3select/internal/parquet-go/schema/tree_test.go b/pkg/s3select/internal/parquet-go/schema/tree_test.go deleted file mode 100644 index 78279f470..000000000 --- a/pkg/s3select/internal/parquet-go/schema/tree_test.go +++ /dev/null @@ -1,1093 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package schema - -import ( - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" -) - -func TestTreeSet(t *testing.T) { - a, err := NewElement("a", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - b, err := NewElement("b", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - c, err := NewElement("c", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - testCases := []struct { - name string - element *Element - expectErr bool - }{ - {"A", a, false}, - {"A.B", b, false}, - {"A.B.C", c, false}, - {"B.C", nil, true}, // error: parent B does not exist - {"A.B.C.AA", nil, true}, // error: parent A.B.C is not group element - } - - root := NewTree() - for i, testCase := range testCases { - err := root.Set(testCase.name, testCase.element) - expectErr := (err != nil) - - if expectErr != testCase.expectErr { - if testCase.expectErr { - t.Fatalf("case %v: err: expected: , got: ", i+1) - } else { - t.Fatalf("case %v: err: expected: , got: %v", i+1, err) - } - } - } -} - -func TestTreeGet(t *testing.T) { - a, err := NewElement("a", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - b, err := NewElement("b", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - c, err := NewElement("c", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - root := NewTree() - if err := 
root.Set("A", a); err != nil { - t.Fatal(err) - } - if err := root.Set("A.B", b); err != nil { - t.Fatal(err) - } - if err := root.Set("A.B.C", c); err != nil { - t.Fatal(err) - } - - testCases := []struct { - name string - expectedElement *Element - expectedFound bool - }{ - {"A", a, true}, - {"A.B", b, true}, - {"A.B.C", c, true}, - {"B", nil, false}, - {"A.B.C.AA", nil, false}, - } - - for i, testCase := range testCases { - element, found := root.Get(testCase.name) - - if element != testCase.expectedElement { - t.Fatalf("case %v: element: expected: %v, got: %v", i+1, testCase.expectedElement, element) - } - - if found != testCase.expectedFound { - t.Fatalf("case %v: found: expected: %v, got: %v", i+1, testCase.expectedFound, found) - } - } -} - -func TestTreeDelete(t *testing.T) { - testCases := []struct { - name string - expectedFound bool - }{ - {"A", false}, - {"A.B", false}, - {"A.B.C", false}, - } - - for i, testCase := range testCases { - a, err := NewElement("a", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatalf("case %v: %v", i+1, err) - } - - b, err := NewElement("b", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatalf("case %v: %v", i+1, err) - } - - c, err := NewElement("c", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatalf("case %v: %v", i+1, err) - } - - root := NewTree() - if err := root.Set("A", a); err != nil { - t.Fatalf("case %v: %v", i+1, err) - } - if err := root.Set("A.B", b); err != nil { - t.Fatalf("case %v: %v", i+1, err) - } - if err := root.Set("A.B.C", c); err != nil { - t.Fatalf("case %v: %v", i+1, err) - } - - root.Delete(testCase.name) - _, found := root.Get(testCase.name) - - if found != testCase.expectedFound { - t.Fatalf("case %v: found: expected: %v, got: %v", i+1, testCase.expectedFound, found) - } - } -} - -func 
TestTreeToParquetSchema(t *testing.T) { - case1Root := NewTree() - { - a, err := NewElement("a", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case1Root.Set("A", a); err != nil { - t.Fatal(err) - } - } - - case2Root := NewTree() - { - a, err := NewElement("a", parquet.FieldRepetitionType_OPTIONAL, nil, parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case2Root.Set("A", a); err != nil { - t.Fatal(err) - } - } - - case3Root := NewTree() - { - a, err := NewElement("a", parquet.FieldRepetitionType_OPTIONAL, nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP_KEY_VALUE), nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case3Root.Set("A", a); err != nil { - t.Fatal(err) - } - } - - case4Root := NewTree() - { - a, err := NewElement("a", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - if err := case4Root.Set("A", a); err != nil { - t.Fatal(err) - } - } - - case5Root := NewTree() - { - a, err := NewElement("a", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - b, err := NewElement("b", parquet.FieldRepetitionType_OPTIONAL, nil, nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - c, err := NewElement("c", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - if err := case5Root.Set("A", a); err != nil { - t.Fatal(err) - } - if err := case5Root.Set("A.B", b); err != nil { - t.Fatal(err) - } - if err := case5Root.Set("A.B.C", c); err != nil { - t.Fatal(err) - } - } - - testCases := []struct { - tree *Tree - expectErr bool - }{ - {case1Root, true}, // err: A: group element must have 
children - {case2Root, true}, // err: A: ConvertedType INT_8 must have Type value - {case3Root, true}, // err: A: unsupported ConvertedType MAP_KEY_VALUE - {case4Root, false}, - {case5Root, false}, - } - - for i, testCase := range testCases { - _, _, err := testCase.tree.ToParquetSchema() - expectErr := (err != nil) - - if expectErr != testCase.expectErr { - if testCase.expectErr { - t.Fatalf("case %v: err: expected: , got: ", i+1) - } else { - t.Fatalf("case %v: err: expected: , got: %v", i+1, err) - } - } - } -} - -func TestTreeToParquetSchemaOfList(t *testing.T) { - case1Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case1Root.Set("Names", names); err != nil { - t.Fatal(err) - } - } - - case2Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case2Root.Set("Names", names); err != nil { - t.Fatal(err) - } - } - - case3Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - a, err := NewElement("a", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case3Root.Set("Names", names); err != nil { - t.Fatal(err) - } - - if err := case3Root.Set("Names.a", a); err != nil { - t.Fatal(err) - } - } - - case4Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := NewElement("LIST", 
parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case4Root.Set("Names", names); err != nil { - t.Fatal(err) - } - - if err := case4Root.Set("Names.list", list); err != nil { - t.Fatal(err) - } - } - - case5Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := NewElement("list", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case5Root.Set("Names", names); err != nil { - t.Fatal(err) - } - - if err := case5Root.Set("Names.list", list); err != nil { - t.Fatal(err) - } - } - - case6Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case6Root.Set("Names", names); err != nil { - t.Fatal(err) - } - - if err := case6Root.Set("Names.list", list); err != nil { - t.Fatal(err) - } - } - - case7Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - a, err := NewElement("a", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case7Root.Set("Names", names); err != nil { - t.Fatal(err) - } - - if err := 
case7Root.Set("Names.list", list); err != nil { - t.Fatal(err) - } - - if err := case7Root.Set("Names.list.a", a); err != nil { - t.Fatal(err) - } - } - - case8Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - element, err := NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - a, err := NewElement("a", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case8Root.Set("Names", names); err != nil { - t.Fatal(err) - } - - if err := case8Root.Set("Names.list", list); err != nil { - t.Fatal(err) - } - - if err := case8Root.Set("Names.list.element", element); err != nil { - t.Fatal(err) - } - - if err := case8Root.Set("Names.list.a", a); err != nil { - t.Fatal(err) - } - } - - case9Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - element, err := NewElement("ELEMENT", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case9Root.Set("Names", names); err != nil { - t.Fatal(err) - } - - if err := case9Root.Set("Names.list", 
list); err != nil { - t.Fatal(err) - } - - if err := case9Root.Set("Names.list.element", element); err != nil { - t.Fatal(err) - } - } - - case10Root := NewTree() - { - names, err := NewElement("names", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - list, err := NewElement("list", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - element, err := NewElement("element", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case10Root.Set("Names", names); err != nil { - t.Fatal(err) - } - - if err := case10Root.Set("Names.list", list); err != nil { - t.Fatal(err) - } - - if err := case10Root.Set("Names.list.element", element); err != nil { - t.Fatal(err) - } - } - - testCases := []struct { - tree *Tree - expectErr bool - }{ - {case1Root, true}, // err: Names: type must be nil for LIST ConvertedType - {case2Root, true}, // err: Names: children must have one element only for LIST ConvertedType - {case3Root, true}, // err: Names: missing group element 'list' for LIST ConvertedType - {case4Root, true}, // err: Names.list: name must be 'list' - {case5Root, true}, // err: Names.list: repetition type must be REPEATED type - {case6Root, true}, // err: Names.list.element: not found - {case7Root, true}, // err: Names.list.element: not found - {case8Root, true}, // err: Names.list.element: not found - {case9Root, true}, // err: Names.list.element: name must be 'element' - {case10Root, false}, - } - - for i, testCase := range testCases { - _, _, err := testCase.tree.ToParquetSchema() - expectErr := (err != nil) - - if expectErr != testCase.expectErr { - if testCase.expectErr { - t.Fatalf("case %v: err: expected: , got: ", i+1) - } else { - t.Fatalf("case %v: err: expected: 
, got: %v", i+1, err) - } - } - } -} - -func TestTreeToParquetSchemaOfMap(t *testing.T) { - case1Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case1Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - } - - case2Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case2Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - } - - case3Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - a, err := NewElement("a", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case3Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case3Root.Set("NameMap.a", a); err != nil { - t.Fatal(err) - } - } - - case4Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("keyValue", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case4Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case4Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - } - - case5Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if 
err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REQUIRED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case5Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case5Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - } - - case6Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case6Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case6Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - } - - case7Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - a, err := NewElement("a", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - b, err := NewElement("b", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - c, err := NewElement("c", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case7Root.Set("NameMap", nameMap); err != nil { - 
t.Fatal(err) - } - - if err := case7Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err := case7Root.Set("NameMap.key_value.a", a); err != nil { - t.Fatal(err) - } - - if err := case7Root.Set("NameMap.key_value.b", b); err != nil { - t.Fatal(err) - } - - if err := case7Root.Set("NameMap.key_value.c", c); err != nil { - t.Fatal(err) - } - } - - case8Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - a, err := NewElement("a", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case8Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case8Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err := case8Root.Set("NameMap.key_value.a", a); err != nil { - t.Fatal(err) - } - } - - case9Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - key, err := NewElement("KEY", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case9Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case9Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if 
err := case9Root.Set("NameMap.key_value.key", key); err != nil { - t.Fatal(err) - } - } - - case10Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - key, err := NewElement("key", parquet.FieldRepetitionType_OPTIONAL, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case10Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case10Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err := case10Root.Set("NameMap.key_value.key", key); err != nil { - t.Fatal(err) - } - } - - case11Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - key, err := NewElement("key", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - a, err := NewElement("a", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case11Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case11Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err := case11Root.Set("NameMap.key_value.key", key); err 
!= nil { - t.Fatal(err) - } - - if err := case11Root.Set("NameMap.key_value.a", a); err != nil { - t.Fatal(err) - } - } - - case12Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - key, err := NewElement("key", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - value, err := NewElement("VALUE", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case12Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case12Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err := case12Root.Set("NameMap.key_value.key", key); err != nil { - t.Fatal(err) - } - - if err := case12Root.Set("NameMap.key_value.value", value); err != nil { - t.Fatal(err) - } - } - - case13Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - key, err := NewElement("key", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case13Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := 
case13Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err := case13Root.Set("NameMap.key_value.key", key); err != nil { - t.Fatal(err) - } - } - - case14Root := NewTree() - { - nameMap, err := NewElement("nameMap", parquet.FieldRepetitionType_REQUIRED, - nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - keyValue, err := NewElement("key_value", parquet.FieldRepetitionType_REPEATED, - nil, nil, - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - key, err := NewElement("key", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - value, err := NewElement("value", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := case14Root.Set("NameMap", nameMap); err != nil { - t.Fatal(err) - } - - if err := case14Root.Set("NameMap.key_value", keyValue); err != nil { - t.Fatal(err) - } - - if err := case14Root.Set("NameMap.key_value.key", key); err != nil { - t.Fatal(err) - } - - if err := case13Root.Set("NameMap.key_value.value", value); err != nil { - t.Fatal(err) - } - } - - testCases := []struct { - tree *Tree - expectErr bool - }{ - {case1Root, true}, // err: NameMap: type must be nil for MAP ConvertedType - {case2Root, true}, // err: NameMap: children must have one element only for MAP ConvertedType - {case3Root, true}, // err: NameMap: missing group element 'key_value' for MAP ConvertedType - {case4Root, true}, // err: NameMap.key_value: name must be 'key_value' - {case5Root, true}, // err: NameMap.key_value: repetition type must be REPEATED type - {case6Root, true}, // err: NameMap.key_value: children must have 'key' and optionally 'value' elements for MAP ConvertedType - {case7Root, true}, // 
err: NameMap.key_value: children must have 'key' and optionally 'value' elements for MAP ConvertedType - {case8Root, true}, // err: NameMap.key_value: missing 'key' element for MAP ConvertedType - {case9Root, true}, // err: NameMap.key_value.key: name must be 'key' - {case10Root, true}, // err: NameMap.key_value: repetition type must be REQUIRED type - {case11Root, true}, // err: NameMap.key_value: second element must be 'value' element for MAP ConvertedType - {case12Root, true}, // err: NameMap.key_value.value: name must be 'value' - {case13Root, false}, - {case14Root, false}, - } - - for i, testCase := range testCases { - _, _, err := testCase.tree.ToParquetSchema() - expectErr := (err != nil) - - if expectErr != testCase.expectErr { - if testCase.expectErr { - t.Fatalf("case %v: err: expected: , got: ", i+1) - } else { - t.Fatalf("case %v: err: expected: , got: %v", i+1, err) - } - } - } -} diff --git a/pkg/s3select/internal/parquet-go/table.go b/pkg/s3select/internal/parquet-go/table.go deleted file mode 100644 index 60fa1c059..000000000 --- a/pkg/s3select/internal/parquet-go/table.go +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package parquet - -import "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - -func getTableValues(values interface{}, valueType parquet.Type) (tableValues []interface{}) { - return valuesToInterfaces(values, valueType) -} - -type table struct { - RepetitionType parquet.FieldRepetitionType - Type parquet.Type - MaxDefinitionLevel int32 - MaxRepetitionLevel int32 - Path []string // Path of this column - Values []interface{} // Parquet values - DefinitionLevels []int32 // Definition Levels slice - RepetitionLevels []int32 // Repetition Levels slice - ConvertedType parquet.ConvertedType - Encoding parquet.Encoding - BitWidth int32 -} - -func newTableFromTable(srcTable *table) *table { - if srcTable == nil { - return nil - } - - return &table{ - Type: srcTable.Type, - Path: append([]string{}, srcTable.Path...), - } -} - -func (table *table) Merge(tables ...*table) { - for i := 0; i < len(tables); i++ { - if tables[i] == nil { - continue - } - - table.Values = append(table.Values, tables[i].Values...) - table.RepetitionLevels = append(table.RepetitionLevels, tables[i].RepetitionLevels...) - table.DefinitionLevels = append(table.DefinitionLevels, tables[i].DefinitionLevels...) 
- - if table.MaxDefinitionLevel < tables[i].MaxDefinitionLevel { - table.MaxDefinitionLevel = tables[i].MaxDefinitionLevel - } - - if table.MaxRepetitionLevel < tables[i].MaxRepetitionLevel { - table.MaxRepetitionLevel = tables[i].MaxRepetitionLevel - } - } -} - -func (table *table) Pop(numRows int64) *table { - result := newTableFromTable(table) - var i, num int64 - for i = int64(0); i < int64(len(table.Values)); i++ { - if table.RepetitionLevels[i] == 0 { - if num >= numRows { - break - } - - num++ - } - - if result.MaxRepetitionLevel < table.RepetitionLevels[i] { - result.MaxRepetitionLevel = table.RepetitionLevels[i] - } - - if result.MaxDefinitionLevel < table.DefinitionLevels[i] { - result.MaxDefinitionLevel = table.DefinitionLevels[i] - } - } - - result.RepetitionLevels = table.RepetitionLevels[:i] - result.DefinitionLevels = table.DefinitionLevels[:i] - result.Values = table.Values[:i] - - table.RepetitionLevels = table.RepetitionLevels[i:] - table.DefinitionLevels = table.DefinitionLevels[i:] - table.Values = table.Values[i:] - - return result -} diff --git a/pkg/s3select/internal/parquet-go/test.parquet b/pkg/s3select/internal/parquet-go/test.parquet deleted file mode 100644 index f9e319f39..000000000 Binary files a/pkg/s3select/internal/parquet-go/test.parquet and /dev/null differ diff --git a/pkg/s3select/internal/parquet-go/tools/parquet2csv/parquet2csv.go b/pkg/s3select/internal/parquet-go/tools/parquet2csv/parquet2csv.go deleted file mode 100644 index c10100f94..000000000 --- a/pkg/s3select/internal/parquet-go/tools/parquet2csv/parquet2csv.go +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package main - -import ( - "encoding/csv" - "fmt" - "io" - "os" - "path" - "strings" - - "github.com/minio/minio-go/v7/pkg/set" - parquet "github.com/minio/minio/pkg/s3select/internal/parquet-go" -) - -func getReader(name string, offset int64, length int64) (io.ReadCloser, error) { - file, err := os.Open(name) - if err != nil { - return nil, err - } - - fi, err := file.Stat() - if err != nil { - return nil, err - } - - if offset < 0 { - offset = fi.Size() + offset - } - - if _, err = file.Seek(offset, io.SeekStart); err != nil { - return nil, err - } - - return file, nil -} - -func printUsage() { - progName := path.Base(os.Args[0]) - fmt.Printf("usage: %v PARQUET-FILE [COLUMN...]\n", progName) - fmt.Println() - fmt.Printf("examples:\n") - fmt.Printf("# Convert all columns to CSV\n") - fmt.Printf("$ %v example.parquet\n", progName) - fmt.Println() - fmt.Printf("# Convert specific columns to CSV\n") - fmt.Printf("$ %v example.par firstname dob\n", progName) - fmt.Println() -} - -func main() { - if len(os.Args) < 2 { - printUsage() - os.Exit(-1) - } - - name := os.Args[1] - ext := path.Ext(name) - csvFilename := name + ".csv" - if ext == ".parquet" || ext == ".par" { - csvFilename = strings.TrimSuffix(name, ext) + ".csv" - } - - columns := set.CreateStringSet(os.Args[2:]...) 
- if len(columns) == 0 { - columns = nil - } - - file, err := parquet.NewReader( - func(offset, length int64) (io.ReadCloser, error) { - return getReader(name, offset, length) - }, - columns, - ) - if err != nil { - fmt.Printf("%v: %v\n", name, err) - os.Exit(1) - } - - defer file.Close() - - csvFile, err := os.OpenFile(csvFilename, os.O_RDWR|os.O_CREATE, 0755) - if err != nil { - fmt.Printf("%v: %v\n", csvFilename, err) - os.Exit(1) - } - - defer csvFile.Close() - - csvWriter := csv.NewWriter(csvFile) - defer csvWriter.Flush() - - headerWritten := false - for { - record, err := file.Read() - if err != nil { - if err != io.EOF { - fmt.Printf("%v: %v\n", name, err) - os.Exit(1) - } - - break - } - - if !headerWritten { - var csvRecord []string - record.Range(func(name string, value parquet.Value) bool { - csvRecord = append(csvRecord, name) - return true - }) - - if err = csvWriter.Write(csvRecord); err != nil { - fmt.Printf("%v: %v\n", csvFilename, err) - os.Exit(1) - } - - headerWritten = true - } - - var csvRecord []string - record.Range(func(name string, value parquet.Value) bool { - csvRecord = append(csvRecord, fmt.Sprintf("%v", value.Value)) - return true - }) - - if err = csvWriter.Write(csvRecord); err != nil { - fmt.Printf("%v: %v\n", csvFilename, err) - os.Exit(1) - } - } -} diff --git a/pkg/s3select/internal/parquet-go/tools/parquet2json/parquet2json.go b/pkg/s3select/internal/parquet-go/tools/parquet2json/parquet2json.go deleted file mode 100644 index 1d1de6d27..000000000 --- a/pkg/s3select/internal/parquet-go/tools/parquet2json/parquet2json.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package main - -import ( - "encoding/json" - "fmt" - "io" - "os" - "path" - "strings" - - "github.com/minio/minio-go/v7/pkg/set" - parquet "github.com/minio/minio/pkg/s3select/internal/parquet-go" -) - -func getReader(name string, offset int64, length int64) (io.ReadCloser, error) { - file, err := os.Open(name) - if err != nil { - return nil, err - } - - fi, err := file.Stat() - if err != nil { - return nil, err - } - - if offset < 0 { - offset = fi.Size() + offset - } - - if _, err = file.Seek(offset, io.SeekStart); err != nil { - return nil, err - } - - return file, nil -} - -func printUsage() { - progName := path.Base(os.Args[0]) - fmt.Printf("Usage: %v PARQUET-FILE [COLUMN...]\n", progName) - fmt.Println() - fmt.Printf("Examples:\n") - fmt.Printf("# Convert all columns to JSON\n") - fmt.Printf("$ %v example.parquet\n", progName) - fmt.Println() - fmt.Printf("# Convert specific columns to JSON\n") - fmt.Printf("$ %v example.par firstname dob\n", progName) - fmt.Println() -} - -func main() { - if len(os.Args) < 2 { - printUsage() - os.Exit(-1) - } - - name := os.Args[1] - ext := path.Ext(name) - jsonFilename := name + ".json" - if ext == ".parquet" || ext == ".par" { - jsonFilename = strings.TrimSuffix(name, ext) + ".json" - } - - columns := set.CreateStringSet(os.Args[2:]...) 
- if len(columns) == 0 { - columns = nil - } - - file, err := parquet.NewReader( - func(offset, length int64) (io.ReadCloser, error) { - return getReader(name, offset, length) - }, - columns, - ) - if err != nil { - fmt.Printf("%v: %v\n", name, err) - os.Exit(1) - } - - defer file.Close() - - jsonFile, err := os.OpenFile(jsonFilename, os.O_RDWR|os.O_CREATE, 0755) - if err != nil { - fmt.Printf("%v: %v\n", jsonFilename, err) - os.Exit(1) - } - - defer jsonFile.Close() - - for { - record, err := file.Read() - if err != nil { - if err != io.EOF { - fmt.Printf("%v: %v\n", name, err) - os.Exit(1) - } - - break - } - - data, err := json.Marshal(record) - if err != nil { - fmt.Printf("%v: %v\n", name, err) - os.Exit(1) - } - data = append(data, byte('\n')) - - if _, err = jsonFile.Write(data); err != nil { - fmt.Printf("%v: %v\n", jsonFilename, err) - os.Exit(1) - } - } -} diff --git a/pkg/s3select/internal/parquet-go/writer.go b/pkg/s3select/internal/parquet-go/writer.go deleted file mode 100644 index b0b0a0c8e..000000000 --- a/pkg/s3select/internal/parquet-go/writer.go +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package parquet - -import ( - "context" - "encoding/binary" - "fmt" - "io" - - "git.apache.org/thrift.git/lib/go/thrift" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/data" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -const ( - defaultPageSize = 8 * 1024 // 8 KiB - defaultRowGroupSize = 128 * 1024 * 1024 // 128 MiB -) - -// Writer - represents parquet writer. -type Writer struct { - PageSize int64 - RowGroupSize int64 - CompressionType parquet.CompressionCodec - - writeCloser io.WriteCloser - numRows int64 - offset int64 - footer *parquet.FileMetaData - schemaTree *schema.Tree - valueElements []*schema.Element - columnDataMap map[string]*data.Column - rowGroupCount int -} - -func (writer *Writer) writeData() (err error) { - if writer.numRows == 0 { - return nil - } - - var chunks []*data.ColumnChunk - for _, element := range writer.valueElements { - name := element.PathInTree - columnData, found := writer.columnDataMap[name] - if !found { - continue - } - - columnChunk := columnData.Encode(element) - chunks = append(chunks, columnChunk) - } - - rowGroup := data.NewRowGroup(chunks, writer.numRows, writer.offset) - - for _, chunk := range chunks { - if _, err = writer.writeCloser.Write(chunk.Data()); err != nil { - return err - } - - writer.offset += chunk.DataLen() - } - - writer.footer.RowGroups = append(writer.footer.RowGroups, rowGroup) - writer.footer.NumRows += writer.numRows - - writer.numRows = 0 - writer.columnDataMap = nil - return nil -} - -// WriteJSON - writes a record represented in JSON. -func (writer *Writer) WriteJSON(recordData []byte) (err error) { - columnDataMap, err := data.UnmarshalJSON(recordData, writer.schemaTree) - if err != nil { - return err - } - - return writer.Write(columnDataMap) -} - -// Write - writes a record represented in map. 
-func (writer *Writer) Write(record map[string]*data.Column) (err error) { - if writer.columnDataMap == nil { - writer.columnDataMap = record - } else { - for name, columnData := range record { - var found bool - var element *schema.Element - for _, element = range writer.valueElements { - if element.PathInTree == name { - found = true - break - } - } - - if !found { - return fmt.Errorf("%v is not value column", name) - } - - writer.columnDataMap[name].Merge(columnData) - } - } - - writer.numRows++ - if writer.numRows == int64(writer.rowGroupCount) { - return writer.writeData() - } - - return nil -} - -func (writer *Writer) finalize() (err error) { - if err = writer.writeData(); err != nil { - return err - } - - ts := thrift.NewTSerializer() - ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) - footerBuf, err := ts.Write(context.TODO(), writer.footer) - if err != nil { - return err - } - - if _, err = writer.writeCloser.Write(footerBuf); err != nil { - return err - } - - footerSizeBuf := make([]byte, 4) - binary.LittleEndian.PutUint32(footerSizeBuf, uint32(len(footerBuf))) - - if _, err = writer.writeCloser.Write(footerSizeBuf); err != nil { - return err - } - - _, err = writer.writeCloser.Write([]byte("PAR1")) - return err -} - -// Close - finalizes and closes writer. If any pending records are available, they are written here. -func (writer *Writer) Close() (err error) { - if err = writer.finalize(); err != nil { - return err - } - - return writer.writeCloser.Close() -} - -// NewWriter - creates new parquet writer. Binary data of rowGroupCount records are written to writeCloser. 
-func NewWriter(writeCloser io.WriteCloser, schemaTree *schema.Tree, rowGroupCount int) (*Writer, error) { - if _, err := writeCloser.Write([]byte("PAR1")); err != nil { - return nil, err - } - - schemaList, valueElements, err := schemaTree.ToParquetSchema() - if err != nil { - return nil, err - } - - footer := parquet.NewFileMetaData() - footer.Version = 1 - footer.Schema = schemaList - - return &Writer{ - PageSize: defaultPageSize, - RowGroupSize: defaultRowGroupSize, - CompressionType: parquet.CompressionCodec_SNAPPY, - - writeCloser: writeCloser, - offset: 4, - footer: footer, - schemaTree: schemaTree, - valueElements: valueElements, - rowGroupCount: rowGroupCount, - }, nil -} diff --git a/pkg/s3select/internal/parquet-go/writer_test.go b/pkg/s3select/internal/parquet-go/writer_test.go deleted file mode 100644 index 557f7240b..000000000 --- a/pkg/s3select/internal/parquet-go/writer_test.go +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2015-2021 MinIO, Inc. -// -// This file is part of MinIO Object Storage stack -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- -package parquet - -import ( - "os" - "testing" - - "github.com/minio/minio/pkg/s3select/internal/parquet-go/data" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" - "github.com/minio/minio/pkg/s3select/internal/parquet-go/schema" -) - -func TestWriterWrite(t *testing.T) { - schemaTree := schema.NewTree() - { - one, err := schema.NewElement("one", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_16), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - two, err := schema.NewElement("two", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - three, err := schema.NewElement("three", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BOOLEAN), nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := schemaTree.Set("one", one); err != nil { - t.Fatal(err) - } - if err := schemaTree.Set("two", two); err != nil { - t.Fatal(err) - } - if err := schemaTree.Set("three", three); err != nil { - t.Fatal(err) - } - } - - file, err := os.Create("test.parquet") - if err != nil { - t.Fatal(err) - } - - writer, err := NewWriter(file, schemaTree, 100) - if err != nil { - t.Fatal(err) - } - - oneColumn := data.NewColumn(parquet.Type_INT32) - oneColumn.AddInt32(100, 0, 0) - - twoColumn := data.NewColumn(parquet.Type_BYTE_ARRAY) - twoColumn.AddByteArray([]byte("foo"), 0, 0) - - threeColumn := data.NewColumn(parquet.Type_BOOLEAN) - threeColumn.AddBoolean(true, 0, 0) - - record := map[string]*data.Column{ - "one": oneColumn, - "two": twoColumn, - "three": threeColumn, - } - - err = writer.Write(record) - if err != nil { - t.Fatal(err) - } - - err = writer.Close() - if err != nil { - t.Fatal(err) - } -} - -func TestWriterWriteJSON(t *testing.T) { - schemaTree := schema.NewTree() - { - one, err := 
schema.NewElement("one", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_16), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - two, err := schema.NewElement("two", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), - nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - three, err := schema.NewElement("three", parquet.FieldRepetitionType_REQUIRED, - parquet.TypePtr(parquet.Type_BOOLEAN), nil, nil, nil, nil) - if err != nil { - t.Fatal(err) - } - - if err := schemaTree.Set("one", one); err != nil { - t.Fatal(err) - } - if err := schemaTree.Set("two", two); err != nil { - t.Fatal(err) - } - if err := schemaTree.Set("three", three); err != nil { - t.Fatal(err) - } - } - - file, err := os.Create("test.parquet") - if err != nil { - t.Fatal(err) - } - - writer, err := NewWriter(file, schemaTree, 100) - if err != nil { - t.Fatal(err) - } - - record := `{"one": 100, "two": "foo", "three": true}` - err = writer.WriteJSON([]byte(record)) - if err != nil { - t.Fatal(err) - } - - err = writer.Close() - if err != nil { - t.Fatal(err) - } -} diff --git a/pkg/s3select/parquet/reader.go b/pkg/s3select/parquet/reader.go index 5d2121e83..832c39740 100644 --- a/pkg/s3select/parquet/reader.go +++ b/pkg/s3select/parquet/reader.go @@ -23,10 +23,10 @@ import ( "time" "github.com/bcicen/jstream" - parquetgo "github.com/minio/minio/pkg/s3select/internal/parquet-go" - parquetgen "github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" jsonfmt "github.com/minio/minio/pkg/s3select/json" "github.com/minio/minio/pkg/s3select/sql" + parquetgo "github.com/minio/parquet-go" + parquetgen "github.com/minio/parquet-go/gen-go/parquet" ) // Reader - Parquet record reader for S3Select.