| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | //
 | 
					
						
							|  |  |  | //  CPUDeconvolution.hpp
 | 
					
						
							|  |  |  | //  MNN
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //  Created by MNN on 2018/07/20.
 | 
					
						
							|  |  |  | //  Copyright © 2018, Alibaba Group Holding Limited
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifndef CPUDeconvolution_hpp
 | 
					
						
							|  |  |  | #define CPUDeconvolution_hpp
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-02-26 09:57:17 +08:00
										 |  |  | #include "CPUConvolution.hpp"
 | 
					
						
							| 
									
										
										
										
											2024-04-19 11:58:21 +08:00
										 |  |  | #include "compute/CommonOptFunction.h"
 | 
					
						
							| 
									
										
										
										
											2020-02-26 09:57:17 +08:00
										 |  |  | #include "compute/StrassenMatmulComputor.hpp"
 | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  | #include "compute/GemmInt8Executor.hpp"
 | 
					
						
							|  |  |  | #include "core/TensorUtils.hpp"
 | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | namespace MNN { | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  | class CPUDeconvolutionBasic : public CPUConvolution { | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | public: | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  |     CPUDeconvolutionBasic(const Tensor *input, const Op *convOp, Backend *b); | 
					
						
							|  |  |  |     virtual ~CPUDeconvolutionBasic() = default; | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | protected: | 
					
						
							|  |  |  |     int mSrcCount; | 
					
						
							| 
									
										
										
										
											2021-04-08 15:34:23 +08:00
										 |  |  |     std::vector<float> mPostParameters; | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  | class CPUDeconvolutionCommon : public CPUDeconvolutionBasic { | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | public: | 
					
						
							| 
									
										
										
										
											2023-12-04 11:12:20 +08:00
										 |  |  |     CPUDeconvolutionCommon(const Tensor *input, const Op *convOp, Backend *b, bool dynamicWeight); | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  |     virtual ~CPUDeconvolutionCommon(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | protected: | 
					
						
							|  |  |  |     std::shared_ptr<Tensor> mBias; | 
					
						
							| 
									
										
										
										
											2023-12-04 11:12:20 +08:00
										 |  |  |     bool mDynamicWeight; | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class CPUDeconvolutionOrigin : public CPUDeconvolutionBasic { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |     CPUDeconvolutionOrigin(const Tensor *input, Tensor *weight, const Op *convOp, Backend *b, bool ModeInt8) | 
					
						
							|  |  |  |         : CPUDeconvolutionBasic(input, convOp, b){ | 
					
						
							|  |  |  |         if (ModeInt8) { | 
					
						
							|  |  |  |             const auto weightDataPtr = weight->host<int8_t>(); | 
					
						
							|  |  |  |             auto conv2d = convOp->main_as_Convolution2D(); | 
					
						
							|  |  |  |             auto common = conv2d->common(); | 
					
						
							| 
									
										
										
										
											2024-04-19 11:58:21 +08:00
										 |  |  |             auto pack = static_cast<CPUBackend*>(b)->functions()->pack; | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |             mResource = CPUConvolution::makeResourceInt8(backend(), convOp, pack); | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |             CPUConvolution::MutableResourceInt8 mutableResource(mResource, b); | 
					
						
							|  |  |  |             auto core = static_cast<CPUBackend*>(b)->int8Functions(); | 
					
						
							|  |  |  |             auto gemmKernel = core->Int8GemmKernel; | 
					
						
							|  |  |  |             int UNIT, SRC_UNIT, DST_XUNIT; | 
					
						
							|  |  |  |             core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); | 
					
						
							|  |  |  |             const auto kEleCnt = mCommon->kernelX() * mCommon->kernelY(); | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |             const int ocDiv4 = UP_DIV(common->outputCount(), pack) * kEleCnt;  | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |             const int icDiv4 = UP_DIV(common->inputCount(), SRC_UNIT); | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |             const int ocDivUnit = UP_DIV(common->outputCount(), UNIT);  | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |             const int oc4 = ocDiv4 / kEleCnt; | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |             const int bias_elesize = ocDiv4 * pack; | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |             // set offset if use SSE.
 | 
					
						
							|  |  |  |             auto inputQuant = TensorUtils::getQuantInfo(input); | 
					
						
							|  |  |  |             auto inputZeroPoint = inputQuant[1]; | 
					
						
							|  |  |  |             std::vector<int32_t> _bias(bias_elesize, inputZeroPoint); | 
					
						
							|  |  |  | #ifdef MNN_USE_SSE
 | 
					
						
							|  |  |  |             int actBits = conv2d->symmetricQuan()->nbits(); | 
					
						
							|  |  |  |             if (actBits <= 7) { | 
					
						
							|  |  |  |                 gemmKernel = core->Int8GemmKernelFast; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             for (int a = 0; a < kEleCnt; ++a){ | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |                 for (int oz = 0; oz < ocDivUnit * UNIT; ++oz) { | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |                 int offset = inputZeroPoint, oz4 = oz / UNIT, ozRemain = oz % UNIT; | 
					
						
							|  |  |  |                 for (int sz = 0; sz < icDiv4 * SRC_UNIT; ++sz) { | 
					
						
							|  |  |  |                     int sz4 = sz / SRC_UNIT, szRemain = sz % SRC_UNIT; | 
					
						
							|  |  |  |                     int index = (((a * oc4 + oz4) * icDiv4 + sz4) * UNIT + ozRemain) * SRC_UNIT + szRemain; | 
					
						
							|  |  |  |                     auto weightInt8Data = weightDataPtr[index]; | 
					
						
							|  |  |  |                     offset += weightInt8Data * (-128); | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |                 if (oz < oc4 * pack) { | 
					
						
							|  |  |  |                     _bias[a * oc4 * pack + oz] = offset; | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #else
 | 
					
						
							| 
									
										
										
										
											2024-07-22 19:51:53 +08:00
										 |  |  |             if(conv2d->symmetricQuan() && conv2d->symmetricQuan()->method() == QuantizeAlgo_OVERFLOW_AWARE){ | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |                 gemmKernel = core->Int8GemmKernelFast; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |             mDeconvInt8Exe.reset(new GemmInt8Executor(b, mResource, convOp, gemmKernel, _bias)); | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  |     } | 
					
						
							|  |  |  |     virtual ~CPUDeconvolutionOrigin() = default; | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override; | 
					
						
							|  |  |  |     virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | private: | 
					
						
							| 
									
										
										
										
											2020-02-26 09:57:17 +08:00
										 |  |  |     std::shared_ptr<StrassenMatrixComputor> mMatMul; | 
					
						
							| 
									
										
										
										
											2023-04-18 18:54:46 +08:00
										 |  |  |     std::shared_ptr<GemmInt8Executor> mDeconvInt8Exe; | 
					
						
							|  |  |  |     std::vector<std::pair<std::function<void(uint8_t*, int)>, int>> mPostFunctions; | 
					
						
							|  |  |  |     std::shared_ptr<Tensor> mTempOutput; | 
					
						
							|  |  |  |     std::shared_ptr<CPUConvolution::ResourceInt8> mResource; | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class CPUDeconvolution : public CPUDeconvolutionCommon { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2023-12-04 11:12:20 +08:00
										 |  |  |     CPUDeconvolution(const Tensor *input, const Op *convOp, Backend *b, bool dynamicWeight); | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  |     virtual ~CPUDeconvolution(); | 
					
						
							| 
									
										
										
										
											2023-12-04 11:12:20 +08:00
										 |  |  |     virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override; | 
					
						
							|  |  |  |     virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override; | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-04 11:12:20 +08:00
										 |  |  |     struct Param { | 
					
						
							|  |  |  |         int outputCount; | 
					
						
							|  |  |  |         int srcCount; | 
					
						
							|  |  |  |         int fh; | 
					
						
							|  |  |  |         int fw; | 
					
						
							|  |  |  |     }; | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  | private: | 
					
						
							| 
									
										
										
										
											2023-12-04 11:12:20 +08:00
										 |  |  |     Param mParam; | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  |     std::shared_ptr<Tensor> mWeight; | 
					
						
							| 
									
										
										
										
											2023-12-04 11:12:20 +08:00
										 |  |  |     std::shared_ptr<Tensor> mWeightTransformCache; | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  |     std::vector<Tensor *> mTempInputs; | 
					
						
							|  |  |  |     std::shared_ptr<CPUDeconvolutionOrigin> mOrigin; | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | }; | 
					
						
							|  |  |  | } // namespace MNN
 | 
					
						
							|  |  |  | #endif /* CPUDeconvolution_hpp */
 |