//
//  CPUPool.cpp
//  MNN
//
//  Created by MNN on 2018/07/15.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUBackend.hpp"
#include "core/Concurrency.h"
#include "backend/cpu/CPUPool.hpp"
#include "compute/CommonOptFunction.h"
#include "math/Vec.hpp"

using Vec4 = MNN::Math::Vec<float, 4>;
using Vec16 = MNN::Math::Vec<int8_t, 16>;

namespace MNN {
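
// CPUPool runs one pooling kernel per (batch, channel-pack) plane. The concrete
// compute function (max or average, float or int8) is injected by the creator
// below as a plain function pointer, together with the element size in bytes,
// so this Execution only deals with geometry, padding and threading.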
class CPUPool : public Execution {
public:
    CPUPool(Backend *b, const Pool *parameter, void* func, int bytes) : MNN::Execution(b), mParameter(parameter) {
        mCompute = (decltype(mCompute))func;
        mBytes = bytes;
    }
    virtual ~CPUPool() = default;
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override {
        auto layer       = mParameter;
        int strideWidth  = layer->strideX();
        int strideHeight = layer->strideY();
        int padWidth     = layer->padX();
        int padHeight    = layer->padY();
        auto core        = static_cast<CPUBackend*>(backend())->functions();

        // For global pooling, one window covers the whole plane: override the
        // kernel and stride with the input size and drop the padding.
        auto input       = inputs[0];
        auto output      = outputs[0];
        int kernelWidth  = layer->kernelX();
        int kernelHeight = layer->kernelY();
        if (layer->isGlobal()) {
            kernelWidth  = input->width();
            kernelHeight = input->height();
            strideWidth  = input->width();
            strideHeight = input->height();
            padWidth     = 0;
            padHeight    = 0;
        }
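
        // SAME padding keeps the output at ceil(input / stride) per axis. Example:
        // a 7x7 input with kernel 3 and stride 2 gives a 4x4 output, so
        // padNeeded = (4 - 1) * 2 + 3 - 7 = 2 and pad = 1 on each side.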
        if (layer->padType() == PoolPadType_SAME) {
            int padNeededWidth  = (output->width() - 1) * strideWidth + kernelWidth - input->width();
            int padNeededHeight = (output->height() - 1) * strideHeight + kernelHeight - input->height();
            padWidth            = padNeededWidth > 0 ? padNeededWidth / 2 : 0;
            padHeight           = padNeededHeight > 0 ? padNeededHeight / 2 : 0;
        } else if (layer->padType() == PoolPadType_VALID) {
            padWidth = padHeight = 0;
        }
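
        // Tensors are in the backend's packed layout (pack channels interleaved
        // per pixel, e.g. NC4HW4 when core->pack == 4), so each of the
        // batch * UP_DIV(channel, pack) planes below is one packed H x W image.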
        auto totalDepth        = input->batch() * UP_DIV(input->channel(), core->pack);
        auto inputData         = input->host<uint8_t>();
        auto outputData        = output->host<uint8_t>();
        auto inputPlaneStride  = core->pack * input->width() * input->height();
        auto outputPlaneStride = core->pack * output->width() * output->height();
        int threadNumber       = ((CPUBackend *)backend())->threadNumber();
        auto padType           = layer->padType();
        auto countType         = layer->countType();
        // When explicit pads are given, the CAFFE pad type degenerates to VALID.
        if (layer->pads() != nullptr && padType == PoolPadType_CAFFE) {
            padType = PoolPadType_VALID;
        }
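
        // Prepare the parallel job once per resize: planes are dealt out
        // round-robin across threads, and each thread runs the injected kernel
        // on disjoint input/output planes, so the lambda needs no synchronization.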
        mFunction = std::make_pair(threadNumber, [=](int tId) {
            for (int channel = (int)tId; channel < totalDepth; channel += threadNumber) {
                // Pointer offsets are in bytes; mBytes is the per-element size.
                mCompute(inputData + channel * inputPlaneStride * mBytes, input->width(), input->height(),
                              outputData + outputPlaneStride * channel * mBytes, output->width(), output->height(), kernelWidth,
                              kernelHeight, strideWidth, strideHeight, padWidth, padHeight, padType, countType);
            }
        });
        return NO_ERROR;
    }
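
    // onExecute just replays the job prepared in onResize, once per thread.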
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override {
        MNN_CONCURRENCY_BEGIN(tId, mFunction.first) {
            mFunction.second((int)tId);
        }
        MNN_CONCURRENCY_END();
        return NO_ERROR;
    }

private:
    const Pool *mParameter;
    void(*mCompute)(const void* channelInput, int inputWidth, int inputHeight, void *channelOutput,
                           int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
                           int strideHeight, int padWidth, int padHeight, int padType, int countType);
    std::pair<int, std::function<void(int)> > mFunction;
    int mBytes;
};
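
// The creator selects the kernel when the Execution is built: int8 inputs use
// the templated kernels over 16-lane vectors (element size 1), while all other
// inputs use the function pointers and element size (core->bytes) published by
// the backend's CPUBackend::functions() table.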
class CPUPoolCreator : public CPUBackend::Creator {
public:
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        void* func = nullptr;
        if (inputs[0]->getType() == halide_type_of<int8_t>()) {
            if (op->main_as_Pool()->type() == PoolType_AVEPOOL) {
                func = (void*)(poolingAvg<int8_t, Vec16, 4>);
            } else {
                // -128 is the smallest int8 value, i.e. the identity for max pooling.
                func = (void*)(poolingMax<int8_t, Vec16, 4, -128>);
            }
            return new CPUPool(backend, op->main_as_Pool(), func, 1);
        }
        auto core = static_cast<CPUBackend*>(backend)->functions();
        if (op->main_as_Pool()->type() == PoolType_AVEPOOL) {
            func = (void*)(core->MNNPoolingAvg);
        } else {
            func = (void*)(core->MNNPoolingMax);
        }
        return new CPUPool(backend, op->main_as_Pool(), func, core->bytes);
    }
};

REGISTER_CPU_OP_CREATOR(CPUPoolCreator, OpType_Pooling);

} // namespace MNN