| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | //
 | 
					
						
							|  |  |  | //  CPUConvolution.cpp
 | 
					
						
							|  |  |  | //  MNN
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //  Created by MNN on 2018/07/15.
 | 
					
						
							|  |  |  | //  Copyright © 2018, Alibaba Group Holding Limited
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-27 22:16:57 +08:00
										 |  |  | #include "backend/cpu/CPUConvolution.hpp"
 | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | #include <math.h>
 | 
					
						
							| 
									
										
										
										
											2019-12-27 22:16:57 +08:00
										 |  |  | #include "backend/cpu/compute/CommonOptFunction.h"
 | 
					
						
							|  |  |  | #include "core/Macro.h"
 | 
					
						
							| 
									
										
										
										
											2020-07-04 01:21:30 +08:00
										 |  |  | #include <limits>
 | 
					
						
							| 
									
										
										
										
											2019-12-27 22:16:57 +08:00
										 |  |  | #include "backend/cpu/compute/ConvolutionFloatFactory.h"
 | 
					
						
							| 
									
										
										
										
											2019-06-17 20:10:35 +08:00
										 |  |  | //#define MNN_OPEN_TIME_TRACE
 | 
					
						
							| 
									
										
										
										
											2019-12-27 22:16:57 +08:00
										 |  |  | #include <MNN/AutoTime.hpp>
 | 
					
						
							| 
									
										
										
										
											2020-03-12 20:29:43 +08:00
										 |  |  | #include "core/ConvolutionCommon.hpp"
 | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | namespace MNN { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CPUConvolution::CPUConvolution(const Convolution2DCommon *convOp, Backend *b) : MNN::Execution(b), mCommon(convOp) { | 
					
						
							|  |  |  |     mPostFunction = getPostFunction(); | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2020-07-04 01:21:30 +08:00
										 |  |  | std::vector<float> CPUConvolution::getPostParameters() const { | 
					
						
							|  |  |  |     std::vector<float> postParameters = { | 
					
						
							|  |  |  |         1.0f, | 
					
						
							|  |  |  |         1.0f, | 
					
						
							|  |  |  |         -std::numeric_limits<float>().max(), | 
					
						
							|  |  |  |         std::numeric_limits<float>().max(), | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  |     if (mCommon->relu()) { | 
					
						
							|  |  |  |         postParameters[2] = 0.0f; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (mCommon->relu6()) { | 
					
						
							|  |  |  |         postParameters[2] = 0.0f; | 
					
						
							|  |  |  |         postParameters[3] = 6.0f; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return postParameters; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  | int CPUConvolution::reorderWeightSize(int depth, int outputCount, int kernelSize, int unitDepth, int unitOC) { | 
					
						
							|  |  |  |     return UP_DIV(outputCount, unitOC) * UP_DIV(depth, unitDepth) * kernelSize * unitDepth * unitOC; | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  | template<typename T> | 
					
						
							|  |  |  | void CPUConvolution::reorderWeightSlow(T* dest, const T* source, size_t depth, size_t outputCount, size_t kernelSize, | 
					
						
							|  |  |  |                                        size_t unitDepth, size_t unitOC, bool transpose) { | 
					
						
							|  |  |  |     memset(dest, 0, reorderWeightSize(depth, outputCount, kernelSize, unitDepth, unitOC) * sizeof(T)); | 
					
						
							|  |  |  |     for (int dz = 0; dz < outputCount; ++dz) { | 
					
						
							|  |  |  |         auto dz_unit = dz / unitOC; | 
					
						
							|  |  |  |         auto mx      = dz % unitOC; | 
					
						
							|  |  |  |         auto dst_dz = dest + dz_unit * UP_DIV(depth, unitDepth) * kernelSize * unitDepth * unitOC; | 
					
						
							|  |  |  |         for (int sz = 0; sz < depth; ++sz) { | 
					
						
							|  |  |  |             auto sz_unit = sz / unitDepth; | 
					
						
							|  |  |  |             auto my      = sz % unitDepth; | 
					
						
							|  |  |  |             auto dst_sz = dst_dz + sz_unit * kernelSize * unitDepth * unitOC; | 
					
						
							|  |  |  |             auto src    = source + kernelSize * (sz + dz * depth); | 
					
						
							|  |  |  |             for (int ki = 0; ki < kernelSize; ++ki) { | 
					
						
							|  |  |  |                 auto dst_i         = dst_sz + ki * unitDepth * unitOC; | 
					
						
							|  |  |  |                 if (transpose) { | 
					
						
							|  |  |  |                     dst_i[unitDepth * mx + my] = src[ki]; | 
					
						
							|  |  |  |                 } else { | 
					
						
							|  |  |  |                     dst_i[unitOC * my + mx] = src[ki]; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-05 16:41:56 +08:00
										 |  |  | template void CPUConvolution::reorderWeightSlow<int8_t>(int8_t*, const int8_t*, size_t, size_t, size_t, size_t, size_t, bool); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template<typename T, typename U> // T -> U
 | 
					
						
							|  |  |  | bool CPUConvolution::acquireMemoryAndCopy(std::shared_ptr<Tensor> dest, const T* source, size_t count, Backend* backend) { | 
					
						
							|  |  |  |     bool allocRes = ((CPUBackend*)backend)->onAcquireBuffer(dest.get(), Backend::STATIC); | 
					
						
							|  |  |  |     if (!allocRes) { | 
					
						
							|  |  |  |         return false; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     auto dataPtr = dest->host<U>(); | 
					
						
							|  |  |  |     memset(dataPtr, 0, dest->size()); | 
					
						
							|  |  |  |     for (int i = 0; i < count; ++i) { | 
					
						
							|  |  |  |         dataPtr[i] = source[i]; // type cast T -> U elementwise
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return true; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template bool CPUConvolution::acquireMemoryAndCopy<int32_t, float>(std::shared_ptr<Tensor>, const int32_t*, size_t, Backend*); | 
					
						
							|  |  |  | template bool CPUConvolution::acquireMemoryAndCopy<float, float>(std::shared_ptr<Tensor>, const float*, size_t, Backend*); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  | ErrorCode CPUConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) { | 
					
						
							|  |  |  |     auto input  = inputs[0]; | 
					
						
							|  |  |  |     auto output = outputs[0]; | 
					
						
							| 
									
										
										
										
											2020-03-12 20:29:43 +08:00
										 |  |  |     auto pad = ConvolutionCommon::convolutionPad(input, output, mCommon); | 
					
						
							|  |  |  |     mPadY = pad.second; | 
					
						
							|  |  |  |     mPadX = pad.first; | 
					
						
							| 
									
										
										
										
											2019-04-17 10:49:11 +08:00
										 |  |  |     return NO_ERROR; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CPUConvolution::POSTFUNCTION CPUConvolution::getPostFunction() const { | 
					
						
							|  |  |  |     if (mCommon->relu()) { | 
					
						
							|  |  |  |         return MNNAddBiasRelu; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (mCommon->relu6()) { | 
					
						
							|  |  |  |         return MNNAddBiasRelu6; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return MNNAddBias; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class ConvolutionFactory : public CPUBackend::Creator { | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |     virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs, | 
					
						
							|  |  |  |                                 const MNN::Op *op, Backend *backend) const override { | 
					
						
							|  |  |  |         return ConvolutionFloatFactory::create(inputs, outputs, op, backend); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | REGISTER_CPU_OP_CREATOR(ConvolutionFactory, OpType_Convolution); | 
					
						
							|  |  |  | } // namespace MNN
 |