//
//  CPUDeconvolution.cpp
//  MNN
//
//  Created by MNN on 2018/07/20.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "CPUDeconvolution.hpp"
#include "core/BufferAllocator.hpp"
#include "CPUBackend.hpp"
#include "core/Concurrency.h"
#include "core/Macro.h"
#include "math/Matrix.hpp"
#include "core/TensorUtils.hpp"
#include "math/Vec.hpp"
#include "core/ConvolutionCommon.hpp"
#include "compute/CommonOptFunction.h"
#include "compute/ConvOpt.h"
#include "compute/DeconvolutionWithStride.hpp"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>

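// Vec4 is a 4-lane float vector wrapper; it is used below to accumulate
// 4-channel (C4) blocks when scattering the matmul results into the output.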
using Vec4 = MNN::Math::Vec<float, 4>;
namespace MNN {
CPUDeconvolutionBasic::CPUDeconvolutionBasic(const Tensor* input, const Op* convOp, Backend* b)
    : CPUConvolution(convOp->main_as_Convolution2D()->common(), b) {
    mSrcCount = input->channel();
}

ErrorCode CPUDeconvolutionBasic::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input  = inputs[0];
    auto output = outputs[0];
    auto pad = ConvolutionCommon::convolutionTransposePad(input, output, mCommon);
    mPadY = pad.second;
    mPadX = pad.first;
    return NO_ERROR;
}

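// CPUDeconvolutionCommon owns the bias: a 4-aligned float buffer, zero-filled and
// then copied from the model's Convolution2D bias data.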
CPUDeconvolutionCommon::CPUDeconvolutionCommon(const Tensor* input, const Op* convOp, Backend* b)
    : CPUDeconvolutionBasic(input, convOp, b) {
    auto conv2D     = convOp->main_as_Convolution2D();
    int outputCount = mCommon->outputCount();
    mBias.reset(Tensor::createDevice<float>(std::vector<int>{ALIGN_UP4(outputCount)}));
    bool success = b->onAcquireBuffer(mBias.get(), Backend::STATIC);
    if (!success) {
        mValid = false;
        return;
    }
    ::memset(mBias->host<float>(), 0, mBias->size());
    ::memcpy(mBias->host<float>(), conv2D->bias()->data(), conv2D->bias()->size() * sizeof(float));
}

CPUDeconvolutionCommon::~CPUDeconvolutionCommon() {
    backend()->onReleaseBuffer(mBias.get(), Backend::STATIC);
}

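// Repacks the raw deconvolution weights, stored as [srcCount, outputCount, fh, fw],
// into the layout the tiled matmul expects for its B operand: each input-channel
// slice is packed to C4 over outputCount with MNNPackC4, then the whole cache is
// repacked by MNNPackForMatMul_B. (Layout notes read from the call sites below,
// not from a separate spec.)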
static void _transformWeight(const float* tempWeight, float* dest, int outputCount, int srcCount, int fh, int fw,
                             float* cache) {
    auto outputC4 = UP_DIV(outputCount, 4);
    // c, n, h, w -> c, n/4 * 4, h, w
    for (int c=0; c<srcCount; ++c) {
        auto dst = cache + c * outputC4 * fw * fh * 4;
        auto src = tempWeight + c * outputCount * fw * fh;
        MNNPackC4(dst, src, fw*fh, outputCount);
    }
    //printf("%d - %d - %d - %d\n", outputCount, srcCount, fh, fw);
    MNNPackForMatMul_B(dest, cache, outputC4 * fw * fh * 4, srcCount, false);
}

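// The weight is loaded via ConvolutionCommon::getConvParameters (which also handles
// quantized storage through quanCommon), then packed by _transformWeight into
// mWeight of shape {UP_DIV(ALIGN_UP4(outputCount) * fh * fw, hP), srcCount, hP},
// where hP comes from MNNGetMatMulPackMode. The temporary cache is released once
// packing is done.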
CPUDeconvolution::CPUDeconvolution(const Tensor* input, const Op* convOp, Backend* backend)
    : MNN::CPUDeconvolutionCommon(input, convOp, backend) {
    auto layer              = convOp->main_as_Convolution2D()->common();

    const float* tempWeight = nullptr;
    int tempWeightSize   = 0;
    std::shared_ptr<ConvolutionCommon::Int8Common> quanCommon;
    ConvolutionCommon::getConvParameters(&quanCommon, convOp->main_as_Convolution2D(), &tempWeight, &tempWeightSize);

    int fw                  = layer->kernelX();
    int fh                  = layer->kernelY();
    int srcCount            = mSrcCount;
    int eP, lP, hP;
    MNNGetMatMulPackMode(&eP, &lP, &hP);
    auto outputAlign = ALIGN_UP4(layer->outputCount()) * fw * fh;
    mWeight.reset(Tensor::createDevice<float>(std::vector<int>{UP_DIV(outputAlign, hP), srcCount, hP}));
    std::shared_ptr<Tensor> cache(Tensor::createDevice<float>({outputAlign * srcCount}));
    bool success = backend->onAcquireBuffer(mWeight.get(), Backend::STATIC) &&
                   backend->onAcquireBuffer(cache.get(), Backend::STATIC);
    if (!success) {
        mValid = false;
        return;
    }
    float* dest = mWeight->host<float>();
    MNN_ASSERT(nullptr != dest);
    int outputCount = layer->outputCount();
    _transformWeight(tempWeight, dest, outputCount, srcCount, fh, fw, cache->host<float>());
    backend->onReleaseBuffer(cache.get(), Backend::STATIC);
    mOrigin.reset(new CPUDeconvolutionOrigin(input, convOp, backend));
}

CPUDeconvolution::~CPUDeconvolution() {
    backend()->onReleaseBuffer(mWeight.get(), Backend::STATIC);
}


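// Resize strategy (as implemented below): the deconvolution is evaluated as one
// matrix product between the C4-packed input [icC4, plane, 4] and the packed
// weights (inputs[1]), producing a column buffer [ocC4 * kh * kw, plane, 4]; a post
// step then scatter-adds (col2im) each kernel tap into the output plane and applies
// bias/activation via mPostFunction.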
ErrorCode CPUDeconvolutionOrigin::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    CPUDeconvolutionBasic::onResize(inputs, outputs);
    auto input  = inputs[0];
    auto output = outputs[0];
    auto oc     = output->channel();
    if (ALIGN_UP4(oc) != inputs[2]->length(0)) {
        return INPUT_DATA_ERROR;
    }

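    // Cached geometry. Note the naming: width/height are the input plane, while
    // src_width/src_height are the output plane the results are scattered into.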
    auto ocC4       = UP_DIV(output->channel(), 4);
    auto icC4       = UP_DIV(input->channel(), 4);
    auto kw         = mCommon->kernelX();
    auto kh         = mCommon->kernelY();
    auto dilateX    = mCommon->dilateX();
    auto dilateY    = mCommon->dilateY();
    auto strideX    = mCommon->strideX();
    auto strideY    = mCommon->strideY();
    auto padX       = mPadX;
    auto padY       = mPadY;
    auto width      = input->width();
    auto height     = input->height();
    auto src_height = output->height();
    auto src_width  = output->width();

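    // The matmul result is staged in tempColTotalBuffer with shape
    // {ocC4 * kh * kw, plane, 4}: one C4 row per (output-channel block, kernel tap)
    // pair and one column per input position; the post function below consumes it.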
    auto kernelCount = ocC4 * mCommon->kernelX() * mCommon->kernelY();
    mPreFunctions.clear();
    mPostFunctions.clear();
    auto plane         = width * height;
    const int maxDepth = 5;
    std::shared_ptr<Tensor> tempColTotalBuffer(Tensor::createDevice<float>({kernelCount, plane, 4}));
    auto res = backend()->onAcquireBuffer(tempColTotalBuffer.get(), Backend::DYNAMIC);
    if (!res) {
        return OUT_OF_MEMORY;
    }
    auto colBufferPtr = tempColTotalBuffer->host<float>();
    auto biasPtr      = inputs[2]->host<float>();
    auto inputPtr  = input->host<float>();
    std::shared_ptr<Tensor> tempInputBuffer(
        Tensor::create<float>({icC4, plane, 4}, inputPtr));
    std::shared_ptr<Tensor> tempInput(Tensor::createDevice<float>({icC4, plane, 4}));
    auto threadNumber = ((CPUBackend*)backend())->threadNumber();
    if (input->batch() != 1) {
        res = backend()->onAcquireBuffer(tempInput.get(), Backend::DYNAMIC);
        if (!res) {
            return OUT_OF_MEMORY;
        }
        auto newInputPtr = tempInput->host<float>();
        // Copy Batch
        mPreFunctions.emplace_back(std::make_pair([newInputPtr, icC4, plane, threadNumber](const float* srcBatch, int tId) {
            for (int c = tId; c<icC4; c+=threadNumber) {
                auto srcDepth = srcBatch + c * plane * 4;
                auto dstDepth = newInputPtr + c * plane * 4;
                ::memcpy(dstDepth, srcDepth, plane * 4 * sizeof(float));
            }
        }, threadNumber));
    } else {
        tempInput->buffer().host = (uint8_t*)inputPtr;
    }
    mMatMul.reset(new StrassenMatrixComputor(backend(), true, maxDepth));
    mMatMul->onEncode({tempInput.get(), inputs[1]}, {tempColTotalBuffer.get()});
    mPostFunctions.emplace_back(std::make_pair([colBufferPtr, ocC4, width, height, kh, kw, padY, padX, dilateY, dilateX, strideY,
                       strideX, threadNumber, src_width, src_height, plane, biasPtr, this](float* outputPtr, int tId) {
            for (int z = (tId); z < ocC4; z += threadNumber) {
                auto dstZ = outputPtr + z * src_height * src_width * 4;
                auto srcZ = colBufferPtr + kw * kh * 4 * plane * z;
                auto dstB = dstZ;
                ::memset(dstB, 0, 4 * src_width * src_height * sizeof(float));
                auto srcB = srcZ;
                for (int oy = 0; oy < height; ++oy) {
                    for (int ox = 0; ox < width; ++ox) {
                        int srcStartX = ox * strideX - padX;
                        int srcStartY = oy * strideY - padY;

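                        // Clamp the kernel taps so that every written position
                        // srcStart + f * dilate stays inside the output plane.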
                        int sfy = ALIMAX(0, (UP_DIV(-srcStartY, dilateY)));
                        int efy = ALIMIN(kh, UP_DIV(src_height - srcStartY, dilateY));

                        int sfx = ALIMAX(0, (UP_DIV(-srcStartX, dilateX)));
                        int efx = ALIMIN(kw, UP_DIV(src_width - srcStartX, dilateX));

                        auto dstStart = dstB + srcStartX * 4 + srcStartY * src_width * 4;
                        auto srcStart = srcB + 4 * (ox + oy * width);

                        for (int fy = sfy; fy < efy; ++fy) {
                            auto dstY = dstStart + fy * 4 * dilateY * src_width;
                            auto srcY = srcStart + fy * kw * plane * 4;
                            for (int fx = sfx; fx < efx; ++fx) {
                                auto dstX = dstY + fx * dilateX * 4;
                                auto srcX = srcY + fx * plane * 4;
                                Vec4::save(dstX, Vec4::load(dstX) + Vec4::load(srcX));
                            }
                        }
                    }
                }
                mPostFunction(dstZ, biasPtr + 4 * z, src_height * src_width, 1);
            }
        }, threadNumber));
    if (tempInput->host<float>() != inputPtr) {
        backend()->onReleaseBuffer(tempInput.get(), Backend::DYNAMIC);
    }
    backend()->onReleaseBuffer(tempColTotalBuffer.get(), Backend::DYNAMIC);
    return NO_ERROR;
}

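// Per-batch execution: run the pre functions (batch-slice copy when batch > 1), the
// pre-encoded Strassen matmul, then the post functions (col2im scatter + bias), each
// split across threadNumber threads via MNN_CONCURRENCY.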
ErrorCode CPUDeconvolutionOrigin::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto batch = inputs[0]->batch();
    for (int i=0; i<batch; ++i) {
        auto inputPtr = inputs[0]->host<float>() + i * inputs[0]->stride(0);
        auto outputPtr = outputs[0]->host<float>() + i * outputs[0]->stride(0);
        for (auto& unit : mPreFunctions) {
            MNN_CONCURRENCY_BEGIN(tId, unit.second) {
                unit.first(inputPtr, (int)tId);
            }
            MNN_CONCURRENCY_END();
        }
        mMatMul->onExecute();
        for (auto& unit : mPostFunctions) {
            MNN_CONCURRENCY_BEGIN(tId, unit.second) {
                unit.first(outputPtr, (int)tId);
            }
            MNN_CONCURRENCY_END();
        }
    }
    return NO_ERROR;
}
class CPUDeconvolutionCreator : public CPUBackend::Creator {
public:
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const {
        auto convOp = op->main_as_Convolution2D();
        auto common = convOp->common();
        if (common->strideY() > 1 || common->strideX() > 1) {
            if (common->dilateX() == 1 && common->dilateY() == 1) {
                return new DeconvolutionWithStride(inputs[0], op, backend);
            }
        }
        return new CPUDeconvolution(inputs[0], op, backend);
    }
};

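// Deconvolution ops with stride > 1 and no dilation take the specialized
// DeconvolutionWithStride path; everything else uses the generic GEMM + col2im
// implementation above.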
REGISTER_CPU_OP_CREATOR(CPUDeconvolutionCreator, OpType_Deconvolution);
} // namespace MNN