//
//  DeconvSingleInputExecution.cpp
//  MNN
//
//  Created by MNN on 2020/08/22.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "DeconvSingleInputExecution.hpp"

namespace MNN {
namespace CUDA {

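// Repacks the NCHW float input into half-precision 16x16 tiles (l = input channel,
// h = batch * spatial position): element (l, h) goes to tile [hU][lU] at offset
// lR + hR * 16, so the buffer can serve as the [l x h] operand of the packed GEMM.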
__global__ void DeconvInputRerange(const int count,
        const InputReorderParameter* param,
        const float* Inp,
        __half* InpRe
        ) {
    for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {
        int l = param->l_size;
        int h = param->h_size;
        int lIndex = i % l;
        int hIndex = i / l;
        int lU = lIndex / 16;
        int lR = lIndex % 16;
        int hU = hIndex / 16;
        int hR = hIndex % 16;

        int bIndex = hIndex / param->hw_size;
        int hwIndex = hIndex % param->hw_size;

        float value = Inp[bIndex * param->ib_stride + lIndex * param->ic_stride + hwIndex];
        //inpRe[lIndex * param->oc_stride + bIndex * param->ob_stride + hwIndex] = value;

        //__half* dst = InpRe + lU * param->hpack_size * 16 * 16 + hU * 16 * 16 + hR + lR * 16;
        __half* dst = InpRe + hU * param->lpack_size * 16 * 16 + lU * 16 * 16 + lR + hR * 16;
        dst[0] = value;
    }
}

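// Standard col2im, used as the last step of the transposed convolution: each output
// element gathers every column-buffer entry (kernel offset, column position) that maps
// onto it, then adds the per-channel bias. One thread handles one output element.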
template <typename Dtype>
__global__ void Col2Im(const int n, const Dtype* data_col,
        const int batch, const int height, const int width, const int channels,
        const int kernel_h, const int kernel_w,
        const int pad_h, const int pad_w,
        const int stride_h, const int stride_w,
        const int dilation_h, const int dilation_w,
        const int height_col, const int width_col,
        const Dtype* bias, Dtype* data_im) {
    for (size_t index = blockIdx.x * blockDim.x + threadIdx.x; index < (n); index += blockDim.x * gridDim.x) {
        Dtype val = 0;
        const int b_im = index / (channels * width * height);
        const int chw = index % (channels * width * height);
        const int w_im = chw % width + pad_w;
        const int h_im = (chw / width) % height + pad_h;
        const int c_im = chw / (width * height);
        int kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
        int kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
        // compute the start and end of the output
        const int w_col_start =
            (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1;
        const int w_col_end = min(w_im / stride_w + 1, width_col);
        const int h_col_start =
            (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1;
        const int h_col_end = min(h_im / stride_h + 1, height_col);
        // TODO: use LCM of stride and dilation to avoid unnecessary loops
        for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) {
            for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) {
                int h_k = (h_im - h_col * stride_h);
                int w_k = (w_im - w_col * stride_w);
                if (h_k % dilation_h == 0 && w_k % dilation_w == 0) {
                    h_k /= dilation_h;
                    w_k /= dilation_w;
                    int data_col_index = ((((c_im * kernel_h + h_k) * kernel_w + w_k) * batch + b_im) *
                            height_col + h_col) * width_col + w_col;
                    val += data_col[data_col_index];
                }
            }
        }

        if(nullptr != bias) {
            val += bias[c_im];
        }
        data_im[index] = val;
    }
}

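// Resource holds the state shared between cloned executions of the same op: the filter,
// repacked on the GPU into 16x16 half-precision tiles via GemmPrepareRerange, and the bias.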
DeconvSingleInputExecution::Resource::Resource(Backend* bn, const MNN::Op* op) {
    mBackend = bn;
    auto runtime = static_cast<CUDABackend*>(bn)->getCUDARuntime();

    auto conv = op->main_as_Convolution2D();
    auto common = conv->common();
    mKernelInfo.kernelX = common->kernelX();
    mKernelInfo.kernelY = common->kernelY();
    mKernelInfo.groups = common->group();
    mKernelInfo.strideX = common->strideX();
    mKernelInfo.strideY = common->strideY();
    mKernelInfo.dilateX = common->dilateX();
    mKernelInfo.dilateY = common->dilateY();
    mKernelInfo.activationType = common->relu() ? 1 : (common->relu6() ? 2 : 0);

    // weight host -> device
    const float* filterDataPtr = nullptr;
    int weightSize = 0;
    std::shared_ptr<ConvolutionCommon::Int8Common> quanCommon;
    ConvolutionCommon::getConvParameters(&quanCommon, conv, &filterDataPtr, &weightSize);
    mKernelInfo.kernelN = common->outputCount();
    mKernelInfo.kernelC = weightSize / mKernelInfo.kernelN / mKernelInfo.kernelX / mKernelInfo.kernelY;

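    // The filter is described as the [e x l] GEMM operand:
    //   e = outputCount * kernelY * kernelX   (one row per output-channel / kernel-offset pair)
    //   l = input channel count               (reduction dimension)
    // h stays 0 here; the runtime h (batch * input spatial size) is only known at resize time.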
    MatMulParam param;
    int e = mKernelInfo.kernelN * mKernelInfo.kernelX * mKernelInfo.kernelY;
    int l = mKernelInfo.kernelC;
    int h = 0;
    param.elh[0] = e;
    param.elh[1] = l;
    param.elh[2] = h;
    param.elhPack[0] = UP_DIV(e, 16);
    param.elhPack[1] = UP_DIV(l, 16);
    param.elhPack[2] = UP_DIV(h, 16);

    param.aStride[0] = 1;
    param.aStride[1] = e;
    param.aStride[2] = 0;

    auto gpuParam = static_cast<CUDABackend*>(bn)->getStaticBufferPool()->alloc(sizeof(MatMulParam));
    auto tempCacheBuffer = static_cast<CUDABackend*>(bn)->getStaticBufferPool()->alloc(weightSize * sizeof(float));
    float* cacheWeight = (float*)((uint8_t*)tempCacheBuffer.first + tempCacheBuffer.second);
    runtime->memcpy(cacheWeight, filterDataPtr, weightSize * sizeof(float), MNNMemcpyHostToDevice);
    runtime->memcpy((uint8_t*)gpuParam.first + gpuParam.second, &param, sizeof(MatMulParam), MNNMemcpyHostToDevice);

    // Reorder weight
    weightTensor.reset(Tensor::createDevice<int16_t>({param.elhPack[0] * param.elhPack[1] * (MATMULPACK * MATMULPACK)}));
    bn->onAcquireBuffer(weightTensor.get(), Backend::STATIC);
    mFilter = (void *)weightTensor.get()->buffer().device;
    GemmPrepareRerange(runtime, &param, (const MatMulParam*)((uint8_t*)gpuParam.first + gpuParam.second), cacheWeight, (__half*)mFilter, nullptr, nullptr, 4);
    static_cast<CUDABackend*>(bn)->getStaticBufferPool()->free(tempCacheBuffer);
    static_cast<CUDABackend*>(bn)->getStaticBufferPool()->free(gpuParam);

    // Copy Bias
    int biasSize = conv->bias()->size();
    biasTensor.reset(Tensor::createDevice<float>({biasSize}));
    bn->onAcquireBuffer(biasTensor.get(), Backend::STATIC);
    mBias = (void *)biasTensor.get()->buffer().device;
    cuda_check(cudaMemcpy(mBias, conv->bias()->data(), conv->bias()->size()*sizeof(float), cudaMemcpyHostToDevice));
}

DeconvSingleInputExecution::Resource::~Resource() {
    // Do nothing
}

DeconvSingleInputExecution::DeconvSingleInputExecution(Backend* backend, const MNN::Op* op, std::shared_ptr<Resource> res) : Execution(backend), mOp(op) {
    mResource = res;
    auto runtime = static_cast<CUDABackend*>(backend)->getCUDARuntime();
    auto staticPool = static_cast<CUDABackend*>(backend)->getStaticBufferPool();
    mGpuMatMulParam = staticPool->alloc(sizeof(MatMulParam));
    mGpuCol2ImParam = staticPool->alloc(sizeof(Col2ImParameter));
    mGpuInpReorderParam = staticPool->alloc(sizeof(InputReorderParameter));
}

DeconvSingleInputExecution::~DeconvSingleInputExecution() {
    auto staticPool = static_cast<CUDABackend*>(backend())->getStaticBufferPool();
    staticPool->free(mGpuMatMulParam);
    staticPool->free(mGpuCol2ImParam);
    staticPool->free(mGpuInpReorderParam);
}

bool DeconvSingleInputExecution::onClone(Backend* bn, const Op* op, Execution** dst) {
    if (!mValid) {
        return false;
    }
    if (nullptr == dst) {
        return true;
    }
    auto dstExe = new DeconvSingleInputExecution(bn, op, mResource);
    *dst = dstExe;
    return true;
}

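// onResize precomputes the input-reorder, col2im and GEMM parameters on the host and
// uploads them to the GPU once, so onExecute only has to launch kernels.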
ErrorCode DeconvSingleInputExecution::onResize(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs) {
    auto runtime = static_cast<CUDABackend*>(backend())->getCUDARuntime();
    auto input = inputs[0], output = outputs[0];
    const int UNIT = 1;
    auto convCommon = mOp->main_as_Convolution2D()->common();

    // Input Rerange Param
    mInpReorderParameter.hw_size = input->height() * input->width();
    mInpReorderParameter.ic_stride = mInpReorderParameter.hw_size;
    mInpReorderParameter.ib_stride = mInpReorderParameter.hw_size * input->channel();
    mInpReorderParameter.oc_stride = mInpReorderParameter.ib_stride;
    mInpReorderParameter.ob_stride = mInpReorderParameter.hw_size;
    mInpReorderParameter.l_size = input->channel();
    mInpReorderParameter.h_size = input->batch() * mInpReorderParameter.hw_size;
    mInpReorderParameter.lpack_size = UP_DIV(mInpReorderParameter.l_size, 16);
    mInpReorderParameter.hpack_size = UP_DIV(mInpReorderParameter.h_size, 16);

    runtime->memcpy((uint8_t*)mGpuInpReorderParam.first + mGpuInpReorderParam.second, &mInpReorderParameter, sizeof(InputReorderParameter), MNNMemcpyHostToDevice);

    // Col2Im Param
    auto pad = ConvolutionCommon::convolutionTransposePad(input, output, mOp->main_as_Convolution2D()->common());
    mCol2ImParamter.dilateX = convCommon->dilateX();
    mCol2ImParamter.dilateY = convCommon->dilateY();
    mCol2ImParamter.strideX = convCommon->strideX();
    mCol2ImParamter.strideY = convCommon->strideY();
    mCol2ImParamter.ic = input->channel();
    mCol2ImParamter.oc = output->channel();
    mCol2ImParamter.kernelX = convCommon->kernelX();
    mCol2ImParamter.kernelY = convCommon->kernelY();
    mCol2ImParamter.padX = pad.first;
    mCol2ImParamter.padY = pad.second;

    mCol2ImParamter.ih = input->height();
    mCol2ImParamter.iw = input->width();
    mCol2ImParamter.oh = output->height();
    mCol2ImParamter.ow = output->width();
    mCol2ImParamter.ob = output->batch();

    runtime->memcpy((uint8_t*)mGpuCol2ImParam.first + mGpuCol2ImParam.second, &mCol2ImParamter, sizeof(Col2ImParameter), MNNMemcpyHostToDevice);

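    // GEMM shape for the deconvolution, C[e, h] = A[e, l] * B[l, h]:
    //   e = oc * kernelX * kernelY   (rows of the column buffer)
    //   l = ic                       (reduction over input channels)
    //   h = batch * ih * iw          (one column per input position)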
    // Matmul Param
    int e = output->channel() * mCol2ImParamter.kernelX * mCol2ImParamter.kernelY;
    int l = input->channel();
    int h = input->height() * input->width() * output->batch();

    mMatMulParam.elh[0] = e;
    mMatMulParam.elh[1] = l;
    mMatMulParam.elh[2] = h;
    mMatMulParam.elhPack[0] = UP_DIV(e, 16);
    mMatMulParam.elhPack[1] = UP_DIV(l, 16);
    mMatMulParam.elhPack[2] = UP_DIV(h, 16);

    mMatMulParam.bStride[0] = 0;
    mMatMulParam.bStride[1] = input->height() * input->width();
    mMatMulParam.bStride[2] = 1;

    mMatMulParam.cStride[0] = h;
    mMatMulParam.cStride[1] = 1;
    mMatMulParam.cStride[2] = 1;
    if (convCommon->relu()) {
        mMatMulParam.minValue = 0.0f;
    }
    if (convCommon->relu6()) {
        mMatMulParam.minValue = 0.0f;
        mMatMulParam.maxValue = 6.0f;
    }
    runtime->memcpy((uint8_t*)mGpuMatMulParam.first + mGpuMatMulParam.second, &mMatMulParam, sizeof(MatMulParam), MNNMemcpyHostToDevice);

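    // Workspace: buffer1 holds the fp32 GEMM output (the column buffer consumed by Col2Im),
    // buffer2 holds the reranged fp16 input. Following MNN's dynamic buffer-pool idiom
    // (assumed here), freeing them in onResize only marks the chunks reusable by later ops;
    // the addresses remain valid when onExecute runs.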
    // Alloc temp cuda memory
    auto pool = static_cast<CUDABackend*>(backend())->getBufferPool();
    auto buffer1 = pool->alloc(sizeof(float) * mMatMulParam.elh[0] * mMatMulParam.elh[2]);
    auto buffer2 = pool->alloc(sizeof(__half) * mMatMulParam.elhPack[1] * mMatMulParam.elhPack[2] * MATMULPACK * MATMULPACK);

    mIm2ColBuffer = (float*)((uint8_t*)buffer1.first + buffer1.second);
    mInputBuffer = (__half*)((uint8_t*)buffer2.first + buffer2.second);

    pool->free(buffer2);
    pool->free(buffer1);

    return NO_ERROR;
}

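// onExecute runs the three stages prepared above:
//   1. DeconvInputRerange: NCHW float input -> packed fp16 tiles
//   2. GemmPackedMain:     filter[e, l] x input[l, h] -> fp32 column buffer
//   3. Col2Im:             accumulate the column buffer into the output and add bias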
ErrorCode DeconvSingleInputExecution::onExecute(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs) {
    //MNN_PRINT("cuda convSingleInput onExecute in, inputsize:%d %d\n", (int)inputs.size(), workspace_size_);
    MNN_ASSERT(inputs.size() == 1);
    MNN_ASSERT(outputs.size() == 1);
    auto bytes = static_cast<CUDABackend*>(backend())->getBytes(inputs[0]);

    auto runtime = static_cast<CUDABackend*>(backend())->getCUDARuntime();
    const void *input_addr = (const void*)inputs[0]->deviceId();
    const void *filter_addr = mResource->mFilter;
    const void *bias_addr = mResource->mBias;
    void *output_addr = (void*)outputs[0]->deviceId();

    auto gpuInpReorder = (const InputReorderParameter*)((uint8_t*)mGpuInpReorderParam.first + mGpuInpReorderParam.second);
    auto gpuCol2Im = (const Col2ImParameter*)((uint8_t*)mGpuCol2ImParam.first + mGpuCol2ImParam.second);
    auto gpuMatMul = (const MatMulParam*)((uint8_t*)mGpuMatMulParam.first + mGpuMatMulParam.second);

    const int rerangeCount = mInpReorderParameter.ib_stride * inputs[0]->batch();
    int inp_block_num = runtime->blocks_num(rerangeCount);
    int inp_thread_num = runtime->threads_num();

    // Do input Rerange
    runtime->memset(mInputBuffer, 0, mMatMulParam.elhPack[2] * mMatMulParam.elhPack[1] * MATMULPACK * MATMULPACK * sizeof(__half));
    DeconvInputRerange<<<inp_block_num, inp_thread_num>>>(rerangeCount, gpuInpReorder, (const float*)input_addr, mInputBuffer);

    // Do Gemm operation
    GemmPackedMain(runtime, &mMatMulParam, gpuMatMul, (float*)mIm2ColBuffer, (const half*)filter_addr, (const half*)mInputBuffer, nullptr, bytes, false, false);

    // Do Col2Im trans
    int height_col = mCol2ImParamter.ih;
    int width_col = mCol2ImParamter.iw;
    int num_kernels = mCol2ImParamter.ob * mCol2ImParamter.oc * mCol2ImParamter.oh * mCol2ImParamter.ow;

    int col2im_block_num = runtime->blocks_num(num_kernels);
    int col2im_thread_num = runtime->threads_num();

    // printf("col2im:%d, %d-%d-%d-%d-%d-%d\n %d-%d-%d-%d-%d-%d\n %d-%d\n", mCol2ImParamter.ob, mCol2ImParamter.oh, mCol2ImParamter.ow, mCol2ImParamter.oc, \
    //     mCol2ImParamter.ih, mCol2ImParamter.iw, mCol2ImParamter.ic, \
    //     mCol2ImParamter.padX, mCol2ImParamter.padY, mCol2ImParamter.kernelX, mCol2ImParamter.kernelY, mCol2ImParamter.strideX, mCol2ImParamter.strideY, \
    //     col2im_block_num, col2im_thread_num);

    Col2Im<float><<<col2im_block_num, col2im_thread_num>>>(
        num_kernels, (const float*)mIm2ColBuffer, mCol2ImParamter.ob, mCol2ImParamter.oh, mCol2ImParamter.ow, mCol2ImParamter.oc,
        mCol2ImParamter.kernelY, mCol2ImParamter.kernelX, mCol2ImParamter.padY, mCol2ImParamter.padX,
        mCol2ImParamter.strideY, mCol2ImParamter.strideX, mCol2ImParamter.dilateY, mCol2ImParamter.dilateX,
        height_col, width_col, (const float*)bias_addr, (float *)output_addr);

    return NO_ERROR;
}

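// Creator: only the single-input (constant-weight) deconvolution is handled here;
// quantized weights (quan type 1/2) and the three-input variant are rejected by
// returning nullptr.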
class CUDADeconvolutionCreator : public CUDABackend::Creator {
public:
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const override {
        if (nullptr != op->main_as_Convolution2D()->quanParameter()) {
            auto quan = op->main_as_Convolution2D()->quanParameter();
            if (1 == quan->type() || 2 == quan->type()) {
                MNN_PRINT("cuda Deconv quant type 1 or 2 not support\n");
                return nullptr;
            }
        }

        if(inputs.size() == 3) {
            MNN_PRINT("Deconv inputs size:3 not support\n");
            return nullptr;
        } else if(inputs.size() == 1) {
            std::shared_ptr<DeconvSingleInputExecution::Resource> resource(new DeconvSingleInputExecution::Resource(backend, op));
            return new DeconvSingleInputExecution(backend, op, resource);
        } else {
            MNN_PRINT("Deconv inputs size:%d not support", (int)inputs.size());
            return nullptr;
        }
    }
};

//CUDACreatorRegister<CUDADeconvolutionCreator> __DeConvExecution(OpType_Deconvolution);

}// namespace CUDA
}// namespace MNN