MNN/source/backend/nnapi/execution/NNAPIConvolution.cpp

//
// NNAPIConvolution.cpp
// MNN
//
// Created by MNN on 2022/09/06.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "NNAPIConvolution.hpp"
namespace MNN {
NNAPIConvolution::NNAPIConvolution(MNN::Backend *b, const MNN::Op *op, const std::vector<Tensor *> &inputs, const std::vector<MNN::Tensor *> &outputs) : NNAPICommonExecution(b, op) {
    isDepthwise = mOp->type() == OpType_ConvolutionDepthwise;
    isDeconv    = mOp->type() == OpType_Deconvolution;
}
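// Repack [batch, channel, area] (NCHW) data into [batch, area, channel] (NHWC):
// within one batch, dest[i * c + ci] = source[ci * area + i]. For example, with
// c = 2 and area = 3, the source order {c0a0, c0a1, c0a2, c1a0, c1a1, c1a2}
// becomes {c0a0, c1a0, c0a1, c1a1, c0a2, c1a2}.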
template<typename T>
static void NCHW2NHWC(const T* source, T* dest, int b, int c, int area) {
    int sourceBatchsize = c * area;
    int destBatchSize   = sourceBatchsize;
    for (int bi = 0; bi < b; ++bi) {
        auto srcBatch = source + bi * sourceBatchsize;
        auto dstBatch = dest + bi * destBatchSize;
        for (int i = 0; i < area; ++i) {
            auto srcArea = srcBatch + i;
            auto dstArea = dstBatch + i * c;
            for (int ci = 0; ci < c; ++ci) {
                dstArea[ci] = srcArea[ci * area];
            }
        }
    }
}
ErrorCode NNAPIConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto conv2D = mOp->main_as_Convolution2D();
    auto common = conv2D->common();
    int kernelX = common->kernelX();
    int kernelY = common->kernelY();
    int strideX = common->strideX();
    int strideY = common->strideY();
    int dilateX = common->dilateX();
    int dilateY = common->dilateY();
    int group   = common->group();
    uint32_t outputCount = common->outputCount();
    auto padMod = common->padMode();
    bool relu   = common->relu();
    bool relu6  = common->relu6();
    int top, left, bottom, right;
    if (nullptr != common->pads()) {
        MNN_ASSERT(common->pads()->size() >= 4);
        top    = common->pads()->Get(0);
        left   = common->pads()->Get(1);
        bottom = common->pads()->Get(2);
        right  = common->pads()->Get(3);
    } else {
        top    = common->padY();
        left   = common->padX();
        bottom = common->padY();
        right  = common->padX();
    }
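    // Note: explicit per-side paddings are always forwarded to NNAPI below as the
    // pad_left/right/top/bottom scalars, so padMod itself is not passed on.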
    // NNAPI inputs:
    // conv2d:           [input, weight, bias, pad_left, pad_right, pad_top, pad_bottom, stride_w, stride_h, fusecode, NCHW/NHWC, dilate_w, dilate_h]
    // depthwise_conv2d: [input, weight, bias, pad_left, pad_right, pad_top, pad_bottom, stride_w, stride_h, multiplier, fusecode, NCHW/NHWC, dilate_w, dilate_h]
    auto inputIdxs = getTensorIdxs(inputs);
    // The inputs don't carry weight and bias tensors, so read them from the op parameters.
    if (inputs.size() < 3) {
        const void *weightPtr, *biasPtr;
        int weightSize, biasSize;
        if (nullptr != conv2D->quanParameter()) {
            quanCommon = ConvolutionCommon::load(conv2D->quanParameter(), true);
            if (nullptr == quanCommon) {
                MNN_ERROR("Memory not enough, can't extract IDST Convolution: %s\n", mOp->name()->c_str());
                return OUT_OF_MEMORY;
            }
            if (quanCommon->weightFloat.get() == nullptr) {
                MNN_ERROR("Can't dequantize weight to float for %s\n", mOp->name()->c_str());
                return NOT_SUPPORT;
            }
            // Back to float
            weightPtr  = quanCommon->weightFloat.get();
            weightSize = quanCommon->weightFloat.size();
        } else {
            weightPtr  = conv2D->weight()->data();
            weightSize = conv2D->weight()->size();
        }
        biasSize = conv2D->bias()->size();
        biasPtr  = conv2D->bias()->data();
        uint32_t inputCount = weightSize / (kernelX * kernelY * outputCount);
        uint32_t n = outputCount;
        uint32_t c = inputCount;
        uint32_t h = kernelY;
        uint32_t w = kernelX;
        if (isDepthwise) {
            n = 1;
            c = outputCount;
        }
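        // NNAPI weight layouts: ANEURALNETWORKS_CONV_2D expects [depth_out, h, w, depth_in],
        // while ANEURALNETWORKS_DEPTHWISE_CONV_2D expects [1, h, w, depth_out]. That is why
        // n/c are swapped above for the depthwise case before repacking below.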
        nhwcWeight.reset(new float[weightSize]);
        std::vector<uint32_t> weightDims {n, h, w, c};
        // [outputCount, inputChannel, h, w] -> [outputCount, h, w, inputChannel]
        NCHW2NHWC<float>(reinterpret_cast<const float*>(weightPtr), nhwcWeight.get(), n, c, h * w);
        std::vector<uint32_t> biasDims {outputCount};
        inputIdxs.push_back(buildConstant(nhwcWeight.get(), weightSize * sizeof(float), ANEURALNETWORKS_TENSOR_FLOAT32, weightDims));
        inputIdxs.push_back(buildConstant(biasPtr, biasSize * sizeof(float), ANEURALNETWORKS_TENSOR_FLOAT32, biasDims));
    }
    // pad
    inputIdxs.push_back(buildScalar(left));
    inputIdxs.push_back(buildScalar(right));
    inputIdxs.push_back(buildScalar(top));
    inputIdxs.push_back(buildScalar(bottom));
    // stride
    inputIdxs.push_back(buildScalar(strideX));
    inputIdxs.push_back(buildScalar(strideY));
    if (isDepthwise) {
        int multiplier = outputCount / group;
        inputIdxs.push_back(buildScalar(multiplier));
    }
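    // ANEURALNETWORKS_DEPTHWISE_CONV_2D takes a channel multiplier with
    // depth_out = depth_in * multiplier; for MNN's depthwise convolution
    // group equals the input channel count, so outputCount / group is that multiplier.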
    // fusecode
    FuseCode code = ANEURALNETWORKS_FUSED_NONE;
    if (relu)  code = ANEURALNETWORKS_FUSED_RELU;
    if (relu6) code = ANEURALNETWORKS_FUSED_RELU6;
    inputIdxs.push_back(buildScalar(code));
    // NCHW/NHWC
    inputIdxs.push_back(buildScalar(mNCHW));
    // dilate
    if (dilateX > 1 || dilateY > 1) {
        inputIdxs.push_back(buildScalar(dilateX));
        inputIdxs.push_back(buildScalar(dilateY));
    }
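    // The dilation operands are optional trailing inputs (added to NNAPI conv ops
    // with Android API level 29), so they are only appended when the convolution
    // is actually dilated.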
    auto op = ANEURALNETWORKS_CONV_2D;
    if (mOp->type() == OpType_ConvolutionDepthwise) {
        op = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
    } else {
        // TODO: deconv
    }
    return buildOperation(op, inputIdxs, getTensorIdxs(outputs));
}
REGISTER_NNAPI_OP_CREATOR(NNAPIConvolution, OpType_Convolution)
REGISTER_NNAPI_OP_CREATOR(NNAPIConvolution, OpType_ConvolutionDepthwise)
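// Note: OpType_Deconvolution is not registered here; the deconvolution path is still a TODO above.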
} // namespace MNN