//
//  GeometryConv2D.cpp
//  MNN
//
//  Created by MNN on 2020/07/14.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <limits>
#include "ConvertUtils.hpp"
#include "GeometryConvUtils.hpp"
#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
namespace MNN {
class GeometryConv2D : public GeometryComputer {
public:
    // Im2Col + GEMM
    bool computeIm2Col_GEMM(const Convolution2DCommon* common, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                            Context& context, CommandBuffer& res) const {
        auto input = inputs[0];
        auto outputDiff = outputs[0];
        MNN_ASSERT(1 == common->group());
        auto kw = common->kernelX();
        auto kh = common->kernelY();
        auto sw = common->strideX();
        auto sh = common->strideY();
        auto dw = common->dilateX();
        auto dh = common->dilateY();
        auto batch = outputDiff->batch();
        auto ow = outputDiff->width();
        auto oh = outputDiff->height();
        auto oc = outputDiff->channel();
        auto ic = input->channel();
        auto iw = input->width();
        auto ih = input->height();
        auto pads = ConvolutionCommon::convolutionPad(input, outputDiff, common);
        MNN_ASSERT(TensorUtils::getDescribe(input)->dimensionFormat != MNN_DATA_FORMAT_NHWC);
        MNN_ASSERT(TensorUtils::getDescribe(outputDiff)->dimensionFormat != MNN_DATA_FORMAT_NHWC);
        Tensor* A = nullptr;
        Tensor* B = nullptr;
        {
            // B: Input Im2Col, n, ic, ih, iw -> ic*kh*kw, n*oh*ow
            std::shared_ptr<Tensor> im2Col(new Tensor);
            GeometryConvUtils::im2Col(im2Col.get(), input, ic, kh, kw, batch, oh, ow, ih, iw, sh, sw, dh, dw, pads);
            B = im2Col.get();
            res.extras.emplace_back(im2Col);
        }
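        // Each column of B now holds one receptive field: for output position
        // (n, y, x) the ic*kh*kw source values are gathered from the padded input
        // window starting at (y*sh, x*sw) with dilation (dh, dw), so the whole
        // convolution collapses into a single GEMM against the reshaped weights.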
        {
            // A: Weight oc, ic, kh, kw -> oc, ic*kh*kw
            std::shared_ptr<Tensor> kernel(new Tensor);
            A = kernel.get();
            kernel->buffer().type = halide_type_of<float>();
            kernel->buffer().dimensions = 2;
            kernel->setLength(0, oc);
            kernel->setLength(1, ic * kw * kh);
            auto des = TensorUtils::getDescribe(kernel.get());
            des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
            GeometryComputerUtils::makeRawAddressRef(kernel.get(), inputs[1], 0, ic * kw * kh * oc);
            res.extras.emplace_back(std::move(kernel));
        }
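        // makeRawAddressRef only re-describes the existing weight memory as a 2-D
        // [oc, ic*kh*kw] view; the oc-major layout is already contiguous in that
        // order, so no copy command is generated for this step.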
        {
            // C = MatMul(B, A)
            std::shared_ptr<Tensor> C(new Tensor);
            C->buffer().type = halide_type_of<float>();
            C->buffer().dimensions = 2;
            C->setLength(0, batch * ow * oh);
            C->setLength(1, oc);
            TensorUtils::getDescribe(C.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW;
            Tensor* bias = nullptr;
            if (inputs.size() > 2) {
                bias = inputs[2];
            }
            res.command.emplace_back(GeometryComputerUtils::makeMatMul(B, A, C.get(), bias, true, true));
            res.extras.emplace_back(C);
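            // With transposeA = transposeB = true this computes C = B^T * A^T:
            //   [batch*oh*ow, ic*kh*kw] x [ic*kh*kw, oc] -> [batch*oh*ow, oc],
            // leaving C position-major / channel-minor (NHWC order), which the
            // transpose region below converts back to NCHW.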

            // Activation
            float minValue = 0.0f, maxValue = 6.0f;
            bool needPostTreat = false;
            if (common->relu()) {
                needPostTreat = true;
                minValue = 0.0f;
                maxValue = std::numeric_limits<float>::max();
            }
            if (common->relu6()) {
                needPostTreat = true;
                minValue = 0.0f;
                maxValue = 6.0f;
            }
            if (needPostTreat) {
                flatbuffers::FlatBufferBuilder builder;
                builder.Finish(GeometryConvUtils::makeRelu6(builder, minValue, maxValue));
                std::shared_ptr<Tensor> C2(new Tensor);
                C2->buffer().type = halide_type_of<float>();
                C2->buffer().dimensions = 2;
                C2->setLength(0, batch * ow * oh);
                C2->setLength(1, oc);
                TensorUtils::getDescribe(C2.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW;
                auto cmd = GeometryComputerUtils::makeCommand(builder, {C.get()}, {C2.get()});
                res.command.emplace_back(cmd);
                res.extras.emplace_back(C2);
                C = C2;
            }
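            // A single Relu6(minValue, maxValue) op covers both activations:
            // plain ReLU clamps to [0, FLT_MAX], ReLU6 clamps to [0, 6].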
            // Transpose
            // Batch, oh, ow, oc -> batch, oc, oh, ow
            TensorUtils::setLinearLayout(C.get());
            if (ow == oh && oh == 1) {
                GeometryComputerUtils::makeRawAddressRef(outputs[0], C.get(), 0, batch * oc);
            } else {
                auto kernelDiffDes = TensorUtils::getDescribe(outputs[0]);
                kernelDiffDes->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                kernelDiffDes->regions.resize(1);
                auto& desReg = kernelDiffDes->regions[0];
                desReg.size[0] = batch;
                desReg.size[1] = oc;
                desReg.size[2] = oh * ow;
                desReg.dst.offset = 0;
                desReg.dst.stride[0] = oc * oh * ow;
                desReg.dst.stride[1] = oh * ow;
                desReg.dst.stride[2] = 1;
                desReg.src.offset = 0;
                desReg.src.stride[0] = oh * ow * oc;
                desReg.src.stride[1] = 1;
                desReg.src.stride[2] = oc;
                desReg.origin = C.get();
            }
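            // The region above is a pure stride permutation: reading C (NHWC order)
            // with src strides {oh*ow*oc, 1, oc} and writing with dst strides
            // {oc*oh*ow, oh*ow, 1} realizes the NHWC -> NCHW transpose without a
            // dedicated transpose kernel; when oh == ow == 1 the two layouts
            // coincide, so the raw address reference above suffices.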
        }
        return true;
    }
    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& res) const override {
        if (inputs.size() == 1) {
            // Origin convolution with format converter
            return GeometryConvUtils::computeSingle(op, inputs, outputs, context, res);
        }
        auto common = op->main_as_Convolution2D()->common();
        if (common->outputCount() > 0) {
            return computeIm2Col_GEMM(common, inputs, outputs, context, res);
        }
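        // outputCount == 0 means the serialized op carries no static shape info
        // (the weight arrives as a runtime input), so rebuild the parameters from
        // the weight tensor's [oc, ic, kh, kw] layout before dispatching.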
        std::unique_ptr<Convolution2DCommonT> temp(common->UnPack());
        temp->outputCount = inputs[1]->length(0);
        temp->kernelY = inputs[1]->length(2);
        temp->kernelX = inputs[1]->length(3);
        flatbuffers::FlatBufferBuilder builder;
        builder.Finish(Convolution2DCommon::Pack(builder, temp.get()));
        return computeIm2Col_GEMM(flatbuffers::GetRoot<MNN::Convolution2DCommon>(builder.GetBufferPointer()), inputs, outputs, context, res);
    }
};

class GeometryConvTranspose2D : public GeometryConv2D {
public:
    // GEMM + Col2Im
    bool computeGEMM_Col2Im(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                            Context& context, CommandBuffer& res) const {
        auto common = op->main_as_Convolution2D()->common();
        auto input = inputs[0];
        auto outputDiff = outputs[0];
        auto weight = inputs[1];
        MNN_ASSERT(1 == common->group());
        auto kw = common->kernelX();
        auto kh = common->kernelY();
        auto sw = common->strideX();
        auto sh = common->strideY();
        auto dw = common->dilateX();
        auto dh = common->dilateY();
        auto batch = outputDiff->batch();
        auto ow = outputDiff->width();
        auto oh = outputDiff->height();
        auto oc = outputDiff->channel();
        auto ic = input->channel();
        auto iw = input->width();
        auto ih = input->height();
        auto pads = ConvolutionCommon::convolutionTransposePad(input, outputDiff, common);
        MNN_ASSERT(TensorUtils::getDescribe(input)->dimensionFormat != MNN_DATA_FORMAT_NHWC);
        MNN_ASSERT(TensorUtils::getDescribe(outputDiff)->dimensionFormat != MNN_DATA_FORMAT_NHWC);
        Tensor* A = nullptr;
        Tensor* B = nullptr;
        {
            // B: Input n, ic, ih, iw -> ic, n * ih * iw
            std::shared_ptr<Tensor> dest(Tensor::createDevice<float>({ic, batch * ih * iw}));
            res.extras.emplace_back(dest);
            B = dest.get();
            auto des = TensorUtils::getDescribe(dest.get());
            des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
            des->regions.resize(1);
            auto& reg = des->regions[0];
            reg.origin = input;
            reg.size[0] = ic;
            reg.size[1] = batch;
            reg.size[2] = ih * iw;
            reg.src.offset = 0;
            reg.src.stride[0] = ih * iw;
            reg.src.stride[1] = ic * ih * iw;
            reg.src.stride[2] = 1;
            reg.dst.offset = 0;
            reg.dst.stride[0] = ih * iw * batch;
            reg.dst.stride[1] = ih * iw;
            reg.dst.stride[2] = 1;
        }
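        // This region hoists the channel axis to the front: element (c, n, p) of B
        // reads input[n, c, p] (src stride ic*ih*iw over n, ih*iw over c), giving a
        // channel-major matrix so the weight GEMM can contract over ic directly.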
        {
            // A: Weight ic, oc, kh, kw -> ic, oc*kh*kw
            std::shared_ptr<Tensor> kernel(Tensor::createDevice<float>({ic, oc * kw * kh}));
            A = kernel.get();
            GeometryComputerUtils::makeRawAddressRef(kernel.get(), weight, 0, ic * kw * kh * oc);
            res.extras.emplace_back(std::move(kernel));
        }
        {
            // C = MatMul(A, B)
            std::shared_ptr<Tensor> C(Tensor::createDevice<float>({oc * kw * kh, batch * ih * iw}));
            res.command.emplace_back(GeometryComputerUtils::makeMatMul(A, B, C.get(), nullptr, true, false));
            res.extras.emplace_back(C);
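            // With transposeA = true, transposeB = false this computes C = A^T * B:
            //   [oc*kh*kw, ic] x [ic, batch*ih*iw] -> [oc*kh*kw, batch*ih*iw],
            // one partial result per kernel tap, still indexed by input position.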

            // Col2Im:
            // 1. C -> C' batch, oc, oh, ow, kw*kh; 2. C' -> C'' batch, oc, oh, ow (reduce_sum)
            // 3. C'' -> C'' + bias; 4. posttreat(C'' + bias)
            std::shared_ptr<Tensor> C_(Tensor::createDevice<float>({batch, kw * kh, oc * oh * ow}));
            res.extras.emplace_back(C_);
            {
                std::shared_ptr<Tensor> im2ColTemp(Tensor::createDevice<float>({oc * kw * kh, batch * ih * iw}));
                // Swap ow, iw, oh, ih for im2Col
                GeometryConvUtils::im2Col(im2ColTemp.get(), outputDiff, oc, kh, kw, batch, ih, iw, oh, ow, sh, sw, dh, dw, pads, oh * ow * oc);
                auto des = TensorUtils::getDescribe(C_.get());
                des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                auto originDes = TensorUtils::getDescribe(im2ColTemp.get());
                des->regions = std::move(originDes->regions);
                // Swap src and dst, from im2col -> col2im
                for (auto& reg : des->regions) {
                    reg.origin = C.get();
                    auto temp = reg.src;
                    reg.src = std::move(reg.dst);
                    reg.dst = std::move(temp);
                }
            }
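            // Col2im here is the adjoint of im2col: the same regions, with src and
            // dst exchanged, scatter each kernel tap's contribution back toward its
            // spatial position. Overlapping taps are kept apart on the kw*kh axis
            // of C_ (via the trailing oh*ow*oc offset passed to im2Col above), and
            // the reduce below sums them into the final output values.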
            std::shared_ptr<Tensor> C__(Tensor::createDevice<float>({batch, 1, oc * oh * ow}));
            res.extras.emplace_back(C__);
            res.command.emplace_back(GeometryComputerUtils::makeReduce(ReductionType_SUM, C_.get(), C__.get()));
            if (inputs.size() > 2) {
                MNN_ASSERT(oc == inputs[2]->elementSize());
                std::shared_ptr<Tensor> biasLarge(Tensor::createDevice<float>({batch, 1, oc * oh * ow}));
                res.extras.emplace_back(biasLarge);
                auto des = TensorUtils::getDescribe(biasLarge.get());
                des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                des->regions.resize(1);
                auto& reg = des->regions[0];
                reg.origin = inputs[2];
                reg.size[0] = batch;
                reg.size[1] = oc;
                reg.size[2] = oh * ow;
                reg.src.offset = 0;
                reg.src.stride[0] = 0;
                reg.src.stride[1] = 1;
                reg.src.stride[2] = 0;
                reg.dst.offset = 0;
                reg.dst.stride[0] = oc * oh * ow;
                reg.dst.stride[1] = oh * ow;
                reg.dst.stride[2] = 1;
                std::shared_ptr<Tensor> temp(Tensor::createDevice<float>({batch, 1, oh * ow * oc}));
                res.extras.emplace_back(temp);
                res.command.emplace_back(GeometryComputerUtils::makeBinary(BinaryOpOperation_ADD, C__.get(), biasLarge.get(), temp.get()));
                C__ = temp;
            }
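            // biasLarge is a broadcast view: the zero src strides re-read bias[c]
            // for every batch index and spatial position, expanding the length-oc
            // vector into the full [batch, oc, oh*ow] layout for the ADD.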

            // Activation
            float minValue = 0.0f, maxValue = 0.0f;
            bool needPostTreat = false;
            if (common->relu()) {
                needPostTreat = true;
                minValue = 0.0f;
                maxValue = std::numeric_limits<float>::max();
            }
            if (common->relu6()) {
                needPostTreat = true;
                minValue = 0.0f;
                maxValue = 6.0f;
            }
            if (needPostTreat) {
                flatbuffers::FlatBufferBuilder builder;
                builder.Finish(GeometryConvUtils::makeRelu6(builder, minValue, maxValue));
                std::shared_ptr<Tensor> C2(new Tensor);
                C2->buffer().type = halide_type_of<float>();
                C2->buffer().dimensions = 3;
                C2->setLength(0, batch);
                C2->setLength(1, 1);
                C2->setLength(2, ow * oh * oc);
                TensorUtils::getDescribe(C2.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW;
                auto cmd = GeometryComputerUtils::makeCommand(builder, {C__.get()}, {C2.get()});
                res.command.emplace_back(cmd);
                res.extras.emplace_back(C2);
                C__ = C2;
            }
            GeometryComputerUtils::makeRawAddressRef(outputs[0], C__.get(), 0, oc * batch * ow * oh);
        }
        return true;
    }
    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& res) const override {
        if (inputs.size() == 1) {
            // Origin convolution with format converter
            return GeometryConvUtils::computeSingle(op, inputs, outputs, context, res);
        }
        return computeGEMM_Col2Im(op, inputs, outputs, context, res);
    }
};
static void _create() {
    std::shared_ptr<GeometryComputer> comp(new GeometryConv2D);
    GeometryComputer::registerGeometryComputer(comp, {OpType_Convolution});

    std::shared_ptr<GeometryComputer> comp2(new GeometryConvTranspose2D);
    GeometryComputer::registerGeometryComputer(comp2, {OpType_Deconvolution});
}

REGISTER_GEOMETRY(GeometryConv2D, _create);

} // namespace MNN