//
// GeometryLRN.cpp
// MNN
//
// Created by MNN on 2020/07/09.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "ConvertUtils.hpp"
#include "geometry/GeometryComputer.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#include "core/Macro.h"
#include "core/OpCommonUtils.hpp"
#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
namespace MNN {
class GeometryLRN : public GeometryComputer {
public:
    bool computeForNormalize(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                             Context& context, CommandBuffer& res) const {
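        // Decompose Caffe's Normalize layer into elementwise primitives:
        //   y = scale * x * rsqrt(sum(x^2) + eps)
        // where the sum runs over the channel axis, or over C*H*W when
        // acrossSpatial is set.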
        auto normalize      = op->main_as_Normalize();
        auto mAcrossSpatial = normalize->acrossSpatial();
        auto mChannelShared = normalize->channelShared();
        Tensor* eps   = nullptr;
        Tensor* scale = nullptr;
        auto cache = context.searchConst(op);
        if (!cache.empty()) {
            eps   = cache[0].get();
            scale = cache[1].get();
        } else {
            auto mEps = normalize->eps();
            auto epsT = context.allocConst(op, {}, halide_type_of<float>());
            epsT->host<float>()[0] = mEps;
            eps = epsT.get();
            auto mScale = context.allocConst(op, {1, (int)normalize->scale()->size(), 1}, halide_type_of<float>());
            ::memcpy(mScale->host<float>(), normalize->scale()->data(), normalize->scale()->size() * sizeof(float));
            scale = mScale.get();
        }
        auto inputTensor = inputs[0];
        // Across channel
        int inside  = inputTensor->width() * inputTensor->height();
        int axis    = inputTensor->channel();
        int outside = inputTensor->batch();
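        // View the NCHW input as [outside, axis, inside] = [N, C, H*W]; the
        // reduction below then runs along the axis dim.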
        {
            // 1, axis, 1 -> outside, axis, inside
            std::shared_ptr<Tensor> broadCastScale(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
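            // The region below uses zero src strides on the outside/inside dims,
            // replaying the [1, C, 1] scale at every position without a copy.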
            res.extras.emplace_back(broadCastScale);
            auto des = TensorUtils::getDescribe(broadCastScale.get());
            des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
            des->regions.resize(1);
            auto& reg = des->regions[0];
            reg.size[0] = outside;
            reg.size[1] = axis;
            reg.size[2] = inside;
            reg.src.offset = 0;
            reg.src.stride[0] = 0;
            reg.src.stride[1] = 1;
            reg.src.stride[2] = 0;
            reg.dst.offset = 0;
            reg.dst.stride[0] = axis * inside;
            reg.dst.stride[1] = inside;
            reg.dst.stride[2] = 1;
            reg.origin = scale;
            scale = broadCastScale.get();
        }
        // Across Spatial
        if (mAcrossSpatial) {
            inside = 1;
            axis   = inputTensor->width() * inputTensor->height() * inputTensor->channel();
        }
        std::shared_ptr<Tensor> inputRaw(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(inputRaw);
        std::shared_ptr<Tensor> inputRawSquare(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(inputRawSquare);
        GeometryComputerUtils::makeRawAddressRef(inputRaw.get(), inputTensor, 0, outside * axis * inside);
        res.command.emplace_back(
            GeometryComputerUtils::makeUnary(UnaryOpOperation_SQUARE, inputRaw.get(), inputRawSquare.get()));
        std::shared_ptr<Tensor> summer(Tensor::createDevice<float>({outside, 1, inside}, Tensor::CAFFE));
        res.extras.emplace_back(summer);
        res.command.emplace_back(
            GeometryComputerUtils::makeReduce(ReductionType_SUM, inputRawSquare.get(), summer.get()));
        std::shared_ptr<Tensor> temp0(Tensor::createDevice<float>({outside, 1, inside}, Tensor::CAFFE));
        res.extras.emplace_back(temp0);
        std::shared_ptr<Tensor> temp1(Tensor::createDevice<float>({outside, 1, inside}, Tensor::CAFFE));
        res.extras.emplace_back(temp1);
        res.command.emplace_back(
            GeometryComputerUtils::makeBinary(BinaryOpOperation_ADD, summer.get(), eps, temp0.get()));
        res.command.emplace_back(GeometryComputerUtils::makeUnary(UnaryOpOperation_RSQRT, temp0.get(), temp1.get()));
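        // temp1 = rsqrt(sum(x^2) + eps), shaped [outside, 1, inside]; broadcast
        // it back over the axis dim before multiplying.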
        std::shared_ptr<Tensor> scaleFirst(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(scaleFirst);
        {
            // Broadcast the inverse norm over the axis dim
            auto des = TensorUtils::getDescribe(scaleFirst.get());
            des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
            des->regions.resize(1);
            auto& reg = des->regions[0];
            reg.size[0] = outside;
            reg.size[1] = axis;
            reg.size[2] = inside;
            reg.src.offset = 0;
            reg.src.stride[0] = inside;
            reg.src.stride[1] = 0;
            reg.src.stride[2] = 1;
            reg.dst.offset = 0;
            reg.dst.stride[0] = axis * inside;
            reg.dst.stride[1] = inside;
            reg.dst.stride[2] = 1;
            reg.origin = temp1.get();
        }
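        // scaleFirst aliases temp1 with a zero stride along axis, so the same
        // inverse norm is applied to every channel.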
        std::shared_ptr<Tensor> output0(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(output0);
        std::shared_ptr<Tensor> output1(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(output1);
        res.command.emplace_back(
            GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, inputRaw.get(), scaleFirst.get(), output0.get()));
        res.command.emplace_back(
            GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, output0.get(), scale, output1.get()));
        GeometryComputerUtils::makeRawAddressRef(outputs[0], output1.get(), 0, inside * outside * axis);
        return true;
    }
    bool computeForLRN(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                       Context& context, CommandBuffer& res) const {
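        // Decompose LRN into primitives:
        //   y = x * pow(bias + alpha * avgpool(x^2), -beta)
        // Using an average (not a sum) over the local window folds the usual
        // 1/n factor into the pooling step.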
        auto parameter = op->main_as_LRN();
        // Across channel
        auto alpha = parameter->alpha();
        auto beta  = parameter->beta();
        auto bias  = parameter->bias();
        auto input = inputs[0];
        int outside = input->length(0);
        int channel = input->length(1);
        int inside  = 1;
        for (int i = 2; i < input->dimensions(); ++i) {
            inside *= input->length(i);
        }
        MNN_ASSERT(TensorUtils::getDescribe(input)->dimensionFormat != MNN_DATA_FORMAT_NHWC);
        if (TensorUtils::getDescribe(input)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4) {
            std::shared_ptr<Tensor> newInput(new Tensor);
            newInput->buffer().type = input->getType();
            TensorUtils::copyShape(input, newInput.get(), true);
            TensorUtils::getDescribe(newInput.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW;
            res.extras.emplace_back(newInput);
            GeometryComputerUtils::makeRawAddressRef(newInput.get(), input, 0, inside * outside * channel);
            input = newInput.get();
        }
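        // input is plain NCHW from here on; an NC4HW4 input was re-aliased above.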
        // 1. y = x^2
        std::shared_ptr<Tensor> squareInput(new Tensor);
        squareInput->buffer().type = input->getType();
        TensorUtils::copyShape(input, squareInput.get(), true);
        res.extras.emplace_back(squareInput);
        res.command.emplace_back(GeometryComputerUtils::makeUnary(UnaryOpOperation_SQUARE, input, squareInput.get()));
        // 2. z = filter(y, 1)
        std::shared_ptr<Tensor> filterOutput(new Tensor);
        filterOutput->buffer().type = input->getType();
        TensorUtils::copyShape(input, filterOutput.get());
        res.extras.emplace_back(filterOutput);
        if (parameter->regionType() == 0) {
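            // regionType 0 (Caffe's ACROSS_CHANNELS): slide a 1-D window of
            // localSize along C. Lay the channels out contiguously with zero
            // padding of localSize/2 on both ends, then run a 1-D AVE pool.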
            // 2.1 NCHW -> N, H*W, 1, localSize/2 + C + localSize/2
            std::shared_ptr<Tensor> squareInputTranspose(new Tensor);
            {
                auto pad = parameter->localSize() / 2;
                squareInputTranspose->buffer().type = input->getType();
                squareInputTranspose->buffer().dimensions = 4;
                squareInputTranspose->setLength(0, outside);
                squareInputTranspose->setLength(1, inside);
                squareInputTranspose->setLength(2, 1);
                squareInputTranspose->setLength(3, channel + 2 * pad);
                auto des = TensorUtils::getDescribe(squareInputTranspose.get());
                des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
                des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                des->regions.resize(1);
                auto& reg = des->regions[0];
                reg.origin = squareInput.get();
                reg.size[0] = outside;
                reg.size[1] = inside;
                reg.size[2] = channel;
                reg.src.offset = 0;
                reg.src.stride[0] = inside * channel;
                reg.src.stride[1] = 1;
                reg.src.stride[2] = inside;
                reg.dst.offset = pad;
                reg.dst.stride[0] = inside * (channel + 2 * pad);
                reg.dst.stride[1] = channel + 2 * pad;
                reg.dst.stride[2] = 1;
            }
            res.extras.emplace_back(squareInputTranspose);
            // 2.2 Filter, use AVE pool to compute
            std::shared_ptr<Tensor> avgTensor(new Tensor);
            TensorUtils::copyShape(squareInputTranspose.get(), avgTensor.get(), true);
            avgTensor->setLength(3, channel);
            avgTensor->buffer().type = squareInputTranspose->getType();
            res.extras.emplace_back(avgTensor);
            {
                flatbuffers::FlatBufferBuilder builder;
                builder.Finish(GeometryComputerUtils::makePool(builder, std::make_pair(parameter->localSize(), 1), std::make_pair(1, 1), PoolType_AVEPOOL, PoolPadType_VALID, std::make_pair(0, 0), false));
                res.command.emplace_back(
                    GeometryComputerUtils::makeCommand(builder, {squareInputTranspose.get()}, {avgTensor.get()}));
            }
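            // With VALID pooling the padded axis gives back exactly
            // (C + 2*pad) - localSize + 1 = C outputs for odd localSize.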
            // 2.3 N, H*W, 1, C -> NCHW
            {
                auto des = TensorUtils::getDescribe(filterOutput.get());
                des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
                des->regions.resize(1);
                auto& reg = des->regions[0];
                reg.origin = avgTensor.get();
                reg.size[0] = outside;
                reg.size[1] = channel;
                reg.size[2] = inside;
                reg.src.offset = 0;
                reg.src.stride[0] = inside * channel;
                reg.src.stride[1] = 1;
                reg.src.stride[2] = channel;
                reg.dst.offset = 0;
                reg.dst.stride[0] = inside * channel;
                reg.dst.stride[1] = inside;
                reg.dst.stride[2] = 1;
            }
        } else {
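            // regionType 1 (Caffe's WITHIN_CHANNEL): average x^2 over a
            // localSize x localSize spatial window inside each channel, via a
            // 2-D AVE pool on a zero-padded copy of each feature map.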
            // 2.1 NCHW -> N, C, H+localSize-1, W+localSize-1
            std::shared_ptr<Tensor> squareInputTranspose(new Tensor);
            {
                auto pad = parameter->localSize() / 2;
                squareInputTranspose->buffer().type = input->getType();
                squareInputTranspose->buffer().dimensions = 4;
                squareInputTranspose->setLength(0, outside);
                squareInputTranspose->setLength(1, channel);
                squareInputTranspose->setLength(2, input->length(2) + 2 * pad);
                squareInputTranspose->setLength(3, input->length(3) + 2 * pad);
                auto des = TensorUtils::getDescribe(squareInputTranspose.get());
                des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
                des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                des->regions.resize(1);
                auto& reg = des->regions[0];
                reg.origin = squareInput.get();
                reg.size[0] = outside * channel;
                reg.size[1] = input->length(2);
                reg.size[2] = input->length(3);
                reg.src.offset = 0;
                reg.src.stride[0] = input->length(3) * input->length(2);
                reg.src.stride[1] = input->length(3);
                reg.src.stride[2] = 1;
                reg.dst.offset = pad * squareInputTranspose->length(3) + pad;
                reg.dst.stride[0] = squareInputTranspose->length(2) * squareInputTranspose->length(3);
                reg.dst.stride[1] = squareInputTranspose->length(3);
                reg.dst.stride[2] = 1;
            }
            res.extras.emplace_back(squareInputTranspose);
            // 2.2 Filter, use AVE pool to compute
            std::shared_ptr<Tensor> avgTensor(new Tensor);
            TensorUtils::copyShape(squareInputTranspose.get(), avgTensor.get(), true);
            avgTensor->setLength(3, input->length(3));
            avgTensor->setLength(2, input->length(2));
            avgTensor->buffer().type = squareInputTranspose->getType();
            res.extras.emplace_back(avgTensor);
            {
                flatbuffers::FlatBufferBuilder builder;
                builder.Finish(GeometryComputerUtils::makePool(builder, std::make_pair(parameter->localSize(), parameter->localSize()), std::make_pair(1, 1), PoolType_AVEPOOL, PoolPadType_VALID, std::make_pair(0, 0), false));
                res.command.emplace_back(
                    GeometryComputerUtils::makeCommand(builder, {squareInputTranspose.get()}, {avgTensor.get()}));
            }
            // 2.3 N, C4, HW, 4 -> NCHW
            {
                GeometryComputerUtils::makeRawAddressRef(filterOutput.get(), avgTensor.get(), 0,
                                                         outside * inside * channel);
            }
        }
        // 3. filter = pow(filter * alpha + bias, -beta)
        std::shared_ptr<Tensor> temp0(new Tensor);
        temp0->buffer().type = input->getType();
        std::shared_ptr<Tensor> temp1(new Tensor);
        temp1->buffer().type = input->getType();
        std::shared_ptr<Tensor> temp2(new Tensor);
        temp2->buffer().type = input->getType();
        TensorUtils::copyShape(filterOutput.get(), temp0.get(), true);
        TensorUtils::copyShape(filterOutput.get(), temp1.get(), true);
        TensorUtils::copyShape(filterOutput.get(), temp2.get(), true);
        res.extras.emplace_back(temp0);
        res.extras.emplace_back(temp1);
        res.extras.emplace_back(temp2);
        {
            Tensor* Alpha = nullptr;
            Tensor* Beta  = nullptr;
            Tensor* Bias  = nullptr;
            auto constTensors = context.searchConst(op);
            if (!constTensors.empty()) {
                Alpha = constTensors[0].get();
                Beta  = constTensors[1].get();
                Bias  = constTensors[2].get();
            } else {
                auto t0 = context.allocConst(op, {}, halide_type_of<float>());
                auto t1 = context.allocConst(op, {}, halide_type_of<float>());
                auto t2 = context.allocConst(op, {}, halide_type_of<float>());
                t0->host<float>()[0] = alpha;
                t1->host<float>()[0] = -beta; // turn input / pow(filter, beta) -> input * pow(filter, -beta)
                t2->host<float>()[0] = bias;
                Alpha = t0.get();
                Beta  = t1.get();
                Bias  = t2.get();
            }
            res.command.emplace_back(
                GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, filterOutput.get(), Alpha, temp0.get()));
            res.command.emplace_back(
                GeometryComputerUtils::makeBinary(BinaryOpOperation_ADD, temp0.get(), Bias, temp1.get()));
            res.command.emplace_back(
                GeometryComputerUtils::makeBinary(BinaryOpOperation_POW, temp1.get(), Beta, temp2.get()));
        }
        // 4. output = input * filter
        std::shared_ptr<Tensor> output(new Tensor);
        output->buffer().type = input->getType();
        TensorUtils::copyShape(input, output.get(), true);
        res.extras.emplace_back(output);
        res.command.emplace_back(
            GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, input, temp2.get(), output.get()));
        GeometryComputerUtils::makeRawAddressRef(outputs[0], output.get(), 0, outside * inside * channel);
        return true;
    }
    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& res) const override {
        if (op->type() == OpType_Normalize) {
            return computeForNormalize(op, inputs, outputs, context, res);
        }
        return computeForLRN(op, inputs, outputs, context, res);
    }
};
static void _create() {
    std::shared_ptr<GeometryComputer> comp(new GeometryLRN);
    GeometryComputer::registerGeometryComputer(comp, {OpType_LRN, OpType_Normalize});
}

REGISTER_GEOMETRY(GeometryLRN, _create);
} // namespace MNN