//
//  GeometryLRN.cpp
//  MNN
//
//  Created by MNN on 2020/07/09.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#include "ConvertUtils.hpp"
#include "geometry/GeometryComputer.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#include "core/Macro.h"
#include "core/OpCommonUtils.hpp"
#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>

namespace MNN {
|
|
class GeometryLRN : public GeometryComputer {
public:
    // Lowers a Caffe-style Normalize op into primitive geometry commands:
    //     out = x * rsqrt(sum(x^2) + eps) * scale
    // The squared sum runs over the channel axis, or over the whole C*H*W
    // slice per batch when acrossSpatial is set. Always returns true.
    bool computeForNormalize(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                             Context& context, CommandBuffer& res) const {
        auto normalize = op->main_as_Normalize();
        auto mAcrossSpatial = normalize->acrossSpatial();
        // NOTE(review): channelShared is read but never used below; a shared
        // (size-1) scale would be over-read by the broadcast region — confirm
        // converters always emit a per-channel scale for this path.
        auto mChannelShared = normalize->channelShared();
        Tensor* eps = nullptr;
        Tensor* scale = nullptr;
        // Constants are cached per-op across geometry passes; order matches
        // the allocation order below: cache[0] = eps scalar, cache[1] = scale.
        auto cache = context.searchConst(op);
        if (!cache.empty()) {
            eps = cache[0].get();
            scale = cache[1].get();
        } else {
            auto mEps = normalize->eps();
            auto epsT = context.allocConst(op, {}, halide_type_of<float>());
            epsT->host<float>()[0] = mEps;
            eps = epsT.get();
            // Scale is laid out as {1, C, 1} so it can broadcast over the
            // batch and spatial dimensions.
            auto mScale = context.allocConst(op, {1, (int)normalize->scale()->size(), 1}, halide_type_of<float>());
            ::memcpy(mScale->host<float>(), normalize->scale()->data(), normalize->scale()->size() * sizeof(float));
            scale = mScale.get();
        }
        auto inputTensor = inputs[0];
        // Across channel
        int inside = inputTensor->width() * inputTensor->height(); // H*W
        int axis = inputTensor->channel();                         // C
        int outside = inputTensor->batch();                        // N

        {
            // Zero-copy broadcast of the scale through a virtual region:
            // 1, axis, 1 -> outside, axis, inside
            // (src stride 0 on the replicated dimensions means no data copy).
            std::shared_ptr<Tensor> broadCastScale(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
            res.extras.emplace_back(broadCastScale);
            auto des = TensorUtils::getDescribe(broadCastScale.get());
            des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
            des->regions.resize(1);
            auto& reg = des->regions[0];
            reg.size[0] = outside;
            reg.size[1] = axis;
            reg.size[2] = inside;
            reg.src.offset = 0;
            reg.src.stride[0] = 0;
            reg.src.stride[1] = 1;
            reg.src.stride[2] = 0;
            reg.dst.offset = 0;
            reg.dst.stride[0] = axis * inside;
            reg.dst.stride[1] = inside;
            reg.dst.stride[2] = 1;
            reg.origin = scale;
            scale = broadCastScale.get();
        }

        // Across Spatial: fold H*W into the reduction axis so the sum covers
        // the whole C*H*W slice per batch. The total element count is
        // unchanged, so the broadcast scale above still lines up element-wise.
        if (mAcrossSpatial) {
            inside = 1;
            axis = inputTensor->width() * inputTensor->height() * inputTensor->channel();
        }
        // View the input as {outside, axis, inside} and square it element-wise.
        std::shared_ptr<Tensor> inputRaw(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(inputRaw);
        std::shared_ptr<Tensor> inputRawSquare(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(inputRawSquare);
        GeometryComputerUtils::makeRawAddressRef(inputRaw.get(), inputTensor, 0, outside * axis * inside);
        res.command.emplace_back(
            GeometryComputerUtils::makeUnary(UnaryOpOperation_SQUARE, inputRaw.get(), inputRawSquare.get()));
        // summer = sum(x^2), reduced over the middle (axis) dimension.
        std::shared_ptr<Tensor> summer(Tensor::createDevice<float>({outside, 1, inside}, Tensor::CAFFE));
        res.extras.emplace_back(summer);
        res.command.emplace_back(
            GeometryComputerUtils::makeReduce(ReductionType_SUM, inputRawSquare.get(), summer.get()));
        // temp1 = rsqrt(summer + eps); the scalar eps broadcasts in the ADD.
        std::shared_ptr<Tensor> temp0(Tensor::createDevice<float>({outside, 1, inside}, Tensor::CAFFE));
        res.extras.emplace_back(temp0);
        std::shared_ptr<Tensor> temp1(Tensor::createDevice<float>({outside, 1, inside}, Tensor::CAFFE));
        res.extras.emplace_back(temp1);
        res.command.emplace_back(
            GeometryComputerUtils::makeBinary(BinaryOpOperation_ADD, summer.get(), eps, temp0.get()));
        res.command.emplace_back(GeometryComputerUtils::makeUnary(UnaryOpOperation_RSQRT, temp0.get(), temp1.get()));

        // Broadcast the per-(outside, inside) normalizer temp1 back up to
        // {outside, axis, inside} (src stride 0 along the axis dimension).
        std::shared_ptr<Tensor> scaleFirst(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(scaleFirst);
        {
            // Broadcast scale
            auto des = TensorUtils::getDescribe(scaleFirst.get());
            des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
            des->regions.resize(1);
            auto& reg = des->regions[0];
            reg.size[0] = outside;
            reg.size[1] = axis;
            reg.size[2] = inside;
            reg.src.offset = 0;
            reg.src.stride[0] = inside;
            reg.src.stride[1] = 0;
            reg.src.stride[2] = 1;
            reg.dst.offset = 0;
            reg.dst.stride[0] = axis * inside;
            reg.dst.stride[1] = inside;
            reg.dst.stride[2] = 1;
            reg.origin = temp1.get();
        }

        // output1 = (x * normalizer) * scale, then aliased onto outputs[0].
        std::shared_ptr<Tensor> output0(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(output0);
        std::shared_ptr<Tensor> output1(Tensor::createDevice<float>({outside, axis, inside}, Tensor::CAFFE));
        res.extras.emplace_back(output1);
        res.command.emplace_back(
            GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, inputRaw.get(), scaleFirst.get(), output0.get()));
        res.command.emplace_back(
            GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, output0.get(), scale, output1.get()));

        GeometryComputerUtils::makeRawAddressRef(outputs[0], output1.get(), 0, inside * outside * axis);
        return true;
    }

    // Lowers LRN into primitives:
    //     out = x * (bias + alpha * mean(x^2 over window))^(-beta)
    // regionType == 0 (across channels): 1-D window of localSize along C;
    // otherwise (within channel): localSize x localSize spatial window.
    // The AVEPOOL used for the window already divides by the window size, so
    // alpha here scales the window *mean* (not the raw sum of the textbook
    // formula). Always returns true.
    bool computeForLRN(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                       Context& context, CommandBuffer& res) const {
        auto parameter = op->main_as_LRN();
        // Across channel
        auto alpha = parameter->alpha();
        auto beta = parameter->beta();
        auto bias = parameter->bias();
        auto input = inputs[0];
        int outside = input->length(0); // N
        int channel = input->length(1); // C
        int inside = 1;                 // product of the trailing (spatial) dims
        for (int i = 2; i < input->dimensions(); ++i) {
            inside *= input->length(i);
        }
        // The stride math below assumes channel-major layout; NHWC is unsupported.
        MNN_ASSERT(TensorUtils::getDescribe(input)->dimensionFormat != MNN_DATA_FORMAT_NHWC);
        if (TensorUtils::getDescribe(input)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4) {
            // Re-view the packed input as plain NCHW through a zero-copy raw ref.
            std::shared_ptr<Tensor> newInput(new Tensor);
            newInput->buffer().type = input->getType();
            TensorUtils::copyShape(input, newInput.get(), true);
            TensorUtils::getDescribe(newInput.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW;
            res.extras.emplace_back(newInput);
            GeometryComputerUtils::makeRawAddressRef(newInput.get(), input, 0, inside * outside * channel);
            input = newInput.get();
        }
        // 1. y = x^2
        std::shared_ptr<Tensor> squareInput(new Tensor);
        squareInput->buffer().type = input->getType();
        TensorUtils::copyShape(input, squareInput.get(), true);
        res.extras.emplace_back(squareInput);
        res.command.emplace_back(GeometryComputerUtils::makeUnary(UnaryOpOperation_SQUARE, input, squareInput.get()));
        // 2. z = filter(y, 1) — windowed mean of the squares.
        std::shared_ptr<Tensor> filterOutput(new Tensor);
        filterOutput->buffer().type = input->getType();
        TensorUtils::copyShape(input, filterOutput.get());
        res.extras.emplace_back(filterOutput);

        if (parameter->regionType() == 0) {
            // ACROSS_CHANNELS:
            // 2.1 NCHW -> N, H*W, 1, localsize /2 + C + localsize / 2
            // Transpose channels to the innermost dimension, padded by `pad`
            // on each side, so a 1-D pool can slide along channels.
            // NOTE(review): relies on the uncovered pad area of a virtual-
            // region tensor reading as zero — confirm with the raster backend.
            std::shared_ptr<Tensor> squareInputTranspose(new Tensor);
            {
                auto pad = parameter->localSize() / 2;
                squareInputTranspose->buffer().type = input->getType();
                squareInputTranspose->buffer().dimensions = 4;
                squareInputTranspose->setLength(0, outside);
                squareInputTranspose->setLength(1, inside);
                squareInputTranspose->setLength(2, 1);
                squareInputTranspose->setLength(3, channel + 2 * pad);
                auto des = TensorUtils::getDescribe(squareInputTranspose.get());
                des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
                des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                des->regions.resize(1);
                auto& reg = des->regions[0];
                reg.origin = squareInput.get();
                reg.size[0] = outside;
                reg.size[1] = inside;
                reg.size[2] = channel;
                reg.src.offset = 0;
                reg.src.stride[0] = inside * channel;
                reg.src.stride[1] = 1;
                reg.src.stride[2] = inside;
                // Write starts `pad` elements in, leaving the leading pad zone.
                reg.dst.offset = pad;
                reg.dst.stride[0] = inside * (channel + 2 * pad);
                reg.dst.stride[1] = channel + 2 * pad;
                reg.dst.stride[2] = 1;
            }
            res.extras.emplace_back(squareInputTranspose);
            // 2.2 Filter, Use AVE pool to compute
            // Kernel (localSize, 1), stride 1, VALID: slides along the padded
            // channel dimension; the output length returns to `channel`.
            std::shared_ptr<Tensor> avgTensor(new Tensor);
            TensorUtils::copyShape(squareInputTranspose.get(), avgTensor.get(), true);
            avgTensor->setLength(3, channel);
            avgTensor->buffer().type = squareInputTranspose->getType();
            res.extras.emplace_back(avgTensor);
            {
                flatbuffers::FlatBufferBuilder builder;
                builder.Finish(GeometryComputerUtils::makePool(builder, std::make_pair(parameter->localSize(), 1), std::make_pair(1, 1), PoolType_AVEPOOL, PoolPadType_VALID, std::make_pair(0, 0), false));
                res.command.emplace_back(
                    GeometryComputerUtils::makeCommand(builder, {squareInputTranspose.get()}, {avgTensor.get()}));
            }
            // 2.3 N, H*W, 1, C -> NCHW (transpose channels back outward).
            {
                auto des = TensorUtils::getDescribe(filterOutput.get());
                des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                des->dimensionFormat = MNN_DATA_FORMAT_NCHW;
                des->regions.resize(1);
                auto& reg = des->regions[0];
                reg.origin = avgTensor.get();
                reg.size[0] = outside;
                reg.size[1] = channel;
                reg.size[2] = inside;
                reg.src.offset = 0;
                reg.src.stride[0] = inside * channel;
                reg.src.stride[1] = 1;
                reg.src.stride[2] = channel;
                reg.dst.offset = 0;
                reg.dst.stride[0] = inside * channel;
                reg.dst.stride[1] = inside;
                reg.dst.stride[2] = 1;
            }
        } else {
            // WITHIN_CHANNEL:
            // 2.1 NCHW -> N, C, H+localsize-1, W+localSize-1
            // Zero-pad H and W by `pad` on each side for a VALID spatial pool.
            std::shared_ptr<Tensor> squareInputTranspose(new Tensor);
            {
                auto pad = parameter->localSize() / 2;
                squareInputTranspose->buffer().type = input->getType();
                squareInputTranspose->buffer().dimensions = 4;
                squareInputTranspose->setLength(0, outside);
                squareInputTranspose->setLength(1, channel);
                squareInputTranspose->setLength(2, input->length(2) + 2 * pad);
                squareInputTranspose->setLength(3, input->length(3) + 2 * pad);
                auto des = TensorUtils::getDescribe(squareInputTranspose.get());
                des->dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
                des->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                des->regions.resize(1);
                auto& reg = des->regions[0];
                reg.origin = squareInput.get();
                reg.size[0] = outside * channel;
                reg.size[1] = input->length(2);
                reg.size[2] = input->length(3);
                reg.src.offset = 0;
                reg.src.stride[0] = input->length(3) * input->length(2);
                reg.src.stride[1] = input->length(3);
                reg.src.stride[2] = 1;
                // Shift each plane down-right by `pad` inside the padded buffer.
                reg.dst.offset = pad * squareInputTranspose->length(3) + pad;
                reg.dst.stride[0] = squareInputTranspose->length(2) * squareInputTranspose->length(3);
                reg.dst.stride[1] = squareInputTranspose->length(3);
                reg.dst.stride[2] = 1;
            }
            res.extras.emplace_back(squareInputTranspose);
            // 2.2 Filter, Use AVE pool to compute
            // localSize x localSize kernel, stride 1, VALID: output is H x W again.
            std::shared_ptr<Tensor> avgTensor(new Tensor);
            TensorUtils::copyShape(squareInputTranspose.get(), avgTensor.get(), true);
            avgTensor->setLength(3, input->length(3));
            avgTensor->setLength(2, input->length(2));
            avgTensor->buffer().type = squareInputTranspose->getType();
            res.extras.emplace_back(avgTensor);
            {
                flatbuffers::FlatBufferBuilder builder;
                builder.Finish(GeometryComputerUtils::makePool(builder, std::make_pair(parameter->localSize(), parameter->localSize()), std::make_pair(1, 1), PoolType_AVEPOOL, PoolPadType_VALID, std::make_pair(0, 0), false));
                res.command.emplace_back(
                    GeometryComputerUtils::makeCommand(builder, {squareInputTranspose.get()}, {avgTensor.get()}));
            }
            // 2.3 N, C4, HW, 4 -> NCHW
            // Element order already matches, so a zero-copy raw ref suffices.
            {
                GeometryComputerUtils::makeRawAddressRef(filterOutput.get(), avgTensor.get(), 0,
                                                         outside * inside * channel);
            }
        }

        // 3. temp2 = (filterOutput * alpha + bias) ^ (-beta)
        // (The pow exponent is negated so the final step can multiply instead
        //  of divide; see the -beta constant below.)
        std::shared_ptr<Tensor> temp0(new Tensor);
        temp0->buffer().type = input->getType();
        std::shared_ptr<Tensor> temp1(new Tensor);
        temp1->buffer().type = input->getType();
        std::shared_ptr<Tensor> temp2(new Tensor);
        temp2->buffer().type = input->getType();
        TensorUtils::copyShape(filterOutput.get(), temp0.get(), true);
        TensorUtils::copyShape(filterOutput.get(), temp1.get(), true);
        TensorUtils::copyShape(filterOutput.get(), temp2.get(), true);
        res.extras.emplace_back(temp0);
        res.extras.emplace_back(temp1);
        res.extras.emplace_back(temp2);

        {
            Tensor* Alpha = nullptr;
            Tensor* Beta = nullptr;
            Tensor* Bias = nullptr;
            // Scalar constants cached per-op; order matches the allocation
            // order below: [0] = alpha, [1] = -beta, [2] = bias.
            auto constTensors = context.searchConst(op);
            if (!constTensors.empty()) {
                Alpha = constTensors[0].get();
                Beta = constTensors[1].get();
                Bias = constTensors[2].get();
            } else {
                auto t0 = context.allocConst(op, {}, halide_type_of<float>());
                auto t1 = context.allocConst(op, {}, halide_type_of<float>());
                auto t2 = context.allocConst(op, {}, halide_type_of<float>());
                t0->host<float>()[0] = alpha;
                t1->host<float>()[0] = -beta; // turn input / pow(filter, beta) -> input * pow(filter, -beta)
                t2->host<float>()[0] = bias;
                Alpha = t0.get();
                Beta = t1.get();
                Bias = t2.get();
            }
            res.command.emplace_back(
                GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, filterOutput.get(), Alpha, temp0.get()));
            res.command.emplace_back(
                GeometryComputerUtils::makeBinary(BinaryOpOperation_ADD, temp0.get(), Bias, temp1.get()));
            res.command.emplace_back(
                GeometryComputerUtils::makeBinary(BinaryOpOperation_POW, temp1.get(), Beta, temp2.get()));
        }
        // 4. output = input * filter
        std::shared_ptr<Tensor> output(new Tensor);
        output->buffer().type = input->getType();
        TensorUtils::copyShape(input, output.get(), true);
        res.extras.emplace_back(output);

        res.command.emplace_back(
            GeometryComputerUtils::makeBinary(BinaryOpOperation_MUL, input, temp2.get(), output.get()));
        // Alias the computed result onto the real output tensor.
        GeometryComputerUtils::makeRawAddressRef(outputs[0], output.get(), 0, outside * inside * channel);
        return true;
    }

    // Entry point: route Normalize ops to the L2-normalize lowering and all
    // other registered types (OpType_LRN) to the LRN lowering.
    virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                           Context& context, CommandBuffer& res) const override {
        if (op->type() == OpType_Normalize) {
            return computeForNormalize(op, inputs, outputs, context, res);
        }
        return computeForLRN(op, inputs, outputs, context, res);
    }
};
|
static void _create() {
|
|
std::shared_ptr<GeometryComputer> comp(new GeometryLRN);
|
|
GeometryComputer::registerGeometryComputer(comp, {OpType_LRN, OpType_Normalize});
|
|
}
|
|
|
|
REGISTER_GEOMETRY(GeometryLRN, _create);
|
|
|
|
} // namespace MNN
|