MNN/source/geometry/GeometryBinary.cpp

249 lines
12 KiB
C++

//
// GeometryBinary.cpp
// MNN
//
// Created by MNN on 2020/05/07.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "ConvertUtils.hpp"
#include "geometry/GeometryComputer.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#include "shape/SizeComputer.hpp"
#ifndef MNN_REDUCE_SIZE
#define MNN_BINARY_LOOP_OPT
#endif
namespace MNN {
class GeometryBinary : public GeometryComputer {
public:
virtual bool onRecompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
if (res.command.size() != 1) {
return false;
}
auto input0 = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
auto inputL0 = TensorUtils::getRawSize(input0);
auto inputL1 = TensorUtils::getRawSize(input1);
auto outputSize = TensorUtils::getRawSize(output);
auto inp0format = TensorUtils::getDescribe(inputs[0])->dimensionFormat;
auto inp1format = TensorUtils::getDescribe(inputs[1])->dimensionFormat;
auto outFormat = TensorUtils::getDescribe(output)->dimensionFormat;
auto cmdP = res.command[0];
if (cmdP->op->type() != OpType_BinaryOp) {
return false;
}
MNN_ASSERT(0 != inputL1 && 0 != inputL0 && 0 != outputSize);
//MNN_PRINT("On compute geometry: %d - %d - %d\n", inputL0, inputL1, outputSize);
if (1 == inputL0 || 1 == inputL1) {
// Can directly compute
cmdP->inputs[0] = input0;
cmdP->inputs[1] = input1;
return true;
}
// Need Broadcast or same shape
bool input0Broadcast = false;
bool input1Broadcast = false;
if (outputSize != inputL0 || inp0format != outFormat ||
(output->dimensions() != input0->dimensions() && (MNN_DATA_FORMAT_NC4HW4 == outFormat || context.forwardType() == MNN_FORWARD_OPENCL))// OpenCL default format is MNN_DATA_FORMAT_NC4HW4
) {
input0Broadcast = true;
}
if (outputSize != inputL1 || inp1format != outFormat ||
(output->dimensions() != input1->dimensions() && (MNN_DATA_FORMAT_NC4HW4 == outFormat || context.forwardType() == MNN_FORWARD_OPENCL))) {// OpenCL default format is MNN_DATA_FORMAT_NC4HW4
input1Broadcast = true;
}
auto cacheTensor = std::move(res.extras);
if (input0Broadcast) {
std::shared_ptr<Tensor> newTensor;
if (!cacheTensor.empty()) {
newTensor = cacheTensor[cacheTensor.size() - 1];
cacheTensor.erase(cacheTensor.begin() + cacheTensor.size() - 1);
} else {
newTensor.reset(new Tensor);
}
TensorUtils::copyShape(output, newTensor.get(), true);
newTensor->buffer().type = output->buffer().type;
ConvertUtils::broadcastto(input0, newTensor.get());
input0 = newTensor.get();
res.extras.emplace_back(newTensor);
}
if (input1Broadcast) {
std::shared_ptr<Tensor> newTensor;
if (!cacheTensor.empty()) {
newTensor = cacheTensor[cacheTensor.size() - 1];
cacheTensor.erase(cacheTensor.begin() + cacheTensor.size() - 1);
} else {
newTensor.reset(new Tensor);
}
TensorUtils::copyShape(output, newTensor.get(), true);
newTensor->buffer().type = output->buffer().type;
ConvertUtils::broadcastto(input1, newTensor.get());
input1 = newTensor.get();
res.extras.emplace_back(newTensor);
}
cmdP->inputs[0] = input0;
cmdP->inputs[1] = input1;
return true;
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
auto input0 = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
auto inputL0 = TensorUtils::getRawSize(input0);
auto inputL1 = TensorUtils::getRawSize(input1);
auto outputSize = TensorUtils::getRawSize(output);
auto inp0format = TensorUtils::getDescribe(inputs[0])->dimensionFormat;
auto inp1format = TensorUtils::getDescribe(inputs[1])->dimensionFormat;
auto outFormat = TensorUtils::getDescribe(output)->dimensionFormat;
MNN_ASSERT(0 != inputL1 && 0 != inputL0 && 0 != outputSize);
//MNN_PRINT("On compute geometry: %d - %d - %d\n", inputL0, inputL1, outputSize);
if (1 == inputL0 || 1 == inputL1 || context.forwardType() == MNN_FORWARD_NN) {
// Can directly compute
std::shared_ptr<Command> cmdP(new Command);
auto& cmd = *cmdP;
cmd.op = op;
cmd.inputs = {input0, input1};
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmdP));
return true;
}
// Need Broadcast or same shape
bool input0Broadcast = false;
bool input1Broadcast = false;
if (outputSize != inputL0 || inp0format != outFormat ||
(output->dimensions() != input0->dimensions() && (MNN_DATA_FORMAT_NC4HW4 == outFormat || context.forwardType() == MNN_FORWARD_OPENCL))// OpenCL default format is MNN_DATA_FORMAT_NC4HW4
) {
input0Broadcast = true;
}
if (outputSize != inputL1 || inp1format != outFormat ||
(output->dimensions() != input1->dimensions() && (MNN_DATA_FORMAT_NC4HW4 == outFormat || context.forwardType() == MNN_FORWARD_OPENCL))) {// OpenCL default format is MNN_DATA_FORMAT_NC4HW4
input1Broadcast = true;
}
#ifdef MNN_BINARY_LOOP_OPT
// One input need broadcast, the other needn't
bool singleBroadCast = (!(input0Broadcast && input1Broadcast)) && (input0Broadcast || input1Broadcast);
bool forwardSupportLoop = inp0format == outFormat && inp1format == outFormat && outFormat != MNN_DATA_FORMAT_NC4HW4 && input0->getType().code == halide_type_float && op->main_as_BinaryOp()->activationType() == 0;
bool openLoop = context.support(Interpreter::GeometryComputeMask::GEOMETRCOMPUTEMASK_USELOOP);
if (singleBroadCast && forwardSupportLoop && openLoop) {
// Use Loop instead of broadcast
std::shared_ptr<Tensor> newTensor(new Tensor);
TensorUtils::copyShape(output, newTensor.get(), true);
newTensor->buffer().type = output->buffer().type;
int srcIndex = 1;
int dstIndex = 2;
if (input0Broadcast) {
ConvertUtils::broadcastto(input0, newTensor.get());
} else {
srcIndex = 2;
dstIndex = 1;
ConvertUtils::broadcastto(input1, newTensor.get());
}
auto des = TensorUtils::getDescribe(newTensor.get());
flatbuffers::FlatBufferBuilder builder;
BinaryOpBuilder binaryOpParamBuilder(builder);
binaryOpParamBuilder.add_opType(op->main_as_BinaryOp()->opType());
auto binaryOpParamOffset = binaryOpParamBuilder.Finish();
OpBuilder cmdOpBuilder(builder);
cmdOpBuilder.add_type(OpType_BinaryOp);
cmdOpBuilder.add_main(binaryOpParamOffset.Union());
cmdOpBuilder.add_main_type(OpParameter_BinaryOp);
auto cmdOpOffset = cmdOpBuilder.Finish();
auto iterIndexesOffset = builder.CreateVector(std::vector<int>{-1, -1, -1});
auto stepOffset = builder.CreateVector(std::vector<int>{0, 0, 0});
auto indexesOffset = builder.CreateVector(std::vector<int>{2, 0, 1});
std::vector<flatbuffers::Offset<RegionCommand>> regionCommands;
for (int i=0; i<des->regions.size(); ++i) {
auto& reg = des->regions[i];
auto sizeOffset = builder.CreateVector(reg.size, 3);
auto dstStride = builder.CreateVector(reg.dst.stride, 3);
auto srcStride = builder.CreateVector(reg.src.stride, 3);
std::vector<flatbuffers::Offset<View>> views(3);
{
ViewBuilder dstBuilder(builder);
dstBuilder.add_offset(reg.dst.offset);
dstBuilder.add_stride(dstStride);
views[0] = dstBuilder.Finish();
views[dstIndex] = views[0];
ViewBuilder srcBuilder(builder);
srcBuilder.add_offset(reg.src.offset);
srcBuilder.add_stride(srcStride);
views[srcIndex] = srcBuilder.Finish();
}
auto viewsOffset = builder.CreateVector<flatbuffers::Offset<View>>(views);
RegionCommandBuilder cmdBuilder(builder);
cmdBuilder.add_op(cmdOpOffset);
cmdBuilder.add_view(viewsOffset);
cmdBuilder.add_size(sizeOffset);
cmdBuilder.add_steps(stepOffset);
cmdBuilder.add_iterIndexes(iterIndexesOffset);
cmdBuilder.add_indexes(indexesOffset);
regionCommands.emplace_back(cmdBuilder.Finish());
}
auto rcmdAllOffset = builder.CreateVector<flatbuffers::Offset<RegionCommand>>(regionCommands);
auto inputIndexesOffset = builder.CreateVector(std::vector<int>{0, 1});
auto outputIndexesOffset = builder.CreateVector(std::vector<int>{2});
LoopParamBuilder loopBuilder(builder);
loopBuilder.add_commands(rcmdAllOffset);
loopBuilder.add_loopNumber(1);
loopBuilder.add_tensorNumber(3);
loopBuilder.add_inputIndexes(inputIndexesOffset);
loopBuilder.add_outputIndexes(outputIndexesOffset);
auto loopOffset = loopBuilder.Finish();
flatbuffers::Offset<flatbuffers::String> nameOffset;
if (nullptr != op->name()) {
nameOffset = builder.CreateString(op->name()->c_str());
}
OpBuilder finishBuilder(builder);
finishBuilder.add_main(loopOffset.Union());
finishBuilder.add_main_type(OpParameter_LoopParam);
finishBuilder.add_type(OpType_While);
if (nullptr != op->name()) {
finishBuilder.add_name(nameOffset);
}
builder.Finish(finishBuilder.Finish());
auto cmd = GeometryComputerUtils::makeCommand(builder, {input0, input1}, outputs);
res.command.emplace_back(std::move(cmd));
return true;
}
#endif
if (input0Broadcast) {
std::shared_ptr<Tensor> newTensor(new Tensor);
TensorUtils::copyShape(output, newTensor.get(), true);
newTensor->buffer().type = output->buffer().type;
ConvertUtils::broadcastto(input0, newTensor.get());
input0 = newTensor.get();
res.extras.emplace_back(newTensor);
}
if (input1Broadcast) {
std::shared_ptr<Tensor> newTensor(new Tensor);
TensorUtils::copyShape(output, newTensor.get(), true);
newTensor->buffer().type = output->buffer().type;
ConvertUtils::broadcastto(input1, newTensor.get());
input1 = newTensor.get();
res.extras.emplace_back(newTensor);
}
std::shared_ptr<Command> cmdP(new Command);
auto& cmd = *cmdP;
cmd.op = op;
cmd.inputs = {input0, input1};
cmd.outputs = std::move(outputs);
res.command.emplace_back(std::move(cmdP));
return true;
}
};
static void _create() {
std::shared_ptr<GeometryComputer> comp(new GeometryBinary);
GeometryComputer::registerGeometryComputer(comp, {OpType_BinaryOp});
}
REGISTER_GEOMETRY(GeometryBinary, _create);
} // namespace MNN