MNN/source/shape/SizeComputer.cpp

229 lines
8.2 KiB
C++
Raw Normal View History

2019-04-17 10:49:11 +08:00
//
// SizeComputer.cpp
// MNN
//
// Created by MNN on 2019/01/10.
// Copyright © 2018, Alibaba Group Holding Limited
//
2020-11-05 16:41:56 +08:00
#include "shape/SizeComputer.hpp"
2019-04-17 10:49:11 +08:00
#include <stdlib.h>
2020-11-05 16:41:56 +08:00
#include <mutex>
2019-12-27 22:16:57 +08:00
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
2022-06-10 10:39:50 +08:00
// #define MNN_DEBUG_TENSOR_SIZE
2019-04-17 10:49:11 +08:00
namespace MNN {
2019-05-09 19:39:33 +08:00
// Forward declaration only; no definition appears in this file.
// SizeComputerSuite::init() calls it once, right after the registry is allocated.
// NOTE(review): presumably this registers every shape computer via
// SizeComputerSuite::insert -- confirm against its definition.
void registerShapeOps();
2019-04-17 10:49:11 +08:00
// The single global suite. Stays nullptr until init() allocates it; get() returns
// it as-is, so callers see nullptr if init() has not run.
SizeComputerSuite* SizeComputerSuite::gInstance = nullptr;
// Deletes every computer held in the registry. Slots that were never filled
// hold nullptr, for which delete is a no-op.
SizeComputerSuite::~SizeComputerSuite() {
    for (auto* computer : mRegistry) {
        delete computer;
    }
}
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr;
2019-10-29 13:37:26 +08:00
void SizeComputerSuite::init() {
2020-11-05 16:41:56 +08:00
if (nullptr != gInstance) {
return;
}
gInstance = new SizeComputerSuite;
gInstance->mRegistry.resize(OpType_MAX + 1);
::memset(gInstance->mRegistry.data(), 0, gInstance->mRegistry.size() * sizeof(SizeComputer*));
2020-11-05 16:41:56 +08:00
registerShapeOps();
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr;
2019-10-29 13:37:26 +08:00
}
// Returns the global suite, or nullptr if init() has not created it yet.
SizeComputerSuite* SizeComputerSuite::get() {
    return gInstance;
}
// Stores `t` as the computer for `type`, overwriting whatever the slot held.
// A previously stored computer is not deleted here.
void SizeComputerSuite::insert(SizeComputer* t, OpType type) {
    auto& slot = mRegistry[type];
    slot       = t;
}
// Looks up the computer registered for `name`.
// Returns nullptr when no computer is registered for it, and also when `name`
// lies outside the registry (negative or larger than the table), rather than
// indexing past the end of the table.
SizeComputer* SizeComputerSuite::search(OpType name) {
    // A negative enum value wraps to a huge index and is rejected by the same test.
    const auto index = static_cast<size_t>(name);
    if (index >= mRegistry.size()) {
        return nullptr;
    }
    return mRegistry[index];
}
// Default cost estimate: the element count of the first output, divided by
// 1024 * 1024. `op` and `inputs` are not consulted.
float SizeComputer::onComputeFlops(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                   const std::vector<Tensor*>& outputs) const {
    MNN_ASSERT(!outputs.empty());
    auto firstOutput     = outputs[0];
    const float elements = static_cast<float>(firstOutput->elementSize());
    return elements / 1024.0f / 1024.0f;
}
2021-04-08 15:34:23 +08:00
2020-11-05 16:41:56 +08:00
// Estimates the cost of `op`; element counts are divided by 1024 * 1024.
//  - If a SizeComputer is registered for the op type, its onComputeFlops() decides.
//  - Otherwise, a While op carrying a LoopParam sums size[0] * size[1] * size[2]
//    over its region commands and multiplies the sum by the loop count.
//  - Anything else sums the element counts of all outputs.
float SizeComputer::computeFlops(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                 const std::vector<Tensor*>& outputs) {
    if (auto registered = SizeComputerSuite::get()->search(op->type())) {
        return registered->onComputeFlops(op, inputs, outputs);
    }

    const bool isLoop = op->type() == OpType_While && op->main_type() == OpParameter_LoopParam;
    float total       = 0.0f;
    if (!isLoop) {
        for (auto tensor : outputs) {
            total += static_cast<float>(tensor->elementSize()) / 1024.0f / 1024.0f;
        }
        return total;
    }

    auto loopParam    = op->main_as_LoopParam();
    auto commandCount = loopParam->commands()->size();
    for (int i = 0; i < commandCount; ++i) {
        auto region = loopParam->commands()->GetAs<RegionCommand>(i);
        auto extent = region->size()->data();
        total += static_cast<float>(extent[0]) * static_cast<float>(extent[1]) * static_cast<float>(extent[2]) /
                 1024.0f / 1024.0f;
    }
    return total * static_cast<float>(loopParam->loopNumber());
}
2019-04-17 10:49:11 +08:00
// Fills in the shape of `outputs` for `op` applied to `inputs`.
//
// `op` may be nullptr, which denotes a copy op; only the default rule below is
// tried in that case.
//
// For a non-null op:
//  - While / If ops are rejected (returns false).
//  - Any input with a negative extent makes the call return false.
//  - If a SizeComputer is registered for the op type, its onComputeSize() result
//    is returned.
//
// Default rule (null op, or no registered computer): when there is at least one
// input and the outputs are either a single tensor or match the inputs
// one-to-one, output i takes the dimensions, type and dimension format of input i.
//
// Returns false, after printing a message, when none of the above applies.
bool SizeComputer::computeOutputSize(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                     const std::vector<Tensor*>& outputs) {
    auto computeFactory = SizeComputerSuite::get();
    // When op is nullptr, it means a copy op
    if (nullptr != op) {
        // Don't support compute shape for control flow op
        if (op->type() == OpType_While || op->type() == OpType_If) {
            return false;
        }
        // Check -1 input
        for (auto& t : inputs) {
            for (int i = 0; i < t->dimensions(); ++i) {
                if (t->length(i) < 0) {
                    return false;
                }
            }
        }
        auto computer = computeFactory->search(op->type());
        if (nullptr != computer) {
            bool ret = computer->onComputeSize(op, inputs, outputs);
#ifdef MNN_DEBUG_TENSOR_SIZE
            if (op->name() != nullptr) {
                MNN_PRINT("===> compute shape: %s, [%s]\n", op->name()->c_str(), MNN::EnumNameOpType(op->type()));
            } else {
                MNN_PRINT("===> compute shape:[%s]\n", MNN::EnumNameOpType(op->type()));
            }
            if (inputs.size()) {
                MNN_PRINT("\tInputs:\n");
                for (auto o : inputs) {
                    MNN_PRINT("\tptr=%p, format=%s, datatype=%d;\t", o, EnumNameMNN_DATA_FORMAT(TensorUtils::getDescribe(o)->dimensionFormat), o->getType().code);
                    if (o->dimensions() == 0) {
                        MNN_PRINT("\t*Scalar*");
                    }
                    for (int i = 0; i < o->dimensions(); ++i) {
                        MNN_PRINT("%d, ", o->length(i));
                    }
                    MNN_PRINT("\n");
                }
            }
            MNN_PRINT("\tOutputs:\n");
            for (auto o : outputs) {
                MNN_PRINT("\tptr=:%p, format=%s, datatype=%d;\t",o, EnumNameMNN_DATA_FORMAT(TensorUtils::getDescribe(o)->dimensionFormat), o->getType().code);
                if (o->dimensions() == 0) {
                    MNN_PRINT("\t*Scalar*");
                }
                for (int i = 0; i < o->dimensions(); ++i) {
                    MNN_PRINT("%d, ", o->length(i));
                }
                MNN_PRINT("\n");
            }
#endif
            return ret;
        }
    }
    // Default Set to the same
    if (inputs.size() >= 1 && (outputs.size() == 1 || outputs.size() == inputs.size())) {
        // In-place case: the first output is the first input, nothing to copy.
        if (inputs[0] == outputs[0]) {
            return true;
        }
        // outputs.size() <= inputs.size() holds here, so inputs[i] is in range.
        for (size_t i = 0; i < outputs.size(); ++i) {
            const auto& ib = inputs[i]->buffer();
            auto& ob       = outputs[i]->buffer();
            memcpy(ob.dim, ib.dim, sizeof(halide_dimension_t) * ib.dimensions);
            ob.dimensions = ib.dimensions;
            ob.type       = ib.type;
            TensorUtils::getDescribe(outputs[i])->dimensionFormat = TensorUtils::getDescribe(inputs[i])->dimensionFormat;
        }
        return true;
    }
    // Not Support
    // op can still be nullptr here (a copy op whose input/output counts do not fit
    // the default rule), so it must not be dereferenced without a check.
    if (nullptr == op) {
        MNN_PRINT("Can't compute size for copy op: %d inputs, %d outputs\n", (int)inputs.size(), (int)outputs.size());
        return false;
    }
    MNN_PRINT("Can't compute size for %d, name=%s\n", op->type(), op->name() ? op->name()->c_str() : "");
    return false;
}
2021-04-08 15:34:23 +08:00
// Returns the indices of the inputs whose content (not just shape) is needed
// for `op`, given that it has `inputSize` inputs.
//  - nullptr op (a copy op): none.
//  - Deconvolution whose Convolution2D common has hasOutputShape(): the last input.
//  - Squeeze / Unsqueeze with more than one input: input 1.
//  - CumSum: input 1.
//  - Otherwise the registered computer's mNeedContentInputIndex, or none when no
//    computer is registered for the op type.
std::vector<int> SizeComputer::needInputContent(const MNN::Op* op, int inputSize) {
    // When op is nullptr, it means a copy op
    if (nullptr == op) {
        return {};
    }
    const auto type = op->type();

    // when hasOutputShape = true, deconv last is outputShape
    if (type == OpType_Deconvolution) {
        auto conv = op->main_as_Convolution2D();
        if (conv && conv->common() && conv->common()->hasOutputShape()) {
            return {inputSize - 1};
        }
    }

    const bool squeezeLike = (type == OpType_Squeeze || type == OpType_Unsqueeze);
    if ((inputSize > 1 && squeezeLike) || type == OpType_CumSum) {
        return {1};
    }

    if (auto computer = SizeComputerSuite::get()->search(type)) {
        return computer->mNeedContentInputIndex;
    }
    return {};
}
2021-04-08 15:34:23 +08:00
// Computes the broadcast shape of all `inputs` and writes it into outputs[0]
// (dimension count and per-axis extent only).
//
// The input with the most dimensions seeds the result; every other input is
// aligned to it on the trailing axes. On each aligned axis the two extents must
// be equal or one of them must be 1, otherwise an error is logged and false is
// returned. `op` is not consulted.
//
// Returns false as well when `inputs` or `outputs` is empty.
bool SizeComputer::computeBroadCastDims(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                        const std::vector<Tensor*>& outputs) {
    if (inputs.empty() || outputs.empty()) {
        return false;
    }
    // Pick the first input that has the highest rank.
    int maxDimensions = inputs[0]->dimensions();
    size_t maxIndex   = 0;
    for (size_t index = 1; index < inputs.size(); ++index) {
        if (inputs[index]->dimensions() > maxDimensions) {
            maxDimensions = inputs[index]->dimensions();
            maxIndex      = index;
        }
    }
    int outputDims[MNN_MAX_TENSOR_DIM];
    for (int i = 0; i < maxDimensions; i++) {
        outputDims[i] = inputs[maxIndex]->length(i);
    }
    for (size_t index = 0; index < inputs.size(); ++index) {
        if (index == maxIndex) {
            continue;
        }
        auto input1 = inputs[index];
        // Leading axes that input1 lacks are skipped; the rest line up at the end.
        const int diffDimension = maxDimensions - input1->dimensions();
        for (int i = diffDimension; i < maxDimensions; i++) {
            const int dim1 = input1->buffer().dim[i - diffDimension].extent;
            if (dim1 == outputDims[i]) {
                continue;
            }
            if (dim1 != 1 && outputDims[i] != 1) {
                MNN_ERROR("Broad cast error, dim1 = %d, dim2 = %d\n", dim1, outputDims[i]);
                return false;
            }
            // The extents differ and one of them is 1, so the product is the other one.
            outputDims[i] = outputDims[i] * dim1;
        }
    }
    auto& ob      = outputs[0]->buffer();
    ob.dimensions = maxDimensions;
    for (int i = 0; i < maxDimensions; i++) {
        ob.dim[i].extent = outputDims[i];
    }
    return true;
}
2019-04-17 10:49:11 +08:00
} // namespace MNN