MNN/source/shape/SizeComputer.hpp

//
//  SizeComputer.hpp
//  MNN
//
//  Created by MNN on 2019/01/23.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef SizeComputer_hpp
#define SizeComputer_hpp

#include <MNN/Tensor.hpp>
#include <map>
#include <string>
#include <vector>
#include "MNN_generated.h"
#include "core/Execution.hpp"
#include "core/TensorUtils.hpp"
#define FLOPS_M 1000000.0f

namespace MNN {

/**
 * Shape computer for an op: calculates the output tensors' shapes from the op and its
 * input tensors. When analyzing a model it can also report the op's flops.
 *
 * Concrete computers implement onComputeSize(); the static helpers below are the
 * entry points declared for callers (their implementations are not in this header).
 */
class MNN_PUBLIC SizeComputer {
    friend class SizeComputerSuite;

public:
    /**
     * @brief store the list of input indexes associated with this computer.
     * @param index  index list; it is moved into mNeedContentInputIndex, replacing any previous value.
     */
    void setInputIndex(std::vector<int>&& index) {
        mNeedContentInputIndex = std::move(index);
    }
    /**
     * @brief destructor. virtual so that derived computers can be destroyed through a SizeComputer pointer.
     */
    virtual ~SizeComputer() = default;

public:
    /**
     * @brief calculate input and output tensors' shape for given op.
     * @param op        given op.
     * @param inputs    given input tensors.
     * @param outputs   given output tensors.
     * @return true if success, false otherwise.
     */
    virtual bool onComputeSize(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                               const std::vector<Tensor*>& outputs) const = 0;

    /**
     * @brief calculate the flops of this op with the info of inputs size.
     *        Not pure virtual: a default implementation is defined outside this header.
     * @param op        given op.
     * @param inputs    given input tensors.
     * @param outputs   given output tensors.
     * @return the flops in M (presumably millions, cf. FLOPS_M -- TODO confirm against the implementation).
     */
    virtual float onComputeFlops(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                 const std::vector<Tensor*>& outputs) const;

    /**
     * @brief calculate input and output tensors' shape for any registered op.
     * @param op        given registered op.
     * @param inputs    given input tensors.
     * @param outputs   given output tensors.
     * @return true if success, false otherwise.
     */
    static bool computeOutputSize(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                  const std::vector<Tensor*>& outputs);
    // NOTE(review): presumably the static counterpart of onComputeFlops() for any
    // registered op; the implementation is not visible here -- confirm.
    static float computeFlops(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                              const std::vector<Tensor*>& outputs);

    // NOTE(review): judging by the name, computes broadcast output dims from the inputs;
    // the return value presumably signals success -- confirm against the implementation.
    static bool computeBroadCastDims(const std::vector<Tensor*>& inputs,
                                     const std::vector<Tensor*>& outputs);

    // NOTE(review): presumably returns the indexes of the inputs whose content (not just
    // shape) is needed to compute the op's output shape -- confirm against the implementation.
    static std::vector<int> needInputContent(const MNN::Op* op, int inputSize);
private:
    /** input indexes stored by setInputIndex(); empty unless it has been called. */
    std::vector<int> mNeedContentInputIndex;
};

/**
 * Size computer suite: the registry that maps an OpType to its SizeComputer.
 * Accessed through the shared instance returned by get().
 */
class MNN_PUBLIC SizeComputerSuite {
public:
    /**
     * @brief destructor (defined outside this header).
     */
    ~SizeComputerSuite();
    /**
     * @brief get shared instance.
     * @return shared instance.
     */
    static SizeComputerSuite* get();
    // NOTE(review): presumably performs the one-time registration of the built-in size
    // computers; the implementation is not visible in this header -- confirm.
    static void init();

public:
    /**
     * @brief register size computer for designated type
     * @param computer  size computer. NOTE(review): passed as a raw pointer; ownership
     *                  presumably transfers to the suite -- confirm against the implementation.
     * @param type      designated type
     */
    void insert(SizeComputer* computer, OpType type);

    /**
     * @brief query size computer for designated type
     * @param type      designated type
     * @return size computer if found, nullptr otherwise.
     */
    SizeComputer* search(OpType type);

private:
    /** shared instance */
    static SizeComputerSuite* gInstance;
    /** registered size computers (raw pointers; how they are indexed is decided by insert()/search()). */
    std::vector<SizeComputer*> mRegistry;
};

/**
 * Registration helper for size computers.
 *
 * Constructing a SizeComputerRegister<T> heap-allocates one T and inserts it into the
 * shared SizeComputerSuite under the given op type. This class never deletes the
 * allocated object; the pointer is handed over to the suite.
 */
template <class T>
class SizeComputerRegister {
public:
    /**
     * @brief allocate a T and register it with the shared suite.
     * @param type  op type the new computer is registered for.
     */
    SizeComputerRegister(OpType type) {
        T* const computer = new T;
        SizeComputerSuite::get()->insert(computer, type);
    }
    /**
     * @brief allocate a T, give it an input index list, and register it with the shared suite.
     * @param type   op type the new computer is registered for.
     * @param index  index list forwarded (moved) to T::setInputIndex before registration.
     */
    SizeComputerRegister(OpType type, std::vector<int>&& index) {
        T* const computer = new T;
        computer->setInputIndex(std::move(index));
        SizeComputerSuite::get()->insert(computer, type);
    }
};
} // namespace MNN
// Defines a function `___<name>__<op>__()`. Calling that function allocates a `name`
// and inserts it into the shared SizeComputerSuite for `op`; nothing is registered
// until the generated function is invoked.
#define REGISTER_SHAPE(name, op)                                 \
    void ___##name##__##op##__() {                               \
        name* const shapeComputer = new name;                    \
        SizeComputerSuite::get()->insert(shapeComputer, op);     \
    }
// Variant of the shape registration for ops whose output shape depends on the content
// data of the inputs given by `index`. The generated function `___<name>__<op>__()`
// allocates a `name`, passes `index` to its setInputIndex(), and inserts it into the
// shared SizeComputerSuite for `op`.
#define REGISTER_SHAPE_INPUTS(name, op, index)                        \
    void ___##name##__##op##__() {                                    \
        SizeComputerSuite* const suite = SizeComputerSuite::get();    \
        name* const shapeComputer      = new name;                    \
        shapeComputer->setInputIndex(index);                          \
        suite->insert(shapeComputer, op);                             \
    }

// Shape registration that is only active when MNN_SUPPORT_DEPRECATED_OP is defined.
// The function `___<name>__<op>__()` is generated in both configurations, so code
// that calls it still links; without the flag its body is empty and nothing is registered.
#ifdef MNN_SUPPORT_DEPRECATED_OP
#define REGISTER_SHAPE_OLD(name, op)                             \
    void ___##name##__##op##__() {                               \
        name* const shapeComputer = new name;                    \
        SizeComputerSuite::get()->insert(shapeComputer, op);     \
    }
#else
#define REGISTER_SHAPE_OLD(name, op) \
    void ___##name##__##op##__() {}

#endif


// Shape registration with an input index list, only active when MNN_SUPPORT_RENDER is
// defined. The function `___<name>__<op>__()` is generated in both configurations;
// without the flag its body is empty and nothing is registered.
#ifdef MNN_SUPPORT_RENDER
#define REGISTER_SHAPE_INPUTS_RENDER(name, op, index)                 \
    void ___##name##__##op##__() {                                    \
        SizeComputerSuite* const suite = SizeComputerSuite::get();    \
        name* const shapeComputer      = new name;                    \
        shapeComputer->setInputIndex(index);                          \
        suite->insert(shapeComputer, op);                             \
    }

#else
#define REGISTER_SHAPE_INPUTS_RENDER(name, op, index) \
    void ___##name##__##op##__() {}

#endif

// Shape registration that is only active when MNN_SUPPORT_TRANSFORMER_FUSE is defined.
// Despite the "INPUTS" in its name, this macro takes no index list. The function
// `___<name>__<op>__()` is generated in both configurations; without the flag its
// body is empty and nothing is registered.
#ifdef MNN_SUPPORT_TRANSFORMER_FUSE
#define REGISTER_SHAPE_INPUTS_TRANSFORMER_FUSE(name, op)         \
    void ___##name##__##op##__() {                               \
        name* const shapeComputer = new name;                    \
        SizeComputerSuite::get()->insert(shapeComputer, op);     \
    }

#else
#define REGISTER_SHAPE_INPUTS_TRANSFORMER_FUSE(name, op) \
    void ___##name##__##op##__() {}

#endif

#endif
beta 0.1.0 2019-04-17 10:49:11 +08:00			`//`
			`// SizeComputer.hpp`
			`// MNN`
			`//`
			`// Created by MNN on 2019/01/23.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`

			`#ifndef SizeComputer_hpp`
			`#define SizeComputer_hpp`

Github release 1.1.0 2020-11-05 16:41:56 +08:00			`#include <MNN/Tensor.hpp>`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`#include <map>`
			`#include <string>`
			`#include <vector>`
			`#include "MNN_generated.h"`
Github release 1.1.0 2020-11-05 16:41:56 +08:00			`#include "core/Execution.hpp"`
Update 2019-12-27 22:16:57 +08:00			`#include "core/TensorUtils.hpp"`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`#define FLOPS_M 1000000.0f`

			`namespace MNN {`

			`/** computer for op. calculate input and output tensors' shape. when analyzing model, calculate flops too. */`
beta 0.2.0.8 - add NaN check-up - add quantification support for ScaleAdd Op - add binary to eltwise optimization - add console logs for quantization tool - better document for quantization tool - replace redundant dimension flags with dimension format - optimize performance of TensorFlow Lite Quantized Convolution - fix axis support for ONNX softmax - fix get performance compile error on Windows 2019-08-22 20:13:46 +08:00			`class MNN_PUBLIC SizeComputer {`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`friend class SizeComputerSuite;`
Github release 1.1.0 2020-11-05 16:41:56 +08:00
beta 0.1.0 2019-04-17 10:49:11 +08:00			`public:`
beta 0.2.0.8 - add NaN check-up - add quantification support for ScaleAdd Op - add binary to eltwise optimization - add console logs for quantization tool - better document for quantization tool - replace redundant dimension flags with dimension format - optimize performance of TensorFlow Lite Quantized Convolution - fix axis support for ONNX softmax - fix get performance compile error on Windows 2019-08-22 20:13:46 +08:00			`void setInputIndex(std::vector<int>&& index) {`
			`mNeedContentInputIndex = std::move(index);`
			`}`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`/**`
			`* @brief deinitializer.`
			`*/`
			`virtual ~SizeComputer() = default;`

			`public:`
			`/**`
			`* @brief calculate input and output tensors' shape for given op.`
			`* @param op given op.`
			`* @param inputs given input tensors.`
			`* @param outputs given output tensors.`
			`* @return true if success, false otherwise.`
			`*/`
			`virtual bool onComputeSize(const MNN::Op* op, const std::vector<Tensor*>& inputs,`
			`const std::vector<Tensor*>& outputs) const = 0;`

			`/**`
			`* @brief calculate the flops of this op with the info of inputs size.`
			`* @param op given op.`
			`* @param inputs given input tensors.`
			`* @param outputs given output tensors.`
			`* @return the flops in M.`
			`*/`
			`virtual float onComputeFlops(const MNN::Op* op, const std::vector<Tensor*>& inputs,`
			`const std::vector<Tensor*>& outputs) const;`

			`/**`
			`* @brief calculate input and output tensors' shape for any registed op.`
			`* @param op given registed op.`
			`* @param inputs given input tensors.`
			`* @param outputs given output tensors.`
			`* @return true if success, false otherwise.`
			`*/`
			`static bool computeOutputSize(const MNN::Op* op, const std::vector<Tensor*>& inputs,`
			`const std::vector<Tensor*>& outputs);`
- dynamic computation graph (beta) - add supports (/express) - add tests - add benchmarks with it (/benchmark/exprModels) - Python - MNN engine and tools were submitted to pip - available on Windows/macOS/Linux - Engine/Converter - add supports for each op benchmarking - refactor optimizer by separating steps - CPU - add supports for Conv3D, Pool3D, ELU, ReverseSequence - fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf - OpenCL - add half transform in CPU - add broadcast supports for binary - optimize Conv2D, Reshape, Eltwise, Gemm, etc. - OpenGL - add sub, real div supports for binary - add supports for unary - optimize Conv2D, Reshape - Vulkan - add max supports for eltwise - Metal - fix metallib missing problem - Train/Quantization - use express to refactor training codes 2019-09-26 21:02:07 +08:00			`static float computeFlops(const MNN::Op* op, const std::vector<Tensor*>& inputs,`
Github release 1.1.0 2020-11-05 16:41:56 +08:00			`const std::vector<Tensor*>& outputs);`
- dynamic computation graph (beta) - add supports (/express) - add tests - add benchmarks with it (/benchmark/exprModels) - Python - MNN engine and tools were submitted to pip - available on Windows/macOS/Linux - Engine/Converter - add supports for each op benchmarking - refactor optimizer by separating steps - CPU - add supports for Conv3D, Pool3D, ELU, ReverseSequence - fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf - OpenCL - add half transform in CPU - add broadcast supports for binary - optimize Conv2D, Reshape, Eltwise, Gemm, etc. - OpenGL - add sub, real div supports for binary - add supports for unary - optimize Conv2D, Reshape - Vulkan - add max supports for eltwise - Metal - fix metallib missing problem - Train/Quantization - use express to refactor training codes 2019-09-26 21:02:07 +08:00
MNN:Sync: Sync Internal 3.2.4 2025-09-22 23:05:26 +08:00			`static bool computeBroadCastDims(const std::vector<Tensor*>& inputs,`
[MNN:Sync] Sync internal Gitlab 2021-04-08 15:34:23 +08:00			`const std::vector<Tensor*>& outputs);`
Github release 1.1.0 2020-11-05 16:41:56 +08:00
[MNN:Sync] Sync internal Gitlab 2021-04-08 15:34:23 +08:00			`static std::vector<int> needInputContent(const MNN::Op* op, int inputSize);`
beta 0.2.0.8 - add NaN check-up - add quantification support for ScaleAdd Op - add binary to eltwise optimization - add console logs for quantization tool - better document for quantization tool - replace redundant dimension flags with dimension format - optimize performance of TensorFlow Lite Quantized Convolution - fix axis support for ONNX softmax - fix get performance compile error on Windows 2019-08-22 20:13:46 +08:00			`private:`
			`std::vector<int> mNeedContentInputIndex;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`};`

			`/** size computer suite */`
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`class MNN_PUBLIC SizeComputerSuite {`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`public:`
			`/**`
			`* @brief deinitializer.`
			`*/`
			`~SizeComputerSuite();`
			`/**`
			`* @brief get shared instance.`
			`* @return shared instance.`
			`*/`
			`static SizeComputerSuite* get();`
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`static void init();`
beta 0.1.0 2019-04-17 10:49:11 +08:00
			`public:`
			`/**`
			`* @brief register size computer for designated type`
			`* @param computer size computer`
			`* @param type designated type`
			`*/`
			`void insert(SizeComputer* computer, OpType type);`

			`/**`
			`* @brief query size computer for designated type`
			`* @param type designated type`
			`* @return size computer if found, nullptr otherwise.`
			`*/`
			`SizeComputer* search(OpType type);`

			`private:`
			`/** shared instance */`
			`static SizeComputerSuite* gInstance;`
			`/** registered size computer */`
[MNN:Sync] Sync internal gitlab Main Feature: 1. Add OpenCV API and Numpy API Support 2. Protobuf move into MNN 3. Add more op for torchscript convert 4. Add recompute to speed up geometry compute 5. Add ModuleBasic Test 2021-11-30 10:10:53 +08:00			`std::vector<SizeComputer*> mRegistry;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`};`

			`/** register for size computer */`
			`template <class T>`
			`class SizeComputerRegister {`
			`public:`
			`/**`
			`* @brief initializer. register size computer to suite.`
			`* @param type designated type`
			`*/`
			`SizeComputerRegister(OpType type) {`
			`T* test = new T;`
			`SizeComputerSuite* ts = SizeComputerSuite::get();`
			`ts->insert(test, type);`
			`}`
beta 0.2.0.8 - add NaN check-up - add quantification support for ScaleAdd Op - add binary to eltwise optimization - add console logs for quantization tool - better document for quantization tool - replace redundant dimension flags with dimension format - optimize performance of TensorFlow Lite Quantized Convolution - fix axis support for ONNX softmax - fix get performance compile error on Windows 2019-08-22 20:13:46 +08:00			`SizeComputerRegister(OpType type, std::vector<int>&& index) {`
Github release 1.1.0 2020-11-05 16:41:56 +08:00			`T* test = new T;`
beta 0.2.0.8 - add NaN check-up - add quantification support for ScaleAdd Op - add binary to eltwise optimization - add console logs for quantization tool - better document for quantization tool - replace redundant dimension flags with dimension format - optimize performance of TensorFlow Lite Quantized Convolution - fix axis support for ONNX softmax - fix get performance compile error on Windows 2019-08-22 20:13:46 +08:00			`test->setInputIndex(std::move(index));`
			`SizeComputerSuite* ts = SizeComputerSuite::get();`
			`ts->insert(test, type);`
			`}`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`};`
			`} // namespace MNN`
fix backend/op/sizer register 2019-05-09 19:39:33 +08:00			`#define REGISTER_SHAPE(name, op) \`
			`void ___##name##__##op##__() { \`
Sync Internal Github 2021-02-07 10:45:07 +08:00			`name* _temp = new name; \`
fix backend/op/sizer register 2019-05-09 19:39:33 +08:00			`SizeComputerSuite* ts = SizeComputerSuite::get(); \`
Sync Internal Github 2021-02-07 10:45:07 +08:00			`ts->insert(_temp, op); \`
Github release 1.1.0 2020-11-05 16:41:56 +08:00			`}`
			`// Output shape is depent on index-input content data`
			`#define REGISTER_SHAPE_INPUTS(name, op, index) \`
			`void ___##name##__##op##__() { \`
			`SizeComputerSuite* ts = SizeComputerSuite::get(); \`
Sync Internal Github 2021-02-07 10:45:07 +08:00			`name* computer = new name; \`
			`computer->setInputIndex(index); \`
			`ts->insert(computer, op); \`
fix backend/op/sizer register 2019-05-09 19:39:33 +08:00			`}`
beta 0.1.0 2019-04-17 10:49:11 +08:00
[MNN:Sync] Sync internal Gitlab to 2.0.2 2022-07-19 13:52:07 +08:00			`#ifdef MNN_SUPPORT_DEPRECATED_OP`
			`#define REGISTER_SHAPE_OLD(name, op) \`
			`void ___##name##__##op##__() { \`
			`name* _temp = new name; \`
			`SizeComputerSuite* ts = SizeComputerSuite::get(); \`
			`ts->insert(_temp, op); \`
			`}`
			`#else`
			`#define REGISTER_SHAPE_OLD(name, op) void ___##name##__##op##__() {}`

			`#endif`


[MNN:Sync] Sync Internal 2.8.0 2023-12-04 11:12:20 +08:00			`#ifdef MNN_SUPPORT_RENDER`
			`#define REGISTER_SHAPE_INPUTS_RENDER(name, op, index) \`
			`void ___##name##__##op##__() { \`
			`SizeComputerSuite* ts = SizeComputerSuite::get(); \`
			`name* computer = new name; \`
			`computer->setInputIndex(index); \`
			`ts->insert(computer, op); \`
			`}`

			`#else`
			`#define REGISTER_SHAPE_INPUTS_RENDER(name, op, index) void ___##name##__##op##__() {}`

			`#endif`

[MNN:Sync] Sync Internal 2.8.2 2024-02-29 16:21:40 +08:00			`#ifdef MNN_SUPPORT_TRANSFORMER_FUSE`
			`#define REGISTER_SHAPE_INPUTS_TRANSFORMER_FUSE(name, op) \`
			`void ___##name##__##op##__() { \`
			`name* _temp = new name; \`
			`SizeComputerSuite* ts = SizeComputerSuite::get(); \`
			`ts->insert(_temp, op); \`
			`}`

			`#else`
			`#define REGISTER_SHAPE_INPUTS_TRANSFORMER_FUSE(name, op) void ___##name##__##op##__() {}`

			`#endif`

beta 0.1.0 2019-04-17 10:49:11 +08:00			`#endif`