MNN/source/backend/cpu/compute/ConvInt8TiledExecutor.hpp

//
//  ConvInt8TiledExecutor.hpp
//  MNN
//
//  Created by MNN on 2019/5/17.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef ConvInt8TiledExecutor_hpp
#define ConvInt8TiledExecutor_hpp

#include "backend/cpu/CPUConvolution.hpp"
#include "Int8FunctionsOpt.h"
#include "CommonOptFunction.h"

namespace MNN {

/**
 * Abstract base class for tiled int8 convolution executors.
 *
 * Derives from CPUConvolution and declares getPackParameter() as pure
 * virtual, so only concrete subclasses can be instantiated. The protected
 * members below are shared with those subclasses.
 */
class ConvInt8TiledExecutor : public CPUConvolution {
public:
    // given weight+bias+scale, do post process
    ConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr<ResourceInt8> res);
    virtual ~ConvInt8TiledExecutor();
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;
    // Must be implemented by subclasses. Presumably reports the packing units
    // through the three int* out-parameters for the given int8 function table
    // -- TODO confirm against the implementations.
    virtual void getPackParameter(int* Unit, int* SrcUnit, int* DestUnit, const CoreInt8Functions* core) = 0;
    // Static helper; presumably repacks the raw bytes at weightSrc into the
    // weight tensor according to the given unit/channel/kernel sizes -- verify
    // against the definition in the .cpp file.
    static void reorderWeight(Tensor* weight, const uint8_t* weightSrc, int SRC_UNIT, int UNIT, int ic, int oc, int kernelCount, int pack);

protected:
    ConvolutionCommon::Im2ColParameter mIm2ColParamter;
    // Scalar state is zero-initialized so that it never holds an indeterminate
    // value, even if a constructor does not assign it explicitly.
    int mTileCount = 0;
    int mThreadNums = 0;
    std::shared_ptr<Tensor> mTempIm2ColBuffer;
    std::shared_ptr<CPUConvolution::ResourceInt8> mResourceInt8;
    // std::shared_ptr<CPUConvolution::Resource> mResource;
    CPUConvolution::MutableResourceInt8 mMutableResource;
    MemChunk mBlitInfo;
    std::pair<size_t, size_t> mBlitInfoStride;
    int mIm2ColCount = 0;
};

//
//  DenseConvInt8TiledExecutor.hpp
//  MNN
//
//  Created by MNN on 2019/5/17.
//  Copyright © 2018, Alibaba Group Holding Limited
//


/**
 * Concrete ConvInt8TiledExecutor that implements getPackParameter() and
 * onExecute().
 *
 * The public constructor takes a dynamicQuantExe flag in addition to the
 * base-class arguments.
 */
class DenseConvInt8TiledExecutor : public ConvInt8TiledExecutor {
public:
    // given weight+bias+scale, do post process
    DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr<ResourceInt8> res, bool dynamicQuantExe);
    virtual ~DenseConvInt8TiledExecutor();
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;
    void getPackParameter(int* Unit, int* SrcUnit, int* DestUnit, const CoreInt8Functions* core) override;
private:
    // Private constructor that takes an existing executor by const reference;
    // presumably used by onClone() to build the copy -- TODO confirm in the .cpp.
    DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* common, bool dynamicQuantExe, const DenseConvInt8TiledExecutor& exe);

    // Value-initialized so the kernel handle is never indeterminate.
    decltype(CoreInt8Functions::Int8GemmKernel) mGemmKernel{};
    std::function<void(const float*, int8_t*, size_t, const float*, ssize_t, ssize_t, ssize_t)> mQuantFunc;
    std::function<void(const float*, int8_t*, size_t, const float*, ssize_t, ssize_t, ssize_t, size_t, size_t)> mQuantAndReorderFunc = nullptr;
    std::function<void(float* dest, int8_t* source, const float* scale, ssize_t realDstCount, SumByAxisParams sumParams)> mSumByAxisLFunc;
    std::shared_ptr<Tensor> mQuantInput;
    std::shared_ptr<Tensor> mDynamicBias;
    std::shared_ptr<Tensor> mScaleFuse;
    std::shared_ptr<Tensor> mBatchQuantInfo;
    std::shared_ptr<Tensor> mInputDeqScales;
    std::shared_ptr<Tensor> mTempMaxMinValueBuffer;
    std::shared_ptr<CPUConvolution::Resource> mResource;
    std::vector<uint8_t> mTempSrcSum;
    std::vector<int32_t> mDivides;

    // Scalars and flags get explicit defaults so they never hold an
    // indeterminate value, even if a constructor does not assign them.
    // NOTE(review): mThreadNums is also declared as a protected member of
    // ConvInt8TiledExecutor; this declaration hides the inherited one inside
    // this class -- confirm that two separate counters are intended.
    int mThreadNums = 0;
    int mBlockNum = 0;
    int mOcPerThread = 0;
    bool mDynamicQuantExe = false;
    bool mSplitByOc = false;
    bool mUseBatchQuan = false;
};

} // namespace MNN

#endif /* ConvInt8TiledExecutor_hpp */
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`//`
			`// ConvInt8TiledExecutor.hpp`
			`// MNN`
			`//`
			`// Created by MNN on 2019/5/17.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`

			`#ifndef ConvInt8TiledExecutor_hpp`
			`#define ConvInt8TiledExecutor_hpp`

			`#include "backend/cpu/CPUConvolution.hpp"`
			`#include "Int8FunctionsOpt.h"`
MNN:Sync: Sync Internal 2.9.3 2024-07-22 19:51:53 +08:00			`#include "CommonOptFunction.h"`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00
			`namespace MNN {`

			`class ConvInt8TiledExecutor : public CPUConvolution {`
			`public:`
			`// given weight+bias+scale, do post process`
MNN:Sync: Sync Internal 2.9.3 2024-07-22 19:51:53 +08:00			`ConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr<ResourceInt8> res);`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`virtual ~ConvInt8TiledExecutor();`
			`virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;`
			`virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;`
[MNN:Sync] Sync internal Gitlab 2021-09-18 15:52:30 +08:00			`virtual void getPackParameter(int* Unit, int* SrcUnit, int* DestUnit, const CoreInt8Functions* core) = 0;`
im2col parameter bug: use SRC_UNIT>pack to decide weight&input shape. Signed-off-by: jingbang.yjb <jingbang.yjb@alibaba-inc.com> 2024-07-23 10:47:04 +08:00			`static void reorderWeight(Tensor* weight, const uint8_t* weightSrc, int SRC_UNIT, int UNIT, int ic, int oc, int kernelCount, int pack);`
[MNN:Sync] Sync Internal 2.5.3 2023-06-16 09:42:45 +08:00
[MNN:Sync] Sync internal Gitlab 2021-09-18 15:52:30 +08:00			`protected:`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`ConvolutionCommon::Im2ColParameter mIm2ColParamter;`
			`int mTileCount;`
			`int mThreadNums;`
			`std::shared_ptr<Tensor> mTempIm2ColBuffer;`
MNN:Sync: Sync Internal 2.9.3 2024-07-22 19:51:53 +08:00			`std::shared_ptr<CPUConvolution::ResourceInt8> mResourceInt8;`
			`// std::shared_ptr<CPUConvolution::Resource> mResource;`
[MNN:Sync] Sync Internal 2.3.0 2022-12-30 15:18:58 +08:00			`CPUConvolution::MutableResourceInt8 mMutableResource;`
[MNN:Sync] Sync Internal 2.7.0 2023-09-04 10:42:11 +08:00			`MemChunk mBlitInfo;`
[MNN:Sync] Sync Internal 2.5.3 2023-06-16 09:42:45 +08:00			`std::pair<size_t, size_t> mBlitInfoStride;`
			`int mIm2ColCount;`
[MNN:Sync] Sync internal Gitlab 2021-09-18 15:52:30 +08:00			`};`

			`//`
			`// DenseConvInt8TiledExecutor.hpp`
			`// MNN`
			`//`
			`// Created by MNN on 2019/5/17.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`


			`class DenseConvInt8TiledExecutor : public ConvInt8TiledExecutor {`
			`public:`
			`// given weight+bias+scale, do post process`
MNN:Sync: Sync Internal 2.9.3 2024-07-22 19:51:53 +08:00			`DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr<ResourceInt8> res, bool dynamicQuantExe);`
[MNN:Sync] Sync internal Gitlab 2021-09-18 15:52:30 +08:00			`virtual ~DenseConvInt8TiledExecutor();`
			`virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;`
			`virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;`
			`virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;`
			`void getPackParameter(int* Unit, int* SrcUnit, int* DestUnit, const CoreInt8Functions* core) override;`
			`private:`
MNN:Sync: Sync Internal 2.9.3 2024-07-22 19:51:53 +08:00			`DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* common, bool dynamicQuantExe, const DenseConvInt8TiledExecutor& exe);`
[MNN:Sync] Sync internal Gitlab 2021-09-18 15:52:30 +08:00
			`decltype(CoreInt8Functions::Int8GemmKernel) mGemmKernel;`
MNN:Sync: Sync Internal 2.9.3 2024-07-22 19:51:53 +08:00			`std::function<void(const float*, int8_t*, size_t, const float*, ssize_t, ssize_t, ssize_t)> mQuantFunc;`
			`std::function<void(const float*, int8_t*, size_t, const float*, ssize_t, ssize_t, ssize_t, size_t, size_t)> mQuantAndReorderFunc = nullptr;`
			`std::function<void(float* dest, int8_t* source, const float* scale, ssize_t realDstCount, SumByAxisParams sumParams)> mSumByAxisLFunc;`
			`std::shared_ptr<Tensor> mQuantInput;`
			`std::shared_ptr<Tensor> mDynamicBias;`
			`std::shared_ptr<Tensor> mScaleFuse;`
			`std::shared_ptr<Tensor> mBatchQuantInfo;`
			`std::shared_ptr<Tensor> mInputDeqScales;`
			`std::shared_ptr<Tensor> mTempMaxMinValueBuffer;`
			`std::shared_ptr<CPUConvolution::Resource> mResource;`
			`std::vector<uint8_t> mTempSrcSum;`
			`std::vector<int32_t> mDivides;`

			`int mThreadNums;`
			`int mBlockNum;`
			`int mOcPerThread;`
			`bool mDynamicQuantExe;`
			`bool mSplitByOc;`
			`bool mUseBatchQuan;`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`};`

			`} // namespace MNN`

			`#endif /* ConvInt8TiledExecutor_hpp */`