// MNN/source/backend/cpu/compute/ConvInt8TiledExecutor.hpp

//
//  ConvInt8TiledExecutor.hpp
//  MNN
//
//  Created by MNN on 2019/5/17.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef ConvInt8TiledExecutor_hpp
#define ConvInt8TiledExecutor_hpp

#include "backend/cpu/CPUConvolution.hpp"
#include "Int8FunctionsOpt.h"
#include "CommonOptFunction.h"

namespace MNN {
// Pointer to a routine that takes a float output buffer, an int8 source
// buffer and four size parameters. ConvInt8TiledExecutor::reorderWeight
// accepts one as its optional `summerFunc` argument.
// NOTE(review): presumably accumulates per-kernel weight sums from packed
// int8 weights, with hP/lP being packing units — confirm against the
// implementations that are assigned to this type.
using weightSummerFuncion = void (*)(float* kernelSum,
                                     int8_t* source,
                                     size_t outside,
                                     size_t reduceAxis,
                                     size_t hP,
                                     size_t lP);
/**
 * Common base for tiled int8 convolution executions, derived from
 * CPUConvolution.
 *
 * Declares the resize/clone hooks, static helpers for packing and reordering
 * int8 weights and for setting up quantization info, and the state that
 * derived executors share through the protected section.
 */
class ConvInt8TiledExecutor : public CPUConvolution {
public:
    // given weight+bias+scale, do post process
    ConvInt8TiledExecutor(Backend* backend, const Op* op);
    // Same as above, but takes an already available int8 resource.
    // NOTE(review): presumably the resource is shared rather than rebuilt — confirm in the .cpp.
    ConvInt8TiledExecutor(Backend* backend, const Op* op, std::shared_ptr<ResourceInt8> res);
    virtual ~ConvInt8TiledExecutor();
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;

    // Writes into `dstbuffer` from `weight` and `quantInfo`, driven by `info`.
    // `infoBytes` defaults to 4.
    // NOTE(review): the layout of `info` and the meaning of `infoBytes`
    // (presumably bytes per quant-info element) are not visible in this header — confirm.
    static void packWeightAndQuantInfo(int8_t* dstbuffer, const int8_t* weight, const int8_t* quantInfo, int32_t* info, int infoBytes = 4);

    // Reorders `src` into `dst` according to `info`. `kernelsum` and
    // `summerFunc` are optional and default to nullptr; `initval` defaults to 0.
    // NOTE(review): presumably `summerFunc` fills `kernelsum` when both are provided — confirm.
    static void reorderWeight(uint8_t* dst, const uint8_t* src, int32_t* info, int32_t initval = 0, float* kernelsum = nullptr, weightSummerFuncion summerFunc = nullptr);

    // Sets up the quantization info held by `resourceInt8` from `conv2D`.
    static void initializeConvInt8QuantInfo(std::shared_ptr<CPUConvolution::ResourceInt8>& resourceInt8, const Convolution2D* conv2D);

protected:
    // Scalar members carry in-class defaults so they hold a well-defined value
    // even if they are read before a constructor or onResize() assigns them.
    ConvolutionCommon::Im2ColParameter mIm2ColParamter;
    int mTileCount = 0;
    int mThreadNums = 1;
    std::shared_ptr<Tensor> mTempIm2ColBuffer;
    std::shared_ptr<CPUConvolution::ResourceInt8> mResourceInt8;
    std::shared_ptr<CPUConvolution::MutableResourceInt8> mMutableResource;
    MemChunk mBlitInfo;
    std::pair<size_t, size_t> mBlitInfoStride;
    int mIm2ColCount = 0;
};

//
//  DenseConvInt8TiledExecutor (declared in this header, ConvInt8TiledExecutor.hpp)
//  MNN
//
//  Created by MNN on 2019/5/17.
//  Copyright © 2018, Alibaba Group Holding Limited
//


/**
 * Tiled int8 convolution executor built on ConvInt8TiledExecutor.
 *
 * Adds onExecute() on top of the base resize/clone hooks and keeps the
 * function objects, tensors and scratch chunks declared below as private state.
 */
class DenseConvInt8TiledExecutor : public ConvInt8TiledExecutor {
public:
    DenseConvInt8TiledExecutor(Backend* backend, const Op* op, std::shared_ptr<ConvolutionCommon::Int8Common> quanCommon, bool isDynamicQuant); // dynamic quant
    virtual ~DenseConvInt8TiledExecutor();
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;
private:
    // Private constructor taking an existing executor.
    // NOTE(review): presumably used by onClone() to build the copy — confirm in the .cpp.
    DenseConvInt8TiledExecutor(Backend* backend, const Op* op, const DenseConvInt8TiledExecutor& exe);

    // Same type as CoreInt8Functions::Int8GemmKernel. Left without an in-class
    // initializer because that type is declared outside this header.
    decltype(CoreInt8Functions::Int8GemmKernel) mGemmKernel;
    std::function<void(const float*, int8_t*, size_t, const float*, ssize_t, ssize_t, const float*, ssize_t)> mQuantFunc;
    std::function<void(const float*, int8_t*, size_t, const float*, ssize_t, ssize_t, const float*, size_t, size_t)> mQuantAndReorderFunc = nullptr;
    std::function<void(float* dest, int8_t* source, const float* scale, ssize_t realDstCount, SumByAxisParams sumParams)> mSumByAxisLFunc;
    std::shared_ptr<Tensor> mQuantInput;
    std::shared_ptr<Tensor> mDynamicBias;
    std::shared_ptr<Tensor> mAccumBuffer;
    std::shared_ptr<Tensor> mBatchQuantInfo;
    MemChunk mTempMaxMinValueBuffer;
    MemChunk mTempSrcSum;
    MemChunk mQScaleZero;
    MemChunk mReorderBuffer;
    MemChunk mBiasBufferFusedInputzero;
    std::vector<int32_t> mDivides;

    // Scalar and boolean members below carry in-class defaults, matching
    // mBlockNum / mInputBlockNum, so they are never read uninitialized.
    // NOTE(review): this mThreadNums hides the protected
    // ConvInt8TiledExecutor::mThreadNums; it is kept so existing behavior is
    // unchanged — confirm whether two separate counters are intended.
    int mThreadNums = 1;
    int mBlockNum = 1;
    int mInputBlockNum = 1;
    int mOcPerThread = 0;
    bool mSplitByOc = false;
    bool mUseBatchQuan = false;
    bool mIm2ColBasedInt8 = false;
    int mSizeInputBlockQuant = 0;
    bool mToFuseInputbias2Bias = false;
    MatmulRelatedFunctions mRelatedFunctions;
};

} // namespace MNN

#endif /* ConvInt8TiledExecutor_hpp */