MNN/source/backend/cuda/execution/cutlass_common/CutlassConvCommonExecution.hpp

//
//  CutlassConvCommonExecution.hpp
//  MNN
//
//  Created by MNN on 2023/03/22.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef CutlassConvCommonExecution_hpp
#define CutlassConvCommonExecution_hpp

#include "backend/cuda/core/CUDABackend.hpp"
#include "core/Execution.hpp"
#include "../CutlassGemmParam.hpp"
#include "../bf16/CutlassGemmBf16Param.hpp"
#include "../MNNCUDADefine.hpp"
#include "../MNNCUDAFunction.cuh"

#ifdef ENABLE_CUDA_TUNE_PARAM
#include "tune/CutlassGemmTuneCommonExecution.hpp"
#endif
namespace MNN {
namespace CUDA {

// Common base class for CUDA convolution executions that run their matrix
// multiply through pre-declared CUTLASS GEMM objects.
//
// The base class is chosen at compile time: CutlassGemmTuneCommonExecution when
// ENABLE_CUDA_TUNE_PARAM is defined, plain Execution otherwise.
//
// This header only declares the GEMM entry points and the state they share;
// the definitions live outside this file. All raw pointer members default to
// nullptr so that an object on which setup has not yet run never exposes
// indeterminate addresses.
class CutlassConvCommonExecution :
    #ifdef ENABLE_CUDA_TUNE_PARAM
    public CutlassGemmTuneCommonExecution
    #else
    public Execution 
    #endif
{
public:
    // `backend` is the backend this execution is created for.
    CutlassConvCommonExecution(Backend* backend);
    virtual ~CutlassConvCommonExecution() = default;

    // GEMM entry points, one per compute path. Each takes the op's input and
    // output tensor lists and returns an ErrorCode.
    // NOTE(review): judging by the names these prepare the matching CUTLASS GEMM
    // member(s) below for the given path — confirm against the implementation.
    ErrorCode callCutlassGemmCudaCoreFloat16(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
    ErrorCode callCutlassGemmCudaCoreFloat32(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
    ErrorCode callCutlassGemmTensorCore884(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
    ErrorCode callCutlassGemmTensorCore(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
    ErrorCode callCutlassGemmBf16TensorCore(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);

    // NOTE(review): presumably runs whichever GEMM object was selected by the
    // call* functions above — confirm against the implementation.
    ErrorCode runCutlassGemmFunc();

protected:

    // Non-owning as far as this header shows (no destructor logic releases it).
    Backend* mBackendPtr = nullptr;
    // Filter / bias buffer addresses; nullptr until assigned.
    void* mFilterAddr = nullptr;
    void* mBiasAddr = nullptr;
    CutlassGemmInfo mGemmInfo;
    const Op* mOp = nullptr;

    // Host-side im2col parameters and the chunk holding their GPU copy.
    // NOTE(review): "GPU copy" is inferred from the member names — confirm.
    ConvolutionCommon::Im2ColParameter mIm2ColParamter;
    MemChunk mGpuIm2ColParam;

    // Scratch buffer address for im2col output; nullptr until assigned.
    void* mIm2ColBuffer = nullptr;

    bool mIsConv1x1S1D1P0 = false;
    bool mNeedIm2Col = true;
    MemChunk mGpuKernelParam;
    bool mIsBlock = false;
    int mBlockNum = 1;

    // ---- FP16-input GEMM objects, Sm70 tensor-op and CUDA-core variants ----
    // Linear epilogue.
    GemmTensor_F16_F16_Linear_AlignTensor_Sm70 mGemmF16F16LnSm70;
    GemmTensor_F16_F32_Linear_AlignTensor_Sm70 mGemmF16F32LnSm70;
    GemmCuda_F16_F16_Linear_AlignCuda  mGemmCudaF16F16Ln;
    GemmCuda_F16_F32_Linear_AlignCuda  mGemmCudaF16F32Ln;

    // Relu epilogue.
    GemmTensor_F16_F16_Relu_AlignTensor_Sm70 mGemmF16F16ReluSm70;
    GemmTensor_F16_F32_Relu_AlignTensor_Sm70 mGemmF16F32ReluSm70;
    GemmCuda_F16_F16_Relu_AlignCuda  mGemmCudaF16F16Relu;
    GemmCuda_F16_F32_Relu_AlignCuda  mGemmCudaF16F32Relu;

    // Relu6 epilogue.
    GemmTensor_F16_F16_Relu6_AlignTensor_Sm70 mGemmF16F16Relu6Sm70;
    GemmTensor_F16_F32_Relu6_AlignTensor_Sm70 mGemmF16F32Relu6Sm70;
    GemmCuda_F16_F16_Relu6_AlignCuda  mGemmCudaF16F16Relu6;
    GemmCuda_F16_F32_Relu6_AlignCuda  mGemmCudaF16F32Relu6;

    // ---- FP16-input GEMM objects, Sm75 tensor-op variants ----
    GemmTensor_F16_F16_Linear_AlignTensor_Sm75 mGemmF16F16LnSm75;
    GemmTensor_F16_F32_Linear_AlignTensor_Sm75 mGemmF16F32LnSm75;

    GemmTensor_F16_F16_Relu_AlignTensor_Sm75 mGemmF16F16ReluSm75;
    GemmTensor_F16_F32_Relu_AlignTensor_Sm75 mGemmF16F32ReluSm75;

    GemmTensor_F16_F16_Relu6_AlignTensor_Sm75 mGemmF16F16Relu6Sm75;
    GemmTensor_F16_F32_Relu6_AlignTensor_Sm75 mGemmF16F32Relu6Sm75;

    // ---- FP32 CUDA-core GEMM objects ----
    GemmCuda_F32_F32_Relu_AlignCuda mGemmCudaF32F32Relu;
    GemmCuda_F32_F32_Relu6_AlignCuda mGemmCudaF32F32Relu6;
    GemmCuda_F32_F32_Linear_AlignCuda mGemmCudaF32F32Ln;

    // ---- BF16 Sm80 GEMM objects, compiled in only with ENABLE_CUDA_BF16 ----
    #ifdef ENABLE_CUDA_BF16
    GemmTensor_BF16_BF16_Linear_AlignTensor_Sm80 mGemmBF16BF16LnSm80;
    GemmTensor_BF16_BF16_Relu_AlignTensor_Sm80 mGemmBF16BF16ReluSm80;
    GemmTensor_BF16_BF16_Relu6_AlignTensor_Sm80 mGemmBF16BF16Relu6Sm80;
    #endif
    // Defaults to 75. NOTE(review): presumably major*10+minor of the device
    // compute capability — confirm where it is assigned.
    int mGpuComputeCap = 75;
    bool mIsTuned = false;
    // NOTE(review): the Linear/Relu/Relu6 members above suggest this selects
    // among those epilogues; the value mapping is not visible here.
    int mActivationType = 0;
    // Inference precision flags; all off by default.
    bool mFp16Infer = false;
    bool mFp32Infer = false;
    bool mFp16Fp32MixInfer = false;
    bool mBf16Infer = false;
    // Zero until assigned; the meaning of each level is defined outside this header.
    int mPrecisonLevel = 0;
    std::shared_ptr<Tensor> workspaceTensor;
    // Workspace address; nullptr until assigned.
    // NOTE(review): presumably points into workspaceTensor's storage — confirm.
    void* mWorkspace = nullptr;
};

} // namespace CUDA
} // namespace MNN

#endif /* CutlassConvCommonExecution_hpp */
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`//`
[MNN:Sync] Sync Internal 2.5.0 2023-04-27 15:11:05 +08:00			`// CutlassConvCommonExecution.hpp`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`// MNN`
			`//`
			`// Created by MNN on 2023/03/22.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`

[MNN:Sync] Sync Internal 2.5.0 2023-04-27 15:11:05 +08:00			`#ifndef CutlassConvCommonExecution_hpp`
			`#define CutlassConvCommonExecution_hpp`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00
			`#include "backend/cuda/core/CUDABackend.hpp"`
			`#include "core/Execution.hpp"`
			`#include "../CutlassGemmParam.hpp"`
[MNN:Sync] Sync Internal 2.5.3 2023-06-16 09:42:45 +08:00			`#include "../bf16/CutlassGemmBf16Param.hpp"`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`#include "../MNNCUDADefine.hpp"`
			`#include "../MNNCUDAFunction.cuh"`

[MNN:Sync] Sync Internal 2.7.2 2023-10-18 10:31:02 +08:00			`#ifdef ENABLE_CUDA_TUNE_PARAM`
			`#include "tune/CutlassGemmTuneCommonExecution.hpp"`
			`#endif`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`namespace MNN {`
			`namespace CUDA {`

[MNN:Sync] Sync Internal 2.7.2 2023-10-18 10:31:02 +08:00			`class CutlassConvCommonExecution :`
			`#ifdef ENABLE_CUDA_TUNE_PARAM`
			`public CutlassGemmTuneCommonExecution`
			`#else`
			`public Execution`
			`#endif`
			`{`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`public:`
[MNN:Sync] Sync Internal 2.5.0 2023-04-27 15:11:05 +08:00			`CutlassConvCommonExecution(Backend* backend);`
			`virtual ~CutlassConvCommonExecution() = default;`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00
			`ErrorCode callCutlassGemmCudaCoreFloat16(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);`
			`ErrorCode callCutlassGemmCudaCoreFloat32(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);`
			`ErrorCode callCutlassGemmTensorCore884(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);`
			`ErrorCode callCutlassGemmTensorCore(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);`
[MNN:Sync] Sync Internal 2.5.3 2023-06-16 09:42:45 +08:00			`ErrorCode callCutlassGemmBf16TensorCore(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00
			`ErrorCode runCutlassGemmFunc();`

			`protected:`

			`Backend* mBackendPtr;`
			`void* mFilterAddr;`
			`void* mBiasAddr;`
			`CutlassGemmInfo mGemmInfo;`
			`const Op* mOp = nullptr;`

			`ConvolutionCommon::Im2ColParameter mIm2ColParamter;`
[MNN:Sync] Sync Internal 2.7.0 2023-09-04 10:42:11 +08:00			`MemChunk mGpuIm2ColParam;`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00
			`void* mIm2ColBuffer;`

			`bool mIsConv1x1S1D1P0 = false;`
			`bool mNeedIm2Col = true;`
[MNN:Sync] Sync Internal 2.7.0 2023-09-04 10:42:11 +08:00			`MemChunk mGpuKernelParam;`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`bool mIsBlock = false;`
			`int mBlockNum = 1;`

			`GemmTensor_F16_F16_Linear_AlignTensor_Sm70 mGemmF16F16LnSm70;`
			`GemmTensor_F16_F32_Linear_AlignTensor_Sm70 mGemmF16F32LnSm70;`
			`GemmCuda_F16_F16_Linear_AlignCuda mGemmCudaF16F16Ln;`
			`GemmCuda_F16_F32_Linear_AlignCuda mGemmCudaF16F32Ln;`

			`GemmTensor_F16_F16_Relu_AlignTensor_Sm70 mGemmF16F16ReluSm70;`
			`GemmTensor_F16_F32_Relu_AlignTensor_Sm70 mGemmF16F32ReluSm70;`
			`GemmCuda_F16_F16_Relu_AlignCuda mGemmCudaF16F16Relu;`
			`GemmCuda_F16_F32_Relu_AlignCuda mGemmCudaF16F32Relu;`

			`GemmTensor_F16_F16_Relu6_AlignTensor_Sm70 mGemmF16F16Relu6Sm70;`
			`GemmTensor_F16_F32_Relu6_AlignTensor_Sm70 mGemmF16F32Relu6Sm70;`
			`GemmCuda_F16_F16_Relu6_AlignCuda mGemmCudaF16F16Relu6;`
			`GemmCuda_F16_F32_Relu6_AlignCuda mGemmCudaF16F32Relu6;`

			`GemmTensor_F16_F16_Linear_AlignTensor_Sm75 mGemmF16F16LnSm75;`
			`GemmTensor_F16_F32_Linear_AlignTensor_Sm75 mGemmF16F32LnSm75;`

			`GemmTensor_F16_F16_Relu_AlignTensor_Sm75 mGemmF16F16ReluSm75;`
			`GemmTensor_F16_F32_Relu_AlignTensor_Sm75 mGemmF16F32ReluSm75;`

			`GemmTensor_F16_F16_Relu6_AlignTensor_Sm75 mGemmF16F16Relu6Sm75;`
			`GemmTensor_F16_F32_Relu6_AlignTensor_Sm75 mGemmF16F32Relu6Sm75;`

			`GemmCuda_F32_F32_Relu_AlignCuda mGemmCudaF32F32Relu;`
			`GemmCuda_F32_F32_Relu6_AlignCuda mGemmCudaF32F32Relu6;`
			`GemmCuda_F32_F32_Linear_AlignCuda mGemmCudaF32F32Ln;`

[MNN:Sync] Sync Internal 2.6.0 2023-07-05 11:44:25 +08:00			`#ifdef ENABLE_CUDA_BF16`
[MNN:Sync] Sync Internal 2.5.3 2023-06-16 09:42:45 +08:00			`GemmTensor_BF16_BF16_Linear_AlignTensor_Sm80 mGemmBF16BF16LnSm80;`
			`GemmTensor_BF16_BF16_Relu_AlignTensor_Sm80 mGemmBF16BF16ReluSm80;`
			`GemmTensor_BF16_BF16_Relu6_AlignTensor_Sm80 mGemmBF16BF16Relu6Sm80;`
[MNN:Sync] Sync Internal 2.6.0 2023-07-05 11:44:25 +08:00			`#endif`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`int mGpuComputeCap = 75;`
[MNN:Sync] Sync Internal 2.8.1 2023-12-27 17:26:44 +08:00			`bool mIsTuned = false;`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`int mActivationType = 0;`
			`bool mFp16Infer = false;`
			`bool mFp32Infer = false;`
			`bool mFp16Fp32MixInfer = false;`
[MNN:Sync] Sync Internal 2.5.3 2023-06-16 09:42:45 +08:00			`bool mBf16Infer = false;`
[MNN:Sync] Sync 2.4.2 2023-04-11 11:12:00 +08:00			`int mPrecisonLevel;`
			`std::shared_ptr<Tensor> workspaceTensor;`
			`void* mWorkspace;`
			`};`

			`} // namespace CUDA`
			`} // namespace MNN`

[MNN:Sync] Sync Internal 2.5.3 2023-06-16 09:42:45 +08:00			`#endif /* CutlassConvCommonExecution */`