MNN/source/backend/cuda/execution/cutlass_common/CutlassConvCommonExecution.hpp

112 lines
3.7 KiB
C++
Raw Normal View History

//
//  CutlassConvCommonExecution.hpp
//  MNN
//
//  Created by MNN on 2023/03/22.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CutlassConvCommonExecution_hpp
#define CutlassConvCommonExecution_hpp
2023-04-11 11:12:00 +08:00
#include "backend/cuda/core/CUDABackend.hpp"
#include "core/Execution.hpp"
#include "../CutlassGemmParam.hpp"
2023-06-16 09:42:45 +08:00
#include "../bf16/CutlassGemmBf16Param.hpp"
2023-04-11 11:12:00 +08:00
#include "../MNNCUDADefine.hpp"
#include "../MNNCUDAFunction.cuh"
2023-10-18 10:31:02 +08:00
#ifdef ENABLE_CUDA_TUNE_PARAM
#include "tune/CutlassGemmTuneCommonExecution.hpp"
#endif
2023-04-11 11:12:00 +08:00
namespace MNN {
namespace CUDA {
2023-10-18 10:31:02 +08:00
class CutlassConvCommonExecution :
#ifdef ENABLE_CUDA_TUNE_PARAM
public CutlassGemmTuneCommonExecution
#else
public Execution
#endif
{
2023-04-11 11:12:00 +08:00
public:
2023-04-27 15:11:05 +08:00
CutlassConvCommonExecution(Backend* backend);
virtual ~CutlassConvCommonExecution() = default;
2023-04-11 11:12:00 +08:00
ErrorCode callCutlassGemmCudaCoreFloat16(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
ErrorCode callCutlassGemmCudaCoreFloat32(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
ErrorCode callCutlassGemmTensorCore884(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
ErrorCode callCutlassGemmTensorCore(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
2023-06-16 09:42:45 +08:00
ErrorCode callCutlassGemmBf16TensorCore(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
2023-04-11 11:12:00 +08:00
ErrorCode runCutlassGemmFunc();
protected:
Backend* mBackendPtr;
void* mFilterAddr;
void* mBiasAddr;
CutlassGemmInfo mGemmInfo;
const Op* mOp = nullptr;
ConvolutionCommon::Im2ColParameter mIm2ColParamter;
2023-09-04 10:42:11 +08:00
MemChunk mGpuIm2ColParam;
2023-04-11 11:12:00 +08:00
void* mIm2ColBuffer;
bool mIsConv1x1S1D1P0 = false;
bool mNeedIm2Col = true;
2023-09-04 10:42:11 +08:00
MemChunk mGpuKernelParam;
2023-04-11 11:12:00 +08:00
bool mIsBlock = false;
int mBlockNum = 1;
GemmTensor_F16_F16_Linear_AlignTensor_Sm70 mGemmF16F16LnSm70;
GemmTensor_F16_F32_Linear_AlignTensor_Sm70 mGemmF16F32LnSm70;
GemmCuda_F16_F16_Linear_AlignCuda mGemmCudaF16F16Ln;
GemmCuda_F16_F32_Linear_AlignCuda mGemmCudaF16F32Ln;
GemmTensor_F16_F16_Relu_AlignTensor_Sm70 mGemmF16F16ReluSm70;
GemmTensor_F16_F32_Relu_AlignTensor_Sm70 mGemmF16F32ReluSm70;
GemmCuda_F16_F16_Relu_AlignCuda mGemmCudaF16F16Relu;
GemmCuda_F16_F32_Relu_AlignCuda mGemmCudaF16F32Relu;
GemmTensor_F16_F16_Relu6_AlignTensor_Sm70 mGemmF16F16Relu6Sm70;
GemmTensor_F16_F32_Relu6_AlignTensor_Sm70 mGemmF16F32Relu6Sm70;
GemmCuda_F16_F16_Relu6_AlignCuda mGemmCudaF16F16Relu6;
GemmCuda_F16_F32_Relu6_AlignCuda mGemmCudaF16F32Relu6;
GemmTensor_F16_F16_Linear_AlignTensor_Sm75 mGemmF16F16LnSm75;
GemmTensor_F16_F32_Linear_AlignTensor_Sm75 mGemmF16F32LnSm75;
GemmTensor_F16_F16_Relu_AlignTensor_Sm75 mGemmF16F16ReluSm75;
GemmTensor_F16_F32_Relu_AlignTensor_Sm75 mGemmF16F32ReluSm75;
GemmTensor_F16_F16_Relu6_AlignTensor_Sm75 mGemmF16F16Relu6Sm75;
GemmTensor_F16_F32_Relu6_AlignTensor_Sm75 mGemmF16F32Relu6Sm75;
GemmCuda_F32_F32_Relu_AlignCuda mGemmCudaF32F32Relu;
GemmCuda_F32_F32_Relu6_AlignCuda mGemmCudaF32F32Relu6;
GemmCuda_F32_F32_Linear_AlignCuda mGemmCudaF32F32Ln;
2023-07-05 11:44:25 +08:00
#ifdef ENABLE_CUDA_BF16
2023-06-16 09:42:45 +08:00
GemmTensor_BF16_BF16_Linear_AlignTensor_Sm80 mGemmBF16BF16LnSm80;
GemmTensor_BF16_BF16_Relu_AlignTensor_Sm80 mGemmBF16BF16ReluSm80;
GemmTensor_BF16_BF16_Relu6_AlignTensor_Sm80 mGemmBF16BF16Relu6Sm80;
2023-07-05 11:44:25 +08:00
#endif
2023-04-11 11:12:00 +08:00
int mGpuComputeCap = 75;
2023-12-27 17:26:44 +08:00
bool mIsTuned = false;
2023-04-11 11:12:00 +08:00
int mActivationType = 0;
bool mFp16Infer = false;
bool mFp32Infer = false;
bool mFp16Fp32MixInfer = false;
2023-06-16 09:42:45 +08:00
bool mBf16Infer = false;
2023-04-11 11:12:00 +08:00
int mPrecisonLevel;
std::shared_ptr<Tensor> workspaceTensor;
void* mWorkspace;
};
} // namespace CUDA
} // namespace MNN
2023-06-16 09:42:45 +08:00
#endif /* CutlassConvCommonExecution_hpp */