MNN/source/backend/cuda/execution/ConvCutlassExecution.hpp

99 lines
3.6 KiB
C++
Raw Normal View History

2022-08-12 10:30:48 +08:00
//
// ConvCutlassExecution.hpp
// MNN
//
// Created by MNN on 2020/08/22.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef ConvCutlassExecution_hpp
#define ConvCutlassExecution_hpp
#include "backend/cuda/core/CUDABackend.hpp"
#include "core/Execution.hpp"
#include "CutlassGemmParam.hpp"
2022-09-30 10:02:52 +08:00
#include "MNNCUDADefine.hpp"
#include "MNNCUDAFunction.cuh"
2022-08-12 10:30:48 +08:00
namespace MNN {
namespace CUDA {
class ConvCutlassExecution : public Execution {
public:
struct Resource {
Resource(Backend* bn, const MNN::Op* op);
~ Resource();
void* mFilter;
void* mBias;
std::shared_ptr<Tensor> weightTensor;
std::shared_ptr<Tensor> biasTensor;
Backend* mBackend = nullptr;
};
ConvCutlassExecution(Backend* backend, const MNN::Op* op, std::shared_ptr<Resource> res);
virtual ~ConvCutlassExecution();
virtual ErrorCode onResize(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs) override;
virtual ErrorCode onExecute(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs) override;
virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;
2023-01-11 15:08:58 +08:00
ErrorCode callCutlassGemmCudaCoreFloat16(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
ErrorCode callCutlassGemmCudaCoreFloat32(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
ErrorCode callCutlassGemmTensorCore884(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
ErrorCode callCutlassGemmTensorCore(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs);
2022-08-12 10:30:48 +08:00
private:
std::shared_ptr<Resource> mResource;
const Op* mOp = nullptr;
CutlassGemmInfo mGemmInfo;
ConvolutionCommon::Im2ColParameter mIm2ColParamter;
std::pair<void*, int> mGpuIm2ColParam;
2022-11-18 22:35:31 +08:00
void* mIm2ColBuffer;
2022-08-12 10:30:48 +08:00
bool mIsConv1x1S1D1P0 = false;
bool mNeedIm2Col = true;
std::pair<void*, int> mGpuKernelParam;
bool mIsBlock = false;
int mBlockNum = 1;
2022-11-18 22:35:31 +08:00
GemmTensor_F16_F16_Linear_AlignTensor_Sm70 mGemmF16F16LnSm70;
GemmTensor_F16_F32_Linear_AlignTensor_Sm70 mGemmF16F32LnSm70;
GemmCuda_F16_F16_Linear_AlignCuda mGemmCudaF16F16Ln;
GemmCuda_F16_F32_Linear_AlignCuda mGemmCudaF16F32Ln;
2022-08-12 10:30:48 +08:00
2022-11-18 22:35:31 +08:00
GemmTensor_F16_F16_Relu_AlignTensor_Sm70 mGemmF16F16ReluSm70;
GemmTensor_F16_F32_Relu_AlignTensor_Sm70 mGemmF16F32ReluSm70;
GemmCuda_F16_F16_Relu_AlignCuda mGemmCudaF16F16Relu;
GemmCuda_F16_F32_Relu_AlignCuda mGemmCudaF16F32Relu;
2022-08-12 10:30:48 +08:00
2022-11-18 22:35:31 +08:00
GemmTensor_F16_F16_Relu6_AlignTensor_Sm70 mGemmF16F16Relu6Sm70;
GemmTensor_F16_F32_Relu6_AlignTensor_Sm70 mGemmF16F32Relu6Sm70;
GemmCuda_F16_F16_Relu6_AlignCuda mGemmCudaF16F16Relu6;
GemmCuda_F16_F32_Relu6_AlignCuda mGemmCudaF16F32Relu6;
2022-08-12 10:30:48 +08:00
2022-11-18 22:35:31 +08:00
GemmTensor_F16_F16_Linear_AlignTensor_Sm75 mGemmF16F16LnSm75;
GemmTensor_F16_F32_Linear_AlignTensor_Sm75 mGemmF16F32LnSm75;
2022-08-12 10:30:48 +08:00
2022-11-18 22:35:31 +08:00
GemmTensor_F16_F16_Relu_AlignTensor_Sm75 mGemmF16F16ReluSm75;
GemmTensor_F16_F32_Relu_AlignTensor_Sm75 mGemmF16F32ReluSm75;
2022-08-12 10:30:48 +08:00
2022-11-18 22:35:31 +08:00
GemmTensor_F16_F16_Relu6_AlignTensor_Sm75 mGemmF16F16Relu6Sm75;
GemmTensor_F16_F32_Relu6_AlignTensor_Sm75 mGemmF16F32Relu6Sm75;
GemmCuda_F32_F32_Relu_AlignCuda mGemmCudaF32F32Relu;
GemmCuda_F32_F32_Relu6_AlignCuda mGemmCudaF32F32Relu6;
GemmCuda_F32_F32_Linear_AlignCuda mGemmCudaF32F32Ln;
2022-08-12 10:30:48 +08:00
int mGpuComputeCap = 75;
int mActivationType = 0;
2022-11-18 22:35:31 +08:00
bool mFp16Infer = false;
bool mFp32Infer = false;
bool mFp16Fp32MixInfer = false;
2022-08-12 10:30:48 +08:00
std::shared_ptr<Tensor> workspaceTensor;
2022-12-24 09:42:39 +08:00
void* mWorkspace;
2022-08-12 10:30:48 +08:00
};
} // namespace CUDA
} // namespace MNN
#endif /* ConvCutlassExecution_hpp */