// MNN/source/backend/cuda/execution/MatMulExecution.hpp
//
//  MatMulExecution.hpp
//  MNN
//
//  Created by MNN on 2020/07/30.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef MatMulExecution_hpp
#define MatMulExecution_hpp

#include "backend/cuda/core/CUDABackend.hpp"
#include "MNNCUDADefine.hpp"
#include "CutlassGemmBatchedParam.hpp"
#include "CutlassGemmParam.hpp"
#include "MNNCUDAFunction.cuh"

namespace MNN {
namespace CUDA {
class MatMulExecution : public Execution {
2019-04-17 10:49:11 +08:00
public:
2023-03-17 17:04:38 +08:00
MatMulExecution(bool transposeA, bool transposeB, Backend *backend, int aS = 1, int bS = 1, int cS = 1);
2020-11-05 16:41:56 +08:00
virtual ~MatMulExecution();
2019-04-17 10:49:11 +08:00
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
2020-11-05 16:41:56 +08:00
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
2022-11-08 17:05:14 +08:00
void setArguments(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs);
2019-04-17 10:49:11 +08:00
private:
2020-11-05 16:41:56 +08:00
bool mTransposeA;
bool mTransposeB;
2023-03-17 17:04:38 +08:00
int mAs;
int mBs;
int mCs;
2022-12-24 09:42:39 +08:00
Backend* mBackend = nullptr;
2022-11-08 17:05:14 +08:00
std::shared_ptr<Tensor> mBiasTensor;
2022-11-18 22:35:31 +08:00
GemmBatchedTensor_F16_F16_Linear_AlignCuda_Row_Column_Sm75 mGemmBatchedF16F16LnAlign1RCSm75;
2023-05-18 19:11:50 +08:00
GemmTensor_F16_F16_Linear_AlignCuda_Sm75 mGemmF16F16LnAlign1Sm75;
2022-11-08 17:05:14 +08:00
GemmBatchedTensor_F32_F32_Linear_AlignCuda_Row_Column_Sm75 mGemmBatchedF32F32LnAlign1RCSm75;
2023-05-18 19:11:50 +08:00
GemmTensor_F32_F32_Linear_AlignCuda_Sm75 mGemmF32F32LnAlign1Sm75;
2022-11-08 17:05:14 +08:00
GemmBatchedTensor_F16_F32_Linear_AlignCuda_Row_Column_Sm75 mGemmBatchedF16F32LnAlign1RCSm75;
2023-05-18 19:11:50 +08:00
GemmTensor_F16_F32_Linear_AlignCuda_Sm75 mGemmF16F32LnAlign1Sm75;
2022-11-08 17:05:14 +08:00
2022-11-18 22:35:31 +08:00
GemmBatchedTensor_F16_F16_Linear_AlignTensor_Row_Column_Sm75 mGemmBatchedF16F16LnAlign8RCSm75;
2023-05-18 19:11:50 +08:00
GemmTensor_F16_F16_Linear_AlignTensor_Sm75 mGemmF16F16LnAlign8Sm75;
2022-11-08 17:05:14 +08:00
GemmBatchedTensor_F32_F32_Linear_AlignTensor_Row_Column_Sm75 mGemmBatchedF32F32LnAlign8RCSm75;
2023-05-18 19:11:50 +08:00
GemmTensor_F32_F32_Linear_AlignTensor_Sm75 mGemmF32F32LnAlign8Sm75;
2022-11-08 17:05:14 +08:00
GemmBatchedTensor_F16_F32_Linear_AlignTensor_Row_Column_Sm75 mGemmBatchedF16F32LnAlign8RCSm75;
2023-05-18 19:11:50 +08:00
GemmTensor_F16_F32_Linear_AlignTensor_Sm75 mGemmF16F32LnAlign8Sm75;
2022-11-08 17:05:14 +08:00
2022-11-18 22:35:31 +08:00
GemmBatchedTensor_F16_F16_Linear_AlignTensor_Row_Row_Sm75 mGemmBatchedF16F16LnAlign8RRSm75;
2022-11-08 17:05:14 +08:00
GemmBatchedTensor_F32_F32_Linear_AlignTensor_Row_Row_Sm75 mGemmBatchedF32F32LnAlign8RRSm75;
GemmBatchedTensor_F16_F32_Linear_AlignTensor_Row_Row_Sm75 mGemmBatchedF16F32LnAlign8RRSm75;
2022-11-18 22:35:31 +08:00
GemmBatchedCuda_F16_F16_Linear_AlignCuda_Row_Column mGemmBatchedCudaF16F16LnAlign1RC;
2022-11-08 17:05:14 +08:00
GemmBatchedCuda_F32_F32_Linear_AlignCuda_Row_Column mGemmBatchedCudaF32F32LnAlign1RC;
GemmBatchedCuda_F16_F32_Linear_AlignCuda_Row_Column mGemmBatchedCudaF16F32LnAlign1RC;
2022-11-18 22:35:31 +08:00
GemmBatchedCuda_F16_F16_Linear_AlignCuda_Row_Row mGemmBatchedCudaF16F16LnAlign1RR;
2022-11-08 17:05:14 +08:00
GemmBatchedCuda_F32_F32_Linear_AlignCuda_Row_Row mGemmBatchedCudaF32F32LnAlign1RR;
GemmBatchedCuda_F16_F32_Linear_AlignCuda_Row_Row mGemmBatchedCudaF16F32LnAlign1RR;
std::shared_ptr<Tensor> workspaceTensor;
2022-12-24 09:42:39 +08:00
void* mWorkspace;
2022-11-08 17:05:14 +08:00
void* mTempMatA;
void* mTempMatB;
void* mBiasPtr = nullptr;
bool mNeedATempBuffer = false;
bool mNeedBTempBuffer = false;
bool mUseRRLayout = false;
bool mResizeSetArgument = false;
bool mNeedConvertMatAB = false;
CutlassGemmInfo mGemmInfo;
int mBatch = 1;
int mGpuComputeCap;
2022-11-18 22:35:31 +08:00
bool mFp16Infer = false;
bool mFp32Infer = false;
bool mFp16Fp32MixInfer = false;
2023-05-18 19:11:50 +08:00
bool mConvertGemmSplitK = false;
2019-04-17 10:49:11 +08:00
};
2020-11-05 16:41:56 +08:00
} // namespace CUDA
2019-04-17 10:49:11 +08:00
} // namespace MNN
2020-11-05 16:41:56 +08:00
#endif