MNN/source/backend/cuda/core/CUDABackend.hpp

//
// CUDABackend.hpp
// MNN
//
// Created by MNN on 2019/01/31.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CUDABackend_hpp
#define CUDABackend_hpp
#include <set>
#include <vector>
#include <MNN/ErrorCode.hpp>
#include "MNN_generated.h"
#include "backend/cuda/core/runtime/CUDARuntime.hpp"
#include "core/Backend.hpp"
#include "core/Macro.h"
#include "core/ConvolutionCommon.hpp"
#include "core/BufferAllocator.hpp"
#include "backend/cpu/CPUResizeCache.hpp"
#define MNN_USER_SET_DEVICE
#include "MNN/MNNSharedContext.h"
#ifdef MNN_CODEGEN_CUDA
#include "backend/cuda/core/compiler/CUDACompiler.hpp"
#endif

namespace MNN {
namespace CUDA {
class MNN_PUBLIC CUDARuntimeWrapper : public Runtime {
public:
    CUDARuntimeWrapper(BackendConfig::PrecisionMode precision, BackendConfig::PowerMode power, int deviceId = 0);
    virtual ~CUDARuntimeWrapper();
    virtual Backend *onCreate(const BackendConfig* config) const override;
    // NOTE: spelling follows the Runtime base-class virtual; renaming it would break the override.
    virtual void onGabageCollect(int level) override;
    bool isCreateError() const {
        return mIsCreateError;
    }
    virtual CompilerType onGetCompilerType() const override {
        return Compiler_Loop;
    }
    virtual float onGetMemoryInMB() override;

private:
    std::shared_ptr<EagerBufferAllocator> mBufferPool;
    std::shared_ptr<CUDARuntime> mCUDARuntime;
    bool mIsCreateError{false};
    BackendConfig::PrecisionMode mDefaultPrecision;
};
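
// A minimal usage sketch (illustrative, not part of this header): user code
// normally reaches this runtime through MNN's public session API rather than
// by constructing CUDARuntimeWrapper directly. The model path below is an
// assumption; the API names come from MNN's public headers.
//
//   #include <MNN/Interpreter.hpp>
//   auto net = std::shared_ptr<MNN::Interpreter>(
//       MNN::Interpreter::createFromFile("model.mnn"));
//   MNN::ScheduleConfig config;
//   config.type = MNN_FORWARD_CUDA;   // selects this backend
//   MNN::BackendConfig bnConfig;
//   bnConfig.precision = MNN::BackendConfig::Precision_Low;  // allow fp16 paths
//   config.backendConfig = &bnConfig;
//   auto session = net->createSession(config);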
class CUDABackend : public Backend {
public:
    CUDABackend(std::shared_ptr<BufferAllocator> st, std::shared_ptr<CUDARuntime> rt, int precisionLevel);
    ~CUDABackend();
    CUDARuntime *getCUDARuntime();
    virtual const Runtime* getRuntime() override;
    virtual Backend::MemObj* onAcquire(const Tensor *nativeTensor, StorageType storageType) override;
    virtual bool onClearBuffer() override;
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op) override;
    virtual void onResizeBegin() override;
    virtual ErrorCode onResizeEnd() override;

    virtual void onExecuteBegin() const override;
    virtual void onExecuteEnd() const override;
    virtual void onCopyBuffer(const Tensor *srcTensor, const Tensor *dstTensor) const override;

    class Creator {
    public:
        virtual ~Creator() = default;
        virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                    const MNN::Op *op, Backend *backend) const = 0;
    };
    static bool addCreator(OpType t, Creator *c);
    static DataType getDataType(const Tensor* tensor);

    BufferAllocator *getBufferPool() const {
        return mBufferPool.get();
    }
    BufferAllocator *getStaticBufferPool() const {
        return mStaticBufferPool.get();
    }
    static size_t realSize(const Tensor *tensor);
    int getBytes(const Tensor* tensor) const;
    CPUResizeCache* getCache();
    bool useFp16() const;
    int getPrecision() const;
#ifdef MNN_CODEGEN_CUDA
    std::map<std::pair<std::string, std::string>, CUmodule> kernelCuModuleMap();
#endif

private:
    std::shared_ptr<BufferAllocator> mBufferPool;
    std::shared_ptr<BufferAllocator> mStaticBufferPool;
    std::shared_ptr<CUDARuntime> mCUDARuntime;
    CPUResizeCache mCache;
    bool mUseFp16AsFp32 = false;
    int mPrecision = 0;
#ifdef MNN_CODEGEN_CUDA
    CUmodule mCuModule;
    std::map<std::pair<std::string, std::string>, CUmodule> mKernelCuModuleMap;
#endif
};
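
// A hedged host<->device transfer sketch: onCopyBuffer is not called by user
// code; it runs underneath Tensor::copyFromHostTensor / copyToHostTensor when
// a session tensor lives on this backend. Variable names are illustrative.
//
//   auto input = net->getSessionInput(session, nullptr);
//   std::shared_ptr<MNN::Tensor> host(
//       MNN::Tensor::createHostTensorFromDevice(input, false));
//   // ... fill host->host<float>() ...
//   input->copyFromHostTensor(host.get());  // ends up in CUDABackend::onCopyBuffer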
template <class T>
class CUDACreatorRegister {
public:
    CUDACreatorRegister(OpType type) {
        T *t = new T;
        CUDABackend::addCreator(type, t);
    }
    ~CUDACreatorRegister() = default;
};
/** Execution wrapper that inserts dynamic tensor casts around an op's execution. */
class CastWrapExecution : public Execution {
public:
    CastWrapExecution(Backend* backend, DataType runT)
        : Execution(backend), mRunType(runT) {}
    virtual ErrorCode onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) override;

private:
    DataType mRunType;
};
template <typename T>
class TypedCreator : public CUDABackend::Creator {
public:
    virtual ~TypedCreator() = default;
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        return new T(inputs, op, backend);
    }
};
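
// A hedged registration sketch: a translation unit typically exposes an
// execution by pairing TypedCreator with a static CUDACreatorRegister, so the
// creator is added to CUDABackend during static initialization. "ReluExecution"
// is an illustrative assumption, not a name declared in this header;
// OpType_ReLU comes from MNN_generated.h.
//
//   static CUDACreatorRegister<TypedCreator<ReluExecution>> __relu_reg(OpType_ReLU);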
} // namespace CUDA
} // namespace MNN
#endif /* CUDABackend_hpp */