MNN/source/backend/cuda/core/runtime/CUDARuntime.hpp

//
// CUDARuntime.hpp
// MNN
//
// Created by MNN on 2019/01/31.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CUDARuntime_hpp
#define CUDARuntime_hpp
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cusolverDn.h>
#include "Type_generated.h"
#include "core/Macro.h"
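// Floating-point precisions handled by the CUDA backend.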
typedef enum {
CUDA_FLOAT32 = 0,
CUDA_FLOAT16 = 1,
} MNNCUDADataType_t;
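// Copy directions accepted by CUDARuntime::memcpy; the numeric values match
// the corresponding cudaMemcpyKind constants.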
typedef enum {
MNNMemcpyHostToDevice = 1,
MNNMemcpyDeviceToHost = 2,
MNNMemcpyDeviceToDevice = 3,
} MNNMemcpyKind_t;
#define cuda_check(_x) \
do { \
cudaError_t _err = (_x); \
if (_err != cudaSuccess) { \
MNN_CHECK(_err, #_x); \
} \
} while (0)
#define after_kernel_launch() \
do { \
cuda_check(cudaGetLastError()); \
} while (0)
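// Illustrative usage (the kernel name and launch geometry are hypothetical):
//   cuda_check(cudaMalloc(&devPtr, bytes));  // wrap any call that returns cudaError_t
//   someKernel<<<grid, block>>>(devPtr);
//   after_kernel_launch();                   // surfaces launch errors via cudaGetLastError()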
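// Aborts with file/line information when a cutlass::Status is not kSuccess.
// Only usable in translation units that also include the CUTLASS headers
// (cutlass::Status and cutlassGetStatusString are not declared in this file).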
#define cutlass_check(status)                                           \
    do {                                                                \
        cutlass::Status error = (status);                               \
        if (error != cutlass::Status::kSuccess) {                       \
            printf("File:%s Line %d: failed: %s\n", __FILE__, __LINE__, \
                   cutlassGetStatusString(error));                      \
            abort();                                                    \
        }                                                               \
    } while (0)
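// In DEBUG builds, checkKernelErrors synchronizes the device and aborts on the
// first pending CUDA error; in release builds it expands to nothing.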
#ifdef DEBUG
#define checkKernelErrors\
do { \
cudaDeviceSynchronize();\
cudaError_t __err = cudaGetLastError(); \
if (__err != cudaSuccess) { \
printf("File:%s Line %d: failed: %s\n", __FILE__, __LINE__,\
cudaGetErrorString(__err)); \
abort(); \
} \
} while (0)
#else
#define checkKernelErrors
#endif
namespace MNN {
class CUDARuntime {
public:
CUDARuntime(int device_id);
~CUDARuntime();
CUDARuntime(const CUDARuntime &) = delete;
CUDARuntime &operator=(const CUDARuntime &) = delete;
bool isSupportedFP16() const;
bool isSupportedDotInt8() const;
bool isSupportedDotAccInt8() const;
std::vector<size_t> getMaxImage2DSize();
bool isCreateError() const;
float flops() const {
return mFlops;
}
int device_id() const;
size_t mem_alignment_in_bytes() const;
void activate();
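    // Raw device-memory helpers; the copy direction is given by MNNMemcpyKind_t,
    // and sync is intended to request a blocking copy (exact semantics live in
    // CUDARuntime.cpp).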
void *alloc(size_t size_in_bytes);
void free(void *ptr);
void memcpy(void *dst, const void *src, size_t size_in_bytes, MNNMemcpyKind_t kind, bool sync = false);
void memset(void *dst, int value, size_t size_in_bytes);
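    // Threads per block used when sizing launches (defaults to 128, see
    // mThreadPerBlock).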
size_t threads_num() {
return mThreadPerBlock;
}
int major_sm() const {
return mProp.major;
}
int compute_capability() {
return mProp.major * 10 + mProp.minor;
}
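    // Grid-size helper: number of blocks needed to cover total_threads at
    // threads_num() threads per block (exact rounding lives in CUDARuntime.cpp).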
size_t blocks_num(const size_t total_threads);
const cudaDeviceProp& prop() const {
return mProp;
}
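    // Picks the visible CUDA device with the most free global memory and
    // returns its id.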
int selectDeviceMaxFreeMemory();
private:
cudaDeviceProp mProp;
int mDeviceId;
bool mIsSupportedFP16 = false;
bool mSupportDotInt8 = false;
bool mSupportDotAccInt8 = false;
float mFlops = 4.0f;
bool mIsCreateError{false};
size_t mThreadPerBlock = 128;
};
} // namespace MNN
#endif /* CUDARuntime_hpp */
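
// Illustrative usage sketch (device id, kernel name, and sizes are hypothetical):
//   MNN::CUDARuntime runtime(0);
//   void* devPtr = runtime.alloc(bytes);
//   runtime.memcpy(devPtr, hostPtr, bytes, MNNMemcpyHostToDevice, true);
//   myKernel<<<runtime.blocks_num(count), runtime.threads_num()>>>(devPtr);
//   after_kernel_launch();
//   runtime.free(devPtr);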