MNN/source/backend/cuda/core/CUDABackend.cpp

//
// CUDABackend.cpp
// MNN
//
// Created by MNN on 2019/02/28.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "backend/cuda/core/CUDABackend.hpp"
#include "MNN_generated.h"
#include <map>
#include <mutex>
#include "core/Macro.h"
#include "shape/SizeComputer.hpp"
#include "core/TensorUtils.hpp"
namespace MNN {
namespace CUDA {
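// Registry mapping each OpType to the Creator that builds its CUDA Execution.
// Built lazily under std::call_once and intentionally never freed, so it
// outlives every backend instance.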
static std::once_flag gOnce;
std::map<OpType, CUDABackend::Creator*>* gCreator() {
    static std::map<OpType, CUDABackend::Creator*>* creators = nullptr;
    std::call_once(gOnce, [&]() { creators = new std::map<OpType, CUDABackend::Creator*>; });
    return creators;
}
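// Adapts CUDARuntime's raw device alloc/free to the BufferAllocator::Allocator
// interface; the second member of the returned pair is the offset into the
// allocation, always 0 here because every request gets a fresh device block.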
class CUDARuntimeAllocator : public BufferAllocator::Allocator {
public:
    CUDARuntimeAllocator(CUDARuntime* rt) : mRuntime(rt) {
        // Do nothing
    }
    virtual ~CUDARuntimeAllocator() = default;
    virtual std::pair<void*, int> onAlloc(int size) override {
        return std::make_pair(mRuntime->alloc(size), 0);
    }
    virtual void onRelease(std::pair<void*, int> ptr) override {
        mRuntime->free(ptr.first);
    }

private:
    CUDARuntime* mRuntime;
};
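// Precision_Low requests the half-precision runtime path; every other mode
// falls back to fp32. (The second CUDARuntime argument is presumably the
// device index, with -1 selecting the default device.)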
CUDARuntimeWrapper::CUDARuntimeWrapper(BackendConfig::PrecisionMode precision, BackendConfig::PowerMode power) {
    // Shader precision
    if (precision == BackendConfig::Precision_Low) {
        mCUDARuntime.reset(new CUDARuntime(true, -1));
    } else {
        mCUDARuntime.reset(new CUDARuntime(false, -1));
    }
    if (mCUDARuntime.get()) {
        if (mCUDARuntime->isCreateError()) {
            mIsCreateError = true;
            return;
        }
        std::shared_ptr<BufferAllocator::Allocator> allocator(new CUDARuntimeAllocator(mCUDARuntime.get()));
        mBufferPool.reset(new BufferAllocator(allocator));
    }
}
CUDARuntimeWrapper::~CUDARuntimeWrapper() {
    // Do nothing
}
Backend* CUDARuntimeWrapper::onCreate(const BackendConfig* config) const {
    return new CUDABackend(mBufferPool, mCUDARuntime);
}
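// Note: "onGabageCollect" (sic) matches the spelling of the Runtime virtual
// this overrides, so it must not be renamed in this file alone.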
void CUDARuntimeWrapper::onGabageCollect(int level) {
    mBufferPool->release(false);
}
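// Each CUDABackend layers its own dynamic pool on top of the shared static
// pool (via createRecurse), so per-inference buffers can be recycled without
// disturbing long-lived static allocations such as weights.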
CUDABackend::CUDABackend(std::shared_ptr<BufferAllocator> st, std::shared_ptr<CUDARuntime> rt)
    : Backend(MNN_FORWARD_CUDA) {
    mBufferPool.reset(new BufferAllocator(BufferAllocator::Allocator::createRecurse(st.get())));
    mStaticBufferPool = st;
    mCUDARuntime      = rt;
}
CUDABackend::~CUDABackend() {
#ifdef LOG_VERBOSE
    MNN_PRINT("enter CUDABackend::~CUDABackend \n");
#endif
}
CUDARuntime* CUDABackend::getCUDARuntime() {
    MNN_ASSERT(nullptr != mCUDARuntime.get());
    return mCUDARuntime.get();
}
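// DYNAMIC buffers come from the reusable per-backend pool, DYNAMIC_SEPERATE
// requests a separate (non-reused) block from the same pool, and STATIC goes
// to the shared static pool. The pool returns a base pointer plus an offset;
// the offset is stashed in the tensor describe so onReleaseBuffer can rebuild
// the exact pair the allocator handed out.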
bool CUDABackend::onAcquireBuffer(const Tensor* nativeTensor, StorageType storageType) {
#ifdef LOG_VERBOSE
    MNN_PRINT("Start CUDABackend::onAcquireBuffer !\n");
#endif
    int mallocSize = realSize(nativeTensor) * nativeTensor->getType().bytes();
    std::pair<void*, int> buffer;
    if (storageType == DYNAMIC_SEPERATE) {
        buffer = mBufferPool->alloc(mallocSize, true);
    } else if (storageType == DYNAMIC) {
        buffer = mBufferPool->alloc(mallocSize, false);
    } else {
        MNN_ASSERT(storageType == STATIC);
        buffer = mStaticBufferPool->alloc(mallocSize, false);
    }
    if (nullptr == buffer.first) {
        return false;
    }
    auto host = (uint8_t*)buffer.first + buffer.second;
    ((Tensor*)nativeTensor)->buffer().device = (uint64_t)host;
    auto des          = TensorUtils::getDescribe(nativeTensor);
    des->extra.offset = buffer.second;
    return true;
}
bool CUDABackend::onReleaseBuffer(const Tensor* nativeTensor, StorageType storageType) {
    if (storageType == DYNAMIC_SEPERATE) {
        return true;
    }
    auto buffer  = (uint8_t*)nativeTensor->deviceId();
    auto des     = TensorUtils::getDescribe(nativeTensor);
    auto pointer = std::make_pair(buffer - des->extra.offset, des->extra.offset);
    if (storageType == DYNAMIC) {
        mBufferPool->free(pointer);
        return true;
    }
    if (storageType == STATIC) {
        mStaticBufferPool->free(pointer);
    }
    return true;
}
bool CUDABackend::onClearBuffer() {
    mBufferPool->release(true);
    return true;
}
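// Element count (not bytes): the product of all dimension lengths.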
size_t CUDABackend::realSize(const Tensor* tensor) {
    size_t res = 1;
    for (int i = 0; i < tensor->dimensions(); ++i) {
        res *= tensor->length(i);
    }
    return res;
}
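// Cost estimate in ms: a fixed scheduling overhead plus compute time derived
// from the op's FLOPs and the device throughput. Since flops is currently
// hard-coded to 0 (see FIXME below), every supported op reports a constant
// 0.05 ms for now.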
std::pair<float, bool> CUDABackend::onMeasure(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                              const MNN::Op* op) {
    auto creators = gCreator();
    auto iter     = creators->find(op->type());
    if (iter == creators->end()) {
        return std::make_pair(0.0f, false);
    }
    const float defaultScheduleTime = 0.05f;
    // FIXME: Compute in future
    auto flops        = 0.0f;
    auto computeFlops = mCUDARuntime->flops();
    return std::make_pair(defaultScheduleTime + flops / 1024.0f / computeFlops * 1000.0f, true);
}
Execution* CUDABackend::onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                 const MNN::Op* op) {
#ifdef LOG_VERBOSE
    MNN_PRINT("Start CUDABackend::onCreate \n");
#endif
    auto creators = gCreator();
    auto iter     = creators->find(op->type());
    if (iter == creators->end()) {
        if (nullptr != op->name()) {
            MNN_PRINT("Don't support type %s, %s\n", EnumNameOpType(op->type()), op->name()->c_str());
        } else {
            MNN_PRINT("Don't support type %s\n", EnumNameOpType(op->type()));
        }
        return NULL;
    }
    auto exe = iter->second->onCreate(inputs, outputs, op, this);
    if (NULL == exe) {
        if (nullptr != op->name()) {
            MNN_PRINT("The Creator doesn't support type %s, %s\n", EnumNameOpType(op->type()), op->name()->c_str());
        } else {
            MNN_PRINT("The Creator doesn't support type %s\n", EnumNameOpType(op->type()));
        }
        return NULL;
    }
#ifdef LOG_VERBOSE
    MNN_PRINT("End CUDABackend::onCreate \n");
#endif
    return exe;
}
void CUDABackend::onResizeBegin() {
}
void CUDABackend::onResizeEnd() {
}
void CUDABackend::onExecuteBegin() const {
}
void CUDABackend::onExecuteEnd() const {
}
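// Handles the three copy directions (device->device, device->host,
// host->device). Device-side tensors flagged NC4HW4 are treated as plain NCHW
// (the CUDA backend apparently stores them unpacked), and any remaining layout
// mismatch is resolved on the CPU with MNNCPUCopyBuffer, staged through a
// temporary host tensor.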
void CUDABackend::onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const {
    auto srcDimensionFormat = TensorUtils::getDescribe(srcTensor)->dimensionFormat;
    auto srcDevice          = srcTensor->deviceId() != 0;
    auto dstDimensionFormat = TensorUtils::getDescribe(dstTensor)->dimensionFormat;
    auto dstDevice          = dstTensor->deviceId() != 0;
    if (srcDevice && srcDimensionFormat == MNN_DATA_FORMAT_NC4HW4) {
        srcDimensionFormat = MNN_DATA_FORMAT_NCHW;
    }
    if (dstDevice && dstDimensionFormat == MNN_DATA_FORMAT_NC4HW4) {
        dstDimensionFormat = MNN_DATA_FORMAT_NCHW;
    }
    auto needSize = realSize(srcTensor) * srcTensor->getType().bytes();
    std::shared_ptr<Tensor> srcTempTensor;
    std::shared_ptr<Tensor> dstTempTensor;

    if (srcTensor->deviceId() != 0 && dstTensor->deviceId() != 0) {
        // Device to device: a single synchronous copy suffices
        mCUDARuntime->memcpy((void*)(dstTensor->deviceId()), (void*)(srcTensor->deviceId()), needSize,
                             MNNMemcpyDeviceToDevice, true);
    }
    if (srcTensor->deviceId() != 0 && dstTensor->deviceId() == 0) {
        // Device to host: download first, then convert layout on the CPU if needed
        if (srcDimensionFormat != dstDimensionFormat) {
            dstTempTensor.reset(new Tensor(srcTensor, srcTensor->getDimensionType(), true));
            mCUDARuntime->memcpy(dstTempTensor->host<void>(), (void*)(srcTensor->deviceId()), needSize,
                                 MNNMemcpyDeviceToHost, true);
            MNNCPUCopyBuffer(dstTempTensor.get(), dstTensor);
        } else {
            mCUDARuntime->memcpy(dstTensor->host<void>(), (void*)(srcTensor->deviceId()), needSize,
                                 MNNMemcpyDeviceToHost, true);
        }
    }
    if (srcTensor->deviceId() == 0 && dstTensor->deviceId() != 0) {
        // Host to device: convert layout on the CPU before uploading if needed
        if (srcDimensionFormat != dstDimensionFormat) {
            srcTempTensor.reset(new Tensor(dstTensor, dstTensor->getDimensionType(), true));
            MNNCPUCopyBuffer(srcTensor, srcTempTensor.get());
            srcTensor = srcTempTensor.get();
        }
        mCUDARuntime->memcpy((void*)(dstTensor->deviceId()), srcTensor->host<void>(), needSize, MNNMemcpyHostToDevice,
                             true);
    }
}
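// Creators register themselves once at startup; duplicate registration for an
// OpType is rejected. A minimal registration sketch (names hypothetical, and
// the Creator signature inferred from the onCreate call above):
//
//   class ReluCreator : public CUDABackend::Creator {
//       Execution* onCreate(const std::vector<Tensor*>& inputs,
//                           const std::vector<Tensor*>& outputs,
//                           const MNN::Op* op, Backend* backend) const override {
//           return new ReluExecution(backend); // ReluExecution: hypothetical
//       }
//   };
//   static bool gReluAdded = CUDABackend::addCreator(OpType_ReLU, new ReluCreator);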
bool CUDABackend::addCreator(OpType t, Creator* c) {
    auto map = gCreator();
    if (map->find(t) != map->end()) {
        MNN_PRINT("Error: type %d has already been added\n", t);
        return false;
    }
    map->insert(std::make_pair(t, c));
    return true;
}
} // namespace CUDA
} // namespace MNN