mirror of https://github.com/alibaba/MNN.git
Merge pull request #3610 from alibaba/feautre/bugfix
Vulkan: Bugfix: fix radix sort copying the sort number with the wrong copy region
commit 4f39ef5f3f
@@ -105,6 +105,7 @@ ErrorCode VulkanBackend::onResizeEnd() {
     if (!mDirect) {
         mCmdBuffer->end();
     }
+    mHostBuffer.reset();
     return NO_ERROR;
 }
 class VulkanMemRelease : public Backend::MemObj {
@@ -288,27 +289,36 @@ static Tensor::DimensionType _convert(MNN_DATA_FORMAT format) {
     }
     return Tensor::CAFFE;
 }
-void VulkanBackend::copyToGPUBuffer(const void* src, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset) const {
-    _requireHostBuffer(size);
-    ::memcpy(mHostBuffer->map(), src, size);
-    mHostBuffer->unmap();
+std::shared_ptr<VulkanBuffer> VulkanBackend::createHostBuffer(size_t size) const {
+    std::shared_ptr<VulkanBuffer> res;
+    res.reset(new VulkanBuffer(*mRuntime->mMemoryPool, false, size, nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+    return res;
+}
+
+void VulkanBackend::copyGPUToGPUBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size, VkDeviceSize srcOffset, VkDeviceSize dstOffset) const {
     auto cmdbuffer = mCmdBufferForCopy;
     cmdbuffer->begin(0);
     VkBufferCopy bufferCopy;
     bufferCopy.size = size;
-    bufferCopy.dstOffset = offset;
-    bufferCopy.srcOffset = 0;
-    vkCmdCopyBuffer(cmdbuffer->get(), mHostBuffer->buffer(), buffer,
+    bufferCopy.dstOffset = dstOffset;
+    bufferCopy.srcOffset = srcOffset;
+    vkCmdCopyBuffer(cmdbuffer->get(), srcBuffer, dstBuffer,
                     1, &bufferCopy);
     cmdbuffer->end();
     pushCommand(cmdbuffer->get());
     _finish();
-    mHostBuffer.reset();
+}
+
+void VulkanBackend::copyToGPUBuffer(const void* src, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset) const {
+    _requireHostBuffer(size);
+    ::memcpy(mHostBuffer->map(), src, size);
+    mHostBuffer->unmap();
+    copyGPUToGPUBuffer(mHostBuffer->buffer(), buffer, size, 0, offset);
 }
 void VulkanBackend::_requireHostBuffer(size_t size) const {
     _finish();
     if (nullptr == mHostBuffer || mHostBuffer->size() < size) {
-        mHostBuffer.reset(new VulkanBuffer(*mRuntime->mMemoryPool, false, size, nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+        mHostBuffer = createHostBuffer(size);
     }
 }
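The hunk above splits the old monolithic upload into two reusable helpers: createHostBuffer allocates a host-visible staging buffer, and copyGPUToGPUBuffer records a plain buffer-to-buffer copy that honors a source as well as a destination offset (the old code hard-coded srcOffset to 0). Together with the mHostBuffer.reset() moved into onResizeEnd in the first hunk, the staging buffer is now reused across uploads instead of being freed after every copy. A minimal sketch of the recording step; recordCopy is a hypothetical name, and cmd is assumed to be a command buffer already in the recording state:

    #include <vulkan/vulkan.h>

    // Hypothetical helper mirroring copyGPUToGPUBuffer's core: record one
    // region copy between two VkBuffers. `cmd` must already be recording.
    void recordCopy(VkCommandBuffer cmd, VkBuffer src, VkBuffer dst,
                    VkDeviceSize size, VkDeviceSize srcOffset, VkDeviceSize dstOffset) {
        VkBufferCopy region{};
        region.srcOffset = srcOffset; // the old code fixed this to 0
        region.dstOffset = dstOffset;
        region.size      = size;
        vkCmdCopyBuffer(cmd, src, dst, 1, &region);
    }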
@@ -92,7 +92,9 @@ public:
     VULKAN_TENSOR getBuffer(const Tensor* tensor) const;
     std::shared_ptr<VulkanBuffer> allocUniform(const void* src = nullptr, int size = 0);
     void recycleUniform(std::shared_ptr<VulkanBuffer> buffer);
+    void copyGPUToGPUBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size, VkDeviceSize srcOffset, VkDeviceSize dstOffset) const;
     void copyToGPUBuffer(const void* src, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset) const;
+    std::shared_ptr<VulkanBuffer> createHostBuffer(size_t size) const;

     const VulkanDevice& device() const;
 #ifdef ENABLE_VULKAN_TIME_PROFILE
@@ -93,14 +93,15 @@ public:
         return;
     }
     std::shared_ptr<Tensor> sourceWeight(Tensor::createDevice<float>({ci * co * kernelSize}));
-    res = vkBn->onAcquireBuffer(sourceWeight.get(), Backend::STATIC);
-    if (!res) {
+    auto sourceBuffer = vkBn->createHostBuffer(ci * co * kernelSize * sizeof(float));
+    if (nullptr == sourceBuffer.get()) {
         return;
     }
-    {
-        auto vkTensor = extra->getBuffer(sourceWeight.get());
-        extra->copyToGPUBuffer(weightPtr, std::get<0>(vkTensor), sourceWeight->size(), std::get<2>(vkTensor));
-    }
+    ::memcpy(sourceBuffer->map(), weightPtr, ci * co * kernelSize * sizeof(float));
+    sourceBuffer->unmap();
+    sourceWeight->buffer().device = (uint64_t)(sourceBuffer.get());
+    TensorUtils::getDescribe(sourceWeight.get())->extra.offset = 0;
+
     std::shared_ptr<VulkanCommandPool::Buffer> prearrangeCmd( vkBn->getPool().allocBuffer());
     for (auto& reg : des->regions) {
         reg.origin = sourceWeight.get();
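The weight-prearrange path now skips the intermediate STATIC device tensor: it fills a mapped staging buffer directly and stores the VulkanBuffer pointer in the tensor's device field, which the hunk shows is an accepted MNN convention for handing a backend buffer to later region processing. A hedged restatement of the new flow; vkBn, weightPtr, and bytes stand in for the surrounding objects:

    // Sketch of the upload path above, under the assumptions named in the text.
    auto staging = vkBn->createHostBuffer(bytes);       // host-visible staging memory
    ::memcpy(staging->map(), weightPtr, bytes);         // write while mapped
    staging->unmap();
    // MNN convention: a tensor may carry a backend buffer as its device id.
    sourceWeight->buffer().device = (uint64_t)(staging.get());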
@@ -70,6 +70,8 @@ public:
             }
         }
         types.resize(maxIndex+1);
+        std::vector<std::tuple<int, void*, size_t>> constStoragePtrs;
+        std::vector<std::tuple<int, void*, size_t>> constUniformPtrs;
         for (int i=0; i<extra->attr()->size(); ++i) {
             auto attr = extra->attr()->GetAs<Attribute>(i);
             if (attr->key()->str() == "input") {
@@ -89,13 +91,6 @@ public:
                 continue;
             }
             if (attr->key()->str() == "const") {
-                auto usageBit = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
-                if (attr->b()) {
-                    types[attr->i()] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
-                } else {
-                    usageBit = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-                    types[attr->i()] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-                }
                 auto b = attr->tensor();
                 void* result = nullptr;
                 size_t bufferSize = 0;
@@ -112,14 +107,59 @@ public:
                         MNN_ASSERT(false);
                         break;
                 }
-                std::shared_ptr<VulkanBuffer> vkBuffer(new VulkanBuffer(vkBn->getMemoryPool(), false, bufferSize, nullptr, usageBit, VK_SHARING_MODE_EXCLUSIVE, 0));
-                vkBn->copyToGPUBuffer(result, vkBuffer->buffer(), bufferSize, 0);
-                mConstIndides.emplace_back(std::make_pair(attr->i(), vkBuffer));
+                if (attr->b()) {
+                    types[attr->i()] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+                    constUniformPtrs.emplace_back(std::make_tuple(attr->i(), result, bufferSize));
+                } else {
+                    types[attr->i()] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+                    constStoragePtrs.emplace_back(std::make_tuple(attr->i(), result, bufferSize));
+                }
                 continue;
             }
         }
+        auto alignSize = vkBn->device().proty().limits.minMemoryMapAlignment;
+        size_t offset = 0;
+        std::shared_ptr<VulkanCommandPool::Buffer> cmdbuffer( vkBn->getPool().allocBuffer());
+        cmdbuffer->begin(0);
+        auto merge = [&](const std::vector<std::tuple<int, void*, size_t>>& constPtrs, VkDescriptorType type) {
+            if (constPtrs.empty()) {
+                return std::make_tuple(std::vector<std::tuple<int, size_t, size_t>>{}, std::shared_ptr<VulkanBuffer>(nullptr), std::shared_ptr<VulkanBuffer>(nullptr));
+            }
+            std::vector<std::tuple<int, size_t, size_t>> mConstOffset;
+            for (auto& constAttr : constPtrs) {
+                auto size = UP_DIV(std::get<2>(constAttr), alignSize) * alignSize;
+                mConstOffset.emplace_back(std::make_tuple(std::get<0>(constAttr), size, offset));
+                offset += size;
+            }
+            std::shared_ptr<VulkanBuffer> hostBuffer(new VulkanBuffer(vkBn->getMemoryPool(), false, offset, nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+            auto ptr = (uint8_t*)hostBuffer->map();
+            for (int i=0; i<constPtrs.size(); ++i) {
+                ::memcpy(ptr + std::get<2>(mConstOffset[i]), std::get<1>(constPtrs[i]), std::get<2>(constPtrs[i]));
+            }
+            hostBuffer->unmap();
+            std::shared_ptr<VulkanBuffer> vkBuffer(new VulkanBuffer(vkBn->getMemoryPool(), false, offset, nullptr, type, VK_SHARING_MODE_EXCLUSIVE, 0));
+            VkBufferCopy bufferCopy;
+            bufferCopy.size = offset;
+            bufferCopy.dstOffset = 0;
+            bufferCopy.srcOffset = 0;
+            vkCmdCopyBuffer(cmdbuffer->get(), hostBuffer->buffer(), vkBuffer->buffer(),
+                            1, &bufferCopy);
+            return std::make_tuple(mConstOffset, vkBuffer, hostBuffer);
+        };
+        mConstStorageOffset.clear();
+        mConstUniformOffset.clear();
+        auto uniforms = merge(constUniformPtrs, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
+        mConstUniformOffset = std::get<0>(uniforms);
+        mConstUniformBuffer = std::get<1>(uniforms);
+        auto storages = merge(constStoragePtrs, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+        mConstStorageOffset = std::get<0>(storages);
+        mConstStorageBuffer = std::get<1>(storages);
+        cmdbuffer->end();
+        auto fence = vkBn->getPool().submit(cmdbuffer->get());
+
         mPipeline = factory->createComputePipeline(data, dataSize, types, std::vector<uint32_t>{});
         mDescriptorSet = mPipeline->createSet();
+        fence->wait();
     }
     virtual ~VulkanFuse() {
         // Remove set firstly before destroy pipeline
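The merge lambda replaces one VkBuffer per constant with a single buffer per descriptor type: every constant gets an offset rounded up to minMemoryMapAlignment, all payloads are memcpy'd into one host-visible buffer, and one vkCmdCopyBuffer moves the whole batch to device memory. The offset layout is the load-bearing part; the following self-contained sketch reproduces the same arithmetic with made-up sizes and a 64-byte alignment:

    #include <cstdio>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Round `size` up to a multiple of `align`; in MNN terms this is
    // UP_DIV(size, align) * align, as in the merge lambda above.
    static size_t alignUp(size_t size, size_t align) {
        return (size + align - 1) / align * align;
    }

    int main() {
        std::vector<size_t> sizes = {20, 130, 64};     // hypothetical constant sizes in bytes
        size_t align = 64;                             // stand-in for minMemoryMapAlignment
        size_t offset = 0;
        std::vector<std::pair<size_t, size_t>> layout; // (offset, padded size)
        for (size_t s : sizes) {
            size_t padded = alignUp(s, align);
            layout.push_back({offset, padded});
            offset += padded;
        }
        for (auto& e : layout) {
            std::printf("offset=%zu padded=%zu\n", e.first, e.second);
        }
        std::printf("total=%zu\n", offset);            // one allocation, one copy for all constants
        return 0;
    }

Batching also lets the constructor overlap the transfer with pipeline creation: the copy is submitted before createComputePipeline runs, and the fence is waited on only afterwards.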
@@ -134,8 +174,11 @@ public:
         for (int i=0; i<outputs.size(); ++i) {
             mDescriptorSet->writeBuffer(vkBn->getBuffer(outputs[i]), mOutputBinding[i]);
         }
-        for (auto& iter : mConstIndides) {
-            mDescriptorSet->writeBuffer(iter.second->buffer(), iter.first, iter.second->size());
+        for (auto& iter : mConstStorageOffset) {
+            mDescriptorSet->writeBuffer(mConstStorageBuffer->buffer(), std::get<0>(iter), std::get<1>(iter), std::get<2>(iter));
+        }
+        for (auto& iter : mConstUniformOffset) {
+            mDescriptorSet->writeBuffer(mConstUniformBuffer->buffer(), std::get<0>(iter), std::get<1>(iter), std::get<2>(iter));
         }
         if (mNeedAutoTuning) {
             auto localSize = vkBn->autoTunePipeline(mPipeline.get(), mDescriptorSet, mGlobalSize);
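Where each constant used to bind its own buffer, these writes now bind sub-ranges of the shared buffer by (binding, size, offset). At the Vulkan level that is a VkDescriptorBufferInfo with a non-zero offset; a hedged sketch of what such a write looks like underneath (writeSubRange is a hypothetical helper, and note that Vulkan separately requires such offsets to satisfy the device's min*BufferOffsetAlignment limits):

    #include <vulkan/vulkan.h>

    // Bind bytes [offset, offset+range) of `buf` to `binding` of `set`.
    void writeSubRange(VkDevice device, VkDescriptorSet set, uint32_t binding,
                       VkBuffer buf, VkDeviceSize offset, VkDeviceSize range) {
        VkDescriptorBufferInfo info{};
        info.buffer = buf;
        info.offset = offset; // sub-allocation inside the shared constant buffer
        info.range  = range;

        VkWriteDescriptorSet write{};
        write.sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
        write.dstSet          = set;
        write.dstBinding      = binding;
        write.descriptorCount = 1;
        write.descriptorType  = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        write.pBufferInfo     = &info;
        vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
    }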
@@ -153,7 +196,11 @@ private:
     std::vector<int> mGlobalSize;
     std::vector<int> mInputBinding;
     std::vector<int> mOutputBinding;
-    std::vector<std::pair<int, std::shared_ptr<VulkanBuffer>>> mConstIndides;
+    std::shared_ptr<VulkanBuffer> mConstStorageBuffer;
+    std::shared_ptr<VulkanBuffer> mConstUniformBuffer;
+    // Index, offset, size
+    std::vector<std::tuple<int, size_t, size_t>> mConstStorageOffset;
+    std::vector<std::tuple<int, size_t, size_t>> mConstUniformOffset;
     SharedPtr<VulkanPipeline> mPipeline;
     SharedPtr<VulkanLayout::DescriptorSet> mDescriptorSet;
     bool mNeedAutoTuning = false;
@@ -82,7 +82,7 @@ void VulkanRaster::onEncodeFast(const Tensor* input, const Tensor* output, const
     for (int i=0; i< des->regions.size(); ++i) {
         auto& slice = des->regions[i];
         Tensor::InsideDescribe::Region newRegion;
-        OpCommonUtils::turnToPackRegion(slice, newRegion, output, 4);
+        OpCommonUtils::turnToPackRegion(slice, newRegion, output, 4, true);
         // TODO: Find better way
         newRegion.dst.offset /= 4;
         newRegion.src.offset /= 4;
@@ -92,6 +92,8 @@ void VulkanRaster::onEncodeFast(const Tensor* input, const Tensor* output, const
         auto group = UP_DIV(total, 256);
         std::shared_ptr<VulkanLayout::DescriptorSet> describe(blitPipeline->createSet());
         std::shared_ptr<VulkanBuffer> uniform = vkBn->allocUniform();
+        ::memcpy(uniform->map(), &info, sizeof(SamplerInfo));
+        uniform->unmap();
         auto srcTensor = vkBn->getTensorBuffer(slice.origin);
         auto srcTensorSize = vkBn->getTensorSize(slice.origin);
         describe->writeBuffer(dstTensor.first->buffer(), 0, dstTensorSize, dstTensor.second);
@@ -127,7 +129,7 @@ ErrorCode VulkanRaster::onEncode(const std::vector<Tensor *> &____inputs, const
             fast = false;
             break;
         }
-        if (!OpCommonUtils::canBlitFast(slice, output)) {
+        if (!OpCommonUtils::canBlitFast(slice, output, 4, true)) {
             fast = false;
             break;
         }
@@ -148,7 +148,7 @@ ErrorCode VulkanRasterSort::onEncode(const std::vector<Tensor *> &inputs, const
     region2.dstOffset = std::get<2>(output);
     region2.srcOffset = pointOffsetSum.second + (pointOffsetBytes / sizeof(uint32_t) - 1) * sizeof(uint32_t);

-    vkCmdCopyBuffer(cmdBuffer->get(), ((VulkanBuffer*)pointOffsetSum.first)->buffer(), std::get<0>(output), 1, &region);
+    vkCmdCopyBuffer(cmdBuffer->get(), ((VulkanBuffer*)pointOffsetSum.first)->buffer(), std::get<0>(output), 1, &region2);

     cmdBuffer->barrierSource(sortNumber->buffer(), 0, sizeof(uint32_t));
 }
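This is the fix named in the commit title: the copy of the sort number passed &region, the VkBufferCopy filled for an earlier copy, even though region2 had just been set up with the sort number's offsets. A short sketch of the corrected call; cmd, src, dst, srcOffset, and dstOffset are assumed from the surrounding code:

    // Each copy must use the VkBufferCopy that was filled for it.
    VkBufferCopy region2{};
    region2.srcOffset = srcOffset;        // offsets computed for the sort number
    region2.dstOffset = dstOffset;
    region2.size      = sizeof(uint32_t); // a single counter value
    vkCmdCopyBuffer(cmd, src, dst, 1, &region2); // was: &region (stale offsets)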
@@ -24,8 +24,7 @@ VulkanCommandPool::~VulkanCommandPool() {
     mDevice.destroyCommandPool(mPool);
     // FUNC_PRINT(1);
 }
-void VulkanCommandPool::submitAndWait(VkCommandBuffer buffer) const {
+std::shared_ptr<VulkanFence> VulkanCommandPool::submit(VkCommandBuffer buffer) const {
     auto b = buffer;
     auto fence = std::make_shared<VulkanFence>(mDevice);
     VkSubmitInfo submit_info = {/* .sType = */ VK_STRUCTURE_TYPE_SUBMIT_INFO,
@@ -40,6 +39,11 @@ void VulkanCommandPool::submitAndWait(VkCommandBuffer buffer) const {
     auto fenceReal = fence->get();
     auto queue = mDevice.acquireDefaultDevQueue();
     CALL_VK(vkQueueSubmit(queue, 1, &submit_info, fenceReal));
+    return fence;
+}
+
+void VulkanCommandPool::submitAndWait(VkCommandBuffer buffer) const {
+    auto fence = submit(buffer);
     fence->wait();
 }

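Splitting submit out of submitAndWait makes the wait opt-in: submit queues the work and returns the fence, and submitAndWait shrinks to a two-line wrapper. VulkanFuse above uses exactly this to overlap its constant upload with pipeline compilation. A usage sketch under the same assumptions (pool is a VulkanCommandPool, cmd a fully recorded command buffer, and the CPU-side call is hypothetical):

    auto fence = pool.submit(cmd);   // returns without blocking the CPU
    doOtherCpuWork();                // hypothetical: overlap CPU-side setup
    fence->wait();                   // block only when the GPU result is needed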
@@ -11,6 +11,7 @@

 #include "core/NonCopyable.hpp"
 #include "backend/vulkan/component/VulkanDevice.hpp"
+#include "backend/vulkan/component/VulkanFence.hpp"
 #include "backend/vulkan/vulkan/vulkan_wrapper.h"
 namespace MNN {
 class VulkanImage;
@@ -49,6 +50,7 @@ public:
     }

     void submitAndWait(VkCommandBuffer buffer) const;
+    std::shared_ptr<VulkanFence> submit(VkCommandBuffer buffer) const;

 private:
     const VulkanDevice& mDevice;
@@ -11,18 +11,23 @@
 //#define MNN_VULKAN_PRINT_EXT
 namespace MNN {
 static uint32_t _getLocalMemorySize(const VkPhysicalDeviceMemoryProperties& memProty) {
+#ifdef __APPLE__
+    // For mac vulkan driver can not get correct local size
+    return 16384;
+#else
     int32_t localMemorySize = 0;
-    for (int i=0; i<VK_MAX_MEMORY_TYPES; ++i) {
+    for (int i=0; i<memProty.memoryHeapCount; ++i) {
         auto& heap = memProty.memoryHeaps[i];
         if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
             auto size = (int32_t)heap.size;
             if (size > 0) {
                 localMemorySize = size;
-            }
                 break;
+            }
         }
     }
     return localMemorySize;
+#endif
 }
 VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance)
     : mOwner(true),
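Two separate fixes hide in this hunk besides the Apple special case. First the loop bound: memoryHeaps holds only memoryHeapCount valid entries (at most VK_MAX_MEMORY_HEAPS, which is 16), but the old loop walked VK_MAX_MEMORY_TYPES (32) slots, reading past the array. Second, the break moves inside the size > 0 check, so a device-local heap whose size truncates to a non-positive int32_t no longer terminates the search early. A self-contained model of the fixed scan; the Heap struct is a stand-in for VkMemoryHeap:

    #include <cstdint>
    #include <cstdio>

    struct Heap { uint64_t size; bool deviceLocal; }; // stand-in for VkMemoryHeap

    static int32_t localMemorySize(const Heap* heaps, uint32_t heapCount) {
        int32_t result = 0;
        for (uint32_t i = 0; i < heapCount; ++i) {    // was: i < VK_MAX_MEMORY_TYPES (32)
            if (heaps[i].deviceLocal) {
                auto size = (int32_t)heaps[i].size;
                if (size > 0) {
                    result = size;
                    break;                            // stop at the first usable heap
                }
            }
        }
        return result;
    }

    int main() {
        // Heap 0 is device-local but reports size 0; the old code broke out
        // here and returned 0, while the fixed scan moves on to heap 1.
        Heap heaps[2] = {{0, true}, {2u << 20, true}};
        std::printf("%d\n", localMemorySize(heaps, 2)); // prints 2097152
        return 0;
    }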
@@ -118,6 +123,7 @@ VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance)
     }
     vkGetPhysicalDeviceProperties(mPhysicalDevice, &mDeviceProty);
     vkGetPhysicalDeviceMemoryProperties(mPhysicalDevice, &mMemoryProty);
+    mLocalMemorySize = _getLocalMemorySize(mMemoryProty);
     getDeviceQueue(mQueueFamilyIndex, 0, mQueue);

     // query subgroupSize
@@ -132,7 +138,6 @@ VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance)
         vkGetPhysicalDeviceProperties2(mPhysicalDevice, &deviceProperties2);
         mSubgroupSize = subgroupProperties.subgroupSize;
     }
-    mLocalMemorySize = _getLocalMemorySize(mMemoryProty);
 #ifdef MNN_VULKAN_PRINT_EXT
     uint32_t pPropertyCount;
     vkEnumerateInstanceExtensionProperties(nullptr, &pPropertyCount, nullptr);
@@ -146,6 +151,7 @@ VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance)
     FUNC_PRINT(mDeviceProty.limits.maxComputeWorkGroupCount[0]);
     FUNC_PRINT(mDeviceProty.limits.maxComputeWorkGroupInvocations);
     FUNC_PRINT(mDeviceProty.limits.maxComputeSharedMemorySize);
+    FUNC_PRINT(mLocalMemorySize);
 #endif
 }

@@ -350,7 +350,11 @@ void Tensor::print() const {

     // convert to host if needed
     auto printee = this;
-    bool device = this->buffer().host == NULL && this->buffer().device != 0;
+    auto bnType = MNN_FORWARD_CPU;
+    if (nullptr != mDescribe->getBackend()) {
+        bnType = mDescribe->getBackend()->type();
+    }
+    bool device = bnType != MNN_FORWARD_CPU;
     if (device) {
         printee = this->createHostTensorFromDevice(this, true);
     }
@@ -105,6 +105,7 @@ void ConvertUtils::broadcastto(Tensor* input, Tensor* output, bool forward) {
         reg.dst.stride[1] = multipler;
         reg.dst.stride[2] = 1;
         reg.origin = input;
+        return;
     }
     int32_t inputShape[MNN_MAX_TENSOR_DIM];
     int32_t outputShape[MNN_MAX_TENSOR_DIM];