Merge pull request #3610 from alibaba/feautre/bugfix
android / android_build (push) Has been cancelled Details
ios / ios_build (push) Has been cancelled Details
linux / linux_buil_test (push) Has been cancelled Details
macos / macos_buil_test (push) Has been cancelled Details
windows / windows_build_test (push) Has been cancelled Details
stale / stale (push) Has been cancelled Details

Vulkan: Bugfix: fix radix sort's incorrect copy of the sort number
This commit is contained in:
jxt1234 2025-06-09 10:20:00 +08:00 committed by GitHub
commit 4f39ef5f3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 116 additions and 37 deletions

View File

@ -105,6 +105,7 @@ ErrorCode VulkanBackend::onResizeEnd() {
if (!mDirect) { if (!mDirect) {
mCmdBuffer->end(); mCmdBuffer->end();
} }
mHostBuffer.reset();
return NO_ERROR; return NO_ERROR;
} }
class VulkanMemRelease : public Backend::MemObj { class VulkanMemRelease : public Backend::MemObj {
@ -288,27 +289,36 @@ static Tensor::DimensionType _convert(MNN_DATA_FORMAT format) {
} }
return Tensor::CAFFE; return Tensor::CAFFE;
} }
void VulkanBackend::copyToGPUBuffer(const void* src, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset) const { std::shared_ptr<VulkanBuffer> VulkanBackend::createHostBuffer(size_t size) const {
_requireHostBuffer(size); std::shared_ptr<VulkanBuffer> res;
::memcpy(mHostBuffer->map(), src, size); res.reset(new VulkanBuffer(*mRuntime->mMemoryPool, false, size, nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
mHostBuffer->unmap(); return res;
}
void VulkanBackend::copyGPUToGPUBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size, VkDeviceSize srcOffset, VkDeviceSize dstOffset) const {
auto cmdbuffer = mCmdBufferForCopy; auto cmdbuffer = mCmdBufferForCopy;
cmdbuffer->begin(0); cmdbuffer->begin(0);
VkBufferCopy bufferCopy; VkBufferCopy bufferCopy;
bufferCopy.size = size; bufferCopy.size = size;
bufferCopy.dstOffset = offset; bufferCopy.dstOffset = dstOffset;
bufferCopy.srcOffset = 0; bufferCopy.srcOffset = srcOffset;
vkCmdCopyBuffer(cmdbuffer->get(), mHostBuffer->buffer(), buffer, vkCmdCopyBuffer(cmdbuffer->get(), srcBuffer, dstBuffer,
1, &bufferCopy); 1, &bufferCopy);
cmdbuffer->end(); cmdbuffer->end();
pushCommand(cmdbuffer->get()); pushCommand(cmdbuffer->get());
_finish(); _finish();
mHostBuffer.reset(); }
void VulkanBackend::copyToGPUBuffer(const void* src, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset) const {
_requireHostBuffer(size);
::memcpy(mHostBuffer->map(), src, size);
mHostBuffer->unmap();
copyGPUToGPUBuffer(mHostBuffer->buffer(), buffer, size, 0, offset);
} }
void VulkanBackend::_requireHostBuffer(size_t size) const { void VulkanBackend::_requireHostBuffer(size_t size) const {
_finish(); _finish();
if (nullptr == mHostBuffer || mHostBuffer->size() < size) { if (nullptr == mHostBuffer || mHostBuffer->size() < size) {
mHostBuffer.reset(new VulkanBuffer(*mRuntime->mMemoryPool, false, size, nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); mHostBuffer = createHostBuffer(size);
} }
} }

View File

@ -92,7 +92,9 @@ public:
VULKAN_TENSOR getBuffer(const Tensor* tensor) const; VULKAN_TENSOR getBuffer(const Tensor* tensor) const;
std::shared_ptr<VulkanBuffer> allocUniform(const void* src = nullptr, int size = 0); std::shared_ptr<VulkanBuffer> allocUniform(const void* src = nullptr, int size = 0);
void recycleUniform(std::shared_ptr<VulkanBuffer> buffer); void recycleUniform(std::shared_ptr<VulkanBuffer> buffer);
void copyGPUToGPUBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size, VkDeviceSize srcOffset, VkDeviceSize dstOffset) const;
void copyToGPUBuffer(const void* src, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset) const; void copyToGPUBuffer(const void* src, VkBuffer buffer, VkDeviceSize size, VkDeviceSize offset) const;
std::shared_ptr<VulkanBuffer> createHostBuffer(size_t size) const;
const VulkanDevice& device() const; const VulkanDevice& device() const;
#ifdef ENABLE_VULKAN_TIME_PROFILE #ifdef ENABLE_VULKAN_TIME_PROFILE

View File

@ -93,14 +93,15 @@ public:
return; return;
} }
std::shared_ptr<Tensor> sourceWeight(Tensor::createDevice<float>({ci * co * kernelSize})); std::shared_ptr<Tensor> sourceWeight(Tensor::createDevice<float>({ci * co * kernelSize}));
res = vkBn->onAcquireBuffer(sourceWeight.get(), Backend::STATIC); auto sourceBuffer = vkBn->createHostBuffer(ci * co * kernelSize * sizeof(float));
if (!res) { if (nullptr == sourceBuffer.get()) {
return; return;
} }
{ ::memcpy(sourceBuffer->map(), weightPtr, ci * co * kernelSize * sizeof(float));
auto vkTensor = extra->getBuffer(sourceWeight.get()); sourceBuffer->unmap();
extra->copyToGPUBuffer(weightPtr, std::get<0>(vkTensor), sourceWeight->size(), std::get<2>(vkTensor)); sourceWeight->buffer().device = (uint64_t)(sourceBuffer.get());
} TensorUtils::getDescribe(sourceWeight.get())->extra.offset = 0;
std::shared_ptr<VulkanCommandPool::Buffer> prearrangeCmd( vkBn->getPool().allocBuffer()); std::shared_ptr<VulkanCommandPool::Buffer> prearrangeCmd( vkBn->getPool().allocBuffer());
for (auto& reg : des->regions) { for (auto& reg : des->regions) {
reg.origin = sourceWeight.get(); reg.origin = sourceWeight.get();

View File

@ -70,6 +70,8 @@ public:
} }
} }
types.resize(maxIndex+1); types.resize(maxIndex+1);
std::vector<std::tuple<int, void*, size_t>> constStoragePtrs;
std::vector<std::tuple<int, void*, size_t>> constUniformPtrs;
for (int i=0; i<extra->attr()->size(); ++i) { for (int i=0; i<extra->attr()->size(); ++i) {
auto attr = extra->attr()->GetAs<Attribute>(i); auto attr = extra->attr()->GetAs<Attribute>(i);
if (attr->key()->str() == "input") { if (attr->key()->str() == "input") {
@ -89,13 +91,6 @@ public:
continue; continue;
} }
if (attr->key()->str() == "const") { if (attr->key()->str() == "const") {
auto usageBit = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
if (attr->b()) {
types[attr->i()] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
} else {
usageBit = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
types[attr->i()] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
}
auto b = attr->tensor(); auto b = attr->tensor();
void* result = nullptr; void* result = nullptr;
size_t bufferSize = 0; size_t bufferSize = 0;
@ -112,14 +107,59 @@ public:
MNN_ASSERT(false); MNN_ASSERT(false);
break; break;
} }
std::shared_ptr<VulkanBuffer> vkBuffer(new VulkanBuffer(vkBn->getMemoryPool(), false, bufferSize, nullptr, usageBit, VK_SHARING_MODE_EXCLUSIVE, 0)); if (attr->b()) {
vkBn->copyToGPUBuffer(result, vkBuffer->buffer(), bufferSize, 0); types[attr->i()] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
mConstIndides.emplace_back(std::make_pair(attr->i(), vkBuffer)); constUniformPtrs.emplace_back(std::make_tuple(attr->i(), result, bufferSize));
} else {
types[attr->i()] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
constStoragePtrs.emplace_back(std::make_tuple(attr->i(), result, bufferSize));
}
continue; continue;
} }
} }
auto alignSize = vkBn->device().proty().limits.minMemoryMapAlignment;
size_t offset = 0;
std::shared_ptr<VulkanCommandPool::Buffer> cmdbuffer( vkBn->getPool().allocBuffer());
cmdbuffer->begin(0);
auto merge = [&](const std::vector<std::tuple<int, void*, size_t>>& constPtrs, VkDescriptorType type) {
if (constPtrs.empty()) {
return std::make_tuple(std::vector<std::tuple<int, size_t, size_t>>{}, std::shared_ptr<VulkanBuffer>(nullptr), std::shared_ptr<VulkanBuffer>(nullptr));
}
std::vector<std::tuple<int, size_t, size_t>> mConstOffset;
for (auto& constAttr : constPtrs) {
auto size = UP_DIV(std::get<2>(constAttr), alignSize) * alignSize;
mConstOffset.emplace_back(std::make_tuple(std::get<0>(constAttr), size, offset));
offset += size;
}
std::shared_ptr<VulkanBuffer> hostBuffer(new VulkanBuffer(vkBn->getMemoryPool(), false, offset, nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
auto ptr = (uint8_t*)hostBuffer->map();
for (int i=0; i<constPtrs.size(); ++i) {
::memcpy(ptr + std::get<2>(mConstOffset[i]), std::get<1>(constPtrs[i]), std::get<2>(constPtrs[i]));
}
hostBuffer->unmap();
std::shared_ptr<VulkanBuffer> vkBuffer(new VulkanBuffer(vkBn->getMemoryPool(), false, offset, nullptr, type, VK_SHARING_MODE_EXCLUSIVE, 0));
VkBufferCopy bufferCopy;
bufferCopy.size = offset;
bufferCopy.dstOffset = 0;
bufferCopy.srcOffset = 0;
vkCmdCopyBuffer(cmdbuffer->get(), hostBuffer->buffer(), vkBuffer->buffer(),
1, &bufferCopy);
return std::make_tuple(mConstOffset, vkBuffer, hostBuffer);
};
mConstStorageOffset.clear();
mConstUniformOffset.clear();
auto uniforms = merge(constUniformPtrs, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
mConstUniformOffset = std::get<0>(uniforms);
mConstUniformBuffer = std::get<1>(uniforms);
auto storages = merge(constStoragePtrs, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
mConstStorageOffset = std::get<0>(storages);
mConstStorageBuffer = std::get<1>(storages);
cmdbuffer->end();
auto fence = vkBn->getPool().submit(cmdbuffer->get());
mPipeline = factory->createComputePipeline(data, dataSize, types, std::vector<uint32_t>{}); mPipeline = factory->createComputePipeline(data, dataSize, types, std::vector<uint32_t>{});
mDescriptorSet = mPipeline->createSet(); mDescriptorSet = mPipeline->createSet();
fence->wait();
} }
virtual ~VulkanFuse() { virtual ~VulkanFuse() {
// Remove set firstly before destroy pipeline // Remove set firstly before destroy pipeline
@ -134,8 +174,11 @@ public:
for (int i=0; i<outputs.size(); ++i) { for (int i=0; i<outputs.size(); ++i) {
mDescriptorSet->writeBuffer(vkBn->getBuffer(outputs[i]), mOutputBinding[i]); mDescriptorSet->writeBuffer(vkBn->getBuffer(outputs[i]), mOutputBinding[i]);
} }
for (auto& iter : mConstIndides) { for (auto& iter : mConstStorageOffset) {
mDescriptorSet->writeBuffer(iter.second->buffer(), iter.first, iter.second->size()); mDescriptorSet->writeBuffer(mConstStorageBuffer->buffer(), std::get<0>(iter), std::get<1>(iter), std::get<2>(iter));
}
for (auto& iter : mConstUniformOffset) {
mDescriptorSet->writeBuffer(mConstUniformBuffer->buffer(), std::get<0>(iter), std::get<1>(iter), std::get<2>(iter));
} }
if (mNeedAutoTuning) { if (mNeedAutoTuning) {
auto localSize = vkBn->autoTunePipeline(mPipeline.get(), mDescriptorSet, mGlobalSize); auto localSize = vkBn->autoTunePipeline(mPipeline.get(), mDescriptorSet, mGlobalSize);
@ -153,7 +196,11 @@ private:
std::vector<int> mGlobalSize; std::vector<int> mGlobalSize;
std::vector<int> mInputBinding; std::vector<int> mInputBinding;
std::vector<int> mOutputBinding; std::vector<int> mOutputBinding;
std::vector<std::pair<int, std::shared_ptr<VulkanBuffer>>> mConstIndides; std::shared_ptr<VulkanBuffer> mConstStorageBuffer;
std::shared_ptr<VulkanBuffer> mConstUniformBuffer;
// Index, offset, size
std::vector<std::tuple<int, size_t, size_t>> mConstStorageOffset;
std::vector<std::tuple<int, size_t, size_t>> mConstUniformOffset;
SharedPtr<VulkanPipeline> mPipeline; SharedPtr<VulkanPipeline> mPipeline;
SharedPtr<VulkanLayout::DescriptorSet> mDescriptorSet; SharedPtr<VulkanLayout::DescriptorSet> mDescriptorSet;
bool mNeedAutoTuning = false; bool mNeedAutoTuning = false;

View File

@ -82,7 +82,7 @@ void VulkanRaster::onEncodeFast(const Tensor* input, const Tensor* output, const
for (int i=0; i< des->regions.size(); ++i) { for (int i=0; i< des->regions.size(); ++i) {
auto& slice = des->regions[i]; auto& slice = des->regions[i];
Tensor::InsideDescribe::Region newRegion; Tensor::InsideDescribe::Region newRegion;
OpCommonUtils::turnToPackRegion(slice, newRegion, output, 4); OpCommonUtils::turnToPackRegion(slice, newRegion, output, 4, true);
// TODO: Find better way // TODO: Find better way
newRegion.dst.offset /= 4; newRegion.dst.offset /= 4;
newRegion.src.offset /= 4; newRegion.src.offset /= 4;
@ -92,6 +92,8 @@ void VulkanRaster::onEncodeFast(const Tensor* input, const Tensor* output, const
auto group = UP_DIV(total, 256); auto group = UP_DIV(total, 256);
std::shared_ptr<VulkanLayout::DescriptorSet> describe(blitPipeline->createSet()); std::shared_ptr<VulkanLayout::DescriptorSet> describe(blitPipeline->createSet());
std::shared_ptr<VulkanBuffer> uniform = vkBn->allocUniform(); std::shared_ptr<VulkanBuffer> uniform = vkBn->allocUniform();
::memcpy(uniform->map(), &info, sizeof(SamplerInfo));
uniform->unmap();
auto srcTensor = vkBn->getTensorBuffer(slice.origin); auto srcTensor = vkBn->getTensorBuffer(slice.origin);
auto srcTensorSize = vkBn->getTensorSize(slice.origin); auto srcTensorSize = vkBn->getTensorSize(slice.origin);
describe->writeBuffer(dstTensor.first->buffer(), 0, dstTensorSize, dstTensor.second); describe->writeBuffer(dstTensor.first->buffer(), 0, dstTensorSize, dstTensor.second);
@ -127,7 +129,7 @@ ErrorCode VulkanRaster::onEncode(const std::vector<Tensor *> &____inputs, const
fast = false; fast = false;
break; break;
} }
if (!OpCommonUtils::canBlitFast(slice, output)) { if (!OpCommonUtils::canBlitFast(slice, output, 4, true)) {
fast = false; fast = false;
break; break;
} }

View File

@ -148,7 +148,7 @@ ErrorCode VulkanRasterSort::onEncode(const std::vector<Tensor *> &inputs, const
region2.dstOffset = std::get<2>(output); region2.dstOffset = std::get<2>(output);
region2.srcOffset = pointOffsetSum.second + (pointOffsetBytes / sizeof(uint32_t) - 1) * sizeof(uint32_t); region2.srcOffset = pointOffsetSum.second + (pointOffsetBytes / sizeof(uint32_t) - 1) * sizeof(uint32_t);
vkCmdCopyBuffer(cmdBuffer->get(), ((VulkanBuffer*)pointOffsetSum.first)->buffer(), std::get<0>(output), 1, &region); vkCmdCopyBuffer(cmdBuffer->get(), ((VulkanBuffer*)pointOffsetSum.first)->buffer(), std::get<0>(output), 1, &region2);
cmdBuffer->barrierSource(sortNumber->buffer(), 0, sizeof(uint32_t)); cmdBuffer->barrierSource(sortNumber->buffer(), 0, sizeof(uint32_t));
} }

View File

@ -24,8 +24,7 @@ VulkanCommandPool::~VulkanCommandPool() {
mDevice.destroyCommandPool(mPool); mDevice.destroyCommandPool(mPool);
// FUNC_PRINT(1); // FUNC_PRINT(1);
} }
std::shared_ptr<VulkanFence> VulkanCommandPool::submit(VkCommandBuffer buffer) const {
void VulkanCommandPool::submitAndWait(VkCommandBuffer buffer) const {
auto b = buffer; auto b = buffer;
auto fence = std::make_shared<VulkanFence>(mDevice); auto fence = std::make_shared<VulkanFence>(mDevice);
VkSubmitInfo submit_info = {/* .sType = */ VK_STRUCTURE_TYPE_SUBMIT_INFO, VkSubmitInfo submit_info = {/* .sType = */ VK_STRUCTURE_TYPE_SUBMIT_INFO,
@ -40,6 +39,11 @@ void VulkanCommandPool::submitAndWait(VkCommandBuffer buffer) const {
auto fenceReal = fence->get(); auto fenceReal = fence->get();
auto queue = mDevice.acquireDefaultDevQueue(); auto queue = mDevice.acquireDefaultDevQueue();
CALL_VK(vkQueueSubmit(queue, 1, &submit_info, fenceReal)); CALL_VK(vkQueueSubmit(queue, 1, &submit_info, fenceReal));
return fence;
}
void VulkanCommandPool::submitAndWait(VkCommandBuffer buffer) const {
auto fence = submit(buffer);
fence->wait(); fence->wait();
} }

View File

@ -11,6 +11,7 @@
#include "core/NonCopyable.hpp" #include "core/NonCopyable.hpp"
#include "backend/vulkan/component/VulkanDevice.hpp" #include "backend/vulkan/component/VulkanDevice.hpp"
#include "backend/vulkan/component/VulkanFence.hpp"
#include "backend/vulkan/vulkan/vulkan_wrapper.h" #include "backend/vulkan/vulkan/vulkan_wrapper.h"
namespace MNN { namespace MNN {
class VulkanImage; class VulkanImage;
@ -49,6 +50,7 @@ public:
} }
void submitAndWait(VkCommandBuffer buffer) const; void submitAndWait(VkCommandBuffer buffer) const;
std::shared_ptr<VulkanFence> submit(VkCommandBuffer buffer) const;
private: private:
const VulkanDevice& mDevice; const VulkanDevice& mDevice;

View File

@ -11,18 +11,23 @@
//#define MNN_VULKAN_PRINT_EXT //#define MNN_VULKAN_PRINT_EXT
namespace MNN { namespace MNN {
static uint32_t _getLocalMemorySize(const VkPhysicalDeviceMemoryProperties& memProty) { static uint32_t _getLocalMemorySize(const VkPhysicalDeviceMemoryProperties& memProty) {
#ifdef __APPLE__
// For mac vulkan driver can not get correct local size
return 16384;
#else
int32_t localMemorySize = 0; int32_t localMemorySize = 0;
for (int i=0; i<VK_MAX_MEMORY_TYPES; ++i) { for (int i=0; i<memProty.memoryHeapCount; ++i) {
auto& heap = memProty.memoryHeaps[i]; auto& heap = memProty.memoryHeaps[i];
if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
auto size = (int32_t)heap.size; auto size = (int32_t)heap.size;
if (size > 0) { if (size > 0) {
localMemorySize = size; localMemorySize = size;
}
break; break;
} }
} }
}
return localMemorySize; return localMemorySize;
#endif
} }
VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance) VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance)
: mOwner(true), : mOwner(true),
@ -118,6 +123,7 @@ VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance)
} }
vkGetPhysicalDeviceProperties(mPhysicalDevice, &mDeviceProty); vkGetPhysicalDeviceProperties(mPhysicalDevice, &mDeviceProty);
vkGetPhysicalDeviceMemoryProperties(mPhysicalDevice, &mMemoryProty); vkGetPhysicalDeviceMemoryProperties(mPhysicalDevice, &mMemoryProty);
mLocalMemorySize = _getLocalMemorySize(mMemoryProty);
getDeviceQueue(mQueueFamilyIndex, 0, mQueue); getDeviceQueue(mQueueFamilyIndex, 0, mQueue);
// query subgroupSize // query subgroupSize
@ -132,7 +138,6 @@ VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance)
vkGetPhysicalDeviceProperties2(mPhysicalDevice, &deviceProperties2); vkGetPhysicalDeviceProperties2(mPhysicalDevice, &deviceProperties2);
mSubgroupSize = subgroupProperties.subgroupSize; mSubgroupSize = subgroupProperties.subgroupSize;
} }
mLocalMemorySize = _getLocalMemorySize(mMemoryProty);
#ifdef MNN_VULKAN_PRINT_EXT #ifdef MNN_VULKAN_PRINT_EXT
uint32_t pPropertyCount; uint32_t pPropertyCount;
vkEnumerateInstanceExtensionProperties(nullptr, &pPropertyCount, nullptr); vkEnumerateInstanceExtensionProperties(nullptr, &pPropertyCount, nullptr);
@ -146,6 +151,7 @@ VulkanDevice::VulkanDevice(std::shared_ptr<VulkanInstance> instance)
FUNC_PRINT(mDeviceProty.limits.maxComputeWorkGroupCount[0]); FUNC_PRINT(mDeviceProty.limits.maxComputeWorkGroupCount[0]);
FUNC_PRINT(mDeviceProty.limits.maxComputeWorkGroupInvocations); FUNC_PRINT(mDeviceProty.limits.maxComputeWorkGroupInvocations);
FUNC_PRINT(mDeviceProty.limits.maxComputeSharedMemorySize); FUNC_PRINT(mDeviceProty.limits.maxComputeSharedMemorySize);
FUNC_PRINT(mLocalMemorySize);
#endif #endif
} }

View File

@ -350,7 +350,11 @@ void Tensor::print() const {
// convert to host if needed // convert to host if needed
auto printee = this; auto printee = this;
bool device = this->buffer().host == NULL && this->buffer().device != 0; auto bnType = MNN_FORWARD_CPU;
if (nullptr != mDescribe->getBackend()) {
bnType = mDescribe->getBackend()->type();
}
bool device = bnType != MNN_FORWARD_CPU;
if (device) { if (device) {
printee = this->createHostTensorFromDevice(this, true); printee = this->createHostTensorFromDevice(this, true);
} }

View File

@ -105,6 +105,7 @@ void ConvertUtils::broadcastto(Tensor* input, Tensor* output, bool forward) {
reg.dst.stride[1] = multipler; reg.dst.stride[1] = multipler;
reg.dst.stride[2] = 1; reg.dst.stride[2] = 1;
reg.origin = input; reg.origin = input;
return;
} }
int32_t inputShape[MNN_MAX_TENSOR_DIM]; int32_t inputShape[MNN_MAX_TENSOR_DIM];
int32_t outputShape[MNN_MAX_TENSOR_DIM]; int32_t outputShape[MNN_MAX_TENSOR_DIM];