mirror of https://github.com/alibaba/MNN.git
227 lines
9.0 KiB
C++
227 lines
9.0 KiB
C++
//
|
|
// VulkanDeconvolution.cpp
|
|
// MNN
|
|
//
|
|
// Created by MNN on 2019/01/31.
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
//
|
|
|
|
#include "VulkanDeconvolution.hpp"
|
|
#include "core/Macro.h"
|
|
#include "core/TensorUtils.hpp"
|
|
namespace MNN {
|
|
// File-local placeholder: takes no arguments and performs no work.
static void _initKernelRegion() {
    // Intentionally empty.
}
|
|
// Forwards the backend to VulkanBasicExecution; all other members are filled in by create().
VulkanDeconvolution::VulkanDeconvolution(Backend* bn)
    : VulkanBasicExecution(bn) {
    // Do nothing
}
|
|
|
|
/**
 * Builds a Vulkan execution for a regular or depthwise deconvolution op.
 *
 * Steps performed here:
 *  1. Upload the bias (zero padded to a multiple of 4 channels) and allocate the uniform parameter buffer.
 *  2. Describe, as raster regions, how the weight is re-packed into mKernel.
 *  3. For constant weights (!multiInputs), upload the weight and run the re-pack once.
 *  4. Fetch the compute pipeline matching the op type and post-treat macro.
 *
 * @param bn          backend the execution is created for; it is used as a VulkanBackend.
 * @param conv        convolution parameters (common options, optional bias, weights).
 * @param type        OpType_Deconvolution or OpType_DeconvolutionDepthwise.
 * @param multiInputs true when the weight is supplied as a runtime input, in which case no
 *                    constant weight is uploaded here and the re-pack is deferred to onEncode.
 * @return a newly allocated execution, or nullptr when a device buffer could not be acquired.
 */
VulkanDeconvolution* VulkanDeconvolution::create(Backend* bn, const Convolution2D* conv, OpType type, bool multiInputs) {
    // Held by unique_ptr so the early `return nullptr` paths below no longer leak the object.
    std::unique_ptr<VulkanDeconvolution> exeRes(new VulkanDeconvolution(bn));
    exeRes->mConvCommonOption = conv->common();
    auto vkBn = (VulkanBackend*)bn;

    // Bias buffer: outputCount rounded up to a multiple of 4, zero filled first.
    int outputC4 = UP_DIV(exeRes->mConvCommonOption->outputCount(), 4);
    auto biasBuffer = std::make_shared<VulkanBuffer>(vkBn->getMemoryPool(), false, outputC4 * 4 * sizeof(float));
    auto biasPtr = biasBuffer->map();
    ::memset(biasPtr, 0, outputC4 * 4 * sizeof(float));
    if (conv->bias() != nullptr) {
        // Never copy more floats than the buffer holds, even if the model's bias is longer than expected.
        const size_t biasCapacity = static_cast<size_t>(outputC4) * 4;
        const size_t biasCount    = conv->bias()->size();
        const size_t copyCount    = biasCount < biasCapacity ? biasCount : biasCapacity;
        ::memcpy(biasPtr, conv->bias()->data(), copyCount * sizeof(float));
    }
    biasBuffer->unmap();
    exeRes->mBias = biasBuffer;

    // Uniform buffer that onEncode fills with the deconvolution parameters.
    exeRes->mConvParam = std::make_shared<VulkanBuffer>(vkBn->getMemoryPool(), false,
                                                        sizeof(VulkanConvolutionCommon::ConvolutionParameter), nullptr,
                                                        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);

    int kh   = exeRes->mConvCommonOption->kernelY();
    int kw   = exeRes->mConvCommonOption->kernelX();
    int co   = exeRes->mConvCommonOption->outputCount();
    int coC4 = UP_DIV(co, 4);
    int ci   = exeRes->mConvCommonOption->inputCount();
    if (type == OpType_DeconvolutionDepthwise) {
        ci = 1;
    }

    const float* tempWeight = nullptr;
    int tempWeightSize      = 0;
    std::shared_ptr<ConvolutionCommon::Int8Common> quanCommon;
    if (!multiInputs) {
        ConvolutionCommon::getConvParameters(&quanCommon, bn, conv, &tempWeight, &tempWeightSize);
        MNN_ASSERT(nullptr != tempWeight);
        if (0 >= ci) {
            // inputCount is not recorded: derive it from the weight size.
            ci = tempWeightSize / co / kw / kh;
        }
    }

    int ciC4 = UP_DIV(ci, 4);
    if (type == OpType_Deconvolution) {
        exeRes->mKernel.reset(MNN::Tensor::createDevice<float>({kw*kh, coC4, ciC4, 16}));
    } else {
        exeRes->mKernel.reset(MNN::Tensor::createDevice<float>({kw*kh, coC4, 4}));
    }
    exeRes->mKernelReorder = VulkanRaster::create(exeRes->mKernel.get(), vkBn);

    // Fill the kernel tensor's region list; each region is one strided copy of the re-pack.
    auto des = TensorUtils::getDescribe(exeRes->mKernel.get());
    int pack = 4;
    if (OpType_DeconvolutionDepthwise == type) {
        // One region per lane i of the 4-wide output-channel pack.
        for (int i=0; i<pack; ++i) {
            auto oSize = (co + pack - 1 - i) / pack;
            if (oSize <= 0) {
                continue;
            }
            Tensor::InsideDescribe::Region reg;
            reg.size[0] = 1;
            reg.size[1] = oSize;
            reg.size[2] = kh * kw;
            reg.dst.offset = i;
            reg.dst.stride[0] = 0;
            reg.dst.stride[1] = pack * kh * kw;
            reg.dst.stride[2] = pack;

            reg.src.offset = kh * kw * i;
            reg.src.stride[0] = 0;
            reg.src.stride[1] = pack * kh * kw;
            reg.src.stride[2] = 1;
            des->regions.emplace_back(std::move(reg));
        }
    } else {
        // One region per (output lane i, input lane j) pair of the 4x4 pack.
        for (int i=0; i<pack; ++i) {
            auto oSize = (co + pack - 1 - i) / pack;
            if (oSize <= 0) {
                continue;
            }
            for (int j=0; j<pack; ++j) {
                int cSize = (ci + pack - 1 - j) / pack;
                if (cSize <= 0) {
                    continue;
                }
                Tensor::InsideDescribe::Region reg;
                reg.size[0] = oSize;
                reg.size[1] = cSize;
                reg.size[2] = kh * kw;
                reg.dst.offset = i + j * pack;
                reg.dst.stride[0] = pack * pack * ciC4 * kh * kw;
                reg.dst.stride[1] = pack * pack;
                reg.dst.stride[2] = pack * pack * ciC4;

                reg.src.offset = kh * kw * i + kh * kw * co * j;
                reg.src.stride[0] = pack * kh * kw;
                reg.src.stride[1] = pack * kh * kw * co;
                reg.src.stride[2] = 1;
                des->regions.emplace_back(std::move(reg));
            }
        }
    }

    if (!multiInputs) {
        MNN_ASSERT(nullptr != tempWeight);
        auto res = vkBn->onAcquireBuffer(exeRes->mKernel.get(), Backend::STATIC);
        if (!res) {
            // exeRes is destroyed automatically here.
            return nullptr;
        }
        // Staging tensor that holds the weight in its original layout.
        std::shared_ptr<Tensor> tempWeightTensor(Tensor::createDevice<float>({tempWeightSize}));
        res = vkBn->onAcquireBuffer(tempWeightTensor.get(), Backend::STATIC);
        if (!res) {
            // exeRes is destroyed automatically here.
            return nullptr;
        }
        auto tempWeightBuffer = reinterpret_cast<VulkanBuffer*>(tempWeightTensor->deviceId());
        auto tempReorderWeight = (float*)tempWeightBuffer->map(TensorUtils::getDescribe(tempWeightTensor.get())->extra.offset);
        ::memcpy(tempReorderWeight, tempWeight, tempWeightSize * sizeof(float));
        tempWeightBuffer->unmap();

        // Point every region at the staging tensor, then record and run the re-pack once.
        std::shared_ptr<VulkanCommandPool::Buffer> prearrangeCmd( vkBn->getPool().allocBuffer());
        for (auto& reg : des->regions) {
            reg.origin = tempWeightTensor.get();
        }
        prearrangeCmd->begin(0);
        exeRes->mKernelReorder.exe->onEncode({}, {exeRes->mKernel.get()}, prearrangeCmd.get());
        prearrangeCmd->end();
        vkBn->pushCommand(prearrangeCmd->get());
        vkBn->onExecuteEnd();
        // The re-pack is only needed once for constant weights.
        exeRes->mKernelReorder.exe = nullptr;
    }

    // Descriptor layout: four storage buffers followed by one uniform buffer.
    std::vector<VkDescriptorType> types{
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
    };
    std::string macro = VulkanConvolutionCommon::getPostTreatMacro(exeRes->mConvCommonOption);

    if (type == OpType_Deconvolution) {
        exeRes->mPipeline = vkBn->getPipeline("glsl_deconvolution_" + macro + "comp", types);
    } else {
        MNN_ASSERT(type == OpType_DeconvolutionDepthwise);
        exeRes->mPipeline = vkBn->getPipeline("glsl_deconvolutionDepthwise_" + macro + "comp", types);
    }
    exeRes->mPipelineSet.reset(exeRes->mPipeline->createSet());
    // Success: hand the raw pointer to the caller.
    return exeRes.release();
}
|
|
|
|
/**
 * Records the deconvolution dispatch into cmdBuffer.
 *
 * @param inputs    inputs[0] is the source tensor. When present, inputs[1] is used as the weight
 *                  (re-packed into mKernel here) and inputs[2] as the bias.
 * @param outputs   outputs[0] is the destination tensor.
 * @param cmdBuffer command buffer that receives the recorded commands.
 * @return NO_ERROR on success, OUT_OF_MEMORY when the dynamic kernel buffer cannot be acquired,
 *         or the error code reported by the kernel re-pack.
 */
ErrorCode VulkanDeconvolution::onEncode(const std::vector<Tensor*>& inputs,
                                        const std::vector<Tensor*>& outputs,
                                        const VulkanCommandPool::Buffer* cmdBuffer) {
    auto src = inputs[0];
    auto dst = outputs[0];
    const int ocDiv4 = UP_DIV(dst->channel(), 4);
    auto common = mConvCommonOption;
    auto extra = static_cast<VulkanBackend*>(backend());

    if (inputs.size() >= 2) {
        // The weight is a runtime input: acquire the packed kernel buffer and encode the re-pack.
        auto res = extra->onAcquireBuffer(mKernel.get(), Backend::DYNAMIC);
        if (!res) {
            // Report the failure instead of pretending the encode succeeded.
            return OUT_OF_MEMORY;
        }
        auto kernelBuffer = extra->getBuffer(mKernel.get());
        auto des = TensorUtils::getDescribe(mKernel.get());
        for (auto& reg : des->regions) {
            reg.origin = inputs[1];
        }
        auto rasterCode = mKernelReorder.exe->onEncode({}, {mKernel.get()}, cmdBuffer);
        if (NO_ERROR != rasterCode) {
            return rasterCode;
        }
        // Barrier on the kernel buffer between the re-pack and the dispatch that reads it.
        cmdBuffer->barrierSource(kernelBuffer);
    }

    {
        // Fill the uniform parameter block for the current input/output tensors.
        auto convCons = reinterpret_cast<VulkanConvolutionCommon::ConvolutionParameter*>(mConvParam->map());
        VulkanConvolutionCommon::writeDeconvolution(convCons, common, src, dst);
        mConvParam->unmap();
    }

    auto dstBuffer = extra->getBuffer(dst);
    auto srcBuffer = extra->getBuffer(src);
    auto kernelBuffer = extra->getBuffer(mKernel.get());

    // Bindings: 0 = output, 1 = input, 2 = kernel, 3 = bias, 4 = parameters.
    mPipelineSet->writeBuffer(dstBuffer, 0);
    mPipelineSet->writeBuffer(srcBuffer, 1);
    mPipelineSet->writeBuffer(kernelBuffer, 2);
    if (inputs.size() >= 3) {
        auto biasBuffer = extra->getBuffer(inputs[2]);
        mPipelineSet->writeBuffer(biasBuffer, 3);
    } else {
        mPipelineSet->writeBuffer(mBias->buffer(), 3, mBias->size());
    }
    mPipelineSet->writeBuffer(mConvParam->buffer(), 4, mConvParam->size());
    mPipeline->bind(cmdBuffer->get(), mPipelineSet->get());

    // One work item per output element of a 4-channel pack.
    // NOTE(review): 64 presumably matches the shader's local size - confirm against the GLSL source.
    auto totalCount = dst->width() * dst->height() * ocDiv4 * dst->batch();
    vkCmdDispatch(cmdBuffer->get(), UP_DIV(totalCount, 64), 1, 1);

    if (inputs.size() >= 2) {
        extra->onReleaseBuffer(mKernel.get(), Backend::DYNAMIC);
    }

    return NO_ERROR;
}
|
|
|
|
// Creator registered with VulkanBackend; forwards the op's parameters to VulkanDeconvolution::create.
class VulkanDeconvolutionCreator : public VulkanBackend::Creator {
public:
    virtual VulkanBasicExecution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                           const MNN::Op* op, Backend* backend) const override {
        // More than one input is passed to create() as its multiInputs flag.
        const bool hasExtraInputs = inputs.size() > 1;
        const auto convParam      = op->main_as_Convolution2D();
        const auto opType         = op->type();
        return VulkanDeconvolution::create(backend, convParam, opType, hasExtraInputs);
    }
};
|
|
|
|
static bool gResistor = []() {
|
|
VulkanBackend::addCreator(OpType_DeconvolutionDepthwise, new VulkanDeconvolutionCreator);
|
|
VulkanBackend::addCreator(OpType_Deconvolution, new VulkanDeconvolutionCreator);
|
|
return true;
|
|
}();
|
|
} // namespace MNN
|