mirror of https://github.com/alibaba/MNN.git
155 lines
7.2 KiB
C++
155 lines
7.2 KiB
C++
|
|
//
//  VulkanGroupConvolution.cpp
//  MNN
//
//  Created by MNN on 2019/01/31.
//  Copyright © 2018, Alibaba Group Holding Limited
//
|
||
|
|
|
||
|
|
#include "VulkanGroupConvolution.hpp"
|
||
|
|
#include "ConvolutionIntFactory.hpp"
|
||
|
|
#include "Macro.h"
|
||
|
|
#include "TensorUtils.hpp"
|
||
|
|
namespace MNN {
|
||
|
|
/**
 * Builds a grouped-convolution execution for the Vulkan backend.
 *
 * @param op       Operator description; its Convolution2D payload is kept for onResize.
 * @param backend  Owning backend; also stored downcast to VulkanBackend.
 *
 * Both scratch tensors (mTempSrc / mTempDst) are constructed with the argument 4.
 */
VulkanGroupConvolution::VulkanGroupConvolution(const Op *op, Backend *backend)
    : Execution(backend), mTempSrc(4), mTempDst(4) {
    // The downcast is unchecked: the caller is expected to pass a VulkanBackend.
    mBackend       = static_cast<VulkanBackend *>(backend);
    mConvParamater = op->main_as_Convolution2D();
}
|
||
|
|
|
||
|
|
/**
 * Nothing to release by hand: the destructor body is intentionally empty and
 * cleanup is left to the members' own destructors.
 */
VulkanGroupConvolution::~VulkanGroupConvolution() {
    // intentionally empty
}
|
||
|
|
/**
 * Runs every per-group entry prepared by onResize, in order.
 *
 * For each entry: push the first recorded command buffer, execute the
 * sub-convolution on the shared temp tensors, then push the second recorded
 * command buffer.
 *
 * @param inputs   Not read here; the sub-convolutions run on mTempInputs.
 * @param outputs  Not read here; the sub-convolutions run on mTempOutputs.
 * @return NO_ERROR always; results of the sub-convolutions' onExecute are not inspected.
 */
ErrorCode VulkanGroupConvolution::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    const size_t groupCount = mSubConvolutions.size();
    for (size_t g = 0; g < groupCount; ++g) {
        auto &entry        = mSubConvolutions[g];
        auto &copyInCmd    = std::get<0>(entry);
        auto &convolution  = std::get<1>(entry);
        auto &copyOutCmd   = std::get<2>(entry);

        mBackend->pushCommand(copyInCmd->get());
        convolution->onExecute(mTempInputs, mTempOutputs);
        mBackend->pushCommand(copyOutCmd->get());
    }
    return NO_ERROR;
}
|
||
|
|
|
||
|
|
/**
 * Prepares the per-group work for a grouped convolution.
 *
 * On the first call (mSubConvolutions empty) one sub-convolution is created per
 * group, each given its own slice of the weights and bias. On every call the
 * temp tensors are reshaped to hold a single group's channels, and for each
 * group two command buffers are recorded:
 *   - element 0 of the tuple: copy the group's depth slice of the input image
 *     into the temp source image;
 *   - element 2 of the tuple: copy the temp destination image into the group's
 *     depth slice of the output image.
 * Element 1 (the sub-convolution) is resized against the temp tensors.
 *
 * @param inputs   inputs[0] is the source tensor.
 * @param outputs  outputs[0] is the destination tensor.
 * @return NO_ERROR always; results of the sub-convolutions' onResize are not inspected.
 */
ErrorCode VulkanGroupConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto input      = inputs[0];
    auto output     = outputs[0];
    const int group = mConvParamater->common()->group();
    mTempInputs     = std::vector<Tensor *>{&mTempSrc};
    mTempOutputs    = std::vector<Tensor *>{&mTempDst};

    if (mSubConvolutions.empty()) {
        mSubConvolutions.resize(group);
        const auto convReal    = mConvParamater;
        const auto common      = convReal->common();
        const auto outputCount = common->outputCount();
        const int fh           = common->kernelY();
        const int fw           = common->kernelX();
        int groupCI            = 0;
        const float *source    = nullptr;
        // Declared outside the branch so the dequantized weights stay alive
        // while the sub-convolutions are created from `source`.
        std::shared_ptr<ConvolutionIntFactory::Int8Common> quanCommon;
        // check whether idst quantized op
        if (nullptr != convReal->quanParameter()) {
            quanCommon = ConvolutionIntFactory::load(convReal->quanParameter(), true);
            groupCI    = quanCommon->weightFloat.size() / (outputCount * fh * fw);
            source     = quanCommon->weightFloat.get();
        } else {
            groupCI = convReal->weight()->size() / (outputCount * fh * fw);
            source  = convReal->weight()->data();
        }

        const int groupCO         = outputCount / group;
        const int groupWeightSize = groupCI * fw * fh * groupCO;

        for (int i = 0; i < group; ++i) {
            const float *curWeightPtr = source + i * groupWeightSize;
            const float *curBiasPtr   = convReal->bias()->data() + i * groupCO;
            std::shared_ptr<Execution> subConvolution(VulkanConvolutionImpl::create(
                mBackend, mConvParamater->common(), input, output, curWeightPtr, curBiasPtr, groupCI, groupCO));
            std::get<1>(mSubConvolutions[i]) = subConvolution;
        }
    }

    // copy input-output's shape and acquire memory
    TensorUtils::copyShape(input, &mTempSrc, true);
    mTempSrc.setLength(1, input->channel() / group);
    TensorUtils::copyShape(output, &mTempDst, true);
    mTempDst.setLength(1, output->channel() / group);
    backend()->onAcquireBuffer(&mTempSrc, Backend::DYNAMIC);
    backend()->onAcquireBuffer(&mTempDst, Backend::DYNAMIC);

    auto inputImage  = mBackend->findTensor(input->deviceId())->image()->get();
    auto outputImage = mBackend->findTensor(output->deviceId())->image()->get();

    auto tempSrcImage = mBackend->findTensor(mTempSrc.deviceId())->image()->get();
    auto tempDstImage = mBackend->findTensor(mTempDst.deviceId())->image()->get();

    // Depth of one group's slice, in units of 4 channels.
    // NOTE(review): integer division truncates when the per-group channel
    // count is not a multiple of 4 - confirm callers guarantee alignment.
    const int tempChannelDiv4Src = mTempSrc.channel() / 4;
    const int tempChannelDiv4Dst = mTempDst.channel() / 4;

    // Builds a copy region for mip 0 / array layer 0 of the color aspect with
    // all offsets zero (left at zero by the memset) and the given extent.
    const auto makeCopyRegion = [](int width, int height, int depth) -> VkImageCopy {
        VkImageCopy region;
        ::memset(&region, 0, sizeof(region));
        region.srcSubresource.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT;
        region.srcSubresource.mipLevel       = 0;
        region.srcSubresource.baseArrayLayer = 0;
        region.srcSubresource.layerCount     = 1;

        region.dstSubresource.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT;
        region.dstSubresource.mipLevel       = 0;
        region.dstSubresource.baseArrayLayer = 0;
        region.dstSubresource.layerCount     = 1;

        region.extent.width  = width;
        region.extent.height = height;
        region.extent.depth  = depth;
        return region;
    };

    VkImageCopy copyRegionInput  = makeCopyRegion(mTempSrc.width(), mTempSrc.height(), tempChannelDiv4Src);
    VkImageCopy copyRegionOutput = makeCopyRegion(mTempDst.width(), mTempDst.height(), tempChannelDiv4Dst);

    for (int i = 0; i < group; ++i) {
        {
            // Read group i's slice of the input image; the temp image is written from depth 0.
            copyRegionInput.srcOffset.z = i * tempChannelDiv4Src;
            std::get<0>(mSubConvolutions[i])
                .reset(const_cast<VulkanCommandPool::Buffer *>(mBackend->getPool().allocBuffer()));
            auto cmdBuffer = std::get<0>(mSubConvolutions[i]).get();
            cmdBuffer->begin(0);
            vkCmdCopyImage(cmdBuffer->get(), inputImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tempSrcImage,
                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copyRegionInput);
            cmdBuffer->end();
        }
        std::get<1>(mSubConvolutions[i])->onResize(mTempInputs, mTempOutputs);
        {
            // Write the temp result into group i's slice of the output image.
            copyRegionOutput.dstOffset.z = i * tempChannelDiv4Dst;
            std::get<2>(mSubConvolutions[i])
                .reset(const_cast<VulkanCommandPool::Buffer *>(mBackend->getPool().allocBuffer()));
            auto cmdBuffer = std::get<2>(mSubConvolutions[i]).get();
            cmdBuffer->begin(0);
            vkCmdCopyImage(cmdBuffer->get(), tempDstImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, outputImage,
                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copyRegionOutput);
            cmdBuffer->end();
        }
    }
    backend()->onReleaseBuffer(&mTempSrc, Backend::DYNAMIC);
    backend()->onReleaseBuffer(&mTempDst, Backend::DYNAMIC);
    return NO_ERROR;
}
|
||
|
|
} // namespace MNN
|