//
//  VulkanConcat.cpp
//  MNN
//
//  Created by MNN on 2019/01/31.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/vulkan/execution/VulkanConcat.hpp"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
namespace MNN {
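// Uniform block handed to the concat compute shaders. Each ivec4 is filled
// below as {width, height, channels (channel/4 groups on the image path),
// batch}; offset carries the running start position along the concat axis.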
struct ConcatParam {
    ivec4 inImageSize;
    ivec4 outImageSize;
    ivec4 offset; // w, h, c, 0
};

VulkanConcat::VulkanConcat(const Op* op, Backend* bn) : VulkanBasicExecution(bn) {
    auto axis  = op->main_as_Axis()->axis();
    mAxis      = axis;
    mVkbackend = static_cast<VulkanBackend*>(bn);
}

ErrorCode VulkanConcat::onEncode(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                 const VulkanCommandPool::Buffer* cmdBuffer) {
    auto output = outputs[0];
    int axis = mAxis;
    if (0 > axis) {
        axis = output->dimensions() + axis;
    }
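    // Fast path: a channel-axis concat can be blitted image-to-image only when every
    // input except the last has a channel count divisible by 4, so the NC4HW4
    // 4-channel packing stays aligned in the output (the last input may be ragged
    // because nothing is written after it). Concats along other axes always take the
    // image path.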
    bool fastMode = true;
    if (1 == axis) {
        for (int i = 0; i < inputs.size() - 1; ++i) {
            auto input = inputs[i];
            if (input->channel() % 4 != 0) {
                fastMode = false;
                break;
            }
        }
    }

    if (fastMode) {
        mImageConcat = std::make_shared<VulkanConcatImageImpl>(axis, mVkbackend);
        mImageConcat->encodeImageImpl(inputs, output, cmdBuffer);
    } else {
        mBufferConcat = std::make_shared<VulkanConcatBufferImpl>(axis, mVkbackend);
        mBufferConcat->encodeBufferImpl(inputs, output, cmdBuffer);
    }

    return NO_ERROR;
}

VulkanConcatImageImpl::VulkanConcatImageImpl(int axis, VulkanBackend* vkBackend) : mAxis(axis), mVkbackend(vkBackend) {
    mSampler = vkBackend->getCommonSampler();
}

ErrorCode VulkanConcatImageImpl::encodeImageImpl(const std::vector<Tensor*>& inputs, Tensor* output,
                                                 const VulkanCommandPool::Buffer* cmdBuffer) {
    mConstBuffers.clear();
    mSets.clear();
    int axisOffset = 0;

    auto pipeline = mVkbackend->getPipeline(
        "glsl_blitC4_comp", /*glsl_blitC4_comp, glsl_blitC4_comp_len,*/ std::vector<VkDescriptorType>{
            VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER});
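    // Encode one blit per input: each dispatch copies that input's NC4HW4 image into
    // the output image, starting at the offset accumulated so far along the concat axis.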
    for (int i = 0; i < inputs.size(); ++i) {
        auto input       = inputs[i];
        int icDiv4       = UP_DIV(input->channel(), 4);
        auto constBuffer = std::make_shared<VulkanBuffer>(mVkbackend->getMemoryPool(), false, sizeof(ConcatParam),
                                                          nullptr, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
        mConstBuffers.push_back(constBuffer);
        auto constValue = reinterpret_cast<ConcatParam*>(constBuffer->map());
        ::memset(constValue, 0, sizeof(ConcatParam));
        constValue->inImageSize[0]  = input->width();
        constValue->inImageSize[1]  = input->height();
        constValue->inImageSize[2]  = icDiv4;
        constValue->inImageSize[3]  = input->batch();
        constValue->outImageSize[0] = output->width();
        constValue->outImageSize[1] = output->height();
        constValue->outImageSize[2] = UP_DIV(output->channel(), 4);
        constValue->outImageSize[3] = output->batch();
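        // The NC4HW4 image lays out batch x ceil(c/4) slices along its depth, so a
        // concat over batch (axis 0) or channel (axis 1) advances the z offset, while
        // height (axis 2) and width (axis 3) advance y and x respectively.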
        switch (mAxis) {
            case 0:
                constValue->offset[2] = axisOffset;
                axisOffset += input->batch() * icDiv4;
                break;
            case 1:
                constValue->offset[2] = axisOffset;
                axisOffset += icDiv4;
                break;
            case 2:
                constValue->offset[1] = axisOffset;
                axisOffset += input->height();
                break;
            case 3:
                constValue->offset[0] = axisOffset;
                axisOffset += input->width();
                break;
            default:
                return NOT_SUPPORT;
        }
        constBuffer->unmap();
        std::shared_ptr<VulkanPipeline::DescriptorSet> desSet;
        desSet.reset(pipeline->createSet());
        desSet->writeImage(reinterpret_cast<VkImageView>(output->deviceId()), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL,
                           0);
        desSet->writeImage(reinterpret_cast<VkImageView>(input->deviceId()), mSampler->get(),
                           VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 1);
        desSet->writeBuffer(constBuffer->buffer(), 2, constBuffer->size());
        pipeline->bind(cmdBuffer->get(), desSet->get());
        mSets.push_back(desSet);
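        // One workgroup covers a 16x16 tile of the input plane (assuming the blit
        // shader's local size is 16x16x1); the z dimension walks every channel-group
        // and batch slice of this input.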
        vkCmdDispatch(cmdBuffer->get(), UP_DIV(input->width(), 16), UP_DIV(input->height(), 16),
                      icDiv4 * input->batch());
    }

    return NO_ERROR;
}

VulkanConcatBufferImpl::VulkanConcatBufferImpl(int axis, VulkanBackend* vkBackend)
    : mAxis(axis), mVkbackend(vkBackend) {
}

ErrorCode VulkanConcatBufferImpl::encodeBufferImpl(const std::vector<Tensor*>& inputs, Tensor* output,
                                                   const VulkanCommandPool::Buffer* cmdBuffer) {
    const int inputSize = inputs.size();
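    // Slow path: convert every NC4HW4 input image into a plain NCHW buffer, concatenate
    // in buffer space, then convert the combined NCHW buffer back into the output image.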
    // set temp-output tensor layout and acquire memory for temp-output tensor
    mTempOutputTensor = std::make_shared<Tensor>(4);
    TensorUtils::copyShape(output, mTempOutputTensor.get());
    TensorUtils::getDescribe(mTempOutputTensor.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW;
    mVkbackend->onAcquireBuffer(mTempOutputTensor.get(), Backend::DYNAMIC);
    // set temp-input tensors layout and acquire memory for temp-input tensors
    mTempInputTensors.clear();
    for (int i = 0; i < inputSize; ++i) {
        auto inputTemp = std::make_shared<Tensor>();
        TensorUtils::copyShape(inputs[i], inputTemp.get());
        TensorUtils::getDescribe(inputTemp.get())->dimensionFormat = MNN_DATA_FORMAT_NCHW;
        mTempInputTensors.push_back(inputTemp);
        mVkbackend->onAcquireBuffer(inputTemp.get(), Backend::DYNAMIC);
    }

    // create converters
    // image -> NCHW buffer
    for (int i = 0; i < inputSize; ++i) {
        auto converter = std::make_shared<VulkanImageConverter>(mVkbackend);
        mTensorConvert4Inputs.push_back(converter);
    }
    // NCHW buffer -> image
    mTensorConvert4Output = std::make_shared<VulkanImageConverter>(mVkbackend);

    // encode the image -> buffer conversions for every input
    for (int i = 0; i < inputSize; ++i) {
        mTensorConvert4Inputs[i]->encodeTensorToBuffer(
            inputs[i], reinterpret_cast<VkBuffer>(mTempInputTensors[i]->deviceId()), mTempInputTensors[i]->size(), 0,
            MNN_DATA_FORMAT_NCHW, cmdBuffer);
    }
    // concat the NCHW buffers into the temp-output buffer
    std::vector<VkDescriptorType> types{
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
    };
    auto bufferConcatPipeline = mVkbackend->getPipeline("glsl_concatBuffer_comp",
                                                        /*glsl_concatBuffer_comp, glsl_concatBuffer_comp_len,*/ types);
    int axisOffset            = 0;
    for (int i = 0; i < inputSize; ++i) {
        auto& tempInput  = mTempInputTensors[i];
        auto constBuffer = std::make_shared<VulkanBuffer>(mVkbackend->getMemoryPool(), false, sizeof(ConcatParam),
                                                          nullptr, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
        mConstBuffers.push_back(constBuffer);
        auto dataPtr = reinterpret_cast<ConcatParam*>(constBuffer->map());
        ::memset(dataPtr, 0, sizeof(ConcatParam));
        dataPtr->inImageSize[0]  = tempInput->width();
        dataPtr->inImageSize[1]  = tempInput->height();
        dataPtr->inImageSize[2]  = tempInput->channel();
        dataPtr->inImageSize[3]  = tempInput->batch();
        dataPtr->outImageSize[0] = output->width();
        dataPtr->outImageSize[1] = output->height();
        dataPtr->outImageSize[2] = output->channel();
        dataPtr->outImageSize[3] = output->batch();
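        // Same offset bookkeeping as the image path, but in plain NCHW units: no /4
        // channel packing is involved here.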
        switch (mAxis) {
            case 0:
                dataPtr->offset[2] = axisOffset;
                axisOffset += tempInput->batch() * tempInput->channel();
                break;
            case 1:
                dataPtr->offset[2] = axisOffset;
                axisOffset += tempInput->channel();
                break;
            case 2:
                dataPtr->offset[1] = axisOffset;
                axisOffset += tempInput->height();
                break;
            case 3:
                dataPtr->offset[0] = axisOffset;
                axisOffset += tempInput->width();
                break;

            default:
                return NOT_SUPPORT;
        }
        constBuffer->unmap();
        std::shared_ptr<VulkanPipeline::DescriptorSet> desSet;
        desSet.reset(bufferConcatPipeline->createSet());
        desSet->writeBuffer(reinterpret_cast<VkBuffer>(mTempOutputTensor->deviceId()), 0, mTempOutputTensor->size());
        desSet->writeBuffer(reinterpret_cast<VkBuffer>(tempInput->deviceId()), 1, tempInput->size());
        desSet->writeBuffer(constBuffer->buffer(), 2, constBuffer->size());
        bufferConcatPipeline->bind(cmdBuffer->get(), desSet->get());
        mSets.push_back(desSet);
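        // Barrier so the image->NCHW conversion that wrote tempInput is visible to the
        // concat shader before it reads the buffer.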
        cmdBuffer->barrierSource(reinterpret_cast<VkBuffer>(tempInput->deviceId()), 0, tempInput->size());
        vkCmdDispatch(cmdBuffer->get(), UP_DIV(tempInput->width(), 16), UP_DIV(tempInput->height(), 16),
                      tempInput->channel() * tempInput->batch());
    }
    // convert the concatenated NCHW buffer back into the output image
    mTensorConvert4Output->encodeBufferToTensor(reinterpret_cast<VkBuffer>(mTempOutputTensor->deviceId()), output,
                                                mTempOutputTensor->size(), 0, MNN_DATA_FORMAT_NCHW, cmdBuffer);

    // release the temp tensors back to the dynamic pool so their memory can be reused
    mVkbackend->onReleaseBuffer(mTempOutputTensor.get(), Backend::DYNAMIC);
    for (auto& item : mTempInputTensors) {
        mVkbackend->onReleaseBuffer(item.get(), Backend::DYNAMIC);
    }
    return NO_ERROR;
}

class VulkanConcatCreator : public VulkanBackend::Creator {
public:
    virtual VulkanBasicExecution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, const MNN::Op* op,
                                Backend* backend) const override {
        if (TensorUtils::getDescribe(outputs[0])->dimensionFormat != MNN_DATA_FORMAT_NC4HW4) {
            MNN_PRINT("Vulkan Concat NOT SUPPORT for Buffer Layout Now!\n");
            return nullptr;
        }

        return new VulkanConcat(op, backend);
    }
};
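// Self-registering creator: runs at static initialization and makes the Vulkan
// backend build a VulkanConcat for every Concat op.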
static bool gResistor = []() {
    VulkanBackend::addCreator(OpType_Concat, new VulkanConcatCreator);
    return true;
}();

} // namespace MNN