mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			111 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			111 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  VulkanDeconvolutionDepthwise.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2019/01/31.
 | |
| //  Copyright © 2018, Alibaba Group Holding Limited
 | |
| //
 | |
| 
 | |
| #include "VulkanDeconvolutionDepthwise.hpp"
 | |
| #include "Macro.h"
 | |
| namespace MNN {
 | |
| VulkanDeconvolutionDepthwise::VulkanDeconvolutionDepthwise(Backend* bn, const Convolution2D* conv)
 | |
|     : VulkanBasicExecution(bn) {
 | |
|     mConvCommonOption = conv->common();
 | |
|     auto vkBn         = (VulkanBackend*)bn;
 | |
|     int outputC4      = UP_DIV(mConvCommonOption->outputCount(), 4);
 | |
|     mBias             = std::make_shared<VulkanImage>(vkBn->getMemoryPool(), false, std::vector<int>{outputC4, 1});
 | |
|     {
 | |
|         auto biasBuffer = std::make_shared<VulkanBuffer>(vkBn->getMemoryPool(), false, outputC4 * 4 * sizeof(float));
 | |
|         auto biasPtr    = biasBuffer->map();
 | |
|         ::memset(biasPtr, 0, outputC4 * 4 * sizeof(float));
 | |
|         ::memcpy(biasPtr, conv->bias()->data(), conv->bias()->size() * sizeof(float));
 | |
|         biasBuffer->unmap();
 | |
|         vkBn->copyBufferToImage(biasBuffer.get(), mBias.get());
 | |
|     }
 | |
|     mConvParam = std::make_shared<VulkanBuffer>(vkBn->getMemoryPool(), false,
 | |
|                                                 sizeof(VulkanConvolutionCommon::ConvolutionParameter), nullptr,
 | |
|                                                 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
 | |
|     int kh     = mConvCommonOption->kernelY();
 | |
|     int kw     = mConvCommonOption->kernelX();
 | |
|     int co     = mConvCommonOption->outputCount();
 | |
|     int coC4   = UP_DIV(co, 4);
 | |
|     mKernel    = std::make_shared<VulkanImage>(vkBn->getMemoryPool(), false, std::vector<int>{kw * kh, coC4});
 | |
| 
 | |
|     const int alignedWeightSize = kh * kw * ALIGN_UP4(co);
 | |
|     auto tempWeightBuffer =
 | |
|         std::make_shared<VulkanBuffer>(vkBn->getMemoryPool(), false, alignedWeightSize * sizeof(float));
 | |
|     auto tempReorderWeight = (float*)tempWeightBuffer->map();
 | |
|     ::memset(tempReorderWeight, 0, alignedWeightSize * sizeof(float));
 | |
|     auto tempWeight = conv->weight()->data();
 | |
|     for (int b = 0; b < co; ++b) {
 | |
|         int b_4      = b / 4;
 | |
|         float* dst_b = tempReorderWeight + b_4 * 4 * kw * kh;
 | |
|         int mx       = b % 4;
 | |
|         for (int y = 0; y < kh; ++y) {
 | |
|             float* dst_y = dst_b + y * kw * 4;
 | |
|             for (int x = 0; x < kw; ++x) {
 | |
|                 float* dst_x = dst_y + x * 4;
 | |
|                 dst_x[mx]    = tempWeight[x + y * kw + b * kw * kh];
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     tempWeightBuffer->unmap();
 | |
| 
 | |
|     vkBn->copyBufferToImage(tempWeightBuffer.get(), mKernel.get());
 | |
|     mSampler = vkBn->getCommonSampler();
 | |
| 
 | |
|     std::vector<VkDescriptorType> types{
 | |
|         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 | |
|         VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 | |
|         VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 | |
|     };
 | |
|     std::string macro = VulkanConvolutionCommon::getPostTreatMacro(mConvCommonOption);
 | |
| 
 | |
|     mPipeline = vkBn->getPipeline("glsl_deconvolutionDepthwise_" + macro + "comp", types);
 | |
|     mPipelineSet.reset(mPipeline->createSet());
 | |
| 
 | |
|     mLocalSize[0] = 8;
 | |
|     mLocalSize[1] = 8;
 | |
|     mLocalSize[2] = 1;
 | |
| }
 | |
| 
 | |
| ErrorCode VulkanDeconvolutionDepthwise::onEncode(const std::vector<Tensor*>& inputs,
 | |
|                                                  const std::vector<Tensor*>& outputs,
 | |
|                                                  const VulkanCommandPool::Buffer* cmdBuffer) {
 | |
|     auto src         = inputs[0];
 | |
|     auto dst         = outputs[0];
 | |
|     const int ocDiv4 = UP_DIV(dst->channel(), 4);
 | |
|     auto common      = mConvCommonOption;
 | |
|     {
 | |
|         auto convCons = reinterpret_cast<VulkanConvolutionCommon::ConvolutionParameter*>(mConvParam->map());
 | |
|         VulkanDeconvolution::writeConvolutionConst(convCons, common, src, dst);
 | |
|         mConvParam->unmap();
 | |
|     }
 | |
|     mPipelineSet->writeImage((VkImageView)dst->deviceId(), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL, 0);
 | |
|     mPipelineSet->writeImage((VkImageView)src->deviceId(), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL, 1);
 | |
|     mPipelineSet->writeImage(mKernel->view(), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL, 2);
 | |
|     mPipelineSet->writeImage(mBias->view(), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL, 3);
 | |
|     mPipelineSet->writeBuffer(mConvParam->buffer(), 4, mConvParam->size());
 | |
|     mPipeline->bind(cmdBuffer->get(), mPipelineSet->get());
 | |
| 
 | |
|     vkCmdDispatch(cmdBuffer->get(), UP_DIV(dst->width(), mLocalSize[0]), UP_DIV(dst->height(), mLocalSize[1]),
 | |
|                   UP_DIV(ocDiv4, mLocalSize[2]));
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| class VulkanDeconvolutionDepthwiseCreator : public VulkanBackend::Creator {
 | |
| public:
 | |
|     virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const MNN::Op* op,
 | |
|                                 Backend* backend) const override {
 | |
|         return new VulkanDeconvolutionDepthwise(backend, op->main_as_Convolution2D());
 | |
|     }
 | |
| };
 | |
| 
 | |
| static bool gResistor = []() {
 | |
|     VulkanBackend::addCreator(OpType_DeconvolutionDepthwise, new VulkanDeconvolutionDepthwiseCreator);
 | |
|     return true;
 | |
| }();
 | |
| } // namespace MNN
 |