mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			225 lines
		
	
	
		
			9.3 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			225 lines
		
	
	
		
			9.3 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  VulkanConvolutionWinograd.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2019/01/31.
 | |
| //  Copyright © 2018, Alibaba Group Holding Limited
 | |
| //
 | |
| 
 | |
| #include "VulkanConvolutionWinograd.hpp"
 | |
| #include <string.h>
 | |
| #include "Macro.h"
 | |
| #include "WingoradGenerater.hpp"
 | |
| #define COMPUT_SIZE 4
 | |
| #define COMPUT_SIZE2 16
 | |
| #include "VulkanConvolution.hpp"
 | |
| namespace MNN {
 | |
| struct WinogradConst { // CPU-side view of the uniform data that onEncode writes into mWinogradConst
 | |
|     ivec4 inputSize; // filled in onEncode as {width, height, UP_DIV(channel, 4), batch} of the input tensor
 | |
|     ivec4 outputSize; // filled in onEncode as {width, height, UP_DIV(channel, 4), batch} of the output tensor
 | |
|     int padX; // horizontal padding; recomputed in onEncode when the pad mode is PadMode_SAME
 | |
|     int padY; // vertical padding; recomputed in onEncode when the pad mode is PadMode_SAME
 | |
|     int unitWidth; // units per slice along x (wPiece in onEncode)
 | |
|     int unitHeight; // units per slice along y (hPiece in onEncode)
 | |
|     int unit; // copy of mUnit, i.e. COMPUT_SIZE - kernelY + 1
 | |
| }; // NOTE(review): field order and types presumably have to mirror the uniform block of the winograd transform shaders - confirm before changing
 | |
| 
 | |
| bool VulkanConvolutionWinograd::support(const Convolution2DCommon* convOption) {
 | |
|     if (convOption->strideX() != 1 || convOption->strideY() != 1) {
 | |
|         return false;
 | |
|     }
 | |
|     if (convOption->dilateX() != 1 || convOption->dilateY() != 1) {
 | |
|         return false;
 | |
|     }
 | |
|     if (convOption->kernelX() != convOption->kernelY()) {
 | |
|         return false;
 | |
|     }
 | |
|     if (convOption->kernelX() != 3) {
 | |
|         // [TODO] Support other kernel size
 | |
|         return false;
 | |
|     }
 | |
|     if (convOption->kernelY() <= 1 || convOption->kernelY() >= COMPUT_SIZE) {
 | |
|         return false;
 | |
|     }
 | |
|     if (convOption->group() != 1) {
 | |
|         return false;
 | |
|     }
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| VulkanConvolutionWinograd::~VulkanConvolutionWinograd() {
 | |
| }
 | |
| 
 | |
| VulkanConvolutionWinograd::VulkanConvolutionWinograd(VulkanBackend* backend, const Convolution2DCommon* convOption,
 | |
|                                                      const float* weightPtr, const float* biasPtr, int ci, int co)
 | |
|     : VulkanBasicExecution(backend) {
 | |
|     MNN_ASSERT(support(convOption));
 | |
|     mBackend = backend;
 | |
|     mCommon  = convOption;
 | |
|     mSampler = backend->getCommonSampler();
 | |
|     mBias.reset(new VulkanImage(backend->getMemoryPool(), false, UP_DIV(co, 4), 1));
 | |
|     {
 | |
|         std::shared_ptr<VulkanBuffer> biasBuffer(
 | |
|             new VulkanBuffer(backend->getMemoryPool(), false, ALIGN_UP4(co) * sizeof(float)));
 | |
|         auto ptr = biasBuffer->map();
 | |
|         ::memset(ptr, 0, ALIGN_UP4(co) * sizeof(float));
 | |
|         ::memcpy(ptr, biasPtr, co * sizeof(float));
 | |
|         biasBuffer->unmap();
 | |
|         backend->copyBufferToImage(biasBuffer.get(), mBias.get());
 | |
|     }
 | |
|     int unit = COMPUT_SIZE - convOption->kernelY() + 1;
 | |
|     mUnit    = unit;
 | |
|     Math::WinogradGenerater generator(unit, convOption->kernelY(), 1.0f);
 | |
| 
 | |
|     mWinogradConst.reset(new VulkanBuffer(backend->getMemoryPool(), false, sizeof(WinogradConst), nullptr,
 | |
|                                           VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
 | |
|     // Create Matrix Multier
 | |
|     {
 | |
|         auto ciC4 = UP_DIV(ci, 4);
 | |
|         auto coC4 = UP_DIV(co, 4);
 | |
|         std::shared_ptr<Tensor> originWeight(Tensor::create<float>(
 | |
|             std::vector<int>{co, ci, (int)mCommon->kernelY(), (int)mCommon->kernelX()}, (void*)weightPtr, Tensor::CAFFE));
 | |
|         auto weightDest = generator.allocTransformWeight(originWeight.get());
 | |
|         generator.transformWeight(weightDest.get(), originWeight.get());
 | |
|         mMultier.reset(new VulkanMatrixMultier(backend, weightDest->host<float>(), ciC4 * 4, coC4 * 4, COMPUT_SIZE2));
 | |
|     }
 | |
| 
 | |
|     // Get transform pipeline
 | |
|     {
 | |
|         std::vector<VkDescriptorType> sourceTypes{
 | |
|             VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 | |
|             VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 | |
|             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 | |
|             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 | |
|         };
 | |
|         mSourceTransform = backend->getPipeline("glsl_winogradTransformSource2_3_1_comp", sourceTypes);
 | |
|         std::vector<VkDescriptorType> destTypes{
 | |
|             VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 | |
|             VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 | |
|             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 | |
|         };
 | |
|         auto macro     = VulkanConvolutionCommon::getPostTreatMacro(mCommon);
 | |
|         mDestTransform = backend->getPipeline("glsl_winogradTransformDest2_3_1_" + macro + "comp", destTypes);
 | |
|     }
 | |
| 
 | |
|     mTransformLocalSize[0] = 8;
 | |
|     mTransformLocalSize[1] = 8;
 | |
|     mTransformLocalSize[2] = 1;
 | |
| }
 | |
| 
 | |
| ErrorCode VulkanConvolutionWinograd::onEncode(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
 | |
|                                               const VulkanCommandPool::Buffer* cmdBuffer) {
 | |
|     auto src    = inputs[0];
 | |
|     auto dst    = outputs[0];
 | |
|     auto ow     = dst->width();
 | |
|     auto oh     = dst->height();
 | |
|     auto icC4   = UP_DIV(src->channel(), 4);
 | |
|     auto ocC4   = UP_DIV(dst->channel(), 4);
 | |
|     auto owUnit = UP_DIV(ow, mUnit);
 | |
|     auto ohUnit = UP_DIV(oh, mUnit);
 | |
|     int padX    = mCommon->padX();
 | |
|     int padY    = mCommon->padY();
 | |
|     if (mCommon->padMode() == PadMode_SAME) {
 | |
|         int pad_needed_width  = (dst->width() - 1) * mCommon->strideX() + mCommon->kernelX() - src->width();
 | |
|         int pad_needed_height = (dst->height() - 1) * mCommon->strideY() + mCommon->kernelY() - src->height();
 | |
| 
 | |
|         padX = pad_needed_width / 2;
 | |
|         padY = pad_needed_height / 2;
 | |
|     }
 | |
|     int maxNumber      = (mBackend->proty().limits.maxImageDimension1D * 4) / COMPUT_SIZE2;
 | |
|     int totalNumber    = owUnit * ohUnit;
 | |
|     int sliceNumber    = 1;
 | |
|     const int maxSlice = 100;
 | |
|     if (maxNumber < totalNumber) {
 | |
|         for (int i = 2; i < maxSlice; ++i) {
 | |
|             int realNumber = UP_DIV(owUnit, i) * UP_DIV(ohUnit, i);
 | |
|             if (realNumber < maxNumber) {
 | |
|                 sliceNumber = i;
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     int wPiece = UP_DIV(owUnit, sliceNumber);
 | |
|     int hPiece = UP_DIV(ohUnit, sliceNumber);
 | |
|     {
 | |
|         auto value          = (WinogradConst*)mWinogradConst->map();
 | |
|         value->inputSize[0] = src->width();
 | |
|         value->inputSize[1] = src->height();
 | |
|         value->inputSize[2] = icC4;
 | |
|         value->inputSize[3] = src->batch();
 | |
| 
 | |
|         value->outputSize[0] = dst->width();
 | |
|         value->outputSize[1] = dst->height();
 | |
|         value->outputSize[2] = ocC4;
 | |
|         value->outputSize[3] = dst->batch();
 | |
| 
 | |
|         value->padX       = padX;
 | |
|         value->padY       = padY;
 | |
|         value->unit       = mUnit;
 | |
|         value->unitHeight = hPiece;
 | |
|         value->unitWidth  = wPiece;
 | |
|         mWinogradConst->unmap();
 | |
|     }
 | |
| 
 | |
|     mMultier->prepare(wPiece * hPiece);
 | |
|     mOffsetsBuffer.resize(sliceNumber * sliceNumber);
 | |
|     mSourceTransformSet.resize(sliceNumber * sliceNumber);
 | |
|     mDestTransformSet.resize(sliceNumber * sliceNumber);
 | |
| 
 | |
|     ivec2 offsetData;
 | |
|     offsetData[0] = 0;
 | |
|     offsetData[1] = 0;
 | |
|     for (int y = 0; y < sliceNumber; ++y) {
 | |
|         int hCount = hPiece;
 | |
|         if (y == sliceNumber - 1) {
 | |
|             hCount = ohUnit - (sliceNumber - 1) * hPiece;
 | |
|         }
 | |
|         offsetData[1] = y * hPiece;
 | |
|         for (int x = 0; x < sliceNumber; ++x) {
 | |
|             int wCount = wPiece;
 | |
|             if (x == sliceNumber - 1) {
 | |
|                 wCount = owUnit - (sliceNumber - 1) * wPiece;
 | |
|             }
 | |
|             offsetData[0] = x * wPiece;
 | |
|             int i         = y * sliceNumber + x;
 | |
|             mOffsetsBuffer[i].reset(new VulkanBuffer(mBackend->getMemoryPool(), false, sizeof(offsetData), offsetData,
 | |
|                                                      VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
 | |
|             mSourceTransformSet[i].reset(mSourceTransform->createSet());
 | |
|             mDestTransformSet[i].reset(mDestTransform->createSet());
 | |
|             if (true) {
 | |
|                 auto sourceImage = mMultier->source();
 | |
|                 mSourceTransformSet[i]->writeImage(sourceImage->view(), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL, 0);
 | |
|                 mSourceTransformSet[i]->writeImage((VkImageView)src->deviceId(), mSampler->get(),
 | |
|                                                    VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 1);
 | |
|                 mSourceTransformSet[i]->writeBuffer(mWinogradConst->buffer(), 2, mWinogradConst->size());
 | |
|                 mSourceTransformSet[i]->writeBuffer(mOffsetsBuffer[i]->buffer(), 3, mOffsetsBuffer[i]->size());
 | |
|                 mSourceTransform->bind(cmdBuffer->get(), mSourceTransformSet[i]->get());
 | |
|                 vkCmdDispatch(cmdBuffer->get(), UP_DIV(wCount, mTransformLocalSize[0]),
 | |
|                               UP_DIV(hCount, mTransformLocalSize[1]), UP_DIV(icC4, mTransformLocalSize[2]));
 | |
|             }
 | |
| 
 | |
|             mMultier->compute(cmdBuffer);
 | |
|             if (true) {
 | |
|                 auto destImage = mMultier->dest();
 | |
|                 mDestTransformSet[i]->writeImage((VkImageView)dst->deviceId(), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL,
 | |
|                                                  0);
 | |
|                 mDestTransformSet[i]->writeImage(destImage->view(), mSampler->get(),
 | |
|                                                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 1);
 | |
|                 mDestTransformSet[i]->writeImage(mBias->view(), mSampler->get(),
 | |
|                                                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 2);
 | |
|                 mDestTransformSet[i]->writeBuffer(mWinogradConst->buffer(), 3, mWinogradConst->size());
 | |
|                 mDestTransformSet[i]->writeBuffer(mOffsetsBuffer[i]->buffer(), 4, mOffsetsBuffer[i]->size());
 | |
|                 mDestTransform->bind(cmdBuffer->get(), mDestTransformSet[i]->get());
 | |
|                 cmdBuffer->barrierImage(destImage->get(), VK_IMAGE_LAYOUT_GENERAL,
 | |
|                                         VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
 | |
|                 vkCmdDispatch(cmdBuffer->get(), UP_DIV(wCount, mTransformLocalSize[0]),
 | |
|                               UP_DIV(hCount, mTransformLocalSize[1]), UP_DIV(ocC4, mTransformLocalSize[2]));
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| } // namespace MNN
 |