mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			162 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			162 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  GLConvolutionDepthwise.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2019/01/31.
 | |
| //  Copyright © 2018, Alibaba Group Holding Limited
 | |
| //
 | |
| 
 | |
| #include "backend/opengl/GLConvolutionDepthwise.hpp"
 | |
| #include <MNN/AutoTime.hpp>
 | |
| 
 | |
| #include <sstream>
 | |
| #include "AllShader.hpp"
 | |
| #include "backend/opengl/GLBackend.hpp"
 | |
| #include "core/Macro.h"
 | |
| namespace MNN {
 | |
| namespace OpenGL {
 | |
| 
 | |
// Compute work-group dimensions. They are injected into the convolution shader
// as the XLOCAL / YLOCAL / ZLOCAL defines and used to divide the dispatch size
// in onExecute.
static constexpr int gXLocal = 8;
static constexpr int gYLocal = 8;
static constexpr int gZLocal = 1;
 | |
| 
 | |
| GLConvolutionDepthwise::~GLConvolutionDepthwise() {
 | |
| }
 | |
| 
 | |
| GLConvolutionDepthwise::GLConvolutionDepthwise(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn) : GPUConvolution(convOp, bn) {
 | |
|     auto extra = (GLBackend *)bn;
 | |
| 
 | |
|     mBiasBuffer.reset(new GLSSBOBuffer(sizeof(float) * ALIGN_UP4(mCommon->outputCount())));
 | |
|     int fw           = mCommon->kernelX();
 | |
|     int fh           = mCommon->kernelY();
 | |
|     int unit         = 4;
 | |
|     int srcDepthQuad = UP_DIV(mInputDepth, unit);
 | |
| 
 | |
|     auto kernelBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(sizeof(float) * fw * fh * srcDepthQuad * 4));
 | |
|     auto weight       = kernelBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
 | |
|     if(weight != nullptr){
 | |
|         ::memset(weight, 0, fw * fh * srcDepthQuad * 4 * sizeof(float));
 | |
|         ::memcpy(weight, convOp->main_as_Convolution2D()->weight()->data(),
 | |
|                  convOp->main_as_Convolution2D()->weight()->size() * sizeof(float));
 | |
|     }
 | |
| 
 | |
|     kernelBuffer->unmap();
 | |
| 
 | |
|     auto bias = mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
 | |
|     if(bias != nullptr){
 | |
|         ::memset(bias, 0, ALIGN_UP4(mCommon->outputCount()) * sizeof(float));
 | |
|         ::memcpy(bias, convOp->main_as_Convolution2D()->bias()->data(),
 | |
|                  convOp->main_as_Convolution2D()->bias()->size() * sizeof(float));
 | |
|     }
 | |
|     mBiasBuffer->unmap();
 | |
| 
 | |
|     std::vector<std::string> prefix;
 | |
|     if (mCommon->relu()) {
 | |
|         prefix.push_back("#define RELU");
 | |
|     }
 | |
|     if (mCommon->relu6()) {
 | |
|         prefix.push_back("#define RELU6");
 | |
|     }
 | |
| 
 | |
|     {
 | |
|         std::ostringstream os;
 | |
|         os << "#define XLOCAL " << gXLocal;
 | |
|         prefix.push_back(os.str());
 | |
|     }
 | |
|     {
 | |
|         std::ostringstream os;
 | |
|         os << "#define YLOCAL " << gYLocal;
 | |
|         prefix.push_back(os.str());
 | |
|     }
 | |
|     {
 | |
|         std::ostringstream os;
 | |
|         os << "#define ZLOCAL " << gZLocal;
 | |
|         prefix.push_back(os.str());
 | |
|     }
 | |
| 
 | |
|     mProgram       = extra->getProgram("convolution_depthwise", glsl_convlutionDepthwise_glsl, prefix);
 | |
|     mKernelTexture = std::shared_ptr<GLTexture>(new GLTexture(srcDepthQuad, fw, fh, ((GLBackend *)backend())->getTextrueFormat(), GL_TEXTURE_3D, false));
 | |
| 
 | |
|     auto transform = extra->getProgram("transform_kernel_image_depthwise", glsl_kernel2ImageDepthwise_glsl);
 | |
|     transform->useProgram();
 | |
|     glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, kernelBuffer->getId());
 | |
|     OPENGL_CHECK_ERROR;
 | |
|     glUniform1i(3, fw);
 | |
|     glUniform1i(4, fh);
 | |
|     OPENGL_CHECK_ERROR;
 | |
| 
 | |
|     ((GLBackend *)backend())->compute(srcDepthQuad, fw, fh);
 | |
|     OPENGL_CHECK_ERROR;
 | |
| 
 | |
| }
 | |
| 
 | |
| ErrorCode GLConvolutionDepthwise::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     GPUConvolution::onResize(inputs, outputs);
 | |
|     int kx      = mCommon->kernelX();
 | |
|     int ky      = mCommon->kernelY();
 | |
|     int sx      = mCommon->strideX();
 | |
|     int sy      = mCommon->strideY();
 | |
|     int dx      = mCommon->dilateX();
 | |
|     int dy      = mCommon->dilateY();
 | |
|     mSetUniform = [=]() {
 | |
|         glUniform2i(4, mPadX, mPadY);
 | |
|         glUniform2i(5, kx, ky);
 | |
|         glUniform2i(6, sx, sy);
 | |
|         glUniform2i(7, dx, dy);
 | |
|     };
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| ErrorCode GLConvolutionDepthwise::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     {
 | |
|         auto convLayer = mCommon;
 | |
| 
 | |
|         auto input         = inputs[0];
 | |
|         auto output        = outputs[0];
 | |
|         auto inputTexture  = input->deviceId();
 | |
|         auto outputTexture = output->deviceId();
 | |
|         int dst_depth_quad = UP_DIV(output->channel(), 4);
 | |
| 
 | |
|         mProgram->useProgram();
 | |
|         glBindImageTexture(0, outputTexture, 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         {
 | |
|             int texId = 0;
 | |
|             glActiveTexture(GL_TEXTURE0 + texId);
 | |
|             glUniform1i(1, texId);
 | |
|             glBindTexture(GL_TEXTURE_3D, inputTexture);
 | |
|             OPENGL_CHECK_ERROR;
 | |
|         }
 | |
|         {
 | |
|             int texId = 1;
 | |
|             glActiveTexture(GL_TEXTURE0 + texId);
 | |
|             OPENGL_CHECK_ERROR;
 | |
|             glUniform1i(2, texId);
 | |
| 
 | |
|             OPENGL_CHECK_ERROR;
 | |
|             glBindTexture(GL_TEXTURE_3D, mKernelTexture->id());
 | |
|             OPENGL_CHECK_ERROR;
 | |
|         }
 | |
|         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, mBiasBuffer->getId());
 | |
| 
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         mSetUniform();
 | |
| 
 | |
|         glUniform3i(10, output->width(), output->height(), UP_DIV(output->channel(), 4));
 | |
|         glUniform3i(11, input->width(), input->height(), UP_DIV(input->channel(), 4));
 | |
| 
 | |
|         OPENGL_CHECK_ERROR;
 | |
| 
 | |
|         ((GLBackend *)backend())->compute(UP_DIV(output->width(), (gXLocal)), UP_DIV(output->height(), gYLocal),
 | |
|                           UP_DIV(dst_depth_quad, gZLocal));
 | |
|         OPENGL_CHECK_ERROR;
 | |
| 
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| GLCreatorRegister<TypedCreator<GLConvolutionDepthwise>> __depthwise_conv_op(OpType_ConvolutionDepthwise);
 | |
| } // namespace OpenGL
 | |
| } // namespace MNN
 |