mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			240 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			240 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  GLConvolution.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2019/01/31.
 | |
| //  Copyright © 2018, Alibaba Group Holding Limited
 | |
| //
 | |
| 
 | |
| #include "backend/opengl/GLConvolution.hpp"
 | |
| #include <MNN/AutoTime.hpp>
 | |
| 
 | |
| #include <sstream>
 | |
| #include "AllShader.hpp"
 | |
| #include "core/Macro.h"
 | |
| #include "backend/opengl/GLConvolutionIm2col.hpp"
 | |
| namespace MNN {
 | |
| namespace OpenGL {
 | |
| 
 | |
// Channel packing factor used by this file: channels are processed in groups
// of UNIT. It is uploaded to the shader (uniform location 8) and scales the
// work-group count along the output width in GLConvolution::onExecute.
// A typed constant instead of a macro keeps the name scoped to this namespace.
constexpr int UNIT = 4;
 | |
| 
 | |
| GPUConvolution::GPUConvolution(const Op *convOp, Backend *b) : MNN::Execution(b) {
 | |
|     mCommon          = convOp->main_as_Convolution2D()->common();
 | |
|     auto convReal    = convOp->main_as_Convolution2D();
 | |
|     auto outputCount = mCommon->outputCount();
 | |
|     mInputDepth        = 0;
 | |
| 
 | |
|     if (convReal->weight() != NULL) {
 | |
|         auto weightSize = convReal->weight()->size();
 | |
|         mInputDepth       = weightSize * mCommon->group() / mCommon->kernelX() / mCommon->kernelY() / outputCount;
 | |
|     }
 | |
| }
 | |
| GPUConvolution::~GPUConvolution() {
 | |
| }
 | |
| 
 | |
| ErrorCode GPUConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     auto input  = inputs[0];
 | |
|     auto output = outputs[0];
 | |
|     if (mCommon->padMode() == PadMode_SAME) {
 | |
|         int kernelWidthSize = (mCommon->kernelX() - 1) * mCommon->dilateX() + 1;
 | |
|         int kernelHeightSize = (mCommon->kernelY() - 1) * mCommon->dilateY() + 1;
 | |
|         int pad_needed_width  = (output->width() - 1) * mCommon->strideX() + kernelWidthSize - input->width();
 | |
|         int pad_needed_height = (output->height() - 1) * mCommon->strideY() + kernelHeightSize - input->height();
 | |
| 
 | |
|         mPadX = (pad_needed_width > 0 ?  pad_needed_width : 0) / 2;
 | |
|         mPadY = (pad_needed_height > 0 ?  pad_needed_height : 0) / 2;
 | |
|         return NO_ERROR;
 | |
|     }
 | |
|     mPadX = mCommon->padX();
 | |
|     mPadY = mCommon->padY();
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| GLConvolution::~GLConvolution() {
 | |
| }
 | |
| 
 | |
| GLConvolution::GLConvolution(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn) : GPUConvolution(convOp, bn) {
 | |
|     auto totalWeightSize =
 | |
|         ALIGN_UP4(mCommon->outputCount()) * ALIGN_UP4(mInputDepth) * (mCommon->kernelY() * mCommon->kernelX());
 | |
|     auto extra = (GLBackend *)bn;
 | |
| 
 | |
|     mBiasBuffer.reset(new GLSSBOBuffer(sizeof(float) * ALIGN_UP4(mCommon->outputCount())));
 | |
|     float* bias = (float*)(mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
 | |
|     if(bias != nullptr){
 | |
|         ::memset(bias, 0, ALIGN_UP4(mCommon->outputCount()) * sizeof(float));
 | |
|         ::memcpy(bias, convOp->main_as_Convolution2D()->bias()->data(),
 | |
|                  convOp->main_as_Convolution2D()->bias()->size() * sizeof(float));
 | |
|     }
 | |
|     mBiasBuffer->unmap();
 | |
| 
 | |
|     auto mKernelBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(sizeof(float) * totalWeightSize));
 | |
|     int fw                = mCommon->kernelX();
 | |
|     int fh                = mCommon->kernelY();
 | |
|     int unit              = 4;
 | |
|     int unit2             = unit * unit;
 | |
|     int alignedWeightSize = UP_DIV(mInputDepth, unit) * fw * fh * unit2;
 | |
|     int oc_4         = UP_DIV(mCommon->outputCount(), unit);
 | |
| 
 | |
|     float *dest           = (float *)mKernelBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
 | |
|     if(dest != nullptr){
 | |
|         ::memset(dest, 0, alignedWeightSize * sizeof(float));
 | |
|         const float *source = convOp->main_as_Convolution2D()->weight()->data();
 | |
|         int cur             = 0;
 | |
| 
 | |
|         //weight : oc ic h w -> oc/4, ic/4 ky kx ic4 oc4
 | |
|         for (int b = 0; b < mCommon->outputCount(); ++b) {
 | |
|             int b_4      = b / unit;
 | |
|             float *dst_b = dest + b_4 * alignedWeightSize;
 | |
|             int mx       = b % unit;
 | |
|             for (int d = 0; d < mInputDepth; ++d) {
 | |
|                 int my       = d % unit;
 | |
|                 int d_4      = d / unit;
 | |
|                 float *dst_d = dst_b + d_4 * fw * fh * unit2;
 | |
|                 for (int y = 0; y < fh; ++y) {
 | |
|                     float *dst_y = dst_d + y * fw * unit2;
 | |
|                     for (int x = 0; x < fw; ++x) {
 | |
|                         float *dst_x          = dst_y + x * unit2;
 | |
|                         dst_x[unit * my + mx] = source[cur++];
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     mKernelBuffer->unmap();
 | |
| 
 | |
|     int ic_4      = UP_DIV(mInputDepth, unit);
 | |
|     //weight image : ky kx, oc/4, ic/4*ic4 oc4
 | |
|     mKernelTexture =
 | |
|     std::shared_ptr<GLTexture>(new GLTexture(ic_4 * unit, oc_4, fw * fh, ((GLBackend *)backend())->getTextrueFormat() , GL_TEXTURE_3D, false));
 | |
| 
 | |
|     auto transform = extra->getProgram("transform_kernel_image_adreno", glsl_kernel2image_adreno_glsl);
 | |
|     transform->useProgram();
 | |
|     glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, mKernelBuffer->getId());
 | |
|     OPENGL_CHECK_ERROR;
 | |
|     glUniform1i(3, fw * fh);
 | |
|     glUniform1i(4, ic_4);
 | |
|     OPENGL_CHECK_ERROR;
 | |
| 
 | |
|     ((GLBackend *)backend())->compute(ic_4, oc_4, fw * fh);
 | |
|     OPENGL_CHECK_ERROR;
 | |
| }
 | |
| 
 | |
| ErrorCode GLConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     GPUConvolution::onResize(inputs, outputs);
 | |
|     auto extra = (GLBackend *)backend();
 | |
|     std::vector<std::string> prefix;
 | |
|     if (mCommon->relu()) {
 | |
|         prefix.push_back("#define RELU");
 | |
|     }
 | |
|     if (mCommon->relu6()) {
 | |
|         prefix.push_back("#define RELU6");
 | |
|     }
 | |
| 
 | |
|     auto dstDepthQuad = UP_DIV(outputs[0]->channel(), 4);
 | |
| 
 | |
|     setLocalSize(prefix, mLocalSize, 1, 1, dstDepthQuad);
 | |
| 
 | |
|     if (1 == mCommon->kernelY() && 1 == mCommon->kernelX() && 1 == mCommon->strideY() && 1 == mCommon->strideX() &&
 | |
|         0 == mCommon->padX() && 0 == mCommon->padY()) {
 | |
|         mIs1x1      = true;
 | |
|     }
 | |
| 
 | |
|     if (mIs1x1) {
 | |
|         mProgram = extra->getProgram("convolution1x1", glsl_convolution1x1_glsl, prefix);
 | |
|     } else {
 | |
|         mKx      = mCommon->kernelX();
 | |
|         mKy      = mCommon->kernelY();
 | |
|         mSx      = mCommon->strideX();
 | |
|         mSy      = mCommon->strideY();
 | |
|         mDx      = mCommon->dilateX();
 | |
|         mDy      = mCommon->dilateY();
 | |
|         mProgram = extra->getProgram("convolution", glsl_convolution_glsl, prefix);
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| 
 | |
| ErrorCode GLConvolution::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     {
 | |
|         auto convLayer = mCommon;
 | |
| 
 | |
|         auto input         = inputs[0];
 | |
|         auto output        = outputs[0];
 | |
|         auto inputTexture  = input->deviceId();
 | |
|         auto outputTexture = output->deviceId();
 | |
|         int oc_4 = UP_DIV(output->channel(), 4);
 | |
| 
 | |
|         mProgram->useProgram();
 | |
|         glBindImageTexture(0, outputTexture, 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|         {
 | |
|             int texId = 0;
 | |
|             glActiveTexture(GL_TEXTURE0 + texId);
 | |
|             glUniform1i(1, texId);
 | |
|             glBindTexture(GL_TEXTURE_3D, inputTexture);
 | |
|             OPENGL_CHECK_ERROR;
 | |
|         }
 | |
|         {
 | |
|             int texId = 1;
 | |
|             glActiveTexture(GL_TEXTURE0 + texId);
 | |
|             OPENGL_CHECK_ERROR;
 | |
|             glUniform1i(2, texId);
 | |
|             OPENGL_CHECK_ERROR;
 | |
|             glBindTexture(GL_TEXTURE_3D, mKernelTexture->id());
 | |
|             OPENGL_CHECK_ERROR;
 | |
|         }
 | |
|         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, mBiasBuffer->getId());
 | |
| 
 | |
|         if(!mIs1x1){
 | |
|             glUniform2i(4, mPadX, mPadY);
 | |
|             glUniform2i(5, mKx, mKy);
 | |
|             glUniform2i(6, mSx, mSy);
 | |
|             glUniform2i(7, mDx, mDy);
 | |
|         }
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         glUniform3i(10, output->width(), output->height(), UP_DIV(output->channel(), 4));
 | |
|         glUniform3i(11, input->width(), input->height(), UP_DIV(input->channel(), 4));
 | |
| 
 | |
|         glUniform1i(8, UNIT);
 | |
|         OPENGL_CHECK_ERROR;
 | |
| 
 | |
|         ((GLBackend *)backend())->compute(UP_DIV(output->width(), UNIT*mLocalSize[0]), UP_DIV(output->height(), mLocalSize[1]),
 | |
|                                                 UP_DIV(oc_4, mLocalSize[2]));
 | |
| 
 | |
|         OPENGL_CHECK_ERROR;
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| 
 | |
| class ConvolutionCreator : public GLBackend::Creator {
 | |
| public:
 | |
|     virtual ~ConvolutionCreator() = default;
 | |
|     virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
 | |
|                                 const MNN::Op *op, Backend *backend) const override {
 | |
|         auto common = op->main_as_Convolution2D()->common();
 | |
| 
 | |
|         //TODO: bugfix
 | |
|         if(common->padX() == 1 || common->strideX() != 1){
 | |
|             return new GLConvolution(inputs, op, backend);
 | |
|         }
 | |
|         if(((GLBackend *)backend)->gpuType() == GLBackend::ADRENO){
 | |
|             if(((GLBackend *)backend)->glVersion() >= 269){
 | |
|                 return new GLConvolution(inputs, op, backend);
 | |
|             }else{
 | |
|                 return new GLConvolutionIm2col(inputs, op, backend);
 | |
|             }
 | |
|         }else{
 | |
|             return new GLConvolutionIm2col(inputs, op, backend);
 | |
|         }
 | |
|     }
 | |
| };
 | |
| 
 | |
| GLCreatorRegister<ConvolutionCreator> __gl_conv_op(OpType_Convolution);
 | |
| } // namespace OpenGL
 | |
| } // namespace MNN
 |