mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			250 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			250 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| //  GLConvolution.cpp
 | |
| //  MNN
 | |
| //
 | |
| //  Created by MNN on 2019/01/31.
 | |
| //  Copyright © 2018, Alibaba Group Holding Limited
 | |
| //
 | |
| 
 | |
| #include "backend/opengl/GLConvolution.hpp"
 | |
| #include <MNN/AutoTime.hpp>
 | |
| 
 | |
| #include <sstream>
 | |
| #include "AllShader.hpp"
 | |
| #include "backend/opengl/GLBackend.hpp"
 | |
| #include "core/Macro.h"
 | |
| #include "backend/opengl/GLConvolutionIm2col.hpp"
 | |
| #include "backend/opengl/GLUtils.hpp"
 | |
| namespace MNN {
 | |
| namespace OpenGL {
 | |
| 
 | |
| GLConvolutionIm2col::~GLConvolutionIm2col() {
 | |
| }
 | |
| 
 | |
| #define UNIT 4
 | |
| #define UNIT2 16
 | |
| GLConvolutionIm2col::GLConvolutionIm2col(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn) : GPUConvolution(convOp, bn) {
 | |
|     auto totalWeightSize = ALIGN_UP4(mCommon->outputCount()) * ALIGN_UP4(mInputDepth) * (mCommon->kernelY() * mCommon->kernelX());
 | |
|     mGLBackend = (GLBackend *)bn;
 | |
|     auto mKernelBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(sizeof(float) * totalWeightSize));
 | |
|     int fw                = mCommon->kernelX();
 | |
|     int fh                = mCommon->kernelY();
 | |
|     mIsConv1x1 = (fw == 1 && fh == 1) ? true : false;
 | |
|     int oc_4         = UP_DIV(mCommon->outputCount(), UNIT);
 | |
|     int ic_4      = UP_DIV(mInputDepth, UNIT);
 | |
|     float *dest           = (float *)mKernelBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
 | |
|     if(NULL != dest){
 | |
|         ::memset(dest, 0, totalWeightSize * sizeof(float));
 | |
|         const float *source = convOp->main_as_Convolution2D()->weight()->data();
 | |
|         int cur             = 0;
 | |
| 
 | |
|         //weight : oc ic -> oc/4 ic/4 ic4 oc4
 | |
|         //weight image : oc_4, ic_4 * ic4 oc4
 | |
|         int alignedWeightSize = ic_4 * fw * fh * UNIT2;
 | |
|         for (int b = 0; b < mCommon->outputCount(); ++b) {
 | |
|             int b_4      = b / UNIT;
 | |
|             float *dst_b = dest + b_4 * alignedWeightSize;
 | |
|             int mx       = b % UNIT;
 | |
|             for (int d = 0; d < mInputDepth; ++d) {
 | |
|                 int my       = d % UNIT;
 | |
|                 int d_4      = d / UNIT;
 | |
|                 float *dst_d = dst_b + d_4 * fw * fh * UNIT2;
 | |
|                 for (int y = 0; y < fh; ++y) {
 | |
|                     float *dst_y = dst_d + y * fw * UNIT2;
 | |
|                     for (int x = 0; x < fw; ++x) {
 | |
|                         float *dst_x          = dst_y + x * UNIT2;
 | |
|                         dst_x[UNIT * my + mx] = source[cur++];
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }else{
 | |
|         MNN_ASSERT(NULL != dest);
 | |
|     }
 | |
| 
 | |
|     mKernelBuffer->unmap();
 | |
| 
 | |
|     mKernelTexture = std::shared_ptr<GLTexture>(new GLTexture(ic_4 * UNIT*fw*fh, oc_4, 1, ((GLBackend *)backend())->getTextrueFormat(), GL_TEXTURE_2D, false));
 | |
|     auto transform = mGLBackend->getProgram("transform_kernel_image", glsl_kernel2image_glsl);
 | |
|     int imageWidth = ROUND_UP(mInputDepth, 4)*fw*fh;
 | |
|     int imageHeight = oc_4;
 | |
|     transform->useProgram();
 | |
|     glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, mKernelBuffer->getId());
 | |
|     OPENGL_CHECK_ERROR;
 | |
|     glUniform1i(3, imageWidth);
 | |
|     OPENGL_CHECK_ERROR;
 | |
|     glUniform1i(4, imageHeight);
 | |
|     OPENGL_CHECK_ERROR;
 | |
|     ((GLBackend *)backend())->compute(UP_DIV(imageWidth, 4), UP_DIV(oc_4, 4), 1);
 | |
|     OPENGL_CHECK_ERROR;
 | |
| 
 | |
| //bias
 | |
|     mBiasBuffer.reset(new GLSSBOBuffer(sizeof(float) * ALIGN_UP4(mCommon->outputCount())));
 | |
|     float* bias = (float*)(mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
 | |
|     if(bias != nullptr){
 | |
|         ::memset(bias, 0, ALIGN_UP4(mCommon->outputCount()) * sizeof(float));
 | |
|         ::memcpy(bias, convOp->main_as_Convolution2D()->bias()->data(),
 | |
|                  convOp->main_as_Convolution2D()->bias()->size() * sizeof(float));
 | |
|     }
 | |
|     mBiasBuffer->unmap();
 | |
| }
 | |
| 
 | |
| ErrorCode GLConvolutionIm2col::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
|     GPUConvolution::onResize(inputs, outputs);
 | |
|     std::vector<std::string> im2colPrefix;
 | |
|     std::vector<std::string> gemmPrefix;
 | |
|     std::vector<std::string> col2imPrefix;
 | |
| 
 | |
|     if (mCommon->relu()) {
 | |
|         im2colPrefix.push_back("#define RELU");
 | |
|         gemmPrefix.push_back("#define RELU");
 | |
|         col2imPrefix.push_back("#define RELU");
 | |
|     }
 | |
|     if (mCommon->relu6()) {
 | |
|         im2colPrefix.push_back("#define RELU6");
 | |
|         gemmPrefix.push_back("#define RELU6");
 | |
|         col2imPrefix.push_back("#define RELU6");
 | |
|     }
 | |
| 
 | |
|     int ob = outputs[0]->batch();
 | |
|     int oc = outputs[0]->channel();
 | |
|     int oh = outputs[0]->height();
 | |
|     int ow = outputs[0]->width();
 | |
| 
 | |
|     int ic = inputs[0]->channel();
 | |
| 
 | |
|     obxohxow_4  = UP_DIV(ob*oh*ow, 4);
 | |
| 
 | |
|     int fw                = mCommon->kernelX();
 | |
|     int fh                = mCommon->kernelY();
 | |
| 
 | |
|     //input : temp image : (ib*oh*ow)/ 4, ic/4*(ib*oh*ow)%4*ic4
 | |
|     //output : temp image : oc/4 * (ob*oh*ow)%4, (ob*oh*ow)/4 * oc4
 | |
|     mSrcTexture = std::shared_ptr<GLTexture>(new GLTexture(UP_DIV(ic, 4)*UNIT*fw*fh, obxohxow_4, 1, ((GLBackend *)backend())->getTextrueFormat(), GL_TEXTURE_2D, false));
 | |
|     mDstTexture = std::shared_ptr<GLTexture>(new GLTexture(obxohxow_4, UP_DIV(oc, 4) * UNIT, 1, ((GLBackend *)backend())->getTextrueFormat(), GL_TEXTURE_2D, false));
 | |
| 
 | |
|     auto transform = mGLBackend->getProgram("clear_texture", glsl_clear_texture_glsl);
 | |
|     transform->useProgram();
 | |
|     glBindImageTexture(0, mSrcTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|     OPENGL_CHECK_ERROR;
 | |
|     glUniform1i(1, UP_DIV(ic, 4)*UNIT*fw*fh);
 | |
|     OPENGL_CHECK_ERROR;
 | |
|     glUniform1i(2, obxohxow_4);
 | |
|     OPENGL_CHECK_ERROR;
 | |
|     ((GLBackend *)backend())->compute(UP_DIV(UP_DIV(ic, 4)*UNIT*fw*fh, 4), UP_DIV(obxohxow_4, 4), 1);
 | |
|     OPENGL_CHECK_ERROR;
 | |
| 
 | |
|     if (true == mIsConv1x1) {
 | |
|         setLocalSize(im2colPrefix, mIm2colSize, 8, 8, 1);
 | |
|         mIm2ColProgram = mGLBackend->getProgram("image2col1x1", glsl_im2col1x1_glsl, im2colPrefix);
 | |
|     }else{
 | |
|         setLocalSize(im2colPrefix, mIm2colSize, 8, 8, 1);
 | |
|         mIm2ColProgram = mGLBackend->getProgram("image2col", glsl_im2col_glsl, im2colPrefix);
 | |
|     }
 | |
| 
 | |
|     setLocalSize(gemmPrefix, mGemmSize, 8, 8, 1);
 | |
|     mGemm16x16Program = mGLBackend->getProgram("gemm16x16", glsl_gemm16x16_glsl, gemmPrefix);
 | |
|     setLocalSize(col2imPrefix, mCol2imSize, 8, 8, 1);
 | |
|     mCol2ImProgram = mGLBackend->getProgram("col2image", glsl_col2im_glsl, col2imPrefix);
 | |
|     if (!mIsConv1x1) {
 | |
|         mImage2ColUniform = [=]() {
 | |
|             glUniform2i(2, mPadX, mPadY);
 | |
|             glUniform2i(3, mCommon->kernelX(), mCommon->kernelY());
 | |
|             glUniform2i(4, mCommon->strideX(), mCommon->strideY());
 | |
|             glUniform2i(5, mCommon->dilateX(), mCommon->dilateY());
 | |
|         };
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| ErrorCode GLConvolutionIm2col::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
 | |
| 
 | |
|     auto input         = inputs[0];
 | |
|     auto output        = outputs[0];
 | |
|     auto inputTexture  = input->deviceId();
 | |
|     auto outputTexture = output->deviceId();
 | |
| 
 | |
|     int iw = input->width();
 | |
|     int ih = input->height();
 | |
|     int ic = input->channel();
 | |
|     int ib = input->batch();
 | |
| 
 | |
|     int ow = output->width();
 | |
|     int oh = output->height();
 | |
|     int oc = output->channel();
 | |
|     int ob = output->batch();
 | |
| 
 | |
|     int ic_4 = UP_DIV(ic, 4);
 | |
|     int oc_4 = UP_DIV(oc, 4);
 | |
| 
 | |
|     //        image2col
 | |
|     {
 | |
|         mIm2ColProgram->useProgram();
 | |
|         glBindImageTexture(0, mSrcTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|         {
 | |
|             int texId = 0;
 | |
|             glActiveTexture(GL_TEXTURE0 + texId);
 | |
|             glUniform1i(1, texId);
 | |
|             glBindTexture(GL_TEXTURE_3D, inputTexture);
 | |
|             OPENGL_CHECK_ERROR;
 | |
|         }
 | |
| 
 | |
|         if (mIsConv1x1) {
 | |
|             glUniform1i(5, ic_4);
 | |
|             glUniform1i(6, ow);
 | |
|             glUniform1i(7, oh);
 | |
|         }else{
 | |
|             mImage2ColUniform();
 | |
|             glUniform4i(6, iw, ih, ic_4, 1);
 | |
|             glUniform4i(7, ow, oh, oc_4, 1);
 | |
|         }
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         ((GLBackend *)backend())->compute(UP_DIV(ow, mIm2colSize[0]), UP_DIV(oh, mIm2colSize[1]), UP_DIV(ic_4*ib, mIm2colSize[2]));
 | |
|         OPENGL_CHECK_ERROR;
 | |
|     }
 | |
| 
 | |
|     //gemm
 | |
|     {
 | |
|         mGemm16x16Program->useProgram();
 | |
|         glBindImageTexture(0, mDstTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         glBindImageTexture(1, mSrcTexture->id(), 0, GL_TRUE, 0, GL_READ_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|         glBindImageTexture(2, mKernelTexture->id(), 0, GL_TRUE, 0, GL_READ_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|         glUniform2i(3, obxohxow_4, oc_4);
 | |
|         if (mIsConv1x1) {
 | |
|             glUniform1i(4, ic_4);
 | |
|         }else{
 | |
|             glUniform1i(4, ic_4*mCommon->kernelX()*mCommon->kernelY());
 | |
|         }
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         ((GLBackend *)backend())->compute(UP_DIV(obxohxow_4, mGemmSize[0]), UP_DIV(oc_4, mGemmSize[1]), 1);
 | |
|         OPENGL_CHECK_ERROR;
 | |
|     }
 | |
| 
 | |
|     //col2image
 | |
|     {
 | |
|         mCol2ImProgram->useProgram();
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         glBindImageTexture(0, outputTexture, 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
 | |
|         {
 | |
|             int texId = 0;
 | |
|             glActiveTexture(GL_TEXTURE0 + texId);
 | |
|             glUniform1i(1, texId);
 | |
|             glBindTexture(GL_TEXTURE_2D, mDstTexture->id());
 | |
|             OPENGL_CHECK_ERROR;
 | |
|         }
 | |
|         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, mBiasBuffer->getId());
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         glUniform3i(3, ow, oh, oc_4);
 | |
|         OPENGL_CHECK_ERROR;
 | |
|         ((GLBackend *)backend())->compute(UP_DIV(ow, mCol2imSize[0]), UP_DIV(oh, mCol2imSize[1]), UP_DIV(oc_4*ob, mCol2imSize[2]));
 | |
|         OPENGL_CHECK_ERROR;
 | |
|     }
 | |
| 
 | |
|     return NO_ERROR;
 | |
| }
 | |
| 
 | |
| } // namespace OpenGL
 | |
| } // namespace MNN
 |