mirror of https://github.com/alibaba/MNN.git
250 lines
9.6 KiB
C++
250 lines
9.6 KiB
C++
//
|
|
// GLConvolution.cpp
|
|
// MNN
|
|
//
|
|
// Created by MNN on 2019/01/31.
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
//
|
|
|
|
#include "backend/opengl/GLConvolution.hpp"
|
|
#include <MNN/AutoTime.hpp>
|
|
|
|
#include <sstream>
|
|
#include "AllShader.hpp"
|
|
#include "backend/opengl/GLBackend.hpp"
|
|
#include "core/Macro.h"
|
|
#include "backend/opengl/GLConvolutionIm2col.hpp"
|
|
#include "backend/opengl/GLUtils.hpp"
|
|
namespace MNN {
|
|
namespace OpenGL {
|
|
|
|
// Nothing to release by hand: the destructor body is empty, so member cleanup
// is left to the members' own destructors.
GLConvolutionIm2col::~GLConvolutionIm2col() = default;
|
|
|
|
// Channels are processed in groups of UNIT (see the oc_4 / ic_4 computations below).
#define UNIT 4
// Number of floats in one UNIT x UNIT (input-channel x output-channel) weight tile.
#define UNIT2 (UNIT * UNIT)
|
|
/**
 * Prepares the constant GPU resources of the im2col convolution.
 *
 * The float weights stored in convOp are repacked on the CPU into
 * UNIT x UNIT (input-channel x output-channel) tiles inside an SSBO, the
 * "transform_kernel_image" compute program is then dispatched with that SSBO
 * and mKernelTexture bound, and finally the bias is uploaded into a
 * zero-padded SSBO (mBiasBuffer).
 *
 * @param inputs  input tensors (not used by the constructor).
 * @param convOp  convolution op providing the Convolution2D weight and bias.
 * @param bn      backend owning this execution; it is used as a GLBackend.
 */
GLConvolutionIm2col::GLConvolutionIm2col(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn) : GPUConvolution(convOp, bn) {
    mGLBackend = static_cast<GLBackend *>(bn);

    const int outputCount = mCommon->outputCount();
    const int fw          = mCommon->kernelX();
    const int fh          = mCommon->kernelY();
    const int kernelArea  = fw * fh;
    const int oc_4        = UP_DIV(outputCount, UNIT);
    const int ic_4        = UP_DIV(mInputDepth, UNIT);
    mIsConv1x1            = (fw == 1 && fh == 1);

    // Both channel counts are padded to a multiple of 4 so every tile is complete.
    const auto totalWeightSize = ALIGN_UP4(outputCount) * ALIGN_UP4(mInputDepth) * kernelArea;

    // Staging buffer for the repacked weights; only needed until the transform
    // program below has been dispatched, hence a local rather than a member.
    auto kernelBuffer = std::make_shared<GLSSBOBuffer>(sizeof(float) * totalWeightSize);
    float *dest = static_cast<float *>(kernelBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
    if (nullptr != dest) {
        // Zero first so the padding lanes of partially filled tiles stay 0.
        ::memset(dest, 0, totalWeightSize * sizeof(float));
        const float *source = convOp->main_as_Convolution2D()->weight()->data();

        // weight : oc ic -> oc/4 ic/4 ic4 oc4
        // weight image : oc_4, ic_4 * ic4 oc4
        const int alignedWeightSize = ic_4 * kernelArea * UNIT2;
        for (int o = 0; o < outputCount; ++o) {
            float *dstOc     = dest + (o / UNIT) * alignedWeightSize;
            const int ocLane = o % UNIT;
            for (int i = 0; i < mInputDepth; ++i) {
                float *dstIc   = dstOc + (i / UNIT) * kernelArea * UNIT2;
                const int lane = UNIT * (i % UNIT) + ocLane;
                // k walks the kernel window row by row (k == y * fw + x); the
                // source is consumed sequentially in (oc, ic, y, x) order.
                for (int k = 0; k < kernelArea; ++k) {
                    dstIc[k * UNIT2 + lane] = *source++;
                }
            }
        }
        // Unmap only a buffer that was actually mapped.
        kernelBuffer->unmap();
    } else {
        MNN_ASSERT(nullptr != dest);
    }

    auto glBackend           = static_cast<GLBackend *>(backend());
    const auto textureFormat = glBackend->getTextrueFormat();

    // The same width is used for the texture allocation and for the shader uniform.
    const int imageWidth  = ic_4 * UNIT * kernelArea;
    const int imageHeight = oc_4;
    mKernelTexture = std::make_shared<GLTexture>(imageWidth, imageHeight, 1, textureFormat, GL_TEXTURE_2D, false);

    auto transform = mGLBackend->getProgram("transform_kernel_image", glsl_kernel2image_glsl);
    transform->useProgram();
    glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, textureFormat);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, kernelBuffer->getId());
    OPENGL_CHECK_ERROR;
    glUniform1i(3, imageWidth);
    OPENGL_CHECK_ERROR;
    glUniform1i(4, imageHeight);
    OPENGL_CHECK_ERROR;
    glBackend->compute(UP_DIV(imageWidth, 4), UP_DIV(oc_4, 4), 1);
    OPENGL_CHECK_ERROR;

    // bias
    const int biasCapacity = ALIGN_UP4(outputCount);
    mBiasBuffer.reset(new GLSSBOBuffer(sizeof(float) * biasCapacity));
    float *bias = static_cast<float *>(mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
    if (nullptr != bias) {
        ::memset(bias, 0, biasCapacity * sizeof(float));
        const auto biasData = convOp->main_as_Convolution2D()->bias();
        if (nullptr != biasData) {
            // The element count comes from the model file: never copy more
            // than the buffer allocated above can hold.
            int biasCount = static_cast<int>(biasData->size());
            if (biasCount > biasCapacity) {
                biasCount = biasCapacity;
            }
            ::memcpy(bias, biasData->data(), biasCount * sizeof(float));
        }
        mBiasBuffer->unmap();
    }
}
|
|
|
|
/**
 * Allocates the shape-dependent temporaries and selects the compute programs.
 *
 * Creates the im2col source texture and the GEMM destination texture for the
 * current output shape, dispatches the "clear_texture" program on the source
 * texture, and fetches the im2col / gemm16x16 / col2image programs with the
 * activation defines and local sizes applied.
 *
 * @param inputs   input tensors; inputs[0] supplies the input channel count.
 * @param outputs  output tensors; outputs[0] supplies batch/channel/height/width.
 * @return the error reported by GPUConvolution::onResize if any, else NO_ERROR.
 */
ErrorCode GLConvolutionIm2col::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    // Do not build GPU resources on top of a failed base-class resize.
    const ErrorCode baseCode = GPUConvolution::onResize(inputs, outputs);
    if (NO_ERROR != baseCode) {
        return baseCode;
    }

    std::vector<std::string> im2colPrefix;
    std::vector<std::string> gemmPrefix;
    std::vector<std::string> col2imPrefix;

    // The activation define is handed to all three programs.
    if (mCommon->relu()) {
        im2colPrefix.push_back("#define RELU");
        gemmPrefix.push_back("#define RELU");
        col2imPrefix.push_back("#define RELU");
    }
    if (mCommon->relu6()) {
        im2colPrefix.push_back("#define RELU6");
        gemmPrefix.push_back("#define RELU6");
        col2imPrefix.push_back("#define RELU6");
    }

    const int ob = outputs[0]->batch();
    const int oc = outputs[0]->channel();
    const int oh = outputs[0]->height();
    const int ow = outputs[0]->width();
    const int ic = inputs[0]->channel();
    const int fw = mCommon->kernelX();
    const int fh = mCommon->kernelY();

    obxohxow_4 = UP_DIV(ob * oh * ow, 4);

    auto glBackend           = static_cast<GLBackend *>(backend());
    const auto textureFormat = glBackend->getTextrueFormat();

    // input : temp image : (ib*oh*ow)/ 4, ic/4*(ib*oh*ow)%4*ic4
    // output : temp image : oc/4 * (ob*oh*ow)%4, (ob*oh*ow)/4 * oc4
    const int srcWidth  = UP_DIV(ic, 4) * UNIT * fw * fh;
    const int dstHeight = UP_DIV(oc, 4) * UNIT;
    mSrcTexture = std::make_shared<GLTexture>(srcWidth, obxohxow_4, 1, textureFormat, GL_TEXTURE_2D, false);
    mDstTexture = std::make_shared<GLTexture>(obxohxow_4, dstHeight, 1, textureFormat, GL_TEXTURE_2D, false);

    // Run the "clear_texture" program over the whole source texture.
    // NOTE(review): presumably this zero-fills it so unwritten texels read as 0
    // in the GEMM; confirm against the shader source.
    auto clearProgram = mGLBackend->getProgram("clear_texture", glsl_clear_texture_glsl);
    clearProgram->useProgram();
    glBindImageTexture(0, mSrcTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, textureFormat);
    OPENGL_CHECK_ERROR;
    glUniform1i(1, srcWidth);
    OPENGL_CHECK_ERROR;
    glUniform1i(2, obxohxow_4);
    OPENGL_CHECK_ERROR;
    glBackend->compute(UP_DIV(srcWidth, 4), UP_DIV(obxohxow_4, 4), 1);
    OPENGL_CHECK_ERROR;

    // The local size is the same for both im2col variants; only the shader differs.
    setLocalSize(im2colPrefix, mIm2colSize, 8, 8, 1);
    if (mIsConv1x1) {
        mIm2ColProgram = mGLBackend->getProgram("image2col1x1", glsl_im2col1x1_glsl, im2colPrefix);
    } else {
        mIm2ColProgram = mGLBackend->getProgram("image2col", glsl_im2col_glsl, im2colPrefix);
    }

    setLocalSize(gemmPrefix, mGemmSize, 8, 8, 1);
    mGemm16x16Program = mGLBackend->getProgram("gemm16x16", glsl_gemm16x16_glsl, gemmPrefix);
    setLocalSize(col2imPrefix, mCol2imSize, 8, 8, 1);
    mCol2ImProgram = mGLBackend->getProgram("col2image", glsl_col2im_glsl, col2imPrefix);

    if (!mIsConv1x1) {
        // Only the general im2col shader takes pad/kernel/stride/dilate uniforms.
        // The closure reads members only, so `this` is captured explicitly.
        mImage2ColUniform = [this]() {
            glUniform2i(2, mPadX, mPadY);
            glUniform2i(3, mCommon->kernelX(), mCommon->kernelY());
            glUniform2i(4, mCommon->strideX(), mCommon->strideY());
            glUniform2i(5, mCommon->dilateX(), mCommon->dilateY());
        };
    }

    return NO_ERROR;
}
|
|
/**
 * Runs the convolution as three compute dispatches:
 *   1. image2col : input texture      -> mSrcTexture
 *   2. gemm      : mSrcTexture x mKernelTexture -> mDstTexture
 *   3. col2image : mDstTexture + mBiasBuffer    -> output texture
 *
 * @param inputs   inputs[0] is the source tensor (its deviceId is bound as a 3D texture).
 * @param outputs  outputs[0] is the destination tensor (its deviceId is bound as an image).
 * @return always NO_ERROR.
 */
ErrorCode GLConvolutionIm2col::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto glBackend           = static_cast<GLBackend *>(backend());
    const auto textureFormat = glBackend->getTextrueFormat();

    auto srcTensor = inputs[0];
    auto dstTensor = outputs[0];
    auto srcTexId  = srcTensor->deviceId();
    auto dstTexId  = dstTensor->deviceId();

    const int iw = srcTensor->width();
    const int ih = srcTensor->height();
    const int ic = srcTensor->channel();
    const int ib = srcTensor->batch();

    const int ow = dstTensor->width();
    const int oh = dstTensor->height();
    const int oc = dstTensor->channel();
    const int ob = dstTensor->batch();

    const int ic_4 = UP_DIV(ic, 4);
    const int oc_4 = UP_DIV(oc, 4);

    // Stage 1: image2col
    {
        mIm2ColProgram->useProgram();
        glBindImageTexture(0, mSrcTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, textureFormat);

        // The input tensor is sampled through texture unit 0 (uniform location 1).
        const int samplerUnit = 0;
        glActiveTexture(GL_TEXTURE0 + samplerUnit);
        glUniform1i(1, samplerUnit);
        glBindTexture(GL_TEXTURE_3D, srcTexId);
        OPENGL_CHECK_ERROR;

        if (!mIsConv1x1) {
            // General kernel: pad/kernel/stride/dilate first, then both shapes.
            mImage2ColUniform();
            glUniform4i(6, iw, ih, ic_4, 1);
            glUniform4i(7, ow, oh, oc_4, 1);
        } else {
            glUniform1i(5, ic_4);
            glUniform1i(6, ow);
            glUniform1i(7, oh);
        }
        OPENGL_CHECK_ERROR;

        glBackend->compute(UP_DIV(ow, mIm2colSize[0]), UP_DIV(oh, mIm2colSize[1]), UP_DIV(ic_4 * ib, mIm2colSize[2]));
        OPENGL_CHECK_ERROR;
    }

    // Stage 2: gemm
    {
        mGemm16x16Program->useProgram();
        glBindImageTexture(0, mDstTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, textureFormat);
        OPENGL_CHECK_ERROR;
        glBindImageTexture(1, mSrcTexture->id(), 0, GL_TRUE, 0, GL_READ_ONLY, textureFormat);
        glBindImageTexture(2, mKernelTexture->id(), 0, GL_TRUE, 0, GL_READ_ONLY, textureFormat);
        glUniform2i(3, obxohxow_4, oc_4);

        // Uniform 4: ic_4 for 1x1, otherwise ic_4 times the kernel window area.
        const int gemmDepth = mIsConv1x1 ? ic_4 : ic_4 * mCommon->kernelX() * mCommon->kernelY();
        glUniform1i(4, gemmDepth);
        OPENGL_CHECK_ERROR;

        glBackend->compute(UP_DIV(obxohxow_4, mGemmSize[0]), UP_DIV(oc_4, mGemmSize[1]), 1);
        OPENGL_CHECK_ERROR;
    }

    // Stage 3: col2image
    {
        mCol2ImProgram->useProgram();
        OPENGL_CHECK_ERROR;
        glBindImageTexture(0, dstTexId, 0, GL_TRUE, 0, GL_WRITE_ONLY, textureFormat);

        // The GEMM result is sampled through texture unit 0 (uniform location 1).
        const int samplerUnit = 0;
        glActiveTexture(GL_TEXTURE0 + samplerUnit);
        glUniform1i(1, samplerUnit);
        glBindTexture(GL_TEXTURE_2D, mDstTexture->id());
        OPENGL_CHECK_ERROR;

        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, mBiasBuffer->getId());
        OPENGL_CHECK_ERROR;
        glUniform3i(3, ow, oh, oc_4);
        OPENGL_CHECK_ERROR;

        glBackend->compute(UP_DIV(ow, mCol2imSize[0]), UP_DIV(oh, mCol2imSize[1]), UP_DIV(oc_4 * ob, mCol2imSize[2]));
        OPENGL_CHECK_ERROR;
    }

    return NO_ERROR;
}
|
|
|
|
} // namespace OpenGL
|
|
} // namespace MNN
|