mirror of https://github.com/alibaba/MNN.git
240 lines
8.8 KiB
C++
240 lines
8.8 KiB
C++
//
|
|
// GLConvolution.cpp
|
|
// MNN
|
|
//
|
|
// Created by MNN on 2019/01/31.
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
//
|
|
|
|
#include "backend/opengl/GLConvolution.hpp"
|
|
#include <MNN/AutoTime.hpp>
|
|
|
|
#include <sstream>
|
|
#include "AllShader.hpp"
|
|
#include "core/Macro.h"
|
|
#include "backend/opengl/GLConvolutionIm2col.hpp"
|
|
namespace MNN {
|
|
namespace OpenGL {
|
|
|
|
// Constant shared with the convolution shaders: GLConvolution::onExecute uploads it as
// uniform location 8 and multiplies it into the X local size when dividing the output
// width into dispatch groups.
#define UNIT 4
|
|
|
|
// Stores the op's Convolution2D "common" parameter table in mCommon and derives
// mInputDepth from the length of the weight blob. When the op carries no weight
// blob, mInputDepth is left at 0.
//
// convOp : operator description; read through main_as_Convolution2D().
// b      : backend handed to the MNN::Execution base class.
GPUConvolution::GPUConvolution(const Op *convOp, Backend *b) : MNN::Execution(b) {
    auto conv2D = convOp->main_as_Convolution2D();
    mCommon     = conv2D->common();
    mInputDepth = 0;

    auto ocCount = mCommon->outputCount();
    auto weights = conv2D->weight();
    if (NULL == weights) {
        return;
    }

    // Presumably inverts weightSize = outputCount * inputDepth * kernelY * kernelX / group
    // (assumed weight layout -- confirm against the model converter).
    auto weightSize = weights->size();
    mInputDepth     = weightSize * mCommon->group() / mCommon->kernelX() / mCommon->kernelY() / ocCount;
}
|
|
// Nothing to release explicitly in this class; the destructor body is empty.
GPUConvolution::~GPUConvolution() = default;
|
|
|
|
// Resolves the padding (mPadX, mPadY) used by the convolution.
//
// For every pad mode except PadMode_SAME the values stored in the op are used
// directly. For PadMode_SAME the padding is computed from the first input and
// first output tensor sizes: half (rounded down) of the total padding needed so
// that the dilated kernel, stepped by the stride, covers the output extent.
// Always returns NO_ERROR.
ErrorCode GPUConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    if (mCommon->padMode() != PadMode_SAME) {
        mPadX = mCommon->padX();
        mPadY = mCommon->padY();
        return NO_ERROR;
    }

    auto src = inputs[0];
    auto dst = outputs[0];

    // Effective kernel footprint once dilation is applied.
    const int dilatedKernelW = (mCommon->kernelX() - 1) * mCommon->dilateX() + 1;
    const int dilatedKernelH = (mCommon->kernelY() - 1) * mCommon->dilateY() + 1;

    // Total padding required along each axis; negative means none is needed.
    int totalPadW = (dst->width() - 1) * mCommon->strideX() + dilatedKernelW - src->width();
    int totalPadH = (dst->height() - 1) * mCommon->strideY() + dilatedKernelH - src->height();
    if (totalPadW < 0) {
        totalPadW = 0;
    }
    if (totalPadH < 0) {
        totalPadH = 0;
    }

    mPadX = totalPadW / 2;
    mPadY = totalPadH / 2;
    return NO_ERROR;
}
|
|
|
|
// No explicit cleanup is performed here; the destructor body is empty.
GLConvolution::~GLConvolution() = default;
|
|
|
|
// Uploads the convolution's constant data to the GPU:
//   1. the bias, zero-padded to a multiple of 4 floats, into mBiasBuffer (SSBO);
//   2. the weights, repacked into 4x4 (input-lane x output-lane) tiles, into a
//      temporary SSBO which a compute pass ("transform_kernel_image_adreno")
//      then copies into the 3D texture mKernelTexture.
//
// inputs : unused here.
// convOp : operator whose Convolution2D bias/weight blobs are read.
// bn     : backend; cast to GLBackend to fetch programs and dispatch compute.
//
// If a buffer cannot be mapped, or the op has no bias/weight blob, the
// corresponding buffer keeps whatever the mapping/zero fill left in it and the
// copy is skipped instead of dereferencing a null pointer.
GLConvolution::GLConvolution(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn) : GPUConvolution(convOp, bn) {
    auto extra  = (GLBackend *)bn;
    auto conv2D = convOp->main_as_Convolution2D();

    // Number of floats in the packed weight buffer: oc and ic are both rounded up to 4.
    auto totalWeightSize =
        ALIGN_UP4(mCommon->outputCount()) * ALIGN_UP4(mInputDepth) * (mCommon->kernelY() * mCommon->kernelX());

    // ---- bias -------------------------------------------------------------------
    mBiasBuffer.reset(new GLSSBOBuffer(sizeof(float) * ALIGN_UP4(mCommon->outputCount())));
    float *bias = (float *)(mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
    if (bias != nullptr) {
        // Zero first so the padding lanes beyond outputCount are well defined.
        ::memset(bias, 0, ALIGN_UP4(mCommon->outputCount()) * sizeof(float));
        auto biasData = conv2D->bias();
        if (biasData != nullptr) {
            ::memcpy(bias, biasData->data(), biasData->size() * sizeof(float));
        }
    }
    mBiasBuffer->unmap();

    // ---- weights ----------------------------------------------------------------
    // Staging buffer only; it is consumed by the transform pass below.
    auto kernelBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(sizeof(float) * totalWeightSize));
    int fw    = mCommon->kernelX();
    int fh    = mCommon->kernelY();
    int unit  = 4;
    int unit2 = unit * unit;
    // Floats occupied by ONE group of 4 output channels.
    int alignedWeightSize = UP_DIV(mInputDepth, unit) * fw * fh * unit2;
    int oc_4              = UP_DIV(mCommon->outputCount(), unit);

    float *dest = (float *)kernelBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
    if (dest != nullptr) {
        // The buffer was mapped with INVALIDATE, so its contents are undefined.
        // Clear the WHOLE buffer (all oc_4 groups), not just the first group,
        // otherwise padding lanes of later groups would reach the texture as garbage.
        ::memset(dest, 0, sizeof(float) * totalWeightSize);

        auto weightData = conv2D->weight();
        if (weightData != nullptr) {
            const float *source = weightData->data();
            int cur             = 0;

            //weight : oc ic h w -> oc/4, ic/4 ky kx ic4 oc4
            for (int b = 0; b < mCommon->outputCount(); ++b) {
                int b_4      = b / unit;
                float *dst_b = dest + b_4 * alignedWeightSize;
                int mx       = b % unit; // output-channel lane inside the 4x4 tile
                for (int d = 0; d < mInputDepth; ++d) {
                    int my       = d % unit; // input-channel lane inside the 4x4 tile
                    int d_4      = d / unit;
                    float *dst_d = dst_b + d_4 * fw * fh * unit2;
                    for (int y = 0; y < fh; ++y) {
                        float *dst_y = dst_d + y * fw * unit2;
                        for (int x = 0; x < fw; ++x) {
                            float *dst_x          = dst_y + x * unit2;
                            dst_x[unit * my + mx] = source[cur++];
                        }
                    }
                }
            }
        }
    }
    kernelBuffer->unmap();

    // ---- buffer -> 3D texture ---------------------------------------------------
    int ic_4 = UP_DIV(mInputDepth, unit);
    //weight image : ky kx, oc/4, ic/4*ic4 oc4
    mKernelTexture =
        std::shared_ptr<GLTexture>(new GLTexture(ic_4 * unit, oc_4, fw * fh, extra->getTextrueFormat(), GL_TEXTURE_3D, false));

    auto transform = extra->getProgram("transform_kernel_image_adreno", glsl_kernel2image_adreno_glsl);
    transform->useProgram();
    glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, extra->getTextrueFormat());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, kernelBuffer->getId());
    OPENGL_CHECK_ERROR;
    glUniform1i(3, fw * fh);
    glUniform1i(4, ic_4);
    OPENGL_CHECK_ERROR;

    extra->compute(ic_4, oc_4, fw * fh);
    OPENGL_CHECK_ERROR;
}
|
|
|
|
// Selects and builds the compute program for the current tensor shapes.
//
// After the base class has resolved padding, this builds the shader define list
// (RELU / RELU6), asks setLocalSize() to fill mLocalSize, and then picks either
// the "convolution1x1" program or the general "convolution" program. For the
// general program the kernel, stride and dilation sizes are cached for onExecute.
// Always returns NO_ERROR.
ErrorCode GLConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    GPUConvolution::onResize(inputs, outputs);
    auto glBackend = (GLBackend *)backend();

    // Shader preprocessor lines for the fused activation.
    std::vector<std::string> defines;
    if (mCommon->relu()) {
        defines.push_back("#define RELU");
    }
    if (mCommon->relu6()) {
        defines.push_back("#define RELU6");
    }

    auto outDepthQuad = UP_DIV(outputs[0]->channel(), 4);
    setLocalSize(defines, mLocalSize, 1, 1, outDepthQuad);

    // A 1x1 kernel with unit stride and no padding in the op qualifies for the 1x1 program.
    const bool unitKernel = (1 == mCommon->kernelY()) && (1 == mCommon->kernelX());
    const bool unitStride = (1 == mCommon->strideY()) && (1 == mCommon->strideX());
    const bool noPadding  = (0 == mCommon->padX()) && (0 == mCommon->padY());
    if (unitKernel && unitStride && noPadding) {
        mIs1x1 = true;
    }

    if (!mIs1x1) {
        mKx      = mCommon->kernelX();
        mKy      = mCommon->kernelY();
        mSx      = mCommon->strideX();
        mSy      = mCommon->strideY();
        mDx      = mCommon->dilateX();
        mDy      = mCommon->dilateY();
        mProgram = glBackend->getProgram("convolution", glsl_convolution_glsl, defines);
        return NO_ERROR;
    }

    mProgram = glBackend->getProgram("convolution1x1", glsl_convolution1x1_glsl, defines);
    return NO_ERROR;
}
|
|
|
|
|
|
// Binds all resources for the program chosen in onResize and dispatches it.
//
// Bindings made here:
//   image unit 0      -> output texture (write only)
//   texture unit 0    -> input texture,  sampler uniform at location 1
//   texture unit 1    -> kernel texture, sampler uniform at location 2
//   SSBO binding 3    -> bias buffer
//   uniforms 4..7     -> pad / kernel / stride / dilate (general program only)
//   uniform 10, 11    -> output and input (width, height, channel/4)
//   uniform 8         -> UNIT
// Always returns NO_ERROR.
ErrorCode GLConvolution::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto src = inputs[0];
    auto dst = outputs[0];

    auto srcTexture = src->deviceId();
    auto dstTexture = dst->deviceId();
    int dstDepthQuad = UP_DIV(dst->channel(), 4);

    mProgram->useProgram();
    glBindImageTexture(0, dstTexture, 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());

    // Input feature map on texture unit 0.
    const int inputUnit = 0;
    glActiveTexture(GL_TEXTURE0 + inputUnit);
    glUniform1i(1, inputUnit);
    glBindTexture(GL_TEXTURE_3D, srcTexture);
    OPENGL_CHECK_ERROR;

    // Packed kernel on texture unit 1.
    const int kernelUnit = 1;
    glActiveTexture(GL_TEXTURE0 + kernelUnit);
    OPENGL_CHECK_ERROR;
    glUniform1i(2, kernelUnit);
    OPENGL_CHECK_ERROR;
    glBindTexture(GL_TEXTURE_3D, mKernelTexture->id());
    OPENGL_CHECK_ERROR;

    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, mBiasBuffer->getId());

    // Geometry uniforms are only set for the general program.
    if (!mIs1x1) {
        glUniform2i(4, mPadX, mPadY);
        glUniform2i(5, mKx, mKy);
        glUniform2i(6, mSx, mSy);
        glUniform2i(7, mDx, mDy);
    }
    OPENGL_CHECK_ERROR;

    glUniform3i(10, dst->width(), dst->height(), dstDepthQuad);
    glUniform3i(11, src->width(), src->height(), UP_DIV(src->channel(), 4));

    glUniform1i(8, UNIT);
    OPENGL_CHECK_ERROR;

    // The output width is divided by UNIT times the X local size when counting groups.
    ((GLBackend *)backend())->compute(UP_DIV(dst->width(), UNIT*mLocalSize[0]), UP_DIV(dst->height(), mLocalSize[1]),
                                      UP_DIV(dstDepthQuad, mLocalSize[2]));

    OPENGL_CHECK_ERROR;

    return NO_ERROR;
}
|
|
|
|
|
|
class ConvolutionCreator : public GLBackend::Creator {
|
|
public:
|
|
virtual ~ConvolutionCreator() = default;
|
|
virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
|
|
const MNN::Op *op, Backend *backend) const override {
|
|
auto common = op->main_as_Convolution2D()->common();
|
|
|
|
//TODO: bugfix
|
|
if(common->padX() == 1 || common->strideX() != 1){
|
|
return new GLConvolution(inputs, op, backend);
|
|
}
|
|
if(((GLBackend *)backend)->gpuType() == GLBackend::ADRENO){
|
|
if(((GLBackend *)backend)->glVersion() >= 269){
|
|
return new GLConvolution(inputs, op, backend);
|
|
}else{
|
|
return new GLConvolutionIm2col(inputs, op, backend);
|
|
}
|
|
}else{
|
|
return new GLConvolutionIm2col(inputs, op, backend);
|
|
}
|
|
}
|
|
};
|
|
|
|
// Namespace-scope object tying ConvolutionCreator to OpType_Convolution.
// NOTE(review): presumably GLCreatorRegister's constructor registers the creator with
// the GL backend during static initialization -- confirm in its definition.
GLCreatorRegister<ConvolutionCreator> __gl_conv_op(OpType_Convolution);
|
|
} // namespace OpenGL
|
|
} // namespace MNN
|