mirror of https://github.com/alibaba/MNN.git
162 lines
5.4 KiB
C++
162 lines
5.4 KiB
C++
//
|
|
// GLConvolutionDepthwise.cpp
|
|
// MNN
|
|
//
|
|
// Created by MNN on 2019/01/31.
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
//
|
|
|
|
#include "backend/opengl/GLConvolutionDepthwise.hpp"
|
|
#include <MNN/AutoTime.hpp>
|
|
|
|
#include <sstream>
|
|
#include "AllShader.hpp"
|
|
#include "backend/opengl/GLBackend.hpp"
|
|
#include "core/Macro.h"
|
|
namespace MNN {
|
|
namespace OpenGL {
|
|
|
|
// Local-size constants for the depthwise compute program. They are emitted
// into the shader prefix as XLOCAL / YLOCAL / ZLOCAL by the constructor and
// used as divisors when onExecute computes its dispatch counts.
static constexpr int gXLocal = 8;
static constexpr int gYLocal = 8;
static constexpr int gZLocal = 1;
|
|
|
|
// Nothing is released explicitly here; the destructor body is empty and
// member clean-up is left to the members' own destructors.
GLConvolutionDepthwise::~GLConvolutionDepthwise() = default;
|
|
|
|
/**
 * Prepares everything the depthwise convolution needs before the first run.
 *
 * Steps, in order:
 *   1. allocate the bias SSBO and a temporary weight SSBO;
 *   2. zero-fill both and copy the op's bias / weight arrays into them;
 *   3. build the "convolution_depthwise" program with the RELU / RELU6 and
 *      XLOCAL / YLOCAL / ZLOCAL defines;
 *   4. create the 3D kernel texture and dispatch the
 *      "transform_kernel_image_depthwise" program once to fill it from the
 *      temporary weight SSBO.
 *
 * @param inputs  input tensors (not referenced by this constructor)
 * @param convOp  op whose Convolution2D payload supplies weight and bias
 * @param bn      backend; used here as a GLBackend to fetch programs
 */
GLConvolutionDepthwise::GLConvolutionDepthwise(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn)
    : GPUConvolution(convOp, bn) {
    auto glBackend = static_cast<GLBackend *>(bn);

    // Bias storage is padded up to a multiple of four floats.
    const int paddedOutputCount = ALIGN_UP4(mCommon->outputCount());
    const auto biasBytes        = sizeof(float) * paddedOutputCount;
    mBiasBuffer.reset(new GLSSBOBuffer(biasBytes));

    const int kernelW      = mCommon->kernelX();
    const int kernelH      = mCommon->kernelY();
    const int channelPack  = 4;
    const int srcDepthQuad = UP_DIV(mInputDepth, channelPack);

    // Staging buffer for the raw weights; it only lives until the end of the
    // constructor, after the transform pass below has been dispatched.
    const auto kernelBytes = sizeof(float) * kernelW * kernelH * srcDepthQuad * channelPack;
    std::shared_ptr<GLSSBOBuffer> stagingKernel(new GLSSBOBuffer(kernelBytes));

    const auto conv2D = convOp->main_as_Convolution2D();

    // Upload weights: clear the whole (padded) buffer, then overwrite the
    // leading part with the weights stored in the op.
    auto mappedKernel = stagingKernel->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
    if (nullptr != mappedKernel) {
        const auto weightVec = conv2D->weight();
        ::memset(mappedKernel, 0, kernelBytes);
        ::memcpy(mappedKernel, weightVec->data(), weightVec->size() * sizeof(float));
    }
    stagingKernel->unmap();

    // Upload bias the same way.
    auto mappedBias = mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
    if (nullptr != mappedBias) {
        const auto biasVec = conv2D->bias();
        ::memset(mappedBias, 0, biasBytes);
        ::memcpy(mappedBias, biasVec->data(), biasVec->size() * sizeof(float));
    }
    mBiasBuffer->unmap();

    // Preprocessor lines prepended to the shader source.
    std::vector<std::string> defines;
    if (mCommon->relu()) {
        defines.push_back("#define RELU");
    }
    if (mCommon->relu6()) {
        defines.push_back("#define RELU6");
    }
    auto appendNumericDefine = [&defines](const char *head, int value) {
        std::ostringstream line;
        line << head << value;
        defines.push_back(line.str());
    };
    appendNumericDefine("#define XLOCAL ", gXLocal);
    appendNumericDefine("#define YLOCAL ", gYLocal);
    appendNumericDefine("#define ZLOCAL ", gZLocal);

    mProgram = glBackend->getProgram("convolution_depthwise", glsl_convlutionDepthwise_glsl, defines);
    mKernelTexture.reset(new GLTexture(srcDepthQuad, kernelW, kernelH,
                                       static_cast<GLBackend *>(backend())->getTextrueFormat(), GL_TEXTURE_3D, false));

    // One-off pass: image unit 0 receives the kernel texture for writing, SSBO
    // binding 2 supplies the staged weights, uniforms 3 / 4 carry the kernel
    // width / height.
    auto kernelToImage = glBackend->getProgram("transform_kernel_image_depthwise", glsl_kernel2ImageDepthwise_glsl);
    kernelToImage->useProgram();
    glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY,
                       static_cast<GLBackend *>(backend())->getTextrueFormat());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, stagingKernel->getId());
    OPENGL_CHECK_ERROR;
    glUniform1i(3, kernelW);
    glUniform1i(4, kernelH);
    OPENGL_CHECK_ERROR;

    static_cast<GLBackend *>(backend())->compute(srcDepthQuad, kernelW, kernelH);
    OPENGL_CHECK_ERROR;
}
|
|
|
|
/**
 * Re-runs the base-class resize and rebuilds the uniform-upload closure.
 *
 * Kernel size, stride and dilation are read from mCommon once here and
 * copied into the closure; mPadX / mPadY are read through `this` each time
 * the closure is invoked.
 *
 * @param inputs   forwarded to GPUConvolution::onResize
 * @param outputs  forwarded to GPUConvolution::onResize
 * @return NO_ERROR (the base-class result is not inspected)
 */
ErrorCode GLConvolutionDepthwise::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    GPUConvolution::onResize(inputs, outputs);

    const int kernelW   = mCommon->kernelX();
    const int kernelH   = mCommon->kernelY();
    const int strideW   = mCommon->strideX();
    const int strideH   = mCommon->strideY();
    const int dilationW = mCommon->dilateX();
    const int dilationH = mCommon->dilateY();

    // Uniform locations 4..7 receive pad, kernel, stride and dilation.
    mSetUniform = [this, kernelW, kernelH, strideW, strideH, dilationW, dilationH]() {
        glUniform2i(4, mPadX, mPadY);
        glUniform2i(5, kernelW, kernelH);
        glUniform2i(6, strideW, strideH);
        glUniform2i(7, dilationW, dilationH);
    };

    return NO_ERROR;
}
|
|
|
|
/**
 * Dispatches the depthwise convolution program for the first input/output.
 *
 * Bindings set up before the dispatch:
 *   - image unit 0: output texture (write only)
 *   - texture unit 0 / uniform 1: input texture
 *   - texture unit 1 / uniform 2: kernel texture built in the constructor
 *   - SSBO binding 3: bias buffer
 *   - uniforms 4..7 via mSetUniform, uniform 10 / 11: output / input
 *     (width, height, channel quads)
 *
 * @param inputs   only inputs[0] is used
 * @param outputs  only outputs[0] is used
 * @return NO_ERROR
 */
ErrorCode GLConvolutionDepthwise::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto src = inputs[0];
    auto dst = outputs[0];

    auto srcTexture = src->deviceId();
    auto dstTexture = dst->deviceId();

    // Channels are handled in groups of four.
    const int dstDepthQuad = UP_DIV(dst->channel(), 4);

    constexpr int kInputUnit  = 0;
    constexpr int kKernelUnit = 1;

    mProgram->useProgram();
    glBindImageTexture(0, dstTexture, 0, GL_TRUE, 0, GL_WRITE_ONLY,
                       static_cast<GLBackend *>(backend())->getTextrueFormat());
    OPENGL_CHECK_ERROR;

    // Input feature map.
    glActiveTexture(GL_TEXTURE0 + kInputUnit);
    glUniform1i(1, kInputUnit);
    glBindTexture(GL_TEXTURE_3D, srcTexture);
    OPENGL_CHECK_ERROR;

    // Kernel weights.
    glActiveTexture(GL_TEXTURE0 + kKernelUnit);
    OPENGL_CHECK_ERROR;
    glUniform1i(2, kKernelUnit);
    OPENGL_CHECK_ERROR;
    glBindTexture(GL_TEXTURE_3D, mKernelTexture->id());
    OPENGL_CHECK_ERROR;

    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, mBiasBuffer->getId());
    OPENGL_CHECK_ERROR;

    // Pad / kernel / stride / dilation uniforms prepared in onResize.
    mSetUniform();

    glUniform3i(10, dst->width(), dst->height(), dstDepthQuad);
    glUniform3i(11, src->width(), src->height(), UP_DIV(src->channel(), 4));
    OPENGL_CHECK_ERROR;

    // Dispatch counts: output extent divided (rounding up) by the local sizes.
    static_cast<GLBackend *>(backend())->compute(UP_DIV(dst->width(), gXLocal),
                                                 UP_DIV(dst->height(), gYLocal),
                                                 UP_DIV(dstDepthQuad, gZLocal));
    OPENGL_CHECK_ERROR;

    return NO_ERROR;
}
|
|
// File-scope registrar object, constructed during static initialization with
// OpType_ConvolutionDepthwise and a TypedCreator for GLConvolutionDepthwise.
// NOTE(review): presumably this is what makes the GL backend pick this class
// for depthwise-convolution ops — confirm against GLCreatorRegister.
GLCreatorRegister<TypedCreator<GLConvolutionDepthwise>> __depthwise_conv_op(OpType_ConvolutionDepthwise);
|
|
} // namespace OpenGL
|
|
} // namespace MNN
|