MNN/source/backend/opengl/GLBackend.cpp

501 lines
16 KiB
C++

//
// GLBackend.cpp
// MNN
//
// Created by MNN on 2019/01/31.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <sstream>
#include "AllShader.hpp"
#include "GLSSBOBuffer.hpp"
#include "GLTexture.hpp"
#include <MNN/AutoTime.hpp>
#include "GLBackend.hpp"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#include "core/BufferAllocator.hpp"
#include <mutex>
#include <MNN/Tensor.hpp>
namespace MNN {
namespace OpenGL {
std::map<OpType, GLBackend::Creator*>* gCreator() {
static std::once_flag once;
static std::map<OpType, GLBackend::Creator*>* creators = nullptr;
std::call_once(once, [&]() { creators = new std::map<OpType, GLBackend::Creator*>; });
return creators;
};
bool GLBackend::addCreator(OpType t, Creator* c) {
auto map = gCreator();
if (map->find(t) != map->end()) {
MNN_PRINT("Error: %d type has be added\n", t);
return false;
}
map->insert(std::make_pair(t, c));
return true;
}
std::shared_ptr<GLProgram> GLBackend::getTreatedProgramWithPrefix(const char *content, const std::vector<std::string> &prefix) {
std::ostringstream tc;
tc << GLProgram::getHead(getImageFormat());
for (auto &s : prefix) {
tc << s << "\n";
}
tc << content;
return std::shared_ptr<GLProgram>(new GLProgram(tc.str()));
}
std::shared_ptr<GLProgram> GLBackend::getTreatedProgram(const char *content) {
std::ostringstream tc;
tc << GLProgram::getHead(getImageFormat()) << content;
return std::shared_ptr<GLProgram>(new GLProgram(tc.str()));
}
bool GLBackend::getOpenGLExtensions(const std::string& extStr) {
const std::string extension_str((const char*)glGetString(GL_EXTENSIONS));
return extension_str.find(extStr.c_str()) != std::string::npos;
}
bool GLBackend::isSupportHalf() const{
return mIsSupportHalf;
}
GLenum GLBackend::getTextrueFormat() const{
return mTextrueFormat;
}
std::string GLBackend::getImageFormat() const{
return mImageFormat;
}
std::unique_ptr<GLContext> GLBackend::mContext = nullptr;
GLBackend::GLBackend(BackendConfig::PrecisionMode precision, BackendConfig::PowerMode power) : Backend(MNN_FORWARD_OPENGL) {
if (mContext == nullptr) {
mContext.reset(new GLContext());
if(mContext != nullptr){
if(mContext->isCreateError()){
MNN_PRINT("mContext error !!! \n");
mIsCreateError = true;
}
}else{
MNN_PRINT("mContext == nullptr !!! \n");
mIsCreateError = true;
}
}
mIsSupportHalf = getOpenGLExtensions("GL_EXT_color_buffer_half_float");
if(mIsSupportHalf && precision != BackendConfig::Precision_High) {
mTextrueFormat = GL_RGBA16F;
mImageFormat = "rgba16f";
}else{
MNN_PRINT("not support half \n");
mTextrueFormat = GL_RGBA32F;
mImageFormat = "rgba32f";
}
mRuntime = new Runtime;
mRuntime->mImage2NchwProgram = getTreatedProgram(glsl_image_to_nchw_buffer_glsl);
mRuntime->mNchw2ImageProgram = getTreatedProgram(glsl_nchw_buffer_to_image_glsl);
mRuntime->mNc4hw42ImageProgram = getTreatedProgram(glsl_nc4hw4_buffer_to_image_glsl);
mRuntime->mImage2Nc4hw4Program = getTreatedProgram(glsl_image_to_nc4hw4_buffer_glsl);
std::vector<std::string> prefix;
setLocalSize(prefix, mLocalSize, 8, 8, 1);
mRuntime->mNhwc2ImageProgram = getProgram("nhwc_buffer_to_image", glsl_nhwc_buffer_to_image_glsl, prefix);
mRuntime->mImage2NhwcProgram = getProgram("image_to_nhwc_buffer", glsl_image_to_nhwc_buffer_glsl, prefix);
const GLubyte* renderer = glGetString(GL_RENDERER);
if(renderer != nullptr){
MNN_PRINT("gpu type : %s \n", (char*)renderer);
if(strstr((char *) renderer, "Adreno")){
mGpuType = ADRENO;
}else if(strstr((char *) renderer, "Mali")){
mGpuType = MALI;
}else{
mGpuType = OTHER;
}
}
const GLubyte* version = glGetString(GL_VERSION);
if(version != nullptr){
MNN_PRINT("gl version : %s \n", version);
char* p = strstr((char *) version, "V@");
if(p != nullptr){
p += strlen("V@");
char* v = strtok(p, ".");
if(v != nullptr){
mVersion = atoi(v);
}
}
}
}
GLBackend::~GLBackend() {
if(mRuntime != nullptr){
delete mRuntime;
}
if(mContext != nullptr){
mContext.reset(nullptr);
}
}
void GLBackend::copyImageToNhwcBuffer(GLuint textureId, float *outputData, int width, int height, int channel) const {
width = std::max(1, width);
height = std::max(1, height);
channel = std::max(1, channel);
wait();
auto depthQuad = UP_DIV(channel, 4);
auto size = depthQuad * 4 * width * height * sizeof(float);
auto buffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(size));
mRuntime->mImage2NhwcProgram->useProgram();
glBindImageTexture(0, textureId, 0, GL_TRUE, 0, GL_READ_ONLY, getTextrueFormat());
OPENGL_CHECK_ERROR;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer->getId());
OPENGL_CHECK_ERROR;
glUniform1i(2, width);
glUniform1i(3, height);
glUniform1i(4, channel);
OPENGL_CHECK_ERROR;
compute(UP_DIV(width, mLocalSize[0]), UP_DIV(height, mLocalSize[1]), UP_DIV(depthQuad, mLocalSize[2]));
OPENGL_CHECK_ERROR;
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
OPENGL_CHECK_ERROR;
auto gpuoutput = buffer->map(GL_MAP_READ_BIT);
if(gpuoutput != nullptr){
::memcpy(outputData, gpuoutput, height * width * channel * sizeof(float));
}
buffer->unmap();
}
void GLBackend::copyNhwcBufferToImage(GLuint textureId, const float *inputData, int width, int height, int channel) const {
int c_4 = UP_DIV(channel, 4);
auto size = ROUND_UP(channel, 4) * width * height * sizeof(float);
auto buffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(size));
auto gpuoutput = buffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
if(gpuoutput != nullptr){
::memcpy(gpuoutput, inputData, channel*height*width * sizeof(float));
}
buffer->unmap();
mRuntime->mNhwc2ImageProgram->useProgram();
glBindImageTexture(0, textureId, 0, GL_TRUE, 0, GL_WRITE_ONLY, getTextrueFormat());
OPENGL_CHECK_ERROR;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer->getId());
OPENGL_CHECK_ERROR;
glUniform1i(2, width);
glUniform1i(3, height);
glUniform1i(4, channel);
OPENGL_CHECK_ERROR;
compute(UP_DIV(width, mLocalSize[0]), UP_DIV(height, mLocalSize[1]), UP_DIV(c_4, mLocalSize[2]));
OPENGL_CHECK_ERROR;
}
void GLBackend::wait() const {
#ifdef USE_GL_FINISH
glFinish();
#else
glFlush();
#endif
}
void GLBackend::compute(int dim1, int dim2, int dim3, bool needWait) const {
wait();
glDispatchCompute(dim1, dim2, dim3);
}
void GLBackend::download(GLuint textureId, float *outputData, int d1, int d2, int d3, bool align) const {
wait();
auto depthQuad = UP_DIV(d3, 4);
auto size = depthQuad * 4 * d1 * d2 * sizeof(float);
if (NULL == mRuntime->mTempBuffer.get() || mRuntime->mTempBuffer->size() < size) {
mRuntime->mTempBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(size));
}
auto &buffer = mRuntime->mTempBuffer;
if (align) {
mRuntime->mImage2Nc4hw4Program->useProgram();
} else {
mRuntime->mImage2NchwProgram->useProgram();
}
glBindImageTexture(0, textureId, 0, GL_TRUE, 0, GL_READ_ONLY, getTextrueFormat());
OPENGL_CHECK_ERROR;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer->getId());
OPENGL_CHECK_ERROR;
glUniform1i(2, d1);
glUniform1i(3, d2);
OPENGL_CHECK_ERROR;
compute(UP_DIV(d1, 8), UP_DIV(d2, 8), depthQuad);
OPENGL_CHECK_ERROR;
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
OPENGL_CHECK_ERROR;
auto gpuoutput = buffer->map(GL_MAP_READ_BIT);
if(gpuoutput != nullptr){
if (align) {
::memcpy(outputData, gpuoutput, size);
} else {
::memcpy(outputData, gpuoutput, d1 * d2 * d3 * sizeof(float));
}
}
buffer->unmap();
}
void GLBackend::upload(GLuint textureId, const float *inputData, int width, int height, int channel, bool align) const {
int c_4 = UP_DIV(channel, 4);
auto size = ROUND_UP(channel, 4) * width * height * sizeof(float);
if (NULL == mRuntime->mTempBuffer.get() || mRuntime->mTempBuffer->size() < size) {
mRuntime->mTempBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(size));
}
auto &buffer = mRuntime->mTempBuffer;
auto gpuoutput = buffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
if(gpuoutput != nullptr){
if (align) {
::memcpy(gpuoutput, inputData, size);
} else {
::memcpy(gpuoutput, inputData, channel*height*width * sizeof(float));
}
}
buffer->unmap();
if (align) {
mRuntime->mNc4hw42ImageProgram->useProgram();
} else {
mRuntime->mNchw2ImageProgram->useProgram();
}
glBindImageTexture(0, textureId, 0, GL_TRUE, 0, GL_WRITE_ONLY, getTextrueFormat());
OPENGL_CHECK_ERROR;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer->getId());
OPENGL_CHECK_ERROR;
glUniform1i(2, width);
glUniform1i(3, height);
OPENGL_CHECK_ERROR;
compute(UP_DIV(width, 8), UP_DIV(height, 8), c_4);
OPENGL_CHECK_ERROR;
}
Execution *GLBackend::onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
const MNN::Op *op) {
auto map = gCreator();
auto iter = map->find(op->type());
if (iter == map->end()) {
if (nullptr != op->name()) {
MNN_PRINT("Don't support type %d, %s\n", op->type(), op->name()->c_str());
} else {
MNN_PRINT("Don't support type %d\n", op->type());
}
return nullptr;
}
auto exe = iter->second->onCreate(inputs, outputs, op, this);
if (nullptr == exe) {
if (nullptr != op->name()) {
MNN_PRINT("The Creator Don't support type %d, %s\n", op->type(), op->name()->c_str());
} else {
MNN_PRINT("The Creator Don't support type %d\n", op->type());
}
return nullptr;
}
return exe;
}
void GLBackend::onExecuteEnd() const {
// MNN_PRINT("Finish\n");
// glFinish();
}
void GLBackend::onExecuteBegin() const {
}
void GLBackend::onCopyBuffer(const Tensor *srcTensor, const Tensor *dstTensor) const {
std::vector<int> inputShape = tensorShapeFormat(srcTensor);
int ib = inputShape.at(0);
int ih = inputShape.at(1);
int iw = inputShape.at(2);
int ic = inputShape.at(3);
// OpenGL -> Host
if (NULL == srcTensor->buffer().host && srcTensor->buffer().device > 0) {
if(TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NHWC){
copyImageToNhwcBuffer((GLuint)srcTensor->deviceId(), dstTensor->host<float>(), iw, ih, ic);
}else{
download((GLuint)srcTensor->deviceId(), dstTensor->host<float>(), iw, ih, ic,
TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4);
}
// Host -> OpenGL
}else if (NULL == dstTensor->buffer().host && dstTensor->buffer().device > 0) {
if(TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NHWC){
copyNhwcBufferToImage((GLuint)dstTensor->deviceId(), srcTensor->host<float>(), iw, ih, ic);
}else{
upload((GLuint)dstTensor->deviceId(), srcTensor->host<float>(), iw, ih, ic,
TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4);
}
}else{
MNN_ASSERT(false);
}
}
bool GLBackend::onClearBuffer() {
mRuntime->mBlocks.clear();
mRuntime->mFreeTextures.clear();
return true;
}
class GLMemObj : public Backend::MemObj {
public:
GLMemObj(const Tensor *nativeTensor, uint64_t device, GLBackend::Runtime* runtime) {
mTensor = nativeTensor;
mDevice = device;
mRuntime = runtime;
}
virtual ~ GLMemObj() {
mRuntime->mFreeTextures.push_back(std::make_pair(mTensor, mDevice));
}
private:
const Tensor* mTensor;
uint64_t mDevice;
GLBackend::Runtime* mRuntime;
};
Backend::MemObj* GLBackend::onAcquire(const Tensor *nativeTensor, Backend::StorageType storageType) {
auto tensor = (Tensor *)nativeTensor;
// reuse only for dynamic storage
if (Backend::DYNAMIC == storageType) {
for (auto iter = mRuntime->mFreeTextures.begin(); iter != mRuntime->mFreeTextures.end(); ++iter) {
auto preiousTensor = iter->first;
if (preiousTensor->width() >= nativeTensor->width() && preiousTensor->height() >= nativeTensor->height() &&
UP_DIV(preiousTensor->channel(), 4) >= UP_DIV(nativeTensor->channel(), 4)) {
tensor->buffer().device = iter->second;
mRuntime->mFreeTextures.erase(iter);
return new GLMemObj(nativeTensor, tensor->buffer().device, mRuntime);
}
}
}
std::shared_ptr<GLTexture> newTexture(new GLTexture(nativeTensor->width(), nativeTensor->height(), nativeTensor->channel(), getTextrueFormat()));
tensor->buffer().device = newTexture->id();
mRuntime->mBlocks.push_back(std::move(newTexture));
if (Backend::DYNAMIC == storageType) {
return new GLMemObj(nativeTensor, tensor->buffer().device, mRuntime);
}
return new Backend::MemObj;
}
std::shared_ptr<GLProgram> GLBackend::getProgram(const std::string &key, const char *content,
const std::vector<std::string> &prefix) {
if (key.empty()) {
return getTreatedProgramWithPrefix(content, prefix);
}
// Generate New Key
std::ostringstream newKey;
for (auto s : prefix) {
newKey << s;
}
newKey << key;
auto newKeyStr = newKey.str();
auto iter = mRuntime->mProgramCache.find(newKeyStr);
if (iter != mRuntime->mProgramCache.end()) {
return iter->second;
}
auto program = getTreatedProgramWithPrefix(content, prefix);
mRuntime->mProgramCache.insert(std::make_pair(newKeyStr, program));
return program;
}
std::shared_ptr<GLProgram> GLBackend::getProgram(const std::string &key, const char *content) {
if (key.empty()) {
return getTreatedProgram(content);
}
auto iter = mRuntime->mProgramCache.find(key);
if (iter != mRuntime->mProgramCache.end()) {
return iter->second;
}
auto program = getTreatedProgram(content);
mRuntime->mProgramCache.insert(std::make_pair(key, program));
return program;
}
bool GLBackend::isCreateError() const {
return mIsCreateError;
}
Backend* GLRuntime::onCreate(const BackendConfig* config, Backend* origin) const {
BackendConfig::PrecisionMode precision = BackendConfig::Precision_Normal;
BackendConfig::PowerMode power = BackendConfig::Power_Normal;
if (nullptr != mInfo.user) {
precision = mInfo.user->precision;
power = mInfo.user->power;
}
auto backend = new GLBackend(precision, power);
return backend;
}
int GLRuntime::onGetRuntimeStatus(RuntimeStatus statusEnum) const {
MNN_ERROR("in GLRuntime\n");
switch (statusEnum) {
case STATUS_SUPPORT_FP16: {
return GLBackend::getOpenGLExtensions("GL_EXT_color_buffer_half_float");
break;
}
case STATUS_SUPPORT_DOT_PRODUCT: {
return 0;
break;
}
default: {
MNN_ERROR("unsupported interface");
break;
}
}
return 0;
}
Runtime::CompilerType GLRuntime::onGetCompilerType() const {
return Compiler_Origin;
}
class GLRuntimeCreator : public RuntimeCreator {
public:
virtual Runtime *onCreate(const Backend::Info &info) const override {
auto rt = new GLRuntime(info);
auto bn = (GLBackend*)(rt->onCreate(nullptr, nullptr));
if (bn->isCreateError()) {
delete bn;
delete rt;
return nullptr;
}
delete bn;
return rt;
}
};
bool placeholder = []() {
static std::once_flag createOnce;
std::call_once(createOnce, []() {
MNNInsertExtraRuntimeCreator(MNN_FORWARD_OPENGL, new GLRuntimeCreator, false);
});
return true;
}();
} // namespace OpenGL
} // namespace MNN