2019-04-17 10:49:11 +08:00
|
|
|
//
|
|
|
|
// OpenCLRuntime.hpp
|
|
|
|
// MNN
|
|
|
|
//
|
|
|
|
// Created by MNN on 2019/01/31.
|
|
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
|
|
//
|
|
|
|
|
|
|
|
#ifndef OpenCLRuntime_hpp
|
|
|
|
#define OpenCLRuntime_hpp
|
|
|
|
|
|
|
|
|
|
|
|
#include <map>
|
|
|
|
#include <memory>
|
|
|
|
#include <mutex>
|
|
|
|
#include <set>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include <sstream>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "core/Macro.h"
|
2019-04-17 10:49:11 +08:00
|
|
|
#include "Type_generated.h"
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "backend/opencl/core/runtime/OpenCLWrapper.hpp"
|
2019-04-17 10:49:11 +08:00
|
|
|
|
|
|
|
namespace MNN {
|
|
|
|
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
// Numeric tokens carrying the Qualcomm (_QCOM) suffix. They are defined here so
// this header does not depend on a vendor extension header being available.
// NOTE(review): values are expected to match the vendor's cl_ext_qcom.h -- confirm.
//
// Each definition is guarded so that a platform header that already provides
// the same token does not trigger a macro-redefinition diagnostic.

// Context property key and values for the performance hint.
#ifndef CL_CONTEXT_PERF_HINT_QCOM
#define CL_CONTEXT_PERF_HINT_QCOM 0x40C2
#endif
#ifndef CL_PERF_HINT_HIGH_QCOM
#define CL_PERF_HINT_HIGH_QCOM 0x40C3
#endif
#ifndef CL_PERF_HINT_NORMAL_QCOM
#define CL_PERF_HINT_NORMAL_QCOM 0x40C4
#endif
#ifndef CL_PERF_HINT_LOW_QCOM
#define CL_PERF_HINT_LOW_QCOM 0x40C5
#endif

// Context property key and values for the priority hint.
#ifndef CL_CONTEXT_PRIORITY_HINT_QCOM
#define CL_CONTEXT_PRIORITY_HINT_QCOM 0x40C9
#endif
#ifndef CL_PRIORITY_HINT_HIGH_QCOM
#define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA
#endif
#ifndef CL_PRIORITY_HINT_NORMAL_QCOM
#define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB
#endif
#ifndef CL_PRIORITY_HINT_LOW_QCOM
#define CL_PRIORITY_HINT_LOW_QCOM 0x40CC
#endif

// Query token; presumably the one used by GetKernelWaveSize() -- TODO confirm.
#ifndef CL_KERNEL_WAVE_SIZE_QCOM
#define CL_KERNEL_WAVE_SIZE_QCOM 0xAA02
#endif
|
|
|
|
|
2019-09-01 19:25:26 +08:00
|
|
|
// GPU family tag; this is the type returned by OpenCLRuntime::getGpuType().
// Kept as an unscoped enum so the enumerators stay usable without qualification.
enum GpuType {
    MALI   = 0,
    ADRENO = 1,
    RADEON = 2,
    OTHER  = 3
};
|
2019-04-17 10:49:11 +08:00
|
|
|
|
|
|
|
class OpenCLRuntime {
|
|
|
|
public:
|
|
|
|
OpenCLRuntime(bool permitFloat16);
|
|
|
|
~OpenCLRuntime();
|
|
|
|
OpenCLRuntime(const OpenCLRuntime &) = delete;
|
|
|
|
OpenCLRuntime &operator=(const OpenCLRuntime &) = delete;
|
|
|
|
|
|
|
|
bool isSupportedFP16() const;
|
2019-11-15 14:22:45 +08:00
|
|
|
bool isSupportedDotInt8() const;
|
|
|
|
bool isSupportedDotAccInt8() const;
|
2019-04-17 10:49:11 +08:00
|
|
|
::cl::Context &context();
|
|
|
|
::cl::CommandQueue &commandQueue();
|
|
|
|
uint64_t deviceGlobalMemeryCacheSize() const;
|
|
|
|
uint32_t deviceComputeUnits() const;
|
|
|
|
uint32_t maxFreq() const;
|
|
|
|
uint64_t getMaxWorkGroupSize(const ::cl::Kernel &kernel);
|
2019-12-27 22:16:57 +08:00
|
|
|
uint64_t GetKernelWaveSize(const cl::Kernel &kernel);
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
uint64_t getMaxLocalMem() const;
|
2019-04-17 10:49:11 +08:00
|
|
|
GpuType getGpuType();
|
|
|
|
uint64_t maxAllocSize() const;
|
2020-06-22 11:23:12 +08:00
|
|
|
void setCommandQueueProfileEnable();
|
|
|
|
void setCommandQueueProfileDisable();
|
2020-06-23 17:50:24 +08:00
|
|
|
unsigned int getQueueNum();
|
2019-04-17 10:49:11 +08:00
|
|
|
|
|
|
|
::cl::Kernel buildKernel(const std::string &programName, const std::string &kernelName,
|
|
|
|
const std::set<std::string> &buildOptions);
|
|
|
|
|
|
|
|
std::vector<size_t> getMaxImage2DSize();
|
2019-07-02 18:01:08 +08:00
|
|
|
bool isCreateError() const;
|
2019-04-17 10:49:11 +08:00
|
|
|
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
float flops() const {
|
|
|
|
return mFlops;
|
|
|
|
}
|
2019-12-27 22:16:57 +08:00
|
|
|
|
|
|
|
double getCostTime(const cl::Event *event);
|
|
|
|
double getQueuedTime(const cl::Event *event);
|
|
|
|
double getSubmitTime(const cl::Event *event);
|
|
|
|
|
2019-04-17 10:49:11 +08:00
|
|
|
private:
|
|
|
|
bool loadProgram(const std::string &programName, cl::Program *program);
|
|
|
|
bool buildProgram(const std::string &buildOptionsStr, cl::Program *program);
|
|
|
|
bool getDeviceSupportsExtension(const cl::Device &device, const char *extensionName);
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::shared_ptr<::cl::Context> mContext;
|
|
|
|
std::shared_ptr<::cl::Device> mFirstGPUDevicePtr;
|
|
|
|
std::shared_ptr<::cl::CommandQueue> mCommandQueuePtr;
|
|
|
|
std::map<std::string, ::cl::Program> mBuildProgramMap;
|
|
|
|
uint64_t mGPUGlobalMemeryCacheSize;
|
|
|
|
uint32_t mGPUComputeUnits;
|
|
|
|
uint32_t mMaxFreq;
|
|
|
|
uint32_t mMaxMemAllocSize;
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
uint64_t mMaxLocalMemSize;
|
2019-04-17 10:49:11 +08:00
|
|
|
bool mIsSupportedFP16 = false;
|
2019-11-15 14:22:45 +08:00
|
|
|
bool mSupportDotInt8 = false;
|
|
|
|
bool mSupportDotAccInt8 = false;
|
2019-04-17 10:49:11 +08:00
|
|
|
GpuType mGpuType;
|
|
|
|
std::string mDefaultBuildParams;
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
float mFlops = 4.0f;
|
2019-12-27 22:16:57 +08:00
|
|
|
bool mIsCreateError{false};
|
|
|
|
|
|
|
|
double mStartNanos;
|
|
|
|
double mStopNanos;
|
2020-06-23 17:50:24 +08:00
|
|
|
unsigned int mQueueCount = 0;
|
2019-12-27 22:16:57 +08:00
|
|
|
|
2019-04-17 10:49:11 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace MNN
|
|
|
|
#endif /* OpenCLRuntime_hpp */
|