//
//  CPUBackend.cpp
//  MNN
//
//  Created by MNN on 2018/07/06.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUBackend.hpp"
#include <cmath>
#include <mutex>
#include "core/BufferAllocator.hpp"
#include "backend/cpu/CPUTensorConvert.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/TensorUtils.hpp"
#include "backend/cpu/ThreadPool.hpp"
#include "shape/SizeComputer.hpp"
#include "compute/CommonOptFunction.h"
#ifdef _OPENMP
#include <omp.h>
#endif // _OPENMP
#include "backend/cpu/CPURuntime.hpp"
#if defined(__aarch64__) && ENABLE_ARMV82
#include "backend/arm82/Arm82Backend.hpp"
#endif

#define MAX_THREAD_NUMBER 32
// Allocations above this threshold (100 MB) trigger a diagnostic print.
#define LARGE_MEMORY (1024 * 1024 * 100)

//#define MNN_DUMP_MEMORY_USAGE
#define MNN_CPU_CHECK_NAN 1

namespace MNN {
void registerCPUOps();
#if defined(__aarch64__) && ENABLE_ARMV82
struct cpuinfo_arm_isa gCPUInfo;
#endif

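/* How user configuration reaches this runtime (a sketch under MNN's public
   API; the call chain through Interpreter/Session is simplified here):

       MNN::ScheduleConfig conf;
       conf.numThread = 4;                       // becomes info.numThread below
       MNN::BackendConfig bnConfig;
       bnConfig.precision = MNN::BackendConfig::Precision_Low;
       conf.backendConfig = &bnConfig;           // becomes info.user below
       auto session = interpreter->createSession(conf);
*/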
CPURuntime::CPURuntime(const Backend::Info& info) {
    mDynamicAllocator.reset(new BufferAllocator);
    mStaticAllocator.reset(new BufferAllocator);
    // Clamp the requested thread count into [1, MAX_THREAD_NUMBER].
    mThreadNumber = info.numThread;
    mThreadNumber = std::max(1, mThreadNumber);
    mThreadNumber = std::min(mThreadNumber, MAX_THREAD_NUMBER);
    mPower     = BackendConfig::Power_Normal;
    mMemory    = BackendConfig::Memory_Normal;
    mPrecision = BackendConfig::Precision_Normal;
    mFlags     = 0;
    mFlops     = MNNGetCPUFlops(mThreadNumber);
#if defined(__aarch64__) && ENABLE_ARMV82
    mIsSupportDot       = gCPUInfo.dot;
    mIsSupportFp16arith = gCPUInfo.fp16arith;
#endif
    if (info.user != nullptr) {
        mPrecision = info.user->precision;
        mPower     = info.user->power;
        mMemory    = info.user->memory;
        mFlags     = info.user->flags;
    }
#ifdef _OPENMP
    switch (mPower) {
        case BackendConfig::Power_Low:
            MNNSetCPUThreadsMode(MNN_CPU_MODE_LITTLE);
            break;
        case BackendConfig::Power_High:
            MNNSetCPUThreadsMode(MNN_CPU_MODE_POWER_FRI);
            break;
        default:
            break;
    }
#endif
#ifdef MNN_USE_THREAD_POOL
    mThreadNumber = ThreadPool::init(mThreadNumber);
    if (mThreadNumber > 1) {
        mTaskIndex = ThreadPool::acquireWorkIndex();
    } else {
        mTaskIndex = -1;
    }
    if (mTaskIndex >= 0 && mPower == BackendConfig::Power_High) {
        ThreadPool::active();
    }
#endif
}

CPURuntime::~CPURuntime() {
#ifdef MNN_USE_THREAD_POOL
    if (mTaskIndex >= 0 && mPower == BackendConfig::Power_High) {
        ThreadPool::deactive();
    }
    ThreadPool::releaseWorkIndex(mTaskIndex);
#endif
}

float CPURuntime::onGetMemoryInMB() {
    auto dynamicMemoryInMB = mDynamicAllocator->totalSize() / 1024.0f / 1024.0f;
    auto staticMemoryInMB  = mStaticAllocator->totalSize() / 1024.0f / 1024.0f;
    return dynamicMemoryInMB + staticMemoryInMB;
}

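// Backend selection: on ARMv8.2 hardware with fp16 arithmetic, a request for
// Precision_Low is served by the fp16 Arm82Backend; everything else gets the
// generic float32 CPUBackend.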
Backend* CPURuntime::onCreate() const {
#if defined(__aarch64__) && ENABLE_ARMV82
    if (mIsSupportFp16arith && mPrecision == BackendConfig::Precision_Low) {
        return new Arm82Backend(this);
    }
#endif
    return new CPUBackend(this);
}

void CPURuntime::onGabageCollect(int level) {
    // Trim cached memory: always shrink the static pool; past level 50 also
    // shrink the dynamic pool.
    mStaticAllocator->release(false);
    if (level > 50) {
        mDynamicAllocator->release(false);
    }
}

std::map<OpType, CPUBackend::Creator*>* CPUBackend::gCreator = nullptr;

void CPUBackend::initCreatorMap() {
    gCreator = new std::map<OpType, CPUBackend::Creator*>;
}

std::map<OpType, CPUBackend::Creator*>* CPUBackend::getCreatorMap() {
    return gCreator;
}

bool CPUBackend::addCreator(OpType t, Creator* c) {
    auto map = getCreatorMap();
    if (map->find(t) != map->end()) {
        MNN_PRINT("Error: %d type has been added\n", t);
        return false;
    }
    map->insert(std::make_pair(t, c));
    return true;
}

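/* A minimal sketch of how an op implementation plugs into the registry above.
   CPUFoo / CPUFooCreator / OpType_Foo are hypothetical names for illustration;
   real ops register themselves inside registerCPUOps():

       class CPUFooCreator : public CPUBackend::Creator {
       public:
           virtual Execution* onCreate(const std::vector<Tensor*>& inputs,
                                       const std::vector<Tensor*>& outputs,
                                       const MNN::Op* op, Backend* backend) const override {
               return new CPUFoo(backend);
           }
       };
       CPUBackend::addCreator(OpType_Foo, new CPUFooCreator);
*/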
CPUBackend::CPUBackend(const CPURuntime* runtime, MNNForwardType type) : Backend(type) {
    mRuntime          = runtime;
    mCheckNAN         = runtime->mFlags == MNN_CPU_CHECK_NAN;
    mDynamicAllocator = runtime->mDynamicAllocator;
    mStaticAllocator  = runtime->mStaticAllocator;
}

bool CPUBackend::supportDot() const {
    return mRuntime->mIsSupportDot;
}

CPUBackend::~CPUBackend() {
    for (auto p : mDynamic) {
        mDynamicAllocator->free(p);
    }
}

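// Thread-pool lifetime: under Power_High the runtime constructor keeps the
// pool active for the runtime's entire lifetime; otherwise onExecuteBegin()
// and onExecuteEnd() activate and deactivate the workers around each inference.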
void CPUBackend::onExecuteBegin() const {
#ifdef MNN_USE_THREAD_POOL
    if (mRuntime->mTaskIndex >= 0 && mRuntime->mPower != BackendConfig::Power_High) {
        ThreadPool::active();
    }
#else
#ifdef _OPENMP
    omp_set_dynamic(0);
    omp_set_num_threads(threadNumber());
#endif
#endif
}

void CPUBackend::onExecuteEnd() const {
#ifdef MNN_USE_THREAD_POOL
    if (mRuntime->mTaskIndex >= 0 && mRuntime->mPower != BackendConfig::Power_High) {
        ThreadPool::deactive();
    }
#endif
}

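/* Storage semantics for the allocation below:
   - STATIC: long-lived data (typically weights/constants) from the static pool;
   - DYNAMIC: tensors from the reusable dynamic pool, tracked in mDynamic so
     onClearBuffer() can recycle them between resizes;
   - DYNAMIC_SEPERATE (sic): the `true` flag asks the dynamic pool for its own
     allocation rather than a chunk shared for reuse. */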
bool CPUBackend::allocBuffer(int size, halide_buffer_t& buffer, StorageType storageType) {
    // MNN_PRINT("Acquire size = %d\n", size);
    if (size <= 0) {
        MNN_ASSERT(false);
        return false;
    }
    if (size > LARGE_MEMORY) {
        MNN_PRINT("Size larger than 100 MB: %d\n", size);
    }
    switch (storageType) {
        case STATIC: {
#ifdef MNN_DUMP_MEMORY_USAGE
            buffer.host = (uint8_t*)malloc(size);
#else
            buffer.host = (uint8_t*)(mStaticAllocator->alloc(size, false));
#endif
            break;
        }
        case DYNAMIC: {
            buffer.host = (uint8_t*)(mDynamicAllocator->alloc(size, false));
            break;
        }
        case DYNAMIC_SEPERATE: {
            buffer.host = (uint8_t*)(mDynamicAllocator->alloc(size, true));
            break;
        }
        default:
            MNN_ASSERT(false);
            break;
    }
    if (nullptr == buffer.host) {
        MNN_ERROR("Alloc buffer error for cpu backend\n");
        return false;
    }
    if (STATIC == storageType) {
        // Do nothing
    } else {
        mDynamic.insert(buffer.host);
    }
    if (buffer.type.code == halide_type_handle) {
        // Handle tensors store pointers; zero-initialize to avoid garbage values.
        ::memset(buffer.host, 0, size);
    }
    return true;
}

bool CPUBackend::onAcquireBuffer(const MNN::Tensor* nativeTensorConst, StorageType storageType) {
    if (nativeTensorConst == nullptr) {
        return false;
    }
    //FUNC_PRINT_ALL(nativeTensorConst, p);
    auto nativeTensor = (Tensor*)nativeTensorConst;
    auto& buffer      = nativeTensor->buffer();

    auto size = nativeTensor->size();
    return allocBuffer(size, buffer, storageType);
}

bool CPUBackend::onReleaseBuffer(const MNN::Tensor* nativeTensor, StorageType storageType) {
    if (nativeTensor == nullptr) {
        return false;
    }
    if (nullptr == nativeTensor->buffer().host) {
        return false;
    }
    if (STATIC == storageType) {
#ifdef MNN_DUMP_MEMORY_USAGE
        free(nativeTensor->buffer().host);
#else
        mStaticAllocator->free(nativeTensor->buffer().host);
#endif
        return true;
    }
    if (DYNAMIC_SEPERATE == storageType) {
        return true;
    }
    mDynamic.erase(nativeTensor->buffer().host);
    mDynamicAllocator->free(nativeTensor->buffer().host);
    return true;
}

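// Cost model: the returned pair is (estimated cost, supported). The estimate
// scales the op's FLOP count from SizeComputer by the CPU throughput measured
// in MNNGetCPUFlops(); the 1000.0f factor presumably rescales the ratio into
// the time unit the scheduler compares across backends.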
std::pair<float, bool> CPUBackend::onMeasure(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                             const MNN::Op* op) {
    auto map  = getCreatorMap();
    auto iter = map->find(op->type());
    if (iter == map->end()) {
        MNN_PRINT("Don't support type [%s], %s\n", MNN::EnumNameOpType(op->type()), op->name()->c_str());
        return std::make_pair(0.0f, false);
    }
#ifndef MNN_BUILD_MINI
    auto computeFlops = SizeComputer::computeFlops(op, inputs, outputs);
    return std::make_pair(computeFlops / mRuntime->mFlops * 1000.0f, true);
#else
    return std::make_pair(0.0f, false);
#endif
}

/// get execution
Execution* CPUBackend::onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op) {
    /**
     BatchNorm is converted to Scale by the model converter,
     so don't print an error log for it here.
     */
    if (op->type() == OpType_BatchNorm) {
        return nullptr;
    }
    auto map  = getCreatorMap();
    auto iter = map->find(op->type());
    if (iter == map->end()) {
        MNN_PRINT("Don't support type [%s], %s\n", MNN::EnumNameOpType(op->type()), op->name()->c_str());
        return nullptr;
    }
    auto exe = iter->second->onCreate(inputs, outputs, op, this);
    if (nullptr == exe) {
        MNN_PRINT("The creator doesn't support type [%s], %s\n", MNN::EnumNameOpType(op->type()), op->name()->c_str());
        return nullptr;
    }
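    // Debug aid: when the user sets BackendConfig flags to MNN_CPU_CHECK_NAN,
    // wrap the real execution in a shim that scans every float tensor for
    // NaN/Inf before and after running it, failing fast with INVALID_VALUE.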
    if (mCheckNAN) {
        class CheckNANExecution : public Execution {
        public:
            CheckNANExecution(Execution* exe) : Execution(exe->backend()) {
                mExecution.reset(exe);
                mValid = exe->valid();
            }
            virtual ~CheckNANExecution() {
                // Do nothing
            }
            virtual ErrorCode onResize(const std::vector<Tensor*>& inputs,
                                       const std::vector<Tensor*>& outputs) override {
                return mExecution->onResize(inputs, outputs);
            }

            virtual ErrorCode onExecute(const std::vector<Tensor*>& inputs,
                                        const std::vector<Tensor*>& outputs) override {
                for (auto tensor : inputs) {
                    if (halide_type_float != tensor->getType().code) {
                        // Only float tensors can hold NaN/Inf; skip the rest.
                        continue;
                    }
                    auto size = tensor->elementSize();
                    auto ptr  = tensor->host<float>();
                    for (int i = 0; i < size; ++i) {
                        auto value = ptr[i];
                        if (std::isnan(value) || std::isinf(value)) {
                            return INVALID_VALUE;
                        }
                    }
                }
                auto code = mExecution->onExecute(inputs, outputs);
                if (NO_ERROR != code) {
                    return code;
                }
                for (auto tensor : outputs) {
                    if (halide_type_float != tensor->getType().code) {
                        continue;
                    }
                    auto size = tensor->elementSize();
                    auto ptr  = tensor->host<float>();
                    for (int i = 0; i < size; ++i) {
                        auto value = ptr[i];
                        if (std::isnan(value) || std::isinf(value)) {
                            return INVALID_VALUE;
                        }
                    }
                }
                return NO_ERROR;
            }

        private:
            std::unique_ptr<Execution> mExecution;
        };
        return new CheckNANExecution(exe);
    }
    return exe;
}

bool CPUBackend::onClearBuffer() {
    for (auto p : mDynamic) {
        mDynamicAllocator->free(p);
    }
    mDynamic.clear();
    return true;
}

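// Splits `size` units of work into 4-aligned chunks across the worker threads.
// A worked example, assuming threadNumber() == 4: size = 100 gives
// sizeDivide = UP_DIV(100 / 4, 4) * 4 = 28 and scheduleNumber = UP_DIV(100, 28) = 4,
// i.e. three chunks of 28 plus a final chunk of 16.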
std::pair<int, int> CPUBackend::multiThreadDivide(int size) const {
    int sizeDivide = size / threadNumber();
    sizeDivide     = UP_DIV(sizeDivide, 4) * 4;
    int scheduleNumber = 1;
    if (sizeDivide > 0) {
        scheduleNumber = UP_DIV(size, sizeDivide);
    }
    return std::make_pair(sizeDivide, scheduleNumber);
}

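// Copies between host tensors, letting CPUTensorConverter handle any dimension
// format conversion (e.g. NCHW <-> NC4HW4) implied by the tensors' layouts.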
void CPUBackend::onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const {
    auto& srcBuffer = srcTensor->buffer();
    auto& dstBuffer = dstTensor->buffer();

    MNN_ASSERT(srcBuffer.dimensions == dstBuffer.dimensions);
    MNN_ASSERT(srcBuffer.type == dstBuffer.type);
    if (srcTensor->getDimensionType() == dstTensor->getDimensionType()) {
        for (int i = 0; i < srcBuffer.dimensions; ++i) {
            MNN_ASSERT(srcBuffer.dim[i].extent <= dstBuffer.dim[i].extent);
        }
    }
    if (nullptr == srcBuffer.host || nullptr == dstBuffer.host) {
        return;
    }

    auto code = CPUTensorConverter::convert(srcTensor, dstTensor);
    if (NO_ERROR != code) {
        MNN_ERROR("Error in CPUBackend::onCopyBuffer\n");
    }
}

class CPURuntimeCreator : public RuntimeCreator {
public:
    virtual Runtime* onCreate(const Backend::Info& info) const override {
        return new CPURuntime(info);
    }
};

void registerCPURuntimeCreator() {
    CPUBackend::initCreatorMap();
    registerCPUOps();
    MNNFunctionInit();
#if defined(__aarch64__) && ENABLE_ARMV82
    cpuinfo_arm_init(&gCPUInfo);
#endif
    MNNInsertExtraRuntimeCreator(MNN_FORWARD_CPU, new CPURuntimeCreator);
}

} // namespace MNN