2019-12-27 22:16:57 +08:00
|
|
|
//
|
|
|
|
// Executor.cpp
|
|
|
|
// MNN
|
|
|
|
//
|
|
|
|
// Created by MNN on 2019/07/26.
|
|
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
|
|
//
|
|
|
|
|
|
|
|
#include <MNN/expr/Executor.hpp>
|
|
|
|
#include "core/Session.hpp"
|
|
|
|
#include "core/TensorUtils.hpp"
|
|
|
|
#include "Utils.hpp"
|
|
|
|
#include "core/Backend.hpp"
|
|
|
|
#include <MNN/Tensor.hpp>
|
2020-01-15 13:33:47 +08:00
|
|
|
#include "core/TensorUtils.hpp"
|
|
|
|
#include <MNN/AutoTime.hpp>
|
2019-12-27 22:16:57 +08:00
|
|
|
namespace MNN {
|
|
|
|
namespace Express {
|
2020-01-15 13:33:47 +08:00
|
|
|
// Accumulates execution time per operator type.
// In this file an instance is only created by Executor's constructor when
// MNN_EXPR_ENABLE_PROFILER is defined.
class Executor::Profiler {
public:
    // Adds `timeInMs` to the total recorded for operator type `opType`.
    void add(int opType, float timeInMs);
    // Prints one "<op name>: <total> ms" line per recorded operator type.
    void dump() const;
    // Discards every recorded total.
    void reset();

private:
    // Key: operator type (cast to OpType when dumped). Value: accumulated time in ms.
    std::map<int, float> mTimes;
};
|
|
|
|
// Drops all accumulated per-operator timings.
void Executor::Profiler::reset() {
    mTimes.erase(mTimes.begin(), mTimes.end());
}
|
|
|
|
void Executor::Profiler::dump() const {
|
|
|
|
for (auto iter : mTimes) {
|
|
|
|
MNN_PRINT("%s: %f ms\n", EnumNameOpType((OpType)iter.first), iter.second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
void Executor::Profiler::add(int opType, float timeInMs) {
|
|
|
|
auto iter = mTimes.find(opType);
|
|
|
|
if (iter == mTimes.end()) {
|
|
|
|
mTimes[opType] = timeInMs;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
iter->second += timeInMs;
|
|
|
|
}
|
|
|
|
|
2019-12-27 22:16:57 +08:00
|
|
|
// Replaces the executor's primary backend with a newly created one.
//
// type         - forward type used to look up the backend creator.
// config       - NOTE(review): currently not forwarded to the backend; confirm
//                whether Backend::Info is expected to carry it.
// numberThread - thread count passed to the backend through Backend::Info.
//
// If no creator is registered for `type`, or the creator fails to produce a
// backend, an error is logged and the current backend is left in place.
void Executor::setGlobalExecutorConfig(MNNForwardType type, const BackendConfig& config, int numberThread) {
    std::lock_guard<std::mutex> _l(mMutex);
    auto creator = MNNGetExtraBackendCreator(type);
    if (nullptr == creator) {
        MNN_ERROR("Error to find creator of %d\n", type);
        return;
    }
    _resetCache();
    Backend::Info info;
    info.type      = type;
    info.numThread = numberThread;
    std::shared_ptr<Backend> bn(creator->onCreate(info));
    if (nullptr == bn) {
        // Keep the previous backend: installing a null one would make every
        // later mBackend dereference crash.
        MNN_ERROR("Error to create backend of %d\n", type);
        return;
    }
    mBackend = bn;
}
|
2020-01-15 13:33:47 +08:00
|
|
|
// Currently a no-op. It is the hook invoked by the constructor, gc() and
// setGlobalExecutorConfig() in this file.
void Executor::_resetCache() {
}
|
|
|
|
|
2019-12-27 22:16:57 +08:00
|
|
|
// Releases cached resources.
//
// flag - when FULL, the backends' buffers are cleared as well; for any other
//        value only _resetCache() runs.
void Executor::gc(GCFlag flag) {
    std::lock_guard<std::mutex> _l(mMutex);
    _resetCache();
    if (FULL != flag) {
        return;
    }
    if (nullptr != mBackend) {
        mBackend->onClearBuffer();
    }
    // The constructor makes mBackupBackend an alias of mBackend for the CPU
    // forward type; avoid clearing the same backend twice in that case.
    if (nullptr != mBackupBackend && mBackupBackend != mBackend) {
        mBackupBackend->onClearBuffer();
    }
}
|
|
|
|
// Builds an executor around `backend`.
// A CPU backend doubles as the backup backend; for any other forward type a
// separate single-threaded CPU backend is created as backup.
Executor::Executor(std::shared_ptr<Backend> backend) {
    mBackend = backend;
    const bool primaryIsCpu = (MNN_FORWARD_CPU == mBackend->type());
    if (!primaryIsCpu) {
        Backend::Info cpuInfo;
        cpuInfo.type      = MNN_FORWARD_CPU;
        cpuInfo.numThread = 1;
        auto cpuCreator   = MNNGetExtraBackendCreator(MNN_FORWARD_CPU);
        mBackupBackend.reset(cpuCreator->onCreate(cpuInfo));
    } else {
        mBackupBackend = mBackend;
    }
    _resetCache();
#ifdef MNN_EXPR_ENABLE_PROFILER
    mProfiler.reset(new Profiler);
#endif
}
|
|
|
|
// Drops the executor's references to its backends, primary first.
Executor::~Executor() {
    mBackend.reset();
    mBackupBackend.reset();
}
|
|
|
|
// Currently a no-op: `caches` is accepted but not stored anywhere.
void Executor::_addToCache(const std::vector<std::shared_ptr<ComputeCache>>& caches) {
    // Debug aid kept from the original source:
    // FUNC_PRINT(mCaches.size());
}
|
|
|
|
|
|
|
|
// Describes, per input of `expr`, what the op needs from that input:
//   contentNeedContent[i] - computing the output content needs input i's content
//   shapeNeedContent[i]   - computing the output shape needs input i's content
//   supportError[i]       - set only for OpType_Concat inputs
// All three vectors are sized to the number of inputs.
Executor::Requirement Executor::getRequirement(Expr* expr) const {
    Executor::Requirement req;
    const auto inputCount = expr->inputs().size();
    auto op               = expr->get();
    req.contentNeedContent.resize(inputCount);
    req.shapeNeedContent.resize(inputCount);
    req.supportError.resize(inputCount);

    const auto opType = op->type();
    if (OpType_Extra == opType) {
        // Extra ops: every input's content is required, nothing else is.
        for (int idx = 0; idx < inputCount; ++idx) {
            req.contentNeedContent[idx] = true;
            req.shapeNeedContent[idx]   = false;
            req.supportError[idx]       = false;
        }
        return req;
    }

    const bool isConcat = (OpType_Concat == opType);
    for (int idx = 0; idx < inputCount; ++idx) {
        req.contentNeedContent[idx] = SizeComputer::opNeedContent(opType, idx);
        req.shapeNeedContent[idx]   = false;
        req.supportError[idx]       = isConcat;
    }

    // Flag the inputs whose content the shape computation reads; indices
    // outside the input range are ignored.
    auto shapeInputs = SizeComputer::needInputContent(op);
    for (auto index : shapeInputs) {
        if (index < req.shapeNeedContent.size()) {
            req.shapeNeedContent[index] = true;
        }
    }
    return req;
}
|
|
|
|
|
|
|
|
std::shared_ptr<Executor> Executor::getGlobalExecutor() {
|
|
|
|
static std::once_flag of;
|
|
|
|
static std::shared_ptr<Executor> gExecutor;
|
|
|
|
std::call_once(of, [&]() {
|
|
|
|
auto creator = MNNGetExtraBackendCreator(MNN_FORWARD_CPU);
|
|
|
|
SizeComputerSuite::init();
|
|
|
|
Backend::Info info;
|
|
|
|
info.type = MNN_FORWARD_CPU;
|
|
|
|
info.numThread = 1;
|
|
|
|
std::shared_ptr<Backend> bn(creator->onCreate(info));
|
|
|
|
gExecutor.reset(new Executor(bn));
|
|
|
|
});
|
|
|
|
return gExecutor;
|
|
|
|
}
|
|
|
|
|
2020-01-15 13:33:47 +08:00
|
|
|
// Computes the output shape information of `expr` from its inputs' info.
//
// Returns
//   NOT_SUPPORT        - the op is OpType_Extra
//   COMPUTE_SIZE_ERROR - size computation failed, or an output has a
//                        dimension whose length is <= 0
//   NO_ERROR           - every output info of `expr` has been updated
//
// Scratch tensors from mStack are reused across calls; mInputs / mOutputs are
// views into that stack (inputs first, then outputs).
ErrorCode Executor::computeInfo(Expr* expr) {
    MNN_ASSERT(nullptr != expr);
    MNN_ASSERT(nullptr != expr->get());
    if (expr->get()->type() == OpType_Extra) {
        return NOT_SUPPORT;
    }
    std::lock_guard<std::mutex> _l(mMutex);
    mInputs.resize(expr->inputs().size());
    mOutputs.resize(expr->outputSize());

    // Grow the scratch stack until it can back every input and output.
    const size_t required = mInputs.size() + mOutputs.size();
    while (mStack.size() < required) {
        mStack.emplace_back(std::shared_ptr<Tensor>(new Tensor));
    }
    for (size_t i = 0; i < mInputs.size(); ++i) {
        mInputs[i] = mStack[i].get();
    }
    for (size_t i = 0; i < mOutputs.size(); ++i) {
        mOutputs[i] = mStack[i + mInputs.size()].get();
    }

    auto op = expr->get();
    for (int i = 0; i < (int)expr->inputs().size(); ++i) {
        auto inputExpr = expr->inputs()[i]->expr();
        Utils::copyInfoToTensor(mInputs[i], inputExpr.first->outputInfo(inputExpr.second));
    }
    bool res = SizeComputer::computeOutputSize(op, mInputs, mOutputs);
    if (!res) {
        // Compute Error
#ifdef MNN_EXPRESS_ERROR_REPORT
        FUNC_PRINT(op->type());
#endif
        return COMPUTE_SIZE_ERROR;
    }
    for (int i = 0; i < (int)mOutputs.size(); ++i) {
        auto tensor = mOutputs[i];
        for (int j = 0; j < tensor->dimensions(); ++j) {
            if (tensor->length(j) <= 0) {
#ifdef MNN_EXPRESS_ERROR_REPORT
                if (nullptr != op->name()) {
                    MNN_ERROR("Error to compute shape for %s\n", op->name()->c_str());
                }
#endif
                return COMPUTE_SIZE_ERROR;
            }
        }
        auto shape = expr->outputInfo(i);
        Utils::copyTensorToInfo(shape, tensor);
    }
    return NO_ERROR;
}
|
|
|
|
|
|
|
|
// Destroys the execution units, then resets every tracked tensor
// (TensorContent::reset releases a buffer still attached to a backend).
Executor::ComputeCache::~ComputeCache() {
    mUnits.clear();
    // Iterate by reference, as recycle() and resize() do, instead of copying
    // each TensorContent just to call reset() on the copy.
    for (auto& t : mTensors) {
        t.reset();
    }
}
|
|
|
|
|
|
|
|
void Executor::ComputeCache::setShapeDirty() {
|
|
|
|
mShapeDirty = true;
|
|
|
|
for (auto iter : mLinks) {
|
|
|
|
auto cache = iter.lock();
|
|
|
|
if (nullptr != cache && false == cache->mShapeDirty) {
|
|
|
|
cache->setShapeDirty();
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
2020-01-15 13:33:47 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
void Executor::ComputeCache::setContentDirty() {
|
|
|
|
mContentDirty = true;
|
|
|
|
for (auto iter : mLinks) {
|
|
|
|
auto cache = iter.lock();
|
|
|
|
if (nullptr != cache && false == cache->mContentDirty) {
|
|
|
|
cache->setContentDirty();
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
2020-01-15 13:33:47 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Executor::ComputeCache::TensorContent::reset() {
|
|
|
|
auto des = TensorUtils::getDescribe(tensor.get());
|
|
|
|
des->useCount = refCount;
|
|
|
|
if (nullptr != des->backend) {
|
|
|
|
des->backend->onReleaseBuffer(tensor.get(), Backend::DYNAMIC);
|
|
|
|
des->backend = nullptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
void Executor::ComputeCache::addLink(std::shared_ptr<ComputeCache> cache) {
|
|
|
|
for (int i=0; i<mLinks.size(); ++i) {
|
|
|
|
auto ptr = mLinks[i].lock().get();
|
|
|
|
if (ptr == cache.get()) {
|
|
|
|
return;
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
2020-01-15 13:33:47 +08:00
|
|
|
if (ptr == nullptr) {
|
|
|
|
mLinks[i] = std::weak_ptr<ComputeCache>(cache);
|
|
|
|
return;
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
|
|
|
}
|
2020-01-15 13:33:47 +08:00
|
|
|
mLinks.emplace_back(std::weak_ptr<ComputeCache>(cache));
|
|
|
|
}
|
|
|
|
// Looks up output `index` of `outputExpr` in this cache.
// Returns the first tensor of the stored pair when `host` is true, the second
// otherwise; nullptr if `outputExpr` has no entry in this cache.
Tensor* Executor::ComputeCache::output(EXPRP outputExpr, int index, bool host) const {
    auto found = mOutputTensors.find(outputExpr.get());
    if (mOutputTensors.end() == found) {
        return nullptr;
    }
    MNN_ASSERT(index >= 0 && index < found->second.size());
    const auto& tensorPair = found->second[index];
    return host ? tensorPair.first : tensorPair.second;
}
|
|
|
|
// Makes `dst` share the output tensors recorded for `src`.
// Does nothing when `src` has no entry in this cache.
void Executor::ComputeCache::dup(EXPRP src, EXPRP dst) {
    auto srcEntry = mOutputTensors.find(src.get());
    if (mOutputTensors.end() == srcEntry) {
        return;
    }
    // Bind a reference before operator[] possibly inserts the dst entry.
    const auto& srcTensors = srcEntry->second;
    mOutputTensors[dst.get()] = srcTensors;
}
|
|
|
|
// Removes `expr` from the outputs served by this cache.
// Once no output is left, the units, tensors and input caches are dropped.
void Executor::ComputeCache::recycle(Expr* expr) {
    mOutputTensors.erase(expr);
    if (!mOutputTensors.empty()) {
        return;
    }
    mUnits.clear();
    for (auto content = mTensors.begin(); content != mTensors.end(); ++content) {
        content->reset();
    }
    mTensors.clear();
    mInputs.clear();
}
|
|
|
|
|
2020-01-15 13:33:47 +08:00
|
|
|
|
|
|
|
// Brings this cache's outputs up to date.
//
// Steps: resize if shapes are dirty; return early if content is clean;
// compute every input cache; run each unit that has an execution between
// onExecuteBegin / onExecuteEnd; finally copy each distinct output pair once
// (host -> device for a unit-less cache, device -> host otherwise).
// The first error code encountered is returned.
ErrorCode Executor::ComputeCache::compute() {
    if (mShapeDirty) {
        auto resizeCode = resize();
        if (NO_ERROR != resizeCode) {
            return resizeCode;
        }
    }
    if (!mContentDirty) {
        return NO_ERROR;
    }
    for (const auto& upstream : mInputs) {
        auto upstreamCode = upstream->compute();
        if (NO_ERROR != upstreamCode) {
            return upstreamCode;
        }
    }

    mBackend->onExecuteBegin();
    for (auto& unit : mUnits) {
        if (nullptr == unit.exe) {
            continue;
        }
        //FUNC_PRINT_ALL(EnumNameOpType(unit.origin->get()->type()), s);
#ifdef MNN_EXPR_ENABLE_PROFILER
        Timer autoTime;
#endif
        auto execCode = unit.exe->onExecute(unit.inputs, unit.outputs);
#ifdef MNN_EXPR_ENABLE_PROFILER
        float costTime = (float)autoTime.durationInUs() / (float)1000;
        Executor::getGlobalExecutor()->addOpCostTime((int)unit.origin->get()->type(), costTime);
#endif
        if (NO_ERROR != execCode) {
            mBackend->onExecuteEnd();
            return execCode;
        }
    }
    mBackend->onExecuteEnd();

    // useCount serves as a "copied" marker below: clear it on every output
    // first so each tensor is copied once even if several entries share it.
    for (auto& entry : mOutputTensors) {
        for (auto& tensorPair : entry.second) {
            TensorUtils::getDescribe(tensorPair.second)->useCount = 0;
        }
    }
    const bool noUnits = mUnits.empty();
    for (auto& entry : mOutputTensors) {
        for (auto& tensorPair : entry.second) {
            auto describe = TensorUtils::getDescribe(tensorPair.second);
            if (describe->useCount > 0) {
                continue;
            }
            if (noUnits) {
                tensorPair.second->copyFromHostTensor(tensorPair.first);
            } else {
                tensorPair.second->copyToHostTensor(tensorPair.first);
            }
            describe->useCount = 1;
        }
    }
    mContentDirty = false;
    return NO_ERROR;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Re-plans this cache after a shape change.
//
// No-op when shapes are clean. Otherwise: resizes every input cache, resets
// all tracked tensors, then for each unit acquires output buffers, creates the
// execution if needed and calls onResize. Finally prepares the host/device
// output pairs and publishes the host pointer into each expr's output info.
//
// Returns the first error from an input cache or from onResize, OUT_OF_MEMORY
// when a buffer cannot be acquired, NOT_SUPPORT when the backend cannot create
// an execution, NO_ERROR otherwise. On success the shape flag is cleared and
// the content flag is set.
ErrorCode Executor::ComputeCache::resize() {
    if (!mShapeDirty) {
        return NO_ERROR;
    }
    for (auto c : mInputs) {
        auto code = c->resize();
        if (NO_ERROR != code) {
            return code;
        }
    }
    for (auto& t : mTensors) {
        t.reset();
    }
    // Single Tensor
    // Guarded: recycle() can leave mOutputTensors empty, and dereferencing
    // begin() of an empty map is undefined behaviour.
    if (mUnits.empty() && !mOutputTensors.empty()) {
        auto iter = mOutputTensors.begin();
        auto expr = iter->first;
        Utils::copyInfoToTensor(iter->second[0].first, expr->outputInfo(0));
        iter->second[0].first->buffer().device = 0;
    }
    for (auto& iter : mUnits) {
        if ((iter.origin->infoDirty()) || (!iter.origin->valid())) {
            // Shape info is unavailable: mark the outputs as dimensionless and skip the unit.
            for (int i=0; i<iter.outputs.size(); ++i) {
                iter.outputs[i]->buffer().dimensions = 0;
            }
            continue;
        }
        for (int i=0; i<iter.outputs.size(); ++i) {
            Utils::copyInfoToTensor(iter.outputs[i], iter.origin->outputInfo(i));
            auto res = mBackend->onAcquireBuffer(iter.outputs[i], Backend::DYNAMIC);
            // NOTE(review): the backend is recorded before `res` is checked,
            // unlike the two acquisition sites further down — confirm intent.
            TensorUtils::getDescribe(iter.outputs[i])->backend = mBackend.get();
            if (!res) {
                return OUT_OF_MEMORY;
            }
        }
        if (nullptr == iter.exe) {
#ifdef MNN_EXPR_ENABLE_PROFILER
            Timer autoTime;
#endif
            iter.exe.reset(mBackend->onCreate(iter.inputs, iter.outputs, iter.origin->get()));
#ifdef MNN_EXPR_ENABLE_PROFILER
            float costTime = (float)autoTime.durationInUs() / (float)1000;
            Executor::getGlobalExecutor()->addOpCostTime((int)iter.origin->get()->type(), costTime);
#endif
        }
        if (nullptr == iter.exe) {
            return NOT_SUPPORT;
        }
#ifdef MNN_EXPR_ENABLE_PROFILER
        Timer autoTime;
#endif
        auto code= iter.exe->onResize(iter.inputs, iter.outputs);
#ifdef MNN_EXPR_ENABLE_PROFILER
        float costTime = (float)autoTime.durationInUs() / (float)1000;
        Executor::getGlobalExecutor()->addOpCostTime((int)iter.origin->get()->type(), costTime);
#endif
        if (NO_ERROR != code) {
            return code;
        }
        // Drop one use from every input produced inside this cache whose
        // content the op needs; release its buffer once no use remains.
        auto& req = iter.origin->inside()->mReq.contentNeedContent;
        for (int i=0; i<iter.inputs.size(); ++i) {
            if (iter.inputFromCache[i]) {
                continue;
            }
            if (!req[i]) {
                continue;
            }
            auto des = TensorUtils::getDescribe(iter.inputs[i]);
            des->useCount--;
            if (des->useCount <= 0 && des->backend != nullptr) {
                des->backend->onReleaseBuffer(iter.inputs[i], Backend::DYNAMIC);
                des->backend = nullptr;
            }
        }
    }
    for (auto& iter : mOutputTensors) {
        auto expr = iter.first;
        for (int i=0; i<iter.second.size(); ++i) {
            if (mUnits.empty()) {
                // For Single Tensor, Host -> Device
                if (iter.second[i].first != iter.second[i].second) {
                    TensorUtils::copyShape(iter.second[i].first, iter.second[i].second, true);
                    iter.second[i].second->buffer().host = nullptr;
                    auto res = mBackend->onAcquireBuffer(iter.second[i].second, Backend::DYNAMIC);
                    if (!res) {
                        return OUT_OF_MEMORY;
                    }
                    TensorUtils::getDescribe(iter.second[i].second)->backend = mBackend.get();
                }
            } else {
                // For Other Cache, Device -> Host
                if (iter.second[i].first != iter.second[i].second) {
                    TensorUtils::copyShape(iter.second[i].second, iter.second[i].first, true);
                    iter.second[i].first->buffer().device = 0;
                    auto res = mBackupBackend->onAcquireBuffer(iter.second[i].first, Backend::DYNAMIC);
                    if (!res) {
                        return OUT_OF_MEMORY;
                    }
                    TensorUtils::getDescribe(iter.second[i].first)->backend = mBackupBackend.get();
                }
            }
            // Expose the first (host) tensor's memory through the expr's output info.
            expr->outputInfo(i)->ptr = iter.second[i].first->host<void>();
        }
    }
    mShapeDirty = false;
    mContentDirty = true;
    return NO_ERROR;
}
|
|
|
|
|
2020-01-15 13:33:47 +08:00
|
|
|
// Moves the unit of `expr`, preceded by the units of the inputs it depends on,
// from `units` into `dest` (post-order, so producers come before consumers).
// An input is followed only if its content is needed, it still has a unit in
// `units`, and it has no cache of its own. Collected units are erased from
// `units`, which keeps a unit from being collected twice.
static void _collectExecuteUnit(std::vector<Executor::ComputeCache::Unit>& dest, EXPRP expr, std::map<EXPRP, Executor::ComputeCache::Unit>& units) {
    auto& inputs      = expr->inputs();
    auto& needContent = expr->inside()->mReq.contentNeedContent;
    MNN_ASSERT(inputs.size() == needContent.size());

    for (int idx = 0; idx < inputs.size(); ++idx) {
        if (!needContent[idx]) {
            continue;
        }
        auto producer = inputs[idx]->expr();
        const bool hasPendingUnit = (units.find(producer.first) != units.end());
        if (!hasPendingUnit) {
            continue;
        }
        if (nullptr != producer.first->inside()->mCache) {
            continue;
        }
        _collectExecuteUnit(dest, producer.first, units);
    }

    auto self = units.find(expr);
    if (units.end() == self) {
        return;
    }
    dest.emplace_back(std::move(self->second));
    units.erase(self);
}
|
|
|
|
|
|
|
|
void Executor::ComputeCache::create(const std::vector<EXPRP>& outputs, std::map<EXPRP, ComputeCache::Unit>& units, std::set<std::shared_ptr<Executor::ComputeCache>>&& inputCaches, std::vector<ComputeCache::TensorContent>&& tensors, std::shared_ptr<Backend> bn, std::shared_ptr<Backend> backup) {
|
|
|
|
std::vector<EXPRP> packed;
|
|
|
|
for (auto expr : outputs) {
|
|
|
|
// Make Cache For Single Tensor
|
|
|
|
auto cache = expr->inside()->mCache;
|
|
|
|
if (nullptr != cache) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (nullptr != expr->get()) {
|
|
|
|
packed.emplace_back(expr);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
cache.reset(new ComputeCache);
|
|
|
|
cache->mBackend = bn;
|
|
|
|
cache->mTensors.resize(1);
|
|
|
|
cache->mTensors[0].tensor.reset(new Tensor);
|
|
|
|
Utils::copyInfoToTensor(cache->mTensors[0].tensor.get(), expr->outputInfo(0));
|
|
|
|
expr->inside()->mCache = cache;
|
|
|
|
if (bn->type() != MNN_FORWARD_CPU) {
|
|
|
|
cache->mTensors.resize(2);
|
|
|
|
cache->mTensors[1].tensor.reset(new Tensor);
|
|
|
|
Utils::copyInfoToTensor(cache->mTensors[1].tensor.get(), expr->outputInfo(0));
|
|
|
|
cache->mTensors[1].tensor->buffer().host = nullptr;
|
|
|
|
cache->mOutputTensors[expr.get()] = {std::make_pair(cache->mTensors[0].tensor.get(), cache->mTensors[1].tensor.get())};
|
2019-12-27 22:16:57 +08:00
|
|
|
} else {
|
2020-01-15 13:33:47 +08:00
|
|
|
cache->mOutputTensors[expr.get()] = {std::make_pair(cache->mTensors[0].tensor.get(), cache->mTensors[0].tensor.get())};
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
2020-01-15 13:33:47 +08:00
|
|
|
cache->mBackupBackend = backup;
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
2020-01-15 13:33:47 +08:00
|
|
|
if (packed.empty()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
std::shared_ptr<ComputeCache> packedCache(new ComputeCache);
|
|
|
|
packedCache->mBackend = bn;
|
|
|
|
packedCache->mInputs = std::move(inputCaches);
|
|
|
|
for (auto input : packedCache->mInputs) {
|
|
|
|
input->addLink(packedCache);
|
|
|
|
}
|
|
|
|
for (auto expr : packed) {
|
|
|
|
MNN_ASSERT(units.find(expr) != units.end());
|
|
|
|
auto& originOutputs = units[expr].outputs;
|
|
|
|
std::vector<std::pair<Tensor*, Tensor*>> destOutputs;
|
|
|
|
if (bn->type() == MNN_FORWARD_CPU) {
|
|
|
|
for (auto t : originOutputs) {
|
|
|
|
destOutputs.emplace_back(std::make_pair(t, t));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (auto t : originOutputs) {
|
|
|
|
ComputeCache::TensorContent content;
|
|
|
|
content.tensor.reset(new Tensor);
|
|
|
|
TensorUtils::copyShape(t, content.tensor.get(), true);
|
|
|
|
destOutputs.emplace_back(std::make_pair(content.tensor.get(), t));
|
|
|
|
tensors.emplace_back(std::move(content));
|
|
|
|
}
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
2020-01-15 13:33:47 +08:00
|
|
|
packedCache->mOutputTensors[expr.get()] = std::move(destOutputs);
|
|
|
|
expr->inside()->mCache = packedCache;
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
2020-01-15 13:33:47 +08:00
|
|
|
packedCache->mTensors = std::move(tensors);
|
|
|
|
packedCache->mBackupBackend = backup;
|
|
|
|
|
|
|
|
// Backup Tensor Refcount
|
|
|
|
for (auto& t : packedCache->mTensors) {
|
|
|
|
t.refCount = TensorUtils::getDescribe(t.tensor.get())->useCount;
|
|
|
|
}
|
|
|
|
// Create Units
|
|
|
|
for (auto expr : packed) {
|
|
|
|
_collectExecuteUnit(packedCache->mUnits, expr, units);
|
|
|
|
}
|
|
|
|
// Resize if possible
|
|
|
|
packedCache->resize();
|
2019-12-27 22:16:57 +08:00
|
|
|
}
|
|
|
|
|
2020-01-15 13:33:47 +08:00
|
|
|
// Depth-first preparation of `expr` and the inputs whose content it needs.
//
// For each such input: skip it if it already has a unit, record its cache in
// `inputCaches` if it has one, otherwise recurse into it.
// Then, for `expr` itself: an expr without an op gets a single-tensor cache
// right away; any other expr gets a Unit added to `units`, with its input
// and output tensors wired up and newly created tensors appended to `tensors`.
void Executor::_visit(EXPRP expr, std::map<EXPRP, ComputeCache::Unit>& units, std::set<std::shared_ptr<Executor::ComputeCache>>& inputCaches, std::vector<ComputeCache::TensorContent>& tensors) {
    auto& inputs      = expr->inputs();
    auto& needContent = expr->inside()->mReq.contentNeedContent;
    MNN_ASSERT(inputs.size() == needContent.size());

    // Create Input's Unit / Cache
    for (int idx = 0; idx < inputs.size(); ++idx) {
        if (!needContent[idx]) {
            continue;
        }
        auto producer = inputs[idx]->expr();
        if (units.end() != units.find(producer.first)) {
            continue;
        }
        auto producerCache = producer.first->inside()->mCache;
        if (nullptr != producerCache) {
            inputCaches.insert(producerCache);
            continue;
        }
        _visit(producer.first, units, inputCaches, tensors);
    }

    // Create Self Unit / Cache
    auto op = expr->get();
    if (nullptr == op) {
        // Make Cache For Single Tensor
        Executor::ComputeCache::create({expr}, units, {}, {}, mBackend, mBackupBackend);
        return;
    }

    ComputeCache::Unit unit;
    unit.origin = expr.get();
    unit.inputs.resize(inputs.size());
    unit.inputFromCache.resize(inputs.size());
    for (int idx = 0; idx < inputs.size(); ++idx) {
        auto producer = inputs[idx]->expr();
        if (!needContent[idx]) {
            // Content is not needed: a new tensor filled from the producer's
            // output info stands in for the input.
            ComputeCache::TensorContent placeholder;
            placeholder.tensor.reset(new Tensor);
            Utils::copyInfoToTensor(placeholder.tensor.get(), producer.first->outputInfo(producer.second));
            unit.inputs[idx] = placeholder.tensor.get();
            tensors.emplace_back(std::move(placeholder));
            continue;
        }
        auto producerUnit = units.find(producer.first);
        if (units.end() != producerUnit) {
            // The producer has a unit in `units`: use its output tensor and count this use.
            unit.inputs[idx] = producerUnit->second.outputs[producer.second];
            TensorUtils::getDescribe(unit.inputs[idx])->useCount++;
            unit.inputFromCache[idx] = false;
            continue;
        }
        auto producerCache = producer.first->inside()->mCache;
        if (nullptr != producerCache) {
            // The producer has its own cache: take the second tensor of its output pair.
            unit.inputs[idx]         = producerCache->output(producer.first, producer.second, false);
            unit.inputFromCache[idx] = true;
            continue;
        }
        // The first loop gives every needed input either a unit or a cache.
        MNN_ASSERT(false);
    }

    unit.outputs.resize(expr->outputSize());
    for (int idx = 0; idx < unit.outputs.size(); ++idx) {
        ComputeCache::TensorContent produced;
        produced.tensor.reset(new Tensor);
        unit.outputs[idx] = produced.tensor.get();
        tensors.emplace_back(std::move(produced));
    }
    units.insert(std::make_pair(expr, std::move(unit)));
}
|
2020-01-15 13:33:47 +08:00
|
|
|
|
|
|
|
void Executor::makeCache(std::vector<EXPRP> expr) {
|
2019-12-27 22:16:57 +08:00
|
|
|
std::lock_guard<std::mutex> _l(mMutex);
|
2020-01-15 13:33:47 +08:00
|
|
|
//FUNC_PRINT(mCaches.size());
|
|
|
|
std::map<EXPRP, ComputeCache::Unit> units;
|
|
|
|
std::set<std::shared_ptr<Executor::ComputeCache>> inputCaches;
|
|
|
|
std::vector<ComputeCache::TensorContent> tensors;
|
|
|
|
for (auto e : expr) {
|
|
|
|
_visit(e, units, inputCaches, tensors);
|
|
|
|
}
|
|
|
|
Executor::ComputeCache::create(expr, units, std::move(inputCaches), std::move(tensors), mBackend, mBackupBackend);
|
|
|
|
}
|
|
|
|
// Records `costTime` (ms) for operator type `op` in the profiler.
// Compiles to a no-op unless MNN_EXPR_ENABLE_PROFILER is defined.
void Executor::addOpCostTime(int op, float costTime) {
#ifdef MNN_EXPR_ENABLE_PROFILER
    auto& profiler = *mProfiler;
    profiler.add(op, costTime);
#endif
}
|
2020-01-15 13:33:47 +08:00
|
|
|
|
|
|
|
// Computes `cache` while holding mMutex and returns its error code.
ErrorCode Executor::runCache(std::shared_ptr<ComputeCache> cache) {
    std::lock_guard<std::mutex> _l(mMutex);
    const ErrorCode status = cache->compute();
    return status;
}
|
|
|
|
// Clears the profiler's recorded timings.
// Compiles to a no-op unless MNN_EXPR_ENABLE_PROFILER is defined.
void Executor::resetProfile() {
#ifdef MNN_EXPR_ENABLE_PROFILER
    auto& profiler = *mProfiler;
    profiler.reset();
#endif
}
|
|
|
|
// Prints the profiler's recorded timings.
// Compiles to a no-op unless MNN_EXPR_ENABLE_PROFILER is defined.
void Executor::dumpProfile() {
#ifdef MNN_EXPR_ENABLE_PROFILER
    const auto& profiler = *mProfiler;
    profiler.dump();
#endif
}
|
|
|
|
|
|
|
|
} // namespace Express
|
|
|
|
} // namespace MNN
|