//
//  Pipeline.cpp
//  MNN
//
//  Created by MNN on 2019/01/14.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#include "Pipeline.hpp"
|
|
|
|
#include "Backend.hpp"
|
|
|
|
#include "Macro.h"
|
|
|
|
#include "SizeComputer.hpp"
|
|
|
|
#include "TensorUtils.hpp"
|
|
|
|
#include "WrapExecution.hpp"
|
|
|
|
//#define MNN_OPEN_TIME_TRACE
|
|
|
|
#include "AutoTime.hpp"
|
|
|
|
//#define MNN_DEBUG_TENSOR_SIZE
|
|
|
|
namespace MNN {
|
|
|
|
OperatorInfo::OperatorInfo() {
    mContent = new Info;
    MNN_ASSERT(nullptr != mContent);
}

OperatorInfo::~OperatorInfo() {
    delete mContent;
}

const std::string& OperatorInfo::name() const {
    return mContent->name;
}

const std::string& OperatorInfo::type() const {
    return mContent->type;
}

float OperatorInfo::flops() const {
    return mContent->flops;
}
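// Storage type used when acquiring a tensor's buffer: inputs, constants and
// handle-typed tensors are kept out of the reusable DYNAMIC pool so their
// contents survive the whole inference, hence DYNAMIC_SEPERATE.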
static Backend::StorageType _getTensorStorageType(const Tensor* tensor) {
    auto des = TensorUtils::getDescribe(tensor);
    if (des->isConst || des->isInput) {
        return Backend::DYNAMIC_SEPERATE;
    }
    if (des->handleType != Tensor::HANDLE_NONE) {
        return Backend::DYNAMIC_SEPERATE;
    }
    return Backend::DYNAMIC;
}
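// Storage type used when releasing a tensor's buffer; it mirrors how the
// buffer was acquired above (constants and handle-typed tensors were taken as
// DYNAMIC_SEPERATE).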
static Backend::StorageType _getTensorReleaseStorageType(const Tensor* tensor) {
    auto des = TensorUtils::getDescribe(tensor);
    if (des->handleType != Tensor::HANDLE_NONE) {
        return Backend::DYNAMIC_SEPERATE;
    }
    if (des->isConst) {
        return Backend::DYNAMIC_SEPERATE;
    }
    return Backend::DYNAMIC;
}
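// Bind every still-unbound tensor to the given backend and acquire its buffer.
// Tensors that already have a backend are left untouched. Returns false if any
// allocation fails.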
bool Pipeline::Unit::_allocTensors(Backend* bn, const std::vector<Tensor*>& tensors) {
    for (auto t : tensors) {
        auto des = TensorUtils::getDescribe(t);
        if (nullptr != des->backend) {
            continue;
        }
        des->backend = bn;
        TensorUtils::setLinearLayout(t);
        auto success = bn->onAcquireBuffer(t, _getTensorStorageType(t));
        if (!success) {
            return false;
        }
    }
    return true;
}
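// A Unit bundles one op with its input/output tensors and, once prepared, the
// Execution that runs it.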
Pipeline::Unit::Unit(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    MNN_ASSERT(nullptr != op);
    mOriginOp = op;
    mType     = op->type();
    mInputs   = inputs;
    mOutputs  = outputs;
    if (nullptr != op->name()) {
        mContent->name = op->name()->str();
    }
    auto typeStr = EnumNameOpType(mType);
    if (nullptr != typeStr) {
        mContent->type = typeStr;
    }
    mComputer = SizeComputerSuite::get()->search(mType);
}
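// Create the execution on the preferred backend, falling back to the CPU
// backend when the op is unsupported there. If any content-dependent input
// lives on a different backend than the chosen execution, wrap the execution
// so the input data is copied across first.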
bool Pipeline::Unit::_createExecution(Backend* bn, Backend* cpuBn) {
    mExecution.reset(bn->onCreate(mInputs, mOutputs, mOriginOp));
    if (nullptr == mExecution) {
        mExecution.reset(cpuBn->onCreate(mInputs, mOutputs, mOriginOp));
    }
    if (nullptr == mExecution) {
        return false;
    }

    bool needWrap         = false;
    auto executionBackend = mExecution->backend();
    for (int i = 0; i < mInputs.size(); ++i) {
        auto t   = mInputs[i];
        auto des = TensorUtils::getDescribe(t);
        if (des->backend != executionBackend && SizeComputer::opNeedContent(mOriginOp->type(), i)) {
            needWrap = true;
        }
    }
    if (needWrap) {
        // FUNC_PRINT_ALL(mOriginOp->name()->c_str(), s);
        auto tempExecution = mExecution;
        mExecution.reset(new WrapExecution(cpuBn, tempExecution));
    }
    return mExecution->valid();
}
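// Run the op once. Const units were already evaluated during prepare(), so
// they are skipped at execution time.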
ErrorCode Pipeline::Unit::execute() {
    if (nullptr == mExecution) {
        return NO_EXECUTION;
    }
    if (mConst) {
        return NO_ERROR;
    }
    auto code = mExecution->onExecute(mInputs, mOutputs);
    if (NO_ERROR != code) {
        MNN_ERROR("Execute Error for %s, code=%d\n", mContent->name.c_str(), code);
    }
    return code;
}
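// Like execute(), but surrounded by user callbacks: `before` may return false
// to skip running the op, and `after` may return false to abort the whole
// pipeline with CALL_BACK_STOP.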
ErrorCode Pipeline::Unit::executeCallBack(const TensorCallBackWithInfo& before, const TensorCallBackWithInfo& after) {
    if (nullptr == mExecution) {
        return NO_EXECUTION;
    }
    if (mConst) {
        return NO_ERROR;
    }
    auto run = before(mInputs, this);
    if (run) {
        auto code = mExecution->onExecute(mInputs, mOutputs);
        if (NO_ERROR != code) {
            MNN_ERROR("Execute Error for %s, code=%d\n", mContent->name.c_str(), code);
            return code;
        }
    }
    auto runOthers = after(mOutputs, this);
    if (!runOthers) {
        return CALL_BACK_STOP;
    }
    return NO_ERROR;
}
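// Prepare the unit: validate input shapes, allocate buffers, infer output
// sizes and FLOPs, create (or re-create on fallback) the execution, resize it,
// pre-compute const units, and release inputs that are no longer needed.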
ErrorCode Pipeline::Unit::prepare(Backend* bn, Backend* cpuBn) {
    for (auto t : mInputs) {
        bool valid = true;
        for (int i = 0; i < t->dimensions(); ++i) {
            if (t->length(i) <= 0) {
                valid = false;
                break;
            }
        }
        if (!valid) {
            MNN_ERROR("The %s's input is not ready\n", mContent->name.c_str());
            return COMPUTE_SIZE_ERROR;
        }
    }
    {
        auto success = _allocTensors(bn, mInputs);
        if (!success) {
            return OUT_OF_MEMORY;
        }
    }
    bool ready = SizeComputer::computeOutputSize(mOriginOp, mInputs, mOutputs);
    for (auto o : mOutputs) {
        if (o->size() <= 0) {
            ready = false;
        }
    }
    mContent->flops = SizeComputer::computeFlops(mOriginOp, mInputs, mOutputs);

#ifdef MNN_DEBUG_TENSOR_SIZE
    MNN_PRINT("\n===> compute shape: %s, [%d]\n", mOriginOp->name()->c_str(), mOriginOp->type());
    if (mInputs.size()) {
        MNN_PRINT("Inputs:\n");
        for (auto o : mInputs) {
            if (o->dimensions() == 0) {
                MNN_PRINT("\t*Scalar*");
            }
            for (int i = 0; i < o->dimensions(); ++i) {
                MNN_PRINT("%d, ", o->length(i));
            }
            MNN_PRINT("\n");
        }
    }
    MNN_PRINT("Outputs:\n");
    for (auto o : mOutputs) {
        if (o->dimensions() == 0) {
            MNN_PRINT("\t*Scalar*");
        }
        for (int i = 0; i < o->dimensions(); ++i) {
            MNN_PRINT("%d, ", o->length(i));
        }
        MNN_PRINT("\n");
    }
#endif
    if (!ready) {
        return COMPUTE_SIZE_ERROR;
    }
    // Check const: the unit is const-foldable when every content-dependent
    // input is itself const.
    mConst = true;
    for (int i = 0; i < mInputs.size(); ++i) {
        if (SizeComputer::opNeedContent(mOriginOp->type(), i) && (!TensorUtils::getDescribe(mInputs[i])->isConst)) {
            mConst = false;
            break;
        }
    }
    if (mConst) {
        for (auto t : mOutputs) {
            TensorUtils::getDescribe(t)->isConst = true;
        }
        bn = cpuBn;
    }
    // Create or resize the execution.
    if (nullptr == mExecution) {
        auto success = _createExecution(bn, cpuBn);
        if (!success || mExecution == nullptr) {
            return NOT_SUPPORT;
        }
    }
    bn = mExecution->backend();
    {
        auto success = _allocTensors(bn, mOutputs);
        if (!success) {
            return OUT_OF_MEMORY;
        }
    }
    auto code = mExecution->onResize(mInputs, mOutputs);
    if (TENSOR_NOT_SUPPORT == code || TENSOR_NEED_DIVIDE == code) {
        // TODO
        mExecution.reset();
        for (auto t : mOutputs) {
            auto des = TensorUtils::getDescribe(t);
            des->backend->onReleaseBuffer(t, _getTensorReleaseStorageType(t));
            des->backend = nullptr;
        }
        auto created = _createExecution(cpuBn, cpuBn);
        MNN_ASSERT(created);
        auto success = _allocTensors(mExecution->backend(), mOutputs);
        if (!success) {
            return OUT_OF_MEMORY;
        }
        code = mExecution->onResize(mInputs, mOutputs);
    }
    if (NO_ERROR != code) {
        mExecution.reset();
        return code;
    }
    if (mConst) {
        code = mExecution->onExecute(mInputs, mOutputs);
    }
    // Decrease each input's use count; release buffers that no later unit will
    // read again.
    for (auto t : mInputs) {
        auto des = TensorUtils::getDescribe(t);
        des->useCount -= 1;
        if (0 == des->useCount) {
            des->backend->onReleaseBuffer(t, _getTensorReleaseStorageType(t));
        }
    }
    return code;
}
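// Build one Unit per scheduled op. `backend` is the preferred compute backend;
// `cpuBackend` is kept as the fallback for ops the preferred backend cannot
// run.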
Pipeline::Pipeline(const std::vector<Schedule::PipelineInfo>& infos, Backend* backend, Backend* cpuBackend) {
    SizeComputerSuite::init();
    MNN_ASSERT(nullptr != backend);
    MNN_ASSERT(nullptr != cpuBackend);
    mBackupBackend = cpuBackend;
    mBackend       = backend;

    for (auto& info : infos) {
        std::shared_ptr<Unit> unit(new Unit(info.op, info.inputs, info.outputs));
        mUnits.emplace_back(unit);
    }
}
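// Prepare all units in schedule order; shape information flows forward because
// each unit's outputs are resized before the next unit is prepared.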
ErrorCode Pipeline::prepare() {
    mBackend->onResizeBegin();
    for (auto& u : mUnits) {
        auto code = u->prepare(mBackend, mBackupBackend);
        if (NO_ERROR != code) {
            if (nullptr != u->mOriginOp->name()) {
                MNN_ERROR("Resize error for %s, code=%d\n", u->mOriginOp->name()->c_str(), code);
            }
            return code;
        }
    }
    mBackend->onResizeEnd();
    return NO_ERROR;
}
ErrorCode Pipeline::execute() {
    mBackend->onExecuteBegin();
    for (auto& u : mUnits) {
        auto code = u->execute();
        if (code != NO_ERROR) {
            mBackend->onExecuteEnd();
            return code;
        }
    }
    mBackend->onExecuteEnd();
    return NO_ERROR;
}
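// The empty shared_ptr with a custom deleter acts as a scope guard: it
// guarantees onExecuteEnd() is called on every return path, including early
// exits on callback errors.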
ErrorCode Pipeline::executeCallBack(const TensorCallBackWithInfo& before, const TensorCallBackWithInfo& after) {
    mBackend->onExecuteBegin();
    std::shared_ptr<char> __defer(nullptr, [this](void*) { mBackend->onExecuteEnd(); });
    for (auto& u : mUnits) {
        auto code = u->executeCallBack(before, after);
        if (code != NO_ERROR) {
            return code;
        }
    }
    return NO_ERROR;
}
ErrorCode Pipeline::releaseCache() {
    for (auto& u : mUnits) {
        if (nullptr != u->mExecution) {
            auto code = u->mExecution->onReleaseCache();
            if (NO_ERROR != code) {
                MNN_ERROR("Error when releasing cache for %s\n", u->name().c_str());
                return code;
            }
        }
    }
    return NO_ERROR;
}

} // namespace MNN