//
//  Pipeline.cpp
//  MNN
//
//  Created by MNN on 2019/01/14.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include <string.h>
#include "core/Pipeline.hpp"
#include "core/Backend.hpp"
#include "core/Macro.h"
#include "core/TensorUtils.hpp"
#include "core/WrapExecution.hpp"
#include "geometry/GeometryComputerUtils.hpp"
#include "shape/SizeComputer.hpp"
#include "core/OpCommonUtils.hpp"

// TODO: Find a better way to debug
//#define MNN_OP_SEPERATE
//#define MNN_PIPELINE_DEBUG
namespace MNN {

// FIXME: Move into Backend
static bool _supportQuant(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, MNNForwardType type) {
    auto otype = op->type();
    switch (otype) {
        case OpType_Convolution:
        case OpType_ConvolutionDepthwise:
            if (inputs.size() > 1) {
                return false;
            }
            if (op->main_as_Convolution2D() && op->main_as_Convolution2D()->weight() != nullptr) {
                return false;
            } else {
                return true;
            }
        case OpType_ConvInt8:
        case OpType_DepthwiseConvInt8:
            return true;
            // case OpType_Eltwise:
        case OpType_Raster:
        {
            for (auto input : inputs) {
                if (TensorUtils::getDescribe(input)->quantAttr.get() != TensorUtils::getDescribe(outputs[0])->quantAttr.get()) {
                    return false;
                }
            }
            return true;
        }
        case OpType_Pooling:
            if (op->main_as_Pool() && op->main_as_Pool()->type() == PoolType_MAXPOOL) {
                return true;
            } else if (op->main_as_Pool() && op->main_as_Pool()->type() == PoolType_AVEPOOL) {
                return true;
            } else {
                return false;
            }
        case OpType_Softmax:
            return true;
        case OpType_LayerNorm:
            return true;
#ifdef MNN_SUPPORT_QUANT_EXTEND
        case OpType_ReLU:
            if (TensorUtils::getDescribe(inputs[0])->quantAttr.get() != TensorUtils::getDescribe(outputs[0])->quantAttr.get()) {
                return false;
            }
            // For now only ReLU without a slope supports quant
            if ((op->main_as_Relu() == nullptr) || op->main_as_Relu()->slope() == 0.f) {
                return true;
            } else {
                return false;
            }
        case OpType_BinaryOp:
            return true;
        case OpType_Scale:
            return true;
        case OpType_Interp:
            return true;
        case OpType_UnaryOp:
            if (op->main_as_UnaryOp()->tableInt8() || op->main_as_UnaryOp()->opType() == UnaryOpOperation_NEG || op->main_as_UnaryOp()->opType() == UnaryOpOperation_ABS || op->main_as_UnaryOp()->opType() == UnaryOpOperation_SIGN) {
                return true;
            } else {
                return false;
            }
        case OpType_PReLU:
            return true;
#endif
        default:
            break;
    }
    return false;
}
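
// Example for _supportQuant (sketch): a convolution whose weights were
// quantized away at conversion time (weight() == nullptr) reports true, so
// encode() runs it as int8 and only inserts FloatToInt8/Int8ToFloat casts
// around neighbours that disagree. Note the `type` parameter is currently
// unused: the decision depends only on the op and its tensors' quantAttr.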

OperatorInfo::OperatorInfo() {
    mContent = new Info;
    MNN_ASSERT(nullptr != mContent);
}
OperatorInfo::~OperatorInfo() {
    delete mContent;
}

const std::string& OperatorInfo::name() const {
    return mContent->name;
}

const std::string& OperatorInfo::type() const {
    return mContent->type;
}

float OperatorInfo::flops() const {
    return mContent->flops;
}
static Backend::StorageType _getTensorStorageType(const Tensor* tensor, bool outputStatic) {
    auto des   = TensorUtils::getDescribe(tensor);
    auto usage = des->usage;
    if (TensorUsage::OUTPUT == usage && outputStatic) {
        return Backend::STATIC;
    }
    if (TensorUsage::CONSTANT == usage || TensorUsage::INPUT == usage || TensorUsage::TRAINABLE == usage) {
        return Backend::DYNAMIC_SEPERATE;
    }
    return Backend::DYNAMIC;
}
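
// Storage policy of _getTensorStorageType in short: OUTPUT tensors become
// STATIC only when the user requested static outputs; CONSTANT / INPUT /
// TRAINABLE tensors use DYNAMIC_SEPERATE so their memory is never recycled
// into the shared dynamic pool; all remaining intermediates are DYNAMIC and
// may be reused while the pipeline runs.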

static bool _needRelease(const Tensor* tensor, bool inputOutside) {
    auto des   = TensorUtils::getDescribe(tensor);
    auto desO  = TensorUtils::getDescribeOrigin(tensor);
    auto usage = des->usage;
    if (0 != des->useCount) {
        return false;
    }
    if (des->memoryType == Tensor::InsideDescribe::MEMORY_HOST || des->memoryType == Tensor::InsideDescribe::MEMORY_OUTSIDE) {
        return false;
    }
    if (nullptr == desO->getBackend()) {
        return false;
    }
    if (inputOutside) {
        return usage == Tensor::InsideDescribe::NORMAL;
    }
    if (tensor->buffer().type.code == halide_type_handle) {
        return false;
    }
    if (TensorUsage::CONSTANT == usage || TensorUsage::TRAINABLE == usage || TensorUsage::OUTPUT == usage) {
        return false;
    }
    return true;
}
static void _releaseTensor(Tensor* origin, bool mAllocInput, int group) {
    auto des = TensorUtils::getDescribe(origin);
    if (des->usage != Tensor::InsideDescribe::CONSTANT) {
        des->useCount -= 1;
    }
    if (des->group != group) {
        return;
    }
    auto needRelease = _needRelease(origin, !mAllocInput);
    if (needRelease) {
        TensorUtils::getDescribeOrigin(origin)->mem = nullptr;
    }
}

static bool _allocTensor(Tensor* t, Backend* curBackend, bool outputStatic, int group) {
    auto memoryType = _getTensorStorageType(t, outputStatic);
    auto des = TensorUtils::getDescribe(t);
    if (des->group != group) {
        return true;
    }
    if (nullptr == TensorUtils::getDescribeOrigin(t)->mem.get()) {
        TensorUtils::setLinearLayout(t);
        auto res     = curBackend->onAcquireBuffer(t, memoryType);
        return res;
    }
    return true;
}
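
// Lifecycle sketch for the three helpers above: _allocForTensor first sets
// useCount to the number of commands consuming each non-constant tensor,
// _allocTensor lazily acquires a buffer the first time a tensor of the
// current group is touched, and _releaseTensor decrements useCount after
// each consumer; once it reaches zero for a backend-owned NORMAL tensor,
// the memory goes back to the dynamic allocator for later commands.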

void Pipeline::UnitInfo::setUp(const Command& command, int index, const Op* originOp, int totalIndex) {
    if (nullptr != command.op->name()) {
        mContent->name = command.op->name()->str();
    } else {
        if (nullptr != originOp && nullptr != originOp->name()) {
            char buffer[20];
            sprintf(buffer, "%d", index);
            mContent->name = originOp->name()->str() + "_raster_" + buffer;
        } else {
            char buffer[20];
            sprintf(buffer, "_raster_%d", totalIndex);
            mContent->name = buffer;
        }
    }
#ifdef MNN_OP_SEPERATE
    if (command.op->type() == OpType_UnaryOp) {
        mContent->type = EnumNameUnaryOpOperation(command.op->main_as_UnaryOp()->opType());
    } else if (command.op->type() == OpType_BinaryOp) {
        mContent->type = EnumNameBinaryOpOperation((BinaryOpOperation)(command.op->main_as_BinaryOp()->opType()));
    } else if (command.op->type() == OpType_Reduction) {
        mContent->type = EnumNameReductionType(command.op->main_as_ReductionParam()->operation());
    } else {
        mContent->type = EnumNameOpType(command.op->type());
    }
#else
    mContent->type = EnumNameOpType(command.op->type());
#endif
#ifndef MNN_SKIPBUILD_GEOMETRY
    mContent->flops = SizeComputer::computeFlops(command.op, command.inputs, command.outputs);
#endif
}

Pipeline::Pipeline(const std::string& externalFile, Schedule::PipelineInfo&& info, bool allocInput, bool outputStatic, const TuningAttr& tune, const Runtime* rt, const Runtime* cpuRt, int geometryMask)
#ifndef MNN_SKIPBUILD_GEOMETRY
    : mContext(geometryMask, info.first.cache.second, info.first.cache.first->type(), info.first.info.user ? info.first.info.user->precision : BackendConfig::Precision_Normal), mUseGeometry(rt->onGetCompilerType()) {
#else
{
#endif
    mExternalFile = externalFile;
    rt->onCheckInfo(info.first.info);
    mRuntime = rt;
    mCpuRuntime = cpuRt;
    mTuneAttr = tune;
    mAllocInput    = allocInput;
    mOutputStatic  = outputStatic;
    mInfo          = std::move(info);
    mIsQuantModel = false;
    for (auto& iter : mInfo.second) {
        for (auto t : iter.outputs) {
            if (TensorUtils::getDescribe(t)->quantAttr.get() != nullptr) {
                mIsQuantModel = true;
                break;
            }
        }
        for (auto t : iter.inputs) {
            if (TensorUtils::getDescribe(t)->quantAttr.get() != nullptr) {
                mIsQuantModel = true;
                break;
            }
        }
        if (mIsQuantModel) {
            break;
        }
    }

}
ErrorCode Pipeline::encode(bool supportDebug, bool permitCodegen) {
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    // A static model just copies info to the command buffer
    if (!mInfo.first.needComputeGeometry) {
        for (int i=0; i<mInfo.second.size(); ++i) {
            auto& info = mInfo.second[i];
            std::shared_ptr<Command> cmd(new Command);
            cmd->op      = info.op;
            if (cmd->op->type() == OpType_Raster) {
                // Compatibility with origin static models
                cmd->outputs  = info.outputs;
                if (TensorUtils::getDescribe(info.outputs[0])->regions.empty() && info.inputs.size() > 0 && TensorUtils::getDescribe(info.inputs[0])->regions.size() > 0) {
                    TensorUtils::getDescribe(info.outputs[0])->regions = std::move(TensorUtils::getDescribe(info.inputs[0])->regions);
                    TensorUtils::setRasterInputs(cmd.get());
                } else {
                    cmd->inputs  = info.inputs;
                }
            } else {
                cmd->inputs  = info.inputs;
                cmd->outputs = info.outputs;
            }
            info.executeBuffer.command = {cmd};
        }
    } else {
#ifndef MNN_SKIPBUILD_GEOMETRY
        mBackend->onClearBuffer();
        mBackupBackend->onClearBuffer();
        mContext.clear();
        mContext.mNeedRelease = mGeometryNeedRelease;
        FileLoader l(mExternalFile.c_str());
        /** Size compute and const compute begin */
        auto res = GeometryComputerUtils::shapeComputeAndGeometryTransform(mCpuRuntime, &l, mInfo.second, mContext, mInfo.first.cache.second, mUseGeometry, false, permitCodegen);
        if (res != NO_ERROR) {
            return res;
        }
#endif
    }
    // Propagate scale and insert new commands
    if (mIsQuantModel && (mBackend->type() == MNN_FORWARD_CPU || mBackend->type() == MNN_FORWARD_CPU_EXTENSION || mBackend->type() == MNN_FORWARD_CUDA || mBackend->type() == MNN_FORWARD_NN || mBackend->type() == MNN_FORWARD_OPENCL)) {
        // get propagate map
        using PropagateMap = std::map<const MNN::Tensor*, std::set<const MNN::Tensor*>>;
        PropagateMap forwardMap, backwardMap;
        auto insertPropagateMap = [](PropagateMap& propagateMap, const Tensor* s, const Tensor* t) {
            if (propagateMap.find(s) == propagateMap.end()) {
                propagateMap[s] = std::set<const Tensor*>({t});
            } else {
                propagateMap[s].insert(t);
            }
        };
        std::set<OpType> propagateOpTypes = { OpType_Raster, OpType_ReLU, OpType_ReLU6, OpType_Pooling,
                                              OpType_Interp, OpType_CropAndResize, OpType_ROIPooling, OpType_Gather,
                                              OpType_GatherV2, OpType_ScatterNd};
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            for (const auto& cmdP : buffer.command) {
                auto& cmd = *cmdP;
                const auto type = cmd.op->type();
                const auto output = cmd.outputs[0];
                if (propagateOpTypes.find(type) != propagateOpTypes.end()) {
                    for (auto t : cmd.inputs) {
                        insertPropagateMap(forwardMap, t, output);
                        insertPropagateMap(backwardMap, output, t);
                    }
                }
            }
        }
        auto getStart = [&forwardMap, &backwardMap](bool forward) {
            auto& propagateMap = forward ? forwardMap : backwardMap;
            auto& antiMap = forward ? backwardMap : forwardMap;
            // delete N->1 map entries of an op
            for (const auto& iter : antiMap) {
                if (iter.second.size() > 1) {
                    for (auto t : iter.second) {
                        auto res = propagateMap.find(t);
                        if (res != propagateMap.end()) {
                            propagateMap.erase(res);
                        }
                    }
                }
            }
            std::set<const Tensor*> root, leaf, start;
            for (const auto& iter : propagateMap) {
                root.insert(iter.first);
                for (auto t : iter.second) {
                    leaf.insert(t);
                }
            }
            std::set_difference(root.begin(), root.end(), leaf.begin(), leaf.end(), std::inserter(start, start.begin()));
            return start;
        };
        auto forwardStart = getStart(true);
        auto backwardStart = getStart(false);
        // propagate scale
        auto propagateScale = [](PropagateMap& propagateMap, std::set<const Tensor*>& start) {
            std::function<bool(const Tensor*)> scalePropagate = [&propagateMap, &scalePropagate](const Tensor* t) {
                if (TensorUtils::getDescribe(t)->quantAttr.get() == nullptr) {
                    return false;
                }
                if (propagateMap.find(t) == propagateMap.end()) {
                    return false;
                }
                bool change = false;
                for (auto x : propagateMap[t]) {
                    if (TensorUtils::getDescribe(x)->quantAttr != TensorUtils::getDescribe(t)->quantAttr) {
                        TensorUtils::getDescribe(x)->quantAttr = TensorUtils::getDescribe(t)->quantAttr;
                        change = true;
                    }
                    change |= scalePropagate(x);
                }
                return change;
            };
            bool change = false;
            for (auto t : start) {
                change |= scalePropagate(t);
            }
            return change;
        };
        for (int i = 0; i < 3 && (propagateScale(forwardMap, forwardStart) || propagateScale(backwardMap, backwardStart)); i++);
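        // Worked example (sketch): for Conv -> ReLU -> MaxPool, only ReLU and
        // MaxPool are scale-transparent, so forwardMap holds
        // {convOut -> reluOut, reluOut -> poolOut}. convOut is a root but not
        // a leaf, so it seeds the forward pass and its quantAttr spreads to
        // reluOut and poolOut; the loop above alternates forward/backward
        // passes until a fixed point, capped at three rounds.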

        // Insert casts
        std::map<const Tensor*, Tensor*> cachedCastTensor;
        for (auto& info : mInfo.second) {
            auto bufferCommand = std::move(info.executeBuffer.command);
            bool hasConvert = false;
            for (auto cmdP : bufferCommand) {
                auto& cmd = *cmdP;
                auto& outputs = cmd.outputs;
                auto& inputs = cmd.inputs;
                auto opType = cmd.op->type();
                // Check whether the quant op should be used
                DataType runType = DataType_DT_FLOAT;
                bool useQuant = false;
                if (outputs.size() == 1) {
                    // Quant: the output and all inputs have quantAttr and the op supports quant
                    if (TensorUtils::getDescribe(outputs[0])->quantAttr != nullptr) {
                        useQuant = _supportQuant(cmd.op, inputs, outputs, mBackend->type());
                    }
                    if (useQuant) {
                        for (auto t : inputs) {
                            if (TensorUtils::getDescribe(t)->quantAttr == nullptr) {
                                useQuant = false;
                                break;
                            }
                        }
                    }
                }
                if (useQuant) {
                    runType = DataType_DT_INT8;
                }

                for (auto o : outputs) {
                    auto quan = TensorUtils::getDescribe(o)->quantAttr;
                    if (nullptr != quan) {
                        TensorUtils::getDescribe(o)->type = runType;
                    }
                }
                auto makeCommand = [&cachedCastTensor, &info](CommandBuffer& cmdBuffer, Tensor* input, DataType runType) {
                    if (cachedCastTensor.find(input) != cachedCastTensor.end()) {
                        return cachedCastTensor[input];
                    }
                    std::shared_ptr<Tensor> wrapTensor(new Tensor);
                    TensorUtils::copyShape(input, wrapTensor.get(), true);
                    TensorUtils::setLinearLayout(wrapTensor.get());
                    auto des = TensorUtils::getDescribe(wrapTensor.get());
                    auto originDes = TensorUtils::getDescribe(input);
                    if (originDes->quantAttr != nullptr) {
                        des->quantAttr.reset(new QuantAttr);
                        *des->quantAttr = *originDes->quantAttr;
                        des->type = runType;
                    }
                    cmdBuffer.extras.emplace_back(wrapTensor);
                    std::shared_ptr<Command> command(new Command);
                    command->inputs = {input};
                    command->outputs = {wrapTensor.get()};
                    info.cacheBuffer.hasWrap = true;
                    flatbuffers::FlatBufferBuilder builder;
                    OpBuilder opB(builder);
                    if (runType == DataType_DT_INT8) {
                        opB.add_type(OpType_FloatToInt8);
                    } else {
                        opB.add_type(OpType_Int8ToFloat);
                    }
                    builder.Finish(opB.Finish());
                    command->buffer.reset(new BufferStorage);
                    command->buffer->storage = builder.ReleaseRaw(command->buffer->allocated_size, command->buffer->offset);
                    command->op = flatbuffers::GetRoot<Op>(command->buffer->buffer());
                    info.executeBuffer.command.emplace_back(std::move(command));
                    return wrapTensor.get();
                };
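                // E.g. a float tensor feeding an int8 op: makeCommand
                // materializes a wrap tensor plus a single-op FloatToInt8
                // (or Int8ToFloat) command built on the fly with flatbuffers
                // and appends it ahead of the consumer, which then reads the
                // wrapped tensor instead of the original.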
                // Judge whether a cast wrap is needed
                if (OpType_Raster == opType) {
                    for (int v=0; v<cmd.inputs.size(); ++v) {
                        auto input = cmd.inputs[v];
                        bool needCast = CPUBackend::getDataType(input) != runType;
                        if (needCast) {
                            cmd.inputs[v] = makeCommand(info.executeBuffer, input, runType);
                        }
                    }
                } else {
                    for (int i = 0; i < cmd.inputs.size(); i++) {
                        if (OpCommonUtils::opNeedContent(cmd.op, i) && inputs[i]->getType() != halide_type_of<int>()) {
                            bool needCast = CPUBackend::getDataType(inputs[i]) != runType;
                            if (needCast) {
                                cmd.inputs[i] = makeCommand(info.executeBuffer, inputs[i], runType);
                            }
                        }
                    }
                }
                info.executeBuffer.command.emplace_back(cmdP);
            }
        }
    }
    /** Prepare debug info */
    if (supportDebug) {
        mFlops = 0.0f;
        int totalIndex = 0;
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            int index = 0;
            for (auto& cmdP : buffer.command) {
                auto& cmd = *cmdP;
                cmd.info.reset(new UnitInfo);
                static_cast<UnitInfo*>(cmd.info.get())->setUp(cmd, index++, info.op, totalIndex++);
                mFlops += cmd.info->flops();
            }
        }
    }
    return NO_ERROR;
}

void Pipeline::_pushTuningTask(std::vector<Schedule::OpCacheInfo>&& initInfos) {
    // Dup tensors for initInfos
    std::map<Tensor*, std::shared_ptr<Tensor>> holdTensors;
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;

    for (auto& info : initInfos) {
        auto& buffer = info.executeBuffer;
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        for (int v=0; v<buffer.command.size(); ++v) {
            auto iterP = buffer.command[v];
            auto& iter = *iterP;
            buffer.command[v].reset(new Command);
            iterP = buffer.command[v];
            iterP->inputs = iter.inputs;
            iterP->outputs = iter.outputs;
            iterP->op = iter.op;
            iterP->buffer = iter.buffer;
#ifndef MNN_SKIPBUILD_GEOMETRY
            if (iter.op->type() == OpType_Raster) {
                iterP->buffer = mContext.mRasterOp;
            }
#endif
            auto copyTensor = [&](std::vector<Tensor*>& tensors) {
                for (int v=0; v<tensors.size(); ++v) {
                    auto t = tensors[v];
                    auto findIter = holdTensors.find(t);
                    if (findIter != holdTensors.end()) {
                        tensors[v] = findIter->second.get();
                        continue;
                    }
                    std::shared_ptr<Tensor> newTensor(new Tensor);
                    newTensor->buffer().type = t->getType();
                    TensorUtils::copyShape(t, newTensor.get(), true);
                    TensorUtils::getDescribe(newTensor.get())->regions = TensorUtils::getDescribe(t)->regions;
                    tensors[v] = newTensor.get();
                    holdTensors.insert(std::make_pair(t, newTensor));
                    holdTensors.insert(std::make_pair(newTensor.get(), newTensor));
                }
            };
            copyTensor(iterP->inputs);
            copyTensor(iterP->outputs);
        }
    }
    // Make an async task for tuning
    const_cast<Runtime*>(mRuntime)->mCancelled = false;
    auto future = std::async(std::launch::async, [&, this](std::vector<Schedule::OpCacheInfo>&& infos, std::map<Tensor*, std::shared_ptr<Tensor>>&& tensors, std::shared_ptr<Backend> backend, const std::atomic_bool& cancelled) -> int {
        FileLoader loader(mExternalFile.c_str());

        backend->onClearBuffer();
        backend->onResizeBegin();
        std::vector<std::shared_ptr<BufferStorage>> tmpStorage;
        for (auto& info : infos) {
            if (info.type == Schedule::CONSTANT) {
                continue;
            }
            auto& buffer = info.executeBuffer;
            for (auto& iterP : buffer.command) {
                if (cancelled) {
                    return -1;
                }
                auto& iter = *iterP;
                // FIXME: Remove onMaskOpReady in the future
                const_cast<Runtime*>(mRuntime)->onMaskOpReady(iter.inputs, iter.outputs, iter.op);
                std::shared_ptr<BufferStorage> tmp;
                // Even if creating the execution fails, we still mark the op as ready for the runtime
                auto exePtr = OpCommonUtils::createExecutionWithExternal(backend.get(), iter.inputs, iter.outputs, iter.op, &loader, tmp);
                std::shared_ptr<Execution> exe(exePtr);
                if (nullptr == exe) {
                    continue;
                }
                if (nullptr != tmp) {
                    tmpStorage.emplace_back(tmp);
                }
                std::vector<Tensor*> forRelease;
                std::shared_ptr<void> _defer(nullptr, [&forRelease](void*) {
                    for (auto t : forRelease) {
                        TensorUtils::getDescribeOrigin(t)->mem = nullptr;
                    }
                });
                // Alloc inputs and outputs
                for (auto t : iter.inputs) {
                    auto des = TensorUtils::getDescribe(t);
                    bool allocRes = backend->onAcquireBuffer(t, Backend::DYNAMIC);
                    if (!allocRes) {
                        return -1;
                    }
                    forRelease.emplace_back(t);
                }
                for (auto t : iter.outputs) {
                    bool allocRes = backend->onAcquireBuffer(t, Backend::DYNAMIC);
                    if (!allocRes) {
                        return -1;
                    }
                    forRelease.emplace_back(t);
                }
                auto code = exe->onResize(iter.inputs, iter.outputs);
                if (NO_ERROR != code) {
                    return -1;
                }
            }
        }
        backend->onResizeEnd();
        return 0;
    }, std::move(initInfos), std::move(holdTensors), mBackend, std::ref(const_cast<Runtime*>(mRuntime)->mCancelled));
    const_cast<Runtime*>(mRuntime)->setAsyncWork(std::move(future));
}
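
// Design note: the tuning task above works on deep copies of the tensors
// (holdTensors), so backend auto-tuning via onResize can run on a background
// thread while the session itself falls back to CPU; the shared mCancelled
// flag lets the runtime abort the task between commands.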

static ErrorCode _createExecutions(Schedule::PipelineInfo& mInfo, const std::string& externalFile, std::vector<std::shared_ptr<BufferStorage>>& extraStorage) {
    FileLoader loader(externalFile.c_str());
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    for (auto& info : mInfo.second) {
        if (!info.computeCache.needComputeShape) {
            continue;
        }
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        // MNN_PRINT("before resize, mInfo.second size:%lu, command size:%lu,op type:%s, op name:%s\n", mInfo.second.size(), buffer.command.size(), EnumNameOpType(info.op->type()), info.op->name()->c_str());
        for (auto& iterP : buffer.command) {
            auto& iter = *iterP;
            // Create execution
            // Find cached execution first
            bool cached    = false;
            if (nullptr == iter.execution) {
                /** Cache origin execution for fast resize */
                auto exeIter = info.executionCache.find(iter.op);
                if (exeIter != info.executionCache.end()) {
                    iter.execution = exeIter->second;
                    cached         = true;
                }
            }
            std::shared_ptr<BufferStorage> tmpStorage;
            if (nullptr == iter.execution) {
                iter.execution.reset(OpCommonUtils::createExecutionWithExternal(mBackend.get(), iter.inputs, iter.outputs, iter.op, &loader, tmpStorage));
            }
            if (nullptr == iter.execution) {
                // Try the backup backend
                iter.execution.reset(OpCommonUtils::createExecutionWithExternal(mBackupBackend.get(), iter.inputs, iter.outputs, iter.op, &loader, tmpStorage));
                if (nullptr == iter.execution) {
                    if (mInfo.first.reportError) {
                        MNN_ERROR("Create execution error : %d\n", iter.op->type());
                    }
                    return NOT_SUPPORT;
                }
            }
            if (nullptr != tmpStorage.get()) {
                extraStorage.emplace_back(tmpStorage);
            }
            // invalid means memory alloc failed
            if (!iter.execution->valid()) {
                iter.execution = nullptr;
                return OUT_OF_MEMORY;
            }
            if ((!cached) && iter.buffer == nullptr && (iter.op->type() != OpType_Raster) && (iter.op->type() != OpType_BinaryOp)) {
                info.executionCache.insert(std::make_pair(iter.op, iter.execution));
            }
        }
    }
    return NO_ERROR;
}
static void _SetTensorBackend(Schedule::PipelineInfo& mInfo, bool ownInputs) {
    // Clear valid tensors' backend
    for (int infoIndex=0; infoIndex < mInfo.second.size(); ++infoIndex) {
        auto& info = mInfo.second[infoIndex];
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        // MNN_PRINT("before resize, mInfo.second size:%lu, command size:%lu,op type:%s, op name:%s\n", mInfo.second.size(), buffer.command.size(), EnumNameOpType(info.op->type()), info.op->name()->c_str());
        for (int iterIndex=0; iterIndex<buffer.command.size(); ++iterIndex) {
            auto& iterP = buffer.command[iterIndex];
            auto& iter = *iterP;
            if (iter.op->type() == OpType_Copy) {
                continue;
            }
            auto curBackend = iter.execution->backend();
            if (ownInputs) {
                for (auto t : iter.inputs) {
                    auto des = TensorUtils::getDescribeOrigin(t);
                    if (nullptr == des->mem.get()) {
                        des->setBackend(nullptr);
                    }
                }
            }
            for (auto t : iter.outputs) {
                auto des = TensorUtils::getDescribeOrigin(t);
                if (nullptr == des->mem.get()) {
                    des->setBackend(nullptr);
                }
            }
        }
    }

    // Set tensors' backend
    for (auto& info : mInfo.second) {
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        // MNN_PRINT("before resize, mInfo.second size:%lu, command size:%lu,op type:%s, op name:%s\n", mInfo.second.size(), buffer.command.size(), EnumNameOpType(info.op->type()), info.op->name()->c_str());
        for (auto& iterP : buffer.command) {
            auto& iter = *iterP;
            if (iter.op->type() == OpType_Copy) {
                continue;
            }
            auto curBackend = iter.execution->backend();
            if (ownInputs) {
                for (auto t : iter.inputs) {
                    auto des = TensorUtils::getDescribeOrigin(t);
                    if (nullptr == des->mem.get() && nullptr == des->getBackend()) {
                        des->setBackend(curBackend);
                    }
                }
            }
            for (auto t : iter.outputs) {
                auto des = TensorUtils::getDescribeOrigin(t);
                if (nullptr == des->mem.get() && nullptr == des->getBackend()) {
                    des->setBackend(curBackend);
                }
            }
        }
    }
}
static void _makeCopyOp(std::shared_ptr<BufferStorage>& copyOp) {
    if (copyOp.get() == nullptr) {
        flatbuffers::FlatBufferBuilder builder(32);
        OpBuilder builder_(builder);
        builder_.add_type(OpType_Copy);
        builder.Finish(builder_.Finish());
        copyOp.reset(new BufferStorage);
        copyOp->storage = builder.ReleaseRaw(copyOp->allocated_size, copyOp->offset);
    }
}
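
// Design note: the serialized Copy op is built once with flatbuffers and
// cached in the caller's shared_ptr, so every wrap command inserted by
// _InsertCopy below can reference the same BufferStorage instead of
// rebuilding it per tensor.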
static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, std::shared_ptr<Tensor>>& mCacheConstTensors, Pipeline::WrapTensorCache& shapeFixConstCache, bool ownInput, bool permitCodegen) {
    std::shared_ptr<BufferStorage> copyOp;
    for (auto iterP = shapeFixConstCache.begin(); iterP != shapeFixConstCache.end();) {
        auto& iter = *iterP;
        if (iter.second.first.lock() == nullptr) {
            // Has been released, remove from cache
            iterP = shapeFixConstCache.erase(iterP);
            continue;
        }
        auto des = iter.first.first;
        bool needReset = true;
        if (des->usage == Tensor::InsideDescribe::CONSTANT && ((des->stageMask & Tensor::InsideDescribe::CONTENT_NOT_CHANGE) != 0)) {
            // If the tensor is not recomputed in the shape-geometry stage, there is no need to recopy it
            needReset = false;
        }
        if (needReset) {
            TensorUtils::getDescribeOrigin(iter.second.second.get())->setBackend(nullptr);
            TensorUtils::getDescribeOrigin(iter.second.second.get())->mem = nullptr;
        }
        iterP++;
    }
    for (auto& info : mInfo.second) {
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        if (buffer.command.empty()) {
            continue;
        }
        auto commands = std::move(buffer.command);
        for (auto& iterP : commands) {
            auto& iter = *iterP;
            if (iter.op->type() == OpType_Copy) {
                continue;
            }
            // Check whether the inputs need a wrap
            auto curBackend = iter.execution->backend();
#ifdef MNN_PIPELINE_DEBUG
            if (nullptr != iter.op->name()) {
                MNN_PRINT("%s Run on %d\n", iter.op->name()->c_str(), curBackend->type());
            }
#endif
            iter.workInputs = iter.inputs;
            for (int v=0; v<iter.inputs.size(); ++v) {
                auto t = iter.inputs[v];
                auto des = TensorUtils::getDescribe(t);
                if (WrapExecution::needWrap(t, curBackend)) {
                    do {
                        Tensor* newTensor = nullptr;
                        if (!des->isMutable) {
                            newTensor = WrapExecution::copyConstCache(t, curBackend, mCacheConstTensors, permitCodegen);
                            if (nullptr != newTensor) {
                                iter.workInputs[v] = newTensor;
                                break;
                            }
                        }
                        if (!ownInput) {
                            if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::INPUT) {
                                auto inputCacheIter = mInfo.first.inputTensorCopyCache.find(t);
                                if (inputCacheIter != mInfo.first.inputTensorCopyCache.end()) {
                                    auto& tensorCache = inputCacheIter->second;
                                    if (nullptr == std::get<0>(tensorCache) || WrapExecution::needWrap(std::get<0>(tensorCache), curBackend)) {
                                        std::shared_ptr<Tensor> wrapTensor = WrapExecution::makeCopyTensor(t, curBackend);
                                        TensorUtils::getDescribe(wrapTensor.get())->usage = Tensor::InsideDescribe::CONSTANT;
                                        std::get<0>(tensorCache) = wrapTensor.get();
                                        std::get<1>(tensorCache) = wrapTensor;
                                        std::get<2>(tensorCache) = true;
                                        std::get<3>(tensorCache) = true;
                                    }
                                    iter.workInputs[v] = std::get<0>(tensorCache);
                                    if (std::get<2>(tensorCache)) {
                                        auto allocRes = curBackend->onAcquireBuffer(std::get<1>(tensorCache).get(), Backend::STATIC);
                                        if (!allocRes) {
                                            return OUT_OF_MEMORY;
                                        }
                                        std::get<2>(tensorCache) = false;
                                    }
                                    break;
                                }
                            }
                        }
                        {
                            auto titer = shapeFixConstCache.find(std::make_pair(des, curBackend));
                            if (titer != shapeFixConstCache.end()) {
                                newTensor = titer->second.second.get();
                            } else {
                                std::shared_ptr<MNN::Tensor> tensor(new Tensor);
                                shapeFixConstCache.insert(std::make_pair(std::make_pair(des, curBackend), std::make_pair(std::weak_ptr<Tensor::InsideDescribe::NativeInsideDescribe>(TensorUtils::getDescribeOrigin(t)->mContent), tensor)));
                                newTensor = tensor.get();
                            }
                            iter.workInputs[v] = newTensor;
                        }
                        auto newMemory = TensorUtils::getDescribeOrigin(newTensor);
                        if (newMemory->getBackend() != nullptr) {
                            // The memory has already been initialized, skip it
                            break;
                        }
                        TensorUtils::copyShape(t, newTensor, true, true);
                        if (des->usage == Tensor::InsideDescribe::CONSTANT) {
                            TensorUtils::getDescribe(newTensor)->usage = des->usage;
                            auto tempRes = WrapExecution::allocAndCopy(curBackend, t, newTensor);
                            if (!tempRes) {
                                return OUT_OF_MEMORY;
                            }
                            break;
                        }
                        newMemory->setBackend(curBackend);
                        auto copyWrap = WrapExecution::makeCopyExecution(curBackend, mInfo.first.cache.second.get());
                        _makeCopyOp(copyOp);
                        std::shared_ptr<Command> cmdP(new Command);
                        auto& cmd = *cmdP;
                        cmd.buffer = copyOp;
                        cmd.workInputs  = {t};
                        cmd.workOutputs = {newTensor};
                        cmd.op      = flatbuffers::GetRoot<Op>(cmd.buffer->buffer());
                        cmd.execution.reset(copyWrap);
                        buffer.command.emplace_back(cmdP);
                    } while(false);
                }
            }
            buffer.command.emplace_back(iterP);
            iter.workOutputs = iter.outputs;
            for (int v=0; v<iter.workOutputs.size(); ++v) {
                auto t = iter.workOutputs[v];
                if (WrapExecution::needWrap(t, curBackend)) {
                    auto copyWrap = WrapExecution::makeCopyExecution(curBackend, mInfo.first.cache.second.get());
                    std::shared_ptr<Tensor> newTensor(new Tensor);
                    TensorUtils::copyShape(t, newTensor.get(), true, true);
                    iterP->workOutputs[v] = newTensor.get();
                    _makeCopyOp(copyOp);
                    std::shared_ptr<Command> cmdP(new Command);
                    auto& cmd = *cmdP;
                    cmd.buffer = copyOp;
                    cmd.workInputs  = {newTensor.get()};
                    cmd.workOutputs = {t};
                    cmd.op      = flatbuffers::GetRoot<Op>(cmd.buffer->buffer());
                    buffer.extras.emplace_back(newTensor);
                    cmd.execution.reset(copyWrap);
                    buffer.command.emplace_back(cmdP);
                    for(int i = 0; i < iter.inputs.size(); ++i){
                        if(t == iter.inputs[i]){
                            iterP->workOutputs[v] = iter.workInputs[i];
                            cmd.workInputs = {iter.workInputs[i]};
                        }
                    }
                }
            }
        }
    }
    return NO_ERROR;
}
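
// Example scenario for _InsertCopy (sketch): a GPU-backend op consuming a
// tensor that lives on the CPU backend. needWrap() detects the mismatch, so
// the code above either reuses a cached device-side copy (const caches and
// the input-tensor cache) or schedules an extra Copy command before the op;
// outputs that must land on a foreign backend get the mirror treatment.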

void Pipeline::_recycleDynamicMemory(Command* command) {
    for (auto& t : command->workOutputs) {
        auto memoryType = _getTensorStorageType(t, mOutputStatic);
        if (Backend::DYNAMIC == memoryType) {
            TensorUtils::getDescribeOrigin(t)->mem = nullptr;
        }
    }
    for (auto& t : command->workInputs) {
        auto memoryType = _getTensorStorageType(t, mOutputStatic);
        if (Backend::DYNAMIC == memoryType) {
            TensorUtils::getDescribeOrigin(t)->mem = nullptr;
        }
    }
}
void Pipeline::openResizeCheck() {
#ifndef MNN_SKIPBUILD_GEOMETRY
    mGeometryNeedRelease = false;
    for (auto& info : mInfo.second) {
        info.computeCache.open();
    }
#endif
}

ErrorCode Pipeline::fixResizeCache() {
#ifndef MNN_SKIPBUILD_GEOMETRY
    // TODO: Recompute the release mask and set mGeometryNeedRelease = true
    for (auto& info : mInfo.second) {
        if (info.type == Schedule::CONSTANT && (!info.computeCache.needExecuteConst)) {
            info.executeBuffer.command.clear();
            info.executeBuffer.extras.clear();
            info.cacheBuffer.command.clear();
            info.cacheBuffer.extras.clear();
        }
    }
    mInfo.first.cache.first->onResizeBegin();
    mInfo.first.cache.first->onResizeEnd();
    mInfo.first.cache.second->onResizeBegin();
    mInfo.first.cache.second->onResizeEnd();
    auto res = mInfo.first.cache.first->onSelectDynamicAllocator(1, 2);
    res = res && mInfo.first.cache.second->onSelectDynamicAllocator(1, 2);
    if (!res) {
        MNN_PRINT("Backend %d doesn't support the resize-fix optimization\n", mInfo.first.cache.first->type());
        mGeometryNeedRelease = true;
        return NOT_SUPPORT;
    }
    size_t totalNumber = 0;
    size_t fixNumber = 0;
    // Mask begin
    for (auto& info : mInfo.second) {
        auto& buffer = info.executeBuffer;
        if (info.type != Schedule::CONSTANT) {
            totalNumber += buffer.command.size();
        }
        if ((!info.computeCache.canCache()) && info.computeCache.needComputeShape) {
            // If the session has been resized and the op has been observed to change shape, mark it shape-mutable
            info.computeCache.close(false);
            continue;
        }
        info.computeCache.close(true);
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        // TODO: OpenCL and Vulkan don't support varying inputs
        bool notSupportInputVarying = !OpCommonUtils::supportDynamicInputMemory(mInfo.first.cache.first->type());
        for (int cmdIndex=0; cmdIndex<buffer.command.size(); ++cmdIndex) {
            auto& cmd = *buffer.command[cmdIndex];
            cmd.group = 1;
            if (notSupportInputVarying) {
                for (auto t : cmd.workInputs) {
                    if (TensorUtils::getDescribe(t)->group < 0 || TensorUtils::getDescribe(t)->usage != Tensor::InsideDescribe::NORMAL) {
                        cmd.group = 0;
                        break;
                    }
                }
                if (mOutputStatic) {
                    for (auto t : cmd.workOutputs) {
                        if (TensorUtils::getDescribe(t)->usage != Tensor::InsideDescribe::NORMAL) {
                            cmd.group = 0;
                            break;
                        }
                    }
                }
            }
            if (1 == cmd.group) {
                fixNumber++;
            }
            for (auto t : cmd.workInputs) {
                if (TensorUtils::getDescribe(t)->group == 0) {
                    TensorUtils::getDescribe(t)->group = 1;
                }
            }
            for (auto t : cmd.workOutputs) {
                TensorUtils::getDescribe(t)->group = 1;
            }
        }
    }
    // Mask end
    _allocForTensor(1, true);

    mInfo.first.cache.first->onSelectDynamicAllocator(0, 2);
    mInfo.first.cache.second->onSelectDynamicAllocator(0, 2);
    MNN_PRINT("Fix: %d - Total: %d, rate = %f\n", (int)fixNumber, (int)totalNumber, (float)fixNumber / (float)totalNumber);
#endif
    return NO_ERROR;
}
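
// Grouping note: after a successful fixResizeCache(), commands with group == 1
// keep their resize results and allocations from the "fixed" allocator set, so
// a later resize only re-runs _allocForTensor(0, ...) for the group-0
// remainder (e.g. ops with user-provided or shape-mutable inputs). The printed
// rate is the fraction of commands whose resize work is now cached.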
ErrorCode Pipeline::_allocForTensor(int index, bool allocInput) {
#ifdef MNN_PIPELINE_DEBUG
    int resizeNumber = 0;
#endif
    // Compute refcount begin
    for (auto& info : mInfo.second) {
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        // MNN_PRINT("before resize, mInfo.second size:%lu, command size:%lu,op type:%s, op name:%s\n", mInfo.second.size(), buffer.command.size(), EnumNameOpType(info.op->type()), info.op->name()->c_str());
        for (auto& iterP : buffer.command) {
            auto& iter = *iterP;
            for (auto t : iter.workInputs) {
                auto des = TensorUtils::getDescribe(t);
                if (des->usage != Tensor::InsideDescribe::CONSTANT) {
                    des->useCount = 0;
                }
            }
        }
    }
    for (auto& info : mInfo.second) {
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        for (auto& iterP : buffer.command) {
            auto& iter = *iterP;
            for (auto t : iter.workInputs) {
                auto des = TensorUtils::getDescribe(t);
                if (des->usage != Tensor::InsideDescribe::CONSTANT) {
                    des->useCount += 1;
                }
            }
        }
    }
    // Compute refcount end
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    mBackend->onResizeBegin();
    mBackupBackend->onResizeBegin();
    for (auto& info : mInfo.second) {
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        for (int cmdIndex=0; cmdIndex < buffer.command.size(); ++cmdIndex) {
            auto& iterP = buffer.command[cmdIndex];
            auto& iter = *iterP;
#ifdef MNN_PIPELINE_DEBUG
            auto memory = const_cast<Runtime*>(mRuntime)->onGetMemoryInMB();
            if (nullptr != info.op->name()) {
                MNN_PRINT("%f, before Resize: %s - %d\n", memory, info.op->name()->c_str(), cmdIndex);
            }
#endif
            // Alloc for tensors
            auto curBackend = iter.execution->backend();
            if (allocInput) {
                for (auto t : iter.workInputs) {
                    auto allocRes = _allocTensor(t, curBackend, mOutputStatic, index);
                    if (!allocRes) {
                        return OUT_OF_MEMORY;
                    }
                }
            }
            {
                for (auto t : iter.workOutputs) {
                    auto res = _allocTensor(t, curBackend, mOutputStatic, index);
                    if (!res) {
                        return OUT_OF_MEMORY;
                    }
                }
            }
#ifdef MNN_PIPELINE_DEBUG
            if (iter.info != nullptr) {
                MNN_PRINT("before Resize 2, calling: %s - %d \n", iter.info->name().c_str(), cmdIndex);
            }
#endif
            if (iter.group == index) {
#ifdef MNN_PIPELINE_DEBUG
                resizeNumber++;
#endif
                auto code = iter.execution->onResize(iter.workInputs, iter.workOutputs);
                if (NO_ERROR != code) {
#ifdef MNN_PIPELINE_DEBUG
                    MNN_ERROR("Pipeline Resize error: %d\n", code);
#endif
                    if (iter.info.get()) {
                        MNN_ERROR("Resize error for type = %s, name = %s \n", iter.info->type().c_str(), iter.info->name().c_str());
                    }
                    return code;
                }
            }
            // Free intermediate tensors
            for (auto t : iter.workInputs) {
                _releaseTensor(t, allocInput, index);
            }
        }
    }
    // Recycle all dynamic tensors
    for (auto& info : mInfo.second) {
        auto& buffer = info.executeBuffer;
        for (auto& c : buffer.command) {
            if (c->group != index) {
                continue;
            }
            _recycleDynamicMemory(c.get());
        }
    }
    auto code = mBackend->onResizeEnd();
    if (code != NO_ERROR) {
        return code;
    }
#ifdef MNN_PIPELINE_DEBUG
    MNN_PRINT("Resize %d op for index: %d\n", resizeNumber, index);
#endif
    code = mBackupBackend->onResizeEnd();
    return code;
}
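
// Refcount example for _allocForTensor (sketch): if tensor T feeds three
// commands, the first pass zeroes T's useCount and the second raises it to 3;
// during resize each consumer calls _releaseTensor, so T's buffer returns to
// the dynamic pool right after its last consumer and can be reused by the
// commands that follow.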
ErrorCode Pipeline::allocMemory(bool firstMalloc, bool forbidReplace) {
    // MNN_PRINT("allocMemory mtype:%d, cpubackendType:%d, cpuBackend runtime:%p\n", mBackend->type(), mBackupBackend->type(), mBackupBackend->getRuntime());
    if (!firstMalloc) {
        if (OpCommonUtils::supportDynamicInputMemory(mInfo.first.cache.first->type()) && (!mInfo.first.inputBackendChange)) {
            return NO_ERROR;
        }
    }

    /* Create Execution Begin */
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    // Check if init will take a long time
    if (mBackend->type() != MNN_FORWARD_CPU && mBackend->type() != MNN_FORWARD_CPU_EXTENSION && mTuneAttr.autoSetOpType) {
        Runtime::OpInfo dstInfo;
        int currentInitCount = 0;
        std::vector<Schedule::OpCacheInfo> initInfos;
        for (auto& info : mInfo.second) {
            if (info.type == Schedule::CONSTANT) {
                continue;
            }
            auto& buffer = info.executeBuffer;
            for (auto& iterP : buffer.command) {
                auto& iter = *iterP;
                dstInfo.initCostLong = false;
                mRuntime->onMeasure(iter.inputs, iter.outputs, iter.op, dstInfo);
                if (dstInfo.initCostLong) {
                    initInfos.emplace_back(info);
                    currentInitCount++;
                    break;
                }
            }
            if (currentInitCount >= mTuneAttr.maxTuningNumber) {
                break;
            }
        }
        if (currentInitCount > 0) {
            MNN_PRINT("Fall back to CPU\n");
            // Reset executions
            for (auto& info : mInfo.second) {
                info.executionCache.clear();
                for (auto& iterP : info.executeBuffer.command) {
                    iterP->execution = nullptr;
                    _recycleDynamicMemory(iterP.get());
                }
            }
            if (!mRuntime->hasAsyncWork()) {
                _pushTuningTask(std::move(initInfos));
            }
            mBackend.reset(mCpuRuntime->onCreate(nullptr, mBackupBackend.get()));
        }
    }
    {
        auto code = _createExecutions(mInfo, mExternalFile, mExternalStorage);
        if (NO_ERROR != code) {
            return code;
        }
    }
    /* Create Execution End */
    mBackend->onClearBuffer();
    mBackupBackend->onClearBuffer();
    _SetTensorBackend(mInfo, mAllocInput);
    // Insert wrap (copy) commands if needed
    {
        // Reset memory allocator for backend
        auto insertCode = _InsertCopy(mInfo, mCacheConstTensors, mWrapTensors, mAllocInput, forbidReplace);
        if (NO_ERROR != insertCode) {
            return insertCode;
        }
    }
    /* Insert Wrap End */

    return _allocForTensor(0, mAllocInput);
}

void Pipeline::_copyInputs() {
    for (auto& iter : mInfo.first.inputTensorCopyCache) {
        auto& tensorCache = iter.second;
        if (std::get<0>(tensorCache) == nullptr) {
            continue;
        }
        if (!std::get<3>(tensorCache)) {
            continue;
        }
        std::get<0>(tensorCache)->copyFromHostTensor(iter.first);
        std::get<3>(tensorCache) = false;
    }
}
ErrorCode Pipeline::execute() {
    _copyInputs();
    auto enterCode = _enterExecute();
    if (NO_ERROR != enterCode) {
        return enterCode;
    }
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    for (auto& info : mInfo.second) {
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        for (int cmdIndex=0; cmdIndex<buffer.command.size(); ++cmdIndex) {
            auto& cmd = *buffer.command[cmdIndex];
#ifdef MNN_PIPELINE_DEBUG
            if (info.op->name() != nullptr) {
                std::string groupOfInput = "input group: [";
                for (int v=0; v<cmd.workInputs.size(); ++v) {
                    groupOfInput = groupOfInput + " " + std::to_string(TensorUtils::getDescribe(cmd.workInputs[v])->group) + " ";
                }
                groupOfInput += "]";
                std::string deviceOfInput = "input: [";
                for (int v=0; v<cmd.workInputs.size(); ++v) {
                    deviceOfInput = deviceOfInput + " " + std::to_string(cmd.workInputs[v]->deviceId()) + " ";
                }
                deviceOfInput += "]";
                std::string deviceOfOutput = "output: [";
                for (int v=0; v<cmd.workOutputs.size(); ++v) {
                    deviceOfOutput = deviceOfOutput + " " + std::to_string(cmd.workOutputs[v]->deviceId()) + " ";
                }
                deviceOfOutput += "]";
                MNN_PRINT("Group: %d, %s - %d, type=%s, inputs: %s, devices: %s - %s\n", info.group, info.op->name()->c_str(), cmdIndex, EnumNameOpType(cmd.op->type()), groupOfInput.c_str(), deviceOfInput.c_str(), deviceOfOutput.c_str());
            }
#endif
            auto code = cmd.execution->onExecute(cmd.workInputs, cmd.workOutputs);
            if (NO_ERROR != code) {
                _exitExecute();
                return code;
            }
        }
    }
    _exitExecute();
    return NO_ERROR;
}
ErrorCode Pipeline::_enterExecute() {
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    mBackend->onExecuteBegin();
    mBackupBackend->onExecuteBegin();
    if (mRuntime->pCurrentStatus != NO_ERROR) {
        return (ErrorCode)mRuntime->pCurrentStatus;
    }
    if (mCpuRuntime->pCurrentStatus != NO_ERROR) {
        return (ErrorCode)mCpuRuntime->pCurrentStatus;
    }
    return NO_ERROR;
}
void Pipeline::_exitExecute() {
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    mBackupBackend->onExecuteEnd();
    mBackend->onExecuteEnd();
}

ErrorCode Pipeline::executeCallBack(const TensorCallBackWithInfo& before, const TensorCallBackWithInfo& after) {
    _copyInputs();
    auto enterCode = _enterExecute();
    if (NO_ERROR != enterCode) {
        return enterCode;
    }
    auto& mBackend = mInfo.first.cache.first;
    auto& mBackupBackend = mInfo.first.cache.second;
    for (auto& info : mInfo.second) {
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        auto& buffer = info.executeBuffer;
        for (int cmdIndex=0; cmdIndex < buffer.command.size(); ++cmdIndex) {
            auto cmdP = buffer.command[cmdIndex];
            auto& cmd = *cmdP;
            if (nullptr == cmd.info.get()) {
                auto code = cmd.execution->onExecute(cmd.workInputs, cmd.workOutputs);
                if (NO_ERROR != code) {
                    _exitExecute();
                    return code;
                }
                continue;
            }
            auto run = before(cmd.workInputs, cmd.info.get());
            if (run) {
                auto code = cmd.execution->onExecute(cmd.workInputs, cmd.workOutputs);
                if (NO_ERROR != code) {
                    _exitExecute();
                    return code;
                }
            }
            auto stop = !(after(cmd.workOutputs, cmd.info.get()));
            if (stop) {
                _exitExecute();
                return CALL_BACK_STOP;
            }
        }
    }
    _exitExecute();
    return NO_ERROR;
}
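
// Usage sketch for executeCallBack (assuming the usual
// TensorCallBackWithInfo = std::function<bool(const std::vector<Tensor*>&,
// const OperatorInfo*)> from the public API):
//
//   TensorCallBackWithInfo before = [](const std::vector<Tensor*>&,
//                                      const OperatorInfo* info) {
//       MNN_PRINT("enter %s (%s)\n", info->name().c_str(), info->type().c_str());
//       return true;  // returning false skips onExecute for this command
//   };
//   TensorCallBackWithInfo after = [](const std::vector<Tensor*>&,
//                                     const OperatorInfo*) {
//       return true;  // returning false stops the pipeline (CALL_BACK_STOP)
//   };
//   pipeline->executeCallBack(before, after);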

Pipeline::~Pipeline() {
    auto& bn = mInfo.first.cache.first;
    auto& backupbn = mInfo.first.cache.second;
    bn->onClearBuffer();
    backupbn->onClearBuffer();
    mInfo.second.clear();
    mCacheConstTensors.clear();
    mWrapTensors.clear();
}

} // namespace MNN