//
//  Schedule.cpp
//  MNN
//
//  Created by MNN on 2018/07/30.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "core/Schedule.hpp"
#include <algorithm>
#include <iterator>
#include <set>
#include <vector>
#include <unordered_map>
#include "core/Macro.h"
#include "core/RuntimeFactory.hpp"
#include "core/TensorUtils.hpp"
#include "shape/SizeComputer.hpp"
#include "utils/InitNet.hpp"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
using namespace std;
											
												- dynamic computation graph (beta)
	- add supports (/express)
	- add tests
	- add benchmarks with it (/benchmark/exprModels)
- Python
	- MNN engine and tools were submitted to pip
	- available on Windows/macOS/Linux
- Engine/Converter
	- add supports for each op benchmarking
	- refactor optimizer by separating steps
- CPU
	- add supports for Conv3D, Pool3D, ELU, ReverseSequence
	- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
	- add half transform in CPU
	- add broadcast supports for binary
	- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
	- add sub, real div supports for binary
	- add supports for unary
	- optimize Conv2D, Reshape
- Vulkan
	- add max supports for eltwise
- Metal
	- fix metallib missing problem
- Train/Quantization
	- use express to refactor training codes
											
										 
											2019-09-26 21:02:07 +08:00
//#define MNN_AUTO_CHECK_COST
namespace MNN {

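// Resolve which forward type (backend) a ScheduleConfig should run on: MNN_FORWARD_AUTO picks
// the first registered non-CPU runtime; if no runtime creator exists for the requested type,
// fall back to config.backupType.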
MNNForwardType Schedule::getApprociateType(const ScheduleConfig& config) {
    MNNForwardType type = config.type;
    // FIXME: Support Auto determine
    if (MNN_FORWARD_AUTO == config.type) {
        // Search backends, excluding MNN_FORWARD_CPU
        for (int i = 1; i < MNN_FORWARD_ALL; ++i) {
            if (MNNGetExtraRuntimeCreator((MNNForwardType)i) != nullptr) {
                type = (MNNForwardType)i;
                break;
            }
        }
    }
    auto creator = MNNGetExtraRuntimeCreator(type);
    if (nullptr == creator) {
        MNN_PRINT("Can't Find type=%d backend, use %d instead\n", type, config.backupType);
        type = config.backupType;
    }
    return type;
}

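// Allocate a Tensor object for every tensor named in the net. For static models
// (Usage_INFERENCE_STATIC) the serialized TensorDescribes carry full shapes, formats and
// memory regions, so everything is restored here; for dynamic models only the input
// tensors' shapes are set (via initTensors) and the rest are resolved at resize time.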
static bool _setUpTensorInfo(std::vector<std::shared_ptr<Tensor>>& allTensors, const Net* net) {
    bool valid    = true;
    auto& tensors = allTensors;
    tensors.resize(net->tensorName()->size());

    if (net->usage() == Usage_INFERENCE_STATIC) {
        // static model will set all tensors' shape
        auto describes = net->extraTensorDescribe();
        std::vector<const TensorDescribe*> des(tensors.size());
        for (int i = 0; i < describes->size(); i++) {
            int index  = describes->GetAs<TensorDescribe>(i)->index();
            des[index] = describes->GetAs<TensorDescribe>(i);
        }
        for (int i = 0; i < tensors.size(); ++i) {
            auto blob = des[i]->blob();
            if (auto idims = blob->dims()) {
                tensors[i].reset(new Tensor(idims->size()));
                auto& tb = tensors[i]->buffer();
                for (int d = 0; d < idims->size(); d++) {
                    tb.dim[d].extent = idims->Get(d);
                }
            } else {
                tensors[i].reset(new Tensor(1));
            }
            tensors[i]->setType(blob->dataType());
        }
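        // Second pass: restore each tensor's data format and, for tensors backed by virtual
        // memory, rebuild the regions that map them onto their origin tensors.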
        for (int i = 0; i < tensors.size(); ++i) {
            auto blob                                                   = des[i]->blob();
            TensorUtils::getDescribe(tensors[i].get())->dimensionFormat = blob->dataFormat();
            if (auto regions = des[i]->regions()) {
                auto& regs = TensorUtils::getDescribe(tensors[i].get())->regions;
                TensorUtils::getDescribe(tensors[i].get())->memoryType = Tensor::InsideDescribe::MEMORY_VIRTUAL;
                regs.reserve(regions->size());
                for (int r = 0; r < regions->size(); r++) {
                    auto region = regions->GetAs<Region>(r);
                    Tensor::InsideDescribe::Region reg;
                    reg.origin     = tensors[region->origin()].get();
                    reg.src.offset = region->src()->offset();
                    reg.dst.offset = region->dst()->offset();
                    for (int d = 0; d < 3; d++) {
                        reg.size[d]       = region->size()->data()[d];
                        reg.src.stride[d] = region->src()->stride()->data()[d];
                        reg.dst.stride[d] = region->dst()->stride()->data()[d];
                    }
                    regs.emplace_back(std::move(reg));
                }
            }
        }
        for (int opIndex = 0; opIndex < net->oplists()->size(); ++opIndex) {
            auto op = net->oplists()->GetAs<Op>(opIndex);
            if (OpType_Const == op->type()) {
                MNN_ASSERT(nullptr != op->outputIndexes());
                auto index                                            = op->outputIndexes()->data()[0];
                TensorUtils::getDescribe(tensors[index].get())->usage = Tensor::InsideDescribe::CONSTANT;
            }
        }
    } else {
        // Dynamic model: only the input tensors' shapes are set here
        valid = initTensors(tensors, net);
    }
    return valid;
}

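// Build the list of ops to schedule. With no path configured this is simply every op except
// the Input ops, in model order; otherwise the ops between configs.path.inputs and
// configs.path.outputs are selected via the tensor/op masks below.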
static void generateScheduleGraph(vector<const Op*>& ops, const Net* net, const ScheduleConfig& configs,
                                  const vector<shared_ptr<Tensor>>& allTensors) {
    if (configs.path.inputs.empty() && configs.path.outputs.empty()) {
        // Use default linear schedule
        ops.clear();
        ops.reserve(net->oplists()->size());
        for (int i = 0; i < net->oplists()->size(); ++i) {
            auto op = net->oplists()->GetAs<Op>(i);
            if (op->type() != OpType_Input) {
                ops.emplace_back(op);
            }
        }
        return;
    }
    // 0: not set, 1: output, 2: input
    std::vector<int> tensorMask(net->tensorName()->size());
    ::memset(tensorMask.data(), 0, tensorMask.size() * sizeof(int));

    // 0: not used, 1: used
    std::vector<int> opMask(net->oplists()->size());
    ::memset(opMask.data(), 0, opMask.size() * sizeof(int));

    // Set initial status
    std::set<std::string> inputNames;
    std::set<std::string> outputNames;
    for (auto& n : configs.path.inputs) {
        inputNames.insert(n);
    }
    for (auto& n : configs.path.outputs) {
        outputNames.insert(n);
    }
    if (configs.mode == ScheduleConfig::Path::Mode::Tensor) {
        for (int i = 0; i < tensorMask.size(); ++i) {
            auto name = net->tensorName()->GetAsString(i)->c_str();
            if (outputNames.find(name) != outputNames.end()) {
                tensorMask[i] = 1;
            }
            // If a tensor is both input and output, treat it as input
            if (inputNames.find(name) != inputNames.end()) {
                tensorMask[i] = 2;
            }
        }
    } else {
        // Op mode
        for (int i = 0; i < opMask.size(); ++i) {
            auto op = net->oplists()->GetAs<Op>(i);
            if (nullptr == op->name()) {
                continue;
            }
            auto name = op->name()->c_str();
            if (outputNames.find(name) != outputNames.end()) {
                opMask[i] = 1;
                if (nullptr != op->outputIndexes()) {
                    for (int j = 0; j < op->outputIndexes()->size(); ++j) {
                        auto index = op->outputIndexes()->data()[j];
                        if (tensorMask[index] != 2) {
                            tensorMask[index] = 1;
                        }
                    }
                }
                if (nullptr != op->inputIndexes()) {
                    for (int j = 0; j < op->inputIndexes()->size(); ++j) {
                        auto index = op->inputIndexes()->data()[j];
                        if (tensorMask[index] != 2) {
                            tensorMask[index] = 1;
                        }
                    }
                }
            }
            if (inputNames.find(name) != inputNames.end()) {
                opMask[i] = 1;
                if (nullptr != op->outputIndexes()) {
                    for (int j = 0; j < op->outputIndexes()->size(); ++j) {
                        auto index = op->outputIndexes()->data()[j];
                        tensorMask[index] = 2;
                    }
                }
            }
        }
    }

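    // Propagate usage to a fixed point: an op becomes used once it produces a needed tensor,
    // and a used op marks its input tensors as needed (unless they are path inputs),
    // repeating until nothing changes.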
    bool change = false;
    do {
        change = false;
        for (int i = 0; i < opMask.size(); ++i) {
            if (opMask[i] > 0) {
                continue;
            }
            auto op = net->oplists()->GetAs<Op>(i);
            if (nullptr != op->outputIndexes()) {
                for (int j = 0; j < op->outputIndexes()->size(); ++j) {
                    auto index = op->outputIndexes()->data()[j];
                    if (tensorMask[index] == 1) {
                        opMask[i] = 1;
                        change = true;
                    }
                }
            }
            if (nullptr != op->inputIndexes() && opMask[i]) {
                for (int j = 0; j < op->inputIndexes()->size(); ++j) {
                    auto index = op->inputIndexes()->data()[j];
                    if (tensorMask[index] != 2) {
                        tensorMask[index] = 1;
                    }
                }
            }
        }
    } while (change);

    for (int i = 0; i < opMask.size(); ++i) {
        if (opMask[i] > 0) {
            ops.emplace_back(net->oplists()->GetAs<Op>(i));
        }
    }
}

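// Turn one ScheduleConfig into a pipeline: select the ops to run via generateScheduleGraph,
// then wrap them into Schedule::PipelineInfo entries via initPipelineInfosFromOps.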
static vector<Schedule::PipelineInfo> _scheduleUnit(const Net* net, const ScheduleConfig& configs,
                                                    const vector<shared_ptr<Tensor>>& allTensors) {
    vector<Schedule::PipelineInfo> oplists;
    vector<const Op*> ops;
    generateScheduleGraph(ops, net, configs, allTensors);
    initPipelineInfosFromOps(oplists, ops, allTensors);
    return oplists;
}

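// Top-level scheduling entry, typically invoked while creating a session: sets up all tensors,
// builds one (Backend::Info, pipeline) pair per ScheduleConfig, marks input/output tensors,
// and moves everything into the returned ScheduleInfo.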
Schedule::ScheduleInfo Schedule::schedule(const Net* net, const std::vector<ScheduleConfig>& configs) {
    std::vector<std::shared_ptr<Tensor>> allTensors;

    ScheduleInfo schedule;
    if (nullptr == net->oplists()) {
        MNN_PRINT("Error net for schedule\n");
        return schedule;
    }
    bool valid              = _setUpTensorInfo(allTensors, net);
    schedule.validForResize = valid;

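    // One pipeline per ScheduleConfig: resolve the backend type, thread count and user config,
    // then schedule the ops for that config.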
    std::vector<std::pair<Backend::Info, std::vector<Schedule::PipelineInfo>>> result;

    for (auto& config : configs) {
        Backend::Info compute;
        compute.type      = getApprociateType(config);
        compute.numThread = config.numThread;
        compute.user      = config.backendConfig;
        auto oplists      = _scheduleUnit(net, config, allTensors);
        result.emplace_back(std::make_pair(compute, std::move(oplists)));
    }

    schedule.pipelineInfo = std::move(result);

    // Get all used ops' outputs and drop unused ops without changing op order; Input ops are always inserted
    std::vector<const Op*> oplists;
    {
        for (std::pair<Backend::Info, vector<Schedule::PipelineInfo>>& pipeline : schedule.pipelineInfo) {
            for (auto& info : pipeline.second) {
                oplists.push_back(info.op);
            }
        }
    }
    // set tensors' input/output usage by oplists info
    setInputOutputForOps(allTensors, oplists, net->usage() == Usage_INFERENCE_STATIC);

    // Add outputs requested by config.saveTensors and by the model's outputName list
    std::unordered_map<std::string, int> tensorNameIndexMap;
    for (int i = 0; i < net->tensorName()->size(); ++i) {
        tensorNameIndexMap[net->tensorName()->Get(i)->str()] = i;
    }
    for (auto& config : configs) {
        for (const auto& name : config.saveTensors) {
            auto iter = tensorNameIndexMap.find(name);
            if (iter != tensorNameIndexMap.end()) {
                auto t = allTensors[iter->second].get();
                if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
                    TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::OUTPUT;
                } else {
                    schedule.outputTensor.insert(
                               std::make_pair(net->tensorName()->GetAsString(iter->second)->c_str(), t));
                }
            } else {
                MNN_PRINT("Bad outputname: %s\n", name.c_str());
            }
        }
    }
    if (net->outputName()) {
        for (int i = 0; i < net->outputName()->size(); ++i) {
            std::string name = net->outputName()->Get(i)->str();
            auto iter = tensorNameIndexMap.find(name);
            if (iter != tensorNameIndexMap.end()) {
                auto t = allTensors[iter->second].get();
                if (TensorUtils::getDescribe(t)->usage == Tensor::InsideDescribe::NORMAL) {
                    TensorUtils::getDescribe(t)->usage = Tensor::InsideDescribe::OUTPUT;
                } else {
                    schedule.outputTensor.insert(
                               std::make_pair(net->tensorName()->GetAsString(iter->second)->c_str(), t));
                }
            }
        }
    }
    // add input/output tensors to schedule's input/output
    for (int index = 0; index < allTensors.size(); index++) {
        auto t = allTensors[index].get();
        auto usage = TensorUtils::getDescribe(t)->usage;
        if (usage == Tensor::InsideDescribe::INPUT) {
            schedule.inputTensors.insert(std::make_pair(net->tensorName()->GetAsString(index)->c_str(), t));
        }
        if (usage == Tensor::InsideDescribe::OUTPUT) {
            schedule.outputTensor.insert(
                       std::make_pair(net->tensorName()->GetAsString(index)->c_str(), t));
        }
    }
    // move tensors to schedule
    for (auto& t : allTensors) {
        schedule.allTensors.emplace_back(std::make_pair(0, std::move(t)));
    }
    return schedule;
}
} // namespace MNN