//
//  CPUBinary.cpp
//  MNN
//
//  Created by MNN on 2018/08/02.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "CPUBinary.hpp"
#include <math.h>
#include <algorithm>
#include "CPUBackend.hpp"
#include "compute/CommonOptFunction.h"
#include "compute/ConvOpt.h"
#include "core/Macro.h"
#include "core/Concurrency.h"
#include "core/OpCommonUtils.hpp"

namespace MNN {
#define MAX_DIM 6
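// MAX_DIM is the maximum broadcast rank handled by this execution: shapes are
// expanded to at most six dimensions before the nested broadcast loops below.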
CPUBinaryInt::CPUBinaryInt(Backend* b, int32_t type) : MNN::Execution(b), mType(type) {
    // nothing to do
}
CPUBinaryFloat::CPUBinaryFloat(Backend* b, int32_t type) : MNN::Execution(b), mType(type) {
    // nothing to do
}

ErrorCode CPUBinaryFloat::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    MNN_ASSERT(1 == outputs.size());
    const int input0DataCount = inputs[0]->elementSize();
    const int input1DataCount = inputs[1]->elementSize();
    const int outputDataCount = outputs[0]->elementSize();
    int maxCount = input0DataCount > input1DataCount ? input0DataCount : input1DataCount;
    mElementProc  = nullptr;
    mSupportScale = false;
    if (outputs[0]->getType().code != halide_type_float || maxCount < 4 || (outputDataCount > input0DataCount && outputDataCount > input1DataCount)) {
        // Can't optimize
        return NO_ERROR;
    }
    auto eleProc = mElementProc; // nullptr to begin with
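    // Only MUL / ADD / MAXIMUM / SUB have optimized matrix kernels; any other
    // op leaves eleProc == nullptr and falls back to the generic _binaryOp path.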
    switch (mType) {
        case BinaryOpOperation_MUL:
            eleProc = MNNMatrixProdCommon;
            break;
        case BinaryOpOperation_ADD:
            eleProc = MNNMatrixAddCommon;
            break;
        case BinaryOpOperation_MAXIMUM:
            eleProc = MNNMatrixMaxCommon;
            break;
        case BinaryOpOperation_SUB:
            eleProc = MNNMatrixSubCommon;
            break;
        default:
            break;
    }
    if (input1DataCount == input0DataCount) {
        mOutside = 1;
        mInside = input0DataCount;
        mElementProc = eleProc;
        return NO_ERROR;
    }
    if (input1DataCount == 1 || input0DataCount == 1) {
        mAxis = 1;
        mOutside = 1;
        switch (mType) {
            case BinaryOpOperation_MUL:
            case BinaryOpOperation_ADD:
            case BinaryOpOperation_SUB:
                mSupportScale = true;
                break;
            default:
                break;
        }
        return NO_ERROR;
    }
    if (nullptr == eleProc) {
        return NO_ERROR;
    }
    // For AddBias / Mul Sqrt style patterns: the smaller input broadcasts along a single axis.
    int dims[MAX_DIM];
    int stride[MAX_DIM];
    int iStride0[MAX_DIM];
    int iStride1[MAX_DIM];
    const Tensor* input0 = inputs[0];
    const Tensor* input1 = inputs[1];
    const Tensor* output = outputs[0];
    if (input0DataCount < input1DataCount) {
        input0 = inputs[1];
        input1 = inputs[0];
    }
    OpCommonUtils::broastCastComputeDim(dims, stride, iStride0, iStride1, input0, input1, output);
    int breakPos = -1;
    for (int i = 0; i < MAX_DIM; ++i) {
        if (iStride1[i] > 0) {
            if (breakPos >= 0) {
                // Failed to optimize: input1 varies along more than one axis
                return NO_ERROR;
            }
            breakPos = i;
        }
    }
    MNN_ASSERT(breakPos >= 0);
    //FUNC_PRINT(breakPos);
    mOutside = 1;
    mInside = 1;
    for (int i = 0; i < breakPos; ++i) {
        mOutside *= dims[i];
    }
    mAxis = dims[breakPos];
    for (int i = breakPos + 1; i < MAX_DIM; ++i) {
        mInside *= dims[i];
    }
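    // Illustrative example: broadcasting [2, 32, 100] against [1, 32, 1] gives
    // mOutside = 2, mAxis = 32, mInside = 100, so each value of input1 is
    // reused across a run of mInside contiguous elements.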
					
						
    // Some platforms require memory aligned to 4 * sizeof(float)
    if (1 == mInside && mAxis >= 4) {
        mElementProc = eleProc;
        //MNN_PRINT("Open Optimize\n");
    } else if (BinaryOpOperation_MAXIMUM != mType && mInside >= 4) {
        mSupportScale = true;
    }
    //MNN_PRINT("%d, %d, %d\n", mInside, mAxis, mOutside);
    return NO_ERROR;
}
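
// _binaryOp is the generic fallback: it applies the functor Func element-wise,
// covering scalar operands, identical shapes, and full MAX_DIM-way broadcasting.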
					
						
template <typename Tin, typename Tout, typename Func>
static ErrorCode _binaryOp(Tensor* input0, Tensor* input1, Tensor* output) {
    Func f;
    const int input0DataCount = input0->elementSize();
    const int input1DataCount = input1->elementSize();
    const Tin* input0Data = input0->host<Tin>();
    const Tin* input1Data = input1->host<Tin>();
    Tout* outputData      = output->host<Tout>();

    if (input0DataCount == 1) { // data count == 1 does not only mean scalar input; the shape may be (1, 1, ..., 1)
        for (int i = 0; i < input1DataCount; i++) {
            outputData[i] = static_cast<Tout>(f(input0Data[0], input1Data[i]));
        }
    } else if (input1DataCount == 1) {
        for (int i = 0; i < input0DataCount; i++) {
            outputData[i] = static_cast<Tout>(f(input0Data[i], input1Data[0]));
        }
    } else { // both inputs contain more than one element, which means no scalar input
        bool sameShape = true;
        {
            if (input0->dimensions() == input1->dimensions()) {
                for (int i = 0; i < input0->buffer().dimensions; i++) {
                    if (input0->buffer().dim[i].extent != input1->buffer().dim[i].extent) {
                        sameShape = false;
                        break;
                    }
                }
            } else {
                sameShape = false;
            }
        }
        if (sameShape) { // two inputs have the same shape, apply element-wise operation
            for (int i = 0; i < input0DataCount; i++) {
                outputData[i] = static_cast<Tout>(f(input0Data[i], input1Data[i]));
            }
        } else { // not the same shape, use broadcast
            MNN_ASSERT(output->dimensions() <= MAX_DIM);
            int dims[MAX_DIM];
            int stride[MAX_DIM];
            int iStride0[MAX_DIM];
            int iStride1[MAX_DIM];
            OpCommonUtils::broastCastComputeDim(dims, stride, iStride0, iStride1, input0, input1, output);
            for (int w = 0; w < dims[5]; ++w) {
                auto ow  = outputData + w * stride[5];
                auto i0w = input0Data + w * iStride0[5];
                auto i1w = input1Data + w * iStride1[5];
#define PTR(x, y, i)                      \
    auto o##x  = o##y + x * stride[i];    \
    auto i0##x = i0##y + x * iStride0[i]; \
    auto i1##x = i1##y + x * iStride1[i]
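// PTR(x, y, i) derives the level-x pointers from the level-y pointers along
// dimension i; dimensions being broadcast carry stride 0 (as computed by
// broastCastComputeDim), so the same source element is reused there.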
					
						
                for (int v = 0; v < dims[4]; ++v) {
                    PTR(v, w, 4);
                    for (int u = 0; u < dims[3]; ++u) {
                        PTR(u, v, 3);
                        for (int z = 0; z < dims[2]; ++z) {
                            PTR(z, u, 2);
                            for (int y = 0; y < dims[1]; ++y) {
                                PTR(y, z, 1);
                                for (int x = 0; x < dims[0]; ++x) {
                                    PTR(x, y, 0);
                                    *ox = static_cast<Tout>(f(*i0x, *i1x));
                                }
                            }
                        }
                    }
                }
            }
#undef MAX_DIM
#undef PTR
        }
        // broadcast-capable check is done in compute size
    }

    return NO_ERROR;
}
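
// The functors below adapt each BinaryOpOperation to _binaryOp. Despite the
// name, the _ErrorCode template parameter is simply the result type; for the
// comparison ops it is instantiated as int32_t, yielding 0/1 results.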
					
						

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMax : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return std::max(x, y);
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMin : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return std::min(x, y);
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMul : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return x * y;
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryAdd : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return x + y;
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinarySub : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return x - y;
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryRealDiv : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return x / y;
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMod : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        // Truncated modulo: x - (x / y) * y, relying on integer division.
        return x - x / y * y;
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryGreater : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return (_ErrorCode)((x > y) ? 1 : 0);
    }
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLess : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return (_ErrorCode)((x < y) ? 1 : 0);
    }
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryGreaterEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return (_ErrorCode)((x >= y) ? 1 : 0);
    }
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLessEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return (_ErrorCode)((x <= y) ? 1 : 0);
    }
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return (_ErrorCode)((x == y) ? 1 : 0);
    }
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryFloorDiv : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return floor(x / y);
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryFloorMod : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return x - floor(x / y) * y;
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinarySquaredDifference : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return (x - y) * (x - y);
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryPow : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return pow(x, y);
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryAtan2 : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        // Note: atan(x / y) matches atan2(x, y) only for y > 0; other
        // quadrants are not distinguished here.
        return atan(x / y);
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLogicalOr : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return (_ErrorCode)((x || y) ? 1 : 0);
    }
};

template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryNotEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
    _ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
        return (_ErrorCode)((x != y) ? 1 : 0);
    }
};
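
// callEleFunc keeps non-commutative ops correct: onExecute may swap the inputs
// so that the larger tensor comes first, and `swap` here restores the original
// operand order when calling the element-wise kernel.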
					
						
static void callEleFunc(void (*proc)(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t bStride, size_t height),
                        float* C, const float* A, const float* B, size_t size, bool swap) {
    if (swap) {
        proc(C, B, A, size, 0, 0, 0, 1);
    } else {
        proc(C, A, B, size, 0, 0, 0, 1);
    }
}
					
						

ErrorCode CPUBinaryFloat::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input  = inputs[0];
    auto input1 = inputs[1];
    auto output = outputs[0];

    if (nullptr != mElementProc || mSupportScale) {
        auto numberThread = ((CPUBackend*)backend())->threadNumber();
        auto i1Size = input->elementSize();
        auto i2Size = input1->elementSize();
        bool swap = false;
        if (i1Size < i2Size) {
            auto temp = i2Size;
            i2Size = i1Size;
            i1Size = temp;
            input  = inputs[1];
            input1 = inputs[0];
            swap   = true;
        }
        auto size = i1Size;
        auto schedule = ((CPUBackend*)backend())->multiThreadDivide(size);
        int sizeDivide = schedule.first;
        int scheduleNumber = schedule.second;
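        // multiThreadDivide splits `size` into scheduleNumber chunks of
        // sizeDivide elements; the last worker below picks up the remainder.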
					
						
        if (nullptr != mElementProc) {
            if (mOutside == 1) {
                MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
                    int start = sizeDivide * (int)tId;
                    int realSize = sizeDivide;
                    if (tId == scheduleNumber - 1) {
                        realSize = size - start;
                    }
                    if (realSize > 0) {
                        mElementProc(output->host<float>() + start, input->host<float>() + start, input1->host<float>() + start, realSize, 0, 0, 0, 1);
                    }
                }
                MNN_CONCURRENCY_END();
            } else {
                MNN_CONCURRENCY_BEGIN(tId, numberThread) {
                    for (int y = tId; y < mOutside; y += numberThread) {
                        callEleFunc(mElementProc, output->host<float>() + y * mAxis, input->host<float>() + y * mAxis, input1->host<float>(), mAxis, swap);
                    }
                }
                MNN_CONCURRENCY_END();
            }
        } else {
            if (mOutside == 1 && mAxis == 1) {
                float* inputPtr = input->host<float>();
                float scalar = input1->host<float>()[0];
                float scale = scalar;
                float bias = 0.0f;
                switch (mType) {
                    case BinaryOpOperation_ADD:
                        scale = 1.0f;
                        bias = scalar;
                        break;
                    case BinaryOpOperation_SUB:
                        if (!swap) {
                            scale = 1.0f;
                            bias = -scalar;
                        } else {
                            scale = -1.0f;
                            bias = scalar;
                        }
                        break;
                    default:
                        break;
                }
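                // The op is rewritten as output = scale * input + bias:
                // MUL uses (scalar, 0), ADD uses (1, scalar), and SUB picks its
                // signs from `swap`; all three map onto MNNScaleAndAddBiasScalar.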
					
						

                MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
                    int start = sizeDivide * (int)tId;
                    int realSize = sizeDivide;
                    if (tId == scheduleNumber - 1) {
                        realSize = size - start;
                    }
                    if (realSize > 0) {
                        MNNScaleAndAddBiasScalar(output->host<float>() + start, inputPtr + start, bias, scale, realSize);
                    }
                }
                MNN_CONCURRENCY_END();
            } else {
                float* inputPtr = input->host<float>();
                float* input1Ptr = input1->host<float>();
                auto total = mOutside * mAxis;
                MNN_CONCURRENCY_BEGIN(tId, numberThread) {
                    for (int index = tId; index < total; index += numberThread) {
                        auto axis = index % mAxis;
                        float scalar = input1Ptr[axis];
                        float scale = scalar;
                        float bias = 0.0f;
                        switch (mType) {
                            case BinaryOpOperation_ADD:
                                scale = 1.0f;
                                bias = scalar;
                                break;
                            case BinaryOpOperation_SUB:
                                if (!swap) {
                                    scale = 1.0f;
                                    bias = -scalar;
                                } else {
                                    scale = -1.0f;
                                    bias = scalar;
                                }
                                break;
                            default:
                                break;
                        }
                        MNNScaleAndAddBiasScalar(output->host<float>() + mInside * index, inputPtr + mInside * index, bias, scale, mInside);
                    }
                }
                MNN_CONCURRENCY_END();
            }
        }
        return NO_ERROR;
    }
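
    // No fast path applies: dispatch to the generic broadcast-capable
    // _binaryOp with the functor matching this op type.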
					
						
    switch (mType) {
        case BinaryOpOperation_MUL:
            _binaryOp<float, float, BinaryMul<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_ADD:
            _binaryOp<float, float, BinaryAdd<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_SUB:
            _binaryOp<float, float, BinarySub<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_REALDIV:
            _binaryOp<float, float, BinaryRealDiv<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_MINIMUM:
            _binaryOp<float, float, BinaryMin<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_MAXIMUM:
            _binaryOp<float, float, BinaryMax<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_GREATER:
            // Comparison ops write their 0/1 results as int32_t.
            _binaryOp<float, int32_t, BinaryGreater<float, float, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_LESS:
            _binaryOp<float, int32_t, BinaryLess<float, float, int32_t>>(input, input1, output);
            break;
					
						
        case BinaryOpOperation_LESS_EQUAL:
            _binaryOp<float, int32_t, BinaryLessEqual<float, float, int32_t>>(input, input1, output);
            break;
					
						
        case BinaryOpOperation_GREATER_EQUAL:
            _binaryOp<float, int32_t, BinaryGreaterEqual<float, float, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_EQUAL:
            _binaryOp<float, int32_t, BinaryEqual<float, float, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_FLOORDIV:
            _binaryOp<float, float, BinaryFloorDiv<float, float, float>>(input, input1, output);
            break;
					
						
							| 
									
										
											  
											
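        // Floor-based modulo, assumed here to follow the usual definition
        // x - floor(x / y) * y, so the result takes the sign of the divisor
        // (unlike C's fmod).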
        case BinaryOpOperation_FLOORMOD:
            _binaryOp<float, float, BinaryFloorMod<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_POW:
            _binaryOp<float, float, BinaryPow<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_SquaredDifference:
            _binaryOp<float, float, BinarySquaredDifference<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_ATAN2:
            _binaryOp<float, float, BinaryAtan2<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_NOTEQUAL:
            _binaryOp<float, float, BinaryNotEqual<float, float, float>>(input, input1, output);
            break;
        case BinaryOpOperation_MOD:
            _binaryOp<float, float, BinaryMod<float, float, float>>(input, input1, output);
            break;
        default:
            MNN_ASSERT(false);
            break;
    }
    return NO_ERROR;
}
					
						
ErrorCode CPUBinaryInt::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input  = inputs[0];
    auto input1 = inputs[1];
    auto output = outputs[0];
    switch (mType) {
        case BinaryOpOperation_MUL:
            _binaryOp<int32_t, int32_t, BinaryMul<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_ADD:
            _binaryOp<int32_t, int32_t, BinaryAdd<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_SUB:
            _binaryOp<int32_t, int32_t, BinarySub<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_REALDIV:
            _binaryOp<int32_t, int32_t, BinaryRealDiv<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_MINIMUM:
            _binaryOp<int32_t, int32_t, BinaryMin<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_MAXIMUM:
            _binaryOp<int32_t, int32_t, BinaryMax<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_GREATER:
            _binaryOp<int32_t, int32_t, BinaryGreater<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_LESS:
            _binaryOp<int32_t, int32_t, BinaryLess<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_LESS_EQUAL:
            _binaryOp<int32_t, int32_t, BinaryLessEqual<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_GREATER_EQUAL:
            _binaryOp<int32_t, int32_t, BinaryGreaterEqual<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_EQUAL:
            _binaryOp<int32_t, int32_t, BinaryEqual<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_FLOORDIV:
            _binaryOp<int32_t, int32_t, BinaryFloorDiv<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_FLOORMOD:
            _binaryOp<int32_t, int32_t, BinaryFloorMod<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_SquaredDifference:
            _binaryOp<int32_t, int32_t, BinarySquaredDifference<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_LOGICALOR:
            _binaryOp<int32_t, int32_t, BinaryLogicalOr<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_NOTEQUAL:
            _binaryOp<int32_t, int32_t, BinaryNotEqual<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        case BinaryOpOperation_MOD:
            _binaryOp<int32_t, int32_t, BinaryMod<int32_t, int32_t, int32_t>>(input, input1, output);
            break;
        default:
            MNN_ASSERT(false);
            break;
    }
    return NO_ERROR;
}
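
// The creator below picks an execution from the output tensor's element type:
// 32-bit int maps to CPUBinaryInt and 32-bit float to CPUBinaryFloat; any other
// type returns nullptr, signalling that this op has no CPU execution for it.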
					
						
class CPUBinaryCreator : public CPUBackend::Creator {
public:
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const override {
        auto dataType   = outputs[0]->getType();
        int32_t type = op->main_as_BinaryOp()->opType();
        if (dataType.bits == 32) {
            if (dataType.code == halide_type_int) {
                return new CPUBinaryInt(backend, type);
            }
            if (dataType.code == halide_type_float) {
                return new CPUBinaryFloat(backend, type);
            }
        }
        return nullptr;
    }
};

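// REGISTER_CPU_OP_CREATOR publishes the creator in the CPU backend's op table,
// so every OpType_BinaryOp node is constructed through onCreate above.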
REGISTER_CPU_OP_CREATOR(CPUBinaryCreator, OpType_BinaryOp);

} // namespace MNN