//
//  CPUArgMax.cpp
//  MNN
//
//  Created by MNN on 2018/07/17.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUArgMax.hpp"
#include <float.h>
#include <algorithm>
#include <tuple>
#include <vector>
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/TensorUtils.hpp"

namespace MNN {

CPUArgMax::CPUArgMax(Backend *backend, ArgMinOrMax mode, int topk, int outMaxVal, int softmaxThreshold, int axis)
    : Execution(backend), mTopk(topk), mOutMaxVal(outMaxVal), mSoftmaxThreshold(softmaxThreshold), mAxis(axis), mMode(mode) {
    // nothing to do
}

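// onResize pre-computes the reduction geometry and, when the input is NC4HW4,
// reserves temporary NCHW-layout buffers that onExecute converts through.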
ErrorCode CPUArgMax::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    // acquire buffer space
    auto input                = inputs[0];
    auto output               = outputs[0];
    auto inputDimensionFormat = TensorUtils::getDescribe(input)->dimensionFormat;

    mFromNHWC = inputDimensionFormat != MNN_DATA_FORMAT_NC4HW4;

    if (!mFromNHWC) {
        // if the input format is NC4HW4, convert it to NCHW first
        TensorUtils::copyShape(input, &mInputBuffer);
        TensorUtils::copyShape(output, &mOutputBuffer);

        backend()->onAcquireBuffer(&mInputBuffer, Backend::DYNAMIC);
        backend()->onAcquireBuffer(&mOutputBuffer, Backend::DYNAMIC);

        // release temp buffer space
        backend()->onReleaseBuffer(&mInputBuffer, Backend::DYNAMIC);
        backend()->onReleaseBuffer(&mOutputBuffer, Backend::DYNAMIC);
    }

    // compute params
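    // The input is treated as a [mNum, mDim, mKeyExtent] block in the non-NC4HW4 path:
    // mNum spans the axes before mAxis, mDim is the length of the reduced axis, and
    // mKeyExtent spans the axes after it, so consecutive elements along mAxis sit
    // mKeyExtent apart in memory. For example (hypothetical shape), a [2, 3, 4] input
    // with axis = 1 gives mNum = 2, mDim = 3, mKeyExtent = 4.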
    mNum       = 1;
    mDim       = 1;
    mKeyExtent = 1;

    if (mAxis < 0) {
        mAxis = mAxis + input->dimensions();
    }

    if (mFromNHWC) {
        const int dimensions = input->dimensions();
        for (int i = 0; i < mAxis; ++i) {
            mNum = mNum * input->length(i);
        }
        mDim = input->length(mAxis);
        for (int i = mAxis + 1; i < dimensions; ++i) {
            mKeyExtent = mKeyExtent * input->length(i);
        }
    } else {
        // Legacy code
        int iw = input->width(), ow = output->width();
        int ih = input->height(), oh = output->height();
        int ic = input->channel(), oc = output->channel();
        if (iw > 1) {
            mNum       = ic * ih;
            mDim       = iw;
            mKeyExtent = ow;
        } else if (ih > 1) { // iw = ow = 1
            mNum       = ic;
            mDim       = ih;
            mKeyExtent = oh;
        } else { // iw = ow = 1, ih = oh = 1
            mNum       = 1;
            mDim       = ic;
            mKeyExtent = oc;
        }
    }

    return NO_ERROR;
}

ErrorCode CPUArgMax::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto input  = inputs[0];
    auto output = outputs[0];

    using sortElementT = std::tuple<int, float>;
#define element_index(ele) (std::get<0>(ele))
#define element_value(ele) (std::get<1>(ele))
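    // Sort elements are (index, value) pairs ordered by value in descending order, so
    // after partial_sort the first realTopK entries hold the largest values.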
    auto comp = [](const sortElementT &a, const sortElementT &b) -> bool {
        float va = element_value(a);
        float vb = element_value(b);
        return va > vb;
    };

    if (mFromNHWC) {
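        // For each of the mNum outer blocks, scan the reduced axis (stride mKeyExtent)
        // once per inner position k and write the winning index into the int output.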
        if (mMode == ARGMAX) {
            auto srcOrigin = input->host<float>();
            auto dstOrigin = output->host<int>();
            for (int i = 0; i < mNum; ++i) {
                auto iptr = srcOrigin + i * mDim * mKeyExtent;
                auto optr = dstOrigin + i * mKeyExtent;

                for (int k = 0; k < mKeyExtent; ++k) {
                    int index      = 0;
                    float maxValue = -FLT_MAX;
                    for (int j = 0; j < mDim; ++j) {
                        auto val = iptr[k + j * mKeyExtent];
                        if (val > maxValue) {
                            maxValue = val;
                            index    = j;
                        }
                    }
                    optr[k] = index;
                }
            }
        } else {
            auto srcOrigin = input->host<float>();
            auto dstOrigin = output->host<int>();
            for (int i = 0; i < mNum; ++i) {
                auto iptr = srcOrigin + i * mDim * mKeyExtent;
                auto optr = dstOrigin + i * mKeyExtent;

                for (int k = 0; k < mKeyExtent; ++k) {
                    int index      = 0;
                    float minValue = FLT_MAX;
                    for (int j = 0; j < mDim; ++j) {
                        auto val = iptr[k + j * mKeyExtent];
                        if (val < minValue) {
                            minValue = val;
                            index    = j;
                        }
                    }
                    optr[k] = index;
                }
            }
        }

    } else {
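        // This branch works on the NCHW copies prepared in onResize: pick the top-k
        // entries per row (optionally above a softmax threshold), then copy back.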
        MNN_ASSERT(mMode == ARGMAX); // caffe does not have an argmin layer
        // Legacy code for CAFFE
        backend()->onCopyBuffer(input, &mInputBuffer);

        // threshold
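        // With mSoftmaxThreshold set, candidates below 1.0f / mDim (the uniform softmax
        // probability, presumably the intended cutoff for softmax inputs) are skipped.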
        float softmaxThreshold = -FLT_MAX;
        if (mSoftmaxThreshold) {
            softmaxThreshold = 1.0f / mDim;
        }

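        // Each output row stores mTopk indices; when mOutMaxVal is set, the matching
        // values are written right after them, starting at offset mTopk.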
        float *srcOrigin = mInputBuffer.host<float>(); // used as NCHW input
        float *dstOrigin = mOutputBuffer.host<float>();
        for (int i = 0; i < mNum; ++i) {
            float *iptr = srcOrigin + i * mDim;
            float *optr = dstOrigin + i * mKeyExtent;

            // apply threshold
            std::vector<sortElementT> vec;
            vec.reserve(mDim);
            for (int j = 0; j < mDim; ++j) {
                float val = iptr[j];
                if (val >= softmaxThreshold) {
                    vec.emplace_back(std::make_tuple(j, val));
                }
            }
            size_t sortDim = vec.size();

            // sort
            int realTopK = std::min(mTopk, (int)sortDim);
            std::partial_sort(vec.begin(), vec.begin() + realTopK, vec.end(), comp);

            // copy index
            for (int j = 0; j < mTopk; ++j) {
                if (j < sortDim) {
                    optr[j] = element_index(vec[j]);
                } else {
                    optr[j] = 0.f;
                }
            }

            // copy max value
            if (mOutMaxVal) {
                for (int j = 0; j < mTopk; ++j) {
                    if (j < sortDim) {
                        optr[mTopk + j] = element_value(vec[j]);
                    } else {
                        optr[mTopk + j] = 0.f;
                    }
                }
            }
        }

        backend()->onCopyBuffer(&mOutputBuffer, output);
    }

    return NO_ERROR;
}

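// A single creator serves both ops; the op type picks ARGMAX or ARGMIN mode.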
class CPUArgMaxCreator : public CPUBackend::Creator {
public:
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const {
        auto argMax = op->main_as_ArgMax();
        if (op->type() == OpType_ArgMin) {
            return new CPUArgMax(backend, CPUArgMax::ArgMinOrMax::ARGMIN,
                    argMax->topK(), argMax->outMaxVal(), argMax->softmaxThreshold(), argMax->axis());
        } else {
            return new CPUArgMax(backend, CPUArgMax::ArgMinOrMax::ARGMAX,
                    argMax->topK(), argMax->outMaxVal(), argMax->softmaxThreshold(), argMax->axis());
        }
    }
};
REGISTER_CPU_OP_CREATOR(CPUArgMaxCreator, OpType_ArgMax);
REGISTER_CPU_OP_CREATOR(CPUArgMaxCreator, OpType_ArgMin);
} // namespace MNN