//
//  CPUArgMax.cpp
//  MNN
//
//  Created by MNN on 2018/07/17.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUArgMax.hpp"
#include <float.h>
#include <algorithm>
#include <tuple>
#include <vector>
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/TensorUtils.hpp"

namespace MNN {

CPUArgMax::CPUArgMax(Backend *backend, ArgMinOrMax mode, int topk, int outMaxVal, int softmaxThreshold, int axis)
    : Execution(backend), mTopk(topk), mOutMaxVal(outMaxVal), mSoftmaxThreshold(softmaxThreshold), mAxis(axis), mMode(mode) {
    // nothing to do
}

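// onResize pre-computes the reduction geometry and, when the input is NC4HW4,
// reserves temporary NCHW-layout buffers that onExecute converts through.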
ErrorCode CPUArgMax::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    // acquire buffer space
    auto input                = inputs[0];
    auto output               = outputs[0];
    auto inputDimensionFormat = TensorUtils::getDescribe(input)->dimensionFormat;

    mFromNHWC = inputDimensionFormat != MNN_DATA_FORMAT_NC4HW4;

    if (!mFromNHWC) {
        // if the input format is NC4HW4, convert it to NCHW first
        TensorUtils::copyShape(input, &mInputBuffer);
        TensorUtils::copyShape(output, &mOutputBuffer);

        backend()->onAcquireBuffer(&mInputBuffer, Backend::DYNAMIC);
        backend()->onAcquireBuffer(&mOutputBuffer, Backend::DYNAMIC);

        // release temp buffer space
        backend()->onReleaseBuffer(&mInputBuffer, Backend::DYNAMIC);
        backend()->onReleaseBuffer(&mOutputBuffer, Backend::DYNAMIC);
    }

    // compute params
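    // The input is treated as a [mNum, mDim, mKeyExtent] block in the non-NC4HW4 path:
    // mNum spans the axes before mAxis, mDim is the length of the reduced axis, and
    // mKeyExtent spans the axes after it, so consecutive elements along mAxis sit
    // mKeyExtent apart in memory. For example (hypothetical shape), a [2, 3, 4] input
    // with axis = 1 gives mNum = 2, mDim = 3, mKeyExtent = 4.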
    mNum       = 1;
    mDim       = 1;
    mKeyExtent = 1;

    if (mAxis < 0) {
        mAxis = mAxis + input->dimensions();
    }

    if (mFromNHWC) {
        const int dimensions = input->dimensions();
        for (int i = 0; i < mAxis; ++i) {
            mNum = mNum * input->length(i);
        }
        mDim = input->length(mAxis);
        for (int i = mAxis + 1; i < dimensions; ++i) {
            mKeyExtent = mKeyExtent * input->length(i);
        }
    } else {
        // Legacy code
        int iw = input->width(), ow = output->width();
        int ih = input->height(), oh = output->height();
        int ic = input->channel(), oc = output->channel();
        if (iw > 1) {
            mNum       = ic * ih;
            mDim       = iw;
            mKeyExtent = ow;
        } else if (ih > 1) { // iw = ow = 1
            mNum       = ic;
            mDim       = ih;
            mKeyExtent = oh;
        } else { // iw = ow = 1, ih = oh = 1
            mNum       = 1;
            mDim       = ic;
            mKeyExtent = oc;
        }
    }

    return NO_ERROR;
}

ErrorCode CPUArgMax::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto input  = inputs[0];
    auto output = outputs[0];

    using sortElementT = std::tuple<int, float>;
#define element_index(ele) (std::get<0>(ele))
#define element_value(ele) (std::get<1>(ele))
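    // Sort elements are (index, value) pairs ordered by value in descending order, so
    // after partial_sort the first realTopK entries hold the largest values.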
    auto comp = [](const sortElementT &a, const sortElementT &b) -> bool {
        float va = element_value(a);
        float vb = element_value(b);
        return va > vb;
    };

    if (mFromNHWC) {
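        // For each of the mNum outer blocks, scan the reduced axis (stride mKeyExtent)
        // once per inner position k and write the winning index into the int output.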
        if (mMode == ARGMAX) {
            auto srcOrigin = input->host<float>();
            auto dstOrigin = output->host<int>();
            for (int i = 0; i < mNum; ++i) {
                auto iptr = srcOrigin + i * mDim * mKeyExtent;
                auto optr = dstOrigin + i * mKeyExtent;

                for (int k = 0; k < mKeyExtent; ++k) {
                    int index      = 0;
                    float maxValue = -FLT_MAX;
                    for (int j = 0; j < mDim; ++j) {
                        auto val = iptr[k + j * mKeyExtent];
                        if (val > maxValue) {
                            maxValue = val;
                            index    = j;
                        }
                    }
                    optr[k] = index;
                }
            }
        } else {
            auto srcOrigin = input->host<float>();
            auto dstOrigin = output->host<int>();
            for (int i = 0; i < mNum; ++i) {
                auto iptr = srcOrigin + i * mDim * mKeyExtent;
                auto optr = dstOrigin + i * mKeyExtent;

                for (int k = 0; k < mKeyExtent; ++k) {
                    int index      = 0;
                    float minValue = FLT_MAX;
                    for (int j = 0; j < mDim; ++j) {
                        auto val = iptr[k + j * mKeyExtent];
                        if (val < minValue) {
                            minValue = val;
                            index    = j;
                        }
                    }
                    optr[k] = index;
                }
            }
        }

    } else {
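        // This branch works on the NCHW copies prepared in onResize: pick the top-k
        // entries per row (optionally above a softmax threshold), then copy back.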
        MNN_ASSERT(mMode == ARGMAX); // caffe does not have an argmin layer
        // Legacy code for CAFFE
        backend()->onCopyBuffer(input, &mInputBuffer);

        // threshold
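        // With mSoftmaxThreshold set, candidates below 1.0f / mDim (the uniform softmax
        // probability, presumably the intended cutoff for softmax inputs) are skipped.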
        float softmaxThreshold = -FLT_MAX;
        if (mSoftmaxThreshold) {
            softmaxThreshold = 1.0f / mDim;
        }

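        // Each output row stores mTopk indices; when mOutMaxVal is set, the matching
        // values are written right after them, starting at offset mTopk.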
        float *srcOrigin = mInputBuffer.host<float>(); // used as NCHW input
        float *dstOrigin = mOutputBuffer.host<float>();
        for (int i = 0; i < mNum; ++i) {
            float *iptr = srcOrigin + i * mDim;
            float *optr = dstOrigin + i * mKeyExtent;

            // apply threshold
            std::vector<sortElementT> vec;
            vec.reserve(mDim);
            for (int j = 0; j < mDim; ++j) {
                float val = iptr[j];
                if (val >= softmaxThreshold) {
                    vec.emplace_back(std::make_tuple(j, val));
                }
            }
            size_t sortDim = vec.size();

            // sort
            int realTopK = std::min(mTopk, (int)sortDim);
            std::partial_sort(vec.begin(), vec.begin() + realTopK, vec.end(), comp);

            // copy index
            for (int j = 0; j < mTopk; ++j) {
                if (j < sortDim) {
                    optr[j] = element_index(vec[j]);
                } else {
                    optr[j] = 0.f;
                }
            }

            // copy max value
            if (mOutMaxVal) {
                for (int j = 0; j < mTopk; ++j) {
                    if (j < sortDim) {
                        optr[mTopk + j] = element_value(vec[j]);
                    } else {
                        optr[mTopk + j] = 0.f;
                    }
                }
            }
        }

        backend()->onCopyBuffer(&mOutputBuffer, output);
    }

    return NO_ERROR;
}

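// A single creator serves both ops; the op type picks ARGMAX or ARGMIN mode.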
class CPUArgMaxCreator : public CPUBackend::Creator {
public:
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const {
        auto argMax = op->main_as_ArgMax();
        if (op->type() == OpType_ArgMin) {
            return new CPUArgMax(backend, CPUArgMax::ArgMinOrMax::ARGMIN,
                    argMax->topK(), argMax->outMaxVal(), argMax->softmaxThreshold(), argMax->axis());
        } else {
            return new CPUArgMax(backend, CPUArgMax::ArgMinOrMax::ARGMAX,
                    argMax->topK(), argMax->outMaxVal(), argMax->softmaxThreshold(), argMax->axis());
        }
    }
};
REGISTER_CPU_OP_CREATOR(CPUArgMaxCreator, OpType_ArgMax);
REGISTER_CPU_OP_CREATOR(CPUArgMaxCreator, OpType_ArgMin);
} // namespace MNN