// MNN/source/backend/cpu/CPUEltwise.cpp

//
//  CPUEltwise.cpp
//  MNN
//
//  Created by MNN on 2018/07/19.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUEltwise.hpp"
#include <math.h>
#include <string.h>
#include "core/Concurrency.h"
#include <algorithm>
#include "backend/cpu/CPUBackend.hpp"
#include "backend/cpu/compute/CommonOptFunction.h"

namespace MNN {

/**
 * Builds an element-wise execution bound to backend `b`.
 *
 * @param b     backend forwarded to the Execution base class.
 * @param type  element-wise operation selector; stored in mType and
 *              interpreted later by onExecute.
 * @param coef  optional coefficient list; stored in mCoeff. It may be empty.
 */
CPUEltwise::CPUEltwise(Backend *b, EltwiseType type, std::vector<float> coef) : Execution(b) {
    mType = type;
    // `coef` is already a private by-value copy, so take over its buffer
    // instead of deep-copying the elements a second time.
    mCoeff.swap(coef);
}

/**
 * Applies the configured element-wise operation across all inputs and writes
 * the result into outputs[0].
 *
 * Behaviour:
 *  - If at least two coefficients were supplied, only the pair {1, 0} is
 *    handled: inputs[0] is copied verbatim to the output. Any other
 *    coefficient pair yields NOT_SUPPORT.
 *  - Otherwise mType is mapped to a binary operation (PROD -> MUL, SUM -> ADD,
 *    MAXIMUM -> MAXIMUM, SUB -> SUB) and the inputs are folded left to right:
 *    out = op(in0, in1), then out = op(out, in_i) for every further input.
 *
 * The element count is taken from inputs[0]; every other tensor is addressed
 * with the same offsets, so all tensors are assumed to hold at least that many
 * elements (NOTE(review): not validated here - confirm the caller guarantees it).
 *
 * @param inputs   source tensors; at least one is required for the coefficient
 *                 pass-through path and at least two otherwise.
 * @param outputs  destination tensors; only outputs[0] is written.
 * @return NO_ERROR on success, NOT_SUPPORT for an unsupported coefficient
 *         pair, INPUT_DATA_ERROR for an unknown mType or too few tensors.
 */
ErrorCode CPUEltwise::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    if (inputs.empty() || outputs.empty()) {
        return INPUT_DATA_ERROR;
    }
    auto cpuBackend = static_cast<CPUBackend*>(backend());
    auto core       = cpuBackend->functions();
    const int size  = cpuBackend->getTensorSize(inputs[0]);
    // Do all byte arithmetic in size_t so large tensors cannot overflow `int`.
    const size_t elementBytes = static_cast<size_t>(core->bytes);

    auto outputHost      = outputs[0]->host<uint8_t>();
    const auto input0Ptr = inputs[0]->host<uint8_t>();

    const bool hasCoeff = mCoeff.size() >= 2;
    if (hasCoeff) {
        // when Eltwise has coeff: only the identity pair {1, 0} is supported,
        // which reduces to copying the first input.
        if (mCoeff[0] == 1.0f && mCoeff[1] == 0.0f) {
            memcpy(outputHost, input0Ptr, static_cast<size_t>(size) * elementBytes);
            return NO_ERROR;
        }
        return NOT_SUPPORT;
    }

    // Every remaining path is a binary fold and therefore needs two operands.
    if (inputs.size() < 2) {
        return INPUT_DATA_ERROR;
    }

    int opType = -1;
    switch (mType) {
        case EltwiseType_PROD:
            opType = BinaryOpOperation_MUL;
            break;
        case EltwiseType_SUM:
            opType = BinaryOpOperation_ADD;
            break;
        case EltwiseType_MAXIMUM:
            opType = BinaryOpOperation_MAXIMUM;
            break;
        case EltwiseType_SUB:
            opType = BinaryOpOperation_SUB;
            break;
        default:
            MNN_ERROR("Don't support %d type for eltwise", mType);
            return INPUT_DATA_ERROR;
    }

    const auto input1Ptr = inputs[1]->host<uint8_t>();
    auto proc            = core->MNNSelectBinaryFunctionForFloat(opType);
    // schedule.first = elements per work item, schedule.second = number of work items.
    auto schedule            = cpuBackend->multiThreadDivide(size);
    const int sizeDivide     = schedule.first;
    const int scheduleNumber = schedule.second;

    MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
        const int start = sizeDivide * static_cast<int>(tId);
        int realSize    = sizeDivide;
        if (static_cast<int>(tId) == scheduleNumber - 1) {
            // The last work item picks up whatever is left over.
            realSize = size - start;
        }
        if (realSize > 0) {
            const size_t offset = static_cast<size_t>(start) * elementBytes;
            auto out            = outputHost + offset;

            // Trailing -1: presumably "no broadcast operand" - TODO confirm
            // against the binary function's declaration.
            proc(out, input0Ptr + offset, input1Ptr + offset, realSize, -1);
            for (size_t i = 2; i < inputs.size(); ++i) {
                // Accumulate in place: out = op(out, inputs[i]).
                proc(out, out, inputs[i]->host<uint8_t>() + offset, realSize, -1);
            }
        }
    }
    MNN_CONCURRENCY_END();
    return NO_ERROR;
}

/**
 * Creator that builds a CPUEltwise execution from an Eltwise op description.
 */
class CPUEltwiseCreator : public CPUBackend::Creator {
public:
    /**
     * Reads the operation type and the optional coefficient list from `op`
     * and constructs the matching CPUEltwise.
     *
     * @param inputs   unused here.
     * @param outputs  unused here.
     * @param op       op description; its Eltwise parameter is read.
     * @param backend  backend handed to the new execution.
     * @return a newly allocated CPUEltwise, or nullptr when `op` carries no
     *         Eltwise parameter.
     */
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        auto eltwiseParam = op->main_as_Eltwise();
        if (nullptr == eltwiseParam) {
            // Nothing to build from; avoid dereferencing a null parameter.
            return nullptr;
        }
        std::vector<float> coeff;
        // keep compatible with old model: the coefficient list may be absent.
        auto coeffParam = eltwiseParam->coeff();
        if (nullptr != coeffParam) {
            // Range-assign instead of memcpy: well-defined for an empty list
            // and needs no narrowing of the element count to int.
            coeff.assign(coeffParam->data(), coeffParam->data() + coeffParam->size());
        }
        return new CPUEltwise(backend, eltwiseParam->type(), coeff);
    }
};
// Associates CPUEltwiseCreator with OpType_Eltwise through the CPU backend's
// registration macro (the macro itself is defined outside this file).
REGISTER_CPU_OP_CREATOR(CPUEltwiseCreator, OpType_Eltwise);

} // namespace MNN
beta 0.1.0 2019-04-17 10:49:11 +08:00			`//`
			`// CPUEltwise.cpp`
			`// MNN`
			`//`
			`// Created by MNN on 2018/07/19.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`

Update 2019-12-27 22:16:57 +08:00			`#include "backend/cpu/CPUEltwise.hpp"`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`#include <math.h>`
			`#include <string.h>`
Update 2019-12-27 22:16:57 +08:00			`#include "core/Concurrency.h"`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`#include <algorithm>`
Update 2019-12-27 22:16:57 +08:00			`#include "backend/cpu/CPUBackend.hpp"`
			`#include "backend/cpu/compute/CommonOptFunction.h"`
beta 0.1.0 2019-04-17 10:49:11 +08:00
			`namespace MNN {`

- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`CPUEltwise::CPUEltwise(Backend *b, EltwiseType type, std::vector<float> coef) : Execution(b) {`
			`mType = type;`
			`mCoeff = coef;`
beta 0.1.1.3 - fix benchmark script for older version adb - add FAQ.md - add environment requirement in Install.md - add coeff in Eltwise Op - fix bugs in strassen 1x1 data preparation - add download failure process in get_model.sh 2019-05-17 14:59:57 +08:00			`}`

beta 0.1.0 2019-04-17 10:49:11 +08:00			`ErrorCode CPUEltwise::onExecute(const std::vector<Tensor > &inputs, const std::vector<Tensor > &outputs) {`
			`auto inputTensor = inputs[0];`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`const int size = static_cast<CPUBackend*>(backend())->getTensorSize(inputTensor);`
			`auto core = static_cast<CPUBackend*>(backend())->functions();`
beta 0.1.0 2019-04-17 10:49:11 +08:00
beta 0.1.1.3 - fix benchmark script for older version adb - add FAQ.md - add environment requirement in Install.md - add coeff in Eltwise Op - fix bugs in strassen 1x1 data preparation - add download failure process in get_model.sh 2019-05-17 14:59:57 +08:00			`auto outputTensor = outputs[0];`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`auto outputHost = outputTensor->host<uint8_t>();`
			`const auto input0Ptr = inputs[0]->host<uint8_t>();`
			`const auto input1Ptr = inputs[1]->host<uint8_t>();`
beta 0.1.1.3 - fix benchmark script for older version adb - add FAQ.md - add environment requirement in Install.md - add coeff in Eltwise Op - fix bugs in strassen 1x1 data preparation - add download failure process in get_model.sh 2019-05-17 14:59:57 +08:00
beta 0.2.0.0 - replace FreeImage with stb_image - warn unicode error in Windows compiling - separate clang/gcc build script for android - add default values in fbs - optimize CPU conv / conv depthwise / deconv / deconv depthwise / lstm / sigmoid - add sub support in eltwise - add reciprocal / log1p / log in unary - add zero like / select / set diff 1d - add batch support for permute - add training codes - fix metal error in dynamic separate storage type handling 2019-06-17 20:10:35 +08:00			`auto coeffSize = mCoeff.size();`
beta 0.1.1.3 - fix benchmark script for older version adb - add FAQ.md - add environment requirement in Install.md - add coeff in Eltwise Op - fix bugs in strassen 1x1 data preparation - add download failure process in get_model.sh 2019-05-17 14:59:57 +08:00			`bool isIdentity = coeffSize >= 2;`
			`if (isIdentity) {`
			`// when Eltwise has coeff`
			`if (mCoeff[0] == 1.0f && mCoeff[1] == 0.0f) {`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`memcpy(outputHost, input0Ptr, size * core->bytes);`
beta 0.1.1.3 - fix benchmark script for older version adb - add FAQ.md - add environment requirement in Install.md - add coeff in Eltwise Op - fix bugs in strassen 1x1 data preparation - add download failure process in get_model.sh 2019-05-17 14:59:57 +08:00			`return NO_ERROR;`
			`} else {`
			`return NOT_SUPPORT;`
			`}`
			`}`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`int opType = -1;`
beta 0.1.1.3 - fix benchmark script for older version adb - add FAQ.md - add environment requirement in Install.md - add coeff in Eltwise Op - fix bugs in strassen 1x1 data preparation - add download failure process in get_model.sh 2019-05-17 14:59:57 +08:00
beta 0.1.0 2019-04-17 10:49:11 +08:00			`switch (mType) {`
			`case EltwiseType_PROD:`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`opType = BinaryOpOperation_MUL;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`break;`
			`case EltwiseType_SUM:`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`opType = BinaryOpOperation_ADD;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`break;`
			`case EltwiseType_MAXIMUM:`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`opType = BinaryOpOperation_MAXIMUM;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`break;`
beta 0.2.0.0 - replace FreeImage with stb_image - warn unicode error in Windows compiling - separate clang/gcc build script for android - add default values in fbs - optimize CPU conv / conv depthwise / deconv / deconv depthwise / lstm / sigmoid - add sub support in eltwise - add reciprocal / log1p / log in unary - add zero like / select / set diff 1d - add batch support for permute - add training codes - fix metal error in dynamic separate storage type handling 2019-06-17 20:10:35 +08:00			`case EltwiseType_SUB:`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`opType = BinaryOpOperation_SUB;`
beta 0.2.0.0 - replace FreeImage with stb_image - warn unicode error in Windows compiling - separate clang/gcc build script for android - add default values in fbs - optimize CPU conv / conv depthwise / deconv / deconv depthwise / lstm / sigmoid - add sub support in eltwise - add reciprocal / log1p / log in unary - add zero like / select / set diff 1d - add batch support for permute - add training codes - fix metal error in dynamic separate storage type handling 2019-06-17 20:10:35 +08:00			`break;`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`default:`
			`MNN_ERROR("Don't support %d type for eltwise", mType);`
			`return INPUT_DATA_ERROR;`
			`}`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`auto proc = core->MNNSelectBinaryFunctionForFloat(opType);`
Update 2020-02-26 09:57:17 +08:00			`auto schedule = ((CPUBackend*)backend())->multiThreadDivide(size);`
			`int sizeDivide = schedule.first;`
			`int scheduleNumber = schedule.second;`

- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {`
			`int start = sizeDivide * (int)tId;`
			`int realSize = sizeDivide;`
			`if (tId == scheduleNumber -1 ) {`
			`realSize = size - start;`
			`}`
			`if (realSize > 0) {`
			`auto inputT1 = inputs[1];`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`auto inp0 = input0Ptr + start * core->bytes;`
			`auto inp1 = input1Ptr + start * core->bytes;`
			`auto out = outputHost + start * core->bytes;`

			`proc(out, inp0, inp1, realSize, -1);`
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`for (int i = 2; i < inputs.size(); ++i) {`
Synchronize internal github for version 1.2.0 (#1518) 2021-06-11 17:17:13 +08:00			`proc(out, out, inputs[i]->host<uint8_t>() + start * core->bytes, realSize, -1);`
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`}`
			`}`
			`}`
			`MNN_CONCURRENCY_END();`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`return NO_ERROR;`
			`}`

fix spelling mistake, eltwies -> eltwise 2020-03-17 18:20:21 +08:00			`class CPUEltwiseCreator : public CPUBackend::Creator {`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`public:`
			`virtual Execution onCreate(const std::vector<Tensor > &inputs, const std::vector<Tensor *> &outputs,`
			`const MNN::Op op, Backend backend) const {`
- build: - unify schema building in core and converter; - add more build script for android; - add linux build script for python; - ops impl: - add floor mod support in binary; - use eltwise impl in add/max/sub/mul binary for optimization; - remove fake double support in cast; - fix 5d support for concat; - add adjX and adjY support for batch matmul; - optimize conv2d back prop filter; - add pad mode support for conv3d; - fix bug in conv2d & conv depthwise with very small feature map; - optimize binary without broacast; - add data types support for gather; - add gather ND support; - use uint8 data type in gather v2; - add transpose support for matmul; - add matrix band part; - add dim != 4 support for padding, reshape & tensor convert; - add pad type support for pool3d; - make ops based on TensorFlow Lite quantization optional; - add all & any support for reduction; - use type in parameter as output type in reduction; - add int support for unary; - add variable weight support for conv2d; - fix conv2d depthwise weights initialization; - fix type support for transpose; - fix grad outputs count for reduce grad and reshape grad; - fix priorbox & detection output; - fix metal softmax error; - python: - add runSessionWithCallBackInfo interface; - add max nodes limit (1400) for visualization tool; - fix save error in python3; - align default dim; - convert: - add extra design for optimization; - add more post converting optimizers; - add caffe v1 weights blob support; - add cast, unary, conv transpose support for onnx model; - optimize batchnorm, conv with variable weights, prelu, reshape, slice, upsample for onnx model; - add cos/sin/atan/tan support for unary for tensorflow model; - add any/all support for reduction for tensorflow model; - add elu, conv3d, pool3d support for tensorflow model; - optimize argmax, batchnorm, concat, batch to space, conv with variable weights, prelu, slice for tensorflow model; - others: - fix size computer lock; - fix thread pool deadlock; 
- add express & parameters in express; - rewrite blitter chooser without static map; - add tests for expr; 2019-10-29 13:37:26 +08:00			`auto eltwiseParam = op->main_as_Eltwise();`
			`auto type = eltwiseParam->type();`
			`std::vector<float> coeff;`
			`// keep compatible with old model`
			`if (eltwiseParam->coeff()) {`
			`const int size = eltwiseParam->coeff()->size();`
			`coeff.resize(size);`
			`memcpy(coeff.data(), eltwiseParam->coeff()->data(), size * sizeof(float));`
			`}`
			`return new CPUEltwise(backend, type, coeff);`
beta 0.1.0 2019-04-17 10:49:11 +08:00			`}`
			`};`
fix spelling mistake, eltwies -> eltwise 2020-03-17 18:20:21 +08:00			`REGISTER_CPU_OP_CREATOR(CPUEltwiseCreator, OpType_Eltwise);`
beta 0.1.0 2019-04-17 10:49:11 +08:00
			`} // namespace MNN`