MNN/source/backend/cpu/CPUBinary.cpp

//
// CPUBinary.cpp
// MNN
//
// Created by MNN on 2018/08/02.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "CPUBinary.hpp"
#include <math.h>
#include <algorithm>
#include "CPUBackend.hpp"
#include "compute/CommonOptFunction.h"
#include "compute/ConvOpt.h"
#include "core/Macro.h"
#include "core/Concurrency.h"
#include "core/OpCommonUtils.hpp"
namespace MNN {
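// Broadcast handling below supports tensors of at most MAX_DIM (6) dimensions.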
#define MAX_DIM 6
CPUBinaryInt::CPUBinaryInt(Backend* b, int32_t type) : MNN::Execution(b), mType(type) {
// nothing to do
}
CPUBinaryFloat::CPUBinaryFloat(Backend* b, int32_t type) : MNN::Execution(b), mType(type) {
// nothing to do
}
ErrorCode CPUBinaryFloat::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
MNN_ASSERT(1 == outputs.size());
const int input0DataCount = inputs[0]->elementSize();
const int input1DataCount = inputs[1]->elementSize();
const int outputDataCount = outputs[0]->elementSize();
int maxCount = input0DataCount > input1DataCount ? input0DataCount : input1DataCount;
mElementProc = nullptr;
mSupportScale = false;
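// onResize selects one of three execution strategies for onExecute:
// 1. mElementProc set: run a vectorized eltwise kernel (same shapes, or one broadcast axis);
// 2. mSupportScale set: fold a scalar/per-axis MUL/ADD/SUB into y = scale * x + bias;
// 3. neither: fall back to the generic _binaryOp template below.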
if (outputs[0]->getType().code != halide_type_float || maxCount < 4 || (outputDataCount > input0DataCount && outputDataCount > input1DataCount)) {
// Can't optimize
return NO_ERROR;
}
auto eleProc = mElementProc; // still nullptr here; assigned per op type below
switch (mType) {
case BinaryOpOperation_MUL:
eleProc = MNNMatrixProdCommon;
break;
case BinaryOpOperation_ADD:
eleProc = MNNMatrixAddCommon;
break;
case BinaryOpOperation_MAXIMUM:
eleProc = MNNMatrixMaxCommon;
break;
case BinaryOpOperation_SUB:
eleProc = MNNMatrixSubCommon;
break;
default:
break;
}
if (input1DataCount == input0DataCount) {
mOutside = 1;
mInside = input0DataCount;
mElementProc = eleProc;
return NO_ERROR;
}
if (input1DataCount == 1 || input0DataCount == 1) {
mAxis = 1;
mOutside = 1;
switch (mType) {
case BinaryOpOperation_MUL:
case BinaryOpOperation_ADD:
case BinaryOpOperation_SUB:
mSupportScale = true;
break;
default:
break;
}
return NO_ERROR;
}
if (nullptr == eleProc) {
return NO_ERROR;
}
// For patterns with a single broadcast axis (e.g. AddBias / Mul)
int dims[MAX_DIM];
int stride[MAX_DIM];
int iStride0[MAX_DIM];
int iStride1[MAX_DIM];
const Tensor* input0 = inputs[0];
const Tensor* input1 = inputs[1];
const Tensor* output = outputs[0];
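// Make input0 the larger tensor, so input1 is always the broadcast operand.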
if (input0DataCount < input1DataCount) {
input0 = inputs[1];
input1 = inputs[0];
}
OpCommonUtils::broastCastComputeDim(dims, stride, iStride0, iStride1, input0, input1, output);
int breakPos = -1;
for (int i=0; i<MAX_DIM; ++i) {
if (iStride1[i] > 0) {
if (breakPos >= 0) {
// Failed to optimize: more than one broadcast axis
return NO_ERROR;
}
breakPos = i;
}
}
MNN_ASSERT(breakPos >= 0);
//FUNC_PRINT(breakPos);
mOutside = 1;
mInside = 1;
for (int i=0; i<breakPos; ++i) {
mOutside *= dims[i];
}
mAxis = dims[breakPos];
for (int i=breakPos+1; i<MAX_DIM; ++i) {
mInside *= dims[i];
}
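// The work is decomposed as mOutside x mAxis x mInside, where mAxis is the single
// dimension along which the broadcast operand varies. For example, broadcasting
// shape (2, 3, 4) against (1, 3, 1) yields mOutside = 2, mAxis = 3, mInside = 4.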
// Several platforms require memory aligned to 4 * sizeof(float)
if (1 == mInside && mAxis >= 4) {
mElementProc = eleProc;
//MNN_PRINT("Open Optimize\n");
} else if (BinaryOpOperation_MAXIMUM != mType && mInside >= 4) {
mSupportScale = true;
}
//MNN_PRINT("%d, %d, %d\n", mInside, mAxis, mOutside);
return NO_ERROR;
}
template <typename Tin, typename Tout, typename Func>
static ErrorCode _binaryOp(Tensor* input0, Tensor* input1, Tensor* output) {
Func f;
const int input0DataCount = input0->elementSize();
const int input1DataCount = input1->elementSize();
const Tin* input0Data = input0->host<Tin>();
const Tin* input1Data = input1->host<Tin>();
Tout* outputData = output->host<Tout>();
if (input0DataCount == 1) { // a count of 1 does not necessarily mean a scalar input; the shape may be (1, 1, ..., 1)
for (int i = 0; i < input1DataCount; i++) {
outputData[i] = static_cast<Tout>(f(input0Data[0], input1Data[i]));
}
} else if (input1DataCount == 1) {
for (int i = 0; i < input0DataCount; i++) {
outputData[i] = static_cast<Tout>(f(input0Data[i], input1Data[0]));
}
} else { // both inputs contain more than one element, which means no scalar input
bool sameShape = true;
{
if (input0->dimensions() == input1->dimensions()) {
for (int i = 0; i < input0->buffer().dimensions; i++) {
if (input0->buffer().dim[i].extent != input1->buffer().dim[i].extent) {
sameShape = false;
break;
}
}
}
else {
sameShape = false;
}
}
if (sameShape) { // two inputs have the same shape, apply element-wise operation
for (int i = 0; i < input0DataCount; i++) {
outputData[i] = static_cast<Tout>(f(input0Data[i], input1Data[i]));
}
} else { // not the same shape, use broadcast
MNN_ASSERT(output->dimensions() <= MAX_DIM);
int dims[MAX_DIM];
int stride[MAX_DIM];
int iStride0[MAX_DIM];
int iStride1[MAX_DIM];
OpCommonUtils::broastCastComputeDim(dims, stride, iStride0, iStride1, input0, input1, output);
for (int w = 0; w < dims[5]; ++w) {
auto ow = outputData + w * stride[5];
auto i0w = input0Data + w * iStride0[5];
auto i1w = input1Data + w * iStride1[5];
#define PTR(x, y, i) \
auto o##x = o##y + x * stride[i]; \
auto i0##x = i0##y + x * iStride0[i]; \
auto i1##x = i1##y + x * iStride1[i]
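// PTR(x, y, i) derives the output and both input pointers for step x along dimension i
// from the parent pointers at level y; broadcast axes have stride 0, so the broadcast
// operand is re-read rather than advanced.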
for (int v = 0; v < dims[4]; ++v) {
PTR(v, w, 4);
for (int u = 0; u < dims[3]; ++u) {
PTR(u, v, 3);
for (int z = 0; z < dims[2]; ++z) {
PTR(z, u, 2);
for (int y = 0; y < dims[1]; ++y) {
PTR(y, z, 1);
for (int x = 0; x < dims[0]; ++x) {
PTR(x, y, 0);
*ox = static_cast<Tout>(f(*i0x, *i1x));
}
}
}
}
}
}
#undef MAX_DIM
#undef PTR
}
// broadcast compatibility has already been validated during shape computation
}
return NO_ERROR;
}
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMax : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return std::max(x, y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMin : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return std::min(x, y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMul : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x * y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryAdd : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x + y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinarySub : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x - y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryRealDiv : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x / y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMod : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
// truncated-division remainder: x - trunc(x / y) * y, matching C's % for integers
return x - trunc(static_cast<double>(x) / y) * y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryGreater : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x > y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLess : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x < y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryGreaterEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x >= y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLessEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x <= y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x == y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryFloorDiv : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
// divide in floating point before flooring so integer operands round toward -infinity
return floor(static_cast<double>(x) / y);
}
};
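// Unlike the truncation-based MOD above, floor-mod below takes the sign of the
// divisor (Python-style %).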
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryFloorMod : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x - floor(static_cast<double>(x) / y) * y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinarySquaredDifference : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (x - y) * (x - y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryPow : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return pow(x, y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryAtan2 : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
// use atan2 to preserve the quadrant information that atan(x / y) loses
return atan2(x, y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLogicalOr : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x || y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryNotEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x != y) ? 1 : 0);
}
};
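// The eltwise kernels compute C = A op B. When onExecute exchanged the inputs so the
// larger tensor comes first, `swap` restores the original operand order, which matters
// for non-commutative ops such as SUB.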
static void callEleFunc(void(*proc)(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t bStride, size_t height),
float* C, const float* A, const float* B, size_t size, bool swap) {
if (swap) {
proc(C, B, A, size, 0, 0, 0, 1);
} else {
proc(C, A, B, size, 0, 0, 0, 1);
}
}
ErrorCode CPUBinaryFloat::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
if (nullptr != mElementProc || mSupportScale) {
auto numberThread = ((CPUBackend*)backend())->threadNumber();
auto i1Size = input->elementSize();
auto i2Size = input1->elementSize();
bool swap = false;
if (i1Size < i2Size) {
std::swap(i1Size, i2Size);
input = inputs[1];
input1 = inputs[0];
swap = true;
}
auto size = i1Size;
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(size);
int sizeDivide = schedule.first;
int scheduleNumber = schedule.second;
if (nullptr != mElementProc) {
if (mOutside == 1) {
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber - 1) {
realSize = size - start;
}
if (realSize > 0) {
mElementProc(output->host<float>() + start, input->host<float>() + start, input1->host<float>() + start, realSize, 0, 0, 0, 1);
}
}
MNN_CONCURRENCY_END();
} else {
MNN_CONCURRENCY_BEGIN(tId, numberThread) {
for (int y = (int)tId; y < mOutside; y += numberThread) {
callEleFunc(mElementProc, output->host<float>() + y * mAxis, input->host<float>() + y * mAxis, input1->host<float>(), mAxis, swap);
}
}
MNN_CONCURRENCY_END();
}
} else {
if (mOutside == 1 && mAxis == 1) {
float* inputPtr = input->host<float>();
float scalar = input1->host<float>()[0];
float scale = scalar;
float bias = 0.0f;
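// Fold the scalar op into y = scale * x + bias: MUL uses scale = scalar, bias = 0
// (the defaults above); ADD uses scale = 1, bias = scalar; SUB accounts for the
// operand order via `swap`.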
switch (mType) {
case BinaryOpOperation_ADD:
scale = 1.0f;
bias = scalar;
break;
case BinaryOpOperation_SUB:
if (!swap) {
scale = 1.0f;
bias = -scalar;
} else {
scale = -1.0f;
bias = scalar;
}
break;
default:
break;
}
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber - 1) {
realSize = size - start;
}
if (realSize > 0) {
MNNScaleAndAddBiasScalar(output->host<float>() + start, inputPtr + start, bias, scale, realSize);
}
}
MNN_CONCURRENCY_END();
} else {
float* inputPtr = input->host<float>();
float* input1Ptr = input1->host<float>();
auto total = mOutside * mAxis;
MNN_CONCURRENCY_BEGIN(tId, numberThread) {
for (int index = (int)tId; index < total; index += numberThread) {
auto axis = index % mAxis;
float scalar = input1Ptr[axis];
float scale = scalar;
float bias = 0.0f;
switch (mType) {
case BinaryOpOperation_ADD:
scale = 1.0f;
bias = scalar;
break;
case BinaryOpOperation_SUB:
if (!swap) {
scale = 1.0f;
bias = -scalar;
} else {
scale = -1.0f;
bias = scalar;
}
break;
default:
break;
}
MNNScaleAndAddBiasScalar(output->host<float>() + mInside * index, inputPtr + mInside * index, bias, scale, mInside);
}
}
MNN_CONCURRENCY_END();
}
}
return NO_ERROR;
}
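// Fallback: generic (scalar / same-shape / broadcast) implementation.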
switch (mType) {
case BinaryOpOperation_MUL:
_binaryOp<float, float, BinaryMul<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_ADD:
_binaryOp<float, float, BinaryAdd<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_SUB:
_binaryOp<float, float, BinarySub<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_REALDIV:
_binaryOp<float, float, BinaryRealDiv<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_MINIMUM:
_binaryOp<float, float, BinaryMin<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_MAXIMUM:
_binaryOp<float, float, BinaryMax<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_GREATER:
_binaryOp<float, int32_t, BinaryGreater<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LESS:
_binaryOp<float, int32_t, BinaryLess<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LESS_EQUAL:
_binaryOp<float, int32_t, BinaryLessEqual<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_GREATER_EQUAL:
_binaryOp<float, int32_t, BinaryGreaterEqual<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_EQUAL:
_binaryOp<float, int32_t, BinaryEqual<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_FLOORDIV:
_binaryOp<float, float, BinaryFloorDiv<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_FLOORMOD:
_binaryOp<float, float, BinaryFloorMod<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_POW:
_binaryOp<float, float, BinaryPow<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_SquaredDifference:
_binaryOp<float, float, BinarySquaredDifference<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_ATAN2:
_binaryOp<float, float, BinaryAtan2<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_NOTEQUAL:
_binaryOp<float, int32_t, BinaryNotEqual<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_MOD:
_binaryOp<float, float, BinaryMod<float, float, float>>(input, input1, output);
break;
default:
MNN_ASSERT(false);
break;
}
return NO_ERROR;
}
ErrorCode CPUBinaryInt::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
switch (mType) {
case BinaryOpOperation_MUL:
_binaryOp<int32_t, int32_t, BinaryMul<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_ADD:
_binaryOp<int32_t, int32_t, BinaryAdd<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_SUB:
_binaryOp<int32_t, int32_t, BinarySub<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_REALDIV:
_binaryOp<int32_t, int32_t, BinaryRealDiv<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_MINIMUM:
_binaryOp<int32_t, int32_t, BinaryMin<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_MAXIMUM:
_binaryOp<int32_t, int32_t, BinaryMax<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_GREATER:
_binaryOp<int32_t, int32_t, BinaryGreater<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LESS:
_binaryOp<int32_t, int32_t, BinaryLess<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LESS_EQUAL:
_binaryOp<int32_t, int32_t, BinaryLessEqual<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_GREATER_EQUAL:
_binaryOp<int32_t, int32_t, BinaryGreaterEqual<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_EQUAL:
_binaryOp<int32_t, int32_t, BinaryEqual<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_FLOORDIV:
_binaryOp<int32_t, int32_t, BinaryFloorDiv<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_FLOORMOD:
_binaryOp<int32_t, int32_t, BinaryFloorMod<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_SquaredDifference:
_binaryOp<int32_t, int32_t, BinarySquaredDifference<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LOGICALOR:
_binaryOp<int32_t, int32_t, BinaryLogicalOr<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_NOTEQUAL:
_binaryOp<int32_t, int32_t, BinaryNotEqual<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_MOD:
_binaryOp<int32_t, int32_t, BinaryMod<int32_t, int32_t, int32_t>>(input, input1, output);
break;
default:
MNN_ASSERT(false);
break;
}
return NO_ERROR;
}
class CPUBinaryCreator : public CPUBackend::Creator {
public:
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op, Backend* backend) const override {
// auto dataType = outputs[0]->getType();
int32_t type = op->main_as_BinaryOp()->opType();
// auto dataType = op->main_as_BinaryOp()->T();
auto dataType = inputs[0]->getType();
if (dataType.bits == 32) {
if (dataType.code == halide_type_int) {
return new CPUBinaryInt(backend, type);
} else if (dataType.code == halide_type_float) {
return new CPUBinaryFloat(backend, type);
}
}
MNN_ERROR("CpuBinary: unsupported data type (bits: %d, code: %d)\n",
dataType.bits, dataType.code);
return nullptr;
}
};
REGISTER_CPU_OP_CREATOR(CPUBinaryCreator, OpType_BinaryOp);
} // namespace MNN