MNN/source/backend/cpu/CPUBinary.cpp

//
// CPUBinary.cpp
// MNN
//
// Created by MNN on 2018/08/02.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include "CPUBinary.hpp"
#include <math.h>
#include <algorithm>
#include "CPUBackend.hpp"
#include "compute/CommonOptFunction.h"
#include "compute/ConvOpt.h"
#include "core/Macro.h"
#include "core/Concurrency.h"
#include "core/OpCommonUtils.hpp"
namespace MNN {
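// Broadcast handling below supports tensors of at most MAX_DIM (6) dimensions.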
#define MAX_DIM 6
CPUBinaryInt::CPUBinaryInt(Backend* b, int32_t type) : MNN::Execution(b), mType(type) {
// nothing to do
}
CPUBinaryFloat::CPUBinaryFloat(Backend* b, int32_t type) : MNN::Execution(b), mType(type) {
// nothing to do
}
ErrorCode CPUBinaryFloat::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
MNN_ASSERT(1 == outputs.size());
const int input0DataCount = inputs[0]->elementSize();
const int input1DataCount = inputs[1]->elementSize();
const int outputDataCount = outputs[0]->elementSize();
int maxCount = input0DataCount > input1DataCount ? input0DataCount : input1DataCount;
mElementProc = nullptr;
mSupportScale = false;
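// onResize selects one of three execution strategies for onExecute:
// 1. mElementProc set: run a vectorized eltwise kernel (same shapes, or one broadcast axis);
// 2. mSupportScale set: fold a scalar/per-axis MUL/ADD/SUB into y = scale * x + bias;
// 3. neither: fall back to the generic _binaryOp template below.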
if (outputs[0]->getType().code != halide_type_float || maxCount < 4 || (outputDataCount > input0DataCount && outputDataCount > input1DataCount)) {
// Can't optimize
return NO_ERROR;
}
auto eleProc = mElementProc; // still nullptr here; assigned per op type below
switch (mType) {
case BinaryOpOperation_MUL:
eleProc = MNNMatrixProdCommon;
break;
case BinaryOpOperation_ADD:
eleProc = MNNMatrixAddCommon;
break;
case BinaryOpOperation_MAXIMUM:
eleProc = MNNMatrixMaxCommon;
break;
case BinaryOpOperation_SUB:
eleProc = MNNMatrixSubCommon;
break;
default:
break;
}
if (input1DataCount == input0DataCount) {
mOutside = 1;
mInside = input0DataCount;
mElementProc = eleProc;
return NO_ERROR;
}
if (input1DataCount == 1 || input0DataCount == 1) {
mAxis = 1;
mOutside = 1;
switch (mType) {
case BinaryOpOperation_MUL:
case BinaryOpOperation_ADD:
case BinaryOpOperation_SUB:
mSupportScale = true;
break;
default:
break;
}
return NO_ERROR;
}
if (nullptr == eleProc) {
return NO_ERROR;
}
// For patterns with a single broadcast axis (e.g. AddBias / Mul)
int dims[MAX_DIM];
int stride[MAX_DIM];
int iStride0[MAX_DIM];
int iStride1[MAX_DIM];
const Tensor* input0 = inputs[0];
const Tensor* input1 = inputs[1];
const Tensor* output = outputs[0];
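// Make input0 the larger tensor, so input1 is always the broadcast operand.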
if (input0DataCount < input1DataCount) {
input0 = inputs[1];
input1 = inputs[0];
}
OpCommonUtils::broastCastComputeDim(dims, stride, iStride0, iStride1, input0, input1, output);
int breakPos = -1;
for (int i=0; i<MAX_DIM; ++i) {
if (iStride1[i] > 0) {
if (breakPos >= 0) {
// Failed to optimize: more than one broadcast axis
return NO_ERROR;
}
breakPos = i;
}
}
MNN_ASSERT(breakPos >= 0);
//FUNC_PRINT(breakPos);
mOutside = 1;
mInside = 1;
for (int i=0; i<breakPos; ++i) {
mOutside *= dims[i];
}
mAxis = dims[breakPos];
for (int i=breakPos+1; i<MAX_DIM; ++i) {
mInside *= dims[i];
}
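// The work is decomposed as mOutside x mAxis x mInside, where mAxis is the single
// dimension along which the broadcast operand varies. For example, broadcasting
// shape (2, 3, 4) against (1, 3, 1) yields mOutside = 2, mAxis = 3, mInside = 4.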
// Several platforms require memory aligned to 4 * sizeof(float)
if (1 == mInside && mAxis >= 4) {
mElementProc = eleProc;
//MNN_PRINT("Open Optimize\n");
} else if (BinaryOpOperation_MAXIMUM != mType && mInside >= 4) {
mSupportScale = true;
}
//MNN_PRINT("%d, %d, %d\n", mInside, mAxis, mOutside);
return NO_ERROR;
}
template <typename Tin, typename Tout, typename Func>
static ErrorCode _binaryOp(Tensor* input0, Tensor* input1, Tensor* output) {
Func f;
const int input0DataCount = input0->elementSize();
const int input1DataCount = input1->elementSize();
const Tin* input0Data = input0->host<Tin>();
const Tin* input1Data = input1->host<Tin>();
Tout* outputData = output->host<Tout>();
if (input0DataCount == 1) { // a count of 1 does not necessarily mean a scalar input; the shape may be (1, 1, ..., 1)
for (int i = 0; i < input1DataCount; i++) {
outputData[i] = static_cast<Tout>(f(input0Data[0], input1Data[i]));
}
} else if (input1DataCount == 1) {
for (int i = 0; i < input0DataCount; i++) {
outputData[i] = static_cast<Tout>(f(input0Data[i], input1Data[0]));
}
} else { // both inputs contain more than one element, which means no scalar input
bool sameShape = true;
{
if (input0->dimensions() == input1->dimensions()) {
for (int i = 0; i < input0->buffer().dimensions; i++) {
if (input0->buffer().dim[i].extent != input1->buffer().dim[i].extent) {
sameShape = false;
break;
}
}
}
else {
sameShape = false;
}
}
if (sameShape) { // two inputs have the same shape, apply element-wise operation
for (int i = 0; i < input0DataCount; i++) {
outputData[i] = static_cast<Tout>(f(input0Data[i], input1Data[i]));
}
} else { // not the same shape, use broadcast
MNN_ASSERT(output->dimensions() <= MAX_DIM);
int dims[MAX_DIM];
int stride[MAX_DIM];
int iStride0[MAX_DIM];
int iStride1[MAX_DIM];
OpCommonUtils::broastCastComputeDim(dims, stride, iStride0, iStride1, input0, input1, output);
for (int w = 0; w < dims[5]; ++w) {
auto ow = outputData + w * stride[5];
auto i0w = input0Data + w * iStride0[5];
auto i1w = input1Data + w * iStride1[5];
#define PTR(x, y, i) \
auto o##x = o##y + x * stride[i]; \
auto i0##x = i0##y + x * iStride0[i]; \
auto i1##x = i1##y + x * iStride1[i]
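// PTR(x, y, i) derives the output and both input pointers for step x along dimension i
// from the parent pointers at level y; broadcast axes have stride 0, so the broadcast
// operand is re-read rather than advanced.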
for (int v = 0; v < dims[4]; ++v) {
PTR(v, w, 4);
for (int u = 0; u < dims[3]; ++u) {
PTR(u, v, 3);
for (int z = 0; z < dims[2]; ++z) {
PTR(z, u, 2);
for (int y = 0; y < dims[1]; ++y) {
PTR(y, z, 1);
for (int x = 0; x < dims[0]; ++x) {
PTR(x, y, 0);
*ox = static_cast<Tout>(f(*i0x, *i1x));
}
}
}
}
}
}
#undef MAX_DIM
#undef PTR
}
// broadcast compatibility has already been validated during shape computation
}
return NO_ERROR;
}
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMax : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return std::max(x, y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMin : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return std::min(x, y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMul : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x * y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryAdd : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x + y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinarySub : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x - y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryRealDiv : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x / y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryMod : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
// truncated-division remainder: x - trunc(x / y) * y, matching C's % for integers
return x - trunc(static_cast<double>(x) / y) * y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryGreater : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x > y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLess : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x < y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryGreaterEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x >= y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLessEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x <= y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x == y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryFloorDiv : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
// divide in floating point before flooring so integer operands round toward -infinity
return floor(static_cast<double>(x) / y);
}
};
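// Unlike the truncation-based MOD above, floor-mod below takes the sign of the
// divisor (Python-style %).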
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryFloorMod : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return x - floor(static_cast<double>(x) / y) * y;
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinarySquaredDifference : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (x - y) * (x - y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryPow : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return pow(x, y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryAtan2 : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
// use atan2 to preserve the quadrant information that atan(x / y) loses
return atan2(x, y);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryLogicalOr : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x || y) ? 1 : 0);
}
};
template <typename _Arg1, typename _Arg2, typename _ErrorCode>
struct BinaryNotEqual : std::binary_function<_Arg1, _Arg2, _ErrorCode> {
_ErrorCode operator()(const _Arg1& x, const _Arg2& y) const {
return (_ErrorCode)((x != y) ? 1 : 0);
}
};
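// The eltwise kernels compute C = A op B. When onExecute exchanged the inputs so the
// larger tensor comes first, `swap` restores the original operand order, which matters
// for non-commutative ops such as SUB.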
static void callEleFunc(void(*proc)(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t bStride, size_t height),
float* C, const float* A, const float* B, size_t size, bool swap) {
if (swap) {
proc(C, B, A, size, 0, 0, 0, 1);
} else {
proc(C, A, B, size, 0, 0, 0, 1);
}
}
ErrorCode CPUBinaryFloat::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
if (nullptr != mElementProc || mSupportScale) {
auto numberThread = ((CPUBackend*)backend())->threadNumber();
auto i1Size = input->elementSize();
auto i2Size = input1->elementSize();
bool swap = false;
if (i1Size < i2Size) {
std::swap(i1Size, i2Size);
input = inputs[1];
input1 = inputs[0];
swap = true;
}
auto size = i1Size;
auto schedule = ((CPUBackend*)backend())->multiThreadDivide(size);
int sizeDivide = schedule.first;
int scheduleNumber = schedule.second;
if (nullptr != mElementProc) {
if (mOutside == 1) {
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber - 1) {
realSize = size - start;
}
if (realSize > 0) {
mElementProc(output->host<float>() + start, input->host<float>() + start, input1->host<float>() + start, realSize, 0, 0, 0, 1);
}
}
MNN_CONCURRENCY_END();
} else {
MNN_CONCURRENCY_BEGIN(tId, numberThread) {
for (int y = (int)tId; y < mOutside; y += numberThread) {
callEleFunc(mElementProc, output->host<float>() + y * mAxis, input->host<float>() + y * mAxis, input1->host<float>(), mAxis, swap);
}
}
MNN_CONCURRENCY_END();
}
} else {
if (mOutside == 1 && mAxis == 1) {
float* inputPtr = input->host<float>();
float scalar = input1->host<float>()[0];
float scale = scalar;
float bias = 0.0f;
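// Fold the scalar op into y = scale * x + bias: MUL uses scale = scalar, bias = 0
// (the defaults above); ADD uses scale = 1, bias = scalar; SUB accounts for the
// operand order via `swap`.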
switch (mType) {
case BinaryOpOperation_ADD:
scale = 1.0f;
bias = scalar;
break;
case BinaryOpOperation_SUB:
if (!swap) {
scale = 1.0f;
bias = -scalar;
} else {
scale = -1.0f;
bias = scalar;
}
break;
default:
break;
}
MNN_CONCURRENCY_BEGIN(tId, scheduleNumber) {
int start = sizeDivide * (int)tId;
int realSize = sizeDivide;
if (tId == scheduleNumber - 1) {
realSize = size - start;
}
if (realSize > 0) {
MNNScaleAndAddBiasScalar(output->host<float>() + start, inputPtr + start, bias, scale, realSize);
}
}
MNN_CONCURRENCY_END();
} else {
float* inputPtr = input->host<float>();
float* input1Ptr = input1->host<float>();
auto total = mOutside * mAxis;
MNN_CONCURRENCY_BEGIN(tId, numberThread) {
for (int index = (int)tId; index < total; index += numberThread) {
auto axis = index % mAxis;
float scalar = input1Ptr[axis];
float scale = scalar;
float bias = 0.0f;
switch (mType) {
case BinaryOpOperation_ADD:
scale = 1.0f;
bias = scalar;
break;
case BinaryOpOperation_SUB:
if (!swap) {
scale = 1.0f;
bias = -scalar;
} else {
scale = -1.0f;
bias = scalar;
}
break;
default:
break;
}
MNNScaleAndAddBiasScalar(output->host<float>() + mInside * index, inputPtr + mInside * index, bias, scale, mInside);
}
}
MNN_CONCURRENCY_END();
}
}
return NO_ERROR;
}
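// Fallback: generic (scalar / same-shape / broadcast) implementation.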
switch (mType) {
case BinaryOpOperation_MUL:
_binaryOp<float, float, BinaryMul<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_ADD:
_binaryOp<float, float, BinaryAdd<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_SUB:
_binaryOp<float, float, BinarySub<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_REALDIV:
_binaryOp<float, float, BinaryRealDiv<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_MINIMUM:
_binaryOp<float, float, BinaryMin<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_MAXIMUM:
_binaryOp<float, float, BinaryMax<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_GREATER:
_binaryOp<float, int32_t, BinaryGreater<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LESS:
_binaryOp<float, int32_t, BinaryLess<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LESS_EQUAL:
_binaryOp<float, int32_t, BinaryLessEqual<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_GREATER_EQUAL:
_binaryOp<float, int32_t, BinaryGreaterEqual<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_EQUAL:
_binaryOp<float, int32_t, BinaryEqual<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_FLOORDIV:
_binaryOp<float, float, BinaryFloorDiv<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_FLOORMOD:
_binaryOp<float, float, BinaryFloorMod<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_POW:
_binaryOp<float, float, BinaryPow<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_SquaredDifference:
_binaryOp<float, float, BinarySquaredDifference<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_ATAN2:
_binaryOp<float, float, BinaryAtan2<float, float, float>>(input, input1, output);
break;
case BinaryOpOperation_NOTEQUAL:
_binaryOp<float, int32_t, BinaryNotEqual<float, float, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_MOD:
_binaryOp<float, float, BinaryMod<float, float, float>>(input, input1, output);
break;
default:
MNN_ASSERT(false);
break;
}
return NO_ERROR;
}
ErrorCode CPUBinaryInt::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input = inputs[0];
auto input1 = inputs[1];
auto output = outputs[0];
switch (mType) {
case BinaryOpOperation_MUL:
_binaryOp<int32_t, int32_t, BinaryMul<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_ADD:
_binaryOp<int32_t, int32_t, BinaryAdd<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_SUB:
_binaryOp<int32_t, int32_t, BinarySub<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_REALDIV:
_binaryOp<int32_t, int32_t, BinaryRealDiv<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_MINIMUM:
_binaryOp<int32_t, int32_t, BinaryMin<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_MAXIMUM:
_binaryOp<int32_t, int32_t, BinaryMax<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_GREATER:
_binaryOp<int32_t, int32_t, BinaryGreater<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LESS:
_binaryOp<int32_t, int32_t, BinaryLess<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LESS_EQUAL:
_binaryOp<int32_t, int32_t, BinaryLessEqual<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_GREATER_EQUAL:
_binaryOp<int32_t, int32_t, BinaryGreaterEqual<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_EQUAL:
_binaryOp<int32_t, int32_t, BinaryEqual<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_FLOORDIV:
_binaryOp<int32_t, int32_t, BinaryFloorDiv<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_FLOORMOD:
_binaryOp<int32_t, int32_t, BinaryFloorMod<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_SquaredDifference:
_binaryOp<int32_t, int32_t, BinarySquaredDifference<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_LOGICALOR:
_binaryOp<int32_t, int32_t, BinaryLogicalOr<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_NOTEQUAL:
_binaryOp<int32_t, int32_t, BinaryNotEqual<int32_t, int32_t, int32_t>>(input, input1, output);
break;
case BinaryOpOperation_MOD:
_binaryOp<int32_t, int32_t, BinaryMod<int32_t, int32_t, int32_t>>(input, input1, output);
break;
default:
MNN_ASSERT(false);
break;
}
return NO_ERROR;
}
class CPUBinaryCreator : public CPUBackend::Creator {
public:
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op, Backend* backend) const override {
// auto dataType = outputs[0]->getType();
int32_t type = op->main_as_BinaryOp()->opType();
// auto dataType = op->main_as_BinaryOp()->T();
auto dataType = inputs[0]->getType();
if (dataType.bits == 32) {
if (dataType.code == halide_type_int) {
return new CPUBinaryInt(backend, type);
} else if (dataType.code == halide_type_float) {
return new CPUBinaryFloat(backend, type);
}
}
MNN_ERROR("CpuBinary: unsupported data type (bits: %d, code: %d)\n",
dataType.bits, dataType.code);
return nullptr;
}
};
REGISTER_CPU_OP_CREATOR(CPUBinaryCreator, OpType_BinaryOp);
} // namespace MNN