2019-04-17 10:49:11 +08:00
|
|
|
//
|
|
|
|
// EltwiseExecution.cpp
|
|
|
|
// MNN
|
|
|
|
//
|
|
|
|
// Created by MNN on 2019/02/28.
|
|
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
|
|
//
|
|
|
|
|
2021-03-12 18:41:50 +08:00
|
|
|
#include "backend/opencl/execution/image/EltwiseExecution.hpp"
|
2019-04-17 10:49:11 +08:00
|
|
|
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "core/Macro.h"
|
2019-04-17 10:49:11 +08:00
|
|
|
#include <string.h>
|
2020-07-23 10:35:12 +08:00
|
|
|
#include <string>
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "core/TensorUtils.hpp"
|
2019-04-17 10:49:11 +08:00
|
|
|
|
2020-07-23 10:35:12 +08:00
|
|
|
using std::string;
|
2019-04-17 10:49:11 +08:00
|
|
|
namespace MNN {
|
|
|
|
namespace OpenCL {
|
|
|
|
|
2020-07-23 10:35:12 +08:00
|
|
|
/**
 * Returns a copy of computeOrigin in which the operand names "in0" and "in1"
 * are exchanged.
 *
 * Only a '0' or '1' that directly follows the two characters "in" is flipped.
 * Every other character is left untouched, so function names that merely
 * contain "in" (for example "min(" or "sin(") are not corrupted.
 *
 * @param computeOrigin expression text written in terms of in0 / in1.
 * @return the expression with the two operand names swapped.
 */
static std::string swapComputeIn0In1(const std::string& computeOrigin) {
    std::string compute = computeOrigin;
    // Start at 2: an operand digit needs the two-character prefix "in" before it.
    for (std::string::size_type i = 2; i < compute.length(); ++i) {
        const char operandDigit = compute[i];
        if (operandDigit != '0' && operandDigit != '1') {
            continue;
        }
        // Direct character comparison avoids a temporary substring per position.
        if (compute[i - 2] == 'i' && compute[i - 1] == 'n') {
            compute[i] = (operandDigit == '0') ? '1' : '0';
        }
    }
    return compute;
}
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
/**
 * Creates an element-wise execution bound to the given backend.
 *
 * @param inputs       input tensors of the op (not read by this constructor).
 * @param compute      expression text; stored in mCompute and added to
 *                     mBuildOptions as a "-DOPERATOR=<compute>" entry.
 * @param op           the op description, stored in mOp.
 * @param backend      backend forwarded to CommonExecution.
 * @param operatorData stored in mOperatorData.
 * @param broadCast    stored in mBroadCast.
 */
EltwiseExecution::EltwiseExecution(const std::vector<Tensor *> &inputs, const std::string &compute, const MNN::Op *op, Backend *backend,
                                   float operatorData, bool broadCast)
    : CommonExecution(backend), mCompute(compute), mBroadCast(broadCast), mOperatorData(operatorData) {
    mOp = op;

    // Record the expression as a build option; onResize hands mBuildOptions to buildKernel.
    std::string operatorDefine("-DOPERATOR=");
    operatorDefine += compute;
    mBuildOptions.emplace(operatorDefine);
}
|
|
|
|
|
2021-04-28 18:02:10 +08:00
|
|
|
uint32_t EltwiseExecution::realSize(const Tensor* tensor) {
|
|
|
|
uint32_t num = 1;
|
|
|
|
for(int i = 0; i < tensor->dimensions(); i++) {
|
|
|
|
num *= tensor->length(i);
|
|
|
|
}
|
|
|
|
return num;
|
|
|
|
}
|
|
|
|
|
2019-04-17 10:49:11 +08:00
|
|
|
/**
 * Prepares one "binary" kernel launch (one entry of mUnits) per pairwise step.
 *
 * With two inputs a single unit computes inputs[0] (op) inputs[1] into outputs[0].
 * With more inputs the operands are folded left to right: step 0 combines
 * inputs[0] and inputs[1], and every later step combines the previous step's
 * result with the next input. Results alternate between mTempOutput and
 * outputs[0]; the starting side is chosen so the last step writes outputs[0].
 *
 * Kernel arguments, in order: global size 0, global size 1, first operand image,
 * second operand image, destination image, shape[4], fullCount[2].
 * fullCount[k] is 0 when operand k has exactly one element (per realSize) and 1
 * otherwise; how the kernel interprets it is defined in the kernel source, which
 * is not part of this file.
 *
 * @param inputs  operand tensors; at least two are required (asserted).
 * @param outputs outputs[0] receives the final result.
 * @return NO_ERROR on success, OUT_OF_MEMORY if the temporary tensor cannot be acquired.
 */
ErrorCode EltwiseExecution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    MNN_ASSERT(inputs.size() >= 2);
    // N operands need N - 1 pairwise kernel launches.
    mUnits.resize(inputs.size() - 1);

    auto openCLBackend = static_cast<OpenCLBackend *>(backend());
    auto runTime       = openCLBackend->getOpenCLRuntime();
    auto finalOutput   = outputs[0];
    auto outputShape   = tensorShapeFormat(finalOutput);

    // The last entry is the fourth shape component divided by 4, rounded up.
    int shape[4]     = {outputShape[0], outputShape[1], outputShape[2], UP_DIV(outputShape[3], 4)};
    int fullCount[2] = {1, 1};

    // The first unit's kernel is built up front so its work-group limit can be queried.
    auto &firstUnit   = mUnits[0];
    firstUnit.kernel  = runTime->buildKernel("binary", "binary", mBuildOptions);
    mMaxWorkGroupSize = static_cast<uint32_t>(runTime->getMaxWorkGroupSize(firstUnit.kernel));

    mGlobalWorkSize = {static_cast<uint32_t>(UP_DIV(outputShape[3], 4)) * outputShape[2],
                       static_cast<uint32_t>(outputShape[0]) * outputShape[1]};

    // Kernel name handed to localWS2DDefault.
    std::string name = "binary";

    if (inputs.size() == 2) {
        fullCount[0] = realSize(inputs[0]) == 1 ? 0 : 1;
        fullCount[1] = realSize(inputs[1]) == 1 ? 0 : 1;

        uint32_t index = 0;
        firstUnit.kernel.setArg(index++, mGlobalWorkSize[0]);
        firstUnit.kernel.setArg(index++, mGlobalWorkSize[1]);
        firstUnit.kernel.setArg(index++, openCLImage(inputs[0]));
        firstUnit.kernel.setArg(index++, openCLImage(inputs[1]));
        firstUnit.kernel.setArg(index++, openCLImage(finalOutput));
        firstUnit.kernel.setArg(index++, shape);
        firstUnit.kernel.setArg(index++, fullCount);

        mLocalWorkSize = localWS2DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, name, firstUnit.kernel).first;

        firstUnit.globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1]};
        firstUnit.localWorkSize  = {mLocalWorkSize[0], mLocalWorkSize[1]};
        return NO_ERROR;
    }

    if (inputs.size() > 2) {
        // Scratch tensor with the same shape, type and dimension type as the output.
        mTempOutput.reset(Tensor::createDevice(finalOutput->shape(), finalOutput->getType(), finalOutput->getDimensionType()));
        bool res = openCLBackend->onAcquireBuffer(mTempOutput.get(), Backend::DYNAMIC);
        if (!res) {
            return OUT_OF_MEMORY;
        }
        // NOTE(review): released right after acquisition; presumably this follows the
        // backend's DYNAMIC memory-planning convention - confirm against the backend.
        openCLBackend->onReleaseBuffer(mTempOutput.get(), Backend::DYNAMIC);
    }

    // An odd operand count means an even number of steps, so the first step must
    // write the temporary for the final step to land in outputs[0].
    bool writeToTemp = (inputs.size() % 2 != 0);

    for (size_t step = 0; step + 1 < inputs.size(); ++step) {
        auto &unit  = mUnits[step];
        unit.kernel = runTime->buildKernel("binary", "binary", mBuildOptions);

        auto lhs     = inputs[0];
        fullCount[0] = realSize(lhs) == 1 ? 0 : 1;
        if (step > 0) {
            // The previous step wrote to the side opposite to this step's destination.
            lhs          = writeToTemp ? outputs[0] : mTempOutput.get();
            fullCount[0] = 1;
        }

        auto rhs     = inputs[step + 1];
        fullCount[1] = realSize(rhs) == 1 ? 0 : 1;

        auto dst    = writeToTemp ? mTempOutput.get() : outputs[0];
        writeToTemp = !writeToTemp;

        uint32_t index = 0;
        unit.kernel.setArg(index++, mGlobalWorkSize[0]);
        unit.kernel.setArg(index++, mGlobalWorkSize[1]);
        unit.kernel.setArg(index++, openCLImage(lhs));
        unit.kernel.setArg(index++, openCLImage(rhs));
        unit.kernel.setArg(index++, openCLImage(dst));
        unit.kernel.setArg(index++, shape);
        unit.kernel.setArg(index++, fullCount);

        if (step == 0) {
            // The local size is chosen once and reused by every later step.
            mLocalWorkSize = localWS2DDefault(mGlobalWorkSize, mMaxWorkGroupSize, runTime, name, unit.kernel).first;
        }

        unit.globalWorkSize = {mGlobalWorkSize[0], mGlobalWorkSize[1]};
        unit.localWorkSize  = {mLocalWorkSize[0], mLocalWorkSize[1]};
    }
    return NO_ERROR;
}
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
|
2019-04-17 10:49:11 +08:00
|
|
|
/**
 * Creator that maps Eltwise and BinaryOp ops to an EltwiseExecution configured
 * with the expression text for the requested operation.
 */
class EltwiseCreator : public OpenCLBackend::Creator {
public:
    /**
     * @param inputs  input tensors, forwarded to EltwiseExecution.
     * @param outputs output tensors (not read here).
     * @param op      op whose type and sub-type select the expression.
     * @param backend backend forwarded to EltwiseExecution.
     * @return a new EltwiseExecution, or nullptr when the op type or its
     *         operation is not one of the cases listed below.
     */
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        // Expression for the selected operation; stays null when unsupported.
        const char *expression = nullptr;

        if (op->type() == OpType_Eltwise) {
            switch (op->main_as_Eltwise()->type()) {
                case EltwiseType_SUM:
                    expression = "in0+in1";
                    break;
                case EltwiseType_SUB:
                    expression = "in0-in1";
                    break;
                case EltwiseType_PROD:
                    expression = "in0*in1";
                    break;
                case EltwiseType_MAXIMUM:
                    expression = "in0>in1?in0:in1";
                    break;
                default:
                    break;
            }
        } else if (op->type() == OpType_BinaryOp) {
            MNN_ASSERT(inputs.size() > 1);

            // The division-based expressions below share one pattern: the divisor's
            // magnitude is replaced by 0.0000001 when it is not greater than that
            // value, and the divisor's sign is applied through sign(in1).
            switch (op->main_as_BinaryOp()->opType()) {
                case BinaryOpOperation_MUL:
                    expression = "in0*in1";
                    break;
                case BinaryOpOperation_ADD:
                    expression = "in0+in1";
                    break;
                case BinaryOpOperation_SUB:
                    expression = "in0-in1";
                    break;
                case BinaryOpOperation_REALDIV:
                    expression = "sign(in1)*in0/(fabs(in1)>(FLOAT4)((FLOAT)0.0000001)?fabs(in1):(FLOAT4)((FLOAT)0.0000001))";
                    break;
                case BinaryOpOperation_MINIMUM:
                    expression = "in0>in1?in1:in0";
                    break;
                case BinaryOpOperation_MAXIMUM:
                    expression = "in0>in1?in0:in1";
                    break;
                case BinaryOpOperation_GREATER:
                    expression = "convert_float4(isgreater(in0,in1))";
                    break;
                case BinaryOpOperation_LESS:
                    expression = "convert_float4(isless(in0,in1))";
                    break;
                case BinaryOpOperation_LESS_EQUAL:
                    expression = "convert_float4(islessequal(in0,in1))";
                    break;
                case BinaryOpOperation_GREATER_EQUAL:
                    expression = "convert_float4(isgreaterequal(in0,in1))";
                    break;
                case BinaryOpOperation_EQUAL:
                    expression = "convert_float4(isequal(in0,in1))";
                    break;
                case BinaryOpOperation_FLOORDIV:
                    expression = "floor(sign(in1)*in0/(fabs(in1)>(FLOAT4)((FLOAT)0.0000001)?fabs(in1):(FLOAT4)((FLOAT)0.0000001)))";
                    break;
                case BinaryOpOperation_FLOORMOD:
                    expression = "in0-floor(sign(in1)*in0/(fabs(in1)>(FLOAT4)((FLOAT)0.0000001)?fabs(in1):(FLOAT4)((FLOAT)0.0000001)))*in1";
                    break;
                case BinaryOpOperation_POW:
                    expression = "pow(in0,in1)";
                    break;
                case BinaryOpOperation_SquaredDifference:
                    expression = "(in0-in1)*(in0-in1)";
                    break;
                case BinaryOpOperation_ATAN2:
                    // NOTE(review): single-argument atan of the guarded quotient, not the
                    // two-argument atan2 built-in - confirm the quadrant handling is intended.
                    expression = "atan(sign(in1)*in0/(fabs(in1)>(FLOAT4)((FLOAT)0.0000001)?fabs(in1):(FLOAT4)((FLOAT)0.0000001)))";
                    break;
                case BinaryOpOperation_NOTEQUAL:
                    expression = "convert_float4(isnotequal(in0,in1))";
                    break;
                case BinaryOpOperation_MOD:
                    // NOTE(review): evaluates in0 minus the guarded quotient in0/in1, with no
                    // rounding step and no multiplication by in1 (compare FLOORMOD above) -
                    // confirm this matches the intended MOD semantics.
                    expression = "in0-sign(in1)*in0/(fabs(in1)>(FLOAT4)((FLOAT)0.0000001)?fabs(in1):(FLOAT4)((FLOAT)0.0000001))";
                    break;
                default:
                    break;
            }
        }

        if (expression == nullptr) {
            return nullptr;
        }
        return new EltwiseExecution(inputs, expression, op, backend);
    }
};
|
|
|
|
|
2021-03-12 18:41:50 +08:00
|
|
|
// Registers EltwiseCreator for OpType_Eltwise with the IMAGE argument
// (presumably the image-memory OpenCL path - confirm against OpenCLCreatorRegister).
OpenCLCreatorRegister<EltwiseCreator> __eltwise_op(OpType_Eltwise, IMAGE);
|
|
|
|
// Registers EltwiseCreator for OpType_BinaryOp with the IMAGE argument
// (presumably the image-memory OpenCL path - confirm against OpenCLCreatorRegister).
OpenCLCreatorRegister<EltwiseCreator> __binary_op(OpType_BinaryOp, IMAGE);
|
2019-04-17 10:49:11 +08:00
|
|
|
|
|
|
|
} // namespace OpenCL
|
|
|
|
} // namespace MNN
|