2019-04-17 10:49:11 +08:00
|
|
|
//
|
|
|
|
// EltwiseExecution.cpp
|
|
|
|
// MNN
|
|
|
|
//
|
|
|
|
// Created by MNN on 2019/02/28.
|
|
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
|
|
//
|
|
|
|
|
2021-03-12 18:41:50 +08:00
|
|
|
#include "backend/opencl/execution/image/EltwiseExecution.hpp"
|
2019-04-17 10:49:11 +08:00
|
|
|
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "core/Macro.h"
|
2019-04-17 10:49:11 +08:00
|
|
|
#include <string.h>
|
2020-07-23 10:35:12 +08:00
|
|
|
#include <string>
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "core/TensorUtils.hpp"
|
2019-04-17 10:49:11 +08:00
|
|
|
|
2020-07-23 10:35:12 +08:00
|
|
|
using std::string;
|
2019-04-17 10:49:11 +08:00
|
|
|
namespace MNN {
|
|
|
|
namespace OpenCL {
|
|
|
|
|
2020-07-23 10:35:12 +08:00
|
|
|
// Swap the roles of "in0" and "in1" inside an OPERATOR expression string,
// e.g. "in0-in1" -> "in1-in0". Used when the kernel's operand order is
// reversed for broadcasting so the compiled expression stays equivalent.
//
// Fix: only flip the character when it is actually the digit '0' or '1'
// immediately following an "in" token. The previous version rewrote ANY
// character after "in" (so an expression containing e.g. "min(" would have
// its '(' corrupted into '0').
static std::string swapComputeIn0In1(const std::string& computeOrigin) {
    std::string compute = computeOrigin;
    for (std::string::size_type i = 2; i < compute.length(); ++i) {
        // Toggle only the operand index digit that directly follows "in".
        if (compute.substr(i - 2, 2) == "in" && (compute[i] == '0' || compute[i] == '1')) {
            compute[i] = (compute[i] == '0' ? '1' : '0');
        }
    }
    return compute;
}
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
// Builds one eltwise/binary execution. `compute` is the scalar OpenCL
// expression (e.g. "in0+in1") that the kernel source references as OPERATOR;
// it is injected at kernel-compile time through the build options.
EltwiseExecution::EltwiseExecution(const std::vector<Tensor *> &inputs, const std::string &compute, const MNN::Op *op, Backend *backend,
                                   float operatorData, bool broadCast)
    : CommonExecution(backend), mCompute(compute), mBroadCast(broadCast), mOperatorData(operatorData) {
    // Bind the OPERATOR macro to this op's expression for all kernels we build.
    const std::string operatorOption = "-DOPERATOR=" + compute;
    mBuildOptions.emplace(operatorOption);
    mOp = op;
}
|
|
|
|
|
|
|
|
ErrorCode EltwiseExecution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    // Eltwise/binary op over >= 2 inputs, evaluated as a chain of pairwise
    // steps: r1 = op(in0, in1), r2 = op(r1, in2), ... -> outputs[0].
    // One kernel Unit is recorded per pairwise step.
    MNN_ASSERT(inputs.size() >= 2);
    mUnits.resize(inputs.size() - 1);

    auto openCLBackend = static_cast<OpenCLBackend*>(backend());

    // Layout-normalized (N, H, W, C) shapes of the first input and the output.
    auto nhwc0 = tensorShapeFormat(inputs[0]);
    auto nhwc = tensorShapeFormat(outputs[0]);

    // OpenCL image extent: channels are packed 4-per-pixel, so
    // image width = W * ceil(C/4) and image height = N * H.
    int nhwcArray[] = {nhwc[0], nhwc[1], nhwc[2], UP_DIV(nhwc[3], 4)};
    auto imageWidth = nhwcArray[2] * nhwcArray[3];
    auto imageHeight = nhwcArray[0] * nhwcArray[1];

    int wh0[] = {nhwc0[2], nhwc0[1]};
    int wh[] = {nhwc[2], nhwc[1]};

    // Unit stride placeholder for the non-broadcast kernels' slot 5.
    int input1Stride[] = {1, 1, 1, 1};
    // Global size = image extent rounded up to multiples of the 4x4 local size.
    cl::NDRange localSize = {4, 4};
    cl::NDRange globalSize = {(uint32_t)UP_DIV(imageWidth, 4) * 4, (uint32_t)UP_DIV(imageHeight, 4) * 4};
    if (inputs.size() > 2) {
        // More than two inputs: a scratch tensor is needed to ping-pong
        // intermediate results (see the long note inside the loop). It is
        // released immediately so the dynamic pool can plan reuse; the memory
        // stays valid for this execution's kernels.
        auto output = outputs[0];
        mTempOutput.reset(Tensor::createDevice(output->shape(), output->getType(), output->getDimensionType()));
        bool res = openCLBackend->onAcquireBuffer(mTempOutput.get(), Backend::DYNAMIC);
        if (!res) {
            return OUT_OF_MEMORY;
        }
        openCLBackend->onReleaseBuffer(mTempOutput.get(), Backend::DYNAMIC);
    }

    auto runTime = ((OpenCLBackend *)backend())->getOpenCLRuntime();
    // Choose the starting ping-pong side so that after all steps the final
    // result lands in outputs[0] rather than in the scratch tensor.
    bool useTempAsOutput = (inputs.size() % 2 != 0);
    for (int i = 0; i < inputs.size(); ++i) {
        // i == 1 is consumed together with i == 0 by the first pairwise step.
        if (i == 1)
            continue;

        auto &unit = (i >= 2) ? mUnits[i - 1] : mUnits[i];
        int dimension = (i >= 2) ? inputs[i]->dimensions() : inputs[i + 1]->dimensions();
        // Element count of this step's second operand; 1 means scalar operand.
        int nums = 1;
        const auto& shape = (i >= 2) ? inputs[i]->shape() : inputs[i + 1]->shape();
        for (auto axis_len:shape) {
            nums*=axis_len;
        }
        /*
         DONT REMOVE THIS!!!!!
         When we do binary operation on many (>= 3) input image2d_t, we need:
         fun(outputs[0], inputs[i]) -> temp, then fun(temp, inputs[i+1]) -> outputs[0] and so on,
         instead of fun(outputs[0], inputs[i]) -> outputs[0]

         It's very very important for correctness on many common GPUs (Intel Iris GPU on MacBook Pro 15, for example) on Opencl 1.2.
         Opencl 1.2 do not guarantee correctness for kernel using same image2d_t as input and output, because Opencl 1.2 specification
         only support __read_only and __write_only, no include __read_write which is support on Opencl 2.x
         Your device may support it and get right result if remove this, but it is defined by the specification.
         If you insist on modifying this, please please contact hebin first. Thank you very much.
        */
        const Tensor* input0 = inputs[0];
        if (i >= 2) {
            // Chained step: the first operand is the previous step's output
            // (the side NOT being written this step).
            input0 = useTempAsOutput ? outputs[0] : mTempOutput.get();
        }
        auto output = useTempAsOutput ? mTempOutput.get() : outputs[0];
        useTempAsOutput = !useTempAsOutput;

        if(dimension == 0 || nums == 1) {
            // Scalar (single-element) second operand: dedicated kernel.
            auto input = (i >= 2) ? inputs[i] : inputs[i + 1];
            unit.kernel = runTime->buildKernel("binary", "binary_value", mBuildOptions);
            unit.kernel.setArg(0, openCLImage(input0));
            unit.kernel.setArg(1, openCLImage(input));
            unit.kernel.setArg(2, openCLImage(output));
            unit.kernel.setArg(3, nhwcArray);
            unit.kernel.setArg(4, wh);
            unit.kernel.setArg(5, input1Stride);
        } else {
            const Tensor* input = (i >= 2) ? inputs[i] : inputs[i + 1];
            // Shapes of the two operands of this step ("_0" is the first,
            // "_1"/"1" the second).
            auto nhwc_0 = (i >= 2) ? nhwc : nhwc0;
            auto wh_v = (i >= 2) ? wh : wh0;
            int wh_0[] = {wh_v[0], wh_v[1]};
            auto nhwc_1 = tensorShapeFormat(input);
            int wh1[] = {nhwc_1[2], nhwc_1[1]};
            // Any mismatched NHWC dim forces the broadcast path.
            for (int dim = 0; dim < nhwc_0.size(); dim++) {
                if (nhwc_0[dim] != nhwc_1[dim]) {
                    mBroadCast = true;
                    break;
                }
            }

            if (mBroadCast) {
                if (nhwc_0[3] != nhwc_1[3]) {
                    // Channel counts differ: 1-to-M channel broadcast. The
                    // single-channel operand is kept in kernel slot 0.
                    if (nhwc_0[3] == 1) {
                        unit.kernel = (wh_0[0] != 1 && wh_0[1] != 1) ?
                            runTime->buildKernel("binary",
                                                 "binary_1toM_channel_broadcast_on_awh", mBuildOptions) :
                            runTime->buildKernel("binary",
                                                 "binary_1toM_channel_broadcast_on_1wh", mBuildOptions);
                        unit.kernel.setArg(0, openCLImage(input0));
                        unit.kernel.setArg(1, openCLImage(input));
                        unit.kernel.setArg(4, wh_0);
                        unit.kernel.setArg(5, wh1);
                    } else {
                        // Second operand is the single-channel one: swap the
                        // operands and rewrite OPERATOR so in0/in1 roles flip.
                        // NOTE(review): this mutates mBuildOptions for the rest
                        // of the loop as well — confirm later iterations are
                        // meant to see the swapped expression.
                        mBuildOptions.erase("-DOPERATOR=" + mCompute);
                        mBuildOptions.emplace("-DOPERATOR=" + swapComputeIn0In1(mCompute));

                        unit.kernel = (wh1[0] != 1 && wh1[1] != 1) ?
                            runTime->buildKernel("binary",
                                                 "binary_1toM_channel_broadcast_on_awh", mBuildOptions) :
                            runTime->buildKernel("binary",
                                                 "binary_1toM_channel_broadcast_on_1wh", mBuildOptions);
                        unit.kernel.setArg(0, openCLImage(input));
                        unit.kernel.setArg(1, openCLImage(input0));
                        unit.kernel.setArg(4, wh1);
                        unit.kernel.setArg(5, wh_0);
                    }
                    // Common args for both channel-broadcast variants.
                    unit.kernel.setArg(2, openCLImage(output));
                    unit.kernel.setArg(3, nhwcArray);
                    unit.kernel.setArg(6, wh);
                } else {
                    // Same channel count: broadcast over spatial dims only.
                    unit.kernel = runTime->buildKernel("binary",
                                                       "binary_same_channel_broadcast", mBuildOptions);
                    if (wh_0[0] == 1 || wh_0[1] == 1) {
                        unit.kernel.setArg(0, openCLImage(input0));
                        unit.kernel.setArg(1, openCLImage(input));
                        unit.kernel.setArg(4, wh_0);
                        unit.kernel.setArg(5, wh1);

                    } else {
                        // Swap operands so the degenerate (w==1 or h==1) side
                        // sits in slot 0, flipping OPERATOR to compensate.
                        // NOTE(review): the kernel was already built above with
                        // the un-swapped OPERATOR and is not rebuilt here —
                        // verify whether a rebuild is required for correctness.
                        mBuildOptions.erase("-DOPERATOR=" + mCompute);
                        mBuildOptions.emplace("-DOPERATOR=" + swapComputeIn0In1(mCompute));

                        unit.kernel.setArg(0, openCLImage(input));
                        unit.kernel.setArg(1, openCLImage(input0));
                        unit.kernel.setArg(4, wh1);
                        unit.kernel.setArg(5, wh_0);
                    }
                    unit.kernel.setArg(2, openCLImage(output));
                    unit.kernel.setArg(3, nhwcArray);
                    unit.kernel.setArg(6, wh);
                }
            } else {
                // Identical shapes: plain elementwise kernel.
                unit.kernel = runTime->buildKernel("binary", "binary", mBuildOptions);
                unit.kernel.setArg(0, openCLImage(input0));
                unit.kernel.setArg(1, openCLImage(input));
                unit.kernel.setArg(2, openCLImage(output));
                unit.kernel.setArg(3, nhwcArray);
                unit.kernel.setArg(4, wh);
                unit.kernel.setArg(5, input1Stride);
            }
        }
        // All pairwise steps run over the same output-image-sized NDRange.
        unit.globalWorkSize = globalSize;
        unit.localWorkSize = localSize;
    }
    return NO_ERROR;
}
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
|
2019-04-17 10:49:11 +08:00
|
|
|
// Creates EltwiseExecution instances for OpType_Eltwise and OpType_BinaryOp by
// translating each op into the scalar OpenCL expression that the kernels
// compile in as the OPERATOR macro. Returns nullptr for unsupported ops.
class EltwiseCreator : public OpenCLBackend::Creator {
public:
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        if (op->type() == OpType_Eltwise) {
            const char *expr = nullptr;
            switch (op->main_as_Eltwise()->type()) {
                case EltwiseType_SUM:
                    expr = "in0+in1";
                    break;
                case EltwiseType_SUB:
                    expr = "in0-in1";
                    break;
                case EltwiseType_PROD:
                    expr = "in0*in1";
                    break;
                case EltwiseType_MAXIMUM:
                    expr = "in0>in1?in0:in1";
                    break;
                default:
                    break;
            }
            if (expr == nullptr) {
                return nullptr;
            }
            return new EltwiseExecution(inputs, expr, op, backend);
        }

        if (op->type() == OpType_BinaryOp) {
            MNN_ASSERT(inputs.size() > 1);
            // Guarded division: sign(in1)*in0 / max(|in1|, eps) avoids NaN/Inf
            // from a zero divisor. DIV and REALDIV share the same expression.
            const char *safeDiv =
                "sign(in1)*in0/(fabs(in1)>(FLOAT4)((FLOAT)0.0000001)?fabs(in1):(FLOAT4)((FLOAT)0.0000001))";
            const char *expr = nullptr;
            switch (op->main_as_BinaryOp()->opType()) {
                case BinaryOpOperation_ADD:
                    expr = "in0+in1";
                    break;
                case BinaryOpOperation_SUB:
                    expr = "in0-in1";
                    break;
                case BinaryOpOperation_MUL:
                    expr = "in0*in1";
                    break;
                case BinaryOpOperation_POW:
                    expr = "pow(in0,in1)";
                    break;
                case BinaryOpOperation_DIV:
                case BinaryOpOperation_REALDIV:
                    expr = safeDiv;
                    break;
                case BinaryOpOperation_MAXIMUM:
                    expr = "in0>in1?in0:in1";
                    break;
                case BinaryOpOperation_MINIMUM:
                    expr = "in0>in1?in1:in0";
                    break;
                default:
                    break;
            }
            if (expr == nullptr) {
                return nullptr;
            }
            return new EltwiseExecution(inputs, expr, op, backend);
        }
        return nullptr;
    }
};
|
|
|
|
|
2021-03-12 18:41:50 +08:00
|
|
|
// Register this creator for both the legacy Eltwise op and the generic
// BinaryOp on the image-memory OpenCL backend.
OpenCLCreatorRegister<EltwiseCreator> __eltwise_op(OpType_Eltwise, IMAGE);
OpenCLCreatorRegister<EltwiseCreator> __binary_op(OpType_BinaryOp, IMAGE);
|
2019-04-17 10:49:11 +08:00
|
|
|
|
|
|
|
} // namespace OpenCL
|
|
|
|
} // namespace MNN
|