2020-11-05 16:41:56 +08:00
|
|
|
#include "BinaryExecution.hpp"
|
2021-06-11 17:17:13 +08:00
|
|
|
#include "Raster.cuh"
|
2020-11-05 16:41:56 +08:00
|
|
|
namespace MNN {
|
|
|
|
namespace CUDA {
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
__global__ void ATAN2(const T *input0, const T* input1, T *output, size_t count, size_t s0, size_t s1) {
|
|
|
|
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
|
|
|
|
T x = input0[i * s0];
|
|
|
|
T y = input1[i * s1];
|
|
|
|
output[i] = atan2(x, y);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
__global__ void MOD(const T *input0, const T* input1, T *output, size_t count, size_t s0, size_t s1) {
|
|
|
|
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
|
|
|
|
T x = input0[i * s0];
|
|
|
|
T y = input1[i * s1];
|
|
|
|
output[i] = x - x / y;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
__global__ void LOGICALOR(const T *input0, const T* input1, T *output, size_t count, size_t s0, size_t s1) {
|
|
|
|
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
|
|
|
|
T x = input0[i * s0];
|
|
|
|
T y = input1[i * s1];
|
|
|
|
output[i] = (x || y) ? 1 : 0;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2022-09-30 10:02:52 +08:00
|
|
|
BinaryExecution::BinaryExecution(int opType, Backend *backend, int activationType) : Execution(backend) {
|
2020-11-05 16:41:56 +08:00
|
|
|
mType = opType;
|
2022-09-30 10:02:52 +08:00
|
|
|
mActivationType = activationType;
|
2020-11-05 16:41:56 +08:00
|
|
|
}
|
|
|
|
BinaryExecution::~BinaryExecution(){
|
|
|
|
// Do nothing
|
|
|
|
}
|
|
|
|
ErrorCode BinaryExecution::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
|
|
|
|
auto count = CUDABackend::realSize(outputs[0]);
|
|
|
|
auto inputS0 = CUDABackend::realSize(inputs[0]);
|
|
|
|
auto inputS1 = CUDABackend::realSize(inputs[1]);
|
|
|
|
int s0 = inputS0 == 1 ? 0 : 1;
|
|
|
|
int s1 = inputS1 == 1 ? 0 : 1;
|
|
|
|
auto runtime = static_cast<CUDABackend*>(backend())->getCUDARuntime();
|
|
|
|
//printf("%d - %d\n", block_num, threads_num);
|
2021-06-11 17:17:13 +08:00
|
|
|
int size[3] = {1, 1, count};
|
|
|
|
int stride0[3] = {0, 0, s0};
|
|
|
|
int stride1[3] = {0, 0, s1};
|
|
|
|
int stride2[3] = {0, 0, 1};
|
2023-06-16 09:42:45 +08:00
|
|
|
|
2022-02-18 11:30:27 +08:00
|
|
|
auto type = outputs[0]->getType();
|
|
|
|
if (type.code == halide_type_float) {
|
|
|
|
// Use Half or float
|
|
|
|
type.bits = static_cast<CUDABackend*>(backend())->getBytes(inputs[0]) * 8;
|
|
|
|
}
|
2023-06-16 09:42:45 +08:00
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
auto computeFunction = [&](Tensor* input0T, Tensor* input1T, Tensor* outputT) {
|
2021-06-11 17:17:13 +08:00
|
|
|
auto input0 = (uint8_t*)input0T->deviceId();
|
|
|
|
auto input1 = (uint8_t*)input1T->deviceId();
|
|
|
|
auto output = (uint8_t*)outputT->deviceId();
|
2022-09-30 10:02:52 +08:00
|
|
|
BinaryBlit(output, input0, input1, size, stride0, stride1, stride2, type, runtime, mType, mActivationType);
|
2020-11-05 16:41:56 +08:00
|
|
|
};
|
|
|
|
computeFunction(inputs[0], inputs[1], outputs[0]);
|
|
|
|
for (int i=2; i<inputs.size(); ++i) {
|
|
|
|
computeFunction(outputs[0], inputs[i], outputs[0]);
|
|
|
|
}
|
|
|
|
return NO_ERROR;
|
|
|
|
}
|
|
|
|
class BinaryCreator : public CUDABackend::Creator {
|
|
|
|
public:
|
|
|
|
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
|
|
|
|
const MNN::Op* op, Backend* backend) const override {
|
|
|
|
if (op->type() == OpType_BinaryOp) {
|
2023-06-16 09:42:45 +08:00
|
|
|
#ifdef ENABLE_CUDA_QUANT
|
|
|
|
if (CUDABackend::getDataType(inputs[0]) == DataType_DT_INT8 || inputs[0]->getType().bytes() == 1) {
|
|
|
|
return new BinaryInt8Execution(op, backend);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
// MNN_PRINT("binary act:%d %d\n", op->main_as_BinaryOp()->opType(), op->main_as_BinaryOp()->activationType());
|
2022-09-30 10:02:52 +08:00
|
|
|
return new BinaryExecution(op->main_as_BinaryOp()->opType(), backend, op->main_as_BinaryOp()->activationType());
|
2020-11-05 16:41:56 +08:00
|
|
|
}
|
|
|
|
if (op->type() == OpType_Eltwise) {
|
|
|
|
switch (op->main_as_Eltwise()->type()) {
|
|
|
|
case EltwiseType_SUM:
|
|
|
|
return new BinaryExecution(BinaryOpOperation_ADD, backend);
|
|
|
|
case EltwiseType_PROD:
|
|
|
|
return new BinaryExecution(BinaryOpOperation_MUL, backend);
|
|
|
|
case EltwiseType_MAXIMUM:
|
|
|
|
return new BinaryExecution(BinaryOpOperation_MAXIMUM, backend);
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
static CUDACreatorRegister<BinaryCreator> __init(OpType_BinaryOp);
|
|
|
|
static CUDACreatorRegister<BinaryCreator> __init2(OpType_Eltwise);
|
|
|
|
}
|
|
|
|
}
|