//
//  CPUUnary.cpp
//  MNN
//
//  Created by MNN on 2018/08/02.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUUnary.hpp"
#include <cmath>
#include "backend/cpu/CPUBackend.hpp"
#include "core/Macro.h"
#include "core/Concurrency.h"
#include "compute/ConvOpt.h"
#include "compute/CommonOptFunction.h"
#include <MNN/AutoTime.hpp>
#include <vector>
#include <limits>

namespace MNN {
CPUUnary::CPUUnary(Backend *b, UnaryOpOperation type) : MNN::Execution(b), mType(type) {
    // nothing to do
}

ErrorCode CPUUnary::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    MNN_ASSERT(1 == outputs.size());
    MNN_ASSERT(inputs[0]->getType() == halide_type_of<float>() || inputs[0]->getType() == halide_type_of<int32_t>());
    return NO_ERROR;
}

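// Generic scalar fallback: applies the functor Func elementwise over a raw
// buffer. The vectorized kernels dispatched in onExecute (MNNExp, MNNTanh,
// ...) are preferred where available; this path covers everything else.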
template <typename Func, typename T>
static void _unaryOp(void* inputPtr, void* outputPtr, int elementSize) {
    Func f;
    const T *inputData = (T *)inputPtr;
    T *outputData = (T *)outputPtr;
    for (int i = 0; i < elementSize; ++i) {
        outputData[i] = f(inputData[i]);
    }
}

template <typename T>
struct UnarySquare : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return x * x;
    }
};

template <typename T>
struct UnaryRsqrt : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return 1.f / sqrt(x);
    }
};

template <typename T>
struct UnarySqrt : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return sqrt(x);
    }
};

template <typename T>
struct UnaryNeg : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return -x;
    }
};

template <typename T>
struct UnaryExp : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return exp(x);
    }
};

template <typename T>
struct UnaryAbs : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return abs(x);
    }
};

template <typename T>
struct UnaryCeil : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return ceil(x);
    }
};

template <typename T>
struct UnaryRecipocal : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)1 / (x);
    }
};

template <typename T>
struct UnaryLog1p : std::unary_function<T, T> {
    T operator()(const T &x) const {
        // log1pf keeps precision for small |x|, which logf(1 + x) would lose.
        return (T)log1pf((float)(x));
    }
};

template <typename T>
struct UnaryLog : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)logf((T)(x));
    }
};

template <typename T>
struct UnaryCos : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)cosf((T)(x));
    }
};

template <typename T>
struct UnarySin : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)sinf((T)(x));
    }
};

template <typename T>
struct UnaryTan : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)tanf((T)(x));
    }
};

template <typename T>
struct UnaryATan : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)atanf((T)(x));
    }
};

template <typename T>
struct UnaryFloor : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)floor((T)(x));
    }
};

template <typename T>
struct UnarySign : std::unary_function<T, T> {
    T operator()(const T &x) const {
        if (x > 0) {
            return 1;
        }
        if (x < 0) {
            return -1;
        }
        return 0;
    }
};

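// BNLL is softplus: log(1 + exp(x)). The branch keeps the argument of exp()
// non-positive (x + log(1 + exp(-x)) for x > 0), avoiding overflow for large x.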
template <typename T>
struct UnaryBNLL : std::unary_function<T, T> {
    T operator()(const T &x) const {
        float r = x > 0 ? (x + log(1. + exp(-x))) : log(1. + exp(x));
        return (T)r;
    }
};

template <typename T>
struct UnaryAcosh : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)acoshf((T)(x));
    }
};

template <typename T>
struct UnarySinh : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)sinhf((T)(x));
    }
};

template <typename T>
struct UnaryAsinh : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)asinhf((T)(x));
    }
};

template <typename T>
struct UnaryAtanh : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)atanhf((T)(x));
    }
};

template <typename T>
struct UnaryRound : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)roundf((T)(x));
    }
};

template <typename T>
struct UnaryCosh : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)coshf((T)(x));
    }
};

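// Horner's scheme: evaluates a polynomial at x given its coefficients,
// highest-order term first.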
template <typename T>
T evalPoly(T x, const std::vector<float> &coefficients) {
    auto poly = 0.0f;
    for (auto c : coefficients) {
        poly = poly * x + c;
    }
    return poly;
}

template <typename T>
T erfImpl(T x) {
    // Coefficients for erf(f32) on |x| < 1, from Cephes (via TensorFlow).
    static const std::vector<float> kErfTCoefficient{
        +7.853861353153693E-5f, -8.010193625184903E-4f, +5.188327685732524E-3f,
        -2.685381193529856E-2f, +1.128358514861418E-1f, -3.761262582423300E-1f,
        +1.128379165726710E+0f,
    };
    return x * evalPoly(x * x, kErfTCoefficient);
}

template <typename T>
T erfcImpl(T x) {
    // Coefficients for erfc(f32), from Cephes (via TensorFlow).
    const double kMaxlog = 88.72283905206835;
    // erfc(x) = exp(-x^2) P(1/x^2), 1 < x < 2
    static const std::vector<float> kErfcPCoefficient{
        +2.326819970068386E-2f, -1.387039388740657E-1f, +3.687424674597105E-1f,
        -5.824733027278666E-1f, +6.210004621745983E-1f, -4.944515323274145E-1f,
        +3.404879937665872E-1f, -2.741127028184656E-1f, +5.638259427386472E-1f,
    };
    // erfc(x) = exp(-x^2) R(1/x^2), 2 <= x < kMaxlog
    static const std::vector<float> kErfcRCoefficient{
        -1.047766399936249E+1f, +1.297719955372516E+1f, -7.495518717768503E+0f,
        +2.921019019210786E+0f, -1.015265279202700E+0f, +4.218463358204948E-1f,
        -2.820767439740514E-1f, +5.641895067754075E-1f,
    };
    float absX = fabsf(x);
    float z = expf(-x * x);
    float q = 1.0f / absX;
    float y = q * q;
    float p;
    if (absX < 2.0f) {
        p = evalPoly(y, kErfcPCoefficient);
    } else {
        p = evalPoly(y, kErfcRCoefficient);
    }
    y = z * q * p;
    float yClamp;
    if (-x * x < -kMaxlog) { // exp(-x^2) underflows to zero beyond this point
        yClamp = 0.0f;
    } else {
        yClamp = y;
    }
    if (x < 0) {
        return T(2.0f - yClamp);
    } else {
        return T(yClamp);
    }
}

template <typename T>
struct UnaryErf : std::unary_function<T, T> {
    T operator()(const T &x) const {
        if (abs(x) < T(1.)) {
            return erfImpl(x);
        } else {
            return T(1.) - erfcImpl(x);
        }
    }
};

template <typename T>
struct UnaryErfc : std::unary_function<T, T> {
    T operator()(const T &x) const {
        if (abs(x) > T(1.)) {
            return erfcImpl(x);
        } else {
            return T(1.) - erfImpl(x);
        }
    }
};

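// Inverse error function following TensorFlow's polynomial approximation:
// two coefficient sets selected on w = -log(1 - x^2); w < 5 covers the
// central region, w >= 5 the tails.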
template <typename T>
struct UnaryErfinv : std::unary_function<T, T> {
    // referenced from tensorflow
    const int kDegree = 9;
    const std::vector<float> w_less_than_5_constants = {
        2.81022636e-08f, 3.43273939e-07f, -3.5233877e-06f,
        -4.39150654e-06f, 0.00021858087f, -0.00125372503f,
        -0.00417768164f, 0.246640727f, 1.50140941f};
    const std::vector<float> w_greater_than_5_constants = {
        -0.000200214257f, 0.000100950558f, 0.00134934322f,
        -0.00367342844f, 0.00573950773f, -0.0076224613f,
        0.00943887047f, 1.00167406f, 2.83297682f};

    T operator()(const T &x) const {
        // Compute logarithm of (1+arg) using log1p(arg) which is more precise than
        // log(1+arg) when arg is close to zero. For more details, see
        // https://en.cppreference.com/w/cpp/numeric/math/log1p
        auto w = -log1p(-x * x);
        bool lt = (w < 5.0);
        auto coefficient = [&](int i) {
            if (lt) {
                return w_less_than_5_constants[i];
            } else {
                return w_greater_than_5_constants[i];
            }
        };
        if (lt) {
            w = w - 2.5;
        } else {
            w = sqrt(w) - 3.0;
        }
        auto p = coefficient(0);
        for (int i = 1; i < kDegree; i++) {
            p = coefficient(i) + p * w;
        }
        auto result = p * x;
        if (fabsf(fabsf(x) - 1) < 1e-8) {
            // erfinv(±1) diverges; return an infinity carrying the sign of x.
            return std::copysign(std::numeric_limits<float>::infinity(), (float)x);
        } else {
            return result;
        }
    }
};

template <typename T>
struct UnaryExpm1 : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)expm1((T)(x));
    }
};

template <typename T>
struct UnaryAsin : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)asin((T)(x));
    }
};

template <typename T>
struct UnaryAcos : std::unary_function<T, T> {
    T operator()(const T &x) const {
        return (T)acos((T)(x));
    }
};

ErrorCode CPUUnary::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    auto input = inputs[0];
    auto output = outputs[0];
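    // Integer tensors are handled first via the scalar template path; only
    // ABS, NEG, SQUARE and SIGN are supported for int32.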
    auto dtype = input->getType().code;
    if (dtype == halide_type_int) {
        switch (mType) {
            case UnaryOpOperation_ABS:
                _unaryOp<UnaryAbs<int32_t>, int32_t>(input->host<void>(), output->host<void>(), input->elementSize());
                break;
            case UnaryOpOperation_NEG:
                _unaryOp<UnaryNeg<int32_t>, int32_t>(input->host<void>(), output->host<void>(), input->elementSize());
                break;
            case UnaryOpOperation_SQUARE:
                _unaryOp<UnarySquare<int32_t>, int32_t>(input->host<void>(), output->host<void>(), input->elementSize());
                break;
            case UnaryOpOperation_SIGN:
                _unaryOp<UnarySign<int32_t>, int32_t>(input->host<void>(), output->host<void>(), input->elementSize());
                break;
            default:
                MNN_ERROR("Unary op type %d does not support int32 input\n", mType);
                break;
        }
        return NO_ERROR;
    }
    auto size = input->elementSize();
    auto schedule = ((CPUBackend*)backend())->multiThreadDivide(size);
    auto inputPtr = input->host<float>();
    auto outputPtr = output->host<float>();
    auto precision = static_cast<CPUBackend*>(backend())->precisionMode();
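    // multiThreadDivide returns {chunk size, chunk count}; each job below
    // processes one contiguous chunk, with the last one taking the remainder.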
    MNN_CONCURRENCY_BEGIN(tId, schedule.second) {
        int start = schedule.first * (int)tId;
        int realSize = schedule.first;
        if (tId == schedule.second - 1) {
            realSize = size - start;
        }
        if (realSize > 0) {
            auto inp = inputPtr + start;
            auto out = outputPtr + start;
            switch (mType) {
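                // |x| computed as a "ReLU" with negative slope -1:
                // x >= 0 -> x, x < 0 -> -x.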
                case UnaryOpOperation_ABS:
                    MNNReluWithSlopeCommon(out, inp, realSize, -1.0f);
                    break;
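                // x * x via the vectorized elementwise product of inp with itself.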
                case UnaryOpOperation_SQUARE:
                    MNNMatrixProdCommon(out, inp, inp, realSize, 0, 0, 0, 1);
                    break;
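                // Negation reuses the fused scale/bias kernel: out = -1 * inp + 0.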
                case UnaryOpOperation_NEG:
                    MNNScaleAndAddBiasScalar(out, inp, 0.0f, -1.0f, realSize);
                    break;
                case UnaryOpOperation_RSQRT:
                    _unaryOp<UnaryRsqrt<float>, float>(inp, out, realSize);
                    break;
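                // MNNExp evaluates exp(-x) of its argument, hence the negation
                // beforehand to obtain exp(x).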
                case UnaryOpOperation_EXP:
                    MNNScaleAndAddBiasScalar(out, inp, 0.0f, -1.0f, realSize);
                    MNNExp(out, out, realSize);
                    break;
                case UnaryOpOperation_COS:
                    _unaryOp<UnaryCos<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_SIN:
                    MNNSin(out, inp, realSize);
                    break;
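                // Sigmoid uses the fast approximate kernel in low-precision
                // mode and the exact kernel otherwise.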
                case UnaryOpOperation_SIGMOID:
                    if (BackendConfig::Precision_Low == precision) {
                        MNNSigmoidLowp(out, inp, realSize);
                    } else {
                        MNNSigmoid(out, inp, realSize);
                    }
                    break;
                case UnaryOpOperation_TANH:
                    MNNTanh(out, inp, realSize);
                    break;
                case UnaryOpOperation_TAN:
                    _unaryOp<UnaryTan<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ATAN:
                    _unaryOp<UnaryATan<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_SQRT:
                    _unaryOp<UnarySqrt<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_CEIL:
                    _unaryOp<UnaryCeil<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_RECIPROCAL:
                    _unaryOp<UnaryRecipocal<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_LOG1P:
                    _unaryOp<UnaryLog1p<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_LOG:
                    _unaryOp<UnaryLog<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_FLOOR:
                    _unaryOp<UnaryFloor<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_BNLL:
                    _unaryOp<UnaryBNLL<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ACOSH:
                    _unaryOp<UnaryAcosh<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_SINH:
                    _unaryOp<UnarySinh<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ASINH:
                    _unaryOp<UnaryAsinh<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ATANH:
                    _unaryOp<UnaryAtanh<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_SIGN:
                    _unaryOp<UnarySign<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ROUND:
                    _unaryOp<UnaryRound<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_COSH:
                    _unaryOp<UnaryCosh<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ERF:
                    _unaryOp<UnaryErf<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ERFC:
                    _unaryOp<UnaryErfc<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ERFINV:
                    _unaryOp<UnaryErfinv<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_EXPM1:
                    _unaryOp<UnaryExpm1<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ASIN:
                    _unaryOp<UnaryAsin<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_ACOS:
                    _unaryOp<UnaryAcos<float>, float>(inp, out, realSize);
                    break;
                case UnaryOpOperation_HARDSWISH:
                    MNNHardSwishCommon(out, inp, realSize);
                    break;
                default:
                    MNN_ASSERT(false);
                    break;
            }
        }
    }
    MNN_CONCURRENCY_END();

    return NO_ERROR;
}

class CPUUnaryCreator : public CPUBackend::Creator {
public:
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        return new CPUUnary(backend, op->main_as_UnaryOp()->opType());
    }
};

REGISTER_CPU_OP_CREATOR(CPUUnaryCreator, OpType_UnaryOp);

} // namespace MNN