2019-09-01 19:25:26 +08:00
|
|
|
//
|
|
|
|
// FunctionDispatcher.cpp
|
|
|
|
// MNN
|
|
|
|
//
|
- dynamic computation graph (beta)
- add supports (/express)
- add tests
- add benchmarks with it (/benchmark/exprModels)
- Python
- MNN engine and tools were submitted to pip
- available on Windows/macOS/Linux
- Engine/Converter
- add supports for each op benchmarking
- refactor optimizer by separating steps
- CPU
- add supports for Conv3D, Pool3D, ELU, ReverseSequence
- fix ArgMax, Permute, Scale, BinaryOp, Slice, SliceTf
- OpenCL
- add half transform in CPU
- add broadcast supports for binary
- optimize Conv2D, Reshape, Eltwise, Gemm, etc.
- OpenGL
- add sub, real div supports for binary
- add supports for unary
- optimize Conv2D, Reshape
- Vulkan
- add max supports for eltwise
- Metal
- fix metallib missing problem
- Train/Quantization
- use express to refactor training codes
2019-09-26 21:02:07 +08:00
|
|
|
// Created by MNN on 2019/08/25.
|
2019-09-01 19:25:26 +08:00
|
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
|
|
//
|
2020-11-05 16:41:56 +08:00
|
|
|
|
|
|
|
#include <limits>
|
2021-01-05 15:30:28 +08:00
|
|
|
#include "avx512/FunctionSummary.hpp"
|
2020-11-05 16:41:56 +08:00
|
|
|
#include "avx/FunctionSummary.hpp"
|
2019-12-27 22:16:57 +08:00
|
|
|
#include "backend/cpu/compute/CommonOptFunction.h"
|
|
|
|
#include "backend/cpu/compute/ConvOpt.h"
|
2020-02-26 09:57:17 +08:00
|
|
|
#include "backend/cpu/compute/Int8FunctionsOpt.h"
|
2020-07-04 01:21:30 +08:00
|
|
|
#include "cpu_id.h"
|
2020-11-05 16:41:56 +08:00
|
|
|
#include "sse/FunctionSummary.hpp"
|
2020-04-14 22:52:24 +08:00
|
|
|
// https://stackoverflow.com/a/11230437
|
|
|
|
#if defined(_MSC_VER)
|
|
|
|
#include <intrin.h>
|
|
|
|
#else
|
2020-04-10 14:44:01 +08:00
|
|
|
#include <x86intrin.h>
|
2020-04-14 22:52:24 +08:00
|
|
|
#endif
|
2020-07-04 01:21:30 +08:00
|
|
|
|
2020-04-10 14:44:01 +08:00
|
|
|
bool MNNReorder4x4ByPlatform(float* dst, size_t number) {
|
2020-12-15 14:12:35 +08:00
|
|
|
return _SSE_MNNReorder4x4ByPlatform(dst, number);
|
2020-04-10 14:44:01 +08:00
|
|
|
}
|
|
|
|
|
2020-07-04 01:21:30 +08:00
|
|
|
struct FunctionGroup {
|
2020-11-05 16:41:56 +08:00
|
|
|
int tileNumber = 8;
|
|
|
|
int eP = 12;
|
|
|
|
int lP = 1;
|
|
|
|
int hP = 4;
|
|
|
|
void (*MNNAddBias)(float* dst, const float* bias, size_t planeNumber, size_t biasNumber) = _SSE_MNNAddBias;
|
|
|
|
void (*MNNAddBiasRelu)(float* dst, const float* bias, size_t planeNumber, size_t biasNumber) = _SSE_MNNAddBiasRelu;
|
|
|
|
void (*MNNAddBiasRelu6)(float* dst, const float* bias, size_t planeNumber,
|
|
|
|
size_t biasNumber) = _SSE_MNNAddBiasRelu6;
|
2020-07-04 01:21:30 +08:00
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
void (*MNNMatrixAdd)(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride,
|
|
|
|
size_t bStride, size_t height) = _SSE_MNNMatrixAdd;
|
|
|
|
void (*MNNMatrixSub)(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride,
|
|
|
|
size_t bStride, size_t height) = _SSE_MNNMatrixSub;
|
|
|
|
|
|
|
|
void (*MNNGemmFloatUnit_4)(float* dstOrigin, const float* src, const float* weight, size_t src_depth_quad,
|
|
|
|
size_t dst_step, size_t dst_depth_quad,
|
|
|
|
size_t weight_depth_offset) = _SSE_MNNGemmFloatUnit_4;
|
|
|
|
void (*MNNGemmFloatCommon_4)(float* dst, const float* src, const float* weight, size_t src_depth_quad,
|
|
|
|
size_t dst_step, size_t dst_depth_quad, size_t width,
|
|
|
|
size_t weight_depth_offset) = _SSE_MNNGemmFloatCommon_4;
|
|
|
|
void (*MNNPackC4ForMatMul_A)(float* dest, const float* source, size_t e, size_t l,
|
|
|
|
size_t eReal) = _SSE_MNNPackC4ForMatMul_A;
|
2020-12-15 14:12:35 +08:00
|
|
|
void (*MNNPackForMatMul_B)(float* dest, const float* source, size_t h, size_t l, bool transpose) = _SSE_MNNPackForMatMul_B;
|
2020-11-05 16:41:56 +08:00
|
|
|
void (*MNNPackedMatMul)(float* C, const float* A, const float* B, const size_t* parameter, float* cache,
|
|
|
|
const float* postParameters, const float* bias) = _SSE_MNNPackedMatMul;
|
|
|
|
void (*MNNPackedMatMulRemain)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter,
|
|
|
|
float* cache, const float* postParameters,
|
|
|
|
const float* bias) = _SSE_MNNPackedMatMulRemain;
|
|
|
|
void (*MNNConvRunForLineDepthwise)(float* dst, const float* src, const float* weight, size_t width, size_t src_w_setup,
|
|
|
|
size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, size_t height,
|
|
|
|
size_t srcHStep, size_t dstHStep) = _SSE_MNNConvRunForLineDepthwise;
|
2020-12-10 12:47:38 +08:00
|
|
|
void (*MNNGemmInt8AddBiasScale_16x4_Unit)(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) = _SSE_MNNGemmInt8AddBiasScale_16x4_Unit;
|
2020-11-05 16:41:56 +08:00
|
|
|
void (*MNNExpC8)(float* dest, const float* source, const float* parameters, size_t countC8) = _SSE_MNNExpC8;
|
2020-12-10 11:28:55 +08:00
|
|
|
void (*MNNFloat2Int8)(const float* src, int8_t* dst, size_t sizeQuad, const float* scalep, ssize_t minValue,
|
2021-01-06 16:29:37 +08:00
|
|
|
ssize_t maxValue, ssize_t zeroPoint) = _SSE_MNNFloat2Int8;
|
|
|
|
void (*MNNInt8ScaleToFloat)(float* dst, const int8_t* src, const float* scale, size_t size, ssize_t zeroPoint) = _SSE_MNNInt8ScaleToFloat;
|
|
|
|
void (*MNNLineDepthWiseInt8AddBiasScaleUnit)(int8_t* dst, const int8_t* src, const int8_t* weight, const QuanPostTreatParameters* parameters, size_t width, size_t src_w_step, size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step) = _SSE_MNNLineDepthWiseInt8AddBiasScaleUnit;
|
|
|
|
void (*MNNComputeMatMulForE_1)(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId) = _SSE_MNNComputeMatMulForE_1;
|
2020-07-04 01:21:30 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// File-local dispatch table. Starts out with the SSE defaults declared in
// FunctionGroup; MNNFunctionInit() overwrites entries based on CPU flags.
static FunctionGroup gFunc;
|
|
|
|
void MNNFunctionInit() {
|
|
|
|
auto cpuFlags = libyuv::InitCpuFlags();
|
2021-01-06 16:29:37 +08:00
|
|
|
if (cpuFlags & libyuv::kCpuHasAVX2) {
|
2020-11-05 16:41:56 +08:00
|
|
|
gFunc.MNNAddBias = _AVX_MNNAddBias;
|
|
|
|
gFunc.MNNAddBiasRelu = _AVX_MNNAddBiasRelu;
|
|
|
|
gFunc.MNNAddBiasRelu6 = _AVX_MNNAddBiasRelu6;
|
|
|
|
gFunc.MNNMatrixAdd = _AVX_MNNMatrixAdd;
|
|
|
|
gFunc.MNNMatrixSub = _AVX_MNNMatrixSub;
|
|
|
|
gFunc.MNNGemmFloatUnit_4 = _AVX_MNNGemmFloatUnit_4;
|
|
|
|
gFunc.MNNGemmFloatCommon_4 = _AVX_MNNGemmFloatCommon_4;
|
|
|
|
gFunc.MNNPackedMatMul = _AVX_MNNPackedMatMul;
|
2020-07-04 01:21:30 +08:00
|
|
|
gFunc.MNNPackedMatMulRemain = _AVX_MNNPackedMatMulRemain;
|
2020-11-05 16:41:56 +08:00
|
|
|
gFunc.eP = 24;
|
|
|
|
gFunc.MNNPackC4ForMatMul_A = _AVX_MNNPackC4ForMatMul_A;
|
|
|
|
gFunc.MNNConvRunForLineDepthwise = _AVX_MNNConvRunForLineDepthwise;
|
|
|
|
gFunc.MNNGemmInt8AddBiasScale_16x4_Unit = _AVX_MNNGemmInt8AddBiasScale_16x4_Unit;
|
2020-11-10 16:53:52 +08:00
|
|
|
gFunc.MNNExpC8 = _AVX_MNNExpC8;
|
2020-12-10 11:28:55 +08:00
|
|
|
gFunc.MNNFloat2Int8 = _AVX_MNNFloat2Int8;
|
2020-12-10 17:53:24 +08:00
|
|
|
gFunc.MNNInt8ScaleToFloat = _AVX_MNNInt8ScaleToFloat;
|
2021-01-06 16:29:37 +08:00
|
|
|
gFunc.MNNLineDepthWiseInt8AddBiasScaleUnit = _AVX_MNNLineDepthWiseInt8AddBiasScaleUnit;
|
|
|
|
gFunc.MNNComputeMatMulForE_1 = _AVX_MNNComputeMatMulForE_1;
|
2020-07-04 01:21:30 +08:00
|
|
|
if (cpuFlags & libyuv::kCpuHasFMA3) {
|
2020-11-05 16:41:56 +08:00
|
|
|
gFunc.MNNGemmFloatUnit_4 = _AVX_MNNGemmFloatUnitFMA_4;
|
|
|
|
gFunc.MNNGemmFloatCommon_4 = _AVX_MNNGemmFloatCommonFMA_4;
|
|
|
|
gFunc.MNNPackedMatMul = _AVX_MNNPackedMatMulFMA;
|
2020-07-04 01:21:30 +08:00
|
|
|
gFunc.MNNPackedMatMulRemain = _AVX_MNNPackedMatMulRemainFMA;
|
2021-01-06 16:29:37 +08:00
|
|
|
gFunc.MNNComputeMatMulForE_1 = _AVX_MNNComputeMatMulForE_1FMA;
|
2020-07-04 01:21:30 +08:00
|
|
|
}
|
|
|
|
}
|
2021-01-06 16:29:37 +08:00
|
|
|
#ifdef MNN_AVX512
|
|
|
|
if (cpuFlags & libyuv::kCpuHasAVX512VL) {
|
|
|
|
// gFunc.MNNPackForMatMul_B = _AVX512_MNNPackForMatMul_B;
|
|
|
|
// gFunc.MNNPackC4ForMatMul_A = _AVX512_MNNPackC4ForMatMul_A;
|
|
|
|
// gFunc.MNNPackedMatMul = _AVX512_MNNPackedMatMul;
|
|
|
|
// gFunc.MNNPackedMatMulRemain = _AVX512_MNNPackedMatMulRemain;
|
|
|
|
// gFunc.eP = 48;
|
|
|
|
// gFunc.hP = 8;
|
|
|
|
gFunc.MNNGemmInt8AddBiasScale_16x4_Unit = _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit;
|
|
|
|
}
|
|
|
|
#endif
|
2020-07-04 01:21:30 +08:00
|
|
|
}
|
|
|
|
|
2019-09-01 19:25:26 +08:00
|
|
|
// ========= CommonOptFunction.cpp ===========
|
|
|
|
void MNNAddBias(float* dst, const float* bias, size_t planeNumber, size_t biasNumber) {
|
2020-07-04 01:21:30 +08:00
|
|
|
return gFunc.MNNAddBias(dst, bias, planeNumber, biasNumber);
|
2019-09-01 19:25:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void MNNAddBiasRelu(float* dst, const float* bias, size_t planeNumber, size_t biasNumber) {
|
2020-07-04 01:21:30 +08:00
|
|
|
return gFunc.MNNAddBiasRelu(dst, bias, planeNumber, biasNumber);
|
2019-09-01 19:25:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void MNNAddBiasRelu6(float* dst, const float* bias, size_t planeNumber, size_t biasNumber) {
|
2020-07-04 01:21:30 +08:00
|
|
|
return gFunc.MNNAddBiasRelu6(dst, bias, planeNumber, biasNumber);
|
2019-09-01 19:25:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Always uses the SSE implementation; this routine has no entry in the
// runtime dispatch table.
void MNNCopyC4WithStride(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count) {
    return _SSE_MNNCopyC4WithStride(source, dest, srcStride, dstStride, count);
}
|
|
|
|
|
|
|
|
// Always uses the SSE implementation; this routine has no entry in the
// runtime dispatch table.
void MNNAddC4WithStride(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count) {
    return _SSE_MNNAddC4WithStride(source, dest, srcStride, dstStride, count);
}
|
|
|
|
|
2020-04-10 23:42:16 +08:00
|
|
|
void MNNGemmFloatUnit_4(float* dstOrigin, const float* src, const float* weight, size_t src_depth_quad, size_t dst_step,
|
|
|
|
size_t dst_depth_quad, size_t weight_depth_offset) {
|
2020-07-04 01:21:30 +08:00
|
|
|
gFunc.MNNGemmFloatUnit_4(dstOrigin, src, weight, src_depth_quad, dst_step, dst_depth_quad, weight_depth_offset);
|
2020-04-10 23:42:16 +08:00
|
|
|
}
|
|
|
|
|
2019-09-01 19:25:26 +08:00
|
|
|
// ========= MNNGemmFloatCommon_4.cpp ===========
|
|
|
|
void MNNGemmFloatCommon_4(float* dst, const float* src, const float* weight, size_t src_depth_quad, size_t dst_step,
|
|
|
|
size_t dst_depth_quad, size_t width, size_t weight_depth_offset) {
|
2020-07-04 01:21:30 +08:00
|
|
|
gFunc.MNNGemmFloatCommon_4(dst, src, weight, src_depth_quad, dst_step, dst_depth_quad, width, weight_depth_offset);
|
2019-09-01 19:25:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// ========= MNNMatrixAdd.cpp ===========
|
|
|
|
void MNNMatrixAdd(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride,
|
|
|
|
size_t bStride, size_t height) {
|
2020-07-04 01:21:30 +08:00
|
|
|
gFunc.MNNMatrixAdd(C, A, B, widthC4, cStride, aStride, bStride, height);
|
2019-09-01 19:25:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// ========= MNNMatrixSub.cpp ===========
|
|
|
|
void MNNMatrixSub(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride,
|
|
|
|
size_t bStride, size_t height) {
|
2020-07-04 01:21:30 +08:00
|
|
|
gFunc.MNNMatrixSub(C, A, B, widthC4, cStride, aStride, bStride, height);
|
2019-09-01 19:25:26 +08:00
|
|
|
}
|
2020-02-26 09:57:17 +08:00
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
void MNNReluWithSlopeChannel(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad) {
|
|
|
|
return _SSE_MNNReluWithSlopeChannel(dst, src, slope, sizeQuad, depthQuad);
|
2020-02-26 09:57:17 +08:00
|
|
|
}
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
void MNNPackC4ForMatMul_A(float* dest, const float* source, size_t e, size_t l, size_t eReal) {
|
|
|
|
return gFunc.MNNPackC4ForMatMul_A(dest, source, e, l, eReal);
|
|
|
|
}
|
|
|
|
|
|
|
|
void MNNPackForMatMul_B(float* dest, const float* source, size_t h, size_t l, bool transpose) {
|
2020-12-15 14:12:35 +08:00
|
|
|
gFunc.MNNPackForMatMul_B(dest, source, h, l, transpose);
|
2020-05-15 19:08:36 +08:00
|
|
|
}
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
void MNNGetMatMulPackMode(int* eP, int* lP, int* hP) {
|
|
|
|
*eP = gFunc.eP;
|
|
|
|
*lP = gFunc.lP;
|
|
|
|
*hP = gFunc.hP;
|
2020-05-16 15:24:22 +08:00
|
|
|
}
|
2020-05-18 07:22:30 +08:00
|
|
|
|
2020-07-04 01:21:30 +08:00
|
|
|
int MNNGetConvolutionTileNumber() {
|
|
|
|
return gFunc.tileNumber;
|
|
|
|
}
|
2020-12-10 11:28:55 +08:00
|
|
|
void MNNFloat2Int8(const float* src, int8_t* dst, size_t sizeQuad, const float* scalep, ssize_t minValue,
|
2021-01-06 16:29:37 +08:00
|
|
|
ssize_t maxValue, ssize_t zeroPoint) {
|
|
|
|
return gFunc.MNNFloat2Int8(src, dst, sizeQuad, scalep, minValue, maxValue, zeroPoint);
|
2020-12-10 17:53:24 +08:00
|
|
|
}
|
2021-01-06 16:29:37 +08:00
|
|
|
void MNNInt8ScaleToFloat(float* dst, const int8_t* src, const float* scale, size_t size, ssize_t zeroPoint) {
|
|
|
|
return gFunc.MNNInt8ScaleToFloat(dst, src, scale, size, zeroPoint);
|
2020-12-10 11:28:55 +08:00
|
|
|
}
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
void MNNPackedMatMul(float* C, const float* A, const float* B, const size_t* parameter, float* cache,
|
|
|
|
const float* postParameters, const float* bias) {
|
2020-07-04 01:21:30 +08:00
|
|
|
return gFunc.MNNPackedMatMul(C, A, B, parameter, cache, postParameters, bias);
|
2020-05-17 23:42:39 +08:00
|
|
|
}
|
2020-11-05 16:41:56 +08:00
|
|
|
void MNNPackedMatMulRemain(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter,
|
|
|
|
float* cache, const float* postParameters, const float* bias) {
|
2020-07-04 01:21:30 +08:00
|
|
|
return gFunc.MNNPackedMatMulRemain(C, A, B, eSize, parameter, cache, postParameters, bias);
|
2020-05-17 23:42:39 +08:00
|
|
|
}
|
2020-11-05 16:41:56 +08:00
|
|
|
// Public entry point: forwards to the MNNExpC8 kernel currently installed in
// the dispatch table.
void MNNExpC8(float* dest, const float* source, const float* parameters, size_t countC8) {
    return gFunc.MNNExpC8(dest, source, parameters, countC8);
}
|
|
|
|
void MNNConvRunForLineDepthwise(float* dst, const float* src, const float* weight, size_t width, size_t src_w_setup,
|
|
|
|
size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, size_t height,
|
|
|
|
size_t srcHStep, size_t dstHStep) {
|
|
|
|
return gFunc.MNNConvRunForLineDepthwise(dst, src, weight, width, src_w_setup, fw, fh, dilateX_step, dilateY_step, height, srcHStep, dstHStep);
|
|
|
|
}
|
|
|
|
void MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step,
|
2020-12-10 12:47:38 +08:00
|
|
|
size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) {
|
|
|
|
return gFunc.MNNGemmInt8AddBiasScale_16x4_Unit(dst, src, weight, src_depth_quad, dst_step, dst_depth_quad, post, realDst);
|
2020-11-05 16:41:56 +08:00
|
|
|
}
|
2021-01-06 16:29:37 +08:00
|
|
|
|
|
|
|
// Public entry point: forwards to the int8 depthwise kernel currently
// installed in the dispatch table.
void MNNLineDepthWiseInt8AddBiasScaleUnit(int8_t* dst, const int8_t* src, const int8_t* weight,
                                          const QuanPostTreatParameters* parameters, size_t width,
                                          size_t src_w_step, size_t fw, size_t fh, size_t dilateX_step,
                                          size_t dilateY_step) {
    return gFunc.MNNLineDepthWiseInt8AddBiasScaleUnit(dst, src, weight, parameters, width, src_w_step, fw,
                                                      fh, dilateX_step, dilateY_step);
}
|
|
|
|
// Always uses the SSE implementation; this routine has no entry in the
// runtime dispatch table.
void MNNInt8ToInt16(int16_t* dest, const int8_t* source, size_t count) {
    return _SSE_MNNInt8ToInt16(dest, source, count);
}
|
|
|
|
|
|
|
|
// Public entry point: forwards to the MNNComputeMatMulForE_1 kernel currently
// installed in the dispatch table (SSE, AVX, or AVX+FMA).
void MNNComputeMatMulForE_1(const float* A, const float* B, float* C, const float* biasPtr,
                            const MatMulParam* param, size_t tId) {
    return gFunc.MNNComputeMatMulForE_1(A, B, C, biasPtr, param, tId);
}
|