2021-09-18 15:52:30 +08:00
|
|
|
//
|
|
|
|
// AVX2Functions.cpp
|
|
|
|
// MNN
|
|
|
|
//
|
|
|
|
//  Created by MNN on 2021/05/17.
|
|
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
|
|
//
|
|
|
|
|
2021-06-11 17:17:13 +08:00
|
|
|
#include "AVX2Functions.hpp"
|
|
|
|
#include "AVX2Backend.hpp"
|
|
|
|
#include "avx/FunctionSummary.hpp"
|
|
|
|
#include "avxfma/FunctionSummary.hpp"
|
|
|
|
#include "avx512/FunctionSummary.hpp"
|
2021-11-30 10:10:53 +08:00
|
|
|
#include "sse/FunctionSummary.hpp"
|
2021-06-11 17:17:13 +08:00
|
|
|
namespace MNN {
|
2021-11-30 10:10:53 +08:00
|
|
|
static int geP, glP, ghP;
|
2021-06-11 17:17:13 +08:00
|
|
|
static CoreFunctions* gAVX2CoreFunctions = nullptr;
|
2021-09-18 15:52:30 +08:00
|
|
|
static CoreInt8Functions* gAVX2CoreInt8Functions = nullptr;
|
2021-06-11 17:17:13 +08:00
|
|
|
static void _MNNGetMatMulPackMode(int* eP, int *lP, int* hP) {
|
2021-11-30 10:10:53 +08:00
|
|
|
*eP = geP;
|
|
|
|
*lP = glP;
|
|
|
|
*hP = ghP;
|
2021-06-11 17:17:13 +08:00
|
|
|
}
|
|
|
|
|
2024-11-18 14:37:45 +08:00
|
|
|
#ifndef MNN_USE_AVX
|
|
|
|
// Variant compiled when MNN_USE_AVX is not defined: no tables are allocated
// and the call always reports failure.
//
// cpuFlags: ignored in this configuration.
// Returns false.
bool AVX2Functions::init(int cpuFlags) {
    (void)cpuFlags; // intentionally unused here
    return false;
}
|
|
|
|
#else
|
|
|
|
|
2021-06-11 17:17:13 +08:00
|
|
|
bool AVX2Functions::init(int cpuFlags) {
|
|
|
|
gAVX2CoreFunctions = new CoreFunctions;
|
|
|
|
auto coreFunction = gAVX2CoreFunctions;
|
2021-09-18 15:52:30 +08:00
|
|
|
gAVX2CoreInt8Functions = new CoreInt8Functions;
|
2021-06-11 17:17:13 +08:00
|
|
|
// Init default functions
|
|
|
|
*coreFunction = *MNNGetCoreFunctions();
|
2021-09-18 15:52:30 +08:00
|
|
|
*gAVX2CoreInt8Functions = *MNNGetInt8CoreFunctions();
|
|
|
|
_AVX_MNNInt8FunctionInit(gAVX2CoreInt8Functions);
|
2021-06-11 17:17:13 +08:00
|
|
|
// Init AVX2
|
|
|
|
coreFunction->MNNGetMatMulPackMode = _MNNGetMatMulPackMode;
|
2021-11-30 10:10:53 +08:00
|
|
|
geP = 24;
|
|
|
|
glP = 1;
|
|
|
|
ghP = 4;
|
2021-09-18 15:52:30 +08:00
|
|
|
_AVX_ReorderInit(coreFunction);
|
|
|
|
|
2021-06-11 17:17:13 +08:00
|
|
|
coreFunction->MNNPackedMatMul = _AVX_MNNPackedMatMul;
|
|
|
|
coreFunction->MNNPackedMatMulRemain = _AVX_MNNPackedMatMulRemain;
|
2024-09-12 12:57:57 +08:00
|
|
|
#ifdef MNN_CPU_WEIGHT_DEQUANT_GEMM
|
2023-06-27 10:33:16 +08:00
|
|
|
coreFunction->MNNPackedMatMul_int8 = _AVX_MNNPackedMatMul_int8;
|
|
|
|
coreFunction->MNNPackedMatMulRemain_int8 = _AVX_MNNPackedMatMulRemain_int8;
|
2024-09-12 12:57:57 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef MNN_LOW_MEMORY
|
2024-04-19 11:58:21 +08:00
|
|
|
coreFunction->MNNAbsMax = _AVX_MNNAbsMaxFP32;
|
2025-02-12 11:14:19 +08:00
|
|
|
coreFunction->MNNDynamicQuant = _AVX_MNNDynamicQuant;
|
2023-06-27 10:33:16 +08:00
|
|
|
#endif
|
2021-06-11 17:17:13 +08:00
|
|
|
coreFunction->MNNPackC4ForMatMul_A = _AVX_MNNPackC4ForMatMul_A;
|
|
|
|
coreFunction->MNNPackForMatMul_B = _AVX_MNNPackForMatMul_B;
|
|
|
|
coreFunction->MNNComputeMatMulForE_1 = _AVX_MNNComputeMatMulForE_1;
|
|
|
|
coreFunction->MNNComputeMatMulForH_1 = _AVX_MNNComputeMatMulForH_1;
|
2024-04-19 11:58:21 +08:00
|
|
|
// Dynamic Quant
|
|
|
|
coreFunction->MNNCountMaxMinValue = _AVX_MNNComputeScaleZeroScalar;
|
2021-09-18 15:52:30 +08:00
|
|
|
|
|
|
|
// For Packed Functions
|
|
|
|
coreFunction->pack = 8;
|
2021-06-11 17:17:13 +08:00
|
|
|
_AVX_ExtraInit(coreFunction);
|
|
|
|
// Winograd
|
|
|
|
_AVX_WinogradInit(coreFunction);
|
|
|
|
if (cpuFlags & libyuv::kCpuHasFMA3) {
|
|
|
|
coreFunction->MNNPackedMatMul = _AVX_MNNPackedMatMulFMA;
|
|
|
|
coreFunction->MNNPackedMatMulRemain = _AVX_MNNPackedMatMulRemainFMA;
|
|
|
|
coreFunction->MNNComputeMatMulForE_1 = _AVX_MNNComputeMatMulForE_1FMA;
|
|
|
|
coreFunction->MNNComputeMatMulForH_1 = _AVX_MNNComputeMatMulForH_1FMA;
|
2021-09-18 15:52:30 +08:00
|
|
|
_AVX_ExtraInitFMA(coreFunction);
|
2021-06-11 17:17:13 +08:00
|
|
|
}
|
|
|
|
#ifdef MNN_AVX512
|
|
|
|
if ((cpuFlags & libyuv::kCpuHasAVX512VNNI)
|
|
|
|
|| (cpuFlags & libyuv::kCpuHasAVX512VL)
|
|
|
|
|| (cpuFlags & libyuv::kCpuHasAVX512BW)
|
|
|
|
|| (cpuFlags & libyuv::kCpuHasAVX512VBMI)
|
|
|
|
|| (cpuFlags & libyuv::kCpuHasAVX512VBITALG)
|
|
|
|
|| (cpuFlags & libyuv::kCpuHasAVX512VPOPCNTDQ)
|
|
|
|
|| (cpuFlags & libyuv::kCpuHasAVX512VBMI2)
|
|
|
|
) {
|
2021-09-18 15:52:30 +08:00
|
|
|
coreFunction->pack = 16;
|
|
|
|
_AVX512_ReorderInit(coreFunction);
|
|
|
|
_AVX512_ExtraInit(coreFunction);
|
|
|
|
_AVX512_WinogradInit(coreFunction);
|
2021-06-11 17:17:13 +08:00
|
|
|
coreFunction->MNNPackForMatMul_B = _AVX512_MNNPackForMatMul_B;
|
|
|
|
coreFunction->MNNPackC4ForMatMul_A = _AVX512_MNNPackC8ForMatMul_A;
|
|
|
|
coreFunction->MNNPackedMatMul = _AVX512_MNNPackedMatMul;
|
|
|
|
coreFunction->MNNPackedMatMulRemain = _AVX512_MNNPackedMatMulRemain;
|
2021-11-30 10:10:53 +08:00
|
|
|
geP = 48;
|
|
|
|
ghP = 8;
|
|
|
|
glP = 1;
|
|
|
|
_AVX512_MNNInt8FunctionInit(gAVX2CoreInt8Functions, cpuFlags & libyuv::kCpuHasAVX512VNNI);
|
2022-05-06 19:51:20 +08:00
|
|
|
memcpy(coreFunction->MNNPackedMatMulOC16Functions, _AVX512_MNNPackedMatMulOC16Functions,
|
|
|
|
sizeof(MNN::CoreFunctions::MNNPackedMatMulKernel) * AVX512_INPUT_TILE_MAX);
|
|
|
|
memcpy(coreFunction->MNNPackedMatMulOC32Functions, _AVX512_MNNPackedMatMulOC32Functions,
|
|
|
|
sizeof(MNN::CoreFunctions::MNNPackedMatMulKernel) * AVX512_INPUT_TILE_MAX);
|
|
|
|
memcpy(coreFunction->MNNPackedMatMulOC48Functions, _AVX512_MNNPackedMatMulOC48Functions,
|
|
|
|
sizeof(MNN::CoreFunctions::MNNPackedMatMulKernel) * AVX512_INPUT_TILE_MAX);
|
2021-06-11 17:17:13 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return true;
|
|
|
|
}
|
2024-11-18 14:37:45 +08:00
|
|
|
#endif
|
|
|
|
|
2021-06-11 17:17:13 +08:00
|
|
|
// Returns the float function table stored in gAVX2CoreFunctions.
// The pointer is nullptr until init() has allocated the table.
CoreFunctions* AVX2Functions::get() {
    CoreFunctions* const table = gAVX2CoreFunctions;
    return table;
}
|
2021-09-18 15:52:30 +08:00
|
|
|
// Returns the int8 function table stored in gAVX2CoreInt8Functions.
// The pointer is nullptr until init() has allocated the table.
CoreInt8Functions* AVX2Functions::getInt8() {
    CoreInt8Functions* const table = gAVX2CoreInt8Functions;
    return table;
}
|
2021-06-11 17:17:13 +08:00
|
|
|
};
|