| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
//
//  AVX2Functions.cpp
//  MNN
//
//  Created by MNN on 2021/05/17.
//  Copyright © 2018, Alibaba Group Holding Limited
//
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  | #include "AVX2Functions.hpp"
 | 
					
						
							|  |  |  | #include "AVX2Backend.hpp"
 | 
					
						
							|  |  |  | #include "avx/FunctionSummary.hpp"
 | 
					
						
							|  |  |  | #include "avxfma/FunctionSummary.hpp"
 | 
					
						
							|  |  |  | #include "avx512/FunctionSummary.hpp"
 | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  | #include "sse/FunctionSummary.hpp"
 | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  | namespace MNN { | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  | static int geP, glP, ghP; | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  | static CoreFunctions* gAVX2CoreFunctions = nullptr; | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  | static CoreInt8Functions* gAVX2CoreInt8Functions = nullptr; | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  | static void _MNNGetMatMulPackMode(int* eP, int *lP, int* hP) { | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |     *eP = geP; | 
					
						
							|  |  |  |     *lP = glP; | 
					
						
							|  |  |  |     *hP = ghP; | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-11-18 14:37:45 +08:00
										 |  |  | #ifndef MNN_USE_AVX
 | 
					
						
							|  |  |  | bool AVX2Functions::init(int cpuFlags) { | 
					
						
							|  |  |  |     return false; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  | bool AVX2Functions::init(int cpuFlags) { | 
					
						
							|  |  |  |     gAVX2CoreFunctions = new CoreFunctions; | 
					
						
							|  |  |  |     auto coreFunction = gAVX2CoreFunctions; | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |     gAVX2CoreInt8Functions = new CoreInt8Functions; | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  |     // Init default functions
 | 
					
						
							|  |  |  |     *coreFunction = *MNNGetCoreFunctions(); | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |     *gAVX2CoreInt8Functions = *MNNGetInt8CoreFunctions(); | 
					
						
							|  |  |  |     _AVX_MNNInt8FunctionInit(gAVX2CoreInt8Functions); | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  |     // Init AVX2
 | 
					
						
							|  |  |  |     coreFunction->MNNGetMatMulPackMode = _MNNGetMatMulPackMode; | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |     geP = 24; | 
					
						
							|  |  |  |     glP = 1; | 
					
						
							|  |  |  |     ghP = 4; | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |     _AVX_ReorderInit(coreFunction); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  |     coreFunction->MNNPackedMatMul       = _AVX_MNNPackedMatMul; | 
					
						
							|  |  |  |     coreFunction->MNNPackedMatMulRemain = _AVX_MNNPackedMatMulRemain; | 
					
						
							| 
									
										
										
										
											2024-09-12 12:57:57 +08:00
										 |  |  | #ifdef MNN_CPU_WEIGHT_DEQUANT_GEMM
 | 
					
						
							| 
									
										
										
										
											2023-06-27 10:33:16 +08:00
										 |  |  |     coreFunction->MNNPackedMatMul_int8       = _AVX_MNNPackedMatMul_int8; | 
					
						
							|  |  |  |     coreFunction->MNNPackedMatMulRemain_int8 = _AVX_MNNPackedMatMulRemain_int8; | 
					
						
							| 
									
										
										
										
											2024-09-12 12:57:57 +08:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef MNN_LOW_MEMORY
 | 
					
						
							| 
									
										
										
										
											2024-04-19 11:58:21 +08:00
										 |  |  |     coreFunction->MNNAbsMax = _AVX_MNNAbsMaxFP32; | 
					
						
							| 
									
										
										
										
											2025-02-12 11:14:19 +08:00
										 |  |  |     coreFunction->MNNDynamicQuant = _AVX_MNNDynamicQuant; | 
					
						
							| 
									
										
										
										
											2023-06-27 10:33:16 +08:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  |     coreFunction->MNNPackC4ForMatMul_A  = _AVX_MNNPackC4ForMatMul_A; | 
					
						
							|  |  |  |     coreFunction->MNNPackForMatMul_B    = _AVX_MNNPackForMatMul_B; | 
					
						
							|  |  |  |     coreFunction->MNNComputeMatMulForE_1 = _AVX_MNNComputeMatMulForE_1; | 
					
						
							|  |  |  |     coreFunction->MNNComputeMatMulForH_1 = _AVX_MNNComputeMatMulForH_1; | 
					
						
							| 
									
										
										
										
											2024-04-19 11:58:21 +08:00
										 |  |  |     // Dynamic Quant
 | 
					
						
							|  |  |  |     coreFunction->MNNCountMaxMinValue = _AVX_MNNComputeScaleZeroScalar; | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     // For Packed Functions
 | 
					
						
							|  |  |  |     coreFunction->pack = 8; | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  |     _AVX_ExtraInit(coreFunction); | 
					
						
							|  |  |  |     // Winograd
 | 
					
						
							|  |  |  |     _AVX_WinogradInit(coreFunction); | 
					
						
							|  |  |  |     if (cpuFlags & libyuv::kCpuHasFMA3) { | 
					
						
							|  |  |  |         coreFunction->MNNPackedMatMul       = _AVX_MNNPackedMatMulFMA; | 
					
						
							|  |  |  |         coreFunction->MNNPackedMatMulRemain = _AVX_MNNPackedMatMulRemainFMA; | 
					
						
							|  |  |  |         coreFunction->MNNComputeMatMulForE_1 = _AVX_MNNComputeMatMulForE_1FMA; | 
					
						
							|  |  |  |         coreFunction->MNNComputeMatMulForH_1 = _AVX_MNNComputeMatMulForH_1FMA; | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |         _AVX_ExtraInitFMA(coreFunction); | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | #ifdef MNN_AVX512
 | 
					
						
							|  |  |  |     if ((cpuFlags & libyuv::kCpuHasAVX512VNNI) | 
					
						
							|  |  |  |         || (cpuFlags & libyuv::kCpuHasAVX512VL) | 
					
						
							|  |  |  |         || (cpuFlags & libyuv::kCpuHasAVX512BW) | 
					
						
							|  |  |  |         || (cpuFlags & libyuv::kCpuHasAVX512VBMI) | 
					
						
							|  |  |  |         || (cpuFlags & libyuv::kCpuHasAVX512VBITALG) | 
					
						
							|  |  |  |         || (cpuFlags & libyuv::kCpuHasAVX512VPOPCNTDQ) | 
					
						
							|  |  |  |         || (cpuFlags & libyuv::kCpuHasAVX512VBMI2) | 
					
						
							|  |  |  |         ) { | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |         coreFunction->pack = 16; | 
					
						
							|  |  |  |         _AVX512_ReorderInit(coreFunction); | 
					
						
							|  |  |  |         _AVX512_ExtraInit(coreFunction); | 
					
						
							|  |  |  |         _AVX512_WinogradInit(coreFunction); | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  |         coreFunction->MNNPackForMatMul_B    = _AVX512_MNNPackForMatMul_B; | 
					
						
							|  |  |  |         coreFunction->MNNPackC4ForMatMul_A  = _AVX512_MNNPackC8ForMatMul_A; | 
					
						
							|  |  |  |         coreFunction->MNNPackedMatMul = _AVX512_MNNPackedMatMul; | 
					
						
							|  |  |  |         coreFunction->MNNPackedMatMulRemain = _AVX512_MNNPackedMatMulRemain; | 
					
						
							| 
									
										
										
										
											2021-11-30 10:10:53 +08:00
										 |  |  |         geP = 48; | 
					
						
							|  |  |  |         ghP = 8; | 
					
						
							|  |  |  |         glP = 1; | 
					
						
							|  |  |  |         _AVX512_MNNInt8FunctionInit(gAVX2CoreInt8Functions, cpuFlags & libyuv::kCpuHasAVX512VNNI); | 
					
						
							| 
									
										
										
										
											2022-05-06 19:51:20 +08:00
										 |  |  |         memcpy(coreFunction->MNNPackedMatMulOC16Functions, _AVX512_MNNPackedMatMulOC16Functions, | 
					
						
							|  |  |  |             sizeof(MNN::CoreFunctions::MNNPackedMatMulKernel) * AVX512_INPUT_TILE_MAX); | 
					
						
							|  |  |  |         memcpy(coreFunction->MNNPackedMatMulOC32Functions, _AVX512_MNNPackedMatMulOC32Functions, | 
					
						
							|  |  |  |             sizeof(MNN::CoreFunctions::MNNPackedMatMulKernel) * AVX512_INPUT_TILE_MAX); | 
					
						
							|  |  |  |         memcpy(coreFunction->MNNPackedMatMulOC48Functions, _AVX512_MNNPackedMatMulOC48Functions, | 
					
						
							|  |  |  |             sizeof(MNN::CoreFunctions::MNNPackedMatMulKernel) * AVX512_INPUT_TILE_MAX); | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     return true; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-11-18 14:37:45 +08:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  | CoreFunctions* AVX2Functions::get() { | 
					
						
							|  |  |  |     return gAVX2CoreFunctions; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  | CoreInt8Functions* AVX2Functions::getInt8() { | 
					
						
							|  |  |  |     return gAVX2CoreInt8Functions; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2021-06-11 17:17:13 +08:00
										 |  |  | }; |