From 380370fcf825c0342655024a6a02bbfc8a2e4a0a Mon Sep 17 00:00:00 2001 From: SixtyWang Date: Thu, 31 Jul 2025 16:56:51 +0800 Subject: [PATCH] Fix bugs in KleidiAIDenseConvolution, KleidiAIConvolution and QI4_SYM_CHNLQT_F32 - Corrected outputWidth calculation in KleidiAIDenseConvolution - Fixed use-after-free due to late call to getPostParameters in KleidiAIConvolution - Resolved SME symmetric quantization kernel problem --- source/backend/cpu/arm/CMakeLists.txt | 2 +- source/backend/cpu/arm/mnn_kleidiai.h | 5 +++++ source/backend/cpu/compute/KleidiAIConvolution.cpp | 6 ++++-- source/backend/cpu/compute/KleidiAIConvolution.hpp | 2 +- source/backend/cpu/compute/KleidiAIDenseConvolution.hpp | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/source/backend/cpu/arm/CMakeLists.txt b/source/backend/cpu/arm/CMakeLists.txt index e505f995..675bc170 100644 --- a/source/backend/cpu/arm/CMakeLists.txt +++ b/source/backend/cpu/arm/CMakeLists.txt @@ -103,7 +103,7 @@ if (MNN_KLEIDIAI) ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme.c ) - set_source_files_properties(${MNN_SOURCES_KLEIDIAI} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+i8mm+dotprod+sve+sve2+fp16) + set_source_files_properties(${MNN_SOURCES_KLEIDIAI} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+i8mm+dotprod+sve+sve2+fp16") set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2") endif() diff --git a/source/backend/cpu/arm/mnn_kleidiai.h b/source/backend/cpu/arm/mnn_kleidiai.h index e48d2392..2f884886 100644 --- a/source/backend/cpu/arm/mnn_kleidiai.h +++ b/source/backend/cpu/arm/mnn_kleidiai.h @@ -211,6 +211,11 @@ namespace MNN { return false; } } + if(type == AccelType::QI4_SYM_CHNLQT_F32){ + if(common->inputCount() % 2 != 0) { + return false; + } + } if(common->kernelX() == 1 && common->kernelY() == 1 && common->padX() == 0 && common->padY() == 0 && common->strideX() == 1 
&& common->strideY() == 1 diff --git a/source/backend/cpu/compute/KleidiAIConvolution.cpp b/source/backend/cpu/compute/KleidiAIConvolution.cpp index e97b809a..e50e7afd 100644 --- a/source/backend/cpu/compute/KleidiAIConvolution.cpp +++ b/source/backend/cpu/compute/KleidiAIConvolution.cpp @@ -175,8 +175,11 @@ ErrorCode KleidiAIConvolution::onResize(const std::vector &inputs, con if (outputOriginFmt != MNN_DATA_FORMAT_NHWC){ b->onReleaseBuffer(mOutputConvertBuffer.get(), Backend::DYNAMIC); } + + mPostParameters = getPostParameters(); return NO_ERROR; } + ErrorCode KleidiAIConvolution::onExecute(const std::vector &inputs, const std::vector &outputs) { auto input = inputs[0]; auto output = outputs[0]; @@ -209,13 +212,12 @@ ErrorCode KleidiAIConvolution::onExecute(const std::vector &inputs, co } auto outputDes = TensorUtils::getDescribe(outputs[0]); - auto postPtr = getPostParameters(); auto outputPtr = output->host(); if(outputDes->dimensionFormat != MNN_DATA_FORMAT_NHWC){ outputPtr = mOutputConvertBuffer->host(); } - kai.runMatmul(mAccelType, m, n, k, 0, lhsPacked, weightPtr, outputPtr, n * elementSize, elementSize, postPtr[3], postPtr[2]); + kai.runMatmul(mAccelType, m, n, k, 0, lhsPacked, weightPtr, outputPtr, n * elementSize, elementSize, mPostParameters[3], mPostParameters[2]); if(outputDes->dimensionFormat != MNN_DATA_FORMAT_NHWC){ MNN_CONCURRENCY_BEGIN(tId, threadNum) { diff --git a/source/backend/cpu/compute/KleidiAIConvolution.hpp b/source/backend/cpu/compute/KleidiAIConvolution.hpp index 5f8f1964..ef119d8d 100644 --- a/source/backend/cpu/compute/KleidiAIConvolution.hpp +++ b/source/backend/cpu/compute/KleidiAIConvolution.hpp @@ -28,7 +28,7 @@ class KleidiAIConvolution : public CPUConvolution{ std::shared_ptr mOutputConvertBuffer; std::shared_ptr mResource; KleidiAI::AccelType mAccelType = KleidiAI::AccelType::ACC_TYPE_NUMBER; - + std::vector mPostParameters; }; #endif //MNN_KLEIDIAI_ENABLED diff --git a/source/backend/cpu/compute/KleidiAIDenseConvolution.hpp 
b/source/backend/cpu/compute/KleidiAIDenseConvolution.hpp index 22157d1e..bbc5acca 100644 --- a/source/backend/cpu/compute/KleidiAIDenseConvolution.hpp +++ b/source/backend/cpu/compute/KleidiAIDenseConvolution.hpp @@ -37,7 +37,7 @@ struct ConvParams { int dilatedKernelWidth = kernelSizeWithDilated(kernelWidth, dilatedWidth); int outputHeight = outputSize(inputHeight, padTop, padBottom, dilatedKernelHeight, strideHeight); - int outputWidth = outputSize(inputHeight, padLeft, padRight, dilatedKernelWidth, strideWidth); + int outputWidth = outputSize(inputWidth, padLeft, padRight, dilatedKernelWidth, strideWidth); return {outputHeight, outputWidth}; }