mirror of https://github.com/alibaba/MNN.git
Merge pull request #3770 from SixtyWang/kai/fix-bugs
Fix bug in KleidiAI Code
This commit is contained in:
commit
7a92132554
|
@ -103,7 +103,7 @@ if (MNN_KLEIDIAI)
|
|||
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme.c
|
||||
)
|
||||
|
||||
- set_source_files_properties(${MNN_SOURCES_KLEIDIAI} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+i8mm+dotprod+sve+sve2+fp16)
|
||||
+ set_source_files_properties(${MNN_SOURCES_KLEIDIAI} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+i8mm+dotprod+sve+sve2+fp16")
|
||||
set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2")
|
||||
|
||||
endif()
|
||||
|
|
|
@ -211,6 +211,11 @@ namespace MNN {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
+ if(type == AccelType::QI4_SYM_CHNLQT_F32){
|
||||
+ if(common->inputCount() % 2 != 0) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
if(common->kernelX() == 1 && common->kernelY() == 1
|
||||
&& common->padX() == 0 && common->padY() == 0
|
||||
&& common->strideX() == 1 && common->strideY() == 1
|
||||
|
|
|
@ -175,8 +175,11 @@ ErrorCode KleidiAIConvolution::onResize(const std::vector<Tensor *> &inputs, con
|
|||
if (outputOriginFmt != MNN_DATA_FORMAT_NHWC){
|
||||
b->onReleaseBuffer(mOutputConvertBuffer.get(), Backend::DYNAMIC);
|
||||
}
|
||||
|
||||
+ mPostParameters = getPostParameters();
|
||||
return NO_ERROR;
|
||||
}
|
||||
|
||||
ErrorCode KleidiAIConvolution::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
|
||||
auto input = inputs[0];
|
||||
auto output = outputs[0];
|
||||
|
@ -209,13 +212,12 @@ ErrorCode KleidiAIConvolution::onExecute(const std::vector<Tensor *> &inputs, co
|
|||
}
|
||||
|
||||
auto outputDes = TensorUtils::getDescribe(outputs[0]);
|
||||
- auto postPtr = getPostParameters();
|
||||
auto outputPtr = output->host<uint8_t>();
|
||||
if(outputDes->dimensionFormat != MNN_DATA_FORMAT_NHWC){
|
||||
outputPtr = mOutputConvertBuffer->host<uint8_t>();
|
||||
}
|
||||
|
||||
- kai.runMatmul(mAccelType, m, n, k, 0, lhsPacked, weightPtr, outputPtr, n * elementSize, elementSize, postPtr[3], postPtr[2]);
|
||||
+ kai.runMatmul(mAccelType, m, n, k, 0, lhsPacked, weightPtr, outputPtr, n * elementSize, elementSize, mPostParameters[3], mPostParameters[2]);
|
||||
|
||||
if(outputDes->dimensionFormat != MNN_DATA_FORMAT_NHWC){
|
||||
MNN_CONCURRENCY_BEGIN(tId, threadNum) {
|
||||
|
|
|
@ -28,7 +28,7 @@ class KleidiAIConvolution : public CPUConvolution{
|
|||
std::shared_ptr<Tensor> mOutputConvertBuffer;
|
||||
std::shared_ptr<CPUConvolution::Resource> mResource;
|
||||
KleidiAI::AccelType mAccelType = KleidiAI::AccelType::ACC_TYPE_NUMBER;
|
||||
|
||||
+ std::vector<float> mPostParameters;
|
||||
};
|
||||
#endif //MNN_KLEIDIAI_ENABLED
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ struct ConvParams {
|
|||
int dilatedKernelWidth = kernelSizeWithDilated(kernelWidth, dilatedWidth);
|
||||
|
||||
int outputHeight = outputSize(inputHeight, padTop, padBottom, dilatedKernelHeight, strideHeight);
|
||||
- int outputWidth = outputSize(inputHeight, padLeft, padRight, dilatedKernelWidth, strideWidth);
|
||||
+ int outputWidth = outputSize(inputWidth, padLeft, padRight, dilatedKernelWidth, strideWidth);
|
||||
|
||||
return {outputHeight, outputWidth};
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue