mirror of https://github.com/alibaba/MNN.git
Compare commits
9 Commits
7d22b60100...b1a5eec9c2
| Author | SHA1 | Date |
|---|---|---|
| | b1a5eec9c2 | |
| | 7b9854b4af | |
| | 6dfe547796 | |
| | 78bf2e636e | |
| | 7cd2c7cbad | |
| | 9a085992ea | |
| | 8e7a63d622 | |
| | 3dc5d452a5 | |
| | 00c796ec29 | |
A new 15-line file of glob patterns covering docs, minified assets, images, fonts, and source maps (the file name is not captured in this view):

```diff
@@ -0,0 +1,15 @@
+**/*.md
+**/*.min.js
+**/*.min.css
+**/*.svg
+**/*.png
+**/*.jpg
+**/*.jpeg
+**/*.gif
+**/*.woff
+**/*.woff2
+**/*.map
+**/*.webp
+**/*.ico
+**/*.ttf
+**/*.eot
```
The Android app README (the file this hunk header attributes to "our full multimodal language model (LLM) Android app"):

````diff
@@ -36,7 +36,7 @@ This is our full multimodal language model (LLM) Android app

 # Development
 ## Windows
-基于`AndroidStuido` IDE进行构建:
+Using `Android Studio` IDE for building:
 + Clone the repository:
 ```shell
 git clone https://github.com/alibaba/MNN.git
@@ -44,22 +44,22 @@
 + `AndroidStuido`左上角File->Open,选择该工程,点击`Build`,选择`Make Project`或者`Build App Bundle(s)/APK(s)`,即可生成APK
 ## Linux
 + 克隆代码库:
-```shell
-git clone https://github.com/alibaba/MNN.git
-```
+```shell
+git clone https://github.com/alibaba/MNN.git
+```
 + 配置Android SDK与NDK
-```
-#here we use sdkmanager to install SDK/NDK tools
-sudo sdkmanager "platforms;android-35"
-sudo sdkmanager "build-tools;33.0.1"
-```
+```shell
+#here we use sdkmanager to install SDK/NDK tools
+sudo sdkmanager "platforms;android-35"
+sudo sdkmanager "build-tools;33.0.1"
+```
 + 编译构建
-```
-#仅编译debug版本
-./gradlew assembleDebug
-#编译且安装,需要确保安卓设备已连接
-./gradlew installDebug
-```
+```shell
+#仅编译debug版本
+./gradlew assembleDebug
+#编译且安装,需要确保安卓设备已连接
+./gradlew installDebug
+```

 # Releases
````

(Translation of the Chinese context lines: open the project via File->Open in `Android Studio`'s top-left, click `Build`, then choose `Make Project` or `Build App Bundle(s)/APK(s)` to produce an APK; under Linux, clone the repository, configure the Android SDK and NDK, then build, where `./gradlew assembleDebug` only builds the debug APK and `./gradlew installDebug` builds and installs it to a connected Android device. The visible change tags the fenced blocks as `shell`; the first block pair apparently differs only in whitespace.)
A TVM example script: parse the downloaded synset file with `ast.literal_eval` instead of `eval`:

```diff
@@ -1,3 +1,4 @@
+import ast
 import tvm
 from tvm import relay, autotvm
 from tvm import rpc, relay
@@ -60,7 +61,7 @@ def prepare_input():
     img_path = download_testdata(img_url, "cat.png", module="data")
     synset_path = download_testdata(synset_url, synset_name, module="data")
     with open(synset_path) as f:
-        synset = eval(f.read())
+        synset = ast.literal_eval(f.read())
     image = Image.open(img_path).resize((224, 224))

     image = np.array(image) - np.array([123.0, 117.0, 104.0])
```

`ast.literal_eval` only accepts Python literals, so a downloaded synset file can no longer execute arbitrary code the way `eval` would.
A pip requirements file: lift the numpy pin to a minimum version:

```diff
@@ -1,6 +1,6 @@
 jedi==0.17.2
 watchdog==0.10.6
-numpy==1.16.4
+numpy>=1.22.0
 flatbuffers==1.12
 validators==0.14.2
 opencv-python==4.8.1.78
```
CPU convolution factory (`_createUnit`): drop the fp32-only (`bytes == 4`) restriction on the KleidiAI SME2 path; the rewritten condition also fixes the `weigthQauntInfo` spelling:

```diff
@@ -97,12 +97,12 @@ static Execution* _createUnit(const Tensor* input, const Tensor* output, Backend
 #else
     if (cpuBackend->memoryMode() == BackendConfig::Memory_Low) {
 #ifdef MNN_KLEIDIAI_ENABLED
-        if (MNNGetCPUInfo()->sme2 && !weigthQauntInfo && cpuBackend->functions()->bytes == 4) {
+        if (MNNGetCPUInfo()->sme2 && !weightQuantInfo) {
             return new KleidiAIDenseConvolution(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo);
         }
 #else
         return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo);
 #endif
         return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo);
     }
 #endif
@@ -122,7 +122,7 @@ static Execution* _createUnit(const Tensor* input, const Tensor* output, Backend
 #endif

 #ifdef MNN_KLEIDIAI_ENABLED
-    if (MNNGetCPUInfo()->sme2 && !weightQuantInfo && cpuBackend->functions()->bytes == 4) {
+    if (MNNGetCPUInfo()->sme2 && !weightQuantInfo) {
         return new KleidiAIDenseConvolution(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo);
     }
 #endif
```
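Taken together, the two hunks above widen the KleidiAI gate: SME2 hardware and unquantized weights are still required, but the `bytes == 4` test is gone, so fp16 (2-byte) models can now reach `KleidiAIDenseConvolution`, with the fp32/fp16 split handled inside the executor. A minimal standalone sketch of that dispatch shape — `CpuInfo`, `pickConvImpl`, and `runKleidiAI` are hypothetical stand-ins, not MNN's real factory:

```cpp
// Hedged sketch, not MNN's API: models the widened SME2 gate and the
// element-size dispatch the diff moves into the executor.
#include <cstdio>
#include <cstdlib>

struct CpuInfo { bool sme2; };  // stand-in for MNNGetCPUInfo()

enum class ConvImpl { KleidiAI, DenseTiled };

// New gate: hardware + quantization only; element size no longer matters here.
static ConvImpl pickConvImpl(const CpuInfo& cpu, bool weightQuantized) {
    return (cpu.sme2 && !weightQuantized) ? ConvImpl::KleidiAI
                                          : ConvImpl::DenseTiled;
}

// Inside the executor, element size selects the kernel family.
static void runKleidiAI(int bytes) {
    if (bytes == 4) {
        // fp32: *_x32p2vlx1b_* packers + f32 SME2 matmul
    } else if (bytes == 2) {
        // fp16: *_x16p2vlx2b_* packers + f16 SME2 matmul
    } else {
        std::fprintf(stderr, "Not fp32 and fp16, should not be called here\n");
        std::abort();
    }
}

int main() {
    CpuInfo cpu{/*sme2=*/true};
    if (pickConvImpl(cpu, /*weightQuantized=*/false) == ConvImpl::KleidiAI) {
        runKleidiAI(/*bytes=*/2);  // fp16 now qualifies for the SME2 path
    }
    return 0;
}
```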
The KleidiAI dense convolution implementation: pull in the fp16 KleidiAI headers and add an fp16 branch to the templated RHS weight packer; the error message now names both supported precisions:

```diff
@@ -9,8 +9,11 @@
 #include "backend/cpu/CPUTensorConvert.hpp"
 #include "core/Macro.h"
 #include "core/TensorUtils.hpp"
+#include "kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.h"
 #include "kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme2_mopa.h"
+#include "kai_lhs_imatmul_pack_x16p2vlx2_x16p_sme.h"
 #include "kai_lhs_imatmul_pack_x32p2vlx1_x32p_sme.h"
+#include "kai_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme.h"
 #include "kai_rhs_imatmul_pack_kxn_x32p2vlx1b_x32_x32_sme.h"

 namespace MNN {
@@ -26,8 +29,11 @@ static void initWeight(const T* weight, const T* bias, T* cache, T* output, cons
     if (bytes == 4) {
         kai_run_rhs_imatmul_pack_kxn_x32p2vlx1b_x32_x32_sme(outputCount, kh * kw, srcCount, outputCount * sizeof(T),
                                                             cache, bias, output);
+    } else if (bytes == 2) {
+        kai_run_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme(outputCount, kh * kw, srcCount, outputCount * sizeof(T),
+                                                            cache, bias, output);
     } else {
-        MNN_ERROR("Not fp32, should not be called here\n");
+        MNN_ERROR("Not fp32 and fp16, should not be called here\n");
         abort();
     }
 }
```
The constructor's packed-weight size query gains the matching fp16 variant:

```diff
@@ -49,8 +55,11 @@ KleidiAIDenseConvolution::KleidiAIDenseConvolution(const Convolution2DCommon* co
     if (core->bytes == 4) {
         kai_rhs_packed_size = kai_get_rhs_packed_size_rhs_imatmul_pack_kxn_x32p2vlx1b_x32_x32_sme(
             outputCount, common->kernelY() * common->kernelX(), srcCount);
+    } else if (core->bytes == 2) {
+        kai_rhs_packed_size = kai_get_rhs_packed_size_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme(
+            outputCount, common->kernelY() * common->kernelX(), srcCount);
     } else {
-        MNN_ERROR("Not fp32, should not be called here\n");
+        MNN_ERROR("Not fp32 and fp16, should not be called here\n");
         abort();
     }
     mResource->mWeight.reset(Tensor::createDevice<uint8_t>({kai_rhs_packed_size}));
```
Weight and bias initialization in the constructor:

```diff
@@ -76,8 +85,17 @@ KleidiAIDenseConvolution::KleidiAIDenseConvolution(const Convolution2DCommon* co
     if (core->bytes == 4) {
         MNN::initWeight(originWeight, bias, cache->host<float>(), mResource->mWeight->host<float>(), oihwShape,
                         core->bytes);
+    } else if (core->bytes == 2) {
+        for (int i = 0; i < outputCount; i++) {
+            mResource->mBias->host<__fp16>()[i] = (__fp16)(bias[i]);
+        }
+        ConvertOIHWToHWIO(cache->host<__fp16>(), originWeight,
+                          {outputCount, srcCount, common->kernelY(), common->kernelX()});
+        kai_run_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme(
+            outputCount, common->kernelY() * common->kernelX(), srcCount, outputCount * sizeof(__fp16),
+            cache->host<__fp16>(), mResource->mBias->host<__fp16>(), mResource->mWeight->host<__fp16>());
     } else {
-        MNN_ERROR("Not fp32, should not be called here\n");
+        MNN_ERROR("Not fp32 and fp16, should not be called here\n");
         abort();
     }

```
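The new fp16 branch does three things: narrows the fp32 bias to `__fp16`, reorders the OIHW weights into HWIO for the kxn RHS packer, and runs the fp16 pack. `ConvertOIHWToHWIO` is MNN-internal; as a rough sketch of what such a reorder looks like — assuming, as the call above implies, it also casts element types; the signature here is hypothetical:

```cpp
// Hedged sketch of an OIHW -> HWIO weight reorder with an element-type cast.
// dst[h][w][i][o] = src[o][i][h][w]; the packer then sees K = H*W*I rows of
// N = O columns, which matches the kxn packing entry point used above.
template <typename TDst, typename TSrc>
static void convertOIHWToHWIO(TDst* dst, const TSrc* src, int O, int I, int H, int W) {
    for (int h = 0; h < H; ++h) {
        for (int w = 0; w < W; ++w) {
            for (int i = 0; i < I; ++i) {
                for (int o = 0; o < O; ++o) {
                    dst[((h * W + w) * I + i) * O + o] =
                        static_cast<TDst>(src[((o * I + i) * H + h) * W + w]);
                }
            }
        }
    }
}
```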
`KleidiAIDenseConvolutionMultiInput::onExecute`, where weights arrive as a runtime input:

```diff
@@ -135,8 +153,11 @@ ErrorCode KleidiAIDenseConvolutionMultiInput::onExecute(const std::vector<Tensor
     if (function->bytes == 4) {
         initWeight(source, mInputs[2]->host<float>(), cache, mTempWeight->host<float>(), inputs[1]->shape(),
                    function->bytes);
+    } else if (function->bytes == 2) {
+        initWeight(reinterpret_cast<const __fp16*>(source), mInputs[2]->host<__fp16>(),
+                   reinterpret_cast<__fp16*>(cache), mTempWeight->host<__fp16>(), inputs[1]->shape(), function->bytes);
     } else {
-        MNN_ERROR("Not fp32, should not be called here\n");
+        MNN_ERROR("Not fp32 and fp16, should not be called here\n");
         abort();
     }
     return mProxy->onExecute(mInputs, outputs);
```
...and its `onResize`, sizing the temporary packed weight:

```diff
@@ -150,8 +171,12 @@ ErrorCode KleidiAIDenseConvolutionMultiInput::onResize(const std::vector<Tensor*
         int kai_rhs_packed_size = kai_get_rhs_packed_size_rhs_imatmul_pack_kxn_x32p2vlx1b_x32_x32_sme(
             outputCount, inputs[1]->stride(1), depth);
         mTempWeight.reset(Tensor::createDevice<uint8_t>({kai_rhs_packed_size}));
+    } else if (function->bytes == 2) {
+        int kai_rhs_packed_size = kai_get_rhs_packed_size_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme(
+            outputCount, inputs[1]->stride(1), depth);
+        mTempWeight.reset(Tensor::createDevice<uint8_t>({kai_rhs_packed_size}));
     } else {
-        MNN_ERROR("Not fp32, should not be called here\n");
+        MNN_ERROR("Not fp32 and fp16, should not be called here\n");
         abort();
     }
     mTempWeightCache.reset(Tensor::createDevice<float>(
```
`KleidiAIDenseConvolutionImpl::onResize`, sizing the packed-LHS scratch buffer:

```diff
@@ -206,8 +231,11 @@ ErrorCode KleidiAIDenseConvolutionImpl::onResize(const std::vector<Tensor*>& inp
     if (core->bytes == 4) {
         mTempBufferTranspose.buffer().dim[0].extent =
             kai_get_lhs_packed_size_lhs_imatmul_pack_x32p2vlx1_x32p_sme(outputNhwSize, kernelSize, ic);
+    } else if (core->bytes == 2) {
+        mTempBufferTranspose.buffer().dim[0].extent =
+            kai_get_lhs_packed_size_lhs_imatmul_pack_x16p2vlx2_x16p_sme(outputNhwSize, kernelSize, ic);
     } else {
-        MNN_ERROR("Not fp32, should not be called here\n");
+        MNN_ERROR("Not fp32 and fp16, should not be called here\n");
         abort();
     }
     TensorUtils::setLinearLayout(&mTempBufferTranspose);
```
Packing the LHS via an indirection table:

```diff
@@ -289,8 +317,16 @@ ErrorCode KleidiAIDenseConvolutionImpl::onResize(const std::vector<Tensor*>& inp
         kai_run_lhs_imatmul_pack_x32p2vlx1_x32p_sme(outputNhwSize, kernelSize, ic, table.data.data(), 0,
                                                     mPadBuffer.host<uint8_t>(),
                                                     mTempBufferTranspose.host<uint8_t>());
+    } else if (bytes == 2) {
+        int blockSize = kai_get_m_step_lhs_imatmul_pack_x16p2vlx2_x16p_sme();
+        ::memset(mPadBuffer.host<__fp16>(), 0, params.inputChannel * sizeof(__fp16));
+        auto table = IndirectionTable<__fp16>(mInputNHWC.shape(), params, mInputNHWC.host<__fp16>(),
+                                              mPadBuffer.host<__fp16>(), blockSize);
+        kai_run_lhs_imatmul_pack_x16p2vlx2_x16p_sme(outputNhwSize, kernelSize, ic, table.data.data(), 0,
+                                                    mPadBuffer.host<uint8_t>(),
+                                                    mTempBufferTranspose.host<uint8_t>());
     } else {
-        MNN_ERROR("Not fp32, should not be called here\n");
+        MNN_ERROR("Not fp32 and fp16, should not be called here\n");
         abort();
     }

```
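The fp16 branch mirrors the fp32 one: query the packer's row-block step, zero one channel-row of `mPadBuffer`, build an indirection table over the NHWC input, and run the LHS packer. As a hedged illustration of the general indirect-convolution idea — not MNN's actual `IndirectionTable` — the table holds one row pointer per (output pixel, kernel tap), and taps that fall into the padded border all alias the shared zeroed row, so the matmul kernel never has to bounds-check:

```cpp
// Hedged sketch: indirection table for indirect convolution, stride 1 and
// "same" padding assumed. Out-of-bounds taps point at a shared zero row.
#include <vector>

template <typename T>
std::vector<const T*> buildIndirectionTable(const T* inputNHWC, const T* zeroRow,
                                            int H, int W, int C,
                                            int kh, int kw, int padH, int padW) {
    std::vector<const T*> table;
    table.reserve(static_cast<size_t>(H) * W * kh * kw);
    for (int oy = 0; oy < H; ++oy) {
        for (int ox = 0; ox < W; ++ox) {
            for (int ky = 0; ky < kh; ++ky) {
                for (int kx = 0; kx < kw; ++kx) {
                    const int iy = oy + ky - padH;
                    const int ix = ox + kx - padW;
                    const bool inside = iy >= 0 && iy < H && ix >= 0 && ix < W;
                    // Each entry addresses one C-element input row (pixel).
                    table.push_back(inside ? inputNHWC + (iy * W + ix) * C
                                           : zeroRow);
                }
            }
        }
    }
    return table;
}
```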
Finally, the matmul itself, with the fp16 clamp ceiling:

```diff
@@ -300,8 +336,14 @@ ErrorCode KleidiAIDenseConvolutionImpl::onResize(const std::vector<Tensor*>& inp
             outputNhwSize, outputChannel, kernelSize, ic, mTempBufferTranspose.host<uint8_t>(),
             weight->host<uint8_t>(), mOutputNHWC.host<uint8_t>(), outputChannel * sizeof(float), postParameters[2],
             postParameters[3]);
+    } else if (bytes == 2) {
+        float max = postParameters[3] > 65504.f ? 65504.f : postParameters[3];
+        kai_run_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa(
+            outputNhwSize, outputChannel, kernelSize, ic, mTempBufferTranspose.host<uint8_t>(),
+            weight->host<uint8_t>(), mOutputNHWC.host<uint8_t>(), outputChannel * sizeof(__fp16), postParameters[2],
+            max);
     } else {
-        MNN_ERROR("Not fp32, should not be called here\n");
+        MNN_ERROR("Not fp32 and fp16, should not be called here\n");
         abort();
     }

```
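The one real asymmetry in this last fp16 branch is the clamp ceiling: 65504 is the largest finite IEEE 754 binary16 value, so an fp32 upper bound above it (commonly `FLT_MAX` when no clamp is wanted) would land at infinity once results are stored as fp16. A tiny sketch of the cap:

```cpp
// Sketch: cap an fp32 clamp bound at the largest finite fp16 value before
// passing it to an fp16 kernel, as the diff does with postParameters[3].
#include <algorithm>

constexpr float kFp16Max = 65504.0f;  // max finite IEEE 754 binary16 value

inline float fp16ClampCeiling(float requestedMax) {
    return std::min(requestedMax, kFp16Max);
}
```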