refactor cpuids setting from BackendConfig to HintMode

2025-07-18 09:42:59 +00:00 · 2025-07-18 09:42:59 +00:00 · c018eacc00
parent 8907c96b4e
commit c018eacc00
14 changed files with 96 additions and 70 deletions
--- a/docs/inference/session.md
+++ b/docs/inference/session.md
@ -185,13 +185,10 @@ struct BackendConfig {
        void* sharedContext = nullptr;
        size_t flags; // Valid for CPU Backend
    };
-
-    /** user specified cpu cores */
-    std::vector<int> cpuIds;
 };
 ```

-`memory`、`power`、`precision`分别为内存、功耗和精度偏好。支持这些选项的后端会在执行时做出相应调整；若不支持，则忽略选项。`cpuIds`允许用户指定一组CPU核心用于计算，但这些指定会被严格的校验合法性，当线程数量与合法的指定核心数量相同时得到最佳性能。
+`memory`、`power`、`precision`分别为内存、功耗和精度偏好。支持这些选项的后端会在执行时做出相应调整；若不支持，则忽略选项。

 示例：
 后端 **OpenCL**
--- a/express/Executor.cpp
+++ b/express/Executor.cpp
@ -231,6 +231,14 @@ void Executor::RuntimeManager::setHint(Interpreter::HintMode mode, int value) {
        iter.second->setRuntimeHint(mInside->mContent->modes.runtimeHint);
    }
 }
+void Executor::RuntimeManager::setHint(Interpreter::HintMode mode, int* value, size_t size) { // Array-valued hint overload: value points to size ints (e.g. CPU_CORE_IDS).
+    mInside->mContent->modes.setHint(mode, value, size); // Record the hint in mContent->modes first.
+    auto current = ExecutorScope::Current(); // Runtimes come from the executor of the current ExecutorScope.
+    auto rt = current->getRuntime();
+    for (auto& iter : rt.first) { // Hand the updated runtimeHint to every runtime in rt.first.
+        iter.second->setRuntimeHint(mInside->mContent->modes.runtimeHint);
+    }
+}
 void Executor::RuntimeManager::setExternalPath(std::string path, int type) {
    mInside->mContent->modes.setExternalPath(path, type);
 }
--- a/include/MNN/Interpreter.hpp
+++ b/include/MNN/Interpreter.hpp
@ -245,7 +245,10 @@ public:
        USE_CACHED_MMAP = 12,
        
        // Multi-Thread Load module, default is 0 (don't use other Thread)
-        INIT_THREAD_NUMBER = 13
+        INIT_THREAD_NUMBER = 13,
+
+        // User-specified CPU core ids; an array hint, pass it through setSessionHint(hint, int* value, size_t size)
+        CPU_CORE_IDS = 14,
    };

    enum ExternalPathType {
@ -280,10 +283,12 @@ public:

    /**
     * @brief The API should be called before creating the session.
-     * @param mode      Hint type
+     * @param hint      Hint type
     * @param value     Hint value
+     * @param size      Number of ints pointed to by value (pointer overload only)
     */
-    void setSessionHint(HintMode mode, int value);
+    void setSessionHint(HintMode hint, int value);
+    void setSessionHint(HintMode hint, int* value, size_t size);
 public:
    /**
     * @brief create runtimeInfo separately with schedule config.
--- a/include/MNN/MNNForwardType.h
+++ b/include/MNN/MNNForwardType.h
@ -10,7 +10,6 @@
 #define MNNForwardType_h
 #include <stdint.h>
 #include <stddef.h>
-#include <vector>

 typedef enum {
    MNN_FORWARD_CPU = 0,
@ -93,9 +92,6 @@ struct BackendConfig {
        void* sharedContext = nullptr;
        size_t flags; // Valid for CPU Backend
    };
-
-    /** user specified cpu cores */
-    std::vector<int> cpuIds;
 };

    /** acquire runtime status by Runtime::getCurrentStatus with following keys,
--- a/include/MNN/expr/Executor.hpp
+++ b/include/MNN/expr/Executor.hpp
@ -126,6 +126,7 @@ public:
        friend class Executor;
        void setMode(Interpreter::SessionMode mode);
        void setHint(Interpreter::HintMode mode, int value);
+        void setHint(Interpreter::HintMode mode, int* value, size_t size);
        void setHintPtr(Interpreter::HintMode mode, void* value);
        bool getInfo(Interpreter::SessionInfoCode code, void* ptr);
        BackendConfig* getBnConfig();
--- a/source/backend/cpu/CPUBackend.cpp
+++ b/source/backend/cpu/CPUBackend.cpp
@ -113,7 +113,7 @@ void CPURuntime::_bindCPUCore() const {
 #endif
 }

-void CPURuntime::_resetThreadPool() {
+void CPURuntime::_resetThreadPool() const{
    mThreadNumber = std::max(1, mThreadNumber);
    mThreadNumber = std::min(mThreadNumber, MAX_THREAD_NUMBER);
 #ifdef MNN_USE_THREAD_POOL
@ -136,7 +136,7 @@ void CPURuntime::_resetThreadPool() {
    // Reset tid to rebind cpu if necessary
    mCurrentTID = 0;
 }
-void CPURuntime::_validateCpuIds() {
+void CPURuntime::_validateCpuIds() const{
    bool valid = true;

    do {
@ -200,8 +200,7 @@ void CPURuntime::_validateCpuIds() {
            case BackendConfig::Power_Low:
                    mCpuIds = cpuInfo->groups[0].ids;
                break;
-            case BackendConfig::Power_High:
-            {
+            case BackendConfig::Power_High: {
                int selectCPUSize = 0;
                int groupIndex = cpuInfo->groups.size() - 1;
                while (selectCPUSize < mThreadNumber && groupIndex >= 0) {
@ -220,7 +219,6 @@ void CPURuntime::_validateCpuIds() {
 void CPURuntime::onReset(int numberThread, const BackendConfig* config, bool full) {
    if (config != nullptr) {
        mPower = config->power;
-        mCpuIds = config->cpuIds;
        if (full) {
            mPrecision = config->precision;
            mMemory = config->memory;
@ -228,6 +226,7 @@ void CPURuntime::onReset(int numberThread, const BackendConfig* config, bool ful
        }
    }
    mThreadNumber = numberThread;
+    mCpuIds = hint().cpuIds;
    _validateCpuIds();
    mCpuMask = MNNGetCPUMask(mCpuIds);
    _resetThreadPool();
@ -250,11 +249,7 @@ CPURuntime::CPURuntime(const Backend::Info& info) {
        mPower = info.user->power;
        mMemory = info.user->memory;
        mFlags = info.user->flags;
-        mCpuIds = info.user->cpuIds;
    }
-    _validateCpuIds();
-    mCpuMask = MNNGetCPUMask(mCpuIds);
-    _resetThreadPool();
 #ifdef LOG_VERBOSE
    MNN_PRINT("create CPURuntime:%p\n", this);
 #endif
@ -287,6 +282,12 @@ SingleBufferWithAllocator* CPURuntime::buffer(int index) const {
 }

 Backend* CPURuntime::onCreate(const BackendConfig* config, Backend* origin) const {
+    {
+        mCpuIds = hint().cpuIds;
+        _validateCpuIds();
+        mCpuMask = MNNGetCPUMask(mCpuIds);
+        _resetThreadPool();
+    }
    if (hint().midMemoryPath.size() > 0) {
        if (mDynamicMmap.empty()) {
            // Only support set featuremap dir once
--- a/source/backend/cpu/CPUBackend.hpp
+++ b/source/backend/cpu/CPUBackend.hpp
@ -54,16 +54,16 @@ public:

 private:
    void _bindCPUCore() const;
-    void _resetThreadPool();
-    void _validateCpuIds();
+    void _resetThreadPool() const;
+    void _validateCpuIds() const;
    mutable std::shared_ptr<EagerBufferAllocator> mStaticAllocator;
-    int mThreadNumber;
-    std::vector<int> mCpuIds;
-    unsigned long mCpuMask;
+    mutable int mThreadNumber;
+    mutable std::vector<int> mCpuIds;
+    mutable unsigned long mCpuMask;
 #ifdef MNN_USE_THREAD_POOL
    mutable int mTaskIndex = -1;
    mutable int mThreadOpen = 0;
-    ThreadPool* mThreadPool = nullptr;
+    mutable ThreadPool* mThreadPool = nullptr;
 #endif
    BackendConfig::MemoryMode mMemory;
    BackendConfig::PowerMode mPower;
--- a/source/core/Backend.hpp
+++ b/source/core/Backend.hpp
@ -58,6 +58,9 @@ struct RuntimeHint {
    // op encoder number for once commit
    int encorderNumForCommit = 10;
    int initThreadNumber = 0;
+
+    // user-specified cpu core ids (filled by HintMode::CPU_CORE_IDS); empty when not set
+    std::vector<int> cpuIds;
 };
 /** abstract backend */
 class Backend : public NonCopyable {
--- a/source/core/Interpreter.cpp
+++ b/source/core/Interpreter.cpp
@ -140,8 +140,12 @@ Interpreter* Interpreter::createFromBufferInternal(Content* net, bool enforceAut
    return new Interpreter(net);
 }

-void Interpreter::setSessionHint(HintMode mode, int hint) {
-    mNet->modes.setHint(mode, hint);
+void Interpreter::setSessionHint(HintMode hint, int value) { // Scalar hint: forwarded unchanged to mNet->modes.
+    mNet->modes.setHint(hint, value);
+}
+
+void Interpreter::setSessionHint(HintMode hint, int* value, size_t size) { // Array hint: value points to size ints, forwarded unchanged to mNet->modes.
+    mNet->modes.setHint(hint, value, size);
 }

 void Interpreter::setSessionMode(SessionMode mode) {
--- a/source/core/Session.cpp
+++ b/source/core/Session.cpp
@ -68,46 +68,55 @@ void Session::ModeGroup::setMode(Interpreter::SessionMode mode) {
        codegenMode = mode;
    }
 }
-void Session::ModeGroup::setHint(Interpreter::HintMode mode, int hint) {
-    switch (mode) {
-        case Interpreter::MAX_TUNING_NUMBER:
-            maxTuningNumber = hint;
+void Session::ModeGroup::setHint(Interpreter::HintMode hint, int value) { // Scalar hints: each case stores value in the matching ModeGroup / runtimeHint field.
+    switch (hint) {
+        case Interpreter::HintMode::MAX_TUNING_NUMBER:
+            maxTuningNumber = value;
            break;
-        case Interpreter::MEM_ALLOCATOR_TYPE:
-            runtimeHint.memoryAllocatorType = hint;
+        case Interpreter::HintMode::MEM_ALLOCATOR_TYPE:
+            runtimeHint.memoryAllocatorType = value;
            break;
-        case Interpreter::WINOGRAD_MEMORY_LEVEL:
-            runtimeHint.winogradMemoryUsed = hint;
+        case Interpreter::HintMode::WINOGRAD_MEMORY_LEVEL:
+            runtimeHint.winogradMemoryUsed = value;
            break;
-        case Interpreter::CPU_LITTLECORE_DECREASE_RATE:
-            runtimeHint.cpuDecreaseRate = hint;
+        case Interpreter::HintMode::CPU_LITTLECORE_DECREASE_RATE:
+            runtimeHint.cpuDecreaseRate = value;
            break;
-        case Interpreter::GEOMETRY_COMPUTE_MASK:
-            geometryMask = hint;
+        case Interpreter::HintMode::GEOMETRY_COMPUTE_MASK:
+            geometryMask = value;
            break;
-        case Interpreter::STRICT_CHECK_MODEL:
-            checkNetBuffer = hint > 0;
+        case Interpreter::HintMode::STRICT_CHECK_MODEL:
+            checkNetBuffer = value > 0; // any positive value sets checkNetBuffer to true, otherwise false
            break;
-        case Interpreter::DYNAMIC_QUANT_OPTIONS:
-            runtimeHint.dynamicQuantOption = hint;
+        case Interpreter::HintMode::DYNAMIC_QUANT_OPTIONS:
+            runtimeHint.dynamicQuantOption = value;
            break;
-        case Interpreter::QKV_QUANT_OPTIONS:
-            runtimeHint.qkvQuantOption = hint;
+        case Interpreter::HintMode::QKV_QUANT_OPTIONS:
+            runtimeHint.qkvQuantOption = value;
            break;
-        case Interpreter::KVCACHE_SIZE_LIMIT:
-            runtimeHint.kvcacheSizeLimit = hint;
+        case Interpreter::HintMode::KVCACHE_SIZE_LIMIT:
+            runtimeHint.kvcacheSizeLimit = value;
            break;
-        case Interpreter::OP_ENCODER_NUMBER_FOR_COMMIT:
-            runtimeHint.encorderNumForCommit = hint;
+        case Interpreter::HintMode::OP_ENCODER_NUMBER_FOR_COMMIT:
+            runtimeHint.encorderNumForCommit = value;
            break;
-        case Interpreter::MMAP_FILE_SIZE:
-            runtimeHint.mmapFileSize = hint;
+        case Interpreter::HintMode::MMAP_FILE_SIZE:
+            runtimeHint.mmapFileSize = value;
            break;
-        case Interpreter::USE_CACHED_MMAP:
-            runtimeHint.useCachedMmap = hint;
+        case Interpreter::HintMode::USE_CACHED_MMAP:
+            runtimeHint.useCachedMmap = value;
            break;
-        case Interpreter::INIT_THREAD_NUMBER:
-            runtimeHint.initThreadNumber = hint;
+        case Interpreter::HintMode::INIT_THREAD_NUMBER:
+            runtimeHint.initThreadNumber = value;
+            break;
+        default: // hints with no case above (e.g. CPU_CORE_IDS) are ignored by this scalar overload
+            break;
+    }
+}
+void Session::ModeGroup::setHint(Interpreter::HintMode hint, int* value, size_t size) {
+    switch (hint) {
+        case Interpreter::HintMode::CPU_CORE_IDS:
+            runtimeHint.cpuIds = std::vector<int>(value, value + size);
            break;
        default:
            break;
--- a/source/core/Session.hpp
+++ b/source/core/Session.hpp
@ -37,7 +37,9 @@ public:
        int geometryMask = 0xFFFF;
        bool checkNetBuffer = true;
        RuntimeHint runtimeHint;
-        void setHint(Interpreter::HintMode hint, int magic);
+        void setHint(Interpreter::HintMode hint, int value);
+        void setHint(Interpreter::HintMode hint, int* value, size_t size);
+        void setHintPtr(Interpreter::HintMode hint, int value);
        void setMode(Interpreter::SessionMode mode);
        void setExternalPath(std::string path, int type);
    };
--- a/tools/cpp/MNNV2Basic.cpp
+++ b/tools/cpp/MNNV2Basic.cpp
@ -282,6 +282,7 @@ static int test_main(int argc, const char* argv[]) {
    if (runMask & 32) {
        net->setSessionHint(Interpreter::WINOGRAD_MEMORY_LEVEL, 0);
    }
+    net->setSessionHint(Interpreter::HintMode::CPU_CORE_IDS, cpuIds.data(), cpuIds.size());

    // create session
    MNN::ScheduleConfig config;
@ -295,7 +296,6 @@ static int test_main(int argc, const char* argv[]) {
    // backendConfig.power = BackendConfig::Power_High;
    backendConfig.precision = static_cast<MNN::BackendConfig::PrecisionMode>(precision);
    backendConfig.memory = static_cast<MNN::BackendConfig::MemoryMode>(memory);
-    backendConfig.cpuIds = cpuIds;
    config.backendConfig     = &backendConfig;
    MNN::Session* session    = NULL;
    MNN::Tensor* inputTensor = nullptr;
--- a/tools/cpp/ModuleBasic.cpp
+++ b/tools/cpp/ModuleBasic.cpp
@ -264,7 +264,6 @@ int main(int argc, char *argv[]) {
    backendConfig.power = (BackendConfig::PowerMode)power;
    backendConfig.precision = static_cast<MNN::BackendConfig::PrecisionMode>(precision);
    backendConfig.memory = static_cast<MNN::BackendConfig::MemoryMode>(memory);
-    backendConfig.cpuIds = cpuIds;
    config.backendConfig     = &backendConfig;

    MNN::Express::Module::Config mConfig;
@ -275,6 +274,7 @@ int main(int argc, char *argv[]) {
    std::shared_ptr<Executor::RuntimeManager> rtmgr(Executor::RuntimeManager::createRuntimeManager(config));
    rtmgr->setCache(cacheFileName);
    rtmgr->setHint(MNN::Interpreter::INIT_THREAD_NUMBER, 4);
+    rtmgr->setHint(MNN::Interpreter::HintMode::CPU_CORE_IDS, cpuIds.data(), cpuIds.size());

    if (cpuDecreaseRate > 0 && cpuDecreaseRate <= 100) {
        rtmgr->setHint(Interpreter::CPU_LITTLECORE_DECREASE_RATE, cpuDecreaseRate);
--- a/tools/cpp/timeProfile.cpp
+++ b/tools/cpp/timeProfile.cpp
@ -119,6 +119,7 @@ int main(int argc, const char* argv[]) {
    }
    revertor.reset();
    net->setSessionMode(Interpreter::Session_Debug);
+    net->setSessionHint(Interpreter::HintMode::CPU_CORE_IDS, cpuIds.data(), cpuIds.size());

    // create session
    MNN::ScheduleConfig config;
@ -126,7 +127,6 @@ int main(int argc, const char* argv[]) {
    config.numThread      = threadNumber;
    BackendConfig backendConfig;
    backendConfig.precision = precision;
-    backendConfig.cpuIds = cpuIds;
    config.backendConfig  = &backendConfig;
    MNN::Session* session = NULL;
    session               = net->createSession(config);