//
// ThreadPool.cpp
// MNN
//
// Created by MNN on 2019/06/30.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifdef MNN_USE_THREAD_POOL
#include "backend/cpu/ThreadPool.hpp"
#include <string.h>
#include <unordered_map>
#include <MNN/MNNDefine.h>
#include "ThreadPool.hpp"
#define MNN_THREAD_POOL_MAX_TASKS 2
namespace MNN {
// Registry of pool singletons, one per CPU-affinity mask. Both the map and
// the pointers it holds are guarded by gInitMutex (see init()/destroy()).
static std::unordered_map<long int, ThreadPool*> gInstances;
static std::mutex gInitMutex;
// Lazily creates (or reuses) the pool associated with `cpuMask` and stores it
// in `threadPool`. A pool is never resized after creation, so if an existing
// pool has fewer threads than requested, the smaller count is returned to let
// the caller adapt; otherwise `numberThread` is returned unchanged.
int ThreadPool::init(int numberThread, unsigned long cpuMask, ThreadPool*& threadPool) {
    if (1 >= numberThread) {
        numberThread = 1;
    }
    std::lock_guard<std::mutex> _l(gInitMutex);
    // Single lookup instead of repeated find/operator[] round-trips.
    auto iter = gInstances.find(cpuMask);
    if (iter == gInstances.end()) {
        iter = gInstances.emplace(cpuMask, new ThreadPool(numberThread)).first;
    }
    threadPool = iter->second;
    if (iter->second->numberThread() < numberThread) {
        // Pool was created earlier with a smaller thread count; report the
        // effective number of usable threads.
        return iter->second->numberThread();
    }
    return numberThread;
}
// Deletes every cached pool instance and empties the registry.
void ThreadPool::destroy() {
    std::lock_guard<std::mutex> _l(gInitMutex);
    for (auto& entry : gInstances) {
        if (entry.second) {
            delete entry.second;
        }
    }
    gInstances.clear();
}
// Builds a pool with `numberThread` logical workers. Worker 0 is the caller's
// own thread, so only numberThread - 1 std::threads are spawned. Each of the
// MNN_THREAD_POOL_MAX_TASKS task slots carries one heap-allocated "pending"
// flag per worker; while the pool is active, a worker sweeps the slots and
// runs any task whose flag for its index is raised, then lowers the flag.
ThreadPool::ThreadPool(int numberThread) {
    mNumberThread = numberThread;
    mActiveCount  = 0;
    mTaskAvailable.resize(MNN_THREAD_POOL_MAX_TASKS);
    mTasks.resize(MNN_THREAD_POOL_MAX_TASKS);
    for (size_t t = 0; t < mTasks.size(); ++t) {
        mTaskAvailable[t] = true;
        for (int i = 0; i < mNumberThread; ++i) {
            mTasks[t].second.emplace_back(new std::atomic_bool{false});
        }
    }
    for (int i = 1; i < mNumberThread; ++i) {
        int threadIndex = i;
        mWorkers.emplace_back([this, threadIndex]() {
            while (!mStop) {
                // Busy-poll while any task batch is active; yield between
                // sweeps so the producer thread keeps making progress.
                while (mActiveCount > 0) {
                    for (int slot = 0; slot < MNN_THREAD_POOL_MAX_TASKS; ++slot) {
                        if (*mTasks[slot].second[threadIndex]) {
                            mTasks[slot].first.first(threadIndex);
                            // Lowering the flag signals completion to the producer.
                            *mTasks[slot].second[threadIndex] = false;
                        }
                    }
                    std::this_thread::yield();
                }
                // Idle: sleep until new work arrives or the pool is stopped.
                std::unique_lock<std::mutex> _l(mQueueMutex);
                mCondition.wait(_l, [this] { return mStop || mActiveCount > 0; });
            }
        });
    }
}
// Signals shutdown, joins all worker threads, then frees the per-worker flags.
ThreadPool::~ThreadPool() {
    // Flip mStop under the lock so a worker sleeping in mCondition.wait()
    // cannot miss the state change.
    {
        std::lock_guard<std::mutex> _l(mQueueMutex);
        mStop = true;
    }
    mCondition.notify_all();
    for (auto& w : mWorkers) {
        w.join();
    }
    // Release the heap-allocated "pending" flags owned by each task slot.
    for (auto& slot : mTasks) {
        for (auto* flag : slot.second) {
            delete flag;
        }
    }
}
// Reserves a free task slot and returns its index, or -1 when every slot is
// already in use (callers then fall back to running work inline).
int ThreadPool::acquireWorkIndex() {
    std::lock_guard<std::mutex> _l(mQueueMutex);
    for (int slot = 0; slot < MNN_THREAD_POOL_MAX_TASKS; ++slot) {
        if (!mTaskAvailable[slot]) {
            continue;
        }
        mTaskAvailable[slot] = false;
        return slot;
    }
    return -1;
}
// Returns a slot previously obtained from acquireWorkIndex() to the free set.
// Out-of-range values — including the -1 a failed acquire yields — are ignored.
void ThreadPool::releaseWorkIndex(int index) {
    const bool validIndex = (index >= 0) && (index < MNN_THREAD_POOL_MAX_TASKS);
    if (!validIndex) {
        return;
    }
    std::lock_guard<std::mutex> _l(mQueueMutex);
    mTaskAvailable[index] = true;
}
// Marks the pool active and wakes every sleeping worker so they begin
// polling their per-slot task flags.
void ThreadPool::active() {
    {
        std::lock_guard<std::mutex> _l(mQueueMutex);
        ++mActiveCount;
    }
    mCondition.notify_all();
}
// Drops one activation; once the count reaches zero, idle workers fall back
// to sleeping on the condition variable (no notify is needed for that).
// NOTE(review): unlike active(), the decrement here is done without holding
// mQueueMutex — presumably mActiveCount is atomic (declared in
// ThreadPool.hpp, not visible here); confirm against the header.
void ThreadPool::deactive() {
mActiveCount--;
}
// Executes `task` (a {callable, iteration-count} pair). A task with at most
// one iteration, or one that has no reserved slot (index < 0), is run inline
// on the calling thread; everything else is fanned out to the workers.
void ThreadPool::enqueue(TASK&& task, int index) {
    const bool runInline = (task.second <= 1) || (index < 0);
    if (runInline) {
        for (int v = 0; v < task.second; ++v) {
            task.first(v);
        }
        return;
    }
    enqueueInternal(std::move(task), index);
}
// Fans a task out across the worker threads and blocks until every iteration
// has completed. The caller must have reserved `index` via acquireWorkIndex()
// and activated the pool via active() beforehand.
void ThreadPool::enqueueInternal(TASK&& task, int index) {
    if (mActiveCount == 0) {
        // Pool not activated: no worker is polling, so run serially here.
        for (int i = 0; i < task.second; ++i) {
            task.first(i);
        }
        return;
    }
    int workSize = task.second;
    if (workSize > mNumberThread) {
        // More iterations than workers: wrap the task so each worker strides
        // through the iteration space (v = tId, tId + N, tId + 2N, ...).
        // Capturing `task` by reference is safe because this function waits
        // for completion before returning.
        mTasks[index].first = std::make_pair(
            [workSize, &task, this](int tId) {
                for (int v = tId; v < workSize; v += mNumberThread) {
                    task.first(v);
                }
            },
            mNumberThread);
        workSize = mNumberThread;
    } else {
        mTasks[index].first = std::move(task);
    }
    // Publish the work: raising a worker's flag tells it to run this slot.
    {
        for (int i = 1; i < workSize; ++i) {
            *mTasks[index].second[i] = true;
        }
    }
    // Worker 0 is the calling thread; do its share of the work here.
    mTasks[index].first.first(0);
    // Spin until every worker has lowered its flag, i.e. finished its share.
    bool complete = true;
    do {
        complete = true;
        for (int i = 1; i < workSize; ++i) {
            if (*mTasks[index].second[i]) {
                complete = false;
                break;
            }
        }
        std::this_thread::yield();
        // FUNC_PRINT(notComplete);
    } while (!complete);
}
} // namespace MNN
#endif