//
// ThreadPool.cpp
// MNN
//
// Created by MNN on 2019/06/30.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifdef MNN_USE_THREAD_POOL
#include "backend/cpu/ThreadPool.hpp"
#include <string.h>
#include <unordered_map>
#include <MNN/MNNDefine.h>
#include "ThreadPool.hpp"
#define MNN_THREAD_POOL_MAX_TASKS 2
namespace MNN {
// Registry of pool singletons, one per CPU-affinity mask. Both the map and
// the pointers it holds are guarded by gInitMutex (see init()/destroy()).
static std::unordered_map<long int, ThreadPool*> gInstances;
static std::mutex gInitMutex;
// Lazily creates (or reuses) the pool associated with `cpuMask` and stores it
// in `threadPool`. A pool is never resized after creation, so if an existing
// pool has fewer threads than requested, the smaller count is returned to let
// the caller adapt; otherwise `numberThread` is returned unchanged.
int ThreadPool::init(int numberThread, unsigned long cpuMask, ThreadPool*& threadPool) {
    if (1 >= numberThread) {
        numberThread = 1;
    }
    std::lock_guard<std::mutex> _l(gInitMutex);
    // Single lookup instead of repeated find/operator[] round-trips.
    auto iter = gInstances.find(cpuMask);
    if (iter == gInstances.end()) {
        iter = gInstances.emplace(cpuMask, new ThreadPool(numberThread)).first;
    }
    threadPool = iter->second;
    if (iter->second->numberThread() < numberThread) {
        // Pool was created earlier with a smaller thread count; report the
        // effective number of usable threads.
        return iter->second->numberThread();
    }
    return numberThread;
}
// Deletes every cached pool instance and empties the registry.
void ThreadPool::destroy() {
    std::lock_guard<std::mutex> _l(gInitMutex);
    for (auto& entry : gInstances) {
        if (entry.second) {
            delete entry.second;
        }
    }
    gInstances.clear();
}
// Builds a pool with `numberThread` logical workers. Worker 0 is the caller's
// own thread, so only numberThread - 1 std::threads are spawned. Each of the
// MNN_THREAD_POOL_MAX_TASKS task slots carries one heap-allocated "pending"
// flag per worker; while the pool is active, a worker sweeps the slots and
// runs any task whose flag for its index is raised, then lowers the flag.
ThreadPool::ThreadPool(int numberThread) {
    mNumberThread = numberThread;
    mActiveCount  = 0;
    mTaskAvailable.resize(MNN_THREAD_POOL_MAX_TASKS);
    mTasks.resize(MNN_THREAD_POOL_MAX_TASKS);
    for (size_t t = 0; t < mTasks.size(); ++t) {
        mTaskAvailable[t] = true;
        for (int i = 0; i < mNumberThread; ++i) {
            mTasks[t].second.emplace_back(new std::atomic_bool{false});
        }
    }
    for (int i = 1; i < mNumberThread; ++i) {
        int threadIndex = i;
        mWorkers.emplace_back([this, threadIndex]() {
            while (!mStop) {
                // Busy-poll while any task batch is active; yield between
                // sweeps so the producer thread keeps making progress.
                while (mActiveCount > 0) {
                    for (int slot = 0; slot < MNN_THREAD_POOL_MAX_TASKS; ++slot) {
                        if (*mTasks[slot].second[threadIndex]) {
                            mTasks[slot].first.first(threadIndex);
                            // Lowering the flag signals completion to the producer.
                            *mTasks[slot].second[threadIndex] = false;
                        }
                    }
                    std::this_thread::yield();
                }
                // Idle: sleep until new work arrives or the pool is stopped.
                std::unique_lock<std::mutex> _l(mQueueMutex);
                mCondition.wait(_l, [this] { return mStop || mActiveCount > 0; });
            }
        });
    }
}
// Signals shutdown, joins all worker threads, then frees the per-worker flags.
ThreadPool::~ThreadPool() {
    // Flip mStop under the lock so a worker sleeping in mCondition.wait()
    // cannot miss the state change.
    {
        std::lock_guard<std::mutex> _l(mQueueMutex);
        mStop = true;
    }
    mCondition.notify_all();
    for (auto& w : mWorkers) {
        w.join();
    }
    // Release the heap-allocated "pending" flags owned by each task slot.
    for (auto& slot : mTasks) {
        for (auto* flag : slot.second) {
            delete flag;
        }
    }
}
// Reserves a free task slot and returns its index, or -1 when every slot is
// already in use (callers then fall back to running work inline).
int ThreadPool::acquireWorkIndex() {
    std::lock_guard<std::mutex> _l(mQueueMutex);
    for (int slot = 0; slot < MNN_THREAD_POOL_MAX_TASKS; ++slot) {
        if (!mTaskAvailable[slot]) {
            continue;
        }
        mTaskAvailable[slot] = false;
        return slot;
    }
    return -1;
}
// Returns a slot previously obtained from acquireWorkIndex() to the free set.
// Out-of-range values — including the -1 a failed acquire yields — are ignored.
void ThreadPool::releaseWorkIndex(int index) {
    const bool validIndex = (index >= 0) && (index < MNN_THREAD_POOL_MAX_TASKS);
    if (!validIndex) {
        return;
    }
    std::lock_guard<std::mutex> _l(mQueueMutex);
    mTaskAvailable[index] = true;
}
// Marks the pool active and wakes every sleeping worker so they begin
// polling their per-slot task flags.
void ThreadPool::active() {
    {
        std::lock_guard<std::mutex> _l(mQueueMutex);
        ++mActiveCount;
    }
    mCondition.notify_all();
}
// Drops one activation; once the count reaches zero, idle workers fall back
// to sleeping on the condition variable (no notify is needed for that).
// NOTE(review): unlike active(), the decrement here is done without holding
// mQueueMutex — presumably mActiveCount is atomic (declared in
// ThreadPool.hpp, not visible here); confirm against the header.
void ThreadPool::deactive() {
mActiveCount--;
}
// Executes `task` (a {callable, iteration-count} pair). A task with at most
// one iteration, or one that has no reserved slot (index < 0), is run inline
// on the calling thread; everything else is fanned out to the workers.
void ThreadPool::enqueue(TASK&& task, int index) {
    const bool runInline = (task.second <= 1) || (index < 0);
    if (runInline) {
        for (int v = 0; v < task.second; ++v) {
            task.first(v);
        }
        return;
    }
    enqueueInternal(std::move(task), index);
}
// Fans a task out across the worker threads and blocks until every iteration
// has completed. The caller must have reserved `index` via acquireWorkIndex()
// and activated the pool via active() beforehand.
void ThreadPool::enqueueInternal(TASK&& task, int index) {
    if (mActiveCount == 0) {
        // Pool not activated: no worker is polling, so run serially here.
        for (int i = 0; i < task.second; ++i) {
            task.first(i);
        }
        return;
    }
    int workSize = task.second;
    if (workSize > mNumberThread) {
        // More iterations than workers: wrap the task so each worker strides
        // through the iteration space (v = tId, tId + N, tId + 2N, ...).
        // Capturing `task` by reference is safe because this function waits
        // for completion before returning.
        mTasks[index].first = std::make_pair(
            [workSize, &task, this](int tId) {
                for (int v = tId; v < workSize; v += mNumberThread) {
                    task.first(v);
                }
            },
            mNumberThread);
        workSize = mNumberThread;
    } else {
        mTasks[index].first = std::move(task);
    }
    // Publish the work: raising a worker's flag tells it to run this slot.
    {
        for (int i = 1; i < workSize; ++i) {
            *mTasks[index].second[i] = true;
        }
    }
    // Worker 0 is the calling thread; do its share of the work here.
    mTasks[index].first.first(0);
    // Spin until every worker has lowered its flag, i.e. finished its share.
    bool complete = true;
    do {
        complete = true;
        for (int i = 1; i < workSize; ++i) {
            if (*mTasks[index].second[i]) {
                complete = false;
                break;
            }
        }
        std::this_thread::yield();
        // FUNC_PRINT(notComplete);
    } while (!complete);
}
} // namespace MNN
#endif