//
//  CPUPool.cpp
//  MNN
//
//  Created by MNN on 2018/07/15.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUBackend.hpp"
#include "core/Concurrency.h"
#include "backend/cpu/CPUPool.hpp"
#include "compute/CommonOptFunction.h"
#include "math/Vec.hpp"

using Vec4 = MNN::Math::Vec<float, 4>;
using Vec16 = MNN::Math::Vec<int8_t, 16>;

namespace MNN {
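
// CPUPool runs one pooling kernel per (batch, channel-pack) plane. The concrete
// compute function (max or average, float or int8) is injected by the creator
// below as a plain function pointer, together with the element size in bytes,
// so this Execution only deals with geometry, padding and threading.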
class CPUPool : public Execution {
public:
    CPUPool(Backend *b, const Pool *parameter, void* func, int bytes) : MNN::Execution(b), mParameter(parameter) {
        mCompute = (decltype(mCompute))func;
        mBytes = bytes;
    }
    virtual ~CPUPool() = default;
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override {
        auto layer       = mParameter;
        int strideWidth  = layer->strideX();
        int strideHeight = layer->strideY();
        int padWidth     = layer->padX();
        int padHeight    = layer->padY();
        auto core        = static_cast<CPUBackend*>(backend())->functions();

        // For global pooling, one window covers the whole plane: override the
        // kernel and stride with the input size and drop the padding.
        auto input       = inputs[0];
        auto output      = outputs[0];
        int kernelWidth  = layer->kernelX();
        int kernelHeight = layer->kernelY();
        if (layer->isGlobal()) {
            kernelWidth  = input->width();
            kernelHeight = input->height();
            strideWidth  = input->width();
            strideHeight = input->height();
            padWidth     = 0;
            padHeight    = 0;
        }
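
        // SAME padding keeps the output at ceil(input / stride) per axis. Example:
        // a 7x7 input with kernel 3 and stride 2 gives a 4x4 output, so
        // padNeeded = (4 - 1) * 2 + 3 - 7 = 2 and pad = 1 on each side.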
        if (layer->padType() == PoolPadType_SAME) {
            int padNeededWidth  = (output->width() - 1) * strideWidth + kernelWidth - input->width();
            int padNeededHeight = (output->height() - 1) * strideHeight + kernelHeight - input->height();
            padWidth            = padNeededWidth > 0 ? padNeededWidth / 2 : 0;
            padHeight           = padNeededHeight > 0 ? padNeededHeight / 2 : 0;
        } else if (layer->padType() == PoolPadType_VALID) {
            padWidth = padHeight = 0;
        }
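
        // Tensors are in the backend's packed layout (pack channels interleaved
        // per pixel, e.g. NC4HW4 when core->pack == 4), so each of the
        // batch * UP_DIV(channel, pack) planes below is one packed H x W image.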
        auto totalDepth        = input->batch() * UP_DIV(input->channel(), core->pack);
        auto inputData         = input->host<uint8_t>();
        auto outputData        = output->host<uint8_t>();
        auto inputPlaneStride  = core->pack * input->width() * input->height();
        auto outputPlaneStride = core->pack * output->width() * output->height();
        int threadNumber       = ((CPUBackend *)backend())->threadNumber();
        auto padType           = layer->padType();
        auto countType         = layer->countType();
        // When explicit pads are given, the CAFFE pad type degenerates to VALID.
        if (layer->pads() != nullptr && padType == PoolPadType_CAFFE) {
            padType = PoolPadType_VALID;
        }
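
        // Prepare the parallel job once per resize: planes are dealt out
        // round-robin across threads, and each thread runs the injected kernel
        // on disjoint input/output planes, so the lambda needs no synchronization.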
        mFunction = std::make_pair(threadNumber, [=](int tId) {
            for (int channel = (int)tId; channel < totalDepth; channel += threadNumber) {
                // Pointer offsets are in bytes; mBytes is the per-element size.
                mCompute(inputData + channel * inputPlaneStride * mBytes, input->width(), input->height(),
                              outputData + outputPlaneStride * channel * mBytes, output->width(), output->height(), kernelWidth,
                              kernelHeight, strideWidth, strideHeight, padWidth, padHeight, padType, countType);
            }
        });
        return NO_ERROR;
    }
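
    // onExecute just replays the job prepared in onResize, once per thread.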
    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override {
        MNN_CONCURRENCY_BEGIN(tId, mFunction.first) {
            mFunction.second((int)tId);
        }
        MNN_CONCURRENCY_END();
        return NO_ERROR;
    }

private:
    const Pool *mParameter;
    void(*mCompute)(const void* channelInput, int inputWidth, int inputHeight, void *channelOutput,
                           int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
                           int strideHeight, int padWidth, int padHeight, int padType, int countType);
    std::pair<int, std::function<void(int)> > mFunction;
    int mBytes;
};
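
// The creator selects the kernel when the Execution is built: int8 inputs use
// the templated kernels over 16-lane vectors (element size 1), while all other
// inputs use the function pointers and element size (core->bytes) published by
// the backend's CPUBackend::functions() table.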
class CPUPoolCreator : public CPUBackend::Creator {
public:
    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                const MNN::Op *op, Backend *backend) const override {
        void* func = nullptr;
        if (inputs[0]->getType() == halide_type_of<int8_t>()) {
            if (op->main_as_Pool()->type() == PoolType_AVEPOOL) {
                func = (void*)(poolingAvg<int8_t, Vec16, 4>);
            } else {
                // -128 is the smallest int8 value, i.e. the identity for max pooling.
                func = (void*)(poolingMax<int8_t, Vec16, 4, -128>);
            }
            return new CPUPool(backend, op->main_as_Pool(), func, 1);
        }
        auto core = static_cast<CPUBackend*>(backend)->functions();
        if (op->main_as_Pool()->type() == PoolType_AVEPOOL) {
            func = (void*)(core->MNNPoolingAvg);
        } else {
            func = (void*)(core->MNNPoolingMax);
        }
        return new CPUPool(backend, op->main_as_Pool(), func, core->bytes);
    }
};

REGISTER_CPU_OP_CREATOR(CPUPoolCreator, OpType_Pooling);

} // namespace MNN