// MNN/source/backend/cpu/CPUPool.hpp
//
// CPUPool.hpp
// MNN
//
// Created by MNN on 2018/07/15.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUPool_hpp
#define CPUPool_hpp
2021-04-08 15:34:23 +08:00
#include <float.h>
#include <math.h>
#include "core/Macro.h"
#include "CaffeOp_generated.h"
2019-04-17 10:49:11 +08:00
namespace MNN {
2021-04-08 15:34:23 +08:00
/**
 * Computes one max-pooled output pixel whose kernel window may extend past
 * the borders of the input plane.
 *
 * Window taps that fall outside the plane are redirected to the nearest
 * border row / column (first or last line, first or last pixel of the line),
 * so no memory outside the plane is read.
 *
 * Template parameters:
 *   T        element type stored in memory.
 *   VEC      vector type providing VEC(int), VEC::load, VEC::max, VEC::save.
 *   PACK     number of T elements per pixel (one VEC load/store).
 *   MAXVALUE initial value of the running maximum; the result is never
 *            smaller than this value.
 *
 * @param channelInput  start of the input plane.
 * @param offsetOutput  destination of the PACK output elements.
 * @param inputWidth    plane width in pixels.
 * @param inputHeight   plane height in pixels.
 * @param inputStep4    elements per input line (used to step between lines
 *                      and to locate the last pixel of a line).
 * @param inputSize4    elements in the whole plane (used to locate the last line).
 * @param kernelWidth   window width in pixels.
 * @param kernelHeight  window height in pixels.
 * @param iw            x coordinate of the window's left column (may be negative).
 * @param ih            y coordinate of the window's top row (may be negative).
 */
template<typename T, typename VEC, int PACK, int MAXVALUE>
static void pooling_max_pad(const T* channelInput, T* offsetOutput, int inputWidth, int inputHeight,
                            int inputStep4, int inputSize4, int kernelWidth, int kernelHeight, int iw, int ih) {
    VEC max = VEC(MAXVALUE);

    // Last line of the plane, used for every tap below the bottom border.
    const T *bottomLine = channelInput + inputSize4 - inputStep4;
    for (int kh = 0; kh < kernelHeight; kh++) {
        const int h = ih + kh;
        const T *paddedLineInput = nullptr;
        if (h < 0) { // top replicate
            paddedLineInput = channelInput;
        } else if (h >= inputHeight) { // bottom replicate
            paddedLineInput = bottomLine;
        } else {
            paddedLineInput = channelInput + h * inputStep4;
        }
        // Last pixel of the selected line, used for every tap right of the border.
        const T *rightEdge = paddedLineInput + inputStep4 - PACK;

        for (int kw = 0; kw < kernelWidth; kw++) {
            const int w = iw + kw;
            const T *cursorInput = nullptr;
            if (w < 0) { // left replicate
                cursorInput = paddedLineInput;
            } else if (w >= inputWidth) { // right replicate
                cursorInput = rightEdge;
            } else {
                cursorInput = paddedLineInput + PACK * w;
            }
            max = VEC::max(max, VEC::load(cursorInput));
        }
    }
    VEC::save(offsetOutput, max);
}
template<typename T, typename VEC, int PACK, int MAXVALUE>
2021-04-08 15:34:23 +08:00
static void poolingMax(const T *channelInput, int inputWidth, int inputHeight, T *channelOutput,
int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
int strideHeight, int padWidth, int padHeight, MNN::PoolPadType padType, MNN::AvgPoolCountType countType) {
// Compute Mid Rect
int l = 0, t = 0, r = outputWidth, b = outputHeight;
for (; l * strideWidth - padWidth < 0 && l < outputWidth; l++) {
// do nothing
}
for (; t * strideHeight - padHeight < 0 && t < outputHeight; t++) {
// do nothing
}
for (; (r - 1) * strideWidth - padWidth + (kernelWidth - 1) >= inputWidth && r > l; r--) {
// do nothing
}
for (; (b - 1) * strideHeight - padHeight + (kernelHeight - 1) >= inputHeight && b > t; b--) {
// do nothing
}
int padTop = t, padBottom = b, padLeft = l, padRight = r;
const int inputStep4 = PACK * inputWidth;
2021-04-08 15:34:23 +08:00
const int inputSize4 = inputStep4 * inputHeight;
const int strideInputStep4 = strideHeight * inputStep4;
const int outputStep4 = PACK * outputWidth;
const int strideWidth4 = PACK * strideWidth;
2021-04-08 15:34:23 +08:00
{ // handle paddings top
T *lineOutput = channelOutput;
for (int oh = 0, ih = -padHeight; oh < padTop; oh++, ih += strideHeight, lineOutput += outputStep4) {
T *offsetOutput = lineOutput;
for (int ow = 0, iw = -padWidth; ow < outputWidth; ow++, iw += strideWidth, offsetOutput += PACK) {
pooling_max_pad<T, VEC, PACK, MAXVALUE>(channelInput, offsetOutput, inputWidth, inputHeight, inputStep4, inputSize4,
2021-04-08 15:34:23 +08:00
kernelWidth, kernelHeight, iw, ih);
}
}
for (int oh = padTop, ih = -padHeight + oh * strideHeight; oh < padBottom;
oh++, ih += strideHeight, lineOutput += outputStep4) {
T *offsetOutput = lineOutput;
for (int ow = 0, iw = -padWidth; ow < padLeft; ow++, iw += strideWidth, offsetOutput += PACK) {
pooling_max_pad<T, VEC, PACK, MAXVALUE>(channelInput, offsetOutput, inputWidth, inputHeight, inputStep4, inputSize4,
2021-04-08 15:34:23 +08:00
kernelWidth, kernelHeight, iw, ih);
}
offsetOutput = lineOutput + padRight * PACK;
2021-04-08 15:34:23 +08:00
for (int ow = padRight, iw = -padWidth + ow * strideWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += PACK) {
pooling_max_pad<T, VEC, PACK, MAXVALUE>(channelInput, offsetOutput, inputWidth, inputHeight, inputStep4, inputSize4,
2021-04-08 15:34:23 +08:00
kernelWidth, kernelHeight, iw, ih);
}
}
for (int oh = padBottom, ih = -padHeight + oh * strideHeight; oh < outputHeight;
oh++, ih += strideHeight, lineOutput += outputStep4) {
T *offsetOutput = lineOutput;
for (int ow = 0, iw = -padWidth; ow < outputWidth; ow++, iw += strideWidth, offsetOutput += PACK) {
pooling_max_pad<T, VEC, PACK, MAXVALUE>(channelInput, offsetOutput, inputWidth, inputHeight, inputStep4, inputSize4,
2021-04-08 15:34:23 +08:00
kernelWidth, kernelHeight, iw, ih);
}
}
}
{ // handle no paddings
const T *lineInput =
channelInput + (padTop * strideHeight - padHeight) * inputStep4 + (padLeft * strideWidth - padWidth) * PACK;
T *lineOutput = channelOutput + padTop * outputStep4 + padLeft * PACK;
2021-04-08 15:34:23 +08:00
int wCount = padRight - padLeft;
int wCountC4 = wCount / 4;
int wCountRemain = wCount - wCountC4 * 4;
int strideWidthFuse = strideWidth4 * 4;
for (int oh = padTop, ih = -padHeight + oh * strideHeight; oh < padBottom;
oh++, ih += strideHeight, lineOutput += outputStep4, lineInput += strideInputStep4) {
const T *offsetInput = lineInput;
T *offsetOutput = lineOutput;
for (int owf = 0; owf < wCountC4; ++owf, offsetOutput += 4 * PACK, offsetInput += strideWidthFuse) {
VEC max0 = VEC(MAXVALUE);
VEC max1 = VEC(MAXVALUE);
VEC max2 = VEC(MAXVALUE);
VEC max3 = VEC(MAXVALUE);
2021-04-08 15:34:23 +08:00
const T *kernelInput = offsetInput;
for (int kh = 0; kh < kernelHeight; kh++, kernelInput += inputStep4) {
const T *cursorInput = kernelInput;
for (int kw = 0; kw < kernelWidth; kw++, cursorInput += PACK) {
2021-04-08 15:34:23 +08:00
max0 = VEC::max(max0, VEC::load(cursorInput + 0 * strideWidth4));
max1 = VEC::max(max1, VEC::load(cursorInput + 1 * strideWidth4));
max2 = VEC::max(max2, VEC::load(cursorInput + 2 * strideWidth4));
max3 = VEC::max(max3, VEC::load(cursorInput + 3 * strideWidth4));
}
}
VEC::save(offsetOutput + PACK * 0, max0);
VEC::save(offsetOutput + PACK * 1, max1);
VEC::save(offsetOutput + PACK * 2, max2);
VEC::save(offsetOutput + PACK * 3, max3);
2021-04-08 15:34:23 +08:00
}
for (int ow = 0; ow < wCountRemain;
ow++, offsetOutput += PACK, offsetInput += strideWidth4) {
2021-04-08 15:34:23 +08:00
const T *kernelInput = offsetInput;
VEC max = VEC(MAXVALUE);
2021-04-08 15:34:23 +08:00
for (int kh = 0; kh < kernelHeight; kh++, kernelInput += inputStep4) {
const T *cursorInput = kernelInput;
for (int kw = 0; kw < kernelWidth; kw++, cursorInput += PACK) {
2021-04-08 15:34:23 +08:00
max = VEC::max(max, VEC::load(cursorInput));
}
}
VEC::save(offsetOutput, max);
}
}
}
}
template<typename T, typename VEC, int PACK>
2021-04-08 15:34:23 +08:00
static void poolingAvgPad(const T *offsetInput, T *offsetOutput, int inputWidth, int inputHeight,
int kernelWidth, int kernelHeight, int inputStep4, int iw, int ih, int padWidth,
int padHeight, MNN::PoolPadType padType, MNN::AvgPoolCountType countType) {
VEC sum = VEC(0.0f);
const int khs = 0 < -ih ? -ih : 0; // max
const int khe = kernelHeight < inputHeight - ih ? kernelHeight : inputHeight - ih; // min
const int kws = 0 < -iw ? -iw : 0; // max
const int kwe = kernelWidth < inputWidth - iw ? kernelWidth : inputWidth - iw; // min
// sum
int count = 0;
if (countType == MNN::AvgPoolCountType_DEFAULT) {
if (padType == MNN::PoolPadType_CAFFE) {
countType = MNN::AvgPoolCountType_INCLUDE_PADDING;
} else {
countType = MNN::AvgPoolCountType_EXCLUDE_PADDING;
}
}
if (countType == MNN::AvgPoolCountType_INCLUDE_PADDING) {
count = (ALIMIN(ih + kernelHeight, inputHeight + padHeight) - ih) *
(ALIMIN(iw + kernelWidth, inputWidth + padWidth) - iw);
} else {
count = (khe - khs) * (kwe - kws);
}
const T *kernelInput = offsetInput + khs * inputStep4;
for (int kh = khs; kh < khe; kh++, kernelInput += inputStep4) {
const T *cursorInput = kernelInput + kws * PACK;
for (int kw = kws; kw < kwe; kw++, cursorInput += PACK) {
2021-04-08 15:34:23 +08:00
sum = sum + VEC::load(cursorInput);
}
}
// avg
if (count > 0) {
VEC divs = VEC(1.0f / count);
VEC::save(offsetOutput, sum * divs);
} else {
VEC::save(offsetOutput, VEC(0.0f));
}
}
template<typename T, typename VEC, int PACK>
2021-04-08 15:34:23 +08:00
static void poolingAvg(const T* channelInput, int inputWidth, int inputHeight, T *channelOutput,
int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
int strideHeight, int padWidth, int padHeight, MNN::PoolPadType padType, MNN::AvgPoolCountType countType) {
// Compute Mid Rect
int l = 0, t = 0, r = outputWidth, b = outputHeight;
for (; l * strideWidth - padWidth < 0 && l < outputWidth; l++) {
// do nothing
}
for (; t * strideHeight - padHeight < 0 && t < outputHeight; t++) {
// do nothing
}
for (; (r - 1) * strideWidth - padWidth + (kernelWidth - 1) >= inputWidth && r > l; r--) {
// do nothing
}
for (; (b - 1) * strideHeight - padHeight + (kernelHeight - 1) >= inputHeight && b > t; b--) {
// do nothing
}
int padTop = t, padBottom = b, padLeft = l, padRight = r;
const int inputStep4 = PACK * inputWidth;
2021-04-08 15:34:23 +08:00
const int strideInputStep4 = strideHeight * inputStep4;
const int outputStep4 = PACK * outputWidth;
const int strideWidth4 = PACK * strideWidth;
2021-04-08 15:34:23 +08:00
{ // handle paddings
const T *lineInput = channelInput - padHeight * inputStep4 - padWidth * PACK;
2021-04-08 15:34:23 +08:00
T *lineOutput = channelOutput;
for (int oh = 0, ih = -padHeight; oh < padTop;
oh++, ih += strideHeight, lineOutput += outputStep4, lineInput += strideInputStep4) {
const T *offsetInput = lineInput;
T *offsetOutput = lineOutput;
for (int ow = 0, iw = -padWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += PACK, offsetInput += strideWidth4) {
poolingAvgPad<T, VEC, PACK>(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
2021-04-08 15:34:23 +08:00
iw, ih, padWidth, padHeight, padType, countType);
}
}
for (int oh = padTop, ih = -padHeight + oh * strideHeight; oh < padBottom;
oh++, ih += strideHeight, lineOutput += outputStep4, lineInput += strideInputStep4) {
const T *offsetInput = lineInput;
T *offsetOutput = lineOutput;
for (int ow = 0, iw = -padWidth; ow < padLeft;
ow++, iw += strideWidth, offsetOutput += PACK, offsetInput += strideWidth4) {
poolingAvgPad<T, VEC, PACK>(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
2021-04-08 15:34:23 +08:00
iw, ih, padWidth, padHeight, padType, countType);
}
offsetInput = lineInput + padRight * strideWidth * PACK;
offsetOutput = lineOutput + padRight * PACK;
2021-04-08 15:34:23 +08:00
for (int ow = padRight, iw = -padWidth + ow * strideWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += PACK, offsetInput += strideWidth4) {
poolingAvgPad<T, VEC, PACK>(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
2021-04-08 15:34:23 +08:00
iw, ih, padWidth, padHeight, padType, countType);
}
}
for (int oh = padBottom, ih = -padHeight + oh * strideHeight; oh < outputHeight;
oh++, ih += strideHeight, lineOutput += outputStep4, lineInput += strideInputStep4) {
const T *offsetInput = lineInput;
T *offsetOutput = lineOutput;
for (int ow = 0, iw = -padWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += PACK, offsetInput += strideWidth4) {
poolingAvgPad<T, VEC, PACK>(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
2021-04-08 15:34:23 +08:00
iw, ih, padWidth, padHeight, padType, countType);
}
}
}
{ // handle no paddings
const T *lineInput =
channelInput + (padTop * strideHeight - padHeight) * inputStep4 + (padLeft * strideWidth - padWidth) * PACK;
T *lineOutput = channelOutput + padTop * outputStep4 + padLeft * PACK;
2021-04-08 15:34:23 +08:00
int count = kernelHeight * kernelWidth;
VEC divs = VEC(1.0f / count);
for (int oh = padTop, ih = -padHeight + oh * strideHeight; oh < padBottom;
oh++, ih += strideHeight, lineOutput += outputStep4, lineInput += strideInputStep4) {
const T *offsetInput = lineInput;
T *offsetOutput = lineOutput;
for (int ow = padLeft, iw = -padWidth + ow * strideWidth; ow < padRight;
ow++, iw += strideWidth, offsetOutput += PACK, offsetInput += strideWidth4) {
2021-04-08 15:34:23 +08:00
VEC sum = VEC(0);
// sum
const T *kernelInput = offsetInput;
for (int kh = 0; kh < kernelHeight; kh++, kernelInput += inputStep4) {
const T *cursorInput = kernelInput;
for (int kw = 0; kw < kernelWidth; kw++, cursorInput += PACK) {
2022-12-30 15:18:58 +08:00
sum = sum + VEC::load(cursorInput) * divs;
2021-04-08 15:34:23 +08:00
}
}
2022-12-30 15:18:58 +08:00
VEC::save(offsetOutput, sum);
2021-04-08 15:34:23 +08:00
}
}
}
}
2019-04-17 10:49:11 +08:00
} // namespace MNN
#endif /* CPUPool_hpp */