MNN/source/backend/cpu/CPUConvolution.hpp

//
// CPUConvolution.hpp
// MNN
//
// Created by MNN on 2018/07/15.
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CPUConvolution_hpp
#define CPUConvolution_hpp
#include "CPUBackend.hpp"
#include "core/ConvolutionCommon.hpp"
namespace MNN {
class CPUConvolution : public Execution {
public:
    struct Resource {
        std::shared_ptr<Tensor> mWeight;
        std::shared_ptr<Tensor> mBias;
        Backend* backend;
        // Copy the bias into mBias, padding the tail to the backend's
        // alignment; returns false if the buffer can't be acquired.
        bool copyBiasAlign(const float* bias, int outputCount);
        ~Resource() {
            if (nullptr != mBias) {
                backend->onReleaseBuffer(mBias.get(), Backend::STATIC);
            }
            if (nullptr != mWeight) {
                backend->onReleaseBuffer(mWeight.get(), Backend::STATIC);
            }
        }
    };
    struct ResourceInt8 {
        std::vector<int> mInt8WeightKernelSum;
        std::shared_ptr<Tensor> mWeightInt8;
        std::shared_ptr<Tensor> mBiasInt32;
        std::shared_ptr<Tensor> mScaleFloat;
        // relu or relu6
        bool mRelu;
        int mActBits;
        int8_t mInputZeroPoint;
        int8_t mOutputZeroPoint;
        int8_t mClampMin;
        int8_t mClampMax;
        Backend* backend;
        float mInputScale;
        float mOutputScale;
#ifdef MNN_USE_SSE
        std::vector<int> offsets;
#endif
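        // Refresh mInputScale / mOutputScale (and the derived zero points)
        // from per-tensor quant info. The vectors are assumed to carry scale
        // and zero-point entries; the exact layout is defined in the .cpp,
        // not in this header.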
        void updateInputOutputScale(std::vector<float> inputQuantInfo, std::vector<float> outputQuantInfo);
        ~ResourceInt8();
    };
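    // Factory helper: builds the int8 resources above (quantized weight,
    // int32 bias and float scale tensors) from a Convolution2D op; the quant
    // info vectors carry the activation quantization parameters.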
    static std::shared_ptr<ResourceInt8> makeResourceInt8(Backend *backend, const MNN::Convolution2D *convOp,
                                                          std::vector<float> inputQuantInfo, std::vector<float> outputQuantInfo);
    CPUConvolution(const Convolution2DCommon *convOp, Backend *b);
    virtual ~CPUConvolution() = default;
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
    static int reorderWeightSize(int depth, int outputCount, int kernelSize, int unitDepth, int unitOC);
    // Inefficient but needs no cached buffer; use it where speed is not
    // critical (init, onResize).
    // source shape: [outputCount, depth, kernelSize]
    // dest shape:
    //   transpose=false: [UP_DIV(outputCount, unitOC), UP_DIV(depth, unitDepth), kernelSize, unitDepth, unitOC]
    //   transpose=true:  [UP_DIV(outputCount, unitOC), UP_DIV(depth, unitDepth), kernelSize, unitOC, unitDepth]
    template<typename T>
    static void reorderWeightSlow(T* dest, const T* source, size_t depth, size_t outputCount, size_t kernelSize,
                                  size_t unitDepth, size_t unitOC, bool transpose = false);
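    // For reference, the element mapping implied by the shape comment above
    // (a sketch inferred from the layout, not a normative spec):
    //   transpose=false: dest[o / unitOC][d / unitDepth][k][d % unitDepth][o % unitOC] = source[o][d][k]
    //   transpose=true:  dest[o / unitOC][d / unitDepth][k][o % unitOC][d % unitDepth] = source[o][d][k]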
    /* Inefficient because it copies element by element (no memcpy) so that it
       can convert between types (T -> U); use it where speed is not critical
       (init, onResize).
       return: false if the buffer could not be acquired
     */
    template<typename T, typename U>
    static bool acquireMemoryAndCopy(std::shared_ptr<Tensor> dest, const T* source, size_t count, Backend*);
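    // Usage sketch (hypothetical names, assuming `dest` already describes the
    // target shape and the backend allocates it as a STATIC buffer):
    //   std::shared_ptr<Tensor> weight(Tensor::createDevice<float>({size}));
    //   if (!acquireMemoryAndCopy<int8_t, float>(weight, int8Src, size, backend)) {
    //       return OUT_OF_MEMORY;
    //   }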
    // Post-process parameters derived from mCommon's relu/relu6 flags
    // (clamp bounds etc.); see the .cpp for the exact layout.
    std::vector<float> getPostParameters() const;
protected:
    const Convolution2DCommon *mCommon;
    // During execution, use the padding in mPadX / mPadY; do not use
    // mCommon's pad values.
    mutable int mPadX;
    mutable int mPadY;
};
} // namespace MNN
#endif /* CPUConvolution_hpp */