mirror of https://github.com/alibaba/MNN.git
67 lines
2.7 KiB
C++
67 lines
2.7 KiB
C++
//
|
|
// SparseConvInt8TiledExecutor.hpp
|
|
// MNN
|
|
//
|
|
// Created by MNN on 2021/6/09.
|
|
// Copyright © 2018 - 2021, Alibaba Group Holding Limited
|
|
//
|
|
|
|
|
|
#ifndef SparseConvInt8TiledExecutor_hpp
|
|
#define SparseConvInt8TiledExecutor_hpp
|
|
#include "ConvInt8TiledExecutor.hpp"
|
|
#include "backend/cpu/CPUConvolution.hpp"
|
|
#include "Int8FunctionsOpt.h"
|
|
|
|
#define SPARSITY_THRESHOLD (0.2f)
|
|
|
|
namespace MNN {
|
|
|
|
|
|
// Geometry of one sparse int8 quantized matrix multiplication, as consumed by
// the packed-sparse matmul kernels. All fields are deliberately size_t.
struct SparseQuantMatMulParam {
    // only use size_t type
    size_t eSize;   // left matrix length of real value
    size_t eP;      // left matrix pack Unit
    size_t aStride; // left matrix stride
    size_t l;       // left matrix row, (kh * kw * ic/4 * 4)
    size_t h;       // right matrix column, (oc)
    size_t cStride; // output matrix Stride on highest dim (ow * oh * C4Unit * bytes)
};
|
|
|
|
class SparseConvInt8TiledExecutor : public ConvInt8TiledExecutor {
|
|
public:
|
|
// given weight+bias+scale, do post process
|
|
SparseConvInt8TiledExecutor(Backend* backend, const Op* op, std::shared_ptr<ResourceInt8> res);
|
|
virtual ~SparseConvInt8TiledExecutor();
|
|
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
|
|
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
|
|
virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override;
|
|
|
|
void getPackParameter(int* Unit, int* SrcUnit, int* DestUnit, const CoreInt8Functions* core);
|
|
bool reorderWeight(Backend* b, const Convolution2DCommon* common, const std::shared_ptr<Tensor>& weightOrigin,
|
|
std::shared_ptr<Tensor>& weight, const SparseCommon* sparseCommon);
|
|
|
|
static bool shouldUseSparse(const Convolution2D* conv2d) {
|
|
auto common = conv2d->common();
|
|
size_t originWeightSize = common->outputCount() * common->inputCount() * common->kernelY() * common->kernelX();
|
|
const SparseCommon* sparseCommon = conv2d->sparseParameter();
|
|
// MNN_PRINT("SparseConvInt8TiledExecutor sparsity:%f\n", 1 - float(sparseCommon->args()->LookupByKey("NNZElement")->i())/originWeightSize);
|
|
return originWeightSize - sparseCommon->args()->LookupByKey("NNZElement")->i() >= originWeightSize * SPARSITY_THRESHOLD;
|
|
}
|
|
|
|
private:
|
|
SparseConvInt8TiledExecutor(Backend* backend, const Op* op, const SparseConvInt8TiledExecutor& exe);
|
|
|
|
SparseQuantMatMulParam mSparseQuantParam;
|
|
decltype(CoreInt8Functions::MNNPackedSparseQuantMatMulEpx1) mSparseQuantMatMulKernel;
|
|
std::shared_ptr<Tensor> mNNZMap;
|
|
std::shared_ptr<Tensor> mDataOffsetMap;
|
|
int mSparseBlockOC;
|
|
};
|
|
|
|
} // namespace MNN
|
|
|
|
#undef SPARSITY_THRESHOLD
|
|
|
|
#endif /* SparseConvInt8TiledExecutor_hpp */
|