// Source: mirror of https://github.com/alibaba/MNN.git
//
//  calibration.hpp
//  MNN
//
//  Created by MNN on 2019/04/23.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#ifndef CALIBRATION_HPP
#define CALIBRATION_HPP

#include <map>

// MNN public API
#include <MNN/ImageProcess.hpp>
#include <MNN/Interpreter.hpp>
// quantization-tool internals
#include "TensorStatistic.hpp"
#include "MNN_generated.h"
#include "Helper.hpp"
#include "logkit.h"
// Calibration finds the optimal threshold according to KL-divergence.
// Process (applied to all Conv|DepthwiseConv layers):
// 1. run the model on the batch samples, updating max(abs(feature_maps)) when the op is Convolution|Depthwise
// 2. cut the max(abs(feature_maps)) into 2048 slices
// 3. run the model on the batch samples again, updating the distribution of feature maps of every Conv|DepthwiseConv layer
// 4. apply calibration on every distribution to get the optimal threshold
// 5. compute (input_scale * weight_scale) / output_scale, and update the scale of symmetricQuan in the Convolution parameter
// Calibration drives post-training quantization of a float MNN model:
// it gathers per-tensor activation statistics over a calibration set and
// derives quantization scales (KL-divergence search by default).
class Calibration {
public:
    // Builds the calibration context from the in-memory model (`model`),
    // its serialized flatbuffer (`modelBuffer`/`bufferSize`), the quantization
    // config at `configPath`, and the source/destination model file paths.
    // On failure, valid() returns false — NOTE(review): inferred from mValid;
    // confirm against the constructor implementation.
    Calibration(MNN::NetT* model, const uint8_t* modelBuffer, const int bufferSize, const std::string& configPath, std::string originalModelFile, std::string dstModelFile);

    // Runs the quantization pipeline on the model held by this instance.
    void runQuantizeModel();

    // Dumps the computed per-tensor scales for the given model file (debug aid).
    void dumpTensorScales(const std::string& modelFile);
    void ComputeUnaryBuffer(MNN::NetT* net);
    // True while the instance is usable (set false on setup failure).
    bool valid() const {
        return mValid;
    }

private:
    Calibration();                 // default construction disallowed for callers
    MNN::NetT* _originalModel;     // non-owning; lifetime managed by the caller
    std::shared_ptr<MNN::CV::ImageProcess> _process;
    bool mValid = true;
    const int _binNums = 2048;     // histogram bins for the feature-map distribution (step 2 above)
    int _calibrationFileNum = 0;
    int _width;
    int _height;
    int _channels;
    int _batch = 32;
    int _quant_bits = 8;           // target quantization bit width
    bool _winogradOpt = false;
    Helper::PreprocessConfig _preprocessConfig;
    Helper::InputType _inputType;
    std::string _calibrationFilePath;
    std::string _originalModelFile;
    std::string _destModelFile;
    MNN::CV::ImageProcess::Config _imageProcessConfig;
    std::vector<std::string> _calibrationFiles;

    // Tensor statistics, keyed by tensor, for the quantized and original graphs.
    std::map<const MNN::Tensor*, std::shared_ptr<TensorStatistic>> _featureInfo;
    std::map<const MNN::Tensor*, std::shared_ptr<TensorStatistic>> _featureInfoOrigin;
    std::map<int, const MNN::Tensor*> _tensorMap;

    // The scale results: per tensor, a (scale, zero-point) pair.
    std::map<const MNN::Tensor*, std::pair<float, int8_t>> _scales;

    std::shared_ptr<MNN::Interpreter> _interpreter;
    // keep MNN forward information
    MNN::Session* _session;
    MNN::Tensor* _inputTensor;
    std::vector<int> _inputTensorDims;

    // A second interpreter/session pair over the unmodified model,
    // presumably used as the float reference (see _computeQuantError).
    std::shared_ptr<MNN::Interpreter> _interpreterOrigin;
    MNN::Session* _sessionOrigin;
    MNN::Tensor* _inputTensorOrigin;

    std::string _featureQuantizeMethod = "KL";       // see _computeFeatureScale* methods below
    std::string _weightQuantizeMethod = "MAX_ABS";

    float _featureClampValue = 127.0f;  // int8 symmetric clamp
    float _weightClampValue = 127.0f;
    std::vector<std::string> _skip_quant_ops;        // op names excluded from quantization
    bool _debug = false;

    std::vector<int> _getInputShape(std::string filename);
    void _resizeIfNeeded(std::string filename, bool force = false);
    void _initMNNSession(const uint8_t* modelBuffer, const int bufferSize);
    void _initMaps();

    // compute min/max value for every Tensor
    void _computeFeatureMapsRange();
    void _collectFeatureMapsDistribution();
    // Alternative feature-scale strategies, selected by _featureQuantizeMethod.
    void _computeFeatureScaleKL();
    void _computeFeatureScaleADMM();
    void _quantizeModelEMA();
    void _computeFeatureScaleMoving();
    void _fake_quant_weights();
    void _computeQuantError();
    void _insertScale();
};
// Command-line entry point of the quantization tool (takes main()'s argc/argv).
int quant_main(int argc, const char* argv[]);

#endif // CALIBRATION_HPP