//
// quantizeWeight.hpp
// MNN
//
// Created by MNN on 2019/04/21.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef QUANTIZEWEIGHT_HPP
#define QUANTIZEWEIGHT_HPP
#include <stdint.h>
#include <vector>
#include <string>

// default: quantize weight per channel
int SymmetricQuantizeWeight(const float* weight, const int size, int8_t* quantizedWeight, float* scale,
                            const int channels, float weightClampValue);
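
// a minimal sketch of what symmetric per-channel quantization computes (illustrative only,
// not necessarily the exact MNN implementation): for each channel c,
//   scale[c]           = max(|w| over channel c) / weightClampValue   // weightClampValue is typically 127.0f
//   quantizedWeight[i] = round(weight[i] / scale[c]), clamped to [-weightClampValue, weightClampValue]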
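
// ADMM = Alternating Direction Method of Multipliers; alpha presumably receives the
// per-kernel scaling factors (a reading of the signature, not documented in the original header)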
int QuantizeWeightADMM(const float* weight, const int weightNum, int8_t* quantizedWeight, float* alpha,
                       const int kernelNum, const float weightClampValue);

// quantize convolution weight per channel
// first, multiply the float weight by input_scale, then quantize the result to get input_scale*weight_scale
// second, divide input_scale*weight_scale by output_scale
int QuantizeConvPerChannel(const float* weight, const int size, const float* bias, int8_t* quantizedWeight,
                           int32_t* quantizedBias, float* scale, const float inputScale, const float outputScale,
                           const int inputChannel, const int outputChannel, std::string method, float weightClampValue, bool mergeChannel = true);
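
// worked example of the two steps above (assumed illustrative values, not taken from MNN):
//   inputScale = 0.05f, per-channel weightScale = 0.01f, outputScale = 0.1f
//   step 1: quantizing inputScale * weight gives a combined scale of inputScale * weightScale = 0.0005
//   step 2: dividing by outputScale yields 0.0005 / 0.1 = 0.005, the per-channel requantization
//           factor that is presumably what gets written into scale[]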
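
// presumably the same two-step input_scale/output_scale scheme as QuantizeConvPerChannel,
// specialized for depthwise convolution weights (inferred from the matching signature)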
int QuantizeDepthwiseConv(const float* weight, const int size, const float* bias, int8_t* quantizedWeight,
                          int32_t* quantizedBias, float* scale, const float inputScale, const float outputScale,
                          const int inputChannel, const int outputChannel, std::string method, float weightClampValue, bool mergeChannel = true);
#endif // QUANTIZEWEIGHT_HPP