//
//  quantized.cpp
//  MNN
//
//  Created by MNN on 2019/07/01.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#include <cstring>
#include <fstream>
#include <memory>
#include <sstream>
#include <string>

#include "calibration.hpp"
#include "logkit.h"
int main(int argc, const char* argv[]) {
|
|
|
|
|
if (argc < 4) {
|
2019-08-22 20:13:46 +08:00
|
|
|
DLOG(INFO) << "Usage: ./quantized.out src.mnn dst.mnn preTreatConfig.json\n";
|
2019-07-11 13:56:52 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
const char* modelFile = argv[1];
|
|
|
|
|
const char* preTreatConfig = argv[3];
|
|
|
|
|
const char* dstFile = argv[2];
|
2019-08-22 20:13:46 +08:00
|
|
|
DLOG(INFO) << ">>> modelFile: " << modelFile;
|
|
|
|
|
DLOG(INFO) << ">>> preTreatConfig: " << preTreatConfig;
|
|
|
|
|
DLOG(INFO) << ">>> dstFile: " << dstFile;
|
2019-07-11 13:56:52 +08:00
|
|
|
std::unique_ptr<MNN::NetT> netT;
|
|
|
|
|
{
|
2022-03-01 14:33:13 +08:00
|
|
|
//std::ifstream input(modelFile);
|
|
|
|
|
std::ifstream input(modelFile, std::ifstream::in | std::ifstream::binary);
|
2019-07-11 13:56:52 +08:00
|
|
|
std::ostringstream outputOs;
|
|
|
|
|
outputOs << input.rdbuf();
|
|
|
|
|
netT = MNN::UnPackNet(outputOs.str().c_str());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// temp build net for inference
|
|
|
|
|
flatbuffers::FlatBufferBuilder builder(1024);
|
|
|
|
|
auto offset = MNN::Net::Pack(builder, netT.get());
|
|
|
|
|
builder.Finish(offset);
|
|
|
|
|
int size = builder.GetSize();
|
|
|
|
|
auto ocontent = builder.GetBufferPointer();
|
|
|
|
|
|
|
|
|
|
// model buffer for creating mnn Interpreter
|
|
|
|
|
std::unique_ptr<uint8_t> modelForInference(new uint8_t[size]);
|
|
|
|
|
memcpy(modelForInference.get(), ocontent, size);
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<uint8_t> modelOriginal(new uint8_t[size]);
|
|
|
|
|
memcpy(modelOriginal.get(), ocontent, size);
|
|
|
|
|
|
|
|
|
|
netT.reset();
|
|
|
|
|
netT = MNN::UnPackNet(modelOriginal.get());
|
|
|
|
|
|
|
|
|
|
// quantize model's weight
|
2019-08-22 20:13:46 +08:00
|
|
|
DLOG(INFO) << "Calibrate the feature and quantize model...";
|
2019-07-11 13:56:52 +08:00
|
|
|
std::shared_ptr<Calibration> calibration(
|
2021-04-28 18:02:10 +08:00
|
|
|
new Calibration(netT.get(), modelForInference.get(), size, preTreatConfig, std::string(modelFile), std::string(dstFile)));
|
2019-07-11 13:56:52 +08:00
|
|
|
calibration->runQuantizeModel();
|
2021-02-03 10:04:41 +08:00
|
|
|
calibration->dumpTensorScales(dstFile);
|
2019-08-22 20:13:46 +08:00
|
|
|
DLOG(INFO) << "Quantize model done!";
|
2019-07-11 13:56:52 +08:00
|
|
|
|
2020-12-11 11:23:31 +08:00
|
|
|
return 0;
|
2019-07-11 13:56:52 +08:00
|
|
|
}
|