//
//  writeFb.cpp
//  MNNConverter
//
//  Created by MNN on 2019/01/31.
//  Copyright © 2018, Alibaba Group Holding Limited
//
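
// writeFb packs the converter's in-memory MNN::NetT into a FlatBuffer and writes it to
// MNNModelFile, optionally stripping weights for benchmark models, casting weights to fp16,
// attaching sparsity attributes, and quantizing convolution weights along the way.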

#include <algorithm>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include <MNN/MNNDefine.h>

#include "MNN_generated.h"
#include "MNN_compression.pb.h"
#include "MNN/expr/ExprCreator.hpp"
#include "half.hpp"
#include "logkit.h"
#include "writeFb.hpp"
#include "cli.hpp"
#include "cpp/ConfigFile.hpp"
#include "cpp/IDSTEncoder.hpp"
#include "core/OpCommonUtils.hpp"
#include "backend/cpu/compute/SparseConvolutionTiledExecutor.hpp"

using namespace MNN;
using namespace MNN::Express;
using namespace std;
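
// Largest absolute value in `weights`; used to derive the per-channel scale for
// symmetric weight quantization (scale = absMax / (2^(bits-1) - 1)).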
static float findAbsMax(const float *weights, const int count) {
    float absMax = std::fabs(weights[0]);
    for (int i = 1; i < count; i++) {
        float value = std::fabs(weights[i]);
        if (value > absMax) {
            absMax = value;
        }
    }
    return absMax;
}
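
// Per-channel {min, max} of `weights`; used to derive the scale and offset for
// asymmetric weight quantization.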
static std::vector<float> findMinMax(const float *weights, const int count) {
    float min = weights[0];
    float max = weights[0];

    for (int i = 1; i < count; i++) {
        float value = weights[i];
        if (value > max) {
            max = value;
        }
        if (value < min) {
            min = value;
        }
    }

    return {min, max};
}
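
// Serialize `netT` to `MNNModelFile`, applying the post-processing passes selected in `config`.
// Returns 0 on success and 1 if the graph still contains ops MNN cannot run.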
int writeFb(std::unique_ptr<MNN::NetT>& netT, const std::string& MNNModelFile, const modelConfig& config) {
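    // Drop trainable parameters (weights, biases, BN statistics, scales) so a benchmark
    // model keeps the graph structure without the real weight data.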
    auto RemoveParams = [](std::unique_ptr<MNN::OpT>& op) {
        const auto opType = op->type;
        switch (opType) {
            case MNN::OpType_Convolution:
            case MNN::OpType_Deconvolution:
            case MNN::OpType_ConvolutionDepthwise: {
                auto param = op->main.AsConvolution2D();
                param->weight.clear();
                param->bias.clear();
                break;
            }
            case MNN::OpType_TfQuantizedConv2D: {
                auto param = op->main.AsTfQuantizedConv2D();
                param->weight.clear();
                param->bias.clear();
                break;
            }
            case MNN::OpType_MatMul: {
                auto param = op->main.AsMatMul();
                param->weight.clear();
                param->bias.clear();
                break;
            }
            case MNN::OpType_BatchNorm: {
                auto param = op->main.AsBatchNorm();
                param->slopeData.clear();
                param->meanData.clear();
                param->varData.clear();
                param->biasData.clear();
                param->Adata.clear();
                param->Bdata.clear();
                break;
            }
            case MNN::OpType_Scale: {
                auto param = op->main.AsScale();
                param->scaleData.clear();
                param->biasData.clear();
                break;
            }
            default:
                break;
        }
    };

    if (config.benchmarkModel) {
        for (auto& op : netT->oplists) {
            RemoveParams(op);
        }
        for (auto& subgraph : netT->subgraphs) {
            for (auto& op : subgraph->nodes) {
                RemoveParams(op);
            }
        }
    }
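
    // Re-encode float weights as fp16: convolution weights move into an IDSTQuanT buffer
    // (type 3 marks half-float storage) and float Const blobs become DT_HALF blobs.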
    auto CastParamsToHalf = [](std::unique_ptr<MNN::OpT>& op) {
        const auto opType = op->type;
        switch (opType) {
            case MNN::OpType_Convolution:
            case MNN::OpType_ConvolutionDepthwise: {
                auto param = op->main.AsConvolution2D();
                const int weightSize = param->weight.size();
                // const int biasSize = param->bias.size();
                std::vector<half_float::half> quantizedFp16Weight;
                quantizedFp16Weight.resize(weightSize);
                std::transform(param->weight.begin(), param->weight.end(), quantizedFp16Weight.begin(),
                               [](float w) { return half_float::half(w); });
                // std::vector<half_float::half> quantizedFp16Bias;
                // quantizedFp16Bias.resize(biasSize);
                // std::transform(param->bias.begin(), param->bias.end(), quantizedFp16Bias.begin(),
                //                [](float b) { return half_float::half(b); });
                param->weight.clear();
                // param->bias.clear();

                param->quanParameter.reset(new MNN::IDSTQuanT);
                param->quanParameter->type = 3;
                int8_t* halfWeight = reinterpret_cast<int8_t*>(quantizedFp16Weight.data());
                param->quanParameter->buffer.assign(halfWeight, halfWeight + sizeof(half_float::half) * weightSize);
                break;
            }
            case MNN::OpType_Const: {
                auto blob = op->main.AsBlob();
                if (blob->dataType == MNN::DataType_DT_FLOAT) {
                    blob->dataType = MNN::DataType_DT_HALF;
                    blob->uint8s.resize(sizeof(half_float::half) * blob->float32s.size());
                    auto size = blob->float32s.size();
                    auto dst  = (half_float::half*)blob->uint8s.data();
                    for (int i = 0; i < size; ++i) {
                        dst[i] = blob->float32s[i];
                    }
                    blob->float32s.clear();
                }
                break;
            }
            default:
                break;
        }
    };

    if (config.saveHalfFloat) {
        for (auto& op : netT->oplists) {
            CastParamsToHalf(op);
        }
        for (auto& subgraph : netT->subgraphs) {
            for (auto& op : subgraph->nodes) {
                CastParamsToHalf(op);
            }
        }
    }
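
    // Annotate convolutions whose pruned weights are sparse enough: read the prune algorithm
    // and block size from the compression proto, measure the actual weight sparsity, and if it
    // exceeds SPARSITY_THRESHOLD attach a SparseCommon parameter describing the layout.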
    auto AddSparseInfo = [&](std::unique_ptr<MNN::OpT>& op, const Compression::Pipeline& proto) {
        auto prune_algo_type  = MNN::SparseAlgo_RANDOM;
        int sparseBlockOC     = 1;
        int sparseBlockKernel = 1;

        for (const auto& algo : proto.algo()) {
            if (algo.type() == Compression::CompressionAlgo::PRUNE) {
                auto prune_type = algo.prune_params().type();
                prune_algo_type = MNN::SparseAlgo(prune_type);
                if (prune_type == Compression::PruneParams_PruneType_SIMD_OC) {
                    sparseBlockOC = algo.prune_params().simd_oc_pruner_params().oc_blocks(0);
                }
            }
        }

        const auto opType = op->type;
        switch (opType) {
            case MNN::OpType_Convolution:
            case MNN::OpType_ConvolutionDepthwise: {
                auto param = op->main.AsConvolution2D();
                if (param->weight.empty()) {
                    return;
                }

                int weightSize = param->weight.size();
                int biasSize   = param->bias.size();
                size_t weightNNZElement = 0, weightBlockNumber = 0;
                OpCommonUtils::statisticWeightSparsity(weightNNZElement, weightBlockNumber, param->weight.data(),
                                                       biasSize, weightSize / biasSize, sparseBlockOC);
                float sparsity = 1.0f - float(weightNNZElement) / weightSize;
                if (sparsity < SPARSITY_THRESHOLD) {
                    return;
                }

                std::unique_ptr<MNN::AttributeT> arg1(new MNN::AttributeT);
                arg1->key = "sparseBlockOC";
                arg1->i   = sparseBlockOC;

                std::unique_ptr<MNN::AttributeT> arg2(new MNN::AttributeT);
                arg2->key = "sparseBlockKernel";
                arg2->i   = sparseBlockKernel;

                std::unique_ptr<MNN::AttributeT> arg3(new MNN::AttributeT);
                arg3->key = "NNZElement";
                arg3->i   = weightNNZElement;

                std::unique_ptr<MNN::AttributeT> arg4(new MNN::AttributeT);
                arg4->key = "blockNumber";
                arg4->i   = weightBlockNumber;

                flatbuffers::FlatBufferBuilder builder;
                std::vector<flatbuffers::Offset<MNN::Attribute>> argsVector;
                auto sparseArg1 = MNN::CreateAttribute(builder, arg1.get());
                auto sparseArg2 = MNN::CreateAttribute(builder, arg2.get());
                auto sparseArg3 = MNN::CreateAttribute(builder, arg3.get());
                auto sparseArg4 = MNN::CreateAttribute(builder, arg4.get());

                argsVector.emplace_back(sparseArg1);
                argsVector.emplace_back(sparseArg2);
                argsVector.emplace_back(sparseArg3);
                argsVector.emplace_back(sparseArg4);

                auto sparseArgs = builder.CreateVectorOfSortedTables<MNN::Attribute>(&argsVector);
                auto sparseCom  = MNN::CreateSparseCommon(builder, prune_algo_type, sparseArgs);
                builder.Finish(sparseCom);
                auto sparseComPtr = flatbuffers::GetRoot<MNN::SparseCommon>(builder.GetBufferPointer())->UnPack();

                param->sparseParameter.reset(sparseComPtr);
                break;
            }
            default:
                break;
        }
    };
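
    // Parse the compression pipeline proto once (if a file was given) and add sparsity
    // annotations across the main graph and all subgraphs.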
    {
        std::string compressFileName = config.compressionParamsFile;
        Compression::Pipeline proto;
        if (compressFileName != "") {
            std::fstream input(compressFileName.c_str(), std::ios::in | std::ios::binary);
            if (!proto.ParseFromIstream(&input)) {
                MNN_ERROR("Failed to parse compression pipeline proto.\n");
            }
        }
        for (auto& op : netT->oplists) {
            AddSparseInfo(op, proto);
        }
        for (auto& subgraph : netT->subgraphs) {
            for (auto& op : subgraph->nodes) {
                AddSparseInfo(op, proto);
            }
        }
    }
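
    // Full int8 quantization driven by the compression proto: per-layer parameters are looked
    // up by output tensor name; input/output tensors get TensorQuantInfo (scale, zero point,
    // clamp range); weights are quantized channel-wise as
    //     q = clamp(round(w / weightScale), wClampMin, wClampMax)
    // and stored via IDSTEncoder together with scaleIn/scaleOut for runtime requantization.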
    auto FullQuantAndCoding = [&](std::unique_ptr<MNN::OpT>& op, Compression::Pipeline& proto, SubGraphProtoT* subgraph) {
        std::string outputTensorName = subgraph ? subgraph->tensors[op->outputIndexes[0]] : netT->tensorName[op->outputIndexes[0]];
        auto opType = op->type;
        if (opType != MNN::OpType_Convolution && opType != MNN::OpType_ConvolutionDepthwise) {
            return;
        }

        auto findQuantParameters = [&](Compression::Pipeline& proto, std::string outputTensorName) {
            for (const auto& algo : proto.algo()) {
                if (algo.type() == Compression::CompressionAlgo::QUANTIZE) {
                    auto quantParams = algo.quant_params();
                    for (const auto& layerProto : quantParams.layer()) {
                        const std::string& outputName = layerProto.output(0).name();
                        if (outputName == outputTensorName) {
                            return layerProto;
                        }
                    }
                }
            }
            MNN::Compression::LayerQuantizeParams empty;
            return empty;
        };

        auto inputIndex  = op->inputIndexes[0];
        int outputIndex  = op->outputIndexes[0];
        auto quantParams = findQuantParameters(proto, outputTensorName);
        if (quantParams.weight_size() == 0) {
            return;
        }

        auto inputParams  = quantParams.input(0);
        auto outputParams = quantParams.output(0);
        auto weightParams = quantParams.weight(0);
        auto& tensorDescribe = subgraph ? subgraph->extraTensorDescribe : netT->extraTensorDescribe;

        auto findInDescribe = [&](int index) {
            for (int i = 0; i < tensorDescribe.size(); i++) {
                if (tensorDescribe[i]->index == index) {
                    return true;
                }
            }
            return false;
        };

        if (!findInDescribe(inputIndex)) {
            std::unique_ptr<MNN::TensorDescribeT> inDescribe(new MNN::TensorDescribeT);
            inDescribe->index = inputIndex;
            std::unique_ptr<MNN::TensorQuantInfoT> inputQuantInfo(new MNN::TensorQuantInfoT);
            inputQuantInfo->zero  = inputParams.zero_point();
            inputQuantInfo->scale = inputParams.scales(0);
            inputQuantInfo->min   = inputParams.clamp_min();
            inputQuantInfo->max   = inputParams.clamp_max();
            inputQuantInfo->type  = MNN::DataType_DT_INT8;
            inDescribe->quantInfo = std::move(inputQuantInfo);
            tensorDescribe.emplace_back(std::move(inDescribe));
        }

        if (!findInDescribe(outputIndex)) {
            std::unique_ptr<MNN::TensorDescribeT> outDescribe(new MNN::TensorDescribeT);
            outDescribe->index = outputIndex;
            std::unique_ptr<MNN::TensorQuantInfoT> outputQuantInfo(new MNN::TensorQuantInfoT);
            outputQuantInfo->zero  = outputParams.zero_point();
            outputQuantInfo->scale = outputParams.scales(0);
            outputQuantInfo->min   = outputParams.clamp_min();
            outputQuantInfo->max   = outputParams.clamp_max();
            outputQuantInfo->type  = MNN::DataType_DT_INT8;
            outDescribe->quantInfo = std::move(outputQuantInfo);
            tensorDescribe.emplace_back(std::move(outDescribe));
        }

        auto convParams  = op->main.AsConvolution2D();
        auto weightFloat = convParams->weight;
        auto biasFloat   = convParams->bias;
        auto& common     = convParams->common;

        const int ko = common->outputCount;
        const int ki = common->inputCount / common->group;
        const int kh = common->kernelY;
        const int kw = common->kernelX;
        const int kernelNum  = common->outputCount;
        const int kernelSize = weightFloat.size() / kernelNum;

        VARP weightVar      = _Const(weightFloat.data(), {ko, ki, kh, kw}, NCHW);
        VARP biasVar        = _Const(biasFloat.data(), {ko, 1, 1, 1}, NCHW);
        VARP inputScaleVar  = _Const(inputParams.scales(0), {}, NCHW);
        VARP outputScaleVar = _Const(outputParams.scales(0), {}, NCHW);

        float wClampMin = weightParams.clamp_min();
        float wClampMax = weightParams.clamp_max();

        std::vector<float> weightScaleVector(weightParams.scales().begin(), weightParams.scales().end());
        VARP weightScale = _Const(weightScaleVector.data(), {(int)weightScaleVector.size(), 1, 1, 1}, NCHW, halide_type_of<float>());
        auto quanWeightTemp  = _Round(weightVar * _Reciprocal(weightScale));
        auto quanWeightClamp = MNN::Express::_Maximum(_Minimum(quanWeightTemp, _Scalar<float>(wClampMax)), _Scalar<float>(wClampMin));
        auto quanWeight      = _Cast<int8_t>(quanWeightClamp);
        auto convScale       = _Reshape(_Reciprocal(outputScaleVar), {-1, 1, 1, 1}) * weightScale * inputScaleVar;

        std::vector<float> quantWeightFloat;
        std::vector<int8_t> quantWeights;
        std::vector<float> biasData;
        std::vector<float> scale;

        {
            auto info = quanWeight->getInfo();
            quantWeights.resize(info->size);
            quantWeightFloat.resize(info->size);
            auto ptr = quanWeight->readMap<int8_t>();
            for (int i = 0; i < quantWeightFloat.size(); i++) {
                quantWeightFloat[i] = ptr[i];
                quantWeights[i]     = ptr[i];
            }
        }
        {
            auto biasinfo = biasVar->getInfo();
            biasData.resize(biasinfo->size);
            auto ptr = biasVar->readMap<float>();
            ::memcpy(biasData.data(), ptr, biasData.size() * sizeof(float));

            auto info = weightScale->getInfo();
            scale.resize(info->size);
            MNN_ASSERT(scale.size() == biasData.size());
            auto ptrScale = weightScale->readMap<float>();
            ::memcpy(scale.data(), ptrScale, scale.size() * sizeof(float));
        }

        bool asymmetricQuantFlag = false;
        std::vector<float> fakeScales(kernelNum, 1.0f);
        convParams->quanParameter = IDSTEncoder::encode(quantWeightFloat, fakeScales, kernelSize, kernelNum,
                                                        asymmetricQuantFlag, quantWeights.data(), wClampMin);
        convParams->weight.clear();
        convParams->quanParameter->alpha    = std::move(scale);
        convParams->quanParameter->scaleIn  = inputParams.scales(0);
        convParams->quanParameter->scaleOut = outputParams.scales(0);

        convParams->symmetricQuan.reset(new MNN::QuantizedFloatParamT);
        convParams->symmetricQuan->method = MNN::QuantizeAlgo(int(quantParams.method()));
        convParams->symmetricQuan->nbits  = outputParams.bits();

        convParams->symmetricQuan->zeroPoint       = inputParams.zero_point();
        convParams->symmetricQuan->outputZeroPoint = outputParams.zero_point();
        convParams->symmetricQuan->clampMin        = outputParams.clamp_min();
        convParams->symmetricQuan->clampMax        = outputParams.clamp_max();

        convParams->bias = std::move(biasData);
    };

    {
        std::string compressFileName = config.compressionParamsFile;
        if (compressFileName != "") {
            Compression::Pipeline proto;
            std::fstream input(compressFileName.c_str(), std::ios::in | std::ios::binary);
            if (!proto.ParseFromIstream(&input)) {
                MNN_ERROR("Failed to parse compression pipeline proto.\n");
            }

            for (auto& op : netT->oplists) {
                FullQuantAndCoding(op, proto, nullptr);
            }
            for (auto& subgraph : netT->subgraphs) {
                for (auto& op : subgraph->nodes) {
                    FullQuantAndCoding(op, proto, subgraph.get());
                }
            }
        }
    }
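
    // Weight-only quantization to `bits` (2..8) bits per value. Symmetric mode maps each output
    // channel k with scale_k = absMax_k / (2^(bits-1) - 1) and q = round(w / scale_k); asymmetric
    // mode stores {min_k, scale_k} with scale_k = (max_k - min_k) / (threshold - clampMin) and
    // q = round((w - min_k) / scale_k + clampMin). The result is then coded by IDSTEncoder.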
    auto WeightQuantAndCoding = [&](std::unique_ptr<MNN::OpT>& op) {
        const auto opType = op->type;
        // config.weightQuantBits only controls weight quantization for float convolutions;
        // by default, do coding for ConvInt8 and DepthwiseConvInt8, if there are any.
        if ((config.weightQuantBits == 0) &&
            (opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8)) {
            return;
        }

        if (opType != MNN::OpType_Convolution && opType != MNN::OpType_ConvolutionDepthwise &&
            opType != MNN::OpType_Deconvolution && opType != MNN::OpType_DeconvolutionDepthwise &&
            opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8) {
            return;
        }

        int bits = 8;
        if ((config.weightQuantBits > 0) &&
            (opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8)) {
            bits = config.weightQuantBits;
        }
        // bits must be in [2, 8]
        bits = std::max(bits, 2);
        bits = std::min(bits, 8);

        auto param   = op->main.AsConvolution2D();
        auto& common = param->common;
        if (param->quanParameter.get() != nullptr) {
            return;
        }

        int weightSize = param->weight.size();
        if (opType == MNN::OpType_ConvInt8 || opType == MNN::OpType_DepthwiseConvInt8) {
            weightSize = param->symmetricQuan->weight.size();
        }
        int kernelNum  = common->outputCount;
        int kernelSize = weightSize / kernelNum;

        bool asymmetricQuantFlag = config.weightQuantAsymmetric;

        float threshold = (float)(1 << (bits - 1)) - 1.0f;
        float clampMin  = -threshold;
        if (asymmetricQuantFlag) {
            clampMin = -threshold - 1;
        }
        std::vector<float> weightData, scales;
        std::vector<int8_t> quantWeights;

        switch (opType) {
            case MNN::OpType_Convolution:
            case MNN::OpType_ConvolutionDepthwise:
            case MNN::OpType_Deconvolution:
            case MNN::OpType_DeconvolutionDepthwise: {
                weightData = param->weight;

                if (asymmetricQuantFlag) {
                    scales.resize(kernelNum * 2);
                    for (int k = 0; k < kernelNum; k++) {
                        int beginIndex = k * kernelSize;
                        auto minAndMax = findMinMax(weightData.data() + beginIndex, kernelSize);
                        float min = minAndMax[0];
                        float max = minAndMax[1];
                        float scale = (max - min) / (threshold - clampMin);

                        scales[2 * k]     = min;
                        scales[2 * k + 1] = scale;

                        for (int ii = 0; ii < kernelSize; ii++) {
                            float* ptr = weightData.data() + beginIndex;
                            int8_t quantValue = int8_t(std::round((ptr[ii] - min) / scale + clampMin));
                            quantWeights.emplace_back(quantValue);
                        }
                    }
                } else {
                    scales.resize(kernelNum);
                    for (int k = 0; k < kernelNum; k++) {
                        int beginIndex = k * kernelSize;
                        auto absMax = findAbsMax(weightData.data() + beginIndex, kernelSize);

                        scales[k] = absMax / threshold;

                        for (int ii = 0; ii < kernelSize; ii++) {
                            float* ptr = weightData.data() + beginIndex;
                            int8_t quantValue = int8_t(std::round(ptr[ii] / scales[k]));
                            quantWeights.emplace_back(quantValue);
                        }
                    }
                }
                break;
            }
            case MNN::OpType_ConvInt8:
            case MNN::OpType_DepthwiseConvInt8: {
                auto& int8Params = param->symmetricQuan;
                for (int i = 0; i < int8Params->weight.size(); i++) {
                    weightData.emplace_back(float(int8Params->weight[i]));
                }
                scales.resize(kernelNum, 1.0f);
                break;
            }
            default:
                break;
        }

        if (opType == MNN::OpType_ConvInt8 || opType == MNN::OpType_DepthwiseConvInt8) {
            param->quanParameter = IDSTEncoder::encode(weightData, scales, kernelSize, kernelNum, false,
                                                       param->symmetricQuan->weight.data(), int(clampMin));
            param->symmetricQuan->weight.clear();
            param->quanParameter->alpha = {1.0f}; // fake scales
        } else {
            param->quanParameter = IDSTEncoder::encode(weightData, scales, kernelSize, kernelNum, asymmetricQuantFlag,
                                                       quantWeights.data(), int(clampMin));
            param->weight.clear();
        }
    };

    {
        for (auto& op : netT->oplists) {
            WeightQuantAndCoding(op);
        }
        for (auto& subgraph : netT->subgraphs) {
            for (auto& op : subgraph->nodes) {
                WeightQuantAndCoding(op);
            }
        }
    }
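
    // Any op still tagged OpType_Extra with a non-MNN engine was never converted; collect
    // these so the converter can fail loudly instead of writing a broken model.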
    std::set<std::string> notSupportOps;
    auto CheckIfNotSupported = [&](const std::unique_ptr<MNN::OpT>& op) {
        if (op->type == MNN::OpType_Extra) {
            if (op->main.AsExtra()->engine != "MNN") {
                notSupportOps.insert(op->main.AsExtra()->engine + "::" + op->main.AsExtra()->type);
            }
        }
    };
    for (auto& op : netT->oplists) {
        CheckIfNotSupported(op);
    }
    for (auto& subgraph : netT->subgraphs) {
        for (auto& op : subgraph->nodes) {
            CheckIfNotSupported(op);
        }
    }

    std::ostringstream notSupportInfo;
    if (!notSupportOps.empty()) {
        for (const auto& name : notSupportOps) {
            notSupportInfo << name << " | ";
        }
        auto opNames = notSupportInfo.str();
        LOG(FATAL) << "These ops are not supported: " << opNames.substr(0, opNames.size() - 2);
        return 1;
    }

    // Dump input and output tensor names.
    {
        std::set<int> inputIdx, outputIdx, realInput, realOutput;
        for (const auto& op : netT->oplists) {
            for (auto i : op->inputIndexes) {
                inputIdx.insert(i);
            }
            for (auto o : op->outputIndexes) {
                outputIdx.insert(o);
                if (op->type == OpType_Input) {
                    realInput.insert(o);
                }
            }
        }
        std::set_difference(outputIdx.begin(), outputIdx.end(), inputIdx.begin(), inputIdx.end(),
                            std::inserter(realOutput, realOutput.begin()));
        std::cout << "inputTensors : [ ";
        for (int i : realInput) {
            std::cout << netT->tensorName[i] << ", ";
        }
        std::cout << "]\noutputTensors: [ ";
        for (int i : realOutput) {
            std::cout << netT->tensorName[i] << ", ";
        }
        std::cout << "]" << std::endl;
    }
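
    // Pack the final NetT into a FlatBuffer. ForceDefaults(true) serializes fields even when
    // they still hold their default values.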
    flatbuffers::FlatBufferBuilder builderOutput(1024);
    builderOutput.ForceDefaults(true);
    auto len = MNN::Net::Pack(builderOutput, netT.get());
    builderOutput.Finish(len);
    int sizeOutput    = builderOutput.GetSize();
    auto bufferOutput = builderOutput.GetBufferPointer();
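
    // For static inference models, fix the input shapes from the config file and let
    // converToStaticModel resolve all shapes before writing; otherwise write the buffer directly.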
    if (config.saveStaticModel && netT->usage != MNN::Usage_INFERENCE_STATIC) {
        std::map<std::string, std::vector<int>> inputConfig;
        // get config to set input size
        if (config.inputConfigFile.size() > 0) {
            ConfigFile conf(config.inputConfigFile);
            auto numOfInputs = conf.Read<int>("input_size");
            auto inputNames  = splitNames(numOfInputs, conf.Read<std::string>("input_names"));
            auto inputDims   = splitDims(numOfInputs, conf.Read<std::string>("input_dims"));
            for (int i = 0; i < numOfInputs; i++) {
                inputConfig.insert(std::make_pair(inputNames[i], inputDims[i]));
            }
        }
        const Net* net = flatbuffers::GetRoot<MNN::Net>(bufferOutput);
        converToStaticModel(net, inputConfig, MNNModelFile);
    } else {
        std::ofstream output(MNNModelFile, std::ofstream::binary);
        output.write((const char*)bufferOutput, sizeOutput);
    }
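
    // Debug aid: when MNN_DUMP_SUBGRAPH is defined, each subgraph is also written out as a
    // standalone .mnn file next to the main model.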
#ifdef MNN_DUMP_SUBGRAPH
    for (int i = 0; i < netT->subgraphs.size(); ++i) {
        std::unique_ptr<MNN::NetT> subnet(new MNN::NetT);
        auto& subgraph = netT->subgraphs[i];
        subnet->oplists    = std::move(subgraph->nodes);
        subnet->tensorName = subgraph->tensors;
        subnet->sourceType = netT->sourceType;
        subnet->bizCode    = netT->bizCode;

        flatbuffers::FlatBufferBuilder builder(1024);
        builder.ForceDefaults(true);
        auto len = MNN::Net::Pack(builder, subnet.get());
        builder.Finish(len);
        int output_size  = builder.GetSize();
        auto* output_ptr = builder.GetBufferPointer();

        std::string filename = MNNModelFile + "_subgraph_" + std::to_string(i) + ".mnn";
        std::ofstream output(filename.c_str(), std::ofstream::binary);
        output.write((const char*)output_ptr, output_size);
    }
#endif
    return 0;
}