MNN/tools/converter/source/common/writeFb.cpp

//
// writeFb.cpp
// MNNConverter
//
// Created by MNN on 2019/01/31.
// Copyright © 2018, Alibaba Group Holding Limited
//
#include <algorithm>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>

#include "MNN_generated.h"
#include "half.hpp"
#include "logkit.h"
#include "writeFb.hpp"
#include "cpp/ConfigFile.hpp"
#include <MNN/MNNDefine.h>
#include "cli.hpp"
#include "../../common/Global.hpp"

using namespace MNN;
using namespace std;
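
// Per-channel range helpers used to derive quantization scales:
// findAbsMax gives max(|w|) for symmetric quantization, findMinMax
// gives {min, max} for asymmetric quantization.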
static float findAbsMax(const float *weights, const int count) {
    float absMax = std::fabs(weights[0]);
    for (int i = 1; i < count; i++) {
        float value = std::fabs(weights[i]);
        if (value > absMax) {
            absMax = value;
        }
    }
    return absMax;
}
static std::vector<float> findMinMax(const float *weights, const int count) {
    float min = weights[0];
    float max = weights[0];
    for (int i = 1; i < count; i++) {
        float value = weights[i];
        if (value > max) {
            max = value;
        }
        if (value < min) {
            min = value;
        }
    }
    return {min, max};
}
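
// Writes a blob shape header: the rank as one byte, then each dimension
// as a 2-byte unsigned short (host byte order).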
static void WriteBlobDim(ostream &out, std::vector<int> dims) {
    char tmp[4];
    ((unsigned char *)tmp)[0] = (unsigned char)dims.size();
    out.write(tmp, 1);
    for (int i = 0; i < dims.size(); i++) {
        unsigned short tmpShort = (unsigned short)dims[i];
        out.write((const char *)(&tmpShort), 2);
    }
}
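
// Bit-packs arr_len values into buf at iNeedBits bits each, MSB-first
// within a byte. For example, with iNeedBits = 3 the values {5, 1} occupy
// the top six bits of the first byte as 101 001. When a value straddles a
// byte boundary (uShift < 0), its high bits finish the current byte and
// the remaining low bits start the next one.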
static void FillBuffer(char *buf, unsigned int buf_len, const char *arr, unsigned int arr_len, unsigned char iNeedBits) {
    memset(buf, 0, buf_len);
    char *tmp = buf;
    int iOffset = 0;
    unsigned char cMask = (1 << iNeedBits) - 1;
    for (unsigned int i = 0; i < arr_len; i++) {
        char value = arr[i];
        int uShift = 8 - iNeedBits - iOffset % 8;
        if (uShift < 0) {
            tmp[iOffset / 8] |= ((value & cMask) >> (0 - uShift));
            tmp[(iOffset / 8) + 1] |= ((value & cMask) << (8 + uShift));
        } else {
            tmp[iOffset / 8] |= ((value & cMask) << uShift);
        }
        iOffset += iNeedBits;
        if (iOffset % 8 == 0) {
            tmp += iOffset / 8;
            iOffset = 0;
        }
    }
}
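
// Collects the set of distinct quantized integer values the blob maps to,
// so callers can build a value table plus per-weight indices. In asymmetric
// mode alphaData stores {min, scale} pairs per channel and values land in
// [-128, 127]; in symmetric mode it stores one scale per channel.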
static void GetWeightSet(set<int> &setWeight, const float *weightData, const float *alphaData, int area, int channel, bool asymmetricQuantFlag) {
    setWeight.clear();
    if (asymmetricQuantFlag) {
        for (int i = 0; i < channel; i++) {
            float min   = alphaData[2 * i];
            float alpha = alphaData[2 * i + 1];
            if (alpha <= 1e-6f) {
                setWeight.insert(-128);
                continue;
            }
            for (int j = 0; j < area; j++) {
                float weight = weightData[i * area + j];
                setWeight.insert(round((weight - min) / alpha) + (-128));
            }
        }
    } else {
        for (int i = 0; i < channel; i++) {
            float alpha = alphaData[i];
            if (alpha <= 1e-6f) {
                setWeight.insert(0);
                continue;
            }
            for (int j = 0; j < area; j++) {
                float weight = weightData[i * area + j];
                setWeight.insert(round(weight / alpha));
            }
        }
    }
}
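
// Computes the fraction of quantized weights equal to the zero point and
// returns the non-zero count in nnz. With iMaxStep set, an extra entry is
// counted whenever the gap since the last counted index reaches iMaxStep,
// mirroring the padding entries the sparse encoder must emit.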
static float GetSparsity(const float *weightData, int weightSize, unsigned int &nnz, const float *alphaData, int area, int channel, bool asymmetricQuantFlag, int iMaxStep = -1) {
    nnz = 0;
    int iPreIdx = 0;
    float sparsity;
    if (asymmetricQuantFlag) {
        for (int i = 0; i < weightSize; i++) {
            float min   = alphaData[2 * (i / area)];
            float alpha = alphaData[2 * (i / area) + 1];
            int zeroQuant = -128;
            if (alpha > 1e-6f) {
                zeroQuant = round((0.0f - min) / alpha) + (-128);
            }
            float weight = weightData[i];
            int value = -128;
            if (alpha > 1e-6f) {
                value = round((weight - min) / alpha) + (-128);
            }
            if (value != zeroQuant) {
                nnz++;
                iPreIdx = i;
            }
            if ((i - iPreIdx >= iMaxStep) && (iMaxStep != -1)) {
                nnz++;
                iPreIdx = i;
            }
        }
    } else {
        for (int i = 0; i < weightSize; i++) {
            float alpha  = alphaData[i / area];
            float weight = weightData[i];
            int value = 0;
            if (alpha > 1e-6f) {
                value = round(weight / alpha);
            }
            if (value != 0) {
                nnz++;
                iPreIdx = i;
            }
            if ((i - iPreIdx >= iMaxStep) && (iMaxStep != -1)) {
                nnz++;
                iPreIdx = i;
            }
        }
    }
    sparsity = 1 - 1.0f * nnz / weightSize;
    return sparsity;
}
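
// Tries step widths of 2..8 bits for the sparse encoding and keeps the one
// minimizing the estimated payload size (step stream plus value stream).
// Returns the corresponding non-zero count and sets iMaxStepBits.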
unsigned int GetBestMaxStep(const float *weightData, int weightSize, unsigned char &iMaxStepBits, int BlobDataSize, const float *alphaData, int area, int channel, bool asymmetricQuantFlag) {
    size_t szBestSize = 1000000000;
    unsigned int best_nnz = 0;
    for (int i = 2; i < 9; i++) {
        unsigned int nnz = 0;
        GetSparsity(weightData, weightSize, nnz, alphaData, area, channel, asymmetricQuantFlag, pow(2, i) - 1);
        size_t tmp = ceil(0.125 * nnz * i) + ceil(0.125 * nnz * BlobDataSize);
        if (tmp < szBestSize) {
            iMaxStepBits = (unsigned char)i;
            szBestSize   = tmp;
            best_nnz     = nnz;
        }
    }
    return best_nnz;
}
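
// Writes the dense weight encoding ("CQ", presumably constant quantization):
// blob shape, distinct value count, the value table, then every weight as a
// bit-packed index into that table.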
static void WriteCQBlobs(ostream &out, const float *weightData, const float *alphaData, int area, int channel, bool asymmetricQuantFlag) {
    // Find the distinct quantized values across the whole blob and check them.
    set<int> setWeight;
    GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag);
    int iCount    = setWeight.size();
    int iNeedBits = ceil(log2(iCount));
    if (iNeedBits > 8) {
        MNN_ERROR("Encoding needs more than 8 bits per weight; the model may be broken\n");
        return;
    }
    map<int, unsigned char> mapWeight;
    int iIdx = 0;
    for (set<int>::iterator it = setWeight.begin(); it != setWeight.end(); it++) {
        mapWeight[*it] = iIdx++;
    }
    size_t buf_len = size_t(ceil(0.125 * iNeedBits * area * channel));
    char *buf = new char[buf_len];
    {
        char *arr = new char[area * channel];
        char *tmp = arr;
        if (asymmetricQuantFlag) {
            for (int i = 0; i < channel; i++) {
                float min   = alphaData[2 * i];
                float alpha = alphaData[2 * i + 1];
                for (int j = 0; j < area; j++) {
                    float weight = weightData[i * area + j];
                    int value = -128;
                    if (alpha > 1e-6f) {
                        value = round((weight - min) / alpha) + (-128);
                    }
                    *tmp = mapWeight[value];
                    tmp++;
                }
            }
        } else {
            for (int i = 0; i < channel; i++) {
                float alpha = alphaData[i];
                for (int j = 0; j < area; j++) {
                    float weight = weightData[i * area + j];
                    int value = 0;
                    if (alpha > 1e-6f) {
                        value = round(weight / alpha);
                    }
                    *tmp = mapWeight[value];
                    tmp++;
                }
            }
        }
        FillBuffer(buf, buf_len, arr, area * channel, iNeedBits);
        delete[] arr;
    }
    // Begin writing to the stream.
    {
        char tmp[100];
        // 1. weight blob shape (1-byte rank + uint16 dims)
        WriteBlobDim(out, {channel, area});
        // 2. available value count (unsigned char)
        tmp[0] = (unsigned char)iCount;
        out.write(tmp, 1);
        // 3. value set (signed char * valueset_size)
        for (set<int>::iterator it = setWeight.begin(); it != setWeight.end(); it++) {
            tmp[0] = (unsigned char)*it;
            out.write(tmp, 1);
        }
        // 4. weight indices (size = ceil(0.125 * weights_count * ceil(log2(available_value_count))))
        out.write(buf, buf_len);
        // g_totalSize += 1 + setWeight.size() + buf_len;
    }
    delete[] buf;
}
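
// Writes the sparse weight encoding: blob shape, non-zero count, the step
// bit width, a bit-packed stream of gaps between non-zeros, the value
// table, and a bit-packed stream of value indices. Gaps too wide for the
// step width are split by emitting explicit zero entries.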
static void WriteSparseQuanBlobs(ostream &out, const float *weightData, const float *alphaData, int area, int channel, bool asymmetricQuantFlag) {
    set<int> setWeight;
    GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag);
    int iDataNeedBits = ceil(log2(setWeight.size()));
    unsigned int nnz = 0;
    int weightSize = area * channel;
    map<int, unsigned char> mapWeight;
    {
        int iIdx = 0;
        for (set<int>::iterator it = setWeight.begin(); it != setWeight.end(); it++) {
            mapWeight[*it] = iIdx++;
        }
    }
    unsigned char iNeedBits;
    nnz = GetBestMaxStep(weightData, weightSize, iNeedBits, iDataNeedBits, alphaData, area, channel, asymmetricQuantFlag);
    // weight value buffer
    size_t data_buf_len = size_t(ceil(0.125 * iDataNeedBits * nnz));
    char *data_buf = new char[data_buf_len];
    // sparse COO step buffer
    size_t buf_len = size_t(ceil(0.125 * iNeedBits * nnz));
    char *buf = new char[buf_len];
    { // fill buf with step values
        unsigned char *arr_idx  = new unsigned char[nnz];
        unsigned char *data_arr = new unsigned char[nnz];
        unsigned char *tmp      = arr_idx;
        int iMaxStep = pow(2, iNeedBits) - 1;
        int iPreIdx  = 0;
        unsigned char *dTmp = data_arr;
        if (asymmetricQuantFlag) {
            for (int i = 0; i < weightSize; i++) {
                float min   = alphaData[2 * (i / area)];
                float alpha = alphaData[2 * (i / area) + 1];
                int zeroQuant = -128;
                if (alpha > 1e-6f) {
                    zeroQuant = round((0.0f - min) / alpha) + (-128);
                }
                float weight = weightData[i];
                int value = -128;
                if (alpha > 1e-6f) {
                    value = round((weight - min) / alpha) + (-128);
                }
                if (value != zeroQuant) {
                    *dTmp = mapWeight[value];
                    *tmp  = i - iPreIdx;
                    iPreIdx = i;
                    tmp++;
                    dTmp++;
                }
                if (i - iPreIdx >= iMaxStep) {
                    *dTmp = mapWeight[zeroQuant];
                    *tmp  = i - iPreIdx;
                    iPreIdx = i;
                    tmp++;
                    dTmp++;
                }
            }
        } else {
            for (int i = 0; i < weightSize; i++) {
                float alpha  = alphaData[i / area];
                float weight = weightData[i];
                int value = 0;
                if (alpha > 1e-6f) {
                    value = round(weight / alpha);
                }
                if (value != 0) {
                    *dTmp = mapWeight[value];
                    *tmp  = i - iPreIdx;
                    iPreIdx = i;
                    tmp++;
                    dTmp++;
                }
                if (i - iPreIdx >= iMaxStep) {
                    *dTmp = mapWeight[0];
                    *tmp  = i - iPreIdx;
                    iPreIdx = i;
                    tmp++;
                    dTmp++;
                }
            }
        }
        FillBuffer(buf, buf_len, (char *)arr_idx, nnz, iNeedBits);
        FillBuffer(data_buf, data_buf_len, (char *)data_arr, nnz, iDataNeedBits);
        delete[] arr_idx;
        delete[] data_arr;
    }
    { // write
        char tmp[100];
        // 1. weight blob shape (1-byte rank + uint16 dims)
        WriteBlobDim(out, {channel, area});
        // 2. non-zero count (unsigned int32)
        out.write((const char *)&nnz, 4);
        // 3. bit width used for steps (unsigned char)
        out.write((const char *)&iNeedBits, 1);
        // 4. step buffer, ceil(nnz * step_bits / 8) bytes
        out.write(buf, buf_len);
        // 5. available value count (unsigned char)
        tmp[0] = (unsigned char)setWeight.size();
        out.write(tmp, 1);
        // 6. value set (signed char * valueset_size)
        for (set<int>::iterator it = setWeight.begin(); it != setWeight.end(); it++) {
            tmp[0] = (unsigned char)*it;
            out.write(tmp, 1);
        }
        // 7. non-zero weight indices (ceil(nnz * log2(available_value_count) / 8) bytes)
        out.write((const char *)data_buf, data_buf_len);
    }
    delete[] buf;
    delete[] data_buf;
}
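
// Serializes the converted NetT into an MNN FlatBuffer file. Optionally
// strips parameters for benchmark models, casts weights to FP16, applies
// weight quantization and coding, checks for unsupported ops, and handles
// static-model saving and subgraph dumping.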
int writeFb(std::unique_ptr<MNN::NetT> &netT, const std::string &MNNModelFile, modelConfig config) {
    auto RemoveParams = [](std::unique_ptr<MNN::OpT> &op) {
        const auto opType = op->type;
        switch (opType) {
            case MNN::OpType_Convolution:
            case MNN::OpType_Deconvolution:
            case MNN::OpType_ConvolutionDepthwise: {
                auto param = op->main.AsConvolution2D();
                param->weight.clear();
                param->bias.clear();
                break;
            }
            case MNN::OpType_TfQuantizedConv2D: {
                auto param = op->main.AsTfQuantizedConv2D();
                param->weight.clear();
                param->bias.clear();
                break;
            }
            case MNN::OpType_MatMul: {
                auto param = op->main.AsMatMul();
                param->weight.clear();
                param->bias.clear();
                break;
            }
            case MNN::OpType_BatchNorm: {
                auto param = op->main.AsBatchNorm();
                param->slopeData.clear();
                param->meanData.clear();
                param->varData.clear();
                param->biasData.clear();
                param->Adata.clear();
                param->Bdata.clear();
                break;
            }
            case MNN::OpType_Scale: {
                auto param = op->main.AsScale();
                param->scaleData.clear();
                param->biasData.clear();
                break;
            }
            default:
                break;
        }
    };
    if (config.benchmarkModel) {
        for (auto &op : netT->oplists) {
            RemoveParams(op);
        }
        for (auto &subgraph : netT->subgraphs) {
            for (auto &op : subgraph->nodes) {
                RemoveParams(op);
            }
        }
    }
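
    // When config.saveHalfFloat is set, cast convolution weights and float
    // Const blobs to FP16; FP16 conv weights are carried in an IDSTQuan
    // buffer with type = 3.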
    auto CastParamsToHalf = [](std::unique_ptr<MNN::OpT> &op) {
        const auto opType = op->type;
        switch (opType) {
            case MNN::OpType_Convolution:
            case MNN::OpType_ConvolutionDepthwise: {
                auto param = op->main.AsConvolution2D();
                const int weightSize = param->weight.size();
                // const int biasSize = param->bias.size();
                std::vector<half_float::half> quantizedFp16Weight;
                quantizedFp16Weight.resize(weightSize);
                std::transform(param->weight.begin(), param->weight.end(), quantizedFp16Weight.begin(),
                               [](float w) { return half_float::half(w); });
                // std::vector<half_float::half> quantizedFp16Bias;
                // quantizedFp16Bias.resize(biasSize);
                // std::transform(param->bias.begin(), param->bias.end(), quantizedFp16Bias.begin(),
                //                [](float b) { return half_float::half(b); });
                param->weight.clear();
                // param->bias.clear();
                param->quanParameter.reset(new MNN::IDSTQuanT);
                param->quanParameter->type = 3;
                int8_t *halfWeight = reinterpret_cast<int8_t *>(quantizedFp16Weight.data());
                param->quanParameter->buffer.assign(halfWeight, halfWeight + sizeof(half_float::half) * weightSize);
                break;
            }
            case MNN::OpType_Const: {
                auto blob = op->main.AsBlob();
                if (blob->dataType == MNN::DataType_DT_FLOAT) {
                    blob->dataType = MNN::DataType_DT_HALF;
                    blob->uint8s.resize(sizeof(half_float::half) * blob->float32s.size());
                    auto size = blob->float32s.size();
                    auto dst  = (half_float::half *)blob->uint8s.data();
                    for (int i = 0; i < size; ++i) {
                        dst[i] = blob->float32s[i];
                    }
                    blob->float32s.clear();
                }
                break;
            }
            default:
                break;
        }
    };
    if (config.saveHalfFloat) {
        for (auto &op : netT->oplists) {
            CastParamsToHalf(op);
        }
        for (auto &subgraph : netT->subgraphs) {
            for (auto &op : subgraph->nodes) {
                CastParamsToHalf(op);
            }
        }
    }
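
    // Quantize float convolution weights to `bits` bits (and re-encode the
    // weights of existing ConvInt8 ops), then keep whichever encoding is
    // smaller: dense (IDSTQuan type = 1) or sparse (type = 2).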
    auto WeightQuantAndCoding = [&](std::unique_ptr<MNN::OpT> &op) {
        const auto opType = op->type;
        // config.weightQuantBits only controls weight quantization for float
        // convolutions; by default, coding is applied to ConvInt8 and
        // DepthwiseConvInt8 ops, if there are any.
        if ((config.weightQuantBits == 0) &&
            (opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8)) {
            return;
        }
        if (opType != MNN::OpType_Convolution && opType != MNN::OpType_ConvolutionDepthwise &&
            opType != MNN::OpType_Deconvolution && opType != MNN::OpType_DeconvolutionDepthwise &&
            opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8) {
            return;
        }
        int bits = 8;
        if ((config.weightQuantBits > 0) &&
            (opType != MNN::OpType_ConvInt8 && opType != MNN::OpType_DepthwiseConvInt8)) {
            bits = config.weightQuantBits;
        }
        // bits must stay within [2, 8]
        bits = std::max(bits, 2);
        bits = std::min(bits, 8);

        auto param   = op->main.AsConvolution2D();
        auto &common = param->common;
        if (param->quanParameter.get() != nullptr) {
            return;
        }
        int weightSize = param->weight.size();
        if (opType == MNN::OpType_ConvInt8 || opType == MNN::OpType_DepthwiseConvInt8) {
            weightSize = param->symmetricQuan->weight.size();
        }
        int kernelNum  = common->outputCount;
        int kernelSize = weightSize / kernelNum;

        auto gConverterConfig    = Global<modelConfig>::Get();
        bool asymmetricQuantFlag = gConverterConfig->weightQuantAsymmetric;

        std::vector<float> weightData, scales;

        switch (opType) {
            case MNN::OpType_Convolution:
            case MNN::OpType_ConvolutionDepthwise:
            case MNN::OpType_Deconvolution:
            case MNN::OpType_DeconvolutionDepthwise: {
                float threshold = (float)(1 << (bits - 1)) - 1.0f;
                weightData = param->weight;
                if (asymmetricQuantFlag) {
                    scales.resize(kernelNum * 2);
                    for (int k = 0; k < kernelNum; k++) {
                        int beginIndex = k * kernelSize;
                        auto minAndMax = findMinMax(weightData.data() + beginIndex, kernelSize);
                        float min   = minAndMax[0];
                        float max   = minAndMax[1];
                        float scale = (max - min) / (127 + 128);
                        scales[2 * k]     = min;
                        scales[2 * k + 1] = scale;
                    }
                } else {
                    scales.resize(kernelNum);
                    for (int k = 0; k < kernelNum; k++) {
                        int beginIndex = k * kernelSize;
                        auto absMax    = findAbsMax(weightData.data() + beginIndex, kernelSize);
                        scales[k] = absMax / threshold;
                    }
                }
                break;
            }
            case MNN::OpType_ConvInt8:
            case MNN::OpType_DepthwiseConvInt8: {
                auto &int8Params = param->symmetricQuan;
                for (int i = 0; i < int8Params->weight.size(); i++) {
                    weightData.emplace_back(float(int8Params->weight[i]));
                }
                scales.resize(kernelNum, 1.0f);
                if (asymmetricQuantFlag) {
                    scales.resize(kernelNum * 2, 1.0f);
                }
                break;
            }
            default:
                break;
        }

        std::ostringstream outputStringStreamCQ, outputStringStreamSQ;
        WriteCQBlobs(outputStringStreamCQ, weightData.data(), scales.data(), kernelSize, kernelNum, asymmetricQuantFlag);
        WriteSparseQuanBlobs(outputStringStreamSQ, weightData.data(), scales.data(), kernelSize, kernelNum, asymmetricQuantFlag);

        if (opType == MNN::OpType_ConvInt8 || opType == MNN::OpType_DepthwiseConvInt8) {
            // Only encode when the result is smaller than the raw int8 weights.
            if (weightSize < (outputStringStreamCQ.str().size() + sizeof(float)) &&
                weightSize < (outputStringStreamSQ.str().size() + sizeof(float))) {
                return;
            }
        }
        param->quanParameter.reset(new MNN::IDSTQuanT);
        auto tempString = outputStringStreamCQ.str();
        param->quanParameter->type = 1;
        if (outputStringStreamSQ.str().size() < tempString.size()) {
            tempString = outputStringStreamSQ.str();
            param->quanParameter->type = 2;
        }
        param->quanParameter->buffer.resize(tempString.size());
        ::memcpy(param->quanParameter->buffer.data(), tempString.data(), tempString.size());
        param->quanParameter->quantScale = 1.0f;
        if (asymmetricQuantFlag) {
            param->quanParameter->readType = kernelNum;
        }
        if (opType == MNN::OpType_ConvInt8 || opType == MNN::OpType_DepthwiseConvInt8) {
            param->symmetricQuan->weight.clear();
            param->quanParameter->alpha = {1.0f}; // fake scales
            param->quanParameter->has_scaleInt = true;
        } else {
            param->weight.clear();
            param->quanParameter->alpha = std::move(scales);
        }
    };
    {
        for (auto &op : netT->oplists) {
            WeightQuantAndCoding(op);
        }
        for (auto &subgraph : netT->subgraphs) {
            for (auto &op : subgraph->nodes) {
                WeightQuantAndCoding(op);
            }
        }
    }
    std::set<std::string> notSupportOps;
    auto CheckIfNotSupported = [&](const std::unique_ptr<MNN::OpT> &op) {
        if (op->type == MNN::OpType_Extra) {
            if (op->main.AsExtra()->engine != "MNN") {
                notSupportOps.insert(op->main.AsExtra()->engine + "::" + op->main.AsExtra()->type);
            }
        }
    };
    for (auto &op : netT->oplists) {
        CheckIfNotSupported(op);
    }
    for (auto &subgraph : netT->subgraphs) {
        for (auto &op : subgraph->nodes) {
            CheckIfNotSupported(op);
        }
    }

    std::ostringstream notSupportInfo;
    if (!notSupportOps.empty()) {
        for (auto name : notSupportOps) {
            notSupportInfo << name << " | ";
        }
        auto opNames = notSupportInfo.str();
        LOG(FATAL) << "These ops are not supported: " << opNames.substr(0, opNames.size() - 2);
    }
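
    // Pack the final net into a FlatBuffer. For a static model, hand the
    // buffer to converToStaticModel together with the input sizes read from
    // the config file; otherwise write the raw buffer to MNNModelFile.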
    flatbuffers::FlatBufferBuilder builderOutput(1024);
    builderOutput.ForceDefaults(true);
    auto len = MNN::Net::Pack(builderOutput, netT.get());
    builderOutput.Finish(len);
    int sizeOutput    = builderOutput.GetSize();
    auto bufferOutput = builderOutput.GetBufferPointer();

    if (config.saveStaticModel && netT->usage != MNN::Usage_INFERENCE_STATIC) {
        std::map<std::string, std::vector<int>> inputConfig;
        // read the config file to set input sizes
        if (config.inputConfigFile.size() > 0) {
            ConfigFile conf(config.inputConfigFile);
            auto numOfInputs = conf.Read<int>("input_size");
            auto inputNames  = splitNames(numOfInputs, conf.Read<std::string>("input_names"));
            auto inputDims   = splitDims(numOfInputs, conf.Read<std::string>("input_dims"));
            for (int i = 0; i < numOfInputs; i++) {
                inputConfig.insert(std::make_pair(inputNames[i], inputDims[i]));
            }
        }
        const Net *net = flatbuffers::GetRoot<MNN::Net>(bufferOutput);
        converToStaticModel(net, inputConfig, MNNModelFile);
    } else {
        std::ofstream output(MNNModelFile, std::ofstream::binary);
        output.write((const char *)bufferOutput, sizeOutput);
    }
#ifdef MNN_DUMP_SUBGRAPH
    for (int i = 0; i < netT->subgraphs.size(); ++i) {
        std::unique_ptr<MNN::NetT> subnet(new MNN::NetT);
        auto &subgraph = netT->subgraphs[i];
        subnet->oplists    = std::move(subgraph->nodes);
        subnet->tensorName = subgraph->tensors;
        subnet->sourceType = netT->sourceType;
        subnet->bizCode    = netT->bizCode;

        flatbuffers::FlatBufferBuilder builder(1024);
        builder.ForceDefaults(true);
        auto len = MNN::Net::Pack(builder, subnet.get());
        builder.Finish(len);
        int output_size  = builder.GetSize();
        auto *output_ptr = builder.GetBufferPointer();
        std::string filename = MNNModelFile + "_subgraph_" + std::to_string(i) + ".mnn";
        std::ofstream output(filename.c_str(), std::ofstream::binary);
        output.write((const char *)output_ptr, output_size);
    }
#endif
    return 0;
}