MNN/tools/cpp/IDSTEncoder.hpp

//
//  IDSTEncoder.hpp
//  MNN
//
//  Created by MNN on 2021/02/26.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef IDSTENCODER_HPP
#define IDSTENCODER_HPP

#include <map>
#include <sstream>
#include "MNN_generated.h"
#include <cmath>

using namespace MNN;

namespace IDSTEncoder {

// Serializes a blob shape to `out`.
// Layout: one byte holding the number of dimensions, followed by every
// dimension stored as a 2-byte unsigned short in the host's byte order.
// The casts truncate: the count keeps only its low 8 bits and each dimension
// keeps only what fits in an unsigned short.
static void WriteBlobDim(std::ostream &out, std::vector<int> dims)
{
    const unsigned char rank = static_cast<unsigned char>(dims.size());
    out.write(reinterpret_cast<const char*>(&rank), 1);
    for (const int extent : dims)
    {
        const unsigned short packedExtent = static_cast<unsigned short>(extent);
        out.write(reinterpret_cast<const char*>(&packedExtent), 2);
    }
}

// Packs the low `iNeedBits` bits of each of the `arr_len` entries of `arr`
// into `buf` as one continuous bit stream. Entries are laid down in order,
// starting at the most significant bit of buf[0]; an entry that does not fit
// in the remainder of the current byte spills its low bits into the next one.
// `buf_len` bytes of `buf` are zeroed first. `buf_len` is not consulted while
// packing, so the caller must size `buf` for arr_len * iNeedBits bits.
static void FillBuffer(char *buf, unsigned int buf_len, const char *arr, unsigned int arr_len, unsigned char iNeedBits)
{
    memset(buf, 0, buf_len);
    const unsigned char lowBitsMask = (1 << iNeedBits) - 1;
    unsigned long long bitCursor = 0; // absolute position of the next free bit
    for (unsigned int n = 0; n < arr_len; ++n, bitCursor += iNeedBits)
    {
        const int payload = arr[n] & lowBitsMask;
        const unsigned long long byteIdx = bitCursor / 8;
        // Free bits left in the current byte after this entry; negative when
        // the entry straddles a byte boundary.
        const int spare = 8 - iNeedBits - static_cast<int>(bitCursor % 8);
        if (spare >= 0)
        {
            buf[byteIdx] |= (payload << spare);
            continue;
        }
        buf[byteIdx] |= (payload >> (0 - spare));
        buf[byteIdx + 1] |= (payload << (8 + spare));
    }
}

// Collects every distinct quantized weight value into `setWeight`, which is
// cleared first.
//   weightData : `channel` consecutive rows of `area` floats.
//   alphaData  : one scale per channel, or a (min, scale) pair per channel
//                when `asymmetricQuantFlag` is set.
// Symmetric values are round(w / scale); asymmetric values are
// round((w - min) / scale) - 128. Both are saturated to [-128, 127].
// A channel whose scale is <= 1e-6 contributes only its fallback value
// (-128 when asymmetric, 0 otherwise) and its weights are not read.
static void GetWeightSet(std::set<int> &setWeight, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
{
    setWeight.clear();
    for (int ch = 0; ch < channel; ++ch)
    {
        if (asymmetricQuantFlag)
        {
            const float lowest = alphaData[2 * ch];
            const float scale = alphaData[2 * ch + 1];
            if (scale <= 1e-6f)
            {
                setWeight.insert(-128);
                continue;
            }
            for (int k = 0; k < area; ++k)
            {
                const float w = weightData[ch * area + k];
                const int quantized = fmax(fmin(round((w - lowest) / scale) + (-128), 127), -128);
                setWeight.insert(quantized);
            }
        }
        else
        {
            const float scale = alphaData[ch];
            if (scale <= 1e-6f)
            {
                setWeight.insert(0);
                continue;
            }
            for (int k = 0; k < area; ++k)
            {
                const float w = weightData[ch * area + k];
                const int quantized = fmax(fmin(round(w / scale), 127), -128);
                setWeight.insert(quantized);
            }
        }
    }
}

// Counts how many entries a sparse (step, value) encoding of the weights
// needs and returns the resulting sparsity, 1 - nnz / weightSize.
//   weightData / weightSize : flat weight array and its length.
//   nnz                     : output, reset to 0 and then incremented once per
//                             emitted entry.
//   alphaData               : one scale per channel, or (min, scale) pairs
//                             when `asymmetricQuantFlag` is set; the channel
//                             of element i is i / area.
//   iMaxStep                : largest allowed distance between consecutive
//                             entries; -1 disables the limit.
// An entry is counted for every weight whose quantized value differs from the
// quantized zero (0 when symmetric, round(-min / scale) - 128 when
// asymmetric). With a step limit, an extra entry is counted whenever the
// distance from the previous entry reaches iMaxStep. A scale <= 1e-6 makes
// every weight of that channel count as zero. `channel` is not used here.
// Values are not saturated to [-128, 127] in this function.
static float GetSparsity(const float* weightData, int weightSize, unsigned int& nnz, const float* alphaData, int area, int channel, bool asymmetricQuantFlag, int iMaxStep = -1)
{
    nnz = 0;
    int lastEntry = 0;
    const bool stepLimited = (iMaxStep != -1);
    for (int pos = 0; pos < weightSize; ++pos)
    {
        const int ch = pos / area;
        const float w = weightData[pos];
        int zeroCode;
        int code;
        if (asymmetricQuantFlag)
        {
            const float lowest = alphaData[2 * ch];
            const float scale = alphaData[2 * ch + 1];
            zeroCode = -128;
            code = -128;
            if (scale > 1e-6)
            {
                zeroCode = round((0.0f - lowest) / scale) + (-128);
                code = round((w - lowest) / scale) + (-128);
            }
        }
        else
        {
            const float scale = alphaData[ch];
            zeroCode = 0;
            code = 0;
            if (scale > 1e-6f)
            {
                code = round(w / scale);
            }
        }

        if (code != zeroCode)
        {
            ++nnz;
            lastEntry = pos;
        }
        // A filler entry keeps the step to the next real entry representable.
        if (stepLimited && (pos - lastEntry >= iMaxStep))
        {
            ++nnz;
            lastEntry = pos;
        }
    }
    const float sparsity = 1 - 1.0f * nnz / weightSize;
    return sparsity;
}

// Picks the step bit width that gives the smallest estimated sparse encoding.
// For every candidate width from 2 to 8 bits, GetSparsity is run with a
// maximum step of 2^bits - 1 and the size is estimated as
//   ceil(nnz * bits / 8) + ceil(nnz * BlobDataSize / 8)   bytes,
// where BlobDataSize is the number of bits stored per value.
//   iMaxStepBits : output, the chosen width; always assigned.
//   returns      : the entry count (nnz) GetSparsity reported for that width.
// Ties keep the narrowest width.
static unsigned int GetBestMaxStep(const float* weightData, int weightSize, unsigned char& iMaxStepBits, int BlobDataSize, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
{
    const int kMinStepBits = 2;
    const int kMaxStepBits = 8;
    size_t szBestSize = 0;
    unsigned int best_nnz = 0;
    for (int bits = kMinStepBits; bits <= kMaxStepBits; bits++)
    {
        unsigned int nnz = 0;
        // (1 << bits) - 1 is the largest step that fits in `bits` bits.
        GetSparsity(weightData, weightSize, nnz, alphaData, area, channel, asymmetricQuantFlag, (1 << bits) - 1);
        const size_t estimate = ceil(0.125 * nnz * bits) + ceil(0.125 * nnz * BlobDataSize);
        // The first candidate is accepted unconditionally so that the outputs
        // are defined no matter how large the estimates are; a fixed "huge"
        // starting size could be exceeded by every candidate.
        if (bits == kMinStepBits || estimate < szBestSize)
        {
            iMaxStepBits = (unsigned char) bits;
            szBestSize = estimate;
            best_nnz = nnz;
        }
    }
    return best_nnz;
}

// Writes the weights to `out` in the dense "value table + packed index" form.
//   weightData : `channel` consecutive rows of `area` floats.
//   alphaData  : one scale per channel, or (min, scale) pairs per channel
//                when `asymmetricQuantFlag` is set.
// Stream layout:
//   1. shape {channel, area} as written by WriteBlobDim
//   2. number of distinct quantized values (1 byte; the cast keeps only the
//      low 8 bits of the count)
//   3. the distinct values in ascending order, 1 byte each
//   4. one index into that table per weight, packed by FillBuffer using
//      max(1, ceil(log2(count))) bits per index
// Nothing is written if more than 8 bits per index would be required.
static void WriteCQBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
{
    // Find all quantized values that occur, using the same quantization as below.
    std::set<int> setWeight;
    GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag);
    const int iCount = setWeight.size();
    // At least one bit per index; log2 is skipped for 0 or 1 values, where it
    // would yield -infinity or 0.
    const int iNeedBits = iCount > 1 ? static_cast<int>(ceil(log2(iCount))) : 1;
    if (iNeedBits > 8) {
        MNN_ERROR("The Bits need large than 8, the model may be error for user\n");
        return;
    }
    // Table position of every quantized value (the set iterates in ascending order).
    std::map<int, unsigned char> mapWeight;
    int iIdx = 0;
    for (auto it = setWeight.begin(); it != setWeight.end(); ++it)
    {
        mapWeight[*it] = iIdx++;
    }

    const size_t buf_len = size_t(ceil(0.125 * iNeedBits * area * channel));
    // Vectors own the scratch memory, so it is released on every exit path.
    std::vector<char> buf(buf_len);
    {
        std::vector<char> arr(area * channel);
        for (int i = 0; i < channel; i++)
        {
            const float min = asymmetricQuantFlag ? alphaData[2 * i] : 0.0f;
            const float alpha = asymmetricQuantFlag ? alphaData[2 * i + 1] : alphaData[i];
            for (int j = 0; j < area; j++)
            {
                const float weight = weightData[i * area + j];
                int value;
                if (asymmetricQuantFlag)
                {
                    value = -128;
                    if (alpha > 1e-6f)
                    {
                        value = fmax(fmin(round((weight - min) / alpha) + (-128), 127), -128);
                    }
                }
                else
                {
                    value = 0;
                    if (alpha > 1e-6f)
                    {
                        value = fmax(fmin(round(weight / alpha), 127), -128);
                    }
                }
                arr[i * area + j] = mapWeight[value];
            }
        }
        if (!arr.empty())
        {
            FillBuffer(buf.data(), buf_len, arr.data(), area * channel, iNeedBits);
        }
    }

    // begin write to file
    // 1. weights blob shape
    WriteBlobDim(out, {channel, area});
    // 2. available values count (unsigned char)
    const char countByte = (unsigned char)iCount;
    out.write(&countByte, 1);
    // 3. value set (signed char * value set size)
    for (auto it = setWeight.begin(); it != setWeight.end(); ++it)
    {
        const char valueByte = (unsigned char)*it;
        out.write(&valueByte, 1);
    }
    // 4. weight indexes (size = ceil(0.125 * weights_count * bits per index))
    if (!buf.empty())
    {
        out.write(buf.data(), buf_len);
    }
}

// Writes the weights to `out` in the sparse "step + value index" form.
//   weightData : `channel` consecutive rows of `area` floats.
//   alphaData  : one scale per channel, or (min, scale) pairs per channel
//                when `asymmetricQuantFlag` is set.
// Only weights whose quantized value differs from the quantized zero are
// stored, each as (distance from the previous entry, index into the value
// table). When that distance reaches the largest representable step, a filler
// entry carrying the quantized zero is stored instead.
// Stream layout:
//   1. shape {channel, area} as written by WriteBlobDim
//   2. number of entries, nnz (4 bytes, host byte order)
//   3. bits used per step (1 byte)
//   4. steps, packed by FillBuffer
//   5. number of distinct quantized values (1 byte; the cast keeps only the
//      low 8 bits of the count)
//   6. the distinct values in ascending order, 1 byte each
//   7. value-table indexes of the entries, packed by FillBuffer
static void WriteSparseQuanBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
{
    std::set<int> setWeight;
    GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag);
    const int valueCount = setWeight.size();
    // At least one bit per index; log2 is skipped for 0 or 1 values, where it
    // would yield -infinity or 0.
    const int iDataNeedBits = valueCount > 1 ? static_cast<int>(ceil(log2(valueCount))) : 1;
    const int weightSize = area * channel;

    // Table position of every quantized value (the set iterates in ascending order).
    std::map<int, unsigned char> mapWeight;
    {
        int iIdx = 0;
        for (auto it = setWeight.begin(); it != setWeight.end(); ++it)
        {
            mapWeight[*it] = iIdx++;
        }
    }
    // GetWeightSet stores values saturated to [-128, 127], while the loop
    // below works with unsaturated values. Saturate before the lookup so an
    // out-of-range weight maps to the boundary value rather than to a missing
    // key. A value that is still absent falls back to index 0.
    auto indexOf = [&mapWeight](int quantized) -> unsigned char {
        const int clamped = quantized < -128 ? -128 : (quantized > 127 ? 127 : quantized);
        const auto found = mapWeight.find(clamped);
        return found == mapWeight.end() ? static_cast<unsigned char>(0) : found->second;
    };

    unsigned char iNeedBits = 2; // overwritten by GetBestMaxStep; initialised so it is never read indeterminate
    const unsigned int nnz = GetBestMaxStep(weightData, weightSize, iNeedBits, iDataNeedBits, alphaData, area, channel, asymmetricQuantFlag);

    // Vectors own all scratch memory, so it is released on every exit path.
    // weight buf
    const size_t data_buf_len = size_t(ceil(0.125 * iDataNeedBits * nnz));
    std::vector<char> data_buf(data_buf_len);
    // sparse COO buf
    const size_t buf_len = size_t(ceil(0.125 * iNeedBits * nnz));
    std::vector<char> buf(buf_len);
    { // fill buf with step values
        std::vector<unsigned char> arr_idx(nnz);
        std::vector<unsigned char> data_arr(nnz);
        const int iMaxStep = (1 << iNeedBits) - 1;
        unsigned int emitted = 0;
        int iPreIdx = 0;
        // Records one (step, value index) entry ending at `position`.
        auto emit = [&](int position, int quantized) {
            // The counting pass mirrors this loop, so `emitted` is expected to
            // end at exactly nnz; the check only prevents writing past the
            // arrays should the two ever disagree.
            if (emitted < nnz)
            {
                data_arr[emitted] = indexOf(quantized);
                arr_idx[emitted] = position - iPreIdx;
                ++emitted;
            }
            iPreIdx = position;
        };

        for (int i = 0; i < weightSize; i++)
        {
            const int ch = i / area;
            const float weight = weightData[i];
            int zeroQuant;
            int value;
            if (asymmetricQuantFlag)
            {
                const float min = alphaData[2 * ch];
                const float alpha = alphaData[2 * ch + 1];
                zeroQuant = -128;
                value = -128;
                if (alpha > 1e-6)
                {
                    zeroQuant = round((0.0f - min) / alpha) + (-128);
                    value = round((weight - min) / alpha) + (-128);
                }
            }
            else
            {
                const float alpha = alphaData[ch];
                zeroQuant = 0;
                value = 0;
                if (alpha > 1e-6f)
                {
                    value = round(weight / alpha);
                }
            }

            if (value != zeroQuant)
            {
                emit(i, value);
            }
            // Filler entry: keeps the next step within iNeedBits bits.
            if (i - iPreIdx >= iMaxStep)
            {
                emit(i, zeroQuant);
            }
        }
        if (nnz > 0)
        {
            FillBuffer(buf.data(), buf_len, (char*) arr_idx.data(), nnz, iNeedBits);
            FillBuffer(data_buf.data(), data_buf_len, (char*) data_arr.data(), nnz, iDataNeedBits);
        }
    }
    { // write
        // 1. weights blob shape
        WriteBlobDim(out, {channel, area});
        // 2. nnz
        out.write((const char*) &nnz, 4);
        // 3. max_step use # bits (unsigned char)
        out.write((const char*) &iNeedBits, 1);
        // 4. buf for steps ceil(nnz * step need bits / 8)
        if (!buf.empty())
        {
            out.write(buf.data(), buf_len);
        }
        // 5. available values count (unsigned char)
        const char countByte = (unsigned char) setWeight.size();
        out.write(&countByte, 1);
        // 6. value set (signed char * value set size)
        for (auto it = setWeight.begin(); it != setWeight.end(); ++it)
        {
            const char valueByte = (unsigned char) *it;
            out.write(&valueByte, 1);
        }
        // 7. non-zero weight indexes (nnz * bits per index / 8)
        if (!data_buf.empty())
        {
            out.write(data_buf.data(), data_buf_len);
        }
    }
}

// Builds an IDSTQuanT holding the weights in the smallest available encoding.
// Both the dense (WriteCQBlobs) and the sparse (WriteSparseQuanBlobs) streams
// are produced from `weight`/`scale`, where kernelNum is passed as the channel
// count and kernelSize as the per-channel area. Selection:
//   type 4 : raw copy of `quantWeightPtr` (kernelNum * kernelSize bytes), used
//            when the pointer is non-null and that size does not exceed
//            either encoded stream; aMax is set to kernelNum.
//   type 1 : the dense stream, when it is no larger than the sparse one.
//   type 2 : the sparse stream otherwise.
// `scale` is copied to alpha and quantScale is set to 1. For asymmetric
// quantization, readType is set to kernelNum and aMin to clampMin.
static std::unique_ptr<IDSTQuanT> encode(const std::vector<float>& weight, const std::vector<float>& scale, int kernelSize, int kernelNum,
                                         bool asymmetricQuantFlag, const int8_t* quantWeightPtr, const int clampMin) {
    std::ostringstream denseStream;
    std::ostringstream sparseStream;
    WriteCQBlobs(denseStream, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag);
    WriteSparseQuanBlobs(sparseStream, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag);

    std::unique_ptr<IDSTQuanT> result(new IDSTQuanT);
    const std::string denseBytes = denseStream.str();
    const std::string sparseBytes = sparseStream.str();
    const int rawBytes = kernelNum * kernelSize;

    const bool rawIsSmallest = quantWeightPtr && (rawBytes <= denseBytes.size() && rawBytes <= sparseBytes.size());
    if (rawIsSmallest) {
        // Neither encoding beats the plain int8 payload: store it verbatim.
        result->type = 4;
        result->aMax = kernelNum;
        result->buffer.resize(rawBytes);
        ::memcpy(result->buffer.data(), quantWeightPtr, rawBytes);
    } else {
        const bool preferDense = denseBytes.size() <= sparseBytes.size();
        const std::string& chosen = preferDense ? denseBytes : sparseBytes;
        result->type = preferDense ? 1 : 2;
        result->buffer.resize(chosen.size());
        ::memcpy(result->buffer.data(), chosen.data(), chosen.size());
    }

    result->alpha.resize(scale.size());
    ::memcpy(result->alpha.data(), scale.data(), scale.size() * sizeof(float));
    result->quantScale = 1.f;
    if (asymmetricQuantFlag) {
        result->readType = kernelNum;
        result->aMin = clampMin;
    }
    return result;
}

} // namespace IDSTEncoder

#endif // IDSTENCODER_HPP
[MNN:Sync] Sync internal Gitlab 2021-04-08 15:34:23 +08:00			`//`
			`// IDSTEncoder.hpp`
			`// MNN`
			`//`
			`// Created by MNN on 2021/02/26.`
			`// Copyright © 2018, Alibaba Group Holding Limited`
			`//`

			`#ifndef IDSTENCODER_HPP`
			`#define IDSTENCODER_HPP`

			`#include <map>`
			`#include <sstream>`
			`#include "MNN_generated.h"`
[PATCH 08/36] [Train:Feature:Bugfix] train quant support full quant 2021-04-08 14:24:07 +08:00			`#include <cmath>`
[MNN:Sync] Sync internal Gitlab 2021-04-08 15:34:23 +08:00
			`using namespace MNN;`

			`namespace IDSTEncoder {`

			`static void WriteBlobDim(std::ostream &out, std::vector<int> dims)`
			`{`
			`char tmp[4];`
			`((unsigned char *)tmp)[0] = (unsigned char)dims.size();`
			`out.write(tmp, 1);`
			`for (int i = 0; i < dims.size(); i++)`
			`{`
			`unsigned short tmpShort = (unsigned short)dims[i];`
			`out.write((const char*)(&tmpShort), 2);`
			`}`
			`}`

			`static void FillBuffer(char buf, unsigned int buf_len, const char arr, unsigned int arr_len, unsigned char iNeedBits)`
			`{`
			`memset(buf, 0, buf_len);`
			`char *tmp = buf;`
			`int iOffset = 0;`
			`unsigned char cMask = (1 << iNeedBits) - 1;`
			`for (int i = 0; i < arr_len; i++)`
			`{`
			`char value = arr[i];`
			`int uShift = 8 - iNeedBits - iOffset % 8;`
			`if (uShift < 0)`
			`{`
			`tmp[iOffset / 8] \|= ((value & cMask) >> (0 - uShift));`
			`tmp[(iOffset / 8) + 1] \|= ((value & cMask) << (8 + uShift));`
			`}`
			`else`
			`{`
			`tmp[iOffset / 8] \|= ((value & cMask) << uShift);`
			`}`
			`iOffset += iNeedBits;`
			`if (iOffset % 8 == 0)`
			`{`
			`tmp += iOffset / 8;`
			`iOffset = 0;`
			`}`
			`}`
			`}`

			`static void GetWeightSet(std::set<int> &setWeight, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)`
			`{`
			`setWeight.clear();`
			`if (asymmetricQuantFlag) {`
			`for (int i = 0; i < channel; i++)`
			`{`
			`float min = alphaData[2*i];`
			`float alpha = alphaData[2*i+1];`
			`if (alpha <= 1e-6f)`
			`{`
			`setWeight.insert(-128);`
			`continue;`
			`}`
			`for (int j = 0; j < area; j++)`
			`{`
			`float weight = weightData[i * area + j];`
			`setWeight.insert(fmax(fmin(round((weight - min) / alpha) + (-128), 127), -128));`
			`}`
			`}`
			`} else {`
			`for (int i = 0; i < channel; i++)`
			`{`
			`float alpha = alphaData[i];`
			`if (alpha <= 1e-6f)`
			`{`
			`setWeight.insert(0);`
			`continue;`
			`}`
			`for (int j = 0; j < area; j++)`
			`{`
			`float weight = weightData[i * area + j];`
			`setWeight.insert(fmax(fmin(round(weight / alpha), 127), -128));`
			`}`
			`}`
			`}`
			`}`

			`static float GetSparsity(const float* weightData, int weightSize, unsigned int& nnz, const float* alphaData, int area, int channel, bool asymmetricQuantFlag, int iMaxStep = -1)`
			`{`
			`nnz = 0;`
			`int iPreIdx = 0;`
			`float sparsity;`
			`if (asymmetricQuantFlag) {`
			`for (int i = 0; i < weightSize; i++)`
			`{`
			`float min = alphaData[2*(i/area)];`
			`float alpha = alphaData[2*(i/area)+1];`
			`int zeroQuant = -128;`
			`if (alpha > 1e-6) {`
			`zeroQuant = round((0.0f - min) / alpha) + (-128);`
			`}`

			`float weight = weightData[i];`
			`int value = -128;`
			`if (alpha > 1e-6)`
			`{`
			`value = round((weight - min) / alpha) + (-128);`
			`}`

			`if (value != zeroQuant)`
			`{`
			`nnz++;`
			`iPreIdx = i;`
			`}`
			`if ((i - iPreIdx >= iMaxStep) && (iMaxStep != -1))`
			`{`
			`nnz++;`
			`iPreIdx = i;`
			`}`
			`}`
			`} else {`
			`for (int i = 0; i < weightSize; i++)`
			`{`
			`float alpha = alphaData[i / area];`
			`float weight = weightData[i];`
			`int value = 0;`
			`if (alpha > 1e-6f)`
			`{`
			`value = round(weight / alpha);`
			`}`

			`if (value != 0)`
			`{`
			`nnz++;`
			`iPreIdx = i;`
			`}`
			`if ((i - iPreIdx >= iMaxStep) && (iMaxStep != -1))`
			`{`
			`nnz++;`
			`iPreIdx = i;`
			`}`
			`}`
			`}`
			`sparsity = 1 - 1.0f * nnz / weightSize;`
			`return sparsity;`
			`}`

			`static unsigned int GetBestMaxStep(const float* weightData, int weightSize, unsigned char& iMaxStepBits, int BlobDataSize, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)`
			`{`
			`size_t szBestSize = 1000000000;`
			`unsigned int best_nnz = 0;`
			`for (int i = 2; i < 9; i++)`
			`{`
			`unsigned int nnz = 0;`
			`GetSparsity(weightData, weightSize, nnz, alphaData, area, channel, asymmetricQuantFlag, pow(2, i) - 1);`
			`size_t tmp = ceil(0.125 * nnz * i) + ceil(0.125 * nnz * BlobDataSize);`
			`if (tmp < szBestSize)`
			`{`
			`iMaxStepBits = (unsigned char) i;`
			`szBestSize = tmp;`
			`best_nnz = nnz;`
			`}`
			`}`
			`return best_nnz;`
			`}`

			`static void WriteCQBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)`
			`{`
			`//push values into buffer`
			`//Find int values in all blobs and check;`
			`std::set<int> setWeight;`
			`GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag);`
			`int iCount = setWeight.size();`
			`int iNeedBits = ceil(log2(iCount));`
[PATCH 6/7] [QAUNT:Bugfix] Bugfix for IDST encode when weight value = 1. 2021-04-20 17:30:48 +08:00			`iNeedBits = iNeedBits < 1 ? 1 : iNeedBits;`
[MNN:Sync] Sync internal Gitlab 2021-04-08 15:34:23 +08:00			`if (iNeedBits > 8) {`
			`MNN_ERROR("The Bits need large than 8, the model may be error for user\n");`
			`return;`
			`}`
			`std::map<int, unsigned char> mapWeight;`
			`int iIdx = 0;`
			`for (std::set<int>::iterator it = setWeight.begin(); it != setWeight.end(); it++)`
			`{`
			`mapWeight[*it] = iIdx++;`
			`}`
			`size_t buf_len = size_t(ceil(0.125 * iNeedBits * area * channel));`
			`char *buf = new char[buf_len];`
			`{`
			`char arr = new char[area channel];`
			`char *tmp = arr;`
			`if (asymmetricQuantFlag) {`
			`for (int i = 0; i < channel; i++)`
			`{`
			`float min = alphaData[2*i];`
			`float alpha = alphaData[2*i+1];`
			`for (int j = 0; j < area; j++)`
			`{`
			`float weight = weightData[i * area + j];`
			`int value = -128;`
			`if (alpha > 1e-6f)`
			`{`
			`value = fmax(fmin(round((weight - min) / alpha) + (-128), 127), -128);`
			`}`
			`*tmp = mapWeight[value];`
			`tmp++;`
			`}`
			`}`
			`} else {`
			`for (int i = 0; i < channel; i++)`
			`{`
			`float alpha = alphaData[i];`
			`for (int j = 0; j < area; j++)`
			`{`
			`float weight = weightData[i * area + j];`
			`int value = 0;`
			`if (alpha > 1e-6f)`
			`{`
			`value = fmax(fmin(round(weight / alpha), 127), -128);`
			`}`
			`*tmp = mapWeight[value];`
			`tmp++;`
			`}`
			`}`
			`}`
			`FillBuffer(buf, buf_len, arr, area * channel, iNeedBits);`
			`delete[] arr;`
			`}`
			`//begin write to file`
			`{`
			`char tmp[100];`
			`//1. weights blob shape(unsigned int32)`
			`WriteBlobDim(out, {channel, area});`
			`// 2. Avalable values Count(unsigned char)`
			`tmp[0] = (unsigned char)iCount;`
			`out.write(tmp, 1);`
			`// 3. valueset(signed char * valueset_size)`
			`for (auto it = setWeight.begin(); it != setWeight.end(); it++)`
			`{`
			`tmp[0] = (unsigned char)*it;`
			`out.write(tmp, 1);`
			`}`
			`// 4. weights indexes(size = ceil(0.125weights_countceil(log2(Avalable_values_Count))))`
			`out.write(buf, buf_len);`
			`//g_totalSize += 1 + setWeight.size() + buf_len;`
			`}`
			`delete[] buf;`
			`}`

			`static void WriteSparseQuanBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)`
			`{`
			`std::set<int> setWeight;`
			`GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag);`
			`int iDataNeedBits = ceil(log2(setWeight.size()));`
[PATCH 6/7] [QAUNT:Bugfix] Bugfix for IDST encode when weight value = 1. 2021-04-20 17:30:48 +08:00			`iDataNeedBits = iDataNeedBits < 1 ? 1 : iDataNeedBits;`
[MNN:Sync] Sync internal Gitlab 2021-04-08 15:34:23 +08:00			`unsigned int nnz = 0;`
			`int weightSize = area * channel;`
			`std::map<int, unsigned char> mapWeight;`
			`{`
			`int iIdx = 0;`
			`for (auto it = setWeight.begin(); it != setWeight.end(); it++)`
			`{`
			`mapWeight[*it] = iIdx++;`
			`}`
			`}`
			`unsigned char iNeedBits;`
			`nnz = GetBestMaxStep(weightData, weightSize, iNeedBits, iDataNeedBits, alphaData, area, channel, asymmetricQuantFlag);`
			`//weight buf`
			`size_t data_buf_len = size_t(ceil(0.125 * iDataNeedBits * nnz));`
			`char* data_buf = new char[data_buf_len];`
			`//sparse COO buf`
			`size_t buf_len = size_t(ceil(0.125 * iNeedBits * nnz));`
			`char* buf = new char[buf_len];`
			`{ //fill buf with step values;`
			`unsigned char* arr_idx = new unsigned char[nnz];`
			`unsigned char* data_arr = new unsigned char[nnz];`
			`unsigned char* tmp = arr_idx;`
			`int iMaxStep = pow(2, iNeedBits) - 1;`
			`int iPreIdx = 0;`
			`unsigned char* dTmp = data_arr;`
			`if (asymmetricQuantFlag) {`
			`for (int i = 0; i < weightSize; i++)`
			`{`
			`float min = alphaData[2*(i/area)];`
			`float alpha = alphaData[2*(i/area)+1];`
			`int zeroQuant = -128;`
			`if (alpha > 1e-6) {`
			`zeroQuant = round((0.0f - min) / alpha) + (-128);`
			`}`

			`float weight = weightData[i];`
			`int value = -128;`
			`if (alpha > 1e-6)`
			`{`
			`value = round((weight - min) / alpha) + (-128);`
			`}`

			`if (value != zeroQuant)`
			`{`
			`*dTmp = mapWeight[value];`
			`*tmp = i - iPreIdx;`
			`iPreIdx = i;`
			`tmp++;`
			`dTmp++;`
			`}`
			`if (i - iPreIdx >= iMaxStep)`
			`{`
			`*dTmp = mapWeight[zeroQuant];`
			`*tmp = i - iPreIdx;`
			`iPreIdx = i;`
			`tmp++;`
			`dTmp++;`
			`}`
			`}`
			`} else {`
			`for (int i = 0; i < weightSize; i++)`
			`{`
			`float alpha = alphaData[i / area];`
			`float weight = weightData[i];`
			`int value = 0;`
			`if (alpha > 1e-6f)`
			`{`
			`value = round(weight / alpha);`
			`}`

			`if (value != 0)`
			`{`
			`*dTmp = mapWeight[value];`
			`*tmp = i - iPreIdx;`
			`iPreIdx = i;`
			`tmp++;`
			`dTmp++;`
			`}`
			`if (i - iPreIdx >= iMaxStep)`
			`{`
			`*dTmp = mapWeight[0];`
			`*tmp = i - iPreIdx;`
			`iPreIdx = i;`
			`tmp++;`
			`dTmp++;`
			`}`
			`}`
			`}`
			`FillBuffer(buf, buf_len, (char*) arr_idx, nnz, iNeedBits);`
			`FillBuffer(data_buf, data_buf_len, (char*) data_arr, nnz, iDataNeedBits);`
			`delete[] arr_idx;`
			`delete[] data_arr;`
			`}`
			`{ //write`
			`char tmp[100];`
			`// 1.weights blob shape(unsigned int32)`
			`WriteBlobDim(out, {channel, area});`
			`// 2. nnz`
			`out.write((const char*) &nnz, 4);`
			`// 3. max_step use # bits () (unsigned char)`
			`out.write((const char*) &iNeedBits, 1);`
			`// 4. buf for steps ceil(nnz*step need bits/8)`
			`out.write(buf, buf_len);`
			`// 5. Avalable values Count(unsigned char)`
			`tmp[0] = (unsigned char) setWeight.size();`
			`out.write(tmp, 1);`
			`// 6. valueset(signed char * valueset_size)`
			`for (auto it = setWeight.begin(); it != setWeight.end(); it++)`
			`{`
			`tmp[0] = (unsigned char) *it;`
			`out.write(tmp, 1);`
			`}`
			`// 7. none zero weights indexes(nnz*ceil(log2(Avalable_values_Count))/8)`
			`out.write((const char*) data_buf, data_buf_len);`
			`}`
			`delete[] buf;`
			`delete[] data_buf;`
			`}`

			`static std::unique_ptr<IDSTQuanT> encode(const std::vector<float>& weight, const std::vector<float>& scale, int kernelSize, int kernelNum,`
			`bool asymmetricQuantFlag, const int8_t* quantWeightPtr, const int clampMin) {`
			`std::ostringstream outputStringStreamCQ, outputStringStreamSQ;`
			`WriteCQBlobs(outputStringStreamCQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag);`
			`WriteSparseQuanBlobs(outputStringStreamSQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag);`
			`std::unique_ptr<IDSTQuanT> idst(new IDSTQuanT);`
			`auto cqStr = outputStringStreamCQ.str();`
			`auto sqStr = outputStringStreamSQ.str();`
			`int int8Size = kernelNum * kernelSize;`
			`if (quantWeightPtr && (int8Size <= cqStr.size() && int8Size <= sqStr.size())) {`
			`idst->type = 4;`
			`idst->aMax = kernelNum;`
			`idst->buffer.resize(int8Size);`
			`::memcpy(idst->buffer.data(), quantWeightPtr, int8Size);`
			`} else if (cqStr.size() <= sqStr.size()) {`
			`idst->type = 1;`
			`idst->buffer.resize(cqStr.size());`
			`::memcpy(idst->buffer.data(), cqStr.data(), cqStr.size());`
			`} else {`
			`idst->type = 2;`
			`idst->buffer.resize(sqStr.size());`
			`::memcpy(idst->buffer.data(), sqStr.data(), sqStr.size());`
			`}`
			`idst->alpha.resize(scale.size());`
			`::memcpy(idst->alpha.data(), scale.data(), scale.size() * sizeof(float));`
			`idst->quantScale = 1.f;`
			`if (asymmetricQuantFlag) {`
			`idst->readType = kernelNum;`
			`idst->aMin = clampMin;`
			`}`
			`return idst;`
			`}`

			`} // namespace IDSTEncoder`

			`#endif // IDSTENCODER_HPP`