mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
				
	
	
		
			420 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			420 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
//
//  IDSTEncoder.hpp
//  MNN
//
//  Created by MNN on 2021/02/26.
//  Copyright © 2018, Alibaba Group Holding Limited
//
| 
 | |
| #ifndef IDSTENCODER_HPP
 | |
| #define IDSTENCODER_HPP
 | |
| 
 | |
#include <cmath>
#include <cstdint>
#include <cstring>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <vector>

#include "MNN_generated.h"
| 
 | |
| using namespace MNN;
 | |
| 
 | |
| namespace IDSTEncoder {
 | |
| 
 | |
// Writes a blob shape header to `out`: one byte holding the number of
// dimensions, followed by each dimension as a 2-byte unsigned short in host
// byte order.
//
// The rank is truncated to 8 bits and every dimension to 16 bits; values that
// do not fit are silently reduced modulo 256 / 65536.
static void WriteBlobDim(std::ostream &out, std::vector<int> dims)
{
    const unsigned char rank = static_cast<unsigned char>(dims.size());
    out.write(reinterpret_cast<const char*>(&rank), 1);
    for (const int dim : dims)
    {
        const unsigned short packedDim = static_cast<unsigned short>(dim);
        out.write(reinterpret_cast<const char*>(&packedDim), 2);
    }
}
 | |
| 
 | |
// Packs the low `iNeedBits` bits of every element of `arr` into `buf` as one
// contiguous, most-significant-bit-first bit stream.
//
//   buf       destination; its first `buf_len` bytes are zeroed before packing
//   buf_len   capacity of `buf` in bytes
//   arr       source values, one per byte
//   arr_len   number of source values
//   iNeedBits bits kept per value; must be in [1, 8]
//
// Bits that would land beyond `buf_len` are dropped instead of being written
// out of bounds. For an `iNeedBits` outside [1, 8] the buffer is only zeroed.
static void FillBuffer(char *buf, unsigned int buf_len, const char *arr, unsigned int arr_len, unsigned char iNeedBits)
{
    if (buf == nullptr || buf_len == 0)
    {
        return;
    }
    memset(buf, 0, buf_len);
    if (arr == nullptr || iNeedBits == 0 || iNeedBits > 8)
    {
        return;
    }

    unsigned char *dst = reinterpret_cast<unsigned char *>(buf);
    const unsigned int mask = (1u << iNeedBits) - 1u;
    size_t bitPos = 0; // position of the next free bit in the output stream
    for (unsigned int i = 0; i < arr_len; ++i)
    {
        const size_t byteIdx = bitPos / 8;
        if (byteIdx >= buf_len)
        {
            break;
        }
        const unsigned int code = static_cast<unsigned char>(arr[i]) & mask;
        // Left shift that aligns the value with the free bits of the current
        // byte; negative when the value straddles two bytes.
        const int shift = 8 - static_cast<int>(iNeedBits) - static_cast<int>(bitPos % 8);
        if (shift >= 0)
        {
            dst[byteIdx] |= static_cast<unsigned char>(code << shift);
        }
        else
        {
            // High part finishes the current byte, low part starts the next.
            dst[byteIdx] |= static_cast<unsigned char>(code >> (-shift));
            if (byteIdx + 1 < buf_len)
            {
                dst[byteIdx + 1] |= static_cast<unsigned char>(code << (8 + shift));
            }
        }
        bitPos += iNeedBits;
    }
}
 | |
| 
 | |
// Collects into `setWeight` every distinct quantized value, clamped to
// [-128, 127], that occurs in the weights.
//
//   weightData          channel * area floats, channel-major
//   alphaData           symmetric: one scale per channel;
//                       asymmetric: {min, scale} pairs per channel
//   asymmetricQuantFlag selects the quantization formula:
//                         symmetric : round(w / scale)
//                         asymmetric: round((w - min) / scale) - 128
//
// A channel whose scale is <= 1e-6 contributes only its fallback value
// (0 for symmetric, -128 for asymmetric); its weights are not read.
static void GetWeightSet(std::set<int> &setWeight, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
{
    setWeight.clear();
    const int fallback = asymmetricQuantFlag ? -128 : 0;
    for (int c = 0; c < channel; ++c)
    {
        const float minValue = asymmetricQuantFlag ? alphaData[2 * c] : 0.0f;
        const float scale    = asymmetricQuantFlag ? alphaData[2 * c + 1] : alphaData[c];
        if (scale <= 1e-6f)
        {
            setWeight.insert(fallback);
            continue;
        }
        const float* row = weightData + c * area;
        for (int k = 0; k < area; ++k)
        {
            double quantized;
            if (asymmetricQuantFlag)
            {
                quantized = std::round((row[k] - minValue) / scale) + (-128);
            }
            else
            {
                quantized = std::round(row[k] / scale);
            }
            // fmin/fmax rather than comparisons so a NaN input still yields a value in range.
            const double clamped = std::fmax(std::fmin(quantized, 127.0), -128.0);
            setWeight.insert(static_cast<int>(clamped));
        }
    }
}
 | |
| 
 | |
// Counts how many entries a run-length sparse encoding of the quantized
// weights needs and returns the resulting sparsity, 1 - nnz / weightSize.
//
//   weightData          weightSize floats; element i belongs to channel i / area
//   nnz                 [out] number of entries that must be stored
//   alphaData           symmetric: one scale per channel;
//                       asymmetric: {min, scale} pairs per channel
//   channel             unused
//   iMaxStep            largest gap that may separate two stored entries;
//                       -1 disables the limit
//
// An entry is stored for every weight whose quantized value differs from the
// quantized zero of its channel. When iMaxStep is not -1, an extra entry is
// stored whenever the distance to the previously stored index reaches
// iMaxStep. Quantized values are NOT clamped here.
//
// Returns 1.0 with nnz == 0 when weightSize <= 0 or area <= 0, which avoids a
// 0/0 result and an integer division by zero.
static float GetSparsity(const float* weightData, int weightSize, unsigned int& nnz, const float* alphaData, int area, int channel, bool asymmetricQuantFlag, int iMaxStep = -1)
{
    nnz = 0;
    if (weightSize <= 0 || area <= 0)
    {
        return 1.0f;
    }

    const bool limitStep = (iMaxStep != -1);
    int lastStored = 0;
    for (int i = 0; i < weightSize; ++i)
    {
        const int ch = i / area;
        const float weight = weightData[i];
        int zeroQuant;
        int value;
        if (asymmetricQuantFlag)
        {
            const float minValue = alphaData[2 * ch];
            const float alpha    = alphaData[2 * ch + 1];
            zeroQuant = -128;
            value     = -128;
            if (alpha > 1e-6)
            {
                zeroQuant = static_cast<int>(std::round((0.0f - minValue) / alpha) + (-128));
                value     = static_cast<int>(std::round((weight - minValue) / alpha) + (-128));
            }
        }
        else
        {
            const float alpha = alphaData[ch];
            zeroQuant = 0;
            value     = 0;
            if (alpha > 1e-6f)
            {
                value = static_cast<int>(std::round(weight / alpha));
            }
        }

        if (value != zeroQuant)
        {
            ++nnz;
            lastStored = i;
        }
        // A gap that reaches the step limit forces a filler entry.
        if (limitStep && (i - lastStored >= iMaxStep))
        {
            ++nnz;
            lastStored = i;
        }
    }
    return 1 - 1.0f * nnz / weightSize;
}
 | |
| 
 | |
| static unsigned int GetBestMaxStep(const float* weightData, int weightSize, unsigned char& iMaxStepBits, int BlobDataSize, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
 | |
| {
 | |
|     size_t szBestSize = 1000000000;
 | |
|     unsigned int best_nnz = 0;
 | |
|     for (int i = 2; i < 9; i++)
 | |
|     {
 | |
|         unsigned int nnz = 0;
 | |
|         GetSparsity(weightData, weightSize, nnz, alphaData, area, channel, asymmetricQuantFlag, pow(2, i) - 1);
 | |
|         size_t tmp = ceil(0.125 * nnz * i) + ceil(0.125 * nnz * BlobDataSize);
 | |
|         if (tmp < szBestSize)
 | |
|         {
 | |
|             iMaxStepBits = (unsigned char) i;
 | |
|             szBestSize = tmp;
 | |
|             best_nnz = nnz;
 | |
|         }
 | |
|     }
 | |
|     return best_nnz;
 | |
| }
 | |
| 
 | |
| static void WriteCQBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
 | |
| {
 | |
|     //push values into buffer
 | |
|     //Find int values in all blobs and check;
 | |
|     std::set<int> setWeight;
 | |
|     GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag);
 | |
|     int iCount = setWeight.size();
 | |
|     int iNeedBits = ceil(log2(iCount));
 | |
|     iNeedBits = iNeedBits < 1 ? 1 : iNeedBits;
 | |
|     if (iNeedBits > 8) {
 | |
|         MNN_ERROR("The Bits need large than 8, the model may be error for user\n");
 | |
|         return;
 | |
|     }
 | |
|     std::map<int, unsigned char> mapWeight;
 | |
|     int iIdx = 0;
 | |
|     for (std::set<int>::iterator it = setWeight.begin(); it != setWeight.end(); it++)
 | |
|     {
 | |
|         mapWeight[*it] = iIdx++;
 | |
|     }
 | |
|     size_t buf_len = size_t(ceil(0.125 * iNeedBits * area * channel));
 | |
|     char *buf = new char[buf_len];
 | |
|     {
 | |
|         char *arr = new char[area * channel];
 | |
|         char *tmp = arr;
 | |
|         if (asymmetricQuantFlag) {
 | |
|             for (int i = 0; i < channel; i++)
 | |
|             {
 | |
|                 float min = alphaData[2*i];
 | |
|                 float alpha = alphaData[2*i+1];
 | |
|                 for (int j = 0; j < area; j++)
 | |
|                 {
 | |
|                     float weight = weightData[i * area + j];
 | |
|                     int value = -128;
 | |
|                     if (alpha > 1e-6f)
 | |
|                     {
 | |
|                         value = fmax(fmin(round((weight - min) / alpha) + (-128), 127), -128);
 | |
|                     }
 | |
|                     *tmp = mapWeight[value];
 | |
|                     tmp++;
 | |
|                 }
 | |
|             }
 | |
|         } else {
 | |
|             for (int i = 0; i < channel; i++)
 | |
|             {
 | |
|                 float alpha = alphaData[i];
 | |
|                 for (int j = 0; j < area; j++)
 | |
|                 {
 | |
|                     float weight = weightData[i * area + j];
 | |
|                     int value = 0;
 | |
|                     if (alpha > 1e-6f)
 | |
|                     {
 | |
|                         value = fmax(fmin(round(weight / alpha), 127), -128);
 | |
|                     }
 | |
|                     *tmp = mapWeight[value];
 | |
|                     tmp++;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         FillBuffer(buf, buf_len, arr, area * channel, iNeedBits);
 | |
|         delete[] arr;
 | |
|     }
 | |
|     //begin write to file
 | |
|     {
 | |
|         char tmp[100];
 | |
|         //1. weights blob shape(unsigned int32)
 | |
|         WriteBlobDim(out, {channel, area});
 | |
|         // 2. Avalable values Count(unsigned char)
 | |
|         tmp[0] = (unsigned char)iCount;
 | |
|         out.write(tmp, 1);
 | |
|         // 3. valueset(signed char * valueset_size)
 | |
|         for (auto it = setWeight.begin(); it != setWeight.end(); it++)
 | |
|         {
 | |
|             tmp[0] = (unsigned char)*it;
 | |
|             out.write(tmp, 1);
 | |
|         }
 | |
|         // 4. weights indexes(size = ceil(0.125*weights_count*ceil(log2(Avalable_values_Count))))
 | |
|         out.write(buf, buf_len);
 | |
|         //g_totalSize += 1 + setWeight.size() + buf_len;
 | |
|     }
 | |
|     delete[] buf;
 | |
| }
 | |
| 
 | |
| static void WriteSparseQuanBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
 | |
| {
 | |
|     std::set<int> setWeight;
 | |
|     GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag);
 | |
|     int iDataNeedBits = ceil(log2(setWeight.size()));
 | |
|     iDataNeedBits = iDataNeedBits < 1 ? 1 : iDataNeedBits;
 | |
|     unsigned int nnz = 0;
 | |
|     int weightSize = area * channel;
 | |
|     std::map<int, unsigned char> mapWeight;
 | |
|     {
 | |
|         int iIdx = 0;
 | |
|         for (auto it = setWeight.begin(); it != setWeight.end(); it++)
 | |
|         {
 | |
|             mapWeight[*it] = iIdx++;
 | |
|         }
 | |
|     }
 | |
|     unsigned char iNeedBits;
 | |
|     nnz = GetBestMaxStep(weightData, weightSize, iNeedBits, iDataNeedBits, alphaData, area, channel, asymmetricQuantFlag);
 | |
|     //weight buf
 | |
|     size_t data_buf_len = size_t(ceil(0.125 * iDataNeedBits * nnz));
 | |
|     char* data_buf = new char[data_buf_len];
 | |
|     //sparse COO buf
 | |
|     size_t buf_len = size_t(ceil(0.125 * iNeedBits * nnz));
 | |
|     char* buf = new char[buf_len];
 | |
|     { //fill buf with step values;
 | |
|         unsigned char* arr_idx = new unsigned char[nnz];
 | |
|         unsigned char* data_arr = new unsigned char[nnz];
 | |
|         unsigned char* tmp = arr_idx;
 | |
|         int iMaxStep = pow(2, iNeedBits) - 1;
 | |
|         int iPreIdx = 0;
 | |
|         unsigned char* dTmp = data_arr;
 | |
|         if (asymmetricQuantFlag) {
 | |
|             for (int i = 0; i < weightSize; i++)
 | |
|             {
 | |
|                 float min = alphaData[2*(i/area)];
 | |
|                 float alpha = alphaData[2*(i/area)+1];
 | |
|                 int zeroQuant = -128;
 | |
|                 if (alpha > 1e-6) {
 | |
|                     zeroQuant = round((0.0f - min) / alpha) + (-128);
 | |
|                 }
 | |
| 
 | |
|                 float weight = weightData[i];
 | |
|                 int value = -128;
 | |
|                 if (alpha > 1e-6)
 | |
|                 {
 | |
|                     value = round((weight - min) / alpha) + (-128);
 | |
|                 }
 | |
| 
 | |
|                 if (value != zeroQuant)
 | |
|                 {
 | |
|                     *dTmp = mapWeight[value];
 | |
|                     *tmp = i - iPreIdx;
 | |
|                     iPreIdx = i;
 | |
|                     tmp++;
 | |
|                     dTmp++;
 | |
|                 }
 | |
|                 if (i - iPreIdx >= iMaxStep)
 | |
|                 {
 | |
|                     *dTmp = mapWeight[zeroQuant];
 | |
|                     *tmp = i - iPreIdx;
 | |
|                     iPreIdx = i;
 | |
|                     tmp++;
 | |
|                     dTmp++;
 | |
|                 }
 | |
|             }
 | |
|         } else {
 | |
|             for (int i = 0; i < weightSize; i++)
 | |
|             {
 | |
|                 float alpha = alphaData[i / area];
 | |
|                 float weight = weightData[i];
 | |
|                 int value = 0;
 | |
|                 if (alpha > 1e-6f)
 | |
|                 {
 | |
|                     value = round(weight / alpha);
 | |
|                 }
 | |
| 
 | |
|                 if (value != 0)
 | |
|                 {
 | |
|                     *dTmp = mapWeight[value];
 | |
|                     *tmp = i - iPreIdx;
 | |
|                     iPreIdx = i;
 | |
|                     tmp++;
 | |
|                     dTmp++;
 | |
|                 }
 | |
|                 if (i - iPreIdx >= iMaxStep)
 | |
|                 {
 | |
|                     *dTmp = mapWeight[0];
 | |
|                     *tmp = i - iPreIdx;
 | |
|                     iPreIdx = i;
 | |
|                     tmp++;
 | |
|                     dTmp++;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         FillBuffer(buf, buf_len, (char*) arr_idx, nnz, iNeedBits);
 | |
|         FillBuffer(data_buf, data_buf_len, (char*) data_arr, nnz, iDataNeedBits);
 | |
|         delete[] arr_idx;
 | |
|         delete[] data_arr;
 | |
|     }
 | |
|     { //write
 | |
|         char tmp[100];
 | |
|         // 1.weights blob shape(unsigned int32)
 | |
|         WriteBlobDim(out, {channel, area});
 | |
|         // 2. nnz
 | |
|         out.write((const char*) &nnz, 4);
 | |
|         // 3. max_step use # bits () (unsigned char)
 | |
|         out.write((const char*) &iNeedBits, 1);
 | |
|         // 4. buf for steps ceil(nnz*step need bits/8)
 | |
|         out.write(buf, buf_len);
 | |
|         // 5. Avalable values Count(unsigned char)
 | |
|         tmp[0] = (unsigned char) setWeight.size();
 | |
|         out.write(tmp, 1);
 | |
|         // 6. valueset(signed char * valueset_size)
 | |
|         for (auto it = setWeight.begin(); it != setWeight.end(); it++)
 | |
|         {
 | |
|             tmp[0] = (unsigned char) *it;
 | |
|             out.write(tmp, 1);
 | |
|         }
 | |
|         // 7. none zero weights indexes(nnz*ceil(log2(Avalable_values_Count))/8)
 | |
|         out.write((const char*) data_buf, data_buf_len);
 | |
|     }
 | |
|     delete[] buf;
 | |
|     delete[] data_buf;
 | |
| }
 | |
| 
 | |
| static std::unique_ptr<IDSTQuanT> encode(const std::vector<float>& weight, const std::vector<float>& scale, int kernelSize, int kernelNum,
 | |
|                                          bool asymmetricQuantFlag, const int8_t* quantWeightPtr, const int clampMin) {
 | |
|     std::ostringstream outputStringStreamCQ, outputStringStreamSQ;
 | |
|     WriteCQBlobs(outputStringStreamCQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag);
 | |
|     WriteSparseQuanBlobs(outputStringStreamSQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag);
 | |
|     std::unique_ptr<IDSTQuanT> idst(new IDSTQuanT);
 | |
|     auto cqStr = outputStringStreamCQ.str();
 | |
|     auto sqStr = outputStringStreamSQ.str();
 | |
|     int int8Size = kernelNum * kernelSize;
 | |
|     if (quantWeightPtr && (int8Size <= cqStr.size() && int8Size <= sqStr.size())) {
 | |
|         idst->type = 4;
 | |
|         idst->aMax = kernelNum;
 | |
|         idst->buffer.resize(int8Size);
 | |
|         ::memcpy(idst->buffer.data(), quantWeightPtr, int8Size);
 | |
|     } else if (cqStr.size() <= sqStr.size()) {
 | |
|         idst->type = 1;
 | |
|         idst->buffer.resize(cqStr.size());
 | |
|         ::memcpy(idst->buffer.data(), cqStr.data(), cqStr.size());
 | |
|     } else {
 | |
|         idst->type = 2;
 | |
|         idst->buffer.resize(sqStr.size());
 | |
|         ::memcpy(idst->buffer.data(), sqStr.data(), sqStr.size());
 | |
|     }
 | |
|     idst->alpha.resize(scale.size());
 | |
|     ::memcpy(idst->alpha.data(), scale.data(), scale.size() * sizeof(float));
 | |
|     idst->quantScale = 1.f;
 | |
|     if (asymmetricQuantFlag) {
 | |
|         idst->readType = kernelNum;
 | |
|         idst->aMin = clampMin;
 | |
|     }
 | |
|     return idst;
 | |
| }
 | |
| 
 | |
| } // namespace IDSTEncoder
 | |
| 
 | |
| #endif // IDSTENCODER_HPP
 |