mirror of https://github.com/alibaba/MNN.git
				
				
				
			
		
			
	
	
		
			417 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
		
		
			
		
	
	
			417 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
|  | //
 | ||
|  | //  IDSTEncoder.hpp
 | ||
|  | //  MNN
 | ||
|  | //
 | ||
|  | //  Created by MNN on 2021/02/26.
 | ||
|  | //  Copyright © 2018, Alibaba Group Holding Limited
 | ||
|  | //
 | ||
|  | 
 | ||
|  | #ifndef IDSTENCODER_HPP
 | ||
|  | #define IDSTENCODER_HPP
 | ||
|  | 
 | ||
#include <cmath>
#include <cstring>
#include <map>
#include <memory>
#include <ostream>
#include <set>
#include <sstream>
#include <vector>
#include "MNN_generated.h"
 | ||
|  | 
 | ||
|  | using namespace MNN; | ||
|  | 
 | ||
|  | namespace IDSTEncoder { | ||
|  | 
 | ||
// Writes a blob-shape header to `out`.
//
// Layout: one byte holding the number of dimensions, followed by each
// dimension as a 2-byte unsigned short in host byte order.
//
// The dimension count is truncated to 8 bits and every dimension to 16 bits,
// so only shapes with at most 255 axes of at most 65535 elements round-trip.
static void WriteBlobDim(std::ostream &out, const std::vector<int> &dims)
{
    const char dimCount = static_cast<char>(static_cast<unsigned char>(dims.size()));
    out.write(&dimCount, 1);
    for (const int dim : dims)
    {
        const unsigned short dim16 = static_cast<unsigned short>(dim);
        out.write(reinterpret_cast<const char *>(&dim16), 2);
    }
}
|  | 
 | ||
// Packs the low `iNeedBits` bits of every byte in `arr` into `buf` as a
// contiguous, most-significant-bit-first bit stream.
//
//   buf       destination; its first `buf_len` bytes are zeroed before packing
//   buf_len   size of `buf` in bytes
//   arr       source values, one per byte
//   arr_len   number of source values
//   iNeedBits bits kept from each value; only 1..8 packs anything
//
// A width of 0 (or above 8) packs nothing: the buffer is only zeroed. A
// zero-length buffer is never touched, so callers may pass the result of a
// zero-sized allocation. Packing stops at the end of `buf` instead of writing
// past it when `buf_len` is smaller than the packed stream.
static void FillBuffer(char *buf, unsigned int buf_len, const char *arr, unsigned int arr_len, unsigned char iNeedBits)
{
    if (buf == nullptr || buf_len == 0)
    {
        return;
    }
    memset(buf, 0, buf_len);
    if (arr == nullptr || iNeedBits == 0 || iNeedBits > 8)
    {
        return;
    }

    unsigned char *dst = reinterpret_cast<unsigned char *>(buf);
    const unsigned int mask = (1u << iNeedBits) - 1u;
    unsigned int byteIdx = 0;   // byte currently being filled
    unsigned int bitInByte = 0; // bits already used in that byte (0..7)
    for (unsigned int i = 0; i < arr_len && byteIdx < buf_len; ++i)
    {
        const unsigned int code = static_cast<unsigned char>(arr[i]) & mask;
        // Left shift that places the value directly after the used bits; a
        // negative shift means the value straddles into the next byte.
        const int shift = 8 - static_cast<int>(iNeedBits) - static_cast<int>(bitInByte);
        if (shift >= 0)
        {
            dst[byteIdx] |= static_cast<unsigned char>(code << shift);
        }
        else
        {
            dst[byteIdx] |= static_cast<unsigned char>(code >> (-shift));
            if (byteIdx + 1 < buf_len)
            {
                dst[byteIdx + 1] |= static_cast<unsigned char>((code << (8 + shift)) & 0xFFu);
            }
        }
        bitInByte += iNeedBits;
        byteIdx += bitInByte / 8;
        bitInByte %= 8;
    }
}
|  | 
 | ||
// Rebuilds `setWeight` as the set of distinct quantized values that occur in
// `weightData`, laid out as `channel` consecutive rows of `area` floats.
//
// Asymmetric mode reads a (min, alpha) pair per channel from `alphaData` and
// quantizes as round((w - min) / alpha) - 128; symmetric mode reads one alpha
// per channel and quantizes as round(w / alpha). Results are clamped to
// [-128, 127]. A channel whose alpha is <= 1e-6 contributes only the fixed
// value -128 (asymmetric) or 0 (symmetric) and its weights are not read.
static void GetWeightSet(std::set<int> &setWeight, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag)
{
    setWeight.clear();
    for (int ch = 0; ch < channel; ++ch)
    {
        if (asymmetricQuantFlag)
        {
            const float lower = alphaData[2 * ch];
            const float step = alphaData[2 * ch + 1];
            if (step <= 1e-6f)
            {
                setWeight.insert(-128);
                continue;
            }
            for (int k = 0; k < area; ++k)
            {
                const float w = weightData[ch * area + k];
                setWeight.insert(fmax(fmin(round((w - lower) / step) + (-128), 127), -128));
            }
        }
        else
        {
            const float step = alphaData[ch];
            if (step <= 1e-6f)
            {
                setWeight.insert(0);
                continue;
            }
            for (int k = 0; k < area; ++k)
            {
                const float w = weightData[ch * area + k];
                setWeight.insert(fmax(fmin(round(w / step), 127), -128));
            }
        }
    }
}
|  | 
 | ||
// Counts how many entries a sparse encoding of `weightData` needs and returns
// the resulting sparsity, 1 - nnz / weightSize.
//
//   weightData / weightSize  flat weights; element i belongs to channel i / area
//   nnz                      out: number of entries (reset to 0 on entry)
//   alphaData                per-channel alpha (symmetric) or (min, alpha)
//                            pairs (asymmetric)
//   channel                  not used by this function
//   iMaxStep                 largest allowed index gap between two entries;
//                            -1 disables the limit
//
// An entry is counted for every weight whose quantized value differs from the
// quantized zero of its channel. When a step limit is active, an additional
// entry is counted each time the distance from the last counted index reaches
// `iMaxStep`. Quantized values are deliberately not clamped here.
//
// Empty or malformed input (weightSize <= 0 or area <= 0) yields nnz = 0 and a
// sparsity of 0 instead of dividing by zero.
static float GetSparsity(const float* weightData, int weightSize, unsigned int& nnz, const float* alphaData, int area, int channel, bool asymmetricQuantFlag, int iMaxStep = -1)
{
    nnz = 0;
    if (weightSize <= 0 || area <= 0)
    {
        return 0.0f;
    }

    const bool limitStep = (iMaxStep != -1);
    int iPreIdx = 0; // index of the most recently counted entry
    for (int i = 0; i < weightSize; i++)
    {
        const int ch = i / area;
        const float weight = weightData[i];
        int zeroQuant = 0;
        int value = 0;
        if (asymmetricQuantFlag)
        {
            const float min = alphaData[2 * ch];
            const float alpha = alphaData[2 * ch + 1];
            zeroQuant = -128;
            value = -128;
            if (alpha > 1e-6)
            {
                zeroQuant = std::round((0.0f - min) / alpha) + (-128);
                value = std::round((weight - min) / alpha) + (-128);
            }
        }
        else
        {
            const float alpha = alphaData[ch];
            if (alpha > 1e-6f)
            {
                value = std::round(weight / alpha);
            }
        }

        if (value != zeroQuant)
        {
            nnz++;
            iPreIdx = i;
        }
        if (limitStep && (i - iPreIdx >= iMaxStep))
        {
            nnz++;
            iPreIdx = i;
        }
    }
    return 1 - 1.0f * nnz / weightSize;
}
|  | 
 | ||
|  | static unsigned int GetBestMaxStep(const float* weightData, int weightSize, unsigned char& iMaxStepBits, int BlobDataSize, const float* alphaData, int area, int channel, bool asymmetricQuantFlag) | ||
|  | { | ||
|  |     size_t szBestSize = 1000000000; | ||
|  |     unsigned int best_nnz = 0; | ||
|  |     for (int i = 2; i < 9; i++) | ||
|  |     { | ||
|  |         unsigned int nnz = 0; | ||
|  |         GetSparsity(weightData, weightSize, nnz, alphaData, area, channel, asymmetricQuantFlag, pow(2, i) - 1); | ||
|  |         size_t tmp = ceil(0.125 * nnz * i) + ceil(0.125 * nnz * BlobDataSize); | ||
|  |         if (tmp < szBestSize) | ||
|  |         { | ||
|  |             iMaxStepBits = (unsigned char) i; | ||
|  |             szBestSize = tmp; | ||
|  |             best_nnz = nnz; | ||
|  |         } | ||
|  |     } | ||
|  |     return best_nnz; | ||
|  | } | ||
|  | 
 | ||
|  | static void WriteCQBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag) | ||
|  | { | ||
|  |     //push values into buffer
 | ||
|  |     //Find int values in all blobs and check;
 | ||
|  |     std::set<int> setWeight; | ||
|  |     GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag); | ||
|  |     int iCount = setWeight.size(); | ||
|  |     int iNeedBits = ceil(log2(iCount)); | ||
|  |     if (iNeedBits > 8) { | ||
|  |         MNN_ERROR("The Bits need large than 8, the model may be error for user\n"); | ||
|  |         return; | ||
|  |     } | ||
|  |     std::map<int, unsigned char> mapWeight; | ||
|  |     int iIdx = 0; | ||
|  |     for (std::set<int>::iterator it = setWeight.begin(); it != setWeight.end(); it++) | ||
|  |     { | ||
|  |         mapWeight[*it] = iIdx++; | ||
|  |     } | ||
|  |     size_t buf_len = size_t(ceil(0.125 * iNeedBits * area * channel)); | ||
|  |     char *buf = new char[buf_len]; | ||
|  |     { | ||
|  |         char *arr = new char[area * channel]; | ||
|  |         char *tmp = arr; | ||
|  |         if (asymmetricQuantFlag) { | ||
|  |             for (int i = 0; i < channel; i++) | ||
|  |             { | ||
|  |                 float min = alphaData[2*i]; | ||
|  |                 float alpha = alphaData[2*i+1]; | ||
|  |                 for (int j = 0; j < area; j++) | ||
|  |                 { | ||
|  |                     float weight = weightData[i * area + j]; | ||
|  |                     int value = -128; | ||
|  |                     if (alpha > 1e-6f) | ||
|  |                     { | ||
|  |                         value = fmax(fmin(round((weight - min) / alpha) + (-128), 127), -128); | ||
|  |                     } | ||
|  |                     *tmp = mapWeight[value]; | ||
|  |                     tmp++; | ||
|  |                 } | ||
|  |             } | ||
|  |         } else { | ||
|  |             for (int i = 0; i < channel; i++) | ||
|  |             { | ||
|  |                 float alpha = alphaData[i]; | ||
|  |                 for (int j = 0; j < area; j++) | ||
|  |                 { | ||
|  |                     float weight = weightData[i * area + j]; | ||
|  |                     int value = 0; | ||
|  |                     if (alpha > 1e-6f) | ||
|  |                     { | ||
|  |                         value = fmax(fmin(round(weight / alpha), 127), -128); | ||
|  |                     } | ||
|  |                     *tmp = mapWeight[value]; | ||
|  |                     tmp++; | ||
|  |                 } | ||
|  |             } | ||
|  |         } | ||
|  |         FillBuffer(buf, buf_len, arr, area * channel, iNeedBits); | ||
|  |         delete[] arr; | ||
|  |     } | ||
|  |     //begin write to file
 | ||
|  |     { | ||
|  |         char tmp[100]; | ||
|  |         //1. weights blob shape(unsigned int32)
 | ||
|  |         WriteBlobDim(out, {channel, area}); | ||
|  |         // 2. Avalable values Count(unsigned char)
 | ||
|  |         tmp[0] = (unsigned char)iCount; | ||
|  |         out.write(tmp, 1); | ||
|  |         // 3. valueset(signed char * valueset_size)
 | ||
|  |         for (auto it = setWeight.begin(); it != setWeight.end(); it++) | ||
|  |         { | ||
|  |             tmp[0] = (unsigned char)*it; | ||
|  |             out.write(tmp, 1); | ||
|  |         } | ||
|  |         // 4. weights indexes(size = ceil(0.125*weights_count*ceil(log2(Avalable_values_Count))))
 | ||
|  |         out.write(buf, buf_len); | ||
|  |         //g_totalSize += 1 + setWeight.size() + buf_len;
 | ||
|  |     } | ||
|  |     delete[] buf; | ||
|  | } | ||
|  | 
 | ||
|  | static void WriteSparseQuanBlobs(std::ostream &out, const float* weightData, const float* alphaData, int area, int channel, bool asymmetricQuantFlag) | ||
|  | { | ||
|  |     std::set<int> setWeight; | ||
|  |     GetWeightSet(setWeight, weightData, alphaData, area, channel, asymmetricQuantFlag); | ||
|  |     int iDataNeedBits = ceil(log2(setWeight.size())); | ||
|  |     unsigned int nnz = 0; | ||
|  |     int weightSize = area * channel; | ||
|  |     std::map<int, unsigned char> mapWeight; | ||
|  |     { | ||
|  |         int iIdx = 0; | ||
|  |         for (auto it = setWeight.begin(); it != setWeight.end(); it++) | ||
|  |         { | ||
|  |             mapWeight[*it] = iIdx++; | ||
|  |         } | ||
|  |     } | ||
|  |     unsigned char iNeedBits; | ||
|  |     nnz = GetBestMaxStep(weightData, weightSize, iNeedBits, iDataNeedBits, alphaData, area, channel, asymmetricQuantFlag); | ||
|  |     //weight buf
 | ||
|  |     size_t data_buf_len = size_t(ceil(0.125 * iDataNeedBits * nnz)); | ||
|  |     char* data_buf = new char[data_buf_len]; | ||
|  |     //sparse COO buf
 | ||
|  |     size_t buf_len = size_t(ceil(0.125 * iNeedBits * nnz)); | ||
|  |     char* buf = new char[buf_len]; | ||
|  |     { //fill buf with step values;
 | ||
|  |         unsigned char* arr_idx = new unsigned char[nnz]; | ||
|  |         unsigned char* data_arr = new unsigned char[nnz]; | ||
|  |         unsigned char* tmp = arr_idx; | ||
|  |         int iMaxStep = pow(2, iNeedBits) - 1; | ||
|  |         int iPreIdx = 0; | ||
|  |         unsigned char* dTmp = data_arr; | ||
|  |         if (asymmetricQuantFlag) { | ||
|  |             for (int i = 0; i < weightSize; i++) | ||
|  |             { | ||
|  |                 float min = alphaData[2*(i/area)]; | ||
|  |                 float alpha = alphaData[2*(i/area)+1]; | ||
|  |                 int zeroQuant = -128; | ||
|  |                 if (alpha > 1e-6) { | ||
|  |                     zeroQuant = round((0.0f - min) / alpha) + (-128); | ||
|  |                 } | ||
|  | 
 | ||
|  |                 float weight = weightData[i]; | ||
|  |                 int value = -128; | ||
|  |                 if (alpha > 1e-6) | ||
|  |                 { | ||
|  |                     value = round((weight - min) / alpha) + (-128); | ||
|  |                 } | ||
|  | 
 | ||
|  |                 if (value != zeroQuant) | ||
|  |                 { | ||
|  |                     *dTmp = mapWeight[value]; | ||
|  |                     *tmp = i - iPreIdx; | ||
|  |                     iPreIdx = i; | ||
|  |                     tmp++; | ||
|  |                     dTmp++; | ||
|  |                 } | ||
|  |                 if (i - iPreIdx >= iMaxStep) | ||
|  |                 { | ||
|  |                     *dTmp = mapWeight[zeroQuant]; | ||
|  |                     *tmp = i - iPreIdx; | ||
|  |                     iPreIdx = i; | ||
|  |                     tmp++; | ||
|  |                     dTmp++; | ||
|  |                 } | ||
|  |             } | ||
|  |         } else { | ||
|  |             for (int i = 0; i < weightSize; i++) | ||
|  |             { | ||
|  |                 float alpha = alphaData[i / area]; | ||
|  |                 float weight = weightData[i]; | ||
|  |                 int value = 0; | ||
|  |                 if (alpha > 1e-6f) | ||
|  |                 { | ||
|  |                     value = round(weight / alpha); | ||
|  |                 } | ||
|  | 
 | ||
|  |                 if (value != 0) | ||
|  |                 { | ||
|  |                     *dTmp = mapWeight[value]; | ||
|  |                     *tmp = i - iPreIdx; | ||
|  |                     iPreIdx = i; | ||
|  |                     tmp++; | ||
|  |                     dTmp++; | ||
|  |                 } | ||
|  |                 if (i - iPreIdx >= iMaxStep) | ||
|  |                 { | ||
|  |                     *dTmp = mapWeight[0]; | ||
|  |                     *tmp = i - iPreIdx; | ||
|  |                     iPreIdx = i; | ||
|  |                     tmp++; | ||
|  |                     dTmp++; | ||
|  |                 } | ||
|  |             } | ||
|  |         } | ||
|  |         FillBuffer(buf, buf_len, (char*) arr_idx, nnz, iNeedBits); | ||
|  |         FillBuffer(data_buf, data_buf_len, (char*) data_arr, nnz, iDataNeedBits); | ||
|  |         delete[] arr_idx; | ||
|  |         delete[] data_arr; | ||
|  |     } | ||
|  |     { //write
 | ||
|  |         char tmp[100]; | ||
|  |         // 1.weights blob shape(unsigned int32)
 | ||
|  |         WriteBlobDim(out, {channel, area}); | ||
|  |         // 2. nnz
 | ||
|  |         out.write((const char*) &nnz, 4); | ||
|  |         // 3. max_step use # bits () (unsigned char)
 | ||
|  |         out.write((const char*) &iNeedBits, 1); | ||
|  |         // 4. buf for steps ceil(nnz*step need bits/8)
 | ||
|  |         out.write(buf, buf_len); | ||
|  |         // 5. Avalable values Count(unsigned char)
 | ||
|  |         tmp[0] = (unsigned char) setWeight.size(); | ||
|  |         out.write(tmp, 1); | ||
|  |         // 6. valueset(signed char * valueset_size)
 | ||
|  |         for (auto it = setWeight.begin(); it != setWeight.end(); it++) | ||
|  |         { | ||
|  |             tmp[0] = (unsigned char) *it; | ||
|  |             out.write(tmp, 1); | ||
|  |         } | ||
|  |         // 7. none zero weights indexes(nnz*ceil(log2(Avalable_values_Count))/8)
 | ||
|  |         out.write((const char*) data_buf, data_buf_len); | ||
|  |     } | ||
|  |     delete[] buf; | ||
|  |     delete[] data_buf; | ||
|  | } | ||
|  | 
 | ||
|  | static std::unique_ptr<IDSTQuanT> encode(const std::vector<float>& weight, const std::vector<float>& scale, int kernelSize, int kernelNum, | ||
|  |                                          bool asymmetricQuantFlag, const int8_t* quantWeightPtr, const int clampMin) { | ||
|  |     std::ostringstream outputStringStreamCQ, outputStringStreamSQ; | ||
|  |     WriteCQBlobs(outputStringStreamCQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag); | ||
|  |     WriteSparseQuanBlobs(outputStringStreamSQ, weight.data(), scale.data(), kernelSize, kernelNum, asymmetricQuantFlag); | ||
|  |     std::unique_ptr<IDSTQuanT> idst(new IDSTQuanT); | ||
|  |     auto cqStr = outputStringStreamCQ.str(); | ||
|  |     auto sqStr = outputStringStreamSQ.str(); | ||
|  |     int int8Size = kernelNum * kernelSize; | ||
|  |     if (quantWeightPtr && (int8Size <= cqStr.size() && int8Size <= sqStr.size())) { | ||
|  |         idst->type = 4; | ||
|  |         idst->aMax = kernelNum; | ||
|  |         idst->buffer.resize(int8Size); | ||
|  |         ::memcpy(idst->buffer.data(), quantWeightPtr, int8Size); | ||
|  |     } else if (cqStr.size() <= sqStr.size()) { | ||
|  |         idst->type = 1; | ||
|  |         idst->buffer.resize(cqStr.size()); | ||
|  |         ::memcpy(idst->buffer.data(), cqStr.data(), cqStr.size()); | ||
|  |     } else { | ||
|  |         idst->type = 2; | ||
|  |         idst->buffer.resize(sqStr.size()); | ||
|  |         ::memcpy(idst->buffer.data(), sqStr.data(), sqStr.size()); | ||
|  |     } | ||
|  |     idst->alpha.resize(scale.size()); | ||
|  |     ::memcpy(idst->alpha.data(), scale.data(), scale.size() * sizeof(float)); | ||
|  |     idst->quantScale = 1.f; | ||
|  |     if (asymmetricQuantFlag) { | ||
|  |         idst->readType = kernelNum; | ||
|  |         idst->aMin = clampMin; | ||
|  |     } | ||
|  |     return idst; | ||
|  | } | ||
|  | 
 | ||
|  | } // namespace IDSTEncoder
 | ||
|  | 
 | ||
|  | #endif // IDSTENCODER_HPP
 |