| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  | //
 | 
					
						
//  SparseConvInt8TiledExecutor.cpp
 | 
					
						
							|  |  |  | //  MNN
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //  Created by MNN on 2021/6/09.
 | 
					
						
							|  |  |  | //  Copyright © 2018 - 2021, Alibaba Group Holding Limited
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "SparseConvInt8TiledExecutor.hpp"
 | 
					
						
							| 
									
										
										
										
											2023-06-16 09:42:45 +08:00
										 |  |  | #include "ConvolutionTiledExecutor.hpp"
 | 
					
						
							|  |  |  | #include "core/BufferAllocator.hpp"
 | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  | #include "core/Macro.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <math.h>
 | 
					
						
							| 
									
										
										
										
											2023-06-16 09:42:45 +08:00
										 |  |  | #include "CommonOptFunction.h"
 | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  | #include "core/Concurrency.h"
 | 
					
						
							|  |  |  | #include "core/TensorUtils.hpp"
 | 
					
						
							| 
									
										
										
										
											2024-05-11 19:17:02 +08:00
										 |  |  | #include "core/MemoryFormater.h"
 | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  | #include "MNN/AutoTime.hpp"
 | 
					
						
							|  |  |  | #include <math.h>
 | 
					
						
							|  |  |  | #ifdef MNN_USE_SSE
 | 
					
						
							|  |  |  | extern "C" { | 
					
						
							|  |  |  | void MNNInt8ToUInt8(void* ptr, int count); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | namespace MNN { | 
					
						
							|  |  |  | 
 | 
					
						
// Repack the dense int8 weight into the sparse (BCSR-like) layout consumed by the
// sparse quantized matmul kernels, and build the two index maps that drive them:
//  - mNNZMap:        per sparse-row non-zero block count
//  - mDataOffsetMap: per non-zero block, offset into the packed input data
// @param b            backend whose int8 function table supplies pack mode and packer
// @param common       convolution attributes (output/input channels, kernel size)
// @param weightOrigin dense int8 weight to compress
// @param weight       [out] receives the packed non-zero weight values
// @param sparseCommon serialized sparsity args: "sparseBlockOC", "NNZElement", "blockNumber"
// @return true on success; false when any STATIC buffer acquisition fails
bool SparseConvInt8TiledExecutor::reorderWeight(Backend* b, const Convolution2DCommon* common,
                          const std::shared_ptr<Tensor>& weightOrigin,
                          std::shared_ptr<Tensor>& weight, const SparseCommon* sparseCommon) {

    // Query the sparse int8 GEMM tile sizes (eP: dest tile, lP/hP: pack units).
    int eP, lP, hP;
    auto core = static_cast<CPUBackend*>(b)->int8Functions();
    core->MNNGetSparseQuantMatMulPackMode(&eP, &lP, &hP);

    int oc = common->outputCount(), ic = common->inputCount(), kernelCount = common->kernelX() * common->kernelY();
    // Sparsity metadata computed offline by the converter.
    auto sparseBlockOC = sparseCommon->args()->LookupByKey("sparseBlockOC")->i();
    size_t weightNNZElement = sparseCommon->args()->LookupByKey("NNZElement")->i();
    size_t weightBlockNumber = sparseCommon->args()->LookupByKey("blockNumber")->i();

    // MNN_PRINT("1x%d weightNNZElement%zu, weightBlockNumber:%zu\n", sparseBlockOC, weightNNZElement, weightBlockNumber);
    weight.reset(Tensor::createDevice<uint8_t>({ static_cast<int>(weightNNZElement + 1)}));   // one more element in case of weight are all zeros
    // One counter per full sparseBlockOC-row plus one per leftover single row
    // (oc % sparseBlockOC rows fall back to 1-row blocks).
    mNNZMap.reset(Tensor::createDevice<unsigned int>({oc / sparseBlockOC + oc % sparseBlockOC}));
    // +1: offsets are consumed pairwise (block start and end), so one extra slot.
    mDataOffsetMap.reset(Tensor::createDevice<int>({static_cast<int>(weightBlockNumber + 1)}));

    // NOTE(review): the function table came from parameter `b` but buffers are acquired
    // via backend() — presumably both are the same CPUBackend; confirm at call sites.
    mValid = backend()->onAcquireBuffer(weight.get(), Backend::STATIC);
    mValid = mValid && backend()->onAcquireBuffer(mNNZMap.get(), Backend::STATIC);
    mValid = mValid && backend()->onAcquireBuffer(mDataOffsetMap.get(), Backend::STATIC);
    if(!mValid) {
        // NOTE(review): buffers acquired before the failing one are not released here;
        // they appear to be reclaimed with the tensor/backend lifetime — verify.
        MNN_PRINT("in: %s, out of memory!\n", __FUNCTION__);
        return false;
    }
    // MNN_PRINT("oc:%d, sparseBlockOC:%d,\n", oc, sparseBlockOC);
    // Perform the actual dense -> sparse repack into the freshly acquired buffers.
    core->MNNPackForSparseQuantMatMul_B(weight->host<int8_t>(), mNNZMap->host<unsigned int>(),
                                       mDataOffsetMap->host<int>(), sparseBlockOC, weightOrigin->host<int8_t>(), oc, kernelCount, ic, eP);

    // MNN_PRINT("\nBCSR int8 weight:");
    // formatMatrix(weight->host<int8_t>(), {static_cast<int>(weightNNZElement)});
    // MNN_PRINT("\nBCSR int8 weight nnzmap:");
    // formatMatrix(mNNZMap->host<unsigned int>(), {oc / sparseBlockOC + oc % sparseBlockOC});
    // MNN_PRINT("\nBCSR int8 weight dataOffsetMap:");
    // formatMatrix(mDataOffsetMap->host<int>(), {static_cast<int>(weightBlockNumber + 1)});

    return true;
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  | SparseConvInt8TiledExecutor::SparseConvInt8TiledExecutor(Backend* backend, const Op* op, std::shared_ptr<ResourceInt8> res) : ConvInt8TiledExecutor(backend, op, res) { | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |     auto convOp = op->main_as_Convolution2D(); | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |     std::shared_ptr<Tensor> weightOrigin; | 
					
						
							| 
									
										
										
										
											2024-07-22 19:51:53 +08:00
										 |  |  |     weightOrigin.swap(mResourceInt8->mWeightInt8); | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |     const SparseCommon* sparseCommon = convOp->sparseParameter(); | 
					
						
							| 
									
										
										
										
											2024-07-22 19:51:53 +08:00
										 |  |  |     mValid = reorderWeight(backend, convOp->common(), weightOrigin, mResourceInt8->mWeightInt8, sparseCommon); | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |     if(!mValid) { | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // choose int8 sparse gemm kernel
 | 
					
						
							|  |  |  |     auto sparseBlockOC = sparseCommon->args()->LookupByKey("sparseBlockOC")->i(); | 
					
						
							|  |  |  |     auto core = static_cast<CPUBackend*>(backend)->int8Functions(); | 
					
						
							|  |  |  |     mSparseQuantMatMulKernel = sparseBlockOC == 4 ? core->MNNPackedSparseQuantMatMulEpx4 : core->MNNPackedSparseQuantMatMulEpx1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  | SparseConvInt8TiledExecutor::SparseConvInt8TiledExecutor(Backend* backend, const Op* op, | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |                                                          const SparseConvInt8TiledExecutor& exe) | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |     : ConvInt8TiledExecutor(backend, op, exe.mResourceInt8), | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |       mNNZMap(exe.mNNZMap), | 
					
						
							|  |  |  |       mDataOffsetMap(exe.mDataOffsetMap), | 
					
						
							|  |  |  |       mSparseBlockOC(exe.mSparseBlockOC), | 
					
						
							|  |  |  |       mSparseQuantMatMulKernel(exe.mSparseQuantMatMulKernel) { | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | SparseConvInt8TiledExecutor::~SparseConvInt8TiledExecutor() { | 
					
						
							|  |  |  |     // Do nothing
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | bool SparseConvInt8TiledExecutor::onClone(Backend* bn, const Op* op, Execution** dst) { | 
					
						
							|  |  |  |     if (nullptr == dst) { | 
					
						
							|  |  |  |         return true; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-08-24 15:46:21 +08:00
										 |  |  |     auto exe = new SparseConvInt8TiledExecutor(bn, op, *this); | 
					
						
							| 
									
										
										
										
											2021-09-18 15:52:30 +08:00
										 |  |  |     if (!exe->valid()) { | 
					
						
							|  |  |  |         return false; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     *dst = exe; | 
					
						
							|  |  |  |     return true; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// Report the sparse int8 GEMM pack units. The out-parameter order is deliberately
// permuted: MNNGetSparseQuantMatMulPackMode fills (eP, lP, hP) in that order, so
// passing (DestUnit, Unit, SrcUnit) means DestUnit<-eP, Unit<-lP, SrcUnit<-hP —
// matching the direct call in reorderWeight and the (&lP, &hP, &eP) call in onResize.
void SparseConvInt8TiledExecutor::getPackParameter(int* Unit, int* SrcUnit, int* DestUnit, const CoreInt8Functions* core) {
    core->MNNGetSparseQuantMatMulPackMode(DestUnit, Unit, SrcUnit);
}
					
						
							|  |  |  | 
 | 
					
						
// Plan buffers and tiling for the current input/output shapes: compute the tile
// count and thread split, size the per-thread im2col scratch, and probe the blit
// info allocation so onExecute can reuse the reserved chunk.
// @return NO_ERROR on success, OUT_OF_MEMORY when any buffer cannot be acquired.
ErrorCode SparseConvInt8TiledExecutor::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {

    // Timer kernelTimer;

    // Base class fills mIm2ColParamter / mCommon for the new shapes.
    ConvInt8TiledExecutor::onResize(inputs, outputs);

    int eP, lP, hP;
    auto core = static_cast<CPUBackend*>(backend())->int8Functions();
    // NB: permuted out-params — this yields lP<-eP-slot? No: lP gets the pack-L unit,
    // hP the pack-H unit, eP the dest tile (see getPackParameter's mapping).
    getPackParameter(&lP, &hP, &eP, core);
    // Reduction length: packed input channels times kernel area.
    int lSize = mIm2ColParamter.icDiv4 * mIm2ColParamter.packCUnit * mCommon->kernelX() * mCommon->kernelY();
    mIm2ColCount = 1;
    auto output = outputs[0];
    auto planeSize = output->width() * output->height() * output->batch();
    auto DynamicDestUnit = eP * mIm2ColCount;
    mTileCount        = UP_DIV(planeSize, DynamicDestUnit);
    const int threads = std::max(static_cast<CPUBackend*>(backend())->threadNumber(), 1);
    // Never spawn more workers than there are tiles.
    mThreadNums       = std::min(threads, mTileCount);

    mIm2ColParamter.destICStride = mIm2ColParamter.icDiv4 * mIm2ColParamter.packCUnit * eP;

    // Fixed per-call GEMM geometry; eSize is set per tile in onExecute.
    mSparseQuantParam.eP = eP;
    mSparseQuantParam.l = lSize;
    mSparseQuantParam.h = mCommon->outputCount();
    mSparseQuantParam.aStride = eP * mSparseQuantParam.l;
    mSparseQuantParam.cStride = outputs[0]->batch() * outputs[0]->height() * outputs[0]->width() * static_cast<CPUBackend*>(backend())->functions()->pack;

    // One eP x (lSize rounded up to lP) im2col panel per thread.
    mTempIm2ColBuffer.reset(Tensor::createDevice<int8_t>({mThreadNums, eP, UP_DIV(lSize, lP) * lP}));
    bool success = backend()->onAcquireBuffer(mTempIm2ColBuffer.get(), Backend::DYNAMIC);
    if (!success) {
        return OUT_OF_MEMORY;
    }
    auto bufferAlloc = static_cast<CPUBackend*>(backend())->getBufferAllocator();
    auto blitInfoSize = ConvolutionTiledExecutor::computeBlitInfoSize(eP, mIm2ColParamter.ow, mIm2ColParamter.kernelX * mIm2ColParamter.kernelY, mThreadNums);
    mBlitInfo = bufferAlloc->alloc(blitInfoSize.first);
    if (mBlitInfo.invalid()) {
        return OUT_OF_MEMORY;
    }
    // alloc-then-free is the resize-time reservation pattern: the chunk stays valid
    // for onExecute while letting the allocator reuse the space between runs.
    bufferAlloc->free(mBlitInfo);
    mBlitInfoStride = blitInfoSize.second;

    // Same reservation pattern for the im2col scratch (DYNAMIC lifetime).
    backend()->onReleaseBuffer(mTempIm2ColBuffer.get(), Backend::DYNAMIC);

    // MNN_PRINT("sparse conv2d int8 resize: cost time: %llu us\n", kernelTimer.durationInUs());
    return NO_ERROR;
}
					
						
							|  |  |  | 
 | 
					
						
// Run the sparse int8 convolution: tiles of eP output pixels are im2col'ed into a
// per-thread panel and multiplied against the packed sparse weight; tiles are
// distributed round-robin over mThreadNums workers.
// @return NO_ERROR (buffers were fully planned in onResize).
ErrorCode SparseConvInt8TiledExecutor::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    // Timer kernelTimer;
    const auto input = inputs[0];
    auto output      = outputs[0];
    auto core = static_cast<CPUBackend*>(backend())->int8Functions();

    int PackUnit = static_cast<CPUBackend*>(backend())->functions()->pack;
    auto blitProc = core->MNNPackC4Int8ForMatMul_ASparse;
    // Total output pixels across the whole batch; tiled by eP below.
    const int outputPlaneLen = output->height() * output->width() * output->batch();

    const int batch = input->batch();
    const int ocDivPack = UP_DIV(output->channel(), PackUnit);  // NOTE(review): unused in this function

    const auto inputDataPtr = input->host<int8_t>();
    const auto weightDataPtr = mResourceInt8->mWeightInt8->host<int8_t>();
    const auto NNZMapPtr     = mNNZMap->host<unsigned int>();
    const auto dataOffsetPtr = mDataOffsetMap->host<int>();
    auto im2colPtr           = mTempIm2ColBuffer->host<int8_t>();
    auto outputDataPtr       = output->host<int8_t>();
    // Post-treatment parameters (bias add, rescale, clamp) shared by all tiles.
    QuanPostTreatParameters quanParam;
    quanParam.bias = mMutableResource->mBiasInt32->host<int32_t>();
    quanParam.scale = mMutableResource->mScaleFloat->host<float>();
    quanParam.maxValue = mMutableResource->mClampMax;
    if (mResourceInt8->mRelu) {
        // ReLU is fused by raising the lower clamp to the output zero point.
        quanParam.minValue = mMutableResource->mOutputZeroPoint;
    } else {
        quanParam.minValue = mMutableResource->mClampMin;
    }
    // MNN_PRINT("outputPlaneLen: %d, reduce l:%zu, minValue:%d, maxValue:%d, mTileCount:%d\n", outputPlaneLen, mSparseQuantParam.l, quanParam.minValue, quanParam.maxValue, mTileCount);
    const int col_buffer_size = mTempIm2ColBuffer->stride(0);

    auto threadFunction = [&](int tId) {
        // Each thread owns one im2col panel and one slice of the blit-info chunk.
        auto colAddr        = im2colPtr + tId * mTempIm2ColBuffer->stride(0);
        int32_t info[4];
        info[1] = mIm2ColParamter.iw * mIm2ColParamter.ih * batch;
        info[2] = (int)mSparseQuantParam.eP;
        info[3] = mIm2ColParamter.strideX;
        auto srcPtr     = (int8_t const **)(mBlitInfo.ptr() + tId * mBlitInfoStride.first);
        auto el         = (int32_t *)(srcPtr + mBlitInfoStride.second);

        // Round-robin tile assignment: tId, tId + mThreadNums, ...
        for (int tIndex = tId; tIndex < mTileCount; tIndex += mThreadNums) {
            // Per-tile copy so eSize can be shrunk for the tail tile.
            SparseQuantMatMulParam sparseQuantParam = mSparseQuantParam;
            const int xIndexStart  = tIndex * sparseQuantParam.eP;
            const int realDstCount = ALIMIN(outputPlaneLen - xIndexStart, sparseQuantParam.eP);
            sparseQuantParam.eSize = realDstCount;
            // im2col
            auto res = ConvolutionTiledExecutor::turnIm2ColToBlitInfo((const float**)srcPtr, el, xIndexStart, realDstCount, mIm2ColParamter, (const uint8_t*)inputDataPtr, 1);
            int number = res.first;
            bool needZero = res.second;
            if (needZero) {
                // Pre-fill with the input zero point so padded taps contribute zero.
#ifdef MNN_USE_SSE
                // SSE kernels work on uint8: shift the zero point by 128 to match.
                ::memset(colAddr, mMutableResource->mInputZeroPoint + 128, col_buffer_size);
#else
                ::memset(colAddr, mMutableResource->mInputZeroPoint, col_buffer_size);
#endif
            }
            info[0] = number;
            if (number > 0) {
                blitProc(colAddr, srcPtr, info, el);
            }
            // MNN_PRINT("batch:%d, realDstCount:%d, InputZeroPoint:%d, inputdata matrix im2col:\n", bIndex, realDstCount, mResource->mInputZeroPoint);
            // formatMatrix(colAddr, {static_cast<int>(UP_DIV(realDstCount, sparseQuantParam.eP)), static_cast<int>(sparseQuantParam.l), static_cast<int>(sparseQuantParam.eP)});

#ifdef MNN_USE_SSE
            // NOTE(review): this local deliberately shadows the outer col_buffer_size
            // with the panel's aStride; the blitted data is converted int8 -> uint8
            // in place for the SSE kernels.
            const int col_buffer_size = sparseQuantParam.aStride * sizeof(int8_t);
            MNNInt8ToUInt8(colAddr, col_buffer_size);
#endif
            auto outputInTilePtr = outputDataPtr + xIndexStart * PackUnit;
            // MNN_PRINT("bIndex:%d, offset:%zu, spmm sparseMatmul tile:\n", bIndex, outputInTilePtr - outputDataPtr);
            // Sparse GEMM + fused bias/rescale/clamp into the output tile.
            mSparseQuantMatMulKernel(outputInTilePtr, colAddr, weightDataPtr, (size_t*)&sparseQuantParam, &quanParam, NNZMapPtr, dataOffsetPtr);
            // formatMatrix(outputInTilePtr, {static_cast<int>(UP_DIV(sparseQuantParam.h, PackUnit)), realDstCount, PackUnit});
        }
    };
    MNN_CONCURRENCY_BEGIN(tId, mThreadNums) {
        threadFunction((int)tId);
    }
    MNN_CONCURRENCY_END();
    // MNN_PRINT("sparse conv2d int8 execute: cost time: %llu us\n", kernelTimer.durationInUs());
    return NO_ERROR;
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } // namespace MNN
 |