//
// CPUConvolution.cpp
// MNN
//
// Created by MNN on 2018/07/15.
// Copyright © 2018, Alibaba Group Holding Limited
//

#include "backend/cpu/CPUConvolution.hpp"
#include <math.h>
#include <string.h>
#include "backend/cpu/compute/CommonOptFunction.h"
#include "core/Macro.h"
#include <limits>
#include "backend/cpu/compute/ConvolutionFloatFactory.h"
//#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>
#include "core/ConvolutionCommon.hpp"
namespace MNN {
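
// Stage the bias into a STATIC device buffer whose length is rounded up to a
// multiple of the backend's pack unit (core->pack), e.g. outputCount = 10 with
// pack = 4 allocates 12 slots and zero-fills the last 2. When core->bytes < 4
// the backend computes in a lower precision (fp16 stored as int16_t), so the
// fp32 bias is converted with MNNFp32ToLowp; otherwise it is copied verbatim.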
bool CPUConvolution::Resource::copyBiasAlign(const float* bias, int outputCount) {
    auto core        = static_cast<CPUBackend*>(backend)->functions();
    int bytes        = core->bytes;
    int unit         = core->pack;
    auto alignOutput = UP_DIV(outputCount, unit) * unit;
    int remain       = alignOutput - outputCount;
    mBias.reset(Tensor::createDevice<uint8_t>(std::vector<int>{alignOutput * bytes}));
    bool success = backend->onAcquireBuffer(mBias.get(), Backend::STATIC);
    if (!success) {
        MNN_ERROR("Error: failed to allocate memory for bias\n");
        return false;
    }
    if (bytes < 4) {
        core->MNNFp32ToLowp(bias, mBias->host<int16_t>(), outputCount);
    } else {
        ::memcpy(mBias->host<float>(), bias, outputCount * bytes);
    }
    if (remain > 0) {
        // Zero the padded tail so vectorized kernels read well-defined values.
        ::memset(mBias->host<uint8_t>() + outputCount * bytes, 0, remain * bytes);
    }
    return true;
}

CPUConvolution::CPUConvolution(const Convolution2DCommon* convOp, Backend* b) : MNN::Execution(b), mCommon(convOp) {
    // Do nothing
}
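
// Post-process parameters handed to the fused convolution kernels. The last two
// entries are the activation clamp bounds: unbounded by default, [0, +max) for
// relu and [0, 6] for relu6. (The leading pair of 1.0f values appears to act as
// identity scale factors for the fused elementwise step.)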
std::vector<float> CPUConvolution::getPostParameters() const {
    std::vector<float> postParameters = {
        1.0f,
        1.0f,
        -std::numeric_limits<float>::max(),
        std::numeric_limits<float>::max(),
    };
    if (mCommon->relu()) {
        postParameters[2] = 0.0f;
    }
    if (mCommon->relu6()) {
        postParameters[2] = 0.0f;
        postParameters[3] = 6.0f;
    }
    return postParameters;
}
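
// Element count of a weight blob repacked from the dense [oc][depth][kernel]
// order into tiles of shape [UP_DIV(oc, unitOC)][UP_DIV(depth, unitDepth)]
// [kernelSize][unitDepth][unitOC]; partial tiles are padded with zeros.
// reorderWeightSlow below performs the actual repacking.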
int CPUConvolution::reorderWeightSize(int depth, int outputCount, int kernelSize, int unitDepth, int unitOC) {
    return UP_DIV(outputCount, unitOC) * UP_DIV(depth, unitDepth) * kernelSize * unitDepth * unitOC;
}

template <typename T>
void CPUConvolution::reorderWeightSlow(T* dest, const T* source, size_t depth, size_t outputCount, size_t kernelSize,
                                       size_t unitDepth, size_t unitOC, bool transpose) {
    memset(dest, 0, reorderWeightSize(depth, outputCount, kernelSize, unitDepth, unitOC) * sizeof(T));
    for (int dz = 0; dz < outputCount; ++dz) {
        auto dz_unit = dz / unitOC;
        auto mx      = dz % unitOC;
        auto dst_dz  = dest + dz_unit * UP_DIV(depth, unitDepth) * kernelSize * unitDepth * unitOC;
        for (int sz = 0; sz < depth; ++sz) {
            auto sz_unit = sz / unitDepth;
            auto my      = sz % unitDepth;
            auto dst_sz  = dst_dz + sz_unit * kernelSize * unitDepth * unitOC;
            auto src     = source + kernelSize * (sz + dz * depth);
            for (int ki = 0; ki < kernelSize; ++ki) {
                auto dst_i = dst_sz + ki * unitDepth * unitOC;
                if (transpose) {
                    // Inner tile laid out as [unitOC][unitDepth].
                    dst_i[unitDepth * mx + my] = src[ki];
                } else {
                    // Inner tile laid out as [unitDepth][unitOC].
                    dst_i[unitOC * my + mx] = src[ki];
                }
            }
        }
    }
}

template void CPUConvolution::reorderWeightSlow<int8_t>(int8_t*, const int8_t*, size_t, size_t, size_t, size_t, size_t, bool);
// FLOAT16 (__fp16) is not available here, so use int16_t (which is also 2 bytes).
template void CPUConvolution::reorderWeightSlow<int16_t>(int16_t*, const int16_t*, size_t, size_t, size_t, size_t, size_t, bool);
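
// A minimal usage sketch (hypothetical shapes and tile sizes): repacking int8
// weights for a kernel that consumes unitDepth = 4, unitOC = 4 tiles:
//   std::vector<int8_t> packed(reorderWeightSize(ic, oc, kh * kw, 4, 4));
//   reorderWeightSlow(packed.data(), src, ic, oc, kh * kw, 4, 4, false);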
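
// Allocate dest as a STATIC buffer, zero it, then copy count elements with an
// element-wise T -> U conversion. The <int32_t, float> instantiation below
// suggests its use for widening quantized int32 parameters to float.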
template <typename T, typename U> // T -> U
bool CPUConvolution::acquireMemoryAndCopy(std::shared_ptr<Tensor> dest, const T* source, size_t count, Backend* backend) {
    bool allocRes = ((CPUBackend*)backend)->onAcquireBuffer(dest.get(), Backend::STATIC);
    if (!allocRes) {
        return false;
    }
    auto dataPtr = dest->host<U>();
    memset(dataPtr, 0, dest->size());
    for (int i = 0; i < count; ++i) {
        dataPtr[i] = source[i]; // element-wise type cast T -> U
    }
    return true;
}
template bool CPUConvolution::acquireMemoryAndCopy<int32_t, float>(std::shared_ptr<Tensor>, const int32_t*, size_t, Backend*);
template bool CPUConvolution::acquireMemoryAndCopy<float, float>(std::shared_ptr<Tensor>, const float*, size_t, Backend*);
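
// Padding can depend on the runtime input/output shapes (e.g. SAME-style pad
// modes), so it is recomputed on every resize rather than cached at creation.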
ErrorCode CPUConvolution::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
    auto input  = inputs[0];
    auto output = outputs[0];
    auto pad    = ConvolutionCommon::convolutionPad(input, output, mCommon);
    mPadY       = pad.second;
    mPadX       = pad.first;
    return NO_ERROR;
}

class ConvolutionFactory : public CPUBackend::Creator {
public:
    virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
                                const MNN::Op* op, Backend* backend) const override {
        return ConvolutionFloatFactory::create(inputs, outputs, op, backend);
    }
};

REGISTER_CPU_OP_CREATOR(ConvolutionFactory, OpType_Convolution);
} // namespace MNN