//
//  ConvolutionCommon.cpp
//  MNN
//
//  Created by MNN on 2020/03/02.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#include "ConvolutionCommon.hpp"
|
2020-11-05 16:41:56 +08:00
|
|
|
#include <math.h>
|
2023-05-18 19:11:50 +08:00
|
|
|
#include "backend/cpu/compute/CommonOptFunction.h"
|
2020-11-05 16:41:56 +08:00
|
|
|
#include "half.hpp"
|
2023-09-20 20:16:25 +08:00
|
|
|
#include "core/OpCommonUtils.hpp"
|
2024-05-11 19:17:02 +08:00
|
|
|
#include "core/IDSTDecoder.hpp"
|
2023-06-27 10:33:16 +08:00
|
|
|
|
2020-03-02 22:13:38 +08:00
|
|
|
namespace MNN {
|
2023-09-20 20:16:25 +08:00
|
|
|
|
|
|
|
|
std::shared_ptr<ConvolutionCommon::Int8Common> ConvolutionCommon::load(const Convolution2D *conv, Backend* backend, bool forceFloat, bool forceInt8) {
|
|
|
|
|
auto quan = conv->quanParameter();
|
|
|
|
|
auto result = std::make_shared<Int8Common>();
|
2023-05-18 19:11:50 +08:00
|
|
|
result->quan = quan;
|
2023-09-20 20:16:25 +08:00
|
|
|
size_t buffer_size = 0, alpha_size = 0;
|
|
|
|
|
const int8_t* buffer_ptr = nullptr;
|
|
|
|
|
const float* alpha_ptr = nullptr;
|
2024-04-19 11:58:21 +08:00
|
|
|
if (quan->buffer()) {
|
|
|
|
|
buffer_size = quan->buffer()->size();
|
|
|
|
|
buffer_ptr = quan->buffer()->data();
|
|
|
|
|
}
|
|
|
|
|
if (quan->alpha()) {
|
|
|
|
|
alpha_size = quan->alpha()->size();
|
|
|
|
|
alpha_ptr = quan->alpha()->data();
|
2023-09-20 20:16:25 +08:00
|
|
|
}
|
2023-05-18 19:11:50 +08:00
|
|
|
if (quan->index() != nullptr) {
|
|
|
|
|
if (forceFloat) {
|
|
|
|
|
// Expand sparse to dense
|
|
|
|
|
result->weightFloat.reset(quan->weightSize());
|
|
|
|
|
if (nullptr == result->weightFloat.get()) {
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
::memset(result->weightFloat.get(), 0, quan->weightSize() * sizeof(float));
|
|
|
|
|
auto index = quan->index()->data();
|
|
|
|
|
auto indexSize = quan->index()->size();
|
2023-09-20 20:16:25 +08:00
|
|
|
if (nullptr == alpha_ptr || alpha_size != indexSize) {
|
2023-05-18 19:11:50 +08:00
|
|
|
MNN_ERROR("The model is error, don't has alpha but has index\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
for (uint32_t i=0; i<indexSize; ++i) {
|
2023-09-20 20:16:25 +08:00
|
|
|
result->weightFloat.get()[index[i]] = alpha_ptr[i];
|
2023-05-18 19:11:50 +08:00
|
|
|
}
|
|
|
|
|
} // Otherwise needn't treat, just return result with quan info
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
size_t weightLength = 0;
|
2020-03-02 22:13:38 +08:00
|
|
|
int8_t *buffer = nullptr;
|
2023-09-20 20:16:25 +08:00
|
|
|
auto originBuffer = (unsigned char *)buffer_ptr;
|
2024-06-15 15:39:59 +08:00
|
|
|
|
2020-03-02 22:13:38 +08:00
|
|
|
if (1 == quan->type()) {
|
2024-05-11 19:17:02 +08:00
|
|
|
buffer = IDSTDecoder::ReadQuanData_c(originBuffer, &weightLength, result.get(), quan->shapeInt32());
|
2020-03-02 22:13:38 +08:00
|
|
|
}
|
|
|
|
|
if (2 == quan->type()) {
|
2024-05-11 19:17:02 +08:00
|
|
|
buffer = IDSTDecoder::ReadSparseQuanData_c(originBuffer, &weightLength, alpha_ptr, alpha_size, result.get(), quan->shapeInt32());
|
2023-05-18 19:11:50 +08:00
|
|
|
}
|
2024-04-19 11:58:21 +08:00
|
|
|
if (result->weightMap.size() > 0) {
|
2023-05-18 19:11:50 +08:00
|
|
|
result->canUseInt4 = true;
|
2024-04-19 11:58:21 +08:00
|
|
|
for (auto value : result->weightMap) {
|
|
|
|
|
if (value < -8 || value > 7) {
|
|
|
|
|
result->canUseInt4 = false;
|
|
|
|
|
}
|
2023-05-18 19:11:50 +08:00
|
|
|
}
|
2020-03-02 22:13:38 +08:00
|
|
|
}
|
|
|
|
|
// read fp16 data
|
|
|
|
|
if (3 == quan->type()) {
|
2023-09-20 20:16:25 +08:00
|
|
|
weightLength = buffer_size / sizeof(half_float::half);
|
|
|
|
|
std::vector<int8_t> tempHalfWeight(buffer_size);
|
|
|
|
|
::memcpy(tempHalfWeight.data(), buffer_ptr, buffer_size);
|
2020-03-02 22:13:38 +08:00
|
|
|
auto halfWeight = reinterpret_cast<half_float::half *>(tempHalfWeight.data());
|
|
|
|
|
result->weightFloat.reset(weightLength);
|
|
|
|
|
if (nullptr == result->weightFloat.get()) {
|
|
|
|
|
MNN_PRINT("Alloc memory error for extract fp16 back to float\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
std::transform(halfWeight, halfWeight + weightLength, result->weightFloat.get(),
|
|
|
|
|
[](half_float::half h) { return float(h); });
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
// weight int8 only
|
|
|
|
|
if (4 == quan->type()) {
|
2023-09-20 20:16:25 +08:00
|
|
|
weightLength = buffer_size;
|
2021-04-08 15:34:23 +08:00
|
|
|
result->weight.reset(weightLength);
|
2023-09-20 20:16:25 +08:00
|
|
|
::memcpy(result->weight.get(), buffer_ptr, weightLength);
|
2020-11-05 16:41:56 +08:00
|
|
|
}
|
|
|
|
|
|
2021-04-08 15:34:23 +08:00
|
|
|
if (result->weight.get() == nullptr) {
|
|
|
|
|
if (nullptr == buffer) {
|
|
|
|
|
MNN_PRINT("Alloc memory error for extract idst int8\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
result->weight.set(buffer, weightLength);
|
2020-03-02 22:13:38 +08:00
|
|
|
}
|
2023-09-20 20:16:25 +08:00
|
|
|
result->alpha.reset(alpha_size);
|
2020-03-02 22:13:38 +08:00
|
|
|
if (nullptr == result->alpha.get()) {
|
|
|
|
|
MNN_PRINT("Alloc memory error for extract idst int8\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
2023-09-20 20:16:25 +08:00
|
|
|
::memcpy(result->alpha.get(), alpha_ptr, alpha_size * sizeof(float));
|
2023-05-18 19:11:50 +08:00
|
|
|
{
|
|
|
|
|
int outputCount = 0;
|
|
|
|
|
bool oldType4 = (quan->type() == 4 && quan->aMin() == 0 && std::abs(quan->quantScale()) < 1e-6);
|
|
|
|
|
if (quan->readType() != 0 || oldType4) {
|
|
|
|
|
result->asymmetric = true;
|
|
|
|
|
outputCount = result->alpha.size() / 2;
|
|
|
|
|
} else {
|
|
|
|
|
result->asymmetric = false;
|
|
|
|
|
outputCount = result->alpha.size(); // backward compability with previous symmetric quantization
|
|
|
|
|
}
|
|
|
|
|
if (result->asymmetric) {
|
|
|
|
|
// clampMin is minVal in asymmetric quant, clampMin = -(2^(bit))
|
|
|
|
|
// and old version clampMin is -128
|
|
|
|
|
float clampMin = quan->aMin() == 0 ? -128 : quan->aMin();
|
|
|
|
|
for (int o = 0; o < outputCount; ++o) {
|
|
|
|
|
result->alpha.get()[2 * o] = result->alpha.get()[2 * o] - clampMin * result->alpha.get()[2 * o + 1];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!quan->has_scaleInt()) {
|
|
|
|
|
float extraFactor = quan->quantScale();
|
|
|
|
|
// for old type 4 models, their quan->quantScale is 0. which will introduce a bug here
|
|
|
|
|
if (oldType4) {
|
|
|
|
|
extraFactor = 1.0f;
|
|
|
|
|
}
|
|
|
|
|
for (int o=0; o<result->alpha.size(); ++o) {
|
|
|
|
|
result->alpha.get()[o] *= extraFactor;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-03-13 14:55:54 +08:00
|
|
|
if (forceInt8) {
|
2021-04-08 15:34:23 +08:00
|
|
|
return result;
|
|
|
|
|
}
|
2020-03-02 22:13:38 +08:00
|
|
|
if (!quan->has_scaleInt() || forceFloat) {
|
|
|
|
|
// Back to float
|
|
|
|
|
result->weightFloat.reset(weightLength);
|
|
|
|
|
if (nullptr == result->weightFloat.get()) {
|
|
|
|
|
MNN_PRINT("Alloc memory error for extract idst int8/ Back to float\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
2020-11-25 19:03:07 +08:00
|
|
|
int outputCount = 0;
|
2023-05-18 19:11:50 +08:00
|
|
|
if (result->asymmetric) {
|
|
|
|
|
outputCount = result->alpha.size() / 2;
|
2020-11-25 19:03:07 +08:00
|
|
|
} else {
|
2023-05-18 19:11:50 +08:00
|
|
|
outputCount = result->alpha.size();
|
2020-11-25 19:03:07 +08:00
|
|
|
}
|
2020-03-02 22:13:38 +08:00
|
|
|
int partWeightSize = weightLength / outputCount;
|
|
|
|
|
for (int o = 0; o < outputCount; ++o) {
|
2023-05-18 19:11:50 +08:00
|
|
|
float min = 0.0f;
|
|
|
|
|
float alpha = 0.0f;
|
|
|
|
|
if (result->asymmetric) {
|
|
|
|
|
min = result->alpha.get()[2*o];
|
|
|
|
|
alpha = result->alpha.get()[2*o+1];
|
|
|
|
|
} else {
|
|
|
|
|
alpha = result->alpha.get()[o];
|
|
|
|
|
}
|
2020-03-02 22:13:38 +08:00
|
|
|
auto dstW = result->weightFloat.get() + o * partWeightSize;
|
|
|
|
|
auto srcW = result->weight.get() + o * partWeightSize;
|
2023-05-18 19:11:50 +08:00
|
|
|
for (int v=0; v < partWeightSize; ++v) {
|
|
|
|
|
dstW[v] = (float)srcW[v] * alpha + min;
|
2020-03-02 22:13:38 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
result->weight.release();
|
|
|
|
|
result->alpha.release();
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
2020-03-12 20:29:43 +08:00
|
|
|
|
2023-09-20 20:16:25 +08:00
|
|
|
// Fetch the float weights of a convolution: decode the quantization record
// when one is present, otherwise fall back to the op's raw weight array.
// On return *originWeight/*originWeightSize point at valid float weights.
void ConvolutionCommon::getConvParameters(std::shared_ptr<Int8Common> *quanCommon, Backend* backend, const MNN::Convolution2D *conv2d, const float** originWeight, int* originWeightSize) {
    *originWeight     = nullptr;
    *originWeightSize = 0;
    if (nullptr != conv2d->quanParameter()) {
        // Sparse (indexed) storage must be expanded to dense float by load().
        const bool needFloat = (conv2d->quanParameter()->index() != nullptr);
        *quanCommon = load(conv2d, backend, needFloat);
        *originWeight     = (*quanCommon)->weightFloat.get();
        *originWeightSize = (*quanCommon)->weightFloat.size();
    }
    if (nullptr == *originWeight) {
        // No decoded float weights: use the plain weight blob from the op.
        *originWeight     = conv2d->weight()->data();
        *originWeightSize = conv2d->weight()->size();
    }
}
|
|
|
|
|
|
2023-09-20 20:16:25 +08:00
|
|
|
// Gather int8 inference parameters (weight, per-channel scale, bias) for a
// quantized convolution. `scale` and `bias` are caller-allocated buffers of
// at least outputCount elements.
// Returns false (with an error log) when weight or bias/scale data is absent.
bool ConvolutionCommon::getConvInt8Parameters(const MNN::Convolution2D* conv2d, std::shared_ptr<Int8Common>& quanCommon, Backend* backend,
                                              const int8_t*& weight, int& weightSize, float*& scale, int32_t*& bias) {
    int outputCount = conv2d->common()->outputCount();
    weightSize = 0;
    // fix xcode UndefinedBehaviorSanitizer
    if (conv2d->symmetricQuan()->weight() != nullptr) {
        weight = conv2d->symmetricQuan()->weight()->data();
        weightSize = conv2d->symmetricQuan()->weight()->size();
    }
    if (conv2d->quanParameter() && conv2d->quanParameter()->buffer()) {
        // Decode IDST-compressed weights, keeping them int8 (forceInt8).
        quanCommon = ConvolutionCommon::load(conv2d, backend, false, true);
        weight = quanCommon->weight.get();
        weightSize = quanCommon->weight.size();
    }
    if (weight == nullptr) {
        MNN_ERROR("ConvolutionCommon::getConvInt8Parameters: No weight data!");
        return false;
    }
    if (conv2d->symmetricQuan()->bias() && conv2d->symmetricQuan()->scale()) {
        // Compability for old model
        MNN_ASSERT(conv2d->symmetricQuan()->bias()->size() == outputCount && conv2d->symmetricQuan()->scale()->size() == outputCount);
        ::memcpy(bias, conv2d->symmetricQuan()->bias()->data(), outputCount * sizeof(int32_t));
        ::memcpy(scale, conv2d->symmetricQuan()->scale()->data(), outputCount * sizeof(float));
        return true;
    }
    // fix: guard quanParameter() itself — only its buffer() was checked above,
    // so quanParameter() can still be null on this path.
    if (conv2d->bias() && conv2d->quanParameter() && conv2d->quanParameter()->alpha()) {
        // NOTE(review): float bias bits are copied into the int32_t buffer
        // as-is (same element size) — presumably reinterpreted downstream;
        // confirm against callers before changing.
        ::memcpy(bias, conv2d->bias()->data(), outputCount * sizeof(float));
        ::memcpy(scale, conv2d->quanParameter()->alpha()->data(), outputCount * sizeof(float));
        return true;
    }
    MNN_ERROR("ConvolutionCommon::getConvInt8Parameters: No bias & scale data!");
    return false;
}
|
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
std::pair<int, int> ConvolutionCommon::convolutionPad(const Tensor *input, const Tensor *output,
|
|
|
|
|
const Convolution2DCommon *mCommon) {
|
2020-03-12 20:29:43 +08:00
|
|
|
if (mCommon->padMode() == PadMode_SAME) {
|
|
|
|
|
int kernelWidthSize = (mCommon->kernelX() - 1) * mCommon->dilateX() + 1;
|
|
|
|
|
int kernelHeightSize = (mCommon->kernelY() - 1) * mCommon->dilateY() + 1;
|
|
|
|
|
|
|
|
|
|
int padNeededWidth = (output->width() - 1) * mCommon->strideX() + kernelWidthSize - input->width();
|
|
|
|
|
int padNeededHeight = (output->height() - 1) * mCommon->strideY() + kernelHeightSize - input->height();
|
2020-11-05 16:41:56 +08:00
|
|
|
auto mPadX = padNeededWidth / 2;
|
|
|
|
|
auto mPadY = padNeededHeight / 2;
|
2020-03-12 20:29:43 +08:00
|
|
|
return std::make_pair(mPadX, mPadY);
|
|
|
|
|
}
|
|
|
|
|
auto mPadX = mCommon->padX();
|
|
|
|
|
auto mPadY = mCommon->padY();
|
2021-06-11 17:17:13 +08:00
|
|
|
if (nullptr != mCommon->pads() && mCommon->pads()->size() >= 2) {
|
2020-03-12 20:29:43 +08:00
|
|
|
mPadX = mCommon->pads()->data()[1];
|
|
|
|
|
mPadY = mCommon->pads()->data()[0];
|
|
|
|
|
}
|
|
|
|
|
return std::make_pair(mPadX, mPadY);
|
|
|
|
|
}
|
2021-01-06 16:29:37 +08:00
|
|
|
|
|
|
|
|
std::tuple<int, int, int, int> ConvolutionCommon::convolutionPadFull(const Tensor* input, const Tensor* output,
|
|
|
|
|
const Convolution2DCommon* common) {
|
|
|
|
|
auto pad = convolutionPad(input, output, common);
|
|
|
|
|
int iw = input->width();
|
|
|
|
|
int ih = input->height();
|
|
|
|
|
int ow = output->width();
|
|
|
|
|
int oh = output->height();
|
|
|
|
|
|
|
|
|
|
int right = (ow - 1) * common->strideX() + (common->kernelX() - 1) * common->dilateX() - pad.first;
|
|
|
|
|
int padRight = 0;
|
|
|
|
|
if (right >= iw) {
|
|
|
|
|
padRight = right - iw + 1;
|
|
|
|
|
}
|
|
|
|
|
int bottom = (oh - 1) * common->strideY() + (common->kernelY() - 1) * common->dilateY() - pad.second;
|
|
|
|
|
int padBottom = 0;
|
|
|
|
|
if (bottom >= ih) {
|
|
|
|
|
padBottom = bottom - ih + 1;
|
|
|
|
|
}
|
|
|
|
|
return std::make_tuple(pad.first, pad.second, padRight, padBottom);
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
std::pair<int, int> ConvolutionCommon::convolutionTransposePad(const Tensor *input, const Tensor *output,
|
|
|
|
|
const Convolution2DCommon *mCommon) {
|
2020-03-12 20:29:43 +08:00
|
|
|
if (mCommon->padMode() == PadMode_SAME) {
|
|
|
|
|
const int outputWidth = output->width();
|
|
|
|
|
const int outputHeight = output->height();
|
|
|
|
|
|
|
|
|
|
const int outputWidthPadded = (input->width() - 1) * mCommon->strideX() + mCommon->kernelX();
|
|
|
|
|
const int outputHeightPadded = (input->height() - 1) * mCommon->strideY() + mCommon->kernelY();
|
|
|
|
|
|
|
|
|
|
const int padNeededWidth = outputWidthPadded - outputWidth;
|
|
|
|
|
const int padNeededHeight = outputHeightPadded - outputHeight;
|
|
|
|
|
|
|
|
|
|
auto mPadX = padNeededWidth / 2;
|
|
|
|
|
auto mPadY = padNeededHeight / 2;
|
|
|
|
|
return std::make_pair(mPadX, mPadY);
|
|
|
|
|
}
|
|
|
|
|
auto mPadX = mCommon->padX();
|
|
|
|
|
auto mPadY = mCommon->padY();
|
2021-06-11 17:17:13 +08:00
|
|
|
if (nullptr != mCommon->pads() && mCommon->pads()->size() >= 2) {
|
2020-03-12 20:29:43 +08:00
|
|
|
mPadY = mCommon->pads()->data()[0];
|
|
|
|
|
mPadX = mCommon->pads()->data()[1];
|
|
|
|
|
}
|
|
|
|
|
return std::make_pair(mPadX, mPadY);
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
} // namespace MNN
|