//
//  ConvolutionCommon.cpp
//  MNN
//
//  Created by MNN on 2020/03/02.
//  Copyright © 2018, Alibaba Group Holding Limited
//
#include "ConvolutionCommon.hpp"
|
2020-11-05 16:41:56 +08:00
|
|
|
#include <math.h>
|
2023-05-18 19:11:50 +08:00
|
|
|
#include "backend/cpu/compute/CommonOptFunction.h"
|
2020-11-05 16:41:56 +08:00
|
|
|
#include "half.hpp"
|
2023-09-20 20:16:25 +08:00
|
|
|
#include "core/OpCommonUtils.hpp"
|
2024-05-11 19:17:02 +08:00
|
|
|
#include "core/IDSTDecoder.hpp"
|
2023-06-27 10:33:16 +08:00
|
|
|
|
2020-03-02 22:13:38 +08:00
|
|
|
namespace MNN {
|
2023-09-20 20:16:25 +08:00
|
|
|
|
|
|
|
|
std::shared_ptr<ConvolutionCommon::Int8Common> ConvolutionCommon::load(const Convolution2D *conv, Backend* backend, bool forceFloat, bool forceInt8) {
|
|
|
|
|
auto quan = conv->quanParameter();
|
|
|
|
|
auto result = std::make_shared<Int8Common>();
|
2023-05-18 19:11:50 +08:00
|
|
|
result->quan = quan;
|
2023-09-20 20:16:25 +08:00
|
|
|
size_t buffer_size = 0, alpha_size = 0;
|
|
|
|
|
const int8_t* buffer_ptr = nullptr;
|
|
|
|
|
const float* alpha_ptr = nullptr;
|
2024-04-19 11:58:21 +08:00
|
|
|
if (quan->buffer()) {
|
|
|
|
|
buffer_size = quan->buffer()->size();
|
|
|
|
|
buffer_ptr = quan->buffer()->data();
|
|
|
|
|
}
|
|
|
|
|
if (quan->alpha()) {
|
|
|
|
|
alpha_size = quan->alpha()->size();
|
|
|
|
|
alpha_ptr = quan->alpha()->data();
|
2023-09-20 20:16:25 +08:00
|
|
|
}
|
2023-05-18 19:11:50 +08:00
|
|
|
if (quan->index() != nullptr) {
|
|
|
|
|
if (forceFloat) {
|
|
|
|
|
// Expand sparse to dense
|
|
|
|
|
result->weightFloat.reset(quan->weightSize());
|
|
|
|
|
if (nullptr == result->weightFloat.get()) {
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
::memset(result->weightFloat.get(), 0, quan->weightSize() * sizeof(float));
|
|
|
|
|
auto index = quan->index()->data();
|
|
|
|
|
auto indexSize = quan->index()->size();
|
2023-09-20 20:16:25 +08:00
|
|
|
if (nullptr == alpha_ptr || alpha_size != indexSize) {
|
2023-05-18 19:11:50 +08:00
|
|
|
MNN_ERROR("The model is error, don't has alpha but has index\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
for (uint32_t i=0; i<indexSize; ++i) {
|
2023-09-20 20:16:25 +08:00
|
|
|
result->weightFloat.get()[index[i]] = alpha_ptr[i];
|
2023-05-18 19:11:50 +08:00
|
|
|
}
|
|
|
|
|
} // Otherwise needn't treat, just return result with quan info
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
size_t weightLength = 0;
|
2020-03-02 22:13:38 +08:00
|
|
|
int8_t *buffer = nullptr;
|
2023-09-20 20:16:25 +08:00
|
|
|
auto originBuffer = (unsigned char *)buffer_ptr;
|
2024-06-15 15:39:59 +08:00
|
|
|
|
2020-03-02 22:13:38 +08:00
|
|
|
if (1 == quan->type()) {
|
2024-05-11 19:17:02 +08:00
|
|
|
buffer = IDSTDecoder::ReadQuanData_c(originBuffer, &weightLength, result.get(), quan->shapeInt32());
|
2020-03-02 22:13:38 +08:00
|
|
|
}
|
|
|
|
|
if (2 == quan->type()) {
|
2024-05-11 19:17:02 +08:00
|
|
|
buffer = IDSTDecoder::ReadSparseQuanData_c(originBuffer, &weightLength, alpha_ptr, alpha_size, result.get(), quan->shapeInt32());
|
2023-05-18 19:11:50 +08:00
|
|
|
}
|
2024-04-19 11:58:21 +08:00
|
|
|
if (result->weightMap.size() > 0) {
|
2023-05-18 19:11:50 +08:00
|
|
|
result->canUseInt4 = true;
|
2024-04-19 11:58:21 +08:00
|
|
|
for (auto value : result->weightMap) {
|
|
|
|
|
if (value < -8 || value > 7) {
|
|
|
|
|
result->canUseInt4 = false;
|
|
|
|
|
}
|
2023-05-18 19:11:50 +08:00
|
|
|
}
|
2020-03-02 22:13:38 +08:00
|
|
|
}
|
|
|
|
|
// read fp16 data
|
|
|
|
|
if (3 == quan->type()) {
|
2023-09-20 20:16:25 +08:00
|
|
|
weightLength = buffer_size / sizeof(half_float::half);
|
|
|
|
|
std::vector<int8_t> tempHalfWeight(buffer_size);
|
|
|
|
|
::memcpy(tempHalfWeight.data(), buffer_ptr, buffer_size);
|
2020-03-02 22:13:38 +08:00
|
|
|
auto halfWeight = reinterpret_cast<half_float::half *>(tempHalfWeight.data());
|
|
|
|
|
result->weightFloat.reset(weightLength);
|
|
|
|
|
if (nullptr == result->weightFloat.get()) {
|
|
|
|
|
MNN_PRINT("Alloc memory error for extract fp16 back to float\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
std::transform(halfWeight, halfWeight + weightLength, result->weightFloat.get(),
|
|
|
|
|
[](half_float::half h) { return float(h); });
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
// weight int8 only
|
|
|
|
|
if (4 == quan->type()) {
|
2023-09-20 20:16:25 +08:00
|
|
|
weightLength = buffer_size;
|
2021-04-08 15:34:23 +08:00
|
|
|
result->weight.reset(weightLength);
|
2023-09-20 20:16:25 +08:00
|
|
|
::memcpy(result->weight.get(), buffer_ptr, weightLength);
|
2020-11-05 16:41:56 +08:00
|
|
|
}
|
|
|
|
|
|
2021-04-08 15:34:23 +08:00
|
|
|
if (result->weight.get() == nullptr) {
|
|
|
|
|
if (nullptr == buffer) {
|
|
|
|
|
MNN_PRINT("Alloc memory error for extract idst int8\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
result->weight.set(buffer, weightLength);
|
2020-03-02 22:13:38 +08:00
|
|
|
}
|
2023-09-20 20:16:25 +08:00
|
|
|
result->alpha.reset(alpha_size);
|
2020-03-02 22:13:38 +08:00
|
|
|
if (nullptr == result->alpha.get()) {
|
|
|
|
|
MNN_PRINT("Alloc memory error for extract idst int8\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
2023-09-20 20:16:25 +08:00
|
|
|
::memcpy(result->alpha.get(), alpha_ptr, alpha_size * sizeof(float));
|
2023-05-18 19:11:50 +08:00
|
|
|
{
|
|
|
|
|
int outputCount = 0;
|
|
|
|
|
bool oldType4 = (quan->type() == 4 && quan->aMin() == 0 && std::abs(quan->quantScale()) < 1e-6);
|
|
|
|
|
if (quan->readType() != 0 || oldType4) {
|
|
|
|
|
result->asymmetric = true;
|
|
|
|
|
outputCount = result->alpha.size() / 2;
|
|
|
|
|
} else {
|
|
|
|
|
result->asymmetric = false;
|
|
|
|
|
outputCount = result->alpha.size(); // backward compability with previous symmetric quantization
|
|
|
|
|
}
|
|
|
|
|
if (result->asymmetric) {
|
|
|
|
|
// clampMin is minVal in asymmetric quant, clampMin = -(2^(bit))
|
|
|
|
|
// and old version clampMin is -128
|
|
|
|
|
float clampMin = quan->aMin() == 0 ? -128 : quan->aMin();
|
|
|
|
|
for (int o = 0; o < outputCount; ++o) {
|
|
|
|
|
result->alpha.get()[2 * o] = result->alpha.get()[2 * o] - clampMin * result->alpha.get()[2 * o + 1];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!quan->has_scaleInt()) {
|
|
|
|
|
float extraFactor = quan->quantScale();
|
|
|
|
|
// for old type 4 models, their quan->quantScale is 0. which will introduce a bug here
|
|
|
|
|
if (oldType4) {
|
|
|
|
|
extraFactor = 1.0f;
|
|
|
|
|
}
|
|
|
|
|
for (int o=0; o<result->alpha.size(); ++o) {
|
|
|
|
|
result->alpha.get()[o] *= extraFactor;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-03-13 14:55:54 +08:00
|
|
|
if (forceInt8) {
|
2021-04-08 15:34:23 +08:00
|
|
|
return result;
|
|
|
|
|
}
|
2020-03-02 22:13:38 +08:00
|
|
|
if (!quan->has_scaleInt() || forceFloat) {
|
|
|
|
|
// Back to float
|
|
|
|
|
result->weightFloat.reset(weightLength);
|
|
|
|
|
if (nullptr == result->weightFloat.get()) {
|
|
|
|
|
MNN_PRINT("Alloc memory error for extract idst int8/ Back to float\n");
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
2020-11-25 19:03:07 +08:00
|
|
|
int outputCount = 0;
|
2023-05-18 19:11:50 +08:00
|
|
|
if (result->asymmetric) {
|
|
|
|
|
outputCount = result->alpha.size() / 2;
|
2020-11-25 19:03:07 +08:00
|
|
|
} else {
|
2023-05-18 19:11:50 +08:00
|
|
|
outputCount = result->alpha.size();
|
2020-11-25 19:03:07 +08:00
|
|
|
}
|
2020-03-02 22:13:38 +08:00
|
|
|
int partWeightSize = weightLength / outputCount;
|
|
|
|
|
for (int o = 0; o < outputCount; ++o) {
|
2023-05-18 19:11:50 +08:00
|
|
|
float min = 0.0f;
|
|
|
|
|
float alpha = 0.0f;
|
|
|
|
|
if (result->asymmetric) {
|
|
|
|
|
min = result->alpha.get()[2*o];
|
|
|
|
|
alpha = result->alpha.get()[2*o+1];
|
|
|
|
|
} else {
|
|
|
|
|
alpha = result->alpha.get()[o];
|
|
|
|
|
}
|
2020-03-02 22:13:38 +08:00
|
|
|
auto dstW = result->weightFloat.get() + o * partWeightSize;
|
|
|
|
|
auto srcW = result->weight.get() + o * partWeightSize;
|
2023-05-18 19:11:50 +08:00
|
|
|
for (int v=0; v < partWeightSize; ++v) {
|
|
|
|
|
dstW[v] = (float)srcW[v] * alpha + min;
|
2020-03-02 22:13:38 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
result->weight.release();
|
|
|
|
|
result->alpha.release();
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
2020-03-12 20:29:43 +08:00
|
|
|
|
2023-09-20 20:16:25 +08:00
|
|
|
// Fetch the float weights of a convolution: decode the quantization record
// when one is present, otherwise fall back to the op's raw weight array.
// On return *originWeight/*originWeightSize point at valid float weights.
void ConvolutionCommon::getConvParameters(std::shared_ptr<Int8Common> *quanCommon, Backend* backend, const MNN::Convolution2D *conv2d, const float** originWeight, int* originWeightSize) {
    *originWeight     = nullptr;
    *originWeightSize = 0;
    if (nullptr != conv2d->quanParameter()) {
        // Sparse (indexed) storage must be expanded to dense float by load().
        const bool needFloat = (conv2d->quanParameter()->index() != nullptr);
        *quanCommon = load(conv2d, backend, needFloat);
        *originWeight     = (*quanCommon)->weightFloat.get();
        *originWeightSize = (*quanCommon)->weightFloat.size();
    }
    if (nullptr == *originWeight) {
        // No decoded float weights: use the plain weight blob from the op.
        *originWeight     = conv2d->weight()->data();
        *originWeightSize = conv2d->weight()->size();
    }
}
|
|
|
|
|
|
2023-09-20 20:16:25 +08:00
|
|
|
// Gather int8 inference parameters (weight, per-channel scale, bias) for a
// quantized convolution. `scale` and `bias` are caller-allocated buffers of
// at least outputCount elements.
// Returns false (with an error log) when weight or bias/scale data is absent.
bool ConvolutionCommon::getConvInt8Parameters(const MNN::Convolution2D* conv2d, std::shared_ptr<Int8Common>& quanCommon, Backend* backend,
                                              const int8_t*& weight, int& weightSize, float*& scale, int32_t*& bias) {
    int outputCount = conv2d->common()->outputCount();
    weightSize = 0;
    // fix xcode UndefinedBehaviorSanitizer
    if (conv2d->symmetricQuan()->weight() != nullptr) {
        weight = conv2d->symmetricQuan()->weight()->data();
        weightSize = conv2d->symmetricQuan()->weight()->size();
    }
    if (conv2d->quanParameter() && conv2d->quanParameter()->buffer()) {
        // Decode IDST-compressed weights, keeping them int8 (forceInt8).
        quanCommon = ConvolutionCommon::load(conv2d, backend, false, true);
        weight = quanCommon->weight.get();
        weightSize = quanCommon->weight.size();
    }
    if (weight == nullptr) {
        MNN_ERROR("ConvolutionCommon::getConvInt8Parameters: No weight data!");
        return false;
    }
    if (conv2d->symmetricQuan()->bias() && conv2d->symmetricQuan()->scale()) {
        // Compability for old model
        MNN_ASSERT(conv2d->symmetricQuan()->bias()->size() == outputCount && conv2d->symmetricQuan()->scale()->size() == outputCount);
        ::memcpy(bias, conv2d->symmetricQuan()->bias()->data(), outputCount * sizeof(int32_t));
        ::memcpy(scale, conv2d->symmetricQuan()->scale()->data(), outputCount * sizeof(float));
        return true;
    }
    // fix: guard quanParameter() itself — only its buffer() was checked above,
    // so quanParameter() can still be null on this path.
    if (conv2d->bias() && conv2d->quanParameter() && conv2d->quanParameter()->alpha()) {
        // NOTE(review): float bias bits are copied into the int32_t buffer
        // as-is (same element size) — presumably reinterpreted downstream;
        // confirm against callers before changing.
        ::memcpy(bias, conv2d->bias()->data(), outputCount * sizeof(float));
        ::memcpy(scale, conv2d->quanParameter()->alpha()->data(), outputCount * sizeof(float));
        return true;
    }
    MNN_ERROR("ConvolutionCommon::getConvInt8Parameters: No bias & scale data!");
    return false;
}
|
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
std::pair<int, int> ConvolutionCommon::convolutionPad(const Tensor *input, const Tensor *output,
|
|
|
|
|
const Convolution2DCommon *mCommon) {
|
2020-03-12 20:29:43 +08:00
|
|
|
if (mCommon->padMode() == PadMode_SAME) {
|
|
|
|
|
int kernelWidthSize = (mCommon->kernelX() - 1) * mCommon->dilateX() + 1;
|
|
|
|
|
int kernelHeightSize = (mCommon->kernelY() - 1) * mCommon->dilateY() + 1;
|
|
|
|
|
|
|
|
|
|
int padNeededWidth = (output->width() - 1) * mCommon->strideX() + kernelWidthSize - input->width();
|
|
|
|
|
int padNeededHeight = (output->height() - 1) * mCommon->strideY() + kernelHeightSize - input->height();
|
2020-11-05 16:41:56 +08:00
|
|
|
auto mPadX = padNeededWidth / 2;
|
|
|
|
|
auto mPadY = padNeededHeight / 2;
|
2020-03-12 20:29:43 +08:00
|
|
|
return std::make_pair(mPadX, mPadY);
|
|
|
|
|
}
|
|
|
|
|
auto mPadX = mCommon->padX();
|
|
|
|
|
auto mPadY = mCommon->padY();
|
2021-06-11 17:17:13 +08:00
|
|
|
if (nullptr != mCommon->pads() && mCommon->pads()->size() >= 2) {
|
2020-03-12 20:29:43 +08:00
|
|
|
mPadX = mCommon->pads()->data()[1];
|
|
|
|
|
mPadY = mCommon->pads()->data()[0];
|
|
|
|
|
}
|
|
|
|
|
return std::make_pair(mPadX, mPadY);
|
|
|
|
|
}
|
2021-01-06 16:29:37 +08:00
|
|
|
|
|
|
|
|
std::tuple<int, int, int, int> ConvolutionCommon::convolutionPadFull(const Tensor* input, const Tensor* output,
|
|
|
|
|
const Convolution2DCommon* common) {
|
|
|
|
|
auto pad = convolutionPad(input, output, common);
|
|
|
|
|
int iw = input->width();
|
|
|
|
|
int ih = input->height();
|
|
|
|
|
int ow = output->width();
|
|
|
|
|
int oh = output->height();
|
|
|
|
|
|
|
|
|
|
int right = (ow - 1) * common->strideX() + (common->kernelX() - 1) * common->dilateX() - pad.first;
|
|
|
|
|
int padRight = 0;
|
|
|
|
|
if (right >= iw) {
|
|
|
|
|
padRight = right - iw + 1;
|
|
|
|
|
}
|
|
|
|
|
int bottom = (oh - 1) * common->strideY() + (common->kernelY() - 1) * common->dilateY() - pad.second;
|
|
|
|
|
int padBottom = 0;
|
|
|
|
|
if (bottom >= ih) {
|
|
|
|
|
padBottom = bottom - ih + 1;
|
|
|
|
|
}
|
|
|
|
|
return std::make_tuple(pad.first, pad.second, padRight, padBottom);
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
std::pair<int, int> ConvolutionCommon::convolutionTransposePad(const Tensor *input, const Tensor *output,
|
|
|
|
|
const Convolution2DCommon *mCommon) {
|
2020-03-12 20:29:43 +08:00
|
|
|
if (mCommon->padMode() == PadMode_SAME) {
|
|
|
|
|
const int outputWidth = output->width();
|
|
|
|
|
const int outputHeight = output->height();
|
|
|
|
|
|
|
|
|
|
const int outputWidthPadded = (input->width() - 1) * mCommon->strideX() + mCommon->kernelX();
|
|
|
|
|
const int outputHeightPadded = (input->height() - 1) * mCommon->strideY() + mCommon->kernelY();
|
|
|
|
|
|
|
|
|
|
const int padNeededWidth = outputWidthPadded - outputWidth;
|
|
|
|
|
const int padNeededHeight = outputHeightPadded - outputHeight;
|
|
|
|
|
|
|
|
|
|
auto mPadX = padNeededWidth / 2;
|
|
|
|
|
auto mPadY = padNeededHeight / 2;
|
|
|
|
|
return std::make_pair(mPadX, mPadY);
|
|
|
|
|
}
|
|
|
|
|
auto mPadX = mCommon->padX();
|
|
|
|
|
auto mPadY = mCommon->padY();
|
2021-06-11 17:17:13 +08:00
|
|
|
if (nullptr != mCommon->pads() && mCommon->pads()->size() >= 2) {
|
2020-03-12 20:29:43 +08:00
|
|
|
mPadY = mCommon->pads()->data()[0];
|
|
|
|
|
mPadX = mCommon->pads()->data()[1];
|
|
|
|
|
}
|
|
|
|
|
return std::make_pair(mPadX, mPadY);
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-05 16:41:56 +08:00
|
|
|
} // namespace MNN
|