mirror of https://github.com/alibaba/MNN.git
78 lines
2.4 KiB
C++
78 lines
2.4 KiB
C++
//
|
|
//  CPUDynamicQuant.cpp
|
|
// MNN
|
|
//
|
|
// Created by MNN on 2020/07/15.
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
//
|
|
|
|
#include <cmath>
|
|
#include "backend/cpu/CPUDynamicQuant.hpp"
|
|
#include "backend/cpu/CPUBackend.hpp"
|
|
#include "backend/cpu/compute/CommonOptFunction.h"
|
|
#include "backend/cpu/compute/Int8FunctionsOpt.h"
|
|
#include "core/Execution.hpp"
|
|
#include "core/Concurrency.h"
|
|
#include "core/OpCommonUtils.hpp"
|
|
#include "MNN_generated.h"
|
|
namespace MNN {
|
|
#ifndef MNN_REDUCE_SIZE
|
|
|
|
// Builds the execution for a dynamic-quantization op.
// `backend` is forwarded to the Execution base class; `op` is accepted but
// not inspected, so this object carries no per-op configuration.
CPUDynamicQuant::CPUDynamicQuant(const MNN::Op* op, Backend* backend)
    : Execution(backend) {
    // Nothing to initialise beyond the base class.
}
|
|
|
|
// Resize hook: this op performs no shape-dependent preparation, so neither
// `inputs` nor `outputs` is examined and the call always reports NO_ERROR.
ErrorCode CPUDynamicQuant::onResize(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs) {
    const ErrorCode status = NO_ERROR;
    return status;
}
|
|
|
|
// Quantizes the float input to int8 with a single scale / zero point that is
// derived at run time from the minimum and maximum of the input values.
//
//   inputs[0]  : float data to quantize.
//   outputs[0] : int8 buffer that receives the quantized data.
//   outputs[1] : first float element receives the dequantization scale
//                (range / 255).
//   outputs[2] : first float element receives the zero point (stored as float).
//
// Always returns NO_ERROR.
ErrorCode CPUDynamicQuant::onExecute(const std::vector<Tensor*> &inputs,
                                     const std::vector<Tensor*> &outputs) {
    auto cpuBackend = static_cast<CPUBackend*>(backend());
    auto core       = cpuBackend->functions();
    auto int8core   = cpuBackend->int8Functions();

    float* inputPtr   = inputs[0]->host<float>();
    int8_t* outputPtr = outputs[0]->host<int8_t>();
    int size          = cpuBackend->getTensorSize(inputs[0]);

    float maxVal = 0.f, minVal = 0.f;
    core->MNNCountMaxMinValue(inputPtr, &minVal, &maxVal, size);

    // Compute scale and zero point from the observed value range.
    float range = maxVal - minVal;
    if (range == 0.0f) {
        // Constant input (minVal == maxVal == c). Dividing by a zero range
        // would produce an inf scale and a NaN zero point, so widen the range
        // to [min(c, 0), max(c, 0)] instead; an all-zero input falls back to a
        // unit range. The zero point then stays within [-128, 127].
        range = (minVal != 0.0f) ? fabsf(minVal) : 1.0f;
        if (minVal > 0.0f) {
            minVal = 0.0f;
        }
    }
    float quantScale   = 255.0f / range;
    float dequantScale = range / 255.0f;
    float zeroPoint    = roundf(-(minVal * 255.f) / range) - 128.0f;

    // The element count is rounded up to a whole number of `pack`-sized groups.
    // NOTE(review): this presumes the buffers are padded to a multiple of
    // core->pack -- confirm against the tensor layout used by callers.
    int sizeDiv = UP_DIV(size, core->pack);
    // -128 / 127 are presumably the int8 clamp bounds; the trailing 0 is kept
    // exactly as in the original call -- TODO confirm its meaning against the
    // MNNFloat2Int8 declaration.
    int8core->MNNFloat2Int8(inputPtr, outputPtr, sizeDiv, &quantScale, -128, 127, &zeroPoint, 0);

    // Publish the parameters needed to dequantize outputs[0].
    float* scale = outputs[1]->host<float>();
    float* zeros = outputs[2]->host<float>();
    *scale = dequantScale;
    *zeros = zeroPoint;

    return NO_ERROR;
}
|
|
|
|
|
|
|
|
// The class releases nothing itself in its destructor, so the
// compiler-generated body is sufficient.
CPUDynamicQuant::~CPUDynamicQuant() = default;
|
|
#endif
|
|
// Creator that produces CPUDynamicQuant executions.
class CPUDynamicQuantCreator : public CPUBackend::Creator {
public:
    // Returns a newly allocated CPUDynamicQuant built from `op` and `backend`.
    // When MNN_REDUCE_SIZE is defined the implementation is compiled out and
    // nullptr is returned instead. `inputs` and `outputs` are not consulted.
    Execution* onCreate(const std::vector<Tensor*>& inputs,
                        const std::vector<Tensor*>& outputs,
                        const MNN::Op* op,
                        Backend* backend) const override {
#ifdef MNN_REDUCE_SIZE
        return nullptr;
#else
        return new CPUDynamicQuant(op, backend);
#endif
    }
};
|
|
|
|
// Associates CPUDynamicQuantCreator with OpType_DynamicQuant.
// NOTE(review): presumably this registers the creator with the CPU backend's
// op table -- confirm against the REGISTER_CPU_OP_CREATOR macro definition.
REGISTER_CPU_OP_CREATOR(CPUDynamicQuantCreator, OpType_DynamicQuant);
|
|
|
|
} // namespace MNN
|