Fix alpha divide-by-zero bug and ARM server compile bug

This commit is contained in:
tianhang.yth 2021-06-23 17:36:42 +08:00 committed by xiaying
parent 1a6cacc808
commit 4eb1096b9c
4 changed files with 49 additions and 38 deletions

View File

@@ -97,21 +97,21 @@ loop_e16:
load_e16h4_end:
ldr w20, [x15], #4
mov v17.4s, v16.4s
mov v18.4s, v16.4s
mov v19.4s, v16.4s
mov v20.4s, v16.4s
mov v21.4s, v16.4s
mov v22.4s, v16.4s
mov v23.4s, v16.4s
mov v24.4s, v16.4s
mov v25.4s, v16.4s
mov v26.4s, v16.4s
mov v27.4s, v16.4s
mov v28.4s, v16.4s
mov v29.4s, v16.4s
mov v30.4s, v16.4s
mov v31.4s, v16.4s
mov v17.16b, v16.16b
mov v18.16b, v16.16b
mov v19.16b, v16.16b
mov v20.16b, v16.16b
mov v21.16b, v16.16b
mov v22.16b, v16.16b
mov v23.16b, v16.16b
mov v24.16b, v16.16b
mov v25.16b, v16.16b
mov v26.16b, v16.16b
mov v27.16b, v16.16b
mov v28.16b, v16.16b
mov v29.16b, v16.16b
mov v30.16b, v16.16b
mov v31.16b, v16.16b
cbz w20, loop_e16h4l1_end
loop_e16h4l1:
@@ -382,13 +382,13 @@ beq loop_e4
load_e8h4_end:
ldr w20, [x15], #4
mov v17.4s, v16.4s
mov v18.4s, v16.4s
mov v19.4s, v16.4s
mov v20.4s, v16.4s
mov v21.4s, v16.4s
mov v22.4s, v16.4s
mov v23.4s, v16.4s
mov v17.16b, v16.16b
mov v18.16b, v16.16b
mov v19.16b, v16.16b
mov v20.16b, v16.16b
mov v21.16b, v16.16b
mov v22.16b, v16.16b
mov v23.16b, v16.16b
cbz w20, loop_e8h4l1_end
loop_e8h4l1:
@@ -530,9 +530,9 @@ beq loop_e2
load_e4h4_end:
ldr w20, [x15], #4
mov v17.4s, v16.4s
mov v18.4s, v16.4s
mov v19.4s, v16.4s
mov v17.16b, v16.16b
mov v18.16b, v16.16b
mov v19.16b, v16.16b
cbz w20, loop_e4h4l1_end
loop_e4h4l1:
@@ -644,7 +644,7 @@ beq loop_e1
load_e2h4_end:
ldr w20, [x15], #4
mov v17.4s, v16.4s
mov v17.16b, v16.16b
cbz w20, loop_e2h4l1_end
loop_e2h4l1:

View File

@@ -67,7 +67,7 @@ static void MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, co
if (post != nullptr) {
dst_x[j] = MNNInt32ToInt8(dstTemp[j], bias_dz[j], scale_dz[j], post->maxValue, post->minValue);
} else {
dst_x[j] = dstTemp[j];
+ ((float*)dst_x)[j] = (float)(dstTemp[j] + bias_dz[j]);
}
}
}

View File

@@ -6,6 +6,10 @@
// Copyright © 2018, Alibaba Group Holding Limited
//
#if __GNUC__ == 4
#pragma GCC optimize("-flax-vector-conversions")
#endif
#include <limits>
#include <vector>
#include <map>
@@ -64,10 +68,10 @@ static void _sourceTransUnit4x4Pack4x4(const int8_t* srcStart, int8_t* dstStart,
};
for (int i = 0; i < 4; ++i) {
auto tmp = vreinterpretq_s32_s8(m[i].value);
vst1q_lane_s32(dstStart + 0 * dstZStep, tmp, 0);
vst1q_lane_s32(dstStart + 1 * dstZStep, tmp, 1);
vst1q_lane_s32(dstStart + 2 * dstZStep, tmp, 2);
vst1q_lane_s32(dstStart + 3 * dstZStep, tmp, 3);
vst1q_lane_s32((int32_t*)(dstStart + 0 * dstZStep), tmp, 0);
vst1q_lane_s32((int32_t*)(dstStart + 1 * dstZStep), tmp, 1);
vst1q_lane_s32((int32_t*)(dstStart + 2 * dstZStep), tmp, 2);
vst1q_lane_s32((int32_t*)(dstStart + 3 * dstZStep), tmp, 3);
dstStart += dstXStep;
}
dstStart -= dstXStep * 4;
@@ -151,10 +155,10 @@ static void _sourceTransUnit4x4Pack16x4(const int8_t* srcStart, int8_t* dstStart
};
for (int i = 0; i < 4; ++i) {
auto tmp = vreinterpretq_s32_s8(m[i].value);
vst1q_lane_s32(dstStart + 0 * dstZStep, tmp, 0);
vst1q_lane_s32(dstStart + 1 * dstZStep, tmp, 1);
vst1q_lane_s32(dstStart + 2 * dstZStep, tmp, 2);
vst1q_lane_s32(dstStart + 3 * dstZStep, tmp, 3);
vst1q_lane_s32((int32_t*)(dstStart + 0 * dstZStep), tmp, 0);
vst1q_lane_s32((int32_t*)(dstStart + 1 * dstZStep), tmp, 1);
vst1q_lane_s32((int32_t*)(dstStart + 2 * dstZStep), tmp, 2);
vst1q_lane_s32((int32_t*)(dstStart + 3 * dstZStep), tmp, 3);
dstStart += dstXStep;
}
dstStart -= dstXStep * 4;

View File

@@ -481,7 +481,10 @@ bool ConvolutionCommon::getConvInt8Parameters(const MNN::Convolution2D* conv2d,
const int8_t*& weight, float*& scale, int32_t*& bias,
float inputScale, float outputScale, int inputZeroPoint, int outputZeroPoint) {
int outputCount = conv2d->common()->outputCount();
weight = conv2d->symmetricQuan()->weight()->data();
// fix xcode UndefinedBehaviorSanitizer
if (conv2d->symmetricQuan()->weight() != nullptr) {
weight = conv2d->symmetricQuan()->weight()->data();
}
if (conv2d->quanParameter() && conv2d->quanParameter()->buffer()) {
quanCommon = ConvolutionCommon::load(conv2d->quanParameter(), false, true);
weight = quanCommon->weight.get();
@@ -529,10 +532,14 @@ bool ConvolutionCommon::getConvInt8Parameters(const MNN::Convolution2D* conv2d,
auto alphaData = conv2d->quanParameter()->alpha()->data();
auto alphaScale = inputScale / outputScale;
for (int i = 0; i < outputCount; i++) {
scale[i] = alphaData[i] * alphaScale;
auto alphaValue = alphaData[i];
if (fabs(alphaValue) < 1e-6) {
alphaValue = 1e-6;
}
scale[i] = alphaValue * alphaScale;
// compute outputZeroPointFused in asymmetric quant
int outputZeroPointFused = static_cast<int32_t>(outputZeroPoint / scale[i]);
bias[i] = static_cast<int32_t>(biasData[i] / (inputScale * alphaData[i])) - remains[i] + outputZeroPointFused;
bias[i] = static_cast<int32_t>(biasData[i] / (inputScale * alphaValue)) - remains[i] + outputZeroPointFused;
}
return true;
}