//
// MNNInt8ScaleToFloat.S
// MNN
//
// Created by MNN on 2019/06/15.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifdef __aarch64__

#include "MNNAsmGlobal.h"

.text
.align 5

asm_function MNNInt8ScaleToFloat
// void MNNInt8ScaleToFloat(float* dst,
//     const int8_t* src, const float* scale, size_t size, ssize_t zeroPoint)
//
// Dequantizes `size` groups of four int8 values to float:
//     dst[4*i + j] = (float(src[4*i + j]) - float(zeroPoint)) * scale[j]
//     for i in [0, size), j in [0, 4)
//
// Auto Load:
// x0: dst*, x1: src*, x2: scale*, x3: size, x4: zeroPoint
//
// Register roles:
//   v16       scale[0..3], loaded once
//   v28       zeroPoint converted to float, replicated in all four lanes
//             (only the low 32 bits of x4 are consumed)
//   v17-v19   raw source bytes and their 16-bit sign-extended form
//   v0-v7     32-bit sign-extended values, float intermediates and results
//
// Clobbers: x0, x1, x3, v0-v7, v16-v19, v28, flags. No stack is used.
// All vector registers written here are caller-saved under AAPCS64.

    cbz     x3, End                     // size == 0: nothing to convert

    // copy zero point: broadcast to four lanes, then int32 -> float
    dup     v28.4s, w4
    scvtf   v28.4s, v28.4s

    ld1     {v16.4s}, [x2]              // v16 = scale[0..3]

L4:
    cmp     x3, #4
    b.lo    L1                          // unsigned compare: size is a size_t

// Main loop: four groups (16 int8 -> 16 float) per iteration.
// Invariant on entry: x3 >= 4. The convert / subtract / multiply / store
// steps of the four groups are interleaved; keep this order when editing.
L4Loop:
    ld1     {v17.16b}, [x1], #16        // 16 source bytes, src += 16
    sub     x3, x3, #4
    sxtl    v18.8h, v17.8b              // bytes 0-7  -> int16
    sxtl2   v19.8h, v17.16b             // bytes 8-15 -> int16

    sxtl    v0.4s, v18.4h               // group 0 -> int32
    sxtl2   v1.4s, v18.8h               // group 1 -> int32
    sxtl    v2.4s, v19.4h               // group 2 -> int32
    sxtl2   v3.4s, v19.8h               // group 3 -> int32
    scvtf   v4.4s, v0.4s
    scvtf   v5.4s, v1.4s
    scvtf   v6.4s, v2.4s
    fsub    v4.4s, v4.4s, v28.4s        // subtract zero point
    fsub    v5.4s, v5.4s, v28.4s
    fmul    v0.4s, v4.4s, v16.4s        // apply per-lane scale
    fmul    v1.4s, v5.4s, v16.4s
    scvtf   v7.4s, v3.4s
    fsub    v6.4s, v6.4s, v28.4s
    fmul    v2.4s, v6.4s, v16.4s
    st1     {v0.4s, v1.4s}, [x0], #32   // groups 0-1 out, dst += 32 bytes
    fsub    v7.4s, v7.4s, v28.4s
    fmul    v3.4s, v7.4s, v16.4s
    cmp     x3, #4                      // flags survive the store below
    st1     {v2.4s, v3.4s}, [x0], #32   // groups 2-3 out, dst += 32 bytes
    b.hs    L4Loop                      // unsigned: at least 4 groups remain

L1:
    cbz     x3, End

// Tail loop: one group (4 int8 -> 4 float) per iteration, x3 > 0 on entry.
L1Loop:
    ld1     {v17.s}[0], [x1], #4        // 4 source bytes into lane 0, src += 4
    subs    x3, x3, #1                  // sets Z for the branch at loop end
    sxtl    v0.8h, v17.8b
    sxtl    v1.4s, v0.4h                // only the low four int16 are used
    scvtf   v2.4s, v1.4s
    fsub    v2.4s, v2.4s, v28.4s
    fmul    v1.4s, v2.4s, v16.4s
    st1     {v1.4s}, [x0], #16          // dst += 16 bytes

    b.ne    L1Loop

End:
    ret

#endif