Compare commits

..

2 Commits

Author SHA1 Message Date
Bin Peng 3115c0c0e6 lavc/aarch64: Fix addp overflow in ff_pred16x16_plane_neon_10
The mismatch between neon and C functions can be reproduced
using the following bitstream and command line on aarch64 devices.

wget https://streams.videolan.org/ffmpeg/incoming/replay_intra_pred_16x16.h264
 ./ffmpeg -cpuflags 0  -threads 1 -i replay_intra_pred_16x16.h264  -f framemd5 -y md5_ref
 ./ffmpeg              -threads 1 -i replay_intra_pred_16x16.h264 -f framemd5 -y md5_neon

Signed-off-by: Bin Peng <pengbin@visionular.com>
2025-10-24 15:32:35 +00:00
Andreas Rheinhardt 7e8ef2ded2 configure: Add mxpeg->hpeldsp dependency
Forgotten in 124c856d38.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-10-24 12:56:24 +02:00
2 changed files with 7 additions and 8 deletions

2
configure vendored
View File

@@ -3126,7 +3126,7 @@ mts2_decoder_select="jpegtables mss34dsp"
mv30_decoder_select="aandcttables blockdsp"
mvha_decoder_select="inflate_wrapper llviddsp"
mwsc_decoder_select="inflate_wrapper"
-mxpeg_decoder_select="mjpeg_decoder"
+mxpeg_decoder_select="hpeldsp mjpeg_decoder"
nellymoser_decoder_select="sinewin"
nellymoser_encoder_select="audio_frame_queue sinewin"
notchlc_decoder_select="lzf"

View File

@@ -489,10 +489,10 @@ function ff_pred16x16_plane_neon_10, export=1
mul v2.8h, v2.8h, v0.8h
mul v3.8h, v3.8h, v0.8h
addp v2.8h, v2.8h, v3.8h
-addp v2.8h, v2.8h, v2.8h
-addp v2.4h, v2.4h, v2.4h
-sshll v3.4s, v2.4h, #2
-saddw v2.4s, v3.4s, v2.4h
+saddlp v2.4s, v2.8h
+addp v2.4s, v2.4s, v2.4s
+shl v3.4s, v2.4s, #2
+add v2.4s, v3.4s, v2.4s
rshrn v4.4h, v2.4s, #6
trn2 v5.4h, v4.4h, v4.4h
add v2.4h, v4.4h, v5.4h
@@ -506,14 +506,13 @@ function ff_pred16x16_plane_neon_10, export=1
sxtl v6.4s, v5.4h // c
mov v0.h[0], wzr
-mul v0.8h, v0.8h, v4.h[0]
dup v16.4s, v2.s[0]
dup v17.4s, v2.s[0]
dup v2.8h, v4.h[0] // b
dup v3.4s, v6.s[0] // c
sshll v2.4s, v2.4h, #3 // b * 8
-saddw v16.4s, v16.4s, v0.4h
-saddw2 v17.4s, v17.4s, v0.8h
+smlal v16.4s, v0.4h, v4.h[0]
+smlal2 v17.4s, v0.8h, v4.h[0]
sub v3.4s, v3.4s, v2.4s
mov w3, #16