Compare commits

..

2 Commits

Author SHA1 Message Date
Bin Peng 3115c0c0e6 lavc/aarch64: Fix addp overflow in ff_pred16x16_plane_neon_10
The mismatch between the NEON and C implementations can be reproduced
on AArch64 devices using the following bitstream and command lines.

wget https://streams.videolan.org/ffmpeg/incoming/replay_intra_pred_16x16.h264
 ./ffmpeg -cpuflags 0  -threads 1 -i replay_intra_pred_16x16.h264  -f framemd5 -y md5_ref
 ./ffmpeg              -threads 1 -i replay_intra_pred_16x16.h264 -f framemd5 -y md5_neon

Signed-off-by: Bin Peng <pengbin@visionular.com>
2025-10-24 15:32:35 +00:00
Andreas Rheinhardt 7e8ef2ded2 configure: Add mxpeg->hpeldsp dependency
Forgotten in 124c856d38.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-10-24 12:56:24 +02:00
2 changed files with 7 additions and 8 deletions

2
configure vendored
View File

@@ -3126,7 +3126,7 @@ mts2_decoder_select="jpegtables mss34dsp"
mv30_decoder_select="aandcttables blockdsp" mv30_decoder_select="aandcttables blockdsp"
mvha_decoder_select="inflate_wrapper llviddsp" mvha_decoder_select="inflate_wrapper llviddsp"
mwsc_decoder_select="inflate_wrapper" mwsc_decoder_select="inflate_wrapper"
mxpeg_decoder_select="mjpeg_decoder" mxpeg_decoder_select="hpeldsp mjpeg_decoder"
nellymoser_decoder_select="sinewin" nellymoser_decoder_select="sinewin"
nellymoser_encoder_select="audio_frame_queue sinewin" nellymoser_encoder_select="audio_frame_queue sinewin"
notchlc_decoder_select="lzf" notchlc_decoder_select="lzf"

View File

@@ -489,10 +489,10 @@ function ff_pred16x16_plane_neon_10, export=1
mul v2.8h, v2.8h, v0.8h mul v2.8h, v2.8h, v0.8h
mul v3.8h, v3.8h, v0.8h mul v3.8h, v3.8h, v0.8h
addp v2.8h, v2.8h, v3.8h addp v2.8h, v2.8h, v3.8h
addp v2.8h, v2.8h, v2.8h saddlp v2.4s, v2.8h
addp v2.4h, v2.4h, v2.4h addp v2.4s, v2.4s, v2.4s
sshll v3.4s, v2.4h, #2 shl v3.4s, v2.4s, #2
saddw v2.4s, v3.4s, v2.4h add v2.4s, v3.4s, v2.4s
rshrn v4.4h, v2.4s, #6 rshrn v4.4h, v2.4s, #6
trn2 v5.4h, v4.4h, v4.4h trn2 v5.4h, v4.4h, v4.4h
add v2.4h, v4.4h, v5.4h add v2.4h, v4.4h, v5.4h
@@ -506,14 +506,13 @@ function ff_pred16x16_plane_neon_10, export=1
sxtl v6.4s, v5.4h // c sxtl v6.4s, v5.4h // c
mov v0.h[0], wzr mov v0.h[0], wzr
mul v0.8h, v0.8h, v4.h[0]
dup v16.4s, v2.s[0] dup v16.4s, v2.s[0]
dup v17.4s, v2.s[0] dup v17.4s, v2.s[0]
dup v2.8h, v4.h[0] // b dup v2.8h, v4.h[0] // b
dup v3.4s, v6.s[0] // c dup v3.4s, v6.s[0] // c
sshll v2.4s, v2.4h, #3 // b * 8 sshll v2.4s, v2.4h, #3 // b * 8
saddw v16.4s, v16.4s, v0.4h smlal v16.4s, v0.4h, v4.h[0]
saddw2 v17.4s, v17.4s, v0.8h smlal2 v17.4s, v0.8h, v4.h[0]
sub v3.4s, v3.4s, v2.4s sub v3.4s, v3.4s, v2.4s
mov w3, #16 mov w3, #16