libavcodec/ppc: remove broken AltiVec acceleration for lossless audio

It fails sporadically on both big- and little-endian POWER/PPC, and it also triggers GCC's undefined-behaviour and address sanitizers.
avutil/hwcontext_vulkan: actually limit number of queues
2025-11-12 14:40:14 -05:00 · 2025-11-12 15:25:07 +00:00 · 2025-11-12 14:50:46 +01:00 · 2025-11-12 14:50:21 +01:00
9 changed files with 33 additions and 114 deletions
--- a/libavcodec/bit_depth_template.c
+++ b/libavcodec/bit_depth_template.c
@ -43,11 +43,13 @@
 #   undef FUNCC
 #   undef av_clip_pixel
 #   undef PIXEL_SPLAT_X4
+#   undef PIXELSIZE
 #else
 #   define AVCODEC_BIT_DEPTH_TEMPLATE_C
 #endif

 #if BIT_DEPTH > 8
+#   define PIXELSIZE 16
 #   define pixel  uint16_t
 #   define pixel2 uint32_t
 #   define pixel4 uint64_t
@ -76,6 +78,7 @@
 #   define av_clip_pixel(a) av_clip_uintp2(a, BIT_DEPTH)
 #   define CLIP(a)          av_clip_uintp2(a, BIT_DEPTH)
 #else
+#   define PIXELSIZE 8
 #   define pixel  uint8_t
 #   define pixel2 uint16_t
 #   define pixel4 uint32_t
@ -100,6 +103,7 @@
 #define FUNC2(a, b, c)  FUNC3(a, b, c)
 #define FUNC(a)  FUNC2(a, BIT_DEPTH,)
 #define FUNCC(a) FUNC2(a, BIT_DEPTH, _c)
+#define FUNCC2(a) FUNC2(a, PIXELSIZE, _c)
 #define FUNC4(a, b, c)  a ## _int ## b ## _ ## c ## bit
 #define FUNC5(a, b, c)  FUNC4(a, b, c)
 #define FUNC6(a)  FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH)
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@ -69,14 +69,19 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
 #undef FUNC
 #define FUNC(a, depth) a ## _ ## depth ## _c

-#define ADDPX_DSP(depth) \
+#define SET_PIXSIZE_FUNCS(depth) \
+    c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
+    if (chroma_format_idc <= 1)\
+        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
+    else\
+        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
    c->h264_add_pixels4_clear = FUNC(ff_h264_add_pixels4, depth);\
    c->h264_add_pixels8_clear = FUNC(ff_h264_add_pixels8, depth)

    if (bit_depth > 8 && bit_depth <= 16) {
-        ADDPX_DSP(16);
+        SET_PIXSIZE_FUNCS(16);
    } else {
-        ADDPX_DSP(8);
+        SET_PIXSIZE_FUNCS(8);
    }

 #define H264_DSP(depth) \
@ -91,11 +96,6 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
    else\
        c->h264_idct_add8  = FUNC(ff_h264_idct_add8_422, depth);\
    c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
-    c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
-    if (chroma_format_idc <= 1)\
-        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
-    else\
-        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
 \
    c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
    c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\
--- a/libavcodec/h264idct.h
+++ b/libavcodec/h264idct.h
@ -31,9 +31,6 @@ void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffse
 void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[5 * 8]);\
 void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 * 8]);\
 void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 * 8]);\
-void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(int16_t *output, int16_t *input, int qmul);\
-void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(int16_t *block, int qmul);\
-void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(int16_t *block, int qmul);

 H264_IDCT( 8)
 H264_IDCT( 9)
@ -41,4 +38,12 @@ H264_IDCT(10)
 H264_IDCT(12)
 H264_IDCT(14)

+#define H264_IDCT2(pixsize) \
+void ff_h264_luma_dc_dequant_idct_ ## pixsize ## _c(int16_t *output, int16_t *input, int qmul);\
+void ff_h264_chroma422_dc_dequant_idct_ ## pixsize ## _c(int16_t *block, int qmul);\
+void ff_h264_chroma_dc_dequant_idct_ ## pixsize ## _c(int16_t *block, int qmul);
+
+H264_IDCT2( 8)
+H264_IDCT2(16)
+
 #endif /* AVCODEC_H264IDCT_H */
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@ -244,11 +244,13 @@ void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16
    }
 }

+#if BIT_DEPTH == 8 || BIT_DEPTH == 9
 /**
 * IDCT transforms the 16 dc values and dequantizes them.
 * @param qmul quantization parameter
 */
-void FUNCC(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul){
+void FUNCC2(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul)
+{
 #define stride 16
    int i;
    int temp[16];
@ -283,7 +285,8 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int
 #undef stride
 }

-void FUNCC(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul){
+void FUNCC2(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul)
+{
    const int stride= 16*2;
    const int xStride= 16;
    int i;
@ -310,7 +313,8 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul){
    }
 }

-void FUNCC(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul){
+void FUNCC2(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul)
+{
    const int stride= 16*2;
    const int xStride= 16;
    SUINT a,b,c,d,e;
@ -331,3 +335,4 @@ void FUNCC(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul){
    block[stride*1 + xStride*0]= (int)((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= (int)((e-b)*qmul) >> 7;
 }
+#endif
--- a/libavcodec/lossless_audiodsp.c
+++ b/libavcodec/lossless_audiodsp.c
@ -61,8 +61,6 @@ av_cold void ff_llauddsp_init(LLAudDSPContext *c)

 #if ARCH_ARM
    ff_llauddsp_init_arm(c);
-#elif ARCH_PPC
-    ff_llauddsp_init_ppc(c);
 #elif ARCH_RISCV
    ff_llauddsp_init_riscv(c);
 #elif ARCH_X86
--- a/libavcodec/lossless_audiodsp.h
+++ b/libavcodec/lossless_audiodsp.h
@ -45,7 +45,6 @@ typedef struct LLAudDSPContext {

 void ff_llauddsp_init(LLAudDSPContext *c);
 void ff_llauddsp_init_arm(LLAudDSPContext *c);
-void ff_llauddsp_init_ppc(LLAudDSPContext *c);
 void ff_llauddsp_init_riscv(LLAudDSPContext *c);
 void ff_llauddsp_init_x86(LLAudDSPContext *c);

--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@ -21,7 +21,6 @@ OBJS-$(CONFIG_VP8DSP)                  += ppc/vp8dsp_altivec.o

 # decoders/encoders
 OBJS-$(CONFIG_HEVC_DECODER)            += ppc/hevcdsp.o
-OBJS-$(CONFIG_LLAUDDSP)                += ppc/lossless_audiodsp_altivec.o
 OBJS-$(CONFIG_MPEG4_DECODER)           += ppc/mpeg4videodsp.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += ppc/vorbisdsp_altivec.o
--- a/libavcodec/ppc/lossless_audiodsp_altivec.c
+++ b/libavcodec/ppc/lossless_audiodsp_altivec.c
@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-
-#include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
-#include "libavutil/ppc/cpu.h"
-#include "libavutil/ppc/util_altivec.h"
-
-#include "libavcodec/lossless_audiodsp.h"
-
-#if HAVE_BIGENDIAN
-#define GET_T(tt0,tt1,src,a,b){       \
-        a = vec_ld(16, src);          \
-        tt0 = vec_perm(b, a, align);  \
-        b = vec_ld(32, src);          \
-        tt1 = vec_perm(a, b, align);  \
- }
-#else
-#define GET_T(tt0,tt1,src,a,b){       \
-        tt0 = vec_vsx_ld(0, src);     \
-        tt1 = vec_vsx_ld(16, src);    \
- }
-#endif
-
-#if HAVE_ALTIVEC
-static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
-                                                    const int16_t *v2,
-                                                    const int16_t *v3,
-                                                    int order, int mul)
-{
-    LOAD_ZERO;
-    vec_s16 *pv1 = (vec_s16 *) v1;
-    register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };
-    register vec_s16 t0, t1, i0, i1, i4, i2, i3;
-    register vec_s32 res = zero_s32v;
-#if HAVE_BIGENDIAN
-    register vec_u8 align = vec_lvsl(0, v2);
-    i2 = vec_ld(0, v2);
-    i3 = vec_ld(0, v3);
-#endif
-    int32_t ires;
-
-    order >>= 4;
-    do {
-        GET_T(t0,t1,v2,i1,i2);
-        i0     = pv1[0];
-        i1     = pv1[1];
-        res    = vec_msum(t0, i0, res);
-        res    = vec_msum(t1, i1, res);
-        GET_T(t0,t1,v3,i4,i3);
-        pv1[0] = vec_mladd(t0, muls, i0);
-        pv1[1] = vec_mladd(t1, muls, i1);
-        pv1   += 2;
-        v2    += 16;
-        v3    += 16;
-    } while (--order);
-    res = vec_splat(vec_sums(res, zero_s32v), 3);
-    vec_ste(res, 0, &ires);
-
-    return ires;
-}
-#endif /* HAVE_ALTIVEC */
-
-av_cold void ff_llauddsp_init_ppc(LLAudDSPContext *c)
-{
-#if HAVE_ALTIVEC
-    if (!PPC_ALTIVEC(av_get_cpu_flags()))
-        return;
-
-    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
-#endif /* HAVE_ALTIVEC */
-}
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@ -1772,6 +1772,11 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
    dev_info.pNext = p->feats.device.pNext;
    dev_info.pEnabledFeatures = &p->feats.device.features;

+    /* Limit queues to a given number if needed */
+    opt_d = av_dict_get(opts, "limit_queues", NULL, 0);
+    if (opt_d)
+        p->limit_queues = strtol(opt_d->value, NULL, 10);
+
    /* Setup enabled queue families */
    if ((err = setup_queue_families(ctx, &dev_info)))
        goto end;
@ -1799,11 +1804,6 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

-    /* Limit queues to a given number if needed */
-    opt_d = av_dict_get(opts, "limit_queues", NULL, 0);
-    if (opt_d)
-        p->limit_queues = strtol(opt_d->value, NULL, 10);
-
    /* The disable_multiplane argument takes precedent over the option */
    p->disable_multiplane = disable_multiplane;
    if (!p->disable_multiplane) {
Author	SHA1	Message	Date
Sean McGovern	35464ad9eb	libavcodec/ppc: remove broken AltiVec acceleration for lossless audio. It fails sporadically on both big- and little-endian POWER/PPC, and it also triggers GCC's undefined-behaviour and address sanitizers.	2025-11-12 14:40:14 -05:00
Niklas Haas	203c6a93d7	avutil/hwcontext_vulkan: actually limit number of queues This option is parsed after it is used.	2025-11-12 15:25:07 +00:00
Andreas Rheinhardt	2452b81769	avcodec/h264idct_template: Deduplicate h264_{luma,chroma}_dc_dequant_idct All the high bit depth functions of these types are identical. Reviewed-by: Kacper Michajłow <kasper93@gmail.com> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>	2025-11-12 14:50:46 +01:00
Andreas Rheinhardt	1fafb13cd4	avcodec/bit_depth_template: Add PIXELSIZE. Sometimes functions for bit depth 9..16 are the same (because they actually only depend on the underlying pixel type). The macros added here make it possible to support this use case. Reviewed-by: Kacper Michajłow <kasper93@gmail.com> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>	2025-11-12 14:50:21 +01:00