Compare commits

...

4 Commits

Author SHA1 Message Date
Sean McGovern 35464ad9eb libavcodec/ppc: remove broken AltiVec acceleration for lossless audio
It fails sporadically on both big- and little-endian POWER/PPC, and it also
triggers the undefined-behaviour and address sanitizers in GCC.
2025-11-12 14:40:14 -05:00
Niklas Haas 203c6a93d7 avutil/hwcontext_vulkan: actually limit number of queues
This option is parsed after it is used.
2025-11-12 15:25:07 +00:00
Andreas Rheinhardt 2452b81769 avcodec/h264idct_template: Deduplicate h264_{luma,chroma}_dc_dequant_idct
All the high bit depth functions of these types are identical.

Reviewed-by: Kacper Michajłow <kasper93@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-12 14:50:46 +01:00
Andreas Rheinhardt 1fafb13cd4 avcodec/bit_depth_template: Add PIXELSIZE
Sometimes the functions for bit depths 9..16 are the same (because they
actually depend only on the underlying pixel type). The macros added
here make it possible to support this use case.

Reviewed-by: Kacper Michajłow <kasper93@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-12 14:50:21 +01:00
9 changed files with 33 additions and 114 deletions

View File

@ -43,11 +43,13 @@
# undef FUNCC
# undef av_clip_pixel
# undef PIXEL_SPLAT_X4
# undef PIXELSIZE
#else
# define AVCODEC_BIT_DEPTH_TEMPLATE_C
#endif
#if BIT_DEPTH > 8
# define PIXELSIZE 16
# define pixel uint16_t
# define pixel2 uint32_t
# define pixel4 uint64_t
@ -76,6 +78,7 @@
# define av_clip_pixel(a) av_clip_uintp2(a, BIT_DEPTH)
# define CLIP(a) av_clip_uintp2(a, BIT_DEPTH)
#else
# define PIXELSIZE 8
# define pixel uint8_t
# define pixel2 uint16_t
# define pixel4 uint32_t
@ -100,6 +103,7 @@
#define FUNC2(a, b, c) FUNC3(a, b, c)
#define FUNC(a) FUNC2(a, BIT_DEPTH,)
#define FUNCC(a) FUNC2(a, BIT_DEPTH, _c)
#define FUNCC2(a) FUNC2(a, PIXELSIZE, _c)
#define FUNC4(a, b, c) a ## _int ## b ## _ ## c ## bit
#define FUNC5(a, b, c) FUNC4(a, b, c)
#define FUNC6(a) FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH)

View File

@ -69,14 +69,19 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
#undef FUNC
#define FUNC(a, depth) a ## _ ## depth ## _c
#define ADDPX_DSP(depth) \
#define SET_PIXSIZE_FUNCS(depth) \
c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
if (chroma_format_idc <= 1)\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
else\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
c->h264_add_pixels4_clear = FUNC(ff_h264_add_pixels4, depth);\
c->h264_add_pixels8_clear = FUNC(ff_h264_add_pixels8, depth)
if (bit_depth > 8 && bit_depth <= 16) {
ADDPX_DSP(16);
SET_PIXSIZE_FUNCS(16);
} else {
ADDPX_DSP(8);
SET_PIXSIZE_FUNCS(8);
}
#define H264_DSP(depth) \
@ -91,11 +96,6 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
else\
c->h264_idct_add8 = FUNC(ff_h264_idct_add8_422, depth);\
c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
if (chroma_format_idc <= 1)\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
else\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
\
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\

View File

@ -31,9 +31,6 @@ void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffse
void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[5 * 8]);\
void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 * 8]);\
void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 * 8]);\
void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(int16_t *output, int16_t *input, int qmul);\
void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(int16_t *block, int qmul);\
void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(int16_t *block, int qmul);
H264_IDCT( 8)
H264_IDCT( 9)
@ -41,4 +38,12 @@ H264_IDCT(10)
H264_IDCT(12)
H264_IDCT(14)
/*
 * Declare the DC dequant/IDCT functions, which are named after a pixel size
 * (the `pixsize` token pasted into the symbol name) instead of a bit depth.
 * Each expansion emits three prototypes:
 *   ff_h264_luma_dc_dequant_idct_<pixsize>_c
 *   ff_h264_chroma422_dc_dequant_idct_<pixsize>_c
 *   ff_h264_chroma_dc_dequant_idct_<pixsize>_c
 * The macro is instantiated below for pixel sizes 8 and 16 only.
 */
#define H264_IDCT2(pixsize) \
void ff_h264_luma_dc_dequant_idct_ ## pixsize ## _c(int16_t *output, int16_t *input, int qmul);\
void ff_h264_chroma422_dc_dequant_idct_ ## pixsize ## _c(int16_t *block, int qmul);\
void ff_h264_chroma_dc_dequant_idct_ ## pixsize ## _c(int16_t *block, int qmul);
H264_IDCT2( 8)
H264_IDCT2(16)
#endif /* AVCODEC_H264IDCT_H */

View File

@ -244,11 +244,13 @@ void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16
}
}
#if BIT_DEPTH == 8 || BIT_DEPTH == 9
/**
* IDCT transforms the 16 dc values and dequantizes them.
* @param qmul quantization parameter
*/
void FUNCC(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul){
void FUNCC2(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul)
{
#define stride 16
int i;
int temp[16];
@ -283,7 +285,8 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int
#undef stride
}
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul){
void FUNCC2(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul)
{
const int stride= 16*2;
const int xStride= 16;
int i;
@ -310,7 +313,8 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul){
}
}
void FUNCC(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul){
void FUNCC2(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul)
{
const int stride= 16*2;
const int xStride= 16;
SUINT a,b,c,d,e;
@ -331,3 +335,4 @@ void FUNCC(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul){
block[stride*1 + xStride*0]= (int)((a-c)*qmul) >> 7;
block[stride*1 + xStride*1]= (int)((e-b)*qmul) >> 7;
}
#endif

View File

@ -61,8 +61,6 @@ av_cold void ff_llauddsp_init(LLAudDSPContext *c)
#if ARCH_ARM
ff_llauddsp_init_arm(c);
#elif ARCH_PPC
ff_llauddsp_init_ppc(c);
#elif ARCH_RISCV
ff_llauddsp_init_riscv(c);
#elif ARCH_X86

View File

@ -45,7 +45,6 @@ typedef struct LLAudDSPContext {
void ff_llauddsp_init(LLAudDSPContext *c);
void ff_llauddsp_init_arm(LLAudDSPContext *c);
void ff_llauddsp_init_ppc(LLAudDSPContext *c);
void ff_llauddsp_init_riscv(LLAudDSPContext *c);
void ff_llauddsp_init_x86(LLAudDSPContext *c);

View File

@ -21,7 +21,6 @@ OBJS-$(CONFIG_VP8DSP) += ppc/vp8dsp_altivec.o
# decoders/encoders
OBJS-$(CONFIG_HEVC_DECODER) += ppc/hevcdsp.o
OBJS-$(CONFIG_LLAUDDSP) += ppc/lossless_audiodsp_altivec.o
OBJS-$(CONFIG_MPEG4_DECODER) += ppc/mpeg4videodsp.o
OBJS-$(CONFIG_SVQ1_ENCODER) += ppc/svq1enc_altivec.o
OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o

View File

@ -1,91 +0,0 @@
/*
* Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/lossless_audiodsp.h"
/*
 * GET_T(tt0, tt1, src, a, b): fetch the two vectors starting at src into
 * tt0 and tt1. Two variants exist, selected by HAVE_BIGENDIAN.
 */
#if HAVE_BIGENDIAN
/*
 * Big-endian variant.
 * - Requires a variable named `align` to be in scope at the expansion site.
 * - On entry `b` must already hold the vector loaded from offset 0 of src.
 * - `a` is overwritten with the load from byte offset 16 and `b` with the
 *   load from byte offset 32, so `b` is ready for the next 32-byte step.
 * - tt0 and tt1 are built by permuting neighbouring loads with `align`
 *   (presumably the usual vec_lvsl/vec_perm unaligned-load idiom -- confirm).
 * NOTE(review): the load at offset 32 lies beyond the 32 bytes that tt0/tt1
 * cover; verify it cannot run past the end of the source buffer.
 */
#define GET_T(tt0,tt1,src,a,b){ \
a = vec_ld(16, src); \
tt0 = vec_perm(b, a, align); \
b = vec_ld(32, src); \
tt1 = vec_perm(a, b, align); \
}
#else
/*
 * Little-endian variant: loads directly from byte offsets 0 and 16 with
 * vec_vsx_ld; the `a` and `b` arguments are accepted but not used.
 */
#define GET_T(tt0,tt1,src,a,b){ \
tt0 = vec_vsx_ld(0, src); \
tt1 = vec_vsx_ld(16, src); \
}
#endif
#if HAVE_ALTIVEC
/**
 * AltiVec implementation of scalarproduct_and_madd_int16.
 *
 * Per iteration it handles 16 int16 elements (two 8-lane vectors): it
 * accumulates vec_msum() of the v2 data with the current v1 data into a
 * running sum, then overwrites the v1 data with vec_mladd(v3 data, mul, v1).
 *
 * @param v1    read and updated in place; accessed through a vec_s16 pointer
 *              (NOTE(review): presumably must be 16-byte aligned -- confirm)
 * @param v2    first read-only input, advanced 16 elements per iteration
 * @param v3    second read-only input, advanced 16 elements per iteration
 * @param order element count; shifted right by 4 to get the iteration count
 * @param mul   multiplier replicated into every 16-bit lane
 *              (NOTE(review): values outside the int16 range are presumably
 *              truncated by the vector initialiser -- confirm)
 * @return the 32-bit value stored from the summed accumulator
 */
static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
const int16_t *v2,
const int16_t *v3,
int order, int mul)
{
LOAD_ZERO;
vec_s16 *pv1 = (vec_s16 *) v1;
register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };
register vec_s16 t0, t1, i0, i1, i4, i2, i3;
register vec_s32 res = zero_s32v;
#if HAVE_BIGENDIAN
/* Prime the carried-over vectors (i2 for v2, i3 for v3) that GET_T expects. */
/* NOTE(review): `align` is computed from v2 only, yet GET_T also uses it for
 * the v3 loads below; this looks wrong when v2 and v3 are aligned
 * differently -- confirm. */
register vec_u8 align = vec_lvsl(0, v2);
i2 = vec_ld(0, v2);
i3 = vec_ld(0, v3);
#endif
int32_t ires;
/* 16 elements per iteration. */
order >>= 4;
/* NOTE(review): do/while with pre-decrement -- if order was below 16 the
 * counter is 0 here and --order becomes -1, so the loop does not stop after
 * one pass. Callers apparently must pass order >= 16 -- confirm. */
do {
/* i1 is only scratch for GET_T here; it is overwritten two lines below. */
GET_T(t0,t1,v2,i1,i2);
i0 = pv1[0];
i1 = pv1[1];
res = vec_msum(t0, i0, res);
res = vec_msum(t1, i1, res);
GET_T(t0,t1,v3,i4,i3);
pv1[0] = vec_mladd(t0, muls, i0);
pv1[1] = vec_mladd(t1, muls, i1);
pv1 += 2;
v2 += 16;
v3 += 16;
} while (--order);
/* Reduce the accumulator, splat element 3 and store one element to ires. */
res = vec_splat(vec_sums(res, zero_s32v), 3);
vec_ste(res, 0, &ires);
return ires;
}
#endif /* HAVE_ALTIVEC */
/**
 * PPC-specific initialisation of the lossless audio DSP context.
 *
 * When the build has AltiVec enabled and the CPU flags report AltiVec,
 * the scalarproduct_and_madd_int16 pointer in the context is set to the
 * AltiVec implementation. Otherwise the context is left untouched.
 *
 * @param c DSP context to update
 */
av_cold void ff_llauddsp_init_ppc(LLAudDSPContext *c)
{
#if HAVE_ALTIVEC
    if (PPC_ALTIVEC(av_get_cpu_flags()))
        c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
#endif /* HAVE_ALTIVEC */
}

View File

@ -1772,6 +1772,11 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
dev_info.pNext = p->feats.device.pNext;
dev_info.pEnabledFeatures = &p->feats.device.features;
/* Limit queues to a given number if needed */
opt_d = av_dict_get(opts, "limit_queues", NULL, 0);
if (opt_d)
p->limit_queues = strtol(opt_d->value, NULL, 10);
/* Setup enabled queue families */
if ((err = setup_queue_families(ctx, &dev_info)))
goto end;
@ -1799,11 +1804,6 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
if (opt_d)
p->use_linear_images = strtol(opt_d->value, NULL, 10);
/* Limit queues to a given number if needed */
opt_d = av_dict_get(opts, "limit_queues", NULL, 0);
if (opt_d)
p->limit_queues = strtol(opt_d->value, NULL, 10);
/* The disable_multiplane argument takes precedence over the option */
p->disable_multiplane = disable_multiplane;
if (!p->disable_multiplane) {