Compare commits

...

9 Commits

Author SHA1 Message Date
Andreas Rheinhardt 8e90f150eb avcodec/decode: Optimize lcevc away if disabled
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 16:50:36 +01:00
Andreas Rheinhardt 63685709b9 avcodec/decode: Mark init,close functions as av_cold
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 16:49:26 +01:00
Andreas Rheinhardt 2786e5a9ad avcodec/decode: Put lcevc fields into structure of their own
Makes it easier to see that width and height in DecodeContext are
actually lcevc fields.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 16:00:06 +01:00
Andreas Rheinhardt 182b9c7a4a avcodec/decode: Don't allocate LCEVC context for non-video
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 15:29:59 +01:00
Andreas Rheinhardt abf819cff6 avcodec/pthread_frame: Call ff_decode_internal_sync() only during init
It is not necessary to do it more than once, as none of the fields
it sets change after init.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 15:09:43 +01:00
Andreas Rheinhardt 1e0cd4b650 .gitignore: Add config_components.asm
Forgotten in c607aae2b9.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 13:10:58 +00:00
Andreas Rheinhardt a2914a1ec3 avcodec/x86/hpeldsp: Don't use PAVGB macro
It was only needed for MMX and there are no MMX functions here any more.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 12:05:52 +01:00
Andreas Rheinhardt c5e94a564c avcodec/x86/hpeldsp: Don't use saturated addition when unnecessary
The numbers here are small (sums of values unpacked from bytes), so the 16-bit additions cannot overflow.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 12:05:52 +01:00
Andreas Rheinhardt 2be1b2ea96 avcodec/x86/hpeldsp: Actually use constants in registers
Forgotten in 36f92206bb.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-02 12:05:52 +01:00
6 changed files with 109 additions and 103 deletions

1
.gitignore vendored
View File

@ -36,6 +36,7 @@
/ffprobe
/config.asm
/config.h
/config_components.asm
/config_components.h
/coverage.info
/lcov/

View File

@ -47,7 +47,6 @@ OBJS = ac3_parser.o \
get_buffer.o \
imgconvert.o \
jni.o \
lcevcdec.o \
mathtables.o \
mediacodec.o \
mpeg12framerate.o \
@ -130,6 +129,7 @@ OBJS-$(CONFIG_IVIDSP) += ivi_dsp.o
OBJS-$(CONFIG_JNI) += ffjni.o jni.o
OBJS-$(CONFIG_JPEGTABLES) += jpegtables.o
OBJS-$(CONFIG_LCMS2) += fflcms2.o
OBJS-$(CONFIG_LIBLCEVC_DEC) += lcevcdec.o
OBJS-$(CONFIG_LLAUDDSP) += lossless_audiodsp.o
OBJS-$(CONFIG_LLVIDDSP) += lossless_videodsp.o
OBJS-$(CONFIG_LLVIDENCDSP) += lossless_videoencdsp.o

View File

@ -94,10 +94,14 @@ typedef struct DecodeContext {
*/
uint64_t side_data_pref_mask;
FFLCEVCContext *lcevc;
int lcevc_frame;
int width;
int height;
#if CONFIG_LIBLCEVC_DEC
struct {
FFLCEVCContext *ctx;
int frame;
int width;
int height;
} lcevc;
#endif
} DecodeContext;
static DecodeContext *decode_ctx(AVCodecInternal *avci)
@ -1658,26 +1662,29 @@ int ff_attach_decode_data(AVFrame *frame)
static void update_frame_props(AVCodecContext *avctx, AVFrame *frame)
{
#if CONFIG_LIBLCEVC_DEC
AVCodecInternal *avci = avctx->internal;
DecodeContext *dc = decode_ctx(avci);
dc->lcevc_frame = dc->lcevc && avctx->codec_type == AVMEDIA_TYPE_VIDEO &&
dc->lcevc.frame = dc->lcevc.ctx && avctx->codec_type == AVMEDIA_TYPE_VIDEO &&
av_frame_get_side_data(frame, AV_FRAME_DATA_LCEVC);
if (dc->lcevc_frame) {
dc->width = frame->width;
dc->height = frame->height;
if (dc->lcevc.frame) {
dc->lcevc.width = frame->width;
dc->lcevc.height = frame->height;
frame->width = frame->width * 2 / FFMAX(frame->sample_aspect_ratio.den, 1);
frame->height = frame->height * 2 / FFMAX(frame->sample_aspect_ratio.num, 1);
}
#endif
}
static int attach_post_process_data(AVCodecContext *avctx, AVFrame *frame)
{
#if CONFIG_LIBLCEVC_DEC
AVCodecInternal *avci = avctx->internal;
DecodeContext *dc = decode_ctx(avci);
if (dc->lcevc_frame) {
if (dc->lcevc.frame) {
FrameDecodeData *fdd = frame->private_ref;
FFLCEVCFrame *frame_ctx;
int ret;
@ -1692,13 +1699,13 @@ static int attach_post_process_data(AVCodecContext *avctx, AVFrame *frame)
return AVERROR(ENOMEM);
}
frame_ctx->lcevc = av_refstruct_ref(dc->lcevc);
frame_ctx->lcevc = av_refstruct_ref(dc->lcevc.ctx);
frame_ctx->frame->width = frame->width;
frame_ctx->frame->height = frame->height;
frame_ctx->frame->format = frame->format;
frame->width = dc->width;
frame->height = dc->height;
frame->width = dc->lcevc.width;
frame->height = dc->lcevc.height;
ret = avctx->get_buffer2(avctx, frame_ctx->frame, 0);
if (ret < 0) {
@ -1712,7 +1719,8 @@ static int attach_post_process_data(AVCodecContext *avctx, AVFrame *frame)
fdd->post_process_opaque_free = ff_lcevc_unref;
fdd->post_process = ff_lcevc_process;
}
dc->lcevc_frame = 0;
dc->lcevc.frame = 0;
#endif
return 0;
}
@ -1960,7 +1968,7 @@ static av_cold void progress_frame_pool_free_entry_cb(AVRefStructOpaque opaque,
av_frame_free(&progress->f);
}
int ff_decode_preinit(AVCodecContext *avctx)
av_cold int ff_decode_preinit(AVCodecContext *avctx)
{
AVCodecInternal *avci = avctx->internal;
DecodeContext *dc = decode_ctx(avci);
@ -2080,9 +2088,13 @@ int ff_decode_preinit(AVCodecContext *avctx)
return ret;
if (!(avctx->export_side_data & AV_CODEC_EXPORT_DATA_ENHANCEMENTS)) {
ret = ff_lcevc_alloc(&dc->lcevc);
if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
return ret;
if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
#if CONFIG_LIBLCEVC_DEC
ret = ff_lcevc_alloc(&dc->lcevc.ctx);
if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
return ret;
#endif
}
}
return 0;
@ -2292,7 +2304,7 @@ int ff_hwaccel_frame_priv_alloc(AVCodecContext *avctx, void **hwaccel_picture_pr
return 0;
}
void ff_decode_flush_buffers(AVCodecContext *avctx)
av_cold void ff_decode_flush_buffers(AVCodecContext *avctx)
{
AVCodecInternal *avci = avctx->internal;
DecodeContext *dc = decode_ctx(avci);
@ -2310,27 +2322,31 @@ void ff_decode_flush_buffers(AVCodecContext *avctx)
dc->draining_started = 0;
}
AVCodecInternal *ff_decode_internal_alloc(void)
av_cold AVCodecInternal *ff_decode_internal_alloc(void)
{
return av_mallocz(sizeof(DecodeContext));
}
void ff_decode_internal_sync(AVCodecContext *dst, const AVCodecContext *src)
av_cold void ff_decode_internal_sync(AVCodecContext *dst, const AVCodecContext *src)
{
const DecodeContext *src_dc = decode_ctx(src->internal);
DecodeContext *dst_dc = decode_ctx(dst->internal);
dst_dc->initial_pict_type = src_dc->initial_pict_type;
dst_dc->intra_only_flag = src_dc->intra_only_flag;
av_refstruct_replace(&dst_dc->lcevc, src_dc->lcevc);
#if CONFIG_LIBLCEVC_DEC
av_refstruct_replace(&dst_dc->lcevc.ctx, src_dc->lcevc.ctx);
#endif
}
void ff_decode_internal_uninit(AVCodecContext *avctx)
av_cold void ff_decode_internal_uninit(AVCodecContext *avctx)
{
#if CONFIG_LIBLCEVC_DEC
AVCodecInternal *avci = avctx->internal;
DecodeContext *dc = decode_ctx(avci);
av_refstruct_unref(&dc->lcevc);
av_refstruct_unref(&dc->lcevc.ctx);
#endif
}
static int attach_displaymatrix(AVCodecContext *avctx, AVFrame *frame, int orientation)

View File

@ -16,8 +16,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config_components.h"
#include "libavutil/avassert.h"
#include "libavutil/frame.h"
#include "libavutil/imgutils.h"
@ -28,7 +26,6 @@
#include "decode.h"
#include "lcevcdec.h"
#if CONFIG_LIBLCEVC_DEC
static LCEVC_ColorFormat map_format(int format)
{
switch (format) {
@ -249,11 +246,9 @@ static void lcevc_free(AVRefStructOpaque unused, void *obj)
LCEVC_DestroyDecoder(lcevc->decoder);
memset(lcevc, 0, sizeof(*lcevc));
}
#endif
static int lcevc_init(FFLCEVCContext *lcevc, void *logctx)
{
#if CONFIG_LIBLCEVC_DEC
LCEVC_AccelContextHandle dummy = { 0 };
const int32_t event = LCEVC_Log;
@ -272,7 +267,6 @@ static int lcevc_init(FFLCEVCContext *lcevc, void *logctx)
return AVERROR_EXTERNAL;
}
#endif
lcevc->initialized = 1;
return 0;
@ -291,7 +285,6 @@ int ff_lcevc_process(void *logctx, AVFrame *frame)
return ret;
}
#if CONFIG_LIBLCEVC_DEC
av_assert0(frame_ctx->frame);
@ -304,7 +297,6 @@ int ff_lcevc_process(void *logctx, AVFrame *frame)
return ret;
av_frame_remove_side_data(frame, AV_FRAME_DATA_LCEVC);
#endif
return 0;
}
@ -312,11 +304,9 @@ int ff_lcevc_process(void *logctx, AVFrame *frame)
int ff_lcevc_alloc(FFLCEVCContext **plcevc)
{
FFLCEVCContext *lcevc = NULL;
#if CONFIG_LIBLCEVC_DEC
lcevc = av_refstruct_alloc_ext(sizeof(*lcevc), 0, NULL, lcevc_free);
if (!lcevc)
return AVERROR(ENOMEM);
#endif
*plcevc = lcevc;
return 0;
}

View File

@ -401,7 +401,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
dst->hwaccel_flags = src->hwaccel_flags;
av_refstruct_replace(&dst->internal->pool, src->internal->pool);
ff_decode_internal_sync(dst, src);
}
if (for_user) {

View File

@ -54,8 +54,8 @@ cglobal put_pixels8_x2, 4,5
pavgb m0, m2
pavgb m1, m3
%else
PAVGB m0, [r1]
PAVGB m1, [r1+r2]
pavgb m0, [r1]
pavgb m1, [r1+r2]
%endif
mova [r0], m0
mova [r0+r2], m1
@ -69,8 +69,8 @@ cglobal put_pixels8_x2, 4,5
pavgb m0, m2
pavgb m1, m3
%else
PAVGB m0, [r1]
PAVGB m1, [r1+r2]
pavgb m0, [r1]
pavgb m1, [r1+r2]
%endif
add r1, r4
mova [r0], m0
@ -103,8 +103,8 @@ cglobal put_no_rnd_pixels8_x2, 4,5
add r1, r4
psubusb m0, m6
psubusb m2, m6
PAVGB m0, m1
PAVGB m2, m3
pavgb m0, m1
pavgb m2, m3
mova [r0], m0
mova [r0+r2], m2
mova m0, [r1]
@ -115,8 +115,8 @@ cglobal put_no_rnd_pixels8_x2, 4,5
add r1, r4
psubusb m0, m6
psubusb m2, m6
PAVGB m0, m1
PAVGB m2, m3
pavgb m0, m1
pavgb m2, m3
mova [r0], m0
mova [r0+r2], m2
add r0, r4
@ -143,8 +143,8 @@ cglobal %1_no_rnd_pixels8_x2_exact, 4,5
pxor m2, m4
pxor m1, m4
pxor m3, m4
PAVGB m0, m1
PAVGB m2, m3
pavgb m0, m1
pavgb m2, m3
pxor m0, m4
pxor m2, m4
%ifidn %1, avg
@ -161,8 +161,8 @@ cglobal %1_no_rnd_pixels8_x2_exact, 4,5
pxor m1, m4
pxor m2, m4
pxor m3, m4
PAVGB m0, m1
PAVGB m2, m3
pavgb m0, m1
pavgb m2, m3
pxor m0, m4
pxor m2, m4
%ifidn %1, avg
@ -198,16 +198,16 @@ cglobal put_pixels8_y2, 4,5
movu m1, [r1+r2]
movu m2, [r1+r4]
add r1, r4
PAVGB m0, m1
PAVGB m1, m2
pavgb m0, m1
pavgb m1, m2
mova [r0+r2], m0
mova [r0+r4], m1
movu m1, [r1+r2]
movu m0, [r1+r4]
add r0, r4
add r1, r4
PAVGB m2, m1
PAVGB m1, m0
pavgb m2, m1
pavgb m1, m0
mova [r0+r2], m2
mova [r0+r4], m1
add r0, r4
@ -235,8 +235,8 @@ cglobal put_no_rnd_pixels8_y2, 4,5
mova m2, [r1+r4]
add r1, r4
psubusb m1, m6
PAVGB m0, m1
PAVGB m1, m2
pavgb m0, m1
pavgb m1, m2
mova [r0+r2], m0
mova [r0+r4], m1
mova m1, [r1+r2]
@ -244,8 +244,8 @@ cglobal put_no_rnd_pixels8_y2, 4,5
add r0, r4
add r1, r4
psubusb m1, m6
PAVGB m2, m1
PAVGB m1, m0
pavgb m2, m1
pavgb m1, m0
mova [r0+r2], m2
mova [r0+r4], m1
add r0, r4
@ -271,8 +271,8 @@ cglobal %1_no_rnd_pixels8_y2_exact, 4,5
movu m2, [r1+r2]
pxor m1, m3
pxor m2, m3
PAVGB m0, m1
PAVGB m1, m2
pavgb m0, m1
pavgb m1, m2
pxor m0, m3
pxor m1, m3
%ifidn %1, avg
@ -285,8 +285,8 @@ cglobal %1_no_rnd_pixels8_y2_exact, 4,5
movu m0, [r1+r4]
pxor m1, m3
pxor m0, m3
PAVGB m2, m1
PAVGB m1, m0
pavgb m2, m1
pavgb m1, m0
pxor m2, m3
pxor m1, m3
%ifidn %1, avg
@ -325,11 +325,11 @@ cglobal avg_pixels8_x2, 4,5
pavgb m0, m1
pavgb m2, m3
%else
PAVGB m0, [r1+1], m3, m5
PAVGB m2, [r1+r2+1], m4, m5
pavgb m0, [r1+1]
pavgb m2, [r1+r2+1]
%endif
PAVGB m0, [r0], m3, m5
PAVGB m2, [r0+r2], m4, m5
pavgb m0, [r0]
pavgb m2, [r0+r2]
add r1, r4
mova [r0], m0
mova [r0+r2], m2
@ -341,13 +341,13 @@ cglobal avg_pixels8_x2, 4,5
pavgb m0, m1
pavgb m2, m3
%else
PAVGB m0, [r1+1], m3, m5
PAVGB m2, [r1+r2+1], m4, m5
pavgb m0, [r1+1]
pavgb m2, [r1+r2+1]
%endif
add r0, r4
add r1, r4
PAVGB m0, [r0], m3, m5
PAVGB m2, [r0+r2], m4, m5
pavgb m0, [r0]
pavgb m2, [r0+r2]
mova [r0], m0
mova [r0+r2], m2
add r0, r4
@ -377,20 +377,20 @@ cglobal avg_pixels8_y2, 4,5
movu m1, [r1+r2]
movu m2, [r1+r4]
add r1, r4
PAVGB m0, m1
PAVGB m1, m2
PAVGB m0, [r0+r2]
PAVGB m1, [r0+r4]
pavgb m0, m1
pavgb m1, m2
pavgb m0, [r0+r2]
pavgb m1, [r0+r4]
mova [r0+r2], m0
mova [r0+r4], m1
movu m1, [r1+r2]
movu m0, [r1+r4]
PAVGB m2, m1
PAVGB m1, m0
pavgb m2, m1
pavgb m1, m0
add r0, r4
add r1, r4
PAVGB m2, [r0+r2]
PAVGB m1, [r0+r4]
pavgb m2, [r0+r2]
pavgb m1, [r0+r4]
mova [r0+r2], m2
mova [r0+r4], m1
add r0, r4
@ -423,12 +423,12 @@ cglobal %1%3_pixels8_xy2, 4,5,5
punpcklbw m0, m1
pmaddubsw m0, m4
%ifidn %3, _no_rnd
paddusw m2, m3
paddusw m2, m0
paddw m2, m3
paddw m2, m0
psrlw m2, 2
%else
paddusw m2, m0
pmulhrsw m2, [pw_8192]
paddw m2, m0
pmulhrsw m2, m3
%endif
%ifidn %1, avg
movh m1, [r0+r4]
@ -445,12 +445,12 @@ cglobal %1%3_pixels8_xy2, 4,5,5
punpcklbw m2, m1
pmaddubsw m2, m4
%ifidn %3, _no_rnd
paddusw m0, m3
paddusw m0, m2
paddw m0, m3
paddw m0, m2
psrlw m0, 2
%else
paddusw m0, m2
pmulhrsw m0, [pw_8192]
paddw m0, m2
pmulhrsw m0, m3
%endif
%ifidn %1, avg
movh m1, [r0+r4]
@ -485,8 +485,8 @@ cglobal %1%3_pixels16_xy2, 4,5,8
punpcklbw m4, m7
punpckhbw m1, m7
punpckhbw m5, m7
paddusw m4, m0
paddusw m5, m1
paddw m4, m0
paddw m5, m1
xor r4, r4
add r1, r2
.loop:
@ -498,18 +498,18 @@ cglobal %1%3_pixels16_xy2, 4,5,8
punpcklbw m2, m7
punpckhbw m1, m7
punpckhbw m3, m7
paddusw m0, m2
paddusw m1, m3
paddusw m4, m6
paddusw m5, m6
paddusw m4, m0
paddusw m5, m1
paddw m0, m2
paddw m1, m3
paddw m4, m6
paddw m5, m6
paddw m4, m0
paddw m5, m1
psrlw m4, 2
psrlw m5, 2
%ifidn %1, avg
mova m3, [r0+r4]
packuswb m4, m5
PAVGB m4, m3
pavgb m4, m3
%else
packuswb m4, m5
%endif
@ -524,18 +524,18 @@ cglobal %1%3_pixels16_xy2, 4,5,8
punpcklbw m4, m7
punpckhbw m3, m7
punpckhbw m5, m7
paddusw m4, m2
paddusw m5, m3
paddusw m0, m6
paddusw m1, m6
paddusw m0, m4
paddusw m1, m5
paddw m4, m2
paddw m5, m3
paddw m0, m6
paddw m1, m6
paddw m0, m4
paddw m1, m5
psrlw m0, 2
psrlw m1, 2
%ifidn %1, avg
mova m3, [r0+r4]
packuswb m0, m1
PAVGB m0, m3
pavgb m0, m3
%else
packuswb m0, m1
%endif
@ -567,8 +567,8 @@ cglobal %1_pixels16_xy2, 4,5,%2
movu m3, [r1+r4+1]
pmaddubsw m2, m5
pmaddubsw m3, m5
paddusw m0, m2
paddusw m1, m3
paddw m0, m2
paddw m1, m3
pmulhrsw m0, [pw_8192]
pmulhrsw m1, [pw_8192]
%ifidn %1, avg
@ -587,8 +587,8 @@ cglobal %1_pixels16_xy2, 4,5,%2
movu m1, [r1+r4+1]
pmaddubsw m0, m5
pmaddubsw m1, m5
paddusw m2, m0
paddusw m3, m1
paddw m2, m0
paddw m3, m1
pmulhrsw m2, [pw_8192]
pmulhrsw m3, [pw_8192]
%ifidn %1, avg