Compare commits

...

5 Commits

Author SHA1 Message Date
Rémi Denis-Courmont 7435a3180d riscv/cpu: add av_const for VLEN helpers
This is read from a read-only constant system register.
2025-11-07 08:43:23 +00:00
Rémi Denis-Courmont 39abb1ac94 pixblockdsp: avoid segments on R-V V diff_pixels_unaligned
On SpacemiT X86, before:
diff_pixels_unaligned_rvv_i32:                         250.2 ( 0.59x)
...after:
diff_pixels_unaligned_rvv_i32:                          56.9 ( 2.60x)
2025-11-07 08:43:23 +00:00
Rémi Denis-Courmont c17d304e1f pixblockdsp: avoid segments on R-V V get_pixels_unaligned
On SpacemiT X86, before:
get_pixels_unaligned_rvv_i32:                          172.4 ( 0.37x)
...after:
get_pixels_unaligned_rvv_i32:                           34.4 ( 1.84x)
2025-11-07 08:43:23 +00:00
Rémi Denis-Courmont e3b0d58394 Revert "lavc/pixblockdsp: rework R-V V get_pixels_unaligned"
The optimised version does not work if the stride is not a multiple 8,
which can occur as reproduce by vsynth3-asv1 and vsynth3-asv2 tests.

This reverts commit 02594c8c01.

Conflicts:
	libavcodec/riscv/pixblockdsp_init.c
	libavcodec/riscv/pixblockdsp_rvv.S
2025-11-07 08:43:23 +00:00
Michael Yang df1fd43db4 libavcodec/vulkan_encode_av1: fix non-monotonic DTS
Combine P-frame with following B-frame into a single packet with the
latter's order. Emit a tail packet with a show_existing_frame header
to show it at the correct PTS.
2025-11-06 22:06:42 +00:00
6 changed files with 153 additions and 41 deletions

View File

@ -65,15 +65,18 @@ av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) { if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
c->diff_pixels = ff_diff_pixels_unaligned_rvv; c->diff_pixels = ff_diff_pixels_unaligned_rvv;
c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv; c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv;
}
if ((cpu_flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
if (!high_bit_depth) { if (!high_bit_depth) {
c->get_pixels = ff_get_pixels_8_rvv; c->get_pixels = ff_get_pixels_unaligned_8_rvv;
c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv; c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv;
} }
c->diff_pixels = ff_diff_pixels_rvv; if (cpu_flags & AV_CPU_FLAG_RVV_I64) {
if (!high_bit_depth)
c->get_pixels = ff_get_pixels_8_rvv;
c->diff_pixels = ff_diff_pixels_rvv;
}
} }
#endif #endif
#endif #endif

View File

@ -24,7 +24,6 @@ func ff_get_pixels_8_rvv, zve64x
lpad 0 lpad 0
vsetivli zero, 8, e8, mf2, ta, ma vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8 li t0, 8 * 8
1:
vlse64.v v16, (a1), a2 vlse64.v v16, (a1), a2
vsetvli zero, t0, e8, m4, ta, ma vsetvli zero, t0, e8, m4, ta, ma
vwcvtu.x.x.v v8, v16 vwcvtu.x.x.v v8, v16
@ -32,24 +31,24 @@ func ff_get_pixels_8_rvv, zve64x
ret ret
endfunc endfunc
func ff_get_pixels_unaligned_8_rvv, zve64x func ff_get_pixels_unaligned_8_rvv, zve32x
lpad 0 lpad 0
andi t1, a1, 7 li t2, 8
vsetivli zero, 8, e64, m4, ta, ma vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8 1:
beqz t1, 1b add t1, a1, a2
andi a1, a1, -8 vle8.v v16, (a1)
slli t2, t1, 3 addi t0, a0, 2 * 8
addi t1, a1, 8 vle8.v v17, (t1)
sub t3, t0, t2 addi t2, t2, -2
vlse64.v v16, (a1), a2
vlse64.v v24, (t1), a2
vsrl.vx v16, v16, t2
vsll.vx v24, v24, t3
vor.vv v16, v16, v24
vsetvli zero, t0, e8, m4, ta, ma
vwcvtu.x.x.v v8, v16 vwcvtu.x.x.v v8, v16
vwcvtu.x.x.v v9, v17
vse16.v v8, (a0) vse16.v v8, (a0)
add a1, t1, a2
vse16.v v9, (t0)
addi a0, t0, 2 * 8
bnez t2, 1b
ret ret
endfunc endfunc
@ -67,17 +66,18 @@ endfunc
func ff_diff_pixels_unaligned_rvv, zve32x func ff_diff_pixels_unaligned_rvv, zve32x
lpad 0 lpad 0
li t3, 8
vsetivli zero, 8, e8, mf2, ta, ma vsetivli zero, 8, e8, mf2, ta, ma
vlsseg8e8.v v16, (a1), a3 1:
vlsseg8e8.v v24, (a2), a3 vle8.v v16, (a1)
add a1, a1, a3
vle8.v v24, (a2)
add a2, a2, a3
vwsubu.vv v8, v16, v24 vwsubu.vv v8, v16, v24
vwsubu.vv v9, v17, v25 addi t3, t3, -1
vwsubu.vv v10, v18, v26 vse16.v v8, (a0)
vwsubu.vv v11, v19, v27 addi a0, a0, 2 * 8
vwsubu.vv v12, v20, v28 bnez t3, 1b
vwsubu.vv v13, v21, v29
vwsubu.vv v14, v22, v30
vwsubu.vv v15, v23, v31
vsseg8e16.v v8, (a0)
ret ret
endfunc endfunc

View File

@ -464,6 +464,9 @@ static int vulkan_encode_output(AVCodecContext *avctx,
VkResult ret; VkResult ret;
FFVulkanEncodePicture *vp = base_pic->priv; FFVulkanEncodePicture *vp = base_pic->priv;
FFVulkanEncodeContext *ctx = avctx->priv_data; FFVulkanEncodeContext *ctx = avctx->priv_data;
FFHWBaseEncodeContext *base_ctx = &ctx->base;
AVPacket *pkt_ptr = pkt;
FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data; FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
uint32_t *query_data; uint32_t *query_data;
@ -513,20 +516,56 @@ static int vulkan_encode_output(AVCodecContext *avctx,
vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf); vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf);
} }
pkt->data = sd_buf->mapped_mem; if (vp->non_independent_frame) {
pkt->size = vp->slices_offset + /* base offset */ av_assert0(!ctx->prev_buf_ref);
query_data[0] /* secondary offset */ + size_t prev_buf_size = vp->slices_offset + query_data[0] + query_data[1];
query_data[1] /* size */; ctx->prev_buf_ref = vp->pkt_buf;
ctx->prev_buf_size = prev_buf_size;
vp->pkt_buf = NULL;
/* Move reference */ if (vp->tail_size) {
pkt->buf = vp->pkt_buf; if (base_ctx->tail_pkt->size)
vp->pkt_buf = NULL; return AVERROR_BUG;
ret = ff_get_encode_buffer(avctx, base_ctx->tail_pkt, vp->tail_size, 0);
if (ret < 0)
return ret;
memcpy(base_ctx->tail_pkt->data, vp->tail_data, vp->tail_size);
pkt_ptr = base_ctx->tail_pkt;
}
} else {
if (ctx->prev_buf_ref) {
FFVkBuffer *prev_sd_buf = (FFVkBuffer *)ctx->prev_buf_ref->data;
size_t prev_size = ctx->prev_buf_size;
size_t size = (vp->slices_offset + query_data[0] + query_data[1]);
ret = ff_get_encode_buffer(avctx, pkt, prev_size + size, 0);
if (ret < 0)
return ret;
memcpy(pkt->data, prev_sd_buf->mapped_mem, prev_size);
memcpy(pkt->data + prev_size, sd_buf->mapped_mem, size);
av_buffer_unref(&ctx->prev_buf_ref);
av_buffer_unref(&vp->pkt_buf);
} else {
pkt->data = sd_buf->mapped_mem;
pkt->size = vp->slices_offset + /* base offset */
query_data[0] /* secondary offset */ +
query_data[1] /* size */;
/* Move reference */
pkt->buf = vp->pkt_buf;
vp->pkt_buf = NULL;
}
}
av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n", av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
base_pic->display_order, base_pic->encode_order); base_pic->display_order, base_pic->encode_order);
return ff_hw_base_encode_set_output_property(&ctx->base, avctx, return ff_hw_base_encode_set_output_property(&ctx->base, avctx,
base_pic, pkt, base_pic, pkt_ptr,
ctx->codec->flags & VK_ENC_FLAG_NO_DELAY); ctx->codec->flags & VK_ENC_FLAG_NO_DELAY);
} }

View File

@ -57,6 +57,10 @@ typedef struct FFVulkanEncodePicture {
FFVkExecContext *exec; FFVkExecContext *exec;
AVBufferRef *pkt_buf; AVBufferRef *pkt_buf;
int slices_offset; int slices_offset;
int non_independent_frame;
char tail_data[16];
size_t tail_size;
} FFVulkanEncodePicture; } FFVulkanEncodePicture;
/** /**
@ -192,6 +196,9 @@ typedef struct FFVulkanEncodeContext {
FFVkExecPool enc_pool; FFVkExecPool enc_pool;
FFHWBaseEncodePicture *slots[32]; FFHWBaseEncodePicture *slots[32];
AVBufferRef *prev_buf_ref;
size_t prev_buf_size;
} FFVulkanEncodeContext; } FFVulkanEncodeContext;
#define VULKAN_ENCODE_COMMON_OPTIONS \ #define VULKAN_ENCODE_COMMON_OPTIONS \

View File

@ -80,6 +80,8 @@ typedef struct VulkanEncodeAV1Context {
AV1RawOBU seq_hdr_obu; AV1RawOBU seq_hdr_obu;
AV1RawOBU meta_cll_obu; AV1RawOBU meta_cll_obu;
AV1RawOBU meta_mastering_obu; AV1RawOBU meta_mastering_obu;
AV1RawOBU hidden_obu;
AV1RawOBU tail_obu;
VkVideoEncodeAV1ProfileInfoKHR profile; VkVideoEncodeAV1ProfileInfoKHR profile;
@ -172,7 +174,6 @@ static void set_name_slot(int slot, int *slot_indices, uint32_t allowed_idx, int
av_assert0(0); av_assert0(0);
} }
static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
VkVideoEncodeInfoKHR *encode_info) VkVideoEncodeInfoKHR *encode_info)
{ {
@ -542,6 +543,45 @@ static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
} }
} }
FFVulkanEncodePicture *vp = pic->priv;
vp->tail_size = 0;
vp->non_independent_frame = pic->encode_order < pic->display_order;
if (vp->non_independent_frame) {
AV1RawOBU *obu = &enc->hidden_obu;
AV1RawFrameHeader *fh = &obu->obu.frame_header;
/** hidden frame header */
memset(obu, 0, sizeof(*obu));
obu->header.obu_type = AV1_OBU_FRAME_HEADER;
obu->header.obu_has_size_field = 1;
fh->frame_type = AV1_FRAME_INTER;
fh->refresh_frame_flags = 1 << ap->slot;
fh->frame_width_minus_1 = base_ctx->surface_width - 1;
fh->frame_height_minus_1 = base_ctx->surface_height - 1;
fh->render_width_minus_1 = fh->frame_width_minus_1;
fh->render_height_minus_1 = fh->frame_height_minus_1;
memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas,
AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
obu = &enc->tail_obu;
fh = &obu->obu.frame_header;
/** tail frame header */
memset(obu, 0, sizeof(*obu));
obu->header.obu_type = AV1_OBU_FRAME_HEADER;
obu->header.obu_has_size_field = 1;
fh->show_existing_frame = 1;
fh->frame_to_show_map_idx = ap->slot != 0;
fh->frame_type = AV1_FRAME_INTER;
fh->frame_width_minus_1 = base_ctx->surface_width - 1;
fh->frame_height_minus_1 = base_ctx->surface_height - 1;
fh->render_width_minus_1 = fh->frame_width_minus_1;
fh->render_height_minus_1 = fh->frame_height_minus_1;
}
return 0; return 0;
} }
@ -1079,8 +1119,31 @@ static int write_extra_headers(AVCodecContext *avctx,
int err; int err;
VulkanEncodeAV1Context *enc = avctx->priv_data; VulkanEncodeAV1Context *enc = avctx->priv_data;
VulkanEncodeAV1Picture *ap = base_pic->codec_priv; VulkanEncodeAV1Picture *ap = base_pic->codec_priv;
FFVulkanEncodePicture *vp = base_pic->priv;
CodedBitstreamFragment *obu = &enc->current_access_unit; CodedBitstreamFragment *obu = &enc->current_access_unit;
if (vp->non_independent_frame) {
err = vulkan_encode_av1_add_obu(avctx, obu, AV1_OBU_FRAME_HEADER, &enc->hidden_obu);
if (err < 0)
goto fail;
// Only for tracking ref frame in context, not to be output
err = ff_cbs_write_fragment_data(enc->cbs, obu);
if (err < 0)
goto fail;
ff_cbs_fragment_reset(obu);
((CodedBitstreamAV1Context *)enc->cbs->priv_data)->seen_frame_header = 0;
err = vulkan_encode_av1_add_obu(avctx, obu, AV1_OBU_FRAME_HEADER, &enc->tail_obu);
if (err < 0)
goto fail;
err = vulkan_encode_av1_write_obu(avctx, vp->tail_data, &vp->tail_size, obu);
if (err < 0)
goto fail;
}
if (ap->units_needed & UNIT_MASTERING_DISPLAY) { if (ap->units_needed & UNIT_MASTERING_DISPLAY) {
err = vulkan_encode_av1_add_obu(avctx, obu, err = vulkan_encode_av1_add_obu(avctx, obu,
AV1_OBU_METADATA, AV1_OBU_METADATA,

View File

@ -45,7 +45,7 @@ static inline av_const bool ff_rv_zbb_support(void)
* Returns the vector size in bytes (always a power of two and at least 4). * Returns the vector size in bytes (always a power of two and at least 4).
* This is undefined behaviour if vectors are not implemented. * This is undefined behaviour if vectors are not implemented.
*/ */
static inline size_t ff_get_rv_vlenb(void) static inline av_const size_t ff_get_rv_vlenb(void)
{ {
size_t vlenb; size_t vlenb;
@ -61,7 +61,7 @@ static inline size_t ff_get_rv_vlenb(void)
* Checks that the vector bit-size is at least the given value. * Checks that the vector bit-size is at least the given value.
* This is potentially undefined behaviour if vectors are not implemented. * This is potentially undefined behaviour if vectors are not implemented.
*/ */
static inline bool ff_rv_vlen_least(unsigned int bits) static inline av_const bool ff_rv_vlen_least(unsigned int bits)
{ {
#ifdef __riscv_v_min_vlen #ifdef __riscv_v_min_vlen
if (bits <= __riscv_v_min_vlen) if (bits <= __riscv_v_min_vlen)