Compare commits

...

5 Commits

Author SHA1 Message Date
Rémi Denis-Courmont 7435a3180d riscv/cpu: add av_const for VLEN helpers
This is read from a read-only constant system register.
2025-11-07 08:43:23 +00:00
Rémi Denis-Courmont 39abb1ac94 pixblockdsp: avoid segments on R-V V diff_pixels_unaligned
On SpacemiT X86, before:
diff_pixels_unaligned_rvv_i32:                         250.2 ( 0.59x)
...after:
diff_pixels_unaligned_rvv_i32:                          56.9 ( 2.60x)
2025-11-07 08:43:23 +00:00
Rémi Denis-Courmont c17d304e1f pixblockdsp: avoid segments on R-V V get_pixels_unaligned
On SpacemiT X86, before:
get_pixels_unaligned_rvv_i32:                          172.4 ( 0.37x)
...after:
get_pixels_unaligned_rvv_i32:                           34.4 ( 1.84x)
2025-11-07 08:43:23 +00:00
Rémi Denis-Courmont e3b0d58394 Revert "lavc/pixblockdsp: rework R-V V get_pixels_unaligned"
The optimised version does not work if the stride is not a multiple 8,
which can occur as reproduce by vsynth3-asv1 and vsynth3-asv2 tests.

This reverts commit 02594c8c01.

Conflicts:
	libavcodec/riscv/pixblockdsp_init.c
	libavcodec/riscv/pixblockdsp_rvv.S
2025-11-07 08:43:23 +00:00
Michael Yang df1fd43db4 libavcodec/vulkan_encode_av1: fix non-monotonic DTS
Combine P-frame with following B-frame into a single packet with the
latter's order. Emit a tail packet with a show_existing_frame header
to show it at the correct PTS.
2025-11-06 22:06:42 +00:00
6 changed files with 153 additions and 41 deletions

View File

@ -65,16 +65,19 @@ av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
c->diff_pixels = ff_diff_pixels_unaligned_rvv;
c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv;
}
if ((cpu_flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
if (!high_bit_depth) {
c->get_pixels = ff_get_pixels_8_rvv;
c->get_pixels = ff_get_pixels_unaligned_8_rvv;
c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv;
}
if (cpu_flags & AV_CPU_FLAG_RVV_I64) {
if (!high_bit_depth)
c->get_pixels = ff_get_pixels_8_rvv;
c->diff_pixels = ff_diff_pixels_rvv;
}
}
#endif
#endif
}

View File

@ -24,7 +24,6 @@ func ff_get_pixels_8_rvv, zve64x
lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
1:
vlse64.v v16, (a1), a2
vsetvli zero, t0, e8, m4, ta, ma
vwcvtu.x.x.v v8, v16
@ -32,24 +31,24 @@ func ff_get_pixels_8_rvv, zve64x
ret
endfunc
func ff_get_pixels_unaligned_8_rvv, zve64x
func ff_get_pixels_unaligned_8_rvv, zve32x
lpad 0
andi t1, a1, 7
vsetivli zero, 8, e64, m4, ta, ma
li t0, 8 * 8
beqz t1, 1b
andi a1, a1, -8
slli t2, t1, 3
addi t1, a1, 8
sub t3, t0, t2
vlse64.v v16, (a1), a2
vlse64.v v24, (t1), a2
vsrl.vx v16, v16, t2
vsll.vx v24, v24, t3
vor.vv v16, v16, v24
vsetvli zero, t0, e8, m4, ta, ma
li t2, 8
vsetivli zero, 8, e8, mf2, ta, ma
1:
add t1, a1, a2
vle8.v v16, (a1)
addi t0, a0, 2 * 8
vle8.v v17, (t1)
addi t2, t2, -2
vwcvtu.x.x.v v8, v16
vwcvtu.x.x.v v9, v17
vse16.v v8, (a0)
add a1, t1, a2
vse16.v v9, (t0)
addi a0, t0, 2 * 8
bnez t2, 1b
ret
endfunc
@ -67,17 +66,18 @@ endfunc
func ff_diff_pixels_unaligned_rvv, zve32x
lpad 0
li t3, 8
vsetivli zero, 8, e8, mf2, ta, ma
vlsseg8e8.v v16, (a1), a3
vlsseg8e8.v v24, (a2), a3
1:
vle8.v v16, (a1)
add a1, a1, a3
vle8.v v24, (a2)
add a2, a2, a3
vwsubu.vv v8, v16, v24
vwsubu.vv v9, v17, v25
vwsubu.vv v10, v18, v26
vwsubu.vv v11, v19, v27
vwsubu.vv v12, v20, v28
vwsubu.vv v13, v21, v29
vwsubu.vv v14, v22, v30
vwsubu.vv v15, v23, v31
vsseg8e16.v v8, (a0)
addi t3, t3, -1
vse16.v v8, (a0)
addi a0, a0, 2 * 8
bnez t3, 1b
ret
endfunc

View File

@ -464,6 +464,9 @@ static int vulkan_encode_output(AVCodecContext *avctx,
VkResult ret;
FFVulkanEncodePicture *vp = base_pic->priv;
FFVulkanEncodeContext *ctx = avctx->priv_data;
FFHWBaseEncodeContext *base_ctx = &ctx->base;
AVPacket *pkt_ptr = pkt;
FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
uint32_t *query_data;
@ -513,6 +516,40 @@ static int vulkan_encode_output(AVCodecContext *avctx,
vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf);
}
if (vp->non_independent_frame) {
av_assert0(!ctx->prev_buf_ref);
size_t prev_buf_size = vp->slices_offset + query_data[0] + query_data[1];
ctx->prev_buf_ref = vp->pkt_buf;
ctx->prev_buf_size = prev_buf_size;
vp->pkt_buf = NULL;
if (vp->tail_size) {
if (base_ctx->tail_pkt->size)
return AVERROR_BUG;
ret = ff_get_encode_buffer(avctx, base_ctx->tail_pkt, vp->tail_size, 0);
if (ret < 0)
return ret;
memcpy(base_ctx->tail_pkt->data, vp->tail_data, vp->tail_size);
pkt_ptr = base_ctx->tail_pkt;
}
} else {
if (ctx->prev_buf_ref) {
FFVkBuffer *prev_sd_buf = (FFVkBuffer *)ctx->prev_buf_ref->data;
size_t prev_size = ctx->prev_buf_size;
size_t size = (vp->slices_offset + query_data[0] + query_data[1]);
ret = ff_get_encode_buffer(avctx, pkt, prev_size + size, 0);
if (ret < 0)
return ret;
memcpy(pkt->data, prev_sd_buf->mapped_mem, prev_size);
memcpy(pkt->data + prev_size, sd_buf->mapped_mem, size);
av_buffer_unref(&ctx->prev_buf_ref);
av_buffer_unref(&vp->pkt_buf);
} else {
pkt->data = sd_buf->mapped_mem;
pkt->size = vp->slices_offset + /* base offset */
query_data[0] /* secondary offset */ +
@ -521,12 +558,14 @@ static int vulkan_encode_output(AVCodecContext *avctx,
/* Move reference */
pkt->buf = vp->pkt_buf;
vp->pkt_buf = NULL;
}
}
av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
base_pic->display_order, base_pic->encode_order);
return ff_hw_base_encode_set_output_property(&ctx->base, avctx,
base_pic, pkt,
base_pic, pkt_ptr,
ctx->codec->flags & VK_ENC_FLAG_NO_DELAY);
}

View File

@ -57,6 +57,10 @@ typedef struct FFVulkanEncodePicture {
FFVkExecContext *exec;
AVBufferRef *pkt_buf;
int slices_offset;
int non_independent_frame;
char tail_data[16];
size_t tail_size;
} FFVulkanEncodePicture;
/**
@ -192,6 +196,9 @@ typedef struct FFVulkanEncodeContext {
FFVkExecPool enc_pool;
FFHWBaseEncodePicture *slots[32];
AVBufferRef *prev_buf_ref;
size_t prev_buf_size;
} FFVulkanEncodeContext;
#define VULKAN_ENCODE_COMMON_OPTIONS \

View File

@ -80,6 +80,8 @@ typedef struct VulkanEncodeAV1Context {
AV1RawOBU seq_hdr_obu;
AV1RawOBU meta_cll_obu;
AV1RawOBU meta_mastering_obu;
AV1RawOBU hidden_obu;
AV1RawOBU tail_obu;
VkVideoEncodeAV1ProfileInfoKHR profile;
@ -172,7 +174,6 @@ static void set_name_slot(int slot, int *slot_indices, uint32_t allowed_idx, int
av_assert0(0);
}
static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
VkVideoEncodeInfoKHR *encode_info)
{
@ -542,6 +543,45 @@ static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
}
}
FFVulkanEncodePicture *vp = pic->priv;
vp->tail_size = 0;
vp->non_independent_frame = pic->encode_order < pic->display_order;
if (vp->non_independent_frame) {
AV1RawOBU *obu = &enc->hidden_obu;
AV1RawFrameHeader *fh = &obu->obu.frame_header;
/** hidden frame header */
memset(obu, 0, sizeof(*obu));
obu->header.obu_type = AV1_OBU_FRAME_HEADER;
obu->header.obu_has_size_field = 1;
fh->frame_type = AV1_FRAME_INTER;
fh->refresh_frame_flags = 1 << ap->slot;
fh->frame_width_minus_1 = base_ctx->surface_width - 1;
fh->frame_height_minus_1 = base_ctx->surface_height - 1;
fh->render_width_minus_1 = fh->frame_width_minus_1;
fh->render_height_minus_1 = fh->frame_height_minus_1;
memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas,
AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
obu = &enc->tail_obu;
fh = &obu->obu.frame_header;
/** tail frame header */
memset(obu, 0, sizeof(*obu));
obu->header.obu_type = AV1_OBU_FRAME_HEADER;
obu->header.obu_has_size_field = 1;
fh->show_existing_frame = 1;
fh->frame_to_show_map_idx = ap->slot != 0;
fh->frame_type = AV1_FRAME_INTER;
fh->frame_width_minus_1 = base_ctx->surface_width - 1;
fh->frame_height_minus_1 = base_ctx->surface_height - 1;
fh->render_width_minus_1 = fh->frame_width_minus_1;
fh->render_height_minus_1 = fh->frame_height_minus_1;
}
return 0;
}
@ -1079,8 +1119,31 @@ static int write_extra_headers(AVCodecContext *avctx,
int err;
VulkanEncodeAV1Context *enc = avctx->priv_data;
VulkanEncodeAV1Picture *ap = base_pic->codec_priv;
FFVulkanEncodePicture *vp = base_pic->priv;
CodedBitstreamFragment *obu = &enc->current_access_unit;
if (vp->non_independent_frame) {
err = vulkan_encode_av1_add_obu(avctx, obu, AV1_OBU_FRAME_HEADER, &enc->hidden_obu);
if (err < 0)
goto fail;
// Only for tracking ref frame in context, not to be output
err = ff_cbs_write_fragment_data(enc->cbs, obu);
if (err < 0)
goto fail;
ff_cbs_fragment_reset(obu);
((CodedBitstreamAV1Context *)enc->cbs->priv_data)->seen_frame_header = 0;
err = vulkan_encode_av1_add_obu(avctx, obu, AV1_OBU_FRAME_HEADER, &enc->tail_obu);
if (err < 0)
goto fail;
err = vulkan_encode_av1_write_obu(avctx, vp->tail_data, &vp->tail_size, obu);
if (err < 0)
goto fail;
}
if (ap->units_needed & UNIT_MASTERING_DISPLAY) {
err = vulkan_encode_av1_add_obu(avctx, obu,
AV1_OBU_METADATA,

View File

@ -45,7 +45,7 @@ static inline av_const bool ff_rv_zbb_support(void)
* Returns the vector size in bytes (always a power of two and at least 4).
* This is undefined behaviour if vectors are not implemented.
*/
static inline size_t ff_get_rv_vlenb(void)
static inline av_const size_t ff_get_rv_vlenb(void)
{
size_t vlenb;
@ -61,7 +61,7 @@ static inline size_t ff_get_rv_vlenb(void)
* Checks that the vector bit-size is at least the given value.
* This is potentially undefined behaviour if vectors are not implemented.
*/
static inline bool ff_rv_vlen_least(unsigned int bits)
static inline av_const bool ff_rv_vlen_least(unsigned int bits)
{
#ifdef __riscv_v_min_vlen
if (bits <= __riscv_v_min_vlen)