mirror of https://github.com/FFmpeg/FFmpeg.git
Compare commits
5 Commits
dd80ecb666
...
7435a3180d
| Author | SHA1 | Date |
|---|---|---|
|
|
7435a3180d | |
|
|
39abb1ac94 | |
|
|
c17d304e1f | |
|
|
e3b0d58394 | |
|
|
df1fd43db4 |
|
|
@ -65,15 +65,18 @@ av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
|
||||||
if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
|
if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
|
||||||
c->diff_pixels = ff_diff_pixels_unaligned_rvv;
|
c->diff_pixels = ff_diff_pixels_unaligned_rvv;
|
||||||
c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv;
|
c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv;
|
||||||
}
|
|
||||||
|
|
||||||
if ((cpu_flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
|
|
||||||
if (!high_bit_depth) {
|
if (!high_bit_depth) {
|
||||||
c->get_pixels = ff_get_pixels_8_rvv;
|
c->get_pixels = ff_get_pixels_unaligned_8_rvv;
|
||||||
c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv;
|
c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv;
|
||||||
}
|
}
|
||||||
|
|
||||||
c->diff_pixels = ff_diff_pixels_rvv;
|
if (cpu_flags & AV_CPU_FLAG_RVV_I64) {
|
||||||
|
if (!high_bit_depth)
|
||||||
|
c->get_pixels = ff_get_pixels_8_rvv;
|
||||||
|
|
||||||
|
c->diff_pixels = ff_diff_pixels_rvv;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@ func ff_get_pixels_8_rvv, zve64x
|
||||||
lpad 0
|
lpad 0
|
||||||
vsetivli zero, 8, e8, mf2, ta, ma
|
vsetivli zero, 8, e8, mf2, ta, ma
|
||||||
li t0, 8 * 8
|
li t0, 8 * 8
|
||||||
1:
|
|
||||||
vlse64.v v16, (a1), a2
|
vlse64.v v16, (a1), a2
|
||||||
vsetvli zero, t0, e8, m4, ta, ma
|
vsetvli zero, t0, e8, m4, ta, ma
|
||||||
vwcvtu.x.x.v v8, v16
|
vwcvtu.x.x.v v8, v16
|
||||||
|
|
@ -32,24 +31,24 @@ func ff_get_pixels_8_rvv, zve64x
|
||||||
ret
|
ret
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
func ff_get_pixels_unaligned_8_rvv, zve64x
|
func ff_get_pixels_unaligned_8_rvv, zve32x
|
||||||
lpad 0
|
lpad 0
|
||||||
andi t1, a1, 7
|
li t2, 8
|
||||||
vsetivli zero, 8, e64, m4, ta, ma
|
vsetivli zero, 8, e8, mf2, ta, ma
|
||||||
li t0, 8 * 8
|
1:
|
||||||
beqz t1, 1b
|
add t1, a1, a2
|
||||||
andi a1, a1, -8
|
vle8.v v16, (a1)
|
||||||
slli t2, t1, 3
|
addi t0, a0, 2 * 8
|
||||||
addi t1, a1, 8
|
vle8.v v17, (t1)
|
||||||
sub t3, t0, t2
|
addi t2, t2, -2
|
||||||
vlse64.v v16, (a1), a2
|
|
||||||
vlse64.v v24, (t1), a2
|
|
||||||
vsrl.vx v16, v16, t2
|
|
||||||
vsll.vx v24, v24, t3
|
|
||||||
vor.vv v16, v16, v24
|
|
||||||
vsetvli zero, t0, e8, m4, ta, ma
|
|
||||||
vwcvtu.x.x.v v8, v16
|
vwcvtu.x.x.v v8, v16
|
||||||
|
vwcvtu.x.x.v v9, v17
|
||||||
vse16.v v8, (a0)
|
vse16.v v8, (a0)
|
||||||
|
add a1, t1, a2
|
||||||
|
vse16.v v9, (t0)
|
||||||
|
addi a0, t0, 2 * 8
|
||||||
|
bnez t2, 1b
|
||||||
|
|
||||||
ret
|
ret
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
|
@ -67,17 +66,18 @@ endfunc
|
||||||
|
|
||||||
func ff_diff_pixels_unaligned_rvv, zve32x
|
func ff_diff_pixels_unaligned_rvv, zve32x
|
||||||
lpad 0
|
lpad 0
|
||||||
|
li t3, 8
|
||||||
vsetivli zero, 8, e8, mf2, ta, ma
|
vsetivli zero, 8, e8, mf2, ta, ma
|
||||||
vlsseg8e8.v v16, (a1), a3
|
1:
|
||||||
vlsseg8e8.v v24, (a2), a3
|
vle8.v v16, (a1)
|
||||||
|
add a1, a1, a3
|
||||||
|
vle8.v v24, (a2)
|
||||||
|
add a2, a2, a3
|
||||||
vwsubu.vv v8, v16, v24
|
vwsubu.vv v8, v16, v24
|
||||||
vwsubu.vv v9, v17, v25
|
addi t3, t3, -1
|
||||||
vwsubu.vv v10, v18, v26
|
vse16.v v8, (a0)
|
||||||
vwsubu.vv v11, v19, v27
|
addi a0, a0, 2 * 8
|
||||||
vwsubu.vv v12, v20, v28
|
bnez t3, 1b
|
||||||
vwsubu.vv v13, v21, v29
|
|
||||||
vwsubu.vv v14, v22, v30
|
|
||||||
vwsubu.vv v15, v23, v31
|
|
||||||
vsseg8e16.v v8, (a0)
|
|
||||||
ret
|
ret
|
||||||
endfunc
|
endfunc
|
||||||
|
|
|
||||||
|
|
@ -464,6 +464,9 @@ static int vulkan_encode_output(AVCodecContext *avctx,
|
||||||
VkResult ret;
|
VkResult ret;
|
||||||
FFVulkanEncodePicture *vp = base_pic->priv;
|
FFVulkanEncodePicture *vp = base_pic->priv;
|
||||||
FFVulkanEncodeContext *ctx = avctx->priv_data;
|
FFVulkanEncodeContext *ctx = avctx->priv_data;
|
||||||
|
FFHWBaseEncodeContext *base_ctx = &ctx->base;
|
||||||
|
AVPacket *pkt_ptr = pkt;
|
||||||
|
|
||||||
FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
|
FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
|
||||||
uint32_t *query_data;
|
uint32_t *query_data;
|
||||||
|
|
||||||
|
|
@ -513,20 +516,56 @@ static int vulkan_encode_output(AVCodecContext *avctx,
|
||||||
vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf);
|
vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
pkt->data = sd_buf->mapped_mem;
|
if (vp->non_independent_frame) {
|
||||||
pkt->size = vp->slices_offset + /* base offset */
|
av_assert0(!ctx->prev_buf_ref);
|
||||||
query_data[0] /* secondary offset */ +
|
size_t prev_buf_size = vp->slices_offset + query_data[0] + query_data[1];
|
||||||
query_data[1] /* size */;
|
ctx->prev_buf_ref = vp->pkt_buf;
|
||||||
|
ctx->prev_buf_size = prev_buf_size;
|
||||||
|
vp->pkt_buf = NULL;
|
||||||
|
|
||||||
/* Move reference */
|
if (vp->tail_size) {
|
||||||
pkt->buf = vp->pkt_buf;
|
if (base_ctx->tail_pkt->size)
|
||||||
vp->pkt_buf = NULL;
|
return AVERROR_BUG;
|
||||||
|
|
||||||
|
ret = ff_get_encode_buffer(avctx, base_ctx->tail_pkt, vp->tail_size, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
memcpy(base_ctx->tail_pkt->data, vp->tail_data, vp->tail_size);
|
||||||
|
pkt_ptr = base_ctx->tail_pkt;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (ctx->prev_buf_ref) {
|
||||||
|
FFVkBuffer *prev_sd_buf = (FFVkBuffer *)ctx->prev_buf_ref->data;
|
||||||
|
size_t prev_size = ctx->prev_buf_size;
|
||||||
|
size_t size = (vp->slices_offset + query_data[0] + query_data[1]);
|
||||||
|
|
||||||
|
ret = ff_get_encode_buffer(avctx, pkt, prev_size + size, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
memcpy(pkt->data, prev_sd_buf->mapped_mem, prev_size);
|
||||||
|
memcpy(pkt->data + prev_size, sd_buf->mapped_mem, size);
|
||||||
|
|
||||||
|
av_buffer_unref(&ctx->prev_buf_ref);
|
||||||
|
av_buffer_unref(&vp->pkt_buf);
|
||||||
|
} else {
|
||||||
|
pkt->data = sd_buf->mapped_mem;
|
||||||
|
pkt->size = vp->slices_offset + /* base offset */
|
||||||
|
query_data[0] /* secondary offset */ +
|
||||||
|
query_data[1] /* size */;
|
||||||
|
|
||||||
|
/* Move reference */
|
||||||
|
pkt->buf = vp->pkt_buf;
|
||||||
|
vp->pkt_buf = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
|
av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
|
||||||
base_pic->display_order, base_pic->encode_order);
|
base_pic->display_order, base_pic->encode_order);
|
||||||
|
|
||||||
return ff_hw_base_encode_set_output_property(&ctx->base, avctx,
|
return ff_hw_base_encode_set_output_property(&ctx->base, avctx,
|
||||||
base_pic, pkt,
|
base_pic, pkt_ptr,
|
||||||
ctx->codec->flags & VK_ENC_FLAG_NO_DELAY);
|
ctx->codec->flags & VK_ENC_FLAG_NO_DELAY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,10 @@ typedef struct FFVulkanEncodePicture {
|
||||||
FFVkExecContext *exec;
|
FFVkExecContext *exec;
|
||||||
AVBufferRef *pkt_buf;
|
AVBufferRef *pkt_buf;
|
||||||
int slices_offset;
|
int slices_offset;
|
||||||
|
|
||||||
|
int non_independent_frame;
|
||||||
|
char tail_data[16];
|
||||||
|
size_t tail_size;
|
||||||
} FFVulkanEncodePicture;
|
} FFVulkanEncodePicture;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -192,6 +196,9 @@ typedef struct FFVulkanEncodeContext {
|
||||||
FFVkExecPool enc_pool;
|
FFVkExecPool enc_pool;
|
||||||
|
|
||||||
FFHWBaseEncodePicture *slots[32];
|
FFHWBaseEncodePicture *slots[32];
|
||||||
|
|
||||||
|
AVBufferRef *prev_buf_ref;
|
||||||
|
size_t prev_buf_size;
|
||||||
} FFVulkanEncodeContext;
|
} FFVulkanEncodeContext;
|
||||||
|
|
||||||
#define VULKAN_ENCODE_COMMON_OPTIONS \
|
#define VULKAN_ENCODE_COMMON_OPTIONS \
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,8 @@ typedef struct VulkanEncodeAV1Context {
|
||||||
AV1RawOBU seq_hdr_obu;
|
AV1RawOBU seq_hdr_obu;
|
||||||
AV1RawOBU meta_cll_obu;
|
AV1RawOBU meta_cll_obu;
|
||||||
AV1RawOBU meta_mastering_obu;
|
AV1RawOBU meta_mastering_obu;
|
||||||
|
AV1RawOBU hidden_obu;
|
||||||
|
AV1RawOBU tail_obu;
|
||||||
|
|
||||||
VkVideoEncodeAV1ProfileInfoKHR profile;
|
VkVideoEncodeAV1ProfileInfoKHR profile;
|
||||||
|
|
||||||
|
|
@ -172,7 +174,6 @@ static void set_name_slot(int slot, int *slot_indices, uint32_t allowed_idx, int
|
||||||
av_assert0(0);
|
av_assert0(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
|
static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
|
||||||
VkVideoEncodeInfoKHR *encode_info)
|
VkVideoEncodeInfoKHR *encode_info)
|
||||||
{
|
{
|
||||||
|
|
@ -542,6 +543,45 @@ static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FFVulkanEncodePicture *vp = pic->priv;
|
||||||
|
vp->tail_size = 0;
|
||||||
|
vp->non_independent_frame = pic->encode_order < pic->display_order;
|
||||||
|
if (vp->non_independent_frame) {
|
||||||
|
AV1RawOBU *obu = &enc->hidden_obu;
|
||||||
|
AV1RawFrameHeader *fh = &obu->obu.frame_header;
|
||||||
|
|
||||||
|
/** hidden frame header */
|
||||||
|
memset(obu, 0, sizeof(*obu));
|
||||||
|
obu->header.obu_type = AV1_OBU_FRAME_HEADER;
|
||||||
|
obu->header.obu_has_size_field = 1;
|
||||||
|
|
||||||
|
fh->frame_type = AV1_FRAME_INTER;
|
||||||
|
fh->refresh_frame_flags = 1 << ap->slot;
|
||||||
|
fh->frame_width_minus_1 = base_ctx->surface_width - 1;
|
||||||
|
fh->frame_height_minus_1 = base_ctx->surface_height - 1;
|
||||||
|
fh->render_width_minus_1 = fh->frame_width_minus_1;
|
||||||
|
fh->render_height_minus_1 = fh->frame_height_minus_1;
|
||||||
|
|
||||||
|
memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas,
|
||||||
|
AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
|
||||||
|
|
||||||
|
obu = &enc->tail_obu;
|
||||||
|
fh = &obu->obu.frame_header;
|
||||||
|
|
||||||
|
/** tail frame header */
|
||||||
|
memset(obu, 0, sizeof(*obu));
|
||||||
|
obu->header.obu_type = AV1_OBU_FRAME_HEADER;
|
||||||
|
obu->header.obu_has_size_field = 1;
|
||||||
|
|
||||||
|
fh->show_existing_frame = 1;
|
||||||
|
fh->frame_to_show_map_idx = ap->slot != 0;
|
||||||
|
fh->frame_type = AV1_FRAME_INTER;
|
||||||
|
fh->frame_width_minus_1 = base_ctx->surface_width - 1;
|
||||||
|
fh->frame_height_minus_1 = base_ctx->surface_height - 1;
|
||||||
|
fh->render_width_minus_1 = fh->frame_width_minus_1;
|
||||||
|
fh->render_height_minus_1 = fh->frame_height_minus_1;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1079,8 +1119,31 @@ static int write_extra_headers(AVCodecContext *avctx,
|
||||||
int err;
|
int err;
|
||||||
VulkanEncodeAV1Context *enc = avctx->priv_data;
|
VulkanEncodeAV1Context *enc = avctx->priv_data;
|
||||||
VulkanEncodeAV1Picture *ap = base_pic->codec_priv;
|
VulkanEncodeAV1Picture *ap = base_pic->codec_priv;
|
||||||
|
FFVulkanEncodePicture *vp = base_pic->priv;
|
||||||
CodedBitstreamFragment *obu = &enc->current_access_unit;
|
CodedBitstreamFragment *obu = &enc->current_access_unit;
|
||||||
|
|
||||||
|
if (vp->non_independent_frame) {
|
||||||
|
err = vulkan_encode_av1_add_obu(avctx, obu, AV1_OBU_FRAME_HEADER, &enc->hidden_obu);
|
||||||
|
if (err < 0)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
// Only for tracking ref frame in context, not to be output
|
||||||
|
err = ff_cbs_write_fragment_data(enc->cbs, obu);
|
||||||
|
if (err < 0)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
ff_cbs_fragment_reset(obu);
|
||||||
|
((CodedBitstreamAV1Context *)enc->cbs->priv_data)->seen_frame_header = 0;
|
||||||
|
|
||||||
|
err = vulkan_encode_av1_add_obu(avctx, obu, AV1_OBU_FRAME_HEADER, &enc->tail_obu);
|
||||||
|
if (err < 0)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
err = vulkan_encode_av1_write_obu(avctx, vp->tail_data, &vp->tail_size, obu);
|
||||||
|
if (err < 0)
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
if (ap->units_needed & UNIT_MASTERING_DISPLAY) {
|
if (ap->units_needed & UNIT_MASTERING_DISPLAY) {
|
||||||
err = vulkan_encode_av1_add_obu(avctx, obu,
|
err = vulkan_encode_av1_add_obu(avctx, obu,
|
||||||
AV1_OBU_METADATA,
|
AV1_OBU_METADATA,
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ static inline av_const bool ff_rv_zbb_support(void)
|
||||||
* Returns the vector size in bytes (always a power of two and at least 4).
|
* Returns the vector size in bytes (always a power of two and at least 4).
|
||||||
* This is undefined behaviour if vectors are not implemented.
|
* This is undefined behaviour if vectors are not implemented.
|
||||||
*/
|
*/
|
||||||
static inline size_t ff_get_rv_vlenb(void)
|
static inline av_const size_t ff_get_rv_vlenb(void)
|
||||||
{
|
{
|
||||||
size_t vlenb;
|
size_t vlenb;
|
||||||
|
|
||||||
|
|
@ -61,7 +61,7 @@ static inline size_t ff_get_rv_vlenb(void)
|
||||||
* Checks that the vector bit-size is at least the given value.
|
* Checks that the vector bit-size is at least the given value.
|
||||||
* This is potentially undefined behaviour if vectors are not implemented.
|
* This is potentially undefined behaviour if vectors are not implemented.
|
||||||
*/
|
*/
|
||||||
static inline bool ff_rv_vlen_least(unsigned int bits)
|
static inline av_const bool ff_rv_vlen_least(unsigned int bits)
|
||||||
{
|
{
|
||||||
#ifdef __riscv_v_min_vlen
|
#ifdef __riscv_v_min_vlen
|
||||||
if (bits <= __riscv_v_min_vlen)
|
if (bits <= __riscv_v_min_vlen)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue