Compare commits

...

7 Commits

Author SHA1 Message Date
Andreas Rheinhardt d01608e022 avcodec/proresdec: Remove unused hwaccel_last_picture_private
ProRes is an intra-only codec.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-10-25 22:34:30 +02:00
averne 23df9d4172 avcodec/prores: add parser
Introduce a basic parser for ProRes frame headers.
This avoids having to decode an entire frame to
extract codec information.
2025-10-25 19:56:44 +00:00
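
A minimal sketch of driving this parser through the generic libavcodec parser API to recover stream parameters without decoding; probe_prores() and the hard-coded 'apch' codec tag are illustrative assumptions (the FourCC normally comes from the container), not part of the patch.

#include <libavcodec/avcodec.h>

/* Hypothetical helper: run one ProRes frame through the parser to pull
 * out width/height/pix_fmt/field order without decoding it. */
static void probe_prores(const uint8_t *data, int size)
{
    AVCodecParserContext *pc = av_parser_init(AV_CODEC_ID_PRORES);
    AVCodecContext *avctx =
        avcodec_alloc_context3(avcodec_find_decoder(AV_CODEC_ID_PRORES));
    uint8_t *out;
    int out_size;

    /* The parser derives the pixel format from the profile FourCC,
     * which normally comes from the container. */
    avctx->codec_tag = MKTAG('a','p','c','h');

    av_parser_parse2(pc, avctx, &out, &out_size, data, size,
                     AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);

    /* pc->width, pc->height, pc->format and pc->field_order now hold
     * the values extracted from the frame header. */

    avcodec_free_context(&avctx);
    av_parser_close(pc);
}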
averne 98412edfed lavc: add a ProRes Vulkan hwaccel
Add a shader-based Apple ProRes decoder.
It supports all codec features for profiles up to
the 4444 XQ profile, i.e.:
- 4:2:2 and 4:4:4 chroma subsampling
- 10- and 12-bit component depth
- Interlacing
- Alpha

The implementation consists of two shaders: the
VLD kernel does entropy decoding for color/alpha,
and the IDCT kernel performs the inverse transform
on color components.

Benchmarks for a 4k yuv422p10 sample:
- AMD Radeon 6700XT:   178 fps
- Intel i7 Tiger Lake: 37 fps
- NVidia Orin Nano:    70 fps
2025-10-25 19:54:13 +00:00
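
As a sketch of how an API user would opt into this hwaccel (assuming a build that enables it): create a Vulkan device context and pick AV_PIX_FMT_VULKAN in the get_format callback, falling back to the software format that terminates the offered list. get_vk_format() and open_prores_vulkan() are illustrative names, not part of the patch.

#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext.h>

/* Hypothetical setup: select the Vulkan hwaccel for a ProRes decode. */
static enum AVPixelFormat get_vk_format(AVCodecContext *avctx,
                                        const enum AVPixelFormat *fmts)
{
    const enum AVPixelFormat *p;
    for (p = fmts; *p != AV_PIX_FMT_NONE; p++)
        if (*p == AV_PIX_FMT_VULKAN)
            return *p;          /* hwaccel offered, take it */
    return p[-1];               /* the list ends with the software format */
}

static int open_prores_vulkan(AVCodecContext *avctx)
{
    int ret = av_hwdevice_ctx_create(&avctx->hw_device_ctx,
                                     AV_HWDEVICE_TYPE_VULKAN, NULL, NULL, 0);
    if (ret < 0)
        return ret;
    avctx->get_format = get_vk_format;
    return avcodec_open2(avctx, avcodec_find_decoder(AV_CODEC_ID_PRORES), NULL);
}

From the ffmpeg command line the same decoder path should be selectable with -hwaccel vulkan, provided the build was configured with vulkan and a SPIR-V compiler (per the configure hunk below).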
averne 3fd55d952e avcodec/proresdec: save slice width parameter in codec context
Save the log2_desired_slice_size_in_mb syntax
element in the codec context.
Required by the Vulkan hwaccel to compute slice
widths and positions.
2025-10-25 19:54:13 +00:00
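
The slice geometry derived from this parameter can be sketched as follows (slice_widths() is a hypothetical helper, not code from the patch): each macroblock row is covered by full slices of 1 << log2_desired_slice_size_in_mb MBs, and the remainder by progressively smaller power-of-two slices.

/* Hypothetical helper: list the slice widths (in MBs) making up one
 * macroblock row, given the picture width in MBs and the saved
 * log2 slice size. Full-size slices come first, then the remainder
 * is covered by progressively smaller powers of two. */
static int slice_widths(int mb_width, int log2_slice_mb_width, int *widths)
{
    int n = 0, mb_x = 0, w = 1 << log2_slice_mb_width;

    while (mb_x < mb_width) {
        while (w > mb_width - mb_x)
            w >>= 1;            /* shrink until the slice fits */
        widths[n++] = w;
        mb_x += w;
    }
    return n;                   /* number of slices per MB row */
}

/* e.g. mb_width = 13, log2 = 3 -> widths 8, 4, 1 (three slices). */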
averne 987368ef25 avcodec/prores: adapt hwaccel code for slice-based accelerators
In preparation for the Vulkan hwaccel.
The existing hwaccel code was designed around
videotoolbox, which ingests the whole frame
bitstream including picture headers.
This adapts the code to accommodate lower-level,
slice-based hwaccels.
2025-10-25 19:54:13 +00:00
averne 9195af77eb proresdec: allocate private memory for hwaccel pictures
In preparation for the Vulkan hwaccel, which
stores per-frame acceleration structures.
2025-10-25 19:54:13 +00:00
Anders Rein f5f72b4f8a Fix incorrect extraction of Origin
The code that attempted to extract origin (4B.02) was put in the wrong
metadata handler and was also read as the wrong integer type. According
to ST 377-1, Origin is of type "Position" (Int64) in the Timeline Track, not
an 8-bit unsigned integer in the Sequence.
2025-10-25 19:22:48 +00:00
16 changed files with 1239 additions and 38 deletions

2
configure
View File

@ -3343,6 +3343,8 @@ prores_videotoolbox_hwaccel_deps="videotoolbox"
prores_videotoolbox_hwaccel_select="prores_decoder"
prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler"
prores_raw_vulkan_hwaccel_select="prores_raw_decoder"
prores_vulkan_hwaccel_deps="vulkan spirv_compiler"
prores_vulkan_hwaccel_select="prores_decoder"
vc1_d3d11va_hwaccel_deps="d3d11va"
vc1_d3d11va_hwaccel_select="vc1_decoder"
vc1_d3d11va2_hwaccel_deps="d3d11va"

View File

@ -1106,6 +1106,7 @@ OBJS-$(CONFIG_VP9_VULKAN_HWACCEL) += vulkan_decode.o vulkan_vp9.o
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores_raw.o
OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores.o
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o reverse.o
@ -1281,6 +1282,7 @@ OBJS-$(CONFIG_PNG_PARSER) += png_parser.o
OBJS-$(CONFIG_PNM_PARSER) += pnm_parser.o pnm.o
OBJS-$(CONFIG_PRORES_RAW_PARSER) += prores_raw_parser.o
OBJS-$(CONFIG_QOI_PARSER) += qoi_parser.o
OBJS-$(CONFIG_PRORES_PARSER) += prores_parser.o
OBJS-$(CONFIG_RV34_PARSER) += rv34_parser.o
OBJS-$(CONFIG_SBC_PARSER) += sbc_parser.o
OBJS-$(CONFIG_SIPR_PARSER) += sipr_parser.o
@ -1350,7 +1352,7 @@ SKIPHEADERS-$(CONFIG_QSVENC) += qsvenc.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h vulkan_video.h \
SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h prores_vulkan.h vulkan_video.h \
vulkan_encode.h vulkan_decode.h
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h

View File

@ -68,6 +68,7 @@ extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel;
extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel;
extern const struct FFHWAccel ff_prores_vulkan_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel;

View File

@ -67,6 +67,7 @@ extern const AVCodecParser ff_mpeg4video_parser;
extern const AVCodecParser ff_mpegaudio_parser;
extern const AVCodecParser ff_mpegvideo_parser;
extern const AVCodecParser ff_opus_parser;
extern const AVCodecParser ff_prores_parser;
extern const AVCodecParser ff_png_parser;
extern const AVCodecParser ff_pnm_parser;
extern const AVCodecParser ff_prores_raw_parser;

128
libavcodec/prores_parser.c Normal file
View File

@ -0,0 +1,128 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/intreadwrite.h"
#include "bytestream.h"
#include "avcodec.h"
static int parse(AVCodecParserContext *s,
AVCodecContext *avctx,
const uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size)
{
GetByteContext gb;
uint8_t flags, depth, chroma_format, alpha_channel_type;
*poutbuf = buf;
*poutbuf_size = buf_size;
/* Frame fields + frame header size */
if (buf_size < 28)
return buf_size;
bytestream2_init(&gb, buf, buf_size);
/* Frame size */
if (bytestream2_get_be32(&gb) != buf_size)
return buf_size;
/* Frame identifier */
if (bytestream2_get_le32(&gb) != MKTAG('i','c','p','f'))
return buf_size;
/* Frame header size */
if (bytestream2_get_be16(&gb) < 20)
return buf_size;
bytestream2_skip(&gb, 6); /* Bitstream version, encoder identifier */
s->key_frame = 1;
s->pict_type = AV_PICTURE_TYPE_I;
s->width = bytestream2_get_be16(&gb);
s->height = bytestream2_get_be16(&gb);
s->coded_width = FFALIGN(s->width, 16);
s->coded_height = FFALIGN(s->height, 16);
flags = bytestream2_get_byte(&gb);
/* Interlace mode */
switch (flags >> 2 & 3) {
case 0:
s->field_order = AV_FIELD_PROGRESSIVE;
s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
break;
case 1:
s->field_order = AV_FIELD_TT;
s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD;
break;
case 2:
s->field_order = AV_FIELD_BB;
s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
break;
default:
break;
}
bytestream2_skip(&gb, 4); /* Aspect ratio information, frame rate code, color primaries, transfer characteristic, matrix coefficients */
/* Determine pixel format based on color depth, chroma format and alpha type */
switch (avctx->codec_tag) {
case MKTAG('a','p','c','o'):
case MKTAG('a','p','c','s'):
case MKTAG('a','p','c','n'):
case MKTAG('a','p','c','h'):
depth = 10;
break;
case MKTAG('a','p','4','h'):
case MKTAG('a','p','4','x'):
depth = 12;
break;
default:
return buf_size;
}
chroma_format = flags >> 6 & 3;
if (chroma_format < 2)
return buf_size;
alpha_channel_type = bytestream2_get_byte(&gb) & 0xf;
switch (depth | (chroma_format << 4) | (alpha_channel_type << 8)) {
case 10 | (2 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV422P10; break;
case 10 | (2 << 4) | (1 << 8):
case 10 | (2 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA422P10; break;
case 10 | (3 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV444P10; break;
case 10 | (3 << 4) | (1 << 8):
case 10 | (3 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA444P10; break;
case 12 | (2 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV422P12; break;
case 12 | (2 << 4) | (1 << 8):
case 12 | (2 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA422P12; break;
case 12 | (3 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV444P12; break;
case 12 | (3 << 4) | (1 << 8):
case 12 | (3 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA444P12; break;
}
return buf_size;
}
const AVCodecParser ff_prores_parser = {
.codec_ids = { AV_CODEC_ID_PRORES },
.parser_parse = parse,
};

View File

@ -251,7 +251,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
}
if (pix_fmt != ctx->pix_fmt) {
#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL)
#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL + CONFIG_PRORES_VULKAN_HWACCEL)
#if HWACCEL_MAX
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
int ret;
@ -260,6 +260,9 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
#if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
*fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
#endif
#if CONFIG_PRORES_VULKAN_HWACCEL
*fmtp++ = AV_PIX_FMT_VULKAN;
#endif
*fmtp++ = ctx->pix_fmt;
*fmtp = AV_PIX_FMT_NONE;
@ -273,10 +276,10 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
#endif
}
ctx->frame->color_primaries = buf[14];
ctx->frame->color_trc = buf[15];
ctx->frame->colorspace = buf[16];
ctx->frame->color_range = AVCOL_RANGE_MPEG;
avctx->color_primaries = buf[14];
avctx->color_trc = buf[15];
avctx->colorspace = buf[16];
avctx->color_range = AVCOL_RANGE_MPEG;
ptr = buf + 20;
flags = buf[19];
@ -335,6 +338,9 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
return AVERROR_INVALIDDATA;
}
ctx->slice_mb_width = 1 << log2_slice_mb_width;
ctx->slice_mb_height = 1 << log2_slice_mb_height;
ctx->mb_width = (avctx->width + 15) >> 4;
if (ctx->frame_type)
ctx->mb_height = (avctx->height + 31) >> 5;
@ -344,7 +350,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
// QT ignores the written value
// slice_count = AV_RB16(buf + 5);
slice_count = ctx->mb_height * ((ctx->mb_width >> log2_slice_mb_width) +
av_popcount(ctx->mb_width & (1 << log2_slice_mb_width) - 1));
av_popcount(ctx->mb_width & ctx->slice_mb_width - 1));
if (ctx->slice_count != slice_count || !ctx->slices) {
av_freep(&ctx->slices);
@ -367,7 +373,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
index_ptr = buf + hdr_size;
data_ptr = index_ptr + slice_count*2;
slice_mb_count = 1 << log2_slice_mb_width;
slice_mb_count = ctx->slice_mb_width;
mb_x = 0;
mb_y = 0;
@ -392,7 +398,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
mb_x += slice_mb_count;
if (mb_x == ctx->mb_width) {
slice_mb_count = 1 << log2_slice_mb_width;
slice_mb_count = ctx->slice_mb_width;
mb_x = 0;
mb_y++;
}
@ -756,6 +762,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
int frame_hdr_size, pic_size, ret;
int i;
if (buf_size < 28 || AV_RL32(buf + 4) != AV_RL32("icpf")) {
av_log(avctx, AV_LOG_ERROR, "invalid frame header\n");
@ -772,26 +779,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
if (frame_hdr_size < 0)
return frame_hdr_size;
if (avctx->skip_frame == AVDISCARD_ALL)
return 0;
buf += frame_hdr_size;
buf_size -= frame_hdr_size;
if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
return ret;
ff_thread_finish_setup(avctx);
if (HWACCEL_MAX && avctx->hwaccel) {
const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size);
if (ret < 0)
return ret;
ret = hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
if (ret < 0)
return ret;
ret = hwaccel->end_frame(avctx);
if (ret < 0)
return ret;
goto finish;
}
av_refstruct_unref(&ctx->hwaccel_picture_private);
if ((ret = ff_hwaccel_frame_priv_alloc(avctx, &ctx->hwaccel_picture_private)) < 0)
return ret;
ff_thread_finish_setup(avctx);
decode_picture:
pic_size = decode_picture_header(avctx, buf, buf_size);
@ -800,7 +802,23 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
return pic_size;
}
if ((ret = decode_picture(avctx)) < 0) {
if (HWACCEL_MAX && avctx->hwaccel) {
const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size);
if (ret < 0)
return ret;
for (i = 0; i < ctx->slice_count; ++i) {
ret = hwaccel->decode_slice(avctx, ctx->slices[i].data, ctx->slices[i].data_size);
if (ret < 0)
return ret;
}
ret = hwaccel->end_frame(avctx);
if (ret < 0)
return ret;
} else if ((ret = decode_picture(avctx)) < 0) {
av_log(avctx, AV_LOG_ERROR, "error decoding picture\n");
return ret;
}
@ -813,7 +831,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
goto decode_picture;
}
finish:
av_refstruct_unref(&ctx->hwaccel_picture_private);
*got_frame = 1;
return avpkt->size;
@ -824,6 +843,7 @@ static av_cold int decode_close(AVCodecContext *avctx)
ProresContext *ctx = avctx->priv_data;
av_freep(&ctx->slices);
av_refstruct_unref(&ctx->hwaccel_picture_private);
return 0;
}
@ -851,11 +871,15 @@ const FFCodec ff_prores_decoder = {
FF_CODEC_DECODE_CB(decode_frame),
UPDATE_THREAD_CONTEXT(update_thread_context),
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
.caps_internal = FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
.p.profiles = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
#if HWACCEL_MAX
.hw_configs = (const AVCodecHWConfigInternal *const []) {
#if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
HWACCEL_VIDEOTOOLBOX(prores),
#endif
#if CONFIG_PRORES_VULKAN_HWACCEL
HWACCEL_VULKAN(prores),
#endif
NULL
},

View File

@ -44,6 +44,7 @@ typedef struct {
BlockDSPContext bdsp;
ProresDSPContext prodsp;
AVFrame *frame;
void *hwaccel_picture_private;
int frame_type; ///< 0 = progressive, 1 = tff, 2 = bff
uint8_t qmat_luma[64];
uint8_t qmat_chroma[64];
@ -51,6 +52,8 @@ typedef struct {
int slice_count; ///< number of slices in the current picture
unsigned mb_width; ///< width of the current picture in mb
unsigned mb_height; ///< height of the current picture in mb
unsigned slice_mb_width; ///< maximum width of a slice in mb
unsigned slice_mb_height; ///< maximum height of a slice in mb
uint8_t progressive_scan[64];
uint8_t interlaced_scan[64];
const uint8_t *scan;

View File

@ -1161,16 +1161,21 @@ static int videotoolbox_prores_start_frame(AVCodecContext *avctx,
const uint8_t *buffer,
uint32_t size)
{
return 0;
VTContext *vtctx = avctx->internal->hwaccel_priv_data;
ProresContext *ctx = avctx->priv_data;
/* Videotoolbox decodes both fields simultaneously */
if (!ctx->first_field)
return 0;
return ff_videotoolbox_buffer_copy(vtctx, buffer, size);
}
static int videotoolbox_prores_decode_slice(AVCodecContext *avctx,
const uint8_t *buffer,
uint32_t size)
{
VTContext *vtctx = avctx->internal->hwaccel_priv_data;
return ff_videotoolbox_buffer_copy(vtctx, buffer, size);
return 0;
}
static int videotoolbox_prores_end_frame(AVCodecContext *avctx)
@ -1178,6 +1183,9 @@ static int videotoolbox_prores_end_frame(AVCodecContext *avctx)
ProresContext *ctx = avctx->priv_data;
AVFrame *frame = ctx->frame;
if (!ctx->first_field)
return 0;
return ff_videotoolbox_common_end_frame(avctx, frame);
}

View File

@ -17,6 +17,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/prores_raw.o
OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/prores_reset.o \
vulkan/prores_vld.o \
vulkan/prores_idct.o
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
libavcodec/vulkan/%.c: TAG = VULKAN

View File

@ -0,0 +1,123 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Two macroblocks, padded to avoid bank conflicts */
shared float blocks[4*2][8*(8+1)];
uint get_px(uint tex_idx, ivec2 pos)
{
#ifndef INTERLACED
return imageLoad(dst[tex_idx], pos).x;
#else
return imageLoad(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field)).x;
#endif
}
void put_px(uint tex_idx, ivec2 pos, uint v)
{
#ifndef INTERLACED
imageStore(dst[tex_idx], pos, uvec4(v));
#else
imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), uvec4(v));
#endif
}
/* 7.4 Inverse Transform */
void idct(uint block, uint offset, uint stride)
{
float c0 = blocks[block][0*stride + offset];
float c1 = blocks[block][1*stride + offset];
float c2 = blocks[block][2*stride + offset];
float c3 = blocks[block][3*stride + offset];
float c4 = blocks[block][4*stride + offset];
float c5 = blocks[block][5*stride + offset];
float c6 = blocks[block][6*stride + offset];
float c7 = blocks[block][7*stride + offset];
float tmp1 = c6 * 1.4142134189605712891 + (c2 - c6);
float tmp2 = c6 * 1.4142134189605712891 - (c2 - c6);
float a1 = (c0 + c4) * 0.35355341434478759766 + tmp1 * 0.46193981170654296875;
float a4 = (c0 + c4) * 0.35355341434478759766 - tmp1 * 0.46193981170654296875;
float a3 = (c0 - c4) * 0.35355341434478759766 + tmp2 * 0.19134169816970825195;
float a2 = (c0 - c4) * 0.35355341434478759766 - tmp2 * 0.19134169816970825195;
float tmp3 = (c3 - c5) * 0.70710682868957519531 + c7;
float tmp4 = (c3 - c5) * 0.70710682868957519531 - c7;
float tmp5 = (c5 - c7) * 1.4142134189605712891 + (c5 - c7) + (c1 - c3);
float tmp6 = (c5 - c7) * -1.4142134189605712891 + (c5 - c7) + (c1 - c3);
float m1 = tmp3 * 2.6131260395050048828 + tmp5;
float m4 = tmp3 * -2.6131260395050048828 + tmp5;
float m2 = tmp4 * 1.0823919773101806641 + tmp6;
float m3 = tmp4 * -1.0823919773101806641 + tmp6;
blocks[block][0*stride + offset] = m1 * 0.49039259552955627441 + a1;
blocks[block][7*stride + offset] = m1 * -0.49039259552955627441 + a1;
blocks[block][1*stride + offset] = m2 * 0.41573479771614074707 + a2;
blocks[block][6*stride + offset] = m2 * -0.41573479771614074707 + a2;
blocks[block][2*stride + offset] = m3 * 0.27778509259223937988 + a3;
blocks[block][5*stride + offset] = m3 * -0.27778509259223937988 + a3;
blocks[block][3*stride + offset] = m4 * 0.097545139491558074951 + a4;
blocks[block][4*stride + offset] = m4 * -0.097545139491558074951 + a4;
}
void main(void)
{
uvec3 gid = gl_GlobalInvocationID, lid = gl_LocalInvocationID;
uint comp = gid.z, block = (lid.y << 2) | (lid.x >> 3), idx = lid.x & 0x7;
uint chroma_shift = comp != 0 ? log2_chroma_w : 0;
bool act = gid.x < mb_width << (4 - chroma_shift);
/* Coalesced load of DCT coeffs in shared memory, second part of inverse quantization */
if (act) {
/**
* According to spec indexing an array in push constant memory with
* a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
* so copy the whole matrix locally.
*/
uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
[[unroll]] for (uint i = 0; i < 8; ++i) {
int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | i))), 16);
blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx]));
}
}
/* Row-wise iDCT */
barrier();
idct(block, idx * 9, 1);
/* Column-wise iDCT */
barrier();
idct(block, idx, 9);
float fact = 1.0f / (1 << (12 - depth)), off = 1 << (depth - 1);
int maxv = (1 << depth) - 1;
/* 7.5.1 Color Component Samples. Rescale, clamp and write back to global memory */
barrier();
if (act) {
[[unroll]] for (uint i = 0; i < 8; ++i) {
float v = blocks[block][i * 9 + idx] * fact + off;
put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, maxv));
}
}
}

View File

@ -0,0 +1,38 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
void main(void)
{
uvec3 gid = gl_GlobalInvocationID;
#ifndef INTERLACED
ivec2 pos = ivec2(gid);
#else
ivec2 pos = ivec2(gid.x, (gid.y << 1) + bottom_field);
#endif
/* Clear luma plane */
imageStore(dst[0], pos, uvec4(0));
/* Clear chroma plane */
if (gid.x < mb_width << (4 - log2_chroma_w)) {
imageStore(dst[1], pos, uvec4(0));
imageStore(dst[2], pos, uvec4(0));
}
/* Alpha plane doesn't need a clear because it is not sparsely encoded */
}

View File

@ -0,0 +1,317 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define U8(x) (uint8_t (x))
#define U16(x) (uint16_t(x))
void put_px(uint tex_idx, ivec2 pos, uint v)
{
#ifndef INTERLACED
imageStore(dst[tex_idx], pos, uvec4(v));
#else
imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), uvec4(v));
#endif
}
/* 7.5.3 Pixel Arrangement */
ivec2 pos_to_block(uint pos, uint luma)
{
return ivec2((pos & -luma - 2) + luma >> 1, pos >> luma & 1) << 3;
}
/* 7.1.1.2 Signed Golomb Combination Codes */
uint to_signed(uint x)
{
return (x >> 1) ^ -(x & 1);
}
/* 7.1.1.1 Golomb Combination Codes */
uint decode_codeword(inout GetBitContext gb, int codebook)
{
int last_rice_q = bitfieldExtract(codebook, 0, 4),
krice = bitfieldExtract(codebook, 4, 4),
kexp = bitfieldExtract(codebook, 8, 4);
int q = 31 - findMSB(show_bits(gb, 32));
if (q <= last_rice_q) {
/* Golomb-Rice encoding */
return (get_bits(gb, krice + q + 1) & ~(1 << krice)) + (q << krice);
} else {
/* exp-Golomb encoding */
return get_bits(gb, (q << 1) + kexp - last_rice_q) - (1 << kexp) + ((last_rice_q + 1) << krice);
}
}
void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
{
uvec3 gid = gl_GlobalInvocationID;
uint is_luma = uint(gid.z == 0);
uint chroma_shift = bool(is_luma) ? 0 : log2_chroma_w;
uint num_blocks = mb_count << (2 - chroma_shift);
ivec2 base_pos = ivec2(mb_pos.x << (4 - chroma_shift), mb_pos.y << 4);
/* 7.1.1.3 DC Coefficients */
{
/* First coeff */
uint c = to_signed(decode_codeword(gb, 0x650));
put_px(gid.z, base_pos, c * qscale & 0xffff);
/**
* Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8)
* According to the SMPTE document, abs(prev_dc_diff) should be used
* to index the table, duplicating the entries removes the abs operation.
*/
const uint16_t dc_codebook[] = { U16(0x100),
U16(0x210), U16(0x210),
U16(0x321), U16(0x321),
U16(0x430), U16(0x430), };
uint cw = 5, prev_dc_diff = 0;
for (int i = 1; i < num_blocks; ++i) {
cw = decode_codeword(gb, dc_codebook[min(cw, 6)]);
int s = int(prev_dc_diff) >> 31;
c += prev_dc_diff = (to_signed(cw) ^ s) - s;
put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 0xffff);
}
}
/* 7.1.1.4 AC Coefficients */
{
/* Table 10 */
const uint16_t ac_run_codebook [] = { U16(0x102), U16(0x102), U16(0x101), U16(0x101),
U16(0x100), U16(0x211), U16(0x211), U16(0x211),
U16(0x211), U16(0x210), U16(0x210), U16(0x210),
U16(0x210), U16(0x210), U16(0x210), U16(0x320), };
/* Table 11 */
const uint16_t ac_level_codebook[] = { U16(0x202), U16(0x101), U16(0x102), U16(0x100),
U16(0x210), U16(0x210), U16(0x210), U16(0x210),
U16(0x320) };
#ifndef INTERLACED
/* Figure 4, encoded as (x << 0) | (y << 4) */
const uint8_t scan_tbl[] = {
U8(0x00), U8(0x01), U8(0x10), U8(0x11), U8(0x02), U8(0x03), U8(0x12), U8(0x13),
U8(0x20), U8(0x21), U8(0x30), U8(0x31), U8(0x22), U8(0x23), U8(0x32), U8(0x33),
U8(0x04), U8(0x05), U8(0x14), U8(0x24), U8(0x15), U8(0x06), U8(0x07), U8(0x16),
U8(0x25), U8(0x34), U8(0x35), U8(0x26), U8(0x17), U8(0x27), U8(0x36), U8(0x37),
U8(0x40), U8(0x41), U8(0x50), U8(0x60), U8(0x51), U8(0x42), U8(0x43), U8(0x52),
U8(0x61), U8(0x70), U8(0x71), U8(0x62), U8(0x53), U8(0x44), U8(0x45), U8(0x54),
U8(0x63), U8(0x72), U8(0x73), U8(0x64), U8(0x55), U8(0x46), U8(0x47), U8(0x56),
U8(0x65), U8(0x74), U8(0x75), U8(0x66), U8(0x57), U8(0x67), U8(0x76), U8(0x77),
};
#else
/* Figure 5 */
const uint8_t scan_tbl[] = {
U8(0x00), U8(0x10), U8(0x01), U8(0x11), U8(0x20), U8(0x30), U8(0x21), U8(0x31),
U8(0x02), U8(0x12), U8(0x03), U8(0x13), U8(0x22), U8(0x32), U8(0x23), U8(0x33),
U8(0x40), U8(0x50), U8(0x41), U8(0x42), U8(0x51), U8(0x60), U8(0x70), U8(0x61),
U8(0x52), U8(0x43), U8(0x53), U8(0x62), U8(0x71), U8(0x72), U8(0x63), U8(0x73),
U8(0x04), U8(0x14), U8(0x05), U8(0x06), U8(0x15), U8(0x24), U8(0x34), U8(0x25),
U8(0x16), U8(0x07), U8(0x17), U8(0x26), U8(0x35), U8(0x44), U8(0x54), U8(0x45),
U8(0x36), U8(0x27), U8(0x37), U8(0x46), U8(0x55), U8(0x64), U8(0x74), U8(0x65),
U8(0x56), U8(0x47), U8(0x57), U8(0x66), U8(0x75), U8(0x76), U8(0x67), U8(0x77),
};
#endif
uint block_mask = num_blocks - 1;
uint block_shift = findLSB(num_blocks);
uint pos = num_blocks - 1, run = 4, level = 1, s;
while (pos < num_blocks << 6) {
int left = left_bits(gb);
if (left <= 0 || (left < 32 && show_bits(gb, left) == 0))
break;
run = decode_codeword(gb, ac_run_codebook [min(run, 15)]);
level = decode_codeword(gb, ac_level_codebook[min(level, 8 )]);
s = get_bits(gb, 1);
pos += run + 1;
uint bidx = pos & block_mask, scan = scan_tbl[pos >> block_shift];
ivec2 spos = pos_to_block(bidx, is_luma);
ivec2 bpos = ivec2(scan & 0xf, scan >> 4);
uint c = ((level + 1) ^ -s) + s;
put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
}
}
}
/* 7.1.2 Scanned Alpha */
void decode_alpha(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
{
uvec3 gid = gl_GlobalInvocationID;
ivec2 base_pos = ivec2(mb_pos) << 4;
uint block_shift = findMSB(mb_count) + 4, block_mask = (1 << block_shift) - 1;
uint mask = (1 << (4 << alpha_info)) - 1;
uint num_values = (mb_count << 4) * min(height - (gid.y << 4), 16);
int num_cw_bits = alpha_info == 1 ? 5 : 8,
num_flc_bits = alpha_info == 1 ? 9 : 17;
uint alpha_rescale_lshift = alpha_info == 1 ? depth - 8 : 16,
alpha_rescale_rshift = 16 - depth;
uint alpha = -1;
for (uint pos = 0; pos < num_values;) {
uint diff, run;
/* Decode run value */
{
uint bits = show_bits(gb, num_cw_bits), q = num_cw_bits - 1 - findMSB(bits);
/* Tables 13/14 */
if (q != 0) {
uint m = (bits >> 1) + 1, s = bits & 1;
diff = (m ^ -s) + s;
skip_bits(gb, num_cw_bits);
} else {
diff = get_bits(gb, num_flc_bits);
}
alpha = alpha + diff & mask;
}
/* Decode run length */
{
uint bits = show_bits(gb, 5), q = 4 - findMSB(bits);
/* Table 12 */
if (q == 0) {
run = 1;
skip_bits(gb, 1);
} else if (q <= 4) {
run = bits + 1;
skip_bits(gb, 5);
} else {
run = get_bits(gb, 16) + 1;
}
run = min(run, num_values - pos);
}
/**
* FFmpeg doesn't support color and alpha with different precision,
* so we need to rescale to the color range.
*/
uint val = (alpha << alpha_rescale_lshift) | (alpha >> alpha_rescale_rshift);
for (uint end = pos + run; pos < end; ++pos)
put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift), val & 0xffff);
}
}
void main(void)
{
uvec3 gid = gl_GlobalInvocationID;
if (gid.x >= slice_width || gid.y >= slice_height)
return;
uint slice_idx = gid.y * slice_width + gid.x;
uint slice_off = slice_offsets[slice_idx],
slice_size = slice_offsets[slice_idx + 1] - slice_off;
u8buf bs = u8buf(slice_data + slice_off);
/* Decode slice header */
uint hdr_size, y_size, u_size, v_size, a_size;
hdr_size = bs[0].v >> 3;
/* Table 15 */
uint qidx = clamp(bs[1].v, 1, 224),
qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
y_size = (uint(bs[2].v) << 8) | bs[3].v;
u_size = (uint(bs[4].v) << 8) | bs[5].v;
/**
* The alpha_info field can be 0 even when an alpha plane is present,
* if skip_alpha is enabled, so use the header size instead.
*/
if (hdr_size > 6)
v_size = (uint(bs[6].v) << 8) | bs[7].v;
else
v_size = slice_size - hdr_size - y_size - u_size;
a_size = slice_size - hdr_size - y_size - u_size - v_size;
GetBitContext gb;
switch (gid.z) {
case 0:
init_get_bits(gb, u8buf(bs + hdr_size), int(y_size));
break;
case 1:
init_get_bits(gb, u8buf(bs + hdr_size + y_size), int(u_size));
break;
case 2:
init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size), int(v_size));
break;
case 3:
init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size + v_size), int(a_size));
break;
}
/**
* Support for the grayscale "extension" in the prores_aw encoder.
* According to the spec, entropy coded data should never be empty,
* and instead contain at least the DC coefficients.
* This avoids undefined behavior.
*/
if (left_bits(gb) == 0)
return;
/**
* 4 ProRes Frame Structure
* ProRes tiles pictures into a grid of slices, whose size is determined
* by the log2_slice_width parameter (height is always 1 MB).
* Each slice has a width of (1 << log2_slice_width) MBs, until the picture
* cannot accommodate a full one. At this point, the remaining space
* is recursively completed using the first smaller power of two that fits
* (see Figure 1).
* The maximum number of extra slices is 3, when log2_slice_width is 3,
* with sizes 4, 2 and 1 MBs.
* The mb_width parameter therefore also represents the number of full slices,
* when interpreted as a fixed-point number with log2_slice_width fractional bits.
*/
uint frac = bitfieldExtract(uint(mb_width), 0, log2_slice_width),
num_extra = bitCount(frac);
uint diff = slice_width - gid.x - 1,
off = max(int(diff - num_extra + 1) << 2, 0);
uint log2_width = min(findLSB(frac - diff >> diff) + diff + off, log2_slice_width);
uint mb_x = (min(gid.x, slice_width - num_extra) << log2_slice_width) +
(frac & (0xf << log2_width + 1)),
mb_y = gid.y;
uint mb_count = 1 << log2_width;
if (gid.z < 3) {
/* Color entropy decoding, inverse scanning, first part of inverse quantization */
decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
} else {
/* Alpha entropy decoding */
decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
}
}

View File

@ -26,7 +26,8 @@
#define DECODER_IS_SDR(codec_id) \
(((codec_id) == AV_CODEC_ID_FFV1) || \
((codec_id) == AV_CODEC_ID_PRORES_RAW))
((codec_id) == AV_CODEC_ID_PRORES_RAW) || \
((codec_id) == AV_CODEC_ID_PRORES))
#if CONFIG_H264_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
@ -46,6 +47,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
#endif
#if CONFIG_PRORES_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc;
#endif
static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_H264_VULKAN_HWACCEL
@ -66,6 +70,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
&ff_vk_dec_prores_raw_desc,
#endif
#if CONFIG_PRORES_VULKAN_HWACCEL
&ff_vk_dec_prores_desc,
#endif
};
typedef struct FFVulkanDecodeProfileData {

541
libavcodec/vulkan_prores.c Normal file
View File

@ -0,0 +1,541 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "proresdec.h"
#include "vulkan_decode.h"
#include "hwaccel_internal.h"
#include "libavutil/mem.h"
#include "libavutil/vulkan.h"
#include "libavutil/vulkan_loader.h"
#include "libavutil/vulkan_spirv.h"
extern const char *ff_source_common_comp;
extern const char *ff_source_prores_reset_comp;
extern const char *ff_source_prores_vld_comp;
extern const char *ff_source_prores_idct_comp;
const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = {
.codec_id = AV_CODEC_ID_PRORES,
.queue_flags = VK_QUEUE_COMPUTE_BIT,
};
typedef struct ProresVulkanDecodePicture {
FFVulkanDecodePicture vp;
AVBufferRef *slice_offset_buf;
uint32_t slice_num;
uint32_t bitstream_start;
uint32_t bitstream_size;
} ProresVulkanDecodePicture;
typedef struct ProresVulkanDecodeContext {
struct ProresVulkanShaderVariants {
FFVulkanShader reset;
FFVulkanShader vld;
FFVulkanShader idct;
} shaders[2]; /* Progressive/interlaced */
AVBufferPool *slice_offset_pool;
} ProresVulkanDecodeContext;
typedef struct ProresVkParameters {
VkDeviceAddress slice_data;
uint32_t bitstream_size;
uint16_t width;
uint16_t height;
uint16_t mb_width;
uint16_t mb_height;
uint16_t slice_width;
uint16_t slice_height;
uint8_t log2_slice_width;
uint8_t log2_chroma_w;
uint8_t depth;
uint8_t alpha_info;
uint8_t bottom_field;
uint8_t qmat_luma [64];
uint8_t qmat_chroma[64];
} ProresVkParameters;
static int vk_prores_start_frame(AVCodecContext *avctx,
const AVBufferRef *buffer_ref,
av_unused const uint8_t *buffer,
av_unused uint32_t size)
{
ProresContext *pr = avctx->priv_data;
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
FFVulkanDecodeShared *ctx = dec->shared_ctx;
ProresVulkanDecodeContext *pv = ctx->sd_ctx;
ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
FFVulkanDecodePicture *vp = &pp->vp;
int err;
/* Host map the input slices data if supported */
if (!vp->slices_buf && ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
RET(ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
buffer_ref,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
/* Allocate slice offsets buffer */
RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool,
&pp->slice_offset_buf,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
NULL, (pr->slice_count + 1) * sizeof(uint32_t),
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
/* Prepare frame to be used */
RET(ff_vk_decode_prepare_frame_sdr(dec, pr->frame, vp, 1,
FF_VK_REP_NATIVE, 0));
pp->slice_num = 0;
pp->bitstream_start = pp->bitstream_size = 0;
fail:
return err;
}
static int vk_prores_decode_slice(AVCodecContext *avctx,
const uint8_t *data,
uint32_t size)
{
ProresContext *pr = avctx->priv_data;
ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
FFVulkanDecodePicture *vp = &pp->vp;
FFVkBuffer *slice_offset = (FFVkBuffer *)pp->slice_offset_buf->data;
FFVkBuffer *slices_buf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;
/* Skip picture header */
if (slices_buf && slices_buf->host_ref && !pp->slice_num)
pp->bitstream_size = data - slices_buf->mapped_mem;
AV_WN32(slice_offset->mapped_mem + (pp->slice_num + 0) * sizeof(uint32_t),
pp->bitstream_size);
AV_WN32(slice_offset->mapped_mem + (pp->slice_num + 1) * sizeof(uint32_t),
pp->bitstream_size += size);
if (!slices_buf || !slices_buf->host_ref) {
int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
&pp->slice_num, NULL);
if (err < 0)
return err;
} else {
pp->slice_num++;
}
return 0;
}
static int vk_prores_end_frame(AVCodecContext *avctx)
{
ProresContext *pr = avctx->priv_data;
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
FFVulkanDecodeShared *ctx = dec->shared_ctx;
FFVulkanFunctions *vk = &ctx->s.vkfn;
ProresVulkanDecodeContext *pv = ctx->sd_ctx;
ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
FFVulkanDecodePicture *vp = &pp->vp;
ProresVkParameters pd;
FFVkBuffer *slice_data, *slice_offsets;
struct ProresVulkanShaderVariants *shaders;
VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
VkBufferMemoryBarrier2 buf_bar[2];
int nb_img_bar = 0, nb_buf_bar = 0, err;
const AVPixFmtDescriptor *pix_desc;
if (!pp->slice_num)
return 0;
pix_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
if (!pix_desc)
return AVERROR(EINVAL);
slice_data = (FFVkBuffer *)vp->slices_buf->data;
slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
shaders = &pv->shaders[pr->frame_type != 0];
pd = (ProresVkParameters) {
.slice_data = slice_data->address,
.bitstream_size = pp->bitstream_size,
.width = avctx->width,
.height = avctx->height,
.mb_width = pr->mb_width,
.mb_height = pr->mb_height,
.slice_width = pr->slice_count / pr->mb_height,
.slice_height = pr->mb_height,
.log2_slice_width = av_log2(pr->slice_mb_width),
.log2_chroma_w = pix_desc->log2_chroma_w,
.depth = avctx->bits_per_raw_sample,
.alpha_info = pr->alpha_info,
.bottom_field = pr->first_field ^ (pr->frame_type == 1),
};
memcpy(pd.qmat_luma, pr->qmat_luma, sizeof(pd.qmat_luma ));
memcpy(pd.qmat_chroma, pr->qmat_chroma, sizeof(pd.qmat_chroma));
FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
RET(ff_vk_exec_start(&ctx->s, exec));
/* Prepare deps */
RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, pr->frame,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
pr->frame));
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec,
(AVBufferRef *[]){ vp->slices_buf, pp->slice_offset_buf },
2, 0));
/* Transfer ownership to the exec context */
vp->slices_buf = pp->slice_offset_buf = NULL;
/* Input frame barrier */
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
nb_img_bar = nb_buf_bar = 0;
/* Reset */
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->reset,
pr->frame, vp->view.out,
0, 0,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->reset,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->reset);
vk->CmdDispatch(exec->buf, pr->mb_width << 1, pr->mb_height << 1, 1);
/* Input frame barrier after reset */
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
nb_img_bar = nb_buf_bar = 0;
/* Entropy decode */
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
0, 0, 0,
slice_offsets,
0, (pp->slice_num + 1) * sizeof(uint32_t),
VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
pr->frame, vp->view.out,
0, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->vld,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->vld);
vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->slice_count / pr->mb_height, 3), AV_CEIL_RSHIFT(pr->mb_height, 3),
3 + !!pr->alpha_info);
/* Synchronize vld and idct shaders */
nb_img_bar = 0;
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
nb_img_bar = nb_buf_bar = 0;
/* Inverse transform */
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
pr->frame, vp->view.out,
0, 0,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->idct);
ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->idct,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->mb_width, 1), pr->mb_height, 3);
RET(ff_vk_exec_submit(&ctx->s, exec));
fail:
return err;
}
static int add_push_data(FFVulkanShader *shd)
{
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, u8buf slice_data; );
GLSLC(1, uint bitstream_size; );
GLSLC(0, );
GLSLC(1, uint16_t width; );
GLSLC(1, uint16_t height; );
GLSLC(1, uint16_t mb_width; );
GLSLC(1, uint16_t mb_height; );
GLSLC(1, uint16_t slice_width; );
GLSLC(1, uint16_t slice_height; );
GLSLC(1, uint8_t log2_slice_width; );
GLSLC(1, uint8_t log2_chroma_w; );
GLSLC(1, uint8_t depth; );
GLSLC(1, uint8_t alpha_info; );
GLSLC(1, uint8_t bottom_field; );
GLSLC(0, );
GLSLC(1, uint8_t qmat_luma [8*8]; );
GLSLC(1, uint8_t qmat_chroma[8*8]; );
GLSLC(0, }; );
return ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
}
static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
FFVulkanShader *shd, const char *name, const char *entrypoint,
FFVulkanDescriptorSetBinding *descs, int num_descs,
const char *source, int local_size, int interlaced)
{
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
int err;
RET(ff_vk_shader_init(s, shd, name,
VK_SHADER_STAGE_COMPUTE_BIT,
(const char *[]) { "GL_EXT_buffer_reference",
"GL_EXT_buffer_reference2" }, 2,
local_size >> 16 & 0xff, local_size >> 8 & 0xff, local_size >> 0 & 0xff,
0));
/* Common code */
GLSLD(ff_source_common_comp);
/* Push constants layout */
RET(add_push_data(shd));
RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0));
if (interlaced)
av_bprintf(&shd->src, "#define INTERLACED\n");
/* Main code */
GLSLD(source);
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, entrypoint,
&spv_opaque));
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, entrypoint));
RET(ff_vk_shader_register_exec(s, pool, shd));
fail:
if (spv_opaque)
spv->free_shader(spv, &spv_opaque);
return 0;
}
static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx)
{
ProresVulkanDecodeContext *pv = ctx->sd_ctx;
int i;
for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) {
ff_vk_shader_free(&ctx->s, &pv->shaders[i].reset);
ff_vk_shader_free(&ctx->s, &pv->shaders[i].vld);
ff_vk_shader_free(&ctx->s, &pv->shaders[i].idct);
}
av_buffer_pool_uninit(&pv->slice_offset_pool);
av_freep(&pv);
}
static int vk_decode_prores_init(AVCodecContext *avctx)
{
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
FFVulkanDecodeShared *ctx = NULL;
AVHWFramesContext *out_frames_ctx;
ProresVulkanDecodeContext *pv;
FFVkSPIRVCompiler *spv;
FFVulkanDescriptorSetBinding *desc_set;
int max_num_slices, i, err;
max_num_slices = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
spv = ff_vk_spirv_init();
if (!spv) {
av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
}
err = ff_vk_decode_init(avctx);
if (err < 0)
return err;
ctx = dec->shared_ctx;
pv = ctx->sd_ctx = av_mallocz(sizeof(*pv));
if (!pv) {
err = AVERROR(ENOMEM);
goto fail;
}
out_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
ctx->sd_ctx_free = vk_decode_prores_uninit;
for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) { /* Progressive/interlaced */
struct ProresVulkanShaderVariants *shaders = &pv->shaders[i];
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.dimensions = 2,
.mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
FF_VK_REP_NATIVE),
.mem_quali = "writeonly",
.elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->reset,
"prores_dec_reset", "main", desc_set, 1,
ff_source_prores_reset_comp, 0x080801, i));
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "slice_offsets_buf",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_quali = "readonly",
.buf_content = "uint32_t slice_offsets",
.buf_elems = max_num_slices + 1,
},
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.dimensions = 2,
.mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
FF_VK_REP_NATIVE),
.mem_quali = "writeonly",
.elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
"prores_dec_vld", "main", desc_set, 2,
ff_source_prores_vld_comp, 0x080801, i));
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.dimensions = 2,
.mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
FF_VK_REP_NATIVE),
.elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
"prores_dec_idct", "main", desc_set, 1,
ff_source_prores_idct_comp, 0x200201, i));
}
err = 0;
fail:
spv->uninit(&spv);
return err;
}
static void vk_prores_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
{
AVHWDeviceContext *dev_ctx = _hwctx.nc;
ProresVulkanDecodePicture *pp = data;
ff_vk_decode_free_frame(dev_ctx, &pp->vp);
}
const FFHWAccel ff_prores_vulkan_hwaccel = {
.p.name = "prores_vulkan",
.p.type = AVMEDIA_TYPE_VIDEO,
.p.id = AV_CODEC_ID_PRORES,
.p.pix_fmt = AV_PIX_FMT_VULKAN,
.start_frame = &vk_prores_start_frame,
.decode_slice = &vk_prores_decode_slice,
.end_frame = &vk_prores_end_frame,
.free_frame_priv = &vk_prores_free_frame_priv,
.frame_priv_data_size = sizeof(ProresVulkanDecodePicture),
.init = &vk_decode_prores_init,
.update_thread_context = &ff_vk_update_thread_context,
.decode_params = &ff_vk_params_invalidate,
.flush = &ff_vk_decode_flush,
.uninit = &ff_vk_decode_uninit,
.frame_params = &ff_vk_frame_params,
.priv_data_size = sizeof(FFVulkanDecodeContext),
.caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
};

View File

@ -2991,6 +2991,7 @@ static int mov_finalize_stsd_codec(MOVContext *c, AVIOContext *pb,
sti->need_parsing = AVSTREAM_PARSE_FULL;
break;
case AV_CODEC_ID_PRORES_RAW:
case AV_CODEC_ID_PRORES:
case AV_CODEC_ID_APV:
case AV_CODEC_ID_EVC:
case AV_CODEC_ID_AV1:

View File

@ -145,7 +145,6 @@ typedef struct MXFSequence {
UID *structural_components_refs;
int structural_components_count;
int64_t duration;
uint8_t origin;
} MXFSequence;
typedef struct MXFTimecodeComponent {
@ -189,6 +188,7 @@ typedef struct {
int body_sid;
MXFWrappingScheme wrapping;
int edit_units_per_packet; /* how many edit units to read at a time (PCM, ClipWrapped) */
int64_t origin;
} MXFTrack;
typedef struct MXFDescriptor {
@ -1155,6 +1155,9 @@ static int mxf_read_track(void *arg, AVIOContext *pb, int tag, int size, UID uid
track->edit_rate.num = avio_rb32(pb);
track->edit_rate.den = avio_rb32(pb);
break;
case 0x4b02:
track->origin = avio_rb64(pb);
break;
case 0x4803:
avio_read(pb, track->sequence_ref, 16);
break;
@ -1172,9 +1175,6 @@ static int mxf_read_sequence(void *arg, AVIOContext *pb, int tag, int size, UID
case 0x0201:
avio_read(pb, sequence->data_definition_ul, 16);
break;
case 0x4b02:
sequence->origin = avio_r8(pb);
break;
case 0x1001:
return mxf_read_strong_ref_array(pb, &sequence->structural_components_refs,
&sequence->structural_components_count);
@ -3025,11 +3025,11 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
}
}
sti->need_parsing = AVSTREAM_PARSE_HEADERS;
if (material_track->sequence->origin) {
av_dict_set_int(&st->metadata, "material_track_origin", material_track->sequence->origin, 0);
if (material_track->origin) {
av_dict_set_int(&st->metadata, "material_track_origin", material_track->origin, 0);
}
if (source_track->sequence->origin) {
av_dict_set_int(&st->metadata, "source_track_origin", source_track->sequence->origin, 0);
if (source_track->origin) {
av_dict_set_int(&st->metadata, "source_track_origin", source_track->origin, 0);
}
if (descriptor->aspect_ratio.num && descriptor->aspect_ratio.den)
sti->display_aspect_ratio = descriptor->aspect_ratio;