mirror of https://github.com/FFmpeg/FFmpeg.git
Compare commits
7 Commits
defd5f3f64
...
d01608e022
| Author | SHA1 | Date |
|---|---|---|
|
|
d01608e022 | |
|
|
23df9d4172 | |
|
|
98412edfed | |
|
|
3fd55d952e | |
|
|
987368ef25 | |
|
|
9195af77eb | |
|
|
f5f72b4f8a |
|
|
@ -3343,6 +3343,8 @@ prores_videotoolbox_hwaccel_deps="videotoolbox"
|
|||
prores_videotoolbox_hwaccel_select="prores_decoder"
|
||||
prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler"
|
||||
prores_raw_vulkan_hwaccel_select="prores_raw_decoder"
|
||||
prores_vulkan_hwaccel_deps="vulkan spirv_compiler"
|
||||
prores_vulkan_hwaccel_select="prores_decoder"
|
||||
vc1_d3d11va_hwaccel_deps="d3d11va"
|
||||
vc1_d3d11va_hwaccel_select="vc1_decoder"
|
||||
vc1_d3d11va2_hwaccel_deps="d3d11va"
|
||||
|
|
|
|||
|
|
@ -1106,6 +1106,7 @@ OBJS-$(CONFIG_VP9_VULKAN_HWACCEL) += vulkan_decode.o vulkan_vp9.o
|
|||
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
|
||||
OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
|
||||
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores_raw.o
|
||||
OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores.o
|
||||
|
||||
# Objects duplicated from other libraries for shared builds
|
||||
SHLIBOBJS += log2_tab.o reverse.o
|
||||
|
|
@ -1281,6 +1282,7 @@ OBJS-$(CONFIG_PNG_PARSER) += png_parser.o
|
|||
OBJS-$(CONFIG_PNM_PARSER) += pnm_parser.o pnm.o
|
||||
OBJS-$(CONFIG_PRORES_RAW_PARSER) += prores_raw_parser.o
|
||||
OBJS-$(CONFIG_QOI_PARSER) += qoi_parser.o
|
||||
OBJS-$(CONFIG_PRORES_PARSER) += prores_parser.o
|
||||
OBJS-$(CONFIG_RV34_PARSER) += rv34_parser.o
|
||||
OBJS-$(CONFIG_SBC_PARSER) += sbc_parser.o
|
||||
OBJS-$(CONFIG_SIPR_PARSER) += sipr_parser.o
|
||||
|
|
@ -1350,7 +1352,7 @@ SKIPHEADERS-$(CONFIG_QSVENC) += qsvenc.h
|
|||
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
|
||||
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
|
||||
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
|
||||
SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h vulkan_video.h \
|
||||
SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h prores_vulkan.h vulkan_video.h \
|
||||
vulkan_encode.h vulkan_decode.h
|
||||
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
|
||||
SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel;
|
|||
extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel;
|
||||
extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel;
|
||||
extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel;
|
||||
extern const struct FFHWAccel ff_prores_vulkan_hwaccel;
|
||||
extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel;
|
||||
extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel;
|
||||
extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel;
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ extern const AVCodecParser ff_mpeg4video_parser;
|
|||
extern const AVCodecParser ff_mpegaudio_parser;
|
||||
extern const AVCodecParser ff_mpegvideo_parser;
|
||||
extern const AVCodecParser ff_opus_parser;
|
||||
extern const AVCodecParser ff_prores_parser;
|
||||
extern const AVCodecParser ff_png_parser;
|
||||
extern const AVCodecParser ff_pnm_parser;
|
||||
extern const AVCodecParser ff_prores_raw_parser;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "bytestream.h"
|
||||
|
||||
#include "avcodec.h"
|
||||
|
||||
static int parse(AVCodecParserContext *s,
|
||||
AVCodecContext *avctx,
|
||||
const uint8_t **poutbuf, int *poutbuf_size,
|
||||
const uint8_t *buf, int buf_size)
|
||||
{
|
||||
GetByteContext gb;
|
||||
uint8_t flags, depth, chroma_format, alpha_channel_type;
|
||||
|
||||
*poutbuf = buf;
|
||||
*poutbuf_size = buf_size;
|
||||
|
||||
/* Frame fields + frame header size */
|
||||
if (buf_size < 28)
|
||||
return buf_size;
|
||||
|
||||
bytestream2_init(&gb, buf, buf_size);
|
||||
|
||||
/* Frame size */
|
||||
if (bytestream2_get_be32(&gb) != buf_size)
|
||||
return buf_size;
|
||||
|
||||
/* Frame identifier */
|
||||
if (bytestream2_get_le32(&gb) != MKTAG('i','c','p','f'))
|
||||
return buf_size;
|
||||
|
||||
/* Frame header size */
|
||||
if (bytestream2_get_be16(&gb) < 20)
|
||||
return buf_size;
|
||||
|
||||
bytestream2_skip(&gb, 6); /* Bitstream version, encoder identifier */
|
||||
|
||||
s->key_frame = 1;
|
||||
s->pict_type = AV_PICTURE_TYPE_I;
|
||||
|
||||
s->width = bytestream2_get_be16(&gb);
|
||||
s->height = bytestream2_get_be16(&gb);
|
||||
s->coded_width = FFALIGN(s->width, 16);
|
||||
s->coded_height = FFALIGN(s->height, 16);
|
||||
|
||||
flags = bytestream2_get_byte(&gb);
|
||||
|
||||
/* Interlace mode */
|
||||
switch (flags >> 2 & 3) {
|
||||
case 0:
|
||||
s->field_order = AV_FIELD_PROGRESSIVE;
|
||||
s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
|
||||
break;
|
||||
case 1:
|
||||
s->field_order = AV_FIELD_TT;
|
||||
s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD;
|
||||
break;
|
||||
case 2:
|
||||
s->field_order = AV_FIELD_BB;
|
||||
s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
bytestream2_skip(&gb, 4); /* Aspect ratio information, frame rate code, color primaries, transfer characteristic, matrix coefficients */
|
||||
|
||||
/* Determine pixel format based on color depth, chroma format and alpha type */
|
||||
switch (avctx->codec_tag) {
|
||||
case MKTAG('a','p','c','o'):
|
||||
case MKTAG('a','p','c','s'):
|
||||
case MKTAG('a','p','c','n'):
|
||||
case MKTAG('a','p','c','h'):
|
||||
depth = 10;
|
||||
break;
|
||||
case MKTAG('a','p','4','h'):
|
||||
case MKTAG('a','p','4','x'):
|
||||
depth = 12;
|
||||
break;
|
||||
default:
|
||||
return buf_size;
|
||||
}
|
||||
|
||||
chroma_format = flags >> 6 & 3;
|
||||
if (chroma_format < 2)
|
||||
return buf_size;
|
||||
|
||||
alpha_channel_type = bytestream2_get_byte(&gb) & 0xf;
|
||||
|
||||
switch (depth | (chroma_format << 4) | (alpha_channel_type << 8)) {
|
||||
case 10 | (2 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV422P10; break;
|
||||
case 10 | (2 << 4) | (1 << 8):
|
||||
case 10 | (2 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA422P10; break;
|
||||
case 10 | (3 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV444P10; break;
|
||||
case 10 | (3 << 4) | (1 << 8):
|
||||
case 10 | (3 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA444P10; break;
|
||||
case 12 | (2 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV422P12; break;
|
||||
case 12 | (2 << 4) | (1 << 8):
|
||||
case 12 | (2 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA422P12; break;
|
||||
case 12 | (3 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV444P12; break;
|
||||
case 12 | (3 << 4) | (1 << 8):
|
||||
case 12 | (3 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA444P12; break;
|
||||
}
|
||||
|
||||
return buf_size;
|
||||
}
|
||||
|
||||
const AVCodecParser ff_prores_parser = {
|
||||
.codec_ids = { AV_CODEC_ID_PRORES },
|
||||
.parser_parse = parse,
|
||||
};
|
||||
|
|
@ -251,7 +251,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
|
|||
}
|
||||
|
||||
if (pix_fmt != ctx->pix_fmt) {
|
||||
#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL)
|
||||
#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL + CONFIG_PRORES_VULKAN_HWACCEL)
|
||||
#if HWACCEL_MAX
|
||||
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
|
||||
int ret;
|
||||
|
|
@ -260,6 +260,9 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
|
|||
|
||||
#if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
|
||||
*fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
|
||||
#endif
|
||||
#if CONFIG_PRORES_VULKAN_HWACCEL
|
||||
*fmtp++ = AV_PIX_FMT_VULKAN;
|
||||
#endif
|
||||
*fmtp++ = ctx->pix_fmt;
|
||||
*fmtp = AV_PIX_FMT_NONE;
|
||||
|
|
@ -273,10 +276,10 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
|
|||
#endif
|
||||
}
|
||||
|
||||
ctx->frame->color_primaries = buf[14];
|
||||
ctx->frame->color_trc = buf[15];
|
||||
ctx->frame->colorspace = buf[16];
|
||||
ctx->frame->color_range = AVCOL_RANGE_MPEG;
|
||||
avctx->color_primaries = buf[14];
|
||||
avctx->color_trc = buf[15];
|
||||
avctx->colorspace = buf[16];
|
||||
avctx->color_range = AVCOL_RANGE_MPEG;
|
||||
|
||||
ptr = buf + 20;
|
||||
flags = buf[19];
|
||||
|
|
@ -335,6 +338,9 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
|
|||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
ctx->slice_mb_width = 1 << log2_slice_mb_width;
|
||||
ctx->slice_mb_height = 1 << log2_slice_mb_height;
|
||||
|
||||
ctx->mb_width = (avctx->width + 15) >> 4;
|
||||
if (ctx->frame_type)
|
||||
ctx->mb_height = (avctx->height + 31) >> 5;
|
||||
|
|
@ -344,7 +350,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
|
|||
// QT ignores the written value
|
||||
// slice_count = AV_RB16(buf + 5);
|
||||
slice_count = ctx->mb_height * ((ctx->mb_width >> log2_slice_mb_width) +
|
||||
av_popcount(ctx->mb_width & (1 << log2_slice_mb_width) - 1));
|
||||
av_popcount(ctx->mb_width & ctx->slice_mb_width - 1));
|
||||
|
||||
if (ctx->slice_count != slice_count || !ctx->slices) {
|
||||
av_freep(&ctx->slices);
|
||||
|
|
@ -367,7 +373,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
|
|||
index_ptr = buf + hdr_size;
|
||||
data_ptr = index_ptr + slice_count*2;
|
||||
|
||||
slice_mb_count = 1 << log2_slice_mb_width;
|
||||
slice_mb_count = ctx->slice_mb_width;
|
||||
mb_x = 0;
|
||||
mb_y = 0;
|
||||
|
||||
|
|
@ -392,7 +398,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
|
|||
|
||||
mb_x += slice_mb_count;
|
||||
if (mb_x == ctx->mb_width) {
|
||||
slice_mb_count = 1 << log2_slice_mb_width;
|
||||
slice_mb_count = ctx->slice_mb_width;
|
||||
mb_x = 0;
|
||||
mb_y++;
|
||||
}
|
||||
|
|
@ -756,6 +762,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
|
|||
const uint8_t *buf = avpkt->data;
|
||||
int buf_size = avpkt->size;
|
||||
int frame_hdr_size, pic_size, ret;
|
||||
int i;
|
||||
|
||||
if (buf_size < 28 || AV_RL32(buf + 4) != AV_RL32("icpf")) {
|
||||
av_log(avctx, AV_LOG_ERROR, "invalid frame header\n");
|
||||
|
|
@ -772,26 +779,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
|
|||
if (frame_hdr_size < 0)
|
||||
return frame_hdr_size;
|
||||
|
||||
if (avctx->skip_frame == AVDISCARD_ALL)
|
||||
return 0;
|
||||
|
||||
buf += frame_hdr_size;
|
||||
buf_size -= frame_hdr_size;
|
||||
|
||||
if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
|
||||
return ret;
|
||||
ff_thread_finish_setup(avctx);
|
||||
|
||||
if (HWACCEL_MAX && avctx->hwaccel) {
|
||||
const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
|
||||
ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = hwaccel->end_frame(avctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
goto finish;
|
||||
}
|
||||
av_refstruct_unref(&ctx->hwaccel_picture_private);
|
||||
|
||||
if ((ret = ff_hwaccel_frame_priv_alloc(avctx, &ctx->hwaccel_picture_private)) < 0)
|
||||
return ret;
|
||||
|
||||
ff_thread_finish_setup(avctx);
|
||||
|
||||
decode_picture:
|
||||
pic_size = decode_picture_header(avctx, buf, buf_size);
|
||||
|
|
@ -800,7 +802,23 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
|
|||
return pic_size;
|
||||
}
|
||||
|
||||
if ((ret = decode_picture(avctx)) < 0) {
|
||||
if (HWACCEL_MAX && avctx->hwaccel) {
|
||||
const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
|
||||
|
||||
ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
for (i = 0; i < ctx->slice_count; ++i) {
|
||||
ret = hwaccel->decode_slice(avctx, ctx->slices[i].data, ctx->slices[i].data_size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = hwaccel->end_frame(avctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
} else if ((ret = decode_picture(avctx)) < 0) {
|
||||
av_log(avctx, AV_LOG_ERROR, "error decoding picture\n");
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -813,7 +831,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
|
|||
goto decode_picture;
|
||||
}
|
||||
|
||||
finish:
|
||||
av_refstruct_unref(&ctx->hwaccel_picture_private);
|
||||
|
||||
*got_frame = 1;
|
||||
|
||||
return avpkt->size;
|
||||
|
|
@ -824,6 +843,7 @@ static av_cold int decode_close(AVCodecContext *avctx)
|
|||
ProresContext *ctx = avctx->priv_data;
|
||||
|
||||
av_freep(&ctx->slices);
|
||||
av_refstruct_unref(&ctx->hwaccel_picture_private);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -851,11 +871,15 @@ const FFCodec ff_prores_decoder = {
|
|||
FF_CODEC_DECODE_CB(decode_frame),
|
||||
UPDATE_THREAD_CONTEXT(update_thread_context),
|
||||
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
|
||||
.caps_internal = FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
|
||||
.p.profiles = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
|
||||
#if HWACCEL_MAX
|
||||
.hw_configs = (const AVCodecHWConfigInternal *const []) {
|
||||
#if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
|
||||
HWACCEL_VIDEOTOOLBOX(prores),
|
||||
#endif
|
||||
#if CONFIG_PRORES_VULKAN_HWACCEL
|
||||
HWACCEL_VULKAN(prores),
|
||||
#endif
|
||||
NULL
|
||||
},
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ typedef struct {
|
|||
BlockDSPContext bdsp;
|
||||
ProresDSPContext prodsp;
|
||||
AVFrame *frame;
|
||||
void *hwaccel_picture_private;
|
||||
int frame_type; ///< 0 = progressive, 1 = tff, 2 = bff
|
||||
uint8_t qmat_luma[64];
|
||||
uint8_t qmat_chroma[64];
|
||||
|
|
@ -51,6 +52,8 @@ typedef struct {
|
|||
int slice_count; ///< number of slices in the current picture
|
||||
unsigned mb_width; ///< width of the current picture in mb
|
||||
unsigned mb_height; ///< height of the current picture in mb
|
||||
unsigned slice_mb_width; ///< maximum width of a slice in mb
|
||||
unsigned slice_mb_height; ///< maximum height of a slice in mb
|
||||
uint8_t progressive_scan[64];
|
||||
uint8_t interlaced_scan[64];
|
||||
const uint8_t *scan;
|
||||
|
|
|
|||
|
|
@ -1161,16 +1161,21 @@ static int videotoolbox_prores_start_frame(AVCodecContext *avctx,
|
|||
const uint8_t *buffer,
|
||||
uint32_t size)
|
||||
{
|
||||
return 0;
|
||||
VTContext *vtctx = avctx->internal->hwaccel_priv_data;
|
||||
ProresContext *ctx = avctx->priv_data;
|
||||
|
||||
/* Videotoolbox decodes both fields simultaneously */
|
||||
if (!ctx->first_field)
|
||||
return 0;
|
||||
|
||||
return ff_videotoolbox_buffer_copy(vtctx, buffer, size);
|
||||
}
|
||||
|
||||
static int videotoolbox_prores_decode_slice(AVCodecContext *avctx,
|
||||
const uint8_t *buffer,
|
||||
uint32_t size)
|
||||
{
|
||||
VTContext *vtctx = avctx->internal->hwaccel_priv_data;
|
||||
|
||||
return ff_videotoolbox_buffer_copy(vtctx, buffer, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int videotoolbox_prores_end_frame(AVCodecContext *avctx)
|
||||
|
|
@ -1178,6 +1183,9 @@ static int videotoolbox_prores_end_frame(AVCodecContext *avctx)
|
|||
ProresContext *ctx = avctx->priv_data;
|
||||
AVFrame *frame = ctx->frame;
|
||||
|
||||
if (!ctx->first_field)
|
||||
return 0;
|
||||
|
||||
return ff_videotoolbox_common_end_frame(avctx, frame);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
|
|||
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
|
||||
vulkan/prores_raw.o
|
||||
|
||||
OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \
|
||||
vulkan/prores_reset.o \
|
||||
vulkan/prores_vld.o \
|
||||
vulkan/prores_idct.o
|
||||
|
||||
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
|
||||
.SECONDARY: $(VULKAN:.comp=.c)
|
||||
libavcodec/vulkan/%.c: TAG = VULKAN
|
||||
|
|
|
|||
|
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
 * Workgroup scratch space for two macroblocks (2 * 4 blocks of 8x8
 * coefficients). Each row of a block is stored with a pitch of 9 floats
 * instead of 8; the extra element is padding to avoid bank conflicts.
 */
shared float blocks[2 * 4][8 * (8 + 1)];
|
||||
|
||||
/**
 * Read the first component of one texel from plane tex_idx.
 * With INTERLACED defined, pos.y counts lines of the current field and is
 * mapped to the frame line (2 * y + bottom_field).
 */
uint get_px(uint tex_idx, ivec2 pos)
{
#ifdef INTERLACED
    pos = ivec2(pos.x, (pos.y << 1) + bottom_field);
#endif
    return imageLoad(dst[tex_idx], pos).x;
}
|
||||
|
||||
/**
 * Store v into one texel of plane tex_idx.
 * With INTERLACED defined, pos.y counts lines of the current field and is
 * mapped to the frame line (2 * y + bottom_field).
 */
void put_px(uint tex_idx, ivec2 pos, uint v)
{
#ifdef INTERLACED
    pos = ivec2(pos.x, (pos.y << 1) + bottom_field);
#endif
    imageStore(dst[tex_idx], pos, uvec4(v));
}
|
||||
|
||||
/* 7.4 Inverse Transform */
/**
 * In-place 8-point one-dimensional inverse transform on shared memory.
 *
 * The eight samples are blocks[block][k * stride + offset] for k = 0..7, so
 * the same routine serves rows (stride 1) and columns (stride = row pitch).
 */
void idct(uint block, uint offset, uint stride)
{
    float in0 = blocks[block][0*stride + offset];
    float in1 = blocks[block][1*stride + offset];
    float in2 = blocks[block][2*stride + offset];
    float in3 = blocks[block][3*stride + offset];
    float in4 = blocks[block][4*stride + offset];
    float in5 = blocks[block][5*stride + offset];
    float in6 = blocks[block][6*stride + offset];
    float in7 = blocks[block][7*stride + offset];

    /* Contribution of the even-indexed inputs */
    float d26  = in2 - in6;
    float ev_p = in6 * 1.4142134189605712891 + d26;
    float ev_m = in6 * 1.4142134189605712891 - d26;

    float sum04 = (in0 + in4) * 0.35355341434478759766;
    float dif04 = (in0 - in4) * 0.35355341434478759766;

    float even0 = sum04 + ev_p * 0.46193981170654296875;
    float even3 = sum04 - ev_p * 0.46193981170654296875;

    float even2 = dif04 + ev_m * 0.19134169816970825195;
    float even1 = dif04 - ev_m * 0.19134169816970825195;

    /* Contribution of the odd-indexed inputs */
    float d35  = (in3 - in5) * 0.70710682868957519531;
    float od_p = d35 + in7;
    float od_m = d35 - in7;

    float d57  = in5 - in7;
    float d13  = in1 - in3;
    float od_a = d57 *  1.4142134189605712891 + d57 + d13;
    float od_b = d57 * -1.4142134189605712891 + d57 + d13;

    float odd0 = od_p *  2.6131260395050048828 + od_a;
    float odd3 = od_p * -2.6131260395050048828 + od_a;

    float odd1 = od_m *  1.0823919773101806641 + od_b;
    float odd2 = od_m * -1.0823919773101806641 + od_b;

    /* Butterfly: outputs k and 7 - k share the same even and odd terms */
    blocks[block][0*stride + offset] = odd0 *  0.49039259552955627441  + even0;
    blocks[block][7*stride + offset] = odd0 * -0.49039259552955627441  + even0;
    blocks[block][1*stride + offset] = odd1 *  0.41573479771614074707  + even1;
    blocks[block][6*stride + offset] = odd1 * -0.41573479771614074707  + even1;
    blocks[block][2*stride + offset] = odd2 *  0.27778509259223937988  + even2;
    blocks[block][5*stride + offset] = odd2 * -0.27778509259223937988  + even2;
    blocks[block][3*stride + offset] = odd3 *  0.097545139491558074951 + even3;
    blocks[block][4*stride + offset] = odd3 * -0.097545139491558074951 + even3;
}
|
||||
|
||||
/**
 * Inverse-transform one 8x8 block per group of eight invocations.
 *
 * Each invocation owns one column of a block: it loads and dequantizes the
 * eight coefficients of that column, takes part in the row and column passes
 * through shared memory, and finally writes the eight reconstructed samples
 * of the same column back to the image.
 */
void main(void)
{
    uvec3 gid = gl_GlobalInvocationID;
    uvec3 lid = gl_LocalInvocationID;

    uint plane = gid.z;
    uint blk   = (lid.y << 2) | (lid.x >> 3);   /* block slot in shared memory */
    uint col   = lid.x & 0x7;                   /* column inside the block */

    uint chroma_shift = plane != 0 ? log2_chroma_w : 0;

    /* Invocations to the right of the coded width still have to reach the barriers */
    bool in_picture = gid.x < mb_width << (4 - chroma_shift);

    /* Coalesced load of DCT coeffs in shared memory, second part of inverse quantization */
    if (in_picture) {
        /**
         * According to spec indexing an array in push constant memory with
         * a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
         * so copy the whole matrix locally.
         */
        uint8_t[64] qmat = plane == 0 ? qmat_luma : qmat_chroma;
        [[unroll]] for (uint row = 0; row < 8; ++row) {
            ivec2 px  = ivec2(gid.x, (gid.y << 3) | row);
            int coeff = sign_extend(int(get_px(plane, px)), 16);
            blocks[blk][row * 9 + col] = float(coeff * int(qmat[(row << 3) + col]));
        }
    }

    /* Row-wise iDCT */
    barrier();
    idct(blk, col * 9, 1);

    /* Column-wise iDCT */
    barrier();
    idct(blk, col, 9);

    float scale = 1.0f / (1 << (12 - depth));
    float bias  = 1 << (depth - 1);
    int max_val = (1 << depth) - 1;

    /* 7.5.1 Color Component Samples. Rescale, clamp and write back to global memory */
    barrier();
    if (in_picture) {
        [[unroll]] for (uint row = 0; row < 8; ++row) {
            float sample_v = blocks[blk][row * 9 + col] * scale + bias;
            ivec2 px = ivec2(gid.x, (gid.y << 3) | row);
            put_px(plane, px, clamp(int(sample_v), 0, max_val));
        }
    }
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
 * Zero one texel of the luma plane and, when it lies inside the chroma
 * width, the matching texel of both chroma planes.
 */
void main(void)
{
    uvec3 gid = gl_GlobalInvocationID;

#ifdef INTERLACED
    /* gid.y counts lines of the current field */
    ivec2 texel = ivec2(gid.x, (gid.y << 1) + bottom_field);
#else
    ivec2 texel = ivec2(gid);
#endif

    /* Clear luma plane */
    imageStore(dst[0], texel, uvec4(0));

    /* Alpha plane doesn't need a clear because it is not sparsely encoded */

    /* Nothing left to do outside the chroma width */
    if (gid.x >= mb_width << (4 - log2_chroma_w))
        return;

    /* Clear chroma planes */
    imageStore(dst[1], texel, uvec4(0));
    imageStore(dst[2], texel, uvec4(0));
}
|
||||
|
|
@ -0,0 +1,317 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#define U8(x) (uint8_t (x))
|
||||
#define U16(x) (uint16_t(x))
|
||||
|
||||
/**
 * Store v into one texel of plane tex_idx.
 * With INTERLACED defined, pos.y counts lines of the current field and is
 * mapped to the frame line (2 * y + bottom_field).
 */
void put_px(uint tex_idx, ivec2 pos, uint v)
{
#ifdef INTERLACED
    pos = ivec2(pos.x, (pos.y << 1) + bottom_field);
#endif
    imageStore(dst[tex_idx], pos, uvec4(v));
}
|
||||
|
||||
/* 7.5.3 Pixel Arrangement */
/**
 * Map a block index inside a slice to the pixel offset of that block.
 *
 * With luma == 1, bit 0 of pos selects the column and bit 1 the row inside a
 * macroblock; with luma == 0, bit 0 selects the row and the remaining bits
 * the column. The block coordinates are scaled by 8 to get pixels.
 */
ivec2 pos_to_block(uint pos, uint luma)
{
    uint bx = ((pos & (-luma - 2)) + luma) >> 1;
    uint by = (pos >> luma) & 1;
    return ivec2(bx, by) << 3;
}
|
||||
|
||||
/* 7.1.1.2 Signed Golomb Combination Codes */
/**
 * Bit 0 of x selects whether the remaining bits (x >> 1) are returned as
 * they are or with every bit inverted.
 */
uint to_signed(uint x)
{
    uint half_value = x >> 1;
    uint invert     = -(x & 1);   /* all ones when bit 0 is set, else zero */
    return half_value ^ invert;
}
|
||||
|
||||
/* 7.1.1.1 Golomb Combination Codes */
/**
 * Read one codeword from gb.
 *
 * codebook packs three 4-bit fields: bits 0-3 hold the largest prefix length
 * still coded as Golomb-Rice, bits 4-7 the Rice order and bits 8-11 the
 * exp-Golomb order.
 */
uint decode_codeword(inout GetBitContext gb, int codebook)
{
    int switch_q  = bitfieldExtract(codebook, 0, 4);
    int rice_bits = bitfieldExtract(codebook, 4, 4);
    int exp_bits  = bitfieldExtract(codebook, 8, 4);

    /* Number of leading zero bits in the next 32 bits of the stream */
    int zeros = 31 - findMSB(show_bits(gb, 32));

    /* exp-Golomb encoding */
    if (zeros > switch_q)
        return get_bits(gb, (zeros << 1) + exp_bits - switch_q) - (1 << exp_bits) + ((switch_q + 1) << rice_bits);

    /* Golomb-Rice encoding */
    return (get_bits(gb, rice_bits + zeros + 1) & ~(1 << rice_bits)) + (zeros << rice_bits);
}
|
||||
|
||||
/**
 * Entropy-decode one color component of a slice and scatter the coefficients,
 * already multiplied by qscale, into plane gid.z of the destination image.
 *
 * gb       bit reader positioned at the start of the component data
 * mb_pos   position of the first macroblock of the slice, in macroblocks
 * mb_count number of macroblocks in the slice
 * qscale   quantization scale of the slice
 *
 * Coefficients are stored as 16-bit two's complement values.
 */
void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
{
    uvec3 gid = gl_GlobalInvocationID;
    uint is_luma = uint(gid.z == 0);
    uint chroma_shift = bool(is_luma) ? 0 : log2_chroma_w;

    uint num_blocks = mb_count << (2 - chroma_shift);
    ivec2 base_pos = ivec2(mb_pos.x << (4 - chroma_shift), mb_pos.y << 4);

    /* 7.1.1.3 DC Coefficients */
    {
        /* First coeff */
        uint c = to_signed(decode_codeword(gb, 0x650));
        put_px(gid.z, base_pos, c * qscale & 0xffff);

        /**
         * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8)
         * According to the SMPTE document, abs(prev_dc_diff) should be used
         * to index the table, duplicating the entries removes the abs operation.
         */
        const uint16_t dc_codebook[] = { U16(0x100),
                                         U16(0x210), U16(0x210),
                                         U16(0x321), U16(0x321),
                                         U16(0x430), U16(0x430), };

        uint cw = 5, prev_dc_diff = 0;
        for (int i = 1; i < num_blocks; ++i) {
            cw = decode_codeword(gb, dc_codebook[min(cw, 6)]);

            /* Flip the sign of the new difference when the previous one was negative */
            int s = int(prev_dc_diff) >> 31;
            prev_dc_diff = (to_signed(cw) ^ s) - s;
            c += prev_dc_diff;

            put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 0xffff);
        }
    }

    /* 7.1.1.4 AC Coefficients */
    {
        /* Table 10 */
        const uint16_t ac_run_codebook  [] = { U16(0x102), U16(0x102), U16(0x101), U16(0x101),
                                               U16(0x100), U16(0x211), U16(0x211), U16(0x211),
                                               U16(0x211), U16(0x210), U16(0x210), U16(0x210),
                                               U16(0x210), U16(0x210), U16(0x210), U16(0x320), };

        /* Table 11 */
        const uint16_t ac_level_codebook[] = { U16(0x202), U16(0x101), U16(0x102), U16(0x100),
                                               U16(0x210), U16(0x210), U16(0x210), U16(0x210),
                                               U16(0x320) };

#ifndef INTERLACED
        /* Figure 4, encoded as (x << 0) | (y << 4) */
        const uint8_t scan_tbl[] = {
            U8(0x00), U8(0x01), U8(0x10), U8(0x11), U8(0x02), U8(0x03), U8(0x12), U8(0x13),
            U8(0x20), U8(0x21), U8(0x30), U8(0x31), U8(0x22), U8(0x23), U8(0x32), U8(0x33),
            U8(0x04), U8(0x05), U8(0x14), U8(0x24), U8(0x15), U8(0x06), U8(0x07), U8(0x16),
            U8(0x25), U8(0x34), U8(0x35), U8(0x26), U8(0x17), U8(0x27), U8(0x36), U8(0x37),
            U8(0x40), U8(0x41), U8(0x50), U8(0x60), U8(0x51), U8(0x42), U8(0x43), U8(0x52),
            U8(0x61), U8(0x70), U8(0x71), U8(0x62), U8(0x53), U8(0x44), U8(0x45), U8(0x54),
            U8(0x63), U8(0x72), U8(0x73), U8(0x64), U8(0x55), U8(0x46), U8(0x47), U8(0x56),
            U8(0x65), U8(0x74), U8(0x75), U8(0x66), U8(0x57), U8(0x67), U8(0x76), U8(0x77),
        };
#else
        /* Figure 5 */
        const uint8_t scan_tbl[] = {
            U8(0x00), U8(0x10), U8(0x01), U8(0x11), U8(0x20), U8(0x30), U8(0x21), U8(0x31),
            U8(0x02), U8(0x12), U8(0x03), U8(0x13), U8(0x22), U8(0x32), U8(0x23), U8(0x33),
            U8(0x40), U8(0x50), U8(0x41), U8(0x42), U8(0x51), U8(0x60), U8(0x70), U8(0x61),
            U8(0x52), U8(0x43), U8(0x53), U8(0x62), U8(0x71), U8(0x72), U8(0x63), U8(0x73),
            U8(0x04), U8(0x14), U8(0x05), U8(0x06), U8(0x15), U8(0x24), U8(0x34), U8(0x25),
            U8(0x16), U8(0x07), U8(0x17), U8(0x26), U8(0x35), U8(0x44), U8(0x54), U8(0x45),
            U8(0x36), U8(0x27), U8(0x37), U8(0x46), U8(0x55), U8(0x64), U8(0x74), U8(0x65),
            U8(0x56), U8(0x47), U8(0x57), U8(0x66), U8(0x75), U8(0x76), U8(0x67), U8(0x77),
        };
#endif

        /**
         * Coefficients of all blocks are interleaved: the low bits of pos
         * select the block, the high bits the position in scan order.
         */
        uint block_mask = num_blocks - 1;
        uint block_shift = findLSB(num_blocks);

        /* Start on the last DC coefficient, which has already been decoded */
        uint pos = num_blocks - 1, run = 4, level = 1, s;
        while (pos < num_blocks << 6) {
            /* Stop when the data is exhausted or only zero padding is left */
            int left = left_bits(gb);
            if (left <= 0 || (left < 32 && show_bits(gb, left) == 0))
                break;

            run   = decode_codeword(gb, ac_run_codebook  [min(run,   15)]);
            level = decode_codeword(gb, ac_level_codebook[min(level, 8 )]);
            s     = get_bits(gb, 1);

            pos += run + 1;

            /**
             * A corrupt stream can code a run that leaves the slice; bail out
             * instead of indexing scan_tbl[] past its 64 entries.
             */
            if (pos >= num_blocks << 6)
                break;

            uint bidx = pos & block_mask, scan = scan_tbl[pos >> block_shift];
            ivec2 spos = pos_to_block(bidx, is_luma);
            ivec2 bpos = ivec2(scan & 0xf, scan >> 4);

            /* level + 1, negated when the sign bit s is set */
            uint c = ((level + 1) ^ -s) + s;
            put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
        }
    }
}
|
||||
|
||||
/* 7.1.2 Scanned Alpha */
/**
 * Decode the run-length coded alpha samples of one slice into plane 3.
 *
 * gb       bit reader positioned at the start of the alpha data
 * mb_pos   position of the first macroblock of the slice, in macroblocks
 * mb_count number of macroblocks in the slice
 *
 * Samples are coded in raster order over the slice as pairs of
 * (difference to the previous alpha value, run length).
 */
void decode_alpha(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
{
    uvec3 gid = gl_GlobalInvocationID;

    /* Top-left pixel of the slice */
    ivec2 origin = ivec2(mb_pos) << 4;

    /* Split a sample index into a column (low bits) and a row (high bits) */
    uint row_shift = findMSB(mb_count) + 4;
    uint col_mask  = (1 << row_shift) - 1;

    /* alpha_info == 1 gives 8-bit alpha values, otherwise 16-bit ones */
    uint value_mask = (1 << (4 << alpha_info)) - 1;

    /* The last slice row may cover fewer than 16 picture lines */
    uint total = (mb_count << 4) * min(height - (gid.y << 4), 16);

    bool alpha_8bit = alpha_info == 1;
    int cw_len  = alpha_8bit ? 5 : 8;    /* short difference code, flag bit included */
    int flc_len = alpha_8bit ? 9 : 17;   /* fixed-length code, flag bit included */

    uint up_shift   = alpha_8bit ? depth - 8 : 16;
    uint down_shift = 16 - depth;

    uint alpha = -1;
    uint pos = 0;
    while (pos < total) {
        /* Decode the alpha value difference */
        uint head = show_bits(gb, cw_len);
        uint lead = cw_len - 1 - findMSB(head);
        uint delta;

        /* Tables 13/14 */
        if (lead != 0) {
            /* Leading bit clear: small magnitude, sign in the last bit */
            uint mag = (head >> 1) + 1;
            uint neg = head & 1;
            delta = (mag ^ -neg) + neg;
            skip_bits(gb, cw_len);
        } else {
            /* Leading bit set: the flag bit is read along and masked off below */
            delta = get_bits(gb, flc_len);
        }

        alpha = (alpha + delta) & value_mask;

        /* Decode run length */
        uint peek = show_bits(gb, 5);
        uint zeros = 4 - findMSB(peek);
        uint run;

        /* Table 12 */
        if (zeros == 0) {
            run = 1;
            skip_bits(gb, 1);
        } else if (zeros <= 4) {
            run = peek + 1;
            skip_bits(gb, 5);
        } else {
            run = get_bits(gb, 16) + 1;
        }

        /* Never write past the end of the slice */
        run = min(run, total - pos);

        /**
         * FFmpeg doesn't support color and alpha with different precision,
         * so we need to rescale to the color range.
         */
        uint val = (alpha << up_shift) | (alpha >> down_shift);

        uint stop = pos + run;
        while (pos < stop) {
            put_px(3, origin + ivec2(pos & col_mask, pos >> row_shift), val & 0xffff);
            ++pos;
        }
    }
}
|
||||
|
||||
/**
 * Shader entry point. gid.x / gid.y select the slice (column / row) and
 * gid.z selects the component: 0..2 are decoded with decode_comp(), 3 is
 * decoded with decode_alpha().
 */
void main(void)
{
    uvec3 gid = gl_GlobalInvocationID;

    /* Invocations outside of the slice grid have nothing to do */
    if (gid.x >= slice_width || gid.y >= slice_height)
        return;

    /* Locate the slice inside the bitstream buffer */
    uint slice_idx  = gid.y * slice_width + gid.x;
    uint slice_off  = slice_offsets[slice_idx];
    uint slice_size = slice_offsets[slice_idx + 1] - slice_off;

    u8buf bs = u8buf(slice_data + slice_off);

    /* Decode slice header */
    uint hdr_size = bs[0].v >> 3;

    /* Table 15 */
    uint qidx   = clamp(bs[1].v, 1, 224);
    uint qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;

    /* Big-endian 16-bit sizes of the luma and first chroma payloads */
    uint y_size = (uint(bs[2].v) << 8) | bs[3].v;
    uint u_size = (uint(bs[4].v) << 8) | bs[5].v;

    /**
     * The alpha_info field can be 0 even when an alpha plane is present,
     * if skip_alpha is enabled, so use the header size instead.
     */
    uint v_size;
    if (hdr_size > 6)
        v_size = (uint(bs[6].v) << 8) | bs[7].v;
    else
        v_size = slice_size - hdr_size - y_size - u_size;

    /* Whatever remains after the three color payloads is alpha data */
    uint a_size = slice_size - hdr_size - y_size - u_size - v_size;

    /* Point the bit reader at the payload of the selected component */
    GetBitContext gb;
    switch (gid.z) {
    case 0:
        init_get_bits(gb, u8buf(bs + hdr_size), int(y_size));
        break;
    case 1:
        init_get_bits(gb, u8buf(bs + hdr_size + y_size), int(u_size));
        break;
    case 2:
        init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size), int(v_size));
        break;
    case 3:
        init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size + v_size), int(a_size));
        break;
    }

    /**
     * Support for the grayscale "extension" in the prores_aw encoder.
     * According to the spec, entropy coded data should never be empty,
     * and instead contain at least the DC coefficients.
     * This avoids undefined behavior.
     */
    if (left_bits(gb) == 0)
        return;

    /**
     * 4 ProRes Frame Structure
     * ProRes tiles pictures into a grid of slices, whose size is determined
     * by the log2_slice_width parameter (height is always 1 MB).
     * Each slice has a width of (1 << log2_slice_width) MBs, until the picture
     * cannot accommodate a full one. At this point, the remaining space
     * is recursively completed using the first smaller power of two that fits
     * (see Figure 1).
     * The maximum number of extra slices is 3, when log2_slice_width is 3,
     * with sizes 4, 2 and 1 MBs.
     * The mb_width parameter therefore also represents the number of full slices,
     * when interpreted as a fixed-point number with log2_slice_width fractional bits.
     */
    uint frac      = bitfieldExtract(uint(mb_width), 0, log2_slice_width);
    uint num_extra = bitCount(frac);

    /* Number of slices to the right of this one in the same row */
    uint diff = slice_width - gid.x - 1;
    /* Positive only when diff >= num_extra; it then pushes the value below
     * past the min() clamp so that log2_width becomes log2_slice_width */
    uint off  = max(int(diff - num_extra + 1) << 2, 0);

    uint log2_width = min(findLSB((frac - diff) >> diff) + diff + off, log2_slice_width);

    /* Horizontal position: the slices before this one that are counted as
     * full-width, plus the bits of frac above bit log2_width */
    uint mb_x = (min(gid.x, slice_width - num_extra) << log2_slice_width) +
                (frac & (0xf << (log2_width + 1)));
    uint mb_y = gid.y;

    uint mb_count = 1 << log2_width;

    if (gid.z < 3) {
        /* Color entropy decoding, inverse scanning, first part of inverse quantization */
        decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
    } else {
        /* Alpha entropy decoding */
        decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
    }
}
|
||||
|
|
@ -26,7 +26,8 @@
|
|||
|
||||
#define DECODER_IS_SDR(codec_id) \
|
||||
(((codec_id) == AV_CODEC_ID_FFV1) || \
|
||||
((codec_id) == AV_CODEC_ID_PRORES_RAW))
|
||||
((codec_id) == AV_CODEC_ID_PRORES_RAW) || \
|
||||
((codec_id) == AV_CODEC_ID_PRORES))
|
||||
|
||||
#if CONFIG_H264_VULKAN_HWACCEL
|
||||
extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
|
||||
|
|
@ -46,6 +47,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
|
|||
#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
|
||||
extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
|
||||
#endif
|
||||
#if CONFIG_PRORES_VULKAN_HWACCEL
|
||||
extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc;
|
||||
#endif
|
||||
|
||||
static const FFVulkanDecodeDescriptor *dec_descs[] = {
|
||||
#if CONFIG_H264_VULKAN_HWACCEL
|
||||
|
|
@ -66,6 +70,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
|
|||
#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
|
||||
&ff_vk_dec_prores_raw_desc,
|
||||
#endif
|
||||
#if CONFIG_PRORES_VULKAN_HWACCEL
|
||||
&ff_vk_dec_prores_desc,
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef struct FFVulkanDecodeProfileData {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,541 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "proresdec.h"
|
||||
#include "vulkan_decode.h"
|
||||
#include "hwaccel_internal.h"
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavutil/vulkan.h"
|
||||
#include "libavutil/vulkan_loader.h"
|
||||
#include "libavutil/vulkan_spirv.h"
|
||||
|
||||
extern const char *ff_source_common_comp;
|
||||
extern const char *ff_source_prores_reset_comp;
|
||||
extern const char *ff_source_prores_vld_comp;
|
||||
extern const char *ff_source_prores_idct_comp;
|
||||
|
||||
const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = {
|
||||
.codec_id = AV_CODEC_ID_PRORES,
|
||||
.queue_flags = VK_QUEUE_COMPUTE_BIT,
|
||||
};
|
||||
|
||||
typedef struct ProresVulkanDecodePicture {
|
||||
FFVulkanDecodePicture vp;
|
||||
|
||||
AVBufferRef *slice_offset_buf;
|
||||
uint32_t slice_num;
|
||||
|
||||
uint32_t bitstream_start;
|
||||
uint32_t bitstream_size;
|
||||
} ProresVulkanDecodePicture;
|
||||
|
||||
typedef struct ProresVulkanDecodeContext {
|
||||
struct ProresVulkanShaderVariants {
|
||||
FFVulkanShader reset;
|
||||
FFVulkanShader vld;
|
||||
FFVulkanShader idct;
|
||||
} shaders[2]; /* Progressive/interlaced */
|
||||
|
||||
AVBufferPool *slice_offset_pool;
|
||||
} ProresVulkanDecodeContext;
|
||||
|
||||
typedef struct ProresVkParameters {
|
||||
VkDeviceAddress slice_data;
|
||||
uint32_t bitstream_size;
|
||||
|
||||
uint16_t width;
|
||||
uint16_t height;
|
||||
uint16_t mb_width;
|
||||
uint16_t mb_height;
|
||||
uint16_t slice_width;
|
||||
uint16_t slice_height;
|
||||
uint8_t log2_slice_width;
|
||||
uint8_t log2_chroma_w;
|
||||
uint8_t depth;
|
||||
uint8_t alpha_info;
|
||||
uint8_t bottom_field;
|
||||
|
||||
uint8_t qmat_luma [64];
|
||||
uint8_t qmat_chroma[64];
|
||||
} ProresVkParameters;
|
||||
|
||||
static int vk_prores_start_frame(AVCodecContext *avctx,
|
||||
const AVBufferRef *buffer_ref,
|
||||
av_unused const uint8_t *buffer,
|
||||
av_unused uint32_t size)
|
||||
{
|
||||
ProresContext *pr = avctx->priv_data;
|
||||
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
||||
FFVulkanDecodeShared *ctx = dec->shared_ctx;
|
||||
ProresVulkanDecodeContext *pv = ctx->sd_ctx;
|
||||
ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
|
||||
FFVulkanDecodePicture *vp = &pp->vp;
|
||||
|
||||
int err;
|
||||
|
||||
/* Host map the input slices data if supported */
|
||||
if (!vp->slices_buf && ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
|
||||
RET(ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
|
||||
buffer_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
|
||||
|
||||
/* Allocate slice offsets buffer */
|
||||
RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool,
|
||||
&pp->slice_offset_buf,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, (pr->slice_count + 1) * sizeof(uint32_t),
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
|
||||
|
||||
/* Prepare frame to be used */
|
||||
RET(ff_vk_decode_prepare_frame_sdr(dec, pr->frame, vp, 1,
|
||||
FF_VK_REP_NATIVE, 0));
|
||||
|
||||
pp->slice_num = 0;
|
||||
pp->bitstream_start = pp->bitstream_size = 0;
|
||||
|
||||
fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
 * Record one slice: store its start and end offsets in the slice offset
 * buffer and, unless the packet is host-mapped, copy its data into the
 * slice data buffer.
 *
 * @return 0 on success, a negative error code otherwise
 */
static int vk_prores_decode_slice(AVCodecContext *avctx,
                                  const uint8_t  *data,
                                  uint32_t        size)
{
    ProresContext             *pr = avctx->priv_data;
    ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
    FFVulkanDecodePicture     *vp = &pp->vp;

    FFVkBuffer *offsets    = (FFVkBuffer *)pp->slice_offset_buf->data;
    FFVkBuffer *slices_buf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;
    const int host_mapped  = slices_buf && slices_buf->host_ref;
    uint8_t *entry         = offsets->mapped_mem + pp->slice_num * sizeof(uint32_t);

    /* Skip picture header: with a host-mapped packet, offsets are relative
     * to the start of the mapping rather than to the first slice */
    if (host_mapped && !pp->slice_num)
        pp->bitstream_size = data - slices_buf->mapped_mem;

    /* Entry slice_num is this slice's start, entry slice_num + 1 its end */
    AV_WN32(entry, pp->bitstream_size);
    pp->bitstream_size += size;
    AV_WN32(entry + sizeof(uint32_t), pp->bitstream_size);

    if (host_mapped) {
        /* The data is already visible to the device; just count the slice */
        pp->slice_num++;
        return 0;
    }

    /* Copies the slice and increments pp->slice_num */
    int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
                                     &pp->slice_num, NULL);
    if (err < 0)
        return err;

    return 0;
}
|
||||
|
||||
/**
 * Record and submit the command buffer decoding the current picture.
 * Three compute passes run back to back on the output frame, separated by
 * image barriers: reset, entropy decode (vld) and inverse transform (idct).
 *
 * @return 0 if no slice was submitted, otherwise the result of the
 *         submission or a negative error code
 */
static int vk_prores_end_frame(AVCodecContext *avctx)
{
    ProresContext             *pr  = avctx->priv_data;
    FFVulkanDecodeContext     *dec = avctx->internal->hwaccel_priv_data;
    FFVulkanDecodeShared      *ctx = dec->shared_ctx;
    FFVulkanFunctions         *vk  = &ctx->s.vkfn;
    ProresVulkanDecodeContext *pv  = ctx->sd_ctx;
    ProresVulkanDecodePicture *pp  = pr->hwaccel_picture_private;
    FFVulkanDecodePicture     *vp  = &pp->vp;

    const AVPixFmtDescriptor          *pix_desc;
    struct ProresVulkanShaderVariants *shaders;
    FFVkBuffer                        *slice_data, *slice_offsets;
    FFVkExecContext                   *exec;
    ProresVkParameters                 pd;
    VkDependencyInfo                   dep_info;
    VkImageMemoryBarrier2              img_bar[AV_NUM_DATA_POINTERS];
    VkBufferMemoryBarrier2             buf_bar[2];
    int nb_img_bar = 0, nb_buf_bar = 0, err;

    /* Nothing to decode */
    if (!pp->slice_num)
        return 0;

    pix_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
    if (!pix_desc)
        return AVERROR(EINVAL);

    slice_data    = (FFVkBuffer *)vp->slices_buf->data;
    slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;

    /* Index 0: progressive, index 1: interlaced */
    shaders = &pv->shaders[pr->frame_type != 0];

    pd = (ProresVkParameters) {
        .slice_data       = slice_data->address,
        .bitstream_size   = pp->bitstream_size,

        .width            = avctx->width,
        .height           = avctx->height,
        .mb_width         = pr->mb_width,
        .mb_height        = pr->mb_height,
        .slice_width      = pr->slice_count / pr->mb_height,
        .slice_height     = pr->mb_height,
        .log2_slice_width = av_log2(pr->slice_mb_width),
        .log2_chroma_w    = pix_desc->log2_chroma_w,
        .depth            = avctx->bits_per_raw_sample,
        .alpha_info       = pr->alpha_info,
        .bottom_field     = pr->first_field ^ (pr->frame_type == 1),
    };

    memcpy(pd.qmat_luma,   pr->qmat_luma,   sizeof(pd.qmat_luma  ));
    memcpy(pd.qmat_chroma, pr->qmat_chroma, sizeof(pd.qmat_chroma));

    exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
    err  = ff_vk_exec_start(&ctx->s, exec);
    if (err < 0)
        return err;

    /* Prepare deps */
    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pr->frame,
                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                   VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
    if (err < 0)
        return err;

    err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
                                      pr->frame);
    if (err < 0)
        return err;

    {
        AVBufferRef *deps[] = { vp->slices_buf, pp->slice_offset_buf };

        err = ff_vk_exec_add_dep_buf(&ctx->s, exec, deps, 2, 0);
        if (err < 0)
            return err;
    }

    /* Transfer ownership to the exec context */
    vp->slices_buf       = NULL;
    pp->slice_offset_buf = NULL;

    /* Input frame barrier */
    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    dep_info = (VkDependencyInfo) {
        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pBufferMemoryBarriers    = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,
        .pImageMemoryBarriers     = img_bar,
        .imageMemoryBarrierCount  = nb_img_bar,
    };
    vk->CmdPipelineBarrier2(exec->buf, &dep_info);
    nb_img_bar = nb_buf_bar = 0;

    /* Reset */
    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->reset,
                                  pr->frame, vp->view.out,
                                  0, 0,
                                  VK_IMAGE_LAYOUT_GENERAL,
                                  VK_NULL_HANDLE);

    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->reset,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(pd), &pd);

    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->reset);

    /* 8x8 workgroups (see vk_decode_prores_init): two per macroblock and axis */
    vk->CmdDispatch(exec->buf, pr->mb_width << 1, pr->mb_height << 1, 1);

    /* Input frame barrier after reset */
    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    dep_info = (VkDependencyInfo) {
        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pBufferMemoryBarriers    = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,
        .pImageMemoryBarriers     = img_bar,
        .imageMemoryBarrierCount  = nb_img_bar,
    };
    vk->CmdPipelineBarrier2(exec->buf, &dep_info);
    nb_img_bar = nb_buf_bar = 0;

    /* Entropy decode */
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
                                    0, 0, 0,
                                    slice_offsets,
                                    0, (pp->slice_num + 1) * sizeof(uint32_t),
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
                                  pr->frame, vp->view.out,
                                  0, 1,
                                  VK_IMAGE_LAYOUT_GENERAL,
                                  VK_NULL_HANDLE);

    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->vld,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(pd), &pd);

    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->vld);

    /* One invocation per slice and component; the alpha layer is only
     * dispatched when the stream signals alpha */
    vk->CmdDispatch(exec->buf,
                    AV_CEIL_RSHIFT(pr->slice_count / pr->mb_height, 3),
                    AV_CEIL_RSHIFT(pr->mb_height, 3),
                    3 + !!pr->alpha_info);

    /* Synchronize vld and idct shaders */
    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    dep_info = (VkDependencyInfo) {
        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pBufferMemoryBarriers    = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,
        .pImageMemoryBarriers     = img_bar,
        .imageMemoryBarrierCount  = nb_img_bar,
    };
    vk->CmdPipelineBarrier2(exec->buf, &dep_info);
    nb_img_bar = nb_buf_bar = 0;

    /* Inverse transform */
    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
                                  pr->frame, vp->view.out,
                                  0, 0,
                                  VK_IMAGE_LAYOUT_GENERAL,
                                  VK_NULL_HANDLE);

    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->idct);

    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->idct,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(pd), &pd);

    /* Only the three color components are transformed */
    vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->mb_width, 1), pr->mb_height, 3);

    return ff_vk_exec_submit(&ctx->s, exec);
}
|
||||
|
||||
/**
 * Append the GLSL push-constant block to the shader source and register a
 * push-constant range of matching size for the compute stage.
 * The fields declared here mirror ProresVkParameters.
 *
 * @return the result of ff_vk_shader_add_push_const()
 */
static int add_push_data(FFVulkanShader *shd)
{
    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {);
    GLSLC(1, u8buf slice_data;);
    GLSLC(1, uint bitstream_size;);
    GLSLC(0, );
    GLSLC(1, uint16_t width;);
    GLSLC(1, uint16_t height;);
    GLSLC(1, uint16_t mb_width;);
    GLSLC(1, uint16_t mb_height;);
    GLSLC(1, uint16_t slice_width;);
    GLSLC(1, uint16_t slice_height;);
    GLSLC(1, uint8_t log2_slice_width;);
    GLSLC(1, uint8_t log2_chroma_w;);
    GLSLC(1, uint8_t depth;);
    GLSLC(1, uint8_t alpha_info;);
    GLSLC(1, uint8_t bottom_field;);
    GLSLC(0, );
    GLSLC(1, uint8_t qmat_luma [8*8];);
    GLSLC(1, uint8_t qmat_chroma[8*8];);
    GLSLC(0, };);

    return ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters),
                                       VK_SHADER_STAGE_COMPUTE_BIT);
}
|
||||
|
||||
static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
|
||||
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
||||
FFVulkanShader *shd, const char *name, const char *entrypoint,
|
||||
FFVulkanDescriptorSetBinding *descs, int num_descs,
|
||||
const char *source, int local_size, int interlaced)
|
||||
{
|
||||
uint8_t *spv_data;
|
||||
size_t spv_len;
|
||||
void *spv_opaque = NULL;
|
||||
int err;
|
||||
|
||||
RET(ff_vk_shader_init(s, shd, name,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
(const char *[]) { "GL_EXT_buffer_reference",
|
||||
"GL_EXT_buffer_reference2" }, 2,
|
||||
local_size >> 16 & 0xff, local_size >> 8 & 0xff, local_size >> 0 & 0xff,
|
||||
0));
|
||||
|
||||
/* Common code */
|
||||
GLSLD(ff_source_common_comp);
|
||||
|
||||
/* Push constants layout */
|
||||
RET(add_push_data(shd));
|
||||
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0));
|
||||
|
||||
if (interlaced)
|
||||
av_bprintf(&shd->src, "#define INTERLACED\n");
|
||||
|
||||
/* Main code */
|
||||
GLSLD(source);
|
||||
|
||||
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, entrypoint,
|
||||
&spv_opaque));
|
||||
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, entrypoint));
|
||||
|
||||
RET(ff_vk_shader_register_exec(s, pool, shd));
|
||||
|
||||
fail:
|
||||
if (spv_opaque)
|
||||
spv->free_shader(spv, &spv_opaque);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Release everything owned by the ProRes-specific decoder context: the
 * shaders of both variants, the slice offset buffer pool and the context
 * allocation itself.
 */
static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx)
{
    ProresVulkanDecodeContext *pv = ctx->sd_ctx;

    for (int i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) {
        struct ProresVulkanShaderVariants *variant = &pv->shaders[i];

        ff_vk_shader_free(&ctx->s, &variant->reset);
        ff_vk_shader_free(&ctx->s, &variant->vld);
        ff_vk_shader_free(&ctx->s, &variant->idct);
    }

    av_buffer_pool_uninit(&pv->slice_offset_pool);

    av_freep(&pv);
}
|
||||
|
||||
/**
 * Hwaccel init callback: sets up the common Vulkan decode state, allocates
 * the ProRes-specific context and compiles the reset, vld and idct shaders
 * for both the progressive and the interlaced variant.
 *
 * The SPIR-V compiler is only needed during initialization and is released
 * on every path once it has been created.
 *
 * @return 0 on success, a negative error code otherwise
 */
static int vk_decode_prores_init(AVCodecContext *avctx)
{
    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
    FFVulkanDecodeShared *ctx = NULL;

    AVHWFramesContext *out_frames_ctx;
    ProresVulkanDecodeContext *pv;
    FFVkSPIRVCompiler *spv;
    FFVulkanDescriptorSetBinding *desc_set;
    int max_num_slices, i, err;

    /* Upper bound used to size the slice offsets array declared in the shader */
    max_num_slices = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);

    spv = ff_vk_spirv_init();
    if (!spv) {
        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
        return AVERROR_EXTERNAL;
    }

    err = ff_vk_decode_init(avctx);
    if (err < 0)
        goto fail; /* do not leak the SPIR-V compiler */
    ctx = dec->shared_ctx;

    pv = ctx->sd_ctx = av_mallocz(sizeof(*pv));
    if (!pv) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    out_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;

    ctx->sd_ctx_free = vk_decode_prores_uninit;

    for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) { /* Progressive/interlaced */
        struct ProresVulkanShaderVariants *shaders = &pv->shaders[i];

        /* Reset shader: write-only access to the output planes */
        desc_set = (FFVulkanDescriptorSetBinding []) {
            {
                .name       = "dst",
                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                .dimensions = 2,
                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
                                                   FF_VK_REP_NATIVE),
                .mem_quali  = "writeonly",
                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
            },
        };
        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->reset,
                        "prores_dec_reset", "main", desc_set, 1,
                        ff_source_prores_reset_comp, 0x080801, i));

        /* Entropy decoding shader: slice offsets in, output planes out */
        desc_set = (FFVulkanDescriptorSetBinding []) {
            {
                .name        = "slice_offsets_buf",
                .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
                .mem_quali   = "readonly",
                .buf_content = "uint32_t slice_offsets",
                .buf_elems   = max_num_slices + 1,
            },
            {
                .name       = "dst",
                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                .dimensions = 2,
                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
                                                   FF_VK_REP_NATIVE),
                .mem_quali  = "writeonly",
                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
            },
        };
        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
                        "prores_dec_vld", "main", desc_set, 2,
                        ff_source_prores_vld_comp, 0x080801, i));

        /* Inverse transform shader: no memory qualifier on the output planes */
        desc_set = (FFVulkanDescriptorSetBinding []) {
            {
                .name       = "dst",
                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                .dimensions = 2,
                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
                                                   FF_VK_REP_NATIVE),
                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
            },
        };
        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
                        "prores_dec_idct", "main", desc_set, 1,
                        ff_source_prores_idct_comp, 0x200201, i));
    }

    err = 0;

fail:
    spv->uninit(&spv);

    return err;
}
|
||||
|
||||
/**
 * Free the per-picture private data.
 *
 * @param _hwctx opaque holding the AVHWDeviceContext
 * @param data   the ProresVulkanDecodePicture to release
 */
static void vk_prores_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
{
    AVHWDeviceContext *dev_ctx = _hwctx.nc;
    ProresVulkanDecodePicture *pp = data;

    ff_vk_decode_free_frame(dev_ctx, &pp->vp);

    /*
     * The slice offset buffer is allocated in start_frame and only handed
     * over to the exec context in end_frame. Release it here in case
     * end_frame returned early or was never reached; this is a no-op when
     * ownership was already transferred (the pointer is NULL then).
     */
    av_buffer_unref(&pp->slice_offset_buf);
}
|
||||
|
||||
const FFHWAccel ff_prores_vulkan_hwaccel = {
|
||||
.p.name = "prores_vulkan",
|
||||
.p.type = AVMEDIA_TYPE_VIDEO,
|
||||
.p.id = AV_CODEC_ID_PRORES,
|
||||
.p.pix_fmt = AV_PIX_FMT_VULKAN,
|
||||
.start_frame = &vk_prores_start_frame,
|
||||
.decode_slice = &vk_prores_decode_slice,
|
||||
.end_frame = &vk_prores_end_frame,
|
||||
.free_frame_priv = &vk_prores_free_frame_priv,
|
||||
.frame_priv_data_size = sizeof(ProresVulkanDecodePicture),
|
||||
.init = &vk_decode_prores_init,
|
||||
.update_thread_context = &ff_vk_update_thread_context,
|
||||
.decode_params = &ff_vk_params_invalidate,
|
||||
.flush = &ff_vk_decode_flush,
|
||||
.uninit = &ff_vk_decode_uninit,
|
||||
.frame_params = &ff_vk_frame_params,
|
||||
.priv_data_size = sizeof(FFVulkanDecodeContext),
|
||||
.caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
|
||||
};
|
||||
|
|
@ -2991,6 +2991,7 @@ static int mov_finalize_stsd_codec(MOVContext *c, AVIOContext *pb,
|
|||
sti->need_parsing = AVSTREAM_PARSE_FULL;
|
||||
break;
|
||||
case AV_CODEC_ID_PRORES_RAW:
|
||||
case AV_CODEC_ID_PRORES:
|
||||
case AV_CODEC_ID_APV:
|
||||
case AV_CODEC_ID_EVC:
|
||||
case AV_CODEC_ID_AV1:
|
||||
|
|
|
|||
|
|
@ -145,7 +145,6 @@ typedef struct MXFSequence {
|
|||
UID *structural_components_refs;
|
||||
int structural_components_count;
|
||||
int64_t duration;
|
||||
uint8_t origin;
|
||||
} MXFSequence;
|
||||
|
||||
typedef struct MXFTimecodeComponent {
|
||||
|
|
@ -189,6 +188,7 @@ typedef struct {
|
|||
int body_sid;
|
||||
MXFWrappingScheme wrapping;
|
||||
int edit_units_per_packet; /* how many edit units to read at a time (PCM, ClipWrapped) */
|
||||
int64_t origin;
|
||||
} MXFTrack;
|
||||
|
||||
typedef struct MXFDescriptor {
|
||||
|
|
@ -1155,6 +1155,9 @@ static int mxf_read_track(void *arg, AVIOContext *pb, int tag, int size, UID uid
|
|||
track->edit_rate.num = avio_rb32(pb);
|
||||
track->edit_rate.den = avio_rb32(pb);
|
||||
break;
|
||||
case 0x4b02:
|
||||
track->origin = avio_rb64(pb);
|
||||
break;
|
||||
case 0x4803:
|
||||
avio_read(pb, track->sequence_ref, 16);
|
||||
break;
|
||||
|
|
@ -1172,9 +1175,6 @@ static int mxf_read_sequence(void *arg, AVIOContext *pb, int tag, int size, UID
|
|||
case 0x0201:
|
||||
avio_read(pb, sequence->data_definition_ul, 16);
|
||||
break;
|
||||
case 0x4b02:
|
||||
sequence->origin = avio_r8(pb);
|
||||
break;
|
||||
case 0x1001:
|
||||
return mxf_read_strong_ref_array(pb, &sequence->structural_components_refs,
|
||||
&sequence->structural_components_count);
|
||||
|
|
@ -3025,11 +3025,11 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
|
|||
}
|
||||
}
|
||||
sti->need_parsing = AVSTREAM_PARSE_HEADERS;
|
||||
if (material_track->sequence->origin) {
|
||||
av_dict_set_int(&st->metadata, "material_track_origin", material_track->sequence->origin, 0);
|
||||
if (material_track->origin) {
|
||||
av_dict_set_int(&st->metadata, "material_track_origin", material_track->origin, 0);
|
||||
}
|
||||
if (source_track->sequence->origin) {
|
||||
av_dict_set_int(&st->metadata, "source_track_origin", source_track->sequence->origin, 0);
|
||||
if (source_track->origin) {
|
||||
av_dict_set_int(&st->metadata, "source_track_origin", source_track->origin, 0);
|
||||
}
|
||||
if (descriptor->aspect_ratio.num && descriptor->aspect_ratio.den)
|
||||
sti->display_aspect_ratio = descriptor->aspect_ratio;
|
||||
|
|
|
|||
Loading…
Reference in New Issue