Compare commits

...

9 Commits

Author SHA1 Message Date
Michael Yang 20051ed3af avcodec/vulkan_encode_av1: fix level index 2025-10-16 21:59:24 +00:00
Michael Yang 62d43ba2e3 libavfilter/vf_nlmeans_vulkan: fix str defaults
Revert back to NAN as -1.0 was erroneously clamped to 0.0 to fit in the options
range.

Add special handling of str as requested.
2025-10-16 21:32:43 +00:00
Michael Yang e8213f766f libavfilter/vf_nlmeans_vulkan: amend doc 2025-10-16 21:32:43 +00:00
Michael Yang 7d65ce7763 libavfilter/vf_nlmeans_vulkan: clean up defaults
Change per-plane strength defaults to -1.0.
2025-10-16 21:32:43 +00:00
Michael Yang 26dee5b43e libavfilter/vf_nlmeans_vulkan: reverse img_bar 2025-10-16 21:32:43 +00:00
Michael Yang 71ff349cc1 libavfilter/vf_nlmeans_vulkan: lower strength min
Lower (per-component) strength minimum from 1.0 to 0.0, with 0.0 skipping
integral and weights calculations.
2025-10-16 21:32:43 +00:00
Michael Yang 2e12b3251d libavfilter/vf_nlmeans_vulkan: clean up naming
Add `nb_components` to push data.

Rename `ws_total_*` to `ws_*`.
2025-10-16 21:32:43 +00:00
Michael Yang 3fac2d8593 avfilter/vf_nlmeans_vulkan: rewrite filter
This is a major rewrite of the existing nlmeans vulkan code, with bug
fixes and major performance improvement.

Fix visual artifacts found in ticket #10661, #10733. Add OOB checks for
image loading and patch sized area around the border. Correct chroma
plane height, strength and buffer barrier index.

Improve parallelism with component workgroup axis and more but smaller
workgroups. Split weights pass into vertical/horizontal (integral) and
weights passes. Remove h/v order logic to always calculate sum on
vertical pass. Remove atomic float requirement, which causes high memory
locking contentions, at the cost of higher memory usage of w/s buffer.
Use cache blocking in h pass to reduce memory bandwidth usage.
2025-10-16 21:32:43 +00:00
Martin Storsjö 36896af64a movenc: Make the hybrid_fragmented mode more robust
Write the moov tag at the end first, before overwriting the mdat size
at the start of the file.

In case writing the final moov box fails (e.g. due to being out
of disk), we haven't broken the initial moov box yet.

Thus if writing stops between these steps, we could end up with
a file with two moov boxes - which arguably is more feasible to
recover from, than from a file with no moov boxes at all.
2025-10-16 18:58:54 +00:00
4 changed files with 609 additions and 485 deletions

View File

@ -29101,7 +29101,7 @@ The filter accepts the following options.
@table @option @table @option
@item s @item s
Set denoising strength for all components. Default is 1.0. Must be in range [1.0, 100.0]. Set denoising strength for all components. Default is 1.0. Must be in range [0.0, 100.0].
@item p @item p
Set patch size for all planes. Default is 7. Must be odd number in range [0, 99]. Set patch size for all planes. Default is 7. Must be odd number in range [0, 99].
@ -29110,17 +29110,16 @@ Set patch size for all planes. Default is 7. Must be odd number in range [0, 99]
Set research size. Default is 15. Must be odd number in range [0, 99]. Set research size. Default is 15. Must be odd number in range [0, 99].
@item t @item t
Set parallelism. Default is 36. Must be a number in the range [1, 168]. Set parallelism. Default is 8. Must be a number in the range [1, 64].
Larger values may speed up processing, at the cost of more VRAM. Larger values will use more VRAM but may not result in greater speed.
Lower values will slow it down, reducing VRAM usage. The optimal value is hardware and input dependent.
Only supported on GPUs with atomic float operations (RDNA3+, Ampere+).
@item s0 @item s0
@item s1 @item s1
@item s2 @item s2
@item s3 @item s3
Set denoising strength for a specific component. Default is @var{1}, equal to @option{s}. Set denoising strength for a specific component. Default is @var{1.0}, equal to @option{s}.
Must be odd number in range [1, 100]. Must be in range [0.0, 100.0]. 0.0 disables denoising in that component.
@item p0 @item p0
@item p1 @item p1

View File

@ -605,7 +605,7 @@ static int init_profile(AVCodecContext *avctx,
enc->tile_cols, framerate); enc->tile_cols, framerate);
if (level) { if (level) {
av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name); av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
enc->seq_level_idx = ff_vk_av1_level_to_vk(level->level_idx); enc->seq_level_idx = level->level_idx;
} else { } else {
av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to " av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to "
"any normal level, using level 7.3 by default.\n"); "any normal level, using level 7.3 by default.\n");

File diff suppressed because it is too large Load Diff

View File

@ -8546,6 +8546,28 @@ static int shift_data(AVFormatContext *s)
return ff_format_shift_data(s, mov->reserved_header_pos, moov_size); return ff_format_shift_data(s, mov->reserved_header_pos, moov_size);
} }
/*
 * Patch the size of the mdat atom at its recorded position in the output.
 *
 * When the atom size (mdat_size plus the 8 byte header) fits in 32 bits it
 * is written in place at mdat_pos. Otherwise the 'wide' placeholder atom
 * located 8 bytes before mdat_pos is overwritten with an extended-size
 * mdat header.
 *
 * The return values of the seek/write calls are not inspected, and the
 * position of s->pb is left wherever the last write ended.
 */
static void mov_write_mdat_size(AVFormatContext *s)
{
    MOVMuxContext *mov = s->priv_data;
    AVIOContext *pb = s->pb;

    if (mov->mdat_size + 8 > UINT32_MAX) {
        /* Too large for a 32 bit size field: overwrite the 'wide'
         * placeholder atom in front of the mdat instead. */
        avio_seek(pb, mov->mdat_pos - 8, SEEK_SET);
        /* A size of 1 is the special value announcing that the real atom
         * size follows the tag field as a 64 bit value. */
        avio_wb32(pb, 1);
        ffio_wfourcc(pb, "mdat");
        avio_wb64(pb, mov->mdat_size + 16);
        return;
    }

    /* Regular case: write the 32 bit size of the mdat tag in place. */
    avio_seek(pb, mov->mdat_pos, SEEK_SET);
    avio_wb32(pb, mov->mdat_size + 8);
    if (mov->flags & FF_MOV_FLAG_HYBRID_FRAGMENTED) {
        /* Overwrite the original moov into a mdat. */
        ffio_wfourcc(pb, "mdat");
    }
}
static int mov_write_trailer(AVFormatContext *s) static int mov_write_trailer(AVFormatContext *s)
{ {
MOVMuxContext *mov = s->priv_data; MOVMuxContext *mov = s->priv_data;
@ -8605,21 +8627,9 @@ static int mov_write_trailer(AVFormatContext *s)
moov_pos = avio_tell(pb); moov_pos = avio_tell(pb);
/* Write size of mdat tag */ if (!(mov->flags & FF_MOV_FLAG_HYBRID_FRAGMENTED))
if (mov->mdat_size + 8 <= UINT32_MAX) { mov_write_mdat_size(s);
avio_seek(pb, mov->mdat_pos, SEEK_SET);
avio_wb32(pb, mov->mdat_size + 8);
if (mov->flags & FF_MOV_FLAG_HYBRID_FRAGMENTED)
ffio_wfourcc(pb, "mdat"); // overwrite the original moov into a mdat
} else {
/* overwrite 'wide' placeholder atom */
avio_seek(pb, mov->mdat_pos - 8, SEEK_SET);
/* special value: real atom size will be 64 bit value after
* tag field */
avio_wb32(pb, 1);
ffio_wfourcc(pb, "mdat");
avio_wb64(pb, mov->mdat_size + 16);
}
avio_seek(pb, mov->reserved_moov_size > 0 ? mov->reserved_header_pos : moov_pos, SEEK_SET); avio_seek(pb, mov->reserved_moov_size > 0 ? mov->reserved_header_pos : moov_pos, SEEK_SET);
if (mov->flags & FF_MOV_FLAG_FASTSTART) { if (mov->flags & FF_MOV_FLAG_FASTSTART) {
@ -8647,6 +8657,15 @@ static int mov_write_trailer(AVFormatContext *s)
if ((res = mov_write_moov_tag(pb, mov, s)) < 0) if ((res = mov_write_moov_tag(pb, mov, s)) < 0)
return res; return res;
} }
if (mov->flags & FF_MOV_FLAG_HYBRID_FRAGMENTED) {
// With hybrid fragmentation, only write the mdat size (hiding
// the original moov and all the fragments within the mdat)
// after we've successfully written the complete moov, to avoid
// risk for an unreadable file if writing the final moov fails.
mov_write_mdat_size(s);
}
res = 0; res = 0;
} else { } else {
mov_auto_flush_fragment(s, 1); mov_auto_flush_fragment(s, 1);