Compare commits

...

13 Commits

Author SHA1 Message Date
Hendi b399896046 avformat/dashdec: Fix urls with special characters in manifest
This was especially a problem with ampersands, which occur
frequently as part of query parameters.
2025-11-18 22:10:34 +00:00
Stefan Breunig 4c4ab2ec6f fate/filter-video: add frei0r test where input is realigned
An installation of frei0r-plugins is required to run the tests,
which is usually separate from the build headers. Some systems
have it packaged (e.g. apt install frei0r-plugins). An upstream
release extracted to FREI0R_PATH also works.

The distort0r filter requires dimensions to be divisible by 8.
2025-11-18 21:26:36 +00:00
Stefan Breunig f8bfc20281 avfilter/vf_frei0r: fix time when input is realigned
av_frame_copy doesn't copy the input's PTS property, which resulted
in the frei0r filter always receiving the same static time.

Example that has a static distortion without patch:

ffmpeg -filter_complex "testsrc2=s=328x240:d=5,frei0r=distort0r" out.mp4
2025-11-18 21:26:36 +00:00
Andreas Rheinhardt 5bf57a925c avutil/x86/asm: Remove wrong comment, rename FF_REG_sp
Before FFmpeg commit 531b0a316b,
FFmpeg used REG_SP as macro for the stack pointer, yet this
clashed with a REG_SP define in Solaris system headers, so it
was changed to REG_sp and a comment was added for this.

Libav fixed it by adding an FF_ prefix to the macros in
1e9c5bf4c1. FFmpeg switched
to using these prefixes in 9eb3da2f99,
using FF_REG_sp instead of Libav's FF_REG_SP. In said commit
the comment was changed to claim that Solaris system headers
define FF_REG_SP, but this is (most likely) wrong.

This commit removes the wrong comment and renames the (actually unused)
macro to FF_REG_SP to make it consistent with FF_REG_BP.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:13 +01:00
Andreas Rheinhardt 99209c2876 avcodec/x86/mpegvideoenc_template: Reduce number of registers used
qmat and bias always have a constant offset, so one can use one register
to address both of them. This allows removing the check for HAVE_6REGS
(untested on a system where HAVE_6REGS is false).
Also avoid FF_REG_a while at it.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:13 +01:00
Andreas Rheinhardt b890cd0f73 avcodec/x86/mpegvideoenc_template: Avoid touching nonvolatile register
xmm7 is nonvolatile on x64 Windows.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:13 +01:00
Andreas Rheinhardt aeb138679a avcodec/x86/mpegvideoencdsp: Port add_8x8basis_ssse3() to ASM
Both GCC and Clang completely unroll the unlikely loop at -O3,
leading to codesize bloat; their code is also suboptimal, as they
don't make use of pmulhrsw (even with -mssse3). This commit
therefore ports the whole function to external assembly. The new
function occupies 176B here vs 1406B for GCC.

Benchmarks for a testcase with huge qscale (notice that the C version
is unrolled just like the unlikely loop in the SSSE3 version):
add_8x8basis_c:                                         43.4 ( 1.00x)
add_8x8basis_ssse3 (old):                               43.6 ( 1.00x)
add_8x8basis_ssse3 (new):                               11.9 ( 3.63x)

Reviewed-by: Kieran Kunhya <kieran@kunhya.com>
Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:12 +01:00
Andreas Rheinhardt 0d3a88e55f tests/checkasm/mpegvideoencdsp: Test denoise_dct
Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:12 +01:00
Andreas Rheinhardt 1c00e09427 avcodec/mpegvideo_enc: Port denoise_dct to MpegvideoEncDSPContext
It is very simple to remove the MPVEncContext from it.
Notice that this also fixes a bug in x86/mpegvideoenc.c: It only
used the SSE2 version of denoise_dct when dct_algo was auto or mmx
(and it was therefore unused during FATE).

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:12 +01:00
Andreas Rheinhardt d633fa0433 avcodec/x86/mpegvideoenc: Port denoise_dct_sse2 to external assembly
Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:12 +01:00
Andreas Rheinhardt 2cfef7031c avcodec/x86/mpegvideoenc: Reduce number of registers used
Avoids a push+pop on x64 Windows.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:12 +01:00
Andreas Rheinhardt 503afa40f7 avcodec/x86/mpegvideoenc: Remove check for MMX
Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-11-18 20:41:12 +01:00
Marvin Scholz 00ef656a85 .forgejo/CODEOWNERS: add myself to VideoToolbox and Icecast 2025-11-18 15:17:05 +01:00
20 changed files with 246 additions and 214 deletions

View File

@ -62,6 +62,7 @@ libavcodec/smpte_436m.* @programmerjake
libavcodec/svq1.* @pross
libavcodec/svq3.* @pross
libavcodec/.*vc2.* @lynne
libavcodec/videotoolbox.* @ePirat
libavcodec/vp3.* @pross
libavcodec/vp4.* @pross
libavcodec/vp5.* @pross
@ -134,6 +135,7 @@ libavformat/.*exif.* @Traneptora
libavformat/filmstrip.* @pross
libavformat/frm.* @pross
libavformat/iamf.* @jamrial
libavformat/icecast.c @ePirat
libavformat/ico.* @pross
libavformat/iff.* @pross
libavformat/.*jpegxl.* @Traneptora
@ -165,6 +167,7 @@ libavutil/film_grain.* @haasn
libavutil/dovi_meta.* @haasn
libavutil/hwcontext_oh.* @quink
libavutil/hwcontext_mediacodec.* @quink
libavutil/hwcontext_videotoolbox.* @ePirat
libavutil/iamf.* @jamrial
libavutil/integer.* @michaelni
libavutil/lfg.* @michaelni

View File

@ -54,7 +54,6 @@ OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o
OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_init_mips.o
OBJS-$(CONFIG_IDCTDSP) += mips/idctdsp_init_mips.o
OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o
OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoenc_init_mips.o
OBJS-$(CONFIG_MPEGVIDEOENCDSP) += mips/mpegvideoencdsp_init_mips.o
OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvididct_init_mips.o
@ -100,7 +99,7 @@ MMI-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o
MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
MMI-OBJS-$(CONFIG_H264PRED) += mips/h264pred_mmi.o
MMI-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_mmi.o
MMI-OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoenc_mmi.o
MMI-OBJS-$(CONFIG_MPEGVIDEOENCDSP) += mips/mpegvideoenc_mmi.o
MMI-OBJS-$(CONFIG_IDCTDSP) += mips/idctdsp_mmi.o \
mips/simple_idct_mmi.o
MMI-OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvid_idct_mmi.o

View File

@ -22,7 +22,6 @@
#define AVCODEC_MIPS_MPEGVIDEO_MIPS_H
#include "libavcodec/mpegvideo.h"
#include "libavcodec/mpegvideoenc.h"
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
int n, int qscale);
@ -34,6 +33,6 @@ void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
int n, int qscale);
void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block,
int n, int qscale);
void ff_denoise_dct_mmi(MPVEncContext *s, int16_t *block);
void ff_denoise_dct_mmi(int16_t block[64], int sum[64], const uint16_t offset[64]);
#endif /* AVCODEC_MIPS_MPEGVIDEO_MIPS_H */

View File

@ -1,33 +0,0 @@
/*
* Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/mips/cpu.h"
#include "libavcodec/mpegvideoenc.h"
#include "mpegvideo_mips.h"
/**
 * Install the MIPS MMI denoise_dct routine on an encoder context.
 *
 * The function pointer is replaced only when have_mmi() accepts the flags
 * reported by av_get_cpu_flags(); otherwise the context is left untouched.
 *
 * @param s encoder context whose denoise_dct pointer may be overridden
 */
av_cold void ff_mpvenc_dct_init_mips(MPVEncContext *s)
{
    const int flags = av_get_cpu_flags();

    if (!have_mmi(flags))
        return;

    s->denoise_dct = ff_denoise_dct_mmi;
}

View File

@ -23,12 +23,17 @@
#include "libavcodec/bit_depth_template.c"
#include "libavcodec/mpegvideoencdsp.h"
#include "h263dsp_mips.h"
#include "mpegvideo_mips.h"
av_cold void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c,
AVCodecContext *avctx)
{
int cpu_flags = av_get_cpu_flags();
if (have_mmi(cpu_flags)) {
c->denoise_dct = ff_denoise_dct_mmi;
}
if (have_msa(cpu_flags)) {
#if BIT_DEPTH == 8
c->pix_sum = ff_pix_sum_msa;

View File

@ -25,17 +25,12 @@
#include "mpegvideo_mips.h"
#include "libavutil/mips/mmiutils.h"
void ff_denoise_dct_mmi(MPVEncContext *s, int16_t *block)
void ff_denoise_dct_mmi(int16_t block[64], int sum[64], const uint16_t offset[64])
{
const int intra = s->c.mb_intra;
int *sum = s->dct_error_sum[intra];
uint16_t *offset = s->dct_offset[intra];
double ftmp[8];
mips_reg addr[1];
DECLARE_VAR_ALL64;
s->dct_count[intra]++;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"1: \n\t"

View File

@ -86,7 +86,6 @@
static int encode_picture(MPVMainEncContext *const s, const AVPacket *pkt);
static int dct_quantize_refine(MPVEncContext *const s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MPVEncContext *const s);
static void denoise_dct_c(MPVEncContext *const s, int16_t *block);
static int dct_quantize_c(MPVEncContext *const s,
int16_t *block, int n,
int qscale, int *overflow);
@ -300,11 +299,8 @@ static av_cold void mpv_encode_defaults(MPVMainEncContext *const m)
av_cold void ff_dct_encode_init(MPVEncContext *const s)
{
s->dct_quantize = dct_quantize_c;
s->denoise_dct = denoise_dct_c;
#if ARCH_MIPS
ff_mpvenc_dct_init_mips(s);
#elif ARCH_X86
#if ARCH_X86
ff_dct_encode_init_x86(s);
#endif
@ -3955,29 +3951,14 @@ static int encode_picture(MPVMainEncContext *const m, const AVPacket *pkt)
return 0;
}
static void denoise_dct_c(MPVEncContext *const s, int16_t *block)
static inline void denoise_dct(MPVEncContext *const s, int16_t block[])
{
if (!s->dct_error_sum)
return;
const int intra = s->c.mb_intra;
int i;
s->dct_count[intra]++;
for(i=0; i<64; i++){
int level= block[i];
if(level){
if(level>0){
s->dct_error_sum[intra][i] += level;
level -= s->dct_offset[intra][i];
if(level<0) level=0;
}else{
s->dct_error_sum[intra][i] -= level;
level += s->dct_offset[intra][i];
if(level>0) level=0;
}
block[i]= level;
}
}
s->mpvencdsp.denoise_dct(block, s->dct_error_sum[intra], s->dct_offset[intra]);
}
static int dct_quantize_trellis_c(MPVEncContext *const s,
@ -4009,8 +3990,8 @@ static int dct_quantize_trellis_c(MPVEncContext *const s,
s->fdsp.fdct(block);
if(s->dct_error_sum)
s->denoise_dct(s, block);
denoise_dct(s, block);
qmul= qscale*16;
qadd= ((qscale-1)|1)*8;
@ -4678,8 +4659,7 @@ static int dct_quantize_c(MPVEncContext *const s,
s->fdsp.fdct(block);
if(s->dct_error_sum)
s->denoise_dct(s, block);
denoise_dct(s, block);
if (s->c.mb_intra) {
scantable = s->c.intra_scantable.scantable;

View File

@ -123,7 +123,6 @@ typedef struct MPVEncContext {
uint16_t (*q_inter_matrix16)[2][64];
/* noise reduction */
void (*denoise_dct)(struct MPVEncContext *s, int16_t *block);
int (*dct_error_sum)[64];
int dct_count[2];
uint16_t (*dct_offset)[64];
@ -397,7 +396,6 @@ int ff_mpv_reallocate_putbitbuffer(MPVEncContext *s, size_t threshold, size_t si
void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
void ff_dct_encode_init(MPVEncContext *s);
void ff_mpvenc_dct_init_mips(MPVEncContext *s);
void ff_dct_encode_init_x86(MPVEncContext *s);
void ff_convert_matrix(MPVEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],

View File

@ -28,6 +28,29 @@
#include "mathops.h"
#include "mpegvideoencdsp.h"
/**
 * Shrink the 64 coefficients of a block towards zero and accumulate their
 * magnitudes.
 *
 * For every non-zero coefficient the absolute value is added to
 * dct_error_sum[i]; the coefficient is then moved towards zero by
 * dct_offset[i] and clamped so that it never changes sign.
 * Zero coefficients are skipped entirely.
 *
 * @param block          coefficients, modified in place
 * @param dct_error_sum  per-position accumulators, updated in place
 * @param dct_offset     per-position shrink amounts, read only
 */
static void denoise_dct_c(int16_t block[64], int dct_error_sum[64],
                          const uint16_t dct_offset[64])
{
    for (int idx = 0; idx < 64; idx++) {
        const int coeff = block[idx];
        int shrunk;

        if (!coeff)
            continue;

        if (coeff > 0) {
            dct_error_sum[idx] += coeff;
            shrunk = coeff - dct_offset[idx];
            /* do not let a positive coefficient cross zero */
            if (shrunk < 0)
                shrunk = 0;
        } else {
            dct_error_sum[idx] -= coeff;
            shrunk = coeff + dct_offset[idx];
            /* do not let a negative coefficient cross zero */
            if (shrunk > 0)
                shrunk = 0;
        }

        block[idx] = shrunk;
    }
}
static int try_8x8basis_c(const int16_t rem[64], const int16_t weight[64],
const int16_t basis[64], int scale)
{
@ -253,6 +276,8 @@ static void shrink88(uint8_t *dst, ptrdiff_t dst_wrap,
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
AVCodecContext *avctx)
{
c->denoise_dct = denoise_dct_c;
c->try_8x8basis = try_8x8basis_c;
c->add_8x8basis = add_8x8basis_c;

View File

@ -30,6 +30,9 @@
#define EDGE_BOTTOM 2
typedef struct MpegvideoEncDSPContext {
void (*denoise_dct)(int16_t block[64], int dct_error_sum[64],
const uint16_t dct_offset[64]);
int (*try_8x8basis)(const int16_t rem[64], const int16_t weight[64],
const int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], const int16_t basis[64], int scale);

View File

@ -39,8 +39,6 @@ DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = {
36, 37, 49, 50, 58, 59, 63, 64,
};
#if HAVE_6REGS
#if HAVE_SSE2_INLINE
#define COMPILE_TEMPLATE_SSSE3 0
#define RENAME(a) a ## _sse2
@ -55,85 +53,17 @@ DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = {
#include "mpegvideoenc_template.c"
#endif /* HAVE_SSSE3_INLINE */
#endif /* HAVE_6REGS */
#if HAVE_INLINE_ASM
#if HAVE_SSE2_INLINE
/**
 * SSE2 inline-assembly noise reduction for one block of 64 coefficients.
 *
 * Selects the accumulator and offset tables by s->c.mb_intra, increments
 * s->dct_count[intra], then for each coefficient adds its absolute value to
 * the accumulator and reduces its magnitude by the matching offset with
 * unsigned saturation (so the magnitude bottoms out at zero) before putting
 * the sign back.
 *
 * @param s     encoder context providing dct_error_sum, dct_offset, dct_count
 * @param block coefficients, rewritten in place
 */
static void denoise_dct_sse2(MPVEncContext *const s, int16_t block[])
{
const int intra = s->c.mb_intra;
int *sum= s->dct_error_sum[intra];
uint16_t *offset= s->dct_offset[intra];
s->dct_count[intra]++;
__asm__ volatile(
/* xmm7 stays zero for the whole loop; used to widen words to dwords */
"pxor %%xmm7, %%xmm7 \n\t"
"1: \n\t"
/* each iteration handles 32 bytes of block, i.e. 16 coefficients */
"pxor %%xmm0, %%xmm0 \n\t"
"pxor %%xmm1, %%xmm1 \n\t"
"movdqa (%0), %%xmm2 \n\t"
"movdqa 16(%0), %%xmm3 \n\t"
/* xmm0/xmm1 = all-ones in lanes where the coefficient is negative */
"pcmpgtw %%xmm2, %%xmm0 \n\t"
"pcmpgtw %%xmm3, %%xmm1 \n\t"
/* (x ^ mask) - mask: absolute value of each coefficient */
"pxor %%xmm0, %%xmm2 \n\t"
"pxor %%xmm1, %%xmm3 \n\t"
"psubw %%xmm0, %%xmm2 \n\t"
"psubw %%xmm1, %%xmm3 \n\t"
/* keep a copy of the absolute values for the accumulator update below */
"movdqa %%xmm2, %%xmm4 \n\t"
"movdqa %%xmm3, %%xmm5 \n\t"
/* subtract the offsets with unsigned saturation */
"psubusw (%2), %%xmm2 \n\t"
"psubusw 16(%2), %%xmm3 \n\t"
/* re-apply the original sign and store the result back into block */
"pxor %%xmm0, %%xmm2 \n\t"
"pxor %%xmm1, %%xmm3 \n\t"
"psubw %%xmm0, %%xmm2 \n\t"
"psubw %%xmm1, %%xmm3 \n\t"
"movdqa %%xmm2, (%0) \n\t"
"movdqa %%xmm3, 16(%0) \n\t"
/* zero-extend the 16 absolute values to dwords and add them to sum */
"movdqa %%xmm4, %%xmm6 \n\t"
"movdqa %%xmm5, %%xmm0 \n\t"
"punpcklwd %%xmm7, %%xmm4 \n\t"
"punpckhwd %%xmm7, %%xmm6 \n\t"
"punpcklwd %%xmm7, %%xmm5 \n\t"
"punpckhwd %%xmm7, %%xmm0 \n\t"
"paddd (%1), %%xmm4 \n\t"
"paddd 16(%1), %%xmm6 \n\t"
"paddd 32(%1), %%xmm5 \n\t"
"paddd 48(%1), %%xmm0 \n\t"
"movdqa %%xmm4, (%1) \n\t"
"movdqa %%xmm6, 16(%1) \n\t"
"movdqa %%xmm5, 32(%1) \n\t"
"movdqa %%xmm0, 48(%1) \n\t"
/* advance block/offset by 32 bytes and sum by 64 bytes */
"add $32, %0 \n\t"
"add $64, %1 \n\t"
"add $32, %2 \n\t"
/* loop until the block pointer reaches block+64 (operand %3) */
"cmp %3, %0 \n\t"
" jb 1b \n\t"
: "+r" (block), "+r" (sum), "+r" (offset)
: "r"(block+64)
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7")
);
}
#endif /* HAVE_SSE2_INLINE */
#endif /* HAVE_INLINE_ASM */
av_cold void ff_dct_encode_init_x86(MPVEncContext *const s)
{
const int dct_algo = s->c.avctx->dct_algo;
if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
#if HAVE_MMX_INLINE
int cpu_flags = av_get_cpu_flags();
#if HAVE_SSE2_INLINE
int cpu_flags = av_get_cpu_flags();
if (INLINE_SSE2(cpu_flags)) {
#if HAVE_6REGS
s->dct_quantize = dct_quantize_sse2;
#endif
s->denoise_dct = denoise_dct_sse2;
}
#endif
#if HAVE_6REGS && HAVE_SSSE3_INLINE
#if HAVE_SSSE3_INLINE
if (INLINE_SSSE3(cpu_flags))
s->dct_quantize = dct_quantize_ssse3;
#endif

View File

@ -70,24 +70,25 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
{
x86_reg last_non_zero_p1;
int level=0, q; //=0 is because gcc says uninitialized ...
const uint16_t *qmat, *bias;
const uint16_t *qmat;
LOCAL_ALIGNED_16(int16_t, temp_block, [64]);
//s->fdct (block);
ff_fdct_sse2(block); // cannot be anything else ...
if(s->dct_error_sum)
s->denoise_dct(s, block);
if (s->dct_error_sum) {
const int intra = s->c.mb_intra;
s->dct_count[intra]++;
s->mpvencdsp.denoise_dct(block, s->dct_error_sum[intra], s->dct_offset[intra]);
}
if (s->c.mb_intra) {
int dummy;
if (n < 4){
q = s->c.y_dc_scale;
bias = s->q_intra_matrix16[qscale][1];
qmat = s->q_intra_matrix16[qscale][0];
}else{
q = s->c.c_dc_scale;
bias = s->q_chroma_intra_matrix16[qscale][1];
qmat = s->q_chroma_intra_matrix16[qscale][0];
}
/* note: block[0] is assumed to be positive */
@ -106,7 +107,6 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
last_non_zero_p1 = 1;
} else {
last_non_zero_p1 = 0;
bias = s->q_inter_matrix16[qscale][1];
qmat = s->q_inter_matrix16[qscale][0];
}
@ -114,11 +114,11 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
__asm__ volatile(
"movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1
SPREADW("%%xmm3")
"pxor %%xmm7, %%xmm7 \n\t" // 0
"pxor %%xmm2, %%xmm2 \n\t" // 0
"pxor %%xmm4, %%xmm4 \n\t" // 0
"movdqa (%2), %%xmm5 \n\t" // qmat[0]
"pxor %%xmm6, %%xmm6 \n\t"
"psubw (%3), %%xmm6 \n\t" // -bias[0]
"psubw 128(%2), %%xmm6 \n\t" // -bias[0]
"mov $-128, %%"FF_REG_a" \n\t"
".p2align 4 \n\t"
"1: \n\t"
@ -128,10 +128,10 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
"pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
"por %%xmm0, %%xmm4 \n\t"
RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movdqa %%xmm0, (%5, %%"FF_REG_a") \n\t"
"pcmpeqw %%xmm7, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
"movdqa (%4, %%"FF_REG_a"), %%xmm1 \n\t"
"movdqa %%xmm7, (%1, %%"FF_REG_a") \n\t" // 0
"movdqa %%xmm0, (%4, %0) \n\t"
"pcmpeqw %%xmm2, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
"movdqa (%3, %0), %%xmm1 \n\t"
"movdqa %%xmm2, (%1, %%"FF_REG_a") \n\t" // 0
"pandn %%xmm1, %%xmm0 \n\t"
"pmaxsw %%xmm0, %%xmm3 \n\t"
"add $16, %%"FF_REG_a" \n\t"
@ -140,32 +140,32 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
"movd %%xmm3, %%"FF_REG_a" \n\t"
"movzbl %%al, %%eax \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
: "r" (block+64), "r" (qmat), "r" (bias),
: "r" (block+64), "r" (qmat),
"r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7")
"%xmm4", "%xmm5", "%xmm6")
);
}else{ // FMT_H263
__asm__ volatile(
"movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1
SPREADW("%%xmm3")
"pxor %%xmm7, %%xmm7 \n\t" // 0
"pxor %%xmm2, %%xmm2 \n\t" // 0
"pxor %%xmm4, %%xmm4 \n\t" // 0
"mov $-128, %%"FF_REG_a" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" // block[i]
SAVE_SIGN("%%xmm1", "%%xmm0") // ABS(block[i])
"movdqa (%3, %%"FF_REG_a"), %%xmm6 \n\t" // bias[0]
"movdqa 128(%2, %0), %%xmm6 \n\t" // bias[i]
"paddusw %%xmm6, %%xmm0 \n\t" // ABS(block[i]) + bias[0]
"movdqa (%2, %%"FF_REG_a"), %%xmm5 \n\t" // qmat[i]
"pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
"por %%xmm0, %%xmm4 \n\t"
RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movdqa %%xmm0, (%5, %%"FF_REG_a") \n\t"
"pcmpeqw %%xmm7, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
"movdqa (%4, %%"FF_REG_a"), %%xmm1 \n\t"
"movdqa %%xmm7, (%1, %%"FF_REG_a") \n\t" // 0
"movdqa %%xmm0, (%4, %0) \n\t"
"pcmpeqw %%xmm2, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
"movdqa (%3, %0), %%xmm1 \n\t"
"movdqa %%xmm2, (%1, %%"FF_REG_a") \n\t" // 0
"pandn %%xmm1, %%xmm0 \n\t"
"pmaxsw %%xmm0, %%xmm3 \n\t"
"add $16, %%"FF_REG_a" \n\t"
@ -174,10 +174,10 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
"movd %%xmm3, %%"FF_REG_a" \n\t"
"movzbl %%al, %%eax \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
: "r" (block+64), "r" (qmat+64), "r" (bias+64),
: "r" (block+64), "r" (qmat+64),
"r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7")
"%xmm4", "%xmm5", "%xmm6")
);
}
__asm__ volatile(

View File

@ -24,6 +24,104 @@
%include "libavutil/x86/x86util.asm"
SECTION .text
; void ff_add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale)
;
; Adds a scaled copy of basis[] onto rem[] in place (64 int16 values each).
; Two paths are provided: a fast one that multiplies by a single 16-bit
; factor, and .huge_scale, taken when scale lies outside [-1024, 1023], which
; combines two 16-bit multiplies per coefficient.
INIT_XMM ssse3
cglobal add_8x8basis, 3, 3+ARCH_X86_64, 4, rem, basis, scale
; keep the unmodified scale in the low dword of m0
movd m0, scaled
; bias scale by 1024 so that one unsigned compare against 2047 below
; detects every value outside [-1024, 1023]
add scaled, 1024
; point both arrays at their end (64 * 2 bytes); OFF counts up from -128 to 0
add basisq, 128
add remq, 128
%if ARCH_X86_64
%define OFF r3q
mov r3q, -128
cmp scaled, 2047
%else
; only three GPRs are requested here, so OFF aliases the scale register (r2);
; the compare must therefore come before the mov (mov leaves flags untouched)
%define OFF r2q
cmp scaled, 2047
mov r2q, -128
%endif
ja .huge_scale
; broadcast the low 16 bits of scale to all eight word lanes, then shift each
; lane left by 5
punpcklwd m0, m0
pshufd m0, m0, 0x0
psllw m0, 5
.loop1:
; 16 coefficients per iteration: rem += pmulhrsw(basis, factor)
mova m1, [basisq+OFF]
mova m2, [basisq+OFF+16]
pmulhrsw m1, m0
pmulhrsw m2, m0
paddw m1, [remq+OFF]
paddw m2, [remq+OFF+16]
mova [remq+OFF], m1
mova [remq+OFF+16], m2
add OFF, 32
; continue while OFF is still negative
js .loop1
RET
.huge_scale:
; form scale << 6 as a dword and duplicate its two 16-bit halves
pslld m0, 6
punpcklwd m0, m0
; m1 = high half of (scale << 6) in every word lane
pshufd m1, m0, 0x55
; m0 = low half of (scale << 6), logically shifted right by one, in every lane
psrlw m0, 1
pshufd m0, m0, 0x0
.loop2:
; 8 coefficients per iteration:
; rem += basis * high_half (low 16 bits) + pmulhrsw(basis, low_half >> 1)
mova m2, [basisq+OFF]
pmulhrsw m3, m2, m0
pmullw m2, m1
paddw m2, m3
paddw m2, [remq+OFF]
mova [remq+OFF], m2
add OFF, 16
js .loop2
RET
; void ff_mpv_denoise_dct_sse2(int16_t block[64], int sum[64], const uint16_t offset[64])
;
; For each of the 64 coefficients: adds the absolute value to sum[] and
; rewrites block[] with the magnitude reduced by offset[] (unsigned
; saturating subtract, so it bottoms out at zero) and the original sign
; restored. Handles 16 coefficients per loop iteration.
INIT_XMM sse2
cglobal mpv_denoise_dct, 3, 4, 7, block, sum, offset
; m6 stays zero: source for the sign compare and for widening words to dwords
pxor m6, m6
; r3 = end of sum[] (64 dwords = 256 bytes), used as the loop bound
lea r3, [sumq+256]
.loop:
mova m2, [blockq]
mova m3, [blockq+16]
mova m0, m6
mova m1, m6
; m0/m1 = all-ones in lanes where the coefficient is negative (0 > x)
pcmpgtw m0, m2
pcmpgtw m1, m3
; (x ^ mask) - mask: absolute value, kept in m2/m3
pxor m2, m0
pxor m3, m1
psubw m2, m0
psubw m3, m1
; m4/m5 = |x| - offset with unsigned saturation
psubusw m4, m2, [offsetq]
psubusw m5, m3, [offsetq+16]
; put the original sign back on the reduced magnitudes
pxor m4, m0
pxor m5, m1
add offsetq, 32
psubw m4, m0
psubw m5, m1
mova [blockq], m4
mova [blockq+16], m5
; duplicate the absolute values so low and high halves can be widened separately
mova m0, m2
mova m1, m3
add blockq, 32
; zero-extend the 16 absolute values to dwords
punpcklwd m0, m6
punpckhwd m2, m6
punpcklwd m1, m6
punpckhwd m3, m6
; accumulate into sum[] and write back
paddd m0, [sumq]
paddd m2, [sumq+16]
paddd m1, [sumq+32]
paddd m3, [sumq+48]
mova [sumq], m0
mova [sumq+16], m2
mova [sumq+32], m1
mova [sumq+48], m3
add sumq, 64
; loop until sum has advanced past its 256-byte table
cmp sumq, r3
jb .loop
RET
; int ff_pix_sum16(const uint8_t *pix, ptrdiff_t line_size)
; %1 = number of loops
; %2 = number of GPRs used

View File

@ -27,9 +27,12 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/mpegvideoencdsp.h"
void ff_mpv_denoise_dct_sse2(int16_t block[64], int dct_error_sum[64],
const uint16_t dct_offset[64]);
int ff_pix_sum16_sse2(const uint8_t *pix, ptrdiff_t line_size);
int ff_pix_sum16_xop(const uint8_t *pix, ptrdiff_t line_size);
int ff_pix_norm1_sse2(const uint8_t *pix, ptrdiff_t line_size);
void ff_add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale);
#if HAVE_INLINE_ASM
#if HAVE_SSSE3_INLINE
@ -81,41 +84,6 @@ static int try_8x8basis_ssse3(const int16_t rem[64], const int16_t weight[64], c
);
return i;
}
/**
 * Add a scaled copy of basis[] onto rem[] in place (64 coefficients).
 *
 * When FFABS(scale) < 1024 an SSSE3 inline-assembly loop using pmulhrsw is
 * used; otherwise the function falls back to a scalar loop with a rounding
 * right shift by (BASIS_SHIFT - RECON_SHIFT).
 *
 * @param rem   coefficients updated in place
 * @param basis coefficients to be scaled and added, read only
 * @param scale multiplier applied to basis
 */
static void add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale)
{
x86_reg i=0;
if (FFABS(scale) < 1024) {
/* pre-scale so the 16-bit pmulhrsw factor carries the needed shift;
 * NOTE(review): the shift constants are defined outside this view */
scale *= 1 << (16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT);
__asm__ volatile(
/* broadcast the low 16 bits of scale to all eight word lanes of xmm2 */
"movd %3, %%xmm2 \n\t"
"punpcklwd %%xmm2, %%xmm2 \n\t"
"pshufd $0, %%xmm2, %%xmm2 \n\t"
".p2align 4 \n\t"
"1: \n\t"
/* 16 coefficients per iteration: rem += pmulhrsw(basis, scale) */
"movdqa (%1, %0), %%xmm0 \n\t"
"movdqa 16(%1, %0), %%xmm1 \n\t"
"pmulhrsw %%xmm2, %%xmm0 \n\t"
"pmulhrsw %%xmm2, %%xmm1 \n\t"
"paddw (%2, %0), %%xmm0 \n\t"
"paddw 16(%2, %0), %%xmm1 \n\t"
"movdqa %%xmm0, (%2, %0) \n\t"
"movdqa %%xmm1, 16(%2, %0) \n\t"
/* i is a byte offset; stop after 128 bytes (64 int16 values) */
"add $32, %0 \n\t"
"cmp $128, %0 \n\t" // FIXME optimize & bench
" jb 1b \n\t"
: "+r" (i)
: "r"(basis), "r"(rem), "g"(scale)
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2")
);
} else {
/* scalar fallback for large scales: multiply, round, shift */
for (i=0; i<8*8; i++) {
rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
}
}
}
#endif /* HAVE_SSSE3_INLINE */
/* Draw the edges of width 'w' of an image of size width, height */
@ -209,6 +177,7 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags)) {
c->denoise_dct = ff_mpv_denoise_dct_sse2;
c->pix_sum = ff_pix_sum16_sse2;
c->pix_norm1 = ff_pix_norm1_sse2;
}
@ -224,15 +193,17 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
c->draw_edges = draw_edges_mmx;
}
}
#endif /* HAVE_INLINE_ASM */
if (X86_SSSE3(cpu_flags)) {
#if HAVE_SSSE3_INLINE
if (INLINE_SSSE3(cpu_flags)) {
if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
c->try_8x8basis = try_8x8basis_ssse3;
}
c->add_8x8basis = add_8x8basis_ssse3;
}
#endif /* HAVE_SSSE3_INLINE */
#if HAVE_SSSE3_EXTERNAL
c->add_8x8basis = ff_add_8x8basis_ssse3;
#endif
}
#endif /* HAVE_INLINE_ASM */
}

View File

@ -375,6 +375,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
if (!in2)
goto fail;
av_frame_copy(in2, in);
if (av_frame_copy_props(in2, in) < 0) {
av_frame_free(&in2);
goto fail;
}
av_frame_free(&in);
in = in2;
}

View File

@ -780,7 +780,13 @@ static int resolve_content_path(AVFormatContext *s, const char *url, int *max_ur
}
root_url = (av_strcasecmp(baseurl, "")) ? baseurl : path;
if (node) {
xmlNodeSetContent(node, root_url);
xmlChar *escaped = xmlEncodeSpecialChars(NULL, root_url);
if (!escaped) {
updated = AVERROR(ENOMEM);
goto end;
}
xmlNodeSetContent(node, escaped);
xmlFree(escaped);
updated = 1;
}
@ -814,9 +820,15 @@ static int resolve_content_path(AVFormatContext *s, const char *url, int *max_ur
memset(p + 1, 0, strlen(p));
}
av_strlcat(tmp_str, text + start, tmp_max_url_size);
xmlNodeSetContent(baseurl_nodes[i], tmp_str);
updated = 1;
xmlFree(text);
xmlChar* escaped = xmlEncodeSpecialChars(NULL, tmp_str);
if (!escaped) {
updated = AVERROR(ENOMEM);
goto end;
}
xmlNodeSetContent(baseurl_nodes[i], escaped);
updated = 1;
xmlFree(escaped);
}
}

View File

@ -38,8 +38,7 @@ typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
# define FF_PTR_SIZE "8"
typedef int64_t x86_reg;
/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */
# define FF_REG_sp "rsp"
# define FF_REG_SP "rsp"
# define FF_REG_BP "rbp"
# define FF_REGBP rbp
# define FF_REGa rax
@ -60,7 +59,7 @@ typedef int64_t x86_reg;
# define FF_PTR_SIZE "4"
typedef int32_t x86_reg;
# define FF_REG_sp "esp"
# define FF_REG_SP "esp"
# define FF_REG_BP "ebp"
# define FF_REGBP ebp
# define FF_REGa eax

View File

@ -37,6 +37,37 @@
buf[j] = rnd() % (max - min + 1) + min; \
} while (0)
/**
 * checkasm test for MpegvideoEncDSPContext.denoise_dct.
 *
 * Runs the reference and the tested implementation on identical copies of a
 * random block and accumulator table, fails if either output buffer differs,
 * and then benchmarks the tested implementation.
 */
static void check_denoise_dct(MpegvideoEncDSPContext *c)
{
declare_func(void, int16_t block[64], int dct_error_sum[64],
const uint16_t dct_offset[64]);
if (check_func(c->denoise_dct, "denoise_dct")) {
/* separate ref/new copies of every buffer the function writes to */
DECLARE_ALIGNED(16, int16_t, block_ref)[64];
DECLARE_ALIGNED(16, int16_t, block_new)[64];
DECLARE_ALIGNED(16, int, dct_error_sum_ref)[64];
DECLARE_ALIGNED(16, int, dct_error_sum_new)[64];
DECLARE_ALIGNED(16, uint16_t, dct_offset)[64];
for (size_t i = 0; i < FF_ARRAY_ELEMS(block_ref); ++i) {
unsigned random = rnd();
/* bit 16 of the random number decides whether the coefficient is
 * zero or takes the random value, so both cases are exercised */
block_ref[i] = random & (1 << 16) ? random : 0;
}
/* NOTE(review): randomize_buffers is defined outside this view;
 * presumably fills dct_offset with random bytes */
randomize_buffers(dct_offset, sizeof(dct_offset));
/* NOTE(review): presumably limits the starting sums to [0, 2^24 - 1];
 * the macro definition is only partly visible here */
randomize_buffer_clipped(dct_error_sum_ref, 0, (1 << 24) - 1);
/* both implementations start from identical inputs */
memcpy(block_new, block_ref, sizeof(block_new));
memcpy(dct_error_sum_new, dct_error_sum_ref, sizeof(dct_error_sum_ref));
call_ref(block_ref, dct_error_sum_ref, dct_offset);
call_new(block_new, dct_error_sum_new, dct_offset);
/* both the rewritten block and the accumulated sums must match */
if (memcmp(block_ref, block_new, sizeof(block_ref)) ||
memcmp(dct_error_sum_new, dct_error_sum_ref, sizeof(dct_error_sum_new)))
fail();
bench_new(block_new, dct_error_sum_new, dct_offset);
}
}
static void check_add_8x8basis(MpegvideoEncDSPContext *c)
{
declare_func(void, int16_t rem[64], const int16_t basis[64], int scale);
@ -166,6 +197,8 @@ void checkasm_check_mpegvideoencdsp(void)
ff_mpegvideoencdsp_init(&c, &avctx);
check_denoise_dct(&c);
report("denoise_dct");
check_pix_sum(&c);
report("pix_sum");
check_pix_norm1(&c);

View File

@ -717,8 +717,9 @@ $(FATE_FILTER_VSYNTH-yes): SRC = $(TARGET_PATH)/tests/vsynth1/%02d.pgm
FATE_FFMPEG += $(FATE_FILTER_VSYNTH-yes)
FATE_FILTER_FREI0R-$(call FILTERFRAMECRC, TESTSRC2, FREI0R_FILTER) = fate-filter-frei0r-filter
FATE_FILTER_FREI0R-$(call FILTERFRAMECRC, TESTSRC2, FREI0R_FILTER) = fate-filter-frei0r-filter fate-filter-frei0r-filter-unaligned
fate-filter-frei0r-filter: CMD = framecrc -lavfi "testsrc2=r=1:d=5,frei0r=enable=gte(n\,3):filter_name=distort0r"
fate-filter-frei0r-filter-unaligned: CMD = framecrc -lavfi "testsrc2=s=328x240:r=1:d=5,frei0r=filter_name=distort0r"
FATE_FFMPEG += $(FATE_FILTER_FREI0R-yes)
#

View File

@ -0,0 +1,10 @@
#tb 0: 1/1
#media_type 0: video
#codec_id 0: rawvideo
#dimensions 0: 328x240
#sar 0: 1/1
0, 0, 0, 1, 314880, 0x7b9cad8f
0, 1, 1, 1, 314880, 0x0184436f
0, 2, 2, 1, 314880, 0x7e3f2776
0, 3, 3, 1, 314880, 0x0dc5e915
0, 4, 4, 1, 314880, 0xcf9c76ef