mirror of https://github.com/FFmpeg/FFmpeg.git
Compare commits
13 Commits
1eb2cbd865
...
b399896046
| Author | SHA1 | Date |
|---|---|---|
|
|
b399896046 | |
|
|
4c4ab2ec6f | |
|
|
f8bfc20281 | |
|
|
5bf57a925c | |
|
|
99209c2876 | |
|
|
b890cd0f73 | |
|
|
aeb138679a | |
|
|
0d3a88e55f | |
|
|
1c00e09427 | |
|
|
d633fa0433 | |
|
|
2cfef7031c | |
|
|
503afa40f7 | |
|
|
00ef656a85 |
|
|
@ -62,6 +62,7 @@ libavcodec/smpte_436m.* @programmerjake
|
||||||
libavcodec/svq1.* @pross
|
libavcodec/svq1.* @pross
|
||||||
libavcodec/svq3.* @pross
|
libavcodec/svq3.* @pross
|
||||||
libavcodec/.*vc2.* @lynne
|
libavcodec/.*vc2.* @lynne
|
||||||
|
libavcodec/videotoolbox.* @ePirat
|
||||||
libavcodec/vp3.* @pross
|
libavcodec/vp3.* @pross
|
||||||
libavcodec/vp4.* @pross
|
libavcodec/vp4.* @pross
|
||||||
libavcodec/vp5.* @pross
|
libavcodec/vp5.* @pross
|
||||||
|
|
@ -134,6 +135,7 @@ libavformat/.*exif.* @Traneptora
|
||||||
libavformat/filmstrip.* @pross
|
libavformat/filmstrip.* @pross
|
||||||
libavformat/frm.* @pross
|
libavformat/frm.* @pross
|
||||||
libavformat/iamf.* @jamrial
|
libavformat/iamf.* @jamrial
|
||||||
|
libavformat/icecast.c @ePirat
|
||||||
libavformat/ico.* @pross
|
libavformat/ico.* @pross
|
||||||
libavformat/iff.* @pross
|
libavformat/iff.* @pross
|
||||||
libavformat/.*jpegxl.* @Traneptora
|
libavformat/.*jpegxl.* @Traneptora
|
||||||
|
|
@ -165,6 +167,7 @@ libavutil/film_grain.* @haasn
|
||||||
libavutil/dovi_meta.* @haasn
|
libavutil/dovi_meta.* @haasn
|
||||||
libavutil/hwcontext_oh.* @quink
|
libavutil/hwcontext_oh.* @quink
|
||||||
libavutil/hwcontext_mediacodec.* @quink
|
libavutil/hwcontext_mediacodec.* @quink
|
||||||
|
libavutil/hwcontext_videotoolbox.* @ePirat
|
||||||
libavutil/iamf.* @jamrial
|
libavutil/iamf.* @jamrial
|
||||||
libavutil/integer.* @michaelni
|
libavutil/integer.* @michaelni
|
||||||
libavutil/lfg.* @michaelni
|
libavutil/lfg.* @michaelni
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,6 @@ OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o
|
||||||
OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_init_mips.o
|
OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_init_mips.o
|
||||||
OBJS-$(CONFIG_IDCTDSP) += mips/idctdsp_init_mips.o
|
OBJS-$(CONFIG_IDCTDSP) += mips/idctdsp_init_mips.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o
|
OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoenc_init_mips.o
|
|
||||||
OBJS-$(CONFIG_MPEGVIDEOENCDSP) += mips/mpegvideoencdsp_init_mips.o
|
OBJS-$(CONFIG_MPEGVIDEOENCDSP) += mips/mpegvideoencdsp_init_mips.o
|
||||||
OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
|
OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
|
||||||
OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvididct_init_mips.o
|
OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvididct_init_mips.o
|
||||||
|
|
@ -100,7 +99,7 @@ MMI-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o
|
||||||
MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
|
MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
|
||||||
MMI-OBJS-$(CONFIG_H264PRED) += mips/h264pred_mmi.o
|
MMI-OBJS-$(CONFIG_H264PRED) += mips/h264pred_mmi.o
|
||||||
MMI-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_mmi.o
|
MMI-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_mmi.o
|
||||||
MMI-OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoenc_mmi.o
|
MMI-OBJS-$(CONFIG_MPEGVIDEOENCDSP) += mips/mpegvideoenc_mmi.o
|
||||||
MMI-OBJS-$(CONFIG_IDCTDSP) += mips/idctdsp_mmi.o \
|
MMI-OBJS-$(CONFIG_IDCTDSP) += mips/idctdsp_mmi.o \
|
||||||
mips/simple_idct_mmi.o
|
mips/simple_idct_mmi.o
|
||||||
MMI-OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvid_idct_mmi.o
|
MMI-OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvid_idct_mmi.o
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,6 @@
|
||||||
#define AVCODEC_MIPS_MPEGVIDEO_MIPS_H
|
#define AVCODEC_MIPS_MPEGVIDEO_MIPS_H
|
||||||
|
|
||||||
#include "libavcodec/mpegvideo.h"
|
#include "libavcodec/mpegvideo.h"
|
||||||
#include "libavcodec/mpegvideoenc.h"
|
|
||||||
|
|
||||||
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
|
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
|
||||||
int n, int qscale);
|
int n, int qscale);
|
||||||
|
|
@ -34,6 +33,6 @@ void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
|
||||||
int n, int qscale);
|
int n, int qscale);
|
||||||
void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block,
|
void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block,
|
||||||
int n, int qscale);
|
int n, int qscale);
|
||||||
void ff_denoise_dct_mmi(MPVEncContext *s, int16_t *block);
|
void ff_denoise_dct_mmi(int16_t block[64], int sum[64], const uint16_t offset[64]);
|
||||||
|
|
||||||
#endif /* AVCODEC_MIPS_MPEGVIDEO_MIPS_H */
|
#endif /* AVCODEC_MIPS_MPEGVIDEO_MIPS_H */
|
||||||
|
|
|
||||||
|
|
@ -1,33 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
|
|
||||||
*
|
|
||||||
* This file is part of FFmpeg.
|
|
||||||
*
|
|
||||||
* FFmpeg is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
|
||||||
* License as published by the Free Software Foundation; either
|
|
||||||
* version 2.1 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* FFmpeg is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
|
||||||
* License along with FFmpeg; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "libavutil/attributes.h"
|
|
||||||
#include "libavutil/mips/cpu.h"
|
|
||||||
#include "libavcodec/mpegvideoenc.h"
|
|
||||||
#include "mpegvideo_mips.h"
|
|
||||||
|
|
||||||
av_cold void ff_mpvenc_dct_init_mips(MPVEncContext *s)
|
|
||||||
{
|
|
||||||
int cpu_flags = av_get_cpu_flags();
|
|
||||||
|
|
||||||
if (have_mmi(cpu_flags)) {
|
|
||||||
s->denoise_dct = ff_denoise_dct_mmi;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -23,12 +23,17 @@
|
||||||
#include "libavcodec/bit_depth_template.c"
|
#include "libavcodec/bit_depth_template.c"
|
||||||
#include "libavcodec/mpegvideoencdsp.h"
|
#include "libavcodec/mpegvideoencdsp.h"
|
||||||
#include "h263dsp_mips.h"
|
#include "h263dsp_mips.h"
|
||||||
|
#include "mpegvideo_mips.h"
|
||||||
|
|
||||||
av_cold void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c,
|
av_cold void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c,
|
||||||
AVCodecContext *avctx)
|
AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (have_mmi(cpu_flags)) {
|
||||||
|
c->denoise_dct = ff_denoise_dct_mmi;
|
||||||
|
}
|
||||||
|
|
||||||
if (have_msa(cpu_flags)) {
|
if (have_msa(cpu_flags)) {
|
||||||
#if BIT_DEPTH == 8
|
#if BIT_DEPTH == 8
|
||||||
c->pix_sum = ff_pix_sum_msa;
|
c->pix_sum = ff_pix_sum_msa;
|
||||||
|
|
|
||||||
|
|
@ -25,17 +25,12 @@
|
||||||
#include "mpegvideo_mips.h"
|
#include "mpegvideo_mips.h"
|
||||||
#include "libavutil/mips/mmiutils.h"
|
#include "libavutil/mips/mmiutils.h"
|
||||||
|
|
||||||
void ff_denoise_dct_mmi(MPVEncContext *s, int16_t *block)
|
void ff_denoise_dct_mmi(int16_t block[64], int sum[64], const uint16_t offset[64])
|
||||||
{
|
{
|
||||||
const int intra = s->c.mb_intra;
|
|
||||||
int *sum = s->dct_error_sum[intra];
|
|
||||||
uint16_t *offset = s->dct_offset[intra];
|
|
||||||
double ftmp[8];
|
double ftmp[8];
|
||||||
mips_reg addr[1];
|
mips_reg addr[1];
|
||||||
DECLARE_VAR_ALL64;
|
DECLARE_VAR_ALL64;
|
||||||
|
|
||||||
s->dct_count[intra]++;
|
|
||||||
|
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
|
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
|
|
@ -86,7 +86,6 @@
|
||||||
static int encode_picture(MPVMainEncContext *const s, const AVPacket *pkt);
|
static int encode_picture(MPVMainEncContext *const s, const AVPacket *pkt);
|
||||||
static int dct_quantize_refine(MPVEncContext *const s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
|
static int dct_quantize_refine(MPVEncContext *const s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
|
||||||
static int sse_mb(MPVEncContext *const s);
|
static int sse_mb(MPVEncContext *const s);
|
||||||
static void denoise_dct_c(MPVEncContext *const s, int16_t *block);
|
|
||||||
static int dct_quantize_c(MPVEncContext *const s,
|
static int dct_quantize_c(MPVEncContext *const s,
|
||||||
int16_t *block, int n,
|
int16_t *block, int n,
|
||||||
int qscale, int *overflow);
|
int qscale, int *overflow);
|
||||||
|
|
@ -300,11 +299,8 @@ static av_cold void mpv_encode_defaults(MPVMainEncContext *const m)
|
||||||
av_cold void ff_dct_encode_init(MPVEncContext *const s)
|
av_cold void ff_dct_encode_init(MPVEncContext *const s)
|
||||||
{
|
{
|
||||||
s->dct_quantize = dct_quantize_c;
|
s->dct_quantize = dct_quantize_c;
|
||||||
s->denoise_dct = denoise_dct_c;
|
|
||||||
|
|
||||||
#if ARCH_MIPS
|
#if ARCH_X86
|
||||||
ff_mpvenc_dct_init_mips(s);
|
|
||||||
#elif ARCH_X86
|
|
||||||
ff_dct_encode_init_x86(s);
|
ff_dct_encode_init_x86(s);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -3955,29 +3951,14 @@ static int encode_picture(MPVMainEncContext *const m, const AVPacket *pkt)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void denoise_dct_c(MPVEncContext *const s, int16_t *block)
|
static inline void denoise_dct(MPVEncContext *const s, int16_t block[])
|
||||||
{
|
{
|
||||||
|
if (!s->dct_error_sum)
|
||||||
|
return;
|
||||||
|
|
||||||
const int intra = s->c.mb_intra;
|
const int intra = s->c.mb_intra;
|
||||||
int i;
|
|
||||||
|
|
||||||
s->dct_count[intra]++;
|
s->dct_count[intra]++;
|
||||||
|
s->mpvencdsp.denoise_dct(block, s->dct_error_sum[intra], s->dct_offset[intra]);
|
||||||
for(i=0; i<64; i++){
|
|
||||||
int level= block[i];
|
|
||||||
|
|
||||||
if(level){
|
|
||||||
if(level>0){
|
|
||||||
s->dct_error_sum[intra][i] += level;
|
|
||||||
level -= s->dct_offset[intra][i];
|
|
||||||
if(level<0) level=0;
|
|
||||||
}else{
|
|
||||||
s->dct_error_sum[intra][i] -= level;
|
|
||||||
level += s->dct_offset[intra][i];
|
|
||||||
if(level>0) level=0;
|
|
||||||
}
|
|
||||||
block[i]= level;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int dct_quantize_trellis_c(MPVEncContext *const s,
|
static int dct_quantize_trellis_c(MPVEncContext *const s,
|
||||||
|
|
@ -4009,8 +3990,8 @@ static int dct_quantize_trellis_c(MPVEncContext *const s,
|
||||||
|
|
||||||
s->fdsp.fdct(block);
|
s->fdsp.fdct(block);
|
||||||
|
|
||||||
if(s->dct_error_sum)
|
denoise_dct(s, block);
|
||||||
s->denoise_dct(s, block);
|
|
||||||
qmul= qscale*16;
|
qmul= qscale*16;
|
||||||
qadd= ((qscale-1)|1)*8;
|
qadd= ((qscale-1)|1)*8;
|
||||||
|
|
||||||
|
|
@ -4678,8 +4659,7 @@ static int dct_quantize_c(MPVEncContext *const s,
|
||||||
|
|
||||||
s->fdsp.fdct(block);
|
s->fdsp.fdct(block);
|
||||||
|
|
||||||
if(s->dct_error_sum)
|
denoise_dct(s, block);
|
||||||
s->denoise_dct(s, block);
|
|
||||||
|
|
||||||
if (s->c.mb_intra) {
|
if (s->c.mb_intra) {
|
||||||
scantable = s->c.intra_scantable.scantable;
|
scantable = s->c.intra_scantable.scantable;
|
||||||
|
|
|
||||||
|
|
@ -123,7 +123,6 @@ typedef struct MPVEncContext {
|
||||||
uint16_t (*q_inter_matrix16)[2][64];
|
uint16_t (*q_inter_matrix16)[2][64];
|
||||||
|
|
||||||
/* noise reduction */
|
/* noise reduction */
|
||||||
void (*denoise_dct)(struct MPVEncContext *s, int16_t *block);
|
|
||||||
int (*dct_error_sum)[64];
|
int (*dct_error_sum)[64];
|
||||||
int dct_count[2];
|
int dct_count[2];
|
||||||
uint16_t (*dct_offset)[64];
|
uint16_t (*dct_offset)[64];
|
||||||
|
|
@ -397,7 +396,6 @@ int ff_mpv_reallocate_putbitbuffer(MPVEncContext *s, size_t threshold, size_t si
|
||||||
void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
|
void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
|
||||||
|
|
||||||
void ff_dct_encode_init(MPVEncContext *s);
|
void ff_dct_encode_init(MPVEncContext *s);
|
||||||
void ff_mpvenc_dct_init_mips(MPVEncContext *s);
|
|
||||||
void ff_dct_encode_init_x86(MPVEncContext *s);
|
void ff_dct_encode_init_x86(MPVEncContext *s);
|
||||||
|
|
||||||
void ff_convert_matrix(MPVEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],
|
void ff_convert_matrix(MPVEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,29 @@
|
||||||
#include "mathops.h"
|
#include "mathops.h"
|
||||||
#include "mpegvideoencdsp.h"
|
#include "mpegvideoencdsp.h"
|
||||||
|
|
||||||
|
static void denoise_dct_c(int16_t block[64], int dct_error_sum[64],
|
||||||
|
const uint16_t dct_offset[64])
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 64; ++i) {
|
||||||
|
int level = block[i];
|
||||||
|
|
||||||
|
if (level) {
|
||||||
|
if (level > 0) {
|
||||||
|
dct_error_sum[i] += level;
|
||||||
|
level -= dct_offset[i];
|
||||||
|
if (level < 0)
|
||||||
|
level = 0;
|
||||||
|
} else {
|
||||||
|
dct_error_sum[i] -= level;
|
||||||
|
level += dct_offset[i];
|
||||||
|
if (level > 0)
|
||||||
|
level = 0;
|
||||||
|
}
|
||||||
|
block[i] = level;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int try_8x8basis_c(const int16_t rem[64], const int16_t weight[64],
|
static int try_8x8basis_c(const int16_t rem[64], const int16_t weight[64],
|
||||||
const int16_t basis[64], int scale)
|
const int16_t basis[64], int scale)
|
||||||
{
|
{
|
||||||
|
|
@ -253,6 +276,8 @@ static void shrink88(uint8_t *dst, ptrdiff_t dst_wrap,
|
||||||
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
|
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
|
||||||
AVCodecContext *avctx)
|
AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
|
c->denoise_dct = denoise_dct_c;
|
||||||
|
|
||||||
c->try_8x8basis = try_8x8basis_c;
|
c->try_8x8basis = try_8x8basis_c;
|
||||||
c->add_8x8basis = add_8x8basis_c;
|
c->add_8x8basis = add_8x8basis_c;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,9 @@
|
||||||
#define EDGE_BOTTOM 2
|
#define EDGE_BOTTOM 2
|
||||||
|
|
||||||
typedef struct MpegvideoEncDSPContext {
|
typedef struct MpegvideoEncDSPContext {
|
||||||
|
void (*denoise_dct)(int16_t block[64], int dct_error_sum[64],
|
||||||
|
const uint16_t dct_offset[64]);
|
||||||
|
|
||||||
int (*try_8x8basis)(const int16_t rem[64], const int16_t weight[64],
|
int (*try_8x8basis)(const int16_t rem[64], const int16_t weight[64],
|
||||||
const int16_t basis[64], int scale);
|
const int16_t basis[64], int scale);
|
||||||
void (*add_8x8basis)(int16_t rem[64], const int16_t basis[64], int scale);
|
void (*add_8x8basis)(int16_t rem[64], const int16_t basis[64], int scale);
|
||||||
|
|
|
||||||
|
|
@ -39,8 +39,6 @@ DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = {
|
||||||
36, 37, 49, 50, 58, 59, 63, 64,
|
36, 37, 49, 50, 58, 59, 63, 64,
|
||||||
};
|
};
|
||||||
|
|
||||||
#if HAVE_6REGS
|
|
||||||
|
|
||||||
#if HAVE_SSE2_INLINE
|
#if HAVE_SSE2_INLINE
|
||||||
#define COMPILE_TEMPLATE_SSSE3 0
|
#define COMPILE_TEMPLATE_SSSE3 0
|
||||||
#define RENAME(a) a ## _sse2
|
#define RENAME(a) a ## _sse2
|
||||||
|
|
@ -55,85 +53,17 @@ DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = {
|
||||||
#include "mpegvideoenc_template.c"
|
#include "mpegvideoenc_template.c"
|
||||||
#endif /* HAVE_SSSE3_INLINE */
|
#endif /* HAVE_SSSE3_INLINE */
|
||||||
|
|
||||||
#endif /* HAVE_6REGS */
|
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
|
||||||
#if HAVE_SSE2_INLINE
|
|
||||||
static void denoise_dct_sse2(MPVEncContext *const s, int16_t block[])
|
|
||||||
{
|
|
||||||
const int intra = s->c.mb_intra;
|
|
||||||
int *sum= s->dct_error_sum[intra];
|
|
||||||
uint16_t *offset= s->dct_offset[intra];
|
|
||||||
|
|
||||||
s->dct_count[intra]++;
|
|
||||||
|
|
||||||
__asm__ volatile(
|
|
||||||
"pxor %%xmm7, %%xmm7 \n\t"
|
|
||||||
"1: \n\t"
|
|
||||||
"pxor %%xmm0, %%xmm0 \n\t"
|
|
||||||
"pxor %%xmm1, %%xmm1 \n\t"
|
|
||||||
"movdqa (%0), %%xmm2 \n\t"
|
|
||||||
"movdqa 16(%0), %%xmm3 \n\t"
|
|
||||||
"pcmpgtw %%xmm2, %%xmm0 \n\t"
|
|
||||||
"pcmpgtw %%xmm3, %%xmm1 \n\t"
|
|
||||||
"pxor %%xmm0, %%xmm2 \n\t"
|
|
||||||
"pxor %%xmm1, %%xmm3 \n\t"
|
|
||||||
"psubw %%xmm0, %%xmm2 \n\t"
|
|
||||||
"psubw %%xmm1, %%xmm3 \n\t"
|
|
||||||
"movdqa %%xmm2, %%xmm4 \n\t"
|
|
||||||
"movdqa %%xmm3, %%xmm5 \n\t"
|
|
||||||
"psubusw (%2), %%xmm2 \n\t"
|
|
||||||
"psubusw 16(%2), %%xmm3 \n\t"
|
|
||||||
"pxor %%xmm0, %%xmm2 \n\t"
|
|
||||||
"pxor %%xmm1, %%xmm3 \n\t"
|
|
||||||
"psubw %%xmm0, %%xmm2 \n\t"
|
|
||||||
"psubw %%xmm1, %%xmm3 \n\t"
|
|
||||||
"movdqa %%xmm2, (%0) \n\t"
|
|
||||||
"movdqa %%xmm3, 16(%0) \n\t"
|
|
||||||
"movdqa %%xmm4, %%xmm6 \n\t"
|
|
||||||
"movdqa %%xmm5, %%xmm0 \n\t"
|
|
||||||
"punpcklwd %%xmm7, %%xmm4 \n\t"
|
|
||||||
"punpckhwd %%xmm7, %%xmm6 \n\t"
|
|
||||||
"punpcklwd %%xmm7, %%xmm5 \n\t"
|
|
||||||
"punpckhwd %%xmm7, %%xmm0 \n\t"
|
|
||||||
"paddd (%1), %%xmm4 \n\t"
|
|
||||||
"paddd 16(%1), %%xmm6 \n\t"
|
|
||||||
"paddd 32(%1), %%xmm5 \n\t"
|
|
||||||
"paddd 48(%1), %%xmm0 \n\t"
|
|
||||||
"movdqa %%xmm4, (%1) \n\t"
|
|
||||||
"movdqa %%xmm6, 16(%1) \n\t"
|
|
||||||
"movdqa %%xmm5, 32(%1) \n\t"
|
|
||||||
"movdqa %%xmm0, 48(%1) \n\t"
|
|
||||||
"add $32, %0 \n\t"
|
|
||||||
"add $64, %1 \n\t"
|
|
||||||
"add $32, %2 \n\t"
|
|
||||||
"cmp %3, %0 \n\t"
|
|
||||||
" jb 1b \n\t"
|
|
||||||
: "+r" (block), "+r" (sum), "+r" (offset)
|
|
||||||
: "r"(block+64)
|
|
||||||
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
|
|
||||||
"%xmm4", "%xmm5", "%xmm6", "%xmm7")
|
|
||||||
);
|
|
||||||
}
|
|
||||||
#endif /* HAVE_SSE2_INLINE */
|
|
||||||
#endif /* HAVE_INLINE_ASM */
|
|
||||||
|
|
||||||
av_cold void ff_dct_encode_init_x86(MPVEncContext *const s)
|
av_cold void ff_dct_encode_init_x86(MPVEncContext *const s)
|
||||||
{
|
{
|
||||||
const int dct_algo = s->c.avctx->dct_algo;
|
const int dct_algo = s->c.avctx->dct_algo;
|
||||||
|
|
||||||
if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
|
if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
|
||||||
#if HAVE_MMX_INLINE
|
|
||||||
int cpu_flags = av_get_cpu_flags();
|
|
||||||
#if HAVE_SSE2_INLINE
|
#if HAVE_SSE2_INLINE
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
if (INLINE_SSE2(cpu_flags)) {
|
if (INLINE_SSE2(cpu_flags)) {
|
||||||
#if HAVE_6REGS
|
|
||||||
s->dct_quantize = dct_quantize_sse2;
|
s->dct_quantize = dct_quantize_sse2;
|
||||||
#endif
|
|
||||||
s->denoise_dct = denoise_dct_sse2;
|
|
||||||
}
|
}
|
||||||
#endif
|
#if HAVE_SSSE3_INLINE
|
||||||
#if HAVE_6REGS && HAVE_SSSE3_INLINE
|
|
||||||
if (INLINE_SSSE3(cpu_flags))
|
if (INLINE_SSSE3(cpu_flags))
|
||||||
s->dct_quantize = dct_quantize_ssse3;
|
s->dct_quantize = dct_quantize_ssse3;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -70,24 +70,25 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
|
||||||
{
|
{
|
||||||
x86_reg last_non_zero_p1;
|
x86_reg last_non_zero_p1;
|
||||||
int level=0, q; //=0 is because gcc says uninitialized ...
|
int level=0, q; //=0 is because gcc says uninitialized ...
|
||||||
const uint16_t *qmat, *bias;
|
const uint16_t *qmat;
|
||||||
LOCAL_ALIGNED_16(int16_t, temp_block, [64]);
|
LOCAL_ALIGNED_16(int16_t, temp_block, [64]);
|
||||||
|
|
||||||
//s->fdct (block);
|
//s->fdct (block);
|
||||||
ff_fdct_sse2(block); // cannot be anything else ...
|
ff_fdct_sse2(block); // cannot be anything else ...
|
||||||
|
|
||||||
if(s->dct_error_sum)
|
if (s->dct_error_sum) {
|
||||||
s->denoise_dct(s, block);
|
const int intra = s->c.mb_intra;
|
||||||
|
s->dct_count[intra]++;
|
||||||
|
s->mpvencdsp.denoise_dct(block, s->dct_error_sum[intra], s->dct_offset[intra]);
|
||||||
|
}
|
||||||
|
|
||||||
if (s->c.mb_intra) {
|
if (s->c.mb_intra) {
|
||||||
int dummy;
|
int dummy;
|
||||||
if (n < 4){
|
if (n < 4){
|
||||||
q = s->c.y_dc_scale;
|
q = s->c.y_dc_scale;
|
||||||
bias = s->q_intra_matrix16[qscale][1];
|
|
||||||
qmat = s->q_intra_matrix16[qscale][0];
|
qmat = s->q_intra_matrix16[qscale][0];
|
||||||
}else{
|
}else{
|
||||||
q = s->c.c_dc_scale;
|
q = s->c.c_dc_scale;
|
||||||
bias = s->q_chroma_intra_matrix16[qscale][1];
|
|
||||||
qmat = s->q_chroma_intra_matrix16[qscale][0];
|
qmat = s->q_chroma_intra_matrix16[qscale][0];
|
||||||
}
|
}
|
||||||
/* note: block[0] is assumed to be positive */
|
/* note: block[0] is assumed to be positive */
|
||||||
|
|
@ -106,7 +107,6 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
|
||||||
last_non_zero_p1 = 1;
|
last_non_zero_p1 = 1;
|
||||||
} else {
|
} else {
|
||||||
last_non_zero_p1 = 0;
|
last_non_zero_p1 = 0;
|
||||||
bias = s->q_inter_matrix16[qscale][1];
|
|
||||||
qmat = s->q_inter_matrix16[qscale][0];
|
qmat = s->q_inter_matrix16[qscale][0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -114,11 +114,11 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1
|
"movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1
|
||||||
SPREADW("%%xmm3")
|
SPREADW("%%xmm3")
|
||||||
"pxor %%xmm7, %%xmm7 \n\t" // 0
|
"pxor %%xmm2, %%xmm2 \n\t" // 0
|
||||||
"pxor %%xmm4, %%xmm4 \n\t" // 0
|
"pxor %%xmm4, %%xmm4 \n\t" // 0
|
||||||
"movdqa (%2), %%xmm5 \n\t" // qmat[0]
|
"movdqa (%2), %%xmm5 \n\t" // qmat[0]
|
||||||
"pxor %%xmm6, %%xmm6 \n\t"
|
"pxor %%xmm6, %%xmm6 \n\t"
|
||||||
"psubw (%3), %%xmm6 \n\t" // -bias[0]
|
"psubw 128(%2), %%xmm6 \n\t" // -bias[0]
|
||||||
"mov $-128, %%"FF_REG_a" \n\t"
|
"mov $-128, %%"FF_REG_a" \n\t"
|
||||||
".p2align 4 \n\t"
|
".p2align 4 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
|
|
@ -128,10 +128,10 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
|
||||||
"pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
|
"pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
|
||||||
"por %%xmm0, %%xmm4 \n\t"
|
"por %%xmm0, %%xmm4 \n\t"
|
||||||
RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
||||||
"movdqa %%xmm0, (%5, %%"FF_REG_a") \n\t"
|
"movdqa %%xmm0, (%4, %0) \n\t"
|
||||||
"pcmpeqw %%xmm7, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
|
"pcmpeqw %%xmm2, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
|
||||||
"movdqa (%4, %%"FF_REG_a"), %%xmm1 \n\t"
|
"movdqa (%3, %0), %%xmm1 \n\t"
|
||||||
"movdqa %%xmm7, (%1, %%"FF_REG_a") \n\t" // 0
|
"movdqa %%xmm2, (%1, %%"FF_REG_a") \n\t" // 0
|
||||||
"pandn %%xmm1, %%xmm0 \n\t"
|
"pandn %%xmm1, %%xmm0 \n\t"
|
||||||
"pmaxsw %%xmm0, %%xmm3 \n\t"
|
"pmaxsw %%xmm0, %%xmm3 \n\t"
|
||||||
"add $16, %%"FF_REG_a" \n\t"
|
"add $16, %%"FF_REG_a" \n\t"
|
||||||
|
|
@ -140,32 +140,32 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
|
||||||
"movd %%xmm3, %%"FF_REG_a" \n\t"
|
"movd %%xmm3, %%"FF_REG_a" \n\t"
|
||||||
"movzbl %%al, %%eax \n\t" // last_non_zero_p1
|
"movzbl %%al, %%eax \n\t" // last_non_zero_p1
|
||||||
: "+a" (last_non_zero_p1)
|
: "+a" (last_non_zero_p1)
|
||||||
: "r" (block+64), "r" (qmat), "r" (bias),
|
: "r" (block+64), "r" (qmat),
|
||||||
"r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
|
"r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
|
||||||
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
|
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
|
||||||
"%xmm4", "%xmm5", "%xmm6", "%xmm7")
|
"%xmm4", "%xmm5", "%xmm6")
|
||||||
);
|
);
|
||||||
}else{ // FMT_H263
|
}else{ // FMT_H263
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1
|
"movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1
|
||||||
SPREADW("%%xmm3")
|
SPREADW("%%xmm3")
|
||||||
"pxor %%xmm7, %%xmm7 \n\t" // 0
|
"pxor %%xmm2, %%xmm2 \n\t" // 0
|
||||||
"pxor %%xmm4, %%xmm4 \n\t" // 0
|
"pxor %%xmm4, %%xmm4 \n\t" // 0
|
||||||
"mov $-128, %%"FF_REG_a" \n\t"
|
"mov $-128, %%"FF_REG_a" \n\t"
|
||||||
".p2align 4 \n\t"
|
".p2align 4 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" // block[i]
|
"movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" // block[i]
|
||||||
SAVE_SIGN("%%xmm1", "%%xmm0") // ABS(block[i])
|
SAVE_SIGN("%%xmm1", "%%xmm0") // ABS(block[i])
|
||||||
"movdqa (%3, %%"FF_REG_a"), %%xmm6 \n\t" // bias[0]
|
"movdqa 128(%2, %0), %%xmm6 \n\t" // bias[i]
|
||||||
"paddusw %%xmm6, %%xmm0 \n\t" // ABS(block[i]) + bias[0]
|
"paddusw %%xmm6, %%xmm0 \n\t" // ABS(block[i]) + bias[0]
|
||||||
"movdqa (%2, %%"FF_REG_a"), %%xmm5 \n\t" // qmat[i]
|
"movdqa (%2, %%"FF_REG_a"), %%xmm5 \n\t" // qmat[i]
|
||||||
"pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
|
"pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
|
||||||
"por %%xmm0, %%xmm4 \n\t"
|
"por %%xmm0, %%xmm4 \n\t"
|
||||||
RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
||||||
"movdqa %%xmm0, (%5, %%"FF_REG_a") \n\t"
|
"movdqa %%xmm0, (%4, %0) \n\t"
|
||||||
"pcmpeqw %%xmm7, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
|
"pcmpeqw %%xmm2, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
|
||||||
"movdqa (%4, %%"FF_REG_a"), %%xmm1 \n\t"
|
"movdqa (%3, %0), %%xmm1 \n\t"
|
||||||
"movdqa %%xmm7, (%1, %%"FF_REG_a") \n\t" // 0
|
"movdqa %%xmm2, (%1, %%"FF_REG_a") \n\t" // 0
|
||||||
"pandn %%xmm1, %%xmm0 \n\t"
|
"pandn %%xmm1, %%xmm0 \n\t"
|
||||||
"pmaxsw %%xmm0, %%xmm3 \n\t"
|
"pmaxsw %%xmm0, %%xmm3 \n\t"
|
||||||
"add $16, %%"FF_REG_a" \n\t"
|
"add $16, %%"FF_REG_a" \n\t"
|
||||||
|
|
@ -174,10 +174,10 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
|
||||||
"movd %%xmm3, %%"FF_REG_a" \n\t"
|
"movd %%xmm3, %%"FF_REG_a" \n\t"
|
||||||
"movzbl %%al, %%eax \n\t" // last_non_zero_p1
|
"movzbl %%al, %%eax \n\t" // last_non_zero_p1
|
||||||
: "+a" (last_non_zero_p1)
|
: "+a" (last_non_zero_p1)
|
||||||
: "r" (block+64), "r" (qmat+64), "r" (bias+64),
|
: "r" (block+64), "r" (qmat+64),
|
||||||
"r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
|
"r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
|
||||||
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
|
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
|
||||||
"%xmm4", "%xmm5", "%xmm6", "%xmm7")
|
"%xmm4", "%xmm5", "%xmm6")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,104 @@
|
||||||
%include "libavutil/x86/x86util.asm"
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
|
; void ff_add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale)
|
||||||
|
INIT_XMM ssse3
|
||||||
|
cglobal add_8x8basis, 3, 3+ARCH_X86_64, 4, rem, basis, scale
|
||||||
|
movd m0, scaled
|
||||||
|
add scaled, 1024
|
||||||
|
add basisq, 128
|
||||||
|
add remq, 128
|
||||||
|
%if ARCH_X86_64
|
||||||
|
%define OFF r3q
|
||||||
|
mov r3q, -128
|
||||||
|
cmp scaled, 2047
|
||||||
|
%else
|
||||||
|
%define OFF r2q
|
||||||
|
cmp scaled, 2047
|
||||||
|
mov r2q, -128
|
||||||
|
%endif
|
||||||
|
ja .huge_scale
|
||||||
|
|
||||||
|
punpcklwd m0, m0
|
||||||
|
pshufd m0, m0, 0x0
|
||||||
|
psllw m0, 5
|
||||||
|
.loop1:
|
||||||
|
mova m1, [basisq+OFF]
|
||||||
|
mova m2, [basisq+OFF+16]
|
||||||
|
pmulhrsw m1, m0
|
||||||
|
pmulhrsw m2, m0
|
||||||
|
paddw m1, [remq+OFF]
|
||||||
|
paddw m2, [remq+OFF+16]
|
||||||
|
mova [remq+OFF], m1
|
||||||
|
mova [remq+OFF+16], m2
|
||||||
|
add OFF, 32
|
||||||
|
js .loop1
|
||||||
|
RET
|
||||||
|
|
||||||
|
.huge_scale:
|
||||||
|
pslld m0, 6
|
||||||
|
punpcklwd m0, m0
|
||||||
|
pshufd m1, m0, 0x55
|
||||||
|
psrlw m0, 1
|
||||||
|
pshufd m0, m0, 0x0
|
||||||
|
.loop2:
|
||||||
|
mova m2, [basisq+OFF]
|
||||||
|
pmulhrsw m3, m2, m0
|
||||||
|
pmullw m2, m1
|
||||||
|
paddw m2, m3
|
||||||
|
paddw m2, [remq+OFF]
|
||||||
|
mova [remq+OFF], m2
|
||||||
|
add OFF, 16
|
||||||
|
js .loop2
|
||||||
|
RET
|
||||||
|
|
||||||
|
|
||||||
|
INIT_XMM sse2
|
||||||
|
cglobal mpv_denoise_dct, 3, 4, 7, block, sum, offset
|
||||||
|
pxor m6, m6
|
||||||
|
lea r3, [sumq+256]
|
||||||
|
.loop:
|
||||||
|
mova m2, [blockq]
|
||||||
|
mova m3, [blockq+16]
|
||||||
|
mova m0, m6
|
||||||
|
mova m1, m6
|
||||||
|
pcmpgtw m0, m2
|
||||||
|
pcmpgtw m1, m3
|
||||||
|
pxor m2, m0
|
||||||
|
pxor m3, m1
|
||||||
|
psubw m2, m0
|
||||||
|
psubw m3, m1
|
||||||
|
psubusw m4, m2, [offsetq]
|
||||||
|
psubusw m5, m3, [offsetq+16]
|
||||||
|
pxor m4, m0
|
||||||
|
pxor m5, m1
|
||||||
|
add offsetq, 32
|
||||||
|
psubw m4, m0
|
||||||
|
psubw m5, m1
|
||||||
|
mova [blockq], m4
|
||||||
|
mova [blockq+16], m5
|
||||||
|
mova m0, m2
|
||||||
|
mova m1, m3
|
||||||
|
add blockq, 32
|
||||||
|
punpcklwd m0, m6
|
||||||
|
punpckhwd m2, m6
|
||||||
|
punpcklwd m1, m6
|
||||||
|
punpckhwd m3, m6
|
||||||
|
paddd m0, [sumq]
|
||||||
|
paddd m2, [sumq+16]
|
||||||
|
paddd m1, [sumq+32]
|
||||||
|
paddd m3, [sumq+48]
|
||||||
|
mova [sumq], m0
|
||||||
|
mova [sumq+16], m2
|
||||||
|
mova [sumq+32], m1
|
||||||
|
mova [sumq+48], m3
|
||||||
|
add sumq, 64
|
||||||
|
cmp sumq, r3
|
||||||
|
jb .loop
|
||||||
|
RET
|
||||||
|
|
||||||
|
|
||||||
; int ff_pix_sum16(const uint8_t *pix, ptrdiff_t line_size)
|
; int ff_pix_sum16(const uint8_t *pix, ptrdiff_t line_size)
|
||||||
; %1 = number of loops
|
; %1 = number of loops
|
||||||
; %2 = number of GPRs used
|
; %2 = number of GPRs used
|
||||||
|
|
|
||||||
|
|
@ -27,9 +27,12 @@
|
||||||
#include "libavcodec/avcodec.h"
|
#include "libavcodec/avcodec.h"
|
||||||
#include "libavcodec/mpegvideoencdsp.h"
|
#include "libavcodec/mpegvideoencdsp.h"
|
||||||
|
|
||||||
|
void ff_mpv_denoise_dct_sse2(int16_t block[64], int dct_error_sum[64],
|
||||||
|
const uint16_t dct_offset[64]);
|
||||||
int ff_pix_sum16_sse2(const uint8_t *pix, ptrdiff_t line_size);
|
int ff_pix_sum16_sse2(const uint8_t *pix, ptrdiff_t line_size);
|
||||||
int ff_pix_sum16_xop(const uint8_t *pix, ptrdiff_t line_size);
|
int ff_pix_sum16_xop(const uint8_t *pix, ptrdiff_t line_size);
|
||||||
int ff_pix_norm1_sse2(const uint8_t *pix, ptrdiff_t line_size);
|
int ff_pix_norm1_sse2(const uint8_t *pix, ptrdiff_t line_size);
|
||||||
|
void ff_add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale);
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
#if HAVE_SSSE3_INLINE
|
#if HAVE_SSSE3_INLINE
|
||||||
|
|
@ -81,41 +84,6 @@ static int try_8x8basis_ssse3(const int16_t rem[64], const int16_t weight[64], c
|
||||||
);
|
);
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale)
|
|
||||||
{
|
|
||||||
x86_reg i=0;
|
|
||||||
|
|
||||||
if (FFABS(scale) < 1024) {
|
|
||||||
scale *= 1 << (16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT);
|
|
||||||
__asm__ volatile(
|
|
||||||
"movd %3, %%xmm2 \n\t"
|
|
||||||
"punpcklwd %%xmm2, %%xmm2 \n\t"
|
|
||||||
"pshufd $0, %%xmm2, %%xmm2 \n\t"
|
|
||||||
".p2align 4 \n\t"
|
|
||||||
"1: \n\t"
|
|
||||||
"movdqa (%1, %0), %%xmm0 \n\t"
|
|
||||||
"movdqa 16(%1, %0), %%xmm1 \n\t"
|
|
||||||
"pmulhrsw %%xmm2, %%xmm0 \n\t"
|
|
||||||
"pmulhrsw %%xmm2, %%xmm1 \n\t"
|
|
||||||
"paddw (%2, %0), %%xmm0 \n\t"
|
|
||||||
"paddw 16(%2, %0), %%xmm1 \n\t"
|
|
||||||
"movdqa %%xmm0, (%2, %0) \n\t"
|
|
||||||
"movdqa %%xmm1, 16(%2, %0) \n\t"
|
|
||||||
"add $32, %0 \n\t"
|
|
||||||
"cmp $128, %0 \n\t" // FIXME optimize & bench
|
|
||||||
" jb 1b \n\t"
|
|
||||||
: "+r" (i)
|
|
||||||
: "r"(basis), "r"(rem), "g"(scale)
|
|
||||||
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2")
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
for (i=0; i<8*8; i++) {
|
|
||||||
rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* HAVE_SSSE3_INLINE */
|
#endif /* HAVE_SSSE3_INLINE */
|
||||||
|
|
||||||
/* Draw the edges of width 'w' of an image of size width, height */
|
/* Draw the edges of width 'w' of an image of size width, height */
|
||||||
|
|
@ -209,6 +177,7 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
|
c->denoise_dct = ff_mpv_denoise_dct_sse2;
|
||||||
c->pix_sum = ff_pix_sum16_sse2;
|
c->pix_sum = ff_pix_sum16_sse2;
|
||||||
c->pix_norm1 = ff_pix_norm1_sse2;
|
c->pix_norm1 = ff_pix_norm1_sse2;
|
||||||
}
|
}
|
||||||
|
|
@ -224,15 +193,17 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
||||||
c->draw_edges = draw_edges_mmx;
|
c->draw_edges = draw_edges_mmx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
|
if (X86_SSSE3(cpu_flags)) {
|
||||||
#if HAVE_SSSE3_INLINE
|
#if HAVE_SSSE3_INLINE
|
||||||
if (INLINE_SSSE3(cpu_flags)) {
|
|
||||||
if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
|
if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
|
||||||
c->try_8x8basis = try_8x8basis_ssse3;
|
c->try_8x8basis = try_8x8basis_ssse3;
|
||||||
}
|
}
|
||||||
c->add_8x8basis = add_8x8basis_ssse3;
|
|
||||||
}
|
|
||||||
#endif /* HAVE_SSSE3_INLINE */
|
#endif /* HAVE_SSSE3_INLINE */
|
||||||
|
#if HAVE_SSSE3_EXTERNAL
|
||||||
|
c->add_8x8basis = ff_add_8x8basis_ssse3;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* HAVE_INLINE_ASM */
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -375,6 +375,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
||||||
if (!in2)
|
if (!in2)
|
||||||
goto fail;
|
goto fail;
|
||||||
av_frame_copy(in2, in);
|
av_frame_copy(in2, in);
|
||||||
|
if (av_frame_copy_props(in2, in) < 0) {
|
||||||
|
av_frame_free(&in2);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
av_frame_free(&in);
|
av_frame_free(&in);
|
||||||
in = in2;
|
in = in2;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -780,7 +780,13 @@ static int resolve_content_path(AVFormatContext *s, const char *url, int *max_ur
|
||||||
}
|
}
|
||||||
root_url = (av_strcasecmp(baseurl, "")) ? baseurl : path;
|
root_url = (av_strcasecmp(baseurl, "")) ? baseurl : path;
|
||||||
if (node) {
|
if (node) {
|
||||||
xmlNodeSetContent(node, root_url);
|
xmlChar *escaped = xmlEncodeSpecialChars(NULL, root_url);
|
||||||
|
if (!escaped) {
|
||||||
|
updated = AVERROR(ENOMEM);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
xmlNodeSetContent(node, escaped);
|
||||||
|
xmlFree(escaped);
|
||||||
updated = 1;
|
updated = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -814,9 +820,15 @@ static int resolve_content_path(AVFormatContext *s, const char *url, int *max_ur
|
||||||
memset(p + 1, 0, strlen(p));
|
memset(p + 1, 0, strlen(p));
|
||||||
}
|
}
|
||||||
av_strlcat(tmp_str, text + start, tmp_max_url_size);
|
av_strlcat(tmp_str, text + start, tmp_max_url_size);
|
||||||
xmlNodeSetContent(baseurl_nodes[i], tmp_str);
|
|
||||||
updated = 1;
|
|
||||||
xmlFree(text);
|
xmlFree(text);
|
||||||
|
xmlChar* escaped = xmlEncodeSpecialChars(NULL, tmp_str);
|
||||||
|
if (!escaped) {
|
||||||
|
updated = AVERROR(ENOMEM);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
xmlNodeSetContent(baseurl_nodes[i], escaped);
|
||||||
|
updated = 1;
|
||||||
|
xmlFree(escaped);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,8 +38,7 @@ typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
|
||||||
# define FF_PTR_SIZE "8"
|
# define FF_PTR_SIZE "8"
|
||||||
typedef int64_t x86_reg;
|
typedef int64_t x86_reg;
|
||||||
|
|
||||||
/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */
|
# define FF_REG_SP "rsp"
|
||||||
# define FF_REG_sp "rsp"
|
|
||||||
# define FF_REG_BP "rbp"
|
# define FF_REG_BP "rbp"
|
||||||
# define FF_REGBP rbp
|
# define FF_REGBP rbp
|
||||||
# define FF_REGa rax
|
# define FF_REGa rax
|
||||||
|
|
@ -60,7 +59,7 @@ typedef int64_t x86_reg;
|
||||||
# define FF_PTR_SIZE "4"
|
# define FF_PTR_SIZE "4"
|
||||||
typedef int32_t x86_reg;
|
typedef int32_t x86_reg;
|
||||||
|
|
||||||
# define FF_REG_sp "esp"
|
# define FF_REG_SP "esp"
|
||||||
# define FF_REG_BP "ebp"
|
# define FF_REG_BP "ebp"
|
||||||
# define FF_REGBP ebp
|
# define FF_REGBP ebp
|
||||||
# define FF_REGa eax
|
# define FF_REGa eax
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,37 @@
|
||||||
buf[j] = rnd() % (max - min + 1) + min; \
|
buf[j] = rnd() % (max - min + 1) + min; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
static void check_denoise_dct(MpegvideoEncDSPContext *c)
|
||||||
|
{
|
||||||
|
declare_func(void, int16_t block[64], int dct_error_sum[64],
|
||||||
|
const uint16_t dct_offset[64]);
|
||||||
|
|
||||||
|
if (check_func(c->denoise_dct, "denoise_dct")) {
|
||||||
|
DECLARE_ALIGNED(16, int16_t, block_ref)[64];
|
||||||
|
DECLARE_ALIGNED(16, int16_t, block_new)[64];
|
||||||
|
DECLARE_ALIGNED(16, int, dct_error_sum_ref)[64];
|
||||||
|
DECLARE_ALIGNED(16, int, dct_error_sum_new)[64];
|
||||||
|
DECLARE_ALIGNED(16, uint16_t, dct_offset)[64];
|
||||||
|
|
||||||
|
for (size_t i = 0; i < FF_ARRAY_ELEMS(block_ref); ++i) {
|
||||||
|
unsigned random = rnd();
|
||||||
|
block_ref[i] = random & (1 << 16) ? random : 0;
|
||||||
|
}
|
||||||
|
randomize_buffers(dct_offset, sizeof(dct_offset));
|
||||||
|
randomize_buffer_clipped(dct_error_sum_ref, 0, (1 << 24) - 1);
|
||||||
|
memcpy(block_new, block_ref, sizeof(block_new));
|
||||||
|
memcpy(dct_error_sum_new, dct_error_sum_ref, sizeof(dct_error_sum_ref));
|
||||||
|
|
||||||
|
call_ref(block_ref, dct_error_sum_ref, dct_offset);
|
||||||
|
call_new(block_new, dct_error_sum_new, dct_offset);
|
||||||
|
if (memcmp(block_ref, block_new, sizeof(block_ref)) ||
|
||||||
|
memcmp(dct_error_sum_new, dct_error_sum_ref, sizeof(dct_error_sum_new)))
|
||||||
|
fail();
|
||||||
|
|
||||||
|
bench_new(block_new, dct_error_sum_new, dct_offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void check_add_8x8basis(MpegvideoEncDSPContext *c)
|
static void check_add_8x8basis(MpegvideoEncDSPContext *c)
|
||||||
{
|
{
|
||||||
declare_func(void, int16_t rem[64], const int16_t basis[64], int scale);
|
declare_func(void, int16_t rem[64], const int16_t basis[64], int scale);
|
||||||
|
|
@ -166,6 +197,8 @@ void checkasm_check_mpegvideoencdsp(void)
|
||||||
|
|
||||||
ff_mpegvideoencdsp_init(&c, &avctx);
|
ff_mpegvideoencdsp_init(&c, &avctx);
|
||||||
|
|
||||||
|
check_denoise_dct(&c);
|
||||||
|
report("denoise_dct");
|
||||||
check_pix_sum(&c);
|
check_pix_sum(&c);
|
||||||
report("pix_sum");
|
report("pix_sum");
|
||||||
check_pix_norm1(&c);
|
check_pix_norm1(&c);
|
||||||
|
|
|
||||||
|
|
@ -717,8 +717,9 @@ $(FATE_FILTER_VSYNTH-yes): SRC = $(TARGET_PATH)/tests/vsynth1/%02d.pgm
|
||||||
|
|
||||||
FATE_FFMPEG += $(FATE_FILTER_VSYNTH-yes)
|
FATE_FFMPEG += $(FATE_FILTER_VSYNTH-yes)
|
||||||
|
|
||||||
FATE_FILTER_FREI0R-$(call FILTERFRAMECRC, TESTSRC2, FREI0R_FILTER) = fate-filter-frei0r-filter
|
FATE_FILTER_FREI0R-$(call FILTERFRAMECRC, TESTSRC2, FREI0R_FILTER) = fate-filter-frei0r-filter fate-filter-frei0r-filter-unaligned
|
||||||
fate-filter-frei0r-filter: CMD = framecrc -lavfi "testsrc2=r=1:d=5,frei0r=enable=gte(n\,3):filter_name=distort0r"
|
fate-filter-frei0r-filter: CMD = framecrc -lavfi "testsrc2=r=1:d=5,frei0r=enable=gte(n\,3):filter_name=distort0r"
|
||||||
|
fate-filter-frei0r-filter-unaligned: CMD = framecrc -lavfi "testsrc2=s=328x240:r=1:d=5,frei0r=filter_name=distort0r"
|
||||||
FATE_FFMPEG += $(FATE_FILTER_FREI0R-yes)
|
FATE_FFMPEG += $(FATE_FILTER_FREI0R-yes)
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
#tb 0: 1/1
|
||||||
|
#media_type 0: video
|
||||||
|
#codec_id 0: rawvideo
|
||||||
|
#dimensions 0: 328x240
|
||||||
|
#sar 0: 1/1
|
||||||
|
0, 0, 0, 1, 314880, 0x7b9cad8f
|
||||||
|
0, 1, 1, 1, 314880, 0x0184436f
|
||||||
|
0, 2, 2, 1, 314880, 0x7e3f2776
|
||||||
|
0, 3, 3, 1, 314880, 0x0dc5e915
|
||||||
|
0, 4, 4, 1, 314880, 0xcf9c76ef
|
||||||
Loading…
Reference in New Issue