Merge commit '1a880b2fb8456ce68eefe5902bac95fea1e6a72d'

* commit '1a880b2fb8456ce68eefe5902bac95fea1e6a72d':
  hevc: SSE2 and SSSE3 loop filters

Conflicts:
	libavcodec/hevcdsp.c
	libavcodec/hevcdsp.h
	libavcodec/x86/Makefile
	libavcodec/x86/hevc_deblock.asm
	libavcodec/x86/hevcdsp_init.c

See: de7b89fd43 and several others
Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/79/merge
Michael Niedermayer 10 years ago
commit 706f81a2c2
  1. 4
      libavcodec/hevcdsp.c
  2. 2
      libavcodec/hevcdsp.h
  3. 12
      libavcodec/x86/hevc_deblock.asm
  4. 36
      libavcodec/x86/hevcdsp_init.c

@ -247,5 +247,7 @@ int i = 0;
HEVC_DSP(8);
break;
}
if (ARCH_X86) ff_hevcdsp_init_x86(hevcdsp, bit_depth);
if (ARCH_X86)
ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
}

@ -123,6 +123,6 @@ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
extern const int8_t ff_hevc_epel_filters[7][4];
extern const int8_t ff_hevc_qpel_filters[3][16];
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth);
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
#endif /* AVCODEC_HEVCDSP_H */

@ -660,7 +660,8 @@ ALIGN 16
INIT_XMM sse2
;-----------------------------------------------------------------------------
; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q)
; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
; uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
cglobal hevc_v_loop_filter_chroma_8, 3, 5, 7, pix, stride, tc, pix0, r3stride
sub pixq, 2
@ -693,7 +694,8 @@ cglobal hevc_v_loop_filter_chroma_12, 3, 5, 7, pix, stride, tc, pix0, r3stride
RET
;-----------------------------------------------------------------------------
; void ff_hevc_h_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q
; void ff_hevc_h_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
; uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
cglobal hevc_h_loop_filter_chroma_8, 3, 4, 7, pix, stride, tc, pix0
mov pix0q, pixq
@ -749,7 +751,8 @@ cglobal hevc_h_loop_filter_chroma_12, 3, 4, 7, pix, stride, tc, pix0
%if ARCH_X86_64
%macro LOOP_FILTER_LUMA 0
;-----------------------------------------------------------------------------
; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta,
; int *_tc, uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
cglobal hevc_v_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc
sub r0, 4
@ -788,7 +791,8 @@ cglobal hevc_v_loop_filter_luma_12, 4, 15, 16, pix, stride, beta, tc
RET
;-----------------------------------------------------------------------------
; void ff_hevc_h_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
; void ff_hevc_h_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta,
; int *_tc, uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
cglobal hevc_h_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc, count, pix0, src3stride
lea src3strideq, [3 * strideq]

@ -28,13 +28,11 @@
#include "libavcodec/hevcdsp.h"
#include "libavcodec/x86/hevcdsp.h"
#define LFC_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q);
#define LFL_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int _beta, int *_tc, \
uint8_t *_no_p, uint8_t *_no_q);
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q);
#define LFC_FUNCS(type, depth, opt) \
LFC_FUNC(h, depth, opt) \
@ -456,16 +454,16 @@ mc_bi_w_funcs(qpel_hv, 12, sse4);
PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
int mm_flags = av_get_cpu_flags();
int cpu_flags = av_get_cpu_flags();
if (bit_depth == 8) {
if (EXTERNAL_MMXEXT(mm_flags)) {
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
}
if (EXTERNAL_SSE2(mm_flags)) {
if (EXTERNAL_SSE2(cpu_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
if (ARCH_X86_64) {
@ -477,11 +475,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
}
if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
}
if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
@ -493,16 +491,16 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
}
if (EXTERNAL_AVX2(mm_flags)) {
if (EXTERNAL_AVX2(cpu_flags)) {
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
}
} else if (bit_depth == 10) {
if (EXTERNAL_MMXEXT(mm_flags)) {
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
}
if (EXTERNAL_SSE2(mm_flags)) {
if (EXTERNAL_SSE2(cpu_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
if (ARCH_X86_64) {
@ -514,11 +512,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;
}
if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
}
if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
@ -529,13 +527,13 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
}
if (EXTERNAL_AVX2(mm_flags)) {
if (EXTERNAL_AVX2(cpu_flags)) {
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
}
} else if (bit_depth == 12) {
if (EXTERNAL_SSE2(mm_flags)) {
if (EXTERNAL_SSE2(cpu_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
if (ARCH_X86_64) {
@ -543,11 +541,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
}
}
if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
}
if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);

Loading…
Cancel
Save