mirror of https://github.com/FFmpeg/FFmpeg.git
parent
6931d12745
commit
28a2107a8d
12 changed files with 498 additions and 1 deletions
@ -0,0 +1,153 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "common.h" |
||||
#include "pixelutils.h" |
||||
|
||||
#if CONFIG_PIXELUTILS |
||||
|
||||
#include "x86/pixelutils.h" |
||||
|
||||
/**
 * Sum of absolute differences between two blocks of 8-bit samples.
 *
 * @param src1    first byte of the first block
 * @param stride1 value added to src1 to step from one row to the next
 * @param src2    first byte of the second block
 * @param stride2 value added to src2 to step from one row to the next
 * @param w       number of samples compared per row
 * @param h       number of rows compared
 *
 * @return the accumulated sum of abs(src1[x] - src2[x]) over the w x h block
 */
static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1,
                                    const uint8_t *src2, ptrdiff_t stride2,
                                    int w, int h)
{
    int total = 0;
    int row, col;

    /* Both row pointers advance together, one stride per processed row. */
    for (row = 0; row < h; row++, src1 += stride1, src2 += stride2) {
        for (col = 0; col < w; col++) {
            const int diff = src1[col] - src2[col];
            total += abs(diff);
        }
    }

    return total;
}
||||
|
||||
#define DECLARE_BLOCK_FUNCTIONS(size) \ |
||||
static int block_sad_##size##x##size##_c(const uint8_t *src1, ptrdiff_t stride1, \
|
||||
const uint8_t *src2, ptrdiff_t stride2) \
|
||||
{ \
|
||||
return sad_wxh(src1, stride1, src2, stride2, size, size); \
|
||||
} |
||||
|
||||
DECLARE_BLOCK_FUNCTIONS(2) |
||||
DECLARE_BLOCK_FUNCTIONS(4) |
||||
DECLARE_BLOCK_FUNCTIONS(8) |
||||
DECLARE_BLOCK_FUNCTIONS(16) |
||||
|
||||
static const av_pixelutils_sad_fn sad_c[] = { |
||||
block_sad_2x2_c, |
||||
block_sad_4x4_c, |
||||
block_sad_8x8_c, |
||||
block_sad_16x16_c, |
||||
}; |
||||
|
||||
#endif /* CONFIG_PIXELUTILS */ |
||||
|
||||
av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligned, void *log_ctx) |
||||
{ |
||||
#if !CONFIG_PIXELUTILS |
||||
av_log(log_ctx, AV_LOG_ERROR, "pixelutils support is required " |
||||
"but libavutil is not compiled with it\n"); |
||||
return NULL; |
||||
#else |
||||
av_pixelutils_sad_fn sad[FF_ARRAY_ELEMS(sad_c)]; |
||||
|
||||
memcpy(sad, sad_c, sizeof(sad)); |
||||
|
||||
if (w_bits < 1 || w_bits > FF_ARRAY_ELEMS(sad) || |
||||
h_bits < 1 || h_bits > FF_ARRAY_ELEMS(sad)) |
||||
return NULL; |
||||
if (w_bits != h_bits) // only squared sad for now
|
||||
return NULL; |
||||
|
||||
#if ARCH_X86 |
||||
ff_pixelutils_sad_init_x86(sad, aligned); |
||||
#endif |
||||
|
||||
return sad[w_bits - 1]; |
||||
#endif |
||||
} |
||||
|
||||
#ifdef TEST |
||||
#define W1 320 |
||||
#define H1 240 |
||||
#define W2 640 |
||||
#define H2 480 |
||||
|
||||
static int run_test(const char *test, |
||||
const uint32_t *b1, const uint32_t *b2) |
||||
{ |
||||
int i, a, ret = 0; |
||||
|
||||
for (a = 0; a < 3; a++) { |
||||
const uint8_t *block1 = (const uint8_t *)b1; |
||||
const uint8_t *block2 = (const uint8_t *)b2; |
||||
|
||||
switch (a) { |
||||
case 0: block1++; block2++; break; |
||||
case 1: block2++; break; |
||||
case 2: break; |
||||
} |
||||
for (i = 1; i <= FF_ARRAY_ELEMS(sad_c); i++) { |
||||
av_pixelutils_sad_fn f_ref = sad_c[i - 1]; |
||||
av_pixelutils_sad_fn f_out = av_pixelutils_get_sad_fn(i, i, a, NULL); |
||||
const int out = f_out(block1, W1, block2, W2); |
||||
const int ref = f_ref(block1, W1, block2, W2); |
||||
printf("[%s] [%c%c] SAD [%s] %dx%d=%d ref=%d\n", |
||||
out == ref ? "OK" : "FAIL", |
||||
a ? 'A' : 'U', a == 2 ? 'A' : 'U', |
||||
test, 1<<i, 1<<i, out, ref); |
||||
if (out != ref) |
||||
ret = 1; |
||||
} |
||||
} |
||||
return ret; |
||||
} |
||||
|
||||
int main(void) |
||||
{ |
||||
int i, ret; |
||||
DECLARE_ALIGNED(32, uint32_t, buf1)[W1*H1]; |
||||
DECLARE_ALIGNED(32, uint32_t, buf2)[W2*H2]; |
||||
uint32_t state = 0; |
||||
|
||||
for (i = 0; i < W1*H1; i++) { |
||||
state = state * 1664525 + 1013904223; |
||||
buf1[i] = state; |
||||
} |
||||
for (i = 0; i < W2*H2; i++) { |
||||
state = state * 1664525 + 1013904223; |
||||
buf2[i] = state; |
||||
} |
||||
ret = run_test("random", buf1, buf2); |
||||
if (ret < 0) |
||||
return ret; |
||||
|
||||
memset(buf1, 0xff, sizeof(buf1)); |
||||
memset(buf2, 0x00, sizeof(buf2)); |
||||
ret = run_test("max", buf1, buf2); |
||||
if (ret < 0) |
||||
return ret; |
||||
|
||||
memset(buf1, 0x90, sizeof(buf1)); |
||||
memset(buf2, 0x90, sizeof(buf2)); |
||||
return run_test("min", buf1, buf2); |
||||
} |
||||
#endif /* TEST */ |
@ -0,0 +1,52 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVUTIL_PIXELUTILS_H |
||||
#define AVUTIL_PIXELUTILS_H |
||||
|
||||
#include <stddef.h> |
||||
#include <stdint.h> |
||||
#include "common.h" |
||||
|
||||
/**
|
||||
* Sum of abs(src1[x] - src2[x]) |
||||
*/ |
||||
typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1, |
||||
const uint8_t *src2, ptrdiff_t stride2); |
||||
|
||||
/**
|
||||
* Get a potentially optimized pointer to a Sum-of-absolute-differences |
||||
* function (see the av_pixelutils_sad_fn prototype). |
||||
* |
||||
* @param w_bits 1<<w_bits is the requested width of the block size |
||||
* @param h_bits 1<<h_bits is the requested height of the block size |
||||
* @param aligned If set to 2, the returned sad function will assume src1 and |
||||
* src2 addresses are aligned on the block size. |
||||
* If set to 1, the returned sad function will assume src1 is |
||||
* aligned on the block size. |
||||
* If set to 0, the returned sad function assumes no particular |
||||
* alignment. |
||||
* @param log_ctx context used for logging, can be NULL |
||||
* |
||||
* @return a pointer to the SAD function or NULL in case of error (because of |
||||
* invalid parameters) |
||||
*/ |
||||
av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, |
||||
int aligned, void *log_ctx); |
||||
|
||||
#endif /* AVUTIL_PIXELUTILS_H */ |
@ -0,0 +1,155 @@ |
||||
;****************************************************************************** |
||||
;* Pixel utilities SIMD |
||||
;* |
||||
;* Copyright (C) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
||||
;* Copyright (C) 2014 Clément Bœsch <u pkh me> |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "x86util.asm" |
||||
|
||||
SECTION_TEXT |
||||
|
||||
;------------------------------------------------------------------------------- |
||||
; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, |
||||
; const uint8_t *src2, ptrdiff_t stride2); |
||||
;------------------------------------------------------------------------------- |
||||
; 8x8 SAD using only baseline MMX instructions (no psadbw).
; Each %rep iteration handles two rows; four iterations cover the block.
; NOTE(review): no emms is executed before returning - presumably the
; caller is expected to clear the MMX state; confirm.
INIT_MMX mmx
cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
    pxor        m7, m7                      ; m7 = 0, used to widen bytes to words
    pxor        m6, m6                      ; m6 = running sums (four 16-bit lanes)
%rep 4
    mova        m0, [src1q]
    mova        m2, [src1q + stride1q]
    mova        m1, [src2q]
    mova        m3, [src2q + stride2q]
    ; |a - b| per byte: saturating subtraction in both directions, then OR
    psubusb     m4, m0, m1
    psubusb     m5, m2, m3
    psubusb     m1, m0
    psubusb     m3, m2
    por         m1, m4
    por         m3, m5
    ; zero-extend the byte differences to words and add them up
    punpcklbw   m0, m1, m7
    punpcklbw   m2, m3, m7
    punpckhbw   m1, m7
    punpckhbw   m3, m7
    paddw       m0, m1
    paddw       m2, m3
    paddw       m0, m2
    paddw       m6, m0
    lea         src1q, [src1q + 2*stride1q]
    lea         src2q, [src2q + 2*stride2q]
%endrep
    ; horizontal add of the four word lanes into the lowest word
    psrlq       m0, m6, 32
    paddw       m6, m0
    psrlq       m0, m6, 16
    paddw       m6, m0
    movd        eax, m6
    movzx       eax, ax                     ; keep only the low 16-bit sum
    RET
||||
|
||||
;------------------------------------------------------------------------------- |
||||
; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, |
||||
; const uint8_t *src2, ptrdiff_t stride2); |
||||
;------------------------------------------------------------------------------- |
||||
; 8x8 SAD using psadbw on MMX registers.
; Each %rep iteration handles two rows; four iterations cover the block.
; NOTE(review): no emms is executed before returning - presumably the
; caller is expected to clear the MMX state; confirm.
INIT_MMX mmxext
cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
    pxor        m2, m2                      ; m2 = running sum
%rep 4
    mova        m0, [src1q]
    mova        m1, [src1q + stride1q]
    psadbw      m0, [src2q]                 ; SAD of one 8-byte row
    psadbw      m1, [src2q + stride2q]
    paddw       m2, m0
    paddw       m2, m1
    lea         src1q, [src1q + 2*stride1q]
    lea         src2q, [src2q + 2*stride2q]
%endrep
    movd        eax, m2
    RET
||||
|
||||
;------------------------------------------------------------------------------- |
||||
; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, |
||||
; const uint8_t *src2, ptrdiff_t stride2); |
||||
;------------------------------------------------------------------------------- |
||||
; 16x16 SAD using psadbw on MMX registers.
; Each %rep iteration handles one row as two 8-byte halves.
; NOTE(review): no emms is executed before returning - presumably the
; caller is expected to clear the MMX state; confirm.
INIT_MMX mmxext
cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2
    pxor        m2, m2                      ; m2 = running sum
%rep 16
    mova        m0, [src1q]                 ; left 8 bytes of the row
    mova        m1, [src1q + 8]             ; right 8 bytes of the row
    psadbw      m0, [src2q]
    psadbw      m1, [src2q + 8]
    paddw       m2, m0
    paddw       m2, m1
    add         src1q, stride1q
    add         src2q, stride2q
%endrep
    movd        eax, m2
    RET
||||
|
||||
;------------------------------------------------------------------------------- |
||||
; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, |
||||
; const uint8_t *src2, ptrdiff_t stride2); |
||||
;------------------------------------------------------------------------------- |
||||
; 16x16 SAD using psadbw on XMM registers; all four loads per iteration use
; movu, so neither source needs any particular alignment.
; Each %rep iteration handles two rows; eight iterations cover the block.
INIT_XMM sse2
cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2
    pxor        m4, m4                      ; m4 = running sums (one per 64-bit half)
%rep 8
    movu        m0, [src1q]
    movu        m1, [src1q + stride1q]
    movu        m2, [src2q]
    movu        m3, [src2q + stride2q]
    psadbw      m0, m2
    psadbw      m1, m3
    paddw       m4, m0
    paddw       m4, m1
    lea         src1q, [src1q + 2*stride1q]
    lea         src2q, [src2q + 2*stride2q]
%endrep
    ; add the sum held in the high half to the one in the low half
    movhlps     m0, m4
    paddw       m4, m0
    movd        eax, m4
    RET
||||
|
||||
;------------------------------------------------------------------------------- |
||||
; int ff_pixelutils_sad_[au]_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, |
||||
; const uint8_t *src2, ptrdiff_t stride2); |
||||
;------------------------------------------------------------------------------- |
||||
; SAD_XMM_16x16 <a|u>
; Emits pixelutils_sad_<a|u>_16x16 for SSE2. The argument selects mova or
; movu for the src2 loads. src1 is always read directly as the memory
; operand of psadbw, which in its non-VEX SSE form requires a 16-byte
; aligned address, so both variants rely on src1 being aligned.
; Each %rep iteration handles two rows; eight iterations cover the block.
%macro SAD_XMM_16x16 1
INIT_XMM sse2
cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2
    pxor        m2, m2                      ; m2 = running sums (one per 64-bit half)
%rep 8
    mov%1       m0, [src2q]
    mov%1       m1, [src2q + stride2q]
    psadbw      m0, [src1q]
    psadbw      m1, [src1q + stride1q]
    paddw       m2, m0
    paddw       m2, m1
    lea         src1q, [src1q + 2*stride1q]
    lea         src2q, [src2q + 2*stride2q]
%endrep
    ; add the sum held in the high half to the one in the low half
    movhlps     m0, m2
    paddw       m2, m0
    movd        eax, m2
    RET
%endmacro

SAD_XMM_16x16 a
SAD_XMM_16x16 u
@ -0,0 +1,26 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVUTIL_X86_PIXELUTILS_H |
||||
#define AVUTIL_X86_PIXELUTILS_H |
||||
|
||||
#include "libavutil/pixelutils.h" |
||||
|
||||
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned); |
||||
|
||||
#endif /* AVUTIL_X86_PIXELUTILS_H */ |
@ -0,0 +1,58 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
|
||||
#include "pixelutils.h" |
||||
#include "cpu.h" |
||||
|
||||
int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, |
||||
const uint8_t *src2, ptrdiff_t stride2); |
||||
int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, |
||||
const uint8_t *src2, ptrdiff_t stride2); |
||||
|
||||
int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, |
||||
const uint8_t *src2, ptrdiff_t stride2); |
||||
int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, |
||||
const uint8_t *src2, ptrdiff_t stride2); |
||||
int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, |
||||
const uint8_t *src2, ptrdiff_t stride2); |
||||
int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, |
||||
const uint8_t *src2, ptrdiff_t stride2); |
||||
|
||||
/**
 * Replace entries of a SAD function table with x86 optimized versions,
 * depending on the CPU flags reported by av_get_cpu_flags().
 *
 * Later checks overwrite earlier ones, so the most capable available
 * implementation ends up in the table.
 *
 * @param sad     table to update; entries 2 (8x8) and 3 (16x16) may be written
 * @param aligned selects the SSE2 16x16 variant:
 *                0 = src1 and src2 unaligned,
 *                1 = src1 aligned, src2 unaligned,
 *                2 = src1 and src2 aligned;
 *                any other value leaves the 16x16 entry untouched by SSE2
 */
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(flags))
        sad[2] = ff_pixelutils_sad_8x8_mmx;

    if (EXTERNAL_MMXEXT(flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmxext;
        sad[3] = ff_pixelutils_sad_16x16_mmxext;
    }

    if (!EXTERNAL_SSE2(flags))
        return;

    if (aligned == 0)
        sad[3] = ff_pixelutils_sad_16x16_sse2;   /* src1 unaligned, src2 unaligned */
    else if (aligned == 1)
        sad[3] = ff_pixelutils_sad_u_16x16_sse2; /* src1 aligned, src2 unaligned */
    else if (aligned == 2)
        sad[3] = ff_pixelutils_sad_a_16x16_sse2; /* src1 aligned, src2 aligned */
}
@ -0,0 +1,36 @@ |
||||
[OK] [UU] SAD [random] 2x2=314 ref=314 |
||||
[OK] [UU] SAD [random] 4x4=1129 ref=1129 |
||||
[OK] [UU] SAD [random] 8x8=4936 ref=4936 |
||||
[OK] [UU] SAD [random] 16x16=20704 ref=20704 |
||||
[OK] [AU] SAD [random] 2x2=440 ref=440 |
||||
[OK] [AU] SAD [random] 4x4=1317 ref=1317 |
||||
[OK] [AU] SAD [random] 8x8=5262 ref=5262 |
||||
[OK] [AU] SAD [random] 16x16=21040 ref=21040 |
||||
[OK] [AA] SAD [random] 2x2=196 ref=196 |
||||
[OK] [AA] SAD [random] 4x4=1225 ref=1225 |
||||
[OK] [AA] SAD [random] 8x8=4712 ref=4712 |
||||
[OK] [AA] SAD [random] 16x16=21184 ref=21184 |
||||
[OK] [UU] SAD [max] 2x2=1020 ref=1020 |
||||
[OK] [UU] SAD [max] 4x4=4080 ref=4080 |
||||
[OK] [UU] SAD [max] 8x8=16320 ref=16320 |
||||
[OK] [UU] SAD [max] 16x16=65280 ref=65280 |
||||
[OK] [AU] SAD [max] 2x2=1020 ref=1020 |
||||
[OK] [AU] SAD [max] 4x4=4080 ref=4080 |
||||
[OK] [AU] SAD [max] 8x8=16320 ref=16320 |
||||
[OK] [AU] SAD [max] 16x16=65280 ref=65280 |
||||
[OK] [AA] SAD [max] 2x2=1020 ref=1020 |
||||
[OK] [AA] SAD [max] 4x4=4080 ref=4080 |
||||
[OK] [AA] SAD [max] 8x8=16320 ref=16320 |
||||
[OK] [AA] SAD [max] 16x16=65280 ref=65280 |
||||
[OK] [UU] SAD [min] 2x2=0 ref=0 |
||||
[OK] [UU] SAD [min] 4x4=0 ref=0 |
||||
[OK] [UU] SAD [min] 8x8=0 ref=0 |
||||
[OK] [UU] SAD [min] 16x16=0 ref=0 |
||||
[OK] [AU] SAD [min] 2x2=0 ref=0 |
||||
[OK] [AU] SAD [min] 4x4=0 ref=0 |
||||
[OK] [AU] SAD [min] 8x8=0 ref=0 |
||||
[OK] [AU] SAD [min] 16x16=0 ref=0 |
||||
[OK] [AA] SAD [min] 2x2=0 ref=0 |
||||
[OK] [AA] SAD [min] 4x4=0 ref=0 |
||||
[OK] [AA] SAD [min] 8x8=0 ref=0 |
||||
[OK] [AA] SAD [min] 16x16=0 ref=0 |
Loading…
Reference in new issue