|
|
|
@ -26,10 +26,22 @@ |
|
|
|
|
#include "libavutil/x86/cpu.h" |
|
|
|
|
#include "libavfilter/gradfun.h" |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
|
void ff_gradfun_filter_line_mmxext(intptr_t x, uint8_t *dst, uint8_t *src, |
|
|
|
|
uint16_t *dc, int thresh, |
|
|
|
|
const uint16_t *dithers); |
|
|
|
|
|
|
|
|
|
void ff_gradfun_filter_line_ssse3(intptr_t x, uint8_t *dst, uint8_t *src, |
|
|
|
|
uint16_t *dc, int thresh, |
|
|
|
|
const uint16_t *dithers); |
|
|
|
|
|
|
|
|
|
void ff_gradfun_blur_line_movdqa_sse2(intptr_t x, uint16_t *buf, |
|
|
|
|
uint16_t *buf1, uint16_t *dc, |
|
|
|
|
uint8_t *src1, uint8_t *src2); |
|
|
|
|
void ff_gradfun_blur_line_movdqu_sse2(intptr_t x, uint16_t *buf, |
|
|
|
|
uint16_t *buf1, uint16_t *dc, |
|
|
|
|
uint8_t *src1, uint8_t *src2); |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
|
static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc, |
|
|
|
|
int width, int thresh, |
|
|
|
|
const uint16_t *dithers) |
|
|
|
@ -37,45 +49,44 @@ static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc, |
|
|
|
|
intptr_t x; |
|
|
|
|
if (width & 3) { |
|
|
|
|
x = width & ~3; |
|
|
|
|
ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers); |
|
|
|
|
ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, |
|
|
|
|
width - x, thresh, dithers); |
|
|
|
|
width = x; |
|
|
|
|
} |
|
|
|
|
x = -width; |
|
|
|
|
ff_gradfun_filter_line_mmxext(x, dst + width, src + width, dc + width/2, |
|
|
|
|
ff_gradfun_filter_line_mmxext(x, dst + width, src + width, dc + width / 2, |
|
|
|
|
thresh, dithers); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void ff_gradfun_filter_line_ssse3(intptr_t x, uint8_t *dst, uint8_t *src, |
|
|
|
|
uint16_t *dc, int thresh, |
|
|
|
|
const uint16_t *dithers); |
|
|
|
|
static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) |
|
|
|
|
static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, |
|
|
|
|
int width, int thresh, |
|
|
|
|
const uint16_t *dithers) |
|
|
|
|
{ |
|
|
|
|
intptr_t x; |
|
|
|
|
if (width & 7) { |
|
|
|
|
// could be 10% faster if I somehow eliminated this
|
|
|
|
|
x = width & ~7; |
|
|
|
|
ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers); |
|
|
|
|
ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, |
|
|
|
|
width - x, thresh, dithers); |
|
|
|
|
width = x; |
|
|
|
|
} |
|
|
|
|
x = -width; |
|
|
|
|
ff_gradfun_filter_line_ssse3(x, dst + width, src + width, dc + width/2, |
|
|
|
|
ff_gradfun_filter_line_ssse3(x, dst + width, src + width, dc + width / 2, |
|
|
|
|
thresh, dithers); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void ff_gradfun_blur_line_movdqa_sse2(intptr_t x, uint16_t *buf, uint16_t *buf1, uint16_t *dc, uint8_t *src1, uint8_t *src2); |
|
|
|
|
void ff_gradfun_blur_line_movdqu_sse2(intptr_t x, uint16_t *buf, uint16_t *buf1, uint16_t *dc, uint8_t *src1, uint8_t *src2); |
|
|
|
|
static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) |
|
|
|
|
static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, |
|
|
|
|
uint8_t *src, int src_linesize, int width) |
|
|
|
|
{ |
|
|
|
|
intptr_t x = -2*width; |
|
|
|
|
if (((intptr_t) src | src_linesize) & 15) { |
|
|
|
|
intptr_t x = -2 * width; |
|
|
|
|
if (((intptr_t) src | src_linesize) & 15) |
|
|
|
|
ff_gradfun_blur_line_movdqu_sse2(x, buf + width, buf1 + width, |
|
|
|
|
dc + width, src + width * 2, |
|
|
|
|
src + width * 2 + src_linesize); |
|
|
|
|
} else { |
|
|
|
|
else |
|
|
|
|
ff_gradfun_blur_line_movdqa_sse2(x, buf + width, buf1 + width, |
|
|
|
|
dc + width, src + width * 2, |
|
|
|
|
src + width * 2 + src_linesize); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif /* HAVE_YASM */ |
|
|
|
|
|
|
|
|
|