|
|
|
@ -256,6 +256,77 @@ static void check_yuv2yuvX(int accurate) |
|
|
|
|
#undef FILTER_SIZES |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void check_yuv2nv12cX(int accurate) |
|
|
|
|
{ |
|
|
|
|
SwsContext *sws; |
|
|
|
|
SwsInternal *c; |
|
|
|
|
#define LARGEST_FILTER 16 |
|
|
|
|
const int filter_sizes[] = {2, 4, 8, 16}; |
|
|
|
|
#define LARGEST_INPUT_SIZE 512 |
|
|
|
|
static const int input_sizes[] = {8, 24, 128, 144, 256, 512}; |
|
|
|
|
const char *accurate_str = (accurate) ? "accurate" : "approximate"; |
|
|
|
|
|
|
|
|
|
declare_func_emms(AV_CPU_FLAG_MMX, void, enum AVPixelFormat dstFormat, |
|
|
|
|
const uint8_t *chrDither, const int16_t *chrFilter, |
|
|
|
|
int chrFilterSize, const int16_t **chrUSrc, |
|
|
|
|
const int16_t **chrVSrc, uint8_t *dest, int dstW); |
|
|
|
|
|
|
|
|
|
const int16_t *srcU[LARGEST_FILTER], *srcV[LARGEST_FILTER]; |
|
|
|
|
LOCAL_ALIGNED_16(int16_t, srcU_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); |
|
|
|
|
LOCAL_ALIGNED_16(int16_t, srcV_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); |
|
|
|
|
LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]); |
|
|
|
|
LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE * 2]); |
|
|
|
|
LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE * 2]); |
|
|
|
|
LOCAL_ALIGNED_16(uint8_t, dither, [LARGEST_INPUT_SIZE]); |
|
|
|
|
uint8_t d_val = rnd(); |
|
|
|
|
memset(dither, d_val, LARGEST_INPUT_SIZE); |
|
|
|
|
randomize_buffers((uint8_t*)srcU_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t)); |
|
|
|
|
randomize_buffers((uint8_t*)srcV_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t)); |
|
|
|
|
for (int i = 0; i < LARGEST_FILTER; i++) { |
|
|
|
|
srcU[i] = &srcU_pixels[i * LARGEST_INPUT_SIZE]; |
|
|
|
|
srcV[i] = &srcV_pixels[i * LARGEST_INPUT_SIZE]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
sws = sws_alloc_context(); |
|
|
|
|
sws->dst_format = AV_PIX_FMT_NV12; |
|
|
|
|
if (accurate) |
|
|
|
|
sws->flags |= SWS_ACCURATE_RND; |
|
|
|
|
if (sws_init_context(sws, NULL, NULL) < 0) |
|
|
|
|
fail(); |
|
|
|
|
|
|
|
|
|
c = sws_internal(sws); |
|
|
|
|
ff_sws_init_scale(c); |
|
|
|
|
for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++){ |
|
|
|
|
const int dstW = input_sizes[isi]; |
|
|
|
|
for (int fsi = 0; fsi < FF_ARRAY_ELEMS(filter_sizes); fsi++) { |
|
|
|
|
const int filter_size = filter_sizes[fsi]; |
|
|
|
|
for (int i = 0; i < filter_size; i++) |
|
|
|
|
filter_coeff[i] = -((1 << 12) / (filter_size - 1)); |
|
|
|
|
filter_coeff[rnd() % filter_size] = (1 << 13) - 1; |
|
|
|
|
|
|
|
|
|
if (check_func(c->yuv2nv12cX, "yuv2nv12cX_%d_%d_%s", filter_size, dstW, accurate_str)){ |
|
|
|
|
memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0])); |
|
|
|
|
memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0])); |
|
|
|
|
|
|
|
|
|
call_ref(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst0, dstW); |
|
|
|
|
call_new(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst1, dstW); |
|
|
|
|
|
|
|
|
|
if (cmp_off_by_n(dst0, dst1, dstW * 2 * sizeof(dst0[0]), accurate ? 0 : 2)) { |
|
|
|
|
fail(); |
|
|
|
|
printf("failed: yuv2nv12wX_%d_%d_%s\n", filter_size, dstW, accurate_str); |
|
|
|
|
show_differences(dst0, dst1, dstW * 2 * sizeof(dst0[0])); |
|
|
|
|
} |
|
|
|
|
if (dstW == LARGEST_INPUT_SIZE) |
|
|
|
|
bench_new(sws->dst_format, dither, &filter_coeff[0], filter_size, srcU, srcV, dst1, dstW); |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
sws_freeContext(sws); |
|
|
|
|
} |
|
|
|
|
#undef LARGEST_FILTER |
|
|
|
|
#undef LARGEST_INPUT_SIZE |
|
|
|
|
|
|
|
|
|
#undef SRC_PIXELS |
|
|
|
|
#define SRC_PIXELS 512 |
|
|
|
|
|
|
|
|
@ -373,4 +444,7 @@ void checkasm_check_sw_scale(void) |
|
|
|
|
check_yuv2yuvX(0); |
|
|
|
|
check_yuv2yuvX(1); |
|
|
|
|
report("yuv2yuvX"); |
|
|
|
|
check_yuv2nv12cX(0); |
|
|
|
|
check_yuv2nv12cX(1); |
|
|
|
|
report("yuv2nv12cX"); |
|
|
|
|
} |
|
|
|
|