|
|
@ -22,6 +22,7 @@ |
|
|
|
#include "checkasm.h" |
|
|
|
#include "checkasm.h" |
|
|
|
#include "libavcodec/avcodec.h" |
|
|
|
#include "libavcodec/avcodec.h" |
|
|
|
#include "libavcodec/h264dsp.h" |
|
|
|
#include "libavcodec/h264dsp.h" |
|
|
|
|
|
|
|
#include "libavcodec/h264data.h" |
|
|
|
#include "libavutil/common.h" |
|
|
|
#include "libavutil/common.h" |
|
|
|
#include "libavutil/internal.h" |
|
|
|
#include "libavutil/internal.h" |
|
|
|
#include "libavutil/intreadwrite.h" |
|
|
|
#include "libavutil/intreadwrite.h" |
|
|
@ -223,10 +224,97 @@ static void check_idct(void) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
report("idct"); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void check_idct_multiple(void) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]); |
|
|
|
|
|
|
|
LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]); |
|
|
|
|
|
|
|
LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]); |
|
|
|
|
|
|
|
LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]); |
|
|
|
|
|
|
|
LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]); |
|
|
|
|
|
|
|
LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]); |
|
|
|
|
|
|
|
LOCAL_ALIGNED_16(uint8_t, nnzc, [15*8]); |
|
|
|
|
|
|
|
H264DSPContext h; |
|
|
|
|
|
|
|
int bit_depth, i, y, func; |
|
|
|
|
|
|
|
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (bit_depth = 8; bit_depth <= 10; bit_depth++) { |
|
|
|
|
|
|
|
ff_h264dsp_init(&h, bit_depth, 1); |
|
|
|
|
|
|
|
for (func = 0; func < 3; func++) { |
|
|
|
|
|
|
|
void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL; |
|
|
|
|
|
|
|
const char *name; |
|
|
|
|
|
|
|
int sz = 4, intra = 0; |
|
|
|
|
|
|
|
int block_offset[16] = { 0 }; |
|
|
|
|
|
|
|
switch (func) { |
|
|
|
|
|
|
|
case 0: |
|
|
|
|
|
|
|
idct = h.h264_idct_add16; |
|
|
|
|
|
|
|
name = "h264_idct_add16"; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
case 1: |
|
|
|
|
|
|
|
idct = h.h264_idct_add16intra; |
|
|
|
|
|
|
|
name = "h264_idct_add16intra"; |
|
|
|
|
|
|
|
intra = 1; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
case 2: |
|
|
|
|
|
|
|
idct = h.h264_idct8_add4; |
|
|
|
|
|
|
|
name = "h264_idct8_add4"; |
|
|
|
|
|
|
|
sz = 8; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
memset(nnzc, 0, 15 * 8); |
|
|
|
|
|
|
|
memset(coef_full, 0, 16 * 16 * SIZEOF_COEF); |
|
|
|
|
|
|
|
for (i = 0; i < 16 * 16; i += sz * sz) { |
|
|
|
|
|
|
|
uint8_t src[8 * 8 * 2]; |
|
|
|
|
|
|
|
uint8_t dst[8 * 8 * 2]; |
|
|
|
|
|
|
|
int16_t coef[8 * 8 * 2]; |
|
|
|
|
|
|
|
int index = i / sz; |
|
|
|
|
|
|
|
int block_y = (index / 16) * sz; |
|
|
|
|
|
|
|
int block_x = index % 16; |
|
|
|
|
|
|
|
int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL; |
|
|
|
|
|
|
|
int nnz = rnd() % 3; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
randomize_buffers(); |
|
|
|
|
|
|
|
if (sz == 4) |
|
|
|
|
|
|
|
dct4x4(coef, bit_depth); |
|
|
|
|
|
|
|
else |
|
|
|
|
|
|
|
dct8x8(coef, bit_depth); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (y = 0; y < sz; y++) |
|
|
|
|
|
|
|
memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL], |
|
|
|
|
|
|
|
&dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (nnz > 1) |
|
|
|
|
|
|
|
nnz = sz*sz; |
|
|
|
|
|
|
|
memcpy(&coef_full[i*SIZEOF_COEF/sizeof(coef[0])], |
|
|
|
|
|
|
|
coef, nnz * SIZEOF_COEF); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (intra && nnz == 1) |
|
|
|
|
|
|
|
nnz = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nnzc[scan8[i / 16]] = nnz; |
|
|
|
|
|
|
|
block_offset[i / 16] = offset; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (check_func(idct, "%s_%dbpp", name, bit_depth)) { |
|
|
|
|
|
|
|
memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF); |
|
|
|
|
|
|
|
memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF); |
|
|
|
|
|
|
|
memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL); |
|
|
|
|
|
|
|
memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL); |
|
|
|
|
|
|
|
call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc); |
|
|
|
|
|
|
|
call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc); |
|
|
|
|
|
|
|
if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) || |
|
|
|
|
|
|
|
memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF)) |
|
|
|
|
|
|
|
fail(); |
|
|
|
|
|
|
|
bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
 * Run the H.264 DSP IDCT checks: the single-block test check_idct() first,
 * then the multi-block test check_idct_multiple(), and finally call
 * report("idct") once for both.
 */
void checkasm_check_h264dsp(void)
{
    check_idct();
    check_idct_multiple();

    report("idct");
}
|
|
|