* commit '8bc67ec2c0d2b5444d51a1bed1d50f0e10d92717': Checkasm: assembly testing and benchmarking tool Merged-by: Michael Niedermayer <michael@niedermayer.cc>pull/140/head
commit
f14fc55969
8 changed files with 1086 additions and 0 deletions
@ -0,0 +1,33 @@ |
|||||||
|
# libavcodec tests
AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o

# Only pull in the libavcodec tests when libavcodec itself is enabled.
CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)

# Optional per-architecture additions; silently skipped when absent.
-include $(SRC_PATH)/tests/checkasm/$(ARCH)/Makefile

CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))

-include $(CHECKASMOBJS:.o=.d)

# Object directories are order-only prerequisites of the objects.
CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS)))
$(CHECKASMOBJS): | $(CHECKASMDIRS)
OBJDIRS += $(CHECKASMDIRS)

# We rely on function pointers intentionally declared without specified argument types.
tests/checkasm/%.o: CFLAGS := $(CFLAGS:-Wstrict-prototypes=-Wno-strict-prototypes)

CHECKASM := tests/checkasm/checkasm$(EXESUF)

$(CHECKASM): $(EXEOBJS) $(CHECKASMOBJS) $(FF_DEP_LIBS)
	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_EXTRALIBS)

checkasm: $(CHECKASM)

clean:: checkasmclean

checkasmclean:
	$(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)

# Neither target produces a file of its own name, so keep a stray file called
# "checkasm" or "checkasmclean" from ever making them look up to date.
.PHONY: checkasm checkasmclean
@ -0,0 +1,484 @@ |
|||||||
|
/*
|
||||||
|
* Assembly testing and benchmarking tool |
||||||
|
* Copyright (c) 2015 Henrik Gramner |
||||||
|
* Copyright (c) 2008 Loren Merritt |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or modify |
||||||
|
* it under the terms of the GNU General Public License as published by |
||||||
|
* the Free Software Foundation; either version 2 of the License, or |
||||||
|
* (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
* GNU General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU General Public License along |
||||||
|
* with FFmpeg; if not, write to the Free Software Foundation, Inc., |
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdarg.h> |
||||||
|
#include <stdio.h> |
||||||
|
#include <stdlib.h> |
||||||
|
#include <string.h> |
||||||
|
#include "checkasm.h" |
||||||
|
#include "libavutil/common.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/random_seed.h" |
||||||
|
|
||||||
|
#if ARCH_X86 |
||||||
|
#include "libavutil/x86/cpu.h" |
||||||
|
#endif |
||||||
|
|
||||||
|
#if HAVE_SETCONSOLETEXTATTRIBUTE |
||||||
|
#include <windows.h> |
||||||
|
#define COLOR_RED FOREGROUND_RED |
||||||
|
#define COLOR_GREEN FOREGROUND_GREEN |
||||||
|
#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) |
||||||
|
#else |
||||||
|
#define COLOR_RED 1 |
||||||
|
#define COLOR_GREEN 2 |
||||||
|
#define COLOR_YELLOW 3 |
||||||
|
#endif |
||||||
|
|
||||||
|
#if HAVE_UNISTD_H |
||||||
|
#include <unistd.h> |
||||||
|
#endif |
||||||
|
|
||||||
|
#if !HAVE_ISATTY |
||||||
|
#define isatty(fd) 1 |
||||||
|
#endif |
||||||
|
|
||||||
|
/* Test entry points to invoke; the list is terminated by a NULL sentinel */
static void (* const tests[])(void) = {
#if CONFIG_H264PRED
    checkasm_check_h264pred,
#endif
    NULL
};
||||||
|
|
||||||
|
/*
 * CPU flag sets to check, terminated by an all-zero entry.
 *   name   - label shown in the report output
 *   suffix - appended to function names (see cpu_suffix())
 *   flag   - AV_CPU_FLAG_* bits enabled for this step
 */
static const struct {
    const char *name;
    const char *suffix;
    int flag;
} cpus[] = {
#if ARCH_X86
    { "MMX",      "mmx",      AV_CPU_FLAG_MMX | AV_CPU_FLAG_CMOV },
    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW },
    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3 | AV_CPU_FLAG_SSE3SLOW },
    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3 | AV_CPU_FLAG_ATOM },
    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
#endif
    { NULL }
};
||||||
|
|
||||||
|
/* One registered implementation (C or a specific CPU flag set) of a function */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next; /* next implementation of the same function, or NULL */
    intptr_t (*func)();               /* the implementation itself */
    int ok;                           /* set on registration, cleared by checkasm_fail_func() */
    int cpu;                          /* CPU flags that were active on registration */
    int iterations;                   /* accumulated benchmark sample count */
    uint64_t cycles;                  /* accumulated benchmark timer total */
} CheckasmFuncVersion;

/* Binary search tree node, keyed by function name */
typedef struct CheckasmFunc {
    struct CheckasmFunc *child[2];    /* [0]: names sorting before this one, [1]: after */
    CheckasmFuncVersion versions;     /* head of the version list, embedded in the node */
    char name[1];                     /* NUL-terminated name; get_func() over-allocates the node to fit it */
} CheckasmFunc;
||||||
|
|
||||||
|
/* Internal state */ |
||||||
|
static struct { |
||||||
|
CheckasmFunc *funcs; |
||||||
|
CheckasmFunc *current_func; |
||||||
|
CheckasmFuncVersion *current_func_ver; |
||||||
|
const char *bench_pattern; |
||||||
|
int bench_pattern_len; |
||||||
|
int num_checked; |
||||||
|
int num_failed; |
||||||
|
int nop_time; |
||||||
|
int cpu_flag; |
||||||
|
const char *cpu_flag_name; |
||||||
|
} state; |
||||||
|
|
||||||
|
/* PRNG state */ |
||||||
|
AVLFG checkasm_lfg; |
||||||
|
|
||||||
|
/*
 * printf-style output to stderr, wrapped in the requested color when the
 * console supports it. Color support is probed on the first call only.
 */
static void color_printf(int color, const char *fmt, ...)
{
    static int use_color = -1;
    va_list arg;

#if HAVE_SETCONSOLETEXTATTRIBUTE
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;

        con       = GetStdHandle(STD_ERROR_HANDLE);
        use_color = 0;
        if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
            /* Remember the attributes so they can be restored after printing */
            org_attributes = con_info.wAttributes;
            use_color      = 1;
        }
    }
    if (use_color)
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
    if (use_color < 0) {
        const char *term = getenv("TERM");

        /* Requires TERM to be set to something other than "dumb" and stderr to be a tty */
        use_color = term && strcmp(term, "dumb") && isatty(2);
    }
    if (use_color)
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif

    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    if (!use_color)
        return;

    /* Put the console back the way it was */
#if HAVE_SETCONSOLETEXTATTRIBUTE
    SetConsoleTextAttribute(con, org_attributes);
#else
    fprintf(stderr, "\x1b[0m");
#endif
}
||||||
|
|
||||||
|
/* Deallocate a tree */ |
||||||
|
static void destroy_func_tree(CheckasmFunc *f) |
||||||
|
{ |
||||||
|
if (f) { |
||||||
|
CheckasmFuncVersion *v = f->versions.next; |
||||||
|
while (v) { |
||||||
|
CheckasmFuncVersion *next = v->next; |
||||||
|
free(v); |
||||||
|
v = next; |
||||||
|
} |
||||||
|
|
||||||
|
destroy_func_tree(f->child[0]); |
||||||
|
destroy_func_tree(f->child[1]); |
||||||
|
free(f); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* Allocate a zero-initialized block, clean up and exit on failure */ |
||||||
|
static void *checkasm_malloc(size_t size) |
||||||
|
{ |
||||||
|
void *ptr = calloc(1, size); |
||||||
|
if (!ptr) { |
||||||
|
fprintf(stderr, "checkasm: malloc failed\n"); |
||||||
|
destroy_func_tree(state.funcs); |
||||||
|
exit(1); |
||||||
|
} |
||||||
|
return ptr; |
||||||
|
} |
||||||
|
|
||||||
|
/* Get the suffix of the specified cpu flag */ |
||||||
|
static const char *cpu_suffix(int cpu) |
||||||
|
{ |
||||||
|
int i = FF_ARRAY_ELEMS(cpus); |
||||||
|
|
||||||
|
while (--i >= 0) |
||||||
|
if (cpu & cpus[i].flag) |
||||||
|
return cpus[i].suffix; |
||||||
|
|
||||||
|
return "c"; |
||||||
|
} |
||||||
|
|
||||||
|
#ifdef AV_READ_TIME |
||||||
|
/* qsort() comparator ordering uint16_t samples ascending */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    return lhs - rhs;
}
||||||
|
|
||||||
|
/* Measure the overhead of the timing code (in decicycles) */ |
||||||
|
static int measure_nop_time(void) |
||||||
|
{ |
||||||
|
uint16_t nops[10000]; |
||||||
|
int i, nop_sum = 0; |
||||||
|
|
||||||
|
for (i = 0; i < 10000; i++) { |
||||||
|
uint64_t t = AV_READ_TIME(); |
||||||
|
nops[i] = AV_READ_TIME() - t; |
||||||
|
} |
||||||
|
|
||||||
|
qsort(nops, 10000, sizeof(uint16_t), cmp_nop); |
||||||
|
for (i = 2500; i < 7500; i++) |
||||||
|
nop_sum += nops[i]; |
||||||
|
|
||||||
|
return nop_sum / 500; |
||||||
|
} |
||||||
|
|
||||||
|
/* Print benchmark results */ |
||||||
|
static void print_benchs(CheckasmFunc *f) |
||||||
|
{ |
||||||
|
if (f) { |
||||||
|
print_benchs(f->child[0]); |
||||||
|
|
||||||
|
/* Only print functions with at least one assembly version */ |
||||||
|
if (f->versions.cpu || f->versions.next) { |
||||||
|
CheckasmFuncVersion *v = &f->versions; |
||||||
|
do { |
||||||
|
if (v->iterations) { |
||||||
|
int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4; |
||||||
|
printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10); |
||||||
|
} |
||||||
|
} while ((v = v->next)); |
||||||
|
} |
||||||
|
|
||||||
|
print_benchs(f->child[1]); |
||||||
|
} |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
/*
 * Compare two function names byte-wise, except that a run of digits that is
 * longer on one side makes that side sort later, which keeps numbers in
 * natural order. Returns <0, 0 or >0 like strcmp().
 */
static int cmp_func_names(const char *a, const char *b)
{
    int ascii_diff, digit_diff;

    /* Skip the common prefix */
    while (*a == *b && *a) {
        a++;
        b++;
    }
    ascii_diff = *a - *b;

    /* Step over digits present on both sides to find which number is longer */
    while (av_isdigit(*a) && av_isdigit(*b)) {
        a++;
        b++;
    }
    digit_diff = av_isdigit(*a) - av_isdigit(*b);

    if (digit_diff)
        return digit_diff;
    return ascii_diff;
}
||||||
|
|
||||||
|
/* Get a node with the specified name, creating it if it doesn't exist */ |
||||||
|
static CheckasmFunc *get_func(const char *name, int length) |
||||||
|
{ |
||||||
|
CheckasmFunc *f, **f_ptr = &state.funcs; |
||||||
|
|
||||||
|
/* Search the tree for a matching node */ |
||||||
|
while ((f = *f_ptr)) { |
||||||
|
int cmp = cmp_func_names(name, f->name); |
||||||
|
if (!cmp) |
||||||
|
return f; |
||||||
|
|
||||||
|
f_ptr = &f->child[(cmp > 0)]; |
||||||
|
} |
||||||
|
|
||||||
|
/* Allocate and insert a new node into the tree */ |
||||||
|
f = *f_ptr = checkasm_malloc(sizeof(CheckasmFunc) + length); |
||||||
|
memcpy(f->name, name, length+1); |
||||||
|
|
||||||
|
return f; |
||||||
|
} |
||||||
|
|
||||||
|
/* Perform tests and benchmarks for the specified cpu flag if supported by the host */ |
||||||
|
static void check_cpu_flag(const char *name, int flag) |
||||||
|
{ |
||||||
|
int old_cpu_flag = state.cpu_flag; |
||||||
|
|
||||||
|
flag |= old_cpu_flag; |
||||||
|
av_set_cpu_flags_mask(flag); |
||||||
|
state.cpu_flag = av_get_cpu_flags(); |
||||||
|
|
||||||
|
if (!flag || state.cpu_flag != old_cpu_flag) { |
||||||
|
int i; |
||||||
|
|
||||||
|
state.cpu_flag_name = name; |
||||||
|
for (i = 0; tests[i]; i++) |
||||||
|
tests[i](); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* Print the name of the current CPU flag, but only do it once */ |
||||||
|
static void print_cpu_name(void) |
||||||
|
{ |
||||||
|
if (state.cpu_flag_name) { |
||||||
|
color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); |
||||||
|
state.cpu_flag_name = NULL; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
int main(int argc, char *argv[]) |
||||||
|
{ |
||||||
|
int i, seed, ret = 0; |
||||||
|
|
||||||
|
if (!tests[0] || !cpus[0].flag) { |
||||||
|
fprintf(stderr, "checkasm: no tests to perform\n"); |
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
if (argc > 1 && !strncmp(argv[1], "--bench", 7)) { |
||||||
|
#ifndef AV_READ_TIME |
||||||
|
fprintf(stderr, "checkasm: --bench is not supported on your system\n"); |
||||||
|
return 1; |
||||||
|
#endif |
||||||
|
if (argv[1][7] == '=') { |
||||||
|
state.bench_pattern = argv[1] + 8; |
||||||
|
state.bench_pattern_len = strlen(state.bench_pattern); |
||||||
|
} else |
||||||
|
state.bench_pattern = ""; |
||||||
|
|
||||||
|
argc--; |
||||||
|
argv++; |
||||||
|
} |
||||||
|
|
||||||
|
seed = (argc > 1) ? atoi(argv[1]) : av_get_random_seed(); |
||||||
|
fprintf(stderr, "checkasm: using random seed %u\n", seed); |
||||||
|
av_lfg_init(&checkasm_lfg, seed); |
||||||
|
|
||||||
|
check_cpu_flag(NULL, 0); |
||||||
|
for (i = 0; cpus[i].flag; i++) |
||||||
|
check_cpu_flag(cpus[i].name, cpus[i].flag); |
||||||
|
|
||||||
|
if (state.num_failed) { |
||||||
|
fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); |
||||||
|
ret = 1; |
||||||
|
} else { |
||||||
|
fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); |
||||||
|
#ifdef AV_READ_TIME |
||||||
|
if (state.bench_pattern) { |
||||||
|
state.nop_time = measure_nop_time(); |
||||||
|
printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10); |
||||||
|
print_benchs(state.funcs); |
||||||
|
} |
||||||
|
#endif |
||||||
|
} |
||||||
|
|
||||||
|
destroy_func_tree(state.funcs); |
||||||
|
return ret; |
||||||
|
} |
||||||
|
|
||||||
|
/* Decide whether or not the specified function needs to be tested and
|
||||||
|
* allocate/initialize data structures if needed. Returns a pointer to a |
||||||
|
* reference function if the function should be tested, otherwise NULL */ |
||||||
|
intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() |
||||||
|
{ |
||||||
|
char name_buf[256]; |
||||||
|
intptr_t (*ref)() = func; |
||||||
|
CheckasmFuncVersion *v; |
||||||
|
int name_length; |
||||||
|
va_list arg; |
||||||
|
|
||||||
|
va_start(arg, name); |
||||||
|
name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); |
||||||
|
va_end(arg); |
||||||
|
|
||||||
|
if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) |
||||||
|
return NULL; |
||||||
|
|
||||||
|
state.current_func = get_func(name_buf, name_length); |
||||||
|
v = &state.current_func->versions; |
||||||
|
|
||||||
|
if (v->func) { |
||||||
|
CheckasmFuncVersion *prev; |
||||||
|
do { |
||||||
|
/* Only test functions that haven't already been tested */ |
||||||
|
if (v->func == func) |
||||||
|
return NULL; |
||||||
|
|
||||||
|
if (v->ok) |
||||||
|
ref = v->func; |
||||||
|
|
||||||
|
prev = v; |
||||||
|
} while ((v = v->next)); |
||||||
|
|
||||||
|
v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); |
||||||
|
} |
||||||
|
|
||||||
|
v->func = func; |
||||||
|
v->ok = 1; |
||||||
|
v->cpu = state.cpu_flag; |
||||||
|
state.current_func_ver = v; |
||||||
|
|
||||||
|
if (state.cpu_flag) |
||||||
|
state.num_checked++; |
||||||
|
|
||||||
|
return ref; |
||||||
|
} |
||||||
|
|
||||||
|
/* Decide whether or not the current function needs to be benchmarked */ |
||||||
|
int checkasm_bench_func(void) |
||||||
|
{ |
||||||
|
return !state.num_failed && state.bench_pattern && |
||||||
|
!strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len); |
||||||
|
} |
||||||
|
|
||||||
|
/* Indicate that the current test has failed */ |
||||||
|
void checkasm_fail_func(const char *msg, ...) |
||||||
|
{ |
||||||
|
if (state.current_func_ver->cpu && state.current_func_ver->ok) { |
||||||
|
va_list arg; |
||||||
|
|
||||||
|
print_cpu_name(); |
||||||
|
fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); |
||||||
|
va_start(arg, msg); |
||||||
|
vfprintf(stderr, msg, arg); |
||||||
|
va_end(arg); |
||||||
|
fprintf(stderr, ")\n"); |
||||||
|
|
||||||
|
state.current_func_ver->ok = 0; |
||||||
|
state.num_failed++; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* Update benchmark results of the current function */ |
||||||
|
void checkasm_update_bench(int iterations, uint64_t cycles) |
||||||
|
{ |
||||||
|
state.current_func_ver->iterations += iterations; |
||||||
|
state.current_func_ver->cycles += cycles; |
||||||
|
} |
||||||
|
|
||||||
|
/* Print the outcome of all tests performed since the last time this function was called */ |
||||||
|
void checkasm_report(const char *name, ...) |
||||||
|
{ |
||||||
|
static int prev_checked, prev_failed, max_length; |
||||||
|
|
||||||
|
if (state.num_checked > prev_checked) { |
||||||
|
print_cpu_name(); |
||||||
|
|
||||||
|
if (*name) { |
||||||
|
int pad_length = max_length; |
||||||
|
va_list arg; |
||||||
|
|
||||||
|
fprintf(stderr, " - "); |
||||||
|
va_start(arg, name); |
||||||
|
pad_length -= vfprintf(stderr, name, arg); |
||||||
|
va_end(arg); |
||||||
|
fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); |
||||||
|
} else |
||||||
|
fprintf(stderr, " - %-*s [", max_length, state.current_func->name); |
||||||
|
|
||||||
|
if (state.num_failed == prev_failed) |
||||||
|
color_printf(COLOR_GREEN, "OK"); |
||||||
|
else |
||||||
|
color_printf(COLOR_RED, "FAILED"); |
||||||
|
fprintf(stderr, "]\n"); |
||||||
|
|
||||||
|
prev_checked = state.num_checked; |
||||||
|
prev_failed = state.num_failed; |
||||||
|
} else if (!state.cpu_flag) { |
||||||
|
int length; |
||||||
|
|
||||||
|
/* Calculate the amount of padding required to make the output vertically aligned */ |
||||||
|
if (*name) { |
||||||
|
va_list arg; |
||||||
|
va_start(arg, name); |
||||||
|
length = vsnprintf(NULL, 0, name, arg); |
||||||
|
va_end(arg); |
||||||
|
} else |
||||||
|
length = strlen(state.current_func->name); |
||||||
|
|
||||||
|
if (length > max_length) |
||||||
|
max_length = length; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,115 @@ |
|||||||
|
/*
|
||||||
|
* Assembly testing and benchmarking tool |
||||||
|
* Copyright (c) 2015 Henrik Gramner |
||||||
|
* Copyright (c) 2008 Loren Merritt |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or modify |
||||||
|
* it under the terms of the GNU General Public License as published by |
||||||
|
* the Free Software Foundation; either version 2 of the License, or |
||||||
|
* (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
* GNU General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU General Public License along |
||||||
|
* with FFmpeg; if not, write to the Free Software Foundation, Inc., |
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef CHECKASM_H |
||||||
|
#define CHECKASM_H |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include "config.h" |
||||||
|
#include "libavutil/avstring.h" |
||||||
|
#include "libavutil/lfg.h" |
||||||
|
#include "libavutil/timer.h" |
||||||
|
|
||||||
|
void checkasm_check_h264pred(void); |
||||||
|
|
||||||
|
intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() av_printf_format(2, 3); |
||||||
|
int checkasm_bench_func(void); |
||||||
|
void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2); |
||||||
|
void checkasm_update_bench(int iterations, uint64_t cycles); |
||||||
|
void checkasm_report(const char *name, ...) av_printf_format(1, 2); |
||||||
|
|
||||||
|
extern AVLFG checkasm_lfg; |
||||||
|
#define rnd() av_lfg_get(&checkasm_lfg) |
||||||
|
|
||||||
|
/* Set by check_func(): the reference implementation and the one under test */
static av_unused intptr_t (*func_ref)();
static av_unused intptr_t (*func_new)();

#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */

/* Decide whether or not the specified function needs to be tested.
 * Evaluates to non-zero when it does, with func_new/func_ref set up for
 * call_new()/call_ref(). */
#define check_func(func, ...) ((func_new = (intptr_t (*)())func) &&\
                              (func_ref = checkasm_check_func(func_new, __VA_ARGS__)))

/* Indicate that the current test has failed, tagging the report with file:line */
#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)

/* Print the test outcome; with no arguments the label is the empty string */
#define report(...) checkasm_report("" __VA_ARGS__)

/* Call the reference function */
#define call_ref(...) func_ref(__VA_ARGS__)

#if ARCH_X86 && HAVE_YASM
/* Verifies that clobbered callee-saved registers are properly saved and restored */
intptr_t checkasm_checked_call(intptr_t (*func)(), ...);
#endif
||||||
|
|
||||||
|
/* Call the function under test */
#if ARCH_X86_64 && HAVE_YASM
/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
 * This is done by clobbering the stack with junk around the stack pointer and calling the
 * assembly function through checkasm_checked_call with added dummy arguments, which forces
 * all real arguments to be passed on the stack and not in registers. For 32-bit arguments
 * the upper half of the 64-bit register locations on the stack will now contain junk, which
 * will cause misbehaving functions to either produce incorrect output or segfault. Note that
 * even though this works extremely well in practice, it's technically not guaranteed and
 * false negatives are theoretically possible, but there can never be any false positives.
 */
void checkasm_stack_clobber(uint64_t clobber, ...);
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
                                              CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
                      checkasm_checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
#elif ARCH_X86_32 && HAVE_YASM
/* 32-bit x86: only the callee-saved register check applies */
#define call_new(...) checkasm_checked_call(func_new, __VA_ARGS__)
#else
/* No checking wrapper available: call directly */
#define call_new(...) func_new(__VA_ARGS__)
#endif
||||||
|
|
||||||
|
/* Benchmark the function under test.
 * Each of the BENCH_RUNS samples times four back-to-back calls. The very
 * first sample is always dropped, and so is any later sample that exceeds
 * four times the running average of the samples kept so far. Expands to
 * nothing when no cycle timer is available. */
#ifdef AV_READ_TIME
#define bench_new(...)\
    do {\
        if (checkasm_bench_func()) {\
            intptr_t (*tfunc)() = func_new;\
            uint64_t tsum = 0;\
            int ti, tcount = 0;\
            for (ti = 0; ti < BENCH_RUNS; ti++) {\
                uint64_t t = AV_READ_TIME();\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                tfunc(__VA_ARGS__);\
                t = AV_READ_TIME() - t;\
                if (ti > 0 && t*tcount <= tsum*4) {\
                    tsum += t;\
                    tcount++;\
                }\
            }\
            checkasm_update_bench(tcount, tsum);\
        }\
    } while (0)
#else
#define bench_new(...)
#endif
||||||
|
|
||||||
|
#endif |
@ -0,0 +1,252 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015 Henrik Gramner |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or modify |
||||||
|
* it under the terms of the GNU General Public License as published by |
||||||
|
* the Free Software Foundation; either version 2 of the License, or |
||||||
|
* (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
* GNU General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU General Public License along |
||||||
|
* with FFmpeg; if not, write to the Free Software Foundation, Inc., |
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <string.h> |
||||||
|
#include "checkasm.h" |
||||||
|
#include "libavcodec/avcodec.h" |
||||||
|
#include "libavcodec/h264pred.h" |
||||||
|
#include "libavutil/common.h" |
||||||
|
#include "libavutil/intreadwrite.h" |
||||||
|
|
||||||
|
/* Codecs exercised by the predictor tests; the index selects the row of the mode-name tables */
static const int codec_ids[4] = {
    AV_CODEC_ID_H264,
    AV_CODEC_ID_VP8,
    AV_CODEC_ID_RV40,
    AV_CODEC_ID_SVQ3,
};
||||||
|
|
||||||
|
static const char * const pred4x4_modes[4][15] = { |
||||||
|
{ /* H264 */ |
||||||
|
[VERT_PRED ] = "vertical", |
||||||
|
[HOR_PRED ] = "horizontal", |
||||||
|
[DC_PRED ] = "dc", |
||||||
|
[DIAG_DOWN_LEFT_PRED ] = "down_left", |
||||||
|
[DIAG_DOWN_RIGHT_PRED] = "down_right", |
||||||
|
[VERT_RIGHT_PRED ] = "vertical_right", |
||||||
|
[HOR_DOWN_PRED ] = "horizontal_right", |
||||||
|
[VERT_LEFT_PRED ] = "vertical_left", |
||||||
|
[HOR_UP_PRED ] = "horizontal_up", |
||||||
|
[LEFT_DC_PRED ] = "left_dc", |
||||||
|
[TOP_DC_PRED ] = "top_dc", |
||||||
|
[DC_128_PRED ] = "dc_128", |
||||||
|
}, |
||||||
|
{ /* VP8 */ |
||||||
|
[VERT_PRED ] = "vertical_vp8", |
||||||
|
[HOR_PRED ] = "horizontal_vp8", |
||||||
|
[VERT_LEFT_PRED] = "vertical_left_vp8", |
||||||
|
[TM_VP8_PRED ] = "tm_vp8", |
||||||
|
[DC_127_PRED ] = "dc_127_vp8", |
||||||
|
[DC_129_PRED ] = "dc_129_vp8", |
||||||
|
}, |
||||||
|
{ /* RV40 */ |
||||||
|
[DIAG_DOWN_LEFT_PRED ] = "down_left_rv40", |
||||||
|
[VERT_LEFT_PRED ] = "vertical_left_rv40", |
||||||
|
[HOR_UP_PRED ] = "horizontal_up_rv40", |
||||||
|
[DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40", |
||||||
|
[HOR_UP_PRED_RV40_NODOWN ] = "horizontal_up_nodown_rv40", |
||||||
|
[VERT_LEFT_PRED_RV40_NODOWN ] = "vertical_left_nodown_rv40", |
||||||
|
}, |
||||||
|
{ /* SVQ3 */ |
||||||
|
[DIAG_DOWN_LEFT_PRED] = "down_left_svq3", |
||||||
|
}, |
||||||
|
}; |
||||||
|
|
||||||
|
static const char * const pred8x8_modes[4][11] = { |
||||||
|
{ /* H264 */ |
||||||
|
[DC_PRED8x8 ] = "dc", |
||||||
|
[HOR_PRED8x8 ] = "horizontal", |
||||||
|
[VERT_PRED8x8 ] = "vertical", |
||||||
|
[PLANE_PRED8x8 ] = "plane", |
||||||
|
[LEFT_DC_PRED8x8 ] = "left_dc", |
||||||
|
[TOP_DC_PRED8x8 ] = "top_dc", |
||||||
|
[DC_128_PRED8x8 ] = "dc_128", |
||||||
|
[ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t", |
||||||
|
[ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt", |
||||||
|
[ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00", |
||||||
|
[ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0", |
||||||
|
}, |
||||||
|
{ /* VP8 */ |
||||||
|
[PLANE_PRED8x8 ] = "tm_vp8", |
||||||
|
[DC_127_PRED8x8] = "dc_127_vp8", |
||||||
|
[DC_129_PRED8x8] = "dc_129_vp8", |
||||||
|
}, |
||||||
|
{ /* RV40 */ |
||||||
|
[DC_PRED8x8 ] = "dc_rv40", |
||||||
|
[LEFT_DC_PRED8x8] = "left_dc_rv40", |
||||||
|
[TOP_DC_PRED8x8 ] = "top_dc_rv40", |
||||||
|
}, |
||||||
|
{ /* SVQ3 */ |
||||||
|
}, |
||||||
|
}; |
||||||
|
|
||||||
|
static const char * const pred16x16_modes[4][9] = { |
||||||
|
{ /* H264 */ |
||||||
|
[DC_PRED8x8 ] = "dc", |
||||||
|
[HOR_PRED8x8 ] = "horizontal", |
||||||
|
[VERT_PRED8x8 ] = "vertical", |
||||||
|
[PLANE_PRED8x8 ] = "plane", |
||||||
|
[LEFT_DC_PRED8x8] = "left_dc", |
||||||
|
[TOP_DC_PRED8x8 ] = "top_dc", |
||||||
|
[DC_128_PRED8x8 ] = "dc_128", |
||||||
|
}, |
||||||
|
{ /* VP8 */ |
||||||
|
[PLANE_PRED8x8 ] = "tm_vp8", |
||||||
|
[DC_127_PRED8x8] = "dc_127_vp8", |
||||||
|
[DC_129_PRED8x8] = "dc_129_vp8", |
||||||
|
}, |
||||||
|
{ /* RV40 */ |
||||||
|
[PLANE_PRED8x8] = "plane_rv40", |
||||||
|
}, |
||||||
|
{ /* SVQ3 */ |
||||||
|
[PLANE_PRED8x8] = "plane_svq3", |
||||||
|
}, |
||||||
|
}; |
||||||
|
|
||||||
|
/* Per-32-bit-word masks applied to random data, indexed by bit_depth - 8:
 * all bits for 8-bit, 9 bits and 10 bits per 16-bit lane otherwise */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };

/* Bytes per pixel for the bit_depth in scope */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Size in bytes of each of the two test buffers */
#define BUF_SIZE (3*16*17)

/* Register a predictor for testing; a NULL mode_name means "not applicable".
 * H.264 names carry the bit depth as a suffix, other codecs do not.
 * Expects `codec` and `bit_depth` in the calling scope. */
#define check_pred_func(func, name, mode_name)\
    (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?\
                   check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) :\
                   check_func(func, "pred%s_%s", name, mode_name)))

/* Fill buf0 and buf1 with the same masked random words.
 * Expects `buf0`, `buf1` and `bit_depth` in the calling scope. */
#define randomize_buffers()\
    do {\
        uint32_t mask = pixel_mask[bit_depth-8];\
        int i;\
        for (i = 0; i < BUF_SIZE; i += 4) {\
            uint32_t r = rnd() & mask;\
            AV_WN32A(buf0+i, r);\
            AV_WN32A(buf1+i, r);\
        }\
    } while (0)

#define src0 (buf0 + 4*16) /* Offset to allow room for top and left */
#define src1 (buf1 + 4*16)
||||||
|
|
||||||
|
/* Compare every available 4x4 predictor against its reference.
 * Only runs for chroma_format 1. */
static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    uint8_t *topright;
    int pred_mode;

    if (chroma_format != 1)
        return;

    topright = buf0 + 2*16;
    for (pred_mode = 0; pred_mode < 15; pred_mode++) {
        if (!check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode]))
            continue;

        /* Both buffers start out identical, so any difference afterwards is a mismatch */
        randomize_buffers();
        call_ref(src0, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
        call_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
    }
}
||||||
|
|
||||||
|
/* Compare every available 8x8 predictor against its reference.
 * With chroma_format 2 the tests are reported under the "8x16" name. */
static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    const char *size_name = (chroma_format == 2) ? "8x16" : "8x8";
    int pred_mode;

    for (pred_mode = 0; pred_mode < 11; pred_mode++) {
        if (!check_pred_func(h->pred8x8[pred_mode], size_name, pred8x8_modes[codec][pred_mode]))
            continue;

        /* Both buffers start out identical, so any difference afterwards is a mismatch */
        randomize_buffers();
        call_ref(src0, (ptrdiff_t)24*SIZEOF_PIXEL);
        call_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL);
    }
}
||||||
|
|
||||||
|
/* Compare every available 16x16 predictor against its reference.
 * Only runs for chroma_format 1. */
static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                            int codec, int chroma_format, int bit_depth)
{
    int pred_mode;

    if (chroma_format != 1)
        return;

    for (pred_mode = 0; pred_mode < 9; pred_mode++) {
        if (!check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode]))
            continue;

        /* Both buffers start out identical, so any difference afterwards is a mismatch */
        randomize_buffers();
        call_ref(src0, (ptrdiff_t)48);
        call_new(src1, (ptrdiff_t)48);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, (ptrdiff_t)48);
    }
}
||||||
|
|
||||||
|
/* Compare every available 8x8l predictor against its reference, once for each
 * combination of the top-left / top-right availability flags.
 * Only runs for H.264 with chroma_format 1; mode names are shared with the 4x4 table. */
static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                           int codec, int chroma_format, int bit_depth)
{
    int pred_mode, neighbors;

    if (chroma_format != 1 || codec_ids[codec] != AV_CODEC_ID_H264)
        return;

    for (pred_mode = 0; pred_mode < 12; pred_mode++) {
        if (!check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode]))
            continue;

        /* Steps through 0x0000, 0x4000, 0x8000 and 0xc000 */
        for (neighbors = 0; neighbors <= 0xc000; neighbors += 0x4000) {
            int has_topleft  = neighbors & 0x8000;
            int has_topright = neighbors & 0x4000;

            if (!has_topleft && (pred_mode == DIAG_DOWN_RIGHT_PRED || pred_mode == VERT_RIGHT_PRED))
                continue; /* Those aren't allowed according to the spec */

            randomize_buffers();
            call_ref(src0, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
            call_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();
            bench_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
        }
    }
}
||||||
|
|
||||||
|
/* TODO: Add tests for H.264 lossless H/V prediction */ |
||||||
|
|
||||||
|
void checkasm_check_h264pred(void) |
||||||
|
{ |
||||||
|
static const struct { |
||||||
|
void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int); |
||||||
|
const char *name; |
||||||
|
} tests[] = { |
||||||
|
{ check_pred4x4, "pred4x4" }, |
||||||
|
{ check_pred8x8, "pred8x8" }, |
||||||
|
{ check_pred16x16, "pred16x16" }, |
||||||
|
{ check_pred8x8l, "pred8x8l" }, |
||||||
|
}; |
||||||
|
|
||||||
|
DECLARE_ALIGNED(16, uint8_t, buf0)[BUF_SIZE]; |
||||||
|
DECLARE_ALIGNED(16, uint8_t, buf1)[BUF_SIZE]; |
||||||
|
H264PredContext h; |
||||||
|
int test, codec, chroma_format, bit_depth; |
||||||
|
|
||||||
|
for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) { |
||||||
|
for (codec = 0; codec < 4; codec++) { |
||||||
|
int codec_id = codec_ids[codec]; |
||||||
|
for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++) |
||||||
|
for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) { |
||||||
|
ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format); |
||||||
|
tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth); |
||||||
|
} |
||||||
|
} |
||||||
|
report("%s", tests[test].name); |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,6 @@ |
|||||||
|
# Add the x86 assembly helper object when HAVE_YASM selects this list.
CHECKASMOBJS-$(HAVE_YASM) += x86/checkasm.o

# Assemble the checkasm x86 sources. The directory of the source file is
# added to the include path, and the output of the -M run is redirected to
# the .d file that sits next to the object.
tests/checkasm/x86/%.o: tests/checkasm/x86/%.asm
	$(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
	$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
	-$(STRIP) $(STRIPFLAGS) $@
|
@ -0,0 +1,193 @@ |
|||||||
|
;***************************************************************************** |
||||||
|
;* Assembly testing and benchmarking tool |
||||||
|
;* Copyright (c) 2008 Loren Merritt |
||||||
|
;* Copyright (c) 2012 Henrik Gramner |
||||||
|
;* |
||||||
|
;* This file is part of FFmpeg. |
||||||
|
;* |
||||||
|
;* FFmpeg is free software; you can redistribute it and/or modify |
||||||
|
;* it under the terms of the GNU General Public License as published by |
||||||
|
;* the Free Software Foundation; either version 2 of the License, or |
||||||
|
;* (at your option) any later version. |
||||||
|
;* |
||||||
|
;* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
;* GNU General Public License for more details. |
||||||
|
;* |
||||||
|
;* You should have received a copy of the GNU General Public License |
||||||
|
;* along with this program; if not, write to the Free Software |
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
||||||
|
;***************************************************************************** |
||||||
|
|
||||||
|
%define private_prefix checkasm |
||||||
|
%include "libavutil/x86/x86inc.asm" |
||||||
|
|
||||||
|
SECTION_RODATA

; Message handed to fail_func when a register check fails.
error_message: db "failed to preserve register", 0

%if ARCH_X86_64
; just random numbers to reduce the chance of incidental match
ALIGN 16
; 128-bit patterns: checked_call loads x6-x15 into m6-m15 on WIN64 and
; compares the registers against them after the call.
x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
; 64-bit patterns: checked_call loads n<i> into r<i> for i = free_regs..14
; and compares the registers against them after the call.
n7:  dq 0x21f86d66c8ca00ce
n8:  dq 0x75b6ba21077c48ad
n9:  dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif
||||||
|
|
||||||
|
SECTION .text

; C function called by checked_call with error_message as its argument
; when a register check fails.
cextern fail_func

; max number of args used by any asm function.
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15
||||||
|
|
||||||
|
%if ARCH_X86_64 |
||||||
|
|
||||||
|
;-----------------------------------------------------------------------------
; int checkasm_stack_clobber(uint64_t clobber, ...)
;
; Fills the (max_args+6)*8 bytes directly below the incoming stack pointer
; with the value of the first argument, then restores the stack pointer.
;-----------------------------------------------------------------------------
cglobal stack_clobber, 1,2
    ; Clobber the stack with junk below the stack pointer
    %define size (max_args+6)*8
    SUB  rsp, size
    mov   r1, size-8            ; byte offset of the highest qword slot
.loop:
    mov [rsp+r1], r0            ; store the clobber value
    sub   r1, 8
    jge .loop                   ; the last store is the one at offset 0
    ADD  rsp, size
    RET
||||||
|
|
||||||
|
; Registers r<free_regs> through r14 are filled with the n<i> patterns before
; the call and verified afterwards; lower-numbered registers are not checked.
%if WIN64
    %assign free_regs 7
%else
    %assign free_regs 9
%endif

;-----------------------------------------------------------------------------
; intptr_t checkasm_checked_call(intptr_t (*func)(), ...)
;
; Calls func with the arguments that follow it, then checks that the
; pattern-filled registers still hold their patterns. If any of them changed,
; fail_func is called with error_message. The value func left in rax is
; returned in either case.
;-----------------------------------------------------------------------------
INIT_XMM
cglobal checked_call, 2,15,16,max_args*8+8
    mov  r6, r0                 ; keep the function pointer; r0 is reloaded below

    ; All arguments have been pushed on the stack instead of registers in order to
    ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
    mov  r0, r6mp
    mov  r1, r7mp
    mov  r2, r8mp
    mov  r3, r9mp
%if UNIX64
    mov  r4, r10mp
    mov  r5, r11mp
    ; Copy the remaining max_args-6 arguments into the local stack area,
    ; using r9 as scratch.
    %assign i 6
    %rep max_args-6
        mov  r9, [rsp+stack_offset+(i+1)*8]
        mov  [rsp+(i-6)*8], r9
        %assign i i+1
    %endrep
%else
    ; Copy the remaining max_args-4 arguments into the local stack area
    ; starting at slot 4, using r9 as scratch.
    %assign i 4
    %rep max_args-4
        mov  r9, [rsp+stack_offset+(i+7)*8]
        mov  [rsp+i*8], r9
        %assign i i+1
    %endrep
%endif

%if WIN64
    ; Fill m6-m15 with the x6-x15 patterns.
    %assign i 6
    %rep 16-6
        mova m %+ i, [x %+ i]
        %assign i i+1
    %endrep
%endif

    ; Fill r14 down to r<free_regs> with the matching n<i> patterns.
%assign i 14
%rep 15-free_regs
    mov r %+ i, [n %+ i]
    %assign i i-1
%endrep
    call r6
    ; XOR each checked register with its pattern and accumulate the result in
    ; r14; r14 ends up zero only if every register still held its pattern.
%assign i 14
%rep 15-free_regs
    xor r %+ i, [n %+ i]
    or  r14, r %+ i
    %assign i i-1
%endrep

%if WIN64
    ; Same check for m6-m15: accumulate the differences in m6, then fold the
    ; low 64 bits of the packed result into r14.
    %assign i 6
    %rep 16-6
        pxor m %+ i, [x %+ i]
        por  m6, m %+ i
        %assign i i+1
    %endrep
    packsswb m6, m6
    movq r5, m6
    or  r14, r5
%endif

    ; The zero flag comes from the last "or r14, ..." above.
    jz .ok
    mov  r9, rax                ; keep the return value across fail_func
    lea  r0, [error_message]
    call fail_func
    mov  rax, r9
.ok:
    RET
||||||
|
|
||||||
|
%else |
||||||
|
|
||||||
|
; just random numbers to reduce the chance of incidental match
%define n3 dword 0x6549315c
%define n4 dword 0xe02f3e23
%define n5 dword 0xb78d0d1d
%define n6 dword 0x33627ba7

;-----------------------------------------------------------------------------
; intptr_t checkasm_checked_call(intptr_t (*func)(), ...)
;
; Calls func with a copy of max_args stack arguments, then checks that r3-r6
; still hold the patterns loaded before the call. If any of them changed,
; fail_func is called with error_message. The value func left in eax is
; returned in either case.
;-----------------------------------------------------------------------------
cglobal checked_call, 1,7
    ; Fill r3-r6 with known patterns.
    mov r3, n3
    mov r4, n4
    mov r5, n5
    mov r6, n6
    ; Re-push max_args dwords of arguments for the callee. Every PUSH lowers
    ; esp by 4, so the constant offset addresses the preceding dword each time.
    ; NOTE(review): the 20 presumably covers the return address plus four
    ; saved registers - confirm against the x86inc prologue.
%rep max_args
    PUSH dword [esp+20+max_args*4]
%endrep
    call r0
    ; XOR each register with its pattern and OR the results together; r3 ends
    ; up zero only if all four registers still held their patterns.
    xor r3, n3
    xor r4, n4
    xor r5, n5
    xor r6, n6
    or  r3, r4
    or  r5, r6
    or  r3, r5
    jz .ok
    mov r3, eax                 ; keep the return value across fail_func
    lea r0, [error_message]
    mov [esp], r0               ; the message replaces the first pushed argument
    call fail_func
    mov eax, r3
.ok:
    add esp, max_args*4         ; drop the argument copies pushed above
    REP_RET
||||||
|
|
||||||
|
%endif ; ARCH_X86_64 |
Loading…
Reference in new issue