From b2654069601e6137c4999bce024228ef969f4ebe Mon Sep 17 00:00:00 2001 From: Nguyen Anh Quynh Date: Fri, 3 Jan 2014 17:08:58 +0800 Subject: [PATCH] cache insns for fast lookup in mapping.c. based on the idea of Dang Hoang Vu --- arch/AArch64/mapping.c | 16 +++++++++++++-- arch/AArch64/mapping.h | 3 +++ arch/AArch64/module.c | 8 +++++++- arch/ARM/mapping.c | 21 ++++++++++++++++---- arch/ARM/mapping.h | 3 +++ arch/ARM/module.c | 6 ++++++ arch/Mips/mapping.c | 16 +++++++++++++-- arch/Mips/mapping.h | 3 +++ arch/Mips/module.c | 6 ++++++ arch/PowerPC/module.c | 5 +++++ arch/X86/mapping.c | 15 ++++++++++++-- arch/X86/mapping.h | 3 +++ arch/X86/module.c | 6 ++++++ cs.c | 4 ++++ cs_priv.h | 3 +++ include/capstone.h | 4 ++++ utils.c | 44 +++++++++++++++++++++++++----------------- utils.h | 5 +++-- 18 files changed, 140 insertions(+), 31 deletions(-) diff --git a/arch/AArch64/mapping.c b/arch/AArch64/mapping.c index 0fb5ec01..3ffb6f85 100644 --- a/arch/AArch64/mapping.c +++ b/arch/AArch64/mapping.c @@ -254,6 +254,8 @@ const char *AArch64_reg_name(csh handle, unsigned int reg) } static insn_map insns[] = { + { 0, 0, { 0 }, { 0 }, { 0 }, 0, 0 }, // dummy item: index 0 is reserved, insn_find() returns 0 for "not found" + { AArch64_ABS16b, ARM64_INS_ABS, { 0 }, { 0 }, { ARM64_GRP_NEON, 0 }, 0, 0 }, { AArch64_ABS2d, ARM64_INS_ABS, { 0 }, { 0 }, { ARM64_GRP_NEON, 0 }, 0, 0 }, { AArch64_ABS2s, ARM64_INS_ABS, { 0 }, { 0 }, { ARM64_GRP_NEON, 0 }, 0, 0 }, @@ -2990,10 +2992,13 @@ static insn_map alias_insns[] = { // { AArch64_SUBSxxx_lsl, ARM64_INS_NEGS, { 0 }, { ARM64_REG_NZCV, 0 }, { 0 } }, }; +static unsigned short *insn_cache = NULL; // id -> insns[] index, built on the first insn_find() call + +// given internal insn id, return public instruction info void AArch64_get_insn_id(cs_insn *insn, unsigned int id, int detail) { - int i = insn_find(insns, ARR_SIZE(insns), id); - if (i != -1) { + int i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache); + if (i != 0) { insn->id = insns[i].mapid; if (detail) { @@ -3523,3 +3528,10 @@ arm64_reg AArch64_map_insn(const char *name) return (i != -1)? 
i : ARM64_REG_INVALID; } +void AArch64_free_cache(void) +{ + if (insn_cache) + free(insn_cache); + + insn_cache = NULL; +} diff --git a/arch/AArch64/mapping.h b/arch/AArch64/mapping.h index 08b28702..4bcd40df 100644 --- a/arch/AArch64/mapping.h +++ b/arch/AArch64/mapping.h @@ -21,4 +21,7 @@ const char *AArch64_insn_name(csh handle, unsigned int id); // map instruction name to public instruction ID arm64_reg AArch64_map_insn(const char *name); +// free the cache built by insn_find() and reset it to NULL +void AArch64_free_cache(void); + #endif diff --git a/arch/AArch64/module.c b/arch/AArch64/module.c index cbf638b0..f22253cf 100644 --- a/arch/AArch64/module.c +++ b/arch/AArch64/module.c @@ -30,10 +30,16 @@ static cs_err option(cs_struct *handle, cs_opt_type type, size_t value) return CS_ERR_OK; } +static void destroy(cs_struct *handle) +{ + AArch64_free_cache(); +} + static void __attribute__ ((constructor)) __init_arm64__() { arch_init[CS_ARCH_ARM64] = init; - arch_option[CS_ARCH_ARM] = option; + arch_option[CS_ARCH_ARM64] = option; + arch_destroy[CS_ARCH_ARM64] = destroy; // support this arch all_arch |= (1 << CS_ARCH_ARM64); diff --git a/arch/ARM/mapping.c b/arch/ARM/mapping.c index 98e55752..d8b55846 100644 --- a/arch/ARM/mapping.c +++ b/arch/ARM/mapping.c @@ -135,6 +135,8 @@ const char *ARM_reg_name(csh handle, unsigned int reg) } static insn_map insns[] = { + { 0, 0, { 0 }, { 0 }, { 0 }, 0, 0 }, // dummy item: index 0 is reserved, insn_find() returns 0 for "not found" + { ARM_ADCri, ARM_INS_ADC, { ARM_REG_CPSR, 0 }, { ARM_REG_CPSR, 0 }, { ARM_GRP_ARM, 0 }, 0, 0 }, { ARM_ADCrr, ARM_INS_ADC, { ARM_REG_CPSR, 0 }, { ARM_REG_CPSR, 0 }, { ARM_GRP_ARM, 0 }, 0, 0 }, { ARM_ADCrsi, ARM_INS_ADC, { ARM_REG_CPSR, 0 }, { ARM_REG_CPSR, 0 }, { ARM_GRP_ARM, 0 }, 0, 0 }, @@ -2298,10 +2300,13 @@ static insn_map insns[] = { { ARM_tUXTH, ARM_INS_UXTH, { 0 }, { 0 }, { ARM_GRP_THUMB, ARM_GRP_THUMB1ONLY, ARM_GRP_V6, 0 }, 0, 0 }, }; + +static unsigned short *insn_cache = NULL; // id -> insns[] index, built on the first insn_find() call + void ARM_get_insn_id(cs_insn *insn, unsigned int id, int detail) { - int i = insn_find(insns, 
ARR_SIZE(insns), id); - if (i != -1) { + int i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache); + if (i != 0) { insn->id = insns[i].mapid; if (detail) { @@ -2788,11 +2793,19 @@ arm_reg ARM_map_insn(const char *name) bool ARM_rel_branch(unsigned int id) { - int i = insn_find(insns, ARR_SIZE(insns), id); - if (i != -1) + int i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache); + if (i != 0) return (insns[i].branch && !insns[i].indirect_branch); else { printf("ALERT: rel_branch() got incorrect id!\n"); return false; } } + +void ARM_free_cache(void) +{ + if (insn_cache) + free(insn_cache); + + insn_cache = NULL; +} diff --git a/arch/ARM/mapping.h b/arch/ARM/mapping.h index 10866fb2..62b8d92e 100644 --- a/arch/ARM/mapping.h +++ b/arch/ARM/mapping.h @@ -25,4 +25,7 @@ arm_reg ARM_map_insn(const char *name); // check if this insn is relative branch bool ARM_rel_branch(unsigned int insn_id); +// free the cache built by insn_find() and reset it to NULL +void ARM_free_cache(void); + #endif diff --git a/arch/ARM/module.c b/arch/ARM/module.c index 7484a341..54108dd1 100644 --- a/arch/ARM/module.c +++ b/arch/ARM/module.c @@ -43,10 +43,16 @@ static cs_err option(cs_struct *handle, cs_opt_type type, size_t value) return CS_ERR_OK; } +static void destroy(cs_struct *handle) +{ + ARM_free_cache(); +} + static void __attribute__ ((constructor)) __init_arm__() { arch_init[CS_ARCH_ARM] = init; arch_option[CS_ARCH_ARM] = option; + arch_destroy[CS_ARCH_ARM] = destroy; // support this arch all_arch |= (1 << CS_ARCH_ARM); diff --git a/arch/Mips/mapping.c b/arch/Mips/mapping.c index 4893ddbd..40362934 100644 --- a/arch/Mips/mapping.c +++ b/arch/Mips/mapping.c @@ -182,6 +182,8 @@ const char *Mips_reg_name(csh handle, unsigned int reg) } static insn_map insns[] = { + { 0, 0, { 0 }, { 0 }, { 0 }, 0, 0 }, // dummy item: index 0 is reserved, insn_find() returns 0 for "not found" + { Mips_ABSQ_S_PH, MIPS_INS_ABSQ_S, { 0 }, { MIPS_REG_DSPOUTFLAG20, 0 }, { MIPS_GRP_DSP, 0 }, 0, 0 }, { Mips_ABSQ_S_QB, MIPS_INS_ABSQ_S, { 0 }, { MIPS_REG_DSPOUTFLAG20, 0 }, { MIPS_GRP_DSPR2, 0 }, 0, 0 }, { 
Mips_ABSQ_S_W, MIPS_INS_ABSQ_S, { 0 }, { MIPS_REG_DSPOUTFLAG20, 0 }, { MIPS_GRP_DSP, 0 }, 0, 0 }, @@ -1386,6 +1388,8 @@ static insn_map alias_insns[] = { { Mips_SUBu, MIPS_INS_NEGU, { 0 }, { 0 }, { MIPS_GRP_STDENC, 0 }, 0, 0 }, }; +static unsigned short *insn_cache = NULL; // id -> insns[] index, built on the first insn_find() call + // given internal insn id, return public instruction info void Mips_get_insn_id(cs_insn *insn, unsigned int id, int detail) { @@ -1417,8 +1421,8 @@ void Mips_get_insn_id(cs_insn *insn, unsigned int id, int detail) } } - i = insn_find(insns, ARR_SIZE(insns), id); - if (i != -1) { + i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache); + if (i != 0) { insn->id = insns[i].mapid; if (detail) { @@ -2030,3 +2034,11 @@ mips_reg Mips_map_register(unsigned int r) // cannot find this register return 0; } + +void Mips_free_cache(void) +{ + if (insn_cache) + free(insn_cache); + + insn_cache = NULL; +} diff --git a/arch/Mips/mapping.h b/arch/Mips/mapping.h index 78642deb..7d9e74cf 100644 --- a/arch/Mips/mapping.h +++ b/arch/Mips/mapping.h @@ -25,4 +25,7 @@ mips_reg Mips_map_insn(const char *name); // map internal raw register to 'public' register mips_reg Mips_map_register(unsigned int r); +// free the cache built by insn_find() and reset it to NULL +void Mips_free_cache(void); + #endif diff --git a/arch/Mips/module.c b/arch/Mips/module.c index 73fd8f86..e4d12c9c 100644 --- a/arch/Mips/module.c +++ b/arch/Mips/module.c @@ -41,10 +41,16 @@ static cs_err option(cs_struct *handle, cs_opt_type type, size_t value) return CS_ERR_OK; } +static void destroy(cs_struct *handle) +{ + Mips_free_cache(); +} + static void __attribute__ ((constructor)) __init_mips__() { arch_init[CS_ARCH_MIPS] = init; arch_option[CS_ARCH_MIPS] = option; + arch_destroy[CS_ARCH_MIPS] = destroy; // support this arch all_arch |= (1 << CS_ARCH_MIPS); diff --git a/arch/PowerPC/module.c b/arch/PowerPC/module.c index 8f37347c..3e53d9bd 100644 --- a/arch/PowerPC/module.c +++ b/arch/PowerPC/module.c @@ -33,10 +33,15 @@ static cs_err option(cs_struct *handle, cs_opt_type type, size_t 
value) return CS_ERR_OK; } +static void destroy(cs_struct *handle) +{ +} + static void __attribute__ ((constructor)) __init_mips__() { arch_init[CS_ARCH_PPC] = init; arch_option[CS_ARCH_PPC] = option; + arch_destroy[CS_ARCH_PPC] = destroy; // support this arch all_arch |= (1 << CS_ARCH_PPC); diff --git a/arch/X86/mapping.c b/arch/X86/mapping.c index f668c78f..620a6264 100644 --- a/arch/X86/mapping.c +++ b/arch/X86/mapping.c @@ -1605,6 +1605,8 @@ x86_reg X86_map_insn(const char *name) #include "X86GenInstrInfo.inc" static insn_map insns[] = { + { 0, 0, { 0 }, { 0 }, { 0 }, 0, 0 }, // dummy item: index 0 is reserved, insn_find() returns 0 for "not found" + { X86_AAA, X86_INS_AAA, { 0 }, { 0 }, { X86_GRP_MODE32, 0 }, 0, 0 }, { X86_AAD8i8, X86_INS_AAD, { 0 }, { 0 }, { X86_GRP_MODE32, 0 }, 0, 0 }, { X86_AAM8i8, X86_INS_AAM, { 0 }, { 0 }, { X86_GRP_MODE32, 0 }, 0, 0 }, @@ -6604,11 +6606,13 @@ void X86_post_printer(csh handle, cs_insn *insn, char *insn_asm) } } +static unsigned short *insn_cache = NULL; // id -> insns[] index, built on the first insn_find() call + // given internal insn id, return public instruction info void X86_get_insn_id(cs_insn *insn, unsigned int id, int detail) { - int i = insn_find(insns, ARR_SIZE(insns), id); - if (i != -1) { + int i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache); + if (i != 0) { insn->id = insns[i].mapid; if (detail) { @@ -6636,3 +6640,10 @@ unsigned int X86_get_insn_id2(unsigned int id) return insn_reverse_id(insns, ARR_SIZE(insns), id); } +void X86_free_cache(void) +{ + if (insn_cache) + free(insn_cache); + + insn_cache = NULL; +} diff --git a/arch/X86/mapping.h b/arch/X86/mapping.h index a6ec3856..10656268 100644 --- a/arch/X86/mapping.h +++ b/arch/X86/mapping.h @@ -37,4 +37,7 @@ unsigned int X86_get_insn_id2(unsigned int insn_id); // post printer for X86. 
void X86_post_printer(csh handle, cs_insn *pub_insn, char *insn_asm); +// free the cache built by insn_find() and reset it to NULL +void X86_free_cache(void); + #endif diff --git a/arch/X86/module.c b/arch/X86/module.c index 6c008a94..eab9f4c5 100644 --- a/arch/X86/module.c +++ b/arch/X86/module.c @@ -44,10 +44,16 @@ static cs_err option(cs_struct *handle, cs_opt_type type, size_t value) return CS_ERR_OK; } +static void destroy(cs_struct *handle) +{ + X86_free_cache(); +} + static void __attribute__ ((constructor)) __init_x86__() { arch_init[CS_ARCH_X86] = init; arch_option[CS_ARCH_X86] = option; + arch_destroy[CS_ARCH_X86] = destroy; // support this arch all_arch |= (1 << CS_ARCH_X86); diff --git a/cs.c b/cs.c index 47fa14b4..52a0d286 100644 --- a/cs.c +++ b/cs.c @@ -15,6 +15,7 @@ cs_err (*arch_init[MAX_ARCH])(cs_struct *) = { NULL }; cs_err (*arch_option[MAX_ARCH]) (cs_struct*, cs_opt_type, size_t value); +void (*arch_destroy[MAX_ARCH]) (cs_struct*); unsigned int all_arch = 0; @@ -101,6 +102,9 @@ cs_err cs_close(csh handle) + // call the arch destructor first: it needs ud->arch, so ud must not be wiped or freed yet + if (arch_destroy[ud->arch]) + arch_destroy[ud->arch](ud); memset(ud, 0, sizeof(*ud)); free(ud); return CS_ERR_OK; } diff --git a/cs_priv.h b/cs_priv.h index d6598732..b80b7f8c 100644 --- a/cs_priv.h +++ b/cs_priv.h @@ -54,6 +54,9 @@ extern cs_err (*arch_init[MAX_ARCH]) (cs_struct *); // support cs_option() for all archs extern cs_err (*arch_option[MAX_ARCH]) (cs_struct*, cs_opt_type, size_t value); +// deinitialization functions: to be called when cs_close() is called +extern void (*arch_destroy[MAX_ARCH]) (cs_struct*); + extern unsigned int all_arch; #endif diff --git a/include/capstone.h b/include/capstone.h index 1ef0876d..64c889a8 100644 --- a/include/capstone.h +++ b/include/capstone.h @@ -190,6 +190,10 @@ cs_err cs_open(cs_arch arch, cs_mode mode, csh *handle); /* Close CS handle: MUST do to release the handle when it is not used anymore. + NOTE: this must only be called once Capstone is no longer in use at all, + not even to access the cs_insn array. 
The reason is that this API releases some + cached memory, thus access to any Capstone API after cs_close() might crash + your application. @handle: handle returned by cs_open() diff --git a/utils.c b/utils.c index 15040485..edf3c028 100644 --- a/utils.c +++ b/utils.c @@ -1,6 +1,7 @@ /* Capstone Disassembler Engine */ /* By Nguyen Anh Quynh , 2013> */ +#include <stdlib.h> #include <string.h> #include "utils.h" @@ -20,26 +21,32 @@ int str_in_list(char **list, char *s) return -1; } -// binary searching -int insn_find(insn_map *m, unsigned int max, unsigned int id) +// build the id -> insns[] index table (0 means "no such id"); returns NULL if allocation fails +static unsigned short *make_id2insn(insn_map *insns, unsigned int size) { - unsigned int i, begin, end; - - begin = 0; - end = max; - - while(begin <= end) { - i = (begin + end) / 2; - if (id == m[i].id) - return i; - else if (id < m[i].id) - end = i - 1; - else - begin = i + 1; - } + // NOTE: assume that the max id is always put at the end of insns array + unsigned short max_id = insns[size - 1].id; + unsigned int i; - // found nothing - return -1; + unsigned short *cache = (unsigned short *)calloc(max_id + 1, sizeof(*cache)); /* ids run 0..max_id inclusive, hence +1 */ + + for (i = 1; cache != NULL && i < size; i++) + cache[insns[i].id] = i; + + return cache; +} + +// look for @id in @insns, given its size in @max. first time call will update @cache. 
+// return 0 if not found (or if the cache could not be allocated) +unsigned short insn_find(insn_map *insns, unsigned int max, unsigned int id, unsigned short **cache) +{ + if (id > insns[max - 1].id) + return 0; + + if (*cache == NULL) + *cache = make_id2insn(insns, max); + + return *cache ? (*cache)[id] : 0; } int name2id(name_map* map, int max, const char *name) @@ -79,3 +86,4 @@ unsigned int count_positive(unsigned char *list) return c; } + diff --git a/utils.h b/utils.h index dc73c7ad..fb320fc2 100644 --- a/utils.h +++ b/utils.h @@ -29,8 +29,9 @@ typedef struct insn_map { // or -1 if given string is not in the list int str_in_list(char **list, char *s); -// binary searching in @m, given its size in @max, and @id -int insn_find(insn_map *m, unsigned int max, unsigned int id); +// look for @id in @m, given its size in @max. first time call will update @cache. +// return 0 if not found (or if the cache could not be allocated) +unsigned short insn_find(insn_map *m, unsigned int max, unsigned int id, unsigned short **cache); // map id to string typedef struct name_map {