Merge branch 'newapi' of https://github.com/aquynh/capstone into next

test2
Nguyen Anh Quynh 10 years ago
commit f0acace803
  1. 2
      CMakeLists.txt
  2. 119
      cs.c
  3. 90
      include/capstone.h
  4. 2
      tests/Makefile
  5. 9
      tests/README
  6. 3
      tests/test_detail.c
  7. 264
      tests/test_iter.c

@ -50,7 +50,7 @@ set(SOURCES
utils.c
)
set(TEST_SOURCES test.c test_detail.c test_skipdata.c)
set(TEST_SOURCES test.c test_detail.c test_skipdata.c test_iter.c)
## architecture support
if (CAPSTONE_ARM_SUPPORT)

119
cs.c

@ -238,9 +238,8 @@ cs_err cs_close(csh *handle)
if (ud->printer_info)
cs_mem_free(ud->printer_info);
// arch_destroy[ud->arch](ud);
cs_mem_free(ud->insn_cache);
memset(ud, 0, sizeof(*ud));
cs_mem_free(ud);
@ -410,7 +409,7 @@ static void skipdata_opstr(char *opstr, const uint8_t *buffer, size_t size)
CAPSTONE_EXPORT
size_t cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn)
{
struct cs_struct *handle = (struct cs_struct *)(uintptr_t)ud;
struct cs_struct *handle;
MCInst mci;
uint16_t insn_size;
size_t c = 0, i;
@ -427,6 +426,7 @@ size_t cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, si
unsigned int cache_size = INSN_CACHE_SIZE;
size_t next_offset;
handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle) {
// FIXME: how to handle this case:
// handle->errnum = CS_ERR_HANDLE;
@ -612,6 +612,119 @@ void cs_free(cs_insn *insn, size_t count)
cs_mem_free(insn);
}
// Allocate one cs_insn to be used as the output cache of cs_disasm_iter().
// @ud: handle returned by cs_open()
// @return: the new instruction, or NULL if @ud is NULL or memory is
//	exhausted (in the latter case the handle's errnum is set to CS_ERR_MEM).
// The @detail member is allocated only when detail mode is enabled on the
// handle; otherwise it is set to NULL.
CAPSTONE_EXPORT
cs_insn *cs_malloc(csh ud)
{
	cs_insn *insn;
	struct cs_struct *handle = (struct cs_struct *)(uintptr_t)ud;

	if (!handle) {
		// no valid handle, so there is nowhere to record an error code
		return NULL;
	}

	insn = cs_mem_malloc(sizeof(*insn));
	if (!insn) {
		// insufficient memory
		handle->errnum = CS_ERR_MEM;
		return NULL;
	}

	if (!handle->detail) {
		// detail mode is OFF: nothing more to allocate
		insn->detail = NULL;
		return insn;
	}

	// allocate memory for @detail pointer
	insn->detail = cs_mem_malloc(sizeof(*insn->detail));
	if (insn->detail == NULL) {
		// insufficient memory: do not leak the half-built instruction
		cs_mem_free(insn);
		handle->errnum = CS_ERR_MEM;
		return NULL;
	}

	return insn;
}
// iterator for instruction "single-stepping"
//
// Decode one instruction at *code into the caller-provided @insn. When the
// bytes cannot be decoded and SKIPDATA is enabled on the handle, a "data"
// pseudo-instruction (id 0) is emitted instead.
// On success *code and *address are advanced, and *size is decreased, by the
// number of bytes consumed; on failure they are left untouched.
// @ud: handle returned by cs_open()
// @insn: output instruction, e.g. one created by cs_malloc()
// @return: true if @insn was filled in, false otherwise.
CAPSTONE_EXPORT
bool cs_disasm_iter(csh ud, const uint8_t **code, size_t *size,
		uint64_t *address, cs_insn *insn)
{
	struct cs_struct *handle;
	uint16_t insn_size;
	MCInst mci;
	bool r;

	handle = (struct cs_struct *)(uintptr_t)ud;
	if (!handle) {
		// no valid handle, so there is nowhere to record an error code
		return false;
	}

	handle->errnum = CS_ERR_OK;

	MCInst_Init(&mci);
	mci.csh = handle;

	// relative branches need to know the address & size of current insn
	mci.address = *address;

	// save all the information for non-detailed mode
	mci.flat_insn = insn;
	mci.flat_insn->address = *address;
#ifdef CAPSTONE_DIET
	// zero out mnemonic & op_str
	mci.flat_insn->mnemonic[0] = '\0';
	mci.flat_insn->op_str[0] = '\0';
#endif

	r = handle->disasm(ud, *code, *size, &mci, &insn_size, *address, handle->getinsn_info);
	if (r) {
		SStream ss;
		SStream_Init(&ss);

		mci.flat_insn->size = insn_size;
		handle->printer(&mci, &ss, handle->printer_info);
		fill_insn(handle, insn, ss.buffer, &mci, handle->post_printer, *code);

		// move the caller's cursor to the next instruction
		*code += insn_size;
		*size -= insn_size;
		*address += insn_size;
	} else {	// encounter a broken instruction
		size_t skipdata_bytes;

		// if there is no request to skip data, or remaining data is too small,
		// then bail out
		if (!handle->skipdata || handle->skipdata_size > *size)
			return false;

		if (handle->skipdata_setup.callback) {
			// offset is 0 because *code already points at the current byte
			skipdata_bytes = handle->skipdata_setup.callback(*code, *size,
					0, handle->skipdata_setup.user_data);
			if (skipdata_bytes > *size)
				// remaining data is not enough
				return false;

			if (!skipdata_bytes)
				// user requested not to skip data, so bail out
				return false;
		} else
			skipdata_bytes = handle->skipdata_size;

		// we have to skip some amount of data, depending on arch & mode
		insn->id = 0;	// invalid ID for this "data" instruction
		insn->address = *address;
		insn->size = (uint16_t)skipdata_bytes;
		// NOTE(review): skipdata_bytes is only checked against *size above,
		// not against the capacity of insn->bytes - confirm that a callback
		// cannot return more than that buffer holds.
		memcpy(insn->bytes, *code, skipdata_bytes);
		strncpy(insn->mnemonic, handle->skipdata_setup.mnemonic,
				sizeof(insn->mnemonic) - 1);
		// strncpy() does not terminate the string when the source fills the
		// buffer, and @insn may come from uninitialized memory
		insn->mnemonic[sizeof(insn->mnemonic) - 1] = '\0';
		skipdata_opstr(insn->op_str, *code, skipdata_bytes);

		// NOTE: if detail mode is OFF, content of detail pointer is irrelevant
		// to be sure, zero out content of detail pointer
		if (insn->detail)
			memset(insn->detail, 0, sizeof(cs_detail));

		// move the caller's cursor past the skipped data
		*code += skipdata_bytes;
		*size -= skipdata_bytes;
		*address += skipdata_bytes;
	}

	return true;
}
// return friendly name of register in a string
CAPSTONE_EXPORT
const char *cs_reg_name(csh ud, unsigned int reg)

@ -126,15 +126,18 @@ typedef enum cs_opt_value {
CS_OPT_SYNTAX_NOREGNAME, // Prints register name with only number (CS_OPT_SYNTAX)
} cs_opt_value;
// User-defined callback function for SKIPDATA option
// @code: the input buffer containing code to be disassembled. This is the
// same buffer passed to cs_disasm().
// @code_size: size (in bytes) of the above @code buffer.
// @offset: the position of the currently-examining byte in the input
// buffer @code mentioned above.
// @user_data: user-data passed to cs_option() via @user_data field in
// cs_opt_skipdata struct below.
// @return: return number of bytes to skip, or 0 to immediately stop disassembling.
/*
User-defined callback function for SKIPDATA option
@code: the input buffer containing code to be disassembled.
This is the same buffer passed to cs_disasm().
@code_size: size (in bytes) of the above @code buffer.
@offset: the position of the currently-examining byte in the input
buffer @code mentioned above.
@user_data: user-data passed to cs_option() via @user_data field in
cs_opt_skipdata struct below.
@return: return number of bytes to skip, or 0 to immediately stop disassembling.
*/
typedef size_t (*cs_skipdata_cb_t)(const uint8_t *code, size_t code_size, size_t offset, void *user_data);
// User-customized setup for SKIPDATA option
@ -334,7 +337,7 @@ cs_err cs_close(csh *handle);
@type: type of option to be set
@value: option value corresponding with @type
@return CS_ERR_OK on success, or other value on failure.
@return: CS_ERR_OK on success, or other value on failure.
Refer to cs_err enum for detailed error.
NOTE: in the case of CS_OPT_MEM, handle's value can be anything,
@ -368,21 +371,24 @@ CAPSTONE_EXPORT
const char *cs_strerror(cs_err code);
/*
Dynamicly allocate memory to contain disasm insn
Disassembled instructions will be put into @*insn
Disassemble binary code, given the code buffer, size, address and number
of instructions to be decoded.
This API dynamically allocates memory to contain the disassembled instructions.
The resulting instructions will be put into @*insn
NOTE 1: this API will automatically determine memory needed to contain
output disassembled instructions in @insn.
NOTE 2: caller must free() the allocated memory itself to avoid memory leaking
NOTE 2: caller must free the allocated memory itself to avoid memory leaking.
@handle: handle returned by cs_open()
@code: buffer containing raw binary code to be disassembled
@code_size: size of above code
@address: address of the first insn in given raw code buffer
@insn: array of insn filled in by this function
@code: buffer containing raw binary code to be disassembled.
@code_size: size of the above code buffer.
@address: address of the first instruction in given raw code buffer.
@insn: array of instructions filled in by this API.
NOTE: @insn will be allocated by this function, and should be freed
with cs_free() API.
@count: number of instructions to be disassembled, or 0 to get all of them
@return: the number of successfully disassembled instructions,
or 0 if this function failed to disassemble the given code
@ -408,14 +414,57 @@ size_t cs_disasm_ex(csh handle,
cs_insn **insn);
/*
Free memory allocated in @insn by cs_disasm()
Free memory allocated by cs_malloc() or cs_disasm() (argument @insn)
@insn: pointer returned by @insn argument in cs_disasm()
@count: number of cs_insn structures returned by cs_disasm()
@insn: pointer returned by @insn argument in cs_disasm() or cs_malloc()
@count: number of cs_insn structures returned by cs_disasm(), or 1
to free memory allocated by cs_malloc().
*/
CAPSTONE_EXPORT
void cs_free(cs_insn *insn, size_t count);
/*
Allocate memory for 1 instruction to be used by cs_disasm_iter().
@handle: handle returned by cs_open()
NOTE: when no longer in use, you can reclaim the memory allocated for
this instruction with cs_free(insn, 1)
*/
CAPSTONE_EXPORT
cs_insn *cs_malloc(csh handle);
/*
Fast API to disassemble binary code one instruction at a time, given the
code buffer, size and address.
This API puts the resulting instruction into a given cache in @insn.
NOTE 1: this API will update @code, @size & @address to point to the next
instruction in the input buffer. Therefore, it is convenient to use
cs_disasm_iter() inside a loop to quickly iterate over all the instructions.
While decoding one instruction at a time can also be achieved with
cs_disasm(count=1), some benchmarks have shown that cs_disasm_iter() can be
30% faster on random input.
NOTE 2: the cache in @insn can be created with cs_malloc() API.
@handle: handle returned by cs_open()
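@code: pointer to the buffer containing raw binary code to be disassembled.
On success, it is advanced past the decoded instruction.
@size: pointer to the size of the above code buffer.
On success, it is decreased by the size of the decoded instruction.
@address: pointer to the address of the first insn in given raw code buffer.
On success, it is advanced to the address of the next instruction.
@insn: pointer to instruction to be filled in by this API.
@return: true if this API successfully decodes 1 instruction,
or false otherwise.
On failure, call cs_errno() for error code.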
*/
CAPSTONE_EXPORT
bool cs_disasm_iter(csh handle,
const uint8_t **code, size_t *size,
uint64_t *address, cs_insn *insn);
/*
Return friendly name of register in a string.
Find the instruction id from header file of corresponding architecture (arm.h for ARM,
@ -426,6 +475,7 @@ void cs_free(cs_insn *insn, size_t count);
@handle: handle returned by cs_open()
@reg_id: register id
@return: string name of the register, or NULL if @reg_id is invalid.
*/
CAPSTONE_EXPORT

@ -64,7 +64,7 @@ endif
.PHONY: all clean
SOURCES = test.c test_detail.c test_skipdata.c
SOURCES = test.c test_detail.c test_skipdata.c test_iter.c
ifneq (,$(findstring arm,$(CAPSTONE_ARCHS)))
SOURCES += test_arm.c
endif

@ -10,6 +10,15 @@ This directory contains some test code to show how to use Capstone API.
instructions, such as implicit registers read/written, or groups of instructions
that this instruction belongs to.
- test_skipdata.c:
This code shows how to use SKIPDATA option to skip broken instructions (most likely
some data mixed with instructions) and continue to decode at the next legitimate
instructions.
- test_iter.c:
This code shows how to use the API cs_disasm_iter() to decode one instruction at
a time inside a loop.
- test_<arch>.c
These files show how to access architecture-specific information for each
architecture.

@ -215,9 +215,6 @@ static void test()
// print implicit registers used by this instruction
detail = i->detail;
// detail can be NULL on "data" instruction since we turned on SKIPDATA option above.
if (!detail)
continue;
if (detail->regs_read_count > 0) {
printf("\tImplicit registers read: ");

@ -0,0 +1,264 @@
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
// This sample code demonstrates the APIs cs_malloc() & cs_disasm_iter().
#include <stdio.h>
#include <stdlib.h>
#include "../inttypes.h"
#include <capstone.h>
// One disassembly test case: the target to open, the raw bytes to decode and
// an optional handle option to apply before decoding.
// NOTE: the platforms[] table in test() initializes this struct positionally,
// so the order of the fields must not change.
struct platform {
cs_arch arch;	// architecture passed to cs_open()
cs_mode mode;	// mode passed to cs_open()
unsigned char *code;	// raw machine code to be disassembled
size_t size;	// number of bytes in @code
char *comment;	// label printed on the "Platform:" line
cs_opt_type opt_type;	// option type passed to cs_option(); skipped when zero
cs_opt_value opt_value;	// option value passed to cs_option() with @opt_type
};
// Print @len bytes starting at @str to stdout on a single line, in the form
// "Code: 0x8d 0x4c ... " followed by a newline.
// @str: buffer to dump; it is only read, never modified
// @len: number of bytes to print
static void print_string_hex(const unsigned char *str, size_t len)
{
	size_t i;

	printf("Code: ");
	for (i = 0; i < len; i++) {
		printf("0x%02x ", (unsigned int)str[i]);
	}
	printf("\n");
}
// Disassemble one sample buffer per supported platform with cs_disasm_iter(),
// printing every decoded instruction together with its implicit registers
// read/modified and the groups it belongs to.
static void test(void)
{
#define X86_CODE16 "\x8d\x4c\x32\x08\x01\xd8\x81\xc6\x34\x12\x00\x00"
#define X86_CODE32 "\x8d\x4c\x32\x08\x01\xd8\x81\xc6\x34\x12\x00\x00"
//#define X86_CODE32 "\x0f\xa7\xc0" // xstorerng
#define X86_CODE64 "\x55\x48\x8b\x05\xb8\x13\x00\x00"
//#define ARM_CODE "\x04\xe0\x2d\xe5"
#define ARM_CODE "\xED\xFF\xFF\xEB\x04\xe0\x2d\xe5\x00\x00\x00\x00\xe0\x83\x22\xe5\xf1\x02\x03\x0e\x00\x00\xa0\xe3\x02\x30\xc1\xe7\x00\x00\x53\xe3"
#define ARM_CODE2 "\x10\xf1\x10\xe7\x11\xf2\x31\xe7\xdc\xa1\x2e\xf3\xe8\x4e\x62\xf3"
#define THUMB_CODE "\x70\x47\xeb\x46\x83\xb0\xc9\x68"
#define THUMB_CODE2 "\x4f\xf0\x00\x01\xbd\xe8\x00\x88\xd1\xe8\x00\xf0"
#define MIPS_CODE "\x0C\x10\x00\x97\x00\x00\x00\x00\x24\x02\x00\x0c\x8f\xa2\x00\x00\x34\x21\x34\x56\x00\x80\x04\x08"
//#define MIPS_CODE "\x21\x38\x00\x01"
//#define MIPS_CODE "\x21\x30\xe6\x70"
//#define MIPS_CODE "\x1c\x00\x40\x14"
#define MIPS_CODE2 "\x56\x34\x21\x34\xc2\x17\x01\x00"
//#define ARM64_CODE "\xe1\x0b\x40\xb9" // ldr w1, [sp, #0x8]
//#define ARM64_CODE "\x00\x40\x21\x4b" // sub w0, w0, w1, uxtw
//#define ARM64_CODE "\x21\x7c\x02\x9b" // mul x1, x1, x2
//#define ARM64_CODE "\x20\x74\x0b\xd5" // dc zva, x0
//#define ARM64_CODE "\x20\xfc\x02\x9b" // mneg x0, x1, x2
//#define ARM64_CODE "\x21\x7c\x02\x9b\x21\x7c\x00\x53\x00\x40\x21\x4b\xe1\x0b\x40\xb9\x10\x20\x21\x1e"
//#define ARM64_CODE "\x21\x7c\x00\x53"
#define ARM64_CODE "\x09\x00\x38\xd5\xbf\x40\x00\xd5\x0c\x05\x13\xd5\x20\x50\x02\x0e\x20\xe4\x3d\x0f\x00\x18\xa0\x5f\xa2\x00\xae\x9e\x9f\x37\x03\xd5\xbf\x33\x03\xd5\xdf\x3f\x03\xd5\x21\x7c\x02\x9b\x21\x7c\x00\x53\x00\x40\x21\x4b\xe1\x0b\x40\xb9\x20\x04\x81\xda\x20\x08\x02\x8b\x10\x5b\xe8\x3c"
//#define THUMB_CODE "\x0a\xbf" // itet eq
//#define X86_CODE32 "\x77\x04" // ja +6
#define PPC_CODE "\x80\x20\x00\x00\x80\x3f\x00\x00\x10\x43\x23\x0e\xd0\x44\x00\x80\x4c\x43\x22\x02\x2d\x03\x00\x80\x7c\x43\x20\x14\x7c\x43\x20\x93\x4f\x20\x00\x21\x4c\xc8\x00\x21\x40\x82\x00\x14"
#define SPARC_CODE "\x80\xa0\x40\x02\x85\xc2\x60\x08\x85\xe8\x20\x01\x81\xe8\x00\x00\x90\x10\x20\x01\xd5\xf6\x10\x16\x21\x00\x00\x0a\x86\x00\x40\x02\x01\x00\x00\x00\x12\xbf\xff\xff\x10\xbf\xff\xff\xa0\x02\x00\x09\x0d\xbf\xff\xff\xd4\x20\x60\x00\xd4\x4e\x00\x16\x2a\xc2\x80\x03"
#define SPARCV9_CODE "\x81\xa8\x0a\x24\x89\xa0\x10\x20\x89\xa0\x1a\x60\x89\xa0\x00\xe0"
#define SYSZ_CODE "\xed\x00\x00\x00\x00\x1a\x5a\x0f\x1f\xff\xc2\x09\x80\x00\x00\x00\x07\xf7\xeb\x2a\xff\xff\x7f\x57\xe3\x01\xff\xff\x7f\x57\xeb\x00\xf0\x00\x00\x24\xb2\x4f\x00\x78"
#define XCORE_CODE "\xfe\x0f\xfe\x17\x13\x17\xc6\xfe\xec\x17\x97\xf8\xec\x4f\x1f\xfd\xec\x37\x07\xf2\x45\x5b\xf9\xfa\x02\x06\x1b\x10"

	// entries that omit opt_type/opt_value leave them zero-initialized, which
	// makes the loop below skip the extra cs_option() call
	struct platform platforms[] = {
		{
			CS_ARCH_X86,
			CS_MODE_16,
			(unsigned char *)X86_CODE16,
			sizeof(X86_CODE16) - 1,
			"X86 16bit (Intel syntax)"
		},
		{
			CS_ARCH_X86,
			CS_MODE_32,
			(unsigned char *)X86_CODE32,
			sizeof(X86_CODE32) - 1,
			"X86 32bit (ATT syntax)",
			CS_OPT_SYNTAX,
			CS_OPT_SYNTAX_ATT,
		},
		{
			CS_ARCH_X86,
			CS_MODE_32,
			(unsigned char *)X86_CODE32,
			sizeof(X86_CODE32) - 1,
			"X86 32 (Intel syntax)"
		},
		{
			CS_ARCH_X86,
			CS_MODE_64,
			(unsigned char *)X86_CODE64,
			sizeof(X86_CODE64) - 1,
			"X86 64 (Intel syntax)"
		},
		{
			CS_ARCH_ARM,
			CS_MODE_ARM,
			(unsigned char *)ARM_CODE,
			sizeof(ARM_CODE) - 1,
			"ARM"
		},
		{
			CS_ARCH_ARM,
			CS_MODE_THUMB,
			(unsigned char *)THUMB_CODE2,
			sizeof(THUMB_CODE2) - 1,
			"THUMB-2"
		},
		{
			CS_ARCH_ARM,
			CS_MODE_ARM,
			(unsigned char *)ARM_CODE2,
			sizeof(ARM_CODE2) - 1,
			"ARM: Cortex-A15 + NEON"
		},
		{
			CS_ARCH_ARM,
			CS_MODE_THUMB,
			(unsigned char *)THUMB_CODE,
			sizeof(THUMB_CODE) - 1,
			"THUMB"
		},
		{
			CS_ARCH_MIPS,
			(cs_mode)(CS_MODE_32 + CS_MODE_BIG_ENDIAN),
			(unsigned char *)MIPS_CODE,
			sizeof(MIPS_CODE) - 1,
			"MIPS-32 (Big-endian)"
		},
		{
			CS_ARCH_MIPS,
			(cs_mode)(CS_MODE_64 + CS_MODE_LITTLE_ENDIAN),
			(unsigned char *)MIPS_CODE2,
			sizeof(MIPS_CODE2) - 1,
			"MIPS-64-EL (Little-endian)"
		},
		{
			CS_ARCH_ARM64,
			CS_MODE_ARM,
			(unsigned char *)ARM64_CODE,
			sizeof(ARM64_CODE) - 1,
			"ARM-64"
		},
		{
			CS_ARCH_PPC,
			CS_MODE_BIG_ENDIAN,
			(unsigned char*)PPC_CODE,
			sizeof(PPC_CODE) - 1,
			"PPC-64"
		},
		{
			CS_ARCH_SPARC,
			CS_MODE_BIG_ENDIAN,
			(unsigned char*)SPARC_CODE,
			sizeof(SPARC_CODE) - 1,
			"Sparc"
		},
		{
			CS_ARCH_SPARC,
			(cs_mode)(CS_MODE_BIG_ENDIAN + CS_MODE_V9),
			(unsigned char*)SPARCV9_CODE,
			sizeof(SPARCV9_CODE) - 1,
			"SparcV9"
		},
		{
			CS_ARCH_SYSZ,
			(cs_mode)0,
			(unsigned char*)SYSZ_CODE,
			sizeof(SYSZ_CODE) - 1,
			"SystemZ"
		},
		{
			CS_ARCH_XCORE,
			(cs_mode)0,
			(unsigned char*)XCORE_CODE,
			sizeof(XCORE_CODE) - 1,
			"XCore"
		},
	};

	csh handle;
	uint64_t address;
	cs_insn *insn;
	cs_detail *detail;
	size_t i;
	cs_err err;
	const uint8_t *code;
	size_t size;

	for (i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) {
		printf("****************\n");
		printf("Platform: %s\n", platforms[i].comment);
		err = cs_open(platforms[i].arch, platforms[i].mode, &handle);
		if (err) {
			printf("Failed on cs_open() with error returned: %u\n", err);
			continue;
		}

		if (platforms[i].opt_type)
			cs_option(handle, platforms[i].opt_type, platforms[i].opt_value);

		// detail mode must be ON before cs_malloc(), so that insn->detail
		// gets allocated and can be read in the loop below
		cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);

		// allocate memory for the cache to be used by cs_disasm_iter()
		insn = cs_malloc(handle);
		if (!insn) {
			// cs_disasm_iter() writes through @insn, so it must not be NULL
			printf("ERROR: Failed on cs_malloc()\n");
			cs_close(&handle);
			continue;
		}

		print_string_hex(platforms[i].code, platforms[i].size);
		printf("Disasm:\n");

		address = 0x1000;
		code = platforms[i].code;
		size = platforms[i].size;

		// each successful call advances code/size/address to the next insn
		while(cs_disasm_iter(handle, &code, &size, &address, insn)) {
			int n;

			printf("0x%"PRIx64":\t%s\t\t%s // insn-ID: %u, insn-mnem: %s\n",
					insn->address, insn->mnemonic, insn->op_str,
					insn->id, cs_insn_name(handle, insn->id));

			// print implicit registers used by this instruction
			detail = insn->detail;

			if (detail->regs_read_count > 0) {
				printf("\tImplicit registers read: ");
				for (n = 0; n < detail->regs_read_count; n++) {
					printf("%s ", cs_reg_name(handle, detail->regs_read[n]));
				}
				printf("\n");
			}

			// print implicit registers modified by this instruction
			if (detail->regs_write_count > 0) {
				printf("\tImplicit registers modified: ");
				for (n = 0; n < detail->regs_write_count; n++) {
					printf("%s ", cs_reg_name(handle, detail->regs_write[n]));
				}
				printf("\n");
			}

			// print the groups this instruction belong to
			if (detail->groups_count > 0) {
				printf("\tThis instruction belongs to groups: ");
				for (n = 0; n < detail->groups_count; n++) {
					printf("%s ", cs_group_name(handle, detail->groups[n]));
				}
				printf("\n");
			}
		}

		printf("\n");

		// free memory allocated by cs_malloc()
		cs_free(insn, 1);

		cs_close(&handle);
	}
}
// Program entry point: run the cs_disasm_iter() demonstration once and
// report success to the caller.
int main()
{
	test();

	return EXIT_SUCCESS;
}
Loading…
Cancel
Save