Yasm Assembler mainline development tree (ffmpeg 依赖)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

761 lines
18 KiB

/* $IdPath$
* Bytecode utility functions
*
* Copyright (C) 2001 Peter Johnson
*
* This file is part of YASM.
*
* YASM is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* YASM is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "util.h"
#include <stdio.h>
#ifdef STDC_HEADERS
# include <stdlib.h>
# include <string.h>
#endif
#include <libintl.h>
#define _(String) gettext(String)
#include "globals.h"
#include "errwarn.h"
#include "floatnum.h"
#include "expr.h"
#include "bytecode.h"
#include "section.h"
RCSID("$IdPath$");
/* Effective-address operand of an instruction: an optional displacement
 * expression plus the Mod/RM and SIB encoding bytes and the flags that
 * track whether each of those bytes is needed and currently valid.
 */
struct effaddr {
expr *disp; /* address displacement */
unsigned char len; /* length of disp (in bytes), 0 if none */
unsigned char segment; /* segment override, 0 if none */
unsigned char modrm; /* Mod/RM byte value (see valid_modrm) */
unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */
unsigned char sib; /* SIB byte value (see valid_sib) */
unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */
unsigned char need_sib; /* 1 if SIB byte needed, 0 if not */
};
/* Immediate operand: the value expression, its currently known length,
 * and the final length/signedness requested by the instruction
 * (f_len/f_sign are filled in by bytecode_new_insn()).
 */
struct immval {
expr *val; /* expression giving the immediate value */
unsigned char len; /* length of val (in bytes), 0 if none */
unsigned char isneg; /* the value has been explicitly negated */
unsigned char f_len; /* final imm length */
unsigned char f_sign; /* 1 if final imm should be signed */
};
/* One element of a data declaration, kept on a singly-linked tail queue.
 * `type` selects which member of the `data` union is meaningful.
 */
struct dataval {
STAILQ_ENTRY(dataval) link; /* next element in the datavalhead list */
enum { DV_EMPTY, DV_EXPR, DV_FLOAT, DV_STRING } type;
union {
expr *expn; /* used when type == DV_EXPR */
floatnum *flt; /* used when type == DV_FLOAT */
char *str_val; /* used when type == DV_STRING */
} data;
};
/* A single bytecode: one instruction, relative jump, data declaration or
 * space reservation, together with its source location and the assembler
 * state captured when it was created.  `type` selects which member of the
 * `data` union is meaningful.
 */
struct bytecode {
STAILQ_ENTRY(bytecode) link; /* next bytecode in the bytecodehead list */
enum { BC_EMPTY, BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type;
/* This union has been somewhat tweaked to get it as small as possible
* on the 4-byte-aligned x86 architecture (without resorting to
* bitfields). In particular, insn and jmprel are the largest structures
* in the union, and are also the same size (after padding). jmprel
* can have another unsigned char added to the end without affecting
* its size.
*
* Don't worry about this too much, but keep it in mind when changing
* this structure. We care about the size of bytecode in particular
* because it accounts for the majority of the memory usage in the
* assembler when assembling a large file.
*/
union {
/* BC_INSN: ordinary instruction */
struct {
effaddr *ea; /* effective address */
immval *imm; /* immediate or relative value */
unsigned char opcode[3]; /* opcode */
unsigned char opcode_len; /* number of opcode[] bytes in use */
unsigned char addrsize; /* 0 indicates no override */
unsigned char opersize; /* 0 indicates no override */
unsigned char lockrep_pre; /* 0 indicates no prefix */
/* HACK, but a space-saving one: shift opcodes have an immediate
* form and a ,1 form (with no immediate). In the parser, we
* set this and opcode_len=1, but store the ,1 version in the
* second byte of the opcode array. We then choose between the
* two versions once we know the actual value of imm (because we
* don't know it in the parser module).
*
* An override to force the imm version should just leave this at
* 0. Then later code won't know the ,1 version even exists.
* TODO: Figure out how this affects CPU flags processing.
*
* Call SetInsnShiftFlag() to set this flag to 1.
*/
unsigned char shift_op;
} insn;
/* BC_JMPREL: relative jump with separate short and near encodings */
struct {
expr *target; /* target location */
struct {
unsigned char opcode[3];
unsigned char opcode_len; /* 0 = no opc for this version */
} shortop, nearop;
/* which opcode are we using? */
/* The *FORCED forms are specified in the source as such */
jmprel_opcode_sel op_sel;
unsigned char addrsize; /* 0 indicates no override */
unsigned char opersize; /* 0 indicates no override */
unsigned char lockrep_pre; /* 0 indicates no prefix */
} jmprel;
/* BC_DATA: data declaration */
struct {
/* non-converted data (linked list) */
datavalhead datahead;
/* final (converted) size of each element (in bytes) */
unsigned char size;
} data;
/* BC_RESERVE: space reservation */
struct {
expr *numitems; /* number of items to reserve */
unsigned char itemsize; /* size of each item (in bytes) */
} reserve;
} data;
unsigned long len; /* total length of entire bytecode */
/* where it came from */
char *filename; /* copy of the input filename made at creation time */
unsigned int lineno; /* line number at creation time */
/* other assembler state info */
unsigned long offset; /* initialized to 0 by bytecode_new_common() */
unsigned char mode_bits; /* value of the global mode_bits at creation time */
};
/* Static structures for when NULL is passed to conversion functions. */
/* for Convert*ToBytes() */
/* NOTE(review): despite the "static" wording above, this buffer has external
 * linkage.  Presumably another translation unit refers to it -- confirm
 * before narrowing it to file scope.
 */
unsigned char bytes_static[16];
/* Forward declaration: shared initializer used by the bytecode_new_*()
 * constructors defined later in this file.
 */
static bytecode *bytecode_new_common(void);
effaddr *
effaddr_new_reg(unsigned long reg)
{
effaddr *ea = xmalloc(sizeof(effaddr));
ea->len = 0;
ea->segment = 0;
ea->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */
ea->valid_modrm = 1;
ea->need_modrm = 1;
ea->valid_sib = 0;
ea->need_sib = 0;
return ea;
}
/* Create an effective address from a displacement expression.
 *
 * expr_ptr: displacement expression; the pointer is stored, not copied.
 *
 * Returns a newly allocated effaddr that needs a Mod/RM byte which is not
 * yet valid.  The displacement length is 0 until it is set (for example
 * through SetEALen()).
 */
effaddr *
effaddr_new_expr(expr *expr_ptr)
{
    effaddr *ea = xmalloc(sizeof(effaddr));

    ea->disp = expr_ptr;
    ea->len = 0;        /* was left uninitialized; printed by bytecode_print() */
    ea->segment = 0;
    /* bytecode_new_insn() does a read-modify-write on modrm to insert the
     * spare bits, so it must start from a defined value.
     */
    ea->modrm = 0;
    ea->valid_modrm = 0;
    ea->need_modrm = 1;
    ea->sib = 0;
    ea->valid_sib = 0;
    ea->need_sib = 0;

    return ea;
}
/* Create an effective address whose displacement is the value of an
 * immediate, with neither a Mod/RM nor a SIB byte.
 *
 * im_ptr: immediate whose value expression becomes the displacement; the
 *         expression pointer is shared with im_ptr, not copied.  Must not
 *         be NULL.
 * im_len: displacement length (in bytes) to record.
 *
 * Emits a warning when the immediate's own length exceeds im_len.
 *
 * NOTE(review): the warning text always says "word" regardless of im_len;
 * left unchanged here -- confirm whether callers only ever pass 2.
 */
effaddr *
effaddr_new_imm(immval *im_ptr, unsigned char im_len)
{
    effaddr *ea = xmalloc(sizeof(effaddr));

    ea->disp = im_ptr->val;
    if (im_ptr->len > im_len)
        Warning(_("%s value exceeds bounds"), "word");
    ea->len = im_len;
    ea->segment = 0;
    /* modrm and sib are unused for this form, but bytecode_new_insn() and
     * bytecode_print() still read them, so give them defined values.
     */
    ea->modrm = 0;
    ea->valid_modrm = 0;
    ea->need_modrm = 0;
    ea->sib = 0;
    ea->valid_sib = 0;
    ea->need_sib = 0;

    return ea;
}
immval *
immval_new_int(unsigned long int_val)
{
immval *im = xmalloc(sizeof(immval));
im->val = expr_new_ident(ExprInt(int_val));
if ((int_val & 0xFF) == int_val)
im->len = 1;
else if ((int_val & 0xFFFF) == int_val)
im->len = 2;
else
im->len = 4;
im->isneg = 0;
return im;
}
/* Create an immediate value from an arbitrary expression.
 *
 * expr_ptr: value expression; the pointer is stored, not copied.
 *
 * Returns a newly allocated immval.  Its length is 0, since it cannot be
 * determined from the expression here.  The final length/sign fields start
 * at 0 and are filled in by bytecode_new_insn().
 */
immval *
immval_new_expr(expr *expr_ptr)
{
    immval *im = xmalloc(sizeof(immval));

    im->val = expr_ptr;
    /* len was previously left uninitialized even though bytecode_print()
     * reads it.
     */
    im->len = 0;
    im->isneg = 0;
    im->f_len = 0;
    im->f_sign = 0;

    return im;
}
/* Record a segment override on an effective address.
 *
 * ptr:     effective address to update; NULL is silently ignored.
 * segment: new segment override value (0 means none).
 *
 * Warns when a non-zero override is applied on top of an existing
 * non-zero one.  The stored value is always replaced by `segment`.
 */
void
SetEASegment(effaddr *ptr, unsigned char segment)
{
    if (ptr) {
        int already_overridden = (ptr->segment != 0);

        if (already_overridden && segment != 0)
            Warning(_("multiple segment overrides, using leftmost"));

        ptr->segment = segment;
    }
}
/* Force the displacement length of an effective address.
 *
 * ptr: effective address to update; NULL is silently ignored.
 * len: displacement length in bytes.
 *
 * No truncation warning is issued: this is only reached through an
 * explicit override, where the user is expected to know what they want.
 */
void
SetEALen(effaddr *ptr, unsigned char len)
{
    if (ptr)
        ptr->len = len;
}
/* Fetch the effective address attached to an instruction bytecode.
 *
 * bc: bytecode to inspect; NULL yields NULL.
 *
 * Raises an internal error when bc is not of type BC_INSN.
 * Returns the instruction's effaddr pointer (which may itself be NULL).
 */
effaddr *
GetInsnEA(bytecode *bc)
{
    effaddr *found = (effaddr *)NULL;

    if (bc) {
        if (bc->type != BC_INSN)
            InternalError(__LINE__, __FILE__,
                          _("Trying to get EA of non-instruction"));
        found = bc->data.insn.ea;
    }

    return found;
}
/* Set the operand-size override of an instruction or relative jump.
 *
 * bc:       bytecode to update; NULL is silently ignored.
 * opersize: new operand size (0 means no override).
 *
 * Raises an internal error for any other bytecode type.
 */
void
SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize)
{
    if (!bc)
        return;

    if (bc->type == BC_INSN) {
        bc->data.insn.opersize = opersize;
    } else if (bc->type == BC_JMPREL) {
        bc->data.jmprel.opersize = opersize;
    } else {
        InternalError(__LINE__, __FILE__,
                      _("OperSize override applied to non-instruction"));
    }
}
/* Set the address-size override of an instruction or relative jump.
 *
 * bc:       bytecode to update; NULL is silently ignored.
 * addrsize: new address size (0 means no override).
 *
 * Raises an internal error for any other bytecode type.
 */
void
SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize)
{
    unsigned char *field;

    if (!bc)
        return;

    /* Locate the addrsize field for this bytecode type. */
    switch (bc->type) {
        case BC_INSN:
            field = &bc->data.insn.addrsize;
            break;
        case BC_JMPREL:
            field = &bc->data.jmprel.addrsize;
            break;
        default:
            InternalError(__LINE__, __FILE__,
                          _("AddrSize override applied to non-instruction"));
            return;
    }

    *field = addrsize;
}
/* Attach a LOCK/REP prefix to an instruction or relative jump.
 *
 * bc:     bytecode to update; NULL is silently ignored.
 * prefix: prefix byte to record.
 *
 * Raises an internal error for any other bytecode type.  Warns when a
 * prefix was already present; the stored value is always replaced.
 */
void
SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix)
{
    unsigned char *slot;

    if (!bc)
        return;

    if (bc->type == BC_INSN) {
        slot = &bc->data.insn.lockrep_pre;
    } else if (bc->type == BC_JMPREL) {
        slot = &bc->data.jmprel.lockrep_pre;
    } else {
        InternalError(__LINE__, __FILE__,
                      _("LockRep prefix applied to non-instruction"));
        return;
    }

    if (*slot != 0)
        Warning(_("multiple LOCK or REP prefixes, using leftmost"));

    *slot = prefix;
}
/* Mark an instruction as a shift opcode that has both an immediate form
 * and a ",1" form (see the shift_op field of struct bytecode).
 *
 * bc: bytecode to update; NULL is silently ignored.
 *
 * Raises an internal error when bc is not of type BC_INSN.
 */
void
SetInsnShiftFlag(bytecode *bc)
{
    if (bc) {
        if (bc->type != BC_INSN)
            InternalError(__LINE__, __FILE__,
                          _("Attempted to set shift flag on non-instruction"));

        bc->data.insn.shift_op = 1;
    }
}
/* Update a relative-jump opcode selection.
 *
 * old_sel: selection to overwrite; NULL is silently ignored.
 * new_sel: new selection value.
 *
 * Warns when the existing selection was one of the *_FORCED values and
 * the new one is not JR_NONE.  The stored value is always replaced.
 */
void
SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel)
{
    int was_forced;

    if (!old_sel)
        return;

    was_forced = (*old_sel == JR_SHORT_FORCED) ||
                 (*old_sel == JR_NEAR_FORCED);

    if (was_forced && new_sel != JR_NONE)
        Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));

    *old_sel = new_sel;
}
/* Allocate a bytecode and fill in the fields shared by every type:
 * zero length and offset, plus the current input filename (duplicated),
 * line number and mode bits taken from the globals.
 *
 * The `type` field and the type-specific union are NOT set here; each
 * bytecode_new_*() constructor does that itself.
 */
static bytecode *
bytecode_new_common(void)
{
    bytecode *fresh = xmalloc(sizeof(bytecode));

    /* Source location. */
    fresh->filename = xstrdup(in_filename);
    fresh->lineno = line_number;

    /* Assembler state and sizes. */
    fresh->mode_bits = mode_bits;
    fresh->offset = 0;
    fresh->len = 0;

    return fresh;
}
/* Build an instruction bytecode.
 *
 * opersize:    operand size override (0 for none).
 * opcode_len:  number of valid opcode bytes.
 * op0/op1/op2: opcode bytes.
 * ea_ptr:      effective address, or NULL; the pointer is stored and its
 *              Mod/RM spare/reg field (bits 3-5) is overwritten with
 *              `spare`.
 * spare:       value for the Mod/RM spare/reg field (low 3 bits used).
 * im_ptr:      immediate, or NULL; the pointer is stored and its final
 *              length/sign are set from im_len/im_sign.
 * im_len:      final immediate length.
 * im_sign:     1 if the final immediate should be signed.
 *
 * Returns the new bytecode, with no address-size override, no LOCK/REP
 * prefix and the shift flag cleared.
 */
bytecode *
bytecode_new_insn(unsigned char opersize,
                  unsigned char opcode_len,
                  unsigned char op0,
                  unsigned char op1,
                  unsigned char op2,
                  effaddr *ea_ptr,
                  unsigned char spare,
                  immval *im_ptr,
                  unsigned char im_len,
                  unsigned char im_sign)
{
    bytecode *bc = bytecode_new_common();

    bc->type = BC_INSN;

    /* Opcode bytes and overrides. */
    bc->data.insn.opcode_len = opcode_len;
    bc->data.insn.opcode[0] = op0;
    bc->data.insn.opcode[1] = op1;
    bc->data.insn.opcode[2] = op2;
    bc->data.insn.opersize = opersize;
    bc->data.insn.addrsize = 0;
    bc->data.insn.lockrep_pre = 0;
    bc->data.insn.shift_op = 0;

    /* Effective address: clear bits 3-5, then insert the spare bits. */
    bc->data.insn.ea = ea_ptr;
    if (ea_ptr) {
        ea_ptr->modrm = (unsigned char)((ea_ptr->modrm & 0xC7) |
                                        ((spare << 3) & 0x38));
    }

    /* Immediate: record the final form wanted by this instruction. */
    bc->data.insn.imm = im_ptr;
    if (im_ptr) {
        im_ptr->f_len = im_len;
        im_ptr->f_sign = im_sign;
    }

    return bc;
}
/* Build a relative-jump bytecode.
 *
 * target:           jump target expression and requested opcode selection;
 *                   must not be NULL.
 * short_opcode_len: number of valid short-form opcode bytes (0 = the
 *                   instruction has no short form).
 * short_op0..2:     short-form opcode bytes.
 * near_opcode_len:  number of valid near-form opcode bytes (0 = the
 *                   instruction has no near form).
 * near_op0..2:      near-form opcode bytes.
 * addrsize:         address size override (0 for none).
 *
 * Reports an error when the source forces SHORT or NEAR but the
 * corresponding form does not exist.  The bytecode is still created and
 * returned in that case.
 */
bytecode *
bytecode_new_jmprel(targetval *target,
                    unsigned char short_opcode_len,
                    unsigned char short_op0,
                    unsigned char short_op1,
                    unsigned char short_op2,
                    unsigned char near_opcode_len,
                    unsigned char near_op0,
                    unsigned char near_op1,
                    unsigned char near_op2,
                    unsigned char addrsize)
{
    bytecode *bc = bytecode_new_common();

    bc->type = BC_JMPREL;

    bc->data.jmprel.target = target->val;
    bc->data.jmprel.op_sel = target->op_sel;

    /* A forced form is only legal when that form has an opcode.  The
     * original code tested the opposite form's length in each check, so it
     * rejected valid jumps and accepted impossible ones.
     */
    if ((target->op_sel == JR_SHORT_FORCED) && (short_opcode_len == 0))
        Error(_("no SHORT form of that jump instruction exists"));
    if ((target->op_sel == JR_NEAR_FORCED) && (near_opcode_len == 0))
        Error(_("no NEAR form of that jump instruction exists"));

    bc->data.jmprel.shortop.opcode[0] = short_op0;
    bc->data.jmprel.shortop.opcode[1] = short_op1;
    bc->data.jmprel.shortop.opcode[2] = short_op2;
    bc->data.jmprel.shortop.opcode_len = short_opcode_len;

    bc->data.jmprel.nearop.opcode[0] = near_op0;
    bc->data.jmprel.nearop.opcode[1] = near_op1;
    bc->data.jmprel.nearop.opcode[2] = near_op2;
    bc->data.jmprel.nearop.opcode_len = near_opcode_len;

    bc->data.jmprel.addrsize = addrsize;
    bc->data.jmprel.opersize = 0;
    bc->data.jmprel.lockrep_pre = 0;

    return bc;
}
/* Build a data bytecode from a list of data values.
 *
 * datahead: list of values; the list head is copied by value into the
 *           new bytecode.
 *           NOTE(review): presumably the caller stops using its own head
 *           afterwards -- confirm.
 * size:     size of each element in bytes (stored in an unsigned char).
 *
 * Before the bytecode is created every element is checked against the
 * element size.  Accepted combinations:
 *   db (1)  -> expr, string
 *   dw (2)  -> expr, string
 *   dd (4)  -> expr, float, string
 *   dq (8)  -> expr, float, string
 *   dt (10) -> float, string
 * Violations are reported through Error(); the bytecode is still created.
 *
 * TODO: once expressions are evaluated they must also be range-checked
 * against the element size (constants should always be legal, labels
 * should at least warn in the narrow data directives).
 */
bytecode *
bytecode_new_data(datavalhead *datahead, unsigned long size)
{
    bytecode *bc;
    dataval *item;

    STAILQ_FOREACH(item, datahead, link) {
        if (item->type == DV_FLOAT) {
            /* Floats do not fit in byte- or word-sized elements. */
            const char *directive = NULL;

            if (size == 1)
                directive = "DB";
            else if (size == 2)
                directive = "DW";

            if (directive)
                Error(_("floating-point constant encountered in `%s'"),
                      directive);
        } else if (item->type == DV_EXPR) {
            /* Ten-byte elements only accept floats (or strings). */
            if (size == 10)
                Error(_("non-floating-point value encountered in `%s'"),
                      "DT");
        }
        /* DV_EMPTY and DV_STRING are valid for every size. */
    }

    bc = bytecode_new_common();
    bc->type = BC_DATA;
    bc->data.data.size = size;
    bc->data.data.datahead = *datahead;

    return bc;
}
/* Build a space-reservation bytecode.
 *
 * numitems: expression giving the number of items to reserve; the pointer
 *           is stored, not copied.
 * itemsize: size of each item in bytes (stored in an unsigned char).
 */
bytecode *
bytecode_new_reserve(expr *numitems, unsigned long itemsize)
{
    bytecode *reservation = bytecode_new_common();

    reservation->type = BC_RESERVE;
    reservation->data.reserve.itemsize = itemsize;
    reservation->data.reserve.numitems = numitems;

    return reservation;
}
/* Placeholder: not implemented yet.  Always returns 0 and does not touch
 * any of its arguments (including *ret_val).
 */
int
bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val)
{
    /* TODO: implement; parameters are intentionally unused for now. */
    (void)sect;
    (void)bc;
    (void)ret_val;

    return 0;
}
/* Dump a human-readable description of a bytecode to stdout (debugging aid).
 *
 * bc: bytecode to print; must not be NULL.
 *
 * Prints the type-specific contents followed by the common fields
 * (length, source location, offset and mode bits).
 *
 * Fixes relative to the previous version:
 *  - the jump "None"/opcode branches were inverted (`!x == 0` parses as
 *    `(!x) == 0`), so "None" was printed exactly when an opcode existed;
 *  - the instruction opcode and size/prefix lines were nested inside the
 *    "has immediate" branch and were skipped for instructions without one;
 *  - "(nil)" for a missing immediate was not followed by a newline;
 *  - the near form of a jump had no header line.
 */
void
bytecode_print(bytecode *bc)
{
    switch (bc->type) {
        case BC_EMPTY:
            printf("_Empty_\n");
            break;
        case BC_INSN:
            printf("_Instruction_\n");
            printf("Effective Address:");
            if (!bc->data.insn.ea)
                printf(" (nil)\n");
            else {
                printf("\n Disp=");
                expr_print(bc->data.insn.ea->disp);
                printf("\n");
                printf(" Len=%u SegmentOv=%2x\n",
                       (unsigned int)bc->data.insn.ea->len,
                       (unsigned int)bc->data.insn.ea->segment);
                printf(" ModRM=%2x ValidRM=%u NeedRM=%u\n",
                       (unsigned int)bc->data.insn.ea->modrm,
                       (unsigned int)bc->data.insn.ea->valid_modrm,
                       (unsigned int)bc->data.insn.ea->need_modrm);
                printf(" SIB=%2x ValidSIB=%u NeedSIB=%u\n",
                       (unsigned int)bc->data.insn.ea->sib,
                       (unsigned int)bc->data.insn.ea->valid_sib,
                       (unsigned int)bc->data.insn.ea->need_sib);
            }
            printf("Immediate Value:\n");
            printf(" Val=");
            if (!bc->data.insn.imm)
                printf("(nil)\n");
            else {
                expr_print(bc->data.insn.imm->val);
                printf("\n");
                printf(" Len=%u, IsNeg=%u\n",
                       (unsigned int)bc->data.insn.imm->len,
                       (unsigned int)bc->data.insn.imm->isneg);
                printf(" FLen=%u, FSign=%u\n",
                       (unsigned int)bc->data.insn.imm->f_len,
                       (unsigned int)bc->data.insn.imm->f_sign);
            }
            /* Opcode and overrides do not depend on the immediate. */
            printf("Opcode: %2x %2x %2x OpLen=%u\n",
                   (unsigned int)bc->data.insn.opcode[0],
                   (unsigned int)bc->data.insn.opcode[1],
                   (unsigned int)bc->data.insn.opcode[2],
                   (unsigned int)bc->data.insn.opcode_len);
            printf("AddrSize=%u OperSize=%u LockRepPre=%2x\n",
                   (unsigned int)bc->data.insn.addrsize,
                   (unsigned int)bc->data.insn.opersize,
                   (unsigned int)bc->data.insn.lockrep_pre);
            break;
        case BC_JMPREL:
            printf("_Relative Jump_\n");
            printf("Target=");
            expr_print(bc->data.jmprel.target);
            printf("\nShort Form:\n");
            /* opcode_len == 0 means this form does not exist. */
            if (bc->data.jmprel.shortop.opcode_len == 0)
                printf(" None\n");
            else
                printf(" Opcode: %2x %2x %2x OpLen=%u\n",
                       (unsigned int)bc->data.jmprel.shortop.opcode[0],
                       (unsigned int)bc->data.jmprel.shortop.opcode[1],
                       (unsigned int)bc->data.jmprel.shortop.opcode[2],
                       (unsigned int)bc->data.jmprel.shortop.opcode_len);
            printf("Near Form:\n");
            if (bc->data.jmprel.nearop.opcode_len == 0)
                printf(" None\n");
            else
                printf(" Opcode: %2x %2x %2x OpLen=%u\n",
                       (unsigned int)bc->data.jmprel.nearop.opcode[0],
                       (unsigned int)bc->data.jmprel.nearop.opcode[1],
                       (unsigned int)bc->data.jmprel.nearop.opcode[2],
                       (unsigned int)bc->data.jmprel.nearop.opcode_len);
            printf("OpSel=");
            switch (bc->data.jmprel.op_sel) {
                case JR_NONE:
                    printf("None");
                    break;
                case JR_SHORT:
                    printf("Short");
                    break;
                case JR_NEAR:
                    printf("Near");
                    break;
                case JR_SHORT_FORCED:
                    printf("Forced Short");
                    break;
                case JR_NEAR_FORCED:
                    printf("Forced Near");
                    break;
                default:
                    printf("UNKNOWN!!");
                    break;
            }
            printf("\nAddrSize=%u OperSize=%u LockRepPre=%2x\n",
                   (unsigned int)bc->data.jmprel.addrsize,
                   (unsigned int)bc->data.jmprel.opersize,
                   (unsigned int)bc->data.jmprel.lockrep_pre);
            break;
        case BC_DATA:
            printf("_Data_\n");
            printf("Final Element Size=%u\n",
                   (unsigned int)bc->data.data.size);
            printf("Elements:\n");
            dataval_print(&bc->data.data.datahead);
            break;
        case BC_RESERVE:
            printf("_Reserve_\n");
            printf("Num Items=");
            expr_print(bc->data.reserve.numitems);
            printf("\nItem Size=%u\n",
                   (unsigned int)bc->data.reserve.itemsize);
            break;
        default:
            printf("_Unknown_\n");
            break;
    }

    /* Fields common to every bytecode type. */
    printf("Length=%lu\n", bc->len);
    printf("Filename=\"%s\" Line Number=%u\n",
           bc->filename ? bc->filename : "<UNKNOWN>", bc->lineno);
    printf("Offset=%lx BITS=%u\n", bc->offset, (unsigned int)bc->mode_bits);
}
/* Append a bytecode to the tail of a bytecode list.
 *
 * headp: list to append to.
 * bc:    bytecode to append; may be NULL.
 *
 * A NULL bytecode is ignored.  A BC_EMPTY bytecode is freed instead of
 * being appended (only the bytecode structure itself is released).
 *
 * Returns bc when it was appended, NULL otherwise.
 */
bytecode *
bytecodes_append(bytecodehead *headp, bytecode *bc)
{
    if (!bc)
        return (bytecode *)NULL;

    if (bc->type == BC_EMPTY) {
        /* Nothing worth keeping: discard it. */
        free(bc);
        return (bytecode *)NULL;
    }

    STAILQ_INSERT_TAIL(headp, bc, link);
    return bc;
}
/* Create a data value that holds an expression.
 *
 * expn: expression to store; the pointer is stored, not copied.
 *
 * Returns a newly allocated dataval of type DV_EXPR (not yet linked into
 * any list).
 */
dataval *
dataval_new_expr(expr *expn)
{
    dataval *dv = xmalloc(sizeof(dataval));

    dv->data.expn = expn;
    dv->type = DV_EXPR;

    return dv;
}
/* Create a data value that holds a floating-point number.
 *
 * flt: floatnum to store; the pointer is stored, not copied.
 *
 * Returns a newly allocated dataval of type DV_FLOAT (not yet linked into
 * any list).
 */
dataval *
dataval_new_float(floatnum *flt)
{
    dataval *dv = xmalloc(sizeof(dataval));

    dv->data.flt = flt;
    dv->type = DV_FLOAT;

    return dv;
}
/* Create a data value that holds a string.
 *
 * str_val: string to store; the pointer is stored, not copied.
 *
 * Returns a newly allocated dataval of type DV_STRING (not yet linked
 * into any list).
 */
dataval *
dataval_new_string(char *str_val)
{
    dataval *dv = xmalloc(sizeof(dataval));

    dv->data.str_val = str_val;
    dv->type = DV_STRING;

    return dv;
}
/* Append a data value to the tail of a data value list.
 *
 * headp: list to append to.
 * dv:    value to append; NULL is ignored.
 *
 * Returns dv when it was appended, NULL otherwise.
 */
dataval *
datavals_append(datavalhead *headp, dataval *dv)
{
    if (!dv)
        return (dataval *)NULL;

    STAILQ_INSERT_TAIL(headp, dv, link);
    return dv;
}
/* Print every element of a data value list to stdout, one per line
 * (debugging aid).
 *
 * head: list to print.
 *
 * Elements whose type is not one of the known DV_* values print nothing.
 */
void
dataval_print(datavalhead *head)
{
    dataval *item;

    STAILQ_FOREACH(item, head, link) {
        if (item->type == DV_EMPTY) {
            printf(" Empty\n");
        } else if (item->type == DV_EXPR) {
            printf(" Expr=");
            expr_print(item->data.expn);
            printf("\n");
        } else if (item->type == DV_FLOAT) {
            printf(" Float=");
            floatnum_print(item->data.flt);
            printf("\n");
        } else if (item->type == DV_STRING) {
            printf(" String=%s\n", item->data.str_val);
        }
    }
}