mirror of https://github.com/yasm/yasm.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
349 lines
8.9 KiB
349 lines
8.9 KiB
/* $IdPath$ |
|
* NASM-compatible lex lexer |
|
* |
|
* Copyright (C) 2001 Peter Johnson |
|
* |
|
* This file is part of YASM. |
|
* |
|
* YASM is free software; you can redistribute it and/or modify |
|
* it under the terms of the GNU General Public License as published by |
|
* the Free Software Foundation; either version 2 of the License, or |
|
* (at your option) any later version. |
|
* |
|
* YASM is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
* GNU General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU General Public License |
|
* along with this program; if not, write to the Free Software |
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
*/ |
|
%{ |
|
#ifdef HAVE_CONFIG_H |
|
# include "config.h" |
|
#endif |
|
|
|
#include "util.h" |
|
|
|
#ifdef STDC_HEADERS |
|
# include <stdlib.h> |
|
# include <string.h> |
|
#endif |
|
|
|
#include <libintl.h> |
|
#define _(String) gettext(String) |
|
|
|
#include "errwarn.h" |
|
#include "expr.h" |
|
#include "symrec.h" |
|
|
|
#include "bytecode.h" |
|
|
|
#include "bison.h" |
|
|
|
RCSID("$IdPath$"); |
|
|
|
/* Tell flex never to treat its input as interactive. */
#define YY_NEVER_INTERACTIVE	1

/* Semantic values go into the NASM parser's value variable. */
#define yylval	nasm_parser_lval

/* Route all scanner input through the nasm_parser_yyinput function pointer
 * (defined elsewhere); its return value is stored as YY_INPUT's result.
 */
extern int (*nasm_parser_yyinput) (char *buf, int max_size);
#undef YY_INPUT
#define YY_INPUT(b, r, ms)	(r = nasm_parser_yyinput(b, ms))

/* Initial allocation for the string buffer, and the amount it grows by each
 * time it fills up (in bytes).
 */
#define STRBUF_ALLOC_SIZE	128

/* Buffer that accumulates the text of a string/character constant. */
static char *strbuf = NULL;

/* Allocated size of strbuf in bytes, including room for the terminating
 * NUL character.
 */
static size_t strbuf_size = 0;

/* Last "base" (non-local) label; its name is prepended to local (.) labels. */
symrec *locallabel_base = NULL;

/* Current line number. */
int line_number;
|
|
|
%} |
|
%option noyywrap nounput case-insensitive

/* Exclusive start conditions used while scanning a [name value] directive:
 * DIRECTIVE is active until the directive name has been read, DIRECTIVE2
 * from then on until the closing ']' or the end of the line.
 */
%x DIRECTIVE DIRECTIVE2

/* Named character classes.  Only lowercase letters are listed because the
 * scanner is generated case-insensitive.
 */
DIGIT		[0-9]
BINDIGIT	[01]
OCTDIGIT	[0-7]
HEXDIGIT	[0-9a-f]
WS		[ \t\r]
|
|
|
%% |
|
|
|
/* standard decimal integer */ |
|
{DIGIT}+ {
    /* Plain decimal integer. */
    yylval.int_val = strtoul(yytext, (char **)NULL, 10);
    return INTNUM;
}

    /* 10010011b - binary number (conversion stops at the 'b' suffix) */
{BINDIGIT}+b {
    yylval.int_val = strtoul(yytext, (char **)NULL, 2);
    return INTNUM;
}

    /* 777q - octal number (conversion stops at the 'q' suffix) */
{OCTDIGIT}+q {
    yylval.int_val = strtoul(yytext, (char **)NULL, 8);
    return INTNUM;
}

    /* 0AAh / 10h / 4C00h - hexadecimal number with an 'h' suffix.
     * NASM only requires the literal to begin with a decimal digit (so that
     * it cannot be confused with an identifier); that digit does not have
     * to be '0'.  The whole text is converted, so a leading digit other
     * than '0' contributes to the value; conversion stops at the 'h'.
     */
{DIGIT}{HEXDIGIT}*h {
    yylval.int_val = strtoul(yytext, (char **)NULL, 16);
    return INTNUM;
}

    /* $0AA form of hexadecimal number: '$' must be followed by a decimal
     * digit, since '$' followed by a letter is not a number.  Only the '$'
     * itself is skipped before conversion.
     */
\${DIGIT}{HEXDIGIT}* {
    yylval.int_val = strtoul(yytext+1, (char **)NULL, 16);
    return INTNUM;
}

    /* 0xAA form of hexadecimal number: skip the two-character prefix */
0x{HEXDIGIT}+ {
    yylval.int_val = strtoul(yytext+2, (char **)NULL, 16);
    return INTNUM;
}

    /* floating point value: digits, '.', optional fraction and exponent */
{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? {
    yylval.double_val = strtod(yytext, (char **)NULL);
    return FLTNUM;
}
|
|
|
/* string/character constant values */ |
|
["'] {
    /* String/character constant: gather everything up to the matching
     * quote character into a freshly allocated, NUL-terminated buffer and
     * hand that buffer to the parser through yylval.str_val.
     *
     * NOTE(review): the loop relies on flex's input() returning EOF at end
     * of input; POSIX lex specifies a return of 0 instead - confirm if this
     * scanner is ever built with something other than flex.
     */
    int inch;
    size_t count = 0;		/* characters stored so far */
    char endch = yytext[0];	/* the quote that opened the constant */

    strbuf = malloc(STRBUF_ALLOC_SIZE);
    if (!strbuf)
	Fatal(FATAL_NOMEM);
    strbuf_size = STRBUF_ALLOC_SIZE;

    for (inch = input();
	 inch != EOF && inch != endch && inch != '\n';
	 inch = input()) {
	strbuf[count++] = (char)inch;
	/* Grow as soon as the buffer is full so that there is always room
	 * for one more character or the terminating NUL.
	 */
	if (count >= strbuf_size) {
	    char *grown = realloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);

	    if (!grown)
		Fatal(FATAL_NOMEM);
	    strbuf = grown;
	    strbuf_size += STRBUF_ALLOC_SIZE;
	}
    }

    /* The newline that ends an unterminated string has already been
     * consumed by input() and is not handed back (unput is disabled).
     */
    if (inch == '\n')
	Error(_("unterminated string"));
    else if (inch == EOF)
	Error(_("unexpected end of file in string"));

    strbuf[count] = '\0';

    yylval.str_val = strbuf;
    return STRING;
}
|
|
|
/* directive: [name value] */ |
|
^{WS}*"["			{ BEGIN DIRECTIVE; return '['; }

    /* ']' or a newline ends the directive in either directive state;
     * whitespace inside a directive is skipped.
     */
<DIRECTIVE,DIRECTIVE2>"]"	{ BEGIN INITIAL; return ']'; }
<DIRECTIVE,DIRECTIVE2>\n	{ BEGIN INITIAL; return '\n'; }
<DIRECTIVE,DIRECTIVE2>{WS}+	{ /* skip */ }

    /* directive name: a run of letters; afterwards values are expected */
<DIRECTIVE>[a-z]+ {
    char *name;

    BEGIN DIRECTIVE2;
    name = strdup(yytext);
    yylval.str_val = name;
    if (!name)
	Fatal(FATAL_NOMEM);
    return DIRECTIVE_NAME;
}

    /* directive value: everything printable except for ' ', '[' and ']'. */
<DIRECTIVE2>[!-@a-z\\^-`{|}~]+ {
    char *value = strdup(yytext);

    yylval.str_val = value;
    if (!value)
	Fatal(FATAL_NOMEM);
    return DIRECTIVE_VAL;
}

    /* anything else inside a directive is reported and dropped */
<DIRECTIVE,DIRECTIVE2>. {
    Warning(_("ignoring unrecognized character `%s'"),
	    conv_unprint(yytext[0]));
}
|
|
|
/* size specifiers */ |
|
byte	{ yylval.int_val = 1;  return BYTE; }
word	{ yylval.int_val = 2;  return WORD; }
dword	{ yylval.int_val = 4;  return DWORD; }
qword	{ yylval.int_val = 8;  return QWORD; }
tword	{ yylval.int_val = 10; return TWORD; }
dqword	{ yylval.int_val = 16; return DQWORD; }
|
|
|
/* pseudo-instructions */ |
|
db	{ yylval.int_val = 1;  return DECLARE_DATA; }
dw	{ yylval.int_val = 2;  return DECLARE_DATA; }
dd	{ yylval.int_val = 4;  return DECLARE_DATA; }
dq	{ yylval.int_val = 8;  return DECLARE_DATA; }
dt	{ yylval.int_val = 10; return DECLARE_DATA; }

    /* space reservation: int_val uses the same 1/2/4/8/10 values as above */
resb	{ yylval.int_val = 1;  return RESERVE_SPACE; }
resw	{ yylval.int_val = 2;  return RESERVE_SPACE; }
resd	{ yylval.int_val = 4;  return RESERVE_SPACE; }
resq	{ yylval.int_val = 8;  return RESERVE_SPACE; }
rest	{ yylval.int_val = 10; return RESERVE_SPACE; }

    /* keywords that carry no semantic value */
incbin	{ return INCBIN; }
equ	{ return EQU; }
times	{ return TIMES; }

seg	{ return SEG; }
wrt	{ return WRT; }
near	{ return NEAR; }
short	{ return SHORT; }
far	{ return FAR; }

nosplit	{ return NOSPLIT; }
org	{ return ORG; }
to	{ return TO; }
|
|
|
/* operand size overrides */ |
|
o16	{ yylval.int_val = 16; return OPERSIZE; }
o32	{ yylval.int_val = 32; return OPERSIZE; }

    /* address size overrides */
a16	{ yylval.int_val = 16; return ADDRSIZE; }
a32	{ yylval.int_val = 32; return ADDRSIZE; }

    /* instruction prefixes; synonyms share one action via '|' */
lock	{ return LOCK; }
repne	|
repnz	{ return REPNZ; }
rep	{ return REP; }
repe	|
repz	{ return REPZ; }
|
|
|
/* control, debug, and test registers */ |
|
cr4		{ yylval.int_val = 4; return CR4; }
    /* for the two-letter families the register number is the digit at
     * yytext[2]
     */
cr[023]		{ yylval.int_val = yytext[2]-'0'; return CRREG_NOTCR4; }
dr[0-367]	{ yylval.int_val = yytext[2]-'0'; return DRREG; }
tr[3-7]		{ yylval.int_val = yytext[2]-'0'; return TRREG; }

    /* floating point, MMX, and SSE registers */
st0		{ yylval.int_val = 0; return ST0; }
st[1-7]		{ yylval.int_val = yytext[2]-'0'; return FPUREG_NOTST0; }
mm[0-7]		{ yylval.int_val = yytext[2]-'0'; return MMXREG; }
    /* three-letter prefix, so the digit is at yytext[3] */
xmm[0-7]	{ yylval.int_val = yytext[3]-'0'; return XMMREG; }
|
|
|
/* integer registers */ |
|
eax	{ yylval.int_val = 0; return REG_EAX; }
ecx	{ yylval.int_val = 1; return REG_ECX; }
edx	{ yylval.int_val = 2; return REG_EDX; }
ebx	{ yylval.int_val = 3; return REG_EBX; }
esp	{ yylval.int_val = 4; return REG_ESP; }
ebp	{ yylval.int_val = 5; return REG_EBP; }
esi	{ yylval.int_val = 6; return REG_ESI; }
edi	{ yylval.int_val = 7; return REG_EDI; }

    /* 16-bit registers, numbered 0-7 in the same order */
ax	{ yylval.int_val = 0; return REG_AX; }
cx	{ yylval.int_val = 1; return REG_CX; }
dx	{ yylval.int_val = 2; return REG_DX; }
bx	{ yylval.int_val = 3; return REG_BX; }
sp	{ yylval.int_val = 4; return REG_SP; }
bp	{ yylval.int_val = 5; return REG_BP; }
si	{ yylval.int_val = 6; return REG_SI; }
di	{ yylval.int_val = 7; return REG_DI; }

    /* 8-bit registers: low halves 0-3, high halves 4-7 */
al	{ yylval.int_val = 0; return REG_AL; }
cl	{ yylval.int_val = 1; return REG_CL; }
dl	{ yylval.int_val = 2; return REG_DL; }
bl	{ yylval.int_val = 3; return REG_BL; }
ah	{ yylval.int_val = 4; return REG_AH; }
ch	{ yylval.int_val = 5; return REG_CH; }
dh	{ yylval.int_val = 6; return REG_DH; }
bh	{ yylval.int_val = 7; return REG_BH; }
|
|
|
/* segment registers */ |
|
es	{ yylval.int_val = 0; return REG_ES; }
cs	{ yylval.int_val = 1; return REG_CS; }
ss	{ yylval.int_val = 2; return REG_SS; }
ds	{ yylval.int_val = 3; return REG_DS; }
fs	{ yylval.int_val = 4; return REG_FS; }
gs	{ yylval.int_val = 5; return REG_GS; }

    /* two-character operators */
"<<"	{ return LEFT_OP; }
">>"	{ return RIGHT_OP; }
"//"	{ return SIGNDIV; }
"%%"	{ return SIGNMOD; }

    /* single-character operators and punctuation: the character itself is
     * the token code
     */
[-+|^&*/%~():[\],]	{ return *yytext; }
|
|
|
/* special non-local ..@label and labels like ..start */ |
|
"$$"|"$"|\.\.[a-z0-9_$#@~.?]+ {
    /* The `$$' and `$' tokens plus `..'-prefixed special names.  The dollar
     * signs are quoted because an unquoted `$' is lex's end-of-line anchor
     * operator; quoting makes them literal regardless of where they sit in
     * the pattern.  The parser receives a heap copy of the text together
     * with the current line number.
     */
    yylval.syminfo.name = strdup(yytext);
    if (!yylval.syminfo.name)
	Fatal(FATAL_NOMEM);
    yylval.syminfo.line = line_number;

    return SPECIAL_ID;
}
|
|
|
/* local label (.label) */ |
|
\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* {
    /* Local label: the name given to the parser is the last base label's
     * name followed by the local text.  With no base label seen yet, a
     * warning is issued and the local text is used unchanged.
     */
    char *fullname;

    if (locallabel_base) {
	fullname = malloc(strlen(locallabel_base->name) +
			  strlen(yytext) + 1);
	if (!fullname)
	    Fatal(FATAL_NOMEM);
	strcpy(fullname, locallabel_base->name);
	strcat(fullname, yytext);
    } else {
	Warning(_("no non-local label before `%s'"), yytext);
	fullname = strdup(yytext);
	if (!fullname)
	    Fatal(FATAL_NOMEM);
    }

    yylval.syminfo.name = fullname;
    yylval.syminfo.line = line_number;

    return LOCAL_ID;
}
|
|
|
/* instructions */ |
|
/* @INSTRUCTIONS@ */ |
|
|
|
/* label */ |
|
[a-z_?][a-z0-9_$#@~.?]* {
    /* Any other identifier: pass a heap copy of its text and the current
     * line number to the parser.
     */
    char *name = strdup(yytext);

    if (!name)
	Fatal(FATAL_NOMEM);
    yylval.syminfo.name = name;
    yylval.syminfo.line = line_number;

    return ID;
}
|
|
|
;.*	{ /* comment: discard everything up to the end of the line */ }

{WS}+	{ /* discard whitespace */ }

\n	{ return '\n'; }

    /* catch-all: warn about any character no other rule accepted */
.	{
    Warning(_("ignoring unrecognized character `%s'"),
	    conv_unprint(yytext[0]));
}
|
|
|
|