Port re2c parser from YACC to recursive descent.

svn path=/trunk/yasm/; revision=1718
0.6.0
Peter Johnson 18 years ago
parent 6e6580144a
commit 36a4a6c3a3
  1. 2
      Mkfiles/Makefile.dj
  2. 2
      Mkfiles/Makefile.flat
  3. 8
      Mkfiles/vc/re2c/re2c.vcproj
  4. 32
      Mkfiles/vc8/re2c/re2c.vcproj
  5. 42
      tools/re2c/Makefile.inc
  6. 249
      tools/re2c/parser.c
  7. 33
      tools/re2c/parser.h
  8. 181
      tools/re2c/re2c-parser.y
  9. 2
      tools/re2c/scanner.c
  10. 2
      tools/re2c/scanner.re

@ -185,7 +185,7 @@ RE2C_SRCS= \
tools/re2c/main.c \
tools/re2c/code.c \
tools/re2c/dfa.c \
re2c-parser.c \
tools/re2c/parser.c \
tools/re2c/actions.c \
tools/re2c/scanner.c \
tools/re2c/mbo_getopt.c \

@ -188,7 +188,7 @@ RE2C_SRCS= \
tools/re2c/main.c \
tools/re2c/code.c \
tools/re2c/dfa.c \
re2c-parser.c \
tools/re2c/parser.c \
tools/re2c/actions.c \
tools/re2c/scanner.c \
tools/re2c/mbo_getopt.c \

@ -133,10 +133,7 @@
RelativePath="..\..\..\tools\re2c\mbo_getopt.c">
</File>
<File
RelativePath="..\..\..\re2c-parser.c">
</File>
<File
RelativePath="..\..\..\re2c-parser.h">
RelativePath="..\..\..\tools\re2c\parser.c">
</File>
<File
RelativePath="..\..\..\tools\re2c\scanner.c">
@ -170,6 +167,9 @@
<File
RelativePath="..\..\..\tools\re2c\parse.h">
</File>
<File
RelativePath="..\..\..\tools\re2c\parser.h">
</File>
<File
RelativePath="..\..\..\tools\re2c\re.h">
</File>

@ -218,35 +218,9 @@
>
</File>
<File
RelativePath="..\..\..\re2c-parser.c"
RelativePath="..\..\..\tools\re2c\parser.c"
>
</File>
<File
RelativePath="..\..\..\re2c-parser.h"
>
</File>
<File
RelativePath="..\..\..\tools\re2c\re2c-parser.y"
>
<FileConfiguration
Name="Debug|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="Bison"
OutputStem="../../../re2c-parser"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="Bison"
OutputStem="../../../re2c-parser"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\tools\re2c\scanner.c"
>
@ -285,6 +259,10 @@
RelativePath="..\..\..\tools\re2c\parse.h"
>
</File>
<File
RelativePath="..\..\..\tools\re2c\parser.h"
>
</File>
<File
RelativePath="..\..\..\tools\re2c\re.h"
>

@ -16,7 +16,8 @@ EXTRA_DIST += tools/re2c/code.c
EXTRA_DIST += tools/re2c/dfa.h
EXTRA_DIST += tools/re2c/dfa.c
EXTRA_DIST += tools/re2c/parse.h
EXTRA_DIST += tools/re2c/re2c-parser.y
EXTRA_DIST += tools/re2c/parser.h
EXTRA_DIST += tools/re2c/parser.c
EXTRA_DIST += tools/re2c/actions.c
EXTRA_DIST += tools/re2c/scanner.h
EXTRA_DIST += tools/re2c/scanner.c
@ -45,8 +46,8 @@ re2c-code.$(OBJEXT): tools/re2c/code.c
re2c-dfa.$(OBJEXT): tools/re2c/dfa.c
$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/dfa.c || echo '$(srcdir)/'`tools/re2c/dfa.c
re2c-parser.$(OBJEXT): re2c-parser.c
$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f re2c-parser.c || echo '$(srcdir)/'`re2c-parser.c
re2c-parser.$(OBJEXT): tools/re2c/parser.c
$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/parser.c || echo '$(srcdir)/'`tools/re2c/parser.c
re2c-actions.$(OBJEXT): tools/re2c/actions.c
$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/actions.c || echo '$(srcdir)/'`tools/re2c/actions.c
@ -63,41 +64,6 @@ re2c-substr.$(OBJEXT): tools/re2c/substr.c
re2c-translate.$(OBJEXT): tools/re2c/translate.c
$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/translate.c || echo '$(srcdir)/'`tools/re2c/translate.c
re2c-parser.c: tools/re2c/re2c-parser.y
$(YACC) $(YFLAGS) $(AM_YFLAGS) `test -f 'tools/re2c/re2c-parser.y' || echo '$(srcdir)/'`tools/re2c/re2c-parser.y
if test -f y.tab.h; then \
to=`echo "re2c-parser_H" | sed \
-e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`; \
sed "/^#/ s/Y_TAB_H/$$to/g" y.tab.h >re2c-parser.ht; \
rm -f y.tab.h; \
if cmp -s re2c-parser.ht re2c-parser.h; then \
rm -f re2c-parser.ht ;\
else \
mv re2c-parser.ht re2c-parser.h; \
fi; \
fi
if test -f y.output; then \
mv y.output re2c-parser.output; \
fi
sed '/^#/ s|y\.tab\.c|re2c-parser.c|' y.tab.c >re2c-parser.ct && mv re2c-parser.ct re2c-parser.c
rm -f y.tab.c
re2c-parser.h: re2c-parser.c
@if test ! -f $@; then \
rm -f re2c-parser.c; \
$(MAKE) re2c-parser.c; \
else :; fi
BUILT_SOURCES += re2c-parser.c
BUILT_SOURCES += re2c-parser.h
CLEANFILES += re2c-parser.c
CLEANFILES += re2c-parser.h
EXTRA_DIST += re2c-parser.c
EXTRA_DIST += re2c-parser.h
EXTRA_DIST += tools/re2c/CHANGELOG
EXTRA_DIST += tools/re2c/NO_WARRANTY
EXTRA_DIST += tools/re2c/README

@ -0,0 +1,249 @@
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "tools/re2c/globals.h"
#include "tools/re2c/parse.h"
#include "tools/re2c/parser.h"
/* Token source used by the parser; defined further down in this file. */
int yylex(void);

/*
 * Recursive-descent routines, one per grammar level, listed from the
 * loosest-binding construct to the tightest:
 *   expr    - alternation with '|'
 *   diff    - difference with '\'
 *   term    - concatenation of factors
 *   factor  - primary followed by closure operators
 *   primary - ID, RANGE, STRING or a parenthesised expr
 */
static RegExp *parse_primary(void);
static RegExp *parse_factor(void);
static RegExp *parse_term(void);
static RegExp *parse_diff(void);
static RegExp *parse_expr(void);

/* Results and input of the current parse. */
static unsigned int accept;     /* number handed to each new rule, then incremented */
static RegExp *spec;            /* alternation of all rules parsed so far, or NULL */
static Scanner *in;             /* scanner the tokens are read from */

/* Token window: the current token plus at most one buffered lookahead. */
static int curtok;              /* token most recently fetched by get_next_token() */
static int peektok;             /* buffered lookahead token, NONE when empty */
yystype yylval;                 /* semantic value accompanying the current token */
static yystype peekval;         /* semantic value accompanying peektok */

/* Advance to the next token; the expression evaluates to the new curtok. */
#define get_next_token() (curtok = yylex())
/*
 * Read one token ahead of curtok without disturbing curtok or yylval.
 *
 * The token and its value are parked in peektok/peekval, from where the
 * next yylex() call hands them out.  Only one token can be buffered; a
 * second request while one is pending is reported via Scanner_fatal().
 */
static void
get_peek_token(void)
{
    yystype saved_val = yylval;     /* structure copy */

    if (peektok != NONE) {
        Scanner_fatal(in, "more than one token of lookahead?");
    }

    /* After yylex() returns, yylval holds the value of the peeked token. */
    peektok = yylex();
    peekval = yylval;               /* structure copy */

    /* Put back the value that belongs to the current token. */
    yylval = saved_val;
}
static void
yyparse(void)
{
RegExp *re, *look;
accept = 0;
spec = NULL;
get_next_token();
while (curtok != 0) {
switch (curtok) {
case ID:
get_peek_token();
if (peektok == '=') {
/* ID = expr; */
Symbol *sym = yylval.symbol;
get_next_token(); /* id */
get_next_token(); /* = */
re = parse_expr();
if (curtok != ';')
Scanner_fatal(in, "missing `;' after regexp");
get_next_token(); /* ; */
if (sym->re)
Scanner_fatal(in, "sym already defined");
sym->re = re;
break;
}
/*@fallthrough@*/
default:
/* rule: expr [/ expr] CODE */
re = parse_expr();
if (!re)
Scanner_fatal(in, "expression syntax error");
if (curtok == '/') {
get_next_token(); /* / */
look = parse_expr();
} else
look = RegExp_new_NullOp();
if (curtok != CODE)
Scanner_fatal(in, "missing code after regexp");
re = RegExp_new_RuleOp(re, look, yylval.token, accept++);
get_next_token(); /* CODE */
spec = spec ? mkAlt(spec, re) : re;
}
}
}
static RegExp *
parse_expr(void)
{
RegExp *e, *f;
e = parse_diff();
while (curtok == '|') {
get_next_token(); /* | */
f = parse_diff();
e = mkAlt(e, f);
}
return e;
}
static RegExp *
parse_diff(void)
{
RegExp *e, *f;
e = parse_term();
while (curtok == '\\') {
get_next_token(); /* \ */
f = parse_term();
e = mkDiff(e, f);
if(!e)
Scanner_fatal(in, "can only difference char sets");
}
return e;
}
/*
 * term: factor { factor }
 *
 * Concatenates consecutive factors with RegExp_new_CatOp().  The sequence
 * ends at the first parse_factor() call that yields NULL.  The result of
 * the first call is returned unchanged when nothing follows it.
 */
static RegExp *
parse_term(void)
{
    RegExp *result = parse_factor();
    RegExp *next;

    for (next = parse_factor(); next != NULL; next = parse_factor()) {
        result = RegExp_new_CatOp(result, next);
    }
    return result;
}
/*
 * factor: primary { CLOSE+ | CLOSESIZE }
 *
 * Applies closure operators to a primary.  A run of adjacent CLOSE tokens
 * is merged into one operator: identical operators keep their meaning, any
 * mix collapses to '*'.  CLOSESIZE applies a counted repetition using the
 * min/max carried in yylval.extop.
 *
 * Returns NULL when no primary starts at the current token.  A closure
 * operator with nothing in front of it is reported through Scanner_fatal()
 * rather than being applied to a NULL operand.
 */
static RegExp *
parse_factor(void)
{
    RegExp *e;
    char ch;

    e = parse_primary();
    if (!e) {
        if (curtok == CLOSE || curtok == CLOSESIZE)
            Scanner_fatal(in, "closure operator without an operand");
        return NULL;
    }

    while (curtok == CLOSE || curtok == CLOSESIZE) {
        switch (curtok) {
            case CLOSE:
                ch = yylval.op;
                /* Swallow the whole run of CLOSE tokens, merging them. */
                while (get_next_token() == CLOSE) {
                    if (ch != yylval.op)
                        ch = '*';
                }
                switch (ch) {
                    case '*':
                        e = mkAlt(RegExp_new_CloseOp(e), RegExp_new_NullOp());
                        break;
                    case '+':
                        e = RegExp_new_CloseOp(e);
                        break;
                    case '?':
                        e = mkAlt(e, RegExp_new_NullOp());
                        break;
                    default:
                        /* Any other operator character leaves e untouched. */
                        break;
                }
                break;
            case CLOSESIZE:
                e = RegExp_new_CloseVOp(e, yylval.extop.minsize,
                                        yylval.extop.maxsize);
                get_next_token(); /* CLOSESIZE */
                break;
            default:
                /* Not reachable given the loop condition; kept as a guard. */
                Scanner_fatal(in, "parse error");
                break;
        }
    }
    return e;
}
/*
 * primary: ID | RANGE | STRING | '(' expr ')'
 *
 * Returns the expression for the primary at the current token and moves
 * past it.  Returns NULL, consuming nothing, when the current token cannot
 * start a primary.
 *
 * An ID must already have a definition (symbol->re set).  Parentheses must
 * enclose a non-empty expression: "()" is reported through Scanner_fatal()
 * instead of returning NULL, which callers would read as "no primary" and
 * thereby silently drop the parentheses.
 */
static RegExp *
parse_primary(void)
{
    RegExp *e;

    switch (curtok) {
        case ID:
            if (!yylval.symbol->re)
                Scanner_fatal(in, "can't find symbol");
            e = yylval.symbol->re;
            get_next_token();
            break;
        case RANGE:
        case STRING:
            e = yylval.regexp;
            get_next_token();
            break;
        case '(':
            get_next_token();
            e = parse_expr();
            if (!e)
                Scanner_fatal(in, "expression syntax error");
            if (curtok != ')')
                Scanner_fatal(in, "missing closing parenthesis");
            get_next_token();
            break;
        default:
            return NULL;
    }
    return e;
}
/*
 * Return the next token.
 *
 * A token buffered by get_peek_token() is handed out first: its value is
 * copied into yylval and the buffer is marked empty again.  Otherwise the
 * token comes straight from Scanner_scan().
 */
int
yylex(void)
{
    int buffered;

    if (peektok == NONE) {
        return Scanner_scan(in);
    }

    buffered = peektok;
    yylval = peekval;       /* structure copy */
    peektok = NONE;
    return buffered;
}
/*
 * Write a `#line <line> "<file>"` directive to `o` and count the emitted
 * line in `oline`.  Nothing is written when iFlag is set.
 *
 * The file name is `fileName`, or "<stdin>" when that is NULL.  Each
 * backslash in it is written as two backslashes.
 *
 * The name is walked character by character instead of being split with
 * strtok(): strtok() returns NULL for an empty or all-backslash name (which
 * would then be passed to "%s"), and it drops leading/trailing backslashes
 * and collapses runs of them, altering the name.
 */
void line_source(FILE *o, unsigned int line)
{
    const char *p;

    if (iFlag)
        return;
    fprintf(o, "#line %u \"", line);
    for (p = (fileName != NULL) ? fileName : "<stdin>"; *p != '\0'; p++) {
        if (*p == '\\')
            fputs("\\\\", o);
        else
            putc(*p, o);
    }
    fputs("\"\n", o); oline++;
}
/*
 * Translate input stream `i` to output stream `o`.
 *
 * Writes the "Generated by" banner, creates the scanner and then loops for
 * as long as Scanner_echo() returns non-zero: each time, one specification
 * is parsed, code is generated for it when it contains at least one rule,
 * and a #line directive is written.
 */
void parse(FILE *i, FILE *o){
    time_t stamp;

    time(&stamp);
    peektok = NONE;             /* lookahead buffer starts out empty */

    /* ctime() text ends in a newline, so the banner spans two lines. */
    fputs("/* Generated by re2c 0.9.1-C on ", o);
    fprintf(o, "%-24s", ctime(&stamp));
    fputs(" */\n", o);
    oline += 2;

    in = Scanner_new(i);
    line_source(o, Scanner_line(in));

    for (;;) {
        if (!Scanner_echo(in, o))
            break;
        yyparse();
        if (spec != NULL)
            genCode(o, spec);
        line_source(o, Scanner_line(in));
    }
}

@ -0,0 +1,33 @@
/*
 * Token codes and semantic-value type shared by the re2c scanner and the
 * hand-written parser.
 *
 * This header includes nothing itself: Symbol, RegExp, Token and ExtOp must
 * already be declared when it is included (the visible includers pull in
 * tools/re2c/parse.h first -- presumably where they come from).
 */
#ifndef RE2C_PARSER_H
#define RE2C_PARSER_H
/*
 * Tokens.  The first six names are in the same order as the %token list of
 * the former yacc grammar.  All codes are above 255, so they cannot collide
 * with single-character tokens such as '=' or '|'.
 */
enum yytokentype {
CLOSESIZE = 258,
CLOSE = 259,
ID = 260,
CODE = 261,
RANGE = 262,
STRING = 263,
/* Not a real token: marks the parser's lookahead buffer as empty. */
NONE = 264
};
/* The same codes as preprocessor macros, with values matching the enum. */
#define CLOSESIZE 258
#define CLOSE 259
#define ID 260
#define CODE 261
#define RANGE 262
#define STRING 263
#define NONE 264
/* Semantic value of a token; which member is valid depends on the token. */
typedef union {
Symbol *symbol; /* ID */
RegExp *regexp; /* RANGE, STRING */
Token *token; /* CODE */
char op; /* CLOSE: the operator character */
ExtOp extop; /* CLOSESIZE: carries minsize/maxsize */
} yystype;
/* Value of the current token; defined in tools/re2c/parser.c. */
extern yystype yylval;
#endif

@ -1,181 +0,0 @@
/*
 * yacc grammar for re2c specifications: a specification is a sequence of
 * named definitions (ID '=' expr ';') and rules (expr [ '/' expr ] CODE).
 */
%{
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "tools/re2c/globals.h"
#include "tools/re2c/parse.h"
/* Work around bug in Bison 2.1 */
/* (parse() accordingly calls yyparse(NULL).) */
#define YYPARSE_PARAM unused
int yylex(void);
void yyerror(const char*);
/* Number handed to each new rule, then incremented. */
static unsigned int accept;
/* Alternation of all rules parsed so far; NULL until the first rule. */
static RegExp *spec;
/* Scanner the tokens are read from. */
static Scanner *in;
%}
%start spec
/* Semantic values; the %type lines below say which member each symbol uses. */
%union {
Symbol *symbol;
RegExp *regexp;
Token *token;
char op;
ExtOp extop;
}
%token CLOSESIZE CLOSE ID CODE RANGE STRING
%type <op> CLOSE
%type <op> close
%type <extop> CLOSESIZE
%type <symbol> ID
%type <token> CODE
%type <regexp> RANGE STRING
%type <regexp> rule look expr diff term factor primary
%%
/* spec: resets the state, then collects rules into `spec` via mkAlt(). */
spec :
{ accept = 0;
spec = NULL; }
| spec rule
{ spec = spec? mkAlt(spec, $2) : $2; }
| spec decl
;
/* decl: binds an expression to a symbol; redefinition is fatal. */
decl : ID '=' expr ';'
{ if($1->re)
Scanner_fatal(in, "sym already defined");
$1->re = $3; }
;
/* rule: expression, optional lookahead, code; numbered through accept. */
rule : expr look CODE
{ $$ = RegExp_new_RuleOp($1, $2, $3, accept++); }
;
/* look: optional '/' expr; a null op stands in when it is absent. */
look :
{ $$ = RegExp_new_NullOp(); }
| '/' expr
{ $$ = $2; }
;
/* expr: left-recursive alternation with '|'. */
expr : diff
{ $$ = $1; }
| expr '|' diff
{ $$ = mkAlt($1, $3); }
;
/* diff: left-recursive difference with '\'; a NULL from mkDiff() is fatal. */
diff : term
{ $$ = $1; }
| diff '\\' term
{ $$ = mkDiff($1, $3);
if(!$$)
Scanner_fatal(in, "can only difference char sets");
}
;
/* term: concatenation of one or more factors. */
term : factor
{ $$ = $1; }
| term factor
{ $$ = RegExp_new_CatOp($1, $2); }
;
/* factor: a primary, optionally followed by closure operators or a counted repetition. */
factor : primary
{ $$ = $1; }
| primary close
{
switch($2){
case '*':
$$ = mkAlt(RegExp_new_CloseOp($1), RegExp_new_NullOp());
break;
case '+':
$$ = RegExp_new_CloseOp($1);
break;
case '?':
$$ = mkAlt($1, RegExp_new_NullOp());
break;
}
}
| primary CLOSESIZE
{
$$ = RegExp_new_CloseVOp($1, $2.minsize, $2.maxsize);
}
;
/* close: a run of CLOSE tokens; identical operators are kept, a mix becomes '*'. */
close : CLOSE
{ $$ = $1; }
| close CLOSE
{ $$ = ($1 == $2) ? $1 : '*'; }
;
/* primary: a defined symbol, a range, a string or a parenthesised expr. */
primary : ID
{ if(!$1->re)
Scanner_fatal(in, "can't find symbol");
$$ = $1->re; }
| RANGE
{ $$ = $1; }
| STRING
{ $$ = $1; }
| '(' expr ')'
{ $$ = $2; }
;
%%
/* Parser error hook: forwards the message `s` to Scanner_fatal(). */
void
yyerror(const char *s)
{
    Scanner_fatal(in, s);
}
/*
 * Token source for the parser: returns whatever Scanner_scan() produces.
 * The parameter list is spelled (void) to match the prototype declared in
 * the prologue of this file.
 */
int
yylex(void)
{
    return Scanner_scan(in);
}
/*
 * Write a `#line <line> "<file>"` directive to `o` and count the emitted
 * line in `oline`.  Nothing is written when iFlag is set.
 *
 * The file name is `fileName`, or "<stdin>" when that is NULL.  Each
 * backslash in it is written as two backslashes.
 *
 * The name is walked character by character instead of being split with
 * strtok(): strtok() returns NULL for an empty or all-backslash name (which
 * would then be passed to "%s"), and it drops leading/trailing backslashes
 * and collapses runs of them, altering the name.
 */
void line_source(FILE *o, unsigned int line)
{
    const char *p;

    if (iFlag)
        return;
    fprintf(o, "#line %u \"", line);
    for (p = (fileName != NULL) ? fileName : "<stdin>"; *p != '\0'; p++) {
        if (*p == '\\')
            fputs("\\\\", o);
        else
            putc(*p, o);
    }
    fputs("\"\n", o); oline++;
}
/*
 * Translate input stream `i` to output stream `o`.
 *
 * Writes the "Generated by" banner, creates the scanner and then loops for
 * as long as Scanner_echo() returns non-zero: each time, one specification
 * is parsed, code is generated for it when it contains at least one rule,
 * and a #line directive is written.
 */
void parse(FILE *i, FILE *o){
    time_t stamp;

    time(&stamp);

    /* ctime() text ends in a newline, so the banner spans two lines. */
    fputs("/* Generated by re2c 0.9.1-C on ", o);
    fprintf(o, "%-24s", ctime(&stamp));
    fputs(" */\n", o);
    oline += 2;

    in = Scanner_new(i);
    line_source(o, Scanner_line(in));

    for (;;) {
        if (!Scanner_echo(in, o))
            break;
        yyparse(NULL);
        if (spec != NULL)
            genCode(o, spec);
        line_source(o, Scanner_line(in));
    }
}

@ -6,7 +6,7 @@
#include "tools/re2c/scanner.h"
#include "tools/re2c/parse.h"
#include "tools/re2c/globals.h"
#include "re2c-parser.h"
#include "tools/re2c/parser.h"
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))

@ -3,7 +3,7 @@
#include "tools/re2c/scanner.h"
#include "tools/re2c/parse.h"
#include "tools/re2c/globals.h"
#include "re2c-parser.h"
#include "tools/re2c/parser.h"
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))

Loading…
Cancel
Save