From 88d5a1e6c4f5c7037fa49e60472360bfeb295c89 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 8 Apr 2002 07:45:03 +0000 Subject: [PATCH] Continuing C conversion (not everything is compilable yet). svn path=/trunk/yasm/; revision=581 --- tools/re2c/actions.c | 575 ++++++++++++++++++++++++------------------ tools/re2c/code.c | 240 +++++++++--------- tools/re2c/dfa.c | 100 ++++---- tools/re2c/dfa.h | 255 ++++++++++--------- tools/re2c/globals.h | 4 +- tools/re2c/ins.h | 31 ++- tools/re2c/parse.h | 29 ++- tools/re2c/parser.h | 29 ++- tools/re2c/re.h | 298 +++++++++++----------- tools/re2c/scanner.h | 50 ++-- tools/re2c/scanner.re | 153 +++++------ tools/re2c/token.h | 30 ++- 12 files changed, 974 insertions(+), 820 deletions(-) diff --git a/tools/re2c/actions.c b/tools/re2c/actions.c index 0260b5fb..8d4fbd13 100644 --- a/tools/re2c/actions.c +++ b/tools/re2c/actions.c @@ -1,25 +1,33 @@ #include #include -#include -#include +#include #include "globals.h" #include "parser.h" #include "dfa.h" -Symbol *Symbol::first = NULL; +static Symbol *first = NULL; -Symbol::Symbol(const SubStr &str) : next(first), name(str), re(NULL) { - first = this; +void +Symbol_init(Symbol *r, const SubStr *str) +{ + r->next = first; + Str_init(&r->name, str); + r->re = NULL; + first = r; } -Symbol *Symbol::find(const SubStr &str){ - for(Symbol *sym = first; sym; sym = sym->next) - if(sym->name == str) return sym; - return new Symbol(str); +Symbol * +Symbol_find(const SubStr *str) +{ + Symbol *sym; + for(sym = first; sym; sym = sym->next) + if(SubStr_eq(&sym->name, str)) return sym; + return Symbol_new(str); } -void showIns(ostream &o, const Ins &i, const Ins &base){ +/* +void showIns(FILE *o, const Ins *i, const Ins *base){ o.width(3); o << &i - &base << ": "; switch(i.i.tag){ @@ -43,36 +51,261 @@ void showIns(ostream &o, const Ins &i, const Ins &base){ } o << "\n"; } +*/ + +static uint +AltOp_fixedLength(RegExp *r) +{ + uint l1 = RegExp_fixedLength(r->d.AltCatOp.exp1); + /* XXX? Should be exp2? */ + uint l2 = RegExp_fixedLength(r->d.AltCatOp.exp1); + if(l1 != l2 || l1 == ~0u) + return ~0; + return l1; +} -uint RegExp::fixedLength(){ +static uint +CatOp_fixedLength(RegExp *r) +{ + uint l1, l2; + if((l1 = RegExp_fixedLength(r->d.AltCatOp.exp1)) != ~0u ) + if((l2 = RegExp_fixedLength(r->d.AltCatOp.exp2)) != ~0u) + return l1+l2; return ~0; } -char *NullOp::type = "NullOp"; +uint +RegExp_fixedLength(RegExp *r) +{ + switch (r->type) { + case NULLOP: + return 0; + case MATCHOP: + return 1; + case ALTOP: + return AltOp_fixedLength(r); + case CATOP: + return CatOp_fixedLength(r); + default: + return ~0; + } + return ~0; +} -void NullOp::calcSize(Char*){ - size = 0; +void +RegExp_calcSize(RegExp *re, Char *rep) +{ + Range *r; + uint c; + + switch (re->type) { + case NULLOP: + re->size = 0; + break; + case MATCHOP: + re->size = 1; + for(r = re->d.match; r; r = r->next) + for(c = r->lb; c < r->ub; ++c) + if(rep[c] == c) + ++re->size; + break; + case RULEOP: + RegExp_calcSize(re->d.RuleOp.exp, rep); + RegExp_calcSize(re->d.RuleOp.ctx, rep); + re->size = re->d.RuleOp.exp->size + re->d.RuleOp.ctx->size + 1; + break; + case ALTOP: + RegExp_calcSize(re->d.AltCatOp.exp1, rep); + RegExp_calcSize(re->d.AltCatOp.exp2, rep); + re->size = re->d.AltCatOp.exp1->size + + re->d.AltCatOp.exp2->size + 2; + break; + case CATOP: + RegExp_calcSize(re->d.AltCatOp.exp1, rep); + RegExp_calcSize(re->d.AltCatOp.exp2, rep); + re->size = re->d.AltCatOp.exp1->size + re->d.AltCatOp.exp2->size; + break; + case CLOSEOP: + RegExp_calcSize(re->d.exp, rep); + re->size = re->d.exp->size + 1; + break; + } } -uint NullOp::fixedLength(){ - return 0; +static void +MatchOp_compile(RegExp *re, Char *rep, Ins *i) +{ + Ins *j; + uint bump; + Range *r; + uint c; + + i->i.tag = CHAR; + i->i.link = &i[re->size]; + j = &i[1]; + bump = re->size; + for(r = re->d.match; r; r = r->next){ + for(c = r->lb; c < r->ub; ++c){ + if(rep[c] == c){ + j->c.value = c; + j->c.bump = --bump; + j++; + } + } + } } -void NullOp::compile(Char*, Ins*){ - ; +static void +AltOp_compile(RegExp *re, Char *rep, Ins *i){ + Ins *j; + + i->i.tag = FORK; + j = &i[re->d.AltCatOp.exp1->size + 1]; + i->i.link = &j[1]; + RegExp_compile(re->d.AltCatOp.exp1, rep, &i[1]); + j->i.tag = GOTO; + j->i.link = &j[re->d.AltCatOp.exp2->size + 1]; + RegExp_compile(re->d.AltCatOp.exp2, rep, &j[1]); +} + +void +RegExp_compile(RegExp *re, Char *rep, Ins *i) +{ + switch (re->type) { + case NULLOP: + break; + case MATCHOP: + MatchOp_compile(re, rep, i); + break; + case RULEOP: + re->d.RuleOp.ins = i; + RegExp_compile(re->d.RuleOp.exp, rep, &i[0]); + i += re->d.RuleOp.exp->size; + RegExp_compile(re->d.RuleOp.ctx, rep, &i[0]); + i += re->d.RuleOp.ctx->size; + i->i.tag = TERM; + i->i.link = re; + break; + case ALTOP: + AltOp_compile(re, rep, i); + break; + case CATOP: + RegExp_compile(re->d.AltCatOp.exp1, rep, &i[0]); + RegExp_compile(re->d.AltCatOp.exp2, rep, + &i[re->d.AltCatOp.exp1->size]); + break; + case CLOSEOP: + RegExp_compile(re->d.exp, rep, &i[0]); + i += re->d.exp->size; + i->i.tag = FORK; + i->i.link = i - re->d.exp->size; + break; + } } -void NullOp::split(CharSet&){ - ; +static void +MatchOp_split(RegExp *re, CharSet *s) +{ + Range *r; + uint c; + + for(r = re->d.match; r; r = r->next){ + for(c = r->lb; c < r->ub; ++c){ + CharPtn *x = s->rep[c], *a = x->nxt; + if(!a){ + if(x->card == 1) + continue; + x->nxt = a = s->freeHead; + if(!(s->freeHead = s->freeHead->nxt)) + s->freeTail = &s->freeHead; + a->nxt = NULL; + x->fix = s->fix; + s->fix = x; + } + if(--(x->card) == 0){ + *s->freeTail = x; + *(s->freeTail = &x->nxt) = NULL; + } + s->rep[c] = a; + ++(a->card); + } + } + for(; s->fix; s->fix = s->fix->fix) + if(s->fix->card) + s->fix->nxt = NULL; +} + +void +RegExp_split(RegExp *re, CharSet *s) +{ + switch (re->type) { + case NULLOP: + break; + case MATCHOP: + MatchOp_split(re, s); + break; + case RULEOP: + RegExp_split(re->d.RuleOp.exp, s); + RegExp_split(re->d.RuleOp.ctx, s); + break; + case ALTOP: + /* FALLTHROUGH */ + case CATOP: + RegExp_split(re->d.AltCatOp.exp1, s); + RegExp_split(re->d.AltCatOp.exp2, s); + break; + case CLOSEOP: + RegExp_split(re->d.exp, s); + break; + } } -ostream& operator<<(ostream &o, const Range &r){ - if((r.ub - r.lb) == 1){ - prtCh(o, r.lb); +void +RegExp_display(RegExp *re, FILE *o) +{ + switch (re->type) { + case NULLOP: + fputc('_', o); + break; + case MATCHOP: + Range_out(o, re->d.match); + break; + case RULEOP: + RegExp_display(re->d.RuleOp.exp, o); + fputc('/', o); + RegExp_display(re->d.RuleOp.ctx, o); + fputc(';', o); + break; + case ALTOP: + RegExp_display(re->d.AltCatOp.exp1, o); + fputc('|', o); + RegExp_display(re->d.AltCatOp.exp2, o); + break; + case CATOP: + RegExp_display(re->d.AltCatOp.exp1, o); + RegExp_display(re->d.AltCatOp.exp2, o); + break; + case CLOSEOP: + RegExp_display(re->d.exp, o); + fputc('+', o); + break; + } +} + +void +Range_out(FILE *o, const Range *r) +{ + if(!r) + return; + + if((r->ub - r->lb) == 1){ + prtCh(o, r->lb); } else { - prtCh(o, r.lb); o << "-"; prtCh(o, r.ub-1); + prtCh(o, r->lb); + fputc('-', o); + prtCh(o, r->ub-1); } - return o << r.next; + Range_out(o, r->next); } Range *doUnion(Range *r1, Range *r2){ @@ -80,9 +313,9 @@ Range *doUnion(Range *r1, Range *r2){ for(;;){ Range *s; if(r1->lb <= r2->lb){ - s = new Range(*r1); + s = Range_new_copy(r1); } else { - s = new Range(*r2); + s = Range_new_copy(r2); } *rP = s; rP = &s->next; @@ -129,13 +362,13 @@ Range *doDiff(Range *r1, Range *r2){ for(; r2 && r2->ub <= r1->lb; r2 = r2->next); for(; r2 && r2->lb < r1->ub; r2 = r2->next){ if(lb < r2->lb){ - *rP = s = new Range(lb, r2->lb); + *rP = s = Range_new(lb, r2->lb); rP = &s->next; } if((lb = r2->ub) >= r1->ub) goto noMore; } - *rP = s = new Range(lb, r1->ub); + *rP = s = Range_new(lb, r1->ub); rP = &s->next; noMore:; } @@ -143,83 +376,23 @@ Range *doDiff(Range *r1, Range *r2){ return r; } -MatchOp *merge(MatchOp *m1, MatchOp *m2){ +RegExp *merge(RegExp *m1, RegExp *m2){ if(!m1) return m2; if(!m2) return m1; - return new MatchOp(doUnion(m1->match, m2->match)); -} - -char *MatchOp::type = "MatchOp"; - -void MatchOp::display(ostream &o) const{ - o << match; -} - -void MatchOp::calcSize(Char *rep){ - size = 1; - for(Range *r = match; r; r = r->next) - for(uint c = r->lb; c < r->ub; ++c) - if(rep[c] == c) - ++size; -} - -uint MatchOp::fixedLength(){ - return 1; -} - -void MatchOp::compile(Char *rep, Ins *i){ - i->i.tag = CHAR; - i->i.link = &i[size]; - Ins *j = &i[1]; - uint bump = size; - for(Range *r = match; r; r = r->next){ - for(uint c = r->lb; c < r->ub; ++c){ - if(rep[c] == c){ - j->c.value = c; - j->c.bump = --bump; - j++; - } - } - } -} - -void MatchOp::split(CharSet &s){ - for(Range *r = match; r; r = r->next){ - for(uint c = r->lb; c < r->ub; ++c){ - CharPtn *x = s.rep[c], *a = x->nxt; - if(!a){ - if(x->card == 1) - continue; - x->nxt = a = s.freeHead; - if(!(s.freeHead = s.freeHead->nxt)) - s.freeTail = &s.freeHead; - a->nxt = NULL; - x->fix = s.fix; - s.fix = x; - } - if(--(x->card) == 0){ - *s.freeTail = x; - *(s.freeTail = &x->nxt) = NULL; - } - s.rep[c] = a; - ++(a->card); - } - } - for(; s.fix; s.fix = s.fix->fix) - if(s.fix->card) - s.fix->nxt = NULL; + return RegExp_new_MatchOp(doUnion(m1->d.match, m2->d.match)); } RegExp *mkDiff(RegExp *e1, RegExp *e2){ - MatchOp *m1, *m2; - if(!(m1 = (MatchOp*) e1->isA(MatchOp::type))) + RegExp *m1, *m2; + Range *r; + if(!(m1 = RegExp_isA(e1, MATCHOP))) return NULL; - if(!(m2 = (MatchOp*) e2->isA(MatchOp::type))) + if(!(m2 = RegExp_isA(e2, MATCHOP))) return NULL; - Range *r = doDiff(m1->match, m2->match); - return r? (RegExp*) new MatchOp(r) : (RegExp*) new NullOp; + r = doDiff(m1->d.match, m2->d.match); + return r? RegExp_new_MatchOp(r) : RegExp_new_NullOp(); } RegExp *doAlt(RegExp *e1, RegExp *e2){ @@ -227,111 +400,35 @@ RegExp *doAlt(RegExp *e1, RegExp *e2){ return e2; if(!e2) return e1; - return new AltOp(e1, e2); + return RegExp_new_AltOp(e1, e2); } RegExp *mkAlt(RegExp *e1, RegExp *e2){ - AltOp *a; - MatchOp *m1, *m2; - if((a = (AltOp*) e1->isA(AltOp::type))){ - if((m1 = (MatchOp*) a->exp1->isA(MatchOp::type))) - e1 = a->exp2; - } else if((m1 = (MatchOp*) e1->isA(MatchOp::type))){ + RegExp *a; + RegExp *m1, *m2; + if((a = RegExp_isA(e1, ALTOP))){ + if((m1 = RegExp_isA(a->d.AltCatOp.exp1, MATCHOP))) + e1 = a->d.AltCatOp.exp2; + } else if((m1 = RegExp_isA(e1, MATCHOP))){ e1 = NULL; } - if((a = (AltOp*) e2->isA(AltOp::type))){ - if((m2 = (MatchOp*) a->exp1->isA(MatchOp::type))) - e2 = a->exp2; - } else if((m2 = (MatchOp*) e2->isA(MatchOp::type))){ + if((a = RegExp_isA(e2, ALTOP))){ + if((m2 = RegExp_isA(a->d.AltCatOp.exp1, MATCHOP))) + e2 = a->d.AltCatOp.exp2; + } else if((m2 = RegExp_isA(e2, MATCHOP))){ e2 = NULL; } return doAlt(merge(m1, m2), doAlt(e1, e2)); } -char *AltOp::type = "AltOp"; - -void AltOp::calcSize(Char *rep){ - exp1->calcSize(rep); - exp2->calcSize(rep); - size = exp1->size + exp2->size + 2; -} - -uint AltOp::fixedLength(){ - uint l1 = exp1->fixedLength(); - uint l2 = exp1->fixedLength(); - if(l1 != l2 || l1 == ~0u) - return ~0; - return l1; -} - -void AltOp::compile(Char *rep, Ins *i){ - i->i.tag = FORK; - Ins *j = &i[exp1->size + 1]; - i->i.link = &j[1]; - exp1->compile(rep, &i[1]); - j->i.tag = GOTO; - j->i.link = &j[exp2->size + 1]; - exp2->compile(rep, &j[1]); -} - -void AltOp::split(CharSet &s){ - exp1->split(s); - exp2->split(s); -} - -char *CatOp::type = "CatOp"; - -void CatOp::calcSize(Char *rep){ - exp1->calcSize(rep); - exp2->calcSize(rep); - size = exp1->size + exp2->size; -} - -uint CatOp::fixedLength(){ - uint l1, l2; - if((l1 = exp1->fixedLength()) != ~0u ) - if((l2 = exp2->fixedLength()) != ~0u) - return l1+l2; - return ~0; -} - -void CatOp::compile(Char *rep, Ins *i){ - exp1->compile(rep, &i[0]); - exp2->compile(rep, &i[exp1->size]); -} - -void CatOp::split(CharSet &s){ - exp1->split(s); - exp2->split(s); -} - -char *CloseOp::type = "CloseOp"; - -void CloseOp::calcSize(Char *rep){ - exp->calcSize(rep); - size = exp->size + 1; -} - -void CloseOp::compile(Char *rep, Ins *i){ - exp->compile(rep, &i[0]); - i += exp->size; - i->i.tag = FORK; - i->i.link = i - exp->size; -} - -void CloseOp::split(CharSet &s){ - exp->split(s); -} - -RegExp *expr(Scanner &); - -uchar unescape(SubStr &s){ - s.len--; +uchar unescape(SubStr *s){ uchar c; - if((c = *s.str++) != '\\' || s.len == 0) + uchar v; + s->len--; + if((c = *s->str++) != '\\' || s->len == 0) return xlat[c]; - s.len--; - switch(c = *s.str++){ + s->len--; + switch(c = *s->str++){ case 'n': return xlat['\n']; case 't': @@ -348,8 +445,8 @@ uchar unescape(SubStr &s){ return xlat['\a']; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { - uchar v = c - '0'; - for(; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++) + v = c - '0'; + for(; s->len != 0 && '0' <= (c = *s->str) && c <= '7'; s->len--, s->str++) v = v*8 + (c - '0'); return v; } default: @@ -357,74 +454,61 @@ uchar unescape(SubStr &s){ } } -Range *getRange(SubStr &s){ +Range *getRange(SubStr *s){ uchar lb = unescape(s), ub; - if(s.len < 2 || *s.str != '-'){ + if(s->len < 2 || *s->str != '-'){ ub = lb; } else { - s.len--; s.str++; + s->len--; s->str++; ub = unescape(s); if(ub < lb){ uchar tmp; tmp = lb; lb = ub; ub = tmp; } } - return new Range(lb, ub+1); + return Range_new(lb, ub+1); } RegExp *matchChar(uint c){ - return new MatchOp(new Range(c, c+1)); + return RegExp_new_MatchOp(Range_new(c, c+1)); } RegExp *strToRE(SubStr s){ + RegExp *re; s.len -= 2; s.str += 1; if(s.len == 0) - return new NullOp; - RegExp *re = matchChar(unescape(s)); + return RegExp_new_NullOp(); + re = matchChar(unescape(&s)); while(s.len > 0) - re = new CatOp(re, matchChar(unescape(s))); + re = RegExp_new_CatOp(re, matchChar(unescape(&s))); return re; } RegExp *ranToRE(SubStr s){ + Range *r; s.len -= 2; s.str += 1; if(s.len == 0) - return new NullOp; - Range *r = getRange(s); + return RegExp_new_NullOp(); + r = getRange(&s); while(s.len > 0) - r = doUnion(r, getRange(s)); - return new MatchOp(r); -} - -char *RuleOp::type = "RuleOp"; - -RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a) - : exp(e), ctx(c), ins(NULL), accept(a), code(t) { - ; -} - -void RuleOp::calcSize(Char *rep){ - exp->calcSize(rep); - ctx->calcSize(rep); - size = exp->size + ctx->size + 1; -} - -void RuleOp::compile(Char *rep, Ins *i){ - ins = i; - exp->compile(rep, &i[0]); - i += exp->size; - ctx->compile(rep, &i[0]); - i += ctx->size; - i->i.tag = TERM; - i->i.link = this; -} - -void RuleOp::split(CharSet &s){ - exp->split(s); - ctx->split(s); + r = doUnion(r, getRange(&s)); + return RegExp_new_MatchOp(r); +} + +RegExp * +RegExp_new_RuleOp(RegExp *e, RegExp *c, Token *t, uint a) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = RULEOP; + r->d.RuleOp.exp = e; + r->d.RuleOp.ctx = c; + r->d.RuleOp.ins = NULL; + r->d.RuleOp.accept = a; + r->d.RuleOp.code = t; + return r; } -extern void printSpan(ostream&, uint, uint); +extern void printSpan(FILE *, uint, uint); void optimize(Ins *i){ while(!isMarked(i)){ @@ -453,9 +537,13 @@ void optimize(Ins *i){ } } -void genCode(ostream& o, RegExp *re){ +void genCode(FILE *o, RegExp *re){ CharSet cs; uint j; + Char rep[nChars]; + Ins *ins, *eoi; + DFA *dfa; + memset(&cs, 0, sizeof(cs)); for(j = 0; j < nChars; ++j){ cs.rep[j] = &cs.ptn[0]; @@ -465,7 +553,7 @@ void genCode(ostream& o, RegExp *re){ *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL; cs.ptn[0].card = nChars; cs.ptn[0].nxt = NULL; - re->split(cs); + RegExp_split(re, &cs); /* for(uint k = 0; k < nChars;){ for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];); @@ -473,18 +561,17 @@ void genCode(ostream& o, RegExp *re){ cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl; } */ - Char rep[nChars]; for(j = 0; j < nChars; ++j){ if(!cs.rep[j]->nxt) cs.rep[j]->nxt = &cs.ptn[j]; rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]); } - re->calcSize(rep); - Ins *ins = new Ins[re->size+1]; + RegExp_calcSize(re, rep); + ins = malloc(sizeof(Ins)*(re->size+1)); memset(ins, 0, (re->size+1)*sizeof(Ins)); - re->compile(rep, ins); - Ins *eoi = &ins[re->size]; + RegExp_compile(re, rep, ins); + eoi = &ins[re->size]; eoi->i.tag = GOTO; eoi->i.link = eoi; @@ -498,8 +585,8 @@ void genCode(ostream& o, RegExp *re){ } } - DFA *dfa = new DFA(ins, re->size, 0, 256, rep); - dfa->emit(o); - delete dfa; - delete [] ins; + dfa = DFA_new(ins, re->size, 0, 256, rep); + DFA_emit(dfa, o); + DFA_delete(dfa); + free(ins); } diff --git a/tools/re2c/code.c b/tools/re2c/code.c index 8aaf6a88..27a071fe 100644 --- a/tools/re2c/code.c +++ b/tools/re2c/code.c @@ -148,14 +148,14 @@ prt(cerr, b->go, b->on); cerr << endl; first = NULL; } -void genGoTo(ostream &o, State *to){ - o << "\tgoto yy" << to->label << ";\n"; +void genGoTo(FILE *o, State *to){ + fprintf(o, "\tgoto yy%u;\n", to->label); } -void genIf(ostream &o, char *cmp, uint v){ - o << "\tif(yych " << cmp << " '"; +void genIf(FILE *o, const char *cmp, uint v){ + fprintf(o, "\tif(yych %s '", cmp); prtCh(o, v); - o << "')"; + fputs("')", o); } void indent(ostream &o, uint i){ @@ -171,78 +171,82 @@ static void need(ostream &o, uint n){ o << "\tyych = *YYCURSOR;\n"; } -void Match::emit(ostream &o){ - if(state->link){ - o << "\t++YYCURSOR;\n"; - need(o, state->depth); - } else { - o << "\tyych = *++YYCURSOR;\n"; - } -} - -void Enter::emit(ostream &o){ - if(state->link){ - o << "\t++YYCURSOR;\n"; - o << "yy" << label << ":\n"; - need(o, state->depth); - } else { - o << "\tyych = *++YYCURSOR;\n"; - o << "yy" << label << ":\n"; - } -} - -void Save::emit(ostream &o){ - o << "\tyyaccept = " << selector << ";\n"; - if(state->link){ - o << "\tYYMARKER = ++YYCURSOR;\n"; - need(o, state->depth); - } else { - o << "\tyych = *(YYMARKER = ++YYCURSOR);\n"; - } -} - -Move::Move(State *s) : Action(s) { - ; -} - -void Move::emit(ostream &o){ - ; -} - -Accept::Accept(State *x, uint n, uint *s, State **r) - : Action(x), nRules(n), saves(s), rules(r){ - ; -} +void +Action_emit(Action *a, FILE *o) +{ + int first = 1; + uint i; + uint back; -void Accept::emit(ostream &o){ - bool first = true; - for(uint i = 0; i < nRules; ++i) - if(saves[i] != ~0u){ - if(first){ - first = false; - o << "\tYYCURSOR = YYMARKER;\n"; - o << "\tswitch(yyaccept){\n"; + switch (a->type) { + case MATCHACT: + if(a->state->link){ + fputs("\t++YYCURSOR;\n", o); + need(o, a->state->depth); + } else { + fputs("\tyych = *++YYCURSOR;\n", o); } - o << "\tcase " << saves[i] << ":"; - genGoTo(o, rules[i]); - } - if(!first) - o << "\t}\n"; -} - -Rule::Rule(State *s, RuleOp *r) : Action(s), rule(r) { - ; + break; + case ENTERACT: + if(a->state->link){ + fputs("\t++YYCURSOR;\n", o); + fprintf(o, "yy%u:\n", a->d.label); + need(o, a->state->depth); + } else { + fputs("\tyych = *++YYCURSOR;\n", o); + fprintf(o, "yy%u:\n", a->d.label); + } + break; + case SAVEMATCHACT: + fprintf(o, "\tyyaccept = %u;\n", selector); + if(a->state->link){ + fputs("\tYYMARKER = ++YYCURSOR;\n", o); + need(o, a->state->depth); + } else { + fputs("\tyych = *(YYMARKER = ++YYCURSOR);\n", o); + } + break; + case MOVEACT: + break; + case ACCEPTACT: + for(i = 0; i < a->d.Accept.nRules; ++i) + if(a->d.Accept.saves[i] != ~0u){ + if(first){ + first = 0; + fputs("\tYYCURSOR = YYMARKER;\n", o); + fputs("\tswitch(yyaccept){\n", o); + } + fprintf(o, "\tcase %u:", a->d.Accept.saves[i]); + genGoTo(o, a->d.Accept.rules[i]); + } + if(!first) + fputs("\t}\n", o); + break; + case RULEACT: + back = RegExp_fixedLength(a->d.rule->d.RuleOp.ctx); + if(back != ~0u && back > 0u) + fprintf(o, "\tYYCURSOR -= %u;", back); + fprintf(o, "\n#line %u\n\t", a->d.rule->d.RuleOp.code->line); + Str_out(o, a->d.rule->d.RuleOp.code->text); + fprintf(o, "\n"); + break; + } } -void Rule::emit(ostream &o){ - uint back = rule->ctx->fixedLength(); - if(back != ~0u && back > 0u) - o << "\tYYCURSOR -= " << back << ";"; - o << "\n#line " << rule->code->line - << "\n\t" << rule->code->text << "\n"; +Action * +Action_new_Accept(State *x, uint n, uint *s, State **r) +{ + Action *a = malloc(sizeof(Action)); + a->type = ACCEPTACT; + a->state = x; + a->d.Accept.nRules = n; + a->d.Accept.saves = s; + a->d.Accept.rules = r; + x->action = a; + return a; } -void doLinear(ostream &o, uint i, Span *s, uint n, State *next){ +void doLinear(FILE *o, uint i, Span *s, uint n, State *next){ for(;;){ State *bg = s[0].to; while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1){ @@ -269,8 +273,9 @@ void doLinear(ostream &o, uint i, Span *s, uint n, State *next){ } } -void Go::genLinear(ostream &o, State *next){ - doLinear(o, 0, span, nSpans, next); +void +Go_genLinear(Go *g, FILE *o, State *next){ + doLinear(o, 0, g->span, g->nSpans, next); } void genCases(ostream &o, uint lb, Span *s){ @@ -284,111 +289,118 @@ void genCases(ostream &o, uint lb, Span *s){ } } -void Go::genSwitch(ostream &o, State *next){ - if(nSpans <= 2){ +void +Go_genSwitch(Go *g, FILE *o, State *next){ + if(g->nSpans <= 2){ genLinear(o, next); } else { - State *def = span[nSpans-1].to; - Span **sP = new Span*[nSpans-1], **r, **s, **t; + State *def = g->span[g->nSpans-1].to; + Span **sP = malloc(sizeof(Span*)*(g->nSpans-1)), **r, **s, **t; + uint i; t = &sP[0]; - for(uint i = 0; i < nSpans; ++i) - if(span[i].to != def) - *(t++) = &span[i]; + for(i = 0; i < g->nSpans; ++i) + if(g->span[i].to != def) + *(t++) = &g->span[i]; - o << "\tswitch(yych){\n"; + fputs("\tswitch(yych){\n", o); while(t != &sP[0]){ + State *to; r = s = &sP[0]; - if(*s == &span[0]) + if(*s == &g->span[0]) genCases(o, 0, *s); else genCases(o, (*s)[-1].ub, *s); - State *to = (*s)->to; + to = (*s)->to; while(++s < t){ - if((*s)->to == to) + if((*s)->to == g->to) genCases(o, (*s)[-1].ub, *s); else *(r++) = *s; } - genGoTo(o, to); + genGoTo(o, g->to); t = r; } - o << "\tdefault:"; + fputs("\tdefault:", o); genGoTo(o, def); - o << "\t}\n"; + fputs("\t}\n", o); - delete [] sP; + free(sP); } } -void doBinary(ostream &o, uint i, Span *s, uint n, State *next){ +void doBinary(FILE *o, uint i, Span *s, uint n, State *next){ if(n <= 4){ doLinear(o, i, s, n, next); } else { uint h = n/2; - indent(o, i); genIf(o, "<=", s[h-1].ub - 1); o << "{\n"; + indent(o, i); genIf(o, "<=", s[h-1].ub - 1); fputs("{\n", o); doBinary(o, i+1, &s[0], h, next); - indent(o, i); o << "\t} else {\n"; + indent(o, i); fputs("\t} else {\n", o); doBinary(o, i+1, &s[h], n - h, next); - indent(o, i); o << "\t}\n"; + indent(o, i); fputs("\t}\n", o); } } -void Go::genBinary(ostream &o, State *next){ - doBinary(o, 0, span, nSpans, next); +void +Go_genBinary(Go *g, FILE *o, State *next){ + doBinary(o, 0, g->span, g->nSpans, next); } -void Go::genBase(ostream &o, State *next){ - if(nSpans == 0) +void +Go_genBase(Go *g, FILE *o, State *next){ + if(g->nSpans == 0) return; if(!sFlag){ - genSwitch(o, next); + Go_genSwitch(g, o, next); return; } - if(nSpans > 8){ - Span *bot = &span[0], *top = &span[nSpans-1]; + if(g->nSpans > 8){ + Span *bot = &g->span[0], *top = &g->span[g->nSpans-1]; uint util; if(bot[0].to == top[0].to){ - util = (top[-1].ub - bot[0].ub)/(nSpans - 2); + util = (top[-1].ub - bot[0].ub)/(g->nSpans - 2); } else { if(bot[0].ub > (top[0].ub - top[-1].ub)){ - util = (top[0].ub - bot[0].ub)/(nSpans - 1); + util = (top[0].ub - bot[0].ub)/(g->nSpans - 1); } else { - util = top[-1].ub/(nSpans - 1); + util = top[-1].ub/(g->nSpans - 1); } } if(util <= 2){ - genSwitch(o, next); + Go_genSwitch(g, o, next); return; } } if(nSpans > 5){ - genBinary(o, next); + Go_genBinary(g, o, next); } else { - genLinear(o, next); + Go_genLinear(g, o, next); } } -void Go::genGoto(ostream &o, State *next){ +void +Go_genGoto(Go *g, FILE *o, State *next){ + uint i; if(bFlag){ - for(uint i = 0; i < nSpans; ++i){ - State *to = span[i].to; + for(i = 0; i < g->nSpans; ++i){ + State *to = g->span[i].to; if(to && to->isBase){ BitMap *b = BitMap::find(to); - if(b && matches(b->go, b->on, this, to)){ + if(b && matches(b->go, b->on, g, to)){ Go go; - go.span = new Span[nSpans]; - go.unmap(this, to); + go.span = malloc(sizeof(Span)*g->nSpans); + go.unmap(g, to); o << "\tif(yybm[" << b->i << "+yych] & " << (uint) b->m << ")"; genGoTo(o, to); - go.genBase(o, next); - delete [] go.span; + Go_genBase(go, o, next); + free(go.span); return; } } } } - genBase(o, next); + Go_genBase(g, o, next); } void State::emit(ostream &o){ diff --git a/tools/re2c/dfa.c b/tools/re2c/dfa.c index c1ff0545..fc55318b 100644 --- a/tools/re2c/dfa.c +++ b/tools/re2c/dfa.c @@ -5,77 +5,91 @@ #include "substr.h" #include "dfa.h" -inline char octCh(uint c){ - return '0' + c%8; -} +#define octCh(c) ('0' + c%8) -void prtCh(ostream &o, uchar c){ +void prtCh(FILE *o, uchar c){ uchar oc = talx[c]; switch(oc){ - case '\'': o << "\\'"; break; - case '\n': o << "\\n"; break; - case '\t': o << "\\t"; break; - case '\v': o << "\\v"; break; - case '\b': o << "\\b"; break; - case '\r': o << "\\r"; break; - case '\f': o << "\\f"; break; - case '\a': o << "\\a"; break; - case '\\': o << "\\\\"; break; + case '\'': fputs("\\'", o); break; + case '\n': fputs("\\n", o); break; + case '\t': fputs("\\t", o); break; + case '\v': fputs("\\v", o); break; + case '\b': fputs("\\b", o); break; + case '\r': fputs("\\r", o); break; + case '\f': fputs("\\f", o); break; + case '\a': fputs("\\a", o); break; + case '\\': fputs("\\\\", o); break; default: if(isprint(oc)) - o << (char) oc; + fputc(oc, o); else - o << '\\' << octCh(c/64) << octCh(c/8) << octCh(c); + fprintf(o, "\\%c%c%c", octCh(c/64), octCh(c/8), octCh(c)); } } -void printSpan(ostream &o, uint lb, uint ub){ +void printSpan(FILE *o, uint lb, uint ub){ if(lb > ub) - o << "*"; - o << "["; + fputc('*', o); + fputc('[', o); if((ub - lb) == 1){ prtCh(o, lb); } else { prtCh(o, lb); - o << "-"; + fputc('-', o); prtCh(o, ub-1); } - o << "]"; + fputc(']', o); } -uint Span::show(ostream &o, uint lb){ - if(to){ - printSpan(o, lb, ub); - o << " " << to->label << "; "; +uint +Span_show(Span *s, FILE *o, uint lb) +{ + if(s->to){ + printSpan(o, lb, s->ub); + fprintf(o, " %u; ", s->to->label); } return ub; } -ostream& operator<<(ostream &o, const State &s){ - o << "state " << s.label; - if(s.rule) - o << " accepts " << s.rule->accept; - o << "\n"; - uint lb = 0; - for(uint i = 0; i < s.go.nSpans; ++i) - lb = s.go.span[i].show(o, lb); - return o; +void +State_out(FILE *o, const State *s){ + uint lb, i; + fprintf(o, "state %u", s->label); + if(s->rule) + fprintf(o, " accepts %u", s->rule->d.RuleOp.accept); + fputs("\n", o); + lb = 0; + for(i = 0; i < s->go.nSpans; ++i) + lb = s->go.span[i].show(o, lb); } -ostream& operator<<(ostream &o, const DFA &dfa){ - for(State *s = dfa.head; s; s = s->next) - o << s << "\n\n"; - return o; +void +DFA_out(FILE *o, const DFA *dfa){ + State *s; + for(s = dfa->head; s; s = s->next) { + State_out(o, s); + fputs("\n\n", o); + } } -State::State() : rule(NULL), link(NULL), kCount(0), kernel(NULL), action(NULL) { - go.nSpans = 0; - go.span = NULL; +State * +State_new(void) +{ + State *s = malloc(sizeof(State)); + s->rule = s->link = NULL; + s->kCount = 0; + s->kernel = s->action = NULL; + s->go.nSpans = 0; + s->go.span = NULL; + return s; } -State::~State(){ - delete [] kernel; - delete [] go.span; +void +State_delete(State *s) +{ + free(s->kernel); + free(s->go.span); + free(s); } static Ins **closure(Ins **cP, Ins *i){ diff --git a/tools/re2c/dfa.h b/tools/re2c/dfa.h index edd018c3..406bcce3 100644 --- a/tools/re2c/dfa.h +++ b/tools/re2c/dfa.h @@ -1,149 +1,154 @@ -#ifndef _dfa_h -#define _dfa_h +#ifndef re2c_dfa_h +#define re2c_dfa_h -#include +#include #include "re.h" -extern void prtCh(ostream&, uchar); -extern void printSpan(ostream&, uint, uint); - -class DFA; -class State; - -class Action { -public: - State *state; -public: - Action(State*); - virtual void emit(ostream&) = 0; -}; - -class Match: public Action { -public: - Match(State*); - void emit(ostream&); -}; - -class Enter: public Action { -public: - uint label; -public: - Enter(State*, uint); - void emit(ostream&); -}; - -class Save: public Match { -public: - uint selector; -public: - Save(State*, uint); - void emit(ostream&); -}; - -class Move: public Action { -public: - Move(State*); - void emit(ostream&); -}; - -class Accept: public Action { -public: - uint nRules; - uint *saves; - State **rules; -public: - Accept(State*, uint, uint*, State**); - void emit(ostream&); -}; - -class Rule: public Action { -public: - RuleOp *rule; -public: - Rule(State*, RuleOp*); - void emit(ostream&); -}; - -class Span { -public: +extern void prtCh(FILE *, uchar); +extern void printSpan(FILE *, uint, uint); + +struct DFA; +struct State; + +typedef enum { + MATCHACT = 1, + ENTERACT, + SAVEMATCHACT, + MOVEACT, + ACCEPTACT, + RULEACT +} ActionType; + +typedef struct Action { + struct State *state; + ActionType type; + union { + /* data for Enter */ + uint label; + /* data for SaveMatch */ + uint selector; + /* data for Accept */ + struct { + uint nRules; + uint *saves; + struct State **rules; + } Accept; + /* data for Rule */ + RegExp *rule; /* RuleOp */ + } d; +} Action; + +void Action_emit(Action*, FILE *); + +typedef struct Span { uint ub; - State *to; -public: - uint show(ostream&, uint); -}; + struct State *to; +} Span; + +uint Span_show(Span*, FILE *, uint); -class Go { -public: +typedef struct Go { uint nSpans; Span *span; -public: - void genGoto(ostream&, State*); - void genBase(ostream&, State*); - void genLinear(ostream&, State*); - void genBinary(ostream&, State*); - void genSwitch(ostream&, State*); - void compact(); - void unmap(Go*, State*); -}; - -class State { -public: +} Go; + +typedef struct State { uint label; - RuleOp *rule; - State *next; - State *link; - uint depth; // for finding SCCs + RegExp *rule; /* RuleOp */ + struct State *next; + struct State *link; + uint depth; /* for finding SCCs */ uint kCount; Ins **kernel; - bool isBase:1; + uint isBase:1; Go go; Action *action; -public: - State(); - ~State(); - void emit(ostream&); - friend ostream& operator<<(ostream&, const State&); - friend ostream& operator<<(ostream&, const State*); -}; - -class DFA { -public: +} State; + +void Go_genGoto(Go*, FILE *, State*); +void Go_genBase(Go*, FILE *, State*); +void Go_genLinear(Go*, FILE *, State*); +void Go_genBinary(Go*, FILE *, State*); +void Go_genSwitch(Go*, FILE *, State*); +void Go_compact(Go*); +void Go_unmap(Go*, Go*, State*); + +State *State_new(void); +void State_delete(State*); +void State_emit(State*, FILE *); +void State_out(FILE *, const State*); + +typedef struct DFA { uint lbChar; uint ubChar; uint nStates; State *head, **tail; State *toDo; -public: - DFA(Ins*, uint, uint, uint, Char*); - ~DFA(); - void addState(State**, State*); - State *findState(Ins**, uint); - void split(State*); - - void findSCCs(); - void emit(ostream&); - - friend ostream& operator<<(ostream&, const DFA&); - friend ostream& operator<<(ostream&, const DFA*); -}; - -inline Action::Action(State *s) : state(s) { - s->action = this; +} DFA; + +DFA *DFA_new(Ins*, uint, uint, uint, Char*); +void DFA_delete(DFA*); +void DFA_addState(DFA*, State**, State*); +State *DFA_findState(DFA*, Ins**, uint); +void DFA_split(DFA*, State*); + +void DFA_findSCCs(DFA*); +void DFA_emit(DFA*, FILE *); +void DFA_out(FILE *, const DFA*); + +static inline Action * +Action_new_Match(State *s) +{ + Action *a = malloc(sizeof(Action)); + a->type = MATCHACT; + a->state = s; + s->action = a; + return a; } -inline Match::Match(State *s) : Action(s) - { } - -inline Enter::Enter(State *s, uint l) : Action(s), label(l) - { } +static inline Action * +Action_new_Enter(State *s, uint l) +{ + Action *a = malloc(sizeof(Action)); + a->type = ENTERACT; + a->state = s; + a->d.label = l; + s->action = a; + return a; +} -inline Save::Save(State *s, uint i) : Match(s), selector(i) - { } +static inline Action * +Action_new_Save(State *s, uint i) +{ + Action *a = malloc(sizeof(Action)); + a->type = SAVEMATCHACT; + a->state = s; + a->d.selector = i; + s->action = a; + return a; +} -inline ostream& operator<<(ostream &o, const State *s) - { return o << *s; } +static inline Action * +Action_new_Move(State *s) +{ + Action *a = malloc(sizeof(Action)); + a->type = MOVEACT; + a->state = s; + s->action = a; + return a; +} -inline ostream& operator<<(ostream &o, const DFA *dfa) - { return o << *dfa; } +Action *Action_new_Accept(State*, uint, uint*, State**); + +static inline Action * +Action_new_Rule(State *s, RegExp *r) /* RuleOp */ +{ + Action *a = malloc(sizeof(Action)); + a->type = RULEACT; + a->state = s; + a->d.rule = r; + s->action = a; + return a; +} #endif diff --git a/tools/re2c/globals.h b/tools/re2c/globals.h index 4c522995..e0c402c7 100644 --- a/tools/re2c/globals.h +++ b/tools/re2c/globals.h @@ -4,8 +4,8 @@ #include "basics.h" extern char *fileName; -extern bool sFlag; -extern bool bFlag; +extern int sFlag; +extern int bFlag; extern uchar asc2ebc[256]; extern uchar ebc2asc[256]; diff --git a/tools/re2c/ins.h b/tools/re2c/ins.h index 5d08cca2..30d2d178 100644 --- a/tools/re2c/ins.h +++ b/tools/re2c/ins.h @@ -1,19 +1,18 @@ -#ifndef _ins_h -#define _ins_h +#ifndef re2c_ins_h +#define re2c_ins_h -#include #include "basics.h" -const uint nChars = 256; +#define nChars 256 typedef uchar Char; -const uint CHAR = 0; -const uint GOTO = 1; -const uint FORK = 2; -const uint TERM = 3; -const uint CTXT = 4; +#define CHAR 0 +#define GOTO 1 +#define FORK 2 +#define TERM 3 +#define CTXT 4 -union Ins { +typedef union Ins { struct { byte tag; byte marked; @@ -24,18 +23,18 @@ union Ins { ushort bump; void *link; } c; -}; +} Ins; -inline bool isMarked(Ins *i){ +static inline int isMarked(Ins *i){ return i->i.marked != 0; } -inline void mark(Ins *i){ - i->i.marked = true; +static inline void mark(Ins *i){ + i->i.marked = 1; } -inline void unmark(Ins *i){ - i->i.marked = false; +static inline void unmark(Ins *i){ + i->i.marked = 0; } #endif diff --git a/tools/re2c/parse.h b/tools/re2c/parse.h index 56178a80..605ce357 100644 --- a/tools/re2c/parse.h +++ b/tools/re2c/parse.h @@ -1,20 +1,27 @@ -#ifndef _parser_h -#define _parser_h +#ifndef re2c_parser_h +#define re2c_parser_h #include "scanner.h" #include "re.h" -class Symbol { -public: - static Symbol *first; - Symbol *next; +typedef struct Symbol { + struct Symbol *next; Str name; RegExp *re; -public: - Symbol(const SubStr&); - static Symbol *find(const SubStr&); -}; +} Symbol; -void parse(int, ostream&); +void Symbol_init(Symbol *, const SubStr*); +static inline Symbol *Symbol_new(const SubStr*); +Symbol *Symbol_find(const SubStr*); + +void parse(int, FILE *); + +static inline Symbol * +Symbol_new(const SubStr *str) +{ + Symbol *r = malloc(sizeof(Symbol)); + Symbol_init(r, str); + return r; +} #endif diff --git a/tools/re2c/parser.h b/tools/re2c/parser.h index 56178a80..605ce357 100644 --- a/tools/re2c/parser.h +++ b/tools/re2c/parser.h @@ -1,20 +1,27 @@ -#ifndef _parser_h -#define _parser_h +#ifndef re2c_parser_h +#define re2c_parser_h #include "scanner.h" #include "re.h" -class Symbol { -public: - static Symbol *first; - Symbol *next; +typedef struct Symbol { + struct Symbol *next; Str name; RegExp *re; -public: - Symbol(const SubStr&); - static Symbol *find(const SubStr&); -}; +} Symbol; -void parse(int, ostream&); +void Symbol_init(Symbol *, const SubStr*); +static inline Symbol *Symbol_new(const SubStr*); +Symbol *Symbol_find(const SubStr*); + +void parse(int, FILE *); + +static inline Symbol * +Symbol_new(const SubStr *str) +{ + Symbol *r = malloc(sizeof(Symbol)); + Symbol_init(r, str); + return r; +} #endif diff --git a/tools/re2c/re.h b/tools/re2c/re.h index 2ea6e63b..c34cd3ec 100644 --- a/tools/re2c/re.h +++ b/tools/re2c/re.h @@ -1,178 +1,164 @@ -#ifndef _re_h -#define _re_h +#ifndef re2c_re_h +#define re2c_re_h -#include +#include #include "token.h" #include "ins.h" -struct CharPtn { +typedef struct CharPtn { uint card; - CharPtn *fix; - CharPtn *nxt; -}; + struct CharPtn *fix; + struct CharPtn *nxt; +} CharPtn; -struct CharSet { +typedef struct CharSet { CharPtn *fix; CharPtn *freeHead, **freeTail; CharPtn *rep[nChars]; CharPtn ptn[nChars]; -}; - -class Range { -public: - Range *next; - uint lb, ub; // [lb,ub) -public: - Range(uint l, uint u) : next(NULL), lb(l), ub(u) - { } - Range(Range &r) : next(NULL), lb(r.lb), ub(r.ub) - { } - friend ostream& operator<<(ostream&, const Range&); - friend ostream& operator<<(ostream&, const Range*); -}; - -inline ostream& operator<<(ostream &o, const Range *r){ - return r? o << *r : o; +} CharSet; + +typedef struct Range { + struct Range *next; + uint lb, ub; /* [lb,ub) */ +} Range; + +static inline void +Range_init(Range *r, uint l, uint u) +{ + r->next = NULL; + r->lb = l; + r->ub = u; +} + +static inline Range * +Range_new(uint l, uint u) +{ + Range *r = malloc(sizeof(Range)); + r->next = NULL; + r->lb = l; + r->ub = u; + return r; +} + +static inline void +Range_copy(Range *ro, const Range *r) +{ + ro->next = NULL; + ro->lb = r->lb; + ro->ub = r->ub; } -class RegExp { -public: +static inline Range * +Range_new_copy(Range *r) +{ + Range *ro = malloc(sizeof(Range)); + ro->next = NULL; + ro->lb = r->lb; + ro->ub = r->ub; + return ro; +} + +void Range_out(FILE *, const Range *); + +typedef enum { + NULLOP = 1, + MATCHOP, + RULEOP, + ALTOP, + CATOP, + CLOSEOP +} RegExpType; + +typedef struct RegExp { + RegExpType type; uint size; -public: - virtual char *typeOf() = 0; - RegExp *isA(char *t) - { return typeOf() == t? this : NULL; } - virtual void split(CharSet&) = 0; - virtual void calcSize(Char*) = 0; - virtual uint fixedLength(); - virtual void compile(Char*, Ins*) = 0; - virtual void display(ostream&) const = 0; - friend ostream& operator<<(ostream&, const RegExp&); - friend ostream& operator<<(ostream&, const RegExp*); -}; - -inline ostream& operator<<(ostream &o, const RegExp &re){ - re.display(o); - return o; + union { + /* for MatchOp */ + Range *match; + /* for RuleOp */ + struct { + struct RegExp *exp; + struct RegExp *ctx; + Ins *ins; + uint accept; + Token *code; + uint line; + } RuleOp; + /* for AltOp and CatOp*/ + struct { + struct RegExp *exp1, *exp2; + } AltCatOp; + /* for CloseOp */ + struct RegExp *exp; + } d; +} RegExp; + +static inline RegExp * +RegExp_isA(RegExp *r, RegExpType t) +{ + return r->type == t ? r : NULL; } -inline ostream& operator<<(ostream &o, const RegExp *re){ - return o << *re; +void RegExp_split(RegExp*, CharSet*); +void RegExp_calcSize(RegExp*, Char*); +uint RegExp_fixedLength(RegExp*); +void RegExp_compile(RegExp*, Char*, Ins*); +void RegExp_display(RegExp*, FILE *); + +static inline RegExp * +RegExp_new_NullOp(void) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = NULLOP; + return r; } -class NullOp: public RegExp { -public: - static char *type; -public: - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(ostream &o) const { - o << "_"; - } -}; - -class MatchOp: public RegExp { -public: - static char *type; - Range *match; -public: - MatchOp(Range *m) : match(m) - { } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(ostream&) const; -}; - -class RuleOp: public RegExp { -private: - RegExp *exp; -public: - RegExp *ctx; - static char *type; - Ins *ins; - uint accept; - Token *code; - uint line; -public: - RuleOp(RegExp*, RegExp*, Token*, uint); - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - void compile(Char*, Ins*); - void display(ostream &o) const { - o << exp << "/" << ctx << ";"; - } -}; - -class AltOp: public RegExp { -private: - RegExp *exp1, *exp2; -public: - static char *type; -public: - AltOp(RegExp *e1, RegExp *e2) - { exp1 = e1; exp2 = e2; } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(ostream &o) const { - o << exp1 << "|" << exp2; - } - friend RegExp *mkAlt(RegExp*, RegExp*); -}; - -class CatOp: public RegExp { -private: - RegExp *exp1, *exp2; -public: - static char *type; -public: - CatOp(RegExp *e1, RegExp *e2) - { exp1 = e1; exp2 = e2; } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(ostream &o) const { - o << exp1 << exp2; - } -}; - -class CloseOp: public RegExp { -private: - RegExp *exp; -public: - static char *type; -public: - CloseOp(RegExp *e) - { exp = e; } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - void compile(Char*, Ins*); - void display(ostream &o) const { - o << exp << "+"; - } -}; - -extern void genCode(ostream&, RegExp*); +static inline RegExp * +RegExp_new_MatchOp(Range *m) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = MATCHOP; + r->d.match = m; + return r; +} + +RegExp *RegExp_new_RuleOp(RegExp*, RegExp*, Token*, uint); + +static inline RegExp * +RegExp_new_AltOp(RegExp *e1, RegExp *e2) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = ALTOP; + r->d.AltCatOp.exp1 = e1; + r->d.AltCatOp.exp2 = e2; + return r; +} + +static inline RegExp * +RegExp_new_CatOp(RegExp *e1, RegExp *e2) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = CATOP; + r->d.AltCatOp.exp1 = e1; + r->d.AltCatOp.exp2 = e2; + return r; +} + +static inline RegExp * +RegExp_new_CloseOp(RegExp *e) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = CLOSEOP; + r->d.exp = e; + return r; +} + +extern void genCode(FILE *, RegExp*); extern RegExp *mkDiff(RegExp*, RegExp*); extern RegExp *strToRE(SubStr); extern RegExp *ranToRE(SubStr); +extern RegExp *mkAlt(RegExp*, RegExp*); + #endif diff --git a/tools/re2c/scanner.h b/tools/re2c/scanner.h index cf5bb1f2..eddb92b2 100644 --- a/tools/re2c/scanner.h +++ b/tools/re2c/scanner.h @@ -1,30 +1,44 @@ #ifndef _scanner_h #define _scanner_h +#include #include "token.h" -class Scanner { - private: - int in; +typedef struct Scanner { + FILE *in; uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; uint tchar, tline, cline; - private: - uchar *fill(uchar*); - public: - Scanner(int); - int echo(ostream&); - int scan(); - void fatal(char*); - SubStr token(); - uint line(); -}; - -inline SubStr Scanner::token(){ - return SubStr(tok, cur - tok); +} Scanner; + +void Scanner_init(Scanner*, FILE *); +static inline Scanner *Scanner_new(FILE *); + +int Scanner_echo(Scanner*, FILE *); +int Scanner_scan(Scanner*); +void Scanner_fatal(Scanner*, char*); +SubStr Scanner_token(Scanner*); +static inline uint Scanner_line(Scanner*); + +inline SubStr +Scanner_token(Scanner *s) +{ + SubStr r; + SubStr_init_u(&r, s->tok, s->cur - s->tok); + return r; +} + +static inline uint +Scanner_line(Scanner *s) +{ + return s->cline; } -inline uint Scanner::line(){ - return cline; +static inline Scanner * +Scanner_new(FILE *i) +{ + Scanner *r = malloc(sizeof(Scanner)); + Scanner_init(r, i); + return r; } #endif diff --git a/tools/re2c/scanner.re b/tools/re2c/scanner.re index f7b48cbb..6e483518 100644 --- a/tools/re2c/scanner.re +++ b/tools/re2c/scanner.re @@ -1,7 +1,5 @@ #include #include -#include -#include #include "scanner.h" #include "parser.h" #include "y.tab.h" @@ -12,46 +10,53 @@ extern YYSTYPE yylval; #define YYCTYPE uchar #define YYCURSOR cursor -#define YYLIMIT lim -#define YYMARKER ptr -#define YYFILL(n) {cursor = fill(cursor);} - -#define RETURN(i) {cur = cursor; return i;} - - -Scanner::Scanner(int i) : in(i), - bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL), - top(NULL), eof(NULL), tchar(0), tline(0), cline(1) { - ; +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RETURN(i) {s->cur = cursor; return i;} + +static uchar *fill(Scanner*, uchar*); + +void +Scanner_init(Scanner *s, FILE *i) +{ + s->in = i; + s->bot = s->tok = s->ptr = s->cur = s->pos = s->lim = s->top = + s->eof = NULL; + s->tchar = s->tline = 0; + s->cline = 1; } -uchar *Scanner::fill(uchar *cursor){ - if(!eof){ - uint cnt = tok - bot; +static uchar * +fill(Scanner *s, uchar *cursor) +{ + if(!s->eof){ + uint cnt = s->tok - s->bot; if(cnt){ - memcpy(bot, tok, lim - tok); - tok = bot; - ptr -= cnt; + memcpy(s->bot, s->tok, s->lim - s->tok); + s->tok = s->bot; + s->ptr -= cnt; cursor -= cnt; - pos -= cnt; - lim -= cnt; + s->pos -= cnt; + s->lim -= cnt; } - if((top - lim) < BSIZE){ - uchar *buf = new uchar[(lim - bot) + BSIZE]; - memcpy(buf, tok, lim - tok); - tok = buf; - ptr = &buf[ptr - bot]; - cursor = &buf[cursor - bot]; - pos = &buf[pos - bot]; - lim = &buf[lim - bot]; - top = &lim[BSIZE]; - delete [] bot; - bot = buf; + if((s->top - s->lim) < BSIZE){ + uchar *buf = malloc(sizeof(uchar)*((s->lim - s->bot) + BSIZE)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + free(s->bot); + s->bot = buf; } - if((cnt = read(in, (char*) lim, BSIZE)) != BSIZE){ - eof = &lim[cnt]; *eof++ = '\n'; + if((cnt = fread(s->lim, sizeof(uchar), BSIZE, s->in)) != BSIZE){ + s->eof = &s->lim[cnt]; *s->eof++ = '\n'; } - lim += cnt; + s->lim += cnt; } return cursor; } @@ -67,31 +72,35 @@ letter = [a-zA-Z]; digit = [0-9]; */ -int Scanner::echo(ostream &out){ - uchar *cursor = cur; - tok = cursor; +int +Scanner_echo(Scanner *s, FILE *out) +{ + uchar *cursor = s->cur; + s->tok = cursor; echo: /*!re2c - "/*!re2c" { out.write(tok, &cursor[-7] - tok); - tok = cursor; + "/*!re2c" { fwrite(s->tok, 1, &cursor[-7] - s->tok, out); + s->tok = cursor; RETURN(1); } - "\n" { if(cursor == eof) RETURN(0); - out.write(tok, cursor - tok); - tok = pos = cursor; cline++; + "\n" { if(cursor == s->eof) RETURN(0); + fwrite(s->tok, 1, cursor - s->tok, out); + s->tok = s->pos = cursor; s->cline++; goto echo; } any { goto echo; } */ } -int Scanner::scan(){ - uchar *cursor = cur; +int +Scanner_scan(Scanner *s) +{ + uchar *cursor = s->cur; uint depth; scan: - tchar = cursor - pos; - tline = cline; - tok = cursor; + s->tchar = cursor - s->pos; + s->tline = s->cline; + s->tok = cursor; /*!re2c "{" { depth = 1; goto code; @@ -99,36 +108,37 @@ scan: "/*" { depth = 1; goto comment; } - "*/" { tok = cursor; + "*/" { s->tok = cursor; RETURN(0); } - dstring { cur = cursor; - yylval.regexp = strToRE(token()); + dstring { s->cur = cursor; + yylval.regexp = strToRE(Scanner_token(s)); return STRING; } - "\"" { fatal("bad string"); } + "\"" { Scanner_fatal(s, "bad string"); } - cstring { cur = cursor; - yylval.regexp = ranToRE(token()); + cstring { s->cur = cursor; + yylval.regexp = ranToRE(Scanner_token(s)); return RANGE; } - "[" { fatal("bad character constant"); } + "[" { Scanner_fatal(s, "bad character constant"); } - [()|=;/\\] { RETURN(*tok); } + [()|=;/\\] { RETURN(*s->tok); } - [*+?] { yylval.op = *tok; + [*+?] { yylval.op = *s->tok; RETURN(CLOSE); } - letter (letter|digit)* { cur = cursor; - yylval.symbol = Symbol::find(token()); + letter (letter|digit)* { SubStr substr = Scanner_token(s); + s->cur = cursor; + yylval.symbol = Symbol_find(&substr); return ID; } [ \t]+ { goto scan; } - "\n" { if(cursor == eof) RETURN(0); - pos = cursor; cline++; + "\n" { if(cursor == s->eof) RETURN(0); + s->pos = cursor; s->cline++; goto scan; } - any { cerr << "unexpected character: " << *tok << endl; + any { fprintf(stderr, "unexpected character: '%c'\n", *s->tok); goto scan; } */ @@ -136,15 +146,15 @@ scan: code: /*!re2c "}" { if(--depth == 0){ - cur = cursor; - yylval.token = new Token(token(), tline); + s->cur = cursor; + yylval.token = Token_new(Scanner_token(s), s->tline); return CODE; } goto code; } "{" { ++depth; goto code; } - "\n" { if(cursor == eof) fatal("missing '}'"); - pos = cursor; cline++; + "\n" { if(cursor == s->eof) Scanner_fatal(s, "missing '}'"); + s->pos = cursor; s->cline++; goto code; } dstring | sstring | any { goto code; } @@ -158,16 +168,17 @@ comment: goto comment; } "/*" { ++depth; goto comment; } - "\n" { if(cursor == eof) RETURN(0); - tok = pos = cursor; cline++; + "\n" { if(cursor == s->eof) RETURN(0); + s->tok = s->pos = cursor; s->cline++; goto comment; } any { goto comment; } */ } -void Scanner::fatal(char *msg){ - cerr << "line " << tline << ", column " << (tchar + 1) << ": " - << msg << endl; +void +Scanner_fatal(Scanner *s, char *msg) +{ + fprintf(stderr, "line %d, column %d: %s\n", s->tline, s->tchar + 1, msg); exit(1); } diff --git a/tools/re2c/token.h b/tools/re2c/token.h index de51eb48..9c1e5157 100644 --- a/tools/re2c/token.h +++ b/tools/re2c/token.h @@ -1,18 +1,30 @@ -#ifndef _token_h -#define _token_h +#ifndef re2c_token_h +#define re2c_token_h #include "substr.h" -class Token { - public: +typedef struct Token { Str text; uint line; - public: - Token(SubStr, uint); -}; +} Token; -inline Token::Token(SubStr t, uint l) : text(t), line(l) { - ; +static inline void Token_init(Token *, SubStr, uint); +static inline Token *Token_new(SubStr, uint); + +static inline void +Token_init(Token *r, SubStr t, uint l) +{ + Str_copy(&r->text, &t); + r->line = l; +} + +static inline Token * +Token_new(SubStr t, uint l) +{ + Token *r = malloc(sizeof(Token)); + Str_copy(&r->text, &t); + r->line = l; + return r; } #endif