Continuing C conversion (not everything is compilable yet).

svn path=/trunk/yasm/; revision=581
0.3
Peter Johnson 23 years ago
parent 05224a9fd9
commit 88d5a1e6c4
  1. 575
      tools/re2c/actions.c
  2. 240
      tools/re2c/code.c
  3. 100
      tools/re2c/dfa.c
  4. 255
      tools/re2c/dfa.h
  5. 4
      tools/re2c/globals.h
  6. 31
      tools/re2c/ins.h
  7. 29
      tools/re2c/parse.h
  8. 29
      tools/re2c/parser.h
  9. 298
      tools/re2c/re.h
  10. 50
      tools/re2c/scanner.h
  11. 153
      tools/re2c/scanner.re
  12. 30
      tools/re2c/token.h

@ -1,25 +1,33 @@
#include <time.h>
#include <string.h>
#include <iostream.h>
#include <iomanip.h>
#include <stdio.h>
#include "globals.h"
#include "parser.h"
#include "dfa.h"
Symbol *Symbol::first = NULL;
static Symbol *first = NULL;
Symbol::Symbol(const SubStr &str) : next(first), name(str), re(NULL) {
first = this;
/*
 * Initialise a caller-provided Symbol from the name `str` and push it onto
 * the head of the file-local symbol list (`first`).  The symbol starts out
 * with no regular expression attached.
 */
void
Symbol_init(Symbol *r, const SubStr *str)
{
    /* Fill in the payload first ... */
    Str_init(&r->name, str);
    r->re = NULL;

    /* ... then link the node in front of the current list head. */
    r->next = first;
    first = r;
}
Symbol *Symbol::find(const SubStr &str){
for(Symbol *sym = first; sym; sym = sym->next)
if(sym->name == str) return sym;
return new Symbol(str);
/*
 * Look up the symbol named `str` in the file-local symbol list.
 * Returns the existing entry when one compares equal via SubStr_eq();
 * otherwise creates a new symbol with Symbol_new() and returns that.
 */
Symbol *
Symbol_find(const SubStr *str)
{
    Symbol *cur = first;

    while (cur) {
        if (SubStr_eq(&cur->name, str))
            return cur;
        cur = cur->next;
    }

    /* No match anywhere in the list: create the symbol on demand. */
    return Symbol_new(str);
}
void showIns(ostream &o, const Ins &i, const Ins &base){
/*
void showIns(FILE *o, const Ins *i, const Ins *base){
o.width(3);
o << &i - &base << ": ";
switch(i.i.tag){
@ -43,36 +51,261 @@ void showIns(ostream &o, const Ins &i, const Ins &base){
}
o << "\n";
}
*/
/*
 * Fixed match length of an alternation node (ALTOP).
 *
 * Returns the common length when both alternatives have the same fixed
 * length, and ~0u (the "no fixed length" marker used by
 * RegExp_fixedLength) otherwise.
 *
 * The second length is taken from exp2.  The previous code measured exp1
 * twice, so two alternatives of different lengths were never detected and
 * the length of the first alternative was reported for the whole node.
 */
static uint
AltOp_fixedLength(RegExp *r)
{
    uint l1 = RegExp_fixedLength(r->d.AltCatOp.exp1);
    uint l2 = RegExp_fixedLength(r->d.AltCatOp.exp2);

    /* Differing lengths, or either side variable, means no fixed length. */
    if (l1 != l2 || l1 == ~0u)
        return ~0u;
    return l1;
}
uint RegExp::fixedLength(){
/*
 * Fixed match length of a concatenation node (CATOP): the sum of the fixed
 * lengths of both operands, or ~0u as soon as either operand reports ~0u.
 * The second operand is only measured when the first one has a fixed length.
 */
static uint
CatOp_fixedLength(RegExp *r)
{
    uint head_len, tail_len;

    head_len = RegExp_fixedLength(r->d.AltCatOp.exp1);
    if (head_len == ~0u)
        return ~0u;

    tail_len = RegExp_fixedLength(r->d.AltCatOp.exp2);
    if (tail_len == ~0u)
        return ~0u;

    return head_len + tail_len;
}
char *NullOp::type = "NullOp";
/*
 * Return the fixed number of characters matched by `r`, or ~0u when the
 * node type has no fixed length.  NULLOP matches 0 characters, MATCHOP
 * matches 1; ALTOP and CATOP delegate to their helpers; every other node
 * type yields ~0u.
 */
uint
RegExp_fixedLength(RegExp *r)
{
    uint len = ~0u;

    switch (r->type) {
    case NULLOP:
        len = 0;
        break;
    case MATCHOP:
        len = 1;
        break;
    case ALTOP:
        len = AltOp_fixedLength(r);
        break;
    case CATOP:
        len = CatOp_fixedLength(r);
        break;
    default:
        break;
    }
    return len;
}
void NullOp::calcSize(Char*){
size = 0;
/*
 * Recursively compute re->size for the tree rooted at `re`.
 *
 * `rep` maps each character to a representative character; in a MATCHOP
 * only characters that are their own representative (rep[c] == c) are
 * counted, on top of a base size of 1.  The per-node sizes are:
 *   NULLOP  0
 *   MATCHOP 1 + number of self-representative characters in the ranges
 *   RULEOP  exp + ctx + 1
 *   ALTOP   exp1 + exp2 + 2
 *   CATOP   exp1 + exp2
 *   CLOSEOP exp + 1
 */
void
RegExp_calcSize(RegExp *re, Char *rep)
{
    switch (re->type) {
    case NULLOP:
        re->size = 0;
        return;

    case MATCHOP: {
        Range *range;
        uint ch;
        uint total = 1;

        for (range = re->d.match; range; range = range->next) {
            for (ch = range->lb; ch < range->ub; ++ch) {
                if (rep[ch] == ch)
                    ++total;
            }
        }
        re->size = total;
        return;
    }

    case RULEOP: {
        RegExp *body = re->d.RuleOp.exp;
        RegExp *context = re->d.RuleOp.ctx;

        RegExp_calcSize(body, rep);
        RegExp_calcSize(context, rep);
        re->size = body->size + context->size + 1;
        return;
    }

    case ALTOP: {
        RegExp *left = re->d.AltCatOp.exp1;
        RegExp *right = re->d.AltCatOp.exp2;

        RegExp_calcSize(left, rep);
        RegExp_calcSize(right, rep);
        re->size = left->size + right->size + 2;
        return;
    }

    case CATOP: {
        RegExp *left = re->d.AltCatOp.exp1;
        RegExp *right = re->d.AltCatOp.exp2;

        RegExp_calcSize(left, rep);
        RegExp_calcSize(right, rep);
        re->size = left->size + right->size;
        return;
    }

    case CLOSEOP: {
        RegExp *inner = re->d.exp;

        RegExp_calcSize(inner, rep);
        re->size = inner->size + 1;
        return;
    }
    }
}
uint NullOp::fixedLength(){
return 0;
/*
 * Emit the instructions for a MATCHOP node starting at `i`.
 *
 * i[0] becomes a CHAR header whose link points just past this node's
 * instructions (i + re->size).  Each following slot receives one
 * self-representative character (rep[c] == c) from the node's ranges,
 * together with a `bump` value that counts down from re->size - 1.
 */
static void
MatchOp_compile(RegExp *re, Char *rep, Ins *i)
{
    Ins *slot = i + 1;
    uint remaining = re->size;
    Range *range;
    uint ch;

    i->i.tag = CHAR;
    i->i.link = i + re->size;

    for (range = re->d.match; range; range = range->next) {
        for (ch = range->lb; ch < range->ub; ++ch) {
            if (rep[ch] != ch)
                continue;
            --remaining;
            slot->c.value = ch;
            slot->c.bump = remaining;
            ++slot;
        }
    }
}
void NullOp::compile(Char*, Ins*){
;
/*
 * Emit the instructions for an ALTOP node starting at `i`:
 *
 *   i[0]              FORK  -> first instruction of exp2
 *   i[1 ..]           code for exp1
 *   i[exp1->size+1]   GOTO  -> first instruction after exp2
 *   following slots   code for exp2
 */
static void
AltOp_compile(RegExp *re, Char *rep, Ins *i){
    RegExp *left = re->d.AltCatOp.exp1;
    RegExp *right = re->d.AltCatOp.exp2;
    Ins *jump = i + left->size + 1;

    i->i.tag = FORK;
    i->i.link = jump + 1;
    RegExp_compile(left, rep, i + 1);

    jump->i.tag = GOTO;
    jump->i.link = jump + right->size + 1;
    RegExp_compile(right, rep, jump + 1);
}
/*
 * Compile the tree rooted at `re` into the instruction array at `i`.
 * Node sizes must already be set (see RegExp_calcSize), because each
 * sub-expression is placed at an offset derived from its sibling's size.
 *
 *   NULLOP   emits nothing
 *   MATCHOP  delegated to MatchOp_compile
 *   RULEOP   records `i` in the rule, emits exp then ctx, then a TERM
 *            instruction whose link points back at the rule node
 *   ALTOP    delegated to AltOp_compile
 *   CATOP    emits exp1 immediately followed by exp2
 *   CLOSEOP  emits exp followed by a FORK that links back to its start
 */
void
RegExp_compile(RegExp *re, Char *rep, Ins *i)
{
    switch (re->type) {
    case NULLOP:
        return;

    case MATCHOP:
        MatchOp_compile(re, rep, i);
        return;

    case RULEOP: {
        RegExp *body = re->d.RuleOp.exp;
        RegExp *context = re->d.RuleOp.ctx;
        Ins *cursor = i;

        re->d.RuleOp.ins = i;
        RegExp_compile(body, rep, cursor);
        cursor += body->size;
        RegExp_compile(context, rep, cursor);
        cursor += context->size;
        cursor->i.tag = TERM;
        cursor->i.link = re;
        return;
    }

    case ALTOP:
        AltOp_compile(re, rep, i);
        return;

    case CATOP: {
        RegExp *left = re->d.AltCatOp.exp1;
        RegExp *right = re->d.AltCatOp.exp2;

        RegExp_compile(left, rep, i);
        RegExp_compile(right, rep, i + left->size);
        return;
    }

    case CLOSEOP: {
        RegExp *inner = re->d.exp;
        Ins *loop;

        RegExp_compile(inner, rep, i);
        loop = i + inner->size;
        loop->i.tag = FORK;
        loop->i.link = i;   /* back to the first instruction of inner */
        return;
    }
    }
}
void NullOp::split(CharSet&){
;
/*
 * Refine the character partition held in `s` against the ranges of the
 * MATCHOP node `re`.
 *
 * s->rep[c] is the pattern (CharPtn) character c currently belongs to and
 * `card` is updated here as characters move between patterns.  Every
 * character covered by re->d.match is moved from its current pattern x
 * to x->nxt, a pattern taken from the free list the first time x is
 * touched during this call.  A pattern consisting of a single character
 * is left alone.
 */
static void
MatchOp_split(RegExp *re, CharSet *s)
{
Range *r;
uint c;
for(r = re->d.match; r; r = r->next){
for(c = r->lb; c < r->ub; ++c){
CharPtn *x = s->rep[c], *a = x->nxt;
if(!a){
/* x has no split target yet during this call. */
if(x->card == 1)
continue;
/* Pop the head of the free list to serve as x's split target. */
x->nxt = a = s->freeHead;
/* If the free list is now empty, the tail pointer goes back to the head slot. */
if(!(s->freeHead = s->freeHead->nxt))
s->freeTail = &s->freeHead;
a->nxt = NULL;
/* Remember x on the fix list so its nxt link is revisited below. */
x->fix = s->fix;
s->fix = x;
}
if(--(x->card) == 0){
/* x lost its last character: append it to the tail of the free list. */
*s->freeTail = x;
*(s->freeTail = &x->nxt) = NULL;
}
s->rep[c] = a;
++(a->card);
}
}
/* Drain the fix list, clearing nxt on every pattern that still has members. */
for(; s->fix; s->fix = s->fix->fix)
if(s->fix->card)
s->fix->nxt = NULL;
}
/*
 * Walk the tree rooted at `re` and let every MATCHOP leaf refine the
 * character partition in `s` (see MatchOp_split).  Interior nodes simply
 * visit their children in order; NULLOP contributes nothing.
 */
void
RegExp_split(RegExp *re, CharSet *s)
{
    switch (re->type) {
    case NULLOP:
        return;

    case MATCHOP:
        MatchOp_split(re, s);
        return;

    case RULEOP: {
        RegExp *body = re->d.RuleOp.exp;
        RegExp *context = re->d.RuleOp.ctx;

        RegExp_split(body, s);
        RegExp_split(context, s);
        return;
    }

    case ALTOP:
    case CATOP: {
        /* Both binary node types share the AltCatOp layout. */
        RegExp *left = re->d.AltCatOp.exp1;
        RegExp *right = re->d.AltCatOp.exp2;

        RegExp_split(left, s);
        RegExp_split(right, s);
        return;
    }

    case CLOSEOP:
        RegExp_split(re->d.exp, s);
        return;
    }
}
ostream& operator<<(ostream &o, const Range &r){
if((r.ub - r.lb) == 1){
prtCh(o, r.lb);
/*
 * Write a textual rendering of the tree rooted at `re` to stream `o`:
 *   NULLOP   "_"
 *   MATCHOP  the range list, via Range_out
 *   RULEOP   exp "/" ctx ";"
 *   ALTOP    exp1 "|" exp2
 *   CATOP    exp1 exp2
 *   CLOSEOP  exp "+"
 */
void
RegExp_display(RegExp *re, FILE *o)
{
    switch (re->type) {
    case NULLOP:
        fputc('_', o);
        return;

    case MATCHOP:
        Range_out(o, re->d.match);
        return;

    case RULEOP: {
        RegExp *body = re->d.RuleOp.exp;
        RegExp *context = re->d.RuleOp.ctx;

        RegExp_display(body, o);
        fputc('/', o);
        RegExp_display(context, o);
        fputc(';', o);
        return;
    }

    case ALTOP: {
        RegExp *left = re->d.AltCatOp.exp1;
        RegExp *right = re->d.AltCatOp.exp2;

        RegExp_display(left, o);
        fputc('|', o);
        RegExp_display(right, o);
        return;
    }

    case CATOP: {
        RegExp *left = re->d.AltCatOp.exp1;
        RegExp *right = re->d.AltCatOp.exp2;

        RegExp_display(left, o);
        RegExp_display(right, o);
        return;
    }

    case CLOSEOP:
        RegExp_display(re->d.exp, o);
        fputc('+', o);
        return;
    }
}
void
Range_out(FILE *o, const Range *r)
{
if(!r)
return;
if((r->ub - r->lb) == 1){
prtCh(o, r->lb);
} else {
prtCh(o, r.lb); o << "-"; prtCh(o, r.ub-1);
prtCh(o, r->lb);
fputc('-', o);
prtCh(o, r->ub-1);
}
return o << r.next;
Range_out(o, r->next);
}
Range *doUnion(Range *r1, Range *r2){
@ -80,9 +313,9 @@ Range *doUnion(Range *r1, Range *r2){
for(;;){
Range *s;
if(r1->lb <= r2->lb){
s = new Range(*r1);
s = Range_new_copy(r1);
} else {
s = new Range(*r2);
s = Range_new_copy(r2);
}
*rP = s;
rP = &s->next;
@ -129,13 +362,13 @@ Range *doDiff(Range *r1, Range *r2){
for(; r2 && r2->ub <= r1->lb; r2 = r2->next);
for(; r2 && r2->lb < r1->ub; r2 = r2->next){
if(lb < r2->lb){
*rP = s = new Range(lb, r2->lb);
*rP = s = Range_new(lb, r2->lb);
rP = &s->next;
}
if((lb = r2->ub) >= r1->ub)
goto noMore;
}
*rP = s = new Range(lb, r1->ub);
*rP = s = Range_new(lb, r1->ub);
rP = &s->next;
noMore:;
}
@ -143,83 +376,23 @@ Range *doDiff(Range *r1, Range *r2){
return r;
}
MatchOp *merge(MatchOp *m1, MatchOp *m2){
RegExp *merge(RegExp *m1, RegExp *m2){
if(!m1)
return m2;
if(!m2)
return m1;
return new MatchOp(doUnion(m1->match, m2->match));
}
char *MatchOp::type = "MatchOp";
void MatchOp::display(ostream &o) const{
o << match;
}
void MatchOp::calcSize(Char *rep){
size = 1;
for(Range *r = match; r; r = r->next)
for(uint c = r->lb; c < r->ub; ++c)
if(rep[c] == c)
++size;
}
uint MatchOp::fixedLength(){
return 1;
}
void MatchOp::compile(Char *rep, Ins *i){
i->i.tag = CHAR;
i->i.link = &i[size];
Ins *j = &i[1];
uint bump = size;
for(Range *r = match; r; r = r->next){
for(uint c = r->lb; c < r->ub; ++c){
if(rep[c] == c){
j->c.value = c;
j->c.bump = --bump;
j++;
}
}
}
}
void MatchOp::split(CharSet &s){
for(Range *r = match; r; r = r->next){
for(uint c = r->lb; c < r->ub; ++c){
CharPtn *x = s.rep[c], *a = x->nxt;
if(!a){
if(x->card == 1)
continue;
x->nxt = a = s.freeHead;
if(!(s.freeHead = s.freeHead->nxt))
s.freeTail = &s.freeHead;
a->nxt = NULL;
x->fix = s.fix;
s.fix = x;
}
if(--(x->card) == 0){
*s.freeTail = x;
*(s.freeTail = &x->nxt) = NULL;
}
s.rep[c] = a;
++(a->card);
}
}
for(; s.fix; s.fix = s.fix->fix)
if(s.fix->card)
s.fix->nxt = NULL;
return RegExp_new_MatchOp(doUnion(m1->d.match, m2->d.match));
}
RegExp *mkDiff(RegExp *e1, RegExp *e2){
MatchOp *m1, *m2;
if(!(m1 = (MatchOp*) e1->isA(MatchOp::type)))
RegExp *m1, *m2;
Range *r;
if(!(m1 = RegExp_isA(e1, MATCHOP)))
return NULL;
if(!(m2 = (MatchOp*) e2->isA(MatchOp::type)))
if(!(m2 = RegExp_isA(e2, MATCHOP)))
return NULL;
Range *r = doDiff(m1->match, m2->match);
return r? (RegExp*) new MatchOp(r) : (RegExp*) new NullOp;
r = doDiff(m1->d.match, m2->d.match);
return r? RegExp_new_MatchOp(r) : RegExp_new_NullOp();
}
RegExp *doAlt(RegExp *e1, RegExp *e2){
@ -227,111 +400,35 @@ RegExp *doAlt(RegExp *e1, RegExp *e2){
return e2;
if(!e2)
return e1;
return new AltOp(e1, e2);
return RegExp_new_AltOp(e1, e2);
}
RegExp *mkAlt(RegExp *e1, RegExp *e2){
AltOp *a;
MatchOp *m1, *m2;
if((a = (AltOp*) e1->isA(AltOp::type))){
if((m1 = (MatchOp*) a->exp1->isA(MatchOp::type)))
e1 = a->exp2;
} else if((m1 = (MatchOp*) e1->isA(MatchOp::type))){
RegExp *a;
RegExp *m1, *m2;
if((a = RegExp_isA(e1, ALTOP))){
if((m1 = RegExp_isA(a->d.AltCatOp.exp1, MATCHOP)))
e1 = a->d.AltCatOp.exp2;
} else if((m1 = RegExp_isA(e1, MATCHOP))){
e1 = NULL;
}
if((a = (AltOp*) e2->isA(AltOp::type))){
if((m2 = (MatchOp*) a->exp1->isA(MatchOp::type)))
e2 = a->exp2;
} else if((m2 = (MatchOp*) e2->isA(MatchOp::type))){
if((a = RegExp_isA(e2, ALTOP))){
if((m2 = RegExp_isA(a->d.AltCatOp.exp1, MATCHOP)))
e2 = a->d.AltCatOp.exp2;
} else if((m2 = RegExp_isA(e2, MATCHOP))){
e2 = NULL;
}
return doAlt(merge(m1, m2), doAlt(e1, e2));
}
char *AltOp::type = "AltOp";
void AltOp::calcSize(Char *rep){
exp1->calcSize(rep);
exp2->calcSize(rep);
size = exp1->size + exp2->size + 2;
}
uint AltOp::fixedLength(){
uint l1 = exp1->fixedLength();
uint l2 = exp1->fixedLength();
if(l1 != l2 || l1 == ~0u)
return ~0;
return l1;
}
void AltOp::compile(Char *rep, Ins *i){
i->i.tag = FORK;
Ins *j = &i[exp1->size + 1];
i->i.link = &j[1];
exp1->compile(rep, &i[1]);
j->i.tag = GOTO;
j->i.link = &j[exp2->size + 1];
exp2->compile(rep, &j[1]);
}
void AltOp::split(CharSet &s){
exp1->split(s);
exp2->split(s);
}
char *CatOp::type = "CatOp";
void CatOp::calcSize(Char *rep){
exp1->calcSize(rep);
exp2->calcSize(rep);
size = exp1->size + exp2->size;
}
uint CatOp::fixedLength(){
uint l1, l2;
if((l1 = exp1->fixedLength()) != ~0u )
if((l2 = exp2->fixedLength()) != ~0u)
return l1+l2;
return ~0;
}
void CatOp::compile(Char *rep, Ins *i){
exp1->compile(rep, &i[0]);
exp2->compile(rep, &i[exp1->size]);
}
void CatOp::split(CharSet &s){
exp1->split(s);
exp2->split(s);
}
char *CloseOp::type = "CloseOp";
void CloseOp::calcSize(Char *rep){
exp->calcSize(rep);
size = exp->size + 1;
}
void CloseOp::compile(Char *rep, Ins *i){
exp->compile(rep, &i[0]);
i += exp->size;
i->i.tag = FORK;
i->i.link = i - exp->size;
}
void CloseOp::split(CharSet &s){
exp->split(s);
}
RegExp *expr(Scanner &);
uchar unescape(SubStr &s){
s.len--;
uchar unescape(SubStr *s){
uchar c;
if((c = *s.str++) != '\\' || s.len == 0)
uchar v;
s->len--;
if((c = *s->str++) != '\\' || s->len == 0)
return xlat[c];
s.len--;
switch(c = *s.str++){
s->len--;
switch(c = *s->str++){
case 'n':
return xlat['\n'];
case 't':
@ -348,8 +445,8 @@ uchar unescape(SubStr &s){
return xlat['\a'];
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7': {
uchar v = c - '0';
for(; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++)
v = c - '0';
for(; s->len != 0 && '0' <= (c = *s->str) && c <= '7'; s->len--, s->str++)
v = v*8 + (c - '0');
return v;
} default:
@ -357,74 +454,61 @@ uchar unescape(SubStr &s){
}
}
Range *getRange(SubStr &s){
Range *getRange(SubStr *s){
uchar lb = unescape(s), ub;
if(s.len < 2 || *s.str != '-'){
if(s->len < 2 || *s->str != '-'){
ub = lb;
} else {
s.len--; s.str++;
s->len--; s->str++;
ub = unescape(s);
if(ub < lb){
uchar tmp;
tmp = lb; lb = ub; ub = tmp;
}
}
return new Range(lb, ub+1);
return Range_new(lb, ub+1);
}
RegExp *matchChar(uint c){
return new MatchOp(new Range(c, c+1));
return RegExp_new_MatchOp(Range_new(c, c+1));
}
RegExp *strToRE(SubStr s){
RegExp *re;
s.len -= 2; s.str += 1;
if(s.len == 0)
return new NullOp;
RegExp *re = matchChar(unescape(s));
return RegExp_new_NullOp();
re = matchChar(unescape(&s));
while(s.len > 0)
re = new CatOp(re, matchChar(unescape(s)));
re = RegExp_new_CatOp(re, matchChar(unescape(&s)));
return re;
}
RegExp *ranToRE(SubStr s){
Range *r;
s.len -= 2; s.str += 1;
if(s.len == 0)
return new NullOp;
Range *r = getRange(s);
return RegExp_new_NullOp();
r = getRange(&s);
while(s.len > 0)
r = doUnion(r, getRange(s));
return new MatchOp(r);
}
char *RuleOp::type = "RuleOp";
RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a)
: exp(e), ctx(c), ins(NULL), accept(a), code(t) {
;
}
void RuleOp::calcSize(Char *rep){
exp->calcSize(rep);
ctx->calcSize(rep);
size = exp->size + ctx->size + 1;
}
void RuleOp::compile(Char *rep, Ins *i){
ins = i;
exp->compile(rep, &i[0]);
i += exp->size;
ctx->compile(rep, &i[0]);
i += ctx->size;
i->i.tag = TERM;
i->i.link = this;
}
void RuleOp::split(CharSet &s){
exp->split(s);
ctx->split(s);
r = doUnion(r, getRange(&s));
return RegExp_new_MatchOp(r);
}
/*
 * Allocate a RULEOP node.
 *   e  expression matched by the rule
 *   c  context expression of the rule
 *   t  token stored as the rule's code
 *   a  accept number of the rule
 * The `ins` slot starts out NULL; RegExp_compile fills it in later.
 * The node's `size` is left unset here.
 */
RegExp *
RegExp_new_RuleOp(RegExp *e, RegExp *c, Token *t, uint a)
{
    RegExp *rule = malloc(sizeof *rule);

    rule->type = RULEOP;
    rule->d.RuleOp.ins = NULL;
    rule->d.RuleOp.exp = e;
    rule->d.RuleOp.ctx = c;
    rule->d.RuleOp.code = t;
    rule->d.RuleOp.accept = a;
    return rule;
}
extern void printSpan(ostream&, uint, uint);
extern void printSpan(FILE *, uint, uint);
void optimize(Ins *i){
while(!isMarked(i)){
@ -453,9 +537,13 @@ void optimize(Ins *i){
}
}
void genCode(ostream& o, RegExp *re){
void genCode(FILE *o, RegExp *re){
CharSet cs;
uint j;
Char rep[nChars];
Ins *ins, *eoi;
DFA *dfa;
memset(&cs, 0, sizeof(cs));
for(j = 0; j < nChars; ++j){
cs.rep[j] = &cs.ptn[0];
@ -465,7 +553,7 @@ void genCode(ostream& o, RegExp *re){
*(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL;
cs.ptn[0].card = nChars;
cs.ptn[0].nxt = NULL;
re->split(cs);
RegExp_split(re, &cs);
/*
for(uint k = 0; k < nChars;){
for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];);
@ -473,18 +561,17 @@ void genCode(ostream& o, RegExp *re){
cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;
}
*/
Char rep[nChars];
for(j = 0; j < nChars; ++j){
if(!cs.rep[j]->nxt)
cs.rep[j]->nxt = &cs.ptn[j];
rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
}
re->calcSize(rep);
Ins *ins = new Ins[re->size+1];
RegExp_calcSize(re, rep);
ins = malloc(sizeof(Ins)*(re->size+1));
memset(ins, 0, (re->size+1)*sizeof(Ins));
re->compile(rep, ins);
Ins *eoi = &ins[re->size];
RegExp_compile(re, rep, ins);
eoi = &ins[re->size];
eoi->i.tag = GOTO;
eoi->i.link = eoi;
@ -498,8 +585,8 @@ void genCode(ostream& o, RegExp *re){
}
}
DFA *dfa = new DFA(ins, re->size, 0, 256, rep);
dfa->emit(o);
delete dfa;
delete [] ins;
dfa = DFA_new(ins, re->size, 0, 256, rep);
DFA_emit(dfa, o);
DFA_delete(dfa);
free(ins);
}

@ -148,14 +148,14 @@ prt(cerr, b->go, b->on); cerr << endl;
first = NULL;
}
void genGoTo(ostream &o, State *to){
o << "\tgoto yy" << to->label << ";\n";
/* Emit a "goto yy<label>;" line that jumps to state `to`. */
void genGoTo(FILE *o, State *to){
    uint target = to->label;

    fprintf(o, "\tgoto yy%u;\n", target);
}
void genIf(ostream &o, char *cmp, uint v){
o << "\tif(yych " << cmp << " '";
void genIf(FILE *o, const char *cmp, uint v){
fprintf(o, "\tif(yych %s '", cmp);
prtCh(o, v);
o << "')";
fputs("')", o);
}
void indent(ostream &o, uint i){
@ -171,78 +171,82 @@ static void need(ostream &o, uint n){
o << "\tyych = *YYCURSOR;\n";
}
void Match::emit(ostream &o){
if(state->link){
o << "\t++YYCURSOR;\n";
need(o, state->depth);
} else {
o << "\tyych = *++YYCURSOR;\n";
}
}
void Enter::emit(ostream &o){
if(state->link){
o << "\t++YYCURSOR;\n";
o << "yy" << label << ":\n";
need(o, state->depth);
} else {
o << "\tyych = *++YYCURSOR;\n";
o << "yy" << label << ":\n";
}
}
void Save::emit(ostream &o){
o << "\tyyaccept = " << selector << ";\n";
if(state->link){
o << "\tYYMARKER = ++YYCURSOR;\n";
need(o, state->depth);
} else {
o << "\tyych = *(YYMARKER = ++YYCURSOR);\n";
}
}
Move::Move(State *s) : Action(s) {
;
}
void Move::emit(ostream &o){
;
}
Accept::Accept(State *x, uint n, uint *s, State **r)
: Action(x), nRules(n), saves(s), rules(r){
;
}
void
Action_emit(Action *a, FILE *o)
{
int first = 1;
uint i;
uint back;
void Accept::emit(ostream &o){
bool first = true;
for(uint i = 0; i < nRules; ++i)
if(saves[i] != ~0u){
if(first){
first = false;
o << "\tYYCURSOR = YYMARKER;\n";
o << "\tswitch(yyaccept){\n";
switch (a->type) {
case MATCHACT:
if(a->state->link){
fputs("\t++YYCURSOR;\n", o);
need(o, a->state->depth);
} else {
fputs("\tyych = *++YYCURSOR;\n", o);
}
o << "\tcase " << saves[i] << ":";
genGoTo(o, rules[i]);
}
if(!first)
o << "\t}\n";
}
Rule::Rule(State *s, RuleOp *r) : Action(s), rule(r) {
;
break;
case ENTERACT:
if(a->state->link){
fputs("\t++YYCURSOR;\n", o);
fprintf(o, "yy%u:\n", a->d.label);
need(o, a->state->depth);
} else {
fputs("\tyych = *++YYCURSOR;\n", o);
fprintf(o, "yy%u:\n", a->d.label);
}
break;
case SAVEMATCHACT:
fprintf(o, "\tyyaccept = %u;\n", selector);
if(a->state->link){
fputs("\tYYMARKER = ++YYCURSOR;\n", o);
need(o, a->state->depth);
} else {
fputs("\tyych = *(YYMARKER = ++YYCURSOR);\n", o);
}
break;
case MOVEACT:
break;
case ACCEPTACT:
for(i = 0; i < a->d.Accept.nRules; ++i)
if(a->d.Accept.saves[i] != ~0u){
if(first){
first = 0;
fputs("\tYYCURSOR = YYMARKER;\n", o);
fputs("\tswitch(yyaccept){\n", o);
}
fprintf(o, "\tcase %u:", a->d.Accept.saves[i]);
genGoTo(o, a->d.Accept.rules[i]);
}
if(!first)
fputs("\t}\n", o);
break;
case RULEACT:
back = RegExp_fixedLength(a->d.rule->d.RuleOp.ctx);
if(back != ~0u && back > 0u)
fprintf(o, "\tYYCURSOR -= %u;", back);
fprintf(o, "\n#line %u\n\t", a->d.rule->d.RuleOp.code->line);
Str_out(o, a->d.rule->d.RuleOp.code->text);
fprintf(o, "\n");
break;
}
}
void Rule::emit(ostream &o){
uint back = rule->ctx->fixedLength();
if(back != ~0u && back > 0u)
o << "\tYYCURSOR -= " << back << ";";
o << "\n#line " << rule->code->line
<< "\n\t" << rule->code->text << "\n";
/*
 * Allocate an ACCEPTACT action for state `x` and install it as the
 * state's action.
 *   n  number of entries in the two arrays
 *   s  per-rule save selectors (stored, not copied)
 *   r  per-rule target states (stored, not copied)
 */
Action *
Action_new_Accept(State *x, uint n, uint *s, State **r)
{
    Action *act = malloc(sizeof *act);

    act->state = x;
    act->type = ACCEPTACT;
    act->d.Accept.rules = r;
    act->d.Accept.saves = s;
    act->d.Accept.nRules = n;

    x->action = act;
    return act;
}
void doLinear(ostream &o, uint i, Span *s, uint n, State *next){
void doLinear(FILE *o, uint i, Span *s, uint n, State *next){
for(;;){
State *bg = s[0].to;
while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1){
@ -269,8 +273,9 @@ void doLinear(ostream &o, uint i, Span *s, uint n, State *next){
}
}
void Go::genLinear(ostream &o, State *next){
doLinear(o, 0, span, nSpans, next);
/* Emit the transitions of `g` using doLinear over all of its spans, at indent level 0. */
void
Go_genLinear(Go *g, FILE *o, State *next){
    Span *spans = g->span;
    uint count = g->nSpans;

    doLinear(o, 0, spans, count, next);
}
void genCases(ostream &o, uint lb, Span *s){
@ -284,111 +289,118 @@ void genCases(ostream &o, uint lb, Span *s){
}
}
void Go::genSwitch(ostream &o, State *next){
if(nSpans <= 2){
void
Go_genSwitch(Go *g, FILE *o, State *next){
if(g->nSpans <= 2){
genLinear(o, next);
} else {
State *def = span[nSpans-1].to;
Span **sP = new Span*[nSpans-1], **r, **s, **t;
State *def = g->span[g->nSpans-1].to;
Span **sP = malloc(sizeof(Span*)*(g->nSpans-1)), **r, **s, **t;
uint i;
t = &sP[0];
for(uint i = 0; i < nSpans; ++i)
if(span[i].to != def)
*(t++) = &span[i];
for(i = 0; i < g->nSpans; ++i)
if(g->span[i].to != def)
*(t++) = &g->span[i];
o << "\tswitch(yych){\n";
fputs("\tswitch(yych){\n", o);
while(t != &sP[0]){
State *to;
r = s = &sP[0];
if(*s == &span[0])
if(*s == &g->span[0])
genCases(o, 0, *s);
else
genCases(o, (*s)[-1].ub, *s);
State *to = (*s)->to;
to = (*s)->to;
while(++s < t){
if((*s)->to == to)
if((*s)->to == g->to)
genCases(o, (*s)[-1].ub, *s);
else
*(r++) = *s;
}
genGoTo(o, to);
genGoTo(o, g->to);
t = r;
}
o << "\tdefault:";
fputs("\tdefault:", o);
genGoTo(o, def);
o << "\t}\n";
fputs("\t}\n", o);
delete [] sP;
free(sP);
}
}
void doBinary(ostream &o, uint i, Span *s, uint n, State *next){
void doBinary(FILE *o, uint i, Span *s, uint n, State *next){
if(n <= 4){
doLinear(o, i, s, n, next);
} else {
uint h = n/2;
indent(o, i); genIf(o, "<=", s[h-1].ub - 1); o << "{\n";
indent(o, i); genIf(o, "<=", s[h-1].ub - 1); fputs("{\n", o);
doBinary(o, i+1, &s[0], h, next);
indent(o, i); o << "\t} else {\n";
indent(o, i); fputs("\t} else {\n", o);
doBinary(o, i+1, &s[h], n - h, next);
indent(o, i); o << "\t}\n";
indent(o, i); fputs("\t}\n", o);
}
}
void Go::genBinary(ostream &o, State *next){
doBinary(o, 0, span, nSpans, next);
/* Emit the transitions of `g` using doBinary over all of its spans, at indent level 0. */
void
Go_genBinary(Go *g, FILE *o, State *next){
    Span *spans = g->span;
    uint count = g->nSpans;

    doBinary(o, 0, spans, count, next);
}
void Go::genBase(ostream &o, State *next){
if(nSpans == 0)
void
Go_genBase(Go *g, FILE *o, State *next){
if(g->nSpans == 0)
return;
if(!sFlag){
genSwitch(o, next);
Go_genSwitch(g, o, next);
return;
}
if(nSpans > 8){
Span *bot = &span[0], *top = &span[nSpans-1];
if(g->nSpans > 8){
Span *bot = &g->span[0], *top = &g->span[g->nSpans-1];
uint util;
if(bot[0].to == top[0].to){
util = (top[-1].ub - bot[0].ub)/(nSpans - 2);
util = (top[-1].ub - bot[0].ub)/(g->nSpans - 2);
} else {
if(bot[0].ub > (top[0].ub - top[-1].ub)){
util = (top[0].ub - bot[0].ub)/(nSpans - 1);
util = (top[0].ub - bot[0].ub)/(g->nSpans - 1);
} else {
util = top[-1].ub/(nSpans - 1);
util = top[-1].ub/(g->nSpans - 1);
}
}
if(util <= 2){
genSwitch(o, next);
Go_genSwitch(g, o, next);
return;
}
}
if(nSpans > 5){
genBinary(o, next);
Go_genBinary(g, o, next);
} else {
genLinear(o, next);
Go_genLinear(g, o, next);
}
}
void Go::genGoto(ostream &o, State *next){
void
Go_genGoto(Go *g, FILE *o, State *next){
uint i;
if(bFlag){
for(uint i = 0; i < nSpans; ++i){
State *to = span[i].to;
for(i = 0; i < g->nSpans; ++i){
State *to = g->span[i].to;
if(to && to->isBase){
BitMap *b = BitMap::find(to);
if(b && matches(b->go, b->on, this, to)){
if(b && matches(b->go, b->on, g, to)){
Go go;
go.span = new Span[nSpans];
go.unmap(this, to);
go.span = malloc(sizeof(Span)*g->nSpans);
go.unmap(g, to);
o << "\tif(yybm[" << b->i << "+yych] & " << (uint) b->m << ")";
genGoTo(o, to);
go.genBase(o, next);
delete [] go.span;
Go_genBase(go, o, next);
free(go.span);
return;
}
}
}
}
genBase(o, next);
Go_genBase(g, o, next);
}
void State::emit(ostream &o){

@ -5,77 +5,91 @@
#include "substr.h"
#include "dfa.h"
inline char octCh(uint c){
return '0' + c%8;
}
/* Lowest octal digit of c as a character.  The argument is parenthesized so that
 * compound expressions such as octCh(a+b) are reduced modulo 8 as a whole. */
#define octCh(c) ('0' + (c)%8)
void prtCh(ostream &o, uchar c){
void prtCh(FILE *o, uchar c){
uchar oc = talx[c];
switch(oc){
case '\'': o << "\\'"; break;
case '\n': o << "\\n"; break;
case '\t': o << "\\t"; break;
case '\v': o << "\\v"; break;
case '\b': o << "\\b"; break;
case '\r': o << "\\r"; break;
case '\f': o << "\\f"; break;
case '\a': o << "\\a"; break;
case '\\': o << "\\\\"; break;
case '\'': fputs("\\'", o); break;
case '\n': fputs("\\n", o); break;
case '\t': fputs("\\t", o); break;
case '\v': fputs("\\v", o); break;
case '\b': fputs("\\b", o); break;
case '\r': fputs("\\r", o); break;
case '\f': fputs("\\f", o); break;
case '\a': fputs("\\a", o); break;
case '\\': fputs("\\\\", o); break;
default:
if(isprint(oc))
o << (char) oc;
fputc(oc, o);
else
o << '\\' << octCh(c/64) << octCh(c/8) << octCh(c);
fprintf(o, "\\%c%c%c", octCh(c/64), octCh(c/8), octCh(c));
}
}
void printSpan(ostream &o, uint lb, uint ub){
void printSpan(FILE *o, uint lb, uint ub){
if(lb > ub)
o << "*";
o << "[";
fputc('*', o);
fputc('[', o);
if((ub - lb) == 1){
prtCh(o, lb);
} else {
prtCh(o, lb);
o << "-";
fputc('-', o);
prtCh(o, ub-1);
}
o << "]";
fputc(']', o);
}
uint Span::show(ostream &o, uint lb){
if(to){
printSpan(o, lb, ub);
o << " " << to->label << "; ";
/*
 * Print span `s` to `o` as "[lb-ub] <label>; " when it has a target state;
 * spans without a target print nothing.
 *
 *   lb  lower bound of this span (the upper bound of the previous one)
 *
 * Returns this span's upper bound so the caller can pass it on as the
 * next span's lower bound.  The value is read through `s`; the bare `ub`
 * left over from the C++ member function does not exist in C.
 */
uint
Span_show(Span *s, FILE *o, uint lb)
{
    if (s->to) {
        printSpan(o, lb, s->ub);
        fprintf(o, " %u; ", s->to->label);
    }
    return s->ub;
}
ostream& operator<<(ostream &o, const State &s){
o << "state " << s.label;
if(s.rule)
o << " accepts " << s.rule->accept;
o << "\n";
uint lb = 0;
for(uint i = 0; i < s.go.nSpans; ++i)
lb = s.go.span[i].show(o, lb);
return o;
/*
 * Dump state `s` to `o`: a "state <label>" header, the accept number of
 * its rule when it has one, then every outgoing span.
 *
 * Spans are printed with Span_show(); the C++ style member call
 * span[i].show() cannot be used on a plain C struct.
 */
void
State_out(FILE *o, const State *s){
    uint lb = 0;
    uint i;

    fprintf(o, "state %u", s->label);
    if (s->rule)
        fprintf(o, " accepts %u", s->rule->d.RuleOp.accept);
    fputs("\n", o);

    /* Each span's upper bound becomes the next span's lower bound. */
    for (i = 0; i < s->go.nSpans; ++i)
        lb = Span_show(&s->go.span[i], o, lb);
}
ostream& operator<<(ostream &o, const DFA &dfa){
for(State *s = dfa.head; s; s = s->next)
o << s << "\n\n";
return o;
/* Dump every state of `dfa` to `o`, in list order, each followed by a blank line pair. */
void
DFA_out(FILE *o, const DFA *dfa){
    State *cur = dfa->head;

    while (cur) {
        State_out(o, cur);
        fputs("\n\n", o);
        cur = cur->next;
    }
}
State::State() : rule(NULL), link(NULL), kCount(0), kernel(NULL), action(NULL) {
go.nSpans = 0;
go.span = NULL;
/*
 * Allocate a State with no rule, link, kernel, action or spans.
 *
 * Each pointer member is cleared with its own assignment.  The members
 * have different pointer types (RegExp*, State*, Ins**, Action*), so
 * chaining them as `a = b = NULL` would assign one pointer type to
 * another.
 *
 * `label`, `next`, `depth` and `isBase` are not set here.
 */
State *
State_new(void)
{
    State *s = malloc(sizeof(State));

    s->rule = NULL;
    s->link = NULL;
    s->kCount = 0;
    s->kernel = NULL;
    s->action = NULL;
    s->go.nSpans = 0;
    s->go.span = NULL;
    return s;
}
State::~State(){
delete [] kernel;
delete [] go.span;
/* Release a State together with its kernel array and its span array. */
void
State_delete(State *s)
{
    Ins **kernel = s->kernel;
    Span *spans = s->go.span;

    free(kernel);
    free(spans);
    free(s);
}
static Ins **closure(Ins **cP, Ins *i){

@ -1,149 +1,154 @@
#ifndef _dfa_h
#define _dfa_h
#ifndef re2c_dfa_h
#define re2c_dfa_h
#include <iostream.h>
#include <stdio.h>
#include "re.h"
extern void prtCh(ostream&, uchar);
extern void printSpan(ostream&, uint, uint);
class DFA;
class State;
class Action {
public:
State *state;
public:
Action(State*);
virtual void emit(ostream&) = 0;
};
class Match: public Action {
public:
Match(State*);
void emit(ostream&);
};
class Enter: public Action {
public:
uint label;
public:
Enter(State*, uint);
void emit(ostream&);
};
class Save: public Match {
public:
uint selector;
public:
Save(State*, uint);
void emit(ostream&);
};
class Move: public Action {
public:
Move(State*);
void emit(ostream&);
};
class Accept: public Action {
public:
uint nRules;
uint *saves;
State **rules;
public:
Accept(State*, uint, uint*, State**);
void emit(ostream&);
};
class Rule: public Action {
public:
RuleOp *rule;
public:
Rule(State*, RuleOp*);
void emit(ostream&);
};
class Span {
public:
extern void prtCh(FILE *, uchar);
extern void printSpan(FILE *, uint, uint);
struct DFA;
struct State;
typedef enum {
MATCHACT = 1,
ENTERACT,
SAVEMATCHACT,
MOVEACT,
ACCEPTACT,
RULEACT
} ActionType;
/*
 * Action attached to a DFA state.  `type` says which member of the union
 * `d` is in use; MATCHACT and MOVEACT carry no extra data.
 */
typedef struct Action {
struct State *state;	/* state this action belongs to */
ActionType type;	/* selects the active member of d */
union {
/* data for Enter */
uint label;
/* data for SaveMatch */
uint selector;
/* data for Accept */
struct {
uint nRules;	/* number of entries in saves[] and rules[] */
uint *saves;	/* per-rule selector; ~0u entries are skipped when emitting */
struct State **rules;	/* per-rule goto target */
} Accept;
/* data for Rule */
RegExp *rule; /* RuleOp */
} d;
} Action;
void Action_emit(Action*, FILE *);
typedef struct Span {
uint ub;
State *to;
public:
uint show(ostream&, uint);
};
struct State *to;
} Span;
uint Span_show(Span*, FILE *, uint);
class Go {
public:
typedef struct Go {
uint nSpans;
Span *span;
public:
void genGoto(ostream&, State*);
void genBase(ostream&, State*);
void genLinear(ostream&, State*);
void genBinary(ostream&, State*);
void genSwitch(ostream&, State*);
void compact();
void unmap(Go*, State*);
};
class State {
public:
} Go;
typedef struct State {
uint label;
RuleOp *rule;
State *next;
State *link;
uint depth; // for finding SCCs
RegExp *rule; /* RuleOp */
struct State *next;
struct State *link;
uint depth; /* for finding SCCs */
uint kCount;
Ins **kernel;
bool isBase:1;
uint isBase:1;
Go go;
Action *action;
public:
State();
~State();
void emit(ostream&);
friend ostream& operator<<(ostream&, const State&);
friend ostream& operator<<(ostream&, const State*);
};
class DFA {
public:
} State;
void Go_genGoto(Go*, FILE *, State*);
void Go_genBase(Go*, FILE *, State*);
void Go_genLinear(Go*, FILE *, State*);
void Go_genBinary(Go*, FILE *, State*);
void Go_genSwitch(Go*, FILE *, State*);
void Go_compact(Go*);
void Go_unmap(Go*, Go*, State*);
State *State_new(void);
void State_delete(State*);
void State_emit(State*, FILE *);
void State_out(FILE *, const State*);
typedef struct DFA {
uint lbChar;
uint ubChar;
uint nStates;
State *head, **tail;
State *toDo;
public:
DFA(Ins*, uint, uint, uint, Char*);
~DFA();
void addState(State**, State*);
State *findState(Ins**, uint);
void split(State*);
void findSCCs();
void emit(ostream&);
friend ostream& operator<<(ostream&, const DFA&);
friend ostream& operator<<(ostream&, const DFA*);
};
inline Action::Action(State *s) : state(s) {
s->action = this;
} DFA;
DFA *DFA_new(Ins*, uint, uint, uint, Char*);
void DFA_delete(DFA*);
void DFA_addState(DFA*, State**, State*);
State *DFA_findState(DFA*, Ins**, uint);
void DFA_split(DFA*, State*);
void DFA_findSCCs(DFA*);
void DFA_emit(DFA*, FILE *);
void DFA_out(FILE *, const DFA*);
/* Allocate a MATCHACT action for state `s` and install it as s->action. */
static inline Action *
Action_new_Match(State *s)
{
    Action *act = malloc(sizeof *act);

    act->state = s;
    act->type = MATCHACT;
    s->action = act;
    return act;
}
inline Match::Match(State *s) : Action(s)
{ }
inline Enter::Enter(State *s, uint l) : Action(s), label(l)
{ }
/* Allocate an ENTERACT action carrying label `l` for state `s` and install it as s->action. */
static inline Action *
Action_new_Enter(State *s, uint l)
{
    Action *act = malloc(sizeof *act);

    act->state = s;
    act->type = ENTERACT;
    act->d.label = l;
    s->action = act;
    return act;
}
inline Save::Save(State *s, uint i) : Match(s), selector(i)
{ }
/* Allocate a SAVEMATCHACT action carrying selector `i` for state `s` and install it as s->action. */
static inline Action *
Action_new_Save(State *s, uint i)
{
    Action *act = malloc(sizeof *act);

    act->state = s;
    act->type = SAVEMATCHACT;
    act->d.selector = i;
    s->action = act;
    return act;
}
inline ostream& operator<<(ostream &o, const State *s)
{ return o << *s; }
/* Allocate a MOVEACT action for state `s` and install it as s->action. */
static inline Action *
Action_new_Move(State *s)
{
    Action *act = malloc(sizeof *act);

    act->state = s;
    act->type = MOVEACT;
    s->action = act;
    return act;
}
inline ostream& operator<<(ostream &o, const DFA *dfa)
{ return o << *dfa; }
Action *Action_new_Accept(State*, uint, uint*, State**);
/* Allocate a RULEACT action for state `s` that refers to the RuleOp node `r`, and install it as s->action. */
static inline Action *
Action_new_Rule(State *s, RegExp *r) /* RuleOp */
{
    Action *act = malloc(sizeof *act);

    act->state = s;
    act->type = RULEACT;
    act->d.rule = r;
    s->action = act;
    return act;
}
#endif

@ -4,8 +4,8 @@
#include "basics.h"
extern char *fileName;
extern bool sFlag;
extern bool bFlag;
extern int sFlag;
extern int bFlag;
extern uchar asc2ebc[256];
extern uchar ebc2asc[256];

@ -1,19 +1,18 @@
#ifndef _ins_h
#define _ins_h
#ifndef re2c_ins_h
#define re2c_ins_h
#include <iostream.h>
#include "basics.h"
const uint nChars = 256;
#define nChars 256
typedef uchar Char;
const uint CHAR = 0;
const uint GOTO = 1;
const uint FORK = 2;
const uint TERM = 3;
const uint CTXT = 4;
#define CHAR 0
#define GOTO 1
#define FORK 2
#define TERM 3
#define CTXT 4
union Ins {
typedef union Ins {
struct {
byte tag;
byte marked;
@ -24,18 +23,18 @@ union Ins {
ushort bump;
void *link;
} c;
};
} Ins;
inline bool isMarked(Ins *i){
/* Return 1 when the instruction's mark flag is set, 0 otherwise. */
static inline int isMarked(Ins *i){
    return i->i.marked ? 1 : 0;
}
inline void mark(Ins *i){
i->i.marked = true;
/* Set the mark flag of instruction `i`. */
static inline void mark(Ins *i){
    Ins *target = i;

    target->i.marked = 1;
}
inline void unmark(Ins *i){
i->i.marked = false;
/* Clear the mark flag of instruction `i`. */
static inline void unmark(Ins *i){
    Ins *target = i;

    target->i.marked = 0;
}
#endif

@ -1,20 +1,27 @@
#ifndef _parser_h
#define _parser_h
#ifndef re2c_parser_h
#define re2c_parser_h
#include "scanner.h"
#include "re.h"
class Symbol {
public:
static Symbol *first;
Symbol *next;
typedef struct Symbol {
struct Symbol *next;
Str name;
RegExp *re;
public:
Symbol(const SubStr&);
static Symbol *find(const SubStr&);
};
} Symbol;
void parse(int, ostream&);
void Symbol_init(Symbol *, const SubStr*);
static inline Symbol *Symbol_new(const SubStr*);
Symbol *Symbol_find(const SubStr*);
void parse(int, FILE *);
/* Allocate a Symbol and initialise it from `str` via Symbol_init. */
static inline Symbol *
Symbol_new(const SubStr *str)
{
    Symbol *sym = malloc(sizeof *sym);

    Symbol_init(sym, str);
    return sym;
}
#endif

@ -1,20 +1,27 @@
#ifndef _parser_h
#define _parser_h
#ifndef re2c_parser_h
#define re2c_parser_h
#include "scanner.h"
#include "re.h"
class Symbol {
public:
static Symbol *first;
Symbol *next;
typedef struct Symbol {
struct Symbol *next;
Str name;
RegExp *re;
public:
Symbol(const SubStr&);
static Symbol *find(const SubStr&);
};
} Symbol;
void parse(int, ostream&);
void Symbol_init(Symbol *, const SubStr*);
static inline Symbol *Symbol_new(const SubStr*);
Symbol *Symbol_find(const SubStr*);
void parse(int, FILE *);
/* Allocate a Symbol and initialise it from `str` via Symbol_init. */
static inline Symbol *
Symbol_new(const SubStr *str)
{
    Symbol *sym = malloc(sizeof *sym);

    Symbol_init(sym, str);
    return sym;
}
#endif

@ -1,178 +1,164 @@
#ifndef _re_h
#define _re_h
#ifndef re2c_re_h
#define re2c_re_h
#include <iostream.h>
#include <stdio.h>
#include "token.h"
#include "ins.h"
struct CharPtn {
typedef struct CharPtn {
uint card;
CharPtn *fix;
CharPtn *nxt;
};
struct CharPtn *fix;
struct CharPtn *nxt;
} CharPtn;
struct CharSet {
typedef struct CharSet {
CharPtn *fix;
CharPtn *freeHead, **freeTail;
CharPtn *rep[nChars];
CharPtn ptn[nChars];
};
class Range {
public:
Range *next;
uint lb, ub; // [lb,ub)
public:
Range(uint l, uint u) : next(NULL), lb(l), ub(u)
{ }
Range(Range &r) : next(NULL), lb(r.lb), ub(r.ub)
{ }
friend ostream& operator<<(ostream&, const Range&);
friend ostream& operator<<(ostream&, const Range*);
};
inline ostream& operator<<(ostream &o, const Range *r){
return r? o << *r : o;
} CharSet;
/*
 * One range of values with a link to a further Range.
 * Range_init(), Range_new(), Range_copy() and Range_new_copy() all leave
 * the link set to NULL.
 */
typedef struct Range {
/* link to another Range; NULL right after initialisation */
struct Range *next;
/* lower bound (inclusive) and upper bound (exclusive) */
uint lb, ub; /* [lb,ub) */
} Range;
/*
 * Initialise an existing Range in place as the half-open interval [l,u).
 * The node starts out unlinked (next == NULL).
 */
static inline void
Range_init(Range *r, uint l, uint u)
{
    r->ub = u;
    r->lb = l;
    r->next = NULL;
}
/*
 * Allocate a new, unlinked Range covering the half-open interval [l,u).
 *
 * Returns the new node, or NULL when malloc() fails; the fields are only
 * written once the allocation is known to have succeeded.
 */
static inline Range *
Range_new(uint l, uint u)
{
    Range *r = malloc(sizeof *r);

    if (r == NULL)
        return NULL;
    r->next = NULL;
    r->lb = l;
    r->ub = u;
    return r;
}
/*
 * Copy the bounds of r into ro.  The link is not copied: ro->next is
 * reset to NULL.
 */
static inline void
Range_copy(Range *ro, const Range *r)
{
    const uint lower = r->lb;
    const uint upper = r->ub;

    ro->next = NULL;
    ro->lb = lower;
    ro->ub = upper;
}
class RegExp {
public:
static inline Range *
Range_new_copy(Range *r)
{
Range *ro = malloc(sizeof(Range));
ro->next = NULL;
ro->lb = r->lb;
ro->ub = r->ub;
return ro;
}
void Range_out(FILE *, const Range *);
/*
 * Kind tag stored in RegExp.type.  Numbering starts at 1, so 0 is not a
 * valid kind.
 */
typedef enum {
/* set by RegExp_new_NullOp() */
NULLOP = 1,
/* set by RegExp_new_MatchOp(); payload is d.match */
MATCHOP,
/* presumably set by RegExp_new_RuleOp() (body not in this header); payload is d.RuleOp */
RULEOP,
/* set by RegExp_new_AltOp(); payload is d.AltCatOp */
ALTOP,
/* set by RegExp_new_CatOp(); payload is d.AltCatOp */
CATOP,
/* set by RegExp_new_CloseOp(); payload is d.exp */
CLOSEOP
} RegExpType;
typedef struct RegExp {
RegExpType type;
uint size;
public:
virtual char *typeOf() = 0;
RegExp *isA(char *t)
{ return typeOf() == t? this : NULL; }
virtual void split(CharSet&) = 0;
virtual void calcSize(Char*) = 0;
virtual uint fixedLength();
virtual void compile(Char*, Ins*) = 0;
virtual void display(ostream&) const = 0;
friend ostream& operator<<(ostream&, const RegExp&);
friend ostream& operator<<(ostream&, const RegExp*);
};
inline ostream& operator<<(ostream &o, const RegExp &re){
re.display(o);
return o;
union {
/* for MatchOp */
Range *match;
/* for RuleOp */
struct {
struct RegExp *exp;
struct RegExp *ctx;
Ins *ins;
uint accept;
Token *code;
uint line;
} RuleOp;
/* for AltOp and CatOp*/
struct {
struct RegExp *exp1, *exp2;
} AltCatOp;
/* for CloseOp */
struct RegExp *exp;
} d;
} RegExp;
/*
 * Type test: returns r itself when its kind tag equals t, otherwise NULL.
 */
static inline RegExp *
RegExp_isA(RegExp *r, RegExpType t)
{
    if (r->type != t)
        return NULL;
    return r;
}
inline ostream& operator<<(ostream &o, const RegExp *re){
return o << *re;
void RegExp_split(RegExp*, CharSet*);
void RegExp_calcSize(RegExp*, Char*);
uint RegExp_fixedLength(RegExp*);
void RegExp_compile(RegExp*, Char*, Ins*);
void RegExp_display(RegExp*, FILE *);
/*
 * Allocate a RegExp of kind NULLOP.  Only the type tag is set; size and
 * the payload union are left uninitialised.
 *
 * Returns the new node, or NULL when malloc() fails.
 */
static inline RegExp *
RegExp_new_NullOp(void)
{
    RegExp *r = malloc(sizeof *r);

    if (r != NULL)
        r->type = NULLOP;
    return r;
}
class NullOp: public RegExp {
public:
static char *type;
public:
char *typeOf()
{ return type; }
void split(CharSet&);
void calcSize(Char*);
uint fixedLength();
void compile(Char*, Ins*);
void display(ostream &o) const {
o << "_";
}
};
class MatchOp: public RegExp {
public:
static char *type;
Range *match;
public:
MatchOp(Range *m) : match(m)
{ }
char *typeOf()
{ return type; }
void split(CharSet&);
void calcSize(Char*);
uint fixedLength();
void compile(Char*, Ins*);
void display(ostream&) const;
};
class RuleOp: public RegExp {
private:
RegExp *exp;
public:
RegExp *ctx;
static char *type;
Ins *ins;
uint accept;
Token *code;
uint line;
public:
RuleOp(RegExp*, RegExp*, Token*, uint);
char *typeOf()
{ return type; }
void split(CharSet&);
void calcSize(Char*);
void compile(Char*, Ins*);
void display(ostream &o) const {
o << exp << "/" << ctx << ";";
}
};
class AltOp: public RegExp {
private:
RegExp *exp1, *exp2;
public:
static char *type;
public:
AltOp(RegExp *e1, RegExp *e2)
{ exp1 = e1; exp2 = e2; }
char *typeOf()
{ return type; }
void split(CharSet&);
void calcSize(Char*);
uint fixedLength();
void compile(Char*, Ins*);
void display(ostream &o) const {
o << exp1 << "|" << exp2;
}
friend RegExp *mkAlt(RegExp*, RegExp*);
};
class CatOp: public RegExp {
private:
RegExp *exp1, *exp2;
public:
static char *type;
public:
CatOp(RegExp *e1, RegExp *e2)
{ exp1 = e1; exp2 = e2; }
char *typeOf()
{ return type; }
void split(CharSet&);
void calcSize(Char*);
uint fixedLength();
void compile(Char*, Ins*);
void display(ostream &o) const {
o << exp1 << exp2;
}
};
class CloseOp: public RegExp {
private:
RegExp *exp;
public:
static char *type;
public:
CloseOp(RegExp *e)
{ exp = e; }
char *typeOf()
{ return type; }
void split(CharSet&);
void calcSize(Char*);
void compile(Char*, Ins*);
void display(ostream &o) const {
o << exp << "+";
}
};
extern void genCode(ostream&, RegExp*);
/*
 * Allocate a RegExp of kind MATCHOP whose payload is the range list m.
 * The pointer m is stored as given, not copied; size is left
 * uninitialised.
 *
 * Returns the new node, or NULL when malloc() fails.
 */
static inline RegExp *
RegExp_new_MatchOp(Range *m)
{
    RegExp *r = malloc(sizeof *r);

    if (r == NULL)
        return NULL;
    r->type = MATCHOP;
    r->d.match = m;
    return r;
}
RegExp *RegExp_new_RuleOp(RegExp*, RegExp*, Token*, uint);
/*
 * Allocate a RegExp of kind ALTOP with operands e1 and e2.  The operand
 * pointers are stored as given; size is left uninitialised.
 *
 * Returns the new node, or NULL when malloc() fails.
 */
static inline RegExp *
RegExp_new_AltOp(RegExp *e1, RegExp *e2)
{
    RegExp *r = malloc(sizeof *r);

    if (r == NULL)
        return NULL;
    r->type = ALTOP;
    r->d.AltCatOp.exp1 = e1;
    r->d.AltCatOp.exp2 = e2;
    return r;
}
/*
 * Allocate a RegExp of kind CATOP with operands e1 and e2.  The operand
 * pointers are stored as given; size is left uninitialised.
 *
 * Returns the new node, or NULL when malloc() fails.
 */
static inline RegExp *
RegExp_new_CatOp(RegExp *e1, RegExp *e2)
{
    RegExp *r = malloc(sizeof *r);

    if (r == NULL)
        return NULL;
    r->type = CATOP;
    r->d.AltCatOp.exp1 = e1;
    r->d.AltCatOp.exp2 = e2;
    return r;
}
/*
 * Allocate a RegExp of kind CLOSEOP wrapping the operand e.  The operand
 * pointer is stored as given; size is left uninitialised.
 *
 * Returns the new node, or NULL when malloc() fails.
 */
static inline RegExp *
RegExp_new_CloseOp(RegExp *e)
{
    RegExp *r = malloc(sizeof *r);

    if (r == NULL)
        return NULL;
    r->type = CLOSEOP;
    r->d.exp = e;
    return r;
}
extern void genCode(FILE *, RegExp*);
extern RegExp *mkDiff(RegExp*, RegExp*);
extern RegExp *strToRE(SubStr);
extern RegExp *ranToRE(SubStr);
extern RegExp *mkAlt(RegExp*, RegExp*);
#endif

@ -1,30 +1,44 @@
#ifndef _scanner_h
#define _scanner_h
#include <stdio.h>
#include "token.h"
class Scanner {
private:
int in;
typedef struct Scanner {
FILE *in;
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
uint tchar, tline, cline;
private:
uchar *fill(uchar*);
public:
Scanner(int);
int echo(ostream&);
int scan();
void fatal(char*);
SubStr token();
uint line();
};
inline SubStr Scanner::token(){
return SubStr(tok, cur - tok);
} Scanner;
void Scanner_init(Scanner*, FILE *);
static inline Scanner *Scanner_new(FILE *);
int Scanner_echo(Scanner*, FILE *);
int Scanner_scan(Scanner*);
void Scanner_fatal(Scanner*, char*);
static inline SubStr Scanner_token(Scanner*);
static inline uint Scanner_line(Scanner*);

/*
 * Return the current token as a SubStr: the s->cur - s->tok bytes that
 * start at s->tok.  s->cur must already point past the token.
 *
 * Declared static inline like the other helpers in this header.  The
 * header is included from more than one source file, and a definition
 * with external linkage here would be emitted in each of them.
 */
static inline SubStr
Scanner_token(Scanner *s)
{
    SubStr r;

    SubStr_init_u(&r, s->tok, s->cur - s->tok);
    return r;
}
/* Return the scanner's current line counter (s->cline). */
static inline uint
Scanner_line(Scanner *s)
{
    const uint current = s->cline;

    return current;
}
inline uint Scanner::line(){
return cline;
/*
 * Allocate a Scanner on the heap and initialise it to read from i with
 * Scanner_init().
 *
 * Returns the new scanner, or NULL when malloc() fails.  The allocation
 * result is checked so that Scanner_init() is never handed a NULL pointer.
 */
static inline Scanner *
Scanner_new(FILE *i)
{
    Scanner *r = malloc(sizeof *r);

    if (r != NULL)
        Scanner_init(r, i);
    return r;
}
#endif

@ -1,7 +1,5 @@
#include <stdlib.h>
#include <string.h>
#include <iostream.h>
#include <unistd.h>
#include "scanner.h"
#include "parser.h"
#include "y.tab.h"
@ -12,46 +10,53 @@ extern YYSTYPE yylval;
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT lim
#define YYMARKER ptr
#define YYFILL(n) {cursor = fill(cursor);}
#define RETURN(i) {cur = cursor; return i;}
Scanner::Scanner(int i) : in(i),
bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL),
top(NULL), eof(NULL), tchar(0), tline(0), cline(1) {
;
#define YYLIMIT s->lim
#define YYMARKER s->ptr
#define YYFILL(n) {cursor = fill(s, cursor);}
#define RETURN(i) {s->cur = cursor; return i;}
static uchar *fill(Scanner*, uchar*);
/*
 * Put scanner s into its initial state, reading from stream i.
 * All buffer pointers start out NULL, the token position counters start
 * at 0, and the current line counter starts at 1.
 */
void
Scanner_init(Scanner *s, FILE *i)
{
    /* no buffer yet: every buffer pointer is NULL */
    s->eof = NULL;
    s->top = NULL;
    s->lim = NULL;
    s->pos = NULL;
    s->cur = NULL;
    s->ptr = NULL;
    s->tok = NULL;
    s->bot = NULL;

    /* position bookkeeping */
    s->tline = 0;
    s->tchar = 0;
    s->cline = 1;

    s->in = i;
}
uchar *Scanner::fill(uchar *cursor){
if(!eof){
uint cnt = tok - bot;
static uchar *
fill(Scanner *s, uchar *cursor)
{
if(!s->eof){
uint cnt = s->tok - s->bot;
if(cnt){
memcpy(bot, tok, lim - tok);
tok = bot;
ptr -= cnt;
/* Slide the unread bytes down to the start of the buffer.  Source and
 * destination lie in the same buffer and can overlap, which memcpy()
 * does not permit, so memmove() is used. */
memmove(s->bot, s->tok, s->lim - s->tok);
s->tok = s->bot;
s->ptr -= cnt;
cursor -= cnt;
pos -= cnt;
lim -= cnt;
s->pos -= cnt;
s->lim -= cnt;
}
if((top - lim) < BSIZE){
uchar *buf = new uchar[(lim - bot) + BSIZE];
memcpy(buf, tok, lim - tok);
tok = buf;
ptr = &buf[ptr - bot];
cursor = &buf[cursor - bot];
pos = &buf[pos - bot];
lim = &buf[lim - bot];
top = &lim[BSIZE];
delete [] bot;
bot = buf;
if((s->top - s->lim) < BSIZE){
uchar *buf = malloc(sizeof(uchar)*((s->lim - s->bot) + BSIZE));
memcpy(buf, s->tok, s->lim - s->tok);
s->tok = buf;
s->ptr = &buf[s->ptr - s->bot];
cursor = &buf[cursor - s->bot];
s->pos = &buf[s->pos - s->bot];
s->lim = &buf[s->lim - s->bot];
s->top = &s->lim[BSIZE];
free(s->bot);
s->bot = buf;
}
if((cnt = read(in, (char*) lim, BSIZE)) != BSIZE){
eof = &lim[cnt]; *eof++ = '\n';
if((cnt = fread(s->lim, sizeof(uchar), BSIZE, s->in)) != BSIZE){
s->eof = &s->lim[cnt]; *s->eof++ = '\n';
}
lim += cnt;
s->lim += cnt;
}
return cursor;
}
@ -67,31 +72,35 @@ letter = [a-zA-Z];
digit = [0-9];
*/
int Scanner::echo(ostream &out){
uchar *cursor = cur;
tok = cursor;
int
Scanner_echo(Scanner *s, FILE *out)
{
uchar *cursor = s->cur;
s->tok = cursor;
echo:
/*!re2c
"/*!re2c" { out.write(tok, &cursor[-7] - tok);
tok = cursor;
"/*!re2c" { fwrite(s->tok, 1, &cursor[-7] - s->tok, out);
s->tok = cursor;
RETURN(1); }
"\n" { if(cursor == eof) RETURN(0);
out.write(tok, cursor - tok);
tok = pos = cursor; cline++;
"\n" { if(cursor == s->eof) RETURN(0);
fwrite(s->tok, 1, cursor - s->tok, out);
s->tok = s->pos = cursor; s->cline++;
goto echo; }
any { goto echo; }
*/
}
int Scanner::scan(){
uchar *cursor = cur;
int
Scanner_scan(Scanner *s)
{
uchar *cursor = s->cur;
uint depth;
scan:
tchar = cursor - pos;
tline = cline;
tok = cursor;
s->tchar = cursor - s->pos;
s->tline = s->cline;
s->tok = cursor;
/*!re2c
"{" { depth = 1;
goto code;
@ -99,36 +108,37 @@ scan:
"/*" { depth = 1;
goto comment; }
"*/" { tok = cursor;
"*/" { s->tok = cursor;
RETURN(0); }
dstring { cur = cursor;
yylval.regexp = strToRE(token());
dstring { s->cur = cursor;
yylval.regexp = strToRE(Scanner_token(s));
return STRING; }
"\"" { fatal("bad string"); }
"\"" { Scanner_fatal(s, "bad string"); }
cstring { cur = cursor;
yylval.regexp = ranToRE(token());
cstring { s->cur = cursor;
yylval.regexp = ranToRE(Scanner_token(s));
return RANGE; }
"[" { fatal("bad character constant"); }
"[" { Scanner_fatal(s, "bad character constant"); }
[()|=;/\\] { RETURN(*tok); }
[()|=;/\\] { RETURN(*s->tok); }
[*+?] { yylval.op = *tok;
[*+?] { yylval.op = *s->tok;
RETURN(CLOSE); }
letter (letter|digit)* { cur = cursor;
yylval.symbol = Symbol::find(token());
letter (letter|digit)* { SubStr substr;
		  /* s->cur has to be advanced before the token is taken:
		   * Scanner_token measures it as s->cur - s->tok. */
		  s->cur = cursor;
		  substr = Scanner_token(s);
		  yylval.symbol = Symbol_find(&substr);
		  return ID; }
[ \t]+ { goto scan; }
"\n" { if(cursor == eof) RETURN(0);
pos = cursor; cline++;
"\n" { if(cursor == s->eof) RETURN(0);
s->pos = cursor; s->cline++;
goto scan;
}
any { cerr << "unexpected character: " << *tok << endl;
any { fprintf(stderr, "unexpected character: '%c'\n", *s->tok);
goto scan;
}
*/
@ -136,15 +146,15 @@ scan:
code:
/*!re2c
"}" { if(--depth == 0){
cur = cursor;
yylval.token = new Token(token(), tline);
s->cur = cursor;
yylval.token = Token_new(Scanner_token(s), s->tline);
return CODE;
}
goto code; }
"{" { ++depth;
goto code; }
"\n" { if(cursor == eof) fatal("missing '}'");
pos = cursor; cline++;
"\n" { if(cursor == s->eof) Scanner_fatal(s, "missing '}'");
s->pos = cursor; s->cline++;
goto code;
}
dstring | sstring | any { goto code; }
@ -158,16 +168,17 @@ comment:
goto comment; }
"/*" { ++depth;
goto comment; }
"\n" { if(cursor == eof) RETURN(0);
tok = pos = cursor; cline++;
"\n" { if(cursor == s->eof) RETURN(0);
s->tok = s->pos = cursor; s->cline++;
goto comment;
}
any { goto comment; }
*/
}
void Scanner::fatal(char *msg){
cerr << "line " << tline << ", column " << (tchar + 1) << ": "
<< msg << endl;
/*
 * Report a fatal scanning error and terminate the program.
 *
 * Writes "line L, column C: msg" to stderr, where L is s->tline and C is
 * s->tchar + 1, then calls exit(1); this function does not return.
 *
 * tline and tchar are declared uint, so they are printed with %u rather
 * than the signed %d conversion.
 */
void
Scanner_fatal(Scanner *s, char *msg)
{
    fprintf(stderr, "line %u, column %u: %s\n", s->tline, s->tchar + 1, msg);
    exit(1);
}

@ -1,18 +1,30 @@
#ifndef _token_h
#define _token_h
#ifndef re2c_token_h
#define re2c_token_h
#include "substr.h"
class Token {
public:
typedef struct Token {
Str text;
uint line;
public:
Token(SubStr, uint);
};
} Token;
inline Token::Token(SubStr t, uint l) : text(t), line(l) {
;
static inline void Token_init(Token *, SubStr, uint);
static inline Token *Token_new(SubStr, uint);
/*
 * Initialise an existing Token in place: record line number l and fill
 * r->text from t through Str_copy().
 */
static inline void
Token_init(Token *r, SubStr t, uint l)
{
    r->line = l;
    Str_copy(&r->text, &t);
}
/*
 * Allocate a Token on the heap, fill its text from t through Str_copy()
 * and record line number l.
 *
 * Returns the new token, or NULL when malloc() fails; the fields are only
 * written once the allocation is known to have succeeded.
 */
static inline Token *
Token_new(SubStr t, uint l)
{
    Token *r = malloc(sizeof *r);

    if (r == NULL)
        return NULL;
    Str_copy(&r->text, &t);
    r->line = l;
    return r;
}
#endif

Loading…
Cancel
Save