mirror of https://github.com/yasm/yasm.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
540 lines
17 KiB
540 lines
17 KiB
/* |
|
* |
|
* Generate Minimal Perfect Hash (genperf) |
|
* |
|
* Copyright (C) 2006-2007 Peter Johnson |
|
* |
|
* Redistribution and use in source and binary forms, with or without |
|
* modification, are permitted provided that the following conditions |
|
* are met: |
|
* 1. Redistributions of source code must retain the above copyright |
|
* notice, this list of conditions and the following disclaimer. |
|
* 2. Redistributions in binary form must reproduce the above copyright |
|
* notice, this list of conditions and the following disclaimer in the |
|
* documentation and/or other materials provided with the distribution. |
|
* |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' |
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE |
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
* POSSIBILITY OF SUCH DAMAGE. |
|
*/ |
|
#include <stdio.h> |
|
#include <ctype.h> |
|
#include <stdarg.h> |
|
#include <string.h> |
|
#include "tools/genperf/perfect.h" |
|
#include "libyasm/compat-queue.h" |
|
#include "libyasm/coretype.h" |
|
#include "libyasm/errwarn.h" |
|
|
|
typedef STAILQ_HEAD(slist, sval) slist; |
|
typedef struct sval { |
|
STAILQ_ENTRY(sval) link; |
|
char *str; |
|
} sval; |
|
|
|
typedef STAILQ_HEAD(keyword_list, keyword) keyword_list; |
|
typedef struct keyword { |
|
STAILQ_ENTRY(keyword) link; |
|
char *name; |
|
char *args; |
|
unsigned int line; |
|
} keyword; |
|
|
|
static unsigned int cur_line = 1; |
|
static int errors = 0; |
|
|
|
static void |
|
report_error(const char *fmt, ...) |
|
{ |
|
va_list ap; |
|
|
|
fprintf(stderr, "%u: ", cur_line); |
|
va_start(ap, fmt); |
|
vfprintf(stderr, fmt, ap); |
|
va_end(ap); |
|
fputc('\n', stderr); |
|
errors++; |
|
} |
|
|
|
void |
|
yasm__fatal(const char *message, ...) |
|
{ |
|
abort(); |
|
} |
|
|
|
/* make the c output for the perfect hash tab array */ |
|
static void |
|
make_c_tab( |
|
FILE *f, |
|
bstuff *tab, /* table indexed by b */ |
|
ub4 smax, /* range of scramble[] */ |
|
ub4 blen, /* b in 0..blen-1, power of 2 */ |
|
ub4 *scramble) /* used in final hash */ |
|
{ |
|
ub4 i; |
|
/* table for the mapping for the perfect hash */ |
|
if (blen >= USE_SCRAMBLE) { |
|
/* A way to make the 1-byte values in tab bigger */ |
|
if (smax > UB2MAXVAL+1) { |
|
fprintf(f, " static const unsigned long scramble[] = {\n"); |
|
for (i=0; i<=UB1MAXVAL; i+=4) |
|
fprintf(f, " 0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n", |
|
scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]); |
|
} else { |
|
fprintf(f, " static const unsigned short scramble[] = {\n"); |
|
for (i=0; i<=UB1MAXVAL; i+=8) |
|
fprintf(f, |
|
" 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n", |
|
scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3], |
|
scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]); |
|
} |
|
fprintf(f, " };\n"); |
|
fprintf(f, "\n"); |
|
} |
|
|
|
if (blen > 0) { |
|
/* small adjustments to _a_ to make values distinct */ |
|
if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE) |
|
fprintf(f, " static const unsigned char "); |
|
else |
|
fprintf(f, " static const unsigned short "); |
|
fprintf(f, "tab[] = {\n"); |
|
|
|
if (blen < 16) { |
|
for (i=0; i<blen; ++i) |
|
fprintf(f, "%3ld,", scramble[tab[i].val_b]); |
|
} else if (blen <= 1024) { |
|
for (i=0; i<blen; i+=16) |
|
fprintf(f, " %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", |
|
scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], |
|
scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], |
|
scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], |
|
scramble[tab[i+6].val_b], scramble[tab[i+7].val_b], |
|
scramble[tab[i+8].val_b], scramble[tab[i+9].val_b], |
|
scramble[tab[i+10].val_b], scramble[tab[i+11].val_b], |
|
scramble[tab[i+12].val_b], scramble[tab[i+13].val_b], |
|
scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]); |
|
} else if (blen < USE_SCRAMBLE) { |
|
for (i=0; i<blen; i+=8) |
|
fprintf(f, " %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", |
|
scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], |
|
scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], |
|
scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], |
|
scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]); |
|
} else { |
|
for (i=0; i<blen; i+=16) |
|
fprintf(f, " %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n", |
|
tab[i+0].val_b, tab[i+1].val_b, |
|
tab[i+2].val_b, tab[i+3].val_b, |
|
tab[i+4].val_b, tab[i+5].val_b, |
|
tab[i+6].val_b, tab[i+7].val_b, |
|
tab[i+8].val_b, tab[i+9].val_b, |
|
tab[i+10].val_b, tab[i+11].val_b, |
|
tab[i+12].val_b, tab[i+13].val_b, |
|
tab[i+14].val_b, tab[i+15].val_b); |
|
} |
|
fprintf(f, " };\n"); |
|
fprintf(f, "\n"); |
|
} |
|
} |
|
|
|
static void |
|
perfect_gen(FILE *out, const char *lookup_function_name, |
|
const char *struct_name, keyword_list *kws, |
|
const char *filename) |
|
{ |
|
ub4 nkeys; |
|
key *keys; |
|
hashform form; |
|
bstuff *tab; /* table indexed by b */ |
|
hstuff *tabh; /* table indexed by hash value */ |
|
ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */ |
|
ub4 alen; /* a in 0..alen-1, a power of 2 */ |
|
ub4 blen; /* b in 0..blen-1, a power of 2 */ |
|
ub4 salt; /* a parameter to the hash function */ |
|
gencode final; /* code for final hash */ |
|
ub4 i; |
|
ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */ |
|
char buf[10][80]; /* buffer for generated code */ |
|
char *buf2[10]; /* also for generated code */ |
|
keyword *kw; |
|
|
|
/* perfect hash configuration */ |
|
form.mode = NORMAL_HM; |
|
form.hashtype = STRING_HT; |
|
form.perfect = MINIMAL_HP; |
|
form.speed = SLOW_HS; |
|
|
|
/* set up code for final hash */ |
|
final.line = buf2; |
|
final.used = 0; |
|
final.len = 10; |
|
for (i=0; i<10; i++) |
|
final.line[i] = buf[i]; |
|
|
|
/* build list of keys */ |
|
nkeys = 0; |
|
keys = NULL; |
|
STAILQ_FOREACH(kw, kws, link) { |
|
key *k = yasm_xmalloc(sizeof(key)); |
|
|
|
k->name_k = yasm__xstrdup(kw->name); |
|
k->len_k = (ub4)strlen(kw->name); |
|
k->next_k = keys; |
|
keys = k; |
|
nkeys++; |
|
} |
|
|
|
/* find the hash */ |
|
findhash(&tab, &tabh, &alen, &blen, &salt, &final, |
|
scramble, &smax, keys, nkeys, &form); |
|
|
|
/* The hash function beginning */ |
|
fprintf(out, "static const struct %s *\n", struct_name); |
|
fprintf(out, "%s(const char *key, size_t len)\n", lookup_function_name); |
|
fprintf(out, "{\n"); |
|
|
|
/* output the dir table: this should loop up to smax for NORMAL_HP, |
|
* or up to pakd.nkeys for MINIMAL_HP. |
|
*/ |
|
fprintf(out, " static const struct %s pd[%lu] = {\n", struct_name, nkeys); |
|
for (i=0; i<nkeys; i++) { |
|
if (tabh[i].key_h) { |
|
STAILQ_FOREACH(kw, kws, link) { |
|
if (strcmp(kw->name, tabh[i].key_h->name_k) == 0) |
|
break; |
|
} |
|
if (!kw) { |
|
report_error("internal error: could not find `%s'", |
|
tabh[i].key_h->name_k); |
|
break; |
|
} |
|
fprintf(out, "#line %u \"%s\"\n", kw->line, filename); |
|
fprintf(out, " {\"%s\"%s}", kw->name, kw->args); |
|
} else |
|
fprintf(out, " { NULL }"); |
|
|
|
if (i < nkeys-1) |
|
fprintf(out, ","); |
|
fprintf(out, "\n"); |
|
} |
|
fprintf(out, " };\n"); |
|
|
|
/* output the hash tab[] array */ |
|
make_c_tab(out, tab, smax, blen, scramble); |
|
|
|
/* The hash function body */ |
|
fprintf(out, " const struct %s *ret;\n", struct_name); |
|
for (i=0; i<final.used; ++i) |
|
fprintf(out, "%s", final.line[i]); |
|
fprintf(out, " if (rsl >= %lu) return NULL;\n", nkeys); |
|
fprintf(out, " ret = &pd[rsl];\n"); |
|
fprintf(out, " if (strcmp(key, ret->name) != 0) return NULL;\n"); |
|
fprintf(out, " return ret;\n"); |
|
fprintf(out, "}\n"); |
|
fprintf(out, "\n"); |
|
|
|
free(tab); |
|
free(tabh); |
|
} |
|
|
|
int |
|
main(int argc, char *argv[]) |
|
{ |
|
FILE *in, *out; |
|
size_t i; |
|
char *ch; |
|
static char line[1024], tmp[1024]; |
|
static char struct_name[128] = ""; |
|
static char lookup_function_name[128] = "in_word_set"; |
|
static char language[16] = ""; |
|
static char delimiters[16] = ",\r\n"; |
|
static char name[128]; |
|
static char filename[768]; |
|
int need_struct = 0; |
|
int have_struct = 0; |
|
int go_keywords = 0; |
|
int ignore_case = 0; |
|
int compare_strncmp = 0; |
|
int readonly_tables = 0; |
|
slist usercode, usercode2; |
|
keyword_list keywords; |
|
sval *sv; |
|
keyword *kw; |
|
|
|
if (argc != 3) { |
|
fprintf(stderr, "Usage: genperf <in> <out>\n"); |
|
return EXIT_FAILURE; |
|
} |
|
|
|
in = fopen(argv[1], "rt"); |
|
if (!in) { |
|
fprintf(stderr, "Could not open `%s' for reading\n", argv[1]); |
|
return EXIT_FAILURE; |
|
} |
|
|
|
ch = argv[1]; |
|
i = 0; |
|
while (*ch && i < 767) { |
|
if (*ch == '\\') { |
|
filename[i++] = '/'; |
|
ch++; |
|
} else |
|
filename[i++] = *ch++; |
|
} |
|
filename[i] = '\0'; |
|
|
|
STAILQ_INIT(&usercode); |
|
STAILQ_INIT(&usercode2); |
|
STAILQ_INIT(&keywords); |
|
|
|
/* Parse declarations section */ |
|
while (fgets(line, 1024, in)) { |
|
/* Comments start with # as the first thing on a line */ |
|
if (line[0] == '#') { |
|
cur_line++; |
|
continue; |
|
} |
|
|
|
/* Handle structure declaration */ |
|
if (strncmp(line, "struct", 6) == 0) { |
|
int braces; |
|
|
|
if (!need_struct) { |
|
report_error("struct without %%struct-type declaration"); |
|
return EXIT_FAILURE; |
|
} |
|
if (have_struct) { |
|
report_error("more than one struct declaration"); |
|
return EXIT_FAILURE; |
|
} |
|
have_struct = 1; |
|
|
|
/* copy struct name */ |
|
ch = &line[6]; |
|
while (isspace(*ch)) |
|
ch++; |
|
i = 0; |
|
while ((isalnum(*ch) || *ch == '_') && i < 127) |
|
struct_name[i++] = *ch++; |
|
if (i == 127) { |
|
report_error("struct name too long"); |
|
return EXIT_FAILURE; |
|
} |
|
struct_name[i] = '\0'; |
|
|
|
sv = yasm_xmalloc(sizeof(sval)); |
|
sprintf(tmp, "#line %u \"%s\"\n", cur_line, filename); |
|
sv->str = yasm__xstrdup(tmp); |
|
STAILQ_INSERT_TAIL(&usercode, sv, link); |
|
|
|
braces = 0; |
|
do { |
|
/* count braces to determine when we're done */ |
|
ch = line; |
|
while (*ch != '\0') { |
|
if (*ch == '{') |
|
braces++; |
|
if (*ch == '}') |
|
braces--; |
|
ch++; |
|
} |
|
sv = yasm_xmalloc(sizeof(sval)); |
|
sv->str = yasm__xstrdup(line); |
|
STAILQ_INSERT_TAIL(&usercode, sv, link); |
|
cur_line++; |
|
if (braces <= 0) |
|
break; |
|
} while (fgets(line, 1024, in)); |
|
cur_line++; |
|
continue; |
|
} |
|
|
|
/* Ignore non-declaration lines */ |
|
if (line[0] != '%') { |
|
cur_line++; |
|
continue; |
|
} |
|
|
|
/* %% terminates declarations section */ |
|
if (line[1] == '%') { |
|
if (need_struct && !have_struct) { |
|
report_error("%%struct-type declaration, but no struct found"); |
|
return EXIT_FAILURE; |
|
} |
|
go_keywords = 1; |
|
break; /* move on to keywords section */ |
|
} |
|
|
|
/* %{ begins a verbatim code section that ends with %} */ |
|
if (line[1] == '{') { |
|
sv = yasm_xmalloc(sizeof(sval)); |
|
sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename); |
|
sv->str = yasm__xstrdup(tmp); |
|
STAILQ_INSERT_TAIL(&usercode, sv, link); |
|
|
|
while (fgets(line, 1024, in)) { |
|
cur_line++; |
|
if (line[0] == '%' && line[1] == '}') |
|
break; |
|
sv = yasm_xmalloc(sizeof(sval)); |
|
sv->str = yasm__xstrdup(line); |
|
STAILQ_INSERT_TAIL(&usercode, sv, link); |
|
} |
|
cur_line++; |
|
continue; |
|
} |
|
|
|
if (strncmp(&line[1], "ignore-case", 11) == 0) { |
|
ignore_case = 1; |
|
} else if (strncmp(&line[1], "compare-strncmp", 15) == 0) { |
|
compare_strncmp = 1; |
|
} else if (strncmp(&line[1], "readonly-tables", 15) == 0) { |
|
readonly_tables = 1; |
|
} else if (strncmp(&line[1], "language=", 9) == 0) { |
|
ch = &line[10]; |
|
i = 0; |
|
while (*ch != '\n' && i<15) |
|
language[i++] = *ch++; |
|
language[i] = '\0'; |
|
} else if (strncmp(&line[1], "delimiters=", 11) == 0) { |
|
ch = &line[12]; |
|
i = 0; |
|
while (i<15) |
|
delimiters[i++] = *ch++; |
|
delimiters[i] = '\0'; |
|
} else if (strncmp(&line[1], "enum", 4) == 0) { |
|
/* unused */ |
|
} else if (strncmp(&line[1], "struct-type", 11) == 0) { |
|
need_struct = 1; |
|
} else if (strncmp(&line[1], "define", 6) == 0) { |
|
/* Several different defines we need to handle */ |
|
ch = &line[7]; |
|
while (isspace(*ch)) |
|
ch++; |
|
|
|
if (strncmp(ch, "hash-function-name", 18) == 0) { |
|
/* unused */ |
|
} else if (strncmp(ch, "lookup-function-name", 20) == 0) { |
|
ch = &line[7+20+1]; |
|
while (isspace(*ch)) |
|
ch++; |
|
i = 0; |
|
while ((isalnum(*ch) || *ch == '_') && i < 127) |
|
lookup_function_name[i++] = *ch++; |
|
if (i == 127) { |
|
report_error("struct name too long"); |
|
return EXIT_FAILURE; |
|
} |
|
lookup_function_name[i] = '\0'; |
|
} else { |
|
fprintf(stderr, "%u: unrecognized define `%s'\n", cur_line, |
|
line); |
|
} |
|
} else { |
|
fprintf(stderr, "%u: unrecognized declaration `%s'\n", cur_line, |
|
line); |
|
} |
|
|
|
cur_line++; |
|
} |
|
|
|
if (!go_keywords) { |
|
report_error("no keywords section found"); |
|
return EXIT_FAILURE; |
|
} |
|
|
|
/* Parse keywords section */ |
|
while (fgets(line, 1024, in)) { |
|
char *d; |
|
|
|
/* Comments start with # as the first thing on a line */ |
|
if (line[0] == '#') { |
|
cur_line++; |
|
continue; |
|
} |
|
|
|
/* Keywords section terminated with %% */ |
|
if (line[0] == '%' && line[1] == '%') |
|
break; |
|
|
|
/* Look for name */ |
|
ch = &line[0]; |
|
i = 0; |
|
while (strchr(delimiters, *ch) == NULL && i < 127) |
|
name[i++] = *ch++; |
|
if (i == 127) { |
|
report_error("keyword name too long"); |
|
return EXIT_FAILURE; |
|
} |
|
name[i] = '\0'; |
|
|
|
/* Strip EOL */ |
|
d = strrchr(ch, '\n'); |
|
if (d) |
|
*d = '\0'; |
|
d = strrchr(ch, '\r'); |
|
if (d) |
|
*d = '\0'; |
|
kw = yasm_xmalloc(sizeof(keyword)); |
|
kw->name = yasm__xstrdup(name); |
|
kw->args = yasm__xstrdup(ch); |
|
kw->line = cur_line; |
|
STAILQ_INSERT_TAIL(&keywords, kw, link); |
|
cur_line++; |
|
} |
|
|
|
if (errors > 0) |
|
return EXIT_FAILURE; |
|
|
|
/* Pull in any end code */ |
|
if (!feof(in)) { |
|
sv = yasm_xmalloc(sizeof(sval)); |
|
sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename); |
|
sv->str = yasm__xstrdup(tmp); |
|
STAILQ_INSERT_TAIL(&usercode2, sv, link); |
|
|
|
while (fgets(line, 1024, in)) { |
|
sv = yasm_xmalloc(sizeof(sval)); |
|
sv->str = yasm__xstrdup(line); |
|
STAILQ_INSERT_TAIL(&usercode2, sv, link); |
|
} |
|
} |
|
|
|
/* output code */ |
|
out = fopen(argv[2], "wt"); |
|
if (!out) { |
|
fprintf(stderr, "Could not open `%s' for writing\n", argv[2]); |
|
return EXIT_FAILURE; |
|
} |
|
|
|
fprintf(out, "/* %s code produced by genperf */\n", language); |
|
fprintf(out, "/* Command-line: genperf %s %s */\n", argv[1], argv[2]); |
|
|
|
STAILQ_FOREACH(sv, &usercode, link) |
|
fprintf(out, "%s", sv->str); |
|
|
|
/* Get perfect hash */ |
|
perfect_gen(out, lookup_function_name, struct_name, &keywords, filename); |
|
|
|
STAILQ_FOREACH(sv, &usercode2, link) |
|
fprintf(out, "%s", sv->str); |
|
|
|
fclose(out); |
|
|
|
if (errors > 0) { |
|
remove(argv[2]); |
|
return EXIT_FAILURE; |
|
} |
|
|
|
return EXIT_SUCCESS; |
|
} |
|
|
|
|