It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on a Core2 2.4GHz. This is 2.5x the speed of proto2. The comparison isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder), but I'm really impatient to get the JIT checked in.
pull/13171/head
parent
19517cc6f3
commit
9eb4d695c4
30 changed files with 7589 additions and 402 deletions
@ -0,0 +1,58 @@ |
||||
DynASM is taken from LuaJIT 2, which carries the following license statement: |
||||
|
||||
=============================================================================== |
||||
LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ |
||||
|
||||
Copyright (C) 2005-2011 Mike Pall. All rights reserved. |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
of this software and associated documentation files (the "Software"), to deal |
||||
in the Software without restriction, including without limitation the rights |
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
copies of the Software, and to permit persons to whom the Software is |
||||
furnished to do so, subject to the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included in |
||||
all copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
THE SOFTWARE. |
||||
|
||||
[ MIT license: http://www.opensource.org/licenses/mit-license.php ] |
||||
|
||||
=============================================================================== |
||||
[ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ] |
||||
|
||||
Copyright (C) 1994-2011 Lua.org, PUC-Rio. |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
of this software and associated documentation files (the "Software"), to deal |
||||
in the Software without restriction, including without limitation the rights |
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
copies of the Software, and to permit persons to whom the Software is |
||||
furnished to do so, subject to the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included in |
||||
all copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
THE SOFTWARE. |
||||
|
||||
=============================================================================== |
||||
[ LuaJIT includes code from dlmalloc, which has this license statement: ] |
||||
|
||||
This is a version (aka dlmalloc) of malloc/free/realloc written by |
||||
Doug Lea and released to the public domain, as explained at |
||||
http://creativecommons.org/licenses/publicdomain |
||||
|
||||
=============================================================================== |
@ -0,0 +1,440 @@ |
||||
/*
|
||||
** DynASM ARM encoding engine. |
||||
** Copyright (C) 2005-2011 Mike Pall. All rights reserved. |
||||
** Released under the MIT/X license. See dynasm.lua for full copyright notice. |
||||
*/ |
||||
|
||||
#include <stddef.h> |
||||
#include <stdarg.h> |
||||
#include <string.h> |
||||
#include <stdlib.h> |
||||
|
||||
/* Architecture name of this encoding engine. */
#define DASM_ARCH		"arm"

/* Fallback for resolving external relocations (DASM_REL_EXT): only defined
** when the embedder did not supply its own DASM_EXTERN. It ignores all four
** arguments and evaluates to 0.
*/
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d)	0
#endif
||||
|
||||
/* Action definitions. An action list word whose upper 16 bits are below
** DASM__MAX is an action; any other word is treated as a plain instruction.
** The order matters: the encoder compares against DASM_ALIGN and
** DASM_REL_PC to decide whether an action uses a buffer position and/or
** a dasm_put() argument.
*/
enum {
  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
  /* The following actions need a buffer position. */
  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
  /* The following actions also have an argument. */
  DASM_REL_PC, DASM_LABEL_PC,
  DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12,
  DASM__MAX
};

/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS		25
||||
|
||||
/* DynASM encoder status codes. The class lives in the top byte; an action
** list offset or a label/section number is or'ed into the lower bits.
*/
#define DASM_S_OK		0x00000000
#define DASM_S_NOMEM		0x01000000
#define DASM_S_PHASE		0x02000000
#define DASM_S_MATCH_SEC	0x03000000
#define DASM_S_RANGE_I		0x11000000
#define DASM_S_RANGE_SEC	0x12000000
#define DASM_S_RANGE_LG		0x13000000
#define DASM_S_RANGE_PC		0x14000000
#define DASM_S_RANGE_REL	0x15000000
#define DASM_S_UNDEF_LG		0x21000000
#define DASM_S_UNDEF_PC		0x22000000
||||
|
||||
/* Macros to convert positions (8 bit section + 24 bit index).
** A position carries the section number in its top 8 bits (the "bias") and
** the index into that section's buffer in the low 24 bits. DASM_POS2PTR
** adds the full biased position to the section's pre-biased rbuf pointer.
*/
#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
#define DASM_SEC2POS(sec)	((sec)<<24)
#define DASM_POS2SEC(pos)	((pos)>>24)
#define DASM_POS2PTR(D, pos)	((D)->sections[DASM_POS2SEC(pos)].rbuf + (pos))
||||
|
||||
/* Action list type: a read-only array of 32 bit action/instruction words. */
typedef const unsigned int *dasm_ActList;
||||
|
||||
/* Per-section structure. Each section owns one growable int buffer that
** pass 1 fills with action list offsets, arguments and label/offset slots.
*/
typedef struct dasm_Section {
  int *rbuf;		/* Biased buffer pointer (negative section bias). */
  int *buf;		/* True buffer pointer. */
  size_t bsize;		/* Buffer size in bytes. */
  int pos;		/* Biased buffer position. */
  int epos;		/* End of biased buffer position - max single put. */
  int ofs;		/* Byte offset into section. */
} dasm_Section;
||||
|
||||
/* Core structure holding the DynASM encoding state. */ |
||||
struct dasm_State { |
||||
size_t psize; /* Allocated size of this structure. */ |
||||
dasm_ActList actionlist; /* Current actionlist pointer. */ |
||||
int *lglabels; /* Local/global chain/pos ptrs. */ |
||||
size_t lgsize; |
||||
int *pclabels; /* PC label chains/pos ptrs. */ |
||||
size_t pcsize; |
||||
void **globals; /* Array of globals (bias -10). */ |
||||
dasm_Section *section; /* Pointer to active section. */ |
||||
size_t codesize; /* Total size of all code sections. */ |
||||
int maxsection; /* 0 <= sectionidx < maxsection. */ |
||||
int status; /* Status code. */ |
||||
dasm_Section sections[1]; /* All sections. Alloc-extended. */ |
||||
}; |
||||
|
||||
/* The size of the core structure depends on the max. number of sections. */ |
||||
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) |
||||
|
||||
|
||||
/* Initialize DynASM state. */ |
||||
void dasm_init(Dst_DECL, int maxsection) |
||||
{ |
||||
dasm_State *D; |
||||
size_t psz = 0; |
||||
int i; |
||||
Dst_REF = NULL; |
||||
DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); |
||||
D = Dst_REF; |
||||
D->psize = psz; |
||||
D->lglabels = NULL; |
||||
D->lgsize = 0; |
||||
D->pclabels = NULL; |
||||
D->pcsize = 0; |
||||
D->globals = NULL; |
||||
D->maxsection = maxsection; |
||||
for (i = 0; i < maxsection; i++) { |
||||
D->sections[i].buf = NULL; /* Need this for pass3. */ |
||||
D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); |
||||
D->sections[i].bsize = 0; |
||||
D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ |
||||
} |
||||
} |
||||
|
||||
/* Free DynASM state. */ |
||||
void dasm_free(Dst_DECL) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int i; |
||||
for (i = 0; i < D->maxsection; i++) |
||||
if (D->sections[i].buf) |
||||
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); |
||||
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); |
||||
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); |
||||
DASM_M_FREE(Dst, D, D->psize); |
||||
} |
||||
|
||||
/* Setup global label array. Must be called before dasm_setup(). */ |
||||
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
D->globals = gl - 10; /* Negative bias to compensate for locals. */ |
||||
DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); |
||||
} |
||||
|
||||
/* Grow PC label array. Can be called after dasm_setup(), too. */ |
||||
void dasm_growpc(Dst_DECL, unsigned int maxpc) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
size_t osz = D->pcsize; |
||||
DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); |
||||
memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); |
||||
} |
||||
|
||||
/* Setup encoder. */ |
||||
void dasm_setup(Dst_DECL, const void *actionlist) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int i; |
||||
D->actionlist = (dasm_ActList)actionlist; |
||||
D->status = DASM_S_OK; |
||||
D->section = &D->sections[0]; |
||||
memset((void *)D->lglabels, 0, D->lgsize); |
||||
if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); |
||||
for (i = 0; i < D->maxsection; i++) { |
||||
D->sections[i].pos = DASM_SEC2POS(i); |
||||
D->sections[i].ofs = 0; |
||||
} |
||||
} |
||||
|
||||
|
||||
/* Pass 1 sanity checks, only active with DASM_CHECKS.
** CK(x, st): if x is false, store DASM_S_<st> combined with the offset of
** the current action in D->status and return from the enclosing function.
** CKPL(kind, st): same, with DASM_S_RANGE_<st>, if the label pointer pl
** lies outside the <kind>labels table.
** Both expect D, p (and pl for CKPL) to be in scope at the point of use.
*/
#ifdef DASM_CHECKS
#define CK(x, st) \
  do { if (!(x)) { \
    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st)	((void)0)
#define CKPL(kind, st)	((void)0)
#endif
||||
|
||||
/* Try to express n as an 8 bit value combined with an even rotation.
** n is rotated left by 2 bits up to 15 times; as soon as the rotated value
** fits into 8 bits the result is that value plus the rotation count in
** bits 8 and up. Returns -1 if no rotation yields an 8 bit value.
*/
static int dasm_imm12(unsigned int n)
{
  int i;
  for (i = 0; i < 16; i++, n = (n << 2) | (n >> 30))
    if (n <= 255) return (int)(n + (i << 8));
  return -1;
}
||||
|
||||
/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ |
||||
void dasm_put(Dst_DECL, int start, ...) |
||||
{ |
||||
va_list ap; |
||||
dasm_State *D = Dst_REF; |
||||
dasm_ActList p = D->actionlist + start; |
||||
dasm_Section *sec = D->section; |
||||
int pos = sec->pos, ofs = sec->ofs; |
||||
int *b; |
||||
|
||||
if (pos >= sec->epos) { |
||||
DASM_M_GROW(Dst, int, sec->buf, sec->bsize, |
||||
sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); |
||||
sec->rbuf = sec->buf - DASM_POS2BIAS(pos); |
||||
sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); |
||||
} |
||||
|
||||
b = sec->rbuf; |
||||
b[pos++] = start; |
||||
|
||||
va_start(ap, start); |
||||
while (1) { |
||||
unsigned int ins = *p++; |
||||
unsigned int action = (ins >> 16); |
||||
if (action >= DASM__MAX) { |
||||
ofs += 4; |
||||
} else { |
||||
int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; |
||||
switch (action) { |
||||
case DASM_STOP: goto stop; |
||||
case DASM_SECTION: |
||||
n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); |
||||
D->section = &D->sections[n]; goto stop; |
||||
case DASM_ESC: p++; ofs += 4; break; |
||||
case DASM_REL_EXT: break; |
||||
case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; |
||||
case DASM_REL_LG: |
||||
n = (ins & 2047) - 10; pl = D->lglabels + n; |
||||
if (n >= 0) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */ |
||||
pl += 10; n = *pl; |
||||
if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ |
||||
goto linkrel; |
||||
case DASM_REL_PC: |
||||
pl = D->pclabels + n; CKPL(pc, PC); |
||||
putrel: |
||||
n = *pl; |
||||
if (n < 0) { /* Label exists. Get label pos and store it. */ |
||||
b[pos] = -n; |
||||
} else { |
||||
linkrel: |
||||
b[pos] = n; /* Else link to rel chain, anchored at label. */ |
||||
*pl = pos; |
||||
} |
||||
pos++; |
||||
break; |
||||
case DASM_LABEL_LG: |
||||
pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; |
||||
case DASM_LABEL_PC: |
||||
pl = D->pclabels + n; CKPL(pc, PC); |
||||
putlabel: |
||||
n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ |
||||
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; |
||||
} |
||||
*pl = -pos; /* Label exists now. */ |
||||
b[pos++] = ofs; /* Store pass1 offset estimate. */ |
||||
break; |
||||
case DASM_IMM: |
||||
case DASM_IMM16: |
||||
#ifdef DASM_CHECKS |
||||
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); |
||||
if ((ins & 0x8000)) |
||||
CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); |
||||
else |
||||
CK((n>>((ins>>5)&31)) == 0, RANGE_I); |
||||
#endif |
||||
b[pos++] = n; |
||||
break; |
||||
case DASM_IMML8: |
||||
case DASM_IMML12: |
||||
CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : |
||||
(((-n)>>((ins>>5)&31)) == 0), RANGE_I); |
||||
b[pos++] = n; |
||||
break; |
||||
case DASM_IMM12: |
||||
CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); |
||||
b[pos++] = n; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
stop: |
||||
va_end(ap); |
||||
sec->pos = pos; |
||||
sec->ofs = ofs; |
||||
} |
||||
#undef CK |
||||
|
||||
/* Pass 2: Link sections, shrink aligns, fix label offsets. */ |
||||
int dasm_link(Dst_DECL, size_t *szp) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int secnum; |
||||
int ofs = 0; |
||||
|
||||
#ifdef DASM_CHECKS |
||||
*szp = 0; |
||||
if (D->status != DASM_S_OK) return D->status; |
||||
{ |
||||
int pc; |
||||
for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) |
||||
if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; |
||||
} |
||||
#endif |
||||
|
||||
{ /* Handle globals not defined in this translation unit. */ |
||||
int idx; |
||||
for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { |
||||
int n = D->lglabels[idx]; |
||||
/* Undefined label: Collapse rel chain and replace with marker (< 0). */ |
||||
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } |
||||
} |
||||
} |
||||
|
||||
/* Combine all code sections. No support for data sections (yet). */ |
||||
for (secnum = 0; secnum < D->maxsection; secnum++) { |
||||
dasm_Section *sec = D->sections + secnum; |
||||
int *b = sec->rbuf; |
||||
int pos = DASM_SEC2POS(secnum); |
||||
int lastpos = sec->pos; |
||||
|
||||
while (pos != lastpos) { |
||||
dasm_ActList p = D->actionlist + b[pos++]; |
||||
while (1) { |
||||
unsigned int ins = *p++; |
||||
unsigned int action = (ins >> 16); |
||||
switch (action) { |
||||
case DASM_STOP: case DASM_SECTION: goto stop; |
||||
case DASM_ESC: p++; break; |
||||
case DASM_REL_EXT: break; |
||||
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; |
||||
case DASM_REL_LG: case DASM_REL_PC: pos++; break; |
||||
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; |
||||
case DASM_IMM: case DASM_IMM12: case DASM_IMM16: |
||||
case DASM_IMML8: case DASM_IMML12: pos++; break; |
||||
} |
||||
} |
||||
stop: (void)0; |
||||
} |
||||
ofs += sec->ofs; /* Next section starts right after current section. */ |
||||
} |
||||
|
||||
D->codesize = ofs; /* Total size of all code sections */ |
||||
*szp = ofs; |
||||
return DASM_S_OK; |
||||
} |
||||
|
||||
/* Pass 3 sanity check, only active with DASM_CHECKS: if x is false, return
** DASM_S_<st> combined with the offset of the current action from the
** enclosing function. Expects D and p to be in scope at the point of use.
*/
#ifdef DASM_CHECKS
#define CK(x, st) \
  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
#else
#define CK(x, st)	((void)0)
#endif
||||
|
||||
/* Pass 3: Encode sections. */ |
||||
int dasm_encode(Dst_DECL, void *buffer) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
char *base = (char *)buffer; |
||||
unsigned int *cp = (unsigned int *)buffer; |
||||
int secnum; |
||||
|
||||
/* Encode all code sections. No support for data sections (yet). */ |
||||
for (secnum = 0; secnum < D->maxsection; secnum++) { |
||||
dasm_Section *sec = D->sections + secnum; |
||||
int *b = sec->buf; |
||||
int *endb = sec->rbuf + sec->pos; |
||||
|
||||
while (b != endb) { |
||||
dasm_ActList p = D->actionlist + *b++; |
||||
while (1) { |
||||
unsigned int ins = *p++; |
||||
unsigned int action = (ins >> 16); |
||||
int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; |
||||
switch (action) { |
||||
case DASM_STOP: case DASM_SECTION: goto stop; |
||||
case DASM_ESC: *cp++ = *p++; break; |
||||
case DASM_REL_EXT: |
||||
n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); |
||||
goto patchrel; |
||||
case DASM_ALIGN: |
||||
ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; |
||||
break; |
||||
case DASM_REL_LG: |
||||
CK(n >= 0, UNDEF_LG); |
||||
case DASM_REL_PC: |
||||
CK(n >= 0, UNDEF_PC); |
||||
n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); |
||||
patchrel: |
||||
CK((n & 3) == 0 && ((n-4+0x02000000) >> 26) == 0, RANGE_REL); |
||||
cp[-1] |= (((n-4) >> 2) & 0x00ffffff); |
||||
break; |
||||
case DASM_LABEL_LG: |
||||
ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); |
||||
break; |
||||
case DASM_LABEL_PC: break; |
||||
case DASM_IMM: |
||||
cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31); |
||||
break; |
||||
case DASM_IMM12: |
||||
cp[-1] |= dasm_imm12((unsigned int)n); |
||||
break; |
||||
case DASM_IMM16: |
||||
cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff); |
||||
break; |
||||
case DASM_IMML8: |
||||
cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) : |
||||
((-n & 0x0f) | ((-n & 0xf0) << 4)); |
||||
break; |
||||
case DASM_IMML12: |
||||
cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n); |
||||
break; |
||||
default: *cp++ = ins; break; |
||||
} |
||||
} |
||||
stop: (void)0; |
||||
} |
||||
} |
||||
|
||||
if (base + D->codesize != (char *)cp) /* Check for phase errors. */ |
||||
return DASM_S_PHASE; |
||||
return DASM_S_OK; |
||||
} |
||||
#undef CK |
||||
|
||||
/* Get PC label offset. */ |
||||
int dasm_getpclabel(Dst_DECL, unsigned int pc) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
if (pc*sizeof(int) < D->pcsize) { |
||||
int pos = D->pclabels[pc]; |
||||
if (pos < 0) return *DASM_POS2PTR(D, -pos); |
||||
if (pos > 0) return -1; /* Undefined. */ |
||||
} |
||||
return -2; /* Unused or out of range. */ |
||||
} |
||||
|
||||
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps.
** While the status is still OK, local labels 1..9 are inspected: a label
** that is still a pending chain (> 0) sets DASM_S_UNDEF_LG|i, all others
** are reset to 0. If secmatch >= 0 and the active section is not section
** secmatch, the status becomes DASM_S_MATCH_SEC with the index of the
** active section. Returns the resulting status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  if (D->status == DASM_S_OK) {
    int i;
    for (i = 1; i <= 9; i++) {
      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
      D->lglabels[i] = 0;
    }
  }
  if (D->status == DASM_S_OK && secmatch >= 0 &&
      D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif
||||
|
@ -0,0 +1,933 @@ |
||||
------------------------------------------------------------------------------ |
||||
-- DynASM ARM module. |
||||
-- |
||||
-- Copyright (C) 2005-2011 Mike Pall. All rights reserved. |
||||
-- See dynasm.lua for full copyright notice. |
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Module information:
local _info = {
  arch =	"arm",
  description =	"DynASM ARM module",
  version =	"1.2.2",
  vernum =	 10202,
  release =	"2011-03-23",
  author =	"Mike Pall",
  license =	"MIT",
}

-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }

-- Cache library functions.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, setmetatable, rawget = assert, setmetatable, rawget
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
local concat, sort = table.concat, table.sort

-- Inherited tables and callbacks.
-- These are declared here without a value and are not assigned within
-- this part of the file.
local g_opt, g_arch
local wline, werror, wfatal, wwarn
||||
|
||||
-- Action name list.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  "STOP", "SECTION", "ESC", "REL_EXT",
  "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12",
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number.
-- Numbers are zero-based, i.e. the list index minus one.
local map_action = {}
for n,name in ipairs(action_names) do
  map_action[name] = n-1
end

-- Action list buffer.
local actlist = {}

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Return 8 digit hex number.
-- Only the last 8 characters of the formatted value are kept.
local function tohex(x)
  return sub(format("%08x", x), -8) -- Avoid 64 bit portability problem in Lua.
end
||||
|
||||
-- Dump action names and numbers.
-- Writes a heading, then one line per action with its name and its number
-- in hex and decimal, then an empty line, all to the stream `out`.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end
||||
|
||||
-- Write action list buffer as a huge static C array.
-- `out` is the output stream, `name` the identifier of the generated array.
-- An empty action list is emitted as a single STOP word. The final word is
-- picked up before the count is adjusted, so the empty case no longer tries
-- to format a missing list entry.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn]
  if nn == 0 then nn = 1; last = map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  assert(out:write("0x", tohex(last), "\n};\n\n"))
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Add word to action list.
-- The word must be an integer in the range 0 .. 0xffffffff.
local function wputxw(n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  actlist[#actlist+1] = n
end
||||
|
||||
-- Add action to list with optional arg. Advance buffer pos, too.
-- The action number goes into the upper 16 bits of the word, `val` (default
-- 0) into the lower bits. `a`, if given, is appended to the argument list of
-- the next dasm_put(). The section buffer position count advances by `num`
-- (default 1) whenever `a` or `num` is given.
local function waction(action, val, a, num)
  local w = assert(map_action[action], "bad action name `"..action.."'")
  wputxw(w * 0x10000 + (val or 0))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end
||||
|
||||
-- Flush action list (intervening C code or buffer pos overflow).
-- Emits one dasm_put() call for everything collected since the last flush.
-- Unless `term` is set, a STOP action is appended first.
local function wflush(term)
  if #actlist == actargs[1] then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end
||||
|
||||
-- Put escaped word.
-- Words up to 0x000fffff are preceded by an ESC action before being added.
local function wputw(n)
  if n <= 0x000fffff then waction("ESC") end
  wputxw(n)
end
||||
|
||||
-- Reserve position for word.
-- Appends an empty-string placeholder to the action list and returns its
-- index, to be filled in later.
local function wpos()
  local pos = #actlist+1
  actlist[pos] = ""
  return pos
end
||||
|
||||
-- Store word to reserved position.
-- The word must be an integer in the range 0 .. 0xffffffff.
local function wputpos(pos, n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  actlist[pos] = n
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbering starts at 20. A name that is not a plain identifier, or a number
-- above 2047, is reported through werror().
local next_global = 20
local map_global = setmetatable({}, { __index = function(t, name)
  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
  local n = next_global
  if n > 2047 then werror("too many global labels") end
  next_global = n + 1
  t[name] = n
  return n
end})
||||
|
||||
-- Dump global labels.
-- Writes a heading, the label names in order of their numbers and an empty
-- line to the stream `out`. `lvl` is not used here.
local function dumpglobals(out, lvl)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("Global labels:\n")
  for i=20,next_global-1 do
    out:write(format(" %s\n", t[i]))
  end
  out:write("\n")
end
||||
|
||||
-- Write global label enum.
-- Emits a C enum with one enumerator per global label (`prefix` followed by
-- the label name, in order of the label numbers) and a closing
-- `prefix`.."_MAX" enumerator.
local function writeglobals(out, prefix)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("enum {\n")
  for i=20,next_global-1 do
    out:write(" ", prefix, t[i], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
||||
|
||||
-- Write global label names.
-- Emits a C array of strings called `name` with the global label names in
-- order of their numbers, terminated by a null pointer entry.
local function writeglobalnames(out, name)
  local t = {}
  -- The loop variable is named differently from the `name` parameter so
  -- that the array identifier is never shadowed.
  for label, n in pairs(map_global) do t[n] = label end
  out:write("static const char *const ", name, "[] = {\n")
  for i=20,next_global-1 do
    out:write(" \"", t[i], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Extern label name -> extern label number. With auto assignment on 1st use.
-- Numbering starts at 0; map_extern_ keeps the reverse mapping
-- (number -> name). A number above 2047 is reported through werror().
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, { __index = function(t, name)
  -- No restrictions on the name for now.
  local n = next_extern
  if n > 2047 then werror("too many extern labels") end
  next_extern = n + 1
  t[name] = n
  map_extern_[n] = name
  return n
end})
||||
|
||||
-- Dump extern labels.
-- Writes a heading, the extern label names in order of their numbers and an
-- empty line to the stream `out`. `lvl` is not used here.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  for i=0,next_extern-1 do
    out:write(format(" %s\n", map_extern_[i]))
  end
  out:write("\n")
end
||||
|
||||
-- Write extern label names.
-- Emits a C array of strings called `name` with the extern label names in
-- order of their numbers, terminated by a null pointer entry.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  for i=0,next_extern-1 do
    out:write(" \"", map_extern_[i], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Arch-specific maps.

-- Ext. register name -> int. name.
local map_archdef = { sp = "r13", lr = "r14", pc = "r15", }

-- Int. register name -> ext. name.
local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", }

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).
||||
|
||||
-- Reverse defines for registers.
-- Returns the external name for r13/r14/r15 and any other string unchanged.
function _M.revdef(s)
  return map_reg_rev[s] or s
end
||||
|
||||
-- Shift mnemonic -> shift type number.
local map_shift = {
  lsl = 0,
  lsr = 1,
  asr = 2,
  ror = 3,
}

-- Condition suffix -> condition code number.
local map_cond = {
  eq = 0, ne = 1,
  cs = 2, cc = 3,
  mi = 4, pl = 5,
  vs = 6, vc = 7,
  hi = 8, ls = 9,
  ge = 10, lt = 11,
  gt = 12, le = 13,
  al = 14,
  -- Alternate spellings sharing the numbers of cs/cc.
  hs = 2, lo = 3,
}
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Template strings for ARM instructions.
-- Key: "<mnemonic>_<number of operands>". Value: 8 hex digits of base opcode
-- followed by one template character per operand field; the characters are
-- interpreted by the ".template__" handler.
local map_op = {
  -- Basic data processing instructions (last operand: register or immediate).
  and_3 = "e0000000DNPs",
  eor_3 = "e0200000DNPs",
  sub_3 = "e0400000DNPs",
  rsb_3 = "e0600000DNPs",
  add_3 = "e0800000DNPs",
  adc_3 = "e0a00000DNPs",
  sbc_3 = "e0c00000DNPs",
  rsc_3 = "e0e00000DNPs",
  tst_2 = "e1100000NP",
  teq_2 = "e1300000NP",
  cmp_2 = "e1500000NP",
  cmn_2 = "e1700000NP",
  orr_3 = "e1800000DNPs",
  mov_2 = "e1a00000DPs",
  bic_3 = "e1c00000DNPs",
  mvn_2 = "e1e00000DPs",

  -- Same instructions with a register operand plus a shift operand.
  and_4 = "e0000000DNMps",
  eor_4 = "e0200000DNMps",
  sub_4 = "e0400000DNMps",
  rsb_4 = "e0600000DNMps",
  add_4 = "e0800000DNMps",
  adc_4 = "e0a00000DNMps",
  sbc_4 = "e0c00000DNMps",
  rsc_4 = "e0e00000DNMps",
  tst_3 = "e1100000NMp",
  teq_3 = "e1300000NMp",
  cmp_3 = "e1500000NMp",
  cmn_3 = "e1700000NMp",
  orr_4 = "e1800000DNMps",
  mov_3 = "e1a00000DMps",
  bic_4 = "e1c00000DNMps",
  mvn_3 = "e1e00000DMps",

  -- Shift mnemonics.
  lsl_3 = "e1a00000DMvs",
  lsr_3 = "e1a00020DMvs",
  asr_3 = "e1a00040DMvs",
  ror_3 = "e1a00060DMvs",
  rrx_2 = "e1a00060DMs",

  -- Multiply and multiply-accumulate.
  mul_3 = "e0000090NMSs",
  mla_4 = "e0200090NMSDs",
  umaal_4 = "e0400090DNMSs", -- v6
  mls_4 = "e0600090DNMSs", -- v6T2
  umull_4 = "e0800090DNMSs",
  umlal_4 = "e0a00090DNMSs",
  smull_4 = "e0c00090DNMSs",
  smlal_4 = "e0e00090DNMSs",

  -- Halfword multiply and multiply-accumulate.
  smlabb_4 = "e1000080NMSD", -- v5TE
  smlatb_4 = "e10000a0NMSD", -- v5TE
  smlabt_4 = "e10000c0NMSD", -- v5TE
  smlatt_4 = "e10000e0NMSD", -- v5TE
  smlawb_4 = "e1200080NMSD", -- v5TE
  smulwb_3 = "e12000a0NMS", -- v5TE
  smlawt_4 = "e12000c0NMSD", -- v5TE
  smulwt_3 = "e12000e0NMS", -- v5TE
  smlalbb_4 = "e1400080NMSD", -- v5TE
  smlaltb_4 = "e14000a0NMSD", -- v5TE
  smlalbt_4 = "e14000c0NMSD", -- v5TE
  smlaltt_4 = "e14000e0NMSD", -- v5TE
  smulbb_3 = "e1600080NMS", -- v5TE
  smultb_3 = "e16000a0NMS", -- v5TE
  smulbt_3 = "e16000c0NMS", -- v5TE
  smultt_3 = "e16000e0NMS", -- v5TE

  -- Miscellaneous data processing instructions.
  clz_2 = "e16f0f10DM", -- v5T
  rev_2 = "e6bf0f30DM", -- v6
  rev16_2 = "e6bf0fb0DM", -- v6
  revsh_2 = "e6ff0fb0DM", -- v6
  sel_3 = "e6800fb0DNM", -- v6
  usad8_3 = "e780f010NMS", -- v6
  usada8_4 = "e7800010NMSD", -- v6
  rbit_2 = "e6ff0f30DM", -- v6T2
  movw_2 = "e3000000DW", -- v6T2
  movt_2 = "e3400000DW", -- v6T2
  -- Note: the X encodes width-1, not width.
  sbfx_4 = "e7a00050DMvX", -- v6T2
  ubfx_4 = "e7e00050DMvX", -- v6T2
  -- Note: the X encodes the msb field, not the width.
  bfc_3 = "e7c0001fDvX", -- v6T2
  bfi_4 = "e7c00010DMvX", -- v6T2

  -- Packing and unpacking instructions (all v6).
  pkhbt_3 = "e6800010DNM",
  pkhbt_4 = "e6800010DNMv",
  pkhtb_3 = "e6800050DNM",
  pkhtb_4 = "e6800050DNMv",
  sxtab_3 = "e6a00070DNM",
  sxtab_4 = "e6a00070DNMv",
  sxtab16_3 = "e6800070DNM",
  sxtab16_4 = "e6800070DNMv",
  sxtah_3 = "e6b00070DNM",
  sxtah_4 = "e6b00070DNMv",
  sxtb_2 = "e6af0070DM",
  sxtb_3 = "e6af0070DMv",
  sxtb16_2 = "e68f0070DM",
  sxtb16_3 = "e68f0070DMv",
  sxth_2 = "e6bf0070DM",
  sxth_3 = "e6bf0070DMv",
  uxtab_3 = "e6e00070DNM",
  uxtab_4 = "e6e00070DNMv",
  uxtab16_3 = "e6c00070DNM",
  uxtab16_4 = "e6c00070DNMv",
  uxtah_3 = "e6f00070DNM",
  uxtah_4 = "e6f00070DNMv",
  uxtb_2 = "e6ef0070DM",
  uxtb_3 = "e6ef0070DMv",
  uxtb16_2 = "e6cf0070DM",
  uxtb16_3 = "e6cf0070DMv",
  uxth_2 = "e6ff0070DM",
  uxth_3 = "e6ff0070DMv",

  -- Saturating instructions.
  qadd_3 = "e1000050DMN", -- v5TE
  qsub_3 = "e1200050DMN", -- v5TE
  qdadd_3 = "e1400050DMN", -- v5TE
  qdsub_3 = "e1600050DMN", -- v5TE
  -- Note: the X for ssat* encodes sat_imm-1, not sat_imm.
  ssat_3 = "e6a00010DXM", -- v6
  ssat_4 = "e6a00010DXMp", -- v6
  usat_3 = "e6e00010DXM", -- v6
  usat_4 = "e6e00010DXMp", -- v6
  ssat16_3 = "e6a00f30DXM", -- v6
  usat16_3 = "e6e00f30DXM", -- v6

  -- Parallel addition and subtraction (all v6).
  sadd16_3 = "e6100f10DNM",
  sasx_3 = "e6100f30DNM",
  ssax_3 = "e6100f50DNM",
  ssub16_3 = "e6100f70DNM",
  sadd8_3 = "e6100f90DNM",
  ssub8_3 = "e6100ff0DNM",
  qadd16_3 = "e6200f10DNM",
  qasx_3 = "e6200f30DNM",
  qsax_3 = "e6200f50DNM",
  qsub16_3 = "e6200f70DNM",
  qadd8_3 = "e6200f90DNM",
  qsub8_3 = "e6200ff0DNM",
  shadd16_3 = "e6300f10DNM",
  shasx_3 = "e6300f30DNM",
  shsax_3 = "e6300f50DNM",
  shsub16_3 = "e6300f70DNM",
  shadd8_3 = "e6300f90DNM",
  shsub8_3 = "e6300ff0DNM",
  uadd16_3 = "e6500f10DNM",
  uasx_3 = "e6500f30DNM",
  usax_3 = "e6500f50DNM",
  usub16_3 = "e6500f70DNM",
  uadd8_3 = "e6500f90DNM",
  usub8_3 = "e6500ff0DNM",
  uqadd16_3 = "e6600f10DNM",
  uqasx_3 = "e6600f30DNM",
  uqsax_3 = "e6600f50DNM",
  uqsub16_3 = "e6600f70DNM",
  uqadd8_3 = "e6600f90DNM",
  uqsub8_3 = "e6600ff0DNM",
  uhadd16_3 = "e6700f10DNM",
  uhasx_3 = "e6700f30DNM",
  uhsax_3 = "e6700f50DNM",
  uhsub16_3 = "e6700f70DNM",
  uhadd8_3 = "e6700f90DNM",
  uhsub8_3 = "e6700ff0DNM",

  -- Load/store instructions. The address operand(s) are handled by "L".
  str_2 = "e4000000DL",
  str_3 = "e4000000DL",
  str_4 = "e4000000DL",
  strb_2 = "e4400000DL",
  strb_3 = "e4400000DL",
  strb_4 = "e4400000DL",
  ldr_2 = "e4100000DL",
  ldr_3 = "e4100000DL",
  ldr_4 = "e4100000DL",
  ldrb_2 = "e4500000DL",
  ldrb_3 = "e4500000DL",
  ldrb_4 = "e4500000DL",
  strh_2 = "e00000b0DL",
  strh_3 = "e00000b0DL",
  ldrh_2 = "e01000b0DL",
  ldrh_3 = "e01000b0DL",
  ldrd_2 = "e00000d0DL", -- v5TE
  ldrd_3 = "e00000d0DL", -- v5TE
  ldrsb_2 = "e01000d0DL",
  ldrsb_3 = "e01000d0DL",
  strd_2 = "e00000f0DL", -- v5TE
  strd_3 = "e00000f0DL", -- v5TE
  ldrsh_2 = "e01000f0DL",
  ldrsh_3 = "e01000f0DL",

  -- Load/store multiple, including the alternate mnemonics.
  ldm_2 = "e8900000nR",
  ldmia_2 = "e8900000nR",
  ldmfd_2 = "e8900000nR",
  ldmda_2 = "e8100000nR",
  ldmfa_2 = "e8100000nR",
  ldmdb_2 = "e9100000nR",
  ldmea_2 = "e9100000nR",
  ldmib_2 = "e9900000nR",
  ldmed_2 = "e9900000nR",
  stm_2 = "e8800000nR",
  stmia_2 = "e8800000nR",
  stmfd_2 = "e8800000nR",
  stmda_2 = "e8000000nR",
  stmfa_2 = "e8000000nR",
  stmdb_2 = "e9000000nR",
  stmea_2 = "e9000000nR",
  stmib_2 = "e9800000nR",
  stmed_2 = "e9800000nR",
  pop_1 = "e8bd0000R",
  push_1 = "e92d0000R",

  -- Branch instructions.
  b_1 = "ea000000B",
  bl_1 = "eb000000B",
  blx_1 = "e12fff30C",
  bx_1 = "e12fff10M",

  -- Miscellaneous instructions.
  nop_0 = "e1a00000",
  mrs_1 = "e10f0000D",
  bkpt_1 = "e1200070K", -- v5T
  svc_1 = "ef000000T",
  swi_1 = "ef000000T",
  ud_0 = "e7f001f0",

  -- NYI: Advanced SIMD and VFP instructions.

  -- NYI instructions, since I have no need for them right now:
  -- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh
  -- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe
  -- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb
  -- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2
}
||||
|
||||
-- Add mnemonics for "s" variants.
-- For every template ending in "s", register "<mnemonic>s_<n>" whose template
-- has the character code of its third character incremented by one and the
-- trailing "s" removed.
do
  local svariants = {}
  for opname, tmpl in pairs(map_op) do
    if sub(tmpl, -1) == "s" then
      local head, third, rest = sub(tmpl, 1, 2), byte(tmpl, 3), sub(tmpl, 4, -2)
      local mnemonic, suffix = sub(opname, 1, -3), sub(opname, -2)
      svariants[mnemonic.."s"..suffix] = head..char(third + 1)..rest
    end
  end
  -- Merge in a second pass: map_op must not gain keys while being traversed.
  for opname, tmpl in pairs(svariants) do
    map_op[opname] = tmpl
  end
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Parse a general-purpose register operand.
-- Accepts "rN" with N in 0..15, the name of a type from map_type (using that
-- type's default register) or "type:rN" (explicit register override).
-- Returns the register number and the map_type entry (nil for a plain
-- register). Reports an error through werror() otherwise.
local function parse_gpr(expr)
  local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$")
  local tp = map_type[tname or expr]
  if tp then
    local regname = ovreg or tp.reg
    if not regname then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = regname
  end
  local digits = match(expr, "^r(1?[0-9])$")
  local regnum = digits and tonumber(digits)
  if regnum and regnum <= 15 then return regnum, tp end
  werror("bad register name `"..expr.."'")
end
||||
|
||||
-- Parse a register operand with an optional leading "+" or "-".
-- Returns the register number and true if the sign was "-".
local function parse_gpr_pm(expr)
  local sign, regexpr = match(expr, "^([+-]?)(.*)$")
  local regnum = parse_gpr(regexpr)
  return regnum, sign == "-"
end
||||
|
||||
-- Parse a register list of the form "{r0, r1, ...}".
-- Returns a number with bit N set for every listed register rN. A register
-- that appears twice is reported through werror().
local function parse_reglist(reglist)
  local inner = match(reglist, "^{%s*([^}]*)}$")
  if not inner then werror("register list expected") end
  local mask = 0
  for item in gmatch(inner..",", "%s*([^,]*),") do
    -- Strip trailing whitespace before parsing the register name.
    local regname = gsub(item, "%s+$", "")
    local bit = 2^parse_gpr(regname)
    -- Test whether this bit is already set in the mask (no bit library used).
    local above = (mask - (mask % bit)) / bit
    if above % 2 ~= 0 then
      werror("duplicate register `"..item.."'")
    end
    mask = mask + bit
  end
  return mask
end
||||
|
||||
-- Parse a "#imm" operand for a bit field.
--   imm    operand text, must start with "#"
--   bits   width of the field in bits
--   shift  bit position of the field inside the opcode
--   scale  the value must be a multiple of 2^scale and is divided by it
--   signed true for a two's complement field
-- For a numeric literal, returns the encoded field (value * 2^shift) after a
-- range check; out-of-range values are reported through werror().
-- For any other expression, emits an "IMM" action carrying the field
-- description and returns 0.
local function parse_imm(imm, bits, shift, scale, signed)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = tonumber(imm)
  if not n then
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
  if n % 2^scale == 0 then
    n = n / 2^scale
    if signed then
      if n >= 0 then
        if n < 2^(bits-1) then return n*2^shift end
      else
        -- Lowest representable value is -2^(bits-1). One less would wrap to
        -- 2^(bits-1)-1 and silently alias the largest positive value.
        if n >= -(2^(bits-1)) then return (n+2^bits)*2^shift end
      end
    else
      if n >= 0 and n <= 2^bits-1 then return n*2^shift end
    end
  end
  werror("out of range immediate `"..imm.."'")
end
||||
|
||||
-- Parse the text of a data-processing immediate (without the leading "#").
-- A numeric literal is encoded as an 8 bit value plus a 4 bit rotation count
-- in bits 8..11; values that cannot be expressed are reported via werror().
-- For any other expression, emits an "IMM12" action and returns 0.
local function parse_imm12(imm)
  local n = tonumber(imm)
  if not n then
    waction("IMM12", 0, imm)
    return 0
  end
  local m = n
  for step = 0, 15 do
    if m >= 0 and m <= 255 and n % 1 == 0 then
      return m + ((16 - step) % 16) * 256
    end
    -- Rotate the 32 bit value right by two bits.
    local low2 = m % 4
    m = (m - low2) / 4 + low2 * 2^30
  end
  werror("out of range immediate `"..imm.."'")
end
||||
|
||||
-- Parse a "#imm" operand holding a 16 bit value (template character "W").
-- A numeric literal in 0..65535 is split so that its low 12 bits stay in
-- place and its upper 4 bits land at bit 16. Other literals are reported via
-- werror(). For a non-numeric expression, emits an "IMM16" action and
-- returns 0.
local function parse_imm16(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = tonumber(imm)
  if not n then
    waction("IMM16", 32*16, imm)
    return 0
  end
  local in_range = n >= 0 and n <= 65535 and n % 1 == 0
  if in_range then
    local low12 = n % 4096
    return (n - low12) * 16 + low12
  end
  werror("out of range immediate `"..imm.."'")
end
||||
|
||||
-- Parse the text of a load/store offset (without the leading "#").
--   ext  true selects the split 8 bit form (range -255..255, high nibble
--        moved up by 4 bits); otherwise the 12 bit form (range -4095..4095).
-- For a numeric literal, returns the magnitude in its field, plus 0x00800000
-- when the offset is not negative. Out-of-range literals are reported via
-- werror(). For any other expression, emits an "IMML8"/"IMML12" action and
-- returns 0.
local function parse_imm_load(imm, ext)
  local n = tonumber(imm)
  if not n then
    waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12), imm)
    return 0
  end
  local limit = ext and 255 or 4095
  if n >= -limit and n <= limit then
    local up = 0x00800000
    if n < 0 then n = -n; up = 0 end
    if ext then
      local low4 = n % 16
      return (n - low4)*16 + low4 + up
    end
    return n + up
  end
  werror("out of range immediate `"..imm.."'")
end
||||
|
||||
-- Parse a shift operand: "rrx", "<shift> #imm" or "<shift> <reg>".
--   gprok  true if a register-specified shift amount is allowed
-- Returns the shift bits to be added to the opcode.
local function parse_shift(shift, gprok)
  if shift == "rrx" then return 3 * 32 end
  local kind, amount = match(shift, "^(%S+)%s*(.*)$")
  local code = map_shift[kind]
  if not code then werror("expected shift operand") end
  if sub(amount, 1, 1) == "#" then
    -- Immediate shift amount: 5 bit field at bit 7.
    return parse_imm(amount, 5, 7, 0, false) + code * 32
  end
  if not gprok then werror("expected immediate shift operand") end
  -- Register shift amount: register at bit 8, plus bit 4 set.
  return parse_gpr(amount) * 256 + code * 32 + 16
end
||||
|
||||
-- Parse the address operand(s) of a load/store instruction (template "L").
--   params   operand list of the instruction
--   nparams  number of operands (unused here)
--   n        index of the first address operand in params
--   op       opcode built so far (base template plus destination register)
-- Returns the opcode with the addressing fields added.
--
-- Handled forms, starting at params[n]:
--   [rn]  [rn, #imm]  [rn, +-rm]  [rn, +-rm, shift]   (optional trailing "!")
--   [rn], #imm        [rn], +-rm  [rn], +-rm, shift
--   a typed register followed by a non-empty tail, which is formatted through
--   the type's ctypefmt and emitted as an IMML8/IMML12 action.
local function parse_load(params, nparams, n, op)
  local oplo = op % 256
  local ext = (oplo ~= 0)
  -- ldrd/strd share their low opcode byte (0xd0/0xf0) with ldrsb/ldrsh; only
  -- the doubleword forms have bit 20 clear. Restrict the even-register and
  -- register-conflict checks to them.
  local bit20 = ((op - (op % 0x00100000)) / 0x00100000) % 2
  local dword = (bit20 == 0) and (oplo == 208 or oplo == 240)
  local ldrd = dword and (oplo == 208)
  local d
  if dword then
    d = ((op - (op % 4096)) / 4096) % 16
    if d % 2 ~= 0 then werror("odd destination register") end
  end
  local pn = params[n]
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  local p2 = params[n+1]
  if not p1 then
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_gpr(reg)
        if tp then
          waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12),
                  format(tp.ctypefmt, tailr))
          return op + base * 65536 + 0x01000000 + (ext and 0x00400000 or 0)
        end
      end
    end
    werror("expected address operand")
  end
  if wb == "!" then op = op + 0x00200000 end
  if p2 then
    -- Offset given after the closing bracket.
    if wb == "!" then werror("bad use of '!'") end
    local p3 = params[n+2]
    op = op + parse_gpr(p1) * 65536
    local imm = match(p2, "^#(.*)$")
    if imm then
      local m = parse_imm_load(imm, ext)
      if p3 then werror("too many parameters") end
      op = op + m + (ext and 0x00400000 or 0)
    else
      local m, neg = parse_gpr_pm(p2)
      if ldrd and (m == d or m-1 == d) then werror("register conflict") end
      op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
      if p3 then op = op + parse_shift(p3) end
    end
  else
    -- Everything is inside the brackets: base register plus optional offset.
    local basename, rest = match(p1, "^([^,%s]*)%s*(.*)$")
    op = op + parse_gpr(basename) * 65536 + 0x01000000
    if rest ~= "" then
      local imm = match(rest, "^,%s*#(.*)$")
      if imm then
        local m = parse_imm_load(imm, ext)
        op = op + m + (ext and 0x00400000 or 0)
      else
        local regexpr, shift = match(rest, "^,%s*([^,%s]*)%s*,?%s*(.*)$")
        local m, neg = parse_gpr_pm(regexpr)
        if ldrd and (m == d or m-1 == d) then werror("register conflict") end
        op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
        if shift ~= "" then
          if ext then werror("too many parameters") end
          op = op + parse_shift(shift)
        end
      end
    else
      -- Plain [rn]: encoded as a zero immediate offset, so "!" is rejected.
      if wb == "!" then werror("bad use of '!'") end
      op = op + (ext and 0x00c00000 or 0x00800000)
    end
  end
  return op
end
||||
|
||||
-- Parse a label operand.
--   def  true for a label definition, false for a label reference
-- Returns the action suffix ("PC", "LG" or "EXT"), a number and, for PC
-- labels only, the expression text. Anything else is reported via werror().
local function parse_label(label, def)
  local prefix = sub(label, 1, 2)
  if prefix == "=>" then
    -- =>label (pc label reference)
    return "PC", 0, sub(label, 3)
  elseif prefix == "->" then
    -- ->name (global label reference)
    return "LG", map_global[sub(label, 3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference)
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then
      -- Fwd: 1-9, Bkwd: 11-19.
      local bias = (dir == ">") and 0 or 10
      return "LG", lnum + bias
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Handle opcodes defined with template strings.
-- Without params, returns the operand part of the template (for listings).
-- Otherwise starts from the 8 hex digit base opcode, consumes one template
-- character at a time, adds the encoded operand fields and finally stores
-- the opcode at the position reserved before any actions were emitted.
map_op[".template__"] = function(params, template, nparams)
  if not params then return sub(template, 9) end
  local op = tonumber(sub(template, 1, 8), 16)
  local n = 1

  -- Limit number of section buffer positions used by a single dasm_put():
  -- flush first if fewer than 3 positions are left.
  if secpos+3 > maxsecpos then wflush() end
  local pos = wpos()

  -- Plain register fields: template character -> multiplier for the field.
  local gpr_scale = { D = 4096, N = 65536, S = 256, M = 1 }

  -- Process each character.
  for p in gmatch(sub(template, 9), ".") do
    local scale = gpr_scale[p]
    if scale then
      op = op + parse_gpr(params[n]) * scale; n = n + 1
    elseif p == "P" then
      -- Either "#imm" (sets bit 25) or a register.
      local imm = match(params[n], "^#(.*)$")
      if imm then
        op = op + parse_imm12(imm) + 0x02000000
      else
        op = op + parse_gpr(params[n])
      end
      n = n + 1
    elseif p == "p" then
      op = op + parse_shift(params[n], true); n = n + 1
    elseif p == "L" then
      op = parse_load(params, nparams, n, op)
    elseif p == "B" then
      local mode, num, expr = parse_label(params[n], false)
      waction("REL_"..mode, num, expr, 1)
    elseif p == "C" then -- blx gpr vs. blx label.
      local target = params[n]
      if match(target, "^([%w_]+):(r1?[0-9])$") or match(target, "^r(1?[0-9])$") then
        op = op + parse_gpr(target)
      else
        if op < 0xe0000000 then werror("unconditional instruction") end
        local mode, num, expr = parse_label(target, false)
        waction("REL_"..mode, num, expr, 1)
        op = 0xfa000000
      end
    elseif p == "n" then
      -- Base register with optional "!" suffix.
      local r, wb = match(params[n], "^([^!]*)(!?)$")
      op = op + parse_gpr(r) * 65536 + (wb == "!" and 0x00200000 or 0)
      n = n + 1
    elseif p == "R" then
      op = op + parse_reglist(params[n]); n = n + 1
    elseif p == "W" then
      op = op + parse_imm16(params[n]); n = n + 1
    elseif p == "v" then
      op = op + parse_imm(params[n], 5, 7, 0, false); n = n + 1
    elseif p == "X" then
      op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1
    elseif p == "K" then
      -- 16 bit literal: low 4 bits stay, the rest moves up by 4 bits.
      local imm = tonumber(match(params[n], "^#(.*)$")); n = n + 1
      if not imm or imm % 1 ~= 0 or imm < 0 or imm > 0xffff then
        werror("bad immediate operand")
      end
      local low4 = imm % 16
      op = op + (imm - low4) * 16 + low4
    elseif p == "T" then
      op = op + parse_imm(params[n], 24, 0, 0, false); n = n + 1
    elseif p == "s" then
      -- Ignored.
    else
      assert(false)
    end
  end
  wputpos(pos, op)
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Pseudo-opcode marking where the action list is to be emitted.
-- Without params, returns the operand description.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeactions(out, cvar) end)
  else
    return "cvar"
  end
end
||||
|
||||
-- Pseudo-opcode marking where the global enum is to be emitted.
-- Without params, returns the operand description.
map_op[".globals_1"] = function(params)
  if params then
    local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobals(out, enumprefix) end)
  else
    return "prefix"
  end
end
||||
|
||||
-- Pseudo-opcode marking where the global names are to be emitted.
-- Without params, returns the operand description.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobalnames(out, cvar) end)
  else
    return "cvar"
  end
end
||||
|
||||
-- Pseudo-opcode marking where the extern names are to be emitted.
-- Without params, returns the operand description.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeexternnames(out, cvar) end)
  else
    return "cvar"
  end
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Label pseudo-opcode (converted from trailing colon form).
-- Emits a LABEL_LG or LABEL_PC action; extern labels cannot be defined.
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  -- Make sure one section buffer position is available.
  if secpos+1 > maxsecpos then wflush() end
  local kind, num, expr = parse_label(params[1], true)
  if kind == "EXT" then werror("bad label definition") end
  waction("LABEL_"..kind, num, expr, 1)
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Pseudo-opcode for data storage: emit each numeric operand as one word.
-- Negative values are wrapped by adding 2^32.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _, text in ipairs(params) do
    local word = tonumber(text)
    if not word then werror("bad immediate `"..text.."'") end
    if word < 0 then word = word + 2^32 end
    wputw(word)
    -- Flush early so the next word cannot exceed the position limit.
    if secpos+2 > maxsecpos then wflush() end
  end
end
||||
|
||||
-- Alignment pseudo-opcode.
-- The operand must be a power of 2 in the range 2 ... 256; the emitted ALIGN
-- action carries align-1.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    -- Compare against 2, 4, ..., 256.
    local pow2 = 2
    for _ = 1, 8 do
      if align == pow2 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
      pow2 = pow2 * 2
    end
  end
  werror("bad alignment")
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Operands: name, ctype and an optional default register.
-- Registers the type in map_type, defines "#name" as sizeof(ctype) and emits
-- a DtN() shortcut macro computing a field offset inside ctype.
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  local entry = {}
  entry.ctype = ctype
  entry.ctypefmt = format("Dt%X(%%s)", num)
  entry.reg = reg
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
-- The two-operand form (no default register) shares the implementation.
map_op[".type_2"] = map_op[".type_3"]
||||
|
||||
-- Dump type definitions, sorted by type name.
local function dumptypes(out, lvl)
  local names = {}
  for tname in pairs(map_type) do
    names[#names+1] = tname
  end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local tname = names[i]
    local tp = map_type[tname]
    out:write(format(" %-20s %-20s %s\n", tname, tp.ctype, tp.reg or ""))
  end
  out:write("\n")
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Set the current section: emit a SECTION action for `num`, then flush.
function _M.section(num)
  waction("SECTION", num)
  -- SECTION is a terminal action.
  wflush(true)
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Dump architecture description: a version banner followed by the actions.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
                        _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpactions(out)
end
||||
|
||||
-- Dump all user defined elements: types, then globals, then externs.
function _M.dumpdef(out, lvl)
  local dumpers = { dumptypes, dumpglobals, dumpexterns }
  for i = 1, #dumpers do
    dumpers[i](out, lvl)
  end
end
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Pass callbacks from/to the DynASM core.
-- Stores the line writer and the error/fatal/warning reporters supplied by
-- the core and hands back this module's flush function.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
||||
|
||||
-- Setup the arch-specific module: remember the architecture and the options.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
||||
|
||||
-- Merge the core maps and the arch-specific maps.
-- Opcode lookups that miss map_op fall back to the core opcodes and then to
-- conditional variants: "<mnemonic><cc>_<n>" resolves to the template of
-- "<mnemonic>_<n>" with its first hex digit replaced by the condition code.
-- Define lookups that miss map_def fall back to map_archdef.
function _M.mergemaps(map_coreop, map_def)
  local function lookup_op(t, k)
    local core = map_coreop[k]
    if core then return core end
    local cv = map_cond[sub(k, -4, -3)]
    if not cv then return end
    -- Strip the condition suffix and look up the plain mnemonic.
    local tmpl = rawget(t, sub(k, 1, -5)..sub(k, -2))
    if tmpl then return format("%x%s", cv, sub(tmpl, 2)) end
  end
  setmetatable(map_op, { __index = lookup_op })
  setmetatable(map_def, { __index = map_archdef })
  return map_op, map_def
end

return _M
||||
|
||||
------------------------------------------------------------------------------ |
||||
|
@ -0,0 +1,408 @@ |
||||
/*
|
||||
** DynASM PPC encoding engine. |
||||
** Copyright (C) 2005-2011 Mike Pall. All rights reserved. |
||||
** Released under the MIT/X license. See dynasm.lua for full copyright notice. |
||||
*/ |
||||
|
||||
#include <stddef.h> |
||||
#include <stdarg.h> |
||||
#include <string.h> |
||||
#include <stdlib.h> |
||||
|
||||
/* Name of the target architecture handled by this encoding engine. */
#define DASM_ARCH		"ppc"

/* Fallback for extern resolution: evaluates to 0 unless overridden. */
#if !defined(DASM_EXTERN)
#define DASM_EXTERN(a,b,c,d)	0
#endif
||||
|
||||
/* Action definitions. The order matters: the encoder compares action codes
** against DASM_REL_PC and DASM__MAX to classify them.
*/
enum {
  DASM_STOP,
  DASM_SECTION,
  DASM_ESC,
  DASM_REL_EXT,
  /* The following actions need a buffer position. */
  DASM_ALIGN,
  DASM_REL_LG,
  DASM_LABEL_LG,
  /* The following actions also have an argument. */
  DASM_REL_PC,
  DASM_LABEL_PC,
  DASM_IMM,
  DASM__MAX
};
||||
|
||||
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS		25

/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK		0x00000000
#define DASM_S_NOMEM		0x01000000
#define DASM_S_PHASE		0x02000000
#define DASM_S_MATCH_SEC	0x03000000
#define DASM_S_RANGE_I		0x11000000
#define DASM_S_RANGE_SEC	0x12000000
#define DASM_S_RANGE_LG		0x13000000
#define DASM_S_RANGE_PC		0x14000000
#define DASM_S_RANGE_REL	0x15000000
#define DASM_S_UNDEF_LG		0x21000000
#define DASM_S_UNDEF_PC		0x22000000

/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
#define DASM_SEC2POS(sec)	((sec)<<24)
#define DASM_POS2SEC(pos)	((pos)>>24)
/* Pointer to the buffer slot of a biased position. D is parenthesized so
** that any pointer expression may be passed, not just a plain identifier.
*/
#define DASM_POS2PTR(D, pos) \
  ((D)->sections[DASM_POS2SEC(pos)].rbuf + (pos))
||||
|
||||
/* Action list type: a pointer to read-only unsigned int entries. */
typedef const unsigned int *dasm_ActList;

/* Per-section structure. */
typedef struct dasm_Section {
  /* Biased buffer pointer (negative section bias). */
  int *rbuf;
  /* True buffer pointer. */
  int *buf;
  /* Buffer size in bytes. */
  size_t bsize;
  /* Biased buffer position. */
  int pos;
  /* End of biased buffer position - max single put. */
  int epos;
  /* Byte offset into section. */
  int ofs;
} dasm_Section;

/* Core structure holding the DynASM encoding state. */
struct dasm_State {
  /* Allocated size of this structure. */
  size_t psize;
  /* Current actionlist pointer. */
  dasm_ActList actionlist;
  /* Local/global chain/pos ptrs and the size of that array in bytes. */
  int *lglabels;
  size_t lgsize;
  /* PC label chains/pos ptrs and the size of that array in bytes. */
  int *pclabels;
  size_t pcsize;
  /* Array of globals (bias -10). */
  void **globals;
  /* Pointer to active section. */
  dasm_Section *section;
  /* Total size of all code sections. */
  size_t codesize;
  /* 0 <= sectionidx < maxsection. */
  int maxsection;
  /* Status code. */
  int status;
  /* All sections. Alloc-extended. */
  dasm_Section sections[1];
};
||||
|
||||
/* The size of the core structure depends on the max. number of sections.
** The structure already embeds one section, hence (ms)-1 additional ones.
** The argument is parenthesized so that any expression may be passed.
*/
#define DASM_PSZ(ms)	(sizeof(dasm_State)+((ms)-1)*sizeof(dasm_Section))
||||
|
||||
|
||||
/* Initialize DynASM state. */ |
||||
void dasm_init(Dst_DECL, int maxsection) |
||||
{ |
||||
dasm_State *D; |
||||
size_t psz = 0; |
||||
int i; |
||||
Dst_REF = NULL; |
||||
DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); |
||||
D = Dst_REF; |
||||
D->psize = psz; |
||||
D->lglabels = NULL; |
||||
D->lgsize = 0; |
||||
D->pclabels = NULL; |
||||
D->pcsize = 0; |
||||
D->globals = NULL; |
||||
D->maxsection = maxsection; |
||||
for (i = 0; i < maxsection; i++) { |
||||
D->sections[i].buf = NULL; /* Need this for pass3. */ |
||||
D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); |
||||
D->sections[i].bsize = 0; |
||||
D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ |
||||
} |
||||
} |
||||
|
||||
/* Free DynASM state. */ |
||||
void dasm_free(Dst_DECL) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int i; |
||||
for (i = 0; i < D->maxsection; i++) |
||||
if (D->sections[i].buf) |
||||
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); |
||||
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); |
||||
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); |
||||
DASM_M_FREE(Dst, D, D->psize); |
||||
} |
||||
|
||||
/* Setup global label array. Must be called before dasm_setup(). */ |
||||
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
D->globals = gl - 10; /* Negative bias to compensate for locals. */ |
||||
DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); |
||||
} |
||||
|
||||
/* Grow PC label array. Can be called after dasm_setup(), too. */ |
||||
void dasm_growpc(Dst_DECL, unsigned int maxpc) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
size_t osz = D->pcsize; |
||||
DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); |
||||
memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); |
||||
} |
||||
|
||||
/* Setup encoder. */ |
||||
void dasm_setup(Dst_DECL, const void *actionlist) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int i; |
||||
D->actionlist = (dasm_ActList)actionlist; |
||||
D->status = DASM_S_OK; |
||||
D->section = &D->sections[0]; |
||||
memset((void *)D->lglabels, 0, D->lgsize); |
||||
if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); |
||||
for (i = 0; i < D->maxsection; i++) { |
||||
D->sections[i].pos = DASM_SEC2POS(i); |
||||
D->sections[i].ofs = 0; |
||||
} |
||||
} |
||||
|
||||
|
||||
/* Consistency checks used by the encoder passes.
** With DASM_CHECKS defined, CK(x, st) stores status DASM_S_<st> or'ed with
** the offset of the current action and returns when x is false; CKPL(kind,
** st) does the same with DASM_S_RANGE_<st> when the label pointer pl lies
** outside the <kind>labels array. Both rely on D, p and pl being in scope.
** Without DASM_CHECKS they expand to no-ops.
*/
#if defined(DASM_CHECKS)
#define CK(x, st) \
  do { \
    if (!(x)) { \
      D->status = DASM_S_##st|(p-D->actionlist-1); \
      return; \
    } \
  } while (0)
#define CKPL(kind, st) \
  do { \
    if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
      D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); \
      return; \
    } \
  } while (0)
#else
#define CK(x, st)	((void)0)
#define CKPL(kind, st)	((void)0)
#endif
||||
|
||||
/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ |
||||
void dasm_put(Dst_DECL, int start, ...) |
||||
{ |
||||
va_list ap; |
||||
dasm_State *D = Dst_REF; |
||||
dasm_ActList p = D->actionlist + start; |
||||
dasm_Section *sec = D->section; |
||||
int pos = sec->pos, ofs = sec->ofs; |
||||
int *b; |
||||
|
||||
if (pos >= sec->epos) { |
||||
DASM_M_GROW(Dst, int, sec->buf, sec->bsize, |
||||
sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); |
||||
sec->rbuf = sec->buf - DASM_POS2BIAS(pos); |
||||
sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); |
||||
} |
||||
|
||||
b = sec->rbuf; |
||||
b[pos++] = start; |
||||
|
||||
va_start(ap, start); |
||||
while (1) { |
||||
unsigned int ins = *p++; |
||||
unsigned int action = (ins >> 16); |
||||
if (action >= DASM__MAX) { |
||||
ofs += 4; |
||||
} else { |
||||
int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; |
||||
switch (action) { |
||||
case DASM_STOP: goto stop; |
||||
case DASM_SECTION: |
||||
n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); |
||||
D->section = &D->sections[n]; goto stop; |
||||
case DASM_ESC: p++; ofs += 4; break; |
||||
case DASM_REL_EXT: break; |
||||
case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; |
||||
case DASM_REL_LG: |
||||
n = (ins & 2047) - 10; pl = D->lglabels + n; |
||||
if (n >= 0) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */ |
||||
pl += 10; n = *pl; |
||||
if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ |
||||
goto linkrel; |
||||
case DASM_REL_PC: |
||||
pl = D->pclabels + n; CKPL(pc, PC); |
||||
putrel: |
||||
n = *pl; |
||||
if (n < 0) { /* Label exists. Get label pos and store it. */ |
||||
b[pos] = -n; |
||||
} else { |
||||
linkrel: |
||||
b[pos] = n; /* Else link to rel chain, anchored at label. */ |
||||
*pl = pos; |
||||
} |
||||
pos++; |
||||
break; |
||||
case DASM_LABEL_LG: |
||||
pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; |
||||
case DASM_LABEL_PC: |
||||
pl = D->pclabels + n; CKPL(pc, PC); |
||||
putlabel: |
||||
n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ |
||||
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; |
||||
} |
||||
*pl = -pos; /* Label exists now. */ |
||||
b[pos++] = ofs; /* Store pass1 offset estimate. */ |
||||
break; |
||||
case DASM_IMM: |
||||
#ifdef DASM_CHECKS |
||||
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); |
||||
if (ins & 0x8000) |
||||
CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); |
||||
else |
||||
CK((n>>((ins>>5)&31)) == 0, RANGE_I); |
||||
#endif |
||||
b[pos++] = n; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
stop: |
||||
va_end(ap); |
||||
sec->pos = pos; |
||||
sec->ofs = ofs; |
||||
} |
||||
#undef CK |
||||
|
||||
/* Pass 2: Link sections, shrink aligns, fix label offsets. */ |
||||
int dasm_link(Dst_DECL, size_t *szp) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int secnum; |
||||
int ofs = 0; |
||||
|
||||
#ifdef DASM_CHECKS |
||||
*szp = 0; |
||||
if (D->status != DASM_S_OK) return D->status; |
||||
{ |
||||
int pc; |
||||
for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) |
||||
if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; |
||||
} |
||||
#endif |
||||
|
||||
{ /* Handle globals not defined in this translation unit. */ |
||||
int idx; |
||||
for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { |
||||
int n = D->lglabels[idx]; |
||||
/* Undefined label: Collapse rel chain and replace with marker (< 0). */ |
||||
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } |
||||
} |
||||
} |
||||
|
||||
/* Combine all code sections. No support for data sections (yet). */ |
||||
for (secnum = 0; secnum < D->maxsection; secnum++) { |
||||
dasm_Section *sec = D->sections + secnum; |
||||
int *b = sec->rbuf; |
||||
int pos = DASM_SEC2POS(secnum); |
||||
int lastpos = sec->pos; |
||||
|
||||
while (pos != lastpos) { |
||||
dasm_ActList p = D->actionlist + b[pos++]; |
||||
while (1) { |
||||
unsigned int ins = *p++; |
||||
unsigned int action = (ins >> 16); |
||||
switch (action) { |
||||
case DASM_STOP: case DASM_SECTION: goto stop; |
||||
case DASM_ESC: p++; break; |
||||
case DASM_REL_EXT: break; |
||||
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; |
||||
case DASM_REL_LG: case DASM_REL_PC: pos++; break; |
||||
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; |
||||
case DASM_IMM: pos++; break; |
||||
} |
||||
} |
||||
stop: (void)0; |
||||
} |
||||
ofs += sec->ofs; /* Next section starts right after current section. */ |
||||
} |
||||
|
||||
D->codesize = ofs; /* Total size of all code sections */ |
||||
*szp = ofs; |
||||
return DASM_S_OK; |
||||
} |
||||
|
||||
#ifdef DASM_CHECKS |
||||
#define CK(x, st) \ |
||||
do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) |
||||
#else |
||||
#define CK(x, st) ((void)0) |
||||
#endif |
||||
|
||||
/* Pass 3: Encode sections. */ |
||||
int dasm_encode(Dst_DECL, void *buffer) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
char *base = (char *)buffer; |
||||
unsigned int *cp = (unsigned int *)buffer; |
||||
int secnum; |
||||
|
||||
/* Encode all code sections. No support for data sections (yet). */ |
||||
for (secnum = 0; secnum < D->maxsection; secnum++) { |
||||
dasm_Section *sec = D->sections + secnum; |
||||
int *b = sec->buf; |
||||
int *endb = sec->rbuf + sec->pos; |
||||
|
||||
while (b != endb) { |
||||
dasm_ActList p = D->actionlist + *b++; |
||||
while (1) { |
||||
unsigned int ins = *p++; |
||||
unsigned int action = (ins >> 16); |
||||
int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; |
||||
switch (action) { |
||||
case DASM_STOP: case DASM_SECTION: goto stop; |
||||
case DASM_ESC: *cp++ = *p++; break; |
||||
case DASM_REL_EXT: |
||||
n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); |
||||
goto patchrel; |
||||
case DASM_ALIGN: |
||||
ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; |
||||
break; |
||||
case DASM_REL_LG: |
||||
CK(n >= 0, UNDEF_LG); |
||||
case DASM_REL_PC: |
||||
CK(n >= 0, UNDEF_PC); |
||||
n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); |
||||
patchrel: |
||||
CK((n & 3) == 0 && |
||||
(((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >> |
||||
((ins & 2048) ? 16 : 26)) == 0, RANGE_REL); |
||||
cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); |
||||
break; |
||||
case DASM_LABEL_LG: |
||||
ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); |
||||
break; |
||||
case DASM_LABEL_PC: break; |
||||
case DASM_IMM: |
||||
cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31); |
||||
break; |
||||
default: *cp++ = ins; break; |
||||
} |
||||
} |
||||
stop: (void)0; |
||||
} |
||||
} |
||||
|
||||
if (base + D->codesize != (char *)cp) /* Check for phase errors. */ |
||||
return DASM_S_PHASE; |
||||
return DASM_S_OK; |
||||
} |
||||
#undef CK |
||||
|
||||
/* Get PC label offset. */ |
||||
int dasm_getpclabel(Dst_DECL, unsigned int pc) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
if (pc*sizeof(int) < D->pcsize) { |
||||
int pos = D->pclabels[pc]; |
||||
if (pos < 0) return *DASM_POS2PTR(D, -pos); |
||||
if (pos > 0) return -1; /* Undefined. */ |
||||
} |
||||
return -2; /* Unused or out of range. */ |
||||
} |
||||
|
||||
#ifdef DASM_CHECKS
/*
** Optional sanity checker to call between isolated encoding steps.
**
** Flags local labels 1..9 that are still referenced but undefined, resets
** the other local labels, and (for secmatch >= 0) verifies that section
** secmatch is the active one. Returns the resulting status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  int lbl;

  if (D->status != DASM_S_OK)
    return D->status;  /* Keep the first recorded error. */

  for (lbl = 1; lbl <= 9; lbl++) {
    if (D->lglabels[lbl] > 0) {
      D->status = DASM_S_UNDEF_LG|lbl;
      return D->status;
    }
    D->lglabels[lbl] = 0;
  }

  if (secmatch >= 0 && D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,83 @@ |
||||
/*
** DynASM encoding engine prototypes.
** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
** Released under the MIT/X license. See dynasm.lua for full copyright notice.
*/

#ifndef _DASM_PROTO_H
#define _DASM_PROTO_H

#include <stddef.h>
#include <stdarg.h>
/* The default memory macros below expand to realloc(), exit() and free(). */
#if !defined(DASM_M_GROW) || !defined(DASM_M_FREE)
#include <stdlib.h>
#endif

#define DASM_IDENT	"DynASM 1.2.2"
#define DASM_VERSION	10202	/* 1.2.2 */

/* Declaration of, and access to, the encoder state handle. Both can be
** overridden before including this header. */
#ifndef Dst_DECL
#define Dst_DECL	dasm_State **Dst
#endif

#ifndef Dst_REF
#define Dst_REF		(*Dst)
#endif

/* Linkage of the engine functions. */
#ifndef DASM_FDEF
#define DASM_FDEF	extern
#endif

/*
** Default allocator: grow buffer p (element type t, current size sz bytes)
** to at least need bytes. The size starts at 16 and is doubled until it is
** large enough; sz is updated. Exits the process if realloc() fails.
*/
#ifndef DASM_M_GROW
#define DASM_M_GROW(ctx, t, p, sz, need) \
  do { \
    size_t _sz = (sz), _need = (need); \
    if (_sz < _need) { \
      if (_sz < 16) _sz = 16; \
      while (_sz < _need) _sz += _sz; \
      (p) = (t *)realloc((p), _sz); \
      if ((p) == NULL) exit(1); \
      (sz) = _sz; \
    } \
  } while(0)
#endif

/* Default deallocator: release buffer p (sz is unused here). */
#ifndef DASM_M_FREE
#define DASM_M_FREE(ctx, p, sz)	free(p)
#endif

/* Internal DynASM encoder state. */
typedef struct dasm_State dasm_State;


/* Initialize and free DynASM state. */
DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
DASM_FDEF void dasm_free(Dst_DECL);

/* Setup global array. Must be called before dasm_setup(). */
DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);

/* Grow PC label array. Can be called after dasm_setup(), too. */
DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);

/* Setup encoder. */
DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist);

/* Feed encoder with actions. Calls are generated by pre-processor. */
DASM_FDEF void dasm_put(Dst_DECL, int start, ...);

/* Link sections and return the resulting size. */
DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);

/* Encode sections into buffer. */
DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);

/* Get PC label offset. */
DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);

#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps. */
DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
#else
/* Without checks the step checker always reports success (0). */
#define dasm_checkstep(a, b)	0
#endif


#endif /* _DASM_PROTO_H */
@ -0,0 +1,12 @@ |
||||
------------------------------------------------------------------------------ |
||||
-- DynASM x64 module. |
||||
-- |
||||
-- Copyright (C) 2005-2011 Mike Pall. All rights reserved. |
||||
-- See dynasm.lua for full copyright notice. |
||||
------------------------------------------------------------------------------ |
||||
-- This module just sets 64 bit mode for the combined x86/x64 module. |
||||
-- All the interesting stuff is there. |
||||
------------------------------------------------------------------------------ |
||||
|
||||
-- Set the global flag before loading the combined x86/x64 module, which does
-- all the actual work; this module returns whatever that module returns.
x64 = true -- Using a global is an ugly, but effective solution. |
||||
return require("dasm_x86") |
@ -0,0 +1,470 @@ |
||||
/*
|
||||
** DynASM x86 encoding engine. |
||||
** Copyright (C) 2005-2011 Mike Pall. All rights reserved. |
||||
** Released under the MIT/X license. See dynasm.lua for full copyright notice. |
||||
*/ |
||||
|
||||
#include <stddef.h> |
||||
#include <stdarg.h> |
||||
#include <string.h> |
||||
#include <stdlib.h> |
||||
|
||||
#define DASM_ARCH "x86" |
||||
|
||||
#ifndef DASM_EXTERN |
||||
#define DASM_EXTERN(a,b,c,d) 0 |
||||
#endif |
||||
|
||||
/* Action definitions. DASM_STOP must be 255. */
enum {
  DASM_DISP = 233,
  DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
  DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
  DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
  DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
};

/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS		25

/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK		0x00000000
#define DASM_S_NOMEM		0x01000000
#define DASM_S_PHASE		0x02000000
#define DASM_S_MATCH_SEC	0x03000000
#define DASM_S_RANGE_I		0x11000000
#define DASM_S_RANGE_SEC	0x12000000
#define DASM_S_RANGE_LG		0x13000000
#define DASM_S_RANGE_PC		0x14000000
#define DASM_S_RANGE_VREG	0x15000000
#define DASM_S_UNDEF_L		0x21000000
#define DASM_S_UNDEF_PC		0x22000000

/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
#define DASM_SEC2POS(sec)	((sec)<<24)
#define DASM_POS2SEC(pos)	((pos)>>24)
/* Pointer to the buffer slot of a biased position (rbuf is pre-biased). */
#define DASM_POS2PTR(D, pos)	((D)->sections[DASM_POS2SEC(pos)].rbuf + (pos))

/* Action list type. */
typedef const unsigned char *dasm_ActList;

/* Per-section structure. */
typedef struct dasm_Section {
  int *rbuf;		/* Biased buffer pointer (negative section bias). */
  int *buf;		/* True buffer pointer. */
  size_t bsize;		/* Buffer size in bytes. */
  int pos;		/* Biased buffer position. */
  int epos;		/* End of biased buffer position - max single put. */
  int ofs;		/* Byte offset into section. */
} dasm_Section;

/* Core structure holding the DynASM encoding state. */
struct dasm_State {
  size_t psize;			/* Allocated size of this structure. */
  dasm_ActList actionlist;	/* Current actionlist pointer. */
  int *lglabels;		/* Local/global chain/pos ptrs. */
  size_t lgsize;		/* Size of lglabels in bytes. */
  int *pclabels;		/* PC label chains/pos ptrs. */
  size_t pcsize;		/* Size of pclabels in bytes. */
  void **globals;		/* Array of globals (bias -10). */
  dasm_Section *section;	/* Pointer to active section. */
  size_t codesize;		/* Total size of all code sections. */
  int maxsection;		/* 0 <= sectionidx < maxsection. */
  int status;			/* Status code. */
  dasm_Section sections[1];	/* All sections. Alloc-extended. */
};

/* The size of the core structure depends on the max. number of sections.
** The argument is parenthesized so that any expression can be passed. */
#define DASM_PSZ(ms)	(sizeof(dasm_State)+((ms)-1)*sizeof(dasm_Section))
||||
|
||||
|
||||
/* Initialize DynASM state. */ |
||||
void dasm_init(Dst_DECL, int maxsection) |
||||
{ |
||||
dasm_State *D; |
||||
size_t psz = 0; |
||||
int i; |
||||
Dst_REF = NULL; |
||||
DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); |
||||
D = Dst_REF; |
||||
D->psize = psz; |
||||
D->lglabels = NULL; |
||||
D->lgsize = 0; |
||||
D->pclabels = NULL; |
||||
D->pcsize = 0; |
||||
D->globals = NULL; |
||||
D->maxsection = maxsection; |
||||
for (i = 0; i < maxsection; i++) { |
||||
D->sections[i].buf = NULL; /* Need this for pass3. */ |
||||
D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); |
||||
D->sections[i].bsize = 0; |
||||
D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ |
||||
} |
||||
} |
||||
|
||||
/* Free DynASM state. */ |
||||
void dasm_free(Dst_DECL) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int i; |
||||
for (i = 0; i < D->maxsection; i++) |
||||
if (D->sections[i].buf) |
||||
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); |
||||
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); |
||||
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); |
||||
DASM_M_FREE(Dst, D, D->psize); |
||||
} |
||||
|
||||
/* Setup global label array. Must be called before dasm_setup(). */ |
||||
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
D->globals = gl - 10; /* Negative bias to compensate for locals. */ |
||||
DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); |
||||
} |
||||
|
||||
/* Grow PC label array. Can be called after dasm_setup(), too. */ |
||||
void dasm_growpc(Dst_DECL, unsigned int maxpc) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
size_t osz = D->pcsize; |
||||
DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); |
||||
memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); |
||||
} |
||||
|
||||
/* Setup encoder. */ |
||||
void dasm_setup(Dst_DECL, const void *actionlist) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int i; |
||||
D->actionlist = (dasm_ActList)actionlist; |
||||
D->status = DASM_S_OK; |
||||
D->section = &D->sections[0]; |
||||
memset((void *)D->lglabels, 0, D->lgsize); |
||||
if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); |
||||
for (i = 0; i < D->maxsection; i++) { |
||||
D->sections[i].pos = DASM_SEC2POS(i); |
||||
D->sections[i].ofs = 0; |
||||
} |
||||
} |
||||
|
||||
|
||||
#ifdef DASM_CHECKS |
||||
#define CK(x, st) \ |
||||
do { if (!(x)) { \
|
||||
D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) |
||||
#define CKPL(kind, st) \ |
||||
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
|
||||
D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) |
||||
#else |
||||
#define CK(x, st) ((void)0) |
||||
#define CKPL(kind, st) ((void)0) |
||||
#endif |
||||
|
||||
/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ |
||||
void dasm_put(Dst_DECL, int start, ...) |
||||
{ |
||||
va_list ap; |
||||
dasm_State *D = Dst_REF; |
||||
dasm_ActList p = D->actionlist + start; |
||||
dasm_Section *sec = D->section; |
||||
int pos = sec->pos, ofs = sec->ofs, mrm = 4; |
||||
int *b; |
||||
|
||||
if (pos >= sec->epos) { |
||||
DASM_M_GROW(Dst, int, sec->buf, sec->bsize, |
||||
sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); |
||||
sec->rbuf = sec->buf - DASM_POS2BIAS(pos); |
||||
sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); |
||||
} |
||||
|
||||
b = sec->rbuf; |
||||
b[pos++] = start; |
||||
|
||||
va_start(ap, start); |
||||
while (1) { |
||||
int action = *p++; |
||||
if (action < DASM_DISP) { |
||||
ofs++; |
||||
} else if (action <= DASM_REL_A) { |
||||
int n = va_arg(ap, int); |
||||
b[pos++] = n; |
||||
switch (action) { |
||||
case DASM_DISP: |
||||
if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } |
||||
case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; |
||||
case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ |
||||
case DASM_IMM_D: ofs += 4; break; |
||||
case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; |
||||
case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; |
||||
case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; |
||||
case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; |
||||
case DASM_SPACE: p++; ofs += n; break; |
||||
case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ |
||||
case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); |
||||
if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue; |
||||
} |
||||
mrm = 4; |
||||
} else { |
||||
int *pl, n; |
||||
switch (action) { |
||||
case DASM_REL_LG: |
||||
case DASM_IMM_LG: |
||||
n = *p++; pl = D->lglabels + n; |
||||
if (n <= 246) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */ |
||||
pl -= 246; n = *pl; |
||||
if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ |
||||
goto linkrel; |
||||
case DASM_REL_PC: |
||||
case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); |
||||
putrel: |
||||
n = *pl; |
||||
if (n < 0) { /* Label exists. Get label pos and store it. */ |
||||
b[pos] = -n; |
||||
} else { |
||||
linkrel: |
||||
b[pos] = n; /* Else link to rel chain, anchored at label. */ |
||||
*pl = pos; |
||||
} |
||||
pos++; |
||||
ofs += 4; /* Maximum offset needed. */ |
||||
if (action == DASM_REL_LG || action == DASM_REL_PC) |
||||
b[pos++] = ofs; /* Store pass1 offset estimate. */ |
||||
break; |
||||
case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; |
||||
case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); |
||||
putlabel: |
||||
n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ |
||||
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } |
||||
*pl = -pos; /* Label exists now. */ |
||||
b[pos++] = ofs; /* Store pass1 offset estimate. */ |
||||
break; |
||||
case DASM_ALIGN: |
||||
ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */ |
||||
b[pos++] = ofs; /* Store pass1 offset estimate. */ |
||||
break; |
||||
case DASM_EXTERN: p += 2; ofs += 4; break; |
||||
case DASM_ESC: p++; ofs++; break; |
||||
case DASM_MARK: mrm = p[-2]; break; |
||||
case DASM_SECTION: |
||||
n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n]; |
||||
case DASM_STOP: goto stop; |
||||
} |
||||
} |
||||
} |
||||
stop: |
||||
va_end(ap); |
||||
sec->pos = pos; |
||||
sec->ofs = ofs; |
||||
} |
||||
#undef CK |
||||
|
||||
/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */ |
||||
int dasm_link(Dst_DECL, size_t *szp) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
int secnum; |
||||
int ofs = 0; |
||||
|
||||
#ifdef DASM_CHECKS |
||||
*szp = 0; |
||||
if (D->status != DASM_S_OK) return D->status; |
||||
{ |
||||
int pc; |
||||
for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) |
||||
if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; |
||||
} |
||||
#endif |
||||
|
||||
{ /* Handle globals not defined in this translation unit. */ |
||||
int idx; |
||||
for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { |
||||
int n = D->lglabels[idx]; |
||||
/* Undefined label: Collapse rel chain and replace with marker (< 0). */ |
||||
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } |
||||
} |
||||
} |
||||
|
||||
/* Combine all code sections. No support for data sections (yet). */ |
||||
for (secnum = 0; secnum < D->maxsection; secnum++) { |
||||
dasm_Section *sec = D->sections + secnum; |
||||
int *b = sec->rbuf; |
||||
int pos = DASM_SEC2POS(secnum); |
||||
int lastpos = sec->pos; |
||||
|
||||
while (pos != lastpos) { |
||||
dasm_ActList p = D->actionlist + b[pos++]; |
||||
while (1) { |
||||
int op, action = *p++; |
||||
switch (action) { |
||||
case DASM_REL_LG: p++; op = p[-3]; goto rel_pc; |
||||
case DASM_REL_PC: op = p[-2]; rel_pc: { |
||||
int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); |
||||
if (shrink) { /* Shrinkable branch opcode? */ |
||||
int lofs, lpos = b[pos]; |
||||
if (lpos < 0) goto noshrink; /* Ext global? */ |
||||
lofs = *DASM_POS2PTR(D, lpos); |
||||
if (lpos > pos) { /* Fwd label: add cumulative section offsets. */ |
||||
int i; |
||||
for (i = secnum; i < DASM_POS2SEC(lpos); i++) |
||||
lofs += D->sections[i].ofs; |
||||
} else { |
||||
lofs -= ofs; /* Bkwd label: unfix offset. */ |
||||
} |
||||
lofs -= b[pos+1]; /* Short branch ok? */ |
||||
if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */ |
||||
else { noshrink: shrink = 0; } /* No, cannot shrink op. */ |
||||
} |
||||
b[pos+1] = shrink; |
||||
pos += 2; |
||||
break; |
||||
} |
||||
case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; |
||||
case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: |
||||
case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: |
||||
case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; |
||||
case DASM_LABEL_LG: p++; |
||||
case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ |
||||
case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ |
||||
case DASM_EXTERN: p += 2; break; |
||||
case DASM_ESC: p++; break; |
||||
case DASM_MARK: break; |
||||
case DASM_SECTION: case DASM_STOP: goto stop; |
||||
} |
||||
} |
||||
stop: (void)0; |
||||
} |
||||
ofs += sec->ofs; /* Next section starts right after current section. */ |
||||
} |
||||
|
||||
D->codesize = ofs; /* Total size of all code sections */ |
||||
*szp = ofs; |
||||
return DASM_S_OK; |
||||
} |
||||
|
||||
#define dasmb(x) *cp++ = (unsigned char)(x) |
||||
#ifndef DASM_ALIGNED_WRITES |
||||
#define dasmw(x) \ |
||||
do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) |
||||
#define dasmd(x) \ |
||||
do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) |
||||
#else |
||||
#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) |
||||
#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) |
||||
#endif |
||||
|
||||
/* Pass 3: Encode sections. */ |
||||
int dasm_encode(Dst_DECL, void *buffer) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
unsigned char *base = (unsigned char *)buffer; |
||||
unsigned char *cp = base; |
||||
int secnum; |
||||
|
||||
/* Encode all code sections. No support for data sections (yet). */ |
||||
for (secnum = 0; secnum < D->maxsection; secnum++) { |
||||
dasm_Section *sec = D->sections + secnum; |
||||
int *b = sec->buf; |
||||
int *endb = sec->rbuf + sec->pos; |
||||
|
||||
while (b != endb) { |
||||
dasm_ActList p = D->actionlist + *b++; |
||||
unsigned char *mark = NULL; |
||||
while (1) { |
||||
int action = *p++; |
||||
int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0; |
||||
switch (action) { |
||||
case DASM_DISP: if (!mark) mark = cp; { |
||||
unsigned char *mm = mark; |
||||
if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL; |
||||
if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7; |
||||
if (mrm != 5) { mm[-1] -= 0x80; break; } } |
||||
if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; |
||||
} |
||||
case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; |
||||
case DASM_IMM_DB: if (((n+128)&-256) == 0) { |
||||
db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; |
||||
} else mark = NULL; |
||||
case DASM_IMM_D: wd: dasmd(n); break; |
||||
case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; |
||||
case DASM_IMM_W: dasmw(n); break; |
||||
case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } |
||||
case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; |
||||
b++; n = (int)(ptrdiff_t)D->globals[-n]; |
||||
case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ |
||||
case DASM_REL_PC: rel_pc: { |
||||
int shrink = *b++; |
||||
int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } |
||||
n = *pb - ((int)(cp-base) + 4-shrink); |
||||
if (shrink == 0) goto wd; |
||||
if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb; |
||||
goto wb; |
||||
} |
||||
case DASM_IMM_LG: |
||||
p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } |
||||
case DASM_IMM_PC: { |
||||
int *pb = DASM_POS2PTR(D, n); |
||||
n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); |
||||
goto wd; |
||||
} |
||||
case DASM_LABEL_LG: { |
||||
int idx = *p++; |
||||
if (idx >= 10) |
||||
D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); |
||||
break; |
||||
} |
||||
case DASM_LABEL_PC: case DASM_SETLABEL: break; |
||||
case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; } |
||||
case DASM_ALIGN: |
||||
n = *p++; |
||||
while (((cp-base) & n)) *cp++ = 0x90; /* nop */ |
||||
break; |
||||
case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; |
||||
case DASM_MARK: mark = cp; break; |
||||
case DASM_ESC: action = *p++; |
||||
default: *cp++ = action; break; |
||||
case DASM_SECTION: case DASM_STOP: goto stop; |
||||
} |
||||
} |
||||
stop: (void)0; |
||||
} |
||||
} |
||||
|
||||
if (base + D->codesize != cp) /* Check for phase errors. */ |
||||
return DASM_S_PHASE; |
||||
return DASM_S_OK; |
||||
} |
||||
|
||||
/* Get PC label offset. */ |
||||
int dasm_getpclabel(Dst_DECL, unsigned int pc) |
||||
{ |
||||
dasm_State *D = Dst_REF; |
||||
if (pc*sizeof(int) < D->pcsize) { |
||||
int pos = D->pclabels[pc]; |
||||
if (pos < 0) return *DASM_POS2PTR(D, -pos); |
||||
if (pos > 0) return -1; /* Undefined. */ |
||||
} |
||||
return -2; /* Unused or out of range. */ |
||||
} |
||||
|
||||
#ifdef DASM_CHECKS
/*
** Optional sanity checker to call between isolated encoding steps.
**
** Flags local labels 1..9 that are still referenced but undefined, resets
** the other local labels, and (for secmatch >= 0) verifies that section
** secmatch is the active one. Returns the resulting status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  int lbl;

  if (D->status != DASM_S_OK)
    return D->status;  /* Keep the first recorded error. */

  for (lbl = 1; lbl <= 9; lbl++) {
    if (D->lglabels[lbl] > 0) {
      D->status = DASM_S_UNDEF_L|lbl;
      return D->status;
    }
    D->lglabels[lbl] = 0;
  }

  if (secmatch >= 0 && D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
  return D->status;
}
#endif
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,228 +0,0 @@ |
||||
DEFAULT REL ; Default to RIP-relative addressing instead of absolute. |
||||
|
||||
extern _upb_decode_varint_fast64 |
||||
|
||||
SECTION .data |
||||
|
||||
; Our dispatch table; used to jump to the right handler, keyed on the field's |
||||
; type. |
||||
dispatch_table: |
||||
dq _upb_fastdecode.cant_fast_path ; field not in table (type == 0). (check_4). |
||||
dq _upb_fastdecode.fixed64 ; double |
||||
dq _upb_fastdecode.fixed32 ; float |
||||
dq _upb_fastdecode.varint ; int64 |
||||
dq _upb_fastdecode.varint ; uint64 |
||||
dq _upb_fastdecode.varint ; int32 |
||||
dq _upb_fastdecode.fixed64 ; fixed64 |
||||
dq _upb_fastdecode.fixed32 ; fixed32 |
||||
dq _upb_fastdecode.varint ; bool |
||||
dq _upb_fastdecode.string ; string |
||||
dq _upb_fastdecode.cant_fast_path ; group (check_6) |
||||
dq _upb_fastdecode.cant_fast_path ; message |
||||
dq _upb_fastdecode.string ; bytes |
||||
dq _upb_fastdecode.varint ; uint32 |
||||
dq _upb_fastdecode.varint ; enum |
||||
dq _upb_fastdecode.fixed32 ; sfixed32 |
||||
dq _upb_fastdecode.fixed64 ; sfixed64 |
||||
dq _upb_fastdecode.varint_sint32 ; sint32 |
||||
dq _upb_fastdecode.varint_sint64 ; sint64 |
||||
|
||||
GLOBAL _upb_decode_fast |
||||
|
||||
SECTION .text |
||||
; Register allocation.
; Every alias below names a callee-saved register (all of them are pushed in
; the _upb_fastdecode prologue), so the values survive calls to the varint
; decoder and to the user callback.
;
; The code in this file refers to FIELDDEF and CALLBACK; they take the two
; callee-saved registers that were previously aliased under names the code
; never used.
%define BUF      rbx  ; const char *p, current buf position.
%define END      rbp  ; const char *end, where the buf ends (either submsg end or buf end)
%define FIELDDEF r12  ; fielddef loaded from the lookup table; passed to the callback in rsi.
%define FVAL     r13  ; upb_value fval, needs to be preserved across varint decoding call.
%define CALLBACK r14  ; value callback (third argument of _upb_fastdecode).
%define CLOSURE  r15  ; closure passed to the callback in rdi.
||||
|
||||
; Stack layout: *tableptr, committed buf pointer, uint32_t maxfield_times_8 |
||||
%define STACK_SPACE 24 ; this value + 8 must be a multiple of 16. |
||||
%define TABLE_SPILL [rsp] ; our lookup table, indexed by field number. |
||||
%define COMMITTED_BUF_SPILL [rsp+8] |
||||
%define MAXFIELD_TIMES_8_SPILL [rsp+16] |
||||
|
||||
|
||||
; Executing the fast path requires the following conditions: |
||||
; - check_1: there are >=12 bytes left (<=2 byte tag and <=10 byte varint). |
||||
; - check_2: the tag is <= 2 bytes. |
||||
; - check_3: the field number is <= the table size |
||||
; (ie. it must be an array lookup, not a hash lookup). |
||||
; - check_4: the field is known (found in the table). |
||||
; - check_5: the wire type we read is correct for the field number, |
||||
; ("packed" fields are not accepted, yet. this could be handled |
||||
; efficiently by doing an extra check on the "type check failed" |
||||
; path that goes into a tight loop if the encoding was packed). |
||||
; - check_6: the field is not a group or a message (or string, TODO) |
||||
; (this could be relaxed, but due to delegation it's a bit tricky). |
||||
; - check_7: if the value is a string, the entire string is available in |
||||
; the buffer, and our cached string object can be recycled, and |
||||
; our string object already references the source buffer, so |
||||
; absolutely no refcount twiddling is required. |
||||
|
||||
|
||||
; Emits the shared tag-decoding / dispatch sequence and defines the local
; label .decode_and_dispatch.  Expand this exactly once; everything else
; jumps to the label via the decode_and_dispatch macro.
;
; On dispatch: BUF points just past the tag, rdi == BUF, ecx holds the wire
; type, and FIELDDEF holds the table entry's fielddef pointer.
%macro decode_and_dispatch_ 0
align 16
.decode_and_dispatch:
  ; Load a few values we'll need in a sec.
  mov   r8, TABLE_SPILL
  mov   r9d, MAXFIELD_TIMES_8_SPILL

  mov   rax, END
  sub   rax, BUF
  cmp   rax, 12
  jb    _upb_fastdecode.cant_fast_path  ; check_1 (<12 bytes left).

  ; Decode a 1 or 2-byte varint -> eax.
  mov   cl, byte [BUF]
  lea   rdi, [BUF+1]
  movzx eax, cl
  and   eax, 0x7f
  test  cl, cl
  jns   .one_byte_tag  ; Should be predictable if fields are in order.
  movzx ecx, byte [BUF+1]
  lea   rdi, [BUF+2]
  mov   edx, ecx
  and   edx, 0x7f
  shl   edx, 7
  or    eax, edx
  ; The continuation bit lives in the raw second byte (cl).  After the merge
  ; above, bit 7 of al is payload bit 0 of the second byte, so al must not
  ; be used for this test.
  test  cl, cl
  js    _upb_fastdecode.cant_fast_path  ; check_2 (tag was >2 bytes).
.one_byte_tag:
  mov   BUF, rdi

  ; Decode tag and dispatch.
  mov   ecx, eax
  and   eax, 0x3ff8        ; eax now contains field number * 8
  lea   r11, [r8+rax*2]    ; *2 is really *16, since rax is already *8.
  and   ecx, 0x7           ; ecx now contains wire type.
  cmp   eax, r9d
  jae   _upb_fastdecode.cant_fast_path  ; check_3 (field number > table size)
  mov   FIELDDEF, [r11+8]  ; Lookup fielddef (upb_itof_ent.f)
  movzx rdx, BYTE [r11+1]  ; Lookup field type.
  mov   rax, qword dispatch_table
  jmp   [rax+rdx*8]
%endmacro
||||
|
||||
%macro decode_and_dispatch 0 |
||||
jmp .decode_and_dispatch |
||||
%endmacro |
||||
|
||||
%macro call_callback 0 |
||||
; Value arg must already be in rdx when macro is called. |
||||
mov rdi, CLOSURE |
||||
mov rsi, FIELDDEF |
||||
mov rcx, 33 ; RAW; we could pass the correct type, or only do this in non-debug modes. |
||||
call CALLBACK |
||||
mov COMMITTED_BUF_SPILL, BUF |
||||
cmp eax, 0 |
||||
jne .done ; Caller requested BREAK or SKIPSUBMSG. |
||||
%endmacro |
||||
|
||||
%macro check_type 1 |
||||
cmp ecx, %1 |
||||
jne _upb_fastdecode.cant_fast_path ; check_5 (wire type check failed). |
||||
%endmacro |
||||
|
||||
; extern upb_flow_t upb_fastdecode(const char **p, const char *end, |
||||
; upb_value_handler_t value_cb, void *closure, |
||||
; void *table, int table_size); |
||||
align 16 |
||||
global _upb_fastdecode |
||||
_upb_fastdecode: |
||||
; We use all callee-save regs. |
||||
push rbx |
||||
push rbp |
||||
push r12 |
||||
push r13 |
||||
push r14 |
||||
push r15 |
||||
sub rsp, STACK_SPACE |
||||
|
||||
; Parse arguments into reg vals and stack. |
||||
mov BUF, rdi |
||||
mov COMMITTED_BUF_SPILL, rdi |
||||
mov END, rsi |
||||
mov CALLBACK, rdx |
||||
mov CLOSURE, rcx |
||||
mov TABLE_SPILL, r8 |
||||
shl r9, 3 |
||||
mov MAXFIELD_TIMES_8_SPILL, r9 |
||||
|
||||
decode_and_dispatch |
||||
|
||||
align 16 |
||||
.varint: |
||||
call _upb_decode_varint_fast64 ; BUF is already in rdi. |
||||
test rax, rax |
||||
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error. |
||||
mov BUF, rax |
||||
call_callback ; rdx already holds value. |
||||
decode_and_dispatch_ |
||||
|
||||
align 16 |
||||
.fixed32: |
||||
mov edx, DWORD [BUF] ; Might be unaligned, but that's ok. |
||||
add BUF, 4 |
||||
call_callback |
||||
decode_and_dispatch |
||||
|
||||
align 16 |
||||
.fixed64: |
||||
mov rdx, QWORD [BUF] ; Might be unaligned, but that's ok. |
||||
add BUF, 8 |
||||
call_callback |
||||
decode_and_dispatch |
||||
|
||||
align 16 |
||||
.varint_sint32: |
||||
call _upb_decode_varint_fast64 ; BUF is already in rdi. |
||||
test rax, rax |
||||
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error. |
||||
mov BUF, rax |
||||
|
||||
; Perform 32-bit zig-zag decoding. |
||||
mov ecx, edx |
||||
shr edx, 1 |
||||
and ecx, 0x1 |
||||
neg ecx |
||||
xor edx, ecx |
||||
call_callback |
||||
decode_and_dispatch |
||||
|
||||
align 16 |
||||
.varint_sint64: |
||||
call _upb_decode_varint_fast64 ; BUF is already in rdi. |
||||
test rax, rax |
||||
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error. |
||||
mov BUF, rax |
||||
|
||||
; Perform 64-bit zig-zag decoding. |
||||
mov rcx, rdx |
||||
shr rdx, 1 |
||||
and ecx, 0x1 |
||||
neg rcx |
||||
xor rdx, rcx |
||||
call_callback |
||||
decode_and_dispatch |
||||
|
||||
align 16 |
||||
.string: |
||||
|
||||
.cant_fast_path: |
||||
mov rax, 0 ; UPB_CONTINUE -- continue as before. |
||||
.done: |
||||
; If coming via done, preserve the user callback's return in rax. |
||||
|
||||
; Return committed buf pointer as second parameter. |
||||
mov rdx, COMMITTED_BUF_SPILL |
||||
add rsp, STACK_SPACE |
||||
pop r15 |
||||
pop r14 |
||||
pop r13 |
||||
pop r12 |
||||
pop rbp |
||||
pop rbx |
||||
ret |
@ -0,0 +1,649 @@ |
||||
|// |
||||
|// upb - a minimalist implementation of protocol buffers. |
||||
|// |
||||
|// Copyright (c) 2011 Google Inc. See LICENSE for details. |
||||
|// Author: Josh Haberman <jhaberman@gmail.com> |
||||
|// |
||||
|// JIT compiler for upb_decoder on x86. Given a upb_handlers object, |
||||
|// generates code specialized to parsing the specific message and |
||||
|// calling specific handlers. |
||||
|
||||
#define UPB_NONE -1 |
||||
#define UPB_MULTIPLE -2 |
||||
#define UPB_TOPLEVEL_ONE -3 |
||||
|
||||
#include <sys/mman.h> |
||||
#include "dynasm/dasm_proto.h" |
||||
#include "dynasm/dasm_x86.h" |
||||
|
||||
// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code |
||||
// at runtime. GDB 7.x+ has defined an interface for doing this, and these |
||||
// structure/function definitions are copied out of gdb/jit.h |
||||
// |
||||
// We need to give GDB an ELF file at runtime describing the symbols we have |
||||
// generated. To avoid implementing the ELF format, we generate an ELF file |
||||
// at compile-time and compile it in as a character string. We can replace |
||||
// a few key constants (address of JIT-ted function and its size) by looking |
||||
// for a few magic numbers and doing a dumb string replacement. |
||||
#include "jit_debug_elf_file.h" |
||||
|
||||
typedef enum |
||||
{ |
||||
GDB_JIT_NOACTION = 0, |
||||
GDB_JIT_REGISTER, |
||||
GDB_JIT_UNREGISTER |
||||
} jit_actions_t; |
||||
|
||||
typedef struct gdb_jit_entry { |
||||
struct gdb_jit_entry *next_entry; |
||||
struct gdb_jit_entry *prev_entry; |
||||
const char *symfile_addr; |
||||
uint64_t symfile_size; |
||||
} gdb_jit_entry; |
||||
|
||||
typedef struct { |
||||
uint32_t version; |
||||
uint32_t action_flag; |
||||
gdb_jit_entry *relevant_entry; |
||||
gdb_jit_entry *first_entry; |
||||
} gdb_jit_descriptor; |
||||
|
||||
gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL}; |
||||
|
||||
void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); } |
||||
|
||||
|.arch x64 |
||||
|.actionlist upb_jit_actionlist |
||||
|.globals UPB_JIT_GLOBAL_ |
||||
|.globalnames upb_jit_globalnames |
||||
| |
||||
|// Calling conventions. |
||||
|.define ARG1_64, rdi |
||||
|.define ARG2_8, sil |
||||
|.define ARG2_32, esi |
||||
|.define ARG2_64, rsi |
||||
|.define ARG3_8, dl |
||||
|.define ARG3_32, edx |
||||
|.define ARG3_64, rdx |
||||
| |
||||
|// Register allocation / type map. |
||||
|// ALL of the code in this file uses these register allocations. |
||||
|// When we "call" within this file, we do not use regular calling |
||||
|// conventions, but of course when calling to user callbacks we must. |
||||
|.define PTR, rbx |
||||
|.define CLOSURE, r12 |
||||
|.type FRAME, upb_dispatcher_frame, r13 |
||||
|.type STRING, upb_string, r14 |
||||
|.type DECODER, upb_decoder, r15 |
||||
| |
||||
|.macro callp, addr |
||||
|| if ((uintptr_t)addr < 0xffffffff) { |
||||
| call &addr |
||||
|| } else { |
||||
| mov64 rax, (uintptr_t)addr |
||||
| call rax |
||||
|| } |
||||
|.endmacro |
||||
| |
||||
|// Checks PTR for end-of-buffer. |
||||
|.macro check_eob, m |
||||
| cmp PTR, DECODER->effective_end |
||||
|| if (m->is_group) { |
||||
| jae ->exit_jit |
||||
|| } else { |
||||
| jae =>m->jit_endofbuf_pclabel |
||||
|| } |
||||
|.endmacro |
||||
| |
||||
|// Decodes varint from [PTR + offset] -> ARG3. |
||||
|// Saves new pointer as rax. |
||||
|.macro decode_loaded_varint, offset |
||||
| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder. |
||||
| lea rax, [PTR + offset + 1] |
||||
| mov ARG3_32, ecx |
||||
| and ARG3_32, 0x7f |
||||
| test cl, cl |
||||
| jns >9 |
||||
| lea rax, [PTR + offset + 2] |
||||
| movzx esi, ch |
||||
| and esi, 0x7f |
||||
| shl esi, 7 |
||||
| or ARG3_32, esi |
||||
| test cx, cx |
||||
| jns >9 |
||||
| mov ARG1_64, rax |
||||
| mov ARG2_32, ARG3_32 |
||||
| callp upb_vdecode_max8_fast |
||||
| test rax, rax |
||||
| jz ->exit_jit // >10-byte varint. |
||||
|9: |
||||
|.endmacro |
||||
| |
||||
|.macro decode_varint, offset |
||||
| mov ecx, dword [PTR + offset] |
||||
| decode_loaded_varint offset |
||||
| mov PTR, rax |
||||
|.endmacro |
||||
| |
||||
|// Decode the tag -> edx. |
||||
|// Could specialize this by avoiding the value masking: could just key the |
||||
|// table on the raw (length-masked) varint to save 3-4 cycles of latency. |
||||
|// Currently only support tables where all entries are in the array part. |
||||
|.macro dyndispatch, m |
||||
| decode_loaded_varint, 0 |
||||
| mov ecx, edx |
||||
| shr ecx, 3 |
||||
| and edx, 0x7 |
||||
| cmp ecx, m->max_field_number // Bounds-check the field. |
||||
| ja ->exit_jit // In the future; could be unknown label |
||||
| mov rcx, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables. |
||||
| jmp rcx // Dispatch: unpredictable jump. |
||||
|.endmacro |
||||
| |
||||
|.macro setmsgend, m |
||||
| mov rsi, DECODER->jit_end |
||||
|| if (m->is_group) { |
||||
| mov64 rax, 0xffffffffffffffff |
||||
| mov qword DECODER->submsg_end, rax |
||||
| mov DECODER->effective_end, rsi |
||||
|| } else { |
||||
| // Could store a correctly-biased version in the frame, at the cost of |
||||
| // a larger stack. |
||||
| mov eax, dword FRAME->end_offset |
||||
| add rax, qword DECODER->buf |
||||
| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset |
||||
| cmp rax, rsi |
||||
| jb >1 |
||||
| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end) |
||||
|1: |
||||
| mov DECODER->effective_end, rax |
||||
|| } |
||||
|.endmacro |
||||
| |
||||
|// rcx contains the bytes loaded from the tag position; compare them against
|// "tag".  Since the tag is a varint we must only compare as many bytes as
|// actually have data.  Sets the flags for a following je/jne; the 3- and
|// 5-byte cases mask rcx in place (the 5-byte case also clobbers rdx).
|.macro checktag, tag
||  switch (upb_value_size(tag)) {
||    case 1:
|       cmp cl, tag
||      break;
||    case 2:
|       cmp cx, tag
||      break;
||    case 3:
|       and ecx, 0xffffff  // 3 bytes
|       cmp rcx, tag
||      break;  // Without this the 4-byte compare below was emitted as well.
||    case 4:
|       cmp ecx, tag
||      break;
||    case 5:
|       mov64 rdx, 0xffffffffff  // 5 bytes
|       and rcx, rdx
|       cmp rcx, tag
||      break;
||    default: abort();
||  }
|.endmacro
||||
| |
||||
|// Loads the field's fval into the second argument register, using the
|// shortest encoding that can represent the value:
|//   - zero:             xor (no immediate at all)
|//   - fits in 32 bits:  32-bit mov, which zero-extends into the full register
|//   - anything else:    mov64, since a plain "mov r64, imm" can only encode a
|//                       sign-extended 32-bit immediate and would truncate
|//                       64-bit values such as pointers.
|.macro loadfval, f
||  if (f->fval.val.uint64 == 0) {
|     xor ARG2_32, ARG2_32
||  } else if (f->fval.val.uint64 <= 0xffffffff) {
|     mov ARG2_32, f->fval.val.uint64
||  } else {
|     mov64 ARG2_64, f->fval.val.uint64
||  }
|.endmacro
||||
|
||||
#include <stdlib.h> |
||||
#include "upb_varint_decoder.h" |
||||
|
||||
// Returns the number of bytes (1-8) needed to hold "val", i.e. the index of
// its highest non-zero byte plus one.  Zero is reported as occupying 1 byte.
static size_t upb_value_size(uint64_t val) {
  // Handle zero first: __builtin_clzll(0) is undefined, so it must never be
  // evaluated for this input.
  if (val == 0) return 1;
#ifdef __GNUC__
  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of highest set bit.
#else
  int high_bit = 0;
  uint64_t tmp = val;
  while (tmp >>= 1) high_bit++;
#endif
  return (size_t)high_bit / 8 + 1;
}
||||
|
||||
// Encodes "val" as a varint and returns the encoded bytes packed into a
// uint64_t, first byte in the least-significant position (so the result can
// be compared directly against bytes loaded from the wire on a little-endian
// machine).  Each byte carries 7 payload bits; every byte except the last has
// its high (continuation) bit set.
//
// Only the first 8 encoded bytes fit in the result, so values >= 2^56 (which
// need 9 or 10 bytes) are truncated to their first 8 bytes.
static uint64_t upb_encode_varint(uint64_t val)
{
  uint64_t ret = 0;
  for (int bitpos = 0; val && bitpos < 64; bitpos += 8, val >>= 7) {
    // Another byte follows the previous one: set its continuation bit.  The
    // shift must be done in 64 bits; an int shift overflows once bitpos
    // reaches 32 and corrupts the upper half of the result.
    if (bitpos > 0) ret |= (UINT64_C(1) << (bitpos - 1));
    ret |= (val & 0x7f) << bitpos;
  }
  return ret;
}
||||
|
||||
// PTR should point to the beginning of the tag. |
||||
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag, |
||||
upb_handlers_msgent *m, |
||||
upb_handlers_fieldent *f, upb_handlers_fieldent *next_f) { |
||||
int tag_size = upb_value_size(tag); |
||||
|
||||
// PC-label for the dispatch table. |
||||
// We check the wire type (which must be loaded in edx) because the |
||||
// table is keyed on field number, not type. |
||||
|=>f->jit_pclabel: |
||||
| cmp edx, upb_types[f->type].native_wire_type |
||||
| jne ->exit_jit // In the future: could be an unknown field. |
||||
|=>f->jit_pclabel_notypecheck: |
||||
|1: // Label for repeating this field. |
||||
|
||||
// Decode the value into arg 3 for the callback. |
||||
switch (f->type) { |
||||
case UPB_TYPE(DOUBLE): |
||||
case UPB_TYPE(FIXED64): |
||||
case UPB_TYPE(SFIXED64): |
||||
| mov ARG3_64, qword [PTR + tag_size] |
||||
| add PTR, 8 + tag_size |
||||
break; |
||||
|
||||
case UPB_TYPE(FLOAT): |
||||
case UPB_TYPE(FIXED32): |
||||
case UPB_TYPE(SFIXED32): |
||||
| mov ARG3_32, dword [PTR + tag_size] |
||||
| add PTR, 4 + tag_size |
||||
break; |
||||
|
||||
case UPB_TYPE(BOOL): |
||||
// Can't assume it's one byte long, because bool must be wire-compatible |
||||
// with all of the varint integer types. |
||||
| decode_varint tag_size |
||||
| test ARG3_64, ARG3_64 |
||||
| setne ARG3_8 // Other bytes left with val, should be ok. |
||||
break; |
||||
|
||||
case UPB_TYPE(INT64): |
||||
case UPB_TYPE(UINT64): |
||||
case UPB_TYPE(INT32): |
||||
case UPB_TYPE(UINT32): |
||||
case UPB_TYPE(ENUM): |
||||
| decode_varint tag_size |
||||
break; |
||||
|
||||
case UPB_TYPE(SINT64): |
||||
// 64-bit zig-zag decoding. |
||||
| decode_varint tag_size |
||||
| mov rax, ARG3_64 |
||||
| shr ARG3_64, 1 |
||||
| and rax, 1 |
||||
| neg rax |
||||
| xor ARG3_64, rax |
||||
break; |
||||
|
||||
case UPB_TYPE(SINT32): |
||||
// 32-bit zig-zag decoding. |
||||
| decode_varint tag_size |
||||
| mov eax, ARG3_32 |
||||
| shr ARG3_32, 1 |
||||
| and eax, 1 |
||||
| neg eax |
||||
| xor ARG3_32, eax |
||||
break; |
||||
|
||||
case UPB_TYPE(STRING): |
||||
case UPB_TYPE(BYTES): |
||||
// We only handle the case where the entire string is in our current |
||||
// buf, which sidesteps any security problems. The C path has more |
||||
// robust checks. |
||||
| decode_varint tag_size |
||||
| mov STRING->len, ARG3_32 |
||||
| mov STRING->ptr, PTR |
||||
| add PTR, ARG3_64 |
||||
| mov ARG3_64, STRING |
||||
| cmp PTR, DECODER->effective_end |
||||
| ja ->exit_jit // Can't deliver, whole string not in buf. |
||||
break; |
||||
|
||||
case UPB_TYPE_ENDGROUP: // A pseudo-type. |
||||
| add PTR, tag_size |
||||
| mov DECODER->ptr, PTR |
||||
| jmp =>m->jit_endofmsg_pclabel |
||||
return; |
||||
|
||||
case UPB_TYPE(MESSAGE): |
||||
| decode_varint tag_size |
||||
case UPB_TYPE(GROUP): |
||||
// Will dispatch callbacks and call submessage in a second. |
||||
break; |
||||
|
||||
default: abort(); |
||||
} |
||||
// Commit our work by advancing ptr. |
||||
// (If in the future we wanted to support a UPB_SUSPEND_AGAIN that |
||||
// suspends the decoder and redelivers the value later, we would |
||||
// need to adjust this to happen perhaps after the callback ran). |
||||
| mov DECODER->ptr, PTR |
||||
|
||||
// Load closure and fval into arg registers. |
||||
| mov ARG1_64, CLOSURE |
||||
| loadfval f |
||||
|
||||
// Call callbacks. |
||||
if (upb_issubmsgtype(f->type)) { |
||||
// Call startsubmsg handler (if any). |
||||
if (f->cb.startsubmsg != upb_startsubmsg_nop) { |
||||
// upb_sflow_t startsubmsg(void *closure, upb_value fval) |
||||
| mov r12d, ARG3_32 |
||||
| callp f->cb.startsubmsg |
||||
} else { |
||||
| mov rdx, CLOSURE |
||||
| mov r12d, ARG3_32 |
||||
} |
||||
// Push a stack frame (not the CPU stack, the upb_decoder stack). |
||||
| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing. |
||||
| cmp rax, qword DECODER->dispatcher.limit |
||||
| jae ->exit_jit // Frame stack overflow. |
||||
| mov qword FRAME:rax->f, f |
||||
| mov qword FRAME:rax->closure, rdx |
||||
| mov rsi, PTR |
||||
| sub rsi, DECODER->buf |
||||
| add r12d, esi |
||||
| mov dword FRAME:rax->end_offset, r12d // = (d->ptr - d->buf) + delim_len |
||||
| mov CLOSURE, rdx |
||||
| mov DECODER->dispatcher.top, rax |
||||
| mov FRAME, rax |
||||
|
||||
upb_handlers_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f); |
||||
if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) { |
||||
| jmp =>sub_m->jit_startmsg_pclabel; |
||||
} else { |
||||
| call =>sub_m->jit_startmsg_pclabel; |
||||
} |
||||
|
||||
|=>f->jit_submsg_done_pclabel: |
||||
// Pop a stack frame. |
||||
| sub FRAME, sizeof(upb_dispatcher_frame) |
||||
| mov DECODER->dispatcher.top, FRAME |
||||
| setmsgend m |
||||
| mov CLOSURE, FRAME->closure |
||||
|
||||
// Call endsubmsg handler (if any). |
||||
if (f->endsubmsg != upb_endsubmsg_nop) { |
||||
// upb_flow_t endsubmsg(void *closure, upb_value fval); |
||||
| mov ARG1_64, CLOSURE |
||||
| loadfval f |
||||
| callp f->endsubmsg |
||||
} |
||||
} else { |
||||
| callp f->cb.value |
||||
} |
||||
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK |
||||
|
||||
// Epilogue: load next tag, check for repeated field. |
||||
| check_eob m |
||||
| mov rcx, qword [PTR] |
||||
if (f->repeated) { |
||||
| checktag tag |
||||
| je <1 |
||||
} |
||||
if (next_tag != 0) { |
||||
| checktag next_tag |
||||
| je =>next_f->jit_pclabel_notypecheck |
||||
} |
||||
|
||||
// Fall back to dynamic dispatch. Replicate the dispatch |
||||
// here so we can learn what fields generally follow others. |
||||
| dyndispatch m |
||||
|1: |
||||
} |
||||
|
||||
// qsort() comparator for uint32_t values, ascending.
// Returns <0, 0 or >0.  The values are compared rather than subtracted: an
// unsigned difference converted to int has the wrong sign whenever the two
// values are more than INT_MAX apart.
static int upb_compare_uint32(const void *a, const void *b) {
  uint32_t lhs = *(const uint32_t*)a;
  uint32_t rhs = *(const uint32_t*)b;
  return (lhs > rhs) - (lhs < rhs);
}
||||
|
||||
// Emits the code for one message "m": the start-of-message entry point, the
// per-field parsers (in ascending key order, each chained to its successor),
// and the end-of-buffer / end-of-message handling.
static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
|=>m->jit_startmsg_pclabel:
  // Call startmsg handler (if any):
  if (m->startmsg != upb_startmsg_nop) {
    // upb_flow_t startmsg(void *closure);
|   mov ARG1_64, FRAME->closure
|   callp m->startmsg
    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
  }

|  setmsgend m
|  check_eob m
|  mov ecx, dword [PTR]
|  dyndispatch m

  // --------- New code section (does not fall through) ------------------------

  // Emit code for parsing each field (dynamic dispatch contains pointers to
  // all of these).

  // Create an ordering over the fields (inttable ordering is undefined).
  int num_keys = upb_inttable_count(&m->fieldtab);
  uint32_t *keys = malloc(num_keys * sizeof(*keys));
  if (num_keys > 0 && !keys) abort();  // Out of memory; no way to report it.
  int idx = 0;
  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
      i = upb_inttable_next(&m->fieldtab, i)) {
    keys[idx++] = upb_inttable_iter_key(i);
  }
  if (num_keys > 0) qsort(keys, num_keys, sizeof(*keys), &upb_compare_uint32);

  // Each field is emitted one iteration late, once the tag of its successor
  // is known.  The last field is emitted after the loop.
  upb_handlers_fieldent *last_f = NULL;
  uint32_t last_tag = 0;
  for(int i = 0; i < num_keys; i++) {
    uint32_t key = keys[i];
    upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
    uint32_t tag = upb_encode_varint(key);
    if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
    last_tag = tag;
    last_f = f;
  }

  free(keys);

  // last_f is NULL when the message has no fields at all; there is then no
  // pending field to emit, and upb_decoder_jit_field() must not be handed a
  // NULL fieldent.
  if (m->is_group) {
    // Create a fake fieldent for handling "end group."
    upb_handlers_fieldent f = {0, UPB_TYPE_ENDGROUP, 0, UPB_NO_VALUE, {NULL}, NULL, 0, 0, 0, false};
    if (last_f) upb_decoder_jit_field(d, last_tag, m->groupnum, m, last_f, &f);
    upb_decoder_jit_field(d, m->groupnum, 0, m, &f, NULL);
  } else {
    if (last_f) upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
  }

  // --------- New code section (does not fall through) ------------------------

  // End-of-buf / end-of-message.
  if (!m->is_group) {
    // This case doesn't exist for groups, because there eob really means
    // eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel:
|   cmp PTR, DECODER->submsg_end
|   jb ->exit_jit  // We are at eob, but not end-of-submsg.
  }

|=>m->jit_endofmsg_pclabel:
  // We are at end-of-submsg: call endmsg handler (if any):
  if (m->endmsg != upb_endmsg_nop) {
    // void endmsg(void *closure, upb_status *status) {
|   mov ARG1_64, FRAME->closure
|   lea ARG2_64, DECODER->dispatcher.status
|   callp m->endmsg
  }

  // Return to whoever entered this message: "ret" if it was entered with a
  // call (multiple parents), out of the JIT for the top-level message, or a
  // direct jump back to the single parent field.
  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
|   ret
  } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
|   jmp ->exit_jit
  } else {
|   jmp =>m->jit_parent_field_done_pclabel
  }

}
||||
|
||||
// Emits the complete JIT function: prologue, the code for every message in
// the handlers, the shared exit path (->exit_jit), and the pclabel-0 stub
// that dispatch tables point unknown fields at.
//
// The emitted function takes the upb_decoder* as its only argument.
static void upb_decoder_jit(upb_decoder *d) {
  // Prologue: set up a frame pointer and save the callee-saved registers we
  // use for our fixed register allocation (5 registers = 40 bytes below rbp).
|  push rbp
|  mov rbp, rsp
|  push r15
|  push r14
|  push r13
|  push r12
|  push rbx
|  mov DECODER, ARG1_64
|  mov FRAME, DECODER:ARG1_64->dispatcher.top
|  mov STRING, DECODER:ARG1_64->tmp
|  mov CLOSURE, FRAME->closure
|  mov PTR, DECODER->ptr

  upb_handlers *h = d->dispatcher.handlers;
  if (h->msgs[0].jit_parent_field_done_pclabel == UPB_MULTIPLE) {
|   call =>h->msgs[0].jit_startmsg_pclabel
|   jmp ->exit_jit
  }

  // TODO: push return addresses for re-entry (will be necessary for multiple
  // buffer support).
  for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, &h->msgs[i]);

|->exit_jit:
  // exit_jit can be reached from inside submessage code that was entered
  // with "call" (messages with multiple parents), in which case return
  // addresses are still sitting on the stack.  Reset rsp to just below the
  // saved registers so the pops restore the right values at any depth.
|  lea rsp, [rbp - 40]
|  pop rbx
|  pop r12
|  pop r13
|  pop r14
|  pop r15
|  leave
|  ret
  // pclabel 0: target for dispatch-table slots that have no field.
|=>0:
|  callp &abort
}
||||
|
||||
// Reserves three consecutive pclabels for field "f" (type-checked entry,
// entry without type check, and submessage-done) and advances
// *pclabel_count past them.
void upb_decoder_jit_assignfieldlabs(upb_handlers_fieldent *f,
                                     uint32_t *pclabel_count) {
  uint32_t next_label = *pclabel_count;
  f->jit_pclabel = next_label++;
  f->jit_pclabel_notypecheck = next_label++;
  f->jit_submsg_done_pclabel = next_label++;
  *pclabel_count = next_label;
}
||||
|
||||
void upb_decoder_jit_assignmsglabs(upb_handlers_msgent *m, |
||||
uint32_t *pclabel_count) { |
||||
m->jit_startmsg_pclabel = (*pclabel_count)++; |
||||
m->jit_endofbuf_pclabel = (*pclabel_count)++; |
||||
m->jit_endofmsg_pclabel = (*pclabel_count)++; |
||||
m->jit_unknownfield_pclabel = (*pclabel_count)++; |
||||
m->jit_parent_field_done_pclabel = UPB_NONE; |
||||
m->max_field_number = 0; |
||||
upb_inttable_iter i; |
||||
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); |
||||
i = upb_inttable_next(&m->fieldtab, i)) { |
||||
uint32_t key = upb_inttable_iter_key(i); |
||||
m->max_field_number = UPB_MAX(m->max_field_number, key); |
||||
upb_handlers_fieldent *f = upb_inttable_iter_value(i); |
||||
upb_decoder_jit_assignfieldlabs(f, pclabel_count); |
||||
} |
||||
// XXX: Won't work for large field numbers; will need to use a upb_table. |
||||
m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*)); |
||||
} |
||||
|
||||
// Second pass: for messages that have only one parent, link them to the field |
||||
// from which they are called. |
||||
void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_handlers_msgent *m) { |
||||
upb_inttable_iter i; |
||||
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); |
||||
i = upb_inttable_next(&m->fieldtab, i)) { |
||||
upb_handlers_fieldent *f = upb_inttable_iter_value(i); |
||||
if (upb_issubmsgtype(f->type)) { |
||||
upb_handlers_msgent *sub_m = upb_handlers_getmsgent(h, f); |
||||
if (f->type == UPB_TYPE(GROUP)) { |
||||
sub_m->is_group = true; |
||||
sub_m->groupnum = upb_inttable_iter_key(i); |
||||
} |
||||
if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) { |
||||
sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel; |
||||
} else { |
||||
sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Builds the JIT-compiled decoder for d's handlers:
//   1. assigns pclabels to every message and field,
//   2. runs DynASM (emit, link, encode) into a freshly mapped buffer,
//   3. fills in each message's dispatch table with the addresses of the
//      generated field parsers,
//   4. registers the code with GDB through the JIT debug interface,
//   5. makes the buffer executable.
// Results are stored in d->jit_code / d->jit_size / d->debug_info and are
// released by upb_decoder_freejit().  Aborts if the code buffer cannot be
// mapped, since there is no way to report failure to the caller.
void upb_decoder_makejit(upb_decoder *d) {
  // Assign pclabels.  Label 0 is reserved for the "unknown field" stub.
  uint32_t pclabel_count = 1;
  upb_handlers *h = d->dispatcher.handlers;
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs(&h->msgs[i], &pclabel_count);
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs2(h, &h->msgs[i]);

  if (h->msgs[0].jit_parent_field_done_pclabel == UPB_NONE) {
    h->msgs[0].jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
  }

  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
  dasm_init(d, 1);
  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
  dasm_growpc(d, pclabel_count);
  dasm_setup(d, upb_jit_actionlist);

  upb_decoder_jit(d);

  dasm_link(d, &d->jit_size);

  // Anonymous mappings take fd -1.  The buffer is writable while we encode
  // into it and is switched to read+exec at the end.
  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if (d->jit_code == MAP_FAILED) abort();

  dasm_encode(d, d->jit_code);

  // Create dispatch tables.
  for (int i = 0; i < h->msgs_len; i++) {
    upb_handlers_msgent *m = &h->msgs[i];
    for (uint32_t j = 0; j <= m->max_field_number; j++) {
      // The field table is keyed on (field number << 3) | wire type; try
      // every wire type to find the entry for field number j.
      upb_handlers_fieldent *f = NULL;
      for (int k = 0; k < 8; k++) {
        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
        if (f) break;
      }
      if (f) {
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
      } else {
        // Don't handle unknown fields yet.
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
      }
    }
  }

  // Create debug info: copy the template ELF file and patch the magic
  // placeholder words with the real code address and size.
  size_t elf_len = src_jit_debug_elf_file_o_len;
  d->debug_info = malloc(elf_len);
  memcpy(d->debug_info, src_jit_debug_elf_file_o, elf_len);
  uint64_t *p = (void*)d->debug_info;
  for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) {
    if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
    if (*p == 0x321) { *p = d->jit_size; }
  }

  // Register the JIT-ted code with GDB.
  gdb_jit_entry *e = malloc(sizeof(gdb_jit_entry));
  e->next_entry = __jit_debug_descriptor.first_entry;
  e->prev_entry = NULL;
  if (e->next_entry) e->next_entry->prev_entry = e;
  e->symfile_addr = d->debug_info;
  e->symfile_size = elf_len;
  __jit_debug_descriptor.first_entry = e;
  __jit_debug_descriptor.relevant_entry = e;
  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
  __jit_debug_register_code();

  dasm_free(d);
  free(globals);

  mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ);

  // Debugging aid: dump the generated machine code.  Best-effort only; skip
  // the dump if the file cannot be opened.
  FILE *f = fopen("/tmp/machine-code", "wb");
  if (f) {
    fwrite(d->jit_code, d->jit_size, 1, f);
    fclose(f);
  }
}
||||
|
||||
void upb_decoder_freejit(upb_decoder *d) { |
||||
munmap(d->jit_code, d->jit_size); |
||||
free(d->debug_info); |
||||
// TODO: unregister |
||||
} |
Loading…
Reference in new issue