First rough version of the JIT.

It can successfully parse SpeedMessage1.
Preliminary results: 750MB/s on Core2 2.4GHz.
This number is 2.5x proto2.
This isn't apples-to-apples, because
proto2 is parsing to a struct and we are
just doing stream parsing, but for apps
that are currently using proto2, this is the
improvement they would see if they could
move to stream-based processing.

Unfortunately perf-regression-test.py is
broken, and I'm not 100% sure why.  It would
be nice to fix it first (to ensure that
there are no performance regressions for
the table-based decoder) but I'm really
impatient to get the JIT checked in.
pull/13171/head
Joshua Haberman 14 years ago
parent 19517cc6f3
commit 9eb4d695c4
  1. 33
      Makefile
  2. 16
      benchmarks/parsestream.upb_table.c
  3. 1
      benchmarks/parsetostruct.upb_table.c
  4. 58
      dynasm/COPYRIGHT
  5. 440
      dynasm/dasm_arm.h
  6. 933
      dynasm/dasm_arm.lua
  7. 408
      dynasm/dasm_ppc.h
  8. 1225
      dynasm/dasm_ppc.lua
  9. 83
      dynasm/dasm_proto.h
  10. 12
      dynasm/dasm_x64.lua
  11. 470
      dynasm/dasm_x86.h
  12. 1930
      dynasm/dasm_x86.lua
  13. 1076
      dynasm/dynasm.lua
  14. 1
      src/upb.h
  15. 95
      src/upb_decoder.c
  16. 19
      src/upb_decoder.h
  17. 228
      src/upb_decoder_x64.asm
  18. 649
      src/upb_decoder_x86.dasc
  19. 37
      src/upb_def.c
  20. 3
      src/upb_glue.c
  21. 12
      src/upb_msg.c
  22. 63
      src/upb_stream.c
  23. 40
      src/upb_stream.h
  24. 1
      src/upb_string.c
  25. 14
      src/upb_string.h
  26. 6
      src/upb_table.c
  27. 74
      src/upb_varint_decoder.h
  28. 10
      tests/test.proto
  29. 19
      tests/test_varint.c
  30. 35
      tests/tests.c

@ -38,7 +38,7 @@ endif
# Basic compiler/flag setup.
CC=gcc
CXX=g++
CFLAGS=-std=c99
CFLAGS=-std=gnu99
INCLUDE=-Isrc -Itests -I.
CPPFLAGS=$(INCLUDE) -Wall -Wextra $(USER_CFLAGS)
LDLIBS=-lpthread src/libupb.a
@ -86,9 +86,6 @@ STREAM= \
src/upb_strstream.c \
src/upb_glue.c \
ASMCORE= \
src/upb_decoder_x64.asm
# Parts of core that are yet to be converted.
OTHERSRC=src/upb_encoder.c
@ -115,7 +112,10 @@ ALLSRC=$(CORE) $(STREAM) $(BENCHMARKS_SRC) $(TESTS_SRC)
clean_leave_profile:
rm -rf $(LIBUPB) $(LIBUPB_PIC)
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.dSYM)
rm -rf src/upb_decoder_x86.h
rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
rm -rf src/jit_debug_elf_file.o
rm -rf src/jit_debug_elf_file.h
rm -rf $(TESTS) tests/t.*
rm -rf src/descriptor.pb
rm -rf src/upbc deps
@ -135,9 +135,11 @@ lib: $(LIBUPB)
OBJ=$(patsubst %.c,%.o,$(SRC))
PICOBJ=$(patsubst %.c,%.lo,$(SRC))
ifneq (, $(findstring DUSE_X64_FASTPATH, $(USER_CFLAGS)))
OBJ += src/upb_decoder_x64.o
PICOBJ += src/upb_decoder_x64.o
ifneq (, $(findstring DUPB_USE_JIT_X64, $(USER_CFLAGS)))
src/upb_decoder.o: src/upb_decoder_x86.h
ifeq (, $(findstring DNDEBUG, $(USER_CFLAGS)))
$(error "JIT only works with -DNDEBUG enabled!")
endif
endif
$(LIBUPB): $(OBJ)
$(E) AR $(LIBUPB)
@ -164,13 +166,18 @@ src/upb_def.lo: src/upb_def.c
$(E) 'CC -fPIC' $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) $(DEF_OPT) -c -o $@ $< -fPIC
src/upb_decoder_x64.o: src/upb_decoder_x64.asm
$(E) NASM $<
$(Q) nasm -Ox src/upb_decoder_x64.asm -o src/upb_decoder_x64.o -f macho64
src/upb_decoder_x86.h: src/jit_debug_elf_file.h
src/upb_decoder_x86.h: src/upb_decoder_x86.dasc
$(E) DYNASM $<
$(Q) lua dynasm/dynasm.lua src/upb_decoder_x86.dasc > src/upb_decoder_x86.h
src/jit_debug_elf_file.o: src/jit_debug_elf_file.s
$(E) GAS $<
$(Q) gcc -c src/jit_debug_elf_file.s -o src/jit_debug_elf_file.o
src/upb_decoder_x64.lo: src/upb_decoder_x64.asm
$(E) NASM $<
$(Q) nasm -Ox src/upb_decoder_x64.asm -o src/upb_decoder_x64.lo -f macho64
src/jit_debug_elf_file.h: src/jit_debug_elf_file.o
$(E) XXD $<
$(Q) xxd -i src/jit_debug_elf_file.o > src/jit_debug_elf_file.h
# Function to expand a wildcard pattern recursively.
rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $(subst *,%,$2),$d)))

@ -12,6 +12,19 @@ static upb_decoder decoder;
static upb_stringsrc stringsrc;
upb_handlers handlers;
// Start-of-submessage handler used by the benchmark: it ignores both of its
// arguments and returns UPB_CONTINUE_WITH(NULL).
static upb_sflow_t startsubmsg(void *_m, upb_value fval) {
  (void)fval;  // unused
  (void)_m;    // unused
  return UPB_CONTINUE_WITH(NULL);
}
// Value handler used by the benchmark: it discards the closure, the field
// value and the decoded value, and returns UPB_CONTINUE.
static upb_flow_t value(void *closure, upb_value fval, upb_value val) {
  (void)val;      // unused
  (void)fval;     // unused
  (void)closure;  // unused
  return UPB_CONTINUE;
}
static bool initialize()
{
// Initialize upb state, decode descriptor.
@ -50,7 +63,7 @@ static bool initialize()
upb_handlers_init(&handlers, def);
// Cause all messages to be read, but do nothing when they are.
upb_register_all(&handlers, NULL, NULL, NULL, NULL, NULL, NULL);
upb_register_all(&handlers, NULL, NULL, value, startsubmsg, NULL, NULL);
upb_decoder_init(&decoder, &handlers);
upb_stringsrc_init(&stringsrc);
return true;
@ -62,7 +75,6 @@ static void cleanup()
upb_def_unref(UPB_UPCAST(def));
upb_decoder_uninit(&decoder);
upb_stringsrc_uninit(&stringsrc);
upb_handlers_uninit(&handlers);
}
static size_t run(int i)

@ -78,7 +78,6 @@ static void cleanup()
upb_def_unref(UPB_UPCAST(def));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
upb_handlers_uninit(&h);
}
static size_t run(int i)

@ -0,0 +1,58 @@
DynASM is taken from LuaJIT 2, which carries the following license statement:
===============================================================================
LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
Copyright (C) 2005-2011 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
===============================================================================
[ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ]
Copyright (C) 1994-2011 Lua.org, PUC-Rio.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
===============================================================================
[ LuaJIT includes code from dlmalloc, which has this license statement: ]
This is a version (aka dlmalloc) of malloc/free/realloc written by
Doug Lea and released to the public domain, as explained at
http://creativecommons.org/licenses/publicdomain
===============================================================================

@ -0,0 +1,440 @@
/*
** DynASM ARM encoding engine.
** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
** Released under the MIT/X license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "arm"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions.
**
** An action list word whose upper 16 bits (ins >> 16) are below DASM__MAX is
** treated as one of these actions; any other word is emitted as an
** instruction word. The order of the enumerators must match the
** action_names list in dasm_arm.lua.
*/
enum {
DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
/* The following actions need a buffer position. */
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
/* The following actions also have an argument. */
DASM_REL_PC, DASM_LABEL_PC,
DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12,
DASM__MAX
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_REL 0x15000000
#define DASM_S_UNDEF_LG 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned int *dasm_ActList;
/* Per-section structure.
**
** Buffer positions are "biased": the section number is kept in the top
** 8 bits (see DASM_SEC2POS) and rbuf is offset by that bias so that a biased
** position can be used to index rbuf directly (see DASM_POS2PTR).
*/
typedef struct dasm_Section {
int *rbuf; /* Biased buffer pointer (negative section bias). */
int *buf; /* True buffer pointer. */
size_t bsize; /* Buffer size in bytes. */
int pos; /* Biased buffer position. */
int epos; /* End of biased buffer position - max single put. */
int ofs; /* Byte offset into section. */
} dasm_Section;
/* Core structure holding the DynASM encoding state.
**
** The structure is allocated with room for maxsection entries in the
** trailing sections[] array (see DASM_PSZ).
*/
struct dasm_State {
size_t psize; /* Allocated size of this structure. */
dasm_ActList actionlist; /* Current actionlist pointer. */
int *lglabels; /* Local/global chain/pos ptrs. */
size_t lgsize; /* Size of lglabels in bytes. */
int *pclabels; /* PC label chains/pos ptrs. */
size_t pcsize; /* Size of pclabels in bytes. */
void **globals; /* Array of globals (bias -10). */
dasm_Section *section; /* Pointer to active section. */
size_t codesize; /* Total size of all code sections. */
int maxsection; /* 0 <= sectionidx < maxsection. */
int status; /* Status code. */
dasm_Section sections[1]; /* All sections. Alloc-extended. */
};
/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/* Initialize DynASM state.
**
** Allocates the state (sized for maxsection sections) through DASM_M_GROW,
** stores it in Dst_REF and clears the label arrays, the globals pointer and
** every section buffer. Fields not assigned here (status, active section,
** action list, section positions) are set later by dasm_setup().
*/
void dasm_init(Dst_DECL, int maxsection)
{
  dasm_State *D;
  size_t psz = 0;
  int secnum;

  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
  D = Dst_REF;

  D->psize = psz;
  D->maxsection = maxsection;
  D->globals = NULL;
  D->lglabels = NULL;
  D->lgsize = 0;
  D->pclabels = NULL;
  D->pcsize = 0;

  for (secnum = 0; secnum < maxsection; secnum++) {
    dasm_Section *sec = &D->sections[secnum];
    sec->buf = NULL;  /* Need this for pass3. */
    sec->rbuf = sec->buf - DASM_SEC2POS(secnum);
    sec->bsize = 0;
    sec->epos = 0;  /* Wrong, but is recalculated after resize. */
  }
}
/* Free DynASM state. */
void dasm_free(Dst_DECL)
{
dasm_State *D = Dst_REF;
int i;
for (i = 0; i < D->maxsection; i++)
if (D->sections[i].buf)
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
DASM_M_FREE(Dst, D, D->psize);
}
/* Setup global label array. Must be called before dasm_setup().
**
** gl is the caller's array that receives global label addresses and maxgl is
** the number of global labels. The local/global label array is grown to hold
** 10 + maxgl ints.
*/
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  unsigned int nslots = 10 + maxgl;

  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, nslots*sizeof(int));
  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
}
/* Grow PC label array. Can be called after dasm_setup(), too.
**
** Ensures room for maxpc PC labels and zero-fills the bytes between the old
** size and the new size, leaving existing entries untouched.
*/
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t oldsize = D->pcsize;
  unsigned char *added;

  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  added = ((unsigned char *)D->pclabels) + oldsize;
  memset((void *)added, 0, D->pcsize - oldsize);
}
/* Setup encoder.
**
** Selects the action list used by subsequent dasm_put() calls, resets the
** status to DASM_S_OK, makes section 0 the active section, clears all label
** chains and rewinds every section to its start position with a byte offset
** of zero.
*/
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int i;

  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];
  /* Either label array may still be unallocated (dasm_init() sets both to
  ** NULL); passing NULL to memset() is undefined even for a zero size. */
  if (D->lglabels) memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
  for (i = 0; i < D->maxsection; i++) {
    D->sections[i].pos = DASM_SEC2POS(i);
    D->sections[i].ofs = 0;
  }
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/* Find a 12 bit immediate encoding for n.
**
** Tries up to 16 successive left-rotations of n by 2 bits (n is treated as a
** 32 bit value). For the first rotation count that leaves a value of at most
** 255, returns that value plus (rotation count << 8). Returns -1 if no
** rotation fits.
*/
static int dasm_imm12(unsigned int n)
{
  int rot = 0;
  while (rot < 16) {
    if (n <= 255)
      return (int)(n + (rot << 8));
    n = (n << 2) | (n >> 30);
    rot++;
  }
  return -1;
}
/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
**
** start is an offset into the action list selected by dasm_setup(). The
** variadic arguments supply one int for every action >= DASM_REL_PC that is
** encountered before the terminating DASM_STOP or DASM_SECTION.
** With DASM_CHECKS defined, a failed check stores an error code in D->status
** and returns immediately.
*/
void dasm_put(Dst_DECL, int start, ...)
{
va_list ap;
dasm_State *D = Dst_REF;
dasm_ActList p = D->actionlist + start;
dasm_Section *sec = D->section;
int pos = sec->pos, ofs = sec->ofs;
int *b;
/* Buffer limit reached: grow the section buffer, then recompute the biased
** pointer and the limit from the new size. */
if (pos >= sec->epos) {
DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
}
b = sec->rbuf;
/* Record the action list offset; passes 2 and 3 replay from it. */
b[pos++] = start;
va_start(ap, start);
while (1) {
unsigned int ins = *p++;
unsigned int action = (ins >> 16);
if (action >= DASM__MAX) {
/* Not an action: a plain instruction word, 4 bytes of output. */
ofs += 4;
} else {
int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
switch (action) {
case DASM_STOP: goto stop;
case DASM_SECTION:
n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
D->section = &D->sections[n]; goto stop;
case DASM_ESC: p++; ofs += 4; break;
case DASM_REL_EXT: break;
case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
case DASM_REL_LG:
n = (ins & 2047) - 10; pl = D->lglabels + n;
if (n >= 0) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */
pl += 10; n = *pl;
if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
goto linkrel;
case DASM_REL_PC:
pl = D->pclabels + n; CKPL(pc, PC);
putrel:
n = *pl;
if (n < 0) { /* Label exists. Get label pos and store it. */
b[pos] = -n;
} else {
linkrel:
b[pos] = n; /* Else link to rel chain, anchored at label. */
*pl = pos;
}
pos++;
break;
case DASM_LABEL_LG:
pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
case DASM_LABEL_PC:
pl = D->pclabels + n; CKPL(pc, PC);
putlabel:
n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
}
*pl = -pos; /* Label exists now. */
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
case DASM_IMM:
case DASM_IMM16:
#ifdef DASM_CHECKS
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
if ((ins & 0x8000))
CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
else
CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
b[pos++] = n;
break;
case DASM_IMML8:
case DASM_IMML12:
CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
(((-n)>>((ins>>5)&31)) == 0), RANGE_I);
b[pos++] = n;
break;
case DASM_IMM12:
CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
b[pos++] = n;
break;
}
}
}
stop:
va_end(ap);
/* Write the advanced position and offset back to the section that was
** active on entry (D->section may have been switched by DASM_SECTION). */
sec->pos = pos;
sec->ofs = ofs;
}
#undef CK
/* Pass 2: Link sections, shrink aligns, fix label offsets.
**
** Replays the buffer of every section, subtracting unneeded alignment padding
** from the running offset and turning the pass 1 label offset estimates into
** final offsets. On success stores the total code size in *szp (and in
** D->codesize) and returns DASM_S_OK.
** With DASM_CHECKS defined, *szp is zeroed first and a pending error status
** or an undefined PC label is returned instead.
*/
int dasm_link(Dst_DECL, size_t *szp)
{
  dasm_State *D = Dst_REF;
  int secnum;
  int ofs = 0;

#ifdef DASM_CHECKS
  *szp = 0;
  if (D->status != DASM_S_OK) return D->status;
  {
    int pc;
    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
  }
#endif

  { /* Handle globals not defined in this translation unit. */
    int idx;
    /* Global label numbers start at 20 and are stored at lglabels[number-10]
    ** (see DASM_LABEL_LG in dasm_put), so the globals occupy lglabels[10..].
    ** Starting the scan at 10 covers the first ten globals as well. */
    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
      int n = D->lglabels[idx];
      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
    }
  }

  /* Combine all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->rbuf;
    int pos = DASM_SEC2POS(secnum);
    int lastpos = sec->pos;

    while (pos != lastpos) {
      /* Each dasm_put() recorded its action list offset first. */
      dasm_ActList p = D->actionlist + b[pos++];
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16);
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: p++; break;
        case DASM_REL_EXT: break;
        case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
        case DASM_REL_LG: case DASM_REL_PC: pos++; break;
        case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
        case DASM_IMM: case DASM_IMM12: case DASM_IMM16:
        case DASM_IMML8: case DASM_IMML12: pos++; break;
        }
      }
      stop: (void)0;
    }
    ofs += sec->ofs;  /* Next section starts right after current section. */
  }

  D->codesize = ofs;  /* Total size of all code sections */
  *szp = ofs;
  return DASM_S_OK;
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
/* Pass 3: Encode sections.
**
** Replays every recorded dasm_put() of every section and writes the final
** 32 bit words to buffer. Returns DASM_S_OK on success, or DASM_S_PHASE if
** the number of bytes written differs from the code size computed by
** dasm_link(). With DASM_CHECKS defined, the status of the first failed
** check is returned instead.
*/
int dasm_encode(Dst_DECL, void *buffer)
{
dasm_State *D = Dst_REF;
char *base = (char *)buffer;
unsigned int *cp = (unsigned int *)buffer;
int secnum;
/* Encode all code sections. No support for data sections (yet). */
for (secnum = 0; secnum < D->maxsection; secnum++) {
dasm_Section *sec = D->sections + secnum;
int *b = sec->buf;
int *endb = sec->rbuf + sec->pos;
while (b != endb) {
dasm_ActList p = D->actionlist + *b++;
while (1) {
unsigned int ins = *p++;
unsigned int action = (ins >> 16);
/* Actions from DASM_ALIGN upwards stored one buffer entry in pass 1. */
int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
switch (action) {
case DASM_STOP: case DASM_SECTION: goto stop;
case DASM_ESC: *cp++ = *p++; break;
case DASM_REL_EXT:
n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1);
goto patchrel;
case DASM_ALIGN:
/* Pad with the word 0xe1a00000 until the output offset is aligned. */
ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
break;
case DASM_REL_LG:
CK(n >= 0, UNDEF_LG);
/* Deliberate fallthrough: the rest is shared with DASM_REL_PC. */
case DASM_REL_PC:
CK(n >= 0, UNDEF_PC);
n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
patchrel:
/* Patch the relative offset into the previously emitted word. */
CK((n & 3) == 0 && ((n-4+0x02000000) >> 26) == 0, RANGE_REL);
cp[-1] |= (((n-4) >> 2) & 0x00ffffff);
break;
case DASM_LABEL_LG:
ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
break;
case DASM_LABEL_PC: break;
case DASM_IMM:
cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31);
break;
case DASM_IMM12:
cp[-1] |= dasm_imm12((unsigned int)n);
break;
case DASM_IMM16:
cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff);
break;
case DASM_IMML8:
cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) :
((-n & 0x0f) | ((-n & 0xf0) << 4));
break;
case DASM_IMML12:
cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n);
break;
/* Anything that is not an action is an instruction word: emit it. */
default: *cp++ = ins; break;
}
}
stop: (void)0;
}
}
if (base + D->codesize != (char *)cp) /* Check for phase errors. */
return DASM_S_PHASE;
return DASM_S_OK;
}
#undef CK
/* Get PC label offset.
**
** Returns the value stored for a defined PC label, -1 if the label has been
** referenced but not defined, and -2 if it is unused or pc is out of range.
*/
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  int pos;

  if (pc*sizeof(int) >= D->pcsize)
    return -2;  /* Out of range. */
  pos = D->pclabels[pc];
  if (pos < 0)
    return *DASM_POS2PTR(D, -pos);
  if (pos > 0)
    return -1;  /* Undefined. */
  return -2;  /* Unused. */
}
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps.
**
** While the status is OK, flags the first of the label slots 1..9 that still
** has a pending reference (DASM_S_UNDEF_LG) and resets the slots visited
** before it. If the status is still OK and secmatch >= 0, also verifies that
** the active section is sections[secmatch] (DASM_S_MATCH_SEC otherwise).
** Returns the resulting status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;

  if (D->status == DASM_S_OK) {
    int lbl = 1;
    while (lbl <= 9) {
      if (D->lglabels[lbl] > 0) {
        D->status = DASM_S_UNDEF_LG|lbl;
        break;
      }
      D->lglabels[lbl] = 0;
      lbl++;
    }
  }
  if (D->status == DASM_S_OK && secmatch >= 0) {
    if (D->section != &D->sections[secmatch])
      D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  }
  return D->status;
}
#endif

@ -0,0 +1,933 @@
------------------------------------------------------------------------------
-- DynASM ARM module.
--
-- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- Module information:
local _info = {
arch = "arm",
description = "DynASM ARM module",
version = "1.2.2",
vernum = 10202,
release = "2011-03-23",
author = "Mike Pall",
license = "MIT",
}
-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }
-- Cache library functions.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, setmetatable, rawget = assert, setmetatable, rawget
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
local concat, sort = table.concat, table.sort
-- Inherited tables and callbacks.
local g_opt, g_arch
local wline, werror, wfatal, wwarn
-- Action name list.
-- CHECK: Keep this in sync with the C code!
local action_names = {
"STOP", "SECTION", "ESC", "REL_EXT",
"ALIGN", "REL_LG", "LABEL_LG",
"REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12",
}
-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
-- Action name -> action number.
local map_action = {}
for n,name in ipairs(action_names) do
map_action[name] = n-1
end
-- Action list buffer.
local actlist = {}
-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }
-- Current number of section buffer positions for dasm_put().
local secpos = 1
------------------------------------------------------------------------------
-- Format x as hexadecimal and keep only the last 8 digits.
local function tohex(x)
  local hex = format("%08x", x)
  -- Truncate from the left to avoid a 64 bit portability problem in Lua.
  return sub(hex, -8)
end
-- Write a table of all action names with their codes (hex and decimal) to out.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for _, aname in ipairs(action_names) do
    local code = map_action[aname]
    local line = format(" %-10s %02X %d\n", aname, code, code)
    out:write(line)
  end
  out:write("\n")
end
-- Write action list buffer as a huge static C array.
-- out is the output file and name the C identifier of the array. Every word
-- is written as 8 hex digits, one per line.
local function writeactions(out, name)
  local nn = #actlist
  -- An empty action list is emitted as a single STOP word. The word must go
  -- into slot 1 (the list is 1-based), since that is the slot written below.
  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  -- The last word is written without a trailing comma.
  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
end
------------------------------------------------------------------------------
-- Append one word to the action list. The word must be an integer in the
-- unsigned 32 bit range.
local function wputxw(n)
  local valid = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(valid, "word out of range")
  local slot = #actlist + 1
  actlist[slot] = n
end
-- Append an action word (action code in the upper 16 bits, val or 0 in the
-- lower bits). If a is given it is queued as an argument for the next
-- dasm_put(). The buffer position advances by num (default 1) whenever a or
-- num is given.
local function waction(action, val, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  local operand = val or 0
  wputxw(code * 0x10000 + operand)
  if a then
    actargs[#actargs+1] = a
  end
  if a or num then
    local step = num or 1
    secpos = secpos + step
  end
end
-- Flush the pending actions as one dasm_put() call (needed before intervening
-- C code or on buffer position overflow). Unless term is set, the action list
-- is terminated with STOP first.
local function wflush(term)
  local pending = #actlist ~= actargs[1]
  if pending then
    if not term then
      waction("STOP") -- Terminate action list.
    end
    local arglist = concat(actargs, ", ")
    wline(format("dasm_put(Dst, %s);", arglist), true)
    -- The current action list offset is the 1st arg to the next dasm_put(),
    -- and it occupies a buffer position, too.
    actargs = { #actlist }
    secpos = 1
  end
end
-- Append a word to the action list, preceded by an ESC action when the word
-- is at most 0x000fffff.
local function wputw(n)
  if n <= 0x000fffff then
    waction("ESC")
  end
  wputxw(n)
end
-- Reserve the next action list slot with a placeholder and return its index.
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end
-- Store a word into a previously reserved action list slot. The word must be
-- an integer in the unsigned 32 bit range.
local function wputpos(pos, n)
  local valid = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(valid, "word out of range")
  actlist[pos] = n
end
------------------------------------------------------------------------------
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbers start at 20 and may not exceed 2047.
local next_global = 20
local function newglobal(t, name)
  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
  local num = next_global
  if num > 2047 then werror("too many global labels") end
  next_global = num + 1
  t[name] = num -- Cache, so later lookups bypass __index.
  return num
end
local map_global = setmetatable({}, { __index = newglobal })
-- Write the names of all global labels to out, ordered by label number.
local function dumpglobals(out, lvl)
  local byindex = {}
  for label, num in pairs(map_global) do
    byindex[num] = label
  end
  out:write("Global labels:\n")
  for num = 20, next_global-1 do
    local line = format(" %s\n", byindex[num])
    out:write(line)
  end
  out:write("\n")
end
-- Write a C enum with one prefixed enumerator per global label (ordered by
-- label number), followed by a final <prefix>_MAX enumerator.
local function writeglobals(out, prefix)
  local byindex = {}
  for label, num in pairs(map_global) do
    byindex[num] = label
  end
  out:write("enum {\n")
  for num = 20, next_global-1 do
    out:write(" ", prefix, byindex[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
-- Write a C array of strings called name that holds the global label names
-- (ordered by label number) and is terminated by a null pointer.
local function writeglobalnames(out, name)
  local byindex = {}
  for label, num in pairs(map_global) do
    byindex[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  for num = 20, next_global-1 do
    out:write(" \"", byindex[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
------------------------------------------------------------------------------
-- Extern label name -> extern label number. With auto assignment on 1st use.
-- map_extern_ holds the reverse mapping (number -> name).
local next_extern = 0
local map_extern_ = {}
local function newextern(t, name)
  -- No restrictions on the name for now.
  local num = next_extern
  if num > 2047 then werror("too many extern labels") end
  next_extern = num + 1
  t[name] = num -- Cache, so later lookups bypass __index.
  map_extern_[num] = name
  return num
end
local map_extern = setmetatable({}, { __index = newextern })
-- Write the names of all extern labels to out, ordered by label number.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  for num = 0, next_extern-1 do
    local line = format(" %s\n", map_extern_[num])
    out:write(line)
  end
  out:write("\n")
end
-- Write a C array of strings called name that holds the extern label names
-- (ordered by label number) and is terminated by a null pointer.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  for num = 0, next_extern-1 do
    local label = map_extern_[num]
    out:write(" \"", label, "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
------------------------------------------------------------------------------
-- Arch-specific maps.
-- Ext. register name -> int. name.
local map_archdef = { sp = "r13", lr = "r14", pc = "r15", }
-- Int. register name -> ext. name.
local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", }
local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).
-- Reverse defines for registers: map an internal register name back to its
-- external name, or return s unchanged when there is no such mapping.
function _M.revdef(s)
  local ext = map_reg_rev[s]
  if ext then
    return ext
  end
  return s
end
local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, }
local map_cond = {
eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
hs = 2, lo = 3,
}
------------------------------------------------------------------------------
-- Template strings for ARM instructions.
--
-- Keys are "<mnemonic>_<number of operands>". Each value consists of 8 hex
-- digits (the base opcode word) followed by one letter per encoding step,
-- interpreted by the ".template__" handler:
--   D  register, placed at bits 12..15 (value * 4096)
--   N  register, placed at bits 16..19 (value * 65536)
--   S  register, placed at bits 8..11 (value * 256)
--   M  register, placed at bits 0..3
--   P  "#imm" (rotated 8 bit immediate, sets 0x02000000) or a register
--   p  shift operand (register-specified shifts allowed)
--   L  load/store address operand(s), handled by parse_load
--   B  branch target label
--   C  blx operand: register or label
--   n  base register at bits 16..19 with optional "!" (sets 0x00200000)
--   R  register list in braces
--   W  16 bit immediate (parse_imm16)
--   v  5 bit immediate at bit 7
--   X  5 bit immediate at bit 16
--   K  16 bit immediate split across the word (used by bkpt)
--   T  24 bit immediate at bit 0
--   s  no encoding effect; marks ops that also get an "s" variant (see the
--      block following this table)
local map_op = {
-- Basic data processing instructions.
and_3 = "e0000000DNPs",
eor_3 = "e0200000DNPs",
sub_3 = "e0400000DNPs",
rsb_3 = "e0600000DNPs",
add_3 = "e0800000DNPs",
adc_3 = "e0a00000DNPs",
sbc_3 = "e0c00000DNPs",
rsc_3 = "e0e00000DNPs",
tst_2 = "e1100000NP",
teq_2 = "e1300000NP",
cmp_2 = "e1500000NP",
cmn_2 = "e1700000NP",
orr_3 = "e1800000DNPs",
mov_2 = "e1a00000DPs",
bic_3 = "e1c00000DNPs",
mvn_2 = "e1e00000DPs",
and_4 = "e0000000DNMps",
eor_4 = "e0200000DNMps",
sub_4 = "e0400000DNMps",
rsb_4 = "e0600000DNMps",
add_4 = "e0800000DNMps",
adc_4 = "e0a00000DNMps",
sbc_4 = "e0c00000DNMps",
rsc_4 = "e0e00000DNMps",
tst_3 = "e1100000NMp",
teq_3 = "e1300000NMp",
cmp_3 = "e1500000NMp",
cmn_3 = "e1700000NMp",
orr_4 = "e1800000DNMps",
mov_3 = "e1a00000DMps",
bic_4 = "e1c00000DNMps",
mvn_3 = "e1e00000DMps",
lsl_3 = "e1a00000DMvs",
lsr_3 = "e1a00020DMvs",
asr_3 = "e1a00040DMvs",
ror_3 = "e1a00060DMvs",
rrx_2 = "e1a00060DMs",
-- Multiply and multiply-accumulate.
mul_3 = "e0000090NMSs",
mla_4 = "e0200090NMSDs",
umaal_4 = "e0400090DNMSs", -- v6
mls_4 = "e0600090DNMSs", -- v6T2
umull_4 = "e0800090DNMSs",
umlal_4 = "e0a00090DNMSs",
smull_4 = "e0c00090DNMSs",
smlal_4 = "e0e00090DNMSs",
-- Halfword multiply and multiply-accumulate.
smlabb_4 = "e1000080NMSD", -- v5TE
smlatb_4 = "e10000a0NMSD", -- v5TE
smlabt_4 = "e10000c0NMSD", -- v5TE
smlatt_4 = "e10000e0NMSD", -- v5TE
smlawb_4 = "e1200080NMSD", -- v5TE
smulwb_3 = "e12000a0NMS", -- v5TE
smlawt_4 = "e12000c0NMSD", -- v5TE
smulwt_3 = "e12000e0NMS", -- v5TE
smlalbb_4 = "e1400080NMSD", -- v5TE
smlaltb_4 = "e14000a0NMSD", -- v5TE
smlalbt_4 = "e14000c0NMSD", -- v5TE
smlaltt_4 = "e14000e0NMSD", -- v5TE
smulbb_3 = "e1600080NMS", -- v5TE
smultb_3 = "e16000a0NMS", -- v5TE
smulbt_3 = "e16000c0NMS", -- v5TE
smultt_3 = "e16000e0NMS", -- v5TE
-- Miscellaneous data processing instructions.
clz_2 = "e16f0f10DM", -- v5T
rev_2 = "e6bf0f30DM", -- v6
rev16_2 = "e6bf0fb0DM", -- v6
revsh_2 = "e6ff0fb0DM", -- v6
sel_3 = "e6800fb0DNM", -- v6
usad8_3 = "e780f010NMS", -- v6
usada8_4 = "e7800010NMSD", -- v6
rbit_2 = "e6ff0f30DM", -- v6T2
movw_2 = "e3000000DW", -- v6T2
movt_2 = "e3400000DW", -- v6T2
-- Note: the X encodes width-1, not width.
sbfx_4 = "e7a00050DMvX", -- v6T2
ubfx_4 = "e7e00050DMvX", -- v6T2
-- Note: the X encodes the msb field, not the width.
bfc_3 = "e7c0001fDvX", -- v6T2
bfi_4 = "e7c00010DMvX", -- v6T2
-- Packing and unpacking instructions.
pkhbt_3 = "e6800010DNM", pkhbt_4 = "e6800010DNMv", -- v6
pkhtb_3 = "e6800050DNM", pkhtb_4 = "e6800050DNMv", -- v6
sxtab_3 = "e6a00070DNM", sxtab_4 = "e6a00070DNMv", -- v6
sxtab16_3 = "e6800070DNM", sxtab16_4 = "e6800070DNMv", -- v6
sxtah_3 = "e6b00070DNM", sxtah_4 = "e6b00070DNMv", -- v6
sxtb_2 = "e6af0070DM", sxtb_3 = "e6af0070DMv", -- v6
sxtb16_2 = "e68f0070DM", sxtb16_3 = "e68f0070DMv", -- v6
sxth_2 = "e6bf0070DM", sxth_3 = "e6bf0070DMv", -- v6
uxtab_3 = "e6e00070DNM", uxtab_4 = "e6e00070DNMv", -- v6
uxtab16_3 = "e6c00070DNM", uxtab16_4 = "e6c00070DNMv", -- v6
uxtah_3 = "e6f00070DNM", uxtah_4 = "e6f00070DNMv", -- v6
uxtb_2 = "e6ef0070DM", uxtb_3 = "e6ef0070DMv", -- v6
uxtb16_2 = "e6cf0070DM", uxtb16_3 = "e6cf0070DMv", -- v6
uxth_2 = "e6ff0070DM", uxth_3 = "e6ff0070DMv", -- v6
-- Saturating instructions.
qadd_3 = "e1000050DMN", -- v5TE
qsub_3 = "e1200050DMN", -- v5TE
qdadd_3 = "e1400050DMN", -- v5TE
qdsub_3 = "e1600050DMN", -- v5TE
-- Note: the X for ssat* encodes sat_imm-1, not sat_imm.
ssat_3 = "e6a00010DXM", ssat_4 = "e6a00010DXMp", -- v6
usat_3 = "e6e00010DXM", usat_4 = "e6e00010DXMp", -- v6
ssat16_3 = "e6a00f30DXM", -- v6
usat16_3 = "e6e00f30DXM", -- v6
-- Parallel addition and subtraction.
sadd16_3 = "e6100f10DNM", -- v6
sasx_3 = "e6100f30DNM", -- v6
ssax_3 = "e6100f50DNM", -- v6
ssub16_3 = "e6100f70DNM", -- v6
sadd8_3 = "e6100f90DNM", -- v6
ssub8_3 = "e6100ff0DNM", -- v6
qadd16_3 = "e6200f10DNM", -- v6
qasx_3 = "e6200f30DNM", -- v6
qsax_3 = "e6200f50DNM", -- v6
qsub16_3 = "e6200f70DNM", -- v6
qadd8_3 = "e6200f90DNM", -- v6
qsub8_3 = "e6200ff0DNM", -- v6
shadd16_3 = "e6300f10DNM", -- v6
shasx_3 = "e6300f30DNM", -- v6
shsax_3 = "e6300f50DNM", -- v6
shsub16_3 = "e6300f70DNM", -- v6
shadd8_3 = "e6300f90DNM", -- v6
shsub8_3 = "e6300ff0DNM", -- v6
uadd16_3 = "e6500f10DNM", -- v6
uasx_3 = "e6500f30DNM", -- v6
usax_3 = "e6500f50DNM", -- v6
usub16_3 = "e6500f70DNM", -- v6
uadd8_3 = "e6500f90DNM", -- v6
usub8_3 = "e6500ff0DNM", -- v6
uqadd16_3 = "e6600f10DNM", -- v6
uqasx_3 = "e6600f30DNM", -- v6
uqsax_3 = "e6600f50DNM", -- v6
uqsub16_3 = "e6600f70DNM", -- v6
uqadd8_3 = "e6600f90DNM", -- v6
uqsub8_3 = "e6600ff0DNM", -- v6
uhadd16_3 = "e6700f10DNM", -- v6
uhasx_3 = "e6700f30DNM", -- v6
uhsax_3 = "e6700f50DNM", -- v6
uhsub16_3 = "e6700f70DNM", -- v6
uhadd8_3 = "e6700f90DNM", -- v6
uhsub8_3 = "e6700ff0DNM", -- v6
-- Load/store instructions.
str_2 = "e4000000DL", str_3 = "e4000000DL", str_4 = "e4000000DL",
strb_2 = "e4400000DL", strb_3 = "e4400000DL", strb_4 = "e4400000DL",
ldr_2 = "e4100000DL", ldr_3 = "e4100000DL", ldr_4 = "e4100000DL",
ldrb_2 = "e4500000DL", ldrb_3 = "e4500000DL", ldrb_4 = "e4500000DL",
strh_2 = "e00000b0DL", strh_3 = "e00000b0DL",
ldrh_2 = "e01000b0DL", ldrh_3 = "e01000b0DL",
ldrd_2 = "e00000d0DL", ldrd_3 = "e00000d0DL", -- v5TE
ldrsb_2 = "e01000d0DL", ldrsb_3 = "e01000d0DL",
strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE
ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL",
ldm_2 = "e8900000nR", ldmia_2 = "e8900000nR", ldmfd_2 = "e8900000nR",
ldmda_2 = "e8100000nR", ldmfa_2 = "e8100000nR",
ldmdb_2 = "e9100000nR", ldmea_2 = "e9100000nR",
ldmib_2 = "e9900000nR", ldmed_2 = "e9900000nR",
stm_2 = "e8800000nR", stmia_2 = "e8800000nR", stmfd_2 = "e8800000nR",
stmda_2 = "e8000000nR", stmfa_2 = "e8000000nR",
stmdb_2 = "e9000000nR", stmea_2 = "e9000000nR",
stmib_2 = "e9800000nR", stmed_2 = "e9800000nR",
pop_1 = "e8bd0000R", push_1 = "e92d0000R",
-- Branch instructions.
b_1 = "ea000000B",
bl_1 = "eb000000B",
blx_1 = "e12fff30C",
bx_1 = "e12fff10M",
-- Miscellaneous instructions.
nop_0 = "e1a00000",
mrs_1 = "e10f0000D",
bkpt_1 = "e1200070K", -- v5T
svc_1 = "ef000000T", swi_1 = "ef000000T",
ud_0 = "e7f001f0",
-- NYI: Advanced SIMD and VFP instructions.
-- NYI instructions, since I have no need for them right now:
-- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh
-- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe
-- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb
-- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2
}
-- Add mnemonics for "s" variants: every template ending in "s" also gets an
-- entry with "s" inserted before the "_N" suffix of its name.
do
  local svariants = {}
  for opname, template in pairs(map_op) do
    if sub(template, -1) == "s" then
      -- Increment the third hex digit of the opcode and drop the "s" marker.
      local bumped = char(byte(template, 3) + 1)
      local stem, arity = sub(opname, 1, -3), sub(opname, -2)
      svariants[stem.."s"..arity] =
        sub(template, 1, 2)..bumped..sub(template, 4, -2)
    end
  end
  -- Merge afterwards so map_op is not modified while it is being traversed.
  for opname, template in pairs(svariants) do
    map_op[opname] = template
  end
end
------------------------------------------------------------------------------
-- Parse a general purpose register operand.
-- Accepts "rN" (N = 0..15), a type name registered via .type, or
-- "type:rN" to override the type's default register.
-- Returns the register number and, if a type was involved, its map_type entry.
local function parse_gpr(expr)
  local tname, override = match(expr, "^([%w_]+):(r1?[0-9])$")
  local tp = map_type[tname or expr]
  if tp then
    local regname = override or tp.reg
    if not regname then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = regname
  end
  local digits = match(expr, "^r(1?[0-9])$")
  if digits then
    local num = tonumber(digits)
    if num <= 15 then
      return num, tp
    end
  end
  werror("bad register name `"..expr.."'")
end
-- Parse a register with an optional leading sign.
-- Returns the register number and true iff the sign was "-".
local function parse_gpr_pm(expr)
  local sign, rest = match(expr, "^([+-]?)(.*)$")
  local reg = parse_gpr(rest)
  return reg, sign == "-"
end
-- Parse a brace-enclosed, comma-separated register list.
-- Returns a number with bit N set for every register rN in the list;
-- naming a register twice is an error.
local function parse_reglist(reglist)
  reglist = match(reglist, "^{%s*([^}]*)}$")
  if not reglist then werror("register list expected") end
  local mask = 0
  for item in gmatch(reglist..",", "%s*([^,]*),") do
    local trimmed = gsub(item, "%s+$", "")  -- Strip trailing whitespace.
    local bit = 2^parse_gpr(trimmed)
    -- Test whether this register's bit is already present in the mask.
    local shifted = (mask - (mask % bit)) / bit
    if shifted % 2 ~= 0 then
      werror("duplicate register `"..item.."'")
    end
    mask = mask + bit
  end
  return mask
end
-- Parse a "#imm" operand destined for a bit field of the opcode.
--   imm    operand text, must start with "#"
--   bits   width of the field in bits
--   shift  bit position of the field within the opcode word
--   scale  the value must be a multiple of 2^scale and is stored divided by it
--   signed true for a two's complement field
-- For a numeric literal the field value, already shifted into position, is
-- returned. Anything else is deferred to encoding time via an IMM action and
-- 0 is returned.
local function parse_imm(imm, bits, shift, scale, signed)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = tonumber(imm)
  if n then
    if n % 2^scale == 0 then
      n = n / 2^scale
      if signed then
        if n >= 0 then
          if n < 2^(bits-1) then return n*2^shift end
        else
          -- The smallest value of a signed field is -2^(bits-1). Anything
          -- below that would wrap around to a positive encoding.
          if n >= -(2^(bits-1)) then return (n+2^bits)*2^shift end
        end
      else
        if n >= 0 and n <= 2^bits-1 then return n*2^shift end
      end
    end
    werror("out of range immediate `"..imm.."'")
  else
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
end
-- Encode an immediate as an 8 bit value plus a 4 bit rotation count.
-- `imm` is the operand text without the leading "#". A numeric literal is
-- encoded directly (or rejected); anything else is deferred to encoding time
-- via an IMM12 action and 0 is returned.
local function parse_imm12(imm)
  local value = tonumber(imm)
  if not value then
    waction("IMM12", 0, imm)
    return 0
  end
  local rotated = value
  for i = 0, -15, -1 do
    if rotated >= 0 and rotated <= 255 and value % 1 == 0 then
      return rotated + (i % 16) * 256
    end
    -- Rotate the 32 bit value right by two bits and try again.
    local low2 = rotated % 4
    rotated = (rotated - low2) / 4 + low2 * 2^30
  end
  werror("out of range immediate `"..imm.."'")
end
-- Parse a "#imm" operand holding a 16 bit unsigned immediate.
-- The low 12 bits stay in place and the upper 4 bits are moved up by 4 bit
-- positions. Non-literal expressions are deferred via an IMM16 action and 0
-- is returned.
local function parse_imm16(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local value = tonumber(imm)
  if not value then
    waction("IMM16", 32*16, imm)
    return 0
  end
  if value >= 0 and value <= 65535 and value % 1 == 0 then
    local low12 = value % 4096
    return (value - low12) * 16 + low12
  end
  werror("out of range immediate `"..imm.."'")
end
-- Encode the immediate offset of a load/store.
-- `imm` is the operand text without the leading "#". With `ext` set the
-- offset has 8 bits split into two nibbles, otherwise it has 12 contiguous
-- bits. A non-negative offset sets 0x00800000; a negative one is stored as
-- its magnitude. Non-literal expressions are deferred via an IMML8/IMML12
-- action and 0 is returned.
local function parse_imm_load(imm, ext)
  local value = tonumber(imm)
  if not value then
    local action, width = "IMML12", 12
    if ext then action, width = "IMML8", 8 end
    waction(action, 32768 + 32*width, imm)
    return 0
  end
  local limit = ext and 255 or 4095
  if value >= -limit and value <= limit then
    local up = 0x00800000
    if value < 0 then
      value = -value
      up = 0
    end
    if ext then
      local low4 = value % 16
      return (value - low4)*16 + low4 + up
    end
    return value + up
  end
  werror("out of range immediate `"..imm.."'")
end
-- Parse a shift operand: "rrx", "<op> #imm", or (only if `gprok` is set)
-- "<op> reg", where <op> is one of the map_shift mnemonics.
-- Returns the shift bits to be added to the opcode.
local function parse_shift(shift, gprok)
  if shift == "rrx" then
    return 3 * 32
  end
  local opname, amount = match(shift, "^(%S+)%s*(.*)$")
  local code = map_shift[opname]
  if not code then werror("expected shift operand") end
  if sub(amount, 1, 1) == "#" then
    -- Shift by immediate: 5 bit amount at bit 7.
    return parse_imm(amount, 5, 7, 0, false) + code * 32
  end
  if not gprok then werror("expected immediate shift operand") end
  -- Shift by register: register at bit 8, plus the flag at bit 4.
  return parse_gpr(amount) * 256 + code * 32 + 16
end
-- Parse the address operand(s) of a load/store instruction.
--   params   operand list of the instruction
--   nparams  number of operands (unused here)
--   n        index of the first address operand in params
--   op       opcode built so far (destination register already added)
-- Returns the opcode with the addressing mode bits added.
--
-- Accepted forms (as matched below):
--   [rn]  [rn, #imm]  [rn, +-rm]  [rn, +-rm, shift]   optionally followed by !
--   [rn], #imm   [rn], +-rm   [rn], +-rm, shift       (post-indexed)
--   type-name followed by a field expression          (typed register access)
local function parse_load(params, nparams, n, op)
  local oplo = op % 256
  -- Templates with a non-zero low byte use the split 8 bit offset encoding.
  local ext = (oplo ~= 0)
  -- ldrd/strd share their low opcode byte (0xd0/0xf0) with ldrsb/ldrsh. Only
  -- the doubleword forms have bit 20 clear, so check it to avoid applying the
  -- register pair restrictions to ldrsb/ldrsh.
  local dword = (oplo == 208 or oplo == 240) and (op % 0x00200000) < 0x00100000
  local ldrd = dword and (oplo == 208)
  local d
  if dword then
    -- Doubleword transfers need an even-numbered first register.
    d = ((op - (op % 4096)) / 4096) % 16
    if d % 2 ~= 0 then werror("odd destination register") end
  end
  local p1, wb = match(params[n], "^%[%s*(.-)%s*%](!?)$")
  local p2 = params[n+1]
  if not p1 then
    -- Not a bracketed address: only a typed access (type name plus trailing
    -- field expression) is acceptable, and only as the sole address operand.
    if not p2 then
      local reg, tailr = match(params[n], "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local d, tp = parse_gpr(reg)
        if tp then
          -- The offset is computed by the C compiler via the Dt macro.
          waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12),
                  format(tp.ctypefmt, tailr))
          return op + d * 65536 + 0x01000000 + (ext and 0x00400000 or 0)
        end
      end
    end
    werror("expected address operand")
  end
  if wb == "!" then op = op + 0x00200000 end
  if p2 then
    -- Post-indexed: [rn], offset. Writeback is implied, so "!" is an error.
    if wb == "!" then werror("bad use of '!'") end
    local p3 = params[n+2]
    op = op + parse_gpr(p1) * 65536
    local imm = match(p2, "^#(.*)$")
    if imm then
      local m = parse_imm_load(imm, ext)
      if p3 then werror("too many parameters") end
      op = op + m + (ext and 0x00400000 or 0)
    else
      local m, neg = parse_gpr_pm(p2)
      if ldrd and (m == d or m-1 == d) then werror("register conflict") end
      op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
      if p3 then op = op + parse_shift(p3) end
    end
  else
    -- Offset or pre-indexed: everything is inside the brackets.
    local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$")
    op = op + parse_gpr(p1a) * 65536 + 0x01000000
    if p2 ~= "" then
      local imm = match(p2, "^,%s*#(.*)$")
      if imm then
        local m = parse_imm_load(imm, ext)
        op = op + m + (ext and 0x00400000 or 0)
      else
        local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$")
        local m, neg = parse_gpr_pm(p2a)
        if ldrd and (m == d or m-1 == d) then werror("register conflict") end
        op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
        if p3 ~= "" then
          if ext then werror("too many parameters") end
          op = op + parse_shift(p3)
        end
      end
    else
      -- Plain [rn]: encoded as a zero immediate offset, added.
      if wb == "!" then werror("bad use of '!'") end
      op = op + (ext and 0x00c00000 or 0x00800000)
    end
  end
  return op
end
-- Classify a label operand.
-- `def` is true when the label is being defined, false when it is referenced.
-- Returns the action mode ("PC", "LG" or "EXT"), a number, and for PC labels
-- the C expression text.
local function parse_label(label, def)
  local head = sub(label, 1, 2)
  if head == "=>" then
    -- =>label (pc label reference)
    return "PC", 0, sub(label, 3)
  elseif head == "->" then
    -- ->name (global label reference)
    return "LG", map_global[sub(label, 3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference)
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then -- Fwd: 1-9, Bkwd: 11-19.
      local bias = 10
      if dir == ">" then bias = 0 end
      return "LG", lnum + bias
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end
------------------------------------------------------------------------------
-- Handle opcodes defined with template strings.
-- The template is 8 hex digits (base opcode) followed by one letter per
-- encoding step. Operands are consumed from params in order; `n` is the index
-- of the next operand. Called without params, the letter part of the template
-- is returned instead.
map_op[".template__"] = function(params, template, nparams)
if not params then return sub(template, 9) end
local op = tonumber(sub(template, 1, 8), 16)
local n = 1
-- Limit number of section buffer positions used by a single dasm_put().
-- A single opcode needs a maximum of 3 positions (rlwinm).
-- NOTE(review): "rlwinm" is not an opcode of this ARM table; the remark looks
-- carried over from another arch module -- confirm the bound for ARM.
if secpos+3 > maxsecpos then wflush() end
-- Reserve the output position now; actions emitted while parsing operands
-- follow it and the finished opcode word is stored at the end.
local pos = wpos()
-- Process each character.
for p in gmatch(sub(template, 9), ".") do
if p == "D" then
-- Register at bits 12..15.
op = op + parse_gpr(params[n]) * 4096; n = n + 1
elseif p == "N" then
-- Register at bits 16..19.
op = op + parse_gpr(params[n]) * 65536; n = n + 1
elseif p == "S" then
-- Register at bits 8..11.
op = op + parse_gpr(params[n]) * 256; n = n + 1
elseif p == "M" then
-- Register at bits 0..3.
op = op + parse_gpr(params[n]); n = n + 1
elseif p == "P" then
-- Either "#imm" (sets 0x02000000) or a plain register.
local imm = match(params[n], "^#(.*)$")
if imm then
op = op + parse_imm12(imm) + 0x02000000
else
op = op + parse_gpr(params[n])
end
n = n + 1
elseif p == "p" then
-- Shift operand; register-specified shifts are allowed here.
op = op + parse_shift(params[n], true); n = n + 1
elseif p == "L" then
-- Load/store address; parse_load looks at params[n] and beyond itself.
op = parse_load(params, nparams, n, op)
elseif p == "B" then
-- Branch target. Note: the local n shadows the operand index from here on.
local mode, n, s = parse_label(params[n], false)
waction("REL_"..mode, n, s, 1)
elseif p == "C" then -- blx gpr vs. blx label.
local p = params[n]
if match(p, "^([%w_]+):(r1?[0-9])$") or match(p, "^r(1?[0-9])$") then
op = op + parse_gpr(p)
else
-- The label form replaces the whole opcode word, so a condition code
-- (leading hex digit below e) cannot be honoured.
if op < 0xe0000000 then werror("unconditional instruction") end
local mode, n, s = parse_label(params[n], false)
waction("REL_"..mode, n, s, 1)
op = 0xfa000000
end
elseif p == "n" then
-- Base register at bits 16..19, optional "!" sets 0x00200000.
local r, wb = match(params[n], "^([^!]*)(!?)$")
op = op + parse_gpr(r) * 65536 + (wb == "!" and 0x00200000 or 0)
n = n + 1
elseif p == "R" then
-- Register list.
op = op + parse_reglist(params[n]); n = n + 1
elseif p == "W" then
-- 16 bit immediate.
op = op + parse_imm16(params[n]); n = n + 1
elseif p == "v" then
-- 5 bit immediate at bit 7.
op = op + parse_imm(params[n], 5, 7, 0, false); n = n + 1
elseif p == "X" then
-- 5 bit immediate at bit 16.
op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1
elseif p == "K" then
-- 16 bit literal immediate: low 4 bits stay, the rest moves up by 4 bits.
local imm = tonumber(match(params[n], "^#(.*)$")); n = n + 1
if not imm or imm % 1 ~= 0 or imm < 0 or imm > 0xffff then
werror("bad immediate operand")
end
local t = imm % 16
op = op + (imm - t) * 16 + t
elseif p == "T" then
-- 24 bit immediate at bit 0.
op = op + parse_imm(params[n], 24, 0, 0, false); n = n + 1
elseif p == "s" then
-- Ignored.
else
assert(false)
end
end
wputpos(pos, op)
end
------------------------------------------------------------------------------
-- The four pseudo-opcodes below mark the place in the output where a generated
-- C table is to be written. The writer runs later, when the line is emitted.
-- Called without params, each returns its operand description.

-- .actionlist cvar: position of the action list.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeactions(out, cvar) end)
  else
    return "cvar"
  end
end
-- .globals prefix: position of the global label enum.
map_op[".globals_1"] = function(params)
  if params then
    local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobals(out, enumprefix) end)
  else
    return "prefix"
  end
end
-- .globalnames cvar: position of the global label name array.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobalnames(out, cvar) end)
  else
    return "cvar"
  end
end
-- .externnames cvar: position of the extern label name array.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeexternnames(out, cvar) end)
  else
    return "cvar"
  end
end
------------------------------------------------------------------------------
-- Label definition pseudo-opcode (the core converts "label:" into this form).
-- Extern labels can only be referenced, never defined.
map_op[".label_1"] = function(params)
  if not params then
    return "[1-9] | ->global | =>pcexpr"
  end
  if secpos+1 > maxsecpos then
    wflush()
  end
  local kind, num, expr = parse_label(params[1], true)
  if kind == "EXT" then
    werror("bad label definition")
  end
  waction("LABEL_"..kind, num, expr, 1)
end
------------------------------------------------------------------------------
-- Data storage pseudo-opcode: emit each operand as one word.
-- Operands must be numeric literals; negative values are wrapped by 2^32.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _, text in ipairs(params) do
    local word = tonumber(text)
    if not word then
      werror("bad immediate `"..text.."'")
    end
    if word < 0 then
      word = word + 2^32
    end
    wputw(word)
    if secpos+2 > maxsecpos then wflush() end
  end
end
-- Alignment pseudo-opcode. The operand must be a literal power of two in the
-- range 2 ... 256.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    for exp = 1, 8 do
      if align == 2^exp then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end
------------------------------------------------------------------------------
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Operands: name, ctype and optionally a default register. Registers the
-- type, adds a "#name" define expanding to sizeof(ctype), and emits a DtN()
-- helper macro computing field offsets within ctype.
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  local entry = { ctype = ctype, reg = reg }
  entry.ctypefmt = format("Dt%X(%%s)", num)
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
-- The two operand form (no default register) shares the implementation.
map_op[".type_2"] = map_op[".type_3"]
-- Write a table of all defined types (name, C type, default register),
-- sorted by type name.
local function dumptypes(out, lvl)
  local names = {}
  for tname in pairs(map_type) do
    names[#names+1] = tname
  end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local tname = names[i]
    local tp = map_type[tname]
    out:write(format(" %-20s %-20s %s\n", tname, tp.ctype, tp.reg or ""))
  end
  out:write("\n")
end
------------------------------------------------------------------------------
-- Switch the output to section number `num`.
function _M.section(num)
  waction("SECTION", num)
  -- SECTION is a terminal action, so the pending actions are flushed with
  -- the terminal flag set.
  wflush(true)
end
------------------------------------------------------------------------------
-- Dump architecture description: a version banner followed by the action list.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
                        _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpactions(out)
end
-- Dump all user defined elements: types, then global labels, then externs.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
------------------------------------------------------------------------------
-- Pass callbacks from/to the DynASM core: store the core's line writer and
-- error/fatal/warning reporters, and hand back this module's flush function.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
-- Setup the arch-specific module: remember the selected arch and options.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
-- Merge the core maps and the arch-specific maps.
-- Unknown opcodes fall back to the core opcode map first. Failing that, a
-- two letter condition code in front of the "_N" suffix is stripped, the
-- unconditional template is looked up, and its first hex digit is replaced by
-- the condition code. Unknown defines fall back to map_archdef.
function _M.mergemaps(map_coreop, map_def)
  local function lookup(ops, key)
    local core = map_coreop[key]
    if core then return core end
    local cond = map_cond[sub(key, -4, -3)]
    if not cond then return end
    local base = rawget(ops, sub(key, 1, -5)..sub(key, -2))
    if base then
      return format("%x%s", cond, sub(base, 2))
    end
  end
  setmetatable(map_op, { __index = lookup })
  setmetatable(map_def, { __index = map_archdef })
  return map_op, map_def
end
return _M
------------------------------------------------------------------------------

@ -0,0 +1,408 @@
/*
** DynASM PPC encoding engine.
** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
** Released under the MIT/X license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "ppc"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions. */
/*
** An action list word holds the action number in its upper 16 bits. Words
** whose upper half is >= DASM__MAX are not actions: they are emitted verbatim
** as instruction words. The order of the enumerators matters, since the
** passes below test ranges (e.g. action >= DASM_REL_PC, action >= DASM_ALIGN).
*/
enum {
DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
/* The following actions need a buffer position. */
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
/* The following actions also have an argument. */
DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
DASM__MAX
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_REL 0x15000000
#define DASM_S_UNDEF_LG 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned int *dasm_ActList;
/* Per-section structure. */
/*
** Positions are "biased": the section number sits in the top 8 bits (see
** DASM_SEC2POS). rbuf is buf minus that bias, so rbuf[pos] addresses the
** buffer slot for a biased position directly.
*/
typedef struct dasm_Section {
int *rbuf; /* Biased buffer pointer (negative section bias). */
int *buf; /* True buffer pointer. */
size_t bsize; /* Buffer size in bytes. */
int pos; /* Biased buffer position. */
int epos; /* End of biased buffer position - max single put. */
int ofs; /* Byte offset into section. */
} dasm_Section;
/* Core structure holding the DynASM encoding state. */
/*
** Label array entries hold 0 (unused), a positive buffer position (head of a
** chain of unresolved references) or a negative buffer position (label is
** defined). lgsize and pcsize are byte sizes, not element counts.
*/
struct dasm_State {
size_t psize; /* Allocated size of this structure. */
dasm_ActList actionlist; /* Current actionlist pointer. */
int *lglabels; /* Local/global chain/pos ptrs. */
size_t lgsize;
int *pclabels; /* PC label chains/pos ptrs. */
size_t pcsize;
void **globals; /* Array of globals (bias -10). */
dasm_Section *section; /* Pointer to active section. */
size_t codesize; /* Total size of all code sections. */
int maxsection; /* 0 <= sectionidx < maxsection. */
int status; /* Status code. */
dasm_Section sections[1]; /* All sections. Alloc-extended. */
};
/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/*
** Initialize DynASM state: allocate the state structure with room for
** maxsection sections and reset all label arrays and section buffers.
*/
void dasm_init(Dst_DECL, int maxsection)
{
  dasm_State *D;
  size_t psz = 0;
  int secnum;

  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
  D = Dst_REF;
  D->psize = psz;
  D->maxsection = maxsection;
  D->globals = NULL;
  D->lglabels = NULL;
  D->lgsize = 0;
  D->pclabels = NULL;
  D->pcsize = 0;
  for (secnum = 0; secnum < maxsection; secnum++) {
    dasm_Section *sec = &D->sections[secnum];
    sec->buf = NULL;  /* Need this for pass3. */
    sec->rbuf = sec->buf - DASM_SEC2POS(secnum);
    sec->bsize = 0;
    sec->epos = 0;  /* Wrong, but is recalculated after resize. */
  }
}
/* Free DynASM state. */
void dasm_free(Dst_DECL)
{
dasm_State *D = Dst_REF;
int i;
for (i = 0; i < D->maxsection; i++)
if (D->sections[i].buf)
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
DASM_M_FREE(Dst, D, D->psize);
}
/*
** Setup global label array. Must be called before dasm_setup().
** gl receives the addresses of the global labels; maxgl is the number of
** global labels. The local/global label array is grown to 10+maxgl entries.
*/
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  size_t need = (10+maxgl)*sizeof(int);

  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, need);
}
/*
** Grow PC label array to hold at least maxpc labels. Can be called after
** dasm_setup(), too. Newly added entries are cleared; existing ones are kept.
*/
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t oldsize = D->pcsize;
  unsigned char *fresh;

  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  /* Zero only the part of the array that was just added. */
  fresh = ((unsigned char *)D->pclabels) + oldsize;
  memset((void *)fresh, 0, D->pcsize - oldsize);
}
/*
** Setup encoder: select the action list, clear the status and all labels,
** make section 0 the active section and rewind every section.
** actionlist is the array generated by the DynASM pre-processor.
*/
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int i;
  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];
  /* Either label array may still be unallocated (NULL with size 0), e.g. if
  ** dasm_setupglobal() was not called. Don't pass a NULL pointer to memset. */
  if (D->lglabels) memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
  for (i = 0; i < D->maxsection; i++) {
    D->sections[i].pos = DASM_SEC2POS(i);
    D->sections[i].ofs = 0;
  }
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
/*
** start is the index into the action list where this fragment begins. The
** variable arguments supply one int for each action >= DASM_REL_PC that is
** encountered, in order. The start index and per-action data are appended to
** the active section's buffer for use by the later passes.
*/
void dasm_put(Dst_DECL, int start, ...)
{
va_list ap;
dasm_State *D = Dst_REF;
dasm_ActList p = D->actionlist + start;
dasm_Section *sec = D->section;
int pos = sec->pos, ofs = sec->ofs;
int *b;
/* Grow the section buffer if this put might not fit. */
if (pos >= sec->epos) {
DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
}
b = sec->rbuf;
/* Remember where in the action list this fragment starts. */
b[pos++] = start;
va_start(ap, start);
while (1) {
unsigned int ins = *p++;
unsigned int action = (ins >> 16);
if (action >= DASM__MAX) {
/* Plain instruction word: only counts towards the offset estimate. */
ofs += 4;
} else {
/* Actions from DASM_REL_PC on consume one int argument. */
int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
switch (action) {
case DASM_STOP: goto stop;
case DASM_SECTION:
n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
D->section = &D->sections[n]; goto stop;
/* Escaped word: skip the following action list word, it is emitted as is. */
case DASM_ESC: p++; ofs += 4; break;
case DASM_REL_EXT: break;
/* Assume the worst case padding for now; dasm_link() shrinks it. */
case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
case DASM_REL_LG:
n = (ins & 2047) - 10; pl = D->lglabels + n;
if (n >= 0) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */
pl += 10; n = *pl;
if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
goto linkrel;
case DASM_REL_PC:
pl = D->pclabels + n; CKPL(pc, PC);
putrel:
n = *pl;
if (n < 0) { /* Label exists. Get label pos and store it. */
b[pos] = -n;
} else {
linkrel:
b[pos] = n; /* Else link to rel chain, anchored at label. */
*pl = pos;
}
pos++;
break;
case DASM_LABEL_LG:
pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
case DASM_LABEL_PC:
pl = D->pclabels + n; CKPL(pc, PC);
putlabel:
n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
}
*pl = -pos; /* Label exists now. */
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
case DASM_IMM:
#ifdef DASM_CHECKS
/* Bit fields of ins used here: scale at bits 10..14, width at bits 5..9,
** signed flag 0x8000. The low scale bits of n must be zero and n must fit
** the field width. */
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
if (ins & 0x8000)
CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
else
CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
b[pos++] = n;
break;
}
}
}
stop:
va_end(ap);
sec->pos = pos;
sec->ofs = ofs;
}
#undef CK
/* Pass 2: Link sections, shrink aligns, fix label offsets. */
/*
** On success the total code size in bytes is stored to *szp and DASM_S_OK is
** returned. With DASM_CHECKS a pending error status or an undefined PC label
** is returned instead (and *szp is set to 0).
*/
int dasm_link(Dst_DECL, size_t *szp)
{
dasm_State *D = Dst_REF;
int secnum;
int ofs = 0;
#ifdef DASM_CHECKS
*szp = 0;
if (D->status != DASM_S_OK) return D->status;
{
int pc;
/* A positive entry is a chain of references to a label never defined. */
for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
}
#endif
{ /* Handle globals not defined in this translation unit. */
int idx;
for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
int n = D->lglabels[idx];
/* Undefined label: Collapse rel chain and replace with marker (< 0). */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
}
}
/* Combine all code sections. No support for data sections (yet). */
for (secnum = 0; secnum < D->maxsection; secnum++) {
dasm_Section *sec = D->sections + secnum;
int *b = sec->rbuf;
int pos = DASM_SEC2POS(secnum);
int lastpos = sec->pos;
/* Replay every fragment stored by dasm_put() for this section. */
while (pos != lastpos) {
dasm_ActList p = D->actionlist + b[pos++];
while (1) {
unsigned int ins = *p++;
unsigned int action = (ins >> 16);
switch (action) {
case DASM_STOP: case DASM_SECTION: goto stop;
case DASM_ESC: p++; break;
case DASM_REL_EXT: break;
/* Remove the padding that turns out to be unnecessary at the real offset. */
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
case DASM_REL_LG: case DASM_REL_PC: pos++; break;
/* Turn the pass 1 estimate into the final offset. */
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
case DASM_IMM: pos++; break;
}
}
stop: (void)0;
}
ofs += sec->ofs; /* Next section starts right after current section. */
}
D->codesize = ofs; /* Total size of all code sections */
*szp = ofs;
return DASM_S_OK;
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
/* Pass 3: Encode sections. */
/*
** Writes the machine code for all sections to buffer, which must provide at
** least the size reported by dasm_link(). Returns DASM_S_OK, or
** DASM_S_PHASE if the number of bytes written differs from that size. With
** DASM_CHECKS, undefined labels and out-of-range branches are reported, too.
*/
int dasm_encode(Dst_DECL, void *buffer)
{
dasm_State *D = Dst_REF;
char *base = (char *)buffer;
unsigned int *cp = (unsigned int *)buffer;
int secnum;
/* Encode all code sections. No support for data sections (yet). */
for (secnum = 0; secnum < D->maxsection; secnum++) {
dasm_Section *sec = D->sections + secnum;
int *b = sec->buf;
int *endb = sec->rbuf + sec->pos;
while (b != endb) {
dasm_ActList p = D->actionlist + *b++;
while (1) {
unsigned int ins = *p++;
unsigned int action = (ins >> 16);
/* Actions from DASM_ALIGN on have one buffer slot stored by dasm_put(). */
int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
switch (action) {
case DASM_STOP: case DASM_SECTION: goto stop;
case DASM_ESC: *cp++ = *p++; break;
case DASM_REL_EXT:
n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1);
goto patchrel;
case DASM_ALIGN:
/* Pad with 0x60000000 words (presumably the PPC nop encoding). */
ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
break;
case DASM_REL_LG:
CK(n >= 0, UNDEF_LG);
/* Intentional fallthrough: local/global and PC relocations share the code. */
case DASM_REL_PC:
CK(n >= 0, UNDEF_PC);
/* n is the buffer position of the label; fetch its final offset and make
** it relative to the current output position. */
n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
patchrel:
/* Bit 2048 of ins selects the 16 bit displacement field, otherwise the
** 26 bit field is used. The displacement is or'ed into the instruction
** word emitted just before (cp[-1]). */
CK((n & 3) == 0 &&
(((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
break;
case DASM_LABEL_LG:
/* Only label numbers >= 20 are stored to the globals array. */
ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
break;
case DASM_LABEL_PC: break;
case DASM_IMM:
/* Scale (bits 10..14), mask to the field width (bits 5..9) and shift into
** place (bits 0..4), then merge into the previous instruction word. */
cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31);
break;
/* Anything else is a plain instruction word. */
default: *cp++ = ins; break;
}
}
stop: (void)0;
}
}
if (base + D->codesize != (char *)cp) /* Check for phase errors. */
return DASM_S_PHASE;
return DASM_S_OK;
}
#undef CK
/*
** Get PC label offset.
** Returns the value stored for a defined label, -1 for a label that has been
** referenced but not defined, and -2 for an unused or out-of-range label.
*/
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  int pos;

  if (pc*sizeof(int) >= D->pcsize)
    return -2;  /* Out of range. */
  pos = D->pclabels[pc];
  if (pos == 0)
    return -2;  /* Unused. */
  if (pos > 0)
    return -1;  /* Undefined. */
  return *DASM_POS2PTR(D, -pos);
}
#ifdef DASM_CHECKS
/*
** Optional sanity checker to call between isolated encoding steps.
** Reports local labels 1..9 that were referenced but never defined and
** resets the defined ones. If secmatch >= 0, also checks that the active
** section is sections[secmatch]. Returns the (possibly updated) status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  int i;

  if (D->status != DASM_S_OK)
    return D->status;  /* Keep the first error. */
  for (i = 1; i <= 9; i++) {
    if (D->lglabels[i] > 0) {
      D->status = DASM_S_UNDEF_LG|i;
      return D->status;
    }
    D->lglabels[i] = 0;
  }
  if (secmatch >= 0 && D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif

File diff suppressed because it is too large Load Diff

@ -0,0 +1,83 @@
/*
** DynASM encoding engine prototypes.
** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
** Released under the MIT/X license. See dynasm.lua for full copyright notice.
*/
#ifndef _DASM_PROTO_H
#define _DASM_PROTO_H
#include <stddef.h>
#include <stdarg.h>
#include <stdlib.h>  /* realloc/exit/free used by the default DASM_M_* macros. */
#define DASM_IDENT "DynASM 1.2.2"
#define DASM_VERSION 10202 /* 1.2.2 */

/*
** Dst_DECL is the first parameter of every DynASM function and Dst_REF is
** the expression that yields the dasm_State pointer from it. Embedders may
** define both before including this header; these are the defaults.
*/
#ifndef Dst_DECL
#define Dst_DECL dasm_State **Dst
#endif
#ifndef Dst_REF
#define Dst_REF (*Dst)
#endif

/* Linkage/storage specifier prepended to every prototype below. */
#ifndef DASM_FDEF
#define DASM_FDEF extern
#endif

/*
** Default buffer growth: make sure buffer `p` (element type `t`) holds at
** least `need` bytes. `sz` is the current size in bytes and is updated in
** place. The size starts at 16 and is doubled until it suffices. The
** process exits with status 1 if the allocation fails. `ctx` is not used
** by this default implementation.
*/
#ifndef DASM_M_GROW
#define DASM_M_GROW(ctx, t, p, sz, need) \
  do { \
    size_t _sz = (sz), _need = (need); \
    if (_sz < _need) { \
      if (_sz < 16) _sz = 16; \
      while (_sz < _need) _sz += _sz; \
      (p) = (t *)realloc((p), _sz); \
      if ((p) == NULL) exit(1); \
      (sz) = _sz; \
    } \
  } while(0)
#endif

/* Default release of a buffer obtained via DASM_M_GROW; ctx/sz are unused. */
#ifndef DASM_M_FREE
#define DASM_M_FREE(ctx, p, sz) free(p)
#endif

/* Internal DynASM encoder state. */
typedef struct dasm_State dasm_State;

/* Initialize and free DynASM state. */
DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
DASM_FDEF void dasm_free(Dst_DECL);

/* Setup global array. Must be called before dasm_setup(). */
DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);

/* Grow PC label array. Can be called after dasm_setup(), too. */
DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);

/* Setup encoder. */
DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist);

/* Feed encoder with actions. Calls are generated by pre-processor. */
DASM_FDEF void dasm_put(Dst_DECL, int start, ...);

/* Link sections and return the resulting size. */
DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);

/* Encode sections into buffer. */
DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);

/* Get PC label offset. */
DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);

#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps. */
DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
#else
/* Without DASM_CHECKS the checker compiles away to a constant "OK" (0). */
#define dasm_checkstep(a, b) 0
#endif

#endif /* _DASM_PROTO_H */

@ -0,0 +1,12 @@
------------------------------------------------------------------------------
-- DynASM x64 module.
--
-- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined x86/x64 module.
-- All the interesting stuff is there.
------------------------------------------------------------------------------
-- The flag is assigned before the require below, so it is already set when
-- dasm_x86 gets loaded (presumably that module reads it to select 64 bit
-- mode -- see the header comment above).
x64 = true -- Using a global is an ugly, but effective solution.
-- The value of this module is whatever the combined x86/x64 module returns.
return require("dasm_x86")

@ -0,0 +1,470 @@
/*
** DynASM x86 encoding engine.
** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
** Released under the MIT/X license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
/* Architecture name string of this encoding engine. */
#define DASM_ARCH "x86"
/*
** Fallback for DASM_EXTERN: unless it is defined before this file is
** included, it expands to 0. dasm_encode() invokes it for DASM_EXTERN
** actions and writes the result as a 32 bit value.
** DASM_EXTERN is also the name of an action code in the enum below; the
** function-like macro is only expanded where the name is followed by '('.
*/
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/*
** Action definitions. DASM_STOP must be 255.
** Action list bytes below DASM_DISP are not actions: dasm_put() counts each
** as one code byte and dasm_encode() copies it to the output.
** The order of the codes matters: dasm_put() fetches one int argument for
** every action in DASM_DISP..DASM_REL_A, and dasm_encode() reads one
** buffered int for every action in DASM_DISP..DASM_ALIGN.
*/
enum {
DASM_DISP = 233,
DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/*
** DynASM encoder status codes. Action list offset or number are or'ed in.
** The code proper sits in the top byte; the low bits carry the detail.
*/
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_VREG 0x15000000
#define DASM_S_UNDEF_L 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/*
** Macros to convert positions (8 bit section + 24 bit index).
** DASM_POS2PTR indexes a section's rbuf with the full position; this works
** because rbuf is the real buffer pointer minus the section's position bias.
*/
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned char *dasm_ActList;
/* Per-section structure. */
typedef struct dasm_Section {
int *rbuf; /* Biased buffer pointer (negative section bias). */
int *buf; /* True buffer pointer. */
size_t bsize; /* Buffer size in bytes. */
int pos; /* Biased buffer position. */
int epos; /* End of biased buffer position - max single put. */
int ofs; /* Byte offset into section. */
} dasm_Section;
/*
** Core structure holding the DynASM encoding state.
** It is allocated by dasm_init() with DASM_PSZ(maxsection) bytes, so that
** the trailing sections[] array really holds maxsection entries.
*/
struct dasm_State {
size_t psize; /* Allocated size of this structure. */
dasm_ActList actionlist; /* Current actionlist pointer. */
int *lglabels; /* Local/global chain/pos ptrs. */
size_t lgsize;
int *pclabels; /* PC label chains/pos ptrs. */
size_t pcsize;
void **globals; /* Array of globals (bias -10). */
dasm_Section *section; /* Pointer to active section. */
size_t codesize; /* Total size of all code sections. */
int maxsection; /* 0 <= sectionidx < maxsection. */
int status; /* Status code. */
dasm_Section sections[1]; /* All sections. Alloc-extended. */
};
/*
** The size of the core structure depends on the max. number of sections.
** dasm_State already embeds one dasm_Section, so only ms-1 more are added.
** The argument is parenthesized so that any expression may be passed.
*/
#define DASM_PSZ(ms) (sizeof(dasm_State)+((ms)-1)*sizeof(dasm_Section))
/*
** Initialize DynASM state.
** Allocates a state with room for `maxsection` sections and stores it
** through Dst_REF. Label arrays, the globals pointer and all section
** buffers start out empty.
*/
void dasm_init(Dst_DECL, int maxsection)
{
  dasm_State *D;
  size_t allocsz = 0;
  int secidx;
  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, allocsz, DASM_PSZ(maxsection));
  D = Dst_REF;
  D->psize = allocsz;
  D->lglabels = NULL;
  D->lgsize = 0;
  D->pclabels = NULL;
  D->pcsize = 0;
  D->globals = NULL;
  D->maxsection = maxsection;
  for (secidx = 0; secidx < maxsection; secidx++) {
    dasm_Section *sec = &D->sections[secidx];
    sec->buf = NULL;  /* Need this for pass3. */
    sec->rbuf = sec->buf - DASM_SEC2POS(secidx);
    sec->bsize = 0;
    sec->epos = 0;  /* Wrong, but is recalculated after resize. */
  }
}
/* Free DynASM state. */
void dasm_free(Dst_DECL)
{
dasm_State *D = Dst_REF;
int i;
for (i = 0; i < D->maxsection; i++)
if (D->sections[i].buf)
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
DASM_M_FREE(Dst, D, D->psize);
}
/*
** Setup global label array. Must be called before dasm_setup().
** `gl` is the caller-provided array for global label addresses, `maxgl`
** the number of globals. The local/global label table is grown to hold
** 10 + maxgl ints.
*/
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  /* Negative bias to compensate for locals. */
  void **biased = gl - 10;
  D->globals = biased;
  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
}
/*
** Grow PC label array. Can be called after dasm_setup(), too.
** Makes room for `maxpc` PC labels and zeroes the newly added part of the
** array; existing entries are left untouched.
*/
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t osz = D->pcsize;
  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  /*
  ** Only clear when the array actually grew. Otherwise there is nothing to
  ** do, and on a state that never allocated PC labels the pointer is still
  ** NULL, which must not be handed to memset() even with a zero length.
  */
  if (D->pcsize > osz)
    memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
}
/*
** Setup encoder.
** Binds `actionlist` to the state, resets the status to OK, makes section 0
** the active section, clears all label tables and rewinds every section to
** its start. The local/global label table is cleared unconditionally, so
** dasm_setupglobal() has to have allocated it beforehand.
*/
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int secidx = 0;
  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];
  memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) {
    memset((void *)D->pclabels, 0, D->pcsize);
  }
  while (secidx < D->maxsection) {
    dasm_Section *sec = &D->sections[secidx];
    sec->pos = DASM_SEC2POS(secidx);
    sec->ofs = 0;
    secidx++;
  }
}
/*
** Checking helpers for dasm_put(). Both rely on the locals `D` (state) and
** `p` (action list cursor) of the enclosing function; CKPL additionally
** relies on `pl` (pointer into a label table).
** With DASM_CHECKS defined:
**   CK(x, st)       if x is false, store DASM_S_<st> or'ed with the action
**                   list offset of the current action and return.
**   CKPL(kind, st)  if pl lies outside D-><kind>labels (kind is lg or pc),
**                   store DASM_S_RANGE_<st> the same way and return.
** Without DASM_CHECKS both expand to a no-op.
** NOTE(review): the early `return` leaves dasm_put() after va_start()
** without passing through its va_end() -- confirm this is acceptable on the
** supported targets.
*/
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/*
** Pass 1: Store actions and args, link branches/labels, estimate offsets.
** `start` is the offset into the action list at which to begin; the
** variadic arguments are the int operands consumed by the actions that
** follow, in order. Everything is appended to the buffer of the active
** section; nothing is encoded yet. The byte offset kept in `ofs` is an
** upper bound (branches are counted at full size, aligns at maximum pad).
** Processing ends at DASM_STOP or DASM_SECTION; the latter also switches
** the active section for subsequent calls.
*/
void dasm_put(Dst_DECL, int start, ...)
{
va_list ap;
dasm_State *D = Dst_REF;
dasm_ActList p = D->actionlist + start;
dasm_Section *sec = D->section;
int pos = sec->pos, ofs = sec->ofs, mrm = 4;
int *b;
/* Grow the section buffer up front so one whole put always fits. */
if (pos >= sec->epos) {
DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
}
b = sec->rbuf;
/* First slot of every put: where in the action list it starts. */
b[pos++] = start;
va_start(ap, start);
while (1) {
int action = *p++;
if (action < DASM_DISP) {
/* Plain code byte. */
ofs++;
} else if (action <= DASM_REL_A) {
/* Actions with one int argument: buffer it and account for its size. */
int n = va_arg(ap, int);
b[pos++] = n;
switch (action) {
case DASM_DISP:
if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
/* Intentional fallthrough: 1 byte if it fits a signed byte, else 4. */
case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
case DASM_IMM_D: ofs += 4; break;
case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
case DASM_SPACE: p++; ofs += n; break;
case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
}
mrm = 4;
} else {
/* Label, relocation and control actions. */
int *pl, n;
switch (action) {
case DASM_REL_LG:
case DASM_IMM_LG:
n = *p++; pl = D->lglabels + n;
if (n <= 246) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */
pl -= 246; n = *pl;
if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
goto linkrel;
case DASM_REL_PC:
case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
putrel:
n = *pl;
if (n < 0) { /* Label exists. Get label pos and store it. */
b[pos] = -n;
} else {
linkrel:
b[pos] = n; /* Else link to rel chain, anchored at label. */
*pl = pos;
}
pos++;
ofs += 4; /* Maximum offset needed. */
if (action == DASM_REL_LG || action == DASM_REL_PC)
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
putlabel:
n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
*pl = -pos; /* Label exists now. */
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
case DASM_ALIGN:
ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
case DASM_EXTERN: p += 2; ofs += 4; break;
case DASM_ESC: p++; ofs++; break;
case DASM_MARK: mrm = p[-2]; break;
case DASM_SECTION:
n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n];
/* Intentional fallthrough: a section switch also ends this put. */
case DASM_STOP: goto stop;
}
}
}
stop:
va_end(ap);
/* Write back to the section this put started in (not the new active one). */
sec->pos = pos;
sec->ofs = ofs;
}
#undef CK
/*
** Pass 2: Link sections, shrink branches/aligns, fix label offsets.
** Replays the buffered actions of every section in order, turning the
** pass 1 offset estimates into final offsets. The total code size is
** stored in D->codesize and through `szp`.
** Returns DASM_S_OK. With DASM_CHECKS defined it instead returns a pending
** error status, or DASM_S_UNDEF_PC or'ed with the label number if a PC
** label is still undefined; `*szp` is 0 in those cases.
*/
int dasm_link(Dst_DECL, size_t *szp)
{
dasm_State *D = Dst_REF;
int secnum;
/* Running correction: total bytes saved so far plus prior section sizes. */
int ofs = 0;
#ifdef DASM_CHECKS
*szp = 0;
if (D->status != DASM_S_OK) return D->status;
{
int pc;
for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
}
#endif
{ /* Handle globals not defined in this translation unit. */
int idx;
for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
int n = D->lglabels[idx];
/* Undefined label: Collapse rel chain and replace with marker (< 0). */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
}
}
/* Combine all code sections. No support for data sections (yet). */
for (secnum = 0; secnum < D->maxsection; secnum++) {
dasm_Section *sec = D->sections + secnum;
int *b = sec->rbuf;
int pos = DASM_SEC2POS(secnum);
int lastpos = sec->pos;
while (pos != lastpos) {
/* Each put starts with its action list offset (see dasm_put). */
dasm_ActList p = D->actionlist + b[pos++];
while (1) {
int op, action = *p++;
switch (action) {
case DASM_REL_LG: p++; op = p[-3]; goto rel_pc;
case DASM_REL_PC: op = p[-2]; rel_pc: {
/* 0xe9 can shrink by 3 bytes, opcodes 0x80..0x8f by 4 bytes. */
int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0);
if (shrink) { /* Shrinkable branch opcode? */
int lofs, lpos = b[pos];
if (lpos < 0) goto noshrink; /* Ext global? */
lofs = *DASM_POS2PTR(D, lpos);
if (lpos > pos) { /* Fwd label: add cumulative section offsets. */
int i;
for (i = secnum; i < DASM_POS2SEC(lpos); i++)
lofs += D->sections[i].ofs;
} else {
lofs -= ofs; /* Bkwd label: unfix offset. */
}
lofs -= b[pos+1]; /* Short branch ok? */
if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */
else { noshrink: shrink = 0; } /* No, cannot shrink op. */
}
/* The offset estimate slot is reused to tell pass 3 the shrink amount. */
b[pos+1] = shrink;
pos += 2;
break;
}
/* The remaining actions only need their buffer/action-list slots skipped. */
case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
case DASM_LABEL_LG: p++;
case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
case DASM_EXTERN: p += 2; break;
case DASM_ESC: p++; break;
case DASM_MARK: break;
case DASM_SECTION: case DASM_STOP: goto stop;
}
}
stop: (void)0;
}
ofs += sec->ofs; /* Next section starts right after current section. */
}
D->codesize = ofs; /* Total size of all code sections */
*szp = ofs;
return DASM_S_OK;
}
/*
** Output helpers for pass 3. All of them store through the byte pointer
** `cp` that is local to dasm_encode() and advance it past the stored value.
*/
#define dasmb(x) *cp++ = (unsigned char)(x)
#ifndef DASM_ALIGNED_WRITES
/* Default: store 16/32 bit values with a single write at cp. */
#define dasmw(x) \
do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
#define dasmd(x) \
do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
#else
/*
** DASM_ALIGNED_WRITES: compose the value from single bytes, low byte first.
** Note that these forms evaluate their argument more than once.
*/
#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
#endif
/*
** Pass 3: Encode sections.
** Replays the buffered actions of all sections once more and writes the
** final machine code to `buffer`, one section after the other. The buffer
** must be able to hold the size reported by dasm_link().
** Returns DASM_S_OK, or DASM_S_PHASE if the number of bytes written does
** not match the size computed in pass 2.
*/
int dasm_encode(Dst_DECL, void *buffer)
{
dasm_State *D = Dst_REF;
unsigned char *base = (unsigned char *)buffer;
unsigned char *cp = base;
int secnum;
/* Encode all code sections. No support for data sections (yet). */
for (secnum = 0; secnum < D->maxsection; secnum++) {
dasm_Section *sec = D->sections + secnum;
int *b = sec->buf;
int *endb = sec->rbuf + sec->pos;
while (b != endb) {
dasm_ActList p = D->actionlist + *b++;
/* Position remembered by DASM_MARK / DASM_DISP for later opcode patching. */
unsigned char *mark = NULL;
while (1) {
int action = *p++;
/* Actions in DASM_DISP..DASM_ALIGN have one buffered int; fetch it. */
int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0;
switch (action) {
case DASM_DISP: if (!mark) mark = cp; {
unsigned char *mm = mark;
if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL;
if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7;
if (mrm != 5) { mm[-1] -= 0x80; break; } }
if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
}
/* Intentional fallthrough: emit the displacement as a single byte. */
case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
case DASM_IMM_DB: if (((n+128)&-256) == 0) {
db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
} else mark = NULL;
case DASM_IMM_D: wd: dasmd(n); break;
case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
case DASM_IMM_W: dasmw(n); break;
case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
/* n < 0: marker for a global not defined here (set by dasm_link). */
b++; n = (int)(ptrdiff_t)D->globals[-n];
case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
case DASM_REL_PC: rel_pc: {
/* Second slot holds the shrink amount decided in pass 2. */
int shrink = *b++;
int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
n = *pb - ((int)(cp-base) + 4-shrink);
if (shrink == 0) goto wd;
/* Rewrite the already emitted opcode into its short form. */
if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb;
goto wb;
}
case DASM_IMM_LG:
p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
case DASM_IMM_PC: {
int *pb = DASM_POS2PTR(D, n);
n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
goto wd;
}
case DASM_LABEL_LG: {
int idx = *p++;
/* Only globals (idx >= 10) are exported; D->globals is biased by -10. */
if (idx >= 10)
D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
break;
}
case DASM_LABEL_PC: case DASM_SETLABEL: break;
case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; }
case DASM_ALIGN:
n = *p++;
while (((cp-base) & n)) *cp++ = 0x90; /* nop */
break;
case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
case DASM_MARK: mark = cp; break;
/* Escaped byte: the next action list byte is emitted as code. */
case DASM_ESC: action = *p++;
default: *cp++ = action; break;
case DASM_SECTION: case DASM_STOP: goto stop;
}
}
stop: (void)0;
}
}
if (base + D->codesize != cp) /* Check for phase errors. */
return DASM_S_PHASE;
return DASM_S_OK;
}
/*
** Get PC label offset.
** Yields the value stored for PC label `pc` if the label is defined, -1 if
** it is undefined and -2 if the slot is unused or `pc` is out of range.
*/
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  int slot = 0;  /* 0 doubles as "unused" for out-of-range labels. */
  if (pc*sizeof(int) < D->pcsize)
    slot = D->pclabels[pc];
  if (slot < 0)
    return *DASM_POS2PTR(D, -slot);
  return slot > 0 ? -1 /* Undefined. */ : -2 /* Unused or out of range. */;
}
#ifdef DASM_CHECKS
/*
** Optional sanity checker to call between isolated encoding steps.
** As long as the status is OK, local label slots 1..9 are checked: a
** positive entry flags DASM_S_UNDEF_L with the label number, any other entry
** is reset to 0. A non-negative `secmatch` additionally requires the active
** section to be section `secmatch`, else DASM_S_MATCH_SEC is flagged with
** the index of the active section. Returns the resulting status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  int lbl;
  for (lbl = 1; D->status == DASM_S_OK && lbl <= 9; lbl++) {
    if (D->lglabels[lbl] > 0)
      D->status = DASM_S_UNDEF_L|lbl;  /* Ends the loop via the status test. */
    else
      D->lglabels[lbl] = 0;
  }
  if (D->status != DASM_S_OK || secmatch < 0)
    return D->status;
  if (D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
  return D->status;
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -165,6 +165,7 @@ typedef uint8_t upb_valuetype_t;
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
#define UPB_TYPE_ENDGROUP 35
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.

@ -11,16 +11,13 @@
#include "upb_decoder.h"
#include "upb_varint_decoder.h"
// If the return value is other than UPB_CONTINUE, that is what the last
// callback returned.
typedef struct {
upb_flow_t flow;
const char *ptr;
} fastdecode_ret;
extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
upb_value_handler_t value_cb, void *closure,
void *table, int table_size);
#ifdef UPB_USE_JIT_X64
#define Dst_DECL upb_decoder *d
#define Dst_REF (d->dynasm)
#define Dst (d)
#include "dynasm/dasm_proto.h"
#include "upb_decoder_x86.h"
#endif
/* Decoding/Buffering of individual values ************************************/
@ -28,10 +25,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
// Zig-zag decoding: the lowest bit of the input selects whether the remaining
// bits (shifted down by one) are XOR'ed with all zeros or with all ones.
INLINE int32_t upb_zzdec_32(uint32_t n) {
  uint32_t shifted = n >> 1;
  int32_t flip = -(int32_t)(n & 1);
  return shifted ^ flip;
}
INLINE int64_t upb_zzdec_64(uint64_t n) {
  uint64_t shifted = n >> 1;
  int64_t flip = -(int64_t)(n & 1);
  return shifted ^ flip;
}
// Constant used to signal that the submessage is a group and therefore we
// don't know its end offset. This cannot be the offset of a real submessage
// end because it takes at least one byte to begin a submessage.
#define UPB_GROUP_END_OFFSET 0
#define UPB_MAX_VARINT_ENCODED_SIZE 10
INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
@ -54,6 +47,32 @@ INLINE void upb_dstate_setmsgend(upb_decoder *d) {
(void*)UINTPTR_MAX : d->buf + end_offset;
}
// Pulls the next buffer from the bytesrc. Should be called only when the
// current buffer is completely empty.
// Returns true if a new buffer was obtained; d->buf/d->ptr/d->end/d->jit_end
// then describe it. Returns false if the bytesrc yielded nothing; d->buf and
// d->end are NULL in that case.
// NOTE(review): on the failure path d->ptr and d->jit_end keep their old
// values -- confirm no caller reads them afterwards.
static bool upb_pullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
// -1 means there was no previous buffer (d->buf is NULL).
int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
upb_string_recycle(&d->bufstr);
if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
d->buf = NULL;
d->end = NULL;
return false;
}
if (last_buf_len != -1) {
// Account for the consumed buffer in the stream offset, and shift every
// known frame end offset by the same amount (UINT32_MAX entries are left
// alone).
d->buf_stream_offset += last_buf_len;
for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
if (f->end_offset != UINT32_MAX)
f->end_offset -= last_buf_len;
}
d->buf = upb_string_getrobuf(d->bufstr);
d->ptr = upb_string_getrobuf(d->bufstr);
d->end = d->buf + upb_string_len(d->bufstr);
// Currently the JIT may read up to the real end of the buffer; the
// alternative limit of d->end - 12 is disabled.
d->jit_end = d->end; //d->end - 12;
upb_string_substr(d->tmp, d->bufstr, 0, 0);
upb_dstate_setmsgend(d);
return true;
}
}
// Called only from the slow path, this function copies the next "len" bytes
// from the stream to "data", adjusting the dstate appropriately.
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
@ -62,27 +81,8 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
memcpy(data, d->ptr, to_copy);
upb_decoder_advance(d, to_copy);
bytes_wanted -= to_copy;
if (bytes_wanted == 0) {
upb_dstate_setmsgend(d);
return true;
}
// Get next buffer.
int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
upb_string_recycle(&d->bufstr);
if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
d->buf = NULL;
return false;
}
if (last_buf_len != -1) {
d->buf_stream_offset += last_buf_len;
for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
if (f->end_offset != UINT32_MAX)
f->end_offset -= last_buf_len;
}
d->buf = upb_string_getrobuf(d->bufstr);
d->ptr = upb_string_getrobuf(d->bufstr);
d->end = d->buf + upb_string_len(d->bufstr);
if (bytes_wanted == 0) return true;
if (!upb_pullbuf(d)) return false;
}
}
@ -143,7 +143,7 @@ done:
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
upb_decoderet r = upb_decode_varint_fast(d->ptr);
upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
return false;
@ -229,6 +229,7 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
}
#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; }
CHECK(upb_pullbuf(d));
if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err;
// Main loop: executed once per tag/field pair.
@ -244,14 +245,13 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
#ifdef USE_X64_FASTPATH
const char *end = UPB_MIN(d->end, d->submsg_end);
fastdecode_ret ret = upb_fastdecode(d->ptr, end,
d->dispatcher.top->handlers.set->value,
d->dispatcher.top->handlers.closure,
d->msgdef->itof.array,
d->msgdef->itof.array_size);
CHECK_FLOW(ret.flow);
#ifdef UPB_USE_JIT_X64
void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
if (d->dispatcher.handlers->should_jit && d->buf) {
//fprintf(stderr, "Entering JIT, ptr: %p\n", d->ptr);
upb_jit_decode(d);
//fprintf(stderr, "Exiting JIT, ptr: %p\n", d->ptr);
}
#endif
// Parse/handle tag.
@ -354,9 +354,13 @@ err:
// Initializes decoder `d` to dispatch to `handlers`. The input buffer state
// starts out empty (no buffer string, no current buffer) and d->tmp is set up
// through upb_string_recycle(). With UPB_USE_JIT_X64 defined, the JIT is
// built here as well.
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(&d->dispatcher, handlers);
#ifdef UPB_USE_JIT_X64
// NOTE(review): this runs before bufstr/buf/tmp are initialized below;
// presumably upb_decoder_makejit() only needs the dispatcher -- confirm.
upb_decoder_makejit(d);
#endif
d->bufstr = NULL;
d->buf = NULL;
// d->tmp must be NULL before it is handed to upb_string_recycle().
d->tmp = NULL;
upb_string_recycle(&d->tmp);
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
@ -373,4 +377,7 @@ void upb_decoder_uninit(upb_decoder *d) {
upb_dispatcher_uninit(&d->dispatcher);
upb_string_unref(d->bufstr);
upb_string_unref(d->tmp);
#ifdef UPB_USE_JIT_X64
upb_decoder_freejit(d);
#endif
}

@ -27,13 +27,12 @@ extern "C" {
/* upb_decoder *****************************************************************/
struct dasm_State;
struct _upb_decoder {
// Bytesrc from which we pull serialized data.
upb_bytesrc *bytesrc;
// Dispatcher to which we push parsed data.
upb_dispatcher dispatcher;
// String to hold our input buffer; is only active if d->buf != NULL.
upb_string *bufstr;
@ -48,6 +47,7 @@ struct _upb_decoder {
// End of this buffer, relative to *ptr.
const char *end;
const char *jit_end;
// Members which may also be written by the JIT:
@ -57,8 +57,21 @@ struct _upb_decoder {
// End of this submessage, relative to *ptr.
const char *submsg_end;
// MIN(end, submsg_end)
const char *effective_end;
// Where we will store any errors that occur.
upb_status *status;
// Dispatcher to which we push parsed data.
upb_dispatcher dispatcher;
// JIT-generated machine code (else NULL).
char *jit_code;
size_t jit_size;
char *debug_info;
struct dasm_State *dynasm;
};
// A upb_decoder decodes the binary protocol buffer format, writing the data it

@ -1,228 +0,0 @@
DEFAULT REL ; Default to RIP-relative addressing instead of absolute.
extern _upb_decode_varint_fast64
SECTION .data
; Our dispatch table; used to jump to the right handler, keyed on the field's
; type.
; Every entry is an 8-byte code address. The dispatch macro indexes the table
; with the field type byte times 8 (jmp [rax+rdx*8]), so the order of the
; entries below has to match the numeric field type values, starting at 0.
dispatch_table:
dq _upb_fastdecode.cant_fast_path ; field not in table (type == 0). (check_4).
dq _upb_fastdecode.fixed64 ; double
dq _upb_fastdecode.fixed32 ; float
dq _upb_fastdecode.varint ; int64
dq _upb_fastdecode.varint ; uint64
dq _upb_fastdecode.varint ; int32
dq _upb_fastdecode.fixed64 ; fixed64
dq _upb_fastdecode.fixed32 ; fixed32
dq _upb_fastdecode.varint ; bool
dq _upb_fastdecode.string ; string
dq _upb_fastdecode.cant_fast_path ; group (check_6)
dq _upb_fastdecode.cant_fast_path ; message
dq _upb_fastdecode.string ; bytes
dq _upb_fastdecode.varint ; uint32
dq _upb_fastdecode.varint ; enum
dq _upb_fastdecode.fixed32 ; sfixed32
dq _upb_fastdecode.fixed64 ; sfixed64
dq _upb_fastdecode.varint_sint32 ; sint32
dq _upb_fastdecode.varint_sint64 ; sint64
; NOTE(review): no label named _upb_decode_fast is defined in this file; the
; entry point further down is _upb_fastdecode (which has its own "global").
GLOBAL _upb_decode_fast
SECTION .text
; Register allocation.
; NOTE(review): the code below also uses the names FIELDDEF and CALLBACK,
; which are not %define'd anywhere in this file.
%define BUF rbx ; const char *p, current buf position.
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
%define STRING r12 ; unused
%define FVAL r13 ; upb_value fval, needs to be preserved across varint decoding call.
%define UNUSED r14
%define CLOSURE r15
; Stack layout: *tableptr, committed buf pointer, uint32_t maxfield_times_8
; (one 8-byte slot each, at rsp+0, rsp+8 and rsp+16).
%define STACK_SPACE 24 ; this value + 8 must be a multiple of 16.
%define TABLE_SPILL [rsp] ; our lookup table, indexed by field number.
%define COMMITTED_BUF_SPILL [rsp+8]
%define MAXFIELD_TIMES_8_SPILL [rsp+16]
; Executing the fast path requires the following conditions:
; - check_1: there are >=12 bytes left (<=2 byte tag and <=10 byte varint).
; - check_2: the tag is <= 2 bytes.
; - check_3: the field number is <= the table size
; (ie. it must be an array lookup, not a hash lookup).
; - check_4: the field is known (found in the table).
; - check_5: the wire type we read is correct for the field number,
; ("packed" fields are not accepted, yet. this could be handled
; efficiently by doing an extra check on the "type check failed"
; path that goes into a tight loop if the encoding was packed).
; - check_6: the field is not a group or a message (or string, TODO)
; (this could be relaxed, but due to delegation it's a bit tricky).
; - check_7: if the value is a string, the entire string is available in
; the buffer, and our cached string object can be recycled, and
; our string object already references the source buffer, so
; absolutely no refcount twiddling is required.
; decode_and_dispatch_: emits the shared ".decode_and_dispatch" code. It reads
; the next tag (1 or 2 bytes), looks the field up in the table and jumps to
; the handler for its type through dispatch_table. Any condition the fast
; path cannot handle branches to .cant_fast_path.
; On the way out: BUF points past the tag, ecx holds the wire type, FIELDDEF
; holds the fielddef pointer loaded from the table entry, and rdi also points
; past the tag (the varint handlers rely on that).
; The macro defines a label, so it may be expanded only once per function.
%macro decode_and_dispatch_ 0
align 16
.decode_and_dispatch:
  ; Load a few values we'll need in a sec.
  mov r8, TABLE_SPILL
  mov r9d, MAXFIELD_TIMES_8_SPILL
  mov rax, END
  sub rax, BUF
  cmp rax, 12
  jb _upb_fastdecode.cant_fast_path ; check_1 (<12 bytes left).
  ; Decode a 1 or 2-byte varint -> eax.
  mov cl, byte [BUF]
  lea rdi, [BUF+1]
  movzx eax, cl
  and eax, 0x7f
  test cl, cl
  jns .one_byte_tag ; Should be predictable if fields are in order.
  movzx ecx, byte [BUF+1]
  lea rdi, [BUF+2]
  mov edx, ecx
  and edx, 0x7f
  shl edx, 7
  or eax, edx
  ; The continuation bit of the second byte is bit 7 of cl. (Bit 7 of al is
  ; bit 0 of the second byte's payload and says nothing about the length.)
  test cl, cl
  js _upb_fastdecode.cant_fast_path ; check_2 (tag was >2 bytes).
.one_byte_tag:
  mov BUF, rdi
  ; Decode tag and dispatch.
  mov ecx, eax
  and eax, 0x3ff8 ; eax now contains field number * 8
  lea r11, [r8+rax*2] ; *2 is really *16, since rax is already *8.
  and ecx, 0x7 ; ecx now contains wire type.
  cmp eax, r9d
  jae _upb_fastdecode.cant_fast_path ; check_3 (field number > table size)
  mov FIELDDEF, [r11+8] ; Lookup fielddef (upb_itof_ent.f)
  movzx rdx, BYTE [r11+1] ; Lookup field type.
  mov rax, qword dispatch_table
  jmp [rax+rdx*8]
%endmacro
; decode_and_dispatch: jumps to the single copy emitted by the macro above.
%macro decode_and_dispatch 0
  jmp .decode_and_dispatch
%endmacro
; call_callback: invokes the value callback as
;   CALLBACK(CLOSURE, FIELDDEF, value in rdx, 33)
; and, after it returns, records BUF as the committed buffer position.
; A non-zero return in eax leaves the fast path through .done.
%macro call_callback 0
; Value arg must already be in rdx when macro is called.
mov rdi, CLOSURE
mov rsi, FIELDDEF
mov rcx, 33 ; RAW; we could pass the correct type, or only do this in non-debug modes.
call CALLBACK
mov COMMITTED_BUF_SPILL, BUF
cmp eax, 0
jne .done ; Caller requested BREAK or SKIPSUBMSG.
%endmacro
; check_type: bails out to the slow path unless the wire type in ecx equals
; the macro argument.
; NOTE(review): none of the handlers in this file expands this macro, so
; check_5 does not appear to be enforced -- confirm.
%macro check_type 1
cmp ecx, %1
jne _upb_fastdecode.cant_fast_path ; check_5 (wire type check failed).
%endmacro
; extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
;                                      upb_value_handler_t value_cb, void *closure,
;                                      void *table, int table_size);
; Decodes as many fields as the fast path can handle, starting at p.
; Returns the flow value in rax (0 unless a callback returned something else)
; and the committed buffer pointer in rdx, i.e. the position after the last
; value that was delivered to the callback.
align 16
global _upb_fastdecode
_upb_fastdecode:
; We use all callee-save regs.
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp, STACK_SPACE
; Parse arguments into reg vals and stack.
mov BUF, rdi
mov COMMITTED_BUF_SPILL, rdi
mov END, rsi
mov CALLBACK, rdx
mov CLOSURE, rcx
mov TABLE_SPILL, r8
shl r9, 3
mov MAXFIELD_TIMES_8_SPILL, r9
decode_and_dispatch
; Each handler below is entered from the dispatch code via dispatch_table.
align 16
.varint:
call _upb_decode_varint_fast64 ; BUF is already in rdi.
test rax, rax
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
mov BUF, rax
call_callback ; rdx already holds value.
; The one and only expansion of the dispatch code; execution falls into it.
decode_and_dispatch_
align 16
.fixed32:
mov edx, DWORD [BUF] ; Might be unaligned, but that's ok.
add BUF, 4
call_callback
decode_and_dispatch
align 16
.fixed64:
mov rdx, QWORD [BUF] ; Might be unaligned, but that's ok.
add BUF, 8
call_callback
decode_and_dispatch
align 16
.varint_sint32:
call _upb_decode_varint_fast64 ; BUF is already in rdi.
test rax, rax
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
mov BUF, rax
; Perform 32-bit zig-zag decoding.
mov ecx, edx
shr edx, 1
and ecx, 0x1
neg ecx
xor edx, ecx
call_callback
decode_and_dispatch
align 16
.varint_sint64:
call _upb_decode_varint_fast64 ; BUF is already in rdi.
test rax, rax
jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
mov BUF, rax
; Perform 64-bit zig-zag decoding.
mov rcx, rdx
shr rdx, 1
and ecx, 0x1
neg rcx
xor rdx, rcx
call_callback
decode_and_dispatch
align 16
; Strings are not handled here yet: .string shares the bail-out path.
.string:
.cant_fast_path:
mov rax, 0 ; UPB_CONTINUE -- continue as before.
.done:
; If coming via done, preserve the user callback's return in rax.
; Return committed buf pointer as second parameter.
mov rdx, COMMITTED_BUF_SPILL
add rsp, STACK_SPACE
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret

@ -0,0 +1,649 @@
|//
|// upb - a minimalist implementation of protocol buffers.
|//
|// Copyright (c) 2011 Google Inc. See LICENSE for details.
|// Author: Josh Haberman <jhaberman@gmail.com>
|//
|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
|// generates code specialized to parsing the specific message and
|// calling specific handlers.
#define UPB_NONE -1
#define UPB_MULTIPLE -2
#define UPB_TOPLEVEL_ONE -3
#include <sys/mman.h>
#include "dynasm/dasm_proto.h"
#include "dynasm/dasm_x86.h"
// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
// at runtime. GDB 7.x+ has defined an interface for doing this, and these
// structure/function definitions are copied out of gdb/jit.h
//
// We need to give GDB an ELF file at runtime describing the symbols we have
// generated. To avoid implementing the ELF format, we generate an ELF file
// at compile-time and compile it in as a character string. We can replace
// a few key constants (address of JIT-ted function and its size) by looking
// for a few magic numbers and doing a dumb string replacement.
#include "jit_debug_elf_file.h"
// Values for gdb_jit_descriptor.action_flag.
typedef enum
{
GDB_JIT_NOACTION = 0,
GDB_JIT_REGISTER,
GDB_JIT_UNREGISTER
} jit_actions_t;
// One node of a doubly linked list; each node points at an in-memory symbol
// file (here: the ELF image described above) and gives its size.
typedef struct gdb_jit_entry {
struct gdb_jit_entry *next_entry;
struct gdb_jit_entry *prev_entry;
const char *symfile_addr;
uint64_t symfile_size;
} gdb_jit_entry;
// Descriptor through which the debugger finds the list of entries.
typedef struct {
uint32_t version;
uint32_t action_flag;
gdb_jit_entry *relevant_entry;
gdb_jit_entry *first_entry;
} gdb_jit_descriptor;
// The single descriptor instance: version 1, no pending action, empty list.
// The symbol names below are part of the GDB interface and must not change.
gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
// Deliberately empty and never inlined; the empty asm statement keeps the
// body from being optimized away. Per the GDB JIT interface this is the
// function to call after updating the descriptor.
void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
|.arch x64
|.actionlist upb_jit_actionlist
|.globals UPB_JIT_GLOBAL_
|.globalnames upb_jit_globalnames
|
// ARG1..ARG3 name the registers used for the first three integer arguments
// of a C call (rdi, rsi, rdx), in each operand width the code needs.
|// Calling conventions.
|.define ARG1_64, rdi
|.define ARG2_8, sil
|.define ARG2_32, esi
|.define ARG2_64, rsi
|.define ARG3_8, dl
|.define ARG3_32, edx
|.define ARG3_64, rdx
|
// PTR     - current parse position (loaded from DECODER->ptr).
// CLOSURE - closure handed to the user's handlers (FRAME->closure).
// FRAME   - current upb_dispatcher_frame (DECODER->dispatcher.top).
// STRING  - upb_string used to deliver string values (DECODER->tmp).
// DECODER - the upb_decoder passed to the JIT-ted function.
// Note that upb_decoder_jit_field also borrows r12d as scratch for a
// submessage length while a new frame is being pushed.
|// Register allocation / type map.
|// ALL of the code in this file uses these register allocations.
|// When we "call" within this file, we do not use regular calling
|// conventions, but of course when calling to user callbacks we must.
|.define PTR, rbx
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
|.type STRING, upb_string, r14
|.type DECODER, upb_decoder, r15
|
// Emits a call to the C function at "addr". Addresses below 0xffffffff are
// called directly; anything else is loaded into rax with mov64 and called
// indirectly (so rax is clobbered on that path).
// NOTE(review): the direct form is a relative call, so it presumably also
// depends on the distance between the JIT-ted code and "addr", not only on
// the absolute value of "addr" -- confirm.
|.macro callp, addr
|| if ((uintptr_t)addr < 0xffffffff) {
| call &addr
|| } else {
| mov64 rax, (uintptr_t)addr
| call rax
|| }
|.endmacro
|
// When PTR >= DECODER->effective_end: code for a group exits the JIT, while
// code for a length-delimited message jumps to the message's end-of-buffer
// label, which tells end-of-submessage apart from a real end of buffer.
|// Checks PTR for end-of-buffer.
|.macro check_eob, m
| cmp PTR, DECODER->effective_end
|| if (m->is_group) {
| jae ->exit_jit
|| } else {
| jae =>m->jit_endofbuf_pclabel
|| }
|.endmacro
|
// Expects ecx to already hold the dword at [PTR + offset] (decode_varint
// below performs that load). One- and two-byte varints are decoded inline;
// longer ones are passed to upb_vdecode_max8_fast, and the JIT is exited if
// that call leaves 0 in rax. Clobbers rsi, and rdi on the slow path.
// NOTE(review): the slow path presumably relies on upb_vdecode_max8_fast
// returning the new pointer in rax and the decoded value in rdx -- confirm
// against upb_varint_decoder.h.
|// Decodes varint from [PTR + offset] -> ARG3.
|// Saves new pointer as rax.
|.macro decode_loaded_varint, offset
| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
| lea rax, [PTR + offset + 1]
| mov ARG3_32, ecx
| and ARG3_32, 0x7f
| test cl, cl
| jns >9
| lea rax, [PTR + offset + 2]
| movzx esi, ch
| and esi, 0x7f
| shl esi, 7
| or ARG3_32, esi
| test cx, cx
| jns >9
| mov ARG1_64, rax
| mov ARG2_32, ARG3_32
| callp upb_vdecode_max8_fast
| test rax, rax
| jz ->exit_jit // >10-byte varint.
|9:
|.endmacro
|
// Loads the dword at [PTR + offset], decodes a varint from it into ARG3 and
// advances PTR past the varint.
|.macro decode_varint, offset
| mov ecx, dword [PTR + offset]
| decode_loaded_varint offset
| mov PTR, rax
|.endmacro
|
// Expects ecx to hold the dword at [PTR]. Afterwards ecx holds the field
// number (tag >> 3), which indexes m->tablearray, and edx holds the wire type
// (tag & 7), which the target field code checks. PTR itself is not advanced
// here; the field code skips the tag. Field numbers above
// m->max_field_number exit the JIT.
// NOTE(review): m->tablearray is used as an address displacement, which
// presumably requires the table to live at a 32-bit address -- confirm.
|// Decode the tag -> edx.
|// Could specialize this by avoiding the value masking: could just key the
|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
|// Currently only support tables where all entries are in the array part.
|.macro dyndispatch, m
| decode_loaded_varint, 0
| mov ecx, edx
| shr ecx, 3
| and edx, 0x7
| cmp ecx, m->max_field_number // Bounds-check the field.
| ja ->exit_jit // In the future; could be unknown label
| mov rcx, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables.
| jmp rcx // Dispatch: unpredictable jump.
|.endmacro
|
// Recomputes DECODER->submsg_end and DECODER->effective_end for message "m".
// Groups have no known length, so submsg_end is set to the maximum address
// and effective_end is simply jit_end. For length-delimited messages,
// submsg_end is derived from the current frame's end_offset and effective_end
// is the smaller of submsg_end and jit_end. Clobbers rax and rsi.
|.macro setmsgend, m
| mov rsi, DECODER->jit_end
|| if (m->is_group) {
| mov64 rax, 0xffffffffffffffff
| mov qword DECODER->submsg_end, rax
| mov DECODER->effective_end, rsi
|| } else {
| // Could store a correctly-biased version in the frame, at the cost of
| // a larger stack.
| mov eax, dword FRAME->end_offset
| add rax, qword DECODER->buf
| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset
| cmp rax, rsi
| jb >1
| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end)
|1:
| mov DECODER->effective_end, rax
|| }
|.endmacro
|
|// rcx contains the bytes at PTR (the next tag on the wire); compare them
|// against "tag", but since it is a varint we must only compare as many
|// bytes as actually have data. The 3- and 5-byte forms mask rcx in place
|// (the 5-byte form also clobbers rdx). The flags are left for the caller
|// to branch on.
|.macro checktag, tag
|| switch (upb_value_size(tag)) {
|| case 1:
| cmp cl, tag
|| break;
|| case 2:
| cmp cx, tag
|| break;
|| case 3:
| and ecx, 0xffffff // 3 bytes
| cmp rcx, tag
|| break;  // Was missing: fell through and emitted the 4-byte compare too.
|| case 4:
| cmp ecx, tag
|| break;
|| case 5:
| mov64 rdx, 0xffffffffff // 5 bytes
| and rcx, rdx
| cmp rcx, tag
|| break;
|| default: abort();
|| }
|.endmacro
|
|// Loads the field's bound value (fval) into ARG2 for a handler call.
|// A plain "mov r64, imm" only encodes a sign-extended 32-bit immediate, so a
|// 64-bit fval (e.g. a pointer) must be loaded with mov64, as callp does for
|// function addresses.
|// TODO: use a shorter 32-bit mov for values that fit.
|.macro loadfval, f
|| if (f->fval.val.uint64 == 0) {
| xor ARG2_32, ARG2_32
|| } else {
| mov64 ARG2_64, f->fval.val.uint64
|| }
|.endmacro
#include <stdlib.h>
#include "upb_varint_decoder.h"
// Returns the number of bytes needed to hold "val", i.e. the index of its
// highest non-zero byte plus one. Zero is reported as occupying one byte.
static size_t upb_value_size(uint64_t val) {
  // Handle zero first: __builtin_clzll(0) is undefined.
  if (val == 0) return 1;
#ifdef __GNUC__
  int high_bit = 63 - __builtin_clzll(val);  // 0-based.
#else
  int high_bit = 0;
  uint64_t tmp = val;
  while (tmp >>= 1) high_bit++;
#endif
  return high_bit / 8 + 1;
}
// Encodes "val" as a varint and returns the encoded bytes packed into a
// uint64_t, first wire byte in the least significant byte (i.e. the value a
// little-endian load of the encoded bytes would produce).
//
// Only the first 8 encoded bytes fit in the result, so "val" must be below
// 2^56; larger values are truncated to their first 8 encoded bytes.
static uint64_t upb_encode_varint(uint64_t val)
{
  uint64_t ret = 0;
  for (int bitpos = 0; val && bitpos < 64; bitpos += 8, val >>= 7) {
    // Set the continuation bit of the previous byte. The shift must be done
    // in 64 bits: an int-typed "1 << 31" sign-extends and wider shifts are
    // undefined.
    if (bitpos > 0) ret |= (UINT64_C(1) << (bitpos - 1));
    ret |= (val & 0x7f) << bitpos;
  }
  return ret;
}
// Emits the code that parses one field "f" of message "m" and delivers it to
// the field's handler(s).
//
//   tag      - varint-encoded tag of "f" as it appears on the wire.
//   next_tag - varint-encoded tag of "next_f", or 0 if there is none.
//   m        - the message that contains "f".
//   f        - the field to generate code for.
//   next_f   - the field guessed to follow "f"; only used if next_tag != 0.
//
// In the generated code, PTR should point to the beginning of the tag.
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
                                  upb_handlers_msgent *m,
                                  upb_handlers_fieldent *f, upb_handlers_fieldent *next_f) {
  int tag_size = upb_value_size(tag);

  // PC-label for the dispatch table.
  // We check the wire type (which must be loaded in edx) because the
  // table is keyed on field number, not type.
|=>f->jit_pclabel:
| cmp edx, upb_types[f->type].native_wire_type
| jne ->exit_jit // In the future: could be an unknown field.
|=>f->jit_pclabel_notypecheck:
|1: // Label for repeating this field.

  // Decode the value into arg 3 for the callback.
  switch (f->type) {
    case UPB_TYPE(DOUBLE):
    case UPB_TYPE(FIXED64):
    case UPB_TYPE(SFIXED64):
| mov ARG3_64, qword [PTR + tag_size]
| add PTR, 8 + tag_size
      break;

    case UPB_TYPE(FLOAT):
    case UPB_TYPE(FIXED32):
    case UPB_TYPE(SFIXED32):
| mov ARG3_32, dword [PTR + tag_size]
| add PTR, 4 + tag_size
      break;

    case UPB_TYPE(BOOL):
      // Can't assume it's one byte long, because bool must be wire-compatible
      // with all of the varint integer types.
| decode_varint tag_size
| test ARG3_64, ARG3_64
| setne ARG3_8 // Other bytes left with val, should be ok.
      break;

    case UPB_TYPE(INT64):
    case UPB_TYPE(UINT64):
    case UPB_TYPE(INT32):
    case UPB_TYPE(UINT32):
    case UPB_TYPE(ENUM):
| decode_varint tag_size
      break;

    case UPB_TYPE(SINT64):
      // 64-bit zig-zag decoding.
| decode_varint tag_size
| mov rax, ARG3_64
| shr ARG3_64, 1
| and rax, 1
| neg rax
| xor ARG3_64, rax
      break;

    case UPB_TYPE(SINT32):
      // 32-bit zig-zag decoding.
| decode_varint tag_size
| mov eax, ARG3_32
| shr ARG3_32, 1
| and eax, 1
| neg eax
| xor ARG3_32, eax
      break;

    case UPB_TYPE(STRING):
    case UPB_TYPE(BYTES):
      // We only handle the case where the entire string is in our current
      // buf, which sidesteps any security problems. The C path has more
      // robust checks.
| decode_varint tag_size
| mov STRING->len, ARG3_32
| mov STRING->ptr, PTR
| add PTR, ARG3_64
| mov ARG3_64, STRING
| cmp PTR, DECODER->effective_end
| ja ->exit_jit // Can't deliver, whole string not in buf.
      break;

    case UPB_TYPE_ENDGROUP: // A pseudo-type.
| add PTR, tag_size
| mov DECODER->ptr, PTR
| jmp =>m->jit_endofmsg_pclabel
      return;

    case UPB_TYPE(MESSAGE):
      // Decode the delimited length into ARG3; PTR ends up at the first byte
      // of the submessage.
| decode_varint tag_size
      break;

    case UPB_TYPE(GROUP):
      // A group has no length prefix, but the start-group tag itself must
      // still be skipped so that the submessage code starts at the group's
      // first field rather than re-reading this tag.
| add PTR, tag_size
      // Will dispatch callbacks and call submessage in a second.
      break;

    default: abort();
  }

  // Commit our work by advancing ptr.
  // (If in the future we wanted to support a UPB_SUSPEND_AGAIN that
  // suspends the decoder and redelivers the value later, we would
  // need to adjust this to happen perhaps after the callback ran).
| mov DECODER->ptr, PTR

  // Load closure and fval into arg registers.
| mov ARG1_64, CLOSURE
| loadfval f

  // Call callbacks.
  if (upb_issubmsgtype(f->type)) {
    // Call startsubmsg handler (if any). In both branches the delimited
    // length (edx) is parked in r12d and the closure for the submessage ends
    // up in rdx. r12 (CLOSURE) may be overwritten because the parent closure
    // was already copied to ARG1 and CLOSURE is reloaded from rdx below.
    if (f->cb.startsubmsg != upb_startsubmsg_nop) {
      // upb_sflow_t startsubmsg(void *closure, upb_value fval)
| mov r12d, ARG3_32
| callp f->cb.startsubmsg
    } else {
      // No handler: the submessage inherits the parent's closure. rdx and r12
      // swap roles here, so go through eax (dead until the lea below);
      // otherwise the length in edx is overwritten before it is saved.
| mov eax, ARG3_32
| mov rdx, CLOSURE
| mov r12d, eax
    }

    // Push a stack frame (not the CPU stack, the upb_decoder stack).
| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing.
| cmp rax, qword DECODER->dispatcher.limit
| jae ->exit_jit // Frame stack overflow.
| mov qword FRAME:rax->f, f
| mov qword FRAME:rax->closure, rdx
| mov rsi, PTR
| sub rsi, DECODER->buf
| add r12d, esi
| mov dword FRAME:rax->end_offset, r12d // = (d->ptr - d->buf) + delim_len
| mov CLOSURE, rdx
| mov DECODER->dispatcher.top, rax
| mov FRAME, rax

    // A submessage type with a single referring field is entered with a jump
    // and jumps back to our "done" label; one shared by several fields is
    // entered with a call and returns.
    upb_handlers_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f);
    if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
| jmp =>sub_m->jit_startmsg_pclabel;
    } else {
| call =>sub_m->jit_startmsg_pclabel;
    }

|=>f->jit_submsg_done_pclabel:
    // Pop a stack frame.
| sub FRAME, sizeof(upb_dispatcher_frame)
| mov DECODER->dispatcher.top, FRAME
| setmsgend m
| mov CLOSURE, FRAME->closure

    // Call endsubmsg handler (if any).
    if (f->endsubmsg != upb_endsubmsg_nop) {
      // upb_flow_t endsubmsg(void *closure, upb_value fval);
| mov ARG1_64, CLOSURE
| loadfval f
| callp f->endsubmsg
    }
  } else {
| callp f->cb.value
  }
  // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK

  // Epilogue: load next tag, check for repeated field.
| check_eob m
| mov rcx, qword [PTR]
  if (f->repeated) {
| checktag tag
| je <1
  }
  if (next_tag != 0) {
| checktag next_tag
| je =>next_f->jit_pclabel_notypecheck
  }

  // Fall back to dynamic dispatch. Replicate the dispatch
  // here so we can learn what fields generally follow others.
| dyndispatch m
|1:
}
// qsort() comparator for uint32_t elements, ascending order.
// Returns a negative, zero or positive value as *a is less than, equal to or
// greater than *b. The operands are compared rather than subtracted: an
// unsigned difference converted to int has the wrong sign once the values
// are more than INT_MAX apart.
static int upb_compare_uint32(const void *a, const void *b) {
  uint32_t lhs = *(const uint32_t*)a;
  uint32_t rhs = *(const uint32_t*)b;
  return (lhs > rhs) - (lhs < rhs);
}
// Emits all code for message "m": the start-of-message entry point, the
// parsing code for each of its fields (in ascending key order, so each field
// can guess its successor), and the end-of-buffer / end-of-message handling.
static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
|=>m->jit_startmsg_pclabel:
  // Call startmsg handler (if any):
  if (m->startmsg != upb_startmsg_nop) {
    // upb_flow_t startmsg(void *closure);
| mov ARG1_64, FRAME->closure
| callp m->startmsg
    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
  }

| setmsgend m
| check_eob m
| mov ecx, dword [PTR]
| dyndispatch m

  // --------- New code section (does not fall through) ------------------------

  // Emit code for parsing each field (dynamic dispatch contains pointers to
  // all of these).

  // Create an ordering over the fields (inttable ordering is undefined).
  int num_keys = upb_inttable_count(&m->fieldtab);
  uint32_t *keys = malloc(num_keys * sizeof(*keys));
  // malloc(0) may legitimately return NULL, so only treat NULL as failure
  // when there is something to store.
  if (num_keys > 0 && !keys) abort();
  int idx = 0;
  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
      i = upb_inttable_next(&m->fieldtab, i)) {
    keys[idx++] = upb_inttable_iter_key(i);
  }
  if (num_keys > 1) qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);

  // Code for a field is emitted one iteration late, once its successor (the
  // "next field" guess) is known.
  upb_handlers_fieldent *last_f = NULL;
  uint32_t last_tag = 0;
  for(int i = 0; i < num_keys; i++) {
    uint32_t key = keys[i];
    upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
    uint32_t tag = upb_encode_varint(key);
    if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
    last_tag = tag;
    last_f = f;
  }
  free(keys);

  // Emit the final pending field. last_f is NULL when the message has no
  // registered fields, in which case there is nothing pending and
  // upb_decoder_jit_field must not be handed a NULL field.
  if (m->is_group) {
    // Create a fake fieldent for handling "end group."
    upb_handlers_fieldent f = {0, UPB_TYPE_ENDGROUP, 0, UPB_NO_VALUE, {NULL}, NULL, 0, 0, 0, false};
    if (last_f) upb_decoder_jit_field(d, last_tag, m->groupnum, m, last_f, &f);
    upb_decoder_jit_field(d, m->groupnum, 0, m, &f, NULL);
  } else if (last_f) {
    upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
  }

  // --------- New code section (does not fall through) ------------------------

  // End-of-buf / end-of-message.
  if (!m->is_group) {
    // This case doesn't exist for groups, because there eob really means
    // eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel:
| cmp PTR, DECODER->submsg_end
| jb ->exit_jit // We are at eob, but not end-of-submsg.
  }

|=>m->jit_endofmsg_pclabel:
  // We are at end-of-submsg: call endmsg handler (if any):
  if (m->endmsg != upb_endmsg_nop) {
    // void endmsg(void *closure, upb_status *status) {
| mov ARG1_64, FRAME->closure
| lea ARG2_64, DECODER->dispatcher.status
| callp m->endmsg
  }

  // Leave the message the same way it was entered (see the sentinel values
  // of jit_parent_field_done_pclabel).
  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
| ret
  } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
| jmp ->exit_jit
  } else {
| jmp =>m->jit_parent_field_done_pclabel
  }
}
// Emits the whole JIT-ted function: a prologue that saves registers and loads
// the fixed register assignments from the upb_decoder passed as the first
// argument, the code for every message in the handlers, the shared exit_jit
// epilogue, and pclabel 0, which calls abort().
static void upb_decoder_jit(upb_decoder *d) {
// Prologue: set up a frame pointer and save the registers that the register
// allocation above uses; exit_jit pops them in the reverse order.
| push rbp
| mov rbp, rsp
| push r15
| push r14
| push r13
| push r12
| push rbx
// Load the fixed register assignments from the decoder (first argument).
| mov DECODER, ARG1_64
| mov FRAME, DECODER:ARG1_64->dispatcher.top
| mov STRING, DECODER:ARG1_64->tmp
| mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr
upb_handlers *h = d->dispatcher.handlers;
// A top-level message that is also referenced by fields ends with "ret", so
// it has to be entered with "call". Otherwise execution simply falls through
// into the first message's code, which is emitted next.
if (h->msgs[0].jit_parent_field_done_pclabel == UPB_MULTIPLE) {
| call =>h->msgs[0].jit_startmsg_pclabel
| jmp ->exit_jit
}
// TODO: push return addresses for re-entry (will be necessary for multiple
// buffer support).
for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, &h->msgs[i]);
// Shared epilogue: restore the saved registers and return to the caller.
|->exit_jit:
| pop rbx
| pop r12
| pop r13
| pop r14
| pop r15
| leave
| ret
// pclabel 0 is where upb_decoder_makejit points dispatch-table slots that
// have no field.
|=>0:
| callp &abort
}
// Reserves three consecutive pclabels for field "f" (type-checked entry,
// unchecked entry, and the submessage-done return point) and advances
// *pclabel_count past them.
void upb_decoder_jit_assignfieldlabs(upb_handlers_fieldent *f,
                                     uint32_t *pclabel_count) {
  uint32_t base = *pclabel_count;
  f->jit_pclabel = base;
  f->jit_pclabel_notypecheck = base + 1;
  f->jit_submsg_done_pclabel = base + 2;
  *pclabel_count = base + 3;
}
// First labelling pass for message "m": reserves the message's own pclabels,
// reserves labels for each of its fields, records the largest key in the
// field table as max_field_number, and allocates the dispatch table.
// Aborts if the dispatch table cannot be allocated.
void upb_decoder_jit_assignmsglabs(upb_handlers_msgent *m,
                                   uint32_t *pclabel_count) {
  m->jit_startmsg_pclabel = (*pclabel_count)++;
  m->jit_endofbuf_pclabel = (*pclabel_count)++;
  m->jit_endofmsg_pclabel = (*pclabel_count)++;
  m->jit_unknownfield_pclabel = (*pclabel_count)++;
  // No parent linked yet; upb_decoder_jit_assignmsglabs2 fills this in.
  m->jit_parent_field_done_pclabel = UPB_NONE;
  m->max_field_number = 0;
  upb_inttable_iter i;
  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
      i = upb_inttable_next(&m->fieldtab, i)) {
    uint32_t key = upb_inttable_iter_key(i);
    m->max_field_number = UPB_MAX(m->max_field_number, key);
    upb_handlers_fieldent *f = upb_inttable_iter_value(i);
    upb_decoder_jit_assignfieldlabs(f, pclabel_count);
  }
  // XXX: Won't work for large field numbers; will need to use a upb_table.
  m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
  // The table is indexed by generated code and filled in by
  // upb_decoder_makejit, so fail here rather than at a NULL store later.
  if (!m->tablearray) abort();
}
// Second labelling pass. Walks the fields of "m" and, for every
// submessage-typed field, links the target message back to that field: a
// message referenced by exactly one field records that field's
// "submsg done" label, while one referenced more than once is marked
// UPB_MULTIPLE. Targets of group fields are also flagged as groups and
// remember the referring key.
void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_handlers_msgent *m) {
  for (upb_inttable_iter it = upb_inttable_begin(&m->fieldtab);
       !upb_inttable_done(it);
       it = upb_inttable_next(&m->fieldtab, it)) {
    upb_handlers_fieldent *field = upb_inttable_iter_value(it);
    if (!upb_issubmsgtype(field->type)) continue;

    upb_handlers_msgent *child = upb_handlers_getmsgent(h, field);
    if (field->type == UPB_TYPE(GROUP)) {
      child->is_group = true;
      child->groupnum = upb_inttable_iter_key(it);
    }

    if (child->jit_parent_field_done_pclabel != UPB_NONE) {
      // Already claimed by another field.
      child->jit_parent_field_done_pclabel = UPB_MULTIPLE;
    } else {
      child->jit_parent_field_done_pclabel = field->jit_submsg_done_pclabel;
    }
  }
}
// Builds the JIT-ted decoder for d->dispatcher.handlers:
//   1. assigns pclabels to every message and field,
//   2. runs DynASM to generate, link and encode the machine code into a
//      freshly mmap'd region (d->jit_code / d->jit_size),
//   3. fills in each message's dispatch table with code addresses,
//   4. builds an ELF image describing the code and registers it with GDB,
//   5. makes the code executable and dumps it to /tmp/machine-code.
// Aborts if memory for the code or its bookkeeping cannot be obtained.
void upb_decoder_makejit(upb_decoder *d) {
  // Assign pclabels. Label 0 is reserved for the abort stub.
  uint32_t pclabel_count = 1;
  upb_handlers *h = d->dispatcher.handlers;
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs(&h->msgs[i], &pclabel_count);
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs2(h, &h->msgs[i]);
  if (h->msgs[0].jit_parent_field_done_pclabel == UPB_NONE) {
    h->msgs[0].jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
  }

  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
  if (!globals) abort();
  dasm_init(d, 1);
  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
  dasm_growpc(d, pclabel_count);
  dasm_setup(d, upb_jit_actionlist);

  upb_decoder_jit(d);

  dasm_link(d, &d->jit_size);

  // Anonymous mappings take fd -1 for portability, and failure is reported
  // as MAP_FAILED (not NULL), which dasm_encode would otherwise write to.
  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if (d->jit_code == MAP_FAILED) abort();

  dasm_encode(d, d->jit_code);

  // Create dispatch tables.
  for (int i = 0; i < h->msgs_len; i++) {
    upb_handlers_msgent *m = &h->msgs[i];
    for (uint32_t j = 0; j <= m->max_field_number; j++) {
      // The field table is keyed on (field number << 3 | wire type), so try
      // every wire type for field number j.
      upb_handlers_fieldent *f = NULL;
      for (int k = 0; k < 8; k++) {
        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
        if (f) break;
      }
      if (f) {
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
      } else {
        // Don't handle unknown fields yet.
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
      }
    }
  }

  // Create debug info: copy the precompiled ELF template and patch the magic
  // placeholder words with the real code address and size.
  size_t elf_len = src_jit_debug_elf_file_o_len;
  d->debug_info = malloc(elf_len);
  if (!d->debug_info) abort();
  memcpy(d->debug_info, src_jit_debug_elf_file_o, elf_len);
  uint64_t *p = (void*)d->debug_info;
  for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) {
    if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
    if (*p == 0x321) { *p = d->jit_size; }
  }

  // Register the JIT-ted code with GDB.
  gdb_jit_entry *e = malloc(sizeof(gdb_jit_entry));
  if (!e) abort();
  e->next_entry = __jit_debug_descriptor.first_entry;
  e->prev_entry = NULL;
  if (e->next_entry) e->next_entry->prev_entry = e;
  e->symfile_addr = d->debug_info;
  e->symfile_size = elf_len;
  __jit_debug_descriptor.first_entry = e;
  __jit_debug_descriptor.relevant_entry = e;
  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
  __jit_debug_register_code();

  dasm_free(d);
  free(globals);

  // Without execute permission the generated code cannot be run at all.
  if (mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ) != 0) abort();

  // Debugging aid: dump the machine code. This is best-effort, so a failure
  // to open the file is ignored rather than crashing in fwrite().
  FILE *f = fopen("/tmp/machine-code", "wb");
  if (f) {
    fwrite(d->jit_code, d->jit_size, 1, f);
    fclose(f);
  }
}
// Releases what upb_decoder_makejit created for "d": unmaps the generated
// machine code and frees the debug ELF image.
// NOTE(review): the gdb_jit_entry registered in upb_decoder_makejit is not
// removed here, so it appears to keep pointing at the freed debug_info --
// see the TODO below.
void upb_decoder_freejit(upb_decoder *d) {
munmap(d->jit_code, d->jit_size);
free(d->debug_info);
// TODO: unregister
}

@ -349,18 +349,18 @@ static void upb_defbuilder_register_FileDescriptorProto(upb_handlers *h) {
upb_defbuilder_FileDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE, false,
&upb_defbuilder_FileDescriptorProto_package, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE);
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE, true);
upb_msgdef_register_DescriptorProto(h);
upb_handlers_typed_pop(h);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE);
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE, true);
upb_enumdef_register_EnumDescriptorProto(h);
upb_handlers_typed_pop(h);
@ -383,12 +383,13 @@ static void upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) {
upb_register_startend(h, NULL, upb_defbuilder_FileDescriptorSet_onendmsg);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM,
GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE);
GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE, true);
upb_defbuilder_register_FileDescriptorProto(h);
upb_handlers_typed_pop(h);
}
void upb_defbuilder_reghandlers(upb_handlers *h) {
upb_defbuilder_register_FileDescriptorSet(h);
h->should_jit = false;
}
@ -492,11 +493,11 @@ static void upb_enumdef_register_EnumValueDescriptorProto(upb_handlers *h) {
upb_enumdef_EnumValueDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_enumdef_EnumValueDescriptorProto_name, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE, false,
&upb_enumdef_EnumValueDescriptorProto_number, UPB_NO_VALUE);
}
@ -540,12 +541,12 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
&upb_enumdef_EnumDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_enumdef_EnumDescriptorProto_name, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE);
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE, true);
upb_enumdef_register_EnumValueDescriptorProto(h);
upb_handlers_typed_pop(h);
}
@ -813,27 +814,27 @@ static void upb_fielddef_register_FieldDescriptorProto(upb_handlers *h) {
upb_register_startend(h, upb_fielddef_startmsg, upb_fielddef_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE, false,
&upb_fielddef_ontype, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE, false,
&upb_fielddef_onlabel, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE, false,
&upb_fielddef_onnumber, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_fielddef_onname, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE, false,
&upb_fielddef_ontypename, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE, false,
&upb_fielddef_ondefaultval, UPB_NO_VALUE);
}
@ -954,23 +955,23 @@ static void upb_msgdef_register_DescriptorProto(upb_handlers *h) {
upb_register_startend(h, &upb_msgdef_startmsg, &upb_msgdef_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_msgdef_onname, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE);
GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE, true);
upb_fielddef_register_FieldDescriptorProto(h);
upb_handlers_typed_pop(h);
// DescriptorProto is self-recursive, so we must link the definition.
upb_handlers_typed_link(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE, 0);
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE, true, 0);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE);
GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE, true);
upb_enumdef_register_EnumDescriptorProto(h);
upb_handlers_typed_pop(h);

@ -29,7 +29,6 @@ void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
upb_handlers_uninit(&h);
}
void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
@ -53,7 +52,6 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_stringsink_uninit(&strsink);
upb_textprinter_free(p);
upb_handlers_uninit(&h);
}
void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
@ -72,7 +70,6 @@ void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
upb_decoder_decode(&d, status);
upb_handlers_uninit(&h);
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
}

@ -299,6 +299,13 @@ upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) {
static upb_flow_t upb_dmsgsink_value(void *_m, upb_value fval, upb_value val) {
upb_msg *m = _m;
upb_fielddef *f = upb_value_getfielddef(fval);
if (upb_isstring(f)) {
//fprintf(stderr, "dmsg_value! this=%p f=%p name=" UPB_STRFMT ",
// " UPB_STRFMT " %p\n", m, f, UPB_STRARG(f->name), UPB_STRARG(val.val.str));
} else {
//fprintf(stderr, "dmsg_value! this=%p f=%p name=" UPB_STRFMT ",
// %llu\n", m, f, UPB_STRARG(f->name), val.val.uint64);
}
upb_msg_appendval(m, f, val);
return UPB_CONTINUE;
}
@ -306,8 +313,11 @@ static upb_flow_t upb_dmsgsink_value(void *_m, upb_value fval, upb_value val) {
static upb_sflow_t upb_dmsgsink_startsubmsg(void *_m, upb_value fval) {
upb_msg *m = _m;
upb_fielddef *f = upb_value_getfielddef(fval);
//fprintf(stderr, "dmsg_startsubmsg! " UPB_STRFMT " %p\n", UPB_STRARG(fval.val.fielddef->name), f);
upb_msgdef *msgdef = upb_downcast_msgdef(f->def);
return UPB_CONTINUE_WITH(upb_msg_appendmsg(m, f, msgdef));
void *p = upb_msg_appendmsg(m, f, msgdef);
//printf("Continuing with: %p\n", p);
return UPB_CONTINUE_WITH(p);
}
void upb_msg_regdhandlers(upb_handlers *h) {

@ -11,36 +11,36 @@
/* upb_handlers ***************************************************************/
static upb_flow_t upb_startmsg_nop(void *closure) {
upb_flow_t upb_startmsg_nop(void *closure) {
(void)closure;
return UPB_CONTINUE;
}
static void upb_endmsg_nop(void *closure, upb_status *status) {
void upb_endmsg_nop(void *closure, upb_status *status) {
(void)closure;
(void)status;
}
static upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) {
upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) {
(void)closure;
(void)fval;
(void)val;
return UPB_CONTINUE;
}
static upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) {
upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) {
(void)fval;
return UPB_CONTINUE_WITH(closure);
}
static upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) {
upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) {
(void)closure;
(void)fval;
return UPB_CONTINUE;
}
static upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
upb_value val) {
upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
upb_value val) {
(void)closure;
(void)fieldnum;
(void)val;
@ -52,6 +52,8 @@ static void upb_msgent_init(upb_handlers_msgent *e) {
e->startmsg = &upb_startmsg_nop;
e->endmsg = &upb_endmsg_nop;
e->unknownval = &upb_unknownval_nop;
e->is_group = false;
e->tablearray = NULL;
}
void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
@ -61,6 +63,7 @@ void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
h->top = &h->stack[0];
h->limit = &h->stack[UPB_MAX_TYPE_DEPTH];
h->toplevel_msgdef = md;
h->should_jit = true;
if (md) upb_msgdef_ref(md);
h->top->msgent_index = 0;
@ -70,19 +73,22 @@ void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
}
void upb_handlers_uninit(upb_handlers *h) {
for (int i = 0; i < h->msgs_len; i++) upb_inttable_free(&h->msgs[i].fieldtab);
for (int i = 0; i < h->msgs_len; i++) {
upb_inttable_free(&h->msgs[i].fieldtab);
free(h->msgs[i].tablearray);
}
free(h->msgs);
upb_msgdef_unref(h->toplevel_msgdef);
}
static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type) {
upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated) {
uint32_t tag = fieldnum << 3 | upb_types[type].native_wire_type;
upb_handlers_fieldent *f =
upb_inttable_lookup(&h->msgent->fieldtab, tag);
if (!f) {
upb_handlers_fieldent new_f = {false, type, -1, UPB_NO_VALUE,
{&upb_value_nop}, &upb_endsubmsg_nop};
{&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0, repeated};
if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop;
upb_inttable_insert(&h->msgent->fieldtab, tag, &new_f);
@ -95,9 +101,9 @@ static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
static upb_handlers_fieldent *upb_handlers_getorcreate(
upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, upb_value fval) {
upb_fieldtype_t type, bool repeated, upb_value fval) {
upb_handlers_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type);
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->fval = fval;
return f;
}
@ -140,42 +146,40 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start,
}
void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, upb_value_handler_t value,
upb_value fval) {
upb_handlers_getorcreate(h, fieldnum, type, fval)->cb.value =
upb_fieldtype_t type, bool repeated,
upb_value_handler_t value, upb_value fval) {
upb_handlers_getorcreate(h, fieldnum, type, repeated, fval)->cb.value =
value ? value : &upb_value_nop;
}
void upb_register_value(upb_handlers *h, upb_fielddef *f,
upb_value_handler_t value, upb_value fval) {
assert(f->msgdef == h->top->msgdef);
upb_register_typed_value(h, f->number, f->type, value, fval);
upb_register_typed_value(h, f->number, f->type, upb_isarray(f), value, fval);
}
void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type,
upb_fieldtype_t type, bool repeated,
upb_startsubmsg_handler_t start,
upb_endsubmsg_handler_t end,
upb_value fval) {
upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, fval);
upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval);
f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop;
f->endsubmsg = end ? end : &upb_endsubmsg_nop;
}
void upb_handlers_typed_link(upb_handlers *h,
upb_field_number_t fieldnum,
upb_fieldtype_t type,
int frames) {
void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated, int frames) {
assert(frames <= (h->top - h->stack));
upb_handlers_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type);
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->msgent_index = (h->top - frames)->msgent_index;
}
void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type) {
upb_fieldtype_t type, bool repeated) {
upb_handlers_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type);
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
if (h->top == h->limit) abort(); // TODO: make growable.
++h->top;
if (f->msgent_index == -1) {
@ -204,8 +208,8 @@ void upb_handlers_push(upb_handlers *h, upb_fielddef *f,
bool delegate) {
assert(f->msgdef == h->top->msgdef);
(void)delegate; // TODO
upb_register_typed_submsg(h, f->number, f->type, start, end, fval);
upb_handlers_typed_push(h, f->number, f->type);
upb_register_typed_submsg(h, f->number, f->type, upb_isarray(f), start, end, fval);
upb_handlers_typed_push(h, f->number, f->type, upb_isarray(f));
}
void upb_handlers_typed_pop(upb_handlers *h) {
@ -229,13 +233,14 @@ static upb_handlers_fieldent toplevel_f = {
#else
{{0}, UPB_VALUETYPE_RAW},
#endif
{NULL}, NULL};
{NULL}, NULL, 0, 0, 0, false};
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) {
d->handlers = h;
for (int i = 0; i < h->msgs_len; i++)
upb_inttable_compact(&h->msgs[i].fieldtab);
d->stack[0].f = &toplevel_f;
d->limit = &d->stack[UPB_MAX_NESTING];
upb_status_init(&d->status);
}
@ -249,10 +254,10 @@ void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end
d->top = d->stack;
d->top->closure = top_closure;
d->top->end_offset = top_end_offset;
d->limit = &d->stack[UPB_MAX_NESTING];
}
void upb_dispatcher_uninit(upb_dispatcher *d) {
upb_handlers_uninit(d->handlers);
upb_status_uninit(&d->status);
}

@ -81,6 +81,14 @@ typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure, upb_value fval);
typedef upb_flow_t (*upb_unknownval_handler_t)(
void *closure, upb_field_number_t fieldnum, upb_value val);
upb_flow_t upb_startmsg_nop(void *closure);
void upb_endmsg_nop(void *closure, upb_status *status);
upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val);
upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval);
upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval);
upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
upb_value val);
typedef struct {
bool junk;
upb_fieldtype_t type;
@ -93,14 +101,27 @@ typedef struct {
upb_startsubmsg_handler_t startsubmsg;
} cb;
upb_endsubmsg_handler_t endsubmsg;
uint32_t jit_pclabel;
uint32_t jit_pclabel_notypecheck;
uint32_t jit_submsg_done_pclabel;
bool repeated;
} upb_handlers_fieldent;
typedef struct {
typedef struct _upb_handlers_msgent {
upb_startmsg_handler_t startmsg;
upb_endmsg_handler_t endmsg;
upb_unknownval_handler_t unknownval;
// Maps field number -> upb_handlers_fieldent.
upb_inttable fieldtab;
uint32_t jit_startmsg_pclabel;
uint32_t jit_endofbuf_pclabel;
uint32_t jit_endofmsg_pclabel;
uint32_t jit_unknownfield_pclabel;
uint32_t groupnum;
bool is_group;
int32_t jit_parent_field_done_pclabel;
uint32_t max_field_number;
void **tablearray;
} upb_handlers_msgent;
typedef struct {
@ -115,6 +136,7 @@ struct _upb_handlers {
upb_msgdef *toplevel_msgdef; // We own a ref.
upb_handlers_msgent *msgent;
upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit;
bool should_jit;
};
typedef struct _upb_handlers upb_handlers;
@ -237,19 +259,17 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start,
// Low-level functions -- internal-only.
void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, upb_value_handler_t value,
upb_value fval);
upb_fieldtype_t type, bool repeated,
upb_value_handler_t value, upb_value fval);
void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type,
upb_fieldtype_t type, bool repeated,
upb_startsubmsg_handler_t start,
upb_endsubmsg_handler_t end,
upb_value fval);
void upb_handlers_typed_link(upb_handlers *h,
upb_field_number_t fieldnum,
upb_fieldtype_t type,
int frames);
void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated, int frames);
void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type);
upb_fieldtype_t type, bool repeated);
void upb_handlers_typed_pop(upb_handlers *h);
INLINE upb_handlers_msgent *upb_handlers_getmsgent(upb_handlers *h,
@ -308,8 +328,8 @@ typedef struct {
int delegated_depth;
// Stack.
upb_dispatcher_frame stack[UPB_MAX_NESTING];
upb_status status;
upb_dispatcher_frame stack[UPB_MAX_NESTING];
} upb_dispatcher;
INLINE bool upb_dispatcher_skipping(upb_dispatcher *d) {

@ -72,7 +72,6 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len) {
if(str->ptr) *(char*)0 = 0;
assert(str->ptr == NULL);
assert(start + len <= upb_string_len(target_str));
if (target_str->src) {

@ -155,9 +155,13 @@ INLINE const char *upb_string_getbufend(upb_string *str) {
}
// Attempts to recycle the string "str" so it may be reused and have different
// data written to it. After the function returns, "str" points to a writable
// string, which is either the original string if it had no other references
// or a newly created string if it did have other references.
// data written to it. The caller MUST own a reference on the given string
// prior to making this call (ie. the caller must have either created the
// string or obtained a reference with upb_string_getref()).
//
// After the function returns, "str" points to a writable string, which is
// either the original string if it had no other references or a newly created
// string if it did have other references.
//
// As a special case, passing a pointer to NULL will allocate a new string.
// This is convenient for the pattern:
@ -171,7 +175,9 @@ INLINE const char *upb_string_getbufend(upb_string *str) {
// }
INLINE void upb_string_recycle(upb_string **_str) {
upb_string *str = *_str;
if(str && upb_atomic_only(&str->refcount)) {
int r;
if(str && ((r = upb_atomic_read(&str->refcount)) == 1 ||
(r == _UPB_STRING_REFCOUNT_STACK))) {
str->ptr = NULL;
str->len = 0;
_upb_string_release(str);

@ -102,6 +102,7 @@ static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) {
upb_inttable_value *table_val;
if (_upb_inttable_isarrkey(t, key)) {
table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t));
t->array_count++;
//printf("Inserting key %d to Array part! %p\n", key, table_val);
} else {
t->t.count++;
@ -152,8 +153,8 @@ static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) {
static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i);
i = upb_inttable_next(src, i)) {
//printf("load check: %d %d\n", upb_inttable_count(dst), upb_inttable_hashtablesize(dst));
assert((double)(upb_inttable_count(dst)) /
//printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst));
assert((double)(upb_table_count(&dst->t)) /
upb_inttable_hashtablesize(dst) <= MAX_LOAD);
intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i));
}
@ -209,6 +210,7 @@ void upb_inttable_compact(upb_inttable *t) {
}
upb_inttable new_table;
int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
//printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t));
upb_inttable_sizedinit(&new_table, array_size, hash_size,
upb_table_valuesize(&t->t));
//printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);

@ -30,7 +30,7 @@ typedef struct {
// A basic branch-based decoder, uses 32-bit values to get good performance
// on 32-bit architectures (but performs well on 64-bits also).
INLINE upb_decoderet upb_decode_varint_branch32(const char *p) {
INLINE upb_decoderet upb_vdecode_branch32(const char *p) {
upb_decoderet r = {NULL, 0};
uint32_t low, high = 0;
uint32_t b;
@ -54,7 +54,7 @@ done:
}
// Like the previous, but uses 64-bit values.
INLINE upb_decoderet upb_decode_varint_branch64(const char *p) {
INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
uint64_t val;
uint64_t b;
upb_decoderet r = {(void*)0, 0};
@ -76,17 +76,9 @@ done:
return r;
}
// Avoids branches for values >2-bytes.
INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) {
uint64_t b = 0;
upb_decoderet r = {p, 0};
memcpy(&b, r.p, 2);
if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
r.p = p + 2;
if ((b & 0x8000) == 0) return r;
// >2-byte varint.
// Decodes a varint of at most 8 bytes without branching (except for error).
INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
uint64_t b;
memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
uint64_t stop_bit = ~cbits & (cbits+1);
@ -94,27 +86,19 @@ INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) {
b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
r.val |= b << 14;
r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
if (stop_bit == 0) {
// Error: unterminated varint.
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
return r;
upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
r.val | (b << 14)};
return my_r;
}
// Avoids branches for values >2-bytes.
INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) {
uint64_t b = 0;
upb_decoderet r = {p, 0};
memcpy(&b, r.p, 2);
if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
r.p = p + 2;
if ((b & 0x8000) == 0) return r;
// >2-byte varint.
// Another implementation of the previous.
INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
uint64_t b;
memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
uint64_t stop_bit = ~cbits & (cbits + 1);
@ -122,22 +106,46 @@ INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) {
b += b & 0x007f007f007f007fULL;
b += 3 * (b & 0x0000ffff0000ffffULL);
b += 15 * (b & 0x00000000ffffffffULL);
r.val |= b << 7;
r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
if (stop_bit == 0) {
// Error: unterminated varint.
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
return r;
upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
r.val | (b << 7)};
return my_r;
}
INLINE upb_decoderet upb_decode_varint_fast(const char *p) {
// Template for a function that checks the first two bytes with branching
// and hands varints longer than two bytes (3-10 bytes in total) to a separate
// function.
//
//   name                  suffix of the generated function, which is called
//                         upb_vdecode_check2_<name>.
//   decode_max8_function  called with r.p already advanced past the first two
//                         bytes and r.val holding their 14 payload bits; its
//                         result is returned unchanged.
//
// The generated function always copies two bytes from p, even when the varint
// is only one byte long, so at least two bytes must be readable at p. The
// masks treat p[0] as the low-order byte of b, which holds only on a
// little-endian host.
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) { \
uint64_t b = 0; \
upb_decoderet r = {p, 0}; \
memcpy(&b, r.p, 2); \
if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } \
r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); \
r.p = p + 2; \
if ((b & 0x8000) == 0) return r; \
return decode_max8_function(r); \
}
UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
#undef UPB_VARINT_DECODER_CHECK2
// Our canonical functions for decoding varints, based on the currently
// favored best-performing implementations.
INLINE upb_decoderet upb_vdecode_fast(const char *p) {
// Use check2_massimino (which replaces the old nobranch2) on 64-bit, branch32
// on 32-bit. "64-bit" is detected as sizeof(long) == 8, so a target whose
// long is narrower than 8 bytes takes the branch32 path.
if (sizeof(long) == 8)
return upb_decode_varint_nobranch2(p);
return upb_vdecode_check2_massimino(p);
else
return upb_decode_varint_branch32(p);
return upb_vdecode_branch32(p);
}
// Canonical "max8" decoder: forwards r unchanged to
// upb_vdecode_max8_massimino() and returns its result.
INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
return upb_vdecode_max8_massimino(r);
}
#ifdef __cplusplus

@ -29,3 +29,13 @@ message D {
optional A a = 1;
optional D d = 2;
}
// A proto with a bunch of simple primitives.
// Only fixed-width and floating-point fields are enabled. Field number 4 is
// not used, and the sint64/sint32 fields (numbers 6 and 7) are commented out.
message SimplePrimitives {
optional fixed64 a = 1;
optional fixed32 b = 2;
optional double c = 3;
optional float d = 5;
//optional sint64 e = 6;
//optional sint32 f = 7;
}

@ -33,7 +33,7 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
const char *twelvebyte_buf = twelvebyte;
// A varint that terminates before hitting the end of the provided buffer,
// but in too many bytes (11 instead of 10).
upb_decoderet r = upb_decode_varint_fast(twelvebyte_buf);
upb_decoderet r = decoder(twelvebyte_buf);
ASSERT(r.p == NULL);
}
@ -41,23 +41,26 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
// For a decoder suffix `decoder`, generates:
//   - _upb_vdecode_<decoder>(): a non-inline wrapper around
//     upb_vdecode_<decoder>();
//   - test_<decoder>(): prints a progress message, runs test_varint_decoder()
//     on the wrapper, and prints "ok.".
// NOTE(review): the last line of the macro body ends in a line continuation,
// so the definition also absorbs whatever line follows it -- confirm that the
// following line is blank in the real file.
#define TEST_VARINT_DECODER(decoder) \
/* Create non-inline versions for convenient inspection of assembly language \
* output. */ \
upb_decoderet _upb_decode_varint_ ## decoder(const char *p) { \
return upb_decode_varint_ ## decoder(p); \
upb_decoderet _upb_vdecode_ ## decoder(const char *p) { \
return upb_vdecode_ ## decoder(p); \
} \
void test_ ## decoder() { \
test_varint_decoder(&_upb_decode_varint_ ## decoder); \
printf("Testing varint decoder: " #decoder "..."); \
fflush(stdout); \
test_varint_decoder(&_upb_vdecode_ ## decoder); \
printf("ok.\n"); \
} \
TEST_VARINT_DECODER(branch32);
TEST_VARINT_DECODER(branch64);
TEST_VARINT_DECODER(nobranch1);
TEST_VARINT_DECODER(nobranch2);
TEST_VARINT_DECODER(check2_wright);
TEST_VARINT_DECODER(check2_massimino);
// Runs the generated test function for each varint decoder variant in turn.
int main() {
test_branch32();
test_branch64();
test_nobranch1();
test_nobranch2();
test_check2_wright();
test_check2_massimino();
}
#if 0

@ -5,8 +5,10 @@
#include "upb_def.h"
#include "upb_glue.h"
#include "upb_test.h"
#include "upb_stream.h"
#include "upb_decoder.h"
static void test_upb_symtab() {
static upb_symtab *load_test_proto() {
upb_symtab *s = upb_symtab_new();
ASSERT(s);
upb_string *descriptor = upb_strreadfile("tests/test.proto.pb");
@ -16,10 +18,38 @@ static void test_upb_symtab() {
}
upb_status status = UPB_STATUS_INIT;
upb_parsedesc(s, descriptor, &status);
upb_printerr(&status);
ASSERT(upb_ok(&status));
upb_status_uninit(&status);
upb_string_unref(descriptor);
return s;
}
// Value handler used by the tests: ignores all three arguments and always
// returns UPB_CONTINUE.
static upb_flow_t upb_test_onvalue(void *closure, upb_value fval, upb_value val) {
(void)closure;
(void)fval;
(void)val;
return UPB_CONTINUE;
}
// Setup/teardown test: looks up the SimplePrimitives message in the test
// proto, registers upb_test_onvalue through upb_register_all(), then
// initializes and uninitializes a decoder over those handlers. The only
// assertion made here is that the def lookup succeeds.
// NOTE(review): presumably upb_decoder_init() is where JIT code gets
// generated, which is what this test is meant to exercise -- confirm.
static void test_upb_jit() {
upb_symtab *s = load_test_proto();
upb_string *symname = upb_strdupc("SimplePrimitives");
upb_def *def = upb_symtab_lookup(s, symname);
upb_string_unref(symname);
ASSERT(def);
upb_handlers h;
upb_handlers_init(&h, upb_downcast_msgdef(def));
upb_register_all(&h, NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL);
upb_decoder d;
upb_decoder_init(&d, &h);
// No input is fed to the decoder; it is torn down immediately.
// NOTE(review): h is never uninitialized explicitly in this function --
// presumably upb_decoder_uninit() takes care of it; confirm.
upb_decoder_uninit(&d);
upb_symtab_unref(s);
upb_def_unref(def);
}
static void test_upb_symtab() {
upb_symtab *s = load_test_proto();
// Test cycle detection by making a cyclic def's main refcount go to zero
// and then be incremented to one again.
@ -53,6 +83,7 @@ int main()
} while (0)
TEST(test_upb_symtab);
TEST(test_upb_jit);
printf("All tests passed (%d assertions).\n", num_assertions);
return 0;
}

Loading…
Cancel
Save