From efefbffc804c6bc31b6da5aa3629b54f8bdb1793 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 3 Aug 2020 10:12:31 -0700 Subject: [PATCH] Fixed binary encoding and decoding for big-endian machines. --- BUILD | 27 +++++++++++++++++++++++++++ upb/decode.c | 23 +++++++++++++++-------- upb/encode.c | 4 ++-- upb/msg.h | 4 ++-- upb/upb.h | 26 ++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 12 deletions(-) diff --git a/BUILD b/BUILD index 96229584a4..90aa7a7919 100644 --- a/BUILD +++ b/BUILD @@ -735,6 +735,33 @@ cc_library( }), ) +upb_amalgamation( + name = "gen_ruby_amalgamation", + prefix = "ruby-", + outs = [ + "ruby-upb.c", + "ruby-upb.h", + ], + amalgamator = ":amalgamate", + libs = [ + ":upb", + ":descriptor_upb_proto", + ":reflection", + ":port", + ":json", + ], +) + +cc_library( + name = "ruby_amalgamation", + srcs = ["ruby-upb.c"], + hdrs = ["ruby-upb.h"], + copts = select({ + ":windows": [], + "//conditions:default": COPTS, + }), +) + # Lua ########################################################################## cc_library( diff --git a/upb/decode.c b/upb/decode.c index fbb9f2d2ff..a9f1cf548a 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -145,8 +145,6 @@ typedef struct { typedef union { bool bool_val; - int32_t int32_val; - int64_t int64_val; uint32_t uint32_val; uint64_t uint64_val; upb_strview str_val; @@ -245,14 +243,21 @@ static void decode_munge(int type, wireval *val) { break; case UPB_DESCRIPTOR_TYPE_SINT32: { uint32_t n = val->uint32_val; - val->int32_val = (n >> 1) ^ -(int32_t)(n & 1); + val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); break; } case UPB_DESCRIPTOR_TYPE_SINT64: { uint64_t n = val->uint64_val; - val->int64_val = (n >> 1) ^ -(int64_t)(n & 1); + val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); break; } + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + if (!_upb_isle()) { + /* The next stage will memcpy(dst, &val, 4) */ + val->uint32_val = val->uint64_val; + } + break; } } @@ -428,7 +433,7 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg, if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE || entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) { /* Create proactively to handle the case where it doesn't appear. */ - ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena); + ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], d->arena)); } decode_tosubmsg(d, &ent.k, layout, field, val.str_val); @@ -519,14 +524,16 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, break; case UPB_WIRE_TYPE_32BIT: if (d->limit - ptr < 4) decode_err(d); - memcpy(&val, ptr, 4); + memcpy(&val.uint32_val, ptr, 4); + val.uint32_val = _upb_be_swap32(val.uint32_val); ptr += 4; op = OP_SCALAR_LG2(2); if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown; break; case UPB_WIRE_TYPE_64BIT: if (d->limit - ptr < 8) decode_err(d); - memcpy(&val, ptr, 8); + memcpy(&val.uint64_val, ptr, 8); + val.uint64_val = _upb_be_swap64(val.uint64_val); ptr += 8; op = OP_SCALAR_LG2(3); if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown; @@ -546,7 +553,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, break; } case UPB_WIRE_TYPE_START_GROUP: - val.int32_val = field_number; + val.uint32_val = field_number; op = OP_SUBMSG; if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown; break; diff --git a/upb/encode.c b/upb/encode.c index 87162325ad..a6ce62bfa5 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -77,12 +77,12 @@ static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) { } static bool upb_put_fixed64(upb_encstate *e, uint64_t val) { - /* TODO(haberman): byte-swap for big endian. */ + val = _upb_be_swap64(val); return upb_put_bytes(e, &val, sizeof(uint64_t)); } static bool upb_put_fixed32(upb_encstate *e, uint32_t val) { - /* TODO(haberman): byte-swap for big endian. */ + val = _upb_be_swap32(val); return upb_put_bytes(e, &val, sizeof(uint32_t)); } diff --git a/upb/msg.h b/upb/msg.h index b321748ec0..695c278b21 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -324,7 +324,7 @@ UPB_INLINE upb_value _upb_map_tovalue(const void *val, size_t size, if (size == UPB_MAPTYPE_STRING) { upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp)); *strp = *(upb_strview*)val; - memcpy(&ret, &strp, sizeof(strp)); + ret = upb_value_ptr(strp); } else { memcpy(&ret, val, size); } @@ -455,7 +455,7 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size) /* This is like _upb_map_tovalue() except the entry already exists so we can * reuse the allocated upb_strview for string fields. */ if (size == UPB_MAPTYPE_STRING) { - upb_strview *strp = (upb_strview*)ent->val.val; + upb_strview *strp = (upb_strview*)(uintptr_t)ent->val.val; memcpy(strp, val, sizeof(*strp)); } else { memcpy(&ent->val.val, val, size); diff --git a/upb/upb.h b/upb/upb.h index c3e1c5f3d5..e1d9d8cfd3 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -273,6 +273,32 @@ typedef enum { #define UPB_MAP_BEGIN ((size_t)-1) +UPB_INLINE bool _upb_isle(void) { + int x = 1; + return *(char*)&x == 1; +} + +UPB_INLINE uint32_t _upb_be_swap32(uint32_t val) { + if (_upb_isle()) { + return val; + } else { + return ((val & 0xff) << 24) | ((val & 0xff00) << 8) | + ((val & 0xff0000ULL) >> 8) | ((val & 0xff000000ULL) >> 24); + } +} + +UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) { + if (_upb_isle()) { + return val; + } else { + return ((val & 0xff) << 56) | ((val & 0xff00) << 40) | + ((val & 0xff0000) << 24) | ((val & 0xff000000) << 8) | + ((val & 0xff00000000ULL) >> 8) | ((val & 0xff0000000000ULL) >> 24) | + ((val & 0xff000000000000ULL) >> 40) | + ((val & 0xff00000000000000ULL) >> 56); + } +} + #include "upb/port_undef.inc" #ifdef __cplusplus