From 9a02d16ceea6e2eb06f221fa35767ce8a259b559 Mon Sep 17 00:00:00 2001 From: Xavier Bonaventura Date: Wed, 24 Aug 2022 22:21:38 +0200 Subject: [PATCH 01/35] Remove pattern that does not glob anything This pattern does not glob anything because inside the upbc folder there is a BUILD file. Removing this allows building upb with the flag incompatible_disallow_empty_glob --- BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/BUILD b/BUILD index a49327b287..3185c0e677 100644 --- a/BUILD +++ b/BUILD @@ -963,7 +963,6 @@ exports_files( filegroup( name = "cmake_files", srcs = glob([ - "upbc/**/*", "upb/**/*", "third_party/**/*", ]), From 552c1d10481d71937f371eb4fc61c3e83b80cb87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?X=C3=B9d=C5=8Dng=20Y=C3=A1ng?= Date: Thu, 1 Sep 2022 14:08:43 +0200 Subject: [PATCH 02/35] Use Ubuntu 20.04 for a newer Python version See https://github.com/protocolbuffers/upb/issues/760 --- .bazelci/presubmit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bazelci/presubmit.yml b/.bazelci/presubmit.yml index 7e2b9110ff..d06e588bf9 100644 --- a/.bazelci/presubmit.yml +++ b/.bazelci/presubmit.yml @@ -1,7 +1,7 @@ --- tasks: ubuntu: - platform: ubuntu1804 + platform: ubuntu2004 shell_commands: - "sudo apt -y update && sudo apt -y install libreadline-dev cmake rsync" test_targets: From 5a7644b2d069d8e834cd41c677ed70e89e66920e Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 6 Sep 2022 10:27:55 -0700 Subject: [PATCH 03/35] Fixed fuzz bug in upb. Extending a MessageSet with a non-message extension was causing crashes that would manifest in various ways. 
PiperOrigin-RevId: 472496259 --- upb/fuzz_test_util.cc | 8 ++++---- upb/mini_table.c | 23 ++++++++++++++++++++--- upb/mini_table.h | 1 + upb/msg_test.cc | 36 ++++++++++++++++++++++++++++++++++++ upbc/file_layout.cc | 6 +++++- 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/upb/fuzz_test_util.cc b/upb/fuzz_test_util.cc index 12de4a9283..219e4a3294 100644 --- a/upb/fuzz_test_util.cc +++ b/upb/fuzz_test_util.cc @@ -111,8 +111,6 @@ void Builder::BuildEnums() { bool Builder::LinkExtension(upb_MiniTable_Extension* ext) { upb_MiniTable_Field* field = &ext->field; - ext->extendee = NextMiniTable(); - if (!ext->extendee) return false; if (field->descriptortype == kUpb_FieldType_Message || field->descriptortype == kUpb_FieldType_Group) { auto mt = NextMiniTable(); @@ -140,8 +138,10 @@ void Builder::BuildExtensions(upb_ExtensionRegistry** exts) { upb_MiniTable_Extension* ext = reinterpret_cast( upb_Arena_Malloc(arena_, sizeof(*ext))); upb_MiniTable_Sub sub; - ptr = - upb_MiniTable_BuildExtension(ptr, end - ptr, ext, sub, status.ptr()); + const upb_MiniTable* extendee = NextMiniTable(); + if (!extendee) break; + ptr = upb_MiniTable_BuildExtension(ptr, end - ptr, ext, extendee, sub, + status.ptr()); if (!ptr) break; if (!LinkExtension(ext)) continue; if (_upb_extreg_get(*exts, ext->extendee, ext->field.number)) continue; diff --git a/upb/mini_table.c b/upb/mini_table.c index 0b59607053..3d13780a04 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -1102,6 +1102,7 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len, const char* upb_MiniTable_BuildExtension(const char* data, size_t len, upb_MiniTable_Extension* ext, + const upb_MiniTable* extendee, upb_MiniTable_Sub sub, upb_Status* status) { upb_MtDecoder decoder = { @@ -1117,9 +1118,25 @@ const char* upb_MiniTable_BuildExtension(const char* data, size_t len, uint16_t count = 0; const char* ret = upb_MtDecoder_Parse(&decoder, data, len, ext, sizeof(*ext), &count, NULL); - 
ext->field.mode |= kUpb_LabelFlags_IsExtension; - ext->field.offset = 0; - ext->field.presence = 0; + if (!ret) return NULL; + + upb_MiniTable_Field* f = &ext->field; + + f->mode |= kUpb_LabelFlags_IsExtension; + f->offset = 0; + f->presence = 0; + + if (extendee->ext & kUpb_ExtMode_IsMessageSet) { + // Extensions of MessageSet must be messages. + if (!upb_IsSubMessage(f)) return NULL; + + // Extensions of MessageSet must be non-repeating. + if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL; + } + + ext->extendee = extendee; + ext->sub = sub; + return ret; } diff --git a/upb/mini_table.h b/upb/mini_table.h index 238c6d7222..d71ebcf320 100644 --- a/upb/mini_table.h +++ b/upb/mini_table.h @@ -156,6 +156,7 @@ void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field, const char* upb_MiniTable_BuildExtension(const char* data, size_t len, upb_MiniTable_Extension* ext, + const upb_MiniTable* extendee, upb_MiniTable_Sub sub, upb_Status* status); diff --git a/upb/msg_test.cc b/upb/msg_test.cc index 68fd6267ee..9e1548ae9d 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -534,4 +534,40 @@ TEST(MessageTest, MapField) { // "\010\002", 342248070, -806315555); // } // +// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage) { +// DecodeEncodeArbitrarySchemaAndPayload( +// {{"\n"}, {""}, ".\244", {}}, "\013\032\005\212a#\365\336\020\001\226", +// 14803219, 670718349); +// } +// +// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage2) { +// DecodeEncodeArbitrarySchemaAndPayload({{"\n", "G", "\n", "\274", ""}, +// {"", "\030"}, +// "_@", +// {4294967295, 2147483647}}, +// std::string("\013\032\000\220", 4), +// 279975758, 1647495141); +// } +// +// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage3) { +// DecodeEncodeArbitrarySchemaAndPayload( +// {{"\n"}, {"B", ""}, "\212:b", {11141121}}, +// "\013\032\004\357;7\363\020\001\346\240\200\201\271", 399842149, +// -452966025); +// } +// +// TEST(FuzzTest, 
DecodeExtendMessageSetWithNonMessage4) { +// DecodeEncodeArbitrarySchemaAndPayload( +// {{"\n", "3\340", "\354"}, {}, "B}G", {4294967295, 4082331310}}, +// "\013\032\004\244B\331\255\020\001\220\224\243\350\t", -561523015, +// 1683327312); +// } +// +// TEST(FuzzTest, DecodeExtendMessageSetWithNonMessage5) { +// DecodeEncodeArbitrarySchemaAndPayload( +// {{"\n"}, {""}, "kB", {0}}, +// "x\203\251\006\013\032\002S\376\010\273\'\020\014\365\207\244\234", +// -696925610, -654590577); +// } +// // end:google_only diff --git a/upbc/file_layout.cc b/upbc/file_layout.cc index 49268aab27..f9dbac39f0 100644 --- a/upbc/file_layout.cc +++ b/upbc/file_layout.cc @@ -265,8 +265,12 @@ void FilePlatformLayout::BuildExtensions(const protobuf::FileDescriptor* fd) { GetFieldModifiers(f)); upb_MiniTable_Extension& ext = extension_map_[f]; upb_MiniTable_Sub sub; + // The extendee may be from another file, so we build a temporary MiniTable + // for it, just for the purpose of building the extension. + // Note, we are not caching so this could use more memory than is necessary. + upb_MiniTable* extendee = MakeMiniTable(f->containing_type()); bool ok = upb_MiniTable_BuildExtension(e.data().data(), e.data().size(), - &ext, sub, status.ptr()); + &ext, extendee, sub, status.ptr()); if (!ok) { // TODO(haberman): Use ABSL CHECK() when it is available. fprintf(stderr, "Error building mini-table: %s\n", From ba7603b7c18b22d07c087813a6e19bca993e1c7d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 6 Sep 2022 20:03:59 -0700 Subject: [PATCH 04/35] Updated decoder internal symbols to new naming scheme. This is a naming change only, with no functional change. We did replace one inline function that had only one caller, but the net effect should be a no-op. I did remove several `return` statements for calls to certain `noreturn` functions. These had no effect but were intended to improve readability. However the unused "return" caused ClangTidy to throw warnings, so these have been removed. 
PiperOrigin-RevId: 472619191 --- upb/decode.c | 456 +++++++++++++++++++++-------------------- upb/decode_fast.c | 72 +++---- upb/decode_fast.h | 7 +- upb/def.c | 2 +- upb/internal/decode.h | 46 +++-- upbc/protoc-gen-upb.cc | 4 +- 6 files changed, 306 insertions(+), 281 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 2456efa92d..8ba5c21425 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -181,28 +181,31 @@ typedef union { uint32_t size; } wireval; -static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout); +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout); -UPB_NORETURN static void* decode_err(upb_Decoder* d, upb_DecodeStatus status) { +UPB_NORETURN static void* _upb_Decoder_ErrorJmp(upb_Decoder* d, + upb_DecodeStatus status) { assert(status != kUpb_DecodeStatus_Ok); UPB_LONGJMP(d->err, status); } -const char* fastdecode_err(upb_Decoder* d, int status) { +const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) { assert(status != kUpb_DecodeStatus_Ok); UPB_LONGJMP(d->err, status); return NULL; } -static void decode_verifyutf8(upb_Decoder* d, const char* buf, int len) { - if (!decode_verifyutf8_inl(buf, len)) - decode_err(d, kUpb_DecodeStatus_BadUtf8); +static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) { + if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); + } } -static bool decode_reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { +static bool _upb_Decoder_Reserve(upb_Decoder* d, upb_Array* arr, size_t elem) { bool need_realloc = arr->capacity - arr->size < elem; if (need_realloc && !_upb_array_realloc(arr, arr->size + elem, &d->arena)) { - decode_err(d, kUpb_DecodeStatus_OutOfMemory); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } return need_realloc; } @@ -210,11 +213,12 @@ static bool decode_reserve(upb_Decoder* d, 
upb_Array* arr, size_t elem) { typedef struct { const char* ptr; uint64_t val; -} decode_vret; +} _upb_DecodeLongVarintReturn; UPB_NOINLINE -static decode_vret decode_longvarint64(const char* ptr, uint64_t val) { - decode_vret ret = {NULL, 0}; +static _upb_DecodeLongVarintReturn _upb_Decoder_DecodeLongVarint( + const char* ptr, uint64_t val) { + _upb_DecodeLongVarintReturn ret = {NULL, 0}; uint64_t byte; int i; for (i = 1; i < 10; i++) { @@ -230,31 +234,32 @@ static decode_vret decode_longvarint64(const char* ptr, uint64_t val) { } UPB_FORCEINLINE -static const char* decode_varint64(upb_Decoder* d, const char* ptr, - uint64_t* val) { +static const char* _upb_Decoder_DecodeVarint(upb_Decoder* d, const char* ptr, + uint64_t* val) { uint64_t byte = (uint8_t)*ptr; if (UPB_LIKELY((byte & 0x80) == 0)) { *val = byte; return ptr + 1; } else { - decode_vret res = decode_longvarint64(ptr, byte); - if (!res.ptr) return decode_err(d, kUpb_DecodeStatus_Malformed); + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); + if (!res.ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); *val = res.val; return res.ptr; } } UPB_FORCEINLINE -static const char* decode_tag(upb_Decoder* d, const char* ptr, uint32_t* val) { +static const char* _upb_Decoder_DecodeTag(upb_Decoder* d, const char* ptr, + uint32_t* val) { uint64_t byte = (uint8_t)*ptr; if (UPB_LIKELY((byte & 0x80) == 0)) { *val = byte; return ptr + 1; } else { const char* start = ptr; - decode_vret res = decode_longvarint64(ptr, byte); + _upb_DecodeLongVarintReturn res = _upb_Decoder_DecodeLongVarint(ptr, byte); if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { - return decode_err(d, kUpb_DecodeStatus_Malformed); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } *val = res.val; return res.ptr; @@ -265,22 +270,22 @@ UPB_FORCEINLINE static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, uint32_t* size) { uint64_t size64; - ptr = decode_varint64(d, ptr, 
&size64); + ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64); if (size64 >= INT32_MAX || ptr - d->end + (int)size64 > d->limit) { - decode_err(d, kUpb_DecodeStatus_Malformed); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } *size = size64; return ptr; } -static void decode_munge_int32(wireval* val) { +static void _upb_Decoder_MungeInt32(wireval* val) { if (!_upb_IsLittleEndian()) { /* The next stage will memcpy(dst, &val, 4) */ val->uint32_val = val->uint64_val; } } -static void decode_munge(int type, wireval* val) { +static void _upb_Decoder_Munge(int type, wireval* val) { switch (type) { case kUpb_FieldType_Bool: val->bool_val = val->uint64_val != 0; @@ -298,38 +303,36 @@ static void decode_munge(int type, wireval* val) { case kUpb_FieldType_Int32: case kUpb_FieldType_UInt32: case kUpb_FieldType_Enum: - decode_munge_int32(val); + _upb_Decoder_MungeInt32(val); break; } } -static upb_Message* decode_newsubmsg(upb_Decoder* d, - const upb_MiniTable_Sub* subs, - const upb_MiniTable_Field* field) { +static upb_Message* _upb_Decoder_NewSubMessage( + upb_Decoder* d, const upb_MiniTable_Sub* subs, + const upb_MiniTable_Field* field) { const upb_MiniTable* subl = subs[field->submsg_index].submsg; upb_Message* msg = _upb_Message_New_inl(subl, &d->arena); - if (!msg) decode_err(d, kUpb_DecodeStatus_OutOfMemory); + if (!msg) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); return msg; } UPB_NOINLINE -const char* decode_isdonefallback(upb_Decoder* d, const char* ptr, - int overrun) { +const char* _upb_Decoder_IsDoneFallback(upb_Decoder* d, const char* ptr, + int overrun) { int status; - ptr = decode_isdonefallback_inl(d, ptr, overrun, &status); - if (ptr == NULL) { - return decode_err(d, status); - } + ptr = _upb_Decoder_IsDoneFallbackInline(d, ptr, overrun, &status); + if (ptr == NULL) _upb_Decoder_ErrorJmp(d, status); return ptr; } -static const char* decode_readstr(upb_Decoder* d, const char* ptr, int size, - upb_StringView* str) { +static const char* 
_upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, + int size, upb_StringView* str) { if (d->options & kUpb_DecodeOption_AliasString) { str->data = ptr; } else { char* data = upb_Arena_Malloc(&d->arena, size); - if (!data) return decode_err(d, kUpb_DecodeStatus_OutOfMemory); + if (!data) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); memcpy(data, ptr, size); str->data = data; } @@ -338,50 +341,56 @@ static const char* decode_readstr(upb_Decoder* d, const char* ptr, int size, } UPB_FORCEINLINE -static const char* decode_tosubmsg2(upb_Decoder* d, const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, int size) { - int saved_delta = decode_pushlimit(d, ptr, size); - if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded); - ptr = decode_msg(d, ptr, submsg, subl); - if (d->end_group != DECODE_NOGROUP) - return decode_err(d, kUpb_DecodeStatus_Malformed); - decode_poplimit(d, ptr, saved_delta); +static const char* _upb_Decoder_DecodeSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTable_Sub* subs, const upb_MiniTable_Field* field, int size) { + int saved_delta = _upb_Decoder_PushLimit(d, ptr, size); + const upb_MiniTable* subl = subs[field->submsg_index].submsg; + if (--d->depth < 0) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); + } + ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); + if (d->end_group != DECODE_NOGROUP) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + _upb_Decoder_PopLimit(d, ptr, saved_delta); d->depth++; return ptr; } UPB_FORCEINLINE -static const char* decode_tosubmsg(upb_Decoder* d, const char* ptr, - upb_Message* submsg, - const upb_MiniTable_Sub* subs, - const upb_MiniTable_Field* field, int size) { - return decode_tosubmsg2(d, ptr, submsg, subs[field->submsg_index].submsg, - size); -} - -UPB_FORCEINLINE -static const char* decode_group(upb_Decoder* d, const char* ptr, - upb_Message* submsg, const upb_MiniTable* subl, - 
uint32_t number) { - if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded); - if (decode_isdone(d, &ptr)) { - return decode_err(d, kUpb_DecodeStatus_Malformed); +static const char* _upb_Decoder_DoDecodeGroup(upb_Decoder* d, const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t number) { + if (--d->depth < 0) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); + } + if (_upb_Decoder_IsDone(d, &ptr)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); + } + ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); + if (d->end_group != number) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } - ptr = decode_msg(d, ptr, submsg, subl); - if (d->end_group != number) return decode_err(d, kUpb_DecodeStatus_Malformed); d->end_group = DECODE_NOGROUP; d->depth++; return ptr; } UPB_FORCEINLINE -static const char* decode_togroup(upb_Decoder* d, const char* ptr, - upb_Message* submsg, - const upb_MiniTable_Sub* subs, - const upb_MiniTable_Field* field) { +static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, + const char* ptr, + uint32_t number) { + return _upb_Decoder_DoDecodeGroup(d, ptr, NULL, NULL, number); +} + +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeKnownGroup( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTable_Sub* subs, const upb_MiniTable_Field* field) { const upb_MiniTable* subl = subs[field->submsg_index].submsg; - return decode_group(d, ptr, submsg, subl, field->number); + return _upb_Decoder_DoDecodeGroup(d, ptr, submsg, subl, field->number); } static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { @@ -394,23 +403,24 @@ static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { return ptr; } -static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, - uint32_t val1, uint32_t val2) { +static void _upb_Decoder_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, + uint32_t val1, uint32_t val2) { 
char buf[20]; char* end = buf; end = upb_Decoder_EncodeVarint32(val1, end); end = upb_Decoder_EncodeVarint32(val2, end); if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { - decode_err(d, kUpb_DecodeStatus_OutOfMemory); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } } UPB_NOINLINE -static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr, - upb_Message* msg, const upb_MiniTable_Enum* e, - const upb_MiniTable_Field* field, - uint32_t v) { +static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable_Enum* e, + const upb_MiniTable_Field* field, + uint32_t v) { // OPT: binary search long lists? int n = e->value_count; for (int i = 0; i < n; i++) { @@ -423,29 +433,30 @@ static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr, uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; upb_Message* unknown_msg = field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg; - upb_Decode_AddUnknownVarints(d, unknown_msg, tag, v); + _upb_Decoder_AddUnknownVarints(d, unknown_msg, tag, v); return false; } UPB_FORCEINLINE -static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable_Enum* e, - const upb_MiniTable_Field* field, wireval* val) { +static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable_Enum* e, + const upb_MiniTable_Field* field, + wireval* val) { uint32_t v = val->uint32_val; if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true; - return decode_checkenum_slow(d, ptr, msg, e, field, v); + return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); } UPB_NOINLINE -static const char* decode_enum_toarray(upb_Decoder* d, const char* ptr, - upb_Message* msg, upb_Array* arr, - const upb_MiniTable_Sub* subs, - const upb_MiniTable_Field* field, - wireval* val) { +static const char* _upb_Decoder_DecodeEnumArray( + upb_Decoder* d, const char* 
ptr, upb_Message* msg, upb_Array* arr, + const upb_MiniTable_Sub* subs, const upb_MiniTable_Field* field, + wireval* val) { const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum; - if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr; + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr; void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); arr->size++; memcpy(mem, val, 4); @@ -453,17 +464,16 @@ static const char* decode_enum_toarray(upb_Decoder* d, const char* ptr, } UPB_FORCEINLINE -static const char* decode_fixed_packed(upb_Decoder* d, const char* ptr, - upb_Array* arr, wireval* val, - const upb_MiniTable_Field* field, - int lg2) { +static const char* _upb_Decoder_DecodeFixedPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTable_Field* field, int lg2) { int mask = (1 << lg2) - 1; size_t count = val->size >> lg2; if ((val->size & mask) != 0) { // Length isn't a round multiple of elem size. - return decode_err(d, kUpb_DecodeStatus_Malformed); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } - decode_reserve(d, arr, count); + _upb_Decoder_Reserve(d, arr, count); void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); arr->size += count; // Note: if/when the decoder supports multi-buffer input, we will need to @@ -496,70 +506,68 @@ static const char* decode_fixed_packed(upb_Decoder* d, const char* ptr, } UPB_FORCEINLINE -static const char* decode_varint_packed(upb_Decoder* d, const char* ptr, - upb_Array* arr, wireval* val, - const upb_MiniTable_Field* field, - int lg2) { +static const char* _upb_Decoder_DecodeVarintPacked( + upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val, + const upb_MiniTable_Field* field, int lg2) { int scale = 1 << lg2; - int saved_limit = decode_pushlimit(d, ptr, val->size); + int saved_limit = _upb_Decoder_PushLimit(d, ptr, val->size); char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); - while 
(!decode_isdone(d, &ptr)) { + while (!_upb_Decoder_IsDone(d, &ptr)) { wireval elem; - ptr = decode_varint64(d, ptr, &elem.uint64_val); - decode_munge(field->descriptortype, &elem); - if (decode_reserve(d, arr, 1)) { + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_Munge(field->descriptortype, &elem); + if (_upb_Decoder_Reserve(d, arr, 1)) { out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void); } arr->size++; memcpy(out, &elem, scale); out += scale; } - decode_poplimit(d, ptr, saved_limit); + _upb_Decoder_PopLimit(d, ptr, saved_limit); return ptr; } UPB_NOINLINE -static const char* decode_enum_packed(upb_Decoder* d, const char* ptr, - upb_Message* msg, upb_Array* arr, - const upb_MiniTable_Sub* subs, - const upb_MiniTable_Field* field, - wireval* val) { +static const char* _upb_Decoder_DecodeEnumPacked( + upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr, + const upb_MiniTable_Sub* subs, const upb_MiniTable_Field* field, + wireval* val) { const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum; - int saved_limit = decode_pushlimit(d, ptr, val->size); + int saved_limit = _upb_Decoder_PushLimit(d, ptr, val->size); char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); - while (!decode_isdone(d, &ptr)) { + while (!_upb_Decoder_IsDone(d, &ptr)) { wireval elem; - ptr = decode_varint64(d, ptr, &elem.uint64_val); - decode_munge_int32(&elem); - if (!decode_checkenum(d, ptr, msg, e, field, &elem)) { + ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val); + _upb_Decoder_MungeInt32(&elem); + if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) { continue; } - if (decode_reserve(d, arr, 1)) { + if (_upb_Decoder_Reserve(d, arr, 1)) { out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void); } arr->size++; memcpy(out, &elem, 4); out += 4; } - decode_poplimit(d, ptr, saved_limit); + _upb_Decoder_PopLimit(d, ptr, saved_limit); return ptr; } -static const char* decode_toarray(upb_Decoder* d, const 
char* ptr, - upb_Message* msg, - const upb_MiniTable_Sub* subs, - const upb_MiniTable_Field* field, - wireval* val, int op) { +static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable_Sub* subs, + const upb_MiniTable_Field* field, + wireval* val, int op) { upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void); upb_Array* arr = *arrp; void* mem; if (arr) { - decode_reserve(d, arr, 1); + _upb_Decoder_Reserve(d, arr, 1); } else { size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype]; arr = _upb_Array_New(&d->arena, 4, lg2); - if (!arr) return decode_err(d, kUpb_DecodeStatus_OutOfMemory); + if (!arr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); *arrp = arr; } @@ -573,48 +581,50 @@ static const char* decode_toarray(upb_Decoder* d, const char* ptr, memcpy(mem, val, 1 << op); return ptr; case OP_STRING: - decode_verifyutf8(d, ptr, val->size); + _upb_Decoder_VerifyUtf8(d, ptr, val->size); /* Fallthrough. */ case OP_BYTES: { /* Append bytes. */ upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; arr->size++; - return decode_readstr(d, ptr, val->size, str); + return _upb_Decoder_ReadString(d, ptr, val->size, str); } case OP_SUBMSG: { /* Append submessage / group. 
*/ - upb_Message* submsg = decode_newsubmsg(d, subs, field); + upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field); *UPB_PTR_AT(_upb_array_ptr(arr), arr->size * sizeof(void*), upb_Message*) = submsg; arr->size++; if (UPB_UNLIKELY(field->descriptortype == kUpb_FieldType_Group)) { - return decode_togroup(d, ptr, submsg, subs, field); + return _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, field); } else { - return decode_tosubmsg(d, ptr, submsg, subs, field, val->size); + return _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); } } case OP_FIXPCK_LG2(2): case OP_FIXPCK_LG2(3): - return decode_fixed_packed(d, ptr, arr, val, field, - op - OP_FIXPCK_LG2(0)); + return _upb_Decoder_DecodeFixedPacked(d, ptr, arr, val, field, + op - OP_FIXPCK_LG2(0)); case OP_VARPCK_LG2(0): case OP_VARPCK_LG2(2): case OP_VARPCK_LG2(3): - return decode_varint_packed(d, ptr, arr, val, field, - op - OP_VARPCK_LG2(0)); + return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, + op - OP_VARPCK_LG2(0)); case OP_ENUM: - return decode_enum_toarray(d, ptr, msg, arr, subs, field, val); + return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); case OP_PACKED_ENUM: - return decode_enum_packed(d, ptr, msg, arr, subs, field, val); + return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); default: UPB_UNREACHABLE(); } } -static const char* decode_tomap(upb_Decoder* d, const char* ptr, - upb_Message* msg, const upb_MiniTable_Sub* subs, - const upb_MiniTable_Field* field, - wireval* val) { +static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable_Sub* subs, + const upb_MiniTable_Field* field, + wireval* val) { upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*); upb_Map* map = *map_p; upb_MapEntry ent; @@ -643,35 +653,35 @@ static const char* decode_tomap(upb_Decoder* d, const char* ptr, } const char* start = ptr; - ptr = decode_tosubmsg(d, ptr, &ent.k, 
subs, field, val->size); + ptr = _upb_Decoder_DecodeSubMessage(d, ptr, &ent.k, subs, field, val->size); // check if ent had any unknown fields size_t size; upb_Message_GetUnknown(&ent.k, &size); if (size != 0) { uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; - upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start)); + _upb_Decoder_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start)); if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { - decode_err(d, kUpb_DecodeStatus_OutOfMemory); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } } else { if (_upb_Map_Insert(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena) == _kUpb_MapInsertStatus_OutOfMemory) { - decode_err(d, kUpb_DecodeStatus_OutOfMemory); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } } return ptr; } -static const char* decode_tomsg(upb_Decoder* d, const char* ptr, - upb_Message* msg, const upb_MiniTable_Sub* subs, - const upb_MiniTable_Field* field, wireval* val, - int op) { +static const char* _upb_Decoder_DecodeToSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* msg, + const upb_MiniTable_Sub* subs, const upb_MiniTable_Field* field, + wireval* val, int op) { void* mem = UPB_PTR_AT(msg, field->offset, void); int type = field->descriptortype; if (UPB_UNLIKELY(op == OP_ENUM) && - !decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field, - val)) { + !_upb_Decoder_CheckEnum(d, ptr, msg, subs[field->submsg_index].subenum, + field, val)) { return ptr; } @@ -693,21 +703,22 @@ static const char* decode_tomsg(upb_Decoder* d, const char* ptr, upb_Message** submsgp = mem; upb_Message* submsg = *submsgp; if (!submsg) { - submsg = decode_newsubmsg(d, subs, field); + submsg = _upb_Decoder_NewSubMessage(d, subs, field); *submsgp = submsg; } if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) { - ptr = decode_togroup(d, ptr, submsg, subs, field); + ptr = _upb_Decoder_DecodeKnownGroup(d, ptr, submsg, subs, 
field); } else { - ptr = decode_tosubmsg(d, ptr, submsg, subs, field, val->size); + ptr = _upb_Decoder_DecodeSubMessage(d, ptr, submsg, subs, field, + val->size); } break; } case OP_STRING: - decode_verifyutf8(d, ptr, val->size); + _upb_Decoder_VerifyUtf8(d, ptr, val->size); /* Fallthrough. */ case OP_BYTES: - return decode_readstr(d, ptr, val->size, mem); + return _upb_Decoder_ReadString(d, ptr, val->size, mem); case OP_SCALAR_LG2(3): memcpy(mem, val, 8); break; @@ -726,9 +737,9 @@ static const char* decode_tomsg(upb_Decoder* d, const char* ptr, } UPB_NOINLINE -const char* decode_checkrequired(upb_Decoder* d, const char* ptr, - const upb_Message* msg, - const upb_MiniTable* l) { +const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, + const upb_Message* msg, + const upb_MiniTable* l) { assert(l->required_count); if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) { return ptr; @@ -743,14 +754,14 @@ const char* decode_checkrequired(upb_Decoder* d, const char* ptr, } UPB_FORCEINLINE -static bool decode_tryfastdispatch(upb_Decoder* d, const char** ptr, - upb_Message* msg, - const upb_MiniTable* layout) { +static bool _upb_Decoder_TryFastDispatch(upb_Decoder* d, const char** ptr, + upb_Message* msg, + const upb_MiniTable* layout) { #if UPB_FASTTABLE if (layout && layout->table_mask != (unsigned char)-1) { - uint16_t tag = fastdecode_loadtag(*ptr); + uint16_t tag = _upb_FastDecoder_LoadTag(*ptr); intptr_t table = decode_totable(layout); - *ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag); + *ptr = _upb_FastDecoder_TagDispatch(d, *ptr, msg, table, 0, tag); return true; } #endif @@ -764,7 +775,7 @@ static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, switch (wire_type) { case kUpb_WireType_Varint: { uint64_t val; - return decode_varint64(d, ptr, &val); + return _upb_Decoder_DecodeVarint(d, ptr, &val); } case kUpb_WireType_64Bit: return ptr + 8; @@ -776,9 +787,9 @@ static const char* 
upb_Decoder_SkipField(upb_Decoder* d, const char* ptr, return ptr + size; } case kUpb_WireType_StartGroup: - return decode_group(d, ptr, NULL, NULL, field_number); + return _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); default: - decode_err(d, kUpb_DecodeStatus_Malformed); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } } @@ -794,12 +805,15 @@ static void upb_Decoder_AddKnownMessageSetItem( const char* data, uint32_t size) { upb_Message_Extension* ext = _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena); - if (UPB_UNLIKELY(!ext)) decode_err(d, kUpb_DecodeStatus_OutOfMemory); - upb_Message* submsg = decode_newsubmsg(d, &ext->ext->sub, &ext->ext->field); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } + upb_Message* submsg = + _upb_Decoder_NewSubMessage(d, &ext->ext->sub, &ext->ext->field); upb_DecodeStatus status = upb_Decode(data, size, submsg, item_mt->sub.submsg, d->extreg, d->options, &d->arena); memcpy(&ext->data, &submsg, sizeof(submsg)); - if (status != kUpb_DecodeStatus_Ok) decode_err(d, status); + if (status != kUpb_DecodeStatus_Ok) _upb_Decoder_ErrorJmp(d, status); } static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, @@ -822,7 +836,7 @@ static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d, if (!_upb_Message_AddUnknown(msg, buf, split - buf, &d->arena) || !_upb_Message_AddUnknown(msg, message_data, message_size, &d->arena) || !_upb_Message_AddUnknown(msg, split, end - split, &d->arena)) { - decode_err(d, kUpb_DecodeStatus_OutOfMemory); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } } @@ -849,15 +863,15 @@ static const char* upb_Decoder_DecodeMessageSetItem( kUpb_HavePayload = 1 << 1, } StateMask; StateMask state_mask = 0; - while (!decode_isdone(d, &ptr)) { + while (!_upb_Decoder_IsDone(d, &ptr)) { uint32_t tag; - ptr = decode_tag(d, ptr, &tag); + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); switch (tag) { case kEndItemTag: return ptr; case 
kTypeIdTag: { uint64_t tmp; - ptr = decode_varint64(d, ptr, &tmp); + ptr = _upb_Decoder_DecodeVarint(d, ptr, &tmp); if (state_mask & kUpb_HaveId) break; // Ignore dup. state_mask |= kUpb_HaveId; type_id = tmp; @@ -889,13 +903,12 @@ static const char* upb_Decoder_DecodeMessageSetItem( break; } } - decode_err(d, kUpb_DecodeStatus_Malformed); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } -static const upb_MiniTable_Field* decode_findfield(upb_Decoder* d, - const upb_MiniTable* l, - uint32_t field_number, - int* last_field_index) { +static const upb_MiniTable_Field* _upb_Decoder_FindField( + upb_Decoder* d, const upb_MiniTable* l, uint32_t field_number, + int* last_field_index) { static upb_MiniTable_Field none = {0, 0, 0, 0, 0, 0}; if (l == NULL) return &none; @@ -948,14 +961,14 @@ found: } UPB_FORCEINLINE -static const char* decode_wireval(upb_Decoder* d, const char* ptr, - const upb_MiniTable_Field* field, - int wire_type, wireval* val, int* op) { +static const char* _upb_Decoder_DecodeWireValue( + upb_Decoder* d, const char* ptr, const upb_MiniTable_Field* field, + int wire_type, wireval* val, int* op) { switch (wire_type) { case kUpb_WireType_Varint: - ptr = decode_varint64(d, ptr, &val->uint64_val); + ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); *op = varint_ops[field->descriptortype]; - decode_munge(field->descriptortype, val); + _upb_Decoder_Munge(field->descriptortype, val); return ptr; case kUpb_WireType_32Bit: memcpy(&val->uint32_val, ptr, 4); @@ -993,14 +1006,14 @@ static const char* decode_wireval(upb_Decoder* d, const char* ptr, default: break; } - return decode_err(d, kUpb_DecodeStatus_Malformed); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } UPB_FORCEINLINE -static const char* decode_known(upb_Decoder* d, const char* ptr, - upb_Message* msg, const upb_MiniTable* layout, - const upb_MiniTable_Field* field, int op, - wireval* val) { +static const char* _upb_Decoder_DecodeKnownField( + upb_Decoder* d, const char* 
ptr, upb_Message* msg, + const upb_MiniTable* layout, const upb_MiniTable_Field* field, int op, + wireval* val) { const upb_MiniTable_Sub* subs = layout->subs; uint8_t mode = field->mode; @@ -1009,7 +1022,9 @@ static const char* decode_known(upb_Decoder* d, const char* ptr, (const upb_MiniTable_Extension*)field; upb_Message_Extension* ext = _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena); - if (UPB_UNLIKELY(!ext)) return decode_err(d, kUpb_DecodeStatus_OutOfMemory); + if (UPB_UNLIKELY(!ext)) { + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + } d->unknown_msg = msg; msg = &ext->data; subs = &ext->ext->sub; @@ -1017,17 +1032,18 @@ static const char* decode_known(upb_Decoder* d, const char* ptr, switch (mode & kUpb_FieldMode_Mask) { case kUpb_FieldMode_Array: - return decode_toarray(d, ptr, msg, subs, field, val, op); + return _upb_Decoder_DecodeToArray(d, ptr, msg, subs, field, val, op); case kUpb_FieldMode_Map: - return decode_tomap(d, ptr, msg, subs, field, val); + return _upb_Decoder_DecodeToMap(d, ptr, msg, subs, field, val); case kUpb_FieldMode_Scalar: - return decode_tomsg(d, ptr, msg, subs, field, val, op); + return _upb_Decoder_DecodeToSubMessage(d, ptr, msg, subs, field, val, op); default: UPB_UNREACHABLE(); } } -static const char* decode_reverse_skip_varint(const char* ptr, uint32_t val) { +static const char* _upb_Decoder_ReverseSkipVarint(const char* ptr, + uint32_t val) { uint32_t seen = 0; do { ptr--; @@ -1037,10 +1053,12 @@ static const char* decode_reverse_skip_varint(const char* ptr, uint32_t val) { return ptr; } -static const char* decode_unknown(upb_Decoder* d, const char* ptr, - upb_Message* msg, int field_number, - int wire_type, wireval val) { - if (field_number == 0) return decode_err(d, kUpb_DecodeStatus_Malformed); +static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, + const char* ptr, + upb_Message* msg, + int field_number, + int wire_type, wireval val) { + if (field_number == 0) 
_upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); // Since unknown fields are the uncommon case, we do a little extra work here // to walk backwards through the buffer to find the field start. This frees @@ -1068,37 +1086,38 @@ static const char* decode_unknown(upb_Decoder* d, const char* ptr, assert(start == d->debug_valstart); uint32_t tag = ((uint32_t)field_number << 3) | wire_type; - start = decode_reverse_skip_varint(start, tag); + start = _upb_Decoder_ReverseSkipVarint(start, tag); assert(start == d->debug_tagstart); if (wire_type == kUpb_WireType_StartGroup) { d->unknown = start; d->unknown_msg = msg; - ptr = decode_group(d, ptr, NULL, NULL, field_number); + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); start = d->unknown; d->unknown = NULL; } if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { - return decode_err(d, kUpb_DecodeStatus_OutOfMemory); + _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } } else if (wire_type == kUpb_WireType_StartGroup) { - ptr = decode_group(d, ptr, NULL, NULL, field_number); + ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); } return ptr; } UPB_NOINLINE -static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg, - const upb_MiniTable* layout) { +static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, + upb_Message* msg, + const upb_MiniTable* layout) { int last_field_index = 0; #if UPB_FASTTABLE // The first time we want to skip fast dispatch, because we may have just been // invoked by the fast parser to handle a case that it bailed on. 
- if (!decode_isdone(d, &ptr)) goto nofast; + if (!_upb_Decoder_IsDone(d, &ptr)) goto nofast; #endif - while (!decode_isdone(d, &ptr)) { + while (!_upb_Decoder_IsDone(d, &ptr)) { uint32_t tag; const upb_MiniTable_Field* field; int field_number; @@ -1106,7 +1125,7 @@ static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg, wireval val; int op; - if (decode_tryfastdispatch(d, &ptr, msg, layout)) break; + if (_upb_Decoder_TryFastDispatch(d, &ptr, msg, layout)) break; #if UPB_FASTTABLE nofast: @@ -1117,7 +1136,7 @@ static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg, #endif UPB_ASSERT(ptr < d->limit_ptr); - ptr = decode_tag(d, ptr, &tag); + ptr = _upb_Decoder_DecodeTag(d, ptr, &tag); field_number = tag >> 3; wire_type = tag & 7; @@ -1130,15 +1149,16 @@ static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg, return ptr; } - field = decode_findfield(d, layout, field_number, &last_field_index); - ptr = decode_wireval(d, ptr, field, wire_type, &val, &op); + field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); + ptr = _upb_Decoder_DecodeWireValue(d, ptr, field, wire_type, &val, &op); if (op >= 0) { - ptr = decode_known(d, ptr, msg, layout, field, op, &val); + ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); } else { switch (op) { case OP_UNKNOWN: - ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val); + ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, + wire_type, val); break; case OP_MSGSET_ITEM: ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); @@ -1148,22 +1168,24 @@ static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg, } return UPB_UNLIKELY(layout && layout->required_count) - ? decode_checkrequired(d, ptr, msg, layout) + ? 
_upb_Decoder_CheckRequired(d, ptr, msg, layout) : ptr; } -const char* fastdecode_generic(struct upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { +const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { (void)data; *(uint32_t*)msg |= hasbits; - return decode_msg(d, ptr, msg, decode_totablep(table)); + return _upb_Decoder_DecodeMessage(d, ptr, msg, decode_totablep(table)); } -static upb_DecodeStatus decode_top(struct upb_Decoder* d, const char* buf, - void* msg, const upb_MiniTable* l) { - if (!decode_tryfastdispatch(d, &buf, msg, l)) { - decode_msg(d, buf, msg, l); +static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d, + const char* buf, void* msg, + const upb_MiniTable* l) { + if (!_upb_Decoder_TryFastDispatch(d, &buf, msg, l)) { + _upb_Decoder_DecodeMessage(d, buf, msg, l); } if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed; if (d->missing_required) return kUpb_DecodeStatus_MissingRequired; @@ -1203,7 +1225,7 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, upb_DecodeStatus status = UPB_SETJMP(state.err); if (UPB_LIKELY(status == kUpb_DecodeStatus_Ok)) { - status = decode_top(&state, buf, msg, l); + status = _upb_Decoder_DecodeTop(&state, buf, msg, l); } arena->head.ptr = state.arena.head.ptr; diff --git a/upb/decode_fast.c b/upb/decode_fast.c index 30210b01b2..79ff6db109 100644 --- a/upb/decode_fast.c +++ b/upb/decode_fast.c @@ -57,7 +57,7 @@ /* Uncomment either of these for debugging purposes. 
*/ \ /* fprintf(stderr, m); */ \ /*__builtin_trap(); */ \ - return fastdecode_generic(d, ptr, msg, table, hasbits, 0); + return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); typedef enum { CARD_s = 0, /* Singular (optional, non-repeated) */ @@ -70,12 +70,10 @@ UPB_NOINLINE static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { int overrun = data; int status; - ptr = decode_isdonefallback_inl(d, ptr, overrun, &status); - if (ptr == NULL) { - return fastdecode_err(d, status); - } - data = fastdecode_loadtag(ptr); - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); + ptr = _upb_Decoder_IsDoneFallbackInline(d, ptr, overrun, &status); + if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, status); + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE @@ -87,7 +85,7 @@ static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { *(uint32_t*)msg |= hasbits; // Sync hasbits. const upb_MiniTable* l = decode_totablep(table); return UPB_UNLIKELY(l->required_count) - ? decode_checkrequired(d, ptr, msg, l) + ? _upb_Decoder_CheckRequired(d, ptr, msg, l) : ptr; } else { data = overrun; @@ -96,8 +94,8 @@ static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { } // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - data = fastdecode_loadtag(ptr); - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE @@ -176,9 +174,9 @@ static const char* fastdecode_delimited(upb_Decoder* d, const char* ptr, // Corrupt wire format: invalid limit. return NULL; } - int delta = decode_pushlimit(d, ptr, len); + int delta = _upb_Decoder_PushLimit(d, ptr, len); ptr = func(d, ptr, ctx); - decode_poplimit(d, ptr, delta); + _upb_Decoder_PopLimit(d, ptr, delta); } else { // Fast case: Sub-message is <128 bytes and fits in the current buffer. 
// This means we can preserve limit/limit_ptr verbatim. @@ -258,8 +256,8 @@ static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, fastdecode_nextret ret; dst = (char*)dst + valbytes; - if (UPB_LIKELY(!decode_isdone(d, ptr))) { - ret.tag = fastdecode_loadtag(*ptr); + if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { + ret.tag = _upb_FastDecoder_LoadTag(*ptr); if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { ret.next = FD_NEXT_SAMEFIELD; } else { @@ -315,7 +313,7 @@ static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, } begin = _upb_array_ptr(farr->arr); farr->end = begin + (farr->arr->capacity * valbytes); - *data = fastdecode_loadtag(ptr); + *data = _upb_FastDecoder_LoadTag(ptr); return begin + (farr->arr->size * valbytes); } default: @@ -402,7 +400,7 @@ done: \ ptr += tagbytes; \ ptr = fastdecode_varint64(ptr, &val); \ - if (ptr == NULL) return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \ + if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ val = fastdecode_munge(val, valbytes, zigzag); \ memcpy(dst, &val, valbytes); \ \ @@ -415,7 +413,7 @@ done: goto again; \ case FD_NEXT_OTHERFIELD: \ data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ case FD_NEXT_ATLIMIT: \ return ptr; \ } \ @@ -437,7 +435,7 @@ static const char* fastdecode_topackedvarint(upb_Decoder* d, const char* ptr, void* dst = data->dst; uint64_t val; - while (!decode_isdone(d, &ptr)) { + while (!_upb_Decoder_IsDone(d, &ptr)) { dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); ptr = fastdecode_varint64(ptr, &val); if (ptr == NULL) return NULL; @@ -466,7 +464,7 @@ static const char* fastdecode_topackedvarint(upb_Decoder* d, const char* ptr, ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ \ if (UPB_UNLIKELY(ptr == NULL)) { \ - return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \ + 
_upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ } \ \ UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); @@ -561,7 +559,7 @@ TAGBYTES(p) goto again; \ case FD_NEXT_OTHERFIELD: \ data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ case FD_NEXT_ATLIMIT: \ return ptr; \ } \ @@ -582,7 +580,7 @@ TAGBYTES(p) \ if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) || \ (size % valbytes) != 0)) { \ - return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ } \ \ upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ @@ -593,7 +591,7 @@ TAGBYTES(p) if (UPB_LIKELY(!arr)) { \ *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \ if (!arr) { \ - return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ } \ } else { \ _upb_Array_Resize(arr, elems, &d->arena); \ @@ -659,8 +657,8 @@ static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, uint64_t hasbits, uint64_t data) { upb_StringView* dst = (upb_StringView*)data; - if (!decode_verifyutf8_inl(dst->data, dst->size)) { - return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8); + if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); } UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); } @@ -674,7 +672,7 @@ static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, \ if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { \ dst->size = 0; \ - return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ } \ \ if (d->options & kUpb_DecodeOption_AliasString) { \ @@ -683,7 +681,7 @@ static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, } 
else { \ char* data = upb_Arena_Malloc(&d->arena, size); \ if (!data) { \ - return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory); \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ } \ memcpy(data, ptr, size); \ dst->data = data; \ @@ -774,8 +772,9 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, ptr += size; \ \ if (card == CARD_r) { \ - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ - return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8); \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ } \ fastdecode_nextret ret = fastdecode_nextrepeated( \ d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ @@ -785,7 +784,7 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, goto again; \ case FD_NEXT_OTHERFIELD: \ data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ case FD_NEXT_ATLIMIT: \ return ptr; \ } \ @@ -852,8 +851,9 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, ptr += size; \ \ if (card == CARD_r) { \ - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ - return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8); \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ } \ fastdecode_nextret ret = fastdecode_nextrepeated( \ d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ @@ -866,12 +866,12 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, /* data also. 
*/ \ fastdecode_commitarr(dst, &farr, sizeof(upb_StringView)); \ data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ } \ goto again; \ case FD_NEXT_OTHERFIELD: \ data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ case FD_NEXT_ATLIMIT: \ return ptr; \ } \ @@ -966,7 +966,7 @@ static const char* fastdecode_tosubmsg(upb_Decoder* d, const char* ptr, } \ \ if (--d->depth == 0) { \ - return fastdecode_err(d, kUpb_DecodeStatus_MaxDepthExceeded); \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ } \ \ upb_Message** dst; \ @@ -1003,7 +1003,7 @@ static const char* fastdecode_tosubmsg(upb_Decoder* d, const char* ptr, ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ \ if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ - return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ } \ \ if (card == CARD_r) { \ @@ -1016,7 +1016,7 @@ static const char* fastdecode_tosubmsg(upb_Decoder* d, const char* ptr, case FD_NEXT_OTHERFIELD: \ d->depth++; \ data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ case FD_NEXT_ATLIMIT: \ d->depth++; \ return ptr; \ diff --git a/upb/decode_fast.h b/upb/decode_fast.h index 3ccd1a02b1..7803367e5e 100644 --- a/upb/decode_fast.h +++ b/upb/decode_fast.h @@ -75,9 +75,10 @@ struct upb_Decoder; // The fallback, generic parsing function that can handle any field type. // This just uses the regular (non-fast) parser to parse a single field. 
-const char* fastdecode_generic(struct upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data); +const char* _upb_FastDecoder_DecodeGeneric(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data); #define UPB_PARSE_PARAMS \ struct upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ diff --git a/upb/def.c b/upb/def.c index 9807101ec6..a7144bef25 100644 --- a/upb/def.c +++ b/upb/def.c @@ -1541,7 +1541,7 @@ static void make_layout(upb_DefBuilder* ctx, const upb_MessageDef* m) { /* TODO(haberman): initialize fast tables so that reflection-based parsing * can get the same speeds as linked-in types. */ - l->fasttable[0].field_parser = &fastdecode_generic; + l->fasttable[0].field_parser = &_upb_FastDecoder_DecodeGeneric; l->fasttable[0].field_data = 0; if (upb_MessageDef_IsMapEntry(m)) { diff --git a/upb/internal/decode.h b/upb/internal/decode.h index 99aa5ae824..ddd6fec962 100644 --- a/upb/internal/decode.h +++ b/upb/internal/decode.h @@ -72,12 +72,12 @@ typedef struct upb_Decoder { * of our optimizations. That is also why we must declare it in a separate file, * otherwise the compiler will see that it calls longjmp() and deduce that it is * noreturn. */ -const char* fastdecode_err(upb_Decoder* d, int status); +const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status); extern const uint8_t upb_utf8_offsets[]; UPB_INLINE -bool decode_verifyutf8_inl(const char* ptr, int len) { +bool _upb_Decoder_VerifyUtf8Inline(const char* ptr, int len) { const char* end = ptr + len; // Check 8 bytes at a time for any non-ASCII char. 
@@ -100,9 +100,9 @@ non_ascii: return utf8_range2((const unsigned char*)ptr, end - ptr) == 0; } -const char* decode_checkrequired(upb_Decoder* d, const char* ptr, - const upb_Message* msg, - const upb_MiniTable* l); +const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, + const upb_Message* msg, + const upb_MiniTable* l); /* x86-64 pointers always have the high 16 bits matching. So we can shift * left 8 and right 8 without loss of information. */ @@ -115,8 +115,8 @@ UPB_INLINE const upb_MiniTable* decode_totablep(intptr_t table) { } UPB_INLINE -const char* decode_isdonefallback_inl(upb_Decoder* d, const char* ptr, - int overrun, int* status) { +const char* _upb_Decoder_IsDoneFallbackInline(upb_Decoder* d, const char* ptr, + int overrun, int* status) { if (overrun < d->limit) { /* Need to copy remaining data into patch buffer. */ UPB_ASSERT(overrun < 16); @@ -143,26 +143,27 @@ const char* decode_isdonefallback_inl(upb_Decoder* d, const char* ptr, } } -const char* decode_isdonefallback(upb_Decoder* d, const char* ptr, int overrun); +const char* _upb_Decoder_IsDoneFallback(upb_Decoder* d, const char* ptr, + int overrun); UPB_INLINE -bool decode_isdone(upb_Decoder* d, const char** ptr) { +bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) { int overrun = *ptr - d->end; if (UPB_LIKELY(*ptr < d->limit_ptr)) { return false; } else if (UPB_LIKELY(overrun == d->limit)) { return true; } else { - *ptr = decode_isdonefallback(d, *ptr, overrun); + *ptr = _upb_Decoder_IsDoneFallback(d, *ptr, overrun); return false; } } #if UPB_FASTTABLE UPB_INLINE -const char* fastdecode_tagdispatch(upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t tag) { +const char* _upb_FastDecoder_TagDispatch(upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t tag) { const upb_MiniTable* table_p = decode_totablep(table); uint8_t mask = table; uint64_t data; @@ -175,33 +176,34 @@ const char* 
fastdecode_tagdispatch(upb_Decoder* d, const char* ptr, } #endif -UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) { +UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { uint16_t tag; memcpy(&tag, ptr, 2); return tag; } -UPB_INLINE void decode_checklimit(upb_Decoder* d) { +UPB_INLINE void _upb_Decoder_CheckLimit(upb_Decoder* d) { UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit)); } -UPB_INLINE int decode_pushlimit(upb_Decoder* d, const char* ptr, int size) { +UPB_INLINE int _upb_Decoder_PushLimit(upb_Decoder* d, const char* ptr, + int size) { int limit = size + (int)(ptr - d->end); int delta = d->limit - limit; - decode_checklimit(d); + _upb_Decoder_CheckLimit(d); d->limit = limit; d->limit_ptr = d->end + UPB_MIN(0, limit); - decode_checklimit(d); + _upb_Decoder_CheckLimit(d); return delta; } -UPB_INLINE void decode_poplimit(upb_Decoder* d, const char* ptr, - int saved_delta) { +UPB_INLINE void _upb_Decoder_PopLimit(upb_Decoder* d, const char* ptr, + int saved_delta) { UPB_ASSERT(ptr - d->end == d->limit); - decode_checklimit(d); + _upb_Decoder_CheckLimit(d); d->limit += saved_delta; d->limit_ptr = d->end + UPB_MIN(0, d->limit); - decode_checklimit(d); + _upb_Decoder_CheckLimit(d); } #include "upb/port_undef.inc" diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index b418670fed..d04b1f6b59 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -1208,9 +1208,9 @@ std::vector FastDecodeTable(const protobuf::Descriptor* message, } while ((size_t)slot >= table.size()) { size_t size = std::max(static_cast(1), table.size() * 2); - table.resize(size, TableEntry{"fastdecode_generic", 0}); + table.resize(size, TableEntry{"_upb_FastDecoder_DecodeGeneric", 0}); } - if (table[slot].first != "fastdecode_generic") { + if (table[slot].first != "_upb_FastDecoder_DecodeGeneric") { // A hotter field already filled this slot. 
continue; } From 85534bbb1e2119bc5dae045967386db9cad22aa7 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Tue, 6 Sep 2022 21:04:59 -0700 Subject: [PATCH 05/35] remove unused #includes from reflection.h The functions declared in reflection.h use neither arrays nor maps, so (a) stop including the array and map definitions, and (b) update the handful of other source files which were relying on that transitivity. PiperOrigin-RevId: 472627278 --- BUILD | 3 ++- python/convert.c | 1 + upb/bindings/lua/msg.c | 1 + upb/json_decode.c | 1 + upb/json_encode.c | 1 + upb/reflection.c | 1 + upb/reflection.h | 3 +-- upb/text_encode.c | 1 + upb/util/BUILD | 1 + upb/util/required_fields.c | 1 + 10 files changed, 11 insertions(+), 3 deletions(-) diff --git a/BUILD b/BUILD index a49327b287..fced65eaf5 100644 --- a/BUILD +++ b/BUILD @@ -412,10 +412,10 @@ cc_library( copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [ + ":collections", ":encode_internal", ":port", ":reflection", - ":table_internal", ], ) @@ -433,6 +433,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":atoi_internal", + ":collections", ":encode_internal", ":port", ":reflection", diff --git a/python/convert.c b/python/convert.c index 3d9b94aa7c..b9df5019a1 100644 --- a/python/convert.c +++ b/python/convert.c @@ -29,6 +29,7 @@ #include "python/message.h" #include "python/protobuf.h" +#include "upb/map.h" #include "upb/reflection.h" #include "upb/util/compare.h" diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index 5da2765870..9c66c74b04 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -41,6 +41,7 @@ #include "upb/bindings/lua/upb.h" #include "upb/json_decode.h" #include "upb/json_encode.h" +#include "upb/map.h" #include "upb/port_def.inc" #include "upb/reflection.h" #include "upb/text_encode.h" diff --git a/upb/json_decode.c b/upb/json_decode.c index 1eed62f625..7a437db166 100644 --- a/upb/json_decode.c +++ b/upb/json_decode.c @@ -38,6 +38,7 @@ #include 
"upb/encode.h" #include "upb/internal/atoi.h" #include "upb/internal/unicode.h" +#include "upb/map.h" #include "upb/reflection.h" // Must be last. diff --git a/upb/json_encode.c b/upb/json_encode.c index 468b7f897c..9c657dddcd 100644 --- a/upb/json_encode.c +++ b/upb/json_encode.c @@ -38,6 +38,7 @@ #include "upb/decode.h" #include "upb/internal/encode.h" #include "upb/internal/vsnprintf_compat.h" +#include "upb/map.h" #include "upb/reflection.h" // Must be last. diff --git a/upb/reflection.c b/upb/reflection.c index 43f884495f..f2913b2465 100644 --- a/upb/reflection.c +++ b/upb/reflection.c @@ -30,6 +30,7 @@ #include #include "upb/internal/table.h" +#include "upb/map.h" #include "upb/msg.h" #include "upb/port_def.inc" diff --git a/upb/reflection.h b/upb/reflection.h index 23a7d34c5d..1fea8c6972 100644 --- a/upb/reflection.h +++ b/upb/reflection.h @@ -28,9 +28,8 @@ #ifndef UPB_REFLECTION_H_ #define UPB_REFLECTION_H_ -#include "upb/array.h" #include "upb/def.h" -#include "upb/map.h" +#include "upb/message_value.h" #include "upb/msg.h" #include "upb/upb.h" diff --git a/upb/text_encode.c b/upb/text_encode.c index f3a63e91fd..393b8d7615 100644 --- a/upb/text_encode.c +++ b/upb/text_encode.c @@ -36,6 +36,7 @@ #include "upb/internal/encode.h" #include "upb/internal/vsnprintf_compat.h" +#include "upb/map.h" #include "upb/reflection.h" // Must be last. diff --git a/upb/util/BUILD b/upb/util/BUILD index 9c34822932..48e648d869 100644 --- a/upb/util/BUILD +++ b/upb/util/BUILD @@ -62,6 +62,7 @@ cc_library( hdrs = ["required_fields.h"], visibility = ["//visibility:public"], deps = [ + "//:collections", "//:port", "//:reflection", ], diff --git a/upb/util/required_fields.c b/upb/util/required_fields.c index 90f40b6516..260a5b1d09 100644 --- a/upb/util/required_fields.c +++ b/upb/util/required_fields.c @@ -33,6 +33,7 @@ #include #include "upb/internal/vsnprintf_compat.h" +#include "upb/map.h" #include "upb/reflection.h" // Must be last. 
From 4215bc82eaca8d7ac361bf1aeb80045258d5a4cf Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Wed, 7 Sep 2022 14:21:18 -0700 Subject: [PATCH 06/35] fix the json parser to handle floats very near overflow Instead of bounds-testing a double value to guess whether it will overflow, just convert it to a float and check whether the result is +/- infinity. PiperOrigin-RevId: 472811865 --- BUILD | 18 ++++- upb/json_decode.c | 8 +- upb/json_decode_test.cc | 93 +++++++++++++++++++++++ upb/{json_test.cc => json_encode_test.cc} | 0 upb/json_test.proto | 2 + 5 files changed, 115 insertions(+), 6 deletions(-) create mode 100644 upb/json_decode_test.cc rename upb/{json_test.cc => json_encode_test.cc} (100%) diff --git a/BUILD b/BUILD index fced65eaf5..68cd34503e 100644 --- a/BUILD +++ b/BUILD @@ -513,8 +513,22 @@ upb_proto_reflection_library( ) cc_test( - name = "json_test", - srcs = ["upb/json_test.cc"], + name = "json_decode_test", + srcs = ["upb/json_decode_test.cc"], + deps = [ + ":json", + ":json_test_upb_proto", + ":json_test_upb_proto_reflection", + ":reflection", + ":struct_upb_proto", + ":upb", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "json_encode_test", + srcs = ["upb/json_encode_test.cc"], deps = [ ":json", ":json_test_upb_proto", diff --git a/upb/json_decode.c b/upb/json_decode.c index 7a437db166..56775cb778 100644 --- a/upb/json_decode.c +++ b/upb/json_decode.c @@ -745,11 +745,11 @@ static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) { } if (upb_FieldDef_CType(f) == kUpb_CType_Float) { - if (val.double_val != INFINITY && val.double_val != -INFINITY && - (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) { - jsondec_err(d, "Float out of range"); + float f = val.double_val; + if (val.double_val != INFINITY && val.double_val != -INFINITY) { + if (f == INFINITY || f == -INFINITY) jsondec_err(d, "Float out of range"); } - val.float_val = val.double_val; + val.float_val = f; } return val; diff --git 
a/upb/json_decode_test.cc b/upb/json_decode_test.cc new file mode 100644 index 0000000000..fcc65a9360 --- /dev/null +++ b/upb/json_decode_test.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2009-2022, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "upb/json_decode.h" + +#include + +#include "google/protobuf/struct.upb.h" +#include "gtest/gtest.h" +#include "upb/def.hpp" +#include "upb/json_test.upb.h" +#include "upb/json_test.upbdefs.h" +#include "upb/upb.hpp" + +static upb_test_Box* JsonDecode(const char* json, upb_Arena* a) { + upb::Status status; + upb::DefPool defpool; + upb::MessageDefPtr m(upb_test_Box_getmsgdef(defpool.ptr())); + EXPECT_TRUE(m.ptr() != nullptr); + + upb_test_Box* box = upb_test_Box_new(a); + int options = 0; + bool ok = upb_JsonDecode(json, strlen(json), box, m.ptr(), defpool.ptr(), + options, a, status.ptr()); + return ok ? box : nullptr; +} + +struct FloatTest { + const std::string json; + float f; +}; + +static const std::vector FloatTestsPass = { + {R"({"f": 0})", 0}, + {R"({"f": 1})", 1}, + {R"({"f": 1.000000})", 1}, + {R"({"f": 1.5e1})", 15}, + {R"({"f": 15e-1})", 1.5}, + {R"({"f": -3.5})", -3.5}, + {R"({"f": 3.402823e38})", 3.402823e38}, + {R"({"f": -3.402823e38})", -3.402823e38}, + {R"({"f": 340282346638528859811704183484516925440.0})", + 340282346638528859811704183484516925440.0}, + {R"({"f": -340282346638528859811704183484516925440.0})", + -340282346638528859811704183484516925440.0}, +}; + +static const std::vector FloatTestsFail = { + {R"({"f": 1z})", 0}, + {R"({"f": 3.4028236e+38})", 0}, + {R"({"f": -3.4028236e+38})", 0}, +}; + +// Decode some floats. 
+TEST(JsonTest, DecodeFloats) { + upb::Arena a; + + for (const auto& test : FloatTestsPass) { + upb_test_Box* box = JsonDecode(test.json.c_str(), a.ptr()); + EXPECT_NE(box, nullptr); + float f = upb_test_Box_f(box); + EXPECT_EQ(f, test.f); + } + + for (const auto& test : FloatTestsFail) { + upb_test_Box* box = JsonDecode(test.json.c_str(), a.ptr()); + EXPECT_EQ(box, nullptr); + } +} diff --git a/upb/json_test.cc b/upb/json_encode_test.cc similarity index 100% rename from upb/json_test.cc rename to upb/json_encode_test.cc diff --git a/upb/json_test.proto b/upb/json_test.proto index d14b2f105a..0765333f5e 100644 --- a/upb/json_test.proto +++ b/upb/json_test.proto @@ -17,4 +17,6 @@ message Box { optional Tag last_tag = 5; optional string name = 4; optional google.protobuf.Value val = 6; + optional float f = 7; + optional double d = 8; } From 467e2b5e3b41d6822a8ebbda313d54bc12b8c584 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Wed, 7 Sep 2022 16:24:00 -0700 Subject: [PATCH 07/35] Fix public import header generation. PiperOrigin-RevId: 472841214 --- upbc/protoc-gen-upb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index d04b1f6b59..24ac89dfa0 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -896,7 +896,7 @@ void WriteHeader(const FileLayout& layout, Output& output) { if (i == 0) { output("/* Public Imports. 
*/\n"); } - output("#include \"$0\"\n", HeaderFilename(file)); + output("#include \"$0\"\n", HeaderFilename(file->public_dependency(i))); if (i == file->public_dependency_count() - 1) { output("\n"); } From 27f8d367d3c6a6df134bd062ce43bd2435041c8a Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Wed, 7 Sep 2022 21:44:48 -0700 Subject: [PATCH 08/35] fix segfault when clearing an unset oneof in Python https://github.com/protocolbuffers/protobuf/issues/10461 PiperOrigin-RevId: 472891559 --- python/message.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/message.c b/python/message.c index 47b00e878f..e67bf2306b 100644 --- a/python/message.c +++ b/python/message.c @@ -1361,7 +1361,7 @@ static PyObject* PyUpb_Message_ClearField(PyObject* _self, PyObject* arg) { } if (o) f = upb_Message_WhichOneof(self->ptr.msg, o); - PyUpb_Message_DoClearField(_self, f); + if (f) PyUpb_Message_DoClearField(_self, f); Py_RETURN_NONE; } From 896e74c141b00ad1034355e53effac311b0d5a26 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Sep 2022 08:34:53 -0700 Subject: [PATCH 09/35] Optimizes `upb_MiniTable_Enum` for large but dense enums. Optimizes `upb_MiniTable_Enum` for enums with many values (>64) but with relatively dense packing in numeric space. This CL optimizes both the size and speed of such enums: - size: 30x code size reduction - speed: moved from linear search to a constant-time bit test Negative enum values are still expensive, as they are never put into the bitfield. 
PiperOrigin-RevId: 473259819 --- bazel/build_defs.bzl | 2 -- upb/decode.c | 11 +++--- upb/def.c | 2 -- upb/mini_table.c | 78 +++++++++++++++++++++++++++++------------- upb/mini_table.h | 13 ------- upb/msg_internal.h | 42 +++++++++++++++++++++-- upbc/protoc-gen-upb.cc | 18 ++++------ 7 files changed, 104 insertions(+), 62 deletions(-) diff --git a/bazel/build_defs.bzl b/bazel/build_defs.bzl index e5c270d057..ba7a037fd6 100644 --- a/bazel/build_defs.bzl +++ b/bazel/build_defs.bzl @@ -41,8 +41,6 @@ _DEFAULT_CPPOPTS.extend([ ]) _DEFAULT_COPTS.extend([ "-std=c99", - "-pedantic", - "-Werror=pedantic", "-Wall", "-Wstrict-prototypes", # GCC (at least) emits spurious warnings for this that cannot be fixed diff --git a/upb/decode.c b/upb/decode.c index 8ba5c21425..b3bcc0afc3 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -31,6 +31,7 @@ #include "upb/internal/array.h" #include "upb/internal/decode.h" +#include "upb/msg_internal.h" #include "upb/upb.h" // Must be last. @@ -421,11 +422,7 @@ static bool _upb_Decoder_CheckEnumSlow(upb_Decoder* d, const char* ptr, const upb_MiniTable_Enum* e, const upb_MiniTable_Field* field, uint32_t v) { - // OPT: binary search long lists? - int n = e->value_count; - for (int i = 0; i < n; i++) { - if ((uint32_t)e->values[i] == v) return true; - } + if (_upb_MiniTable_CheckEnumValueSlow(e, v)) return true; // Unrecognized enum goes into unknown fields. 
// For packed fields the tag could be arbitrarily far in the past, so we @@ -445,8 +442,8 @@ static bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, wireval* val) { uint32_t v = val->uint32_val; - if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true; - + _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, v); + if (UPB_LIKELY(status == _kUpb_FastEnumCheck_ValueIsInEnum)) return true; return _upb_Decoder_CheckEnumSlow(d, ptr, msg, e, field, v); } diff --git a/upb/def.c b/upb/def.c index a7144bef25..002faf554d 100644 --- a/upb/def.c +++ b/upb/def.c @@ -2644,8 +2644,6 @@ static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, if (ctx->layout) { UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); e->layout = ctx->layout->enums[ctx->enum_count++]; - UPB_ASSERT(upb_inttable_count(&e->iton) == - e->layout->value_count + count_bits_debug(e->layout->mask)); } else { e->layout = create_enumlayout(ctx, e); } diff --git a/upb/mini_table.c b/upb/mini_table.c index 3d13780a04..0f0592eda3 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -30,6 +30,7 @@ #include #include +#include "upb/arena.h" #include "upb/msg_internal.h" #include "upb/upb.h" @@ -387,6 +388,13 @@ typedef struct { upb_LayoutItemVector vec; upb_Arena* arena; upb_Status* status; + + // When building enums. 
+ upb_MiniTable_Enum* enum_table; + uint32_t enum_value_count; + uint32_t enum_data_count; + uint32_t enum_data_capacity; + jmp_buf err; } upb_MtDecoder; @@ -1034,41 +1042,67 @@ upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type, return ret; } -static bool upb_MiniTable_BuildEnumValue(upb_MtDecoder* d, - upb_MiniTable_Enum* table, - uint32_t val, upb_Arena* arena) { - if (val < 64) { - table->mask |= 1ULL << val; - return true; +static size_t upb_MiniTable_EnumSize(size_t count) { + return sizeof(upb_MiniTable_Enum) + count * sizeof(uint32_t); +} + +static upb_MiniTable_Enum* _upb_MiniTable_AddEnumDataMember(upb_MtDecoder* d, + uint32_t val) { + if (d->enum_data_count == d->enum_data_capacity) { + size_t old_sz = upb_MiniTable_EnumSize(d->enum_data_capacity); + d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2); + size_t new_sz = upb_MiniTable_EnumSize(d->enum_data_capacity); + d->enum_table = upb_Arena_Realloc(d->arena, d->enum_table, old_sz, new_sz); + upb_MtDecoder_CheckOutOfMemory(d, d->enum_table); } + d->enum_table->data[d->enum_data_count++] = val; + return d->enum_table; +} - int32_t* values = (void*)table->values; - values = upb_Arena_Realloc(arena, values, table->value_count * 4, - (table->value_count + 1) * 4); - upb_MtDecoder_CheckOutOfMemory(d, values); - values[table->value_count++] = (int32_t)val; - table->values = values; - return true; +static void upb_MiniTable_BuildEnumValue(upb_MtDecoder* d, uint32_t val) { + upb_MiniTable_Enum* table = d->enum_table; + d->enum_value_count++; + if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) { + if (table->value_count == 0) { + assert(d->enum_data_count == table->mask_limit / 32); + } + table = _upb_MiniTable_AddEnumDataMember(d, val); + table->value_count++; + } else { + uint32_t new_mask_limit = ((val / 32) + 1) * 32; + while (table->mask_limit < new_mask_limit) { + table = _upb_MiniTable_AddEnumDataMember(d, 0); + table->mask_limit += 32; + } + 
table->data[val / 32] |= 1ULL << (val % 32); + } } upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len, upb_Arena* arena, upb_Status* status) { upb_MtDecoder d = { + .enum_table = upb_Arena_Malloc(arena, upb_MiniTable_EnumSize(2)), + .enum_value_count = 0, + .enum_data_count = 0, + .enum_data_capacity = 1, .status = status, .end = UPB_PTRADD(data, len), + .arena = arena, }; if (UPB_SETJMP(d.err)) { return NULL; } - upb_MiniTable_Enum* table = upb_Arena_Malloc(arena, sizeof(*table)); - upb_MtDecoder_CheckOutOfMemory(&d, table); + upb_MtDecoder_CheckOutOfMemory(&d, d.enum_table); + + // Guarantee at least 64 bits of mask without checking mask size. + d.enum_table->mask_limit = 64; + d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0); + d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0); - table->mask = 0; - table->value_count = 0; - table->values = NULL; + d.enum_table->value_count = 0; const char* ptr = data; uint32_t base = 0; @@ -1078,11 +1112,7 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len, if (ch <= kUpb_EncodedValue_MaxEnumMask) { uint32_t mask = upb_FromBase92(ch); for (int i = 0; i < 5; i++, base++, mask >>= 1) { - if (mask & 1) { - if (!upb_MiniTable_BuildEnumValue(&d, table, base, arena)) { - return NULL; - } - } + if (mask & 1) upb_MiniTable_BuildEnumValue(&d, base); } } else if (kUpb_EncodedValue_MinSkip <= ch && ch <= kUpb_EncodedValue_MaxSkip) { @@ -1097,7 +1127,7 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len, } } - return table; + return d.enum_table; } const char* upb_MiniTable_BuildExtension(const char* data, size_t len, diff --git a/upb/mini_table.h b/upb/mini_table.h index d71ebcf320..0e2599516c 100644 --- a/upb/mini_table.h +++ b/upb/mini_table.h @@ -50,19 +50,6 @@ UPB_INLINE const upb_MiniTable_Enum* upb_MiniTable_GetSubEnumTable( return mini_table->subs[field->submsg_index].subenum; } -// Validates enum value against range defined by enum mini table. 
-UPB_INLINE bool upb_MiniTable_Enum_CheckValue(const upb_MiniTable_Enum* e, - int32_t val) { - uint32_t uval = (uint32_t)val; - if (uval < 64) return e->mask & (1ULL << uval); - // OPT: binary search long lists? - int n = e->value_count; - for (int i = 0; i < n; i++) { - if (e->values[i] == val) return true; - } - return false; -} - /** upb_MtDataEncoder *********************************************************/ // Functions to encode a string in a format that can be loaded by diff --git a/upb/msg_internal.h b/upb/msg_internal.h index 8c18a86dca..650cecb886 100644 --- a/upb/msg_internal.h +++ b/upb/msg_internal.h @@ -137,11 +137,47 @@ typedef struct { } _upb_FastTable_Entry; typedef struct { - const int32_t* values; // List of values <0 or >63 - uint64_t mask; // Bits are set for acceptable value 0 <= x < 64 - int value_count; + uint32_t mask_limit; // Limit enum value that can be tested with mask. + uint32_t value_count; // Number of values after the bitfield. + uint32_t data[]; // Bitmask + enumerated values follow. } upb_MiniTable_Enum; +typedef enum { + _kUpb_FastEnumCheck_ValueIsInEnum = 0, + _kUpb_FastEnumCheck_ValueIsNotInEnum = 1, + _kUpb_FastEnumCheck_CannotCheckFast = 2, +} _kUpb_FastEnumCheck_Status; + +UPB_INLINE _kUpb_FastEnumCheck_Status +_upb_MiniTable_CheckEnumValueFast(const upb_MiniTable_Enum* e, uint32_t val) { + if (UPB_UNLIKELY(val >= 64)) return _kUpb_FastEnumCheck_CannotCheckFast; + uint64_t mask = e->data[0] | ((uint64_t)e->data[1] << 32); + return (mask & (1ULL << val)) ? _kUpb_FastEnumCheck_ValueIsInEnum + : _kUpb_FastEnumCheck_ValueIsNotInEnum; +} + +UPB_INLINE bool _upb_MiniTable_CheckEnumValueSlow(const upb_MiniTable_Enum* e, + uint32_t val) { + if (val < e->mask_limit) return e->data[val / 32] & (1ULL << (val % 32)); + // OPT: binary search long lists? 
+ const uint32_t* start = &e->data[e->mask_limit / 32]; + const uint32_t* limit = &e->data[(e->mask_limit / 32) + e->value_count]; + for (const uint32_t* p = start; p < limit; p++) { + if (*p == val) return true; + } + return false; +} + +// Validates enum value against range defined by enum mini table. +UPB_INLINE bool upb_MiniTable_Enum_CheckValue(const upb_MiniTable_Enum* e, + uint32_t val) { + _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, val); + if (UPB_UNLIKELY(status == _kUpb_FastEnumCheck_CannotCheckFast)) { + return _upb_MiniTable_CheckEnumValueSlow(e, val); + } + return status == _kUpb_FastEnumCheck_ValueIsInEnum ? true : false; +} + typedef union { const struct upb_MiniTable* submsg; const upb_MiniTable_Enum* subenum; diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 24ac89dfa0..11c60923ee 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -1373,16 +1373,13 @@ void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout, void WriteEnum(const upb_MiniTable_Enum* mt, const protobuf::EnumDescriptor* e, Output& output) { - std::string values_init = "NULL"; - - if (mt->value_count) { - values_init = EnumInit(e) + "_values"; - output("static const int32_t $0[$1] = {\n", values_init, mt->value_count); - for (int i = 0; i < mt->value_count; i++) { - output(" $0,\n", mt->values[i]); - } - output("};\n\n"); + std::string values_init = "{\n"; + uint32_t value_count = (mt->mask_limit / 32) + mt->value_count; + for (uint32_t i = 0; i < value_count; i++) { + absl::StrAppend(&values_init, " 0x", absl::Hex(mt->data[i]), + ",\n"); } + values_init += " }"; output( R"cc( @@ -1392,8 +1389,7 @@ void WriteEnum(const upb_MiniTable_Enum* mt, const protobuf::EnumDescriptor* e, $3, }; )cc", - EnumInit(e), values_init, absl::StrCat("0x", absl::Hex(mt->mask), "ULL"), - mt->value_count); + EnumInit(e), mt->mask_limit, mt->value_count, values_init); output("\n"); } From 
34495f8d68349a2c38779ee22748ae9248f507e4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Sep 2022 12:22:28 -0700 Subject: [PATCH 10/35] Fixed minor type in the docs. PiperOrigin-RevId: 473314519 --- docs/wrapping-upb.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/wrapping-upb.md b/docs/wrapping-upb.md index 2bd35ae447..3c3114947a 100644 --- a/docs/wrapping-upb.md +++ b/docs/wrapping-upb.md @@ -178,7 +178,7 @@ To use reflection-based access: ### MiniTables -MiniTables are a "lite" schema representation that are much smaller that +MiniTables are a "lite" schema representation that are much smaller than reflection. MiniTables omit names, options, and almost everything else from the `.proto` file, retaining only enough information to parse and serialize binary format. From 5485645125ba3783ae2b597bd7b77679721cb1c6 Mon Sep 17 00:00:00 2001 From: Mike Kruskal Date: Fri, 9 Sep 2022 12:58:15 -0700 Subject: [PATCH 11/35] Internal change PiperOrigin-RevId: 473322443 --- upb/def.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/upb/def.c b/upb/def.c index 002faf554d..86616f127f 100644 --- a/upb/def.c +++ b/upb/def.c @@ -2527,12 +2527,6 @@ static int count_bits_debug(uint64_t x) { return n; } -static int compare_int32(const void* a_ptr, const void* b_ptr) { - int32_t a = *(int32_t*)a_ptr; - int32_t b = *(int32_t*)b_ptr; - return a < b ? -1 : (a == b ? 0 : 1); -} - static upb_MiniTable_Enum* create_enumlayout(upb_DefBuilder* ctx, const upb_EnumDef* e) { const char* desc = _upb_EnumDef_MiniDescriptor(e, ctx->tmp_arena); From a8bc4ca4a3cdb6c445ee55bd9e66d0abfc0fe806 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 12 Sep 2022 17:21:13 -0700 Subject: [PATCH 12/35] Fixed Bazel 4.1.0 compatibility, and added a GitHub Action to verify. 
This will help ensure that we remain compatible with old Bazel versions per the policy here: https://opensource.google/documentation/policies/cplusplus-support PiperOrigin-RevId: 473884285 --- .github/workflows/bazel_tests.yml | 32 ++++++++++++++++++++++--------- bazel/upb_proto_library.bzl | 13 ++++++++++++- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/.github/workflows/bazel_tests.yml b/.github/workflows/bazel_tests.yml index 2f612053f5..b815a54dd3 100644 --- a/.github/workflows/bazel_tests.yml +++ b/.github/workflows/bazel_tests.yml @@ -20,14 +20,19 @@ jobs: fail-fast: false # Don't cancel all jobs if one fails. matrix: include: - - { CC: clang, os: ubuntu-20.04, flags: "" } - - { CC: clang, os: ubuntu-20.04, flags: "-c opt" } # Some warnings only fire with -c opt - - { CC: gcc, os: ubuntu-20.04, flags: "-c opt" } - - { CC: clang, os: ubuntu-20.04, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" } - - { CC: clang, os: ubuntu-20.04, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." } - - { CC: clang, os: ubuntu-20.04, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... -upb/bindings/lua/...", install: "libunwind-dev" } - - { CC: clang, os: ubuntu-20.04, flags: "--copt=-m32 --linkopt=-m32 -- -... benchmarks:benchmark ", install: "g++-multilib" } - - { CC: clang, os: macos-11, flags: "" } + - { NAME: "Fastbuild", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "" } + - { NAME: "Optmized", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "-c opt" } # Some warnings only fire with -c opt + - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc, os: ubuntu-20.04, flags: "-c opt" } + - { NAME: "FastTable", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" } + - { NAME: "ASAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." 
} + - { NAME: "UBSAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... -upb/bindings/lua/...", install: "libunwind-dev" } + - { NAME: "32-bit", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--copt=-m32 --linkopt=-m32 -- -... benchmarks:benchmark ", install: "g++-multilib" } + - { NAME: "macOS", BAZEL: bazel, CC: clang, os: macos-11, flags: "" } + # We support two Bazel versions back per https://opensource.google/documentation/policies/cplusplus-support + - { NAME: "Bazel 4.1.0", BAZEL: bazel-4.1.0-linux-x86_64, CC: clang, os: ubuntu-20.04, flags: "" } + - { NAME: "Bazel 5.3.0", BAZEL: bazel-5.3.0-linux-x86_64, CC: clang, os: ubuntu-20.04, flags: "" } + + name: ${{ matrix.NAME }} steps: - uses: actions/checkout@v2 @@ -37,6 +42,15 @@ jobs: credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }} export_environment_variables: true if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }} + - name: Download historical Bazel version + run: | + FILENAME=$HOME/bin/${{ matrix.BAZEL }} + VERSION=$(echo ${{ matrix.BAZEL }} | cut -d- -f 2 ) + mkdir -p $HOME/bin + echo $HOME/bin >> $GITHUB_PATH + wget -O $FILENAME https://github.com/bazelbuild/bazel/releases/download/$VERSION/${{ matrix.BAZEL }} + chmod a+x $FILENAME + if: ${{ matrix.BAZEL != 'bazel' }} - name: Set up Bazel read/write caching run: echo "BAZEL_CACHE_AUTH=--google_default_credentials" >> $GITHUB_ENV if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }} @@ -49,4 +63,4 @@ jobs: run: sudo apt update && sudo apt install -y ${{ matrix.install }} if: matrix.install != '' - name: Run tests - run: cd ${{ github.workspace }} && PATH=/tmp/venv/bin:$PATH CC=${{ matrix.CC }} bazel test --test_output=errors $BAZEL_CACHE $BAZEL_CACHE_AUTH ... 
${{ matrix.flags }} + run: cd ${{ github.workspace }} && PATH=/tmp/venv/bin:$PATH CC=${{ matrix.CC }} ${{ matrix.BAZEL }} test --test_output=errors $BAZEL_CACHE $BAZEL_CACHE_AUTH ... ${{ matrix.flags }} diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index f837d6f5d3..2bd0e6d662 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -29,7 +29,18 @@ """ load("@bazel_skylib//lib:paths.bzl", "paths") -load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain", "use_cpp_toolchain") + +# begin:google_only +# load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain", "use_cpp_toolchain") +# end:google_only + +# begin:github_only +# Compatibility code for Bazel 4.x. Remove this when we drop support for Bazel 4.x. +load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") + +def use_cpp_toolchain(): + return ["@bazel_tools//tools/cpp:toolchain_type"] +# end:github_only # Generic support code ######################################################### From 2341741bf31bc8f30deff74c7c869b3d1f9b07a7 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 10 Aug 2022 10:25:44 +0200 Subject: [PATCH 13/35] Add upb_Arena_SpaceAllocated function Ref: https://github.com/protocolbuffers/protobuf/pull/10291 Ruby types defined though native extensions should register a function that report their memory footprint in bytes. This feature is used by various memory profiling tools. 
--- upb/arena.c | 18 ++++++++++++++++++ upb/arena.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/upb/arena.c b/upb/arena.c index 15e7a6b180..319698a574 100644 --- a/upb/arena.c +++ b/upb/arena.c @@ -69,6 +69,24 @@ static upb_Arena* arena_findroot(upb_Arena* a) { return a; } +size_t upb_Arena_SpaceAllocated(upb_Arena* arena) { + arena = arena_findroot(arena); + size_t memsize = 0; + + mem_block* block = arena->freelist; + + while (block) { + memsize += sizeof(mem_block) + block->size; + block = block->next; + } + + return memsize; +} + +uint32_t upb_Arena_DebugRefCount(upb_Arena* arena) { + return arena_findroot(arena)->refcount; +} + static void upb_Arena_addblock(upb_Arena* a, upb_Arena* root, void* ptr, size_t size) { mem_block* block = ptr; diff --git a/upb/arena.h b/upb/arena.h index f978322db6..5887a84051 100644 --- a/upb/arena.h +++ b/upb/arena.h @@ -72,6 +72,8 @@ void upb_Arena_Free(upb_Arena* a); bool upb_Arena_AddCleanup(upb_Arena* a, void* ud, upb_CleanupFunc* func); bool upb_Arena_Fuse(upb_Arena* a, upb_Arena* b); void* _upb_Arena_SlowMalloc(upb_Arena* a, size_t size); +size_t upb_Arena_SpaceAllocated(upb_Arena* arena); +uint32_t upb_Arena_DebugRefCount(upb_Arena* arena); UPB_INLINE upb_alloc* upb_Arena_Alloc(upb_Arena* a) { return (upb_alloc*)a; } From 830100abd8481eb5dbd49c8990909a2048391a5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?X=C3=B9d=C5=8Dng=20Y=C3=A1ng?= Date: Tue, 13 Sep 2022 17:49:50 +0200 Subject: [PATCH 14/35] update googletest version Fixing https://github.com/bazelbuild/continuous-integration/issues/1404 --- WORKSPACE | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 2748b3285f..6054b018af 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -21,10 +21,13 @@ http_archive( ) http_archive( - name = "com_google_googletest", - urls = ["https://github.com/google/googletest/archive/b6cd405286ed8635ece71c72f118e659f4ade3fb.zip"], # 2019-01-07 - strip_prefix = 
"googletest-b6cd405286ed8635ece71c72f118e659f4ade3fb", - sha256 = "ff7a82736e158c077e76188232eac77913a15dac0b22508c390ab3f88e6d6d86", + name = "com_google_googletest", + sha256 = "81964fe578e9bd7c94dfdb09c8e4d6e6759e19967e397dbea48d1c10e45d0df2", + strip_prefix = "googletest-release-1.12.1", + urls = [ + "https://mirror.bazel.build/github.com/google/googletest/archive/refs/tags/release-1.12.1.tar.gz", + "https://github.com/google/googletest/archive/refs/tags/release-1.12.1.tar.gz", + ], ) http_archive( From 00765002ff5889f8235b08a0b4d5c4ea5c9990c9 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Wed, 14 Sep 2022 20:27:24 -0700 Subject: [PATCH 15/35] - All of reflection now lives in upb/reflection/ - Each def type has its own .c file and its own .h file - Functions that require a builder context are declared in def_builder.h - The mini descriptor encoders have also been pulled into upb/reflection/ - upb/def.h, upb/def.hpp, upb/reflection.h, and upb/reflection.hpp are now deprecated stubs that point to the new headers PiperOrigin-RevId: 474459500 --- BUILD | 98 +- upb/def.c | 3323 ----------------- upb/def.h | 390 +- upb/def.hpp | 414 +- upb/mini_table_accessors.c | 6 +- upb/mini_table_accessors_test.cc | 7 +- upb/reflection.h | 82 +- upb/reflection.hpp | 10 +- upb/reflection/common.h | 55 + upb/{mini_descriptor.c => reflection/def.h} | 31 +- upb/reflection/def.hpp | 441 +++ upb/reflection/def_builder.c | 338 ++ upb/reflection/def_builder.h | 174 + upb/reflection/def_builder_test.cc | 85 + upb/reflection/def_pool.c | 437 +++ upb/reflection/def_pool.h | 143 + .../def_type.c} | 47 +- upb/reflection/def_type.h | 81 + upb/reflection/enum_def.c | 247 ++ upb/reflection/enum_def.h | 79 + upb/reflection/enum_value_def.c | 123 + upb/reflection/enum_value_def.h | 67 + upb/reflection/extension_range.c | 94 + upb/reflection/extension_range.h | 65 + upb/reflection/field_def.c | 1215 ++++++ upb/reflection/field_def.h | 102 + upb/reflection/file_def.c | 339 ++ 
upb/reflection/file_def.h | 89 + upb/{reflection.c => reflection/message.c} | 13 +- upb/reflection/message.h | 106 + upb/reflection/message.hpp | 37 + upb/reflection/message_def.c | 485 +++ upb/reflection/message_def.h | 173 + upb/reflection/method_def.c | 123 + upb/reflection/method_def.h | 68 + .../mini_descriptor_encode.c} | 26 +- .../mini_descriptor_encode.h} | 20 +- upb/reflection/oneof_def.c | 206 + upb/reflection/oneof_def.h | 79 + upb/reflection/service_def.c | 129 + upb/reflection/service_def.h | 68 + upbc/code_generator_request.c | 3 +- upbc/code_generator_request.h | 2 +- upbc/protoc-gen-upbdev.cc | 2 +- 44 files changed, 5825 insertions(+), 4297 deletions(-) delete mode 100644 upb/def.c create mode 100644 upb/reflection/common.h rename upb/{mini_descriptor.c => reflection/def.h} (71%) create mode 100644 upb/reflection/def.hpp create mode 100644 upb/reflection/def_builder.c create mode 100644 upb/reflection/def_builder.h create mode 100644 upb/reflection/def_builder_test.cc create mode 100644 upb/reflection/def_pool.c create mode 100644 upb/reflection/def_pool.h rename upb/{mini_descriptor.h => reflection/def_type.c} (65%) create mode 100644 upb/reflection/def_type.h create mode 100644 upb/reflection/enum_def.c create mode 100644 upb/reflection/enum_def.h create mode 100644 upb/reflection/enum_value_def.c create mode 100644 upb/reflection/enum_value_def.h create mode 100644 upb/reflection/extension_range.c create mode 100644 upb/reflection/extension_range.h create mode 100644 upb/reflection/field_def.c create mode 100644 upb/reflection/field_def.h create mode 100644 upb/reflection/file_def.c create mode 100644 upb/reflection/file_def.h rename upb/{reflection.c => reflection/message.c} (97%) create mode 100644 upb/reflection/message.h create mode 100644 upb/reflection/message.hpp create mode 100644 upb/reflection/message_def.c create mode 100644 upb/reflection/message_def.h create mode 100644 upb/reflection/method_def.c create mode 100644 
upb/reflection/method_def.h rename upb/{internal/mini_descriptor.c => reflection/mini_descriptor_encode.c} (89%) rename upb/{internal/mini_descriptor.h => reflection/mini_descriptor_encode.h} (76%) create mode 100644 upb/reflection/oneof_def.c create mode 100644 upb/reflection/oneof_def.h create mode 100644 upb/reflection/service_def.c create mode 100644 upb/reflection/service_def.h diff --git a/BUILD b/BUILD index b8253a37ff..cd515ba430 100644 --- a/BUILD +++ b/BUILD @@ -168,6 +168,7 @@ cc_library( hdrs = [ "upb/msg_internal.h", ], + visibility = ["//:__subpackages__"], deps = [ ":extension_registry", ":port", @@ -333,9 +334,7 @@ cc_library( copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [ - ":descriptor_upb_proto", ":reflection", - ":table_internal", ], ) @@ -372,22 +371,49 @@ cc_library( ], ) +# TODO(b/232091617): Once we can delete the deprecated forwarding headers +# (= everything in upb/) we can move this build target down into reflection/ cc_library( name = "reflection", srcs = [ - "upb/def.c", - "upb/internal/mini_descriptor.c", - "upb/internal/mini_descriptor.h", - "upb/mini_descriptor.c", - "upb/msg.h", - "upb/reflection.c", + "upb/reflection/common.h", + "upb/reflection/def_builder.c", + "upb/reflection/def_builder.h", + "upb/reflection/def_pool.c", + "upb/reflection/def_pool.h", + "upb/reflection/def_type.c", + "upb/reflection/def_type.h", + "upb/reflection/enum_def.c", + "upb/reflection/enum_def.h", + "upb/reflection/enum_value_def.c", + "upb/reflection/enum_value_def.h", + "upb/reflection/extension_range.c", + "upb/reflection/extension_range.h", + "upb/reflection/field_def.c", + "upb/reflection/field_def.h", + "upb/reflection/file_def.c", + "upb/reflection/file_def.h", + "upb/reflection/message.c", + "upb/reflection/message_def.c", + "upb/reflection/message_def.h", + "upb/reflection/method_def.c", + "upb/reflection/method_def.h", + "upb/reflection/mini_descriptor_encode.c", + "upb/reflection/oneof_def.c", + 
"upb/reflection/oneof_def.h", + "upb/reflection/service_def.c", + "upb/reflection/service_def.h", ], hdrs = [ "upb/def.h", "upb/def.hpp", - "upb/mini_descriptor.h", "upb/reflection.h", "upb/reflection.hpp", + "upb/reflection/def.h", + "upb/reflection/def.hpp", + "upb/reflection/message.h", + "upb/reflection/message.hpp", + "upb/reflection/mini_descriptor_encode.h", ], copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], @@ -395,6 +421,7 @@ cc_library( ":collections", ":descriptor_upb_proto", ":mini_table", + ":mini_table_internal", ":port", ":table_internal", ":upb", @@ -444,6 +471,25 @@ cc_library( # Tests ######################################################################## +cc_test( + name = "def_builder_test", + srcs = [ + "upb/reflection/common.h", + "upb/reflection/def_builder.h", + "upb/reflection/def_builder_test.cc", + "upb/reflection/def_pool.h", + "upb/reflection/def_type.h", + ], + deps = [ + ":descriptor_upb_proto", + ":port", + ":reflection", + ":table_internal", + ":upb", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "test_generated_code", srcs = ["upb/test_generated_code.cc"], @@ -621,14 +667,14 @@ cc_test( srcs = ["upb/test_cpp.cc"], copts = UPB_DEFAULT_CPPOPTS, deps = [ + ":json", + ":port", + ":reflection", ":test_cpp_upb_proto", ":test_cpp_upb_proto_reflection", ":timestamp_upb_proto", ":timestamp_upb_proto_reflection", - "//:json", - "//:port", - "//:reflection", - "//:upb", + ":upb", "@com_google_googletest//:gtest_main", ], ) @@ -638,9 +684,9 @@ cc_test( srcs = ["upb/test_table.cc"], copts = UPB_DEFAULT_CPPOPTS, deps = [ - "//:port", - "//:table_internal", - "//:upb", + ":port", + ":table_internal", + ":upb", "@com_google_googletest//:gtest_main", ], ) @@ -678,13 +724,13 @@ cc_binary( deps = [ ":conformance_proto_upb", ":conformance_proto_upbdefs", + ":json", + ":port", + ":reflection", ":test_messages_proto2_upbdefs", ":test_messages_proto3_upbdefs", - "//:json", - "//:port", - "//:reflection", - 
"//:textformat", - "//:upb", + ":textformat", + ":upb", ], ) @@ -719,13 +765,13 @@ cc_binary( deps = [ ":conformance_proto_upb", ":conformance_proto_upbdefs", + ":json", + ":port", + ":reflection", ":test_messages_proto2_upbdefs", ":test_messages_proto3_upbdefs", - "//:json", - "//:port", - "//:reflection", - "//:textformat", - "//:upb", + ":textformat", + ":upb", ], ) diff --git a/upb/def.c b/upb/def.c deleted file mode 100644 index 86616f127f..0000000000 --- a/upb/def.c +++ /dev/null @@ -1,3323 +0,0 @@ -/* - * Copyright (c) 2009-2021, Google LLC - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Google LLC nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "upb/def.h" - -#include -#include -#include -#include - -#include "upb/internal/mini_descriptor.h" -#include "upb/mini_table.h" -#include "upb/reflection.h" - -// Must be last. -#include "upb/port_def.inc" - -typedef struct { - size_t len; - char str[1]; /* Null-terminated string data follows. */ -} str_t; - -/* The upb core does not generally have a concept of default instances. However - * for descriptor options we make an exception since the max size is known and - * modest (<200 bytes). All types can share a default instance since it is - * initialized to zeroes. - * - * We have to allocate an extra pointer for upb's internal metadata. 
*/ -static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; -static const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; - -struct upb_FieldDef { - const google_protobuf_FieldOptions* opts; - const upb_FileDef* file; - const upb_MessageDef* msgdef; - const char* full_name; - const char* json_name; - union { - int64_t sint; - uint64_t uint; - double dbl; - float flt; - bool boolean; - str_t* str; - } defaultval; - union { - const upb_OneofDef* oneof; - const upb_MessageDef* extension_scope; - } scope; - union { - const upb_MessageDef* msgdef; - const upb_EnumDef* enumdef; - const google_protobuf_FieldDescriptorProto* unresolved; - } sub; - uint32_t number_; - uint16_t index_; - uint16_t layout_index; /* Index into msgdef->layout->fields or file->exts */ - bool has_default; - bool is_extension_; - bool packed_; - bool proto3_optional_; - bool has_json_name_; - upb_FieldType type_; - upb_Label label_; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; - -struct upb_ExtensionRange { - const google_protobuf_ExtensionRangeOptions* opts; - int32_t start; - int32_t end; -}; - -struct upb_MessageDef { - const google_protobuf_MessageOptions* opts; - const upb_MiniTable* layout; - const upb_FileDef* file; - const upb_MessageDef* containing_type; - const char* full_name; - - /* Tables for looking up fields by number and name. */ - upb_inttable itof; - upb_strtable ntof; - - /* All nested defs. - * MEM: We could save some space here by putting nested defs in a contiguous - * region and calculating counts from offsets or vice-versa. 
*/ - const upb_FieldDef* fields; - const upb_OneofDef* oneofs; - const upb_ExtensionRange* ext_ranges; - const upb_MessageDef* nested_msgs; - const upb_EnumDef* nested_enums; - const upb_FieldDef* nested_exts; - int field_count; - int real_oneof_count; - int oneof_count; - int ext_range_count; - int nested_msg_count; - int nested_enum_count; - int nested_ext_count; - bool in_message_set; - upb_WellKnown well_known_type; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. -#endif -}; - -struct upb_EnumDef { - const google_protobuf_EnumOptions* opts; - const upb_MiniTable_Enum* layout; // Only for proto2. - const upb_FileDef* file; - const upb_MessageDef* containing_type; // Could be merged with "file". - const char* full_name; - upb_strtable ntoi; - upb_inttable iton; - const upb_EnumValueDef* values; - int value_count; - int32_t defaultval; - bool is_sorted; // Are all of the values defined in ascending order? -}; - -struct upb_EnumValueDef { - const google_protobuf_EnumValueOptions* opts; - const upb_EnumDef* parent; - const char* full_name; - int32_t number; -}; - -struct upb_OneofDef { - const google_protobuf_OneofOptions* opts; - const upb_MessageDef* parent; - const char* full_name; - int field_count; - bool synthetic; - const upb_FieldDef** fields; - upb_strtable ntof; - upb_inttable itof; -#if UINTPTR_MAX == 0xffffffff - uint32_t padding; // Increase size to a multiple of 8. 
-#endif -}; - -struct upb_FileDef { - const google_protobuf_FileOptions* opts; - const char* name; - const char* package; - - const upb_FileDef** deps; - const int32_t* public_deps; - const int32_t* weak_deps; - const upb_MessageDef* top_lvl_msgs; - const upb_EnumDef* top_lvl_enums; - const upb_FieldDef* top_lvl_exts; - const upb_ServiceDef* services; - const upb_MiniTable_Extension** ext_layouts; - const upb_DefPool* symtab; - - int dep_count; - int public_dep_count; - int weak_dep_count; - int top_lvl_msg_count; - int top_lvl_enum_count; - int top_lvl_ext_count; - int service_count; - int ext_count; /* All exts in the file. */ - upb_Syntax syntax; -}; - -struct upb_MethodDef { - const google_protobuf_MethodOptions* opts; - upb_ServiceDef* service; - const char* full_name; - const upb_MessageDef* input_type; - const upb_MessageDef* output_type; - int index; - bool client_streaming; - bool server_streaming; -}; - -struct upb_ServiceDef { - const google_protobuf_ServiceOptions* opts; - const upb_FileDef* file; - const char* full_name; - upb_MethodDef* methods; - int method_count; - int index; -}; - -struct upb_DefPool { - upb_Arena* arena; - upb_strtable syms; /* full_name -> packed def ptr */ - upb_strtable files; /* file_name -> upb_FileDef* */ - upb_inttable exts; /* upb_MiniTable_Extension* -> upb_FieldDef* */ - upb_ExtensionRegistry* extreg; - size_t bytes_loaded; -}; - -/* Inside a symtab we store tagged pointers to specific def types. */ -typedef enum { - UPB_DEFTYPE_MASK = 7, - - /* Only inside symtab table. */ - UPB_DEFTYPE_EXT = 0, - UPB_DEFTYPE_MSG = 1, - UPB_DEFTYPE_ENUM = 2, - UPB_DEFTYPE_ENUMVAL = 3, - UPB_DEFTYPE_SERVICE = 4, - - /* Only inside message table. 
*/ - UPB_DEFTYPE_FIELD = 0, - UPB_DEFTYPE_ONEOF = 1, - UPB_DEFTYPE_FIELD_JSONNAME = 2, -} upb_deftype_t; - -#define FIELD_TYPE_UNSPECIFIED 0 - -static upb_deftype_t deftype(upb_value v) { - uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return num & UPB_DEFTYPE_MASK; -} - -static const void* _upb_DefUtil_Unpack(upb_value v, upb_deftype_t type) { - uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return (num & UPB_DEFTYPE_MASK) == type - ? (const void*)(num & ~UPB_DEFTYPE_MASK) - : NULL; -} - -static upb_value _upb_DefUtil_Pack(const void* ptr, upb_deftype_t type, - size_t size) { - // Our 3-bit pointer tagging requires all pointers to be multiples of 8. - // The arena will always yield 8-byte-aligned addresses, however we put - // the defs into arrays. For each element in the array to be 8-byte-aligned, - // the sizes of each def type must also be a multiple of 8. - // - // If any of these asserts fail, we need to add or remove padding on 32-bit - // machines (64-bit machines will have 8-byte alignment already due to - // pointers, which all of these structs have). - UPB_ASSERT((size & UPB_DEFTYPE_MASK) == 0); - uintptr_t num = (uintptr_t)ptr; - UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); - num |= type; - return upb_value_constptr((const void*)num); -} - -/* isalpha() etc. from are locale-dependent, which we don't want. */ -static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { - return c >= low && c <= high; -} - -static char upb_ascii_lower(char ch) { - // Per ASCII this will lower-case a letter. If the result is a letter, the - // input was definitely a letter. If the output is not a letter, this may - // have transformed the character unpredictably. 
- return ch | 0x20; -} - -static bool upb_isletter(char c) { - char lower = upb_ascii_lower(c); - return upb_isbetween(lower, 'a', 'z') || c == '_'; -} - -static bool upb_isalphanum(char c) { - return upb_isletter(c) || upb_isbetween(c, '0', '9'); -} - -static const char* _upb_DefUtil_FullToShort(const char* fullname) { - const char* p; - - if (fullname == NULL) { - return NULL; - } else if ((p = strrchr(fullname, '.')) == NULL) { - /* No '.' in the name, return the full string. */ - return fullname; - } else { - /* Return one past the last '.'. */ - return p + 1; - } -} - -/* All submessage fields are lower than all other fields. - * Secondly, fields are increasing in order. */ -uint32_t field_rank(const upb_FieldDef* f) { - uint32_t ret = upb_FieldDef_Number(f); - const uint32_t high_bit = 1 << 30; - UPB_ASSERT(ret < high_bit); - if (!upb_FieldDef_IsSubMessage(f)) ret |= high_bit; - return ret; -} - -static int cmp_values(const void* a, const void* b) { - const uint32_t A = upb_EnumValueDef_Number(*(const upb_EnumValueDef**)a); - const uint32_t B = upb_EnumValueDef_Number(*(const upb_EnumValueDef**)b); - return (A < B) ? 
-1 : (A > B); -} - -static void assign_msg_wellknowntype(upb_MessageDef* m) { - const char* name = upb_MessageDef_FullName(m); - if (name == NULL) { - m->well_known_type = kUpb_WellKnown_Unspecified; - return; - } - if (!strcmp(name, "google.protobuf.Any")) { - m->well_known_type = kUpb_WellKnown_Any; - } else if (!strcmp(name, "google.protobuf.FieldMask")) { - m->well_known_type = kUpb_WellKnown_FieldMask; - } else if (!strcmp(name, "google.protobuf.Duration")) { - m->well_known_type = kUpb_WellKnown_Duration; - } else if (!strcmp(name, "google.protobuf.Timestamp")) { - m->well_known_type = kUpb_WellKnown_Timestamp; - } else if (!strcmp(name, "google.protobuf.DoubleValue")) { - m->well_known_type = kUpb_WellKnown_DoubleValue; - } else if (!strcmp(name, "google.protobuf.FloatValue")) { - m->well_known_type = kUpb_WellKnown_FloatValue; - } else if (!strcmp(name, "google.protobuf.Int64Value")) { - m->well_known_type = kUpb_WellKnown_Int64Value; - } else if (!strcmp(name, "google.protobuf.UInt64Value")) { - m->well_known_type = kUpb_WellKnown_UInt64Value; - } else if (!strcmp(name, "google.protobuf.Int32Value")) { - m->well_known_type = kUpb_WellKnown_Int32Value; - } else if (!strcmp(name, "google.protobuf.UInt32Value")) { - m->well_known_type = kUpb_WellKnown_UInt32Value; - } else if (!strcmp(name, "google.protobuf.BoolValue")) { - m->well_known_type = kUpb_WellKnown_BoolValue; - } else if (!strcmp(name, "google.protobuf.StringValue")) { - m->well_known_type = kUpb_WellKnown_StringValue; - } else if (!strcmp(name, "google.protobuf.BytesValue")) { - m->well_known_type = kUpb_WellKnown_BytesValue; - } else if (!strcmp(name, "google.protobuf.Value")) { - m->well_known_type = kUpb_WellKnown_Value; - } else if (!strcmp(name, "google.protobuf.ListValue")) { - m->well_known_type = kUpb_WellKnown_ListValue; - } else if (!strcmp(name, "google.protobuf.Struct")) { - m->well_known_type = kUpb_WellKnown_Struct; - } else { - m->well_known_type = kUpb_WellKnown_Unspecified; - } -} 
- -typedef struct { - upb_DefPool* symtab; - upb_FileDef* file; /* File we are building. */ - upb_Arena* arena; /* Allocate defs here. */ - upb_Arena* tmp_arena; /* For temporary allocations. */ - upb_Status* status; /* Record errors here. */ - const upb_MiniTable_File* layout; /* NULL if we should build layouts. */ - int enum_count; /* Count of enums built so far. */ - int msg_count; /* Count of messages built so far. */ - int ext_count; /* Count of extensions built so far. */ - jmp_buf err; /* longjmp() on error. */ -} upb_DefBuilder; - -UPB_NORETURN UPB_NOINLINE UPB_PRINTF(2, 3) static void _upb_DefBuilder_Errf( - upb_DefBuilder* ctx, const char* fmt, ...) { - va_list argp; - va_start(argp, fmt); - upb_Status_VSetErrorFormat(ctx->status, fmt, argp); - va_end(argp); - UPB_LONGJMP(ctx->err, 1); -} - -UPB_NORETURN UPB_NOINLINE static void _upb_DefBuilder_OomErr( - upb_DefBuilder* ctx) { - upb_Status_SetErrorMessage(ctx->status, "out of memory"); - UPB_LONGJMP(ctx->err, 1); -} - -void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { - if (bytes == 0) return NULL; - void* ret = upb_Arena_Malloc(ctx->arena, bytes); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; -} - -/* upb_EnumDef ****************************************************************/ - -const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e) { - return e->opts; -} - -bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { - return e->opts != (void*)kUpbDefOptDefault; -} - -const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } - -const char* upb_EnumDef_Name(const upb_EnumDef* e) { - return _upb_DefUtil_FullToShort(e->full_name); -} - -const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } - -const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { - return e->containing_type; -} - -int32_t upb_EnumDef_Default(const upb_EnumDef* e) { - UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); - return 
e->defaultval; -} - -int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } - -const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( - const upb_EnumDef* def, const char* name, size_t len) { - upb_value v; - return upb_strtable_lookup2(&def->ntoi, name, len, &v) - ? upb_value_getconstptr(v) - : NULL; -} - -const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* def, - int32_t num) { - upb_value v; - return upb_inttable_lookup(&def->iton, num, &v) ? upb_value_getconstptr(v) - : NULL; -} - -bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { - // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect - // this to be faster (especially for small numbers). - return upb_MiniTable_Enum_CheckValue(e->layout, num); -} - -const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { - UPB_ASSERT(0 <= i && i < e->value_count); - return &e->values[i]; -} - -const char* _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a) { - if (e->is_sorted) return _upb_MiniDescriptor_EncodeEnum(e, NULL, a); - - const upb_EnumValueDef** sorted = (const upb_EnumValueDef**)upb_Arena_Malloc( - a, e->value_count * sizeof(void*)); - if (!sorted) return NULL; - - for (size_t i = 0; i < e->value_count; i++) { - sorted[i] = upb_EnumDef_Value(e, i); - } - qsort(sorted, e->value_count, sizeof(void*), cmp_values); - - return _upb_MiniDescriptor_EncodeEnum(e, sorted, a); -} - -/* upb_EnumValueDef ***********************************************************/ - -const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options( - const upb_EnumValueDef* e) { - return e->opts; -} - -bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* e) { - return e->opts != (void*)kUpbDefOptDefault; -} - -const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* ev) { - return ev->parent; -} - -const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* ev) { - return ev->full_name; -} - -const char* 
upb_EnumValueDef_Name(const upb_EnumValueDef* ev) { - return _upb_DefUtil_FullToShort(ev->full_name); -} - -int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* ev) { - return ev->number; -} - -uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* ev) { - // Compute index in our parent's array. - return ev - ev->parent->values; -} - -/* upb_ExtensionRange *********************************************************/ - -const google_protobuf_ExtensionRangeOptions* upb_ExtensionRange_Options( - const upb_ExtensionRange* r) { - return r->opts; -} - -bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { - return r->opts != (void*)kUpbDefOptDefault; -} - -int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* e) { - return e->start; -} - -int32_t upb_ExtensionRange_End(const upb_ExtensionRange* e) { return e->end; } - -// Allocate sufficient storage to contain an array of |n| extension ranges. -static upb_ExtensionRange* _upb_ExtensionRange_Alloc(upb_DefBuilder* ctx, - int n) { - return _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); -} - -/* upb_FieldDef ***************************************************************/ - -const google_protobuf_FieldOptions* upb_FieldDef_Options( - const upb_FieldDef* f) { - return f->opts; -} - -bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { - return f->opts != (void*)kUpbDefOptDefault; -} - -const char* upb_FieldDef_FullName(const upb_FieldDef* f) { - return f->full_name; -} - -upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { - switch (f->type_) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case 
kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); -} - -upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } - -uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } - -upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } - -uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } - -bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { - return f->is_extension_; -} - -bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->packed_; } - -const char* upb_FieldDef_Name(const upb_FieldDef* f) { - return _upb_DefUtil_FullToShort(f->full_name); -} - -const char* upb_FieldDef_JsonName(const upb_FieldDef* f) { - return f->json_name; -} - -bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { - return f->has_json_name_; -} - -const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } - -const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { - return f->msgdef; -} - -const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { - return f->is_extension_ ? f->scope.extension_scope : NULL; -} - -const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { - return f->is_extension_ ? 
NULL : f->scope.oneof; -} - -const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { - const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); - if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; - return oneof; -} - -upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { - UPB_ASSERT(!upb_FieldDef_IsSubMessage(f)); - upb_MessageValue ret; - - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Bool: - return (upb_MessageValue){.bool_val = f->defaultval.boolean}; - case kUpb_CType_Int64: - return (upb_MessageValue){.int64_val = f->defaultval.sint}; - case kUpb_CType_UInt64: - return (upb_MessageValue){.uint64_val = f->defaultval.uint}; - case kUpb_CType_Enum: - case kUpb_CType_Int32: - return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; - case kUpb_CType_UInt32: - return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; - case kUpb_CType_Float: - return (upb_MessageValue){.float_val = f->defaultval.flt}; - case kUpb_CType_Double: - return (upb_MessageValue){.double_val = f->defaultval.dbl}; - case kUpb_CType_String: - case kUpb_CType_Bytes: { - str_t* str = f->defaultval.str; - if (str) { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = str->str, .size = str->len}}; - } else { - return (upb_MessageValue){ - .str_val = (upb_StringView){.data = NULL, .size = 0}}; - } - } - default: - UPB_UNREACHABLE(); - } - - return ret; -} - -const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; -} - -const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Enum ? 
f->sub.enumdef : NULL; -} - -const upb_MiniTable_Field* upb_FieldDef_MiniTable(const upb_FieldDef* f) { - UPB_ASSERT(!upb_FieldDef_IsExtension(f)); - const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); - return &layout->fields[f->layout_index]; -} - -const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable( - const upb_FieldDef* f) { - UPB_ASSERT(upb_FieldDef_IsExtension(f)); - const upb_FileDef* file = upb_FieldDef_File(f); - return file->ext_layouts[f->layout_index]; -} - -bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { - return f->proto3_optional_; -} - -bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message; -} - -bool upb_FieldDef_IsString(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_String || - upb_FieldDef_CType(f) == kUpb_CType_Bytes; -} - -bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Optional; -} - -bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Required; -} - -bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Repeated; -} - -bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { - return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); -} - -bool upb_FieldDef_IsMap(const upb_FieldDef* f) { - return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && - upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); -} - -bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } - -bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { - return upb_FieldDef_IsSubMessage(f) || - upb_FieldDef_CType(f) == kUpb_CType_Enum; -} - -bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { - if (upb_FieldDef_IsRepeated(f)) return false; - const upb_FileDef* file = upb_FieldDef_File(f); - return upb_FieldDef_IsSubMessage(f) || upb_FieldDef_ContainingOneof(f) || - 
upb_FileDef_Syntax(file) == kUpb_Syntax_Proto2; -} - -static bool between(int32_t x, int32_t low, int32_t high) { - return x >= low && x <= high; -} - -bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } -bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } -bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } - -bool upb_FieldDef_checkdescriptortype(int32_t type) { - return between(type, 1, 18); -} - -// Allocate sufficient storage to contain an array of |n| field defs. -static upb_FieldDef* _upb_FieldDef_Alloc(upb_DefBuilder* ctx, int n) { - return _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); -} - -/* upb_MessageDef *************************************************************/ - -const google_protobuf_MessageOptions* upb_MessageDef_Options( - const upb_MessageDef* m) { - return m->opts; -} - -bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} - -const char* upb_MessageDef_FullName(const upb_MessageDef* m) { - return m->full_name; -} - -const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { - return m->file; -} - -const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { - return m->containing_type; -} - -const char* upb_MessageDef_Name(const upb_MessageDef* m) { - return _upb_DefUtil_FullToShort(m->full_name); -} - -upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { - return upb_FileDef_Syntax(m->file); -} - -const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, - uint32_t i) { - upb_value val; - return upb_inttable_lookup(&m->itof, i, &val) ? 
upb_value_getconstptr(val) - : NULL; -} - -const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( - const upb_MessageDef* m, const char* name, size_t len) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return NULL; - } - - return _upb_DefUtil_Unpack(val, UPB_DEFTYPE_FIELD); -} - -const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( - const upb_MessageDef* m, const char* name, size_t len) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return NULL; - } - - return _upb_DefUtil_Unpack(val, UPB_DEFTYPE_ONEOF); -} - -bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, - const char* name, size_t len, - const upb_FieldDef** out_f, - const upb_OneofDef** out_o) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return false; - } - - const upb_FieldDef* f = _upb_DefUtil_Unpack(val, UPB_DEFTYPE_FIELD); - const upb_OneofDef* o = _upb_DefUtil_Unpack(val, UPB_DEFTYPE_ONEOF); - if (out_f) *out_f = f; - if (out_o) *out_o = o; - return f || o; /* False if this was a JSON name. 
*/ -} - -const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( - const upb_MessageDef* m, const char* name, size_t len) { - upb_value val; - const upb_FieldDef* f; - - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return NULL; - } - - f = _upb_DefUtil_Unpack(val, UPB_DEFTYPE_FIELD); - if (!f) f = _upb_DefUtil_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); - - return f; -} - -int upb_MessageDef_numfields(const upb_MessageDef* m) { return m->field_count; } - -int upb_MessageDef_numoneofs(const upb_MessageDef* m) { return m->oneof_count; } - -int upb_MessageDef_numrealoneofs(const upb_MessageDef* m) { - return m->real_oneof_count; -} - -int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { - return m->ext_range_count; -} - -int upb_MessageDef_FieldCount(const upb_MessageDef* m) { - return m->field_count; -} - -int upb_MessageDef_OneofCount(const upb_MessageDef* m) { - return m->oneof_count; -} - -int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { - return m->nested_msg_count; -} - -int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) { - return m->nested_enum_count; -} - -int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { - return m->nested_ext_count; -} - -int upb_MessageDef_realoneofcount(const upb_MessageDef* m) { - return m->real_oneof_count; -} - -const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { - return m->layout; -} - -const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->ext_range_count); - return &m->ext_ranges[i]; -} - -const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->field_count); - return &m->fields[i]; -} - -const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->oneof_count); - return &m->oneofs[i]; -} - -const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, - int i) { - 
UPB_ASSERT(0 <= i && i < m->nested_msg_count); - return &m->nested_msgs[i]; -} - -const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { - UPB_ASSERT(0 <= i && i < m->nested_enum_count); - return &m->nested_enums[i]; -} - -const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, - int i) { - UPB_ASSERT(0 <= i && i < m->nested_ext_count); - return &m->nested_exts[i]; -} - -upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { - return m->well_known_type; -} - -/* upb_OneofDef ***************************************************************/ - -const google_protobuf_OneofOptions* upb_OneofDef_Options( - const upb_OneofDef* o) { - return o->opts; -} - -bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { - return o->opts != (void*)kUpbDefOptDefault; -} - -const char* upb_OneofDef_Name(const upb_OneofDef* o) { - return _upb_DefUtil_FullToShort(o->full_name); -} - -const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { - return o->parent; -} - -int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } - -const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { - UPB_ASSERT(i < o->field_count); - return o->fields[i]; -} - -int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } - -uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { - // Compute index in our parent's array. - return o - o->parent->oneofs; -} - -bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } - -const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, - const char* name, - size_t length) { - upb_value val; - return upb_strtable_lookup2(&o->ntof, name, length, &val) - ? upb_value_getptr(val) - : NULL; -} - -const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, - uint32_t num) { - upb_value val; - return upb_inttable_lookup(&o->itof, num, &val) ? 
upb_value_getptr(val) - : NULL; -} - -/* upb_FileDef ****************************************************************/ - -const google_protobuf_FileOptions* upb_FileDef_Options(const upb_FileDef* f) { - return f->opts; -} - -bool upb_FileDef_HasOptions(const upb_FileDef* f) { - return f->opts != (void*)kUpbDefOptDefault; -} - -const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; } - -const char* upb_FileDef_Package(const upb_FileDef* f) { - return f->package ? f->package : ""; -} - -upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } - -int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) { - return f->top_lvl_msg_count; -} - -int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; } - -int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) { - return f->public_dep_count; -} - -int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) { - return f->weak_dep_count; -} - -const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) { - return f->public_deps; -} - -const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) { - return f->weak_deps; -} - -int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) { - return f->top_lvl_enum_count; -} - -int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) { - return f->top_lvl_ext_count; -} - -int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; } - -const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->dep_count); - return f->deps[i]; -} - -const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->public_deps[i]]; -} - -const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->public_dep_count); - return f->deps[f->weak_deps[i]]; -} - -const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int 
i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); - return &f->top_lvl_msgs[i]; -} - -const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); - return &f->top_lvl_enums[i]; -} - -const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); - return &f->top_lvl_exts[i]; -} - -const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) { - UPB_ASSERT(0 <= i && i < f->service_count); - return &f->services[i]; -} - -const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; } - -/* upb_MethodDef **************************************************************/ - -const google_protobuf_MethodOptions* upb_MethodDef_Options(const upb_MethodDef* m) { - return m->opts; -} - -bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { - return m->opts != (void*)kUpbDefOptDefault; -} - -const char* upb_MethodDef_FullName(const upb_MethodDef* m) { - return m->full_name; -} - -int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } - -const char* upb_MethodDef_Name(const upb_MethodDef* m) { - return _upb_DefUtil_FullToShort(m->full_name); -} - -const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { - return m->service; -} - -const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { - return m->input_type; -} - -const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { - return m->output_type; -} - -bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { - return m->client_streaming; -} - -bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { - return m->server_streaming; -} - -/* upb_ServiceDef *************************************************************/ - -const google_protobuf_ServiceOptions* upb_ServiceDef_Options(const upb_ServiceDef* s) { - return s->opts; -} - -bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { - return s->opts 
!= (void*)kUpbDefOptDefault; -} - -const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { - return s->full_name; -} - -const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { - return _upb_DefUtil_FullToShort(s->full_name); -} - -int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } - -const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { - return s->file; -} - -int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { - return s->method_count; -} - -const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { - return (i < 0 || i >= s->method_count) ? NULL : &s->methods[i]; -} - -const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, - const char* name) { - for (int i = 0; i < s->method_count; i++) { - if (strcmp(name, upb_MethodDef_Name(&s->methods[i])) == 0) { - return &s->methods[i]; - } - } - return NULL; -} - -/* upb_DefPool ****************************************************************/ - -void upb_DefPool_Free(upb_DefPool* s) { - upb_Arena_Free(s->arena); - upb_gfree(s); -} - -upb_DefPool* upb_DefPool_New(void) { - upb_DefPool* s = upb_gmalloc(sizeof(*s)); - if (!s) return NULL; - - s->arena = upb_Arena_New(); - s->bytes_loaded = 0; - - if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; - if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; - if (!upb_inttable_init(&s->exts, s->arena)) goto err; - - s->extreg = upb_ExtensionRegistry_New(s->arena); - if (!s->extreg) goto err; - - return s; - -err: - upb_Arena_Free(s->arena); - upb_gfree(s); - return NULL; -} - -static const void* symtab_lookup(const upb_DefPool* s, const char* sym, - upb_deftype_t type) { - upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? _upb_DefUtil_Unpack(v, type) - : NULL; -} - -static const void* symtab_lookup2(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, size, &v) - ? 
_upb_DefUtil_Unpack(v, type) - : NULL; -} - -const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, - const char* sym) { - return symtab_lookup(s, sym, UPB_DEFTYPE_MSG); -} - -const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( - const upb_DefPool* s, const char* sym, size_t len) { - return symtab_lookup2(s, sym, len, UPB_DEFTYPE_MSG); -} - -const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, - const char* sym) { - return symtab_lookup(s, sym, UPB_DEFTYPE_ENUM); -} - -const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, - const char* sym) { - return symtab_lookup(s, sym, UPB_DEFTYPE_ENUMVAL); -} - -const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, - const char* name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; -} - -const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, - const char* name, - size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->files, name, len, &v) - ? upb_value_getconstptr(v) - : NULL; -} - -const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - upb_value v; - if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; - - switch (deftype(v)) { - case UPB_DEFTYPE_FIELD: - return _upb_DefUtil_Unpack(v, UPB_DEFTYPE_FIELD); - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefUtil_Unpack(v, UPB_DEFTYPE_MSG); - return m->in_message_set ? 
&m->nested_exts[0] : NULL; - } - default: - break; - } - - return NULL; -} - -const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, - const char* sym) { - return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); -} - -const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, - const char* name) { - return symtab_lookup(s, name, UPB_DEFTYPE_SERVICE); -} - -const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( - const upb_DefPool* s, const char* name, size_t size) { - return symtab_lookup2(s, name, size, UPB_DEFTYPE_SERVICE); -} - -const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, - const char* name) { - upb_value v; - // TODO(haberman): non-extension fields and oneofs. - if (upb_strtable_lookup(&s->syms, name, &v)) { - switch (deftype(v)) { - case UPB_DEFTYPE_EXT: { - const upb_FieldDef* f = _upb_DefUtil_Unpack(v, UPB_DEFTYPE_EXT); - return upb_FieldDef_File(f); - } - case UPB_DEFTYPE_MSG: { - const upb_MessageDef* m = _upb_DefUtil_Unpack(v, UPB_DEFTYPE_MSG); - return upb_MessageDef_File(m); - } - case UPB_DEFTYPE_ENUM: { - const upb_EnumDef* e = _upb_DefUtil_Unpack(v, UPB_DEFTYPE_ENUM); - return upb_EnumDef_File(e); - } - case UPB_DEFTYPE_ENUMVAL: { - const upb_EnumValueDef* ev = - _upb_DefUtil_Unpack(v, UPB_DEFTYPE_ENUMVAL); - return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); - } - case UPB_DEFTYPE_SERVICE: { - const upb_ServiceDef* service = - _upb_DefUtil_Unpack(v, UPB_DEFTYPE_SERVICE); - return upb_ServiceDef_File(service); - } - default: - UPB_UNREACHABLE(); - } - } - - const char* last_dot = strrchr(name, '.'); - if (last_dot) { - const upb_MessageDef* parent = - upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); - if (parent) { - const char* shortname = last_dot + 1; - if (upb_MessageDef_FindByNameWithSize(parent, shortname, - strlen(shortname), NULL, NULL)) { - return upb_MessageDef_File(parent); - } - } - } - - return NULL; -} - -/* Code to build defs from 
descriptor protos. *********************************/ - -/* There is a question of how much validation to do here. It will be difficult - * to perfectly match the amount of validation performed by proto2. But since - * this code is used to directly build defs from Ruby (for example) we do need - * to validate important constraints like uniqueness of names and numbers. */ - -// We want to copy the options verbatim into the destination options proto. -// We use serialize+parse as our deep copy. -#define SET_OPTIONS(target, desc_type, options_type, proto) \ - if (google_protobuf_##desc_type##_has_options(proto)) { \ - size_t size; \ - char* pb = google_protobuf_##options_type##_serialize( \ - google_protobuf_##desc_type##_options(proto), ctx->tmp_arena, &size); \ - if (!pb) _upb_DefBuilder_OomErr(ctx); \ - target = google_protobuf_##options_type##_parse(pb, size, ctx->arena); \ - if (!target) _upb_DefBuilder_OomErr(ctx); \ - } else { \ - target = (const google_protobuf_##options_type*)kUpbDefOptDefault; \ - } - -static void _upb_DefBuilder_CheckIdent(upb_DefBuilder* ctx, upb_StringView name, - bool full) { - const char* str = name.data; - size_t len = name.size; - bool start = true; - size_t i; - for (i = 0; i < len; i++) { - char c = str[i]; - if (c == '.') { - if (start || !full) { - _upb_DefBuilder_Errf(ctx, "invalid name: unexpected '.' 
(%.*s)", - (int)len, str); - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - _upb_DefBuilder_Errf( - ctx, - "invalid name: path components must start with a letter (%.*s)", - (int)len, str); - } - start = false; - } else { - if (!upb_isalphanum(c)) { - _upb_DefBuilder_Errf(ctx, - "invalid name: non-alphanumeric character (%.*s)", - (int)len, str); - } - } - } - if (start) { - _upb_DefBuilder_Errf(ctx, "invalid name: empty part (%.*s)", (int)len, str); - } -} - -static size_t div_round_up(size_t n, size_t d) { return (n + d - 1) / d; } - -static size_t upb_MessageValue_sizeof(upb_CType type) { - switch (type) { - case kUpb_CType_Double: - case kUpb_CType_Int64: - case kUpb_CType_UInt64: - return 8; - case kUpb_CType_Enum: - case kUpb_CType_Int32: - case kUpb_CType_UInt32: - case kUpb_CType_Float: - return 4; - case kUpb_CType_Bool: - return 1; - case kUpb_CType_Message: - return sizeof(void*); - case kUpb_CType_Bytes: - case kUpb_CType_String: - return sizeof(upb_StringView); - } - UPB_UNREACHABLE(); -} - -static uint8_t upb_msg_fielddefsize(const upb_FieldDef* f) { - if (upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) { - upb_MapEntry ent; - UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v)); - return sizeof(ent.k); - } else if (upb_FieldDef_IsRepeated(f)) { - return sizeof(void*); - } else { - return upb_MessageValue_sizeof(upb_FieldDef_CType(f)); - } -} - -static uint32_t upb_MiniTable_place(upb_DefBuilder* ctx, upb_MiniTable* l, - size_t size, const upb_MessageDef* m) { - size_t ofs = UPB_ALIGN_UP(l->size, size); - size_t next = ofs + size; - - if (next > UINT16_MAX) { - _upb_DefBuilder_Errf(ctx, - "size of message %s exceeded max size of %zu bytes", - upb_MessageDef_FullName(m), (size_t)UINT16_MAX); - } - - l->size = next; - return ofs; -} - -static int field_number_cmp(const void* p1, const void* p2) { - const upb_MiniTable_Field* f1 = p1; - const upb_MiniTable_Field* f2 = p2; - return f1->number - f2->number; -} - -static void 
assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l, - upb_MiniTable_Field* fields) { - int i; - int n = upb_MessageDef_numfields(m); - int dense_below = 0; - for (i = 0; i < n; i++) { - upb_FieldDef* f = - (upb_FieldDef*)upb_MessageDef_FindFieldByNumber(m, fields[i].number); - UPB_ASSERT(f); - f->layout_index = i; - if (i < UINT8_MAX && fields[i].number == i + 1 && - (i == 0 || fields[i - 1].number == i)) { - dense_below = i + 1; - } - } - l->dense_below = dense_below; -} - -static uint8_t map_descriptortype(const upb_FieldDef* f) { - uint8_t type = upb_FieldDef_Type(f); - /* See TableDescriptorType() in upbc/generator.cc for details and - * rationale of these exceptions. */ - if (type == kUpb_FieldType_String) { - const upb_FileDef* file = upb_FieldDef_File(f); - const upb_Syntax syntax = upb_FileDef_Syntax(file); - - if (syntax == kUpb_Syntax_Proto2) return kUpb_FieldType_Bytes; - } else if (type == kUpb_FieldType_Enum) { - const upb_FileDef* file = upb_EnumDef_File(f->sub.enumdef); - const upb_Syntax syntax = upb_FileDef_Syntax(file); - - if (syntax == kUpb_Syntax_Proto3 || UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 || - // TODO(https://github.com/protocolbuffers/upb/issues/541): - // fix map enum values to check for unknown enum values and put - // them in the unknown field set. - upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) { - return kUpb_FieldType_Int32; - } - } - return type; -} - -static void fill_fieldlayout(upb_MiniTable_Field* field, - const upb_FieldDef* f) { - field->number = upb_FieldDef_Number(f); - field->descriptortype = map_descriptortype(f); - - if (upb_FieldDef_IsMap(f)) { - field->mode = - kUpb_FieldMode_Map | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift); - } else if (upb_FieldDef_IsRepeated(f)) { - field->mode = - kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift); - } else { - /* Maps descriptor type -> elem_size_lg2. 
*/ - static const uint8_t sizes[] = { - -1, /* invalid descriptor type */ - kUpb_FieldRep_8Byte, /* DOUBLE */ - kUpb_FieldRep_4Byte, /* FLOAT */ - kUpb_FieldRep_8Byte, /* INT64 */ - kUpb_FieldRep_8Byte, /* UINT64 */ - kUpb_FieldRep_4Byte, /* INT32 */ - kUpb_FieldRep_8Byte, /* FIXED64 */ - kUpb_FieldRep_4Byte, /* FIXED32 */ - kUpb_FieldRep_1Byte, /* BOOL */ - kUpb_FieldRep_StringView, /* STRING */ - kUpb_FieldRep_Pointer, /* GROUP */ - kUpb_FieldRep_Pointer, /* MESSAGE */ - kUpb_FieldRep_StringView, /* BYTES */ - kUpb_FieldRep_4Byte, /* UINT32 */ - kUpb_FieldRep_4Byte, /* ENUM */ - kUpb_FieldRep_4Byte, /* SFIXED32 */ - kUpb_FieldRep_8Byte, /* SFIXED64 */ - kUpb_FieldRep_4Byte, /* SINT32 */ - kUpb_FieldRep_8Byte, /* SINT64 */ - }; - field->mode = kUpb_FieldMode_Scalar | - (sizes[field->descriptortype] << kUpb_FieldRep_Shift); - } - - if (upb_FieldDef_IsPacked(f)) { - field->mode |= kUpb_LabelFlags_IsPacked; - } - - if (upb_FieldDef_IsExtension(f)) { - field->mode |= kUpb_LabelFlags_IsExtension; - } -} - -/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. - * It computes a dynamic layout for all of the fields in |m|. */ -static void make_layout(upb_DefBuilder* ctx, const upb_MessageDef* m) { - upb_MiniTable* l = (upb_MiniTable*)upb_MessageDef_MiniTable(m); - size_t field_count = upb_MessageDef_numfields(m); - size_t sublayout_count = 0; - upb_MiniTable_Sub* subs; - upb_MiniTable_Field* fields; - - memset(l, 0, sizeof(*l) + sizeof(_upb_FastTable_Entry)); - - /* Count sub-messages. 
*/ - for (size_t i = 0; i < field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - if (upb_FieldDef_IsSubMessage(f)) { - sublayout_count++; - } - if (upb_FieldDef_CType(f) == kUpb_CType_Enum && - upb_FileDef_Syntax(f->sub.enumdef->file) == kUpb_Syntax_Proto2) { - sublayout_count++; - } - } - - fields = _upb_DefBuilder_Alloc(ctx, field_count * sizeof(*fields)); - subs = _upb_DefBuilder_Alloc(ctx, sublayout_count * sizeof(*subs)); - - l->field_count = upb_MessageDef_numfields(m); - l->fields = fields; - l->subs = subs; - l->table_mask = 0; - l->required_count = 0; - - if (upb_MessageDef_ExtensionRangeCount(m) > 0) { - if (google_protobuf_MessageOptions_message_set_wire_format(m->opts)) { - l->ext = kUpb_ExtMode_IsMessageSet; - } else { - l->ext = kUpb_ExtMode_Extendable; - } - } else { - l->ext = kUpb_ExtMode_NonExtendable; - } - - /* TODO(haberman): initialize fast tables so that reflection-based parsing - * can get the same speeds as linked-in types. */ - l->fasttable[0].field_parser = &_upb_FastDecoder_DecodeGeneric; - l->fasttable[0].field_data = 0; - - if (upb_MessageDef_IsMapEntry(m)) { - /* TODO(haberman): refactor this method so this special case is more - * elegant. 
*/ - const upb_FieldDef* key = upb_MessageDef_FindFieldByNumber(m, 1); - const upb_FieldDef* val = upb_MessageDef_FindFieldByNumber(m, 2); - fields[0].number = 1; - fields[1].number = 2; - fields[0].mode = kUpb_FieldMode_Scalar; - fields[1].mode = kUpb_FieldMode_Scalar; - fields[0].presence = 0; - fields[1].presence = 0; - fields[0].descriptortype = map_descriptortype(key); - fields[1].descriptortype = map_descriptortype(val); - fields[0].offset = 0; - fields[1].offset = sizeof(upb_StringView); - fields[1].submsg_index = 0; - - if (upb_FieldDef_CType(val) == kUpb_CType_Message) { - subs[0].submsg = upb_FieldDef_MessageSubDef(val)->layout; - } - - upb_FieldDef* fielddefs = (upb_FieldDef*)&m->fields[0]; - UPB_ASSERT(fielddefs[0].number_ == 1); - UPB_ASSERT(fielddefs[1].number_ == 2); - fielddefs[0].layout_index = 0; - fielddefs[1].layout_index = 1; - - l->field_count = 2; - l->size = 2 * sizeof(upb_StringView); - l->size = UPB_ALIGN_UP(l->size, 8); - l->dense_below = 2; - return; - } - - /* Allocate data offsets in three stages: - * - * 1. hasbits. - * 2. regular fields. - * 3. oneof fields. - * - * OPT: There is a lot of room for optimization here to minimize the size. - */ - - /* Assign hasbits for required fields first. */ - size_t hasbit = 0; - - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = &m->fields[i]; - upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)]; - if (upb_FieldDef_Label(f) == kUpb_Label_Required) { - field->presence = ++hasbit; - if (hasbit >= 63) { - _upb_DefBuilder_Errf(ctx, "Message with >=63 required fields: %s", - upb_MessageDef_FullName(m)); - } - l->required_count++; - } - } - - /* Allocate hasbits and set basic field attributes. 
*/ - sublayout_count = 0; - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = &m->fields[i]; - upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)]; - - fill_fieldlayout(field, f); - - if (field->descriptortype == kUpb_FieldType_Message || - field->descriptortype == kUpb_FieldType_Group) { - field->submsg_index = sublayout_count++; - subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout; - } else if (field->descriptortype == kUpb_FieldType_Enum) { - field->submsg_index = sublayout_count++; - subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout; - UPB_ASSERT(subs[field->submsg_index].subenum); - } - - if (upb_FieldDef_Label(f) == kUpb_Label_Required) { - /* Hasbit was already assigned. */ - } else if (upb_FieldDef_HasPresence(f) && - !upb_FieldDef_RealContainingOneof(f)) { - /* We don't use hasbit 0, so that 0 can indicate "no presence" in the - * table. This wastes one hasbit, but we don't worry about it for now. */ - field->presence = ++hasbit; - } else { - field->presence = 0; - } - } - - /* Account for space used by hasbits. */ - l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0; - - /* Allocate non-oneof fields. */ - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = &m->fields[i]; - size_t field_size = upb_msg_fielddefsize(f); - size_t index = upb_FieldDef_Index(f); - - if (upb_FieldDef_RealContainingOneof(f)) { - /* Oneofs are handled separately below. */ - continue; - } - - fields[index].offset = upb_MiniTable_place(ctx, l, field_size, m); - } - - /* Allocate oneof fields. Each oneof field consists of a uint32 for the case - * and space for the actual data. */ - for (int i = 0; i < m->oneof_count; i++) { - const upb_OneofDef* o = &m->oneofs[i]; - size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. 
*/ - size_t field_size = 0; - uint32_t case_offset; - uint32_t data_offset; - - if (upb_OneofDef_IsSynthetic(o)) continue; - - if (o->field_count == 0) { - _upb_DefBuilder_Errf(ctx, "Oneof must have at least one field (%s)", - o->full_name); - } - - /* Calculate field size: the max of all field sizes. */ - for (int j = 0; j < o->field_count; j++) { - const upb_FieldDef* f = o->fields[j]; - field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f)); - } - - /* Align and allocate case offset. */ - case_offset = upb_MiniTable_place(ctx, l, case_size, m); - data_offset = upb_MiniTable_place(ctx, l, field_size, m); - - for (int i = 0; i < o->field_count; i++) { - const upb_FieldDef* f = o->fields[i]; - fields[upb_FieldDef_Index(f)].offset = data_offset; - fields[upb_FieldDef_Index(f)].presence = ~case_offset; - } - } - - /* Size of the entire structure should be a multiple of its greatest - * alignment. TODO: track overall alignment for real? */ - l->size = UPB_ALIGN_UP(l->size, 8); - - /* Sort fields by number. */ - if (fields) { - qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields), - field_number_cmp); - } - assign_layout_indices(m, l, fields); -} - -static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) { - char* ret = upb_strdup2(view.data, view.size, ctx->arena); - if (!ret) _upb_DefBuilder_OomErr(ctx); - return ret; -} - -static bool streql2(const char* a, size_t n, const char* b) { - return n == strlen(b) && memcmp(a, b, n) == 0; -} - -static bool streql_view(upb_StringView view, const char* b) { - return streql2(view.data, view.size, b); -} - -static const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, - const char* prefix, - upb_StringView name) { - if (prefix) { - /* ret = prefix + '.' 
+ name; */ - size_t n = strlen(prefix); - char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); - strcpy(ret, prefix); - ret[n] = '.'; - memcpy(&ret[n + 1], name.data, name.size); - ret[n + 1 + name.size] = '\0'; - return ret; - } else { - return strviewdup(ctx, name); - } -} - -static void finalize_oneofs(upb_DefBuilder* ctx, upb_MessageDef* m) { - int i; - int synthetic_count = 0; - upb_OneofDef* mutable_oneofs = (upb_OneofDef*)m->oneofs; - - for (i = 0; i < m->oneof_count; i++) { - upb_OneofDef* o = &mutable_oneofs[i]; - - if (o->synthetic && o->field_count != 1) { - _upb_DefBuilder_Errf(ctx, - "Synthetic oneofs must have one field, not %d: %s", - o->field_count, upb_OneofDef_Name(o)); - } - - if (o->synthetic) { - synthetic_count++; - } else if (synthetic_count != 0) { - _upb_DefBuilder_Errf( - ctx, "Synthetic oneofs must be after all other oneofs: %s", - upb_OneofDef_Name(o)); - } - - o->fields = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); - o->field_count = 0; - } - - for (i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = &m->fields[i]; - upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); - if (o) { - o->fields[o->field_count++] = f; - } - } - - m->real_oneof_count = m->oneof_count - synthetic_count; -} - -size_t getjsonname(const char* name, char* buf, size_t len) { - size_t src, dst = 0; - bool ucase_next = false; - -#define WRITE(byte) \ - ++dst; \ - if (dst < len) \ - buf[dst - 1] = byte; \ - else if (dst == len) \ - buf[dst - 1] = '\0' - - if (!name) { - WRITE('\0'); - return 0; - } - - /* Implement the transformation as described in the spec: - * 1. upper case all letters after an underscore. - * 2. remove all underscores. 
- */ - for (src = 0; name[src]; src++) { - if (name[src] == '_') { - ucase_next = true; - continue; - } - - if (ucase_next) { - WRITE(toupper(name[src])); - ucase_next = false; - } else { - WRITE(name[src]); - } - } - - WRITE('\0'); - return dst; - -#undef WRITE -} - -static char* makejsonname(upb_DefBuilder* ctx, const char* name) { - size_t size = getjsonname(name, NULL, 0); - char* json_name = _upb_DefBuilder_Alloc(ctx, size); - getjsonname(name, json_name, size); - return json_name; -} - -/* Adds a symbol |v| to the symtab, which must be a def pointer previously - * packed with pack_def(). The def's pointer to upb_FileDef* must be set before - * adding, so we know which entries to remove if building this file fails. */ -static void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, - upb_value v) { - // TODO: table should support an operation "tryinsert" to avoid the double - // lookup. - if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate symbol '%s'", name); - } - size_t len = strlen(name); - bool ok = - upb_strtable_insert(&ctx->symtab->syms, name, len, v, ctx->symtab->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); -} - -static bool remove_component(char* base, size_t* len) { - if (*len == 0) return false; - - for (size_t i = *len - 1; i > 0; i--) { - if (base[i] == '.') { - *len = i; - return true; - } - } - - *len = 0; - return true; -} - -/* Given a symbol and the base symbol inside which it is defined, find the - * symbol's definition in t. */ -static const void* symtab_resolveany(upb_DefBuilder* ctx, - const char* from_name_dbg, - const char* base, upb_StringView sym, - upb_deftype_t* type) { - const upb_strtable* t = &ctx->symtab->syms; - if (sym.size == 0) goto notfound; - upb_value v; - if (sym.data[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' 
*/ - if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) { - goto notfound; - } - } else { - /* Remove components from base until we find an entry or run out. */ - size_t baselen = base ? strlen(base) : 0; - char* tmp = malloc(sym.size + baselen + 1); - while (1) { - char* p = tmp; - if (baselen) { - memcpy(p, base, baselen); - p[baselen] = '.'; - p += baselen + 1; - } - memcpy(p, sym.data, sym.size); - p += sym.size; - if (upb_strtable_lookup2(t, tmp, p - tmp, &v)) { - break; - } - if (!remove_component(tmp, &baselen)) { - free(tmp); - goto notfound; - } - } - free(tmp); - } - - *type = deftype(v); - return _upb_DefUtil_Unpack(v, *type); - -notfound: - _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(sym)); -} - -static const void* symtab_resolve(upb_DefBuilder* ctx, - const char* from_name_dbg, const char* base, - upb_StringView sym, upb_deftype_t type) { - upb_deftype_t found_type; - const void* ret = - symtab_resolveany(ctx, from_name_dbg, base, sym, &found_type); - if (ret && found_type != type) { - _upb_DefBuilder_Errf(ctx, - "type mismatch when resolving %s: couldn't find " - "name " UPB_STRINGVIEW_FORMAT " with type=%d", - from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); - } - return ret; -} - -static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, - const google_protobuf_OneofDescriptorProto* oneof_proto, - const upb_OneofDef* _o) { - upb_OneofDef* o = (upb_OneofDef*)_o; - upb_StringView name = google_protobuf_OneofDescriptorProto_name(oneof_proto); - upb_value v; - - o->parent = m; - o->full_name = _upb_DefBuilder_MakeFullName(ctx, m->full_name, name); - o->field_count = 0; - o->synthetic = false; - - SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); - - upb_value existing_v; - if (upb_strtable_lookup2(&m->ntof, name.data, name.size, &existing_v)) { - _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); - } - - v = _upb_DefUtil_Pack(o, 
UPB_DEFTYPE_ONEOF, sizeof(upb_OneofDef)); - - bool ok = upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_inttable_init(&o->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_strtable_init(&o->ntof, 4, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); -} - -// Allocate and initialize an array of |n| oneof defs. -static upb_OneofDef* _upb_OneofDefs_New( - upb_DefBuilder* ctx, int n, - const google_protobuf_OneofDescriptorProto* const* protos, upb_MessageDef* m) { - upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); - for (int i = 0; i < n; i++) { - create_oneofdef(ctx, m, protos[i], &o[i]); - } - return o; -} - -static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { - str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - if (!ret) _upb_DefBuilder_OomErr(ctx); - ret->len = len; - if (len) memcpy(ret->str, data, len); - ret->str[len] = '\0'; - return ret; -} - -static bool upb_DefPool_TryGetChar(const char** src, const char* end, - char* ch) { - if (*src == end) return false; - *ch = **src; - *src += 1; - return true; -} - -static char upb_DefPool_TryGetHexDigit(const upb_FieldDef* f, const char** src, - const char* end) { - char ch; - if (!upb_DefPool_TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } - ch = upb_ascii_lower(ch); - if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 0xa; - } - *src -= 1; // Char wasn't actually a hex digit. 
- return -1; -} - -static char upb_DefPool_ParseHexEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, const char** src, - const char* end) { - char hex_digit = upb_DefPool_TryGetHexDigit(f, src, end); - if (hex_digit < 0) { - _upb_DefBuilder_Errf( - ctx, "\\x cannot be followed by non-hex digit in field '%s' default", - upb_FieldDef_FullName(f)); - return 0; - } - unsigned int ret = hex_digit; - while ((hex_digit = upb_DefPool_TryGetHexDigit(f, src, end)) >= 0) { - ret = (ret << 4) | hex_digit; - } - if (ret > 0xff) { - _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", - upb_FieldDef_FullName(f)); - return 0; - } - return ret; -} - -char upb_DefPool_TryGetOctalDigit(const char** src, const char* end) { - char ch; - if (!upb_DefPool_TryGetChar(src, end, &ch)) return -1; - if ('0' <= ch && ch <= '7') { - return ch - '0'; - } - *src -= 1; // Char wasn't actually an octal digit. - return -1; -} - -static char upb_DefPool_ParseOctalEscape(upb_DefBuilder* ctx, - const upb_FieldDef* f, - const char** src, const char* end) { - char ch = 0; - for (int i = 0; i < 3; i++) { - char digit; - if ((digit = upb_DefPool_TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; - } - } - return ch; -} - -static char upb_DefPool_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char** src, const char* end) { - char ch; - if (!upb_DefPool_TryGetChar(src, end, &ch)) { - _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", - upb_FieldDef_FullName(f)); - return 0; - } - switch (ch) { - case 'a': - return '\a'; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\\': - return '\\'; - case '\'': - return '\''; - case '\"': - return '\"'; - case '?': - return '\?'; - case 'x': - case 'X': - return upb_DefPool_ParseHexEscape(ctx, f, src, end); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': 
- case '6': - case '7': - *src -= 1; - return upb_DefPool_ParseOctalEscape(ctx, f, src, end); - } - _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); -} - -static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, - const char* data, size_t len) { - // Size here is an upper bound; escape sequences could ultimately shrink it. - str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); - char* dst = &ret->str[0]; - const char* src = data; - const char* end = data + len; - - while (src < end) { - if (*src == '\\') { - src++; - *dst++ = upb_DefPool_ParseEscape(ctx, f, &src, end); - } else { - *dst++ = *src++; - } - } - - ret->len = dst - &ret->str[0]; - return ret; -} - -static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, - upb_FieldDef* f) { - char* end; - char nullz[64]; - errno = 0; - - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - case kUpb_CType_UInt32: - case kUpb_CType_UInt64: - case kUpb_CType_Double: - case kUpb_CType_Float: - /* Standard C number parsing functions expect null-terminated strings. 
*/ - if (len >= sizeof(nullz) - 1) { - _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); - } - memcpy(nullz, str, len); - nullz[len] = '\0'; - str = nullz; - break; - default: - break; - } - - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: { - long val = strtol(str, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.sint = val; - break; - } - case kUpb_CType_Enum: { - const upb_EnumDef* e = f->sub.enumdef; - const upb_EnumValueDef* ev = - upb_EnumDef_FindValueByNameWithSize(e, str, len); - if (!ev) { - goto invalid; - } - f->defaultval.sint = ev->number; - break; - } - case kUpb_CType_Int64: { - long long val = strtoll(str, &end, 0); - if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.sint = val; - break; - } - case kUpb_CType_UInt32: { - unsigned long val = strtoul(str, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.uint = val; - break; - } - case kUpb_CType_UInt64: { - unsigned long long val = strtoull(str, &end, 0); - if (val > UINT64_MAX || errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.uint = val; - break; - } - case kUpb_CType_Double: { - double val = strtod(str, &end); - if (errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.dbl = val; - break; - } - case kUpb_CType_Float: { - float val = strtof(str, &end); - if (errno == ERANGE || *end) { - goto invalid; - } - f->defaultval.flt = val; - break; - } - case kUpb_CType_Bool: { - if (streql2(str, len, "false")) { - f->defaultval.boolean = false; - } else if (streql2(str, len, "true")) { - f->defaultval.boolean = true; - } else { - goto invalid; - } - break; - } - case kUpb_CType_String: - f->defaultval.str = newstr(ctx, str, len); - break; - case kUpb_CType_Bytes: - f->defaultval.str = unescape(ctx, f, str, len); - break; - case kUpb_CType_Message: - /* Should not have a default value. 
*/ - _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", - upb_FieldDef_FullName(f)); - } - - return; - -invalid: - _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", - (int)len, str, upb_FieldDef_FullName(f), - (int)upb_FieldDef_Type(f)); -} - -static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { - switch (upb_FieldDef_CType(f)) { - case kUpb_CType_Int32: - case kUpb_CType_Int64: - f->defaultval.sint = 0; - break; - case kUpb_CType_UInt64: - case kUpb_CType_UInt32: - f->defaultval.uint = 0; - break; - case kUpb_CType_Double: - case kUpb_CType_Float: - f->defaultval.dbl = 0; - break; - case kUpb_CType_String: - case kUpb_CType_Bytes: - f->defaultval.str = newstr(ctx, NULL, 0); - break; - case kUpb_CType_Bool: - f->defaultval.boolean = false; - break; - case kUpb_CType_Enum: { - const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); - f->defaultval.sint = upb_EnumValueDef_Number(v); - } - case kUpb_CType_Message: - break; - } -} - -static void create_fielddef(upb_DefBuilder* ctx, const char* prefix, - upb_MessageDef* m, - const google_protobuf_FieldDescriptorProto* field_proto, - const upb_FieldDef* _f, bool is_extension) { - upb_FieldDef* f = (upb_FieldDef*)_f; - upb_StringView name; - const char* full_name; - const char* json_name; - const char* shortname; - int32_t field_number; - - f->file = ctx->file; // Must happen prior to _upb_DefBuilder_Add() - - if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { - _upb_DefBuilder_Errf(ctx, "field has no name"); - } - - name = google_protobuf_FieldDescriptorProto_name(field_proto); - _upb_DefBuilder_CheckIdent(ctx, name, false); - full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - shortname = _upb_DefUtil_FullToShort(full_name); - - if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) { - json_name = strviewdup( - ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto)); - f->has_json_name_ = true; - } 
else { - json_name = makejsonname(ctx, shortname); - f->has_json_name_ = false; - } - - field_number = google_protobuf_FieldDescriptorProto_number(field_proto); - - f->full_name = full_name; - f->json_name = json_name; - f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); - f->number_ = field_number; - f->scope.oneof = NULL; - f->proto3_optional_ = - google_protobuf_FieldDescriptorProto_proto3_optional(field_proto); - - bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto); - bool has_type_name = - google_protobuf_FieldDescriptorProto_has_type_name(field_proto); - - f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); - - if (has_type) { - switch (f->type_) { - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - case kUpb_FieldType_Enum: - if (!has_type_name) { - _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", - (int)f->type_, full_name); - } - break; - default: - if (has_type_name) { - _upb_DefBuilder_Errf( - ctx, "invalid type for field with type_name set (%s, %d)", - full_name, (int)f->type_); - } - } - } else if (has_type_name) { - f->type_ = - FIELD_TYPE_UNSPECIFIED; // We'll fill this in in resolve_fielddef(). - } - - if (!is_extension) { - /* direct message field. 
*/ - upb_value v, field_v, json_v, existing_v; - size_t json_size; - - if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { - _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); - } - - f->index_ = f - m->fields; - f->msgdef = m; - f->is_extension_ = false; - - field_v = _upb_DefUtil_Pack(f, UPB_DEFTYPE_FIELD, sizeof(upb_FieldDef)); - json_v = - _upb_DefUtil_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME, sizeof(upb_FieldDef)); - v = upb_value_constptr(f); - json_size = strlen(json_name); - - if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { - _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); - } - - bool ok = upb_strtable_insert(&m->ntof, name.data, name.size, field_v, - ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - if (strcmp(shortname, json_name) != 0) { - if (upb_strtable_lookup(&m->ntof, json_name, &v)) { - _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); - } else { - ok = upb_strtable_insert(&m->ntof, json_name, json_size, json_v, - ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } - } - - if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); - } - - ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - if (ctx->layout) { - const upb_MiniTable* mt = upb_MessageDef_MiniTable(m); - const upb_MiniTable_Field* fields = mt->fields; - const int count = mt->field_count; - bool found = false; - for (int i = 0; i < count; i++) { - if (fields[i].number == field_number) { - f->layout_index = i; - found = true; - break; - } - } - UPB_ASSERT(found); - } - } else { - /* extension field. 
*/ - f->is_extension_ = true; - f->scope.extension_scope = m; - _upb_DefBuilder_Add( - ctx, full_name, - _upb_DefUtil_Pack(f, UPB_DEFTYPE_EXT, sizeof(upb_FieldDef))); - f->layout_index = ctx->ext_count++; - if (ctx->layout) { - UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == - field_number); - } - } - - if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { - _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, - f->type_); - } - - if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { - _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, - f->label_); - } - - /* We can't resolve the subdef or (in the case of extensions) the containing - * message yet, because it may not have been defined yet. We stash a pointer - * to the field_proto until later when we can properly resolve it. */ - f->sub.unresolved = field_proto; - - if (f->label_ == kUpb_Label_Required && - f->file->syntax == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", - f->full_name); - } - - if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) { - uint32_t oneof_index = google_protobuf_FieldDescriptorProto_oneof_index(field_proto); - upb_OneofDef* oneof; - upb_value v = upb_value_constptr(f); - - if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { - _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", - f->full_name); - } - - if (!m) { - _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", - f->full_name); - } - - if (oneof_index >= m->oneof_count) { - _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); - } - - oneof = (upb_OneofDef*)&m->oneofs[oneof_index]; - f->scope.oneof = oneof; - - oneof->field_count++; - if (f->proto3_optional_) { - oneof->synthetic = true; - } - - bool ok = upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); 
- - ok = upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } else { - if (f->proto3_optional_) { - _upb_DefBuilder_Errf(ctx, - "field with proto3_optional was not in a oneof (%s)", - f->full_name); - } - } - - SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); - - if (google_protobuf_FieldOptions_has_packed(f->opts)) { - f->packed_ = google_protobuf_FieldOptions_packed(f->opts); - } else { - /* Repeated fields default to packed for proto3 only. */ - f->packed_ = upb_FieldDef_IsPrimitive(f) && - f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; - } -} - -static void create_method(upb_DefBuilder* ctx, - const google_protobuf_MethodDescriptorProto* method_proto, - upb_ServiceDef* s, upb_MethodDef* m) { - upb_StringView name = google_protobuf_MethodDescriptorProto_name(method_proto); - - m->service = s; - m->full_name = _upb_DefBuilder_MakeFullName(ctx, s->full_name, name); - m->client_streaming = - google_protobuf_MethodDescriptorProto_client_streaming(method_proto); - m->server_streaming = - google_protobuf_MethodDescriptorProto_server_streaming(method_proto); - m->input_type = symtab_resolve( - ctx, m->full_name, m->full_name, - google_protobuf_MethodDescriptorProto_input_type(method_proto), UPB_DEFTYPE_MSG); - m->output_type = symtab_resolve( - ctx, m->full_name, m->full_name, - google_protobuf_MethodDescriptorProto_output_type(method_proto), UPB_DEFTYPE_MSG); - - SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, method_proto); -} - -// Allocate and initialize an array of |n| method defs. 
-static upb_MethodDef* _upb_MethodDefs_New( - upb_DefBuilder* ctx, int n, - const google_protobuf_MethodDescriptorProto* const* protos, upb_ServiceDef* s) { - upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); - for (int i = 0; i < n; i++) { - create_method(ctx, protos[i], s, &m[i]); - m[i].index = i; - } - return m; -} - -static void create_service(upb_DefBuilder* ctx, - const google_protobuf_ServiceDescriptorProto* svc_proto, - upb_ServiceDef* s) { - upb_StringView name; - size_t n; - - s->file = ctx->file; // Must happen prior to _upb_DefBuilder_Add() - - name = google_protobuf_ServiceDescriptorProto_name(svc_proto); - _upb_DefBuilder_CheckIdent(ctx, name, false); - s->full_name = _upb_DefBuilder_MakeFullName(ctx, ctx->file->package, name); - _upb_DefBuilder_Add( - ctx, s->full_name, - _upb_DefUtil_Pack(s, UPB_DEFTYPE_SERVICE, sizeof(upb_ServiceDef))); - - const google_protobuf_MethodDescriptorProto* const* methods = - google_protobuf_ServiceDescriptorProto_method(svc_proto, &n); - s->method_count = n; - s->methods = _upb_MethodDefs_New(ctx, n, methods, s); - - SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, svc_proto); -} - -// Allocate and initialize an array of |n| service defs. -static upb_ServiceDef* _upb_ServiceDefs_New( - upb_DefBuilder* ctx, int n, - const google_protobuf_ServiceDescriptorProto* const* protos) { - upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); - for (int i = 0; i < n; i++) { - create_service(ctx, protos[i], &s[i]); - s[i].index = i; - } - return s; -} - -static int count_bits_debug(uint64_t x) { - // For assertions only, speed does not matter. 
- int n = 0; - while (x) { - if (x & 1) n++; - x >>= 1; - } - return n; -} - -static upb_MiniTable_Enum* create_enumlayout(upb_DefBuilder* ctx, - const upb_EnumDef* e) { - const char* desc = _upb_EnumDef_MiniDescriptor(e, ctx->tmp_arena); - if (!desc) - _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); - - upb_Status status; - upb_MiniTable_Enum* layout = - upb_MiniTable_BuildEnum(desc, strlen(desc), ctx->arena, &status); - if (!layout) - _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); - return layout; -} - -static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, - const google_protobuf_EnumValueDescriptorProto* val_proto, - upb_EnumDef* e, upb_EnumValueDef* v) { - upb_StringView name = google_protobuf_EnumValueDescriptorProto_name(val_proto); - upb_value val = upb_value_constptr(v); - - v->parent = e; // Must happen prior to _upb_DefBuilder_Add() - v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - v->number = google_protobuf_EnumValueDescriptorProto_number(val_proto); - _upb_DefBuilder_Add( - ctx, v->full_name, - _upb_DefUtil_Pack(v, UPB_DEFTYPE_ENUMVAL, sizeof(upb_EnumDef))); - - SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, val_proto); - - bool ok = - upb_strtable_insert(&e->ntoi, name.data, name.size, val, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - // Multiple enumerators can have the same number, first one wins. - if (!upb_inttable_lookup(&e->iton, v->number, NULL)) { - ok = upb_inttable_insert(&e->iton, v->number, val, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } -} - -// Allocate and initialize an array of |n| enum value defs. -// TODO(b/243726666): This will eventually become the (only) public constructor. 
-static upb_EnumValueDef* _upb_EnumValueDefs_New( - upb_DefBuilder* ctx, const char* prefix, int n, - const google_protobuf_EnumValueDescriptorProto* const* protos, upb_EnumDef* e) { - upb_EnumValueDef* v = - _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); - - bool is_sorted = true; - uint32_t previous = 0; - for (size_t i = 0; i < n; i++) { - create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); - - const uint32_t current = v[i].number; - if (previous > current) is_sorted = false; - previous = current; - } - e->is_sorted = is_sorted; - - if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && - v[0].number != 0) { - _upb_DefBuilder_Errf(ctx, - "for proto3, the first enum value must be zero (%s)", - e->full_name); - } - - return v; -} - -static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, - const google_protobuf_EnumDescriptorProto* enum_proto, - upb_EnumDef* e) { - const google_protobuf_EnumValueDescriptorProto* const* values; - upb_StringView name; - size_t n; - - e->file = ctx->file; // Must happen prior to _upb_DefBuilder_Add() - - name = google_protobuf_EnumDescriptorProto_name(enum_proto); - _upb_DefBuilder_CheckIdent(ctx, name, false); - - e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add( - ctx, e->full_name, - _upb_DefUtil_Pack(e, UPB_DEFTYPE_ENUM, sizeof(upb_EnumDef))); - - values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); - - bool ok = upb_strtable_init(&e->ntoi, n, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_inttable_init(&e->iton, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - e->defaultval = 0; - e->value_count = n; - e->values = _upb_EnumValueDefs_New(ctx, prefix, n, values, e); - - if (n == 0) { - _upb_DefBuilder_Errf(ctx, "enums must contain at least one value (%s)", - e->full_name); - } - - SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); - - upb_inttable_compact(&e->iton, ctx->arena); - - if 
(upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2) { - if (ctx->layout) { - UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); - e->layout = ctx->layout->enums[ctx->enum_count++]; - } else { - e->layout = create_enumlayout(ctx, e); - } - } else { - e->layout = NULL; - } -} - -// Allocate and initialize an array of |n| enum defs. -// TODO(b/243726666): This will eventually become the (only) public constructor. -static upb_EnumDef* _upb_EnumDefs_New( - upb_DefBuilder* ctx, int n, const google_protobuf_EnumDescriptorProto* const* protos, - const upb_MessageDef* containing_type) { - // If a containing type is defined then get the full name from that. - // Otherwise use the package name from the file def. - const char* name = - containing_type ? containing_type->full_name : ctx->file->package; - - upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); - for (size_t i = 0; i < n; i++) { - create_enumdef(ctx, name, protos[i], &e[i]); - e[i].containing_type = containing_type; - } - return e; -} - -static void msgdef_create_nested(upb_DefBuilder* ctx, - const google_protobuf_DescriptorProto* msg_proto, - upb_MessageDef* m); - -static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, - const google_protobuf_DescriptorProto* msg_proto, - const upb_MessageDef* containing_type, - const upb_MessageDef* _m) { - upb_MessageDef* m = (upb_MessageDef*)_m; - const google_protobuf_OneofDescriptorProto* const* oneofs; - const google_protobuf_FieldDescriptorProto* const* fields; - const google_protobuf_DescriptorProto_ExtensionRange* const* ext_ranges; - size_t i, n_oneof, n_field, n_ext_range; - upb_StringView name; - - m->file = ctx->file; // Must happen prior to _upb_DefBuilder_Add() - m->containing_type = containing_type; - - name = google_protobuf_DescriptorProto_name(msg_proto); - _upb_DefBuilder_CheckIdent(ctx, name, false); - - m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - _upb_DefBuilder_Add( - ctx, m->full_name, - 
_upb_DefUtil_Pack(m, UPB_DEFTYPE_MSG, sizeof(upb_MessageDef))); - - oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); - fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); - ext_ranges = - google_protobuf_DescriptorProto_extension_range(msg_proto, &n_ext_range); - - bool ok = upb_inttable_init(&m->itof, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); - - if (ctx->layout) { - /* create_fielddef() below depends on this being set. */ - UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); - m->layout = ctx->layout->msgs[ctx->msg_count++]; - UPB_ASSERT(n_field == m->layout->field_count); - } else { - /* Allocate now (to allow cross-linking), populate later. */ - m->layout = _upb_DefBuilder_Alloc( - ctx, sizeof(*m->layout) + sizeof(_upb_FastTable_Entry)); - } - - SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); - - m->oneof_count = n_oneof; - m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); - - m->field_count = n_field; - m->fields = _upb_FieldDef_Alloc(ctx, n_field); - for (i = 0; i < n_field; i++) { - create_fielddef(ctx, m->full_name, m, fields[i], &m->fields[i], - /* is_extension= */ false); - } - - m->ext_range_count = n_ext_range; - m->ext_ranges = _upb_ExtensionRange_Alloc(ctx, n_ext_range); - for (i = 0; i < n_ext_range; i++) { - const google_protobuf_DescriptorProto_ExtensionRange* r = ext_ranges[i]; - upb_ExtensionRange* r_def = (upb_ExtensionRange*)&m->ext_ranges[i]; - int32_t start = google_protobuf_DescriptorProto_ExtensionRange_start(r); - int32_t end = google_protobuf_DescriptorProto_ExtensionRange_end(r); - int32_t max = - google_protobuf_MessageOptions_message_set_wire_format(m->opts) - ? 
INT32_MAX - : kUpb_MaxFieldNumber + 1; - - // A full validation would also check that each range is disjoint, and that - // none of the fields overlap with the extension ranges, but we are just - // sanity checking here. - if (start < 1 || end <= start || end > max) { - _upb_DefBuilder_Errf(ctx, - "Extension range (%d, %d) is invalid, message=%s\n", - (int)start, (int)end, m->full_name); - } - - r_def->start = start; - r_def->end = end; - SET_OPTIONS(r_def->opts, DescriptorProto_ExtensionRange, - ExtensionRangeOptions, r); - } - - finalize_oneofs(ctx, m); - assign_msg_wellknowntype(m); - upb_inttable_compact(&m->itof, ctx->arena); - msgdef_create_nested(ctx, msg_proto, m); -} - -// Allocate and initialize an array of |n| message defs. -static upb_MessageDef* _upb_MessageDefs_New( - upb_DefBuilder* ctx, int n, const google_protobuf_DescriptorProto* const* protos, - const upb_MessageDef* containing_type) { - const char* name = - containing_type ? containing_type->full_name : ctx->file->package; - upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); - for (int i = 0; i < n; i++) { - create_msgdef(ctx, name, protos[i], containing_type, &m[i]); - } - return m; -} - -static void msgdef_create_nested(upb_DefBuilder* ctx, - const google_protobuf_DescriptorProto* msg_proto, - upb_MessageDef* m) { - size_t n; - - const google_protobuf_EnumDescriptorProto* const* enums = - google_protobuf_DescriptorProto_enum_type(msg_proto, &n); - m->nested_enum_count = n; - m->nested_enums = _upb_EnumDefs_New(ctx, n, enums, m); - - const google_protobuf_FieldDescriptorProto* const* exts = - google_protobuf_DescriptorProto_extension(msg_proto, &n); - m->nested_ext_count = n; - m->nested_exts = _upb_FieldDef_Alloc(ctx, n); - for (size_t i = 0; i < n; i++) { - create_fielddef(ctx, m->full_name, m, exts[i], &m->nested_exts[i], - /* is_extension= */ true); - ((upb_FieldDef*)&m->nested_exts[i])->index_ = i; - } - - const google_protobuf_DescriptorProto* const* msgs = - 
google_protobuf_DescriptorProto_nested_type(msg_proto, &n); - m->nested_msg_count = n; - m->nested_msgs = _upb_MessageDefs_New(ctx, n, msgs, m); -} - -static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved; - upb_StringView name = - google_protobuf_FieldDescriptorProto_type_name(field_proto); - bool has_name = - google_protobuf_FieldDescriptorProto_has_type_name(field_proto); - switch ((int)f->type_) { - case FIELD_TYPE_UNSPECIFIED: { - // Type was not specified and must be inferred. - UPB_ASSERT(has_name); - upb_deftype_t type; - const void* def = - symtab_resolveany(ctx, f->full_name, prefix, name, &type); - switch (type) { - case UPB_DEFTYPE_ENUM: - f->sub.enumdef = def; - f->type_ = kUpb_FieldType_Enum; - break; - case UPB_DEFTYPE_MSG: - f->sub.msgdef = def; - f->type_ = kUpb_FieldType_Message; // It appears there is no way of - // this being a group. - break; - default: - _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s", - f->full_name); - } - } - case kUpb_FieldType_Message: - case kUpb_FieldType_Group: - UPB_ASSERT(has_name); - f->sub.msgdef = - symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); - break; - case kUpb_FieldType_Enum: - UPB_ASSERT(has_name); - f->sub.enumdef = - symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_ENUM); - break; - default: - // No resolution necessary. 
- break; - } -} - -static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f, - const google_protobuf_FieldDescriptorProto* field_proto) { - if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) { - _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee", - f->full_name); - } - - upb_StringView name = google_protobuf_FieldDescriptorProto_extendee(field_proto); - const upb_MessageDef* m = - symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); - f->msgdef = m; - - bool found = false; - - for (int i = 0, n = m->ext_range_count; i < n; i++) { - const upb_ExtensionRange* r = &m->ext_ranges[i]; - if (r->start <= f->number_ && f->number_ < r->end) { - found = true; - break; - } - } - - if (!found) { - _upb_DefBuilder_Errf( - ctx, - "field number %u in extension %s has no extension range in " - "message %s", - (unsigned)f->number_, f->full_name, f->msgdef->full_name); - } - - const upb_MiniTable_Extension* ext = _upb_FieldDef_ExtensionMiniTable(f); - if (ctx->layout) { - UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); - } else { - upb_MiniTable_Extension* mut_ext = (upb_MiniTable_Extension*)ext; - fill_fieldlayout(&mut_ext->field, f); - mut_ext->field.presence = 0; - mut_ext->field.offset = 0; - mut_ext->field.submsg_index = 0; - mut_ext->extendee = f->msgdef->layout; - mut_ext->sub.submsg = f->sub.msgdef->layout; - } - - bool ok = upb_inttable_insert(&ctx->symtab->exts, (uintptr_t)ext, - upb_value_constptr(f), ctx->arena); - if (!ok) _upb_DefBuilder_OomErr(ctx); -} - -static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f, - const google_protobuf_FieldDescriptorProto* field_proto) { - // Have to delay resolving of the default value until now because of the enum - // case, since enum defaults are specified with a label. 
- if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) { - upb_StringView defaultval = - google_protobuf_FieldDescriptorProto_default_value(field_proto); - - if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { - _upb_DefBuilder_Errf(ctx, - "proto3 fields cannot have explicit defaults (%s)", - f->full_name); - } - - if (upb_FieldDef_IsSubMessage(f)) { - _upb_DefBuilder_Errf(ctx, - "message fields cannot have explicit defaults (%s)", - f->full_name); - } - - parse_default(ctx, defaultval.data, defaultval.size, f); - f->has_default = true; - } else { - set_default_default(ctx, f); - f->has_default = false; - } -} - -static void resolve_fielddef(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f) { - // We have to stash this away since resolve_subdef() may overwrite it. - const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved; - - resolve_subdef(ctx, prefix, f); - resolve_default(ctx, f, field_proto); - - if (f->is_extension_) { - resolve_extension(ctx, prefix, f, field_proto); - } -} - -static void resolve_msgdef(upb_DefBuilder* ctx, upb_MessageDef* m) { - for (int i = 0; i < m->field_count; i++) { - resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->fields[i]); - } - - m->in_message_set = false; - for (int i = 0; i < m->nested_ext_count; i++) { - upb_FieldDef* ext = (upb_FieldDef*)&m->nested_exts[i]; - resolve_fielddef(ctx, m->full_name, ext); - if (ext->type_ == kUpb_FieldType_Message && - ext->label_ == kUpb_Label_Optional && ext->sub.msgdef == m && - google_protobuf_MessageOptions_message_set_wire_format( - ext->msgdef->opts)) { - m->in_message_set = true; - } - } - - if (!ctx->layout) make_layout(ctx, m); - - for (int i = 0; i < m->nested_msg_count; i++) { - resolve_msgdef(ctx, (upb_MessageDef*)&m->nested_msgs[i]); - } -} - -static int count_exts_in_msg(const google_protobuf_DescriptorProto* msg_proto) { - size_t n; - google_protobuf_DescriptorProto_extension(msg_proto, &n); - int ext_count = n; - - const 
google_protobuf_DescriptorProto* const* nested_msgs = - google_protobuf_DescriptorProto_nested_type(msg_proto, &n); - for (size_t i = 0; i < n; i++) { - ext_count += count_exts_in_msg(nested_msgs[i]); - } - - return ext_count; -} - -// Allocate and initialize one file def, and add it to the context object. -static void _upb_FileDef_Create(upb_DefBuilder* ctx, - const google_protobuf_FileDescriptorProto* file_proto) { - upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef)); - ctx->file = file; - - const google_protobuf_DescriptorProto* const* msgs; - const google_protobuf_EnumDescriptorProto* const* enums; - const google_protobuf_FieldDescriptorProto* const* exts; - const google_protobuf_ServiceDescriptorProto* const* services; - const upb_StringView* strs; - const int32_t* public_deps; - const int32_t* weak_deps; - size_t i, n; - - file->symtab = ctx->symtab; - - /* Count all extensions in the file, to build a flat array of layouts. */ - google_protobuf_FileDescriptorProto_extension(file_proto, &n); - int ext_count = n; - msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); - for (int i = 0; i < n; i++) { - ext_count += count_exts_in_msg(msgs[i]); - } - file->ext_count = ext_count; - - if (ctx->layout) { - /* We are using the ext layouts that were passed in. */ - file->ext_layouts = ctx->layout->exts; - if (ctx->layout->ext_count != file->ext_count) { - _upb_DefBuilder_Errf(ctx, - "Extension count did not match layout (%d vs %d)", - ctx->layout->ext_count, file->ext_count); - } - } else { - /* We are building ext layouts from scratch. 
*/ - file->ext_layouts = _upb_DefBuilder_Alloc( - ctx, sizeof(*file->ext_layouts) * file->ext_count); - upb_MiniTable_Extension* ext = - _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count); - for (int i = 0; i < file->ext_count; i++) { - file->ext_layouts[i] = &ext[i]; - } - } - - if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) { - _upb_DefBuilder_Errf(ctx, "File has no name"); - } - - file->name = strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto)); - - upb_StringView package = google_protobuf_FileDescriptorProto_package(file_proto); - if (package.size) { - _upb_DefBuilder_CheckIdent(ctx, package, true); - file->package = strviewdup(ctx, package); - } else { - file->package = NULL; - } - - if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) { - upb_StringView syntax = google_protobuf_FileDescriptorProto_syntax(file_proto); - - if (streql_view(syntax, "proto2")) { - file->syntax = kUpb_Syntax_Proto2; - } else if (streql_view(syntax, "proto3")) { - file->syntax = kUpb_Syntax_Proto3; - } else { - _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'", - UPB_STRINGVIEW_ARGS(syntax)); - } - } else { - file->syntax = kUpb_Syntax_Proto2; - } - - /* Read options. */ - SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); - - /* Verify dependencies. 
*/ - strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n); - file->dep_count = n; - file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); - - for (i = 0; i < n; i++) { - upb_StringView str = strs[i]; - file->deps[i] = - upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); - if (!file->deps[i]) { - _upb_DefBuilder_Errf(ctx, - "Depends on file '" UPB_STRINGVIEW_FORMAT - "', but it has not been loaded", - UPB_STRINGVIEW_ARGS(str)); - } - } - - public_deps = google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n); - file->public_dep_count = n; - file->public_deps = - _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); - int32_t* mutable_public_deps = (int32_t*)file->public_deps; - for (i = 0; i < n; i++) { - if (public_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", - (int)public_deps[i]); - } - mutable_public_deps[i] = public_deps[i]; - } - - weak_deps = google_protobuf_FileDescriptorProto_weak_dependency(file_proto, &n); - file->weak_dep_count = n; - file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); - int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; - for (i = 0; i < n; i++) { - if (weak_deps[i] >= file->dep_count) { - _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", - (int)weak_deps[i]); - } - mutable_weak_deps[i] = weak_deps[i]; - } - - // Create enums. - enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); - file->top_lvl_enum_count = n; - file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL); - - /* Create extensions. */ - exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n); - file->top_lvl_ext_count = n; - file->top_lvl_exts = _upb_FieldDef_Alloc(ctx, n); - for (i = 0; i < n; i++) { - create_fielddef(ctx, file->package, NULL, exts[i], &file->top_lvl_exts[i], - /* is_extension= */ true); - ((upb_FieldDef*)&file->top_lvl_exts[i])->index_ = i; - } - - // Create messages. 
- msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); - file->top_lvl_msg_count = n; - file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL); - - // Create services. - services = google_protobuf_FileDescriptorProto_service(file_proto, &n); - file->service_count = n; - file->services = _upb_ServiceDefs_New(ctx, n, services); - - /* Now that all names are in the table, build layouts and resolve refs. */ - for (i = 0; i < (size_t)file->top_lvl_ext_count; i++) { - resolve_fielddef(ctx, file->package, (upb_FieldDef*)&file->top_lvl_exts[i]); - } - - for (i = 0; i < (size_t)file->top_lvl_msg_count; i++) { - resolve_msgdef(ctx, (upb_MessageDef*)&file->top_lvl_msgs[i]); - } - - if (file->ext_count) { - bool ok = _upb_extreg_add(ctx->symtab->extreg, file->ext_layouts, - file->ext_count); - if (!ok) _upb_DefBuilder_OomErr(ctx); - } -} - -static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { - intptr_t iter = UPB_INTTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { - const upb_FileDef* f; - switch (deftype(val)) { - case UPB_DEFTYPE_EXT: - f = upb_FieldDef_File(_upb_DefUtil_Unpack(val, UPB_DEFTYPE_EXT)); - break; - case UPB_DEFTYPE_MSG: - f = upb_MessageDef_File(_upb_DefUtil_Unpack(val, UPB_DEFTYPE_MSG)); - break; - case UPB_DEFTYPE_ENUM: - f = upb_EnumDef_File(_upb_DefUtil_Unpack(val, UPB_DEFTYPE_ENUM)); - break; - case UPB_DEFTYPE_ENUMVAL: - f = upb_EnumDef_File(upb_EnumValueDef_Enum( - _upb_DefUtil_Unpack(val, UPB_DEFTYPE_ENUMVAL))); - break; - case UPB_DEFTYPE_SERVICE: - f = upb_ServiceDef_File(_upb_DefUtil_Unpack(val, UPB_DEFTYPE_SERVICE)); - break; - default: - UPB_UNREACHABLE(); - } - - if (f == file) upb_strtable_removeiter(&s->syms, &iter); - } -} - -static const upb_FileDef* _upb_DefPool_AddFile( - upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto, - const upb_MiniTable_File* layout, upb_Status* status) { - const upb_StringView name = 
google_protobuf_FileDescriptorProto_name(file_proto); - - // Determine whether we already know about this file. - { - upb_value v; - if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { - upb_Status_SetErrorFormat(status, "duplicate file name (%.*s)", - UPB_STRINGVIEW_ARGS(name)); - return NULL; - } - } - - upb_DefBuilder ctx = { - .symtab = s, - .layout = layout, - .msg_count = 0, - .enum_count = 0, - .ext_count = 0, - .status = status, - .file = NULL, - .arena = upb_Arena_New(), - .tmp_arena = upb_Arena_New(), - }; - - if (UPB_SETJMP(ctx.err)) { - UPB_ASSERT(!upb_Status_IsOk(status)); - if (ctx.file) { - remove_filedef(s, ctx.file); - ctx.file = NULL; - } - } else if (!ctx.arena || !ctx.tmp_arena) { - _upb_DefBuilder_OomErr(&ctx); - } else { - _upb_FileDef_Create(&ctx, file_proto); - upb_strtable_insert(&s->files, name.data, name.size, - upb_value_constptr(ctx.file), ctx.arena); - UPB_ASSERT(upb_Status_IsOk(status)); - upb_Arena_Fuse(s->arena, ctx.arena); - } - - if (ctx.arena) upb_Arena_Free(ctx.arena); - if (ctx.tmp_arena) upb_Arena_Free(ctx.tmp_arena); - return ctx.file; -} - -const upb_FileDef* upb_DefPool_AddFile( - upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto, - upb_Status* status) { - return _upb_DefPool_AddFile(s, file_proto, NULL, status); -} - -/* Include here since we want most of this file to be stdio-free. */ -#include - -bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, - bool rebuild_minitable) { - /* Since this function should never fail (it would indicate a bug in upb) we - * print errors to stderr instead of returning error status to the user. 
*/ - _upb_DefPool_Init** deps = init->deps; - google_protobuf_FileDescriptorProto* file; - upb_Arena* arena; - upb_Status status; - - upb_Status_Clear(&status); - - if (upb_DefPool_FindFileByName(s, init->filename)) { - return true; - } - - arena = upb_Arena_New(); - - for (; *deps; deps++) { - if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; - } - - file = google_protobuf_FileDescriptorProto_parse_ex( - init->descriptor.data, init->descriptor.size, NULL, - kUpb_DecodeOption_AliasString, arena); - s->bytes_loaded += init->descriptor.size; - - if (!file) { - upb_Status_SetErrorFormat( - &status, - "Failed to parse compiled-in descriptor for file '%s'. This should " - "never happen.", - init->filename); - goto err; - } - - const upb_MiniTable_File* mt = rebuild_minitable ? NULL : init->layout; - if (!_upb_DefPool_AddFile(s, file, mt, &status)) { - goto err; - } - - upb_Arena_Free(arena); - return true; - -err: - fprintf(stderr, - "Error loading compiled-in descriptor for file '%s' (this should " - "never happen): %s\n", - init->filename, upb_Status_ErrorMessage(&status)); - upb_Arena_Free(arena); - return false; -} - -size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { - return s->bytes_loaded; -} - -upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } - -const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable( - const upb_DefPool* s, const upb_MiniTable_Extension* ext) { - upb_value v; - bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); - UPB_ASSERT(ok); - return upb_value_getconstptr(v); -} - -const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, - const upb_MessageDef* m, - int32_t fieldnum) { - const upb_MiniTable* l = upb_MessageDef_MiniTable(m); - const upb_MiniTable_Extension* ext = _upb_extreg_get(s->extreg, l, fieldnum); - return ext ? 
_upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; -} - -const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( - const upb_DefPool* s) { - return s->extreg; -} - -const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, - const upb_MessageDef* m, - size_t* count) { - size_t n = 0; - intptr_t iter = UPB_INTTABLE_BEGIN; - uintptr_t key; - upb_value val; - // This is O(all exts) instead of O(exts for m). If we need this to be - // efficient we may need to make extreg into a two-level table, or have a - // second per-message index. - while (upb_inttable_next2(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) n++; - } - const upb_FieldDef** exts = malloc(n * sizeof(*exts)); - iter = UPB_INTTABLE_BEGIN; - size_t i = 0; - while (upb_inttable_next2(&s->exts, &key, &val, &iter)) { - const upb_FieldDef* f = upb_value_getconstptr(val); - if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; - } - *count = n; - return exts; -} diff --git a/upb/def.h b/upb/def.h index 5fd5fb703b..164b91b072 100644 --- a/upb/def.h +++ b/upb/def.h @@ -25,395 +25,11 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +// This header is deprecated, use upb/reflection/def.h instead + #ifndef UPB_DEF_H_ #define UPB_DEF_H_ -#include "google/protobuf/descriptor.upb.h" -#include "upb/internal/table.h" -#include "upb/upb.h" - -// Must be last. 
-#include "upb/port_def.inc" - -#ifdef __cplusplus -extern "C" { -#endif - -struct upb_EnumDef; -typedef struct upb_EnumDef upb_EnumDef; -struct upb_EnumValueDef; -typedef struct upb_EnumValueDef upb_EnumValueDef; -struct upb_ExtensionRange; -typedef struct upb_ExtensionRange upb_ExtensionRange; -struct upb_FieldDef; -typedef struct upb_FieldDef upb_FieldDef; -struct upb_FileDef; -typedef struct upb_FileDef upb_FileDef; -struct upb_MethodDef; -typedef struct upb_MethodDef upb_MethodDef; -struct upb_MessageDef; -typedef struct upb_MessageDef upb_MessageDef; -struct upb_OneofDef; -typedef struct upb_OneofDef upb_OneofDef; -struct upb_ServiceDef; -typedef struct upb_ServiceDef upb_ServiceDef; -struct upb_streamdef; -typedef struct upb_streamdef upb_streamdef; -struct upb_DefPool; -typedef struct upb_DefPool upb_DefPool; - -typedef enum { kUpb_Syntax_Proto2 = 2, kUpb_Syntax_Proto3 = 3 } upb_Syntax; - -/* All the different kind of well known type messages. For simplicity of check, - * number wrappers and string wrappers are grouped together. Make sure the - * order and merber of these groups are not changed. - */ -typedef enum { - kUpb_WellKnown_Unspecified, - kUpb_WellKnown_Any, - kUpb_WellKnown_FieldMask, - kUpb_WellKnown_Duration, - kUpb_WellKnown_Timestamp, - /* number wrappers */ - kUpb_WellKnown_DoubleValue, - kUpb_WellKnown_FloatValue, - kUpb_WellKnown_Int64Value, - kUpb_WellKnown_UInt64Value, - kUpb_WellKnown_Int32Value, - kUpb_WellKnown_UInt32Value, - /* string wrappers */ - kUpb_WellKnown_StringValue, - kUpb_WellKnown_BytesValue, - kUpb_WellKnown_BoolValue, - kUpb_WellKnown_Value, - kUpb_WellKnown_ListValue, - kUpb_WellKnown_Struct -} upb_WellKnown; - -/* upb_FieldDef ***************************************************************/ - -/* Maximum field number allowed for FieldDefs. This is an inherent limit of the - * protobuf wire format. 
*/ -#define kUpb_MaxFieldNumber ((1 << 29) - 1) - -const google_protobuf_FieldOptions* upb_FieldDef_Options(const upb_FieldDef* f); -bool upb_FieldDef_HasOptions(const upb_FieldDef* f); -const char* upb_FieldDef_FullName(const upb_FieldDef* f); -upb_CType upb_FieldDef_CType(const upb_FieldDef* f); -upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f); -upb_Label upb_FieldDef_Label(const upb_FieldDef* f); -uint32_t upb_FieldDef_Number(const upb_FieldDef* f); -const char* upb_FieldDef_Name(const upb_FieldDef* f); -const char* upb_FieldDef_JsonName(const upb_FieldDef* f); -bool upb_FieldDef_HasJsonName(const upb_FieldDef* f); -bool upb_FieldDef_IsExtension(const upb_FieldDef* f); -bool upb_FieldDef_IsPacked(const upb_FieldDef* f); -const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f); -const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f); -const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f); -const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f); -const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f); -uint32_t upb_FieldDef_Index(const upb_FieldDef* f); -bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f); -bool upb_FieldDef_IsString(const upb_FieldDef* f); -bool upb_FieldDef_IsOptional(const upb_FieldDef* f); -bool upb_FieldDef_IsRequired(const upb_FieldDef* f); -bool upb_FieldDef_IsRepeated(const upb_FieldDef* f); -bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f); -bool upb_FieldDef_IsMap(const upb_FieldDef* f); -bool upb_FieldDef_HasDefault(const upb_FieldDef* f); -bool upb_FieldDef_HasSubDef(const upb_FieldDef* f); -bool upb_FieldDef_HasPresence(const upb_FieldDef* f); -const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f); -const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f); -const upb_MiniTable_Field* upb_FieldDef_MiniTable(const upb_FieldDef* f); -const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable( - const upb_FieldDef* 
f); -bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f); - -/* upb_OneofDef ***************************************************************/ - -const google_protobuf_OneofOptions* upb_OneofDef_Options(const upb_OneofDef* o); -bool upb_OneofDef_HasOptions(const upb_OneofDef* o); -const char* upb_OneofDef_Name(const upb_OneofDef* o); -const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o); -uint32_t upb_OneofDef_Index(const upb_OneofDef* o); -bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o); -int upb_OneofDef_FieldCount(const upb_OneofDef* o); -const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i); - -/* Oneof lookups: - * - ntof: look up a field by name. - * - ntofz: look up a field by name (as a null-terminated string). - * - itof: look up a field by number. */ -const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, - const char* name, - size_t length); -UPB_INLINE const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, - const char* name) { - return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); -} -const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, - uint32_t num); - -/* upb_MessageDef *************************************************************/ - -/* Well-known field tag numbers for map-entry messages. */ -#define kUpb_MapEntry_KeyFieldNumber 1 -#define kUpb_MapEntry_ValueFieldNumber 2 - -/* Well-known field tag numbers for Any messages. */ -#define kUpb_Any_TypeFieldNumber 1 -#define kUpb_Any_ValueFieldNumber 2 - -/* Well-known field tag numbers for duration messages. */ -#define kUpb_Duration_SecondsFieldNumber 1 -#define kUpb_Duration_NanosFieldNumber 2 - -/* Well-known field tag numbers for timestamp messages. 
*/ -#define kUpb_Timestamp_SecondsFieldNumber 1 -#define kUpb_Timestamp_NanosFieldNumber 2 - -const google_protobuf_MessageOptions* upb_MessageDef_Options( - const upb_MessageDef* m); -bool upb_MessageDef_HasOptions(const upb_MessageDef* m); -const char* upb_MessageDef_FullName(const upb_MessageDef* m); -const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m); -const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m); -const char* upb_MessageDef_Name(const upb_MessageDef* m); -upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m); -upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m); -int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m); -int upb_MessageDef_FieldCount(const upb_MessageDef* m); -int upb_MessageDef_OneofCount(const upb_MessageDef* m); -const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, - int i); -const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i); -const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i); -const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, - uint32_t i); -const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( - const upb_MessageDef* m, const char* name, size_t len); -const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( - const upb_MessageDef* m, const char* name, size_t len); -const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m); - -UPB_INLINE const upb_OneofDef* upb_MessageDef_FindOneofByName( - const upb_MessageDef* m, const char* name) { - return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); -} - -UPB_INLINE const upb_FieldDef* upb_MessageDef_FindFieldByName( - const upb_MessageDef* m, const char* name) { - return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); -} - -UPB_INLINE bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { - return google_protobuf_MessageOptions_map_entry(upb_MessageDef_Options(m)); -} - 
-UPB_INLINE bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { - return google_protobuf_MessageOptions_message_set_wire_format( - upb_MessageDef_Options(m)); -} - -/* Nested entities. */ -int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m); -int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m); -int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m); -const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, - int i); -const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i); -const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, - int i); - -/* Lookup of either field or oneof by name. Returns whether either was found. - * If the return is true, then the found def will be set, and the non-found - * one set to NULL. */ -bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, - const char* name, size_t len, - const upb_FieldDef** f, - const upb_OneofDef** o); - -UPB_INLINE bool upb_MessageDef_FindByName(const upb_MessageDef* m, - const char* name, - const upb_FieldDef** f, - const upb_OneofDef** o) { - return upb_MessageDef_FindByNameWithSize(m, name, strlen(name), f, o); -} - -/* Returns a field by either JSON name or regular proto name. 
*/ -const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( - const upb_MessageDef* m, const char* name, size_t len); -UPB_INLINE const upb_FieldDef* upb_MessageDef_FindByJsonName( - const upb_MessageDef* m, const char* name) { - return upb_MessageDef_FindByJsonNameWithSize(m, name, strlen(name)); -} - -/* upb_ExtensionRange *********************************************************/ - -const google_protobuf_ExtensionRangeOptions* upb_ExtensionRange_Options( - const upb_ExtensionRange* r); -bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r); -int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r); -int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r); - -/* upb_EnumDef ****************************************************************/ - -const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e); -bool upb_EnumDef_HasOptions(const upb_EnumDef* e); -const char* upb_EnumDef_FullName(const upb_EnumDef* e); -const char* upb_EnumDef_Name(const upb_EnumDef* e); -const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e); -const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e); -int32_t upb_EnumDef_Default(const upb_EnumDef* e); -int upb_EnumDef_ValueCount(const upb_EnumDef* e); -const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i); - -const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( - const upb_EnumDef* e, const char* name, size_t len); -const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, - int32_t num); -bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num); - -// Convenience wrapper. -UPB_INLINE const upb_EnumValueDef* upb_EnumDef_FindValueByName( - const upb_EnumDef* e, const char* name) { - return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); -} - -// Builds and returns a mini descriptor, or NULL if OOM. 
-const char* _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a); - -/* upb_EnumValueDef ***********************************************************/ - -const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options( - const upb_EnumValueDef* e); -bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* e); -const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* e); -const char* upb_EnumValueDef_Name(const upb_EnumValueDef* e); -int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* e); -uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* e); -const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* e); - -/* upb_FileDef ****************************************************************/ - -const google_protobuf_FileOptions* upb_FileDef_Options(const upb_FileDef* f); -bool upb_FileDef_HasOptions(const upb_FileDef* f); -const char* upb_FileDef_Name(const upb_FileDef* f); -const char* upb_FileDef_Package(const upb_FileDef* f); -upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f); -int upb_FileDef_DependencyCount(const upb_FileDef* f); -int upb_FileDef_PublicDependencyCount(const upb_FileDef* f); -int upb_FileDef_WeakDependencyCount(const upb_FileDef* f); -int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f); -int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f); -int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f); -int upb_FileDef_ServiceCount(const upb_FileDef* f); -const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i); -const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i); -const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i); -const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i); -const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i); -const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i); -const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i); -const upb_DefPool* 
upb_FileDef_Pool(const upb_FileDef* f); -const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f); -const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f); - -/* upb_MethodDef **************************************************************/ - -const google_protobuf_MethodOptions* upb_MethodDef_Options( - const upb_MethodDef* m); -bool upb_MethodDef_HasOptions(const upb_MethodDef* m); -const char* upb_MethodDef_FullName(const upb_MethodDef* m); -int upb_MethodDef_Index(const upb_MethodDef* m); -const char* upb_MethodDef_Name(const upb_MethodDef* m); -const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m); -const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m); -const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m); -bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m); -bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m); - -/* upb_ServiceDef *************************************************************/ - -const google_protobuf_ServiceOptions* upb_ServiceDef_Options( - const upb_ServiceDef* s); -bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s); -const char* upb_ServiceDef_FullName(const upb_ServiceDef* s); -const char* upb_ServiceDef_Name(const upb_ServiceDef* s); -int upb_ServiceDef_Index(const upb_ServiceDef* s); -const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s); -int upb_ServiceDef_MethodCount(const upb_ServiceDef* s); -const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i); -const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, - const char* name); - -/* upb_DefPool ****************************************************************/ - -upb_DefPool* upb_DefPool_New(void); -void upb_DefPool_Free(upb_DefPool* s); -const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, - const char* sym); -const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( - const upb_DefPool* s, const char* sym, 
size_t len); -const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, - const char* sym); -const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, - const char* sym); -const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, - const char* sym); -const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( - const upb_DefPool* s, const char* sym, size_t len); -const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, - const char* name); -const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, - const char* name); -const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( - const upb_DefPool* s, const char* name, size_t size); -const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, - const char* name); -const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, - const char* name, - size_t len); -const upb_FileDef* upb_DefPool_AddFile( - upb_DefPool* s, const google_protobuf_FileDescriptorProto* file, - upb_Status* status); -size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); -upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s); -const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable( - const upb_DefPool* s, const upb_MiniTable_Extension* ext); -const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, - const upb_MessageDef* m, - int32_t fieldnum); -const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( - const upb_DefPool* s); -const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, - const upb_MessageDef* m, - size_t* count); - -/* For generated code only: loads a generated descriptor. */ -typedef struct _upb_DefPool_Init { - struct _upb_DefPool_Init** deps; /* Dependencies of this file. */ - const upb_MiniTable_File* layout; - const char* filename; - upb_StringView descriptor; /* Serialized descriptor. */ -} _upb_DefPool_Init; - -// Should only be directly called by tests. 
This variant lets us suppress -// the use of compiled-in tables, forcing a rebuild of the tables at runtime. -bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, - bool rebuild_minitable); - -UPB_INLINE bool _upb_DefPool_LoadDefInit(upb_DefPool* s, - const _upb_DefPool_Init* init) { - return _upb_DefPool_LoadDefInitEx(s, init, false); -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port_undef.inc" +#include "upb/reflection/def.h" #endif /* UPB_DEF_H_ */ diff --git a/upb/def.hpp b/upb/def.hpp index 49cc560816..4a62909ab2 100644 --- a/upb/def.hpp +++ b/upb/def.hpp @@ -23,419 +23,11 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// This header is deprecated, use upb/reflection/def.hpp instead + #ifndef UPB_DEF_HPP_ #define UPB_DEF_HPP_ -#include -#include -#include -#include - -#include "upb/def.h" -#include "upb/reflection.h" -#include "upb/upb.hpp" - -namespace upb { - -typedef upb_MessageValue MessageValue; - -class EnumDefPtr; -class FileDefPtr; -class MessageDefPtr; -class OneofDefPtr; - -// A upb::FieldDefPtr describes a single field in a message. It is most often -// found as a part of a upb_MessageDef, but can also stand alone to represent -// an extension. 
-class FieldDefPtr { - public: - FieldDefPtr() : ptr_(nullptr) {} - explicit FieldDefPtr(const upb_FieldDef* ptr) : ptr_(ptr) {} - - const upb_FieldDef* ptr() const { return ptr_; } - explicit operator bool() const { return ptr_ != nullptr; } - - typedef upb_CType Type; - typedef upb_Label Label; - typedef upb_FieldType DescriptorType; - - const char* full_name() const { return upb_FieldDef_FullName(ptr_); } - - Type type() const { return upb_FieldDef_CType(ptr_); } - Label label() const { return upb_FieldDef_Label(ptr_); } - const char* name() const { return upb_FieldDef_Name(ptr_); } - const char* json_name() const { return upb_FieldDef_JsonName(ptr_); } - uint32_t number() const { return upb_FieldDef_Number(ptr_); } - bool is_extension() const { return upb_FieldDef_IsExtension(ptr_); } - - // For non-string, non-submessage fields, this indicates whether binary - // protobufs are encoded in packed or non-packed format. - // - // Note: this accessor reflects the fact that "packed" has different defaults - // depending on whether the proto is proto2 or proto3. - bool packed() const { return upb_FieldDef_IsPacked(ptr_); } - - // An integer that can be used as an index into an array of fields for - // whatever message this field belongs to. Guaranteed to be less than - // f->containing_type()->field_count(). May only be accessed once the def has - // been finalized. - uint32_t index() const { return upb_FieldDef_Index(ptr_); } - - // The MessageDef to which this field belongs. - // - // If this field has been added to a MessageDef, that message can be retrieved - // directly (this is always the case for frozen FieldDefs). - // - // If the field has not yet been added to a MessageDef, you can set the name - // of the containing type symbolically instead. This is mostly useful for - // extensions, where the extension is declared separately from the message. 
- MessageDefPtr containing_type() const; - - // The OneofDef to which this field belongs, or NULL if this field is not part - // of a oneof. - OneofDefPtr containing_oneof() const; - - // The field's type according to the enum in descriptor.proto. This is not - // the same as UPB_TYPE_*, because it distinguishes between (for example) - // INT32 and SINT32, whereas our "type" enum does not. This return of - // descriptor_type() is a function of type(), integer_format(), and - // is_tag_delimited(). - DescriptorType descriptor_type() const { return upb_FieldDef_Type(ptr_); } - - // Convenient field type tests. - bool IsSubMessage() const { return upb_FieldDef_IsSubMessage(ptr_); } - bool IsString() const { return upb_FieldDef_IsString(ptr_); } - bool IsSequence() const { return upb_FieldDef_IsRepeated(ptr_); } - bool IsPrimitive() const { return upb_FieldDef_IsPrimitive(ptr_); } - bool IsMap() const { return upb_FieldDef_IsMap(ptr_); } - - MessageValue default_value() const { return upb_FieldDef_Default(ptr_); } - - // Returns the enum or submessage def for this field, if any. The field's - // type must match (ie. you may only call enum_subdef() for fields where - // type() == kUpb_CType_Enum). - EnumDefPtr enum_subdef() const; - MessageDefPtr message_subdef() const; - - private: - const upb_FieldDef* ptr_; -}; - -// Class that represents a oneof. -class OneofDefPtr { - public: - OneofDefPtr() : ptr_(nullptr) {} - explicit OneofDefPtr(const upb_OneofDef* ptr) : ptr_(ptr) {} - - const upb_OneofDef* ptr() const { return ptr_; } - explicit operator bool() const { return ptr_ != nullptr; } - - // Returns the MessageDef that contains this OneofDef. - MessageDefPtr containing_type() const; - - // Returns the name of this oneof. - const char* name() const { return upb_OneofDef_Name(ptr_); } - - // Returns the number of fields in the oneof. 
- int field_count() const { return upb_OneofDef_FieldCount(ptr_); } - FieldDefPtr field(int i) const { - return FieldDefPtr(upb_OneofDef_Field(ptr_, i)); - } - - // Looks up by name. - FieldDefPtr FindFieldByName(const char* name, size_t len) const { - return FieldDefPtr(upb_OneofDef_LookupNameWithSize(ptr_, name, len)); - } - FieldDefPtr FindFieldByName(const char* name) const { - return FieldDefPtr(upb_OneofDef_LookupName(ptr_, name)); - } - - template - FieldDefPtr FindFieldByName(const T& str) const { - return FindFieldByName(str.c_str(), str.size()); - } - - // Looks up by tag number. - FieldDefPtr FindFieldByNumber(uint32_t num) const { - return FieldDefPtr(upb_OneofDef_LookupNumber(ptr_, num)); - } - - private: - const upb_OneofDef* ptr_; -}; - -// Structure that describes a single .proto message type. -class MessageDefPtr { - public: - MessageDefPtr() : ptr_(nullptr) {} - explicit MessageDefPtr(const upb_MessageDef* ptr) : ptr_(ptr) {} - - const upb_MessageDef* ptr() const { return ptr_; } - explicit operator bool() const { return ptr_ != nullptr; } - - FileDefPtr file() const; - - const char* full_name() const { return upb_MessageDef_FullName(ptr_); } - const char* name() const { return upb_MessageDef_Name(ptr_); } - - // The number of fields that belong to the MessageDef. - int field_count() const { return upb_MessageDef_FieldCount(ptr_); } - FieldDefPtr field(int i) const { - return FieldDefPtr(upb_MessageDef_Field(ptr_, i)); - } - - // The number of oneofs that belong to the MessageDef. - int oneof_count() const { return upb_MessageDef_OneofCount(ptr_); } - OneofDefPtr oneof(int i) const { - return OneofDefPtr(upb_MessageDef_Oneof(ptr_, i)); - } - - upb_Syntax syntax() const { return upb_MessageDef_Syntax(ptr_); } - - // These return null pointers if the field is not found. 
- FieldDefPtr FindFieldByNumber(uint32_t number) const { - return FieldDefPtr(upb_MessageDef_FindFieldByNumber(ptr_, number)); - } - FieldDefPtr FindFieldByName(const char* name, size_t len) const { - return FieldDefPtr(upb_MessageDef_FindFieldByNameWithSize(ptr_, name, len)); - } - FieldDefPtr FindFieldByName(const char* name) const { - return FieldDefPtr(upb_MessageDef_FindFieldByName(ptr_, name)); - } - - template - FieldDefPtr FindFieldByName(const T& str) const { - return FindFieldByName(str.c_str(), str.size()); - } - - OneofDefPtr FindOneofByName(const char* name, size_t len) const { - return OneofDefPtr(upb_MessageDef_FindOneofByNameWithSize(ptr_, name, len)); - } - - OneofDefPtr FindOneofByName(const char* name) const { - return OneofDefPtr(upb_MessageDef_FindOneofByName(ptr_, name)); - } - - template - OneofDefPtr FindOneofByName(const T& str) const { - return FindOneofByName(str.c_str(), str.size()); - } - - // Is this message a map entry? - bool mapentry() const { return upb_MessageDef_IsMapEntry(ptr_); } - - // Return the type of well known type message. kUpb_WellKnown_Unspecified for - // non-well-known message. 
- upb_WellKnown wellknowntype() const { - return upb_MessageDef_WellKnownType(ptr_); - } - - private: - class FieldIter { - public: - explicit FieldIter(const upb_MessageDef* m, int i) : m_(m), i_(i) {} - void operator++() { i_++; } - - FieldDefPtr operator*() { - return FieldDefPtr(upb_MessageDef_Field(m_, i_)); - } - bool operator!=(const FieldIter& other) { return i_ != other.i_; } - bool operator==(const FieldIter& other) { return i_ == other.i_; } - - private: - const upb_MessageDef* m_; - int i_; - }; - - class FieldAccessor { - public: - explicit FieldAccessor(const upb_MessageDef* md) : md_(md) {} - FieldIter begin() { return FieldIter(md_, 0); } - FieldIter end() { return FieldIter(md_, upb_MessageDef_FieldCount(md_)); } - - private: - const upb_MessageDef* md_; - }; - - class OneofIter { - public: - explicit OneofIter(const upb_MessageDef* m, int i) : m_(m), i_(i) {} - void operator++() { i_++; } - - OneofDefPtr operator*() { - return OneofDefPtr(upb_MessageDef_Oneof(m_, i_)); - } - bool operator!=(const OneofIter& other) { return i_ != other.i_; } - bool operator==(const OneofIter& other) { return i_ == other.i_; } - - private: - const upb_MessageDef* m_; - int i_; - }; - - class OneofAccessor { - public: - explicit OneofAccessor(const upb_MessageDef* md) : md_(md) {} - OneofIter begin() { return OneofIter(md_, 0); } - OneofIter end() { return OneofIter(md_, upb_MessageDef_OneofCount(md_)); } - - private: - const upb_MessageDef* md_; - }; - - public: - FieldAccessor fields() const { return FieldAccessor(ptr()); } - OneofAccessor oneofs() const { return OneofAccessor(ptr()); } - - private: - const upb_MessageDef* ptr_; -}; - -class EnumValDefPtr { - public: - EnumValDefPtr() : ptr_(nullptr) {} - explicit EnumValDefPtr(const upb_EnumValueDef* ptr) : ptr_(ptr) {} - - int32_t number() const { return upb_EnumValueDef_Number(ptr_); } - const char* full_name() const { return upb_EnumValueDef_FullName(ptr_); } - const char* name() const { return 
upb_EnumValueDef_Name(ptr_); } - - private: - const upb_EnumValueDef* ptr_; -}; - -class EnumDefPtr { - public: - EnumDefPtr() : ptr_(nullptr) {} - explicit EnumDefPtr(const upb_EnumDef* ptr) : ptr_(ptr) {} - - const upb_EnumDef* ptr() const { return ptr_; } - explicit operator bool() const { return ptr_ != nullptr; } - - const char* full_name() const { return upb_EnumDef_FullName(ptr_); } - const char* name() const { return upb_EnumDef_Name(ptr_); } - - // The value that is used as the default when no field default is specified. - // If not set explicitly, the first value that was added will be used. - // The default value must be a member of the enum. - // Requires that value_count() > 0. - int32_t default_value() const { return upb_EnumDef_Default(ptr_); } - - // Returns the number of values currently defined in the enum. Note that - // multiple names can refer to the same number, so this may be greater than - // the total number of unique numbers. - int value_count() const { return upb_EnumDef_ValueCount(ptr_); } - - // Lookups from name to integer, returning true if found. - EnumValDefPtr FindValueByName(const char* name) const { - return EnumValDefPtr(upb_EnumDef_FindValueByName(ptr_, name)); - } - - // Finds the name corresponding to the given number, or NULL if none was - // found. If more than one name corresponds to this number, returns the - // first one that was added. - EnumValDefPtr FindValueByNumber(int32_t num) const { - return EnumValDefPtr(upb_EnumDef_FindValueByNumber(ptr_, num)); - } - - private: - const upb_EnumDef* ptr_; -}; - -// Class that represents a .proto file with some things defined in it. -// -// Many users won't care about FileDefs, but they are necessary if you want to -// read the values of file-level options. 
-class FileDefPtr { - public: - explicit FileDefPtr(const upb_FileDef* ptr) : ptr_(ptr) {} - - const upb_FileDef* ptr() const { return ptr_; } - explicit operator bool() const { return ptr_ != nullptr; } - - // Get/set name of the file (eg. "foo/bar.proto"). - const char* name() const { return upb_FileDef_Name(ptr_); } - - // Package name for definitions inside the file (eg. "foo.bar"). - const char* package() const { return upb_FileDef_Package(ptr_); } - - // Syntax for the file. Defaults to proto2. - upb_Syntax syntax() const { return upb_FileDef_Syntax(ptr_); } - - // Get the list of dependencies from the file. These are returned in the - // order that they were added to the FileDefPtr. - int dependency_count() const { return upb_FileDef_DependencyCount(ptr_); } - const FileDefPtr dependency(int index) const { - return FileDefPtr(upb_FileDef_Dependency(ptr_, index)); - } - - private: - const upb_FileDef* ptr_; -}; - -// Non-const methods in upb::DefPool are NOT thread-safe. -class DefPool { - public: - DefPool() : ptr_(upb_DefPool_New(), upb_DefPool_Free) {} - explicit DefPool(upb_DefPool* s) : ptr_(s, upb_DefPool_Free) {} - - const upb_DefPool* ptr() const { return ptr_.get(); } - upb_DefPool* ptr() { return ptr_.get(); } - - // Finds an entry in the symbol table with this exact name. If not found, - // returns NULL. - MessageDefPtr FindMessageByName(const char* sym) const { - return MessageDefPtr(upb_DefPool_FindMessageByName(ptr_.get(), sym)); - } - - EnumDefPtr FindEnumByName(const char* sym) const { - return EnumDefPtr(upb_DefPool_FindEnumByName(ptr_.get(), sym)); - } - - FileDefPtr FindFileByName(const char* name) const { - return FileDefPtr(upb_DefPool_FindFileByName(ptr_.get(), name)); - } - - // TODO: iteration? - - // Adds the given serialized FileDescriptorProto to the pool. 
- FileDefPtr AddFile(const google_protobuf_FileDescriptorProto* file_proto, - Status* status) { - return FileDefPtr( - upb_DefPool_AddFile(ptr_.get(), file_proto, status->ptr())); - } - - private: - std::unique_ptr ptr_; -}; - -// TODO(b/236632406): This typedef is deprecated. Delete it. -using SymbolTable = DefPool; - -inline FileDefPtr MessageDefPtr::file() const { - return FileDefPtr(upb_MessageDef_File(ptr_)); -} - -inline MessageDefPtr FieldDefPtr::message_subdef() const { - return MessageDefPtr(upb_FieldDef_MessageSubDef(ptr_)); -} - -inline MessageDefPtr FieldDefPtr::containing_type() const { - return MessageDefPtr(upb_FieldDef_ContainingType(ptr_)); -} - -inline MessageDefPtr OneofDefPtr::containing_type() const { - return MessageDefPtr(upb_OneofDef_ContainingType(ptr_)); -} - -inline OneofDefPtr FieldDefPtr::containing_oneof() const { - return OneofDefPtr(upb_FieldDef_ContainingOneof(ptr_)); -} - -inline EnumDefPtr FieldDefPtr::enum_subdef() const { - return EnumDefPtr(upb_FieldDef_EnumSubDef(ptr_)); -} - -} // namespace upb +#include "upb/reflection/def.hpp" #endif // UPB_DEF_HPP_ diff --git a/upb/mini_table_accessors.c b/upb/mini_table_accessors.c index 4c892e0e9e..69847a96a3 100644 --- a/upb/mini_table_accessors.c +++ b/upb/mini_table_accessors.c @@ -186,7 +186,6 @@ upb_GetExtension_Status upb_MiniTable_GetOrPromoteExtension( int field_number = ext_table->field.number; upb_FindUnknownRet result = upb_MiniTable_FindUnknown(msg, field_number); if (result.status != kUpb_FindUnknown_Ok) { - UPB_ASSERT(result.status != kUpb_GetExtension_ParseError); return kUpb_GetExtension_NotPresent; } // Decode and promote from unknown. 
@@ -197,7 +196,7 @@ upb_GetExtension_Status upb_MiniTable_GetOrPromoteExtension( } const char* data = result.ptr; uint32_t tag; - uint64_t message_len; + uint64_t message_len = 0; data = decode_tag(data, &tag); data = decode_varint64(data, &message_len); upb_DecodeStatus status = @@ -245,7 +244,6 @@ upb_GetExtensionAsBytes_Status upb_MiniTable_GetExtensionAsBytes( int field_number = ext_table->field.number; upb_FindUnknownRet result = upb_MiniTable_FindUnknown(msg, field_number); if (result.status != kUpb_FindUnknown_Ok) { - UPB_ASSERT(result.status != kUpb_GetExtension_ParseError); return kUpb_GetExtensionAsBytes_NotPresent; } const char* data = result.ptr; @@ -328,7 +326,7 @@ upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg, uint64_t uint64_val; while (ptr < end) { - uint32_t tag; + uint32_t tag = 0; int field; int wire_type; const char* unknown_begin = ptr; diff --git a/upb/mini_table_accessors_test.cc b/upb/mini_table_accessors_test.cc index 7f66a94ba7..3b57c7a8fa 100644 --- a/upb/mini_table_accessors_test.cc +++ b/upb/mini_table_accessors_test.cc @@ -446,10 +446,9 @@ TEST(GeneratedCode, Extensions) { // Get unknown extension bytes before promotion. const char* extension_data; size_t len; - upb_GetExtensionAsBytes_Status status = status = - upb_MiniTable_GetExtensionAsBytes(base_msg, - &upb_test_ModelExtension2_model_ext_ext, - 0, arena, &extension_data, &len); + upb_GetExtensionAsBytes_Status status = upb_MiniTable_GetExtensionAsBytes( + base_msg, &upb_test_ModelExtension2_model_ext_ext, 0, arena, + &extension_data, &len); EXPECT_EQ(kUpb_GetExtensionAsBytes_Ok, status); EXPECT_EQ(0x48, extension_data[0]); EXPECT_EQ(5, extension_data[1]); diff --git a/upb/reflection.h b/upb/reflection.h index 1fea8c6972..7b751378fa 100644 --- a/upb/reflection.h +++ b/upb/reflection.h @@ -25,87 +25,11 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +// This header is deprecated, use upb/reflection/message.h instead + #ifndef UPB_REFLECTION_H_ #define UPB_REFLECTION_H_ -#include "upb/def.h" -#include "upb/message_value.h" -#include "upb/msg.h" -#include "upb/upb.h" - -// Must be last. -#include "upb/port_def.inc" - -#ifdef __cplusplus -extern "C" { -#endif - -upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f); - -/** upb_Message - * *******************************************************************/ - -/* Creates a new message of the given type in the given arena. */ -upb_Message* upb_Message_New(const upb_MessageDef* m, upb_Arena* a); - -/* Returns the value associated with this field. */ -upb_MessageValue upb_Message_Get(const upb_Message* msg, const upb_FieldDef* f); - -/* Returns a mutable pointer to a map, array, or submessage value. If the given - * arena is non-NULL this will construct a new object if it was not previously - * present. May not be called for primitive fields. */ -upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, - const upb_FieldDef* f, - upb_Arena* a); - -/* May only be called for fields where upb_FieldDef_HasPresence(f) == true. */ -bool upb_Message_Has(const upb_Message* msg, const upb_FieldDef* f); - -/* Returns the field that is set in the oneof, or NULL if none are set. */ -const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, - const upb_OneofDef* o); - -/* Sets the given field to the given value. For a msg/array/map/string, the - * caller must ensure that the target data outlives |msg| (by living either in - * the same arena or a different arena that outlives it). - * - * Returns false if allocation fails. */ -bool upb_Message_Set(upb_Message* msg, const upb_FieldDef* f, - upb_MessageValue val, upb_Arena* a); - -/* Clears any field presence and sets the value back to its default. */ -void upb_Message_ClearField(upb_Message* msg, const upb_FieldDef* f); - -/* Clear all data and unknown fields. 
*/ -void upb_Message_Clear(upb_Message* msg, const upb_MessageDef* m); - -/* Iterate over present fields. - * - * size_t iter = kUpb_Message_Begin; - * const upb_FieldDef *f; - * upb_MessageValue val; - * while (upb_Message_Next(msg, m, ext_pool, &f, &val, &iter)) { - * process_field(f, val); - * } - * - * If ext_pool is NULL, no extensions will be returned. If the given symtab - * returns extensions that don't match what is in this message, those extensions - * will be skipped. - */ - -#define kUpb_Message_Begin -1 -bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, - const upb_DefPool* ext_pool, const upb_FieldDef** f, - upb_MessageValue* val, size_t* iter); - -/* Clears all unknown field data from this message and all submessages. */ -bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, - int maxdepth); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port_undef.inc" +#include "upb/reflection/message.h" #endif /* UPB_REFLECTION_H_ */ diff --git a/upb/reflection.hpp b/upb/reflection.hpp index b653893c0d..a2443fb6d4 100644 --- a/upb/reflection.hpp +++ b/upb/reflection.hpp @@ -23,15 +23,11 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// This header is deprecated, use upb/reflection/message.hpp instead + #ifndef UPB_REFLECTION_HPP_ #define UPB_REFLECTION_HPP_ -#include "upb/reflection.h" - -namespace upb { - -typedef upb_MessageValue MessageValue; - -} // namespace upb +#include "upb/reflection/message.hpp" #endif // UPB_REFLECTION_HPP_ diff --git a/upb/reflection/common.h b/upb/reflection/common.h new file mode 100644 index 0000000000..04b98a620e --- /dev/null +++ b/upb/reflection/common.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +// Declarations common to all public def types. + +#ifndef UPB_REFLECTION_COMMON_H_ +#define UPB_REFLECTION_COMMON_H_ + +#include "google/protobuf/descriptor.upb.h" + +typedef enum { kUpb_Syntax_Proto2 = 2, kUpb_Syntax_Proto3 = 3 } upb_Syntax; + +// Forward declarations for circular references. 
+typedef struct upb_DefPool upb_DefPool; +typedef struct upb_EnumDef upb_EnumDef; +typedef struct upb_EnumValueDef upb_EnumValueDef; +typedef struct upb_ExtensionRange upb_ExtensionRange; +typedef struct upb_FieldDef upb_FieldDef; +typedef struct upb_FileDef upb_FileDef; +typedef struct upb_MessageDef upb_MessageDef; +typedef struct upb_MethodDef upb_MethodDef; +typedef struct upb_OneofDef upb_OneofDef; +typedef struct upb_ServiceDef upb_ServiceDef; + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +typedef struct upb_DefBuilder upb_DefBuilder; + +#endif /* UPB_REFLECTION_COMMON_H_ */ diff --git a/upb/mini_descriptor.c b/upb/reflection/def.h similarity index 71% rename from upb/mini_descriptor.c rename to upb/reflection/def.h index 321af1be5d..be04a9a596 100644 --- a/upb/mini_descriptor.c +++ b/upb/reflection/def.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2022, Google LLC + * Copyright (c) 2009-2021, Google LLC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,21 +25,18 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/internal/mini_descriptor.h" +#ifndef UPB_REFLECTION_DEF_H_ +#define UPB_REFLECTION_DEF_H_ -// Must be last. 
-#include "upb/port_def.inc" +#include "upb/reflection/def_pool.h" +#include "upb/reflection/enum_def.h" +#include "upb/reflection/enum_value_def.h" +#include "upb/reflection/extension_range.h" +#include "upb/reflection/field_def.h" +#include "upb/reflection/file_def.h" +#include "upb/reflection/message_def.h" +#include "upb/reflection/method_def.h" +#include "upb/reflection/oneof_def.h" +#include "upb/reflection/service_def.h" -const char* upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a) { - return _upb_EnumDef_MiniDescriptor(e, a); -} - -const char* upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, - upb_Arena* a) { - return _upb_MiniDescriptor_EncodeField(f, a); -} - -const char* upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, - upb_Arena* a) { - return _upb_MiniDescriptor_EncodeMessage(m, a); -} +#endif /* UPB_REFLECTION_DEF_H_ */ diff --git a/upb/reflection/def.hpp b/upb/reflection/def.hpp new file mode 100644 index 0000000000..55dd3b9a85 --- /dev/null +++ b/upb/reflection/def.hpp @@ -0,0 +1,441 @@ +// Copyright (c) 2009-2021, Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Google LLC nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef UPB_REFLECTION_DEF_HPP_ +#define UPB_REFLECTION_DEF_HPP_ + +#include +#include +#include +#include + +#include "upb/reflection/def.h" +#include "upb/reflection/message.h" +#include "upb/upb.hpp" + +namespace upb { + +typedef upb_MessageValue MessageValue; + +class EnumDefPtr; +class FileDefPtr; +class MessageDefPtr; +class OneofDefPtr; + +// A upb::FieldDefPtr describes a single field in a message. It is most often +// found as a part of a upb_MessageDef, but can also stand alone to represent +// an extension. 
+class FieldDefPtr { + public: + FieldDefPtr() : ptr_(nullptr) {} + explicit FieldDefPtr(const upb_FieldDef* ptr) : ptr_(ptr) {} + + const upb_FieldDef* ptr() const { return ptr_; } + explicit operator bool() const { return ptr_ != nullptr; } + + typedef upb_CType Type; + typedef upb_Label Label; + typedef upb_FieldType DescriptorType; + + const char* full_name() const { return upb_FieldDef_FullName(ptr_); } + + Type type() const { return upb_FieldDef_CType(ptr_); } + Label label() const { return upb_FieldDef_Label(ptr_); } + const char* name() const { return upb_FieldDef_Name(ptr_); } + const char* json_name() const { return upb_FieldDef_JsonName(ptr_); } + uint32_t number() const { return upb_FieldDef_Number(ptr_); } + bool is_extension() const { return upb_FieldDef_IsExtension(ptr_); } + + // For non-string, non-submessage fields, this indicates whether binary + // protobufs are encoded in packed or non-packed format. + // + // Note: this accessor reflects the fact that "packed" has different defaults + // depending on whether the proto is proto2 or proto3. + bool packed() const { return upb_FieldDef_IsPacked(ptr_); } + + // An integer that can be used as an index into an array of fields for + // whatever message this field belongs to. Guaranteed to be less than + // f->containing_type()->field_count(). May only be accessed once the def has + // been finalized. + uint32_t index() const { return upb_FieldDef_Index(ptr_); } + + // The MessageDef to which this field belongs. + // + // If this field has been added to a MessageDef, that message can be retrieved + // directly (this is always the case for frozen FieldDefs). + // + // If the field has not yet been added to a MessageDef, you can set the name + // of the containing type symbolically instead. This is mostly useful for + // extensions, where the extension is declared separately from the message. 
+ MessageDefPtr containing_type() const; + + // The OneofDef to which this field belongs, or NULL if this field is not part + // of a oneof. + OneofDefPtr containing_oneof() const; + + // The field's type according to the enum in descriptor.proto. This is not + // the same as UPB_TYPE_*, because it distinguishes between (for example) + // INT32 and SINT32, whereas our "type" enum does not. This return of + // descriptor_type() is a function of type(), integer_format(), and + // is_tag_delimited(). + DescriptorType descriptor_type() const { return upb_FieldDef_Type(ptr_); } + + // Convenient field type tests. + bool IsSubMessage() const { return upb_FieldDef_IsSubMessage(ptr_); } + bool IsString() const { return upb_FieldDef_IsString(ptr_); } + bool IsSequence() const { return upb_FieldDef_IsRepeated(ptr_); } + bool IsPrimitive() const { return upb_FieldDef_IsPrimitive(ptr_); } + bool IsMap() const { return upb_FieldDef_IsMap(ptr_); } + + MessageValue default_value() const { return upb_FieldDef_Default(ptr_); } + + // Returns the enum or submessage def for this field, if any. The field's + // type must match (ie. you may only call enum_subdef() for fields where + // type() == kUpb_CType_Enum). + EnumDefPtr enum_subdef() const; + MessageDefPtr message_subdef() const; + + private: + const upb_FieldDef* ptr_; +}; + +// Class that represents a oneof. +class OneofDefPtr { + public: + OneofDefPtr() : ptr_(nullptr) {} + explicit OneofDefPtr(const upb_OneofDef* ptr) : ptr_(ptr) {} + + const upb_OneofDef* ptr() const { return ptr_; } + explicit operator bool() const { return ptr_ != nullptr; } + + // Returns the MessageDef that contains this OneofDef. + MessageDefPtr containing_type() const; + + // Returns the name of this oneof. + const char* name() const { return upb_OneofDef_Name(ptr_); } + + // Returns the number of fields in the oneof. 
+ int field_count() const { return upb_OneofDef_FieldCount(ptr_); } + FieldDefPtr field(int i) const { + return FieldDefPtr(upb_OneofDef_Field(ptr_, i)); + } + + // Looks up by name. + FieldDefPtr FindFieldByName(const char* name, size_t len) const { + return FieldDefPtr(upb_OneofDef_LookupNameWithSize(ptr_, name, len)); + } + FieldDefPtr FindFieldByName(const char* name) const { + return FieldDefPtr(upb_OneofDef_LookupName(ptr_, name)); + } + + template + FieldDefPtr FindFieldByName(const T& str) const { + return FindFieldByName(str.c_str(), str.size()); + } + + // Looks up by tag number. + FieldDefPtr FindFieldByNumber(uint32_t num) const { + return FieldDefPtr(upb_OneofDef_LookupNumber(ptr_, num)); + } + + private: + const upb_OneofDef* ptr_; +}; + +// Structure that describes a single .proto message type. +class MessageDefPtr { + public: + MessageDefPtr() : ptr_(nullptr) {} + explicit MessageDefPtr(const upb_MessageDef* ptr) : ptr_(ptr) {} + + const upb_MessageDef* ptr() const { return ptr_; } + explicit operator bool() const { return ptr_ != nullptr; } + + FileDefPtr file() const; + + const char* full_name() const { return upb_MessageDef_FullName(ptr_); } + const char* name() const { return upb_MessageDef_Name(ptr_); } + + // The number of fields that belong to the MessageDef. + int field_count() const { return upb_MessageDef_FieldCount(ptr_); } + FieldDefPtr field(int i) const { + return FieldDefPtr(upb_MessageDef_Field(ptr_, i)); + } + + // The number of oneofs that belong to the MessageDef. + int oneof_count() const { return upb_MessageDef_OneofCount(ptr_); } + OneofDefPtr oneof(int i) const { + return OneofDefPtr(upb_MessageDef_Oneof(ptr_, i)); + } + + upb_Syntax syntax() const { return upb_MessageDef_Syntax(ptr_); } + + // These return null pointers if the field is not found. 
+ FieldDefPtr FindFieldByNumber(uint32_t number) const { + return FieldDefPtr(upb_MessageDef_FindFieldByNumber(ptr_, number)); + } + FieldDefPtr FindFieldByName(const char* name, size_t len) const { + return FieldDefPtr(upb_MessageDef_FindFieldByNameWithSize(ptr_, name, len)); + } + FieldDefPtr FindFieldByName(const char* name) const { + return FieldDefPtr(upb_MessageDef_FindFieldByName(ptr_, name)); + } + + template + FieldDefPtr FindFieldByName(const T& str) const { + return FindFieldByName(str.c_str(), str.size()); + } + + OneofDefPtr FindOneofByName(const char* name, size_t len) const { + return OneofDefPtr(upb_MessageDef_FindOneofByNameWithSize(ptr_, name, len)); + } + + OneofDefPtr FindOneofByName(const char* name) const { + return OneofDefPtr(upb_MessageDef_FindOneofByName(ptr_, name)); + } + + template + OneofDefPtr FindOneofByName(const T& str) const { + return FindOneofByName(str.c_str(), str.size()); + } + + // Is this message a map entry? + bool mapentry() const { return upb_MessageDef_IsMapEntry(ptr_); } + + // Return the type of well known type message. kUpb_WellKnown_Unspecified for + // non-well-known message. 
+ upb_WellKnown wellknowntype() const { + return upb_MessageDef_WellKnownType(ptr_); + } + + private: + class FieldIter { + public: + explicit FieldIter(const upb_MessageDef* m, int i) : m_(m), i_(i) {} + void operator++() { i_++; } + + FieldDefPtr operator*() { + return FieldDefPtr(upb_MessageDef_Field(m_, i_)); + } + bool operator!=(const FieldIter& other) { return i_ != other.i_; } + bool operator==(const FieldIter& other) { return i_ == other.i_; } + + private: + const upb_MessageDef* m_; + int i_; + }; + + class FieldAccessor { + public: + explicit FieldAccessor(const upb_MessageDef* md) : md_(md) {} + FieldIter begin() { return FieldIter(md_, 0); } + FieldIter end() { return FieldIter(md_, upb_MessageDef_FieldCount(md_)); } + + private: + const upb_MessageDef* md_; + }; + + class OneofIter { + public: + explicit OneofIter(const upb_MessageDef* m, int i) : m_(m), i_(i) {} + void operator++() { i_++; } + + OneofDefPtr operator*() { + return OneofDefPtr(upb_MessageDef_Oneof(m_, i_)); + } + bool operator!=(const OneofIter& other) { return i_ != other.i_; } + bool operator==(const OneofIter& other) { return i_ == other.i_; } + + private: + const upb_MessageDef* m_; + int i_; + }; + + class OneofAccessor { + public: + explicit OneofAccessor(const upb_MessageDef* md) : md_(md) {} + OneofIter begin() { return OneofIter(md_, 0); } + OneofIter end() { return OneofIter(md_, upb_MessageDef_OneofCount(md_)); } + + private: + const upb_MessageDef* md_; + }; + + public: + FieldAccessor fields() const { return FieldAccessor(ptr()); } + OneofAccessor oneofs() const { return OneofAccessor(ptr()); } + + private: + const upb_MessageDef* ptr_; +}; + +class EnumValDefPtr { + public: + EnumValDefPtr() : ptr_(nullptr) {} + explicit EnumValDefPtr(const upb_EnumValueDef* ptr) : ptr_(ptr) {} + + int32_t number() const { return upb_EnumValueDef_Number(ptr_); } + const char* full_name() const { return upb_EnumValueDef_FullName(ptr_); } + const char* name() const { return 
upb_EnumValueDef_Name(ptr_); } + + private: + const upb_EnumValueDef* ptr_; +}; + +class EnumDefPtr { + public: + EnumDefPtr() : ptr_(nullptr) {} + explicit EnumDefPtr(const upb_EnumDef* ptr) : ptr_(ptr) {} + + const upb_EnumDef* ptr() const { return ptr_; } + explicit operator bool() const { return ptr_ != nullptr; } + + const char* full_name() const { return upb_EnumDef_FullName(ptr_); } + const char* name() const { return upb_EnumDef_Name(ptr_); } + + // The value that is used as the default when no field default is specified. + // If not set explicitly, the first value that was added will be used. + // The default value must be a member of the enum. + // Requires that value_count() > 0. + int32_t default_value() const { return upb_EnumDef_Default(ptr_); } + + // Returns the number of values currently defined in the enum. Note that + // multiple names can refer to the same number, so this may be greater than + // the total number of unique numbers. + int value_count() const { return upb_EnumDef_ValueCount(ptr_); } + + // Lookups from name to integer, returning true if found. + EnumValDefPtr FindValueByName(const char* name) const { + return EnumValDefPtr(upb_EnumDef_FindValueByName(ptr_, name)); + } + + // Finds the name corresponding to the given number, or NULL if none was + // found. If more than one name corresponds to this number, returns the + // first one that was added. + EnumValDefPtr FindValueByNumber(int32_t num) const { + return EnumValDefPtr(upb_EnumDef_FindValueByNumber(ptr_, num)); + } + + private: + const upb_EnumDef* ptr_; +}; + +// Class that represents a .proto file with some things defined in it. +// +// Many users won't care about FileDefs, but they are necessary if you want to +// read the values of file-level options. 
+class FileDefPtr { + public: + explicit FileDefPtr(const upb_FileDef* ptr) : ptr_(ptr) {} + + const upb_FileDef* ptr() const { return ptr_; } + explicit operator bool() const { return ptr_ != nullptr; } + + // Get/set name of the file (eg. "foo/bar.proto"). + const char* name() const { return upb_FileDef_Name(ptr_); } + + // Package name for definitions inside the file (eg. "foo.bar"). + const char* package() const { return upb_FileDef_Package(ptr_); } + + // Syntax for the file. Defaults to proto2. + upb_Syntax syntax() const { return upb_FileDef_Syntax(ptr_); } + + // Get the list of dependencies from the file. These are returned in the + // order that they were added to the FileDefPtr. + int dependency_count() const { return upb_FileDef_DependencyCount(ptr_); } + const FileDefPtr dependency(int index) const { + return FileDefPtr(upb_FileDef_Dependency(ptr_, index)); + } + + private: + const upb_FileDef* ptr_; +}; + +// Non-const methods in upb::DefPool are NOT thread-safe. +class DefPool { + public: + DefPool() : ptr_(upb_DefPool_New(), upb_DefPool_Free) {} + explicit DefPool(upb_DefPool* s) : ptr_(s, upb_DefPool_Free) {} + + const upb_DefPool* ptr() const { return ptr_.get(); } + upb_DefPool* ptr() { return ptr_.get(); } + + // Finds an entry in the symbol table with this exact name. If not found, + // returns NULL. + MessageDefPtr FindMessageByName(const char* sym) const { + return MessageDefPtr(upb_DefPool_FindMessageByName(ptr_.get(), sym)); + } + + EnumDefPtr FindEnumByName(const char* sym) const { + return EnumDefPtr(upb_DefPool_FindEnumByName(ptr_.get(), sym)); + } + + FileDefPtr FindFileByName(const char* name) const { + return FileDefPtr(upb_DefPool_FindFileByName(ptr_.get(), name)); + } + + // TODO: iteration? + + // Adds the given serialized FileDescriptorProto to the pool. 
+ FileDefPtr AddFile(const google_protobuf_FileDescriptorProto* file_proto, + Status* status) { + return FileDefPtr( + upb_DefPool_AddFile(ptr_.get(), file_proto, status->ptr())); + } + + private: + std::unique_ptr ptr_; +}; + +// TODO(b/236632406): This typedef is deprecated. Delete it. +using SymbolTable = DefPool; + +inline FileDefPtr MessageDefPtr::file() const { + return FileDefPtr(upb_MessageDef_File(ptr_)); +} + +inline MessageDefPtr FieldDefPtr::message_subdef() const { + return MessageDefPtr(upb_FieldDef_MessageSubDef(ptr_)); +} + +inline MessageDefPtr FieldDefPtr::containing_type() const { + return MessageDefPtr(upb_FieldDef_ContainingType(ptr_)); +} + +inline MessageDefPtr OneofDefPtr::containing_type() const { + return MessageDefPtr(upb_OneofDef_ContainingType(ptr_)); +} + +inline OneofDefPtr FieldDefPtr::containing_oneof() const { + return OneofDefPtr(upb_FieldDef_ContainingOneof(ptr_)); +} + +inline EnumDefPtr FieldDefPtr::enum_subdef() const { + return EnumDefPtr(upb_FieldDef_EnumSubDef(ptr_)); +} + +} // namespace upb + +#endif // UPB_REFLECTION_DEF_HPP_ diff --git a/upb/reflection/def_builder.c b/upb/reflection/def_builder.c new file mode 100644 index 0000000000..8ca6404778 --- /dev/null +++ b/upb/reflection/def_builder.c @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/def_builder.h" + +#include + +#include "upb/reflection/def_pool.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/field_def.h" + +// Must be last. +#include "upb/port_def.inc" + +/* The upb core does not generally have a concept of default instances. However + * for descriptor options we make an exception since the max size is known and + * modest (<200 bytes). All types can share a default instance since it is + * initialized to zeroes. + * + * We have to allocate an extra pointer for upb's internal metadata. */ +static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; +const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; + +const char* _upb_DefBuilder_FullToShort(const char* fullname) { + const char* p; + + if (fullname == NULL) { + return NULL; + } else if ((p = strrchr(fullname, '.')) == NULL) { + /* No '.' in the name, return the full string. */ + return fullname; + } else { + /* Return one past the last '.'. 
*/ + return p + 1; + } +} + +void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_Status_VSetErrorFormat(ctx->status, fmt, argp); + va_end(argp); + UPB_LONGJMP(ctx->err, 1); +} + +void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { + upb_Status_SetErrorMessage(ctx->status, "out of memory"); + UPB_LONGJMP(ctx->err, 1); +} + +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name) { + if (prefix) { + // ret = prefix + '.' + name; + size_t n = strlen(prefix); + char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); + strcpy(ret, prefix); + ret[n] = '.'; + memcpy(&ret[n + 1], name.data, name.size); + ret[n + 1 + name.size] = '\0'; + return ret; + } else { + char* ret = upb_strdup2(name.data, name.size, ctx->arena); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; + } +} + +static bool remove_component(char* base, size_t* len) { + if (*len == 0) return false; + + for (size_t i = *len - 1; i > 0; i--) { + if (base[i] == '.') { + *len = i; + return true; + } + } + + *len = 0; + return true; +} + +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type) { + if (sym.size == 0) goto notfound; + upb_value v; + if (sym.data[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + if (!_upb_DefPool_LookupAny2(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { + goto notfound; + } + } else { + /* Remove components from base until we find an entry or run out. */ + size_t baselen = base ? 
strlen(base) : 0; + char* tmp = malloc(sym.size + baselen + 1); + while (1) { + char* p = tmp; + if (baselen) { + memcpy(p, base, baselen); + p[baselen] = '.'; + p += baselen + 1; + } + memcpy(p, sym.data, sym.size); + p += sym.size; + if (_upb_DefPool_LookupAny2(ctx->symtab, tmp, p - tmp, &v)) { + break; + } + if (!remove_component(tmp, &baselen)) { + free(tmp); + goto notfound; + } + } + free(tmp); + } + + *type = _upb_DefType_Type(v); + return _upb_DefType_Unpack(v, *type); + +notfound: + _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", + UPB_STRINGVIEW_ARGS(sym)); +} + +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type) { + upb_deftype_t found_type; + const void* ret = + _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); + if (ret && found_type != type) { + _upb_DefBuilder_Errf(ctx, + "type mismatch when resolving %s: couldn't find " + "name " UPB_STRINGVIEW_FORMAT " with type=%d", + from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); + } + return ret; +} + +// Per ASCII this will lower-case a letter. If the result is a letter, the +// input was definitely a letter. If the output is not a letter, this may +// have transformed the character unpredictably. +static char upb_ascii_lower(char ch) { return ch | 0x20; } + +// isalpha() etc. from are locale-dependent, which we don't want. 
+static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { + return low <= c && c <= high; +} + +static bool upb_isletter(char c) { + char lower = upb_ascii_lower(c); + return upb_isbetween(lower, 'a', 'z') || c == '_'; +} + +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); +} + +static bool TryGetChar(const char** src, const char* end, char* ch) { + if (*src == end) return false; + *ch = **src; + *src += 1; + return true; +} + +static char TryGetHexDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } + ch = upb_ascii_lower(ch); + if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; + } + *src -= 1; // Char wasn't actually a hex digit. + return -1; +} + +static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + char hex_digit = TryGetHexDigit(src, end); + if (hex_digit < 0) { + _upb_DefBuilder_Errf( + ctx, "\\x cannot be followed by non-hex digit in field '%s' default", + upb_FieldDef_FullName(f)); + return 0; + } + unsigned int ret = hex_digit; + while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { + ret = (ret << 4) | hex_digit; + } + if (ret > 0xff) { + _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", + upb_FieldDef_FullName(f)); + return 0; + } + return ret; +} + +static char TryGetOctalDigit(const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) return -1; + if ('0' <= ch && ch <= '7') { + return ch - '0'; + } + *src -= 1; // Char wasn't actually an octal digit. 
+ return -1; +} + +static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, + const upb_FieldDef* f, + const char** src, const char* end) { + char ch = 0; + for (int i = 0; i < 3; i++) { + char digit; + if ((digit = TryGetOctalDigit(src, end)) >= 0) { + ch = (ch << 3) | digit; + } + } + return ch; +} + +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end) { + char ch; + if (!TryGetChar(src, end, &ch)) { + _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", + upb_FieldDef_FullName(f)); + return 0; + } + switch (ch) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '\"': + return '\"'; + case '?': + return '\?'; + case 'x': + case 'X': + return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + *src -= 1; + return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); + } + _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); +} + +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full) { + const char* str = name.data; + const size_t len = name.size; + bool start = true; + for (size_t i = 0; i < len; i++) { + const char c = str[i]; + if (c == '.') { + if (start || !full) { + _upb_DefBuilder_Errf( + ctx, "invalid name: unexpected '.' 
(" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + _upb_DefBuilder_Errf(ctx, + "invalid name: path components must start with a " + "letter (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + start = false; + } else if (!upb_isalphanum(c)) { + _upb_DefBuilder_Errf( + ctx, + "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT + ")", + UPB_STRINGVIEW_ARGS(name)); + } + } + if (start) { + _upb_DefBuilder_Errf(ctx, + "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", + UPB_STRINGVIEW_ARGS(name)); + } + + // We should never reach this point. + UPB_ASSERT(false); +} diff --git a/upb/reflection/def_builder.h b/upb/reflection/def_builder.h new file mode 100644 index 0000000000..4b7358c77a --- /dev/null +++ b/upb/reflection/def_builder.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_REFLECTION_DEF_BUILDER_H_ +#define UPB_REFLECTION_DEF_BUILDER_H_ + +#include "upb/reflection/common.h" +#include "upb/reflection/def_pool.h" +#include "upb/reflection/def_type.h" + +// Must be last. +#include "upb/port_def.inc" + +// We want to copy the options verbatim into the destination options proto. +// We use serialize+parse as our deep copy. +#define UBP_DEF_SET_OPTIONS(target, desc_type, options_type, proto) \ + if (google_protobuf_##desc_type##_has_options(proto)) { \ + size_t size; \ + char* pb = google_protobuf_##options_type##_serialize( \ + google_protobuf_##desc_type##_options(proto), ctx->tmp_arena, &size); \ + if (!pb) _upb_DefBuilder_OomErr(ctx); \ + target = \ + google_protobuf_##options_type##_parse(pb, size, _upb_DefBuilder_Arena(ctx)); \ + if (!target) _upb_DefBuilder_OomErr(ctx); \ + } else { \ + target = (const google_protobuf_##options_type*)kUpbDefOptDefault; \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_DefBuilder { + upb_DefPool* symtab; + upb_FileDef* file; // File we are building. + upb_Arena* arena; // Allocate defs here. + upb_Arena* tmp_arena; // For temporary allocations. + upb_Status* status; // Record errors here. + const upb_MiniTable_File* layout; // NULL if we should build layouts. + int enum_count; // Count of enums built so far. + int msg_count; // Count of messages built so far. + int ext_count; // Count of extensions built so far. + jmp_buf err; // longjmp() on error. 
+}; + +extern const char* kUpbDefOptDefault; + +UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, + ...) UPB_PRINTF(2, 3); +UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); + +const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, + const char* prefix, + upb_StringView name); + +// Given a symbol and the base symbol inside which it is defined, +// find the symbol's definition. +const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, + const char* from_name_dbg, + const char* base, upb_StringView sym, + upb_deftype_t* type); + +const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, + const char* from_name_dbg, const char* base, + upb_StringView sym, upb_deftype_t type); + +char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char** src, const char* end); + +const char* _upb_DefBuilder_FullToShort(const char* fullname); + +UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { + if (bytes == 0) return NULL; + void* ret = upb_Arena_Malloc(ctx->arena, bytes); + if (!ret) _upb_DefBuilder_OomErr(ctx); + return ret; +} + +// Adds a symbol |v| to the symtab, which must be a def pointer previously +// packed with pack_def(). The def's pointer to upb_FileDef* must be set before +// adding, so we know which entries to remove if building this file fails. +UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, + upb_value v) { + // TODO: table should support an operation "tryinsert" to avoid the double + // lookup. 
+ if (_upb_DefPool_Contains(ctx->symtab, name)) { + _upb_DefBuilder_Errf(ctx, "duplicate symbol '%s'", name); + } + bool ok = _upb_DefPool_Insert(ctx->symtab, name, v); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} + +UPB_INLINE upb_Arena* _upb_DefBuilder_Arena(const upb_DefBuilder* ctx) { + return ctx->arena; +} + +UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) { + return ctx->file; +} + +// This version of CheckIdent() is only called by other, faster versions after +// they detect a parsing error. +void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, + bool full); + +// Verify a relative identifier string. The loop is branchless for speed. +UPB_INLINE void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); + + good &= is_alpha | is_numer; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} + +// Verify a full identifier string. This is slightly more complicated than +// verifying a relative identifier string because we must track '.' chars. 
+UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + bool start = true; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & !start; + const bool is_dot = (c == '.') & !start; + + good &= is_alpha | is_numer | is_dot; + start = is_dot; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, true); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_DEF_BUILDER_H_ */ diff --git a/upb/reflection/def_builder_test.cc b/upb/reflection/def_builder_test.cc new file mode 100644 index 0000000000..113e574e33 --- /dev/null +++ b/upb/reflection/def_builder_test.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2009-2022, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/def_builder.h" + +#include "gtest/gtest.h" +#include "upb/reflection/def.hpp" + +// Must be last. +#include "upb/port_def.inc" + +struct IdentTest { + const char* text; + bool ok; +}; + +static const std::vector<IdentTest> FullIdentTests = { + {"foo.bar", true}, {"foo.", true}, {"foo", true}, + + {"foo.7bar", false}, {".foo", false}, {"#", false}, + {".", false}, {"", false}, +}; + +static const std::vector<IdentTest> NotFullIdentTests = { + {"foo", true}, {"foo1", true}, + + {"foo.bar", false}, {"1foo", false}, {"#", false}, + {".", false}, {"", false}, +}; + +TEST(DefBuilder, TestIdents) { + upb_StringView sv; + upb_Status status; + upb_DefBuilder ctx; + ctx.status = &status; + upb_Status_Clear(&status); + + for (const auto& test : FullIdentTests) { + sv.data = test.text; + sv.size = strlen(test.text); + + if (UPB_SETJMP(ctx.err)) { + EXPECT_FALSE(test.ok); + } else { + _upb_DefBuilder_CheckIdentFull(&ctx, sv); + EXPECT_TRUE(test.ok); + } + } + + for (const auto& test : NotFullIdentTests) { + sv.data = test.text; + sv.size = strlen(test.text); + + if (UPB_SETJMP(ctx.err)) { + EXPECT_FALSE(test.ok); + } else { + _upb_DefBuilder_CheckIdentNotFull(&ctx, sv); + EXPECT_TRUE(test.ok); + } + } +} diff --git a/upb/reflection/def_pool.c b/upb/reflection/def_pool.c new file mode 100644 index 0000000000..7ab375ec60 --- /dev/null +++ b/upb/reflection/def_pool.c @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/def_pool.h" + +#include + +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/enum_def.h" +#include "upb/reflection/enum_value_def.h" +#include "upb/reflection/field_def.h" +#include "upb/reflection/file_def.h" +#include "upb/reflection/message_def.h" +#include "upb/reflection/service_def.h" + +// Must be last. 
+#include "upb/port_def.inc" + +struct upb_DefPool { + upb_Arena* arena; + upb_strtable syms; // full_name -> packed def ptr + upb_strtable files; // file_name -> (upb_FileDef*) + upb_inttable exts; // (upb_MiniTable_Extension*) -> (upb_FieldDef*) + upb_ExtensionRegistry* extreg; + size_t bytes_loaded; +}; + +void upb_DefPool_Free(upb_DefPool* s) { + upb_Arena_Free(s->arena); + upb_gfree(s); +} + +upb_DefPool* upb_DefPool_New(void) { + upb_DefPool* s = upb_gmalloc(sizeof(*s)); + if (!s) return NULL; + + s->arena = upb_Arena_New(); + s->bytes_loaded = 0; + + if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; + if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; + if (!upb_inttable_init(&s->exts, s->arena)) goto err; + + s->extreg = upb_ExtensionRegistry_New(s->arena); + if (!s->extreg) goto err; + + return s; + +err: + upb_Arena_Free(s->arena); + upb_gfree(s); + return NULL; +} + +bool _upb_DefPool_Contains(const upb_DefPool* s, const char* sym) { + return upb_strtable_lookup(&s->syms, sym, NULL); +} + +bool _upb_DefPool_Insert(upb_DefPool* s, const char* sym, upb_value v) { + return _upb_DefPool_Insert2(s, sym, strlen(sym), v); +} + +bool _upb_DefPool_Insert2(upb_DefPool* s, const char* sym, size_t size, + upb_value v) { + return upb_strtable_insert(&s->syms, sym, size, v, s->arena); +} + +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTable_Extension* ext, + upb_FieldDef* f, upb_Arena* a) { + return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), + a); +} + +static const void* _upb_DefPool_Lookup(const upb_DefPool* s, const char* sym, + upb_deftype_t type) { + return _upb_DefPool_Lookup2(s, sym, strlen(sym), type); +} + +const void* _upb_DefPool_Lookup2(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type) { + upb_value v; + return upb_strtable_lookup2(&s->syms, sym, size, &v) + ? 
_upb_DefType_Unpack(v, type) + : NULL; +} + +bool _upb_DefPool_LookupAny2(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v) { + return upb_strtable_lookup2(&s->syms, sym, size, v); +} + +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { + return s->extreg; +} + +const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Lookup(s, sym, UPB_DEFTYPE_MSG); +} + +const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( + const upb_DefPool* s, const char* sym, size_t len) { + return _upb_DefPool_Lookup2(s, sym, len, UPB_DEFTYPE_MSG); +} + +const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Lookup(s, sym, UPB_DEFTYPE_ENUM); +} + +const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, + const char* sym) { + return _upb_DefPool_Lookup(s, sym, UPB_DEFTYPE_ENUMVAL); +} + +const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, + const char* name) { + upb_value v; + return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) + : NULL; +} + +const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, + const char* name, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) + ? upb_value_getconstptr(v) + : NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + upb_value v; + if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL; + + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_FIELD: + return _upb_DefType_Unpack(v, UPB_DEFTYPE_FIELD); + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return _upb_MessageDef_InMessageSet(m) + ? 
upb_MessageDef_NestedExtension(m, 0) + : NULL; + } + default: + break; + } + + return NULL; +} + +const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, + const char* sym) { + return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym)); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, + const char* name) { + return _upb_DefPool_Lookup(s, name, UPB_DEFTYPE_SERVICE); +} + +const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( + const upb_DefPool* s, const char* name, size_t size) { + return _upb_DefPool_Lookup2(s, name, size, UPB_DEFTYPE_SERVICE); +} + +const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, + const char* name) { + upb_value v; + // TODO(haberman): non-extension fields and oneofs. + if (upb_strtable_lookup(&s->syms, name, &v)) { + switch (_upb_DefType_Type(v)) { + case UPB_DEFTYPE_EXT: { + const upb_FieldDef* f = _upb_DefType_Unpack(v, UPB_DEFTYPE_EXT); + return upb_FieldDef_File(f); + } + case UPB_DEFTYPE_MSG: { + const upb_MessageDef* m = _upb_DefType_Unpack(v, UPB_DEFTYPE_MSG); + return upb_MessageDef_File(m); + } + case UPB_DEFTYPE_ENUM: { + const upb_EnumDef* e = _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUM); + return upb_EnumDef_File(e); + } + case UPB_DEFTYPE_ENUMVAL: { + const upb_EnumValueDef* ev = + _upb_DefType_Unpack(v, UPB_DEFTYPE_ENUMVAL); + return upb_EnumDef_File(upb_EnumValueDef_Enum(ev)); + } + case UPB_DEFTYPE_SERVICE: { + const upb_ServiceDef* service = + _upb_DefType_Unpack(v, UPB_DEFTYPE_SERVICE); + return upb_ServiceDef_File(service); + } + default: + UPB_UNREACHABLE(); + } + } + + const char* last_dot = strrchr(name, '.'); + if (last_dot) { + const upb_MessageDef* parent = + upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name); + if (parent) { + const char* shortname = last_dot + 1; + if (upb_MessageDef_FindByNameWithSize(parent, shortname, + strlen(shortname), NULL, NULL)) { + return upb_MessageDef_File(parent); + } + } + } + + return 
NULL; +} + +static void remove_filedef(upb_DefPool* s, upb_FileDef* file) { + intptr_t iter = UPB_INTTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { + const upb_FileDef* f; + switch (_upb_DefType_Type(val)) { + case UPB_DEFTYPE_EXT: + f = upb_FieldDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_EXT)); + break; + case UPB_DEFTYPE_MSG: + f = upb_MessageDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_MSG)); + break; + case UPB_DEFTYPE_ENUM: + f = upb_EnumDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_ENUM)); + break; + case UPB_DEFTYPE_ENUMVAL: + f = upb_EnumDef_File(upb_EnumValueDef_Enum( + _upb_DefType_Unpack(val, UPB_DEFTYPE_ENUMVAL))); + break; + case UPB_DEFTYPE_SERVICE: + f = upb_ServiceDef_File(_upb_DefType_Unpack(val, UPB_DEFTYPE_SERVICE)); + break; + default: + UPB_UNREACHABLE(); + } + + if (f == file) upb_strtable_removeiter(&s->syms, &iter); + } +} + +static const upb_FileDef* _upb_DefPool_AddFile( + upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto, + const upb_MiniTable_File* layout, upb_Status* status) { + const upb_StringView name = google_protobuf_FileDescriptorProto_name(file_proto); + + // Determine whether we already know about this file. 
+ { + upb_value v; + if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { + upb_Status_SetErrorFormat(status, + "duplicate file name " UPB_STRINGVIEW_FORMAT, + UPB_STRINGVIEW_ARGS(name)); + return NULL; + } + } + + upb_DefBuilder ctx = { + .symtab = s, + .layout = layout, + .msg_count = 0, + .enum_count = 0, + .ext_count = 0, + .status = status, + .file = NULL, + .arena = upb_Arena_New(), + .tmp_arena = upb_Arena_New(), + }; + + if (UPB_SETJMP(ctx.err)) { + UPB_ASSERT(!upb_Status_IsOk(status)); + if (ctx.file) { + remove_filedef(s, ctx.file); + ctx.file = NULL; + } + } else if (!ctx.arena || !ctx.tmp_arena) { + _upb_DefBuilder_OomErr(&ctx); + } else { + _upb_FileDef_Create(&ctx, file_proto); + upb_strtable_insert(&s->files, name.data, name.size, + upb_value_constptr(ctx.file), ctx.arena); + UPB_ASSERT(upb_Status_IsOk(status)); + upb_Arena_Fuse(s->arena, ctx.arena); + } + + if (ctx.arena) upb_Arena_Free(ctx.arena); + if (ctx.tmp_arena) upb_Arena_Free(ctx.tmp_arena); + return ctx.file; +} + +const upb_FileDef* upb_DefPool_AddFile( + upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto, + upb_Status* status) { + return _upb_DefPool_AddFile(s, file_proto, NULL, status); +} + +/* Include here since we want most of this file to be stdio-free. */ +#include <stdio.h> + +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable) { + /* Since this function should never fail (it would indicate a bug in upb) we + * print errors to stderr instead of returning error status to the user.
*/ + _upb_DefPool_Init** deps = init->deps; + google_protobuf_FileDescriptorProto* file; + upb_Arena* arena; + upb_Status status; + + upb_Status_Clear(&status); + + if (upb_DefPool_FindFileByName(s, init->filename)) { + return true; + } + + arena = upb_Arena_New(); + + for (; *deps; deps++) { + if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; + } + + file = google_protobuf_FileDescriptorProto_parse_ex( + init->descriptor.data, init->descriptor.size, NULL, + kUpb_DecodeOption_AliasString, arena); + s->bytes_loaded += init->descriptor.size; + + if (!file) { + upb_Status_SetErrorFormat( + &status, + "Failed to parse compiled-in descriptor for file '%s'. This should " + "never happen.", + init->filename); + goto err; + } + + const upb_MiniTable_File* mt = rebuild_minitable ? NULL : init->layout; + if (!_upb_DefPool_AddFile(s, file, mt, &status)) { + goto err; + } + + upb_Arena_Free(arena); + return true; + +err: + fprintf(stderr, + "Error loading compiled-in descriptor for file '%s' (this should " + "never happen): %s\n", + init->filename, upb_Status_ErrorMessage(&status)); + upb_Arena_Free(arena); + return false; +} + +size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) { + return s->bytes_loaded; +} + +upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; } + +const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable( + const upb_DefPool* s, const upb_MiniTable_Extension* ext) { + upb_value v; + bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v); + UPB_ASSERT(ok); + return upb_value_getconstptr(v); +} + +const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, + const upb_MessageDef* m, + int32_t fieldnum) { + const upb_MiniTable* l = upb_MessageDef_MiniTable(m); + const upb_MiniTable_Extension* ext = _upb_extreg_get(s->extreg, l, fieldnum); + return ext ? 
_upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL; +} + +const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( + const upb_DefPool* s) { + return s->extreg; +} + +const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, + const upb_MessageDef* m, + size_t* count) { + size_t n = 0; + intptr_t iter = UPB_INTTABLE_BEGIN; + uintptr_t key; + upb_value val; + // This is O(all exts) instead of O(exts for m). If we need this to be + // efficient we may need to make extreg into a two-level table, or have a + // second per-message index. + while (upb_inttable_next2(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) n++; + } + const upb_FieldDef** exts = malloc(n * sizeof(*exts)); + iter = UPB_INTTABLE_BEGIN; + size_t i = 0; + while (upb_inttable_next2(&s->exts, &key, &val, &iter)) { + const upb_FieldDef* f = upb_value_getconstptr(val); + if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f; + } + *count = n; + return exts; +} + +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { + return _upb_DefPool_LoadDefInitEx(s, init, false); +} diff --git a/upb/reflection/def_pool.h b/upb/reflection/def_pool.h new file mode 100644 index 0000000000..765053eaaa --- /dev/null +++ b/upb/reflection/def_pool.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_DEF_POOL_H_ +#define UPB_REFLECTION_DEF_POOL_H_ + +#include "upb/reflection/common.h" +#include "upb/reflection/def_type.h" +#include "upb/string_view.h" + +// Must be last. 
+#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +void upb_DefPool_Free(upb_DefPool* s); + +upb_DefPool* upb_DefPool_New(void); + +const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, + const char* sym); + +const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( + const upb_DefPool* s, const char* sym, size_t len); + +const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, + const char* sym); + +const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, + const char* sym); + +const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, + const char* name); + +const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s, + const char* name, + size_t len); + +const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize( + const upb_DefPool* s, const char* name, size_t size); + +const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, + const char* sym); + +const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, + const char* name); + +const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( + const upb_DefPool* s, const char* name, size_t size); + +const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, + const char* name); + +const upb_FileDef* upb_DefPool_AddFile( + upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto, + upb_Status* status); + +const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s, + const upb_MessageDef* m, + int32_t fieldnum); + +const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry( + const upb_DefPool* s); + +const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, + const upb_MessageDef* m, + size_t* count); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +// For generated code only: loads a generated descriptor. 
+typedef struct _upb_DefPool_Init { + struct _upb_DefPool_Init** deps; // Dependencies of this file. + const upb_MiniTable_File* layout; + const char* filename; + upb_StringView descriptor; // Serialized descriptor. +} _upb_DefPool_Init; + +upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s); +size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); + +bool _upb_DefPool_Contains(const upb_DefPool* s, const char* sym); + +upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s); + +bool _upb_DefPool_Insert(upb_DefPool* s, const char* sym, upb_value v); +bool _upb_DefPool_Insert2(upb_DefPool* s, const char* sym, size_t size, + upb_value v); + +bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTable_Extension* ext, + upb_FieldDef* f, upb_Arena* a); + +const void* _upb_DefPool_Lookup2(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type); + +bool _upb_DefPool_LookupAny2(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v); + +const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable( + const upb_DefPool* s, const upb_MiniTable_Extension* ext); + +// Should only be directly called by tests. This variant lets us suppress +// the use of compiled-in tables, forcing a rebuild of the tables at runtime. +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable); + +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_DEF_POOL_H_ */ diff --git a/upb/mini_descriptor.h b/upb/reflection/def_type.c similarity index 65% rename from upb/mini_descriptor.h rename to upb/reflection/def_type.c index 374794a011..033e7102f9 100644 --- a/upb/mini_descriptor.h +++ b/upb/reflection/def_type.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2022, Google LLC + * Copyright (c) 2009-2021, Google LLC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -25,33 +25,26 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef UPB_MINI_DESCRIPTOR_H_ -#define UPB_MINI_DESCRIPTOR_H_ - -#include "upb/arena.h" -#include "upb/def.h" +#include "upb/reflection/def_type.h" // Must be last. #include "upb/port_def.inc" -#ifdef __cplusplus -extern "C" { -#endif - -// Creates and returns a mini descriptor string for an enum, or NULL on error. -const char* upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a); - -// Creates and returns a mini descriptor string for a field, or NULL on error. -const char* upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a); - -// Creates and returns a mini descriptor string for a message, or NULL on error. -const char* upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, - upb_Arena* a); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port_undef.inc" - -#endif /* UPB_MINI_DESCRIPTOR_H_ */ +upb_deftype_t _upb_DefType_Type(upb_value v) { + const uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return num & UPB_DEFTYPE_MASK; +} + +upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type) { + uintptr_t num = (uintptr_t)ptr; + UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); + num |= type; + return upb_value_constptr((const void*)num); +} + +const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type) { + uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return (num & UPB_DEFTYPE_MASK) == type + ? (const void*)(num & ~UPB_DEFTYPE_MASK) + : NULL; +} diff --git a/upb/reflection/def_type.h b/upb/reflection/def_type.h new file mode 100644 index 0000000000..31ce3e263c --- /dev/null +++ b/upb/reflection/def_type.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_REFLECTION_DEF_TYPE_H_ +#define UPB_REFLECTION_DEF_TYPE_H_ + +#include "upb/internal/table.h" + +// Must be last. +#include "upb/port_def.inc" + +// Inside a symtab we store tagged pointers to specific def types. +typedef enum { + UPB_DEFTYPE_MASK = 7, + + // Only inside symtab table. + UPB_DEFTYPE_EXT = 0, + UPB_DEFTYPE_MSG = 1, + UPB_DEFTYPE_ENUM = 2, + UPB_DEFTYPE_ENUMVAL = 3, + UPB_DEFTYPE_SERVICE = 4, + + // Only inside message table. 
+ UPB_DEFTYPE_FIELD = 0, + UPB_DEFTYPE_ONEOF = 1, + UPB_DEFTYPE_FIELD_JSONNAME = 2, +} upb_deftype_t; + +#ifdef __cplusplus +extern "C" { +#endif + +// Our 3-bit pointer tagging requires all pointers to be multiples of 8. +// The arena will always yield 8-byte-aligned addresses, however we put +// the defs into arrays. For each element in the array to be 8-byte-aligned, +// the sizes of each def type must also be a multiple of 8. +// +// If any of these asserts fail, we need to add or remove padding on 32-bit +// machines (64-bit machines will have 8-byte alignment already due to +// pointers, which all of these structs have). +UPB_INLINE void _upb_DefType_CheckPadding(size_t size) { + UPB_ASSERT((size & UPB_DEFTYPE_MASK) == 0); +} + +upb_deftype_t _upb_DefType_Type(upb_value v); + +upb_value _upb_DefType_Pack(const void* ptr, upb_deftype_t type); + +const void* _upb_DefType_Unpack(upb_value v, upb_deftype_t type); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_DEF_TYPE_H_ */ diff --git a/upb/reflection/enum_def.c b/upb/reflection/enum_def.c new file mode 100644 index 0000000000..0998d03c8f --- /dev/null +++ b/upb/reflection/enum_def.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/enum_def.h" + +#include + +#include "upb/mini_table.h" +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/enum_value_def.h" +#include "upb/reflection/file_def.h" +#include "upb/reflection/message_def.h" +#include "upb/reflection/mini_descriptor_encode.h" + +// Must be last. +#include "upb/port_def.inc" + +struct upb_EnumDef { + const google_protobuf_EnumOptions* opts; + const upb_MiniTable_Enum* layout; // Only for proto2. + const upb_FileDef* file; + const upb_MessageDef* containing_type; // Could be merged with "file". + const char* full_name; + upb_strtable ntoi; + upb_inttable iton; + const upb_EnumValueDef* values; + int value_count; + int32_t defaultval; + bool is_sorted; // Whether all of the values are defined in ascending order. +}; + +upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i) { + return (upb_EnumDef*)&e[i]; +} + +// TODO: Maybe implement this on top of a ZCOS instead? 
+void _upb_EnumDef_Debug(const upb_EnumDef* e) { + fprintf(stderr, "enum %s (%p) {\n", e->full_name, e); + fprintf(stderr, " value_count: %d\n", e->value_count); + fprintf(stderr, " default: %d\n", e->defaultval); + fprintf(stderr, " is_sorted: %d\n", e->is_sorted); + fprintf(stderr, "}\n"); +} + +const upb_MiniTable_Enum* _upb_EnumDef_MiniTable(const upb_EnumDef* e) { + return e->layout; +} + +bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { + const char* name = upb_EnumValueDef_Name(v); + const upb_value val = upb_value_constptr(v); + bool ok = upb_strtable_insert(&e->ntoi, name, strlen(name), val, a); + if (!ok) return false; + + // Multiple enumerators can have the same number, first one wins. + const int number = upb_EnumValueDef_Number(v); + if (!upb_inttable_lookup(&e->iton, number, NULL)) { + return upb_inttable_insert(&e->iton, number, val, a); + } + return true; +} + +static int cmp_values(const void* a, const void* b) { + const uint32_t A = upb_EnumValueDef_Number(*(const upb_EnumValueDef**)a); + const uint32_t B = upb_EnumValueDef_Number(*(const upb_EnumValueDef**)b); + return (A < B) ? 
-1 : (A > B); +} + +const char* _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a) { + if (e->is_sorted) return _upb_MiniDescriptor_EncodeEnum(e, NULL, a); + + const upb_EnumValueDef** sorted = (const upb_EnumValueDef**)upb_Arena_Malloc( + a, e->value_count * sizeof(void*)); + if (!sorted) return NULL; + + for (size_t i = 0; i < e->value_count; i++) { + sorted[i] = upb_EnumDef_Value(e, i); + } + qsort(sorted, e->value_count, sizeof(void*), cmp_values); + + return _upb_MiniDescriptor_EncodeEnum(e, sorted, a); +} + +const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e) { + return e->opts; +} + +bool upb_EnumDef_HasOptions(const upb_EnumDef* e) { + return e->opts != (void*)kUpbDefOptDefault; +} + +const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; } + +const char* upb_EnumDef_Name(const upb_EnumDef* e) { + return _upb_DefBuilder_FullToShort(e->full_name); +} + +const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; } + +const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) { + return e->containing_type; +} + +int32_t upb_EnumDef_Default(const upb_EnumDef* e) { + UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval)); + return e->defaultval; +} + +int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; } + +const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, + const char* name) { + return upb_EnumDef_FindValueByNameWithSize(e, name, strlen(name)); +} + +const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( + const upb_EnumDef* e, const char* name, size_t size) { + upb_value v; + return upb_strtable_lookup2(&e->ntoi, name, size, &v) + ? upb_value_getconstptr(v) + : NULL; +} + +const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, + int32_t num) { + upb_value v; + return upb_inttable_lookup(&e->iton, num, &v) ? 
upb_value_getconstptr(v) + : NULL; +} + +bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) { + // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect + // this to be faster (especially for small numbers). + return upb_MiniTable_Enum_CheckValue(e->layout, num); +} + +const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { + UPB_ASSERT(0 <= i && i < e->value_count); + return _upb_EnumValueDef_At(e->values, i); +} + +static upb_MiniTable_Enum* create_enumlayout(upb_DefBuilder* ctx, + const upb_EnumDef* e) { + const char* desc = _upb_EnumDef_MiniDescriptor(e, ctx->tmp_arena); + if (!desc) + _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); + + upb_Status status; + upb_MiniTable_Enum* layout = + upb_MiniTable_BuildEnum(desc, strlen(desc), ctx->arena, &status); + if (!layout) + _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); + return layout; +} + +static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, + const google_protobuf_EnumDescriptorProto* enum_proto, + upb_EnumDef* e) { + const google_protobuf_EnumValueDescriptorProto* const* values; + upb_StringView name; + size_t n; + + // Must happen before _upb_DefBuilder_Add() + e->file = _upb_DefBuilder_File(ctx); + + name = google_protobuf_EnumDescriptorProto_name(enum_proto); + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + + e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, e->full_name, + _upb_DefType_Pack(e, UPB_DEFTYPE_ENUM)); + + values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); + + bool ok = upb_strtable_init(&e->ntoi, n, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_inttable_init(&e->iton, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + e->defaultval = 0; + e->value_count = n; + e->values = _upb_EnumValueDefs_New(ctx, prefix, n, values, e, &e->is_sorted); + + if (n == 0) { + _upb_DefBuilder_Errf(ctx, "enums must 
contain at least one value (%s)", + e->full_name); + } + + UBP_DEF_SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); + + upb_inttable_compact(&e->iton, ctx->arena); + + if (upb_FileDef_Syntax(e->file) == kUpb_Syntax_Proto2) { + if (ctx->layout) { + UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); + e->layout = ctx->layout->enums[ctx->enum_count++]; + } else { + e->layout = create_enumlayout(ctx, e); + } + } else { + e->layout = NULL; + } +} + +upb_EnumDef* _upb_EnumDefs_New(upb_DefBuilder* ctx, int n, + const google_protobuf_EnumDescriptorProto* const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_EnumDef)); + + // If a containing type is defined then get the full name from that. + // Otherwise use the package name from the file def. + const char* name = containing_type ? upb_MessageDef_FullName(containing_type) + : _upb_FileDef_RawPackage(ctx->file); + + upb_EnumDef* e = _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumDef) * n); + for (size_t i = 0; i < n; i++) { + create_enumdef(ctx, name, protos[i], &e[i]); + e[i].containing_type = containing_type; + } + return e; +} diff --git a/upb/reflection/enum_def.h b/upb/reflection/enum_def.h new file mode 100644 index 0000000000..0693453185 --- /dev/null +++ b/upb/reflection/enum_def.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_ENUM_DEF_H_ +#define UPB_REFLECTION_ENUM_DEF_H_ + +#include "upb/reflection/common.h" + +// Must be last. 
+#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num); +const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e); +int32_t upb_EnumDef_Default(const upb_EnumDef* e); +const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e); +const upb_EnumValueDef* upb_EnumDef_FindValueByName(const upb_EnumDef* e, + const char* name); +const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize( + const upb_EnumDef* e, const char* name, size_t size); +const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* e, + int32_t num); +const char* upb_EnumDef_FullName(const upb_EnumDef* e); +bool upb_EnumDef_HasOptions(const upb_EnumDef* e); +const char* upb_EnumDef_Name(const upb_EnumDef* e); +const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e); +const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i); +int upb_EnumDef_ValueCount(const upb_EnumDef* e); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i); +bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a); +const upb_MiniTable_Enum* _upb_EnumDef_MiniTable(const upb_EnumDef* e); + +// Builds and returns a mini descriptor, or NULL if OOM. +const char* _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a); + +// Allocate and initialize an array of |n| enum defs. 
+upb_EnumDef* _upb_EnumDefs_New(upb_DefBuilder* ctx, int n, + const google_protobuf_EnumDescriptorProto* const* protos, + const upb_MessageDef* containing_type); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_ENUM_DEF_H_ */ diff --git a/upb/reflection/enum_value_def.c b/upb/reflection/enum_value_def.c new file mode 100644 index 0000000000..ad7504a9f2 --- /dev/null +++ b/upb/reflection/enum_value_def.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/enum_value_def.h" + +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/enum_def.h" +#include "upb/reflection/file_def.h" + +// Must be last. +#include "upb/port_def.inc" + +struct upb_EnumValueDef { + const google_protobuf_EnumValueOptions* opts; + const upb_EnumDef* parent; + const char* full_name; + int32_t number; +}; + +upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { + return (upb_EnumValueDef*)&v[i]; +} + +const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options( + const upb_EnumValueDef* v) { + return v->opts; +} + +bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v) { + return v->opts != (void*)kUpbDefOptDefault; +} + +const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v) { + return v->parent; +} + +const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v) { + return v->full_name; +} + +const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v) { + return _upb_DefBuilder_FullToShort(v->full_name); +} + +int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v) { return v->number; } + +uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v) { + // Compute index in our parent's array. 
+ return v - upb_EnumDef_Value(v->parent, 0); +} + +static void create_enumvaldef(upb_DefBuilder* ctx, const char* prefix, + const google_protobuf_EnumValueDescriptorProto* val_proto, + upb_EnumDef* e, upb_EnumValueDef* v) { + upb_StringView name = google_protobuf_EnumValueDescriptorProto_name(val_proto); + + v->parent = e; // Must happen prior to _upb_DefBuilder_Add() + v->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + v->number = google_protobuf_EnumValueDescriptorProto_number(val_proto); + _upb_DefBuilder_Add(ctx, v->full_name, + _upb_DefType_Pack(v, UPB_DEFTYPE_ENUMVAL)); + + UBP_DEF_SET_OPTIONS(v->opts, EnumValueDescriptorProto, EnumValueOptions, + val_proto); + + bool ok = _upb_EnumDef_Insert(e, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} + +// Allocate and initialize an array of |n| enum value defs owned by |e|. +upb_EnumValueDef* _upb_EnumValueDefs_New( + upb_DefBuilder* ctx, const char* prefix, int n, + const google_protobuf_EnumValueDescriptorProto* const* protos, upb_EnumDef* e, + bool* is_sorted) { + _upb_DefType_CheckPadding(sizeof(upb_EnumValueDef)); + + upb_EnumValueDef* v = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_EnumValueDef) * n); + + *is_sorted = true; + uint32_t previous = 0; + for (size_t i = 0; i < n; i++) { + create_enumvaldef(ctx, prefix, protos[i], e, &v[i]); + + const uint32_t current = v[i].number; + if (previous > current) *is_sorted = false; + previous = current; + } + + if (upb_FileDef_Syntax(ctx->file) == kUpb_Syntax_Proto3 && n > 0 && + v[0].number != 0) { + _upb_DefBuilder_Errf(ctx, + "for proto3, the first enum value must be zero (%s)", + upb_EnumDef_FullName(e)); + } + + return v; +} diff --git a/upb/reflection/enum_value_def.h b/upb/reflection/enum_value_def.h new file mode 100644 index 0000000000..31bd7b5f27 --- /dev/null +++ b/upb/reflection/enum_value_def.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_ENUM_VALUE_DEF_H_ +#define UPB_REFLECTION_ENUM_VALUE_DEF_H_ + +#include "upb/reflection/common.h" + +// Must be last. 
+#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* v); +const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* v); +bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* v); +uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* v); +const char* upb_EnumValueDef_Name(const upb_EnumValueDef* v); +int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* v); +const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options( + const upb_EnumValueDef* v); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i); + +// Allocate and initialize an array of |n| enum value defs owned by |e|. +upb_EnumValueDef* _upb_EnumValueDefs_New( + upb_DefBuilder* ctx, const char* prefix, int n, + const google_protobuf_EnumValueDescriptorProto* const* protos, upb_EnumDef* e, + bool* is_sorted); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_ENUM_VALUE_DEF_H_ */ diff --git a/upb/reflection/extension_range.c b/upb/reflection/extension_range.c new file mode 100644 index 0000000000..c48176a382 --- /dev/null +++ b/upb/reflection/extension_range.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/extension_range.h" + +#include "upb/reflection/def_builder.h" +#include "upb/reflection/field_def.h" +#include "upb/reflection/message_def.h" + +// Must be last. 
+#include "upb/port_def.inc" + +struct upb_ExtensionRange { + const google_protobuf_ExtensionRangeOptions* opts; + int32_t start; + int32_t end; +}; + +upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i) { + return (upb_ExtensionRange*)&r[i]; +} + +const google_protobuf_ExtensionRangeOptions* upb_ExtensionRange_Options( + const upb_ExtensionRange* r) { + return r->opts; +} + +bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) { + return r->opts != (void*)kUpbDefOptDefault; +} + +int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r) { + return r->start; +} + +int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r) { return r->end; } + +upb_ExtensionRange* _upb_ExtensionRanges_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_DescriptorProto_ExtensionRange* const* protos, + const upb_MessageDef* m) { + upb_ExtensionRange* r = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_ExtensionRange) * n); + + for (int i = 0; i < n; i++) { + const int32_t start = + google_protobuf_DescriptorProto_ExtensionRange_start(protos[i]); + const int32_t end = google_protobuf_DescriptorProto_ExtensionRange_end(protos[i]); + const int32_t max = + google_protobuf_MessageOptions_message_set_wire_format(upb_MessageDef_Options(m)) + ? INT32_MAX + : kUpb_MaxFieldNumber + 1; + + // A full validation would also check that each range is disjoint, and that + // none of the fields overlap with the extension ranges, but we are just + // sanity checking here. 
+ if (start < 1 || end <= start || end > max) { + _upb_DefBuilder_Errf(ctx, + "Extension range (%d, %d) is invalid, message=%s\n", + (int)start, (int)end, upb_MessageDef_FullName(m)); + } + + r[i].start = start; + r[i].end = end; + UBP_DEF_SET_OPTIONS(r[i].opts, DescriptorProto_ExtensionRange, + ExtensionRangeOptions, protos[i]); + } + + return r; +} diff --git a/upb/reflection/extension_range.h b/upb/reflection/extension_range.h new file mode 100644 index 0000000000..dd6ac1597d --- /dev/null +++ b/upb/reflection/extension_range.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_EXTENSION_RANGE_H_ +#define UPB_REFLECTION_EXTENSION_RANGE_H_ + +#include "upb/reflection/common.h" + +// Must be last. +#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* r); +int32_t upb_ExtensionRange_End(const upb_ExtensionRange* r); + +bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r); +const google_protobuf_ExtensionRangeOptions* upb_ExtensionRange_Options( + const upb_ExtensionRange* r); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +upb_ExtensionRange* _upb_ExtensionRange_At(const upb_ExtensionRange* r, int i); + +// Allocate and initialize an array of |n| extension ranges owned by |m|. +upb_ExtensionRange* _upb_ExtensionRanges_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_DescriptorProto_ExtensionRange* const* protos, + const upb_MessageDef* m); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_EXTENSION_RANGE_H_ */ diff --git a/upb/reflection/field_def.c b/upb/reflection/field_def.c new file mode 100644 index 0000000000..0ac1ce6e08 --- /dev/null +++ b/upb/reflection/field_def.c @@ -0,0 +1,1215 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/field_def.h" + +#include +#include + +#include "upb/mini_table.h" +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_pool.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/enum_def.h" +#include "upb/reflection/enum_value_def.h" +#include "upb/reflection/extension_range.h" +#include "upb/reflection/file_def.h" +#include "upb/reflection/message_def.h" +#include "upb/reflection/oneof_def.h" + +// Must be last. 
+#include "upb/port_def.inc" + +#define UPB_FIELD_TYPE_UNSPECIFIED 0 + +typedef struct { + size_t len; + char str[1]; // Null-terminated string data follows. +} str_t; + +struct upb_FieldDef { + const google_protobuf_FieldOptions* opts; + const upb_FileDef* file; + const upb_MessageDef* msgdef; + const char* full_name; + const char* json_name; + union { + int64_t sint; + uint64_t uint; + double dbl; + float flt; + bool boolean; + str_t* str; + } defaultval; + union { + const upb_OneofDef* oneof; + const upb_MessageDef* extension_scope; + } scope; + union { + const upb_MessageDef* msgdef; + const upb_EnumDef* enumdef; + const google_protobuf_FieldDescriptorProto* unresolved; + } sub; + uint32_t number_; + uint16_t index_; + uint16_t layout_index; /* Index into msgdef->layout->fields or file->exts */ + bool has_default; + bool is_extension_; + bool packed_; + bool proto3_optional_; + bool has_json_name_; + upb_FieldType type_; + upb_Label label_; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. 
+#endif +}; + +upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i) { + return (upb_FieldDef*)&f[i]; +} + +const google_protobuf_FieldOptions* upb_FieldDef_Options(const upb_FieldDef* f) { + return f->opts; +} + +bool upb_FieldDef_HasOptions(const upb_FieldDef* f) { + return f->opts != (void*)kUpbDefOptDefault; +} + +const char* upb_FieldDef_FullName(const upb_FieldDef* f) { + return f->full_name; +} + +upb_CType upb_FieldDef_CType(const upb_FieldDef* f) { + switch (f->type_) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; + } + UPB_UNREACHABLE(); +} + +upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; } + +uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; } + +upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; } + +uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; } + +bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { + return f->is_extension_; +} + +bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->packed_; } + +const char* upb_FieldDef_Name(const upb_FieldDef* f) { + return _upb_DefBuilder_FullToShort(f->full_name); +} + +const char* upb_FieldDef_JsonName(const 
upb_FieldDef* f) { + return f->json_name; +} + +bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) { + return f->has_json_name_; +} + +const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; } + +const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) { + return f->msgdef; +} + +const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) { + return f->is_extension_ ? f->scope.extension_scope : NULL; +} + +const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) { + return f->is_extension_ ? NULL : f->scope.oneof; +} + +const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { + const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f); + if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL; + return oneof; +} + +upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { + UPB_ASSERT(!upb_FieldDef_IsSubMessage(f)); + upb_MessageValue ret; + + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Bool: + return (upb_MessageValue){.bool_val = f->defaultval.boolean}; + case kUpb_CType_Int64: + return (upb_MessageValue){.int64_val = f->defaultval.sint}; + case kUpb_CType_UInt64: + return (upb_MessageValue){.uint64_val = f->defaultval.uint}; + case kUpb_CType_Enum: + case kUpb_CType_Int32: + return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint}; + case kUpb_CType_UInt32: + return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint}; + case kUpb_CType_Float: + return (upb_MessageValue){.float_val = f->defaultval.flt}; + case kUpb_CType_Double: + return (upb_MessageValue){.double_val = f->defaultval.dbl}; + case kUpb_CType_String: + case kUpb_CType_Bytes: { + str_t* str = f->defaultval.str; + if (str) { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = str->str, .size = str->len}}; + } else { + return (upb_MessageValue){ + .str_val = (upb_StringView){.data = NULL, .size = 0}}; + } + } + default: + UPB_UNREACHABLE(); + } + + return ret; 
+} + +const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL; +} + +const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL; +} + +const upb_MiniTable_Field* upb_FieldDef_MiniTable(const upb_FieldDef* f) { + UPB_ASSERT(!upb_FieldDef_IsExtension(f)); + const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); + return &layout->fields[f->layout_index]; +} + +const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable( + const upb_FieldDef* f) { + UPB_ASSERT(upb_FieldDef_IsExtension(f)); + const upb_FileDef* file = upb_FieldDef_File(f); + return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); +} + +bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { + return f->proto3_optional_; +} + +bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message; +} + +bool upb_FieldDef_IsString(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_String || + upb_FieldDef_CType(f) == kUpb_CType_Bytes; +} + +bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Optional; +} + +bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Required; +} + +bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Repeated; +} + +bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { + return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); +} + +bool upb_FieldDef_IsMap(const upb_FieldDef* f) { + return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && + upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); +} + +bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } + +bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { + return 
upb_FieldDef_IsSubMessage(f) || + upb_FieldDef_CType(f) == kUpb_CType_Enum; +} + +bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { + if (upb_FieldDef_IsRepeated(f)) return false; + const upb_FileDef* file = upb_FieldDef_File(f); + return upb_FieldDef_IsSubMessage(f) || upb_FieldDef_ContainingOneof(f) || + upb_FileDef_Syntax(file) == kUpb_Syntax_Proto2; +} + +static bool between(int32_t x, int32_t low, int32_t high) { + return x >= low && x <= high; +} + +bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } +bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } +bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } + +bool upb_FieldDef_checkdescriptortype(int32_t type) { + return between(type, 1, 18); +} + +/* Code to build defs from descriptor protos. *********************************/ + +/* There is a question of how much validation to do here. It will be difficult + * to perfectly match the amount of validation performed by proto2. But since + * this code is used to directly build defs from Ruby (for example) we do need + * to validate important constraints like uniqueness of names and numbers. 
*/ + +static size_t div_round_up(size_t n, size_t d) { return (n + d - 1) / d; } + +static size_t upb_MessageValue_sizeof(upb_CType type) { + switch (type) { + case kUpb_CType_Double: + case kUpb_CType_Int64: + case kUpb_CType_UInt64: + return 8; + case kUpb_CType_Enum: + case kUpb_CType_Int32: + case kUpb_CType_UInt32: + case kUpb_CType_Float: + return 4; + case kUpb_CType_Bool: + return 1; + case kUpb_CType_Message: + return sizeof(void*); + case kUpb_CType_Bytes: + case kUpb_CType_String: + return sizeof(upb_StringView); + } + UPB_UNREACHABLE(); +} + +static uint8_t upb_msg_fielddefsize(const upb_FieldDef* f) { + if (upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) { + upb_MapEntry ent; + UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v)); + return sizeof(ent.k); + } else if (upb_FieldDef_IsRepeated(f)) { + return sizeof(void*); + } else { + return upb_MessageValue_sizeof(upb_FieldDef_CType(f)); + } +} + +static uint32_t upb_MiniTable_place(upb_DefBuilder* ctx, upb_MiniTable* l, + size_t size, const upb_MessageDef* m) { + size_t ofs = UPB_ALIGN_UP(l->size, size); + size_t next = ofs + size; + + if (next > UINT16_MAX) { + _upb_DefBuilder_Errf(ctx, + "size of message %s exceeded max size of %zu bytes", + upb_MessageDef_FullName(m), (size_t)UINT16_MAX); + } + + l->size = next; + return ofs; +} + +static int field_number_cmp(const void* p1, const void* p2) { + const upb_MiniTable_Field* f1 = p1; + const upb_MiniTable_Field* f2 = p2; + return f1->number - f2->number; +} + +static void assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l, + upb_MiniTable_Field* fields) { + int i; + int n = upb_MessageDef_FieldCount(m); + int dense_below = 0; + for (i = 0; i < n; i++) { + upb_FieldDef* f = + (upb_FieldDef*)upb_MessageDef_FindFieldByNumber(m, fields[i].number); + UPB_ASSERT(f); + f->layout_index = i; + if (i < UINT8_MAX && fields[i].number == i + 1 && + (i == 0 || fields[i - 1].number == i)) { + dense_below = i + 1; + } + } + l->dense_below = dense_below; 
+} + +static uint8_t map_descriptortype(const upb_FieldDef* f) { + uint8_t type = upb_FieldDef_Type(f); + /* See TableDescriptorType() in upbc/generator.cc for details and + * rationale of these exceptions. */ + if (type == kUpb_FieldType_String) { + const upb_FileDef* file = upb_FieldDef_File(f); + const upb_Syntax syntax = upb_FileDef_Syntax(file); + + if (syntax == kUpb_Syntax_Proto2) return kUpb_FieldType_Bytes; + } else if (type == kUpb_FieldType_Enum) { + const upb_FileDef* file = upb_EnumDef_File(f->sub.enumdef); + const upb_Syntax syntax = upb_FileDef_Syntax(file); + + if (syntax == kUpb_Syntax_Proto3 || UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 || + // TODO(https://github.com/protocolbuffers/upb/issues/541): + // fix map enum values to check for unknown enum values and put + // them in the unknown field set. + upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) { + return kUpb_FieldType_Int32; + } + } + return type; +} + +static void fill_fieldlayout(upb_MiniTable_Field* field, + const upb_FieldDef* f) { + field->number = upb_FieldDef_Number(f); + field->descriptortype = map_descriptortype(f); + + if (upb_FieldDef_IsMap(f)) { + field->mode = + kUpb_FieldMode_Map | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift); + } else if (upb_FieldDef_IsRepeated(f)) { + field->mode = + kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift); + } else { + /* Maps descriptor type -> elem_size_lg2. 
*/ + static const uint8_t sizes[] = { + -1, /* invalid descriptor type */ + kUpb_FieldRep_8Byte, /* DOUBLE */ + kUpb_FieldRep_4Byte, /* FLOAT */ + kUpb_FieldRep_8Byte, /* INT64 */ + kUpb_FieldRep_8Byte, /* UINT64 */ + kUpb_FieldRep_4Byte, /* INT32 */ + kUpb_FieldRep_8Byte, /* FIXED64 */ + kUpb_FieldRep_4Byte, /* FIXED32 */ + kUpb_FieldRep_1Byte, /* BOOL */ + kUpb_FieldRep_StringView, /* STRING */ + kUpb_FieldRep_Pointer, /* GROUP */ + kUpb_FieldRep_Pointer, /* MESSAGE */ + kUpb_FieldRep_StringView, /* BYTES */ + kUpb_FieldRep_4Byte, /* UINT32 */ + kUpb_FieldRep_4Byte, /* ENUM */ + kUpb_FieldRep_4Byte, /* SFIXED32 */ + kUpb_FieldRep_8Byte, /* SFIXED64 */ + kUpb_FieldRep_4Byte, /* SINT32 */ + kUpb_FieldRep_8Byte, /* SINT64 */ + }; + field->mode = kUpb_FieldMode_Scalar | + (sizes[field->descriptortype] << kUpb_FieldRep_Shift); + } + + if (upb_FieldDef_IsPacked(f)) { + field->mode |= kUpb_LabelFlags_IsPacked; + } + + if (upb_FieldDef_IsExtension(f)) { + field->mode |= kUpb_LabelFlags_IsExtension; + } +} + +/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. + * It computes a dynamic layout for all of the fields in |m|. */ +void _upb_FieldDef_MakeLayout(upb_DefBuilder* ctx, const upb_MessageDef* m) { + upb_MiniTable* l = (upb_MiniTable*)upb_MessageDef_MiniTable(m); + size_t field_count = upb_MessageDef_FieldCount(m); + size_t sublayout_count = 0; + upb_MiniTable_Sub* subs; + upb_MiniTable_Field* fields; + + memset(l, 0, sizeof(*l) + sizeof(_upb_FastTable_Entry)); + + // Count sub-messages. 
+ for (size_t i = 0; i < field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + if (upb_FieldDef_IsSubMessage(f)) { + sublayout_count++; + } + if (upb_FieldDef_CType(f) == kUpb_CType_Enum && + upb_FileDef_Syntax(upb_EnumDef_File(f->sub.enumdef)) == + kUpb_Syntax_Proto2) { + sublayout_count++; + } + } + + fields = _upb_DefBuilder_Alloc(ctx, field_count * sizeof(*fields)); + subs = _upb_DefBuilder_Alloc(ctx, sublayout_count * sizeof(*subs)); + + l->field_count = upb_MessageDef_FieldCount(m); + l->fields = fields; + l->subs = subs; + l->table_mask = 0; + l->required_count = 0; + + if (upb_MessageDef_ExtensionRangeCount(m) > 0) { + if (google_protobuf_MessageOptions_message_set_wire_format( + upb_MessageDef_Options(m))) { + l->ext = kUpb_ExtMode_IsMessageSet; + } else { + l->ext = kUpb_ExtMode_Extendable; + } + } else { + l->ext = kUpb_ExtMode_NonExtendable; + } + + /* TODO(haberman): initialize fast tables so that reflection-based parsing + * can get the same speeds as linked-in types. */ + l->fasttable[0].field_parser = &_upb_FastDecoder_DecodeGeneric; + l->fasttable[0].field_data = 0; + + if (upb_MessageDef_IsMapEntry(m)) { + /* TODO(haberman): refactor this method so this special case is more + * elegant. 
*/ + const upb_FieldDef* key = upb_MessageDef_FindFieldByNumber(m, 1); + const upb_FieldDef* val = upb_MessageDef_FindFieldByNumber(m, 2); + fields[0].number = 1; + fields[1].number = 2; + fields[0].mode = kUpb_FieldMode_Scalar; + fields[1].mode = kUpb_FieldMode_Scalar; + fields[0].presence = 0; + fields[1].presence = 0; + fields[0].descriptortype = map_descriptortype(key); + fields[1].descriptortype = map_descriptortype(val); + fields[0].offset = 0; + fields[1].offset = sizeof(upb_StringView); + fields[1].submsg_index = 0; + + if (upb_FieldDef_CType(val) == kUpb_CType_Message) { + subs[0].submsg = + upb_MessageDef_MiniTable(upb_FieldDef_MessageSubDef(val)); + } + + upb_FieldDef* fielddefs = (upb_FieldDef*)upb_MessageDef_Field(m, 0); + UPB_ASSERT(fielddefs[0].number_ == 1); + UPB_ASSERT(fielddefs[1].number_ == 2); + fielddefs[0].layout_index = 0; + fielddefs[1].layout_index = 1; + + l->field_count = 2; + l->size = 2 * sizeof(upb_StringView); + l->size = UPB_ALIGN_UP(l->size, 8); + l->dense_below = 2; + return; + } + + /* Allocate data offsets in three stages: + * + * 1. hasbits. + * 2. regular fields. + * 3. oneof fields. + * + * OPT: There is a lot of room for optimization here to minimize the size. + */ + + /* Assign hasbits for required fields first. */ + size_t hasbit = 0; + + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)]; + if (upb_FieldDef_Label(f) == kUpb_Label_Required) { + field->presence = ++hasbit; + if (hasbit >= 63) { + _upb_DefBuilder_Errf(ctx, "Message with >=63 required fields: %s", + upb_MessageDef_FullName(m)); + } + l->required_count++; + } + } + + /* Allocate hasbits and set basic field attributes. 
*/ + sublayout_count = 0; + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)]; + + fill_fieldlayout(field, f); + + if (field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group) { + field->submsg_index = sublayout_count++; + subs[field->submsg_index].submsg = + upb_MessageDef_MiniTable(upb_FieldDef_MessageSubDef(f)); + } else if (field->descriptortype == kUpb_FieldType_Enum) { + field->submsg_index = sublayout_count++; + subs[field->submsg_index].subenum = + _upb_EnumDef_MiniTable(upb_FieldDef_EnumSubDef(f)); + UPB_ASSERT(subs[field->submsg_index].subenum); + } + + if (upb_FieldDef_Label(f) == kUpb_Label_Required) { + /* Hasbit was already assigned. */ + } else if (upb_FieldDef_HasPresence(f) && + !upb_FieldDef_RealContainingOneof(f)) { + /* We don't use hasbit 0, so that 0 can indicate "no presence" in the + * table. This wastes one hasbit, but we don't worry about it for now. */ + field->presence = ++hasbit; + } else { + field->presence = 0; + } + } + + /* Account for space used by hasbits. */ + l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0; + + /* Allocate non-oneof fields. */ + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + size_t field_size = upb_msg_fielddefsize(f); + size_t index = upb_FieldDef_Index(f); + + if (upb_FieldDef_RealContainingOneof(f)) { + /* Oneofs are handled separately below. */ + continue; + } + + fields[index].offset = upb_MiniTable_place(ctx, l, field_size, m); + } + + /* Allocate oneof fields. Each oneof field consists of a uint32 for the case + * and space for the actual data. */ + for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { + const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); + size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. 
*/ + size_t field_size = 0; + uint32_t case_offset; + uint32_t data_offset; + + if (upb_OneofDef_IsSynthetic(o)) continue; + + if (upb_OneofDef_FieldCount(o) == 0) { + _upb_DefBuilder_Errf(ctx, "Oneof must have at least one field (%s)", + upb_OneofDef_FullName(o)); + } + + /* Calculate field size: the max of all field sizes. */ + for (int j = 0; j < upb_OneofDef_FieldCount(o); j++) { + const upb_FieldDef* f = upb_OneofDef_Field(o, j); + field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f)); + } + + /* Align and allocate case offset. */ + case_offset = upb_MiniTable_place(ctx, l, case_size, m); + data_offset = upb_MiniTable_place(ctx, l, field_size, m); + + for (int i = 0; i < upb_OneofDef_FieldCount(o); i++) { + const upb_FieldDef* f = upb_OneofDef_Field(o, i); + fields[upb_FieldDef_Index(f)].offset = data_offset; + fields[upb_FieldDef_Index(f)].presence = ~case_offset; + } + } + + /* Size of the entire structure should be a multiple of its greatest + * alignment. TODO: track overall alignment for real? */ + l->size = UPB_ALIGN_UP(l->size, 8); + + /* Sort fields by number. */ + if (fields) { + qsort(fields, upb_MessageDef_FieldCount(m), sizeof(*fields), + field_number_cmp); + } + assign_layout_indices(m, l, fields); +} + +static bool streql2(const char* a, size_t n, const char* b) { + return n == strlen(b) && memcmp(a, b, n) == 0; +} + +static size_t getjsonname(const char* name, size_t size, char* buf, + size_t len) { + size_t src, dst = 0; + bool ucase_next = false; + +#define WRITE(byte) \ + ++dst; \ + if (dst < len) \ + buf[dst - 1] = byte; \ + else if (dst == len) \ + buf[dst - 1] = '\0' + + if (!name) { + WRITE('\0'); + return 0; + } + + /* Implement the transformation as described in the spec: + * 1. upper case all letters after an underscore. + * 2. remove all underscores. 
+ */ + for (src = 0; src < size; src++) { + if (name[src] == '_') { + ucase_next = true; + continue; + } + + if (ucase_next) { + WRITE(toupper(name[src])); + ucase_next = false; + } else { + WRITE(name[src]); + } + } + + WRITE('\0'); + return dst; + +#undef WRITE +} + +static char* makejsonname(upb_DefBuilder* ctx, const char* name, size_t size) { + size_t json_size = size + 1; // +1 for trailing '\0' + char* json_name = _upb_DefBuilder_Alloc(ctx, json_size); + getjsonname(name, size, json_name, json_size); + return json_name; +} + +static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + if (!ret) _upb_DefBuilder_OomErr(ctx); + ret->len = len; + if (len) memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; +} + +static str_t* unescape(upb_DefBuilder* ctx, const upb_FieldDef* f, + const char* data, size_t len) { + // Size here is an upper bound; escape sequences could ultimately shrink it. + str_t* ret = _upb_DefBuilder_Alloc(ctx, sizeof(*ret) + len); + char* dst = &ret->str[0]; + const char* src = data; + const char* end = data + len; + + while (src < end) { + if (*src == '\\') { + src++; + *dst++ = _upb_DefBuilder_ParseEscape(ctx, f, &src, end); + } else { + *dst++ = *src++; + } + } + + ret->len = dst - &ret->str[0]; + return ret; +} + +static void parse_default(upb_DefBuilder* ctx, const char* str, size_t len, + upb_FieldDef* f) { + char* end; + char nullz[64]; + errno = 0; + + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + case kUpb_CType_UInt32: + case kUpb_CType_UInt64: + case kUpb_CType_Double: + case kUpb_CType_Float: + /* Standard C number parsing functions expect null-terminated strings. 
*/ + if (len >= sizeof(nullz) - 1) { + _upb_DefBuilder_Errf(ctx, "Default too long: %.*s", (int)len, str); + } + memcpy(nullz, str, len); + nullz[len] = '\0'; + str = nullz; + break; + default: + break; + } + + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: { + long val = strtol(str, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.sint = val; + break; + } + case kUpb_CType_Enum: { + const upb_EnumDef* e = f->sub.enumdef; + const upb_EnumValueDef* ev = + upb_EnumDef_FindValueByNameWithSize(e, str, len); + if (!ev) { + goto invalid; + } + f->defaultval.sint = upb_EnumValueDef_Number(ev); + break; + } + case kUpb_CType_Int64: { + long long val = strtoll(str, &end, 0); + if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.sint = val; + break; + } + case kUpb_CType_UInt32: { + unsigned long val = strtoul(str, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_UInt64: { + unsigned long long val = strtoull(str, &end, 0); + if (val > UINT64_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case kUpb_CType_Double: { + double val = strtod(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.dbl = val; + break; + } + case kUpb_CType_Float: { + float val = strtof(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.flt = val; + break; + } + case kUpb_CType_Bool: { + if (streql2(str, len, "false")) { + f->defaultval.boolean = false; + } else if (streql2(str, len, "true")) { + f->defaultval.boolean = true; + } else { + goto invalid; + } + break; + } + case kUpb_CType_String: + f->defaultval.str = newstr(ctx, str, len); + break; + case kUpb_CType_Bytes: + f->defaultval.str = unescape(ctx, f, str, len); + break; + case kUpb_CType_Message: + /* Should not have 
a default value. */ + _upb_DefBuilder_Errf(ctx, "Message should not have a default (%s)", + upb_FieldDef_FullName(f)); + } + + return; + +invalid: + _upb_DefBuilder_Errf(ctx, "Invalid default '%.*s' for field %s of type %d", + (int)len, str, upb_FieldDef_FullName(f), + (int)upb_FieldDef_Type(f)); +} + +static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { + switch (upb_FieldDef_CType(f)) { + case kUpb_CType_Int32: + case kUpb_CType_Int64: + f->defaultval.sint = 0; + break; + case kUpb_CType_UInt64: + case kUpb_CType_UInt32: + f->defaultval.uint = 0; + break; + case kUpb_CType_Double: + case kUpb_CType_Float: + f->defaultval.dbl = 0; + break; + case kUpb_CType_String: + case kUpb_CType_Bytes: + f->defaultval.str = newstr(ctx, NULL, 0); + break; + case kUpb_CType_Bool: + f->defaultval.boolean = false; + break; + case kUpb_CType_Enum: { + const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); + f->defaultval.sint = upb_EnumValueDef_Number(v); + } + case kUpb_CType_Message: + break; + } +} + +static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, + upb_MessageDef* m, + const google_protobuf_FieldDescriptorProto* field_proto, + upb_FieldDef* f) { + const char* json_name; + + // Must happen before _upb_DefBuilder_Add() + f->file = _upb_DefBuilder_File(ctx); + + if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { + _upb_DefBuilder_Errf(ctx, "field has no name"); + } + + const upb_StringView name = google_protobuf_FieldDescriptorProto_name(field_proto); + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + const char* full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + + if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) { + const upb_StringView sv = + google_protobuf_FieldDescriptorProto_json_name(field_proto); + json_name = upb_strdup2(sv.data, sv.size, ctx->arena); + if (!json_name) _upb_DefBuilder_OomErr(ctx); + f->has_json_name_ = true; + } else { + json_name = makejsonname(ctx, 
name.data, name.size); + f->has_json_name_ = false; + } + + f->full_name = full_name; + f->json_name = json_name; + f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); + f->number_ = google_protobuf_FieldDescriptorProto_number(field_proto); + f->scope.oneof = NULL; + f->proto3_optional_ = + google_protobuf_FieldDescriptorProto_proto3_optional(field_proto); + f->msgdef = m; + + bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto); + bool has_type_name = + google_protobuf_FieldDescriptorProto_has_type_name(field_proto); + + f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); + + if (has_type) { + switch (f->type_) { + case kUpb_FieldType_Message: + case kUpb_FieldType_Group: + case kUpb_FieldType_Enum: + if (!has_type_name) { + _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", + (int)f->type_, full_name); + } + break; + default: + if (has_type_name) { + _upb_DefBuilder_Errf( + ctx, "invalid type for field with type_name set (%s, %d)", + full_name, (int)f->type_); + } + } + } else if (has_type_name) { + f->type_ = + UPB_FIELD_TYPE_UNSPECIFIED; // We'll fill this in in resolve_fielddef() + } + + if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) { + _upb_DefBuilder_Errf(ctx, "invalid type for field %s (%d)", f->full_name, + f->type_); + } + + if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) { + _upb_DefBuilder_Errf(ctx, "invalid label for field %s (%d)", f->full_name, + f->label_); + } + + /* We can't resolve the subdef or (in the case of extensions) the containing + * message yet, because it may not have been defined yet. We stash a pointer + * to the field_proto until later when we can properly resolve it. 
*/ + f->sub.unresolved = field_proto; + + if (f->label_ == kUpb_Label_Required && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) { + _upb_DefBuilder_Errf(ctx, "proto3 fields cannot be required (%s)", + f->full_name); + } + + if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) { + uint32_t oneof_index = google_protobuf_FieldDescriptorProto_oneof_index(field_proto); + + if (upb_FieldDef_Label(f) != kUpb_Label_Optional) { + _upb_DefBuilder_Errf(ctx, "fields in oneof must have OPTIONAL label (%s)", + f->full_name); + } + + if (!m) { + _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); + } + + if (oneof_index >= upb_MessageDef_OneofCount(m)) { + _upb_DefBuilder_Errf(ctx, "oneof_index out of range (%s)", f->full_name); + } + + upb_OneofDef* oneof = (upb_OneofDef*)upb_MessageDef_Oneof(m, oneof_index); + f->scope.oneof = oneof; + + bool ok = _upb_OneofDef_Insert(oneof, f, name.data, name.size, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + } else { + if (f->proto3_optional_) { + _upb_DefBuilder_Errf(ctx, + "field with proto3_optional was not in a oneof (%s)", + f->full_name); + } + } + + UBP_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); + + if (google_protobuf_FieldOptions_has_packed(f->opts)) { + f->packed_ = google_protobuf_FieldOptions_packed(f->opts); + } else { + // Repeated fields default to packed for proto3 only. 
+    f->packed_ = upb_FieldDef_IsPrimitive(f) &&
+                 f->label_ == kUpb_Label_Repeated &&
+                 upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3;
+  }
+}
+
+static void _upb_FieldDef_CreateExt(
+    upb_DefBuilder* ctx, const char* prefix,
+    const google_protobuf_FieldDescriptorProto* field_proto, upb_MessageDef* m,
+    upb_FieldDef* f) {
+  _upb_FieldDef_Create(ctx, prefix, m, field_proto, f);
+  f->is_extension_ = true;
+
+  f->scope.extension_scope = m;
+  _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT));
+  f->layout_index = ctx->ext_count++;
+
+  if (ctx->layout) {
+    UPB_ASSERT(_upb_FieldDef_ExtensionMiniTable(f)->field.number == f->number_);
+  }
+}
+
+static void _upb_FieldDef_CreateNotExt(
+    upb_DefBuilder* ctx, const char* prefix,
+    const google_protobuf_FieldDescriptorProto* field_proto, upb_MessageDef* m,
+    upb_FieldDef* f) {
+  _upb_FieldDef_Create(ctx, prefix, m, field_proto, f);
+  f->is_extension_ = false;
+
+  _upb_MessageDef_InsertField(ctx, m, f);
+
+  if (ctx->layout) {
+    const upb_MiniTable* mt = upb_MessageDef_MiniTable(m);
+    const upb_MiniTable_Field* fields = mt->fields;
+    const int count = mt->field_count;
+    bool found = false;
+    for (int i = 0; i < count; i++) {
+      if (fields[i].number == f->number_) {
+        f->layout_index = i;
+        found = true;
+        break;
+      }
+    }
+    UPB_ASSERT(found);  // A supplied layout must list this field number.
+  }
+}
+
+upb_FieldDef* _upb_FieldDefs_New(
+    upb_DefBuilder* ctx, int n,
+    const google_protobuf_FieldDescriptorProto* const* protos, const char* prefix,
+    upb_MessageDef* m, bool is_ext) {
+  _upb_DefType_CheckPadding(sizeof(upb_FieldDef));
+  upb_FieldDef* f =
+      (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n);
+
+  if (is_ext) {
+    for (int i = 0; i < n; i++) {  // |n| is int: keep the index signed too.
+      _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, &f[i]);
+      f[i].index_ = i;
+    }
+  } else {
+    for (int i = 0; i < n; i++) {
+      _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, &f[i]);
+      f[i].index_ = i;
+    }
+  }
+  return f;
+}
+
+static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix,
+                           upb_FieldDef* f) {
+  const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved;
+  upb_StringView name =
+      google_protobuf_FieldDescriptorProto_type_name(field_proto);
+  bool has_name =
+      google_protobuf_FieldDescriptorProto_has_type_name(field_proto);
+  switch ((int)f->type_) {
+    case UPB_FIELD_TYPE_UNSPECIFIED: {
+      // Type was not specified and must be inferred.
+      UPB_ASSERT(has_name);
+      upb_deftype_t type;
+      const void* def =
+          _upb_DefBuilder_ResolveAny(ctx, f->full_name, prefix, name, &type);
+      switch (type) {
+        case UPB_DEFTYPE_ENUM:
+          f->sub.enumdef = def;
+          f->type_ = kUpb_FieldType_Enum;
+          break;
+        case UPB_DEFTYPE_MSG:
+          f->sub.msgdef = def;
+          f->type_ = kUpb_FieldType_Message;  // It appears there is no way of
+                                              // this being a group.
+          break;
+        default:
+          _upb_DefBuilder_Errf(ctx, "Couldn't resolve type name for field %s",
+                               f->full_name);
+      }
+    } break;  // f->sub is already set; falling through would re-resolve as MSG.
+    case kUpb_FieldType_Message:
+    case kUpb_FieldType_Group:
+      UPB_ASSERT(has_name);
+      f->sub.msgdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name,
+                                              UPB_DEFTYPE_MSG);
+      break;
+    case kUpb_FieldType_Enum:
+      UPB_ASSERT(has_name);
+      f->sub.enumdef = _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name,
+                                               UPB_DEFTYPE_ENUM);
+      break;
+    default:
+      // No resolution necessary.
+      break;
+  }
+}
+
+static void resolve_extension(upb_DefBuilder* ctx, const char* prefix,
+                              upb_FieldDef* f,
+                              const google_protobuf_FieldDescriptorProto* field_proto) {
+  if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
+    _upb_DefBuilder_Errf(ctx, "extension for field '%s' had no extendee",
+                         f->full_name);
+  }
+
+  upb_StringView name = google_protobuf_FieldDescriptorProto_extendee(field_proto);
+  const upb_MessageDef* m =
+      _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
+  f->msgdef = m;  // The extendee message.
+
+  bool found = false;
+
+  for (int i = 0, n = upb_MessageDef_ExtensionRangeCount(m); i < n; i++) {
+    const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i);
+    if (upb_ExtensionRange_Start(r) <= f->number_ &&
+        f->number_ < upb_ExtensionRange_End(r)) {
+      found = true;
+      break;
+    }
+  }
+
+  if (!found) {
+    _upb_DefBuilder_Errf(
+        ctx,
+        "field number %u in extension %s has no extension range in "
+        "message %s",
+        (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(f->msgdef));
+  }
+
+  const upb_MiniTable_Extension* ext = _upb_FieldDef_ExtensionMiniTable(f);
+  if (ctx->layout) {
+    UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number);
+  } else {
+    upb_MiniTable_Extension* mut_ext = (upb_MiniTable_Extension*)ext;
+    fill_fieldlayout(&mut_ext->field, f);
+    mut_ext->field.presence = 0;
+    mut_ext->field.offset = 0;
+    mut_ext->field.submsg_index = 0;
+    mut_ext->extendee = upb_MessageDef_MiniTable(f->msgdef);
+    mut_ext->sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef);
+  }
+
+  bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f, ctx->arena);
+  if (!ok) _upb_DefBuilder_OomErr(ctx);
+}
+
+static void resolve_default(upb_DefBuilder* ctx, upb_FieldDef* f,
+                            const google_protobuf_FieldDescriptorProto* field_proto) {
+  // Have to delay resolving of the default value until now because of the enum
+  // case, since enum defaults are specified with a label.
+  if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
+    upb_StringView defaultval =
+        google_protobuf_FieldDescriptorProto_default_value(field_proto);
+
+    if (upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3) {
+      _upb_DefBuilder_Errf(ctx,
+                           "proto3 fields cannot have explicit defaults (%s)",
+                           f->full_name);
+    }
+
+    if (upb_FieldDef_IsSubMessage(f)) {
+      _upb_DefBuilder_Errf(ctx,
+                           "message fields cannot have explicit defaults (%s)",
+                           f->full_name);
+    }
+
+    parse_default(ctx, defaultval.data, defaultval.size, f);
+    f->has_default = true;
+  } else {
+    set_default_default(ctx, f);
+    f->has_default = false;
+  }
+}
+
+void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix,
+                           upb_FieldDef* f) {
+  // We have to stash this away since resolve_subdef() may overwrite it.
+  const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved;
+
+  resolve_subdef(ctx, prefix, f);
+  resolve_default(ctx, f, field_proto);
+
+  if (f->is_extension_) {
+    resolve_extension(ctx, prefix, f, field_proto);
+  }
+}
diff --git a/upb/reflection/field_def.h b/upb/reflection/field_def.h
new file mode 100644
index 0000000000..409ee70733
--- /dev/null
+++ b/upb/reflection/field_def.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2009-2021, Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_FIELD_DEF_H_ +#define UPB_REFLECTION_FIELD_DEF_H_ + +#include "upb/reflection/common.h" + +// Must be last. +#include "upb/port_def.inc" + +// Maximum field number allowed for FieldDefs. +// This is an inherent limit of the protobuf wire format. 
+#define kUpb_MaxFieldNumber ((1 << 29) - 1) + +#ifdef __cplusplus +extern "C" { +#endif + +const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f); +const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f); +upb_CType upb_FieldDef_CType(const upb_FieldDef* f); +const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f); +const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f); +const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f); +const char* upb_FieldDef_FullName(const upb_FieldDef* f); +bool upb_FieldDef_HasDefault(const upb_FieldDef* f); +bool upb_FieldDef_HasJsonName(const upb_FieldDef* f); +bool upb_FieldDef_HasOptions(const upb_FieldDef* f); +bool upb_FieldDef_HasPresence(const upb_FieldDef* f); +bool upb_FieldDef_HasSubDef(const upb_FieldDef* f); +uint32_t upb_FieldDef_Index(const upb_FieldDef* f); +bool upb_FieldDef_IsExtension(const upb_FieldDef* f); +bool upb_FieldDef_IsMap(const upb_FieldDef* f); +bool upb_FieldDef_IsOptional(const upb_FieldDef* f); +bool upb_FieldDef_IsPacked(const upb_FieldDef* f); +bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f); +bool upb_FieldDef_IsRepeated(const upb_FieldDef* f); +bool upb_FieldDef_IsRequired(const upb_FieldDef* f); +bool upb_FieldDef_IsString(const upb_FieldDef* f); +bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f); +const char* upb_FieldDef_JsonName(const upb_FieldDef* f); +upb_Label upb_FieldDef_Label(const upb_FieldDef* f); +const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f); +const upb_MiniTable_Field* upb_FieldDef_MiniTable(const upb_FieldDef* f); +const char* upb_FieldDef_Name(const upb_FieldDef* f); +uint32_t upb_FieldDef_Number(const upb_FieldDef* f); +const google_protobuf_FieldOptions* upb_FieldDef_Options(const upb_FieldDef* f); +const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f); +upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - 
DO NOT USE ///////////////////////// + +upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i); +const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable( + const upb_FieldDef* f); +bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f); + +// Allocate and initialize an array of |n| field defs. +upb_FieldDef* _upb_FieldDefs_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_FieldDescriptorProto* const* protos, const char* prefix, + upb_MessageDef* m, bool is_ext); + +void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f); + +void _upb_FieldDef_MakeLayout(upb_DefBuilder* ctx, const upb_MessageDef* m); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_FIELD_DEF_H_ */ diff --git a/upb/reflection/file_def.c b/upb/reflection/file_def.c new file mode 100644 index 0000000000..4a48286420 --- /dev/null +++ b/upb/reflection/file_def.c @@ -0,0 +1,339 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "upb/reflection/file_def.h"
+
+#include "upb/reflection/def_builder.h"
+#include "upb/reflection/def_pool.h"
+#include "upb/reflection/enum_def.h"
+#include "upb/reflection/field_def.h"
+#include "upb/reflection/message_def.h"
+#include "upb/reflection/service_def.h"
+
+// Must be last.
+#include "upb/port_def.inc"
+
+struct upb_FileDef {
+  const google_protobuf_FileOptions* opts;
+  const char* name;
+  const char* package;  // NULL when the file declares no package.
+
+  const upb_FileDef** deps;
+  const int32_t* public_deps;  // Indexes into |deps|.
+  const int32_t* weak_deps;    // Indexes into |deps|.
+  const upb_MessageDef* top_lvl_msgs;
+  const upb_EnumDef* top_lvl_enums;
+  const upb_FieldDef* top_lvl_exts;
+  const upb_ServiceDef* services;
+  const upb_MiniTable_Extension** ext_layouts;
+  const upb_DefPool* symtab;
+
+  int dep_count;
+  int public_dep_count;
+  int weak_dep_count;
+  int top_lvl_msg_count;
+  int top_lvl_enum_count;
+  int top_lvl_ext_count;
+  int service_count;
+  int ext_count;  // All exts in the file.
+  upb_Syntax syntax;
+};
+
+const google_protobuf_FileOptions* upb_FileDef_Options(const upb_FileDef* f) {
+  return f->opts;
+}
+
+bool upb_FileDef_HasOptions(const upb_FileDef* f) {
+  return f->opts != (void*)kUpbDefOptDefault;
+}
+
+const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; }
+
+const char* upb_FileDef_Package(const upb_FileDef* f) {
+  return f->package ? f->package : "";  // Never NULL, unlike RawPackage.
+}
+
+const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; }
+
+upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; }
+
+int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) {
+  return f->top_lvl_msg_count;
+}
+
+int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; }
+
+int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) {
+  return f->public_dep_count;
+}
+
+int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) {
+  return f->weak_dep_count;
+}
+
+const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) {
+  return f->public_deps;
+}
+
+const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) {
+  return f->weak_deps;
+}
+
+int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) {
+  return f->top_lvl_enum_count;
+}
+
+int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) {
+  return f->top_lvl_ext_count;
+}
+
+int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; }
+
+const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) {
+  UPB_ASSERT(0 <= i && i < f->dep_count);
+  return f->deps[i];
+}
+
+const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) {
+  UPB_ASSERT(0 <= i && i < f->public_dep_count);
+  return f->deps[f->public_deps[i]];
+}
+
+const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) {
+  UPB_ASSERT(0 <= i && i < f->weak_dep_count);  // Bound by the weak list size.
+  return f->deps[f->weak_deps[i]];
+}
+
+const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) {
+  UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count);
+  return _upb_MessageDef_At(f->top_lvl_msgs, i);
+}
+
+const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) {
+  UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count);
+  return _upb_EnumDef_At(f->top_lvl_enums, i);
+}
+
+const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) {
+  UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count);
+  return _upb_FieldDef_At(f->top_lvl_exts, i);
+}
+
+const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) {
+  UPB_ASSERT(0 <= i && i < f->service_count);
+  return _upb_ServiceDef_At(f->services, i);
+}
+
+const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; }
+
+const upb_MiniTable_Extension* _upb_FileDef_ExtensionMiniTable(
+    const upb_FileDef* f, int i) {
+  return f->ext_layouts[i];
+}
+
+static char* strviewdup(upb_DefBuilder* ctx, upb_StringView view) {
+  char* ret = upb_strdup2(view.data, view.size, _upb_DefBuilder_Arena(ctx));
+  if (!ret) _upb_DefBuilder_OomErr(ctx);
+  return ret;
+}
+
+static bool streql_view(upb_StringView view, const char* b) {
+  return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0;
+}
+
+static int count_exts_in_msg(const google_protobuf_DescriptorProto* msg_proto) {
+  size_t n;
+  google_protobuf_DescriptorProto_extension(msg_proto, &n);
+  int ext_count = n;
+
+  const google_protobuf_DescriptorProto* const* nested_msgs =
+      google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
+  for (size_t i = 0; i < n; i++) {
+    ext_count += count_exts_in_msg(nested_msgs[i]);  // Recurse: nested exts.
+  }
+
+  return ext_count;
+}
+
+// Allocate and initialize one file def, and add it to the context object.
+void _upb_FileDef_Create(upb_DefBuilder* ctx,
+                         const google_protobuf_FileDescriptorProto* file_proto) {
+  upb_FileDef* file = _upb_DefBuilder_Alloc(ctx, sizeof(upb_FileDef));
+  ctx->file = file;
+
+  const google_protobuf_DescriptorProto* const* msgs;
+  const google_protobuf_EnumDescriptorProto* const* enums;
+  const google_protobuf_FieldDescriptorProto* const* exts;
+  const google_protobuf_ServiceDescriptorProto* const* services;
+  const upb_StringView* strs;
+  const int32_t* public_deps;
+  const int32_t* weak_deps;
+  size_t i, n;
+
+  file->symtab = ctx->symtab;
+
+  // Count all extensions in the file, to build a flat array of layouts.
+  google_protobuf_FileDescriptorProto_extension(file_proto, &n);
+  int ext_count = n;
+  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
+  for (i = 0; i < n; i++) {  // Reuse the size_t |i|; |n| is unsigned.
+    ext_count += count_exts_in_msg(msgs[i]);
+  }
+  file->ext_count = ext_count;
+
+  if (ctx->layout) {
+    // We are using the ext layouts that were passed in.
+    file->ext_layouts = ctx->layout->exts;
+    if (ctx->layout->ext_count != file->ext_count) {
+      _upb_DefBuilder_Errf(ctx,
+                           "Extension count did not match layout (%d vs %d)",
+                           ctx->layout->ext_count, file->ext_count);
+    }
+  } else {
+    // We are building ext layouts from scratch.
+    file->ext_layouts = _upb_DefBuilder_Alloc(
+        ctx, sizeof(*file->ext_layouts) * file->ext_count);
+    upb_MiniTable_Extension* ext =
+        _upb_DefBuilder_Alloc(ctx, sizeof(*ext) * file->ext_count);
+    for (int j = 0; j < file->ext_count; j++) {
+      file->ext_layouts[j] = &ext[j];
+    }
+  }
+
+  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
+    _upb_DefBuilder_Errf(ctx, "File has no name");
+  }
+
+  file->name = strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
+
+  upb_StringView package = google_protobuf_FileDescriptorProto_package(file_proto);
+  if (package.size) {
+    _upb_DefBuilder_CheckIdentFull(ctx, package);
+    file->package = strviewdup(ctx, package);
+  } else {
+    file->package = NULL;
+  }
+
+  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
+    upb_StringView syntax = google_protobuf_FileDescriptorProto_syntax(file_proto);
+
+    if (streql_view(syntax, "proto2")) {
+      file->syntax = kUpb_Syntax_Proto2;
+    } else if (streql_view(syntax, "proto3")) {
+      file->syntax = kUpb_Syntax_Proto3;
+    } else {
+      _upb_DefBuilder_Errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'",
+                           UPB_STRINGVIEW_ARGS(syntax));
+    }
+  } else {
+    file->syntax = kUpb_Syntax_Proto2;  // Absent syntax means proto2.
+  }
+
+  // Read options.
+  UBP_DEF_SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto);
+
+  // Verify dependencies.
+  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
+  file->dep_count = n;
+  file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n);
+
+  for (i = 0; i < n; i++) {
+    upb_StringView str = strs[i];
+    file->deps[i] =
+        upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size);
+    if (!file->deps[i]) {
+      _upb_DefBuilder_Errf(ctx,
+                           "Depends on file '" UPB_STRINGVIEW_FORMAT
+                           "', but it has not been loaded",
+                           UPB_STRINGVIEW_ARGS(str));
+    }
+  }
+
+  public_deps = google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n);
+  file->public_dep_count = n;
+  file->public_deps =
+      _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n);
+  int32_t* mutable_public_deps = (int32_t*)file->public_deps;
+  for (i = 0; i < n; i++) {
+    if (public_deps[i] < 0 || public_deps[i] >= file->dep_count) {
+      _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range",
+                           (int)public_deps[i]);
+    }
+    mutable_public_deps[i] = public_deps[i];  // Later used to index |deps|.
+  }
+
+  weak_deps = google_protobuf_FileDescriptorProto_weak_dependency(file_proto, &n);
+  file->weak_dep_count = n;
+  file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n);
+  int32_t* mutable_weak_deps = (int32_t*)file->weak_deps;
+  for (i = 0; i < n; i++) {
+    if (weak_deps[i] < 0 || weak_deps[i] >= file->dep_count) {
+      _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range",
+                           (int)weak_deps[i]);
+    }
+    mutable_weak_deps[i] = weak_deps[i];  // Later used to index |deps|.
+  }
+
+  // Create enums.
+  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
+  file->top_lvl_enum_count = n;
+  file->top_lvl_enums = _upb_EnumDefs_New(ctx, n, enums, NULL);
+
+  // Create extensions.
+  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
+  file->top_lvl_ext_count = n;
+  file->top_lvl_exts =
+      _upb_FieldDefs_New(ctx, n, exts, file->package, NULL, true);
+
+  // Create messages.
+  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
+  file->top_lvl_msg_count = n;
+  file->top_lvl_msgs = _upb_MessageDefs_New(ctx, n, msgs, NULL);
+
+  // Create services.
+  services = google_protobuf_FileDescriptorProto_service(file_proto, &n);
+  file->service_count = n;
+  file->services = _upb_ServiceDefs_New(ctx, n, services);
+
+  // Now that all names are in the table, build layouts and resolve refs.
+  for (i = 0; i < (size_t)file->top_lvl_ext_count; i++) {
+    _upb_FieldDef_Resolve(
+        ctx, file->package,
+        (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i));
+  }
+
+  for (i = 0; i < (size_t)file->top_lvl_msg_count; i++) {
+    upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i);
+    _upb_MessageDef_Resolve(ctx, m);
+  }
+
+  if (file->ext_count) {
+    bool ok = _upb_extreg_add(_upb_DefPool_ExtReg(ctx->symtab),
+                              file->ext_layouts, file->ext_count);
+    if (!ok) _upb_DefBuilder_OomErr(ctx);
+  }
+}
diff --git a/upb/reflection/file_def.h b/upb/reflection/file_def.h
new file mode 100644
index 0000000000..0ccad42f4b
--- /dev/null
+++ b/upb/reflection/file_def.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2009-2021, Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_FILE_DEF_H_ +#define UPB_REFLECTION_FILE_DEF_H_ + +#include "upb/reflection/common.h" + +// Must be last. +#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i); +int upb_FileDef_DependencyCount(const upb_FileDef* f); +bool upb_FileDef_HasOptions(const upb_FileDef* f); +const char* upb_FileDef_Name(const upb_FileDef* f); +const google_protobuf_FileOptions* upb_FileDef_Options(const upb_FileDef* f); +const char* upb_FileDef_Package(const upb_FileDef* f); +const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f); + +const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i); +int upb_FileDef_PublicDependencyCount(const upb_FileDef* f); + +const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i); +int upb_FileDef_ServiceCount(const upb_FileDef* f); + +upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f); + +const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i); +int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f); + +const upb_FieldDef* 
upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i); +int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f); + +const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i); +int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f); + +const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i); +int upb_FileDef_WeakDependencyCount(const upb_FileDef* f); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +const upb_MiniTable_Extension* _upb_FileDef_ExtensionMiniTable( + const upb_FileDef* f, int i); +const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f); +const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f); + +// upb_FileDef_Package() returns "" if f->package is NULL, this does not. +const char* _upb_FileDef_RawPackage(const upb_FileDef* f); + +void _upb_FileDef_Create(upb_DefBuilder* ctx, + const google_protobuf_FileDescriptorProto* file_proto); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_FILE_DEF_H_ */ diff --git a/upb/reflection.c b/upb/reflection/message.c similarity index 97% rename from upb/reflection.c rename to upb/reflection/message.c index f2913b2465..6cd92d08d2 100644 --- a/upb/reflection.c +++ b/upb/reflection/message.c @@ -25,13 +25,21 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/reflection.h" +#include "upb/reflection/message.h" #include #include "upb/internal/table.h" #include "upb/map.h" #include "upb/msg.h" +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_pool.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/field_def.h" +#include "upb/reflection/message_def.h" +#include "upb/reflection/oneof_def.h" + +// Must be last. 
#include "upb/port_def.inc" static size_t get_field_size(const upb_MiniTable_Field* f) { @@ -59,9 +67,6 @@ static size_t get_field_size(const upb_MiniTable_Field* f) { return upb_IsRepeatedOrMap(f) ? sizeof(void*) : sizes[f->descriptortype]; } -/** upb_Message - * *******************************************************************/ - upb_Message* upb_Message_New(const upb_MessageDef* m, upb_Arena* a) { return _upb_Message_New(upb_MessageDef_MiniTable(m), a); } diff --git a/upb/reflection/message.h b/upb/reflection/message.h new file mode 100644 index 0000000000..68728c0c93 --- /dev/null +++ b/upb/reflection/message.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_REFLECTION_MESSAGE_H_ +#define UPB_REFLECTION_MESSAGE_H_ + +#include "upb/map.h" +#include "upb/reflection/common.h" + +// Must be last. +#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f); + +/* Creates a new message of the given type in the given arena. */ +upb_Message* upb_Message_New(const upb_MessageDef* m, upb_Arena* a); + +/* Returns the value associated with this field. */ +upb_MessageValue upb_Message_Get(const upb_Message* msg, const upb_FieldDef* f); + +/* Returns a mutable pointer to a map, array, or submessage value. If the given + * arena is non-NULL this will construct a new object if it was not previously + * present. May not be called for primitive fields. */ +upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, + const upb_FieldDef* f, + upb_Arena* a); + +/* May only be called for fields where upb_FieldDef_HasPresence(f) == true. */ +bool upb_Message_Has(const upb_Message* msg, const upb_FieldDef* f); + +/* Returns the field that is set in the oneof, or NULL if none are set. */ +const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, + const upb_OneofDef* o); + +/* Sets the given field to the given value. For a msg/array/map/string, the + * caller must ensure that the target data outlives |msg| (by living either in + * the same arena or a different arena that outlives it). + * + * Returns false if allocation fails. 
*/ +bool upb_Message_Set(upb_Message* msg, const upb_FieldDef* f, + upb_MessageValue val, upb_Arena* a); + +/* Clears any field presence and sets the value back to its default. */ +void upb_Message_ClearField(upb_Message* msg, const upb_FieldDef* f); + +/* Clear all data and unknown fields. */ +void upb_Message_Clear(upb_Message* msg, const upb_MessageDef* m); + +/* Iterate over present fields. + * + * size_t iter = kUpb_Message_Begin; + * const upb_FieldDef *f; + * upb_MessageValue val; + * while (upb_Message_Next(msg, m, ext_pool, &f, &val, &iter)) { + * process_field(f, val); + * } + * + * If ext_pool is NULL, no extensions will be returned. If the given symtab + * returns extensions that don't match what is in this message, those extensions + * will be skipped. + */ + +#define kUpb_Message_Begin -1 +bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, + const upb_DefPool* ext_pool, const upb_FieldDef** f, + upb_MessageValue* val, size_t* iter); + +/* Clears all unknown field data from this message and all submessages. */ +bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, + int maxdepth); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_MESSAGE_H_ */ diff --git a/upb/reflection/message.hpp b/upb/reflection/message.hpp new file mode 100644 index 0000000000..425be98d2d --- /dev/null +++ b/upb/reflection/message.hpp @@ -0,0 +1,37 @@ +// Copyright (c) 2009-2021, Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Google LLC nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef UPB_REFLECTION_MESSAGE_HPP_ +#define UPB_REFLECTION_MESSAGE_HPP_ + +#include "upb/reflection/message.h" + +namespace upb { + +typedef upb_MessageValue MessageValue; + +} // namespace upb + +#endif // UPB_REFLECTION_MESSAGE_HPP_ diff --git a/upb/reflection/message_def.c b/upb/reflection/message_def.c new file mode 100644 index 0000000000..82fb544926 --- /dev/null +++ b/upb/reflection/message_def.c @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/message_def.h" + +#include "upb/mini_table.h" +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/enum_def.h" +#include "upb/reflection/extension_range.h" +#include "upb/reflection/field_def.h" +#include "upb/reflection/file_def.h" +#include "upb/reflection/oneof_def.h" + +// Must be last. +#include "upb/port_def.inc" + +struct upb_MessageDef { + const google_protobuf_MessageOptions* opts; + const upb_MiniTable* layout; + const upb_FileDef* file; + const upb_MessageDef* containing_type; + const char* full_name; + + // Tables for looking up fields by number and name. + upb_inttable itof; + upb_strtable ntof; + + /* All nested defs. 
+ * MEM: We could save some space here by putting nested defs in a contiguous + * region and calculating counts from offsets or vice-versa. */ + const upb_FieldDef* fields; + const upb_OneofDef* oneofs; + const upb_ExtensionRange* ext_ranges; + const upb_MessageDef* nested_msgs; + const upb_EnumDef* nested_enums; + const upb_FieldDef* nested_exts; + int field_count; + int real_oneof_count; + int oneof_count; + int ext_range_count; + int nested_msg_count; + int nested_enum_count; + int nested_ext_count; + bool in_message_set; + upb_WellKnown well_known_type; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif +}; + +static void assign_msg_wellknowntype(upb_MessageDef* m) { + const char* name = upb_MessageDef_FullName(m); + if (name == NULL) { + m->well_known_type = kUpb_WellKnown_Unspecified; + return; + } + if (!strcmp(name, "google.protobuf.Any")) { + m->well_known_type = kUpb_WellKnown_Any; + } else if (!strcmp(name, "google.protobuf.FieldMask")) { + m->well_known_type = kUpb_WellKnown_FieldMask; + } else if (!strcmp(name, "google.protobuf.Duration")) { + m->well_known_type = kUpb_WellKnown_Duration; + } else if (!strcmp(name, "google.protobuf.Timestamp")) { + m->well_known_type = kUpb_WellKnown_Timestamp; + } else if (!strcmp(name, "google.protobuf.DoubleValue")) { + m->well_known_type = kUpb_WellKnown_DoubleValue; + } else if (!strcmp(name, "google.protobuf.FloatValue")) { + m->well_known_type = kUpb_WellKnown_FloatValue; + } else if (!strcmp(name, "google.protobuf.Int64Value")) { + m->well_known_type = kUpb_WellKnown_Int64Value; + } else if (!strcmp(name, "google.protobuf.UInt64Value")) { + m->well_known_type = kUpb_WellKnown_UInt64Value; + } else if (!strcmp(name, "google.protobuf.Int32Value")) { + m->well_known_type = kUpb_WellKnown_Int32Value; + } else if (!strcmp(name, "google.protobuf.UInt32Value")) { + m->well_known_type = kUpb_WellKnown_UInt32Value; + } else if (!strcmp(name, "google.protobuf.BoolValue")) { + 
m->well_known_type = kUpb_WellKnown_BoolValue; + } else if (!strcmp(name, "google.protobuf.StringValue")) { + m->well_known_type = kUpb_WellKnown_StringValue; + } else if (!strcmp(name, "google.protobuf.BytesValue")) { + m->well_known_type = kUpb_WellKnown_BytesValue; + } else if (!strcmp(name, "google.protobuf.Value")) { + m->well_known_type = kUpb_WellKnown_Value; + } else if (!strcmp(name, "google.protobuf.ListValue")) { + m->well_known_type = kUpb_WellKnown_ListValue; + } else if (!strcmp(name, "google.protobuf.Struct")) { + m->well_known_type = kUpb_WellKnown_Struct; + } else { + m->well_known_type = kUpb_WellKnown_Unspecified; + } +} + +upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { + return (upb_MessageDef*)&m[i]; +} + +const google_protobuf_MessageOptions* upb_MessageDef_Options( + const upb_MessageDef* m) { + return m->opts; +} + +bool upb_MessageDef_HasOptions(const upb_MessageDef* m) { + return m->opts != (void*)kUpbDefOptDefault; +} + +const char* upb_MessageDef_FullName(const upb_MessageDef* m) { + return m->full_name; +} + +const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) { + return m->file; +} + +const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) { + return m->containing_type; +} + +const char* upb_MessageDef_Name(const upb_MessageDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} + +upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { + return upb_FileDef_Syntax(m->file); +} + +const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, + uint32_t i) { + upb_value val; + return upb_inttable_lookup(&m->itof, i, &val) ? 
upb_value_getconstptr(val) + : NULL; +} + +const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; + } + + return _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); +} + +const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; + } + + return _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); +} + +bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t len, + upb_value v, upb_Arena* a) { + return upb_strtable_insert(&m->ntof, name, len, v, a); +} + +bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, + const char* name, size_t len, + const upb_FieldDef** out_f, + const upb_OneofDef** out_o) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return false; + } + + const upb_FieldDef* f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + const upb_OneofDef* o = _upb_DefType_Unpack(val, UPB_DEFTYPE_ONEOF); + if (out_f) *out_f = f; + if (out_o) *out_o = o; + return f || o; /* False if this was a JSON name. 
*/ +} + +const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( + const upb_MessageDef* m, const char* name, size_t size) { + upb_value val; + const upb_FieldDef* f; + + if (!upb_strtable_lookup2(&m->ntof, name, size, &val)) { + return NULL; + } + + f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD); + if (!f) f = _upb_DefType_Unpack(val, UPB_DEFTYPE_FIELD_JSONNAME); + + return f; +} + +int upb_MessageDef_numfields(const upb_MessageDef* m) { return m->field_count; } + +int upb_MessageDef_numoneofs(const upb_MessageDef* m) { return m->oneof_count; } + +int upb_MessageDef_numrealoneofs(const upb_MessageDef* m) { + return m->real_oneof_count; +} + +int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { + return m->ext_range_count; +} + +int upb_MessageDef_FieldCount(const upb_MessageDef* m) { + return m->field_count; +} + +int upb_MessageDef_OneofCount(const upb_MessageDef* m) { + return m->oneof_count; +} + +int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) { + return m->nested_msg_count; +} + +int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) { + return m->nested_enum_count; +} + +int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { + return m->nested_ext_count; +} + +int upb_MessageDef_realoneofcount(const upb_MessageDef* m) { + return m->real_oneof_count; +} + +const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { + return m->layout; +} + +const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->ext_range_count); + return _upb_ExtensionRange_At(m->ext_ranges, i); +} + +const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->field_count); + return _upb_FieldDef_At(m->fields, i); +} + +const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->oneof_count); + return _upb_OneofDef_At(m->oneofs, i); +} + +const upb_MessageDef* 
upb_MessageDef_NestedMessage(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_msg_count); + return &m->nested_msgs[i]; +} + +const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) { + UPB_ASSERT(0 <= i && i < m->nested_enum_count); + return _upb_EnumDef_At(m->nested_enums, i); +} + +const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, + int i) { + UPB_ASSERT(0 <= i && i < m->nested_ext_count); + return _upb_FieldDef_At(m->nested_exts, i); +} + +upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) { + return m->well_known_type; +} + +bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m) { + return m->in_message_set; +} + +const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, + const char* name) { + return upb_MessageDef_FindFieldByNameWithSize(m, name, strlen(name)); +} + +const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, + const char* name) { + return upb_MessageDef_FindOneofByNameWithSize(m, name, strlen(name)); +} + +bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) { + return google_protobuf_MessageOptions_map_entry(upb_MessageDef_Options(m)); +} + +bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { + return google_protobuf_MessageOptions_message_set_wire_format( + upb_MessageDef_Options(m)); +} + +void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); + _upb_FieldDef_Resolve(ctx, upb_MessageDef_FullName(m), f); + } + + m->in_message_set = false; + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_Resolve(ctx, upb_MessageDef_FullName(m), ext); + if (upb_FieldDef_Type(ext) == kUpb_FieldType_Message && + upb_FieldDef_Label(ext) == kUpb_Label_Optional && + 
upb_FieldDef_MessageSubDef(ext) == m && + google_protobuf_MessageOptions_message_set_wire_format( + upb_MessageDef_Options(upb_FieldDef_ContainingType(ext)))) { + m->in_message_set = true; + } + } + + if (!ctx->layout) _upb_FieldDef_MakeLayout(ctx, m); + + for (int i = 0; i < upb_MessageDef_NestedMessageCount(m); i++) { + upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_Resolve(ctx, n); + } +} + +void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, + const upb_FieldDef* f) { + const int32_t field_number = upb_FieldDef_Number(f); + + if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) { + _upb_DefBuilder_Errf(ctx, "invalid field number (%u)", field_number); + } + + const char* json_name = upb_FieldDef_JsonName(f); + const char* shortname = upb_FieldDef_Name(f); + const size_t shortnamelen = strlen(shortname); + + upb_value v = upb_value_constptr(f); + + upb_value existing_v; + if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { + _upb_DefBuilder_Errf(ctx, "duplicate field name (%s)", shortname); + } + + const upb_value field_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD); + bool ok = + _upb_MessageDef_Insert(m, shortname, shortnamelen, field_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + if (strcmp(shortname, json_name) != 0) { + if (upb_strtable_lookup(&m->ntof, json_name, &v)) { + _upb_DefBuilder_Errf(ctx, "duplicate json_name (%s)", json_name); + } + + const size_t json_size = strlen(json_name); + const upb_value json_v = _upb_DefType_Pack(f, UPB_DEFTYPE_FIELD_JSONNAME); + ok = _upb_MessageDef_Insert(m, json_name, json_size, json_v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + } + + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate field number (%u)", field_number); + } + + ok = upb_inttable_insert(&m->itof, field_number, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} + +static void 
msgdef_create_nested(upb_DefBuilder* ctx, + const google_protobuf_DescriptorProto* msg_proto, + upb_MessageDef* m) { + size_t n; + + const google_protobuf_EnumDescriptorProto* const* enums = + google_protobuf_DescriptorProto_enum_type(msg_proto, &n); + m->nested_enum_count = n; + m->nested_enums = _upb_EnumDefs_New(ctx, n, enums, m); + + const google_protobuf_FieldDescriptorProto* const* exts = + google_protobuf_DescriptorProto_extension(msg_proto, &n); + m->nested_ext_count = n; + m->nested_exts = _upb_FieldDefs_New(ctx, n, exts, m->full_name, m, true); + + const google_protobuf_DescriptorProto* const* msgs = + google_protobuf_DescriptorProto_nested_type(msg_proto, &n); + m->nested_msg_count = n; + m->nested_msgs = _upb_MessageDefs_New(ctx, n, msgs, m); +} + +static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, + const google_protobuf_DescriptorProto* msg_proto, + const upb_MessageDef* containing_type, + const upb_MessageDef* _m) { + upb_MessageDef* m = (upb_MessageDef*)_m; + const google_protobuf_OneofDescriptorProto* const* oneofs; + const google_protobuf_FieldDescriptorProto* const* fields; + const google_protobuf_DescriptorProto_ExtensionRange* const* ext_ranges; + size_t n_oneof, n_field, n_ext_range; + upb_StringView name; + + // Must happen before _upb_DefBuilder_Add() + m->file = _upb_DefBuilder_File(ctx); + + m->containing_type = containing_type; + + name = google_protobuf_DescriptorProto_name(msg_proto); + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + + m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); + + oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); + fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); + ext_ranges = + google_protobuf_DescriptorProto_extension_range(msg_proto, &n_ext_range); + + bool ok = upb_inttable_init(&m->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = 
upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + if (ctx->layout) { + /* create_fielddef() below depends on this being set. */ + UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); + m->layout = ctx->layout->msgs[ctx->msg_count++]; + UPB_ASSERT(n_field == m->layout->field_count); + } else { + /* Allocate now (to allow cross-linking), populate later. */ + m->layout = _upb_DefBuilder_Alloc( + ctx, sizeof(*m->layout) + sizeof(_upb_FastTable_Entry)); + } + + UBP_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); + + m->oneof_count = n_oneof; + m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); + + m->field_count = n_field; + m->fields = _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, false); + + m->ext_range_count = n_ext_range; + m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); + + const size_t synthetic_count = _upb_OneofDefs_Finalize(ctx, m); + m->real_oneof_count = m->oneof_count - synthetic_count; + + assign_msg_wellknowntype(m); + upb_inttable_compact(&m->itof, ctx->arena); + msgdef_create_nested(ctx, msg_proto, m); +} + +// Allocate and initialize an array of |n| message defs. +upb_MessageDef* _upb_MessageDefs_New( + upb_DefBuilder* ctx, int n, const google_protobuf_DescriptorProto* const* protos, + const upb_MessageDef* containing_type) { + _upb_DefType_CheckPadding(sizeof(upb_MessageDef)); + + const char* name = containing_type ? containing_type->full_name + : _upb_FileDef_RawPackage(ctx->file); + upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); + for (int i = 0; i < n; i++) { + create_msgdef(ctx, name, protos[i], containing_type, &m[i]); + } + return m; +} diff --git a/upb/reflection/message_def.h b/upb/reflection/message_def.h new file mode 100644 index 0000000000..d8b8382128 --- /dev/null +++ b/upb/reflection/message_def.h @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_MESSAGE_DEF_H_ +#define UPB_REFLECTION_MESSAGE_DEF_H_ + +#include "upb/reflection/common.h" + +// Must be last. +#include "upb/port_def.inc" + +// Well-known field tag numbers for map-entry messages. +#define kUpb_MapEntry_KeyFieldNumber 1 +#define kUpb_MapEntry_ValueFieldNumber 2 + +// Well-known field tag numbers for Any messages. 
+#define kUpb_Any_TypeFieldNumber 1 +#define kUpb_Any_ValueFieldNumber 2 + +// Well-known field tag numbers for duration messages. +#define kUpb_Duration_SecondsFieldNumber 1 +#define kUpb_Duration_NanosFieldNumber 2 + +// Well-known field tag numbers for timestamp messages. +#define kUpb_Timestamp_SecondsFieldNumber 1 +#define kUpb_Timestamp_NanosFieldNumber 2 + +// All the different kind of well known type messages. For simplicity of check, +// number wrappers and string wrappers are grouped together. Make sure the +// order and number of these groups are not changed. +typedef enum { + kUpb_WellKnown_Unspecified, + kUpb_WellKnown_Any, + kUpb_WellKnown_FieldMask, + kUpb_WellKnown_Duration, + kUpb_WellKnown_Timestamp, + + // number wrappers + kUpb_WellKnown_DoubleValue, + kUpb_WellKnown_FloatValue, + kUpb_WellKnown_Int64Value, + kUpb_WellKnown_UInt64Value, + kUpb_WellKnown_Int32Value, + kUpb_WellKnown_UInt32Value, + + // string wrappers + kUpb_WellKnown_StringValue, + kUpb_WellKnown_BytesValue, + kUpb_WellKnown_BoolValue, + kUpb_WellKnown_Value, + kUpb_WellKnown_ListValue, + kUpb_WellKnown_Struct, +} upb_WellKnown; + +#ifdef __cplusplus +extern "C" { +#endif + +const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m); + +const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, + int i); +int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m); + +const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i); +int upb_MessageDef_FieldCount(const upb_MessageDef* m); + +const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m); + +// Returns a field by either JSON name or regular proto name. 
+const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( + const upb_MessageDef* m, const char* name, size_t size); +UPB_INLINE const upb_FieldDef* upb_MessageDef_FindByJsonName( + const upb_MessageDef* m, const char* name) { + return upb_MessageDef_FindByJsonNameWithSize(m, name, strlen(name)); +} + +// Lookup of either field or oneof by name. Returns whether either was found. +// If the return is true, then the found def will be set, and the non-found +// one set to NULL. +bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m, + const char* name, size_t size, + const upb_FieldDef** f, + const upb_OneofDef** o); +UPB_INLINE bool upb_MessageDef_FindByName(const upb_MessageDef* m, + const char* name, + const upb_FieldDef** f, + const upb_OneofDef** o) { + return upb_MessageDef_FindByNameWithSize(m, name, strlen(name), f, o); +} + +const upb_FieldDef* upb_MessageDef_FindFieldByName(const upb_MessageDef* m, + const char* name); +const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size); +const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, + uint32_t i); +const upb_OneofDef* upb_MessageDef_FindOneofByName(const upb_MessageDef* m, + const char* name); +const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( + const upb_MessageDef* m, const char* name, size_t size); +const char* upb_MessageDef_FullName(const upb_MessageDef* m); +bool upb_MessageDef_HasOptions(const upb_MessageDef* m); +bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m); +bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m); +const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m); +const char* upb_MessageDef_Name(const upb_MessageDef* m); + +const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i); +const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m, + int i); +const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m, 
+ int i); + +int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m); +int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m); +int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m); + +const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i); +int upb_MessageDef_OneofCount(const upb_MessageDef* m); + +const google_protobuf_MessageOptions* upb_MessageDef_Options(const upb_MessageDef* m); +upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m); +upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i); +bool _upb_MessageDef_InMessageSet(const upb_MessageDef* m); +bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t size, + upb_value v, upb_Arena* a); +void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, + const upb_FieldDef* f); +void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m); + +// Allocate and initialize an array of |n| message defs. +upb_MessageDef* _upb_MessageDefs_New( + upb_DefBuilder* ctx, int n, const google_protobuf_DescriptorProto* const* protos, + const upb_MessageDef* containing_type); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_MESSAGE_DEF_H_ */ diff --git a/upb/reflection/method_def.c b/upb/reflection/method_def.c new file mode 100644 index 0000000000..aa9001232a --- /dev/null +++ b/upb/reflection/method_def.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/method_def.h" + +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/service_def.h" + +// Must be last. 
+#include "upb/port_def.inc" + +struct upb_MethodDef { + const google_protobuf_MethodOptions* opts; + upb_ServiceDef* service; + const char* full_name; + const upb_MessageDef* input_type; + const upb_MessageDef* output_type; + int index; + bool client_streaming; + bool server_streaming; +}; + +upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i) { + return (upb_MethodDef*)&m[i]; +} + +const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) { + return m->service; +} + +const google_protobuf_MethodOptions* upb_MethodDef_Options(const upb_MethodDef* m) { + return m->opts; +} + +bool upb_MethodDef_HasOptions(const upb_MethodDef* m) { + return m->opts != (void*)kUpbDefOptDefault; +} + +const char* upb_MethodDef_FullName(const upb_MethodDef* m) { + return m->full_name; +} + +const char* upb_MethodDef_Name(const upb_MethodDef* m) { + return _upb_DefBuilder_FullToShort(m->full_name); +} + +int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } + +const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) { + return m->input_type; +} + +const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) { + return m->output_type; +} + +bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) { + return m->client_streaming; +} + +bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) { + return m->server_streaming; +} + +static void create_method(upb_DefBuilder* ctx, + const google_protobuf_MethodDescriptorProto* method_proto, + upb_ServiceDef* s, upb_MethodDef* m) { + upb_StringView name = google_protobuf_MethodDescriptorProto_name(method_proto); + + m->service = s; + m->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_ServiceDef_FullName(s), name); + m->client_streaming = + google_protobuf_MethodDescriptorProto_client_streaming(method_proto); + m->server_streaming = + google_protobuf_MethodDescriptorProto_server_streaming(method_proto); + m->input_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, 
+ google_protobuf_MethodDescriptorProto_input_type(method_proto), UPB_DEFTYPE_MSG); + m->output_type = _upb_DefBuilder_Resolve( + ctx, m->full_name, m->full_name, + google_protobuf_MethodDescriptorProto_output_type(method_proto), UPB_DEFTYPE_MSG); + + UBP_DEF_SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, + method_proto); +} + +// Allocate and initialize an array of |n| method defs belonging to |s|. +upb_MethodDef* _upb_MethodDefs_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_MethodDescriptorProto* const* protos, upb_ServiceDef* s) { + upb_MethodDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MethodDef) * n); + for (int i = 0; i < n; i++) { + create_method(ctx, protos[i], s, &m[i]); + m[i].index = i; + } + return m; +} diff --git a/upb/reflection/method_def.h b/upb/reflection/method_def.h new file mode 100644 index 0000000000..3123747c52 --- /dev/null +++ b/upb/reflection/method_def.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_METHOD_DEF_H_ +#define UPB_REFLECTION_METHOD_DEF_H_ + +#include "upb/reflection/common.h" + +// Must be last. +#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m); +const char* upb_MethodDef_FullName(const upb_MethodDef* m); +bool upb_MethodDef_HasOptions(const upb_MethodDef* m); +int upb_MethodDef_Index(const upb_MethodDef* m); +const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m); +const char* upb_MethodDef_Name(const upb_MethodDef* m); +const google_protobuf_MethodOptions* upb_MethodDef_Options(const upb_MethodDef* m); +const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m); +bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m); +const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +upb_MethodDef* _upb_MethodDef_At(const upb_MethodDef* m, int i); + +// Allocate and initialize an array of |n| method defs owned by |s|. 
+upb_MethodDef* _upb_MethodDefs_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_MethodDescriptorProto* const* protos, upb_ServiceDef* s); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_METHOD_DEF_H_ */ diff --git a/upb/internal/mini_descriptor.c b/upb/reflection/mini_descriptor_encode.c similarity index 89% rename from upb/internal/mini_descriptor.c rename to upb/reflection/mini_descriptor_encode.c index 74776e28ca..3a7a0d55ac 100644 --- a/upb/internal/mini_descriptor.c +++ b/upb/reflection/mini_descriptor_encode.c @@ -25,9 +25,17 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/internal/mini_descriptor.h" +#include "upb/reflection/mini_descriptor_encode.h" #include "upb/mini_table.h" +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/enum_def.h" +#include "upb/reflection/enum_value_def.h" +#include "upb/reflection/field_def.h" +#include "upb/reflection/file_def.h" +#include "upb/reflection/message_def.h" +#include "upb/reflection/oneof_def.h" // Must be last. 
#include "upb/port_def.inc" @@ -233,3 +241,19 @@ const char* _upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, return s.buf; } + +/******************************************************************************/ + +const char* upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a) { + return _upb_EnumDef_MiniDescriptor(e, a); +} + +const char* upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, + upb_Arena* a) { + return _upb_MiniDescriptor_EncodeField(f, a); +} + +const char* upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, + upb_Arena* a) { + return _upb_MiniDescriptor_EncodeMessage(m, a); +} diff --git a/upb/internal/mini_descriptor.h b/upb/reflection/mini_descriptor_encode.h similarity index 76% rename from upb/internal/mini_descriptor.h rename to upb/reflection/mini_descriptor_encode.h index 487a996bfa..f197ce7b99 100644 --- a/upb/internal/mini_descriptor.h +++ b/upb/reflection/mini_descriptor_encode.h @@ -25,10 +25,10 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef UPB_INTERNAL_MINI_DESCRIPTOR_H_ -#define UPB_INTERNAL_MINI_DESCRIPTOR_H_ +#ifndef UPB_REFLECTION_MINI_DESCRIPTOR_ENCODE_H_ +#define UPB_REFLECTION_MINI_DESCRIPTOR_ENCODE_H_ -#include "upb/mini_descriptor.h" +#include "upb/reflection/common.h" // Must be last. #include "upb/port_def.inc" @@ -37,6 +37,18 @@ extern "C" { #endif +// Creates and returns a mini descriptor string for an enum, or NULL on error. +const char* upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a); + +// Creates and returns a mini descriptor string for a field, or NULL on error. +const char* upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a); + +// Creates and returns a mini descriptor string for a message, or NULL on error. 
+const char* upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, + upb_Arena* a); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + // Creates and returns a mini descriptor string for an enum, or NULL on error. // If the values in the enum happen to be defined in ascending order (when cast // to uint32_t) then |sorted| should be NULL. Otherwise it must point to an @@ -59,4 +71,4 @@ const char* _upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, #include "upb/port_undef.inc" -#endif /* UPB_INTERNAL_MINI_DESCRIPTOR_H_ */ +#endif /* UPB_REFLECTION_MINI_DESCRIPTOR_ENCODE_H_ */ diff --git a/upb/reflection/oneof_def.c b/upb/reflection/oneof_def.c new file mode 100644 index 0000000000..296d8f817b --- /dev/null +++ b/upb/reflection/oneof_def.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/oneof_def.h" + +#include +#include +#include + +#include "upb/mini_table.h" +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/field_def.h" +#include "upb/reflection/message_def.h" + +// Must be last. +#include "upb/port_def.inc" + +struct upb_OneofDef { + const google_protobuf_OneofOptions* opts; + const upb_MessageDef* parent; + const char* full_name; + int field_count; + bool synthetic; + const upb_FieldDef** fields; + upb_strtable ntof; // lookup a field by name + upb_inttable itof; // lookup a field by number (index) +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. 
+#endif +}; + +upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i) { + return (upb_OneofDef*)&o[i]; +} + +const google_protobuf_OneofOptions* upb_OneofDef_Options(const upb_OneofDef* o) { + return o->opts; +} + +bool upb_OneofDef_HasOptions(const upb_OneofDef* o) { + return o->opts != (void*)kUpbDefOptDefault; +} + +const char* upb_OneofDef_FullName(const upb_OneofDef* o) { + return o->full_name; +} + +const char* upb_OneofDef_Name(const upb_OneofDef* o) { + return _upb_DefBuilder_FullToShort(o->full_name); +} + +const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) { + return o->parent; +} + +int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; } + +const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) { + UPB_ASSERT(i < o->field_count); + return o->fields[i]; +} + +int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; } + +uint32_t upb_OneofDef_Index(const upb_OneofDef* o) { + // Compute index in our parent's array. + return o - upb_MessageDef_Oneof(o->parent, 0); +} + +bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; } + +const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, + const char* name, + size_t size) { + upb_value val; + return upb_strtable_lookup2(&o->ntof, name, size, &val) + ? upb_value_getptr(val) + : NULL; +} + +const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, + const char* name) { + return upb_OneofDef_LookupNameWithSize(o, name, strlen(name)); +} + +const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, + uint32_t num) { + upb_value val; + return upb_inttable_lookup(&o->itof, num, &val) ? 
upb_value_getptr(val) + : NULL; +} + +bool _upb_OneofDef_Insert(upb_OneofDef* o, const upb_FieldDef* f, + const char* name, size_t size, upb_Arena* a) { + o->field_count++; + if (_upb_FieldDef_IsProto3Optional(f)) o->synthetic = true; + + const int number = upb_FieldDef_Number(f); + const upb_value v = upb_value_constptr(f); + return upb_inttable_insert(&o->itof, number, v, a) && + upb_strtable_insert(&o->ntof, name, size, v, a); +} + +// Returns the synthetic count. +size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m) { + int synthetic_count = 0; + + for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { + upb_OneofDef* o = (upb_OneofDef*)upb_MessageDef_Oneof(m, i); + + if (o->synthetic && o->field_count != 1) { + _upb_DefBuilder_Errf(ctx, + "Synthetic oneofs must have one field, not %d: %s", + o->field_count, upb_OneofDef_Name(o)); + } + + if (o->synthetic) { + synthetic_count++; + } else if (synthetic_count != 0) { + _upb_DefBuilder_Errf( + ctx, "Synthetic oneofs must be after all other oneofs: %s", + upb_OneofDef_Name(o)); + } + + o->fields = + _upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef*) * o->field_count); + o->field_count = 0; + } + + for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f); + if (o) { + o->fields[o->field_count++] = f; + } + } + + return synthetic_count; +} + +static void create_oneofdef(upb_DefBuilder* ctx, upb_MessageDef* m, + const google_protobuf_OneofDescriptorProto* oneof_proto, + const upb_OneofDef* _o) { + upb_OneofDef* o = (upb_OneofDef*)_o; + upb_StringView name = google_protobuf_OneofDescriptorProto_name(oneof_proto); + + o->parent = m; + o->full_name = + _upb_DefBuilder_MakeFullName(ctx, upb_MessageDef_FullName(m), name); + o->field_count = 0; + o->synthetic = false; + + UBP_DEF_SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); + + if 
(upb_MessageDef_FindByNameWithSize(m, name.data, name.size, NULL, NULL)) { + _upb_DefBuilder_Errf(ctx, "duplicate oneof name (%s)", o->full_name); + } + + upb_value v = _upb_DefType_Pack(o, UPB_DEFTYPE_ONEOF); + bool ok = _upb_MessageDef_Insert(m, name.data, name.size, v, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_inttable_init(&o->itof, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + ok = upb_strtable_init(&o->ntof, 4, ctx->arena); + if (!ok) _upb_DefBuilder_OomErr(ctx); +} + +// Allocate and initialize an array of |n| oneof defs. +upb_OneofDef* _upb_OneofDefs_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_OneofDescriptorProto* const* protos, upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_OneofDef)); + + upb_OneofDef* o = _upb_DefBuilder_Alloc(ctx, sizeof(upb_OneofDef) * n); + for (int i = 0; i < n; i++) { + create_oneofdef(ctx, m, protos[i], &o[i]); + } + return o; +} diff --git a/upb/reflection/oneof_def.h b/upb/reflection/oneof_def.h new file mode 100644 index 0000000000..5116e9e4e9 --- /dev/null +++ b/upb/reflection/oneof_def.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_ONEOF_DEF_H_ +#define UPB_REFLECTION_ONEOF_DEF_H_ + +#include "upb/reflection/common.h" + +// Must be last. +#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o); +const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i); +int upb_OneofDef_FieldCount(const upb_OneofDef* o); +const char* upb_OneofDef_FullName(const upb_OneofDef* o); +bool upb_OneofDef_HasOptions(const upb_OneofDef* o); +uint32_t upb_OneofDef_Index(const upb_OneofDef* o); +bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o); +const upb_FieldDef* upb_OneofDef_LookupName(const upb_OneofDef* o, + const char* name); +const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o, + const char* name, + size_t size); +const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o, + uint32_t num); +const char* upb_OneofDef_Name(const upb_OneofDef* o); +int upb_OneofDef_numfields(const upb_OneofDef* o); +const google_protobuf_OneofOptions* upb_OneofDef_Options(const upb_OneofDef* o); + +// EVERYTHING BELOW THIS LINE IS 
INTERNAL - DO NOT USE ///////////////////////// + +upb_OneofDef* _upb_OneofDef_At(const upb_OneofDef* o, int i); +bool _upb_OneofDef_Insert(upb_OneofDef* o, const upb_FieldDef* f, + const char* name, size_t size, upb_Arena* a); + +// Allocate and initialize an array of |n| oneof defs owned by |m|. +upb_OneofDef* _upb_OneofDefs_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_OneofDescriptorProto* const* protos, upb_MessageDef* m); + +size_t _upb_OneofDefs_Finalize(upb_DefBuilder* ctx, upb_MessageDef* m); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_ONEOF_DEF_H_ */ diff --git a/upb/reflection/service_def.c b/upb/reflection/service_def.c new file mode 100644 index 0000000000..6b2d69d48e --- /dev/null +++ b/upb/reflection/service_def.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/reflection/service_def.h" + +#include "upb/reflection/def_builder.h" +#include "upb/reflection/def_type.h" +#include "upb/reflection/file_def.h" +#include "upb/reflection/method_def.h" + +// Must be last. +#include "upb/port_def.inc" + +struct upb_ServiceDef { + const google_protobuf_ServiceOptions* opts; + const upb_FileDef* file; + const char* full_name; + upb_MethodDef* methods; + int method_count; + int index; +}; + +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int index) { + return (upb_ServiceDef*)&s[index]; +} + +const google_protobuf_ServiceOptions* upb_ServiceDef_Options(const upb_ServiceDef* s) { + return s->opts; +} + +bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) { + return s->opts != (void*)kUpbDefOptDefault; +} + +const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) { + return s->full_name; +} + +const char* upb_ServiceDef_Name(const upb_ServiceDef* s) { + return _upb_DefBuilder_FullToShort(s->full_name); +} + +int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; } + +const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) { + return s->file; +} + +int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) { + return s->method_count; +} + +const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) { + return (i < 0 || i >= s->method_count) ? 
NULL + : _upb_MethodDef_At(s->methods, i); +} + +const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, + const char* name) { + for (int i = 0; i < s->method_count; i++) { + const upb_MethodDef* m = _upb_MethodDef_At(s->methods, i); + if (strcmp(name, upb_MethodDef_Name(m)) == 0) { + return m; + } + } + return NULL; +} + +static void create_service(upb_DefBuilder* ctx, + const google_protobuf_ServiceDescriptorProto* svc_proto, + upb_ServiceDef* s) { + upb_StringView name; + size_t n; + + // Must happen before _upb_DefBuilder_Add() + s->file = _upb_DefBuilder_File(ctx); + + name = google_protobuf_ServiceDescriptorProto_name(svc_proto); + _upb_DefBuilder_CheckIdentNotFull(ctx, name); + const char* package = _upb_FileDef_RawPackage(s->file); + s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); + _upb_DefBuilder_Add(ctx, s->full_name, + _upb_DefType_Pack(s, UPB_DEFTYPE_SERVICE)); + + const google_protobuf_MethodDescriptorProto* const* methods = + google_protobuf_ServiceDescriptorProto_method(svc_proto, &n); + s->method_count = n; + s->methods = _upb_MethodDefs_New(ctx, n, methods, s); + + UBP_DEF_SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, + svc_proto); +} + +upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_ServiceDescriptorProto* const* protos) { + _upb_DefType_CheckPadding(sizeof(upb_ServiceDef)); + + upb_ServiceDef* s = _upb_DefBuilder_Alloc(ctx, sizeof(upb_ServiceDef) * n); + for (int i = 0; i < n; i++) { + create_service(ctx, protos[i], &s[i]); + s[i].index = i; + } + return s; +} diff --git a/upb/reflection/service_def.h b/upb/reflection/service_def.h new file mode 100644 index 0000000000..d265a507c0 --- /dev/null +++ b/upb/reflection/service_def.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// IWYU pragma: private, include "third_party/upb/upb/reflection/def.h" + +#ifndef UPB_REFLECTION_SERVICE_DEF_H_ +#define UPB_REFLECTION_SERVICE_DEF_H_ + +#include "upb/reflection/common.h" + +// Must be last. 
+#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s); +const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s, + const char* name); +const char* upb_ServiceDef_FullName(const upb_ServiceDef* s); +bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s); +int upb_ServiceDef_Index(const upb_ServiceDef* s); +const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i); +int upb_ServiceDef_MethodCount(const upb_ServiceDef* s); +const char* upb_ServiceDef_Name(const upb_ServiceDef* s); +const google_protobuf_ServiceOptions* upb_ServiceDef_Options(const upb_ServiceDef* s); + +// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// + +upb_ServiceDef* _upb_ServiceDef_At(const upb_ServiceDef* s, int i); + +// Allocate and initialize an array of |n| service defs. +upb_ServiceDef* _upb_ServiceDefs_New( + upb_DefBuilder* ctx, int n, + const google_protobuf_ServiceDescriptorProto* const* protos); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_REFLECTION_SERVICE_DEF_H_ */ diff --git a/upbc/code_generator_request.c b/upbc/code_generator_request.c index cd7f5d1c72..ce6227d5a1 100644 --- a/upbc/code_generator_request.c +++ b/upbc/code_generator_request.c @@ -31,8 +31,9 @@ #include #include "google/protobuf/compiler/plugin.upb.h" -#include "upb/mini_descriptor.h" #include "upb/mini_table.h" +#include "upb/reflection/def.h" +#include "upb/reflection/mini_descriptor_encode.h" // Must be last. 
#include "upb/port_def.inc" diff --git a/upbc/code_generator_request.h b/upbc/code_generator_request.h index 747d98d84d..03deb29fea 100644 --- a/upbc/code_generator_request.h +++ b/upbc/code_generator_request.h @@ -28,7 +28,7 @@ #ifndef UPBC_CODE_GENERATOR_REQUEST_H_ #define UPBC_CODE_GENERATOR_REQUEST_H_ -#include "upb/def.h" +#include "upb/reflection/def.h" #include "upb/upb.h" #include "upbc/code_generator_request.upb.h" diff --git a/upbc/protoc-gen-upbdev.cc b/upbc/protoc-gen-upbdev.cc index e711ec75b5..a320c89fdc 100644 --- a/upbc/protoc-gen-upbdev.cc +++ b/upbc/protoc-gen-upbdev.cc @@ -32,7 +32,7 @@ #include "google/protobuf/compiler/plugin.upbdefs.h" #include "upb/json_decode.h" #include "upb/json_encode.h" -#include "upb/mini_descriptor.h" +#include "upb/reflection/def.h" #include "upb/upb.h" #include "upbc/code_generator_request.h" #include "upbc/code_generator_request.upb.h" From 38d84309239b5f0065e79cad91aeef01bcbc09ab Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Thu, 15 Sep 2022 09:08:22 -0700 Subject: [PATCH 16/35] simplify makejsonname() PiperOrigin-RevId: 474577074 --- upb/reflection/field_def.c | 82 ++++++++++++-------------------------- 1 file changed, 25 insertions(+), 57 deletions(-) diff --git a/upb/reflection/field_def.c b/upb/reflection/field_def.c index 0ac1ce6e08..36f1160fd3 100644 --- a/upb/reflection/field_def.c +++ b/upb/reflection/field_def.c @@ -665,52 +665,25 @@ static bool streql2(const char* a, size_t n, const char* b) { return n == strlen(b) && memcmp(a, b, n) == 0; } -static size_t getjsonname(const char* name, size_t size, char* buf, - size_t len) { - size_t src, dst = 0; - bool ucase_next = false; - -#define WRITE(byte) \ - ++dst; \ - if (dst < len) \ - buf[dst - 1] = byte; \ - else if (dst == len) \ - buf[dst - 1] = '\0' +// Implement the transformation as described in the spec: +// 1. upper case all letters after an underscore. +// 2. remove all underscores. 
+static char* make_json_name(const char* name, size_t size, upb_Arena* a) { + char* out = upb_Arena_Malloc(a, size + 1); // +1 is to add a trailing '\0' + if (out == NULL) return NULL; - if (!name) { - WRITE('\0'); - return 0; - } - - /* Implement the transformation as described in the spec: - * 1. upper case all letters after an underscore. - * 2. remove all underscores. - */ - for (src = 0; src < size; src++) { - if (name[src] == '_') { + bool ucase_next = false; + char* des = out; + for (size_t i = 0; i < size; i++) { + if (name[i] == '_') { ucase_next = true; - continue; - } - - if (ucase_next) { - WRITE(toupper(name[src])); - ucase_next = false; } else { - WRITE(name[src]); + *des++ = ucase_next ? toupper(name[i]) : name[i]; + ucase_next = false; } } - - WRITE('\0'); - return dst; - -#undef WRITE -} - -static char* makejsonname(upb_DefBuilder* ctx, const char* name, size_t size) { - size_t json_size = size + 1; // +1 for trailing '\0' - char* json_name = _upb_DefBuilder_Alloc(ctx, json_size); - getjsonname(name, size, json_name, json_size); - return json_name; + *des++ = '\0'; + return out; } static str_t* newstr(upb_DefBuilder* ctx, const char* data, size_t len) { @@ -891,8 +864,6 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, upb_MessageDef* m, const google_protobuf_FieldDescriptorProto* field_proto, upb_FieldDef* f) { - const char* json_name; - // Must happen before _upb_DefBuilder_Add() f->file = _upb_DefBuilder_File(ctx); @@ -902,30 +873,27 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, const upb_StringView name = google_protobuf_FieldDescriptorProto_name(field_proto); _upb_DefBuilder_CheckIdentNotFull(ctx, name); - const char* full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) { + f->has_json_name_ = google_protobuf_FieldDescriptorProto_has_json_name(field_proto); + if (f->has_json_name_) { const upb_StringView sv = 
google_protobuf_FieldDescriptorProto_json_name(field_proto); - json_name = upb_strdup2(sv.data, sv.size, ctx->arena); - if (!json_name) _upb_DefBuilder_OomErr(ctx); - f->has_json_name_ = true; + f->json_name = upb_strdup2(sv.data, sv.size, ctx->arena); } else { - json_name = makejsonname(ctx, name.data, name.size); - f->has_json_name_ = false; + f->json_name = make_json_name(name.data, name.size, ctx->arena); } + if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - f->full_name = full_name; - f->json_name = json_name; + f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); f->number_ = google_protobuf_FieldDescriptorProto_number(field_proto); - f->scope.oneof = NULL; f->proto3_optional_ = google_protobuf_FieldDescriptorProto_proto3_optional(field_proto); f->msgdef = m; + f->scope.oneof = NULL; - bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto); - bool has_type_name = + const bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto); + const bool has_type_name = google_protobuf_FieldDescriptorProto_has_type_name(field_proto); f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); @@ -937,14 +905,14 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, case kUpb_FieldType_Enum: if (!has_type_name) { _upb_DefBuilder_Errf(ctx, "field of type %d requires type name (%s)", - (int)f->type_, full_name); + (int)f->type_, f->full_name); } break; default: if (has_type_name) { _upb_DefBuilder_Errf( ctx, "invalid type for field with type_name set (%s, %d)", - full_name, (int)f->type_); + f->full_name, (int)f->type_); } } } else if (has_type_name) { From edecfd5eb06ee7a55206d4651222d03b0a4f7b01 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Thu, 15 Sep 2022 10:26:14 -0700 Subject: [PATCH 17/35] upb: use the new reflection headers Reflection headers now live in upb/reflection/ so update our include statements to reflect 
this. (See what I did there?) PiperOrigin-RevId: 474596615 --- BUILD | 18 +++++++++++++++++- benchmarks/benchmark.cc | 2 +- python/convert.c | 2 +- python/convert.h | 4 ++-- python/descriptor.c | 2 +- python/descriptor.h | 2 +- python/descriptor_containers.c | 2 +- python/descriptor_containers.h | 2 +- python/descriptor_pool.c | 2 +- python/map.c | 2 +- python/map.h | 2 +- python/message.c | 4 ++-- python/message.h | 2 +- python/repeated.h | 2 +- upb/bindings/lua/def.c | 2 +- upb/bindings/lua/msg.c | 2 +- upb/bindings/lua/upb.h | 4 ++-- upb/conformance_upb.c | 2 +- upb/json_decode.c | 2 +- upb/json_decode.h | 2 +- upb/json_decode_test.cc | 2 +- upb/json_encode.c | 2 +- upb/json_encode.h | 2 +- upb/json_encode_test.cc | 5 +++-- upb/msg_test.cc | 2 +- upb/test_cpp.cc | 4 ++-- upb/text_encode.c | 2 +- upb/text_encode.h | 2 +- upb/util/compare.h | 2 +- upb/util/def_to_proto.c | 2 +- upb/util/def_to_proto.h | 2 +- upb/util/def_to_proto_test.cc | 4 ++-- upb/util/required_fields.c | 2 +- upb/util/required_fields.h | 4 ++-- upb/util/required_fields_test.cc | 4 ++-- upbc/protoc-gen-upbdefs.cc | 6 +++--- 36 files changed, 63 insertions(+), 46 deletions(-) diff --git a/BUILD b/BUILD index cd515ba430..43de536048 100644 --- a/BUILD +++ b/BUILD @@ -326,15 +326,31 @@ cc_library( cc_library( name = "generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", + srcs = [ + "upb/reflection/common.h", + "upb/reflection/def_pool.h", + "upb/reflection/def_type.h", + "upb/reflection/enum_def.h", + "upb/reflection/enum_value_def.h", + "upb/reflection/extension_range.h", + "upb/reflection/field_def.h", + "upb/reflection/file_def.h", + "upb/reflection/message_def.h", + "upb/reflection/method_def.h", + "upb/reflection/oneof_def.h", + "upb/reflection/service_def.h", + ], hdrs = [ - "upb/def.h", "upb/port_def.inc", "upb/port_undef.inc", + "upb/reflection/def.h", ], copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [ + 
":descriptor_upb_proto", ":reflection", + ":table_internal", ], ) diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc index e752d3e7eb..e4c3a0cba1 100644 --- a/benchmarks/benchmark.cc +++ b/benchmarks/benchmark.cc @@ -35,7 +35,7 @@ #include "benchmarks/descriptor.upb.h" #include "benchmarks/descriptor.upbdefs.h" #include "benchmarks/descriptor_sv.pb.h" -#include "upb/def.hpp" +#include "upb/reflection/def.hpp" upb_StringView descriptor = benchmarks_descriptor_proto_upbdefinit.descriptor; namespace protobuf = ::google::protobuf; diff --git a/python/convert.c b/python/convert.c index b9df5019a1..b079bdcbf7 100644 --- a/python/convert.c +++ b/python/convert.c @@ -30,7 +30,7 @@ #include "python/message.h" #include "python/protobuf.h" #include "upb/map.h" -#include "upb/reflection.h" +#include "upb/reflection/message.h" #include "upb/util/compare.h" // Must be last. diff --git a/python/convert.h b/python/convert.h index d26aeb72a5..c02c1976a3 100644 --- a/python/convert.h +++ b/python/convert.h @@ -29,8 +29,8 @@ #define PYUPB_CONVERT_H__ #include "protobuf.h" -#include "upb/def.h" -#include "upb/reflection.h" +#include "upb/reflection/def.h" +#include "upb/reflection/message.h" // Converts `val` to a Python object according to the type information in `f`. 
// Any newly-created Python objects that reference non-primitive data from `val` diff --git a/python/descriptor.c b/python/descriptor.c index 4d238bc4b8..5e3a5fdfd6 100644 --- a/python/descriptor.c +++ b/python/descriptor.c @@ -32,7 +32,7 @@ #include "python/descriptor_pool.h" #include "python/message.h" #include "python/protobuf.h" -#include "upb/def.h" +#include "upb/reflection/def.h" #include "upb/util/def_to_proto.h" // ----------------------------------------------------------------------------- diff --git a/python/descriptor.h b/python/descriptor.h index b4229ef616..9bf6b4258d 100644 --- a/python/descriptor.h +++ b/python/descriptor.h @@ -31,7 +31,7 @@ #include #include "python/python_api.h" -#include "upb/def.h" +#include "upb/reflection/def.h" typedef enum { kPyUpb_Descriptor = 0, diff --git a/python/descriptor_containers.c b/python/descriptor_containers.c index 0f2d4ff023..c750a4a04f 100644 --- a/python/descriptor_containers.c +++ b/python/descriptor_containers.c @@ -29,7 +29,7 @@ #include "python/descriptor.h" #include "python/protobuf.h" -#include "upb/def.h" +#include "upb/reflection/def.h" // Implements __repr__ as str(dict(self)). 
static PyObject* PyUpb_DescriptorMap_Repr(PyObject* _self) { diff --git a/python/descriptor_containers.h b/python/descriptor_containers.h index 8c6b0d9b43..276aea558a 100644 --- a/python/descriptor_containers.h +++ b/python/descriptor_containers.h @@ -43,7 +43,7 @@ #include #include "protobuf.h" -#include "upb/def.h" +#include "upb/reflection/def.h" // ----------------------------------------------------------------------------- // PyUpb_GenericSequence diff --git a/python/descriptor_pool.c b/python/descriptor_pool.c index ea0e8132a6..4e9faaad2f 100644 --- a/python/descriptor_pool.c +++ b/python/descriptor_pool.c @@ -32,7 +32,7 @@ #include "python/descriptor.h" #include "python/message.h" #include "python/protobuf.h" -#include "upb/def.h" +#include "upb/reflection/def.h" #include "upb/util/def_to_proto.h" // ----------------------------------------------------------------------------- diff --git a/python/map.c b/python/map.c index c5c6e68309..dfadf41d82 100644 --- a/python/map.c +++ b/python/map.c @@ -30,8 +30,8 @@ #include "python/convert.h" #include "python/message.h" #include "python/protobuf.h" -#include "upb/def.h" #include "upb/map.h" +#include "upb/reflection/def.h" // ----------------------------------------------------------------------------- // MapContainer diff --git a/python/map.h b/python/map.h index aaa4e20385..e69ff9fe28 100644 --- a/python/map.h +++ b/python/map.h @@ -31,7 +31,7 @@ #include #include "python/python_api.h" -#include "upb/def.h" +#include "upb/reflection/def.h" // Creates a new repeated field stub for field `f` of message object `parent`. // Precondition: `parent` must be a stub. 
diff --git a/python/message.c b/python/message.c index e67bf2306b..175292280e 100644 --- a/python/message.c +++ b/python/message.c @@ -32,8 +32,8 @@ #include "python/extension_dict.h" #include "python/map.h" #include "python/repeated.h" -#include "upb/def.h" -#include "upb/reflection.h" +#include "upb/reflection/def.h" +#include "upb/reflection/message.h" #include "upb/text_encode.h" #include "upb/util/required_fields.h" diff --git a/python/message.h b/python/message.h index f296d5ce93..f10f15f02a 100644 --- a/python/message.h +++ b/python/message.h @@ -31,7 +31,7 @@ #include #include "python/protobuf.h" -#include "upb/reflection.h" +#include "upb/reflection/message.h" // Removes the wrapper object for this field from the unset subobject cache. void PyUpb_Message_CacheDelete(PyObject* _self, const upb_FieldDef* f); diff --git a/python/repeated.h b/python/repeated.h index 5d74bd2edb..8cd38721a5 100644 --- a/python/repeated.h +++ b/python/repeated.h @@ -31,7 +31,7 @@ #include #include "python/python_api.h" -#include "upb/def.h" +#include "upb/reflection/def.h" // Creates a new repeated field stub for field `f` of message object `parent`. // Precondition: `parent` must be a stub. diff --git a/upb/bindings/lua/def.c b/upb/bindings/lua/def.c index 602dbaf9a4..b28ed42cb2 100644 --- a/upb/bindings/lua/def.c +++ b/upb/bindings/lua/def.c @@ -25,7 +25,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "upb/def.h" +#include "upb/reflection/def.h" #include #include diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index 9c66c74b04..4e37e0cecc 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -43,7 +43,7 @@ #include "upb/json_encode.h" #include "upb/map.h" #include "upb/port_def.inc" -#include "upb/reflection.h" +#include "upb/reflection/message.h" #include "upb/text_encode.h" /* diff --git a/upb/bindings/lua/upb.h b/upb/bindings/lua/upb.h index e0dc79eb04..a06d964d55 100644 --- a/upb/bindings/lua/upb.h +++ b/upb/bindings/lua/upb.h @@ -33,9 +33,9 @@ #define UPB_LUA_UPB_H_ #include "lauxlib.h" -#include "upb/def.h" #include "upb/msg.h" -#include "upb/reflection.h" +#include "upb/reflection/def.h" +#include "upb/reflection/message.h" /* Lua changes its API in incompatible ways in every minor release. * This is some shim code to paper over the differences. */ diff --git a/upb/conformance_upb.c b/upb/conformance_upb.c index be5ce9c7dc..b08239b6ff 100644 --- a/upb/conformance_upb.c +++ b/upb/conformance_upb.c @@ -43,7 +43,7 @@ #include "upb/encode.h" #include "upb/json_decode.h" #include "upb/json_encode.h" -#include "upb/reflection.h" +#include "upb/reflection/message.h" #include "upb/text_encode.h" // Must be last. diff --git a/upb/json_decode.c b/upb/json_decode.c index 56775cb778..2342d688e1 100644 --- a/upb/json_decode.c +++ b/upb/json_decode.c @@ -39,7 +39,7 @@ #include "upb/internal/atoi.h" #include "upb/internal/unicode.h" #include "upb/map.h" -#include "upb/reflection.h" +#include "upb/reflection/message.h" // Must be last. #include "upb/port_def.inc" diff --git a/upb/json_decode.h b/upb/json_decode.h index c7b6839427..a5abad279f 100644 --- a/upb/json_decode.h +++ b/upb/json_decode.h @@ -28,7 +28,7 @@ #ifndef UPB_JSONDECODE_H_ #define UPB_JSONDECODE_H_ -#include "upb/def.h" +#include "upb/reflection/def.h" // Must be last. 
#include "upb/port_def.inc" diff --git a/upb/json_decode_test.cc b/upb/json_decode_test.cc index fcc65a9360..2d41e602d8 100644 --- a/upb/json_decode_test.cc +++ b/upb/json_decode_test.cc @@ -31,9 +31,9 @@ #include "google/protobuf/struct.upb.h" #include "gtest/gtest.h" -#include "upb/def.hpp" #include "upb/json_test.upb.h" #include "upb/json_test.upbdefs.h" +#include "upb/reflection/def.hpp" #include "upb/upb.hpp" static upb_test_Box* JsonDecode(const char* json, upb_Arena* a) { diff --git a/upb/json_encode.c b/upb/json_encode.c index 9c657dddcd..487ee11098 100644 --- a/upb/json_encode.c +++ b/upb/json_encode.c @@ -39,7 +39,7 @@ #include "upb/internal/encode.h" #include "upb/internal/vsnprintf_compat.h" #include "upb/map.h" -#include "upb/reflection.h" +#include "upb/reflection/message.h" // Must be last. #include "upb/port_def.inc" diff --git a/upb/json_encode.h b/upb/json_encode.h index 7d83206580..b2207a9807 100644 --- a/upb/json_encode.h +++ b/upb/json_encode.h @@ -28,7 +28,7 @@ #ifndef UPB_JSONENCODE_H_ #define UPB_JSONENCODE_H_ -#include "upb/def.h" +#include "upb/reflection/def.h" // Must be last. #include "upb/port_def.inc" diff --git a/upb/json_encode_test.cc b/upb/json_encode_test.cc index 7abd23a93b..0d14b4176d 100644 --- a/upb/json_encode_test.cc +++ b/upb/json_encode_test.cc @@ -25,12 +25,13 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "upb/json_encode.h" + #include "google/protobuf/struct.upb.h" #include "gtest/gtest.h" -#include "upb/def.hpp" -#include "upb/json_encode.h" #include "upb/json_test.upb.h" #include "upb/json_test.upbdefs.h" +#include "upb/reflection/def.hpp" #include "upb/upb.hpp" static std::string JsonEncode(const upb_test_Box* msg, int options) { diff --git a/upb/msg_test.cc b/upb/msg_test.cc index 9e1548ae9d..113972321a 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -28,12 +28,12 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "google/protobuf/test_messages_proto3.upb.h" -#include "upb/def.hpp" #include "upb/fuzz_test_util.h" #include "upb/json_decode.h" #include "upb/json_encode.h" #include "upb/msg_test.upb.h" #include "upb/msg_test.upbdefs.h" +#include "upb/reflection/def.hpp" #include "upb/upb.hpp" // begin:google_only diff --git a/upb/test_cpp.cc b/upb/test_cpp.cc index 4d4aec6253..e70771420b 100644 --- a/upb/test_cpp.cc +++ b/upb/test_cpp.cc @@ -38,10 +38,10 @@ #include "google/protobuf/timestamp.upb.h" #include "google/protobuf/timestamp.upbdefs.h" #include "gtest/gtest.h" -#include "upb/def.h" -#include "upb/def.hpp" #include "upb/json_decode.h" #include "upb/json_encode.h" +#include "upb/reflection/def.h" +#include "upb/reflection/def.hpp" #include "upb/test_cpp.upb.h" #include "upb/test_cpp.upbdefs.h" #include "upb/upb.h" diff --git a/upb/text_encode.c b/upb/text_encode.c index 393b8d7615..d3e9f0145c 100644 --- a/upb/text_encode.c +++ b/upb/text_encode.c @@ -37,7 +37,7 @@ #include "upb/internal/encode.h" #include "upb/internal/vsnprintf_compat.h" #include "upb/map.h" -#include "upb/reflection.h" +#include "upb/reflection/message.h" // Must be last. 
#include "upb/port_def.inc" diff --git a/upb/text_encode.h b/upb/text_encode.h index 312db24e35..30f7404c80 100644 --- a/upb/text_encode.h +++ b/upb/text_encode.h @@ -28,7 +28,7 @@ #ifndef UPB_TEXTENCODE_H_ #define UPB_TEXTENCODE_H_ -#include "upb/def.h" +#include "upb/reflection/def.h" // Must be last. #include "upb/port_def.inc" diff --git a/upb/util/compare.h b/upb/util/compare.h index 92f6d83ab7..d99a7e956b 100644 --- a/upb/util/compare.h +++ b/upb/util/compare.h @@ -28,7 +28,7 @@ #ifndef UPB_UTIL_COMPARE_H_ #define UPB_UTIL_COMPARE_H_ -#include "upb/def.h" +#include "upb/reflection/def.h" #ifdef __cplusplus extern "C" { diff --git a/upb/util/def_to_proto.c b/upb/util/def_to_proto.c index ae9288c9e0..ab957db578 100644 --- a/upb/util/def_to_proto.c +++ b/upb/util/def_to_proto.c @@ -33,7 +33,7 @@ #include #include "upb/internal/vsnprintf_compat.h" -#include "upb/reflection.h" +#include "upb/reflection/message.h" /* Must be last. */ #include "upb/port_def.inc" diff --git a/upb/util/def_to_proto.h b/upb/util/def_to_proto.h index 24f65b9bfb..08c64532e9 100644 --- a/upb/util/def_to_proto.h +++ b/upb/util/def_to_proto.h @@ -28,7 +28,7 @@ #ifndef UPB_UTIL_DEF_TO_PROTO_H_ #define UPB_UTIL_DEF_TO_PROTO_H_ -#include "upb/def.h" +#include "upb/reflection/def.h" #ifdef __cplusplus extern "C" { diff --git a/upb/util/def_to_proto_test.cc b/upb/util/def_to_proto_test.cc index d8156e3989..b83e1c3623 100644 --- a/upb/util/def_to_proto_test.cc +++ b/upb/util/def_to_proto_test.cc @@ -27,13 +27,13 @@ #include "upb/util/def_to_proto.h" -#include "gmock/gmock.h" #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/descriptor.upbdefs.h" #include "google/protobuf/dynamic_message.h" #include "google/protobuf/util/message_differencer.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" -#include "upb/def.hpp" +#include "upb/reflection/def.hpp" #include "upb/upb.hpp" #include "upb/util/def_to_proto_test.upbdefs.h" diff --git a/upb/util/required_fields.c 
b/upb/util/required_fields.c index 260a5b1d09..2bc714509d 100644 --- a/upb/util/required_fields.c +++ b/upb/util/required_fields.c @@ -34,7 +34,7 @@ #include "upb/internal/vsnprintf_compat.h" #include "upb/map.h" -#include "upb/reflection.h" +#include "upb/reflection/message.h" // Must be last. #include "upb/port_def.inc" diff --git a/upb/util/required_fields.h b/upb/util/required_fields.h index 874914bd67..e58f7ca38f 100644 --- a/upb/util/required_fields.h +++ b/upb/util/required_fields.h @@ -28,8 +28,8 @@ #ifndef UPB_UTIL_REQUIRED_FIELDS_H_ #define UPB_UTIL_REQUIRED_FIELDS_H_ -#include "upb/def.h" -#include "upb/reflection.h" +#include "upb/reflection/def.h" +#include "upb/reflection/message.h" /* Must be last. */ #include "upb/port_def.inc" diff --git a/upb/util/required_fields_test.cc b/upb/util/required_fields_test.cc index 85504101e6..7d44ef7954 100644 --- a/upb/util/required_fields_test.cc +++ b/upb/util/required_fields_test.cc @@ -27,11 +27,11 @@ #include "upb/util/required_fields.h" -#include "absl/strings/string_view.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "upb/def.hpp" +#include "absl/strings/string_view.h" #include "upb/json_decode.h" +#include "upb/reflection/def.hpp" #include "upb/upb.hpp" #include "upb/util/required_fields_test.upb.h" #include "upb/util/required_fields_test.upbdefs.h" diff --git a/upbc/protoc-gen-upbdefs.cc b/upbc/protoc-gen-upbdefs.cc index 51b3b95927..e1924218d4 100644 --- a/upbc/protoc-gen-upbdefs.cc +++ b/upbc/protoc-gen-upbdefs.cc @@ -69,14 +69,14 @@ void WriteDefHeader(const protobuf::FileDescriptor* file, Output& output) { output( "#ifndef $0_UPBDEFS_H_\n" "#define $0_UPBDEFS_H_\n\n" - "#include \"upb/def.h\"\n" + "#include \"upb/reflection/def.h\"\n" "#include \"upb/port_def.inc\"\n" "#ifdef __cplusplus\n" "extern \"C\" {\n" "#endif\n\n", ToPreproc(file->name())); - output("#include \"upb/def.h\"\n"); + output("#include \"upb/reflection/def.h\"\n"); output("\n"); output("#include 
\"upb/port_def.inc\"\n"); output("\n"); @@ -102,7 +102,7 @@ void WriteDefHeader(const protobuf::FileDescriptor* file, Output& output) { void WriteDefSource(const protobuf::FileDescriptor* file, Output& output) { EmitFileWarning(file, output); - output("#include \"upb/def.h\"\n"); + output("#include \"upb/reflection/def.h\"\n"); output("#include \"$0\"\n", DefHeaderFilename(file->name())); output("#include \"$0\"\n", HeaderFilename(file)); output("\n"); From 04957b106174080e839b60c07c3a4c052646102b Mon Sep 17 00:00:00 2001 From: Ivo List Date: Fri, 16 Sep 2022 16:55:12 +0000 Subject: [PATCH 18/35] Make upb backwards and forwards compatible with Bazel 4.x, 3.5.x and LTS --- upb/bindings/lua/lua_proto_library.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/upb/bindings/lua/lua_proto_library.bzl b/upb/bindings/lua/lua_proto_library.bzl index bb565327f4..0db161b19c 100644 --- a/upb/bindings/lua/lua_proto_library.bzl +++ b/upb/bindings/lua/lua_proto_library.bzl @@ -57,13 +57,13 @@ def _get_real_short_path(file): def _get_real_root(ctx, file): real_short_path = _get_real_short_path(file) root = file.path[:-len(real_short_path) - 1] - if not _is_google3 and ctx.rule.attr.strip_import_prefix: + if not _is_google3 and ctx.rule.attr.strip_import_prefix and ctx.rule.attr.strip_import_prefix != "DO_NOT_STRIP": root = paths.join(root, ctx.rule.attr.strip_import_prefix[1:]) return root def _generate_output_file(ctx, src, extension): package = ctx.label.package - if not _is_google3 and ctx.rule.attr.strip_import_prefix: + if not _is_google3 and ctx.rule.attr.strip_import_prefix and ctx.rule.attr.strip_import_prefix != "DO_NOT_STRIP": package = package[len(ctx.rule.attr.strip_import_prefix):] real_short_path = _get_real_short_path(src) real_short_path = paths.relativize(real_short_path, package) From 668cebbad9abdacd733d5d30b34afe31bbf3df54 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Fri, 16 Sep 2022 13:10:48 -0700 Subject: [PATCH 19/35] mini descriptor 
encode functions now populate a string view Also fixed a few bugs in which output pointer updates were not stored. PiperOrigin-RevId: 474884814 --- upb/reflection/enum_def.c | 17 +++-- upb/reflection/enum_def.h | 5 +- upb/reflection/mini_descriptor_encode.c | 98 +++++++++++++------------ upb/reflection/mini_descriptor_encode.h | 32 ++++---- upbc/code_generator_request.c | 19 +++-- 5 files changed, 88 insertions(+), 83 deletions(-) diff --git a/upb/reflection/enum_def.c b/upb/reflection/enum_def.c index 0998d03c8f..d32ee569cb 100644 --- a/upb/reflection/enum_def.c +++ b/upb/reflection/enum_def.c @@ -91,19 +91,20 @@ static int cmp_values(const void* a, const void* b) { return (A < B) ? -1 : (A > B); } -const char* _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a) { - if (e->is_sorted) return _upb_MiniDescriptor_EncodeEnum(e, NULL, a); +bool _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out) { + if (e->is_sorted) return _upb_MiniDescriptor_EncodeEnum(e, NULL, a, out); const upb_EnumValueDef** sorted = (const upb_EnumValueDef**)upb_Arena_Malloc( a, e->value_count * sizeof(void*)); - if (!sorted) return NULL; + if (!sorted) return false; for (size_t i = 0; i < e->value_count; i++) { sorted[i] = upb_EnumDef_Value(e, i); } qsort(sorted, e->value_count, sizeof(void*), cmp_values); - return _upb_MiniDescriptor_EncodeEnum(e, sorted, a); + return _upb_MiniDescriptor_EncodeEnum(e, sorted, a, out); } const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e) { @@ -166,13 +167,13 @@ const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { static upb_MiniTable_Enum* create_enumlayout(upb_DefBuilder* ctx, const upb_EnumDef* e) { - const char* desc = _upb_EnumDef_MiniDescriptor(e, ctx->tmp_arena); - if (!desc) - _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); + upb_StringView sv; + bool ok = _upb_EnumDef_MiniDescriptor(e, ctx->tmp_arena, &sv); + if (!ok) 
_upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); upb_Status status; upb_MiniTable_Enum* layout = - upb_MiniTable_BuildEnum(desc, strlen(desc), ctx->arena, &status); + upb_MiniTable_BuildEnum(sv.data, sv.size, ctx->arena, &status); if (!layout) _upb_DefBuilder_Errf(ctx, "Error building enum MiniTable: %s", status.msg); return layout; diff --git a/upb/reflection/enum_def.h b/upb/reflection/enum_def.h index 0693453185..0d97b7cf8d 100644 --- a/upb/reflection/enum_def.h +++ b/upb/reflection/enum_def.h @@ -62,8 +62,9 @@ upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i); bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a); const upb_MiniTable_Enum* _upb_EnumDef_MiniTable(const upb_EnumDef* e); -// Builds and returns a mini descriptor, or NULL if OOM. -const char* _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a); +// Builds a mini descriptor, returns false if OOM. +bool _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out); // Allocate and initialize an array of |n| enum defs. upb_EnumDef* _upb_EnumDefs_New(upb_DefBuilder* ctx, int n, diff --git a/upb/reflection/mini_descriptor_encode.c b/upb/reflection/mini_descriptor_encode.c index 3a7a0d55ac..d2ad8c99b9 100644 --- a/upb/reflection/mini_descriptor_encode.c +++ b/upb/reflection/mini_descriptor_encode.c @@ -130,65 +130,68 @@ static int upb_MiniDescriptor_CompareFields(const void* a, const void* b) { return 0; } -const char* _upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, - const upb_EnumValueDef** sorted, - upb_Arena* a) { +bool _upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, + const upb_EnumValueDef** sorted, + upb_Arena* a, upb_StringView* out) { DescState s; upb_DescState_Init(&s); upb_MtDataEncoder_StartEnum(&s.e); - const size_t value_count = upb_EnumDef_ValueCount(e); - // Duplicate values are allowed but we only encode each value once. 
uint32_t previous = 0; + const size_t value_count = upb_EnumDef_ValueCount(e); for (size_t i = 0; i < value_count; i++) { const uint32_t current = upb_EnumValueDef_Number(sorted ? sorted[i] : upb_EnumDef_Value(e, i)); if (i != 0 && previous == current) continue; - if (!upb_DescState_Grow(&s, a)) return NULL; + if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current); previous = current; } - if (!upb_DescState_Grow(&s, a)) return NULL; + if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); // There will always be room for this '\0' in the encoder buffer because // kUpb_MtDataEncoder_MinSize is overkill for upb_MtDataEncoder_EndEnum(). UPB_ASSERT(s.ptr < s.buf + s.bufsize); - *s.ptr++ = '\0'; + *s.ptr = '\0'; - return s.buf; + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -const char* _upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, - upb_Arena* a) { +bool _upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out) { UPB_ASSERT(upb_FieldDef_IsExtension(f)); DescState s; upb_DescState_Init(&s); - if (!upb_DescState_Grow(&s, a)) return NULL; - upb_MtDataEncoder_StartMessage(&s.e, s.ptr, 0); + if (!upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_StartMessage(&s.e, s.ptr, 0); const upb_FieldType type = upb_FieldDef_Type(f); const int number = upb_FieldDef_Number(f); const uint64_t modifiers = upb_Field_Modifiers(f); - if (!upb_DescState_Grow(&s, a)) return NULL; - upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers); + if (!upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers); - if (!upb_DescState_Grow(&s, a)) return NULL; - *s.ptr++ = '\0'; + if (!upb_DescState_Grow(&s, a)) return false; + *s.ptr = '\0'; - return s.buf; + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } -const char* 
_upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, - upb_Arena* a) { +bool _upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out) { DescState s; upb_DescState_Init(&s); @@ -196,7 +199,7 @@ const char* _upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, const size_t field_count = upb_MessageDef_FieldCount(m); const upb_FieldDef** sorted = (const upb_FieldDef**)upb_Arena_Malloc(a, field_count * sizeof(void*)); - if (!sorted) return NULL; + if (!sorted) return false; // Sort the copy. for (size_t i = 0; i < field_count; i++) { @@ -204,56 +207,55 @@ const char* _upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, } qsort(sorted, field_count, sizeof(void*), upb_MiniDescriptor_CompareFields); - // Start encoding. - if (!upb_DescState_Grow(&s, a)) return NULL; - upb_MtDataEncoder_StartMessage(&s.e, s.ptr, upb_Message_Modifiers(m)); + if (!upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_StartMessage(&s.e, s.ptr, upb_Message_Modifiers(m)); - // Encode the fields. for (size_t i = 0; i < field_count; i++) { - const upb_FieldDef* field_def = sorted[i]; - const upb_FieldType type = upb_FieldDef_Type(field_def); - const int number = upb_FieldDef_Number(field_def); - const uint64_t modifiers = upb_Field_Modifiers(field_def); + const upb_FieldDef* f = sorted[i]; + const upb_FieldType type = upb_FieldDef_Type(f); + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = upb_Field_Modifiers(f); - if (!upb_DescState_Grow(&s, a)) return NULL; + if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers); } - // Encode the oneofs. 
const int oneof_count = upb_MessageDef_OneofCount(m); for (int i = 0; i < oneof_count; i++) { - if (!upb_DescState_Grow(&s, a)) return NULL; + if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_StartOneof(&s.e, s.ptr); - const upb_OneofDef* oneof_def = upb_MessageDef_Oneof(m, i); - const int field_count = upb_OneofDef_FieldCount(oneof_def); + const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); + const int field_count = upb_OneofDef_FieldCount(o); for (int j = 0; j < field_count; j++) { - const upb_FieldDef* field_def = upb_OneofDef_Field(oneof_def, j); - const int number = upb_FieldDef_Number(field_def); + const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); - if (!upb_DescState_Grow(&s, a)) return NULL; + if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_PutOneofField(&s.e, s.ptr, number); } } - if (!upb_DescState_Grow(&s, a)) return NULL; - *s.ptr++ = '\0'; + if (!upb_DescState_Grow(&s, a)) return false; + *s.ptr = '\0'; - return s.buf; + out->data = s.buf; + out->size = s.ptr - s.buf; + return true; } /******************************************************************************/ -const char* upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a) { - return _upb_EnumDef_MiniDescriptor(e, a); +bool upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out) { + return _upb_EnumDef_MiniDescriptor(e, a, out); } -const char* upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, - upb_Arena* a) { - return _upb_MiniDescriptor_EncodeField(f, a); +bool upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out) { + return _upb_MiniDescriptor_EncodeField(f, a, out); } -const char* upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, - upb_Arena* a) { - return _upb_MiniDescriptor_EncodeMessage(m, a); +bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out) { + return 
_upb_MiniDescriptor_EncodeMessage(m, a, out); } diff --git a/upb/reflection/mini_descriptor_encode.h b/upb/reflection/mini_descriptor_encode.h index f197ce7b99..a9677abfd0 100644 --- a/upb/reflection/mini_descriptor_encode.h +++ b/upb/reflection/mini_descriptor_encode.h @@ -29,6 +29,7 @@ #define UPB_REFLECTION_MINI_DESCRIPTOR_ENCODE_H_ #include "upb/reflection/common.h" +#include "upb/string_view.h" // Must be last. #include "upb/port_def.inc" @@ -37,15 +38,17 @@ extern "C" { #endif -// Creates and returns a mini descriptor string for an enum, or NULL on error. -const char* upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a); +// Creates a mini descriptor string for an enum, returns true on success. +bool upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out); -// Creates and returns a mini descriptor string for a field, or NULL on error. -const char* upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a); +// Creates a mini descriptor string for a field, returns true on success. +bool upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out); -// Creates and returns a mini descriptor string for a message, or NULL on error. -const char* upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, - upb_Arena* a); +// Creates a mini descriptor string for a message, returns true on success. +bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out); // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// @@ -53,17 +56,12 @@ const char* upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, // If the values in the enum happen to be defined in ascending order (when cast // to uint32_t) then |sorted| should be NULL. Otherwise it must point to an // array containing pointers to the enum value defs in sorted order. 
-const char* _upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, - const upb_EnumValueDef** sorted, - upb_Arena* a); - -// Creates and returns a mini descriptor string for a field, or NULL on error. -const char* _upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, - upb_Arena* a); +bool _upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, + const upb_EnumValueDef** sorted, + upb_Arena* a, upb_StringView* out); -// Creates and returns a mini descriptor string for a message, or NULL on error. -const char* _upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, - upb_Arena* a); +bool _upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out); #ifdef __cplusplus } /* extern "C" */ diff --git a/upbc/code_generator_request.c b/upbc/code_generator_request.c index ce6227d5a1..e3eca5a4c3 100644 --- a/upbc/code_generator_request.c +++ b/upbc/code_generator_request.c @@ -71,9 +71,9 @@ static void upbc_State_Init(upbc_State* s) { if (!s->out) upbc_Error(s, __func__, "could not allocate request"); } -static void upbc_State_Emit(upbc_State* s, const char* name, const char* data) { +static void upbc_State_Emit(upbc_State* s, const char* name, + upb_StringView encoding) { const upb_StringView key = upb_StringView_FromString(name); - const upb_StringView encoding = upb_StringView_FromString(data); bool ok = upbc_CodeGeneratorRequest_mini_descriptors_set(s->out, key, encoding, s->arena); if (!ok) upbc_Error(s, __func__, "could not set mini descriptor in map"); @@ -85,15 +85,17 @@ static void upbc_State_Emit(upbc_State* s, const char* name, const char* data) { static void upbc_Scrape_Message(upbc_State*, const upb_MessageDef*); static void upbc_Scrape_Enum(upbc_State* s, const upb_EnumDef* e) { - const char* desc = upb_MiniDescriptor_EncodeEnum(e, s->arena); - if (!desc) upbc_Error(s, __func__, "could not encode enum"); + upb_StringView desc; + bool ok = upb_MiniDescriptor_EncodeEnum(e, s->arena, &desc); + if (!ok) upbc_Error(s, __func__, "could 
not encode enum"); upbc_State_Emit(s, upb_EnumDef_FullName(e), desc); } static void upbc_Scrape_Extension(upbc_State* s, const upb_FieldDef* f) { - const char* desc = upb_MiniDescriptor_EncodeField(f, s->arena); - if (!desc) upbc_Error(s, __func__, "could not encode extension"); + upb_StringView desc; + bool ok = upb_MiniDescriptor_EncodeField(f, s->arena, &desc); + if (!ok) upbc_Error(s, __func__, "could not encode extension"); upbc_State_Emit(s, upb_FieldDef_FullName(f), desc); } @@ -170,8 +172,9 @@ static void upbc_Scrape_NestedMessages(upbc_State* s, const upb_MessageDef* m) { } static void upbc_Scrape_Message(upbc_State* s, const upb_MessageDef* m) { - const char* desc = upb_MiniDescriptor_EncodeMessage(m, s->arena); - if (!desc) upbc_Error(s, __func__, "could not encode message"); + upb_StringView desc; + bool ok = upb_MiniDescriptor_EncodeMessage(m, s->arena, &desc); + if (!ok) upbc_Error(s, __func__, "could not encode message"); upbc_State_Emit(s, upb_MessageDef_FullName(m), desc); From 6795ec13b42c5d829601b803292db4ed53e1c552 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Sep 2022 20:52:19 -0700 Subject: [PATCH 20/35] Fixed fuzz bug in MiniDescriptor parsing for extensions. If the extension MiniDescriptor did not contain any fields, we would read an uninitialized value. We need to add a check that the extension descriptor contains exactly one field. PiperOrigin-RevId: 475075831 --- upb/mini_table.c | 7 +++++-- upb/msg_test.cc | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/upb/mini_table.c b/upb/mini_table.c index 0f0592eda3..f3ccf081cd 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -754,7 +754,10 @@ static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, while (ptr < d->end) { char ch = *ptr++; if (ch <= kUpb_EncodedValue_MaxField) { - if (!d->table && last_field) return --ptr; + if (!d->table && last_field) { + // For extensions, consume only a single field and then return. 
+ return --ptr; + } upb_MiniTable_Field* field = fields; *field_count += 1; fields = (char*)fields + field_size; @@ -1148,7 +1151,7 @@ const char* upb_MiniTable_BuildExtension(const char* data, size_t len, uint16_t count = 0; const char* ret = upb_MtDecoder_Parse(&decoder, data, len, ext, sizeof(*ext), &count, NULL); - if (!ret) return NULL; + if (!ret || count != 1) return NULL; upb_MiniTable_Field* f = &ext->field; diff --git a/upb/msg_test.cc b/upb/msg_test.cc index 113972321a..085a85a2e7 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -570,4 +570,9 @@ TEST(MessageTest, MapField) { // -696925610, -654590577); // } // +// TEST(FuzzTest, ExtendMessageSetWithEmptyExtension) { +// DecodeEncodeArbitrarySchemaAndPayload({{"\n"}, {}, "_", {}}, std::string(), 0, +// 0); +// } +// // end:google_only From e55bfa285143a0c521a3490a6500288fd347dd16 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 18 Sep 2022 10:00:32 -0700 Subject: [PATCH 21/35] Minor decoder refactor Created a common function for saving/restoring the depth and checking end_group. This will be even more helpful if we decide to add any more state that is saved/resolved when we recurse into a sub-message. This appears to be perf-neutral. 
PiperOrigin-RevId: 475140169 --- upb/decode.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index b3bcc0afc3..7508473405 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -342,40 +342,43 @@ static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, } UPB_FORCEINLINE -static const char* _upb_Decoder_DecodeSubMessage( - upb_Decoder* d, const char* ptr, upb_Message* submsg, - const upb_MiniTable_Sub* subs, const upb_MiniTable_Field* field, int size) { - int saved_delta = _upb_Decoder_PushLimit(d, ptr, size); - const upb_MiniTable* subl = subs[field->submsg_index].submsg; +static const char* _upb_Decoder_RecurseSubMessage(upb_Decoder* d, + const char* ptr, + upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t expected_end_group) { if (--d->depth < 0) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); } ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); - if (d->end_group != DECODE_NOGROUP) { + d->depth++; + if (d->end_group != expected_end_group) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } + return ptr; +} + +UPB_FORCEINLINE +static const char* _upb_Decoder_DecodeSubMessage( + upb_Decoder* d, const char* ptr, upb_Message* submsg, + const upb_MiniTable_Sub* subs, const upb_MiniTable_Field* field, int size) { + int saved_delta = _upb_Decoder_PushLimit(d, ptr, size); + const upb_MiniTable* subl = subs[field->submsg_index].submsg; + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP); _upb_Decoder_PopLimit(d, ptr, saved_delta); - d->depth++; return ptr; } UPB_FORCEINLINE -static const char* _upb_Decoder_DoDecodeGroup(upb_Decoder* d, const char* ptr, - upb_Message* submsg, - const upb_MiniTable* subl, - uint32_t number) { - if (--d->depth < 0) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); - } +static const char* _upb_Decoder_DecodeGroup(upb_Decoder* d, const char* ptr, + 
upb_Message* submsg, + const upb_MiniTable* subl, + uint32_t number) { if (_upb_Decoder_IsDone(d, &ptr)) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); } - ptr = _upb_Decoder_DecodeMessage(d, ptr, submsg, subl); - if (d->end_group != number) { - _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - } + ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, number); d->end_group = DECODE_NOGROUP; - d->depth++; return ptr; } @@ -383,7 +386,7 @@ UPB_FORCEINLINE static const char* _upb_Decoder_DecodeUnknownGroup(upb_Decoder* d, const char* ptr, uint32_t number) { - return _upb_Decoder_DoDecodeGroup(d, ptr, NULL, NULL, number); + return _upb_Decoder_DecodeGroup(d, ptr, NULL, NULL, number); } UPB_FORCEINLINE @@ -391,7 +394,7 @@ static const char* _upb_Decoder_DecodeKnownGroup( upb_Decoder* d, const char* ptr, upb_Message* submsg, const upb_MiniTable_Sub* subs, const upb_MiniTable_Field* field) { const upb_MiniTable* subl = subs[field->submsg_index].submsg; - return _upb_Decoder_DoDecodeGroup(d, ptr, submsg, subl, field->number); + return _upb_Decoder_DecodeGroup(d, ptr, submsg, subl, field->number); } static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) { From ba511fd4253aa9efe6195b86c2b82cd5837cf51d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 18 Sep 2022 17:16:06 -0700 Subject: [PATCH 22/35] Added a size overflow check for mini-table building PiperOrigin-RevId: 475172738 --- upb/mini_table.c | 8 +++++++- upb/mini_table_test.cc | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/upb/mini_table.c b/upb/mini_table.c index f3ccf081cd..02c9145d15 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -892,7 +892,13 @@ size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); size_t ret = UPB_ALIGN_UP(d->table->size, align); - d->table->size = ret + size; + static const 
size_t max = UINT16_MAX; + size_t new_size = ret + size; + if (new_size > max) { + upb_MtDecoder_ErrorFormat( + d, "Message size exceeded maximum size of %zu bytes", max); + } + d->table->size = new_size; return ret; } diff --git a/upb/mini_table_test.cc b/upb/mini_table_test.cc index b86e35005b..e0af43110a 100644 --- a/upb/mini_table_test.cc +++ b/upb/mini_table_test.cc @@ -170,6 +170,32 @@ TEST_P(MiniTableTest, AllScalarTypesOneof) { EXPECT_EQ(0, table->required_count); } +TEST_P(MiniTableTest, SizeOverflow) { + upb::Arena arena; + upb::MtDataEncoder e; + // upb can only handle messages up to UINT16_MAX. + size_t max_double_fields = UINT16_MAX / (sizeof(double) + 1); + + // A bit under max_double_fields is ok. + ASSERT_TRUE(e.StartMessage(0)); + for (int i = 1; i < max_double_fields; i++) { + ASSERT_TRUE(e.PutField(kUpb_FieldType_Double, i, 0)); + } + upb::Status status; + upb_MiniTable* table = upb_MiniTable_Build( + e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr()); + ASSERT_NE(nullptr, table) << status.error_message(); + + // A bit over max_double_fields fails. + ASSERT_TRUE(e.StartMessage(0)); + for (int i = 1; i < max_double_fields + 2; i++) { + ASSERT_TRUE(e.PutField(kUpb_FieldType_Double, i, 0)); + } + upb_MiniTable* table2 = upb_MiniTable_Build( + e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr()); + ASSERT_EQ(nullptr, table2) << status.error_message(); +} + INSTANTIATE_TEST_SUITE_P(Platforms, MiniTableTest, testing::Values(kUpb_MiniTablePlatform_32Bit, kUpb_MiniTablePlatform_64Bit)); From f44653bc52230d81bc05abec024b055dd329ac23 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 18 Sep 2022 20:15:23 -0700 Subject: [PATCH 23/35] Readability improvements for decode.c: Several data arrays that are used in only one function have been moved from file scope to the function that uses them. Updated to use C99 array designators so we can specify values by array index. 
This is more readable and less error-prone than using comments to label each value. There is no functional change for the vast majority of this CL. The only exception is that we now test for OOM when creating a map. PiperOrigin-RevId: 475191072 --- upb/decode.c | 403 ++++++++++++++++++++++++++++----------------------- 1 file changed, 218 insertions(+), 185 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 7508473405..c103ade4db 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -37,143 +37,37 @@ // Must be last. #include "upb/port_def.inc" -/* Maps descriptor type -> elem_size_lg2. */ -static const uint8_t desctype_to_elem_size_lg2[] = { - -1, /* invalid descriptor type */ - 3, /* DOUBLE */ - 2, /* FLOAT */ - 3, /* INT64 */ - 3, /* UINT64 */ - 2, /* INT32 */ - 3, /* FIXED64 */ - 2, /* FIXED32 */ - 0, /* BOOL */ - UPB_SIZE(3, 4), /* STRING */ - UPB_SIZE(2, 3), /* GROUP */ - UPB_SIZE(2, 3), /* MESSAGE */ - UPB_SIZE(3, 4), /* BYTES */ - 2, /* UINT32 */ - 2, /* ENUM */ - 2, /* SFIXED32 */ - 3, /* SFIXED64 */ - 2, /* SINT32 */ - 3, /* SINT64 */ +// A few fake field types for our tables. +enum { + kUpb_FakeFieldType_FieldNotFound = 0, + kUpb_FakeFieldType_MessageSetItem = 19, }; -/* Maps descriptor type -> upb map size. */ -static const uint8_t desctype_to_mapsize[] = { - -1, /* invalid descriptor type */ - 8, /* DOUBLE */ - 4, /* FLOAT */ - 8, /* INT64 */ - 8, /* UINT64 */ - 4, /* INT32 */ - 8, /* FIXED64 */ - 4, /* FIXED32 */ - 1, /* BOOL */ - UPB_MAPTYPE_STRING, /* STRING */ - sizeof(void*), /* GROUP */ - sizeof(void*), /* MESSAGE */ - UPB_MAPTYPE_STRING, /* BYTES */ - 4, /* UINT32 */ - 4, /* ENUM */ - 4, /* SFIXED32 */ - 8, /* SFIXED64 */ - 4, /* SINT32 */ - 8, /* SINT64 */ +// DecodeOp: an action to be performed for a wire-type/field-type combination. +enum { + // Special ops: we don't write data to regular fields for these. + kUpb_DecodeOp_UnknownField = -1, + kUpb_DecodeOp_MessageSetItem = -2, + + // Scalar-only ops. 
+ kUpb_DecodeOp_Scalar1Byte = 0, + kUpb_DecodeOp_Scalar4Byte = 2, + kUpb_DecodeOp_Scalar8Byte = 3, + kUpb_DecodeOp_Enum = 1, + + // Scalar/repeated ops. + kUpb_DecodeOp_String = 4, + kUpb_DecodeOp_Bytes = 5, + kUpb_DecodeOp_SubMessage = 6, + + // Repeated-only ops (also see macros below). + kUpb_DecodeOp_PackedEnum = 13, }; -static const unsigned FIXED32_OK_MASK = (1 << kUpb_FieldType_Float) | - (1 << kUpb_FieldType_Fixed32) | - (1 << kUpb_FieldType_SFixed32); - -static const unsigned FIXED64_OK_MASK = (1 << kUpb_FieldType_Double) | - (1 << kUpb_FieldType_Fixed64) | - (1 << kUpb_FieldType_SFixed64); - -/* Three fake field types for MessageSet. */ -#define TYPE_MSGSET_ITEM 19 -#define TYPE_COUNT 19 - -/* Op: an action to be performed for a wire-type/field-type combination. */ -#define OP_UNKNOWN -1 /* Unknown field. */ -#define OP_MSGSET_ITEM -2 -#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */ -#define OP_ENUM 1 -#define OP_STRING 4 -#define OP_BYTES 5 -#define OP_SUBMSG 6 -/* Scalar fields use only ops above. Repeated fields can use any op. */ +// For packed fields it is helpful to be able to recover the lg2 of the data +// size from the op. 
#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ #define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ -#define OP_PACKED_ENUM 13 - -static const int8_t varint_ops[] = { - OP_UNKNOWN, /* field not found */ - OP_UNKNOWN, /* DOUBLE */ - OP_UNKNOWN, /* FLOAT */ - OP_SCALAR_LG2(3), /* INT64 */ - OP_SCALAR_LG2(3), /* UINT64 */ - OP_SCALAR_LG2(2), /* INT32 */ - OP_UNKNOWN, /* FIXED64 */ - OP_UNKNOWN, /* FIXED32 */ - OP_SCALAR_LG2(0), /* BOOL */ - OP_UNKNOWN, /* STRING */ - OP_UNKNOWN, /* GROUP */ - OP_UNKNOWN, /* MESSAGE */ - OP_UNKNOWN, /* BYTES */ - OP_SCALAR_LG2(2), /* UINT32 */ - OP_ENUM, /* ENUM */ - OP_UNKNOWN, /* SFIXED32 */ - OP_UNKNOWN, /* SFIXED64 */ - OP_SCALAR_LG2(2), /* SINT32 */ - OP_SCALAR_LG2(3), /* SINT64 */ - OP_UNKNOWN, /* MSGSET_ITEM */ -}; - -static const int8_t delim_ops[] = { - /* For non-repeated field type. */ - OP_UNKNOWN, /* field not found */ - OP_UNKNOWN, /* DOUBLE */ - OP_UNKNOWN, /* FLOAT */ - OP_UNKNOWN, /* INT64 */ - OP_UNKNOWN, /* UINT64 */ - OP_UNKNOWN, /* INT32 */ - OP_UNKNOWN, /* FIXED64 */ - OP_UNKNOWN, /* FIXED32 */ - OP_UNKNOWN, /* BOOL */ - OP_STRING, /* STRING */ - OP_UNKNOWN, /* GROUP */ - OP_SUBMSG, /* MESSAGE */ - OP_BYTES, /* BYTES */ - OP_UNKNOWN, /* UINT32 */ - OP_UNKNOWN, /* ENUM */ - OP_UNKNOWN, /* SFIXED32 */ - OP_UNKNOWN, /* SFIXED64 */ - OP_UNKNOWN, /* SINT32 */ - OP_UNKNOWN, /* SINT64 */ - OP_UNKNOWN, /* MSGSET_ITEM */ - /* For repeated field type. 
*/ - OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */ - OP_FIXPCK_LG2(2), /* REPEATED FLOAT */ - OP_VARPCK_LG2(3), /* REPEATED INT64 */ - OP_VARPCK_LG2(3), /* REPEATED UINT64 */ - OP_VARPCK_LG2(2), /* REPEATED INT32 */ - OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */ - OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */ - OP_VARPCK_LG2(0), /* REPEATED BOOL */ - OP_STRING, /* REPEATED STRING */ - OP_SUBMSG, /* REPEATED GROUP */ - OP_SUBMSG, /* REPEATED MESSAGE */ - OP_BYTES, /* REPEATED BYTES */ - OP_VARPCK_LG2(2), /* REPEATED UINT32 */ - OP_PACKED_ENUM, /* REPEATED ENUM */ - OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */ - OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */ - OP_VARPCK_LG2(2), /* REPEATED SINT32 */ - OP_VARPCK_LG2(3), /* REPEATED SINT64 */ - /* Omitting MSGSET_*, because we never emit a repeated msgset type */ -}; typedef union { bool bool_val; @@ -553,6 +447,37 @@ static const char* _upb_Decoder_DecodeEnumPacked( return ptr; } +upb_Array* _upb_Decoder_CreateArray(upb_Decoder* d, + const upb_MiniTable_Field* field) { + /* Maps descriptor type -> elem_size_lg2. 
*/ + static const uint8_t kElemSizeLg2[] = { + [0] = -1, // invalid descriptor type + [kUpb_FieldType_Double] = 3, + [kUpb_FieldType_Float] = 2, + [kUpb_FieldType_Int64] = 3, + [kUpb_FieldType_UInt64] = 3, + [kUpb_FieldType_Int32] = 2, + [kUpb_FieldType_Fixed64] = 3, + [kUpb_FieldType_Fixed32] = 2, + [kUpb_FieldType_Bool] = 0, + [kUpb_FieldType_String] = UPB_SIZE(3, 4), + [kUpb_FieldType_Group] = UPB_SIZE(2, 3), + [kUpb_FieldType_Message] = UPB_SIZE(2, 3), + [kUpb_FieldType_Bytes] = UPB_SIZE(3, 4), + [kUpb_FieldType_UInt32] = 2, + [kUpb_FieldType_Enum] = 2, + [kUpb_FieldType_SFixed32] = 2, + [kUpb_FieldType_SFixed64] = 3, + [kUpb_FieldType_SInt32] = 2, + [kUpb_FieldType_SInt64] = 3, + }; + + size_t lg2 = kElemSizeLg2[field->descriptortype]; + upb_Array* ret = _upb_Array_New(&d->arena, 4, lg2); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + return ret; +} + static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, upb_Message* msg, const upb_MiniTable_Sub* subs, @@ -565,31 +490,29 @@ static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, if (arr) { _upb_Decoder_Reserve(d, arr, 1); } else { - size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype]; - arr = _upb_Array_New(&d->arena, 4, lg2); - if (!arr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + arr = _upb_Decoder_CreateArray(d, field); *arrp = arr; } switch (op) { - case OP_SCALAR_LG2(0): - case OP_SCALAR_LG2(2): - case OP_SCALAR_LG2(3): + case kUpb_DecodeOp_Scalar1Byte: + case kUpb_DecodeOp_Scalar4Byte: + case kUpb_DecodeOp_Scalar8Byte: /* Append scalar value. */ mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << op, void); arr->size++; memcpy(mem, val, 1 << op); return ptr; - case OP_STRING: + case kUpb_DecodeOp_String: _upb_Decoder_VerifyUtf8(d, ptr, val->size); /* Fallthrough. */ - case OP_BYTES: { + case kUpb_DecodeOp_Bytes: { /* Append bytes. 
*/ upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->size; arr->size++; return _upb_Decoder_ReadString(d, ptr, val->size, str); } - case OP_SUBMSG: { + case kUpb_DecodeOp_SubMessage: { /* Append submessage / group. */ upb_Message* submsg = _upb_Decoder_NewSubMessage(d, subs, field); *UPB_PTR_AT(_upb_array_ptr(arr), arr->size * sizeof(void*), @@ -611,15 +534,50 @@ static const char* _upb_Decoder_DecodeToArray(upb_Decoder* d, const char* ptr, case OP_VARPCK_LG2(3): return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field, op - OP_VARPCK_LG2(0)); - case OP_ENUM: + case kUpb_DecodeOp_Enum: return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val); - case OP_PACKED_ENUM: + case kUpb_DecodeOp_PackedEnum: return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val); default: UPB_UNREACHABLE(); } } +upb_Map* _upb_Decoder_CreateMap(upb_Decoder* d, const upb_MiniTable* entry) { + /* Maps descriptor type -> upb map size. */ + static const uint8_t kSizeInMap[] = { + [0] = -1, // invalid descriptor type + [kUpb_FieldType_Double] = 8, + [kUpb_FieldType_Float] = 4, + [kUpb_FieldType_Int64] = 8, + [kUpb_FieldType_UInt64] = 8, + [kUpb_FieldType_Int32] = 4, + [kUpb_FieldType_Fixed64] = 8, + [kUpb_FieldType_Fixed32] = 4, + [kUpb_FieldType_Bool] = 1, + [kUpb_FieldType_String] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_Group] = sizeof(void*), + [kUpb_FieldType_Message] = sizeof(void*), + [kUpb_FieldType_Bytes] = UPB_MAPTYPE_STRING, + [kUpb_FieldType_UInt32] = 4, + [kUpb_FieldType_Enum] = 4, + [kUpb_FieldType_SFixed32] = 4, + [kUpb_FieldType_SFixed64] = 8, + [kUpb_FieldType_SInt32] = 4, + [kUpb_FieldType_SInt64] = 8, + }; + + const upb_MiniTable_Field* key_field = &entry->fields[0]; + const upb_MiniTable_Field* val_field = &entry->fields[1]; + char key_size = kSizeInMap[key_field->descriptortype]; + char val_size = kSizeInMap[val_field->descriptortype]; + UPB_ASSERT(key_field->offset == 0); + UPB_ASSERT(val_field->offset == 
sizeof(upb_StringView)); + upb_Map* ret = _upb_Map_New(&d->arena, key_size, val_size); + if (!ret) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + return ret; +} + static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, upb_Message* msg, const upb_MiniTable_Sub* subs, @@ -631,14 +589,7 @@ static const char* _upb_Decoder_DecodeToMap(upb_Decoder* d, const char* ptr, const upb_MiniTable* entry = subs[field->submsg_index].submsg; if (!map) { - /* Lazily create map. */ - const upb_MiniTable_Field* key_field = &entry->fields[0]; - const upb_MiniTable_Field* val_field = &entry->fields[1]; - char key_size = desctype_to_mapsize[key_field->descriptortype]; - char val_size = desctype_to_mapsize[val_field->descriptortype]; - UPB_ASSERT(key_field->offset == 0); - UPB_ASSERT(val_field->offset == sizeof(upb_StringView)); - map = _upb_Map_New(&d->arena, key_size, val_size); + map = _upb_Decoder_CreateMap(d, entry); *map_p = map; } @@ -679,7 +630,7 @@ static const char* _upb_Decoder_DecodeToSubMessage( void* mem = UPB_PTR_AT(msg, field->offset, void); int type = field->descriptortype; - if (UPB_UNLIKELY(op == OP_ENUM) && + if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) && !_upb_Decoder_CheckEnum(d, ptr, msg, subs[field->submsg_index].subenum, field, val)) { return ptr; @@ -691,7 +642,7 @@ static const char* _upb_Decoder_DecodeToSubMessage( } else if (field->presence < 0) { /* Oneof case */ uint32_t* oneof_case = _upb_oneofcase_field(msg, field); - if (op == OP_SUBMSG && *oneof_case != field->number) { + if (op == kUpb_DecodeOp_SubMessage && *oneof_case != field->number) { memset(mem, 0, sizeof(void*)); } *oneof_case = field->number; @@ -699,7 +650,7 @@ static const char* _upb_Decoder_DecodeToSubMessage( /* Store into message. 
*/ switch (op) { - case OP_SUBMSG: { + case kUpb_DecodeOp_SubMessage: { upb_Message** submsgp = mem; upb_Message* submsg = *submsgp; if (!submsg) { @@ -714,19 +665,19 @@ static const char* _upb_Decoder_DecodeToSubMessage( } break; } - case OP_STRING: + case kUpb_DecodeOp_String: _upb_Decoder_VerifyUtf8(d, ptr, val->size); /* Fallthrough. */ - case OP_BYTES: + case kUpb_DecodeOp_Bytes: return _upb_Decoder_ReadString(d, ptr, val->size, mem); - case OP_SCALAR_LG2(3): + case kUpb_DecodeOp_Scalar8Byte: memcpy(mem, val, 8); break; - case OP_ENUM: - case OP_SCALAR_LG2(2): + case kUpb_DecodeOp_Enum: + case kUpb_DecodeOp_Scalar4Byte: memcpy(mem, val, 4); break; - case OP_SCALAR_LG2(0): + case kUpb_DecodeOp_Scalar1Byte: memcpy(mem, val, 1); break; default: @@ -909,7 +860,8 @@ static const char* upb_Decoder_DecodeMessageSetItem( static const upb_MiniTable_Field* _upb_Decoder_FindField( upb_Decoder* d, const upb_MiniTable* l, uint32_t field_number, int* last_field_index) { - static upb_MiniTable_Field none = {0, 0, 0, 0, 0, 0}; + static upb_MiniTable_Field none = { + 0, 0, 0, 0, kUpb_FakeFieldType_FieldNotFound, 0}; if (l == NULL) return &none; size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX @@ -945,7 +897,8 @@ static const upb_MiniTable_Field* _upb_Decoder_FindField( } case kUpb_ExtMode_IsMessageSet: if (field_number == _UPB_MSGSET_ITEM) { - static upb_MiniTable_Field item = {0, 0, 0, 0, TYPE_MSGSET_ITEM, 0}; + static upb_MiniTable_Field item = { + 0, 0, 0, 0, kUpb_FakeFieldType_MessageSetItem, 0}; return &item; } break; @@ -960,47 +913,133 @@ found: return &l->fields[idx]; } +int _upb_Decoder_GetVarintOp(const upb_MiniTable_Field* field) { + static const int8_t kVarintOps[] = { + [kUpb_FakeFieldType_FieldNotFound] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_Scalar8Byte, 
+ [kUpb_FieldType_Int32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_Scalar1Byte, + [kUpb_FieldType_String] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_Scalar8Byte, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + }; + + return kVarintOps[field->descriptortype]; +} + +int _upb_Decoder_GetDelimitedOp(const upb_MiniTable_Field* field) { + enum { kRepeatedBase = 19 }; + + static const int8_t kDelimitedOps[] = { + /* For non-repeated field type. */ + [kUpb_FakeFieldType_FieldNotFound] = + kUpb_DecodeOp_UnknownField, // Field not found. 
+ [kUpb_FieldType_Double] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Float] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_UInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Int32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Fixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Bool] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kUpb_FieldType_Group] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kUpb_FieldType_UInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_Enum] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt32] = kUpb_DecodeOp_UnknownField, + [kUpb_FieldType_SInt64] = kUpb_DecodeOp_UnknownField, + [kUpb_FakeFieldType_MessageSetItem] = kUpb_DecodeOp_UnknownField, + // For repeated field type. 
*/ + [kRepeatedBase + kUpb_FieldType_Double] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Float] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Int64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_UInt64] = OP_VARPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Int32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Fixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_Fixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Bool] = OP_VARPCK_LG2(0), + [kRepeatedBase + kUpb_FieldType_String] = kUpb_DecodeOp_String, + [kRepeatedBase + kUpb_FieldType_Group] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Message] = kUpb_DecodeOp_SubMessage, + [kRepeatedBase + kUpb_FieldType_Bytes] = kUpb_DecodeOp_Bytes, + [kRepeatedBase + kUpb_FieldType_UInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_Enum] = kUpb_DecodeOp_PackedEnum, + [kRepeatedBase + kUpb_FieldType_SFixed32] = OP_FIXPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SFixed64] = OP_FIXPCK_LG2(3), + [kRepeatedBase + kUpb_FieldType_SInt32] = OP_VARPCK_LG2(2), + [kRepeatedBase + kUpb_FieldType_SInt64] = OP_VARPCK_LG2(3), + // Omitting kUpb_FakeFieldType_MessageSetItem, because we never emit a + // repeated msgset type + }; + + int ndx = field->descriptortype; + if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; + return kDelimitedOps[ndx]; +} + UPB_FORCEINLINE static const char* _upb_Decoder_DecodeWireValue( upb_Decoder* d, const char* ptr, const upb_MiniTable_Field* field, int wire_type, wireval* val, int* op) { + static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | + (1 << kUpb_FieldType_Fixed32) | + (1 << kUpb_FieldType_SFixed32); + + static const unsigned kFixed64OkMask = (1 << kUpb_FieldType_Double) | + (1 << kUpb_FieldType_Fixed64) | + (1 << kUpb_FieldType_SFixed64); + switch (wire_type) { case kUpb_WireType_Varint: ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val); - *op = 
varint_ops[field->descriptortype]; + *op = _upb_Decoder_GetVarintOp(field); _upb_Decoder_Munge(field->descriptortype, val); return ptr; case kUpb_WireType_32Bit: memcpy(&val->uint32_val, ptr, 4); val->uint32_val = _upb_BigEndian_Swap32(val->uint32_val); - *op = OP_SCALAR_LG2(2); - if (((1 << field->descriptortype) & FIXED32_OK_MASK) == 0) { - *op = OP_UNKNOWN; + *op = kUpb_DecodeOp_Scalar4Byte; + if (((1 << field->descriptortype) & kFixed32OkMask) == 0) { + *op = kUpb_DecodeOp_UnknownField; } return ptr + 4; case kUpb_WireType_64Bit: memcpy(&val->uint64_val, ptr, 8); val->uint64_val = _upb_BigEndian_Swap64(val->uint64_val); - *op = OP_SCALAR_LG2(3); - if (((1 << field->descriptortype) & FIXED64_OK_MASK) == 0) { - *op = OP_UNKNOWN; + *op = kUpb_DecodeOp_Scalar8Byte; + if (((1 << field->descriptortype) & kFixed64OkMask) == 0) { + *op = kUpb_DecodeOp_UnknownField; } return ptr + 8; - case kUpb_WireType_Delimited: { - int ndx = field->descriptortype; - if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += TYPE_COUNT; + case kUpb_WireType_Delimited: ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); - *op = delim_ops[ndx]; + *op = _upb_Decoder_GetDelimitedOp(field); return ptr; - } case kUpb_WireType_StartGroup: val->uint32_val = field->number; if (field->descriptortype == kUpb_FieldType_Group) { - *op = OP_SUBMSG; - } else if (field->descriptortype == TYPE_MSGSET_ITEM) { - *op = OP_MSGSET_ITEM; + *op = kUpb_DecodeOp_SubMessage; + } else if (field->descriptortype == kUpb_FakeFieldType_MessageSetItem) { + *op = kUpb_DecodeOp_MessageSetItem; } else { - *op = OP_UNKNOWN; + *op = kUpb_DecodeOp_UnknownField; } return ptr; default: @@ -1156,11 +1195,11 @@ static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); } else { switch (op) { - case OP_UNKNOWN: + case kUpb_DecodeOp_UnknownField: ptr = _upb_Decoder_DecodeUnknownField(d, ptr, msg, field_number, wire_type, val); break; 
- case OP_MSGSET_ITEM: + case kUpb_DecodeOp_MessageSetItem: ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout); break; } @@ -1234,11 +1273,5 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, return status; } -#undef OP_UNKNOWN -#undef OP_SKIP -#undef OP_SCALAR_LG2 #undef OP_FIXPCK_LG2 #undef OP_VARPCK_LG2 -#undef OP_STRING -#undef OP_BYTES -#undef OP_SUBMSG From 9e19bec587e9b2d2bea2e92c9b511702064a54b6 Mon Sep 17 00:00:00 2001 From: Ivo List Date: Mon, 19 Sep 2022 10:46:20 +0000 Subject: [PATCH 24/35] Fix upb proto libraryies for default strip_import_prefix. --- bazel/py_proto_library.bzl | 2 +- bazel/upb_proto_library.bzl | 2 +- upb/bindings/lua/lua_proto_library.bzl | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bazel/py_proto_library.bzl b/bazel/py_proto_library.bzl index c66ded8654..1ce3a6e11b 100644 --- a/bazel/py_proto_library.bzl +++ b/bazel/py_proto_library.bzl @@ -78,7 +78,7 @@ def _generate_output_file(ctx, src, extension): package = ctx.label.package if not _is_google3: strip_import_prefix = ctx.rule.attr.strip_import_prefix - if strip_import_prefix: + if strip_import_prefix and strip_import_prefix != "/": if not package.startswith(strip_import_prefix[1:]): fail("%s does not begin with prefix %s" % (package, strip_import_prefix)) package = package[len(strip_import_prefix):] diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index 2bd0e6d662..3e9743a8c3 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -81,7 +81,7 @@ def _generate_output_file(ctx, src, extension): package = ctx.label.package if not _is_google3: strip_import_prefix = ctx.rule.attr.strip_import_prefix - if strip_import_prefix: + if strip_import_prefix and strip_import_prefix != "/": if not package.startswith(strip_import_prefix[1:]): fail("%s does not begin with prefix %s" % (package, strip_import_prefix)) package = package[len(strip_import_prefix):] diff --git 
a/upb/bindings/lua/lua_proto_library.bzl b/upb/bindings/lua/lua_proto_library.bzl index 0db161b19c..4e0164fd9f 100644 --- a/upb/bindings/lua/lua_proto_library.bzl +++ b/upb/bindings/lua/lua_proto_library.bzl @@ -57,13 +57,13 @@ def _get_real_short_path(file): def _get_real_root(ctx, file): real_short_path = _get_real_short_path(file) root = file.path[:-len(real_short_path) - 1] - if not _is_google3 and ctx.rule.attr.strip_import_prefix and ctx.rule.attr.strip_import_prefix != "DO_NOT_STRIP": + if not _is_google3 and ctx.rule.attr.strip_import_prefix: root = paths.join(root, ctx.rule.attr.strip_import_prefix[1:]) return root def _generate_output_file(ctx, src, extension): package = ctx.label.package - if not _is_google3 and ctx.rule.attr.strip_import_prefix and ctx.rule.attr.strip_import_prefix != "DO_NOT_STRIP": + if not _is_google3 and ctx.rule.attr.strip_import_prefix and ctx.rule.attr.strip_import_prefix != "/": package = package[len(ctx.rule.attr.strip_import_prefix):] real_short_path = _get_real_short_path(src) real_short_path = paths.relativize(real_short_path, package) From c7187301d3106ff906175f0c63db134da752409a Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Fri, 23 Sep 2022 09:05:00 -0700 Subject: [PATCH 25/35] fix some warnings when the compiler is being strict PiperOrigin-RevId: 476384407 --- upb/io/string.h | 2 +- upb/io/tokenizer_test.cc | 12 ++++++------ upb/mini_table_test.cc | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/upb/io/string.h b/upb/io/string.h index 53e84b1529..62b74e2ec4 100644 --- a/upb/io/string.h +++ b/upb/io/string.h @@ -62,9 +62,9 @@ UPB_INLINE bool upb_String_Init(upb_String* s, upb_Arena* a) { s->size_ = 0; s->capacity_ = kDefaultCapacity; s->data_ = (char*)upb_Arena_Malloc(a, kDefaultCapacity); + s->arena_ = a; if (!s->data_) return false; s->data_[0] = '\0'; - s->arena_ = a; return true; } diff --git a/upb/io/tokenizer_test.cc b/upb/io/tokenizer_test.cc index a4ed94337c..5894d7305f 100644 --- 
a/upb/io/tokenizer_test.cc +++ b/upb/io/tokenizer_test.cc @@ -81,7 +81,7 @@ static bool StringEquals(const char* a, const char* b) { }; \ \ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ - for (int i = 0; i < arraysize(CASES); i++) { \ + for (size_t i = 0; i < arraysize(CASES); i++) { \ SCOPED_TRACE(testing::Message() \ << #CASES " case #" << i << ": " << CASES[i]); \ DoSingleCase(CASES[i]); \ @@ -100,8 +100,8 @@ static bool StringEquals(const char* a, const char* b) { }; \ \ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ - for (int i = 0; i < arraysize(CASES1); i++) { \ - for (int j = 0; j < arraysize(CASES2); j++) { \ + for (size_t i = 0; i < arraysize(CASES1); i++) { \ + for (size_t j = 0; j < arraysize(CASES2); j++) { \ SCOPED_TRACE(testing::Message() \ << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \ << #CASES2 " case #" << j << ": " << CASES2[j]); \ @@ -571,7 +571,7 @@ TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) { auto t = upb_Tokenizer_New(NULL, 0, input, options, arena.ptr()); // Advance through tokens and check that they are parsed as expected. 
- for (int i = 0; i < arraysize(kTokens); i++) { + for (size_t i = 0; i < arraysize(kTokens); i++) { EXPECT_TRUE(upb_Tokenizer_Next(t, NULL)); EXPECT_TRUE(StringEquals(upb_Tokenizer_TextData(t), kTokens[i])); } @@ -1000,7 +1000,7 @@ TEST_F(TokenizerTest, ParseString) { upb::Arena arena; - for (int i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++) { + for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++) { auto sv = upb_Parse_String(inputs[i].data(), arena.ptr()); EXPECT_TRUE(StringEquals(sv.data, outputs[i].data())); } @@ -1202,7 +1202,7 @@ static std::string StandardUTF8(uint32_t code_point) { static std::string DisplayHex(const std::string& data) { std::string output; - for (int i = 0; i < data.size(); ++i) { + for (size_t i = 0; i < data.size(); ++i) { absl::StrAppendFormat(&output, "%02x ", data[i]); } return output; diff --git a/upb/mini_table_test.cc b/upb/mini_table_test.cc index e0af43110a..24efe5b519 100644 --- a/upb/mini_table_test.cc +++ b/upb/mini_table_test.cc @@ -178,7 +178,7 @@ TEST_P(MiniTableTest, SizeOverflow) { // A bit under max_double_fields is ok. ASSERT_TRUE(e.StartMessage(0)); - for (int i = 1; i < max_double_fields; i++) { + for (size_t i = 1; i < max_double_fields; i++) { ASSERT_TRUE(e.PutField(kUpb_FieldType_Double, i, 0)); } upb::Status status; @@ -188,7 +188,7 @@ TEST_P(MiniTableTest, SizeOverflow) { // A bit over max_double_fields fails. ASSERT_TRUE(e.StartMessage(0)); - for (int i = 1; i < max_double_fields + 2; i++) { + for (size_t i = 1; i < max_double_fields + 2; i++) { ASSERT_TRUE(e.PutField(kUpb_FieldType_Double, i, 0)); } upb_MiniTable* table2 = upb_MiniTable_Build( From e76dc4ac8a7b5a23edd8a13a816a01ac61b45bb7 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Fri, 23 Sep 2022 09:34:56 -0700 Subject: [PATCH 26/35] Fix mini_table_accessors FindUnknown status. 
PiperOrigin-RevId: 476391579 --- upb/mini_table_accessors.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/upb/mini_table_accessors.c b/upb/mini_table_accessors.c index 69847a96a3..325062ddeb 100644 --- a/upb/mini_table_accessors.c +++ b/upb/mini_table_accessors.c @@ -319,7 +319,9 @@ upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg, const char* ptr = upb_Message_GetUnknown(msg, &size); if (size == 0) { + ret.status = kUpb_FindUnknown_NotPresent; ret.ptr = NULL; + ret.len = 0; return ret; } const char* end = ptr + size; From 6a625a627536388912380adbf88084ba8dd4f115 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Fri, 23 Sep 2022 12:49:51 -0700 Subject: [PATCH 27/35] make msan happy with the arena code Clear a field that we were missing in one of the constructor paths PiperOrigin-RevId: 476440650 --- upb/arena.c | 1 + 1 file changed, 1 insertion(+) diff --git a/upb/arena.c b/upb/arena.c index 319698a574..2043470d71 100644 --- a/upb/arena.c +++ b/upb/arena.c @@ -186,6 +186,7 @@ upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) { a->head.ptr = mem; a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char); a->freelist = NULL; + a->freelist_tail = NULL; a->cleanup_metadata = upb_cleanup_metadata(NULL, true); return a; From 8cc3e07735b192a6e45031b42040a21c90bac63e Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Sun, 25 Sep 2022 20:19:42 -0700 Subject: [PATCH 28/35] move (most of) the json code into its own subdir Continuing the trend of giving individual build targets their own subdirs where appropriate, and leaving behind temporary stub headers for compatibility. JSON is a particularly good choice for this because of how little interaction it has with the rest of upb. 
PiperOrigin-RevId: 476792865 --- BUILD | 14 ++-- upb/bindings/lua/msg.c | 4 +- upb/conformance_upb.c | 4 +- upb/{json_decode.c => json/decode.c} | 2 +- upb/json/decode.h | 52 ++++++++++++++ .../decode_test.cc} | 8 +-- upb/{json_encode.c => json/encode.c} | 2 +- upb/json/encode.h | 70 +++++++++++++++++++ .../encode_test.cc} | 6 +- upb/{json_test.proto => json/test.proto} | 0 upb/json_decode.h | 23 +----- upb/json_encode.h | 41 +---------- upb/msg_test.cc | 4 +- upb/test_cpp.cc | 4 +- upbc/protoc-gen-upbdev.cc | 4 +- 15 files changed, 155 insertions(+), 83 deletions(-) rename upb/{json_decode.c => json/decode.c} (99%) create mode 100644 upb/json/decode.h rename upb/{json_decode_test.cc => json/decode_test.cc} (96%) rename upb/{json_encode.c => json/encode.c} (99%) create mode 100644 upb/json/encode.h rename upb/{json_encode_test.cc => json/encode_test.cc} (97%) rename upb/{json_test.proto => json/test.proto} (100%) diff --git a/BUILD b/BUILD index 43de536048..fd16a0aecb 100644 --- a/BUILD +++ b/BUILD @@ -462,13 +462,17 @@ cc_library( ], ) +# TODO(b/232091617): Once we can delete the deprecated forwarding headers +# (= everything in upb/) we can move this build target down into json/ cc_library( name = "json", srcs = [ - "upb/json_decode.c", - "upb/json_encode.c", + "upb/json/decode.c", + "upb/json/encode.c", ], hdrs = [ + "upb/json/decode.h", + "upb/json/encode.h", "upb/json_decode.h", "upb/json_encode.h", ], @@ -558,7 +562,7 @@ upb_proto_library( proto_library( name = "json_test_proto", testonly = 1, - srcs = ["upb/json_test.proto"], + srcs = ["upb/json/test.proto"], deps = ["@com_google_protobuf//:struct_proto"], ) @@ -576,7 +580,7 @@ upb_proto_reflection_library( cc_test( name = "json_decode_test", - srcs = ["upb/json_decode_test.cc"], + srcs = ["upb/json/decode_test.cc"], deps = [ ":json", ":json_test_upb_proto", @@ -590,7 +594,7 @@ cc_test( cc_test( name = "json_encode_test", - srcs = ["upb/json_encode_test.cc"], + srcs = ["upb/json/encode_test.cc"], deps = [ 
":json", ":json_test_upb_proto", diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index 4e37e0cecc..9ffb92d32f 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -39,8 +39,8 @@ #include "lauxlib.h" #include "upb/bindings/lua/upb.h" -#include "upb/json_decode.h" -#include "upb/json_encode.h" +#include "upb/json/decode.h" +#include "upb/json/encode.h" #include "upb/map.h" #include "upb/port_def.inc" #include "upb/reflection/message.h" diff --git a/upb/conformance_upb.c b/upb/conformance_upb.c index b08239b6ff..d6a3e1ed9a 100644 --- a/upb/conformance_upb.c +++ b/upb/conformance_upb.c @@ -41,8 +41,8 @@ #include "google/protobuf/test_messages_proto3.upbdefs.h" #include "upb/decode.h" #include "upb/encode.h" -#include "upb/json_decode.h" -#include "upb/json_encode.h" +#include "upb/json/decode.h" +#include "upb/json/encode.h" #include "upb/reflection/message.h" #include "upb/text_encode.h" diff --git a/upb/json_decode.c b/upb/json/decode.c similarity index 99% rename from upb/json_decode.c rename to upb/json/decode.c index 2342d688e1..d07362e4dc 100644 --- a/upb/json_decode.c +++ b/upb/json/decode.c @@ -25,7 +25,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/json_decode.h" +#include "upb/json/decode.h" #include #include diff --git a/upb/json/decode.h b/upb/json/decode.h new file mode 100644 index 0000000000..5ddb164c71 --- /dev/null +++ b/upb/json/decode.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_JSON_DECODE_H_ +#define UPB_JSON_DECODE_H_ + +#include "upb/reflection/def.h" + +// Must be last. 
+#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +enum { upb_JsonDecode_IgnoreUnknown = 1 }; + +bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg, + const upb_MessageDef* m, const upb_DefPool* symtab, + int options, upb_Arena* arena, upb_Status* status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_JSONDECODE_H_ */ diff --git a/upb/json_decode_test.cc b/upb/json/decode_test.cc similarity index 96% rename from upb/json_decode_test.cc rename to upb/json/decode_test.cc index 2d41e602d8..deb40f4ad1 100644 --- a/upb/json_decode_test.cc +++ b/upb/json/decode_test.cc @@ -25,14 +25,12 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/json_decode.h" - -#include +#include "upb/json/decode.h" #include "google/protobuf/struct.upb.h" #include "gtest/gtest.h" -#include "upb/json_test.upb.h" -#include "upb/json_test.upbdefs.h" +#include "upb/json/test.upb.h" +#include "upb/json/test.upbdefs.h" #include "upb/reflection/def.hpp" #include "upb/upb.hpp" diff --git a/upb/json_encode.c b/upb/json/encode.c similarity index 99% rename from upb/json_encode.c rename to upb/json/encode.c index 487ee11098..c833087e90 100644 --- a/upb/json_encode.c +++ b/upb/json/encode.c @@ -25,7 +25,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/json_encode.h" +#include "upb/json/encode.h" #include #include diff --git a/upb/json/encode.h b/upb/json/encode.h new file mode 100644 index 0000000000..95fcca78e0 --- /dev/null +++ b/upb/json/encode.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_JSON_ENCODE_H_ +#define UPB_JSON_ENCODE_H_ + +#include "upb/reflection/def.h" + +// Must be last. +#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + /* When set, emits 0/default values. TODO(haberman): proto3 only? */ + upb_JsonEncode_EmitDefaults = 1 << 0, + + /* When set, use normal (snake_case) field names instead of JSON (camelCase) + names. */ + upb_JsonEncode_UseProtoNames = 1 << 1, + + /* When set, emits enums as their integer values instead of as their names. */ + upb_JsonEncode_FormatEnumsAsIntegers = 1 << 2 +}; + +/* Encodes the given |msg| to JSON format. The message's reflection is given in + * |m|. The symtab in |symtab| is used to find extensions (if NULL, extensions + * will not be printed). 
+ * + * Output is placed in the given buffer, and always NULL-terminated. The output + * size (excluding NULL) is returned. This means that a return value >= |size| + * implies that the output was truncated. (These are the same semantics as + * snprintf()). */ +size_t upb_JsonEncode(const upb_Message* msg, const upb_MessageDef* m, + const upb_DefPool* ext_pool, int options, char* buf, + size_t size, upb_Status* status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_JSONENCODE_H_ */ diff --git a/upb/json_encode_test.cc b/upb/json/encode_test.cc similarity index 97% rename from upb/json_encode_test.cc rename to upb/json/encode_test.cc index 0d14b4176d..4c0d1a7745 100644 --- a/upb/json_encode_test.cc +++ b/upb/json/encode_test.cc @@ -25,12 +25,12 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/json_encode.h" +#include "upb/json/encode.h" #include "google/protobuf/struct.upb.h" #include "gtest/gtest.h" -#include "upb/json_test.upb.h" -#include "upb/json_test.upbdefs.h" +#include "upb/json/test.upb.h" +#include "upb/json/test.upbdefs.h" #include "upb/reflection/def.hpp" #include "upb/upb.hpp" diff --git a/upb/json_test.proto b/upb/json/test.proto similarity index 100% rename from upb/json_test.proto rename to upb/json/test.proto diff --git a/upb/json_decode.h b/upb/json_decode.h index a5abad279f..c27c91be06 100644 --- a/upb/json_decode.h +++ b/upb/json_decode.h @@ -25,28 +25,11 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +// This header is deprecated, use upb/json/decode.h instead + #ifndef UPB_JSONDECODE_H_ #define UPB_JSONDECODE_H_ -#include "upb/reflection/def.h" - -// Must be last. 
-#include "upb/port_def.inc" - -#ifdef __cplusplus -extern "C" { -#endif - -enum { upb_JsonDecode_IgnoreUnknown = 1 }; - -bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg, - const upb_MessageDef* m, const upb_DefPool* symtab, - int options, upb_Arena* arena, upb_Status* status); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port_undef.inc" +#include "upb/json/decode.h" #endif /* UPB_JSONDECODE_H_ */ diff --git a/upb/json_encode.h b/upb/json_encode.h index b2207a9807..e76e1382f1 100644 --- a/upb/json_encode.h +++ b/upb/json_encode.h @@ -25,46 +25,11 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +// This header is deprecated, use upb/json/encode.h instead + #ifndef UPB_JSONENCODE_H_ #define UPB_JSONENCODE_H_ -#include "upb/reflection/def.h" - -// Must be last. -#include "upb/port_def.inc" - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - /* When set, emits 0/default values. TODO(haberman): proto3 only? */ - upb_JsonEncode_EmitDefaults = 1 << 0, - - /* When set, use normal (snake_case) field names instead of JSON (camelCase) - names. */ - upb_JsonEncode_UseProtoNames = 1 << 1, - - /* When set, emits enums as their integer values instead of as their names. */ - upb_JsonEncode_FormatEnumsAsIntegers = 1 << 2 -}; - -/* Encodes the given |msg| to JSON format. The message's reflection is given in - * |m|. The symtab in |symtab| is used to find extensions (if NULL, extensions - * will not be printed). - * - * Output is placed in the given buffer, and always NULL-terminated. The output - * size (excluding NULL) is returned. This means that a return value >= |size| - * implies that the output was truncated. (These are the same semantics as - * snprintf()). 
*/ -size_t upb_JsonEncode(const upb_Message* msg, const upb_MessageDef* m, - const upb_DefPool* ext_pool, int options, char* buf, - size_t size, upb_Status* status); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port_undef.inc" +#include "upb/json/encode.h" #endif /* UPB_JSONENCODE_H_ */ diff --git a/upb/msg_test.cc b/upb/msg_test.cc index 085a85a2e7..055aed2762 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -29,8 +29,8 @@ #include "gtest/gtest.h" #include "google/protobuf/test_messages_proto3.upb.h" #include "upb/fuzz_test_util.h" -#include "upb/json_decode.h" -#include "upb/json_encode.h" +#include "upb/json/decode.h" +#include "upb/json/encode.h" #include "upb/msg_test.upb.h" #include "upb/msg_test.upbdefs.h" #include "upb/reflection/def.hpp" diff --git a/upb/test_cpp.cc b/upb/test_cpp.cc index e70771420b..b7101f6d88 100644 --- a/upb/test_cpp.cc +++ b/upb/test_cpp.cc @@ -38,8 +38,8 @@ #include "google/protobuf/timestamp.upb.h" #include "google/protobuf/timestamp.upbdefs.h" #include "gtest/gtest.h" -#include "upb/json_decode.h" -#include "upb/json_encode.h" +#include "upb/json/decode.h" +#include "upb/json/encode.h" #include "upb/reflection/def.h" #include "upb/reflection/def.hpp" #include "upb/test_cpp.upb.h" diff --git a/upbc/protoc-gen-upbdev.cc b/upbc/protoc-gen-upbdev.cc index a320c89fdc..ca79a40574 100644 --- a/upbc/protoc-gen-upbdev.cc +++ b/upbc/protoc-gen-upbdev.cc @@ -30,8 +30,8 @@ #include "google/protobuf/compiler/plugin.upb.h" #include "google/protobuf/compiler/plugin.upbdefs.h" -#include "upb/json_decode.h" -#include "upb/json_encode.h" +#include "upb/json/decode.h" +#include "upb/json/encode.h" #include "upb/reflection/def.h" #include "upb/upb.h" #include "upbc/code_generator_request.h" From 23048df5259ab99a45918738adb534405e4da1a5 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Tue, 27 Sep 2022 08:15:45 -0700 Subject: [PATCH 29/35] =?UTF-8?q?Fully-qualify=20labels=20in=20the=20?= 
=?UTF-8?q?=C2=B5pb=20repository.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is necessary at least for workspace rules because otherwise files will be taken from the main repository. PiperOrigin-RevId: 477175268 --- bazel/workspace_deps.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bazel/workspace_deps.bzl b/bazel/workspace_deps.bzl index c5036f6507..0f13d13b5b 100644 --- a/bazel/workspace_deps.bzl +++ b/bazel/workspace_deps.bzl @@ -25,7 +25,7 @@ def upb_deps(): name = "com_google_protobuf", repo = "https://github.com/protocolbuffers/protobuf", commit = "5407aa62af5f8d71c344ef10877806232a137991", - patches = ["//bazel:protobuf.patch"], + patches = ["@upb//bazel:protobuf.patch"], ) rules_python_version = "0.12.0" # Latest @ August 31, 2022 From e779b9d90aa8f8df7117c0f1870a158d54ab8d95 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Tue, 27 Sep 2022 18:02:21 -0700 Subject: [PATCH 30/35] silently succeed when adding the same serialized file in Python PiperOrigin-RevId: 477321377 --- python/descriptor_pool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/descriptor_pool.c b/python/descriptor_pool.c index 4e9faaad2f..073fbdaf28 100644 --- a/python/descriptor_pool.c +++ b/python/descriptor_pool.c @@ -218,8 +218,7 @@ static PyObject* PyUpb_DescriptorPool_DoAddSerializedFile( } const upb_MessageDef* m = PyUpb_DescriptorPool_GetFileProtoDef(); if (upb_Message_IsEqual(proto, existing, m)) { - Py_INCREF(Py_None); - result = Py_None; + result = PyUpb_FileDescriptor_Get(file); goto done; } } From efd06e46a41161fd7e923a4ffa1cf485c1b9e3db Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Tue, 27 Sep 2022 19:20:43 -0700 Subject: [PATCH 31/35] use mini descriptors to build message defs and extension defs PiperOrigin-RevId: 477332937 --- upb/mini_table.c | 2 +- upb/reflection/def_builder.c | 10 +- upb/reflection/def_builder.h | 13 +- upb/reflection/def_pool.c | 79 ++-- 
upb/reflection/def_pool.h | 41 +- upb/reflection/enum_def.c | 24 +- upb/reflection/enum_def.h | 5 +- upb/reflection/enum_value_def.c | 21 + upb/reflection/enum_value_def.h | 3 + upb/reflection/field_def.c | 584 ++++++------------------ upb/reflection/field_def.h | 18 +- upb/reflection/file_def.c | 29 +- upb/reflection/message_def.c | 147 ++++-- upb/reflection/message_def.h | 4 + upb/reflection/mini_descriptor_encode.c | 69 +-- upb/reflection/mini_descriptor_encode.h | 13 - 16 files changed, 413 insertions(+), 649 deletions(-) diff --git a/upb/mini_table.c b/upb/mini_table.c index 02c9145d15..07723500df 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -990,7 +990,7 @@ upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, done: *buf = decoder.vec.data; - *buf_size = decoder.vec.capacity / sizeof(*decoder.vec.data); + *buf_size = decoder.vec.capacity * sizeof(*decoder.vec.data); return decoder.table; } diff --git a/upb/reflection/def_builder.c b/upb/reflection/def_builder.c index 8ca6404778..e0dadfc4d1 100644 --- a/upb/reflection/def_builder.c +++ b/upb/reflection/def_builder.c @@ -59,17 +59,19 @@ const char* _upb_DefBuilder_FullToShort(const char* fullname) { } } +void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } + void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { va_list argp; va_start(argp, fmt); upb_Status_VSetErrorFormat(ctx->status, fmt, argp); va_end(argp); - UPB_LONGJMP(ctx->err, 1); + _upb_DefBuilder_FailJmp(ctx); } void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { upb_Status_SetErrorMessage(ctx->status, "out of memory"); - UPB_LONGJMP(ctx->err, 1); + _upb_DefBuilder_FailJmp(ctx); } const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, @@ -114,7 +116,7 @@ const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, if (sym.data[0] == '.') { /* Symbols starting with '.' are absolute, so we do a single lookup. * Slice to omit the leading '.' 
*/ - if (!_upb_DefPool_LookupAny2(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { + if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { goto notfound; } } else { @@ -130,7 +132,7 @@ const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, } memcpy(p, sym.data, sym.size); p += sym.size; - if (_upb_DefPool_LookupAny2(ctx->symtab, tmp, p - tmp, &v)) { + if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { break; } if (!remove_component(tmp, &baselen)) { diff --git a/upb/reflection/def_builder.h b/upb/reflection/def_builder.h index 4b7358c77a..ec7c0fe701 100644 --- a/upb/reflection/def_builder.h +++ b/upb/reflection/def_builder.h @@ -69,6 +69,9 @@ struct upb_DefBuilder { extern const char* kUpbDefOptDefault; +// ctx->status has already been set elsewhere so just fail/longjmp() +UPB_NORETURN void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx); + UPB_NORETURN void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) UPB_PRINTF(2, 3); UPB_NORETURN void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx); @@ -105,13 +108,9 @@ UPB_INLINE void* _upb_DefBuilder_Alloc(upb_DefBuilder* ctx, size_t bytes) { // adding, so we know which entries to remove if building this file fails. UPB_INLINE void _upb_DefBuilder_Add(upb_DefBuilder* ctx, const char* name, upb_value v) { - // TODO: table should support an operation "tryinsert" to avoid the double - // lookup. 
- if (_upb_DefPool_Contains(ctx->symtab, name)) { - _upb_DefBuilder_Errf(ctx, "duplicate symbol '%s'", name); - } - bool ok = _upb_DefPool_Insert(ctx->symtab, name, v); - if (!ok) _upb_DefBuilder_OomErr(ctx); + upb_StringView sym = {.data = name, .size = strlen(name)}; + bool ok = _upb_DefPool_InsertSym(ctx->symtab, sym, v, ctx->status); + if (!ok) _upb_DefBuilder_FailJmp(ctx); } UPB_INLINE upb_Arena* _upb_DefBuilder_Arena(const upb_DefBuilder* ctx) { diff --git a/upb/reflection/def_pool.c b/upb/reflection/def_pool.c index 7ab375ec60..8bb938852c 100644 --- a/upb/reflection/def_pool.c +++ b/upb/reflection/def_pool.c @@ -27,8 +27,6 @@ #include "upb/reflection/def_pool.h" -#include - #include "upb/reflection/def_builder.h" #include "upb/reflection/def_type.h" #include "upb/reflection/enum_def.h" @@ -47,11 +45,14 @@ struct upb_DefPool { upb_strtable files; // file_name -> (upb_FileDef*) upb_inttable exts; // (upb_MiniTable_Extension*) -> (upb_FieldDef*) upb_ExtensionRegistry* extreg; + void* scratch_data; + size_t scratch_size; size_t bytes_loaded; }; void upb_DefPool_Free(upb_DefPool* s) { upb_Arena_Free(s->arena); + upb_gfree(s->scratch_data); upb_gfree(s); } @@ -62,6 +63,10 @@ upb_DefPool* upb_DefPool_New(void) { s->arena = upb_Arena_New(); s->bytes_loaded = 0; + s->scratch_size = 240; + s->scratch_data = upb_gmalloc(s->scratch_size); + if (!s->scratch_data) goto err; + if (!upb_strtable_init(&s->syms, 32, s->arena)) goto err; if (!upb_strtable_init(&s->files, 4, s->arena)) goto err; if (!upb_inttable_init(&s->exts, s->arena)) goto err; @@ -72,45 +77,41 @@ upb_DefPool* upb_DefPool_New(void) { return s; err: - upb_Arena_Free(s->arena); - upb_gfree(s); + upb_DefPool_Free(s); return NULL; } -bool _upb_DefPool_Contains(const upb_DefPool* s, const char* sym) { - return upb_strtable_lookup(&s->syms, sym, NULL); -} - -bool _upb_DefPool_Insert(upb_DefPool* s, const char* sym, upb_value v) { - return _upb_DefPool_Insert2(s, sym, strlen(sym), v); -} - -bool 
_upb_DefPool_Insert2(upb_DefPool* s, const char* sym, size_t size, - upb_value v) { - return upb_strtable_insert(&s->syms, sym, size, v, s->arena); -} - bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTable_Extension* ext, - upb_FieldDef* f, upb_Arena* a) { + upb_FieldDef* f) { return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), - a); + s->arena); } -static const void* _upb_DefPool_Lookup(const upb_DefPool* s, const char* sym, - upb_deftype_t type) { - return _upb_DefPool_Lookup2(s, sym, strlen(sym), type); +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status) { + // TODO: table should support an operation "tryinsert" to avoid the double + // lookup. + if (upb_strtable_lookup2(&s->syms, sym.data, sym.size, NULL)) { + upb_Status_SetErrorFormat(status, "duplicate symbol '%s'", sym.data); + return false; + } + if (!upb_strtable_insert(&s->syms, sym.data, sym.size, v, s->arena)) { + upb_Status_SetErrorMessage(status, "out of memory"); + return false; + } + return true; } -const void* _upb_DefPool_Lookup2(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type) { +static const void* _upb_DefPool_Unpack(const upb_DefPool* s, const char* sym, + size_t size, upb_deftype_t type) { upb_value v; return upb_strtable_lookup2(&s->syms, sym, size, &v) ? 
_upb_DefType_Unpack(v, type) : NULL; } -bool _upb_DefPool_LookupAny2(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v) { +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v) { return upb_strtable_lookup2(&s->syms, sym, size, v); } @@ -118,24 +119,32 @@ upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s) { return s->extreg; } +void** _upb_DefPool_ScratchData(const upb_DefPool* s) { + return (void**)&s->scratch_data; +} + +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s) { + return (size_t*)&s->scratch_size; +} + const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s, const char* sym) { - return _upb_DefPool_Lookup(s, sym, UPB_DEFTYPE_MSG); + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_MSG); } const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize( const upb_DefPool* s, const char* sym, size_t len) { - return _upb_DefPool_Lookup2(s, sym, len, UPB_DEFTYPE_MSG); + return _upb_DefPool_Unpack(s, sym, len, UPB_DEFTYPE_MSG); } const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s, const char* sym) { - return _upb_DefPool_Lookup(s, sym, UPB_DEFTYPE_ENUM); + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUM); } const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s, const char* sym) { - return _upb_DefPool_Lookup(s, sym, UPB_DEFTYPE_ENUMVAL); + return _upb_DefPool_Unpack(s, sym, strlen(sym), UPB_DEFTYPE_ENUMVAL); } const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s, @@ -182,12 +191,12 @@ const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s, const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s, const char* name) { - return _upb_DefPool_Lookup(s, name, UPB_DEFTYPE_SERVICE); + return _upb_DefPool_Unpack(s, name, strlen(name), UPB_DEFTYPE_SERVICE); } const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize( const upb_DefPool* s, const char* name, size_t 
size) { - return _upb_DefPool_Lookup2(s, name, size, UPB_DEFTYPE_SERVICE); + return _upb_DefPool_Unpack(s, name, size, UPB_DEFTYPE_SERVICE); } const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s, @@ -275,6 +284,12 @@ static const upb_FileDef* _upb_DefPool_AddFile( const upb_MiniTable_File* layout, upb_Status* status) { const upb_StringView name = google_protobuf_FileDescriptorProto_name(file_proto); + if (name.size == 0) { + upb_Status_SetErrorFormat(status, + "missing name in google_protobuf_FileDescriptorProto"); + return NULL; + } + // Determine whether we already know about this file. { upb_value v; diff --git a/upb/reflection/def_pool.h b/upb/reflection/def_pool.h index 765053eaaa..909746a397 100644 --- a/upb/reflection/def_pool.h +++ b/upb/reflection/def_pool.h @@ -96,44 +96,37 @@ const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s, // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// -// For generated code only: loads a generated descriptor. -typedef struct _upb_DefPool_Init { - struct _upb_DefPool_Init** deps; // Dependencies of this file. - const upb_MiniTable_File* layout; - const char* filename; - upb_StringView descriptor; // Serialized descriptor. 
-} _upb_DefPool_Init; - upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s); size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); - -bool _upb_DefPool_Contains(const upb_DefPool* s, const char* sym); - upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s); - -bool _upb_DefPool_Insert(upb_DefPool* s, const char* sym, upb_value v); -bool _upb_DefPool_Insert2(upb_DefPool* s, const char* sym, size_t size, - upb_value v); +const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable( + const upb_DefPool* s, const upb_MiniTable_Extension* ext); bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTable_Extension* ext, - upb_FieldDef* f, upb_Arena* a); + upb_FieldDef* f); +bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, + upb_Status* status); +bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, + upb_value* v); -const void* _upb_DefPool_Lookup2(const upb_DefPool* s, const char* sym, - size_t size, upb_deftype_t type); +void** _upb_DefPool_ScratchData(const upb_DefPool* s); +size_t* _upb_DefPool_ScratchSize(const upb_DefPool* s); -bool _upb_DefPool_LookupAny2(const upb_DefPool* s, const char* sym, size_t size, - upb_value* v); +// For generated code only: loads a generated descriptor. +typedef struct _upb_DefPool_Init { + struct _upb_DefPool_Init** deps; // Dependencies of this file. + const upb_MiniTable_File* layout; + const char* filename; + upb_StringView descriptor; // Serialized descriptor. +} _upb_DefPool_Init; -const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable( - const upb_DefPool* s, const upb_MiniTable_Extension* ext); +bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init); // Should only be directly called by tests. This variant lets us suppress // the use of compiled-in tables, forcing a rebuild of the tables at runtime. 
bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, bool rebuild_minitable); -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb/reflection/enum_def.c b/upb/reflection/enum_def.c index d32ee569cb..7bb7cf8cbb 100644 --- a/upb/reflection/enum_def.c +++ b/upb/reflection/enum_def.c @@ -85,27 +85,7 @@ bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a) { return true; } -static int cmp_values(const void* a, const void* b) { - const uint32_t A = upb_EnumValueDef_Number(*(const upb_EnumValueDef**)a); - const uint32_t B = upb_EnumValueDef_Number(*(const upb_EnumValueDef**)b); - return (A < B) ? -1 : (A > B); -} - -bool _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a, - upb_StringView* out) { - if (e->is_sorted) return _upb_MiniDescriptor_EncodeEnum(e, NULL, a, out); - - const upb_EnumValueDef** sorted = (const upb_EnumValueDef**)upb_Arena_Malloc( - a, e->value_count * sizeof(void*)); - if (!sorted) return false; - - for (size_t i = 0; i < e->value_count; i++) { - sorted[i] = upb_EnumDef_Value(e, i); - } - qsort(sorted, e->value_count, sizeof(void*), cmp_values); - - return _upb_MiniDescriptor_EncodeEnum(e, sorted, a, out); -} +bool _upb_EnumDef_IsSorted(const upb_EnumDef* e) { return e->is_sorted; } const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e) { return e->opts; @@ -168,7 +148,7 @@ const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) { static upb_MiniTable_Enum* create_enumlayout(upb_DefBuilder* ctx, const upb_EnumDef* e) { upb_StringView sv; - bool ok = _upb_EnumDef_MiniDescriptor(e, ctx->tmp_arena, &sv); + bool ok = upb_MiniDescriptor_EncodeEnum(e, ctx->tmp_arena, &sv); if (!ok) _upb_DefBuilder_Errf(ctx, "OOM while building enum MiniDescriptor"); upb_Status status; diff --git a/upb/reflection/enum_def.h b/upb/reflection/enum_def.h index 0d97b7cf8d..c85026152d 100644 --- 
a/upb/reflection/enum_def.h +++ b/upb/reflection/enum_def.h @@ -60,12 +60,9 @@ int upb_EnumDef_ValueCount(const upb_EnumDef* e); upb_EnumDef* _upb_EnumDef_At(const upb_EnumDef* e, int i); bool _upb_EnumDef_Insert(upb_EnumDef* e, upb_EnumValueDef* v, upb_Arena* a); +bool _upb_EnumDef_IsSorted(const upb_EnumDef* e); const upb_MiniTable_Enum* _upb_EnumDef_MiniTable(const upb_EnumDef* e); -// Builds a mini descriptor, returns false if OOM. -bool _upb_EnumDef_MiniDescriptor(const upb_EnumDef* e, upb_Arena* a, - upb_StringView* out); - // Allocate and initialize an array of |n| enum defs. upb_EnumDef* _upb_EnumDefs_New(upb_DefBuilder* ctx, int n, const google_protobuf_EnumDescriptorProto* const* protos, diff --git a/upb/reflection/enum_value_def.c b/upb/reflection/enum_value_def.c index ad7504a9f2..9564510c3f 100644 --- a/upb/reflection/enum_value_def.c +++ b/upb/reflection/enum_value_def.c @@ -46,6 +46,27 @@ upb_EnumValueDef* _upb_EnumValueDef_At(const upb_EnumValueDef* v, int i) { return (upb_EnumValueDef*)&v[i]; } +static int _upb_EnumValueDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(const upb_EnumValueDef**)p1)->number; + const uint32_t v2 = (*(const upb_EnumValueDef**)p2)->number; + return (v1 < v2) ? -1 : (v1 > v2); +} + +const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, + int n, upb_Arena* a) { + // TODO: Try to replace this arena alloc with a persistent scratch buffer. 
+ upb_EnumValueDef** out = + (upb_EnumValueDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; + + for (int i = 0; i < n; i++) { + out[i] = (upb_EnumValueDef*)&v[i]; + } + qsort(out, n, sizeof(void*), _upb_EnumValueDef_Compare); + + return (const upb_EnumValueDef**)out; +} + const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options( const upb_EnumValueDef* v) { return v->opts; diff --git a/upb/reflection/enum_value_def.h b/upb/reflection/enum_value_def.h index 31bd7b5f27..98f4a189ce 100644 --- a/upb/reflection/enum_value_def.h +++ b/upb/reflection/enum_value_def.h @@ -58,6 +58,9 @@ upb_EnumValueDef* _upb_EnumValueDefs_New( const google_protobuf_EnumValueDescriptorProto* const* protos, upb_EnumDef* e, bool* is_sorted); +const upb_EnumValueDef** _upb_EnumValueDefs_Sorted(const upb_EnumValueDef* v, + int n, upb_Arena* a); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb/reflection/field_def.c b/upb/reflection/field_def.c index 36f1160fd3..3dabea3b94 100644 --- a/upb/reflection/field_def.c +++ b/upb/reflection/field_def.c @@ -39,6 +39,7 @@ #include "upb/reflection/extension_range.h" #include "upb/reflection/file_def.h" #include "upb/reflection/message_def.h" +#include "upb/reflection/mini_descriptor_encode.h" #include "upb/reflection/oneof_def.h" // Must be last. 
@@ -76,10 +77,10 @@ struct upb_FieldDef { } sub; uint32_t number_; uint16_t index_; - uint16_t layout_index; /* Index into msgdef->layout->fields or file->exts */ + uint16_t layout_index; // Index into msgdef->layout->fields or file->exts bool has_default; bool is_extension_; - bool packed_; + bool is_packed_; bool proto3_optional_; bool has_json_name_; upb_FieldType type_; @@ -152,7 +153,7 @@ bool upb_FieldDef_IsExtension(const upb_FieldDef* f) { return f->is_extension_; } -bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->packed_; } +bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->is_packed_; } const char* upb_FieldDef_Name(const upb_FieldDef* f) { return _upb_DefBuilder_FullToShort(f->full_name); @@ -245,47 +246,30 @@ const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable( return _upb_FileDef_ExtensionMiniTable(file, f->layout_index); } -bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { - return f->proto3_optional_; -} - -bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_Message; -} - -bool upb_FieldDef_IsString(const upb_FieldDef* f) { - return upb_FieldDef_CType(f) == kUpb_CType_String || - upb_FieldDef_CType(f) == kUpb_CType_Bytes; -} +bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f) { + if (UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3) return false; + if (f->type_ != kUpb_FieldType_Enum) return false; -bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Optional; -} - -bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Required; -} + // TODO(https://github.com/protocolbuffers/upb/issues/541): + // fix map enum values to check for unknown enum values and put + // them in the unknown field set. 
+ if (upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) { + return false; + } -bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Repeated; + // TODO: Maybe make is_proto2 a bool at creation? + const upb_FileDef* file = upb_EnumDef_File(f->sub.enumdef); + return upb_FileDef_Syntax(file) == kUpb_Syntax_Proto2; } -bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { - return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); +bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { + return f->proto3_optional_; } -bool upb_FieldDef_IsMap(const upb_FieldDef* f) { - return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) && - upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); -} +int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } -bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { - return upb_FieldDef_IsSubMessage(f) || - upb_FieldDef_CType(f) == kUpb_CType_Enum; -} - bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { if (upb_FieldDef_IsRepeated(f)) return false; const upb_FileDef* file = upb_FieldDef_File(f); @@ -293,372 +277,51 @@ bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { upb_FileDef_Syntax(file) == kUpb_Syntax_Proto2; } -static bool between(int32_t x, int32_t low, int32_t high) { - return x >= low && x <= high; +bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) { + return upb_FieldDef_IsSubMessage(f) || + upb_FieldDef_CType(f) == kUpb_CType_Enum; } -bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } -bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } -bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } - -bool upb_FieldDef_checkdescriptortype(int32_t type) { - return between(type, 1, 18); +bool upb_FieldDef_IsMap(const upb_FieldDef* f) { + return upb_FieldDef_IsRepeated(f) && 
upb_FieldDef_IsSubMessage(f) && + upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f)); } -/* Code to build defs from descriptor protos. *********************************/ - -/* There is a question of how much validation to do here. It will be difficult - * to perfectly match the amount of validation performed by proto2. But since - * this code is used to directly build defs from Ruby (for example) we do need - * to validate important constraints like uniqueness of names and numbers. */ - -static size_t div_round_up(size_t n, size_t d) { return (n + d - 1) / d; } - -static size_t upb_MessageValue_sizeof(upb_CType type) { - switch (type) { - case kUpb_CType_Double: - case kUpb_CType_Int64: - case kUpb_CType_UInt64: - return 8; - case kUpb_CType_Enum: - case kUpb_CType_Int32: - case kUpb_CType_UInt32: - case kUpb_CType_Float: - return 4; - case kUpb_CType_Bool: - return 1; - case kUpb_CType_Message: - return sizeof(void*); - case kUpb_CType_Bytes: - case kUpb_CType_String: - return sizeof(upb_StringView); - } - UPB_UNREACHABLE(); +bool upb_FieldDef_IsOptional(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Optional; } -static uint8_t upb_msg_fielddefsize(const upb_FieldDef* f) { - if (upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) { - upb_MapEntry ent; - UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v)); - return sizeof(ent.k); - } else if (upb_FieldDef_IsRepeated(f)) { - return sizeof(void*); - } else { - return upb_MessageValue_sizeof(upb_FieldDef_CType(f)); - } +bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { + return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); } -static uint32_t upb_MiniTable_place(upb_DefBuilder* ctx, upb_MiniTable* l, - size_t size, const upb_MessageDef* m) { - size_t ofs = UPB_ALIGN_UP(l->size, size); - size_t next = ofs + size; - - if (next > UINT16_MAX) { - _upb_DefBuilder_Errf(ctx, - "size of message %s exceeded max size of %zu bytes", - upb_MessageDef_FullName(m), (size_t)UINT16_MAX); - 
} - - l->size = next; - return ofs; +bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Required; } -static int field_number_cmp(const void* p1, const void* p2) { - const upb_MiniTable_Field* f1 = p1; - const upb_MiniTable_Field* f2 = p2; - return f1->number - f2->number; +bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Repeated; } -static void assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l, - upb_MiniTable_Field* fields) { - int i; - int n = upb_MessageDef_FieldCount(m); - int dense_below = 0; - for (i = 0; i < n; i++) { - upb_FieldDef* f = - (upb_FieldDef*)upb_MessageDef_FindFieldByNumber(m, fields[i].number); - UPB_ASSERT(f); - f->layout_index = i; - if (i < UINT8_MAX && fields[i].number == i + 1 && - (i == 0 || fields[i - 1].number == i)) { - dense_below = i + 1; - } - } - l->dense_below = dense_below; +bool upb_FieldDef_IsString(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_String || + upb_FieldDef_CType(f) == kUpb_CType_Bytes; } -static uint8_t map_descriptortype(const upb_FieldDef* f) { - uint8_t type = upb_FieldDef_Type(f); - /* See TableDescriptorType() in upbc/generator.cc for details and - * rationale of these exceptions. */ - if (type == kUpb_FieldType_String) { - const upb_FileDef* file = upb_FieldDef_File(f); - const upb_Syntax syntax = upb_FileDef_Syntax(file); - - if (syntax == kUpb_Syntax_Proto2) return kUpb_FieldType_Bytes; - } else if (type == kUpb_FieldType_Enum) { - const upb_FileDef* file = upb_EnumDef_File(f->sub.enumdef); - const upb_Syntax syntax = upb_FileDef_Syntax(file); - - if (syntax == kUpb_Syntax_Proto3 || UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 || - // TODO(https://github.com/protocolbuffers/upb/issues/541): - // fix map enum values to check for unknown enum values and put - // them in the unknown field set. 
- upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) { - return kUpb_FieldType_Int32; - } - } - return type; +bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) { + return upb_FieldDef_CType(f) == kUpb_CType_Message; } -static void fill_fieldlayout(upb_MiniTable_Field* field, - const upb_FieldDef* f) { - field->number = upb_FieldDef_Number(f); - field->descriptortype = map_descriptortype(f); - - if (upb_FieldDef_IsMap(f)) { - field->mode = - kUpb_FieldMode_Map | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift); - } else if (upb_FieldDef_IsRepeated(f)) { - field->mode = - kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift); - } else { - /* Maps descriptor type -> elem_size_lg2. */ - static const uint8_t sizes[] = { - -1, /* invalid descriptor type */ - kUpb_FieldRep_8Byte, /* DOUBLE */ - kUpb_FieldRep_4Byte, /* FLOAT */ - kUpb_FieldRep_8Byte, /* INT64 */ - kUpb_FieldRep_8Byte, /* UINT64 */ - kUpb_FieldRep_4Byte, /* INT32 */ - kUpb_FieldRep_8Byte, /* FIXED64 */ - kUpb_FieldRep_4Byte, /* FIXED32 */ - kUpb_FieldRep_1Byte, /* BOOL */ - kUpb_FieldRep_StringView, /* STRING */ - kUpb_FieldRep_Pointer, /* GROUP */ - kUpb_FieldRep_Pointer, /* MESSAGE */ - kUpb_FieldRep_StringView, /* BYTES */ - kUpb_FieldRep_4Byte, /* UINT32 */ - kUpb_FieldRep_4Byte, /* ENUM */ - kUpb_FieldRep_4Byte, /* SFIXED32 */ - kUpb_FieldRep_8Byte, /* SFIXED64 */ - kUpb_FieldRep_4Byte, /* SINT32 */ - kUpb_FieldRep_8Byte, /* SINT64 */ - }; - field->mode = kUpb_FieldMode_Scalar | - (sizes[field->descriptortype] << kUpb_FieldRep_Shift); - } - - if (upb_FieldDef_IsPacked(f)) { - field->mode |= kUpb_LabelFlags_IsPacked; - } - - if (upb_FieldDef_IsExtension(f)) { - field->mode |= kUpb_LabelFlags_IsExtension; - } +static bool between(int32_t x, int32_t low, int32_t high) { + return x >= low && x <= high; } -/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. - * It computes a dynamic layout for all of the fields in |m|. 
*/ -void _upb_FieldDef_MakeLayout(upb_DefBuilder* ctx, const upb_MessageDef* m) { - upb_MiniTable* l = (upb_MiniTable*)upb_MessageDef_MiniTable(m); - size_t field_count = upb_MessageDef_FieldCount(m); - size_t sublayout_count = 0; - upb_MiniTable_Sub* subs; - upb_MiniTable_Field* fields; - - memset(l, 0, sizeof(*l) + sizeof(_upb_FastTable_Entry)); - - // Count sub-messages. - for (size_t i = 0; i < field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - if (upb_FieldDef_IsSubMessage(f)) { - sublayout_count++; - } - if (upb_FieldDef_CType(f) == kUpb_CType_Enum && - upb_FileDef_Syntax(upb_EnumDef_File(f->sub.enumdef)) == - kUpb_Syntax_Proto2) { - sublayout_count++; - } - } - - fields = _upb_DefBuilder_Alloc(ctx, field_count * sizeof(*fields)); - subs = _upb_DefBuilder_Alloc(ctx, sublayout_count * sizeof(*subs)); - - l->field_count = upb_MessageDef_FieldCount(m); - l->fields = fields; - l->subs = subs; - l->table_mask = 0; - l->required_count = 0; - - if (upb_MessageDef_ExtensionRangeCount(m) > 0) { - if (google_protobuf_MessageOptions_message_set_wire_format( - upb_MessageDef_Options(m))) { - l->ext = kUpb_ExtMode_IsMessageSet; - } else { - l->ext = kUpb_ExtMode_Extendable; - } - } else { - l->ext = kUpb_ExtMode_NonExtendable; - } - - /* TODO(haberman): initialize fast tables so that reflection-based parsing - * can get the same speeds as linked-in types. */ - l->fasttable[0].field_parser = &_upb_FastDecoder_DecodeGeneric; - l->fasttable[0].field_data = 0; - - if (upb_MessageDef_IsMapEntry(m)) { - /* TODO(haberman): refactor this method so this special case is more - * elegant. 
*/ - const upb_FieldDef* key = upb_MessageDef_FindFieldByNumber(m, 1); - const upb_FieldDef* val = upb_MessageDef_FindFieldByNumber(m, 2); - fields[0].number = 1; - fields[1].number = 2; - fields[0].mode = kUpb_FieldMode_Scalar; - fields[1].mode = kUpb_FieldMode_Scalar; - fields[0].presence = 0; - fields[1].presence = 0; - fields[0].descriptortype = map_descriptortype(key); - fields[1].descriptortype = map_descriptortype(val); - fields[0].offset = 0; - fields[1].offset = sizeof(upb_StringView); - fields[1].submsg_index = 0; - - if (upb_FieldDef_CType(val) == kUpb_CType_Message) { - subs[0].submsg = - upb_MessageDef_MiniTable(upb_FieldDef_MessageSubDef(val)); - } - - upb_FieldDef* fielddefs = (upb_FieldDef*)upb_MessageDef_Field(m, 0); - UPB_ASSERT(fielddefs[0].number_ == 1); - UPB_ASSERT(fielddefs[1].number_ == 2); - fielddefs[0].layout_index = 0; - fielddefs[1].layout_index = 1; - - l->field_count = 2; - l->size = 2 * sizeof(upb_StringView); - l->size = UPB_ALIGN_UP(l->size, 8); - l->dense_below = 2; - return; - } - - /* Allocate data offsets in three stages: - * - * 1. hasbits. - * 2. regular fields. - * 3. oneof fields. - * - * OPT: There is a lot of room for optimization here to minimize the size. - */ - - /* Assign hasbits for required fields first. */ - size_t hasbit = 0; - - for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)]; - if (upb_FieldDef_Label(f) == kUpb_Label_Required) { - field->presence = ++hasbit; - if (hasbit >= 63) { - _upb_DefBuilder_Errf(ctx, "Message with >=63 required fields: %s", - upb_MessageDef_FullName(m)); - } - l->required_count++; - } - } - - /* Allocate hasbits and set basic field attributes. 
*/ - sublayout_count = 0; - for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)]; - - fill_fieldlayout(field, f); - - if (field->descriptortype == kUpb_FieldType_Message || - field->descriptortype == kUpb_FieldType_Group) { - field->submsg_index = sublayout_count++; - subs[field->submsg_index].submsg = - upb_MessageDef_MiniTable(upb_FieldDef_MessageSubDef(f)); - } else if (field->descriptortype == kUpb_FieldType_Enum) { - field->submsg_index = sublayout_count++; - subs[field->submsg_index].subenum = - _upb_EnumDef_MiniTable(upb_FieldDef_EnumSubDef(f)); - UPB_ASSERT(subs[field->submsg_index].subenum); - } - - if (upb_FieldDef_Label(f) == kUpb_Label_Required) { - /* Hasbit was already assigned. */ - } else if (upb_FieldDef_HasPresence(f) && - !upb_FieldDef_RealContainingOneof(f)) { - /* We don't use hasbit 0, so that 0 can indicate "no presence" in the - * table. This wastes one hasbit, but we don't worry about it for now. */ - field->presence = ++hasbit; - } else { - field->presence = 0; - } - } - - /* Account for space used by hasbits. */ - l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0; - - /* Allocate non-oneof fields. */ - for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - size_t field_size = upb_msg_fielddefsize(f); - size_t index = upb_FieldDef_Index(f); - - if (upb_FieldDef_RealContainingOneof(f)) { - /* Oneofs are handled separately below. */ - continue; - } - - fields[index].offset = upb_MiniTable_place(ctx, l, field_size, m); - } - - /* Allocate oneof fields. Each oneof field consists of a uint32 for the case - * and space for the actual data. */ - for (int i = 0; i < upb_MessageDef_OneofCount(m); i++) { - const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); - size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. 
*/ - size_t field_size = 0; - uint32_t case_offset; - uint32_t data_offset; - - if (upb_OneofDef_IsSynthetic(o)) continue; - - if (upb_OneofDef_FieldCount(o) == 0) { - _upb_DefBuilder_Errf(ctx, "Oneof must have at least one field (%s)", - upb_OneofDef_FullName(o)); - } - - /* Calculate field size: the max of all field sizes. */ - for (int j = 0; j < upb_OneofDef_FieldCount(o); j++) { - const upb_FieldDef* f = upb_OneofDef_Field(o, j); - field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f)); - } - - /* Align and allocate case offset. */ - case_offset = upb_MiniTable_place(ctx, l, case_size, m); - data_offset = upb_MiniTable_place(ctx, l, field_size, m); - - for (int i = 0; i < upb_OneofDef_FieldCount(o); i++) { - const upb_FieldDef* f = upb_OneofDef_Field(o, i); - fields[upb_FieldDef_Index(f)].offset = data_offset; - fields[upb_FieldDef_Index(f)].presence = ~case_offset; - } - } - - /* Size of the entire structure should be a multiple of its greatest - * alignment. TODO: track overall alignment for real? */ - l->size = UPB_ALIGN_UP(l->size, 8); +bool upb_FieldDef_checklabel(int32_t label) { return between(label, 1, 3); } +bool upb_FieldDef_checktype(int32_t type) { return between(type, 1, 11); } +bool upb_FieldDef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } - /* Sort fields by number. 
*/ - if (fields) { - qsort(fields, upb_MessageDef_FieldCount(m), sizeof(*fields), - field_number_cmp); - } - assign_layout_indices(m, l, fields); +bool upb_FieldDef_checkdescriptortype(int32_t type) { + return between(type, 1, 18); } static bool streql2(const char* a, size_t n, const char* b) { @@ -861,9 +524,8 @@ static void set_default_default(upb_DefBuilder* ctx, upb_FieldDef* f) { } static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, - upb_MessageDef* m, const google_protobuf_FieldDescriptorProto* field_proto, - upb_FieldDef* f) { + upb_MessageDef* m, upb_FieldDef* f) { // Must happen before _upb_DefBuilder_Add() f->file = _upb_DefBuilder_File(ctx); @@ -950,7 +612,7 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, } if (!m) { - _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", + _upb_DefBuilder_Errf(ctx, "oneof field (%s) has no containing msg", f->full_name); } @@ -963,23 +625,21 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, bool ok = _upb_OneofDef_Insert(oneof, f, name.data, name.size, ctx->arena); if (!ok) _upb_DefBuilder_OomErr(ctx); - } else { - if (f->proto3_optional_) { - _upb_DefBuilder_Errf(ctx, - "field with proto3_optional was not in a oneof (%s)", - f->full_name); - } + } else if (f->proto3_optional_) { + _upb_DefBuilder_Errf(ctx, + "field with proto3_optional was not in a oneof (%s)", + f->full_name); } UBP_DEF_SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); if (google_protobuf_FieldOptions_has_packed(f->opts)) { - f->packed_ = google_protobuf_FieldOptions_packed(f->opts); + f->is_packed_ = google_protobuf_FieldOptions_packed(f->opts); } else { // Repeated fields default to packed for proto3 only. 
- f->packed_ = upb_FieldDef_IsPrimitive(f) && - f->label_ == kUpb_Label_Repeated && - upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; + f->is_packed_ = upb_FieldDef_IsPrimitive(f) && + f->label_ == kUpb_Label_Repeated && + upb_FileDef_Syntax(f->file) == kUpb_Syntax_Proto3; } } @@ -987,9 +647,14 @@ static void _upb_FieldDef_CreateExt( upb_DefBuilder* ctx, const char* prefix, const google_protobuf_FieldDescriptorProto* field_proto, upb_MessageDef* m, upb_FieldDef* f) { - _upb_FieldDef_Create(ctx, prefix, m, field_proto, f); + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); f->is_extension_ = true; + if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) { + _upb_DefBuilder_Errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); + } + f->scope.extension_scope = m; _upb_DefBuilder_Add(ctx, f->full_name, _upb_DefType_Pack(f, UPB_DEFTYPE_EXT)); f->layout_index = ctx->ext_count++; @@ -1003,47 +668,58 @@ static void _upb_FieldDef_CreateNotExt( upb_DefBuilder* ctx, const char* prefix, const google_protobuf_FieldDescriptorProto* field_proto, upb_MessageDef* m, upb_FieldDef* f) { - _upb_FieldDef_Create(ctx, prefix, m, field_proto, f); + _upb_FieldDef_Create(ctx, prefix, field_proto, m, f); f->is_extension_ = false; _upb_MessageDef_InsertField(ctx, m, f); - if (ctx->layout) { - const upb_MiniTable* mt = upb_MessageDef_MiniTable(m); - const upb_MiniTable_Field* fields = mt->fields; - const int count = mt->field_count; - bool found = false; - for (int i = 0; i < count; i++) { - if (fields[i].number == f->number_) { - f->layout_index = i; - found = true; - break; - } + if (!ctx->layout) return; + + const upb_MiniTable* mt = upb_MessageDef_MiniTable(m); + const upb_MiniTable_Field* fields = mt->fields; + for (int i = 0; i < mt->field_count; i++) { + if (fields[i].number == f->number_) { + f->layout_index = i; + return; } - UPB_ASSERT(found); } + + UPB_ASSERT(false); // It should be impossible to reach this point. 
} upb_FieldDef* _upb_FieldDefs_New( upb_DefBuilder* ctx, int n, const google_protobuf_FieldDescriptorProto* const* protos, const char* prefix, - upb_MessageDef* m, bool is_ext) { + upb_MessageDef* m, bool* is_sorted) { _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); - upb_FieldDef* f = + upb_FieldDef* defs = (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - if (is_ext) { - for (size_t i = 0; i < n; i++) { - _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, &f[i]); - f[i].index_ = i; + // If we are creating extensions then is_sorted will be NULL. + // If we are not creating extensions then is_sorted will be non-NULL. + if (is_sorted) { + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); + f->index_ = i; + if (!ctx->layout) f->layout_index = i; + + const uint32_t current = f->number_; + if (previous > current) *is_sorted = false; + previous = current; } } else { - for (size_t i = 0; i < n; i++) { - _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, &f[i]); - f[i].index_ = i; + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); + f->index_ = i; } } - return f; + + return defs; } static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, @@ -1092,6 +768,29 @@ static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, } } +static int _upb_FieldDef_Compare(const void* p1, const void* p2) { + const uint32_t v1 = (*(upb_FieldDef**)p1)->number_; + const uint32_t v2 = (*(upb_FieldDef**)p2)->number_; + return (v1 < v2) ? -1 : (v1 > v2); +} + +const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, + upb_Arena* a) { + // TODO: Try to replace this arena alloc with a persistent scratch buffer. 
+ upb_FieldDef** out = (upb_FieldDef**)upb_Arena_Malloc(a, n * sizeof(void*)); + if (!out) return NULL; + + for (int i = 0; i < n; i++) { + out[i] = (upb_FieldDef*)&f[i]; + } + qsort(out, n, sizeof(void*), _upb_FieldDef_Compare); + + for (int i = 0; i < n; i++) { + out[i]->layout_index = i; + } + return (const upb_FieldDef**)out; +} + static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, upb_FieldDef* f, const google_protobuf_FieldDescriptorProto* field_proto) { @@ -1105,39 +804,42 @@ static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, _upb_DefBuilder_Resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); f->msgdef = m; - bool found = false; - - for (int i = 0, n = upb_MessageDef_ExtensionRangeCount(m); i < n; i++) { - const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); - if (upb_ExtensionRange_Start(r) <= f->number_ && - f->number_ < upb_ExtensionRange_End(r)) { - found = true; - break; - } - } - - if (!found) { + if (!_upb_MessageDef_IsValidExtensionNumber(m, f->number_)) { _upb_DefBuilder_Errf( ctx, - "field number %u in extension %s has no extension range in " - "message %s", - (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(f->msgdef)); + "field number %u in extension %s has no extension range in message %s", + (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); } const upb_MiniTable_Extension* ext = _upb_FieldDef_ExtensionMiniTable(f); + if (ctx->layout) { UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number); } else { + upb_StringView desc; + if (!upb_MiniDescriptor_EncodeField(f, ctx->tmp_arena, &desc)) { + _upb_DefBuilder_OomErr(ctx); + } + upb_MiniTable_Extension* mut_ext = (upb_MiniTable_Extension*)ext; - fill_fieldlayout(&mut_ext->field, f); - mut_ext->field.presence = 0; - mut_ext->field.offset = 0; - mut_ext->field.submsg_index = 0; - mut_ext->extendee = upb_MessageDef_MiniTable(f->msgdef); - mut_ext->sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); + 
upb_MiniTable_Sub sub; + sub.submsg = NULL; + sub.subenum = NULL; + bool ok2 = upb_MiniTable_BuildExtension(desc.data, desc.size, mut_ext, + upb_MessageDef_MiniTable(m), sub, + ctx->status); + if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); + + assert(mut_ext->field.number == f->number_); + mut_ext->extendee = upb_MessageDef_MiniTable(m); + if (upb_FieldDef_IsSubMessage(f)) { + mut_ext->sub.submsg = upb_MessageDef_MiniTable(f->sub.msgdef); + } else if (mut_ext->field.descriptortype == kUpb_FieldType_Enum) { + mut_ext->sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); + } } - bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f, ctx->arena); + bool ok = _upb_DefPool_InsertExt(ctx->symtab, ext, f); if (!ok) _upb_DefBuilder_OomErr(ctx); } diff --git a/upb/reflection/field_def.h b/upb/reflection/field_def.h index 409ee70733..6c52149a50 100644 --- a/upb/reflection/field_def.h +++ b/upb/reflection/field_def.h @@ -78,20 +78,28 @@ upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f); // EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// upb_FieldDef* _upb_FieldDef_At(const upb_FieldDef* f, int i); + const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable( const upb_FieldDef* f); +bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f); bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f); +int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f); + +void _upb_FieldDef_MakeLayout(upb_DefBuilder* ctx, const upb_MessageDef* m); + +void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, + upb_FieldDef* f); // Allocate and initialize an array of |n| field defs. 
upb_FieldDef* _upb_FieldDefs_New( upb_DefBuilder* ctx, int n, const google_protobuf_FieldDescriptorProto* const* protos, const char* prefix, - upb_MessageDef* m, bool is_ext); + upb_MessageDef* m, bool* is_sorted); -void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, - upb_FieldDef* f); - -void _upb_FieldDef_MakeLayout(upb_DefBuilder* ctx, const upb_MessageDef* m); +// Allocate and return a list of pointers to the |n| field defs in |f|, +// sorted by field number. +const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, + upb_Arena* a); #ifdef __cplusplus } /* extern "C" */ diff --git a/upb/reflection/file_def.c b/upb/reflection/file_def.c index 4a48286420..6218042ddb 100644 --- a/upb/reflection/file_def.c +++ b/upb/reflection/file_def.c @@ -192,7 +192,7 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, const upb_StringView* strs; const int32_t* public_deps; const int32_t* weak_deps; - size_t i, n; + size_t n; file->symtab = ctx->symtab; @@ -261,7 +261,7 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, file->dep_count = n; file->deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->deps) * n); - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { upb_StringView str = strs[i]; file->deps[i] = upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size); @@ -278,7 +278,7 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, file->public_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->public_deps) * n); int32_t* mutable_public_deps = (int32_t*)file->public_deps; - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { if (public_deps[i] >= file->dep_count) { _upb_DefBuilder_Errf(ctx, "public_dep %d is out of range", (int)public_deps[i]); @@ -290,7 +290,7 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, file->weak_dep_count = n; file->weak_deps = _upb_DefBuilder_Alloc(ctx, sizeof(*file->weak_deps) * n); int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { if
(weak_deps[i] >= file->dep_count) { _upb_DefBuilder_Errf(ctx, "weak_dep %d is out of range", (int)weak_deps[i]); @@ -307,7 +307,7 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n); file->top_lvl_ext_count = n; file->top_lvl_exts = - _upb_FieldDefs_New(ctx, n, exts, file->package, NULL, true); + _upb_FieldDefs_New(ctx, n, exts, file->package, NULL, NULL); // Create messages. msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); @@ -320,17 +320,24 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, file->services = _upb_ServiceDefs_New(ctx, n, services); // Now that all names are in the table, build layouts and resolve refs. - for (i = 0; i < (size_t)file->top_lvl_ext_count; i++) { - _upb_FieldDef_Resolve( - ctx, file->package, - (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i)); - } - for (i = 0; i < (size_t)file->top_lvl_msg_count; i++) { + for (int i = 0; i < file->top_lvl_msg_count; i++) { upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); _upb_MessageDef_Resolve(ctx, m); } + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_Resolve(ctx, file->package, f); + } + + if (!ctx->layout) { + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_LinkMiniTable(ctx, m); + } + } + if (file->ext_count) { bool ok = _upb_extreg_add(_upb_DefPool_ExtReg(ctx->symtab), file->ext_layouts, file->ext_count); diff --git a/upb/reflection/message_def.c b/upb/reflection/message_def.c index 82fb544926..427b64003a 100644 --- a/upb/reflection/message_def.c +++ b/upb/reflection/message_def.c @@ -34,6 +34,7 @@ #include "upb/reflection/extension_range.h" #include "upb/reflection/field_def.h" #include "upb/reflection/file_def.h" +#include "upb/reflection/mini_descriptor_encode.h" #include 
"upb/reflection/oneof_def.h" // Must be last. @@ -67,6 +68,7 @@ struct upb_MessageDef { int nested_enum_count; int nested_ext_count; bool in_message_set; + bool is_sorted; upb_WellKnown well_known_type; #if UINTPTR_MAX == 0xffffffff uint32_t padding; // Increase size to a multiple of 8. @@ -120,6 +122,18 @@ upb_MessageDef* _upb_MessageDef_At(const upb_MessageDef* m, int i) { return (upb_MessageDef*)&m[i]; } +bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n) { + for (int i = 0; i < m->ext_range_count; i++) { + const upb_ExtensionRange* r = upb_MessageDef_ExtensionRange(m, i); + if (upb_ExtensionRange_Start(r) <= n && n < upb_ExtensionRange_End(r)) { + return true; + } + } + return false; +} + +bool _upb_MessageDef_IsSorted(const upb_MessageDef* m) { return m->is_sorted; } + const google_protobuf_MessageOptions* upb_MessageDef_Options( const upb_MessageDef* m) { return m->opts; @@ -215,14 +229,6 @@ const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize( return f; } -int upb_MessageDef_numfields(const upb_MessageDef* m) { return m->field_count; } - -int upb_MessageDef_numoneofs(const upb_MessageDef* m) { return m->oneof_count; } - -int upb_MessageDef_numrealoneofs(const upb_MessageDef* m) { - return m->real_oneof_count; -} - int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) { return m->ext_range_count; } @@ -247,10 +253,6 @@ int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) { return m->nested_ext_count; } -int upb_MessageDef_realoneofcount(const upb_MessageDef* m) { - return m->real_oneof_count; -} - const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) { return m->layout; } @@ -315,12 +317,56 @@ bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { upb_MessageDef_Options(m)); } +static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + if (google_protobuf_MessageOptions_message_set_wire_format(m->opts)) { + return 
upb_MiniTable_BuildMessageSet(kUpb_MiniTablePlatform_Native, + ctx->arena); + } + + if (upb_MessageDef_IsMapEntry(m)) { + if (m->field_count != 2) { + _upb_DefBuilder_Errf(ctx, "invalid map (%s)", m->full_name); + } + + const upb_FieldDef* f0 = upb_MessageDef_Field(m, 0); + const upb_FieldDef* f1 = upb_MessageDef_Field(m, 1); + const upb_FieldType t0 = upb_FieldDef_Type(f0); + const upb_FieldType t1 = upb_FieldDef_Type(f1); + + const bool is_proto3_enum = + (t1 == kUpb_FieldType_Enum) && !_upb_FieldDef_IsClosedEnum(f1); + UPB_ASSERT(_upb_FieldDef_LayoutIndex(f0) == 0); + UPB_ASSERT(_upb_FieldDef_LayoutIndex(f1) == 1); + + return upb_MiniTable_BuildMapEntry( + t0, t1, is_proto3_enum, kUpb_MiniTablePlatform_Native, ctx->arena); + } + + upb_StringView desc; + bool ok = upb_MiniDescriptor_EncodeMessage(m, ctx->tmp_arena, &desc); + if (!ok) _upb_DefBuilder_OomErr(ctx); + + void** scratch_data = _upb_DefPool_ScratchData(ctx->symtab); + size_t* scratch_size = _upb_DefPool_ScratchSize(ctx->symtab); + upb_MiniTable* ret = upb_MiniTable_BuildWithBuf( + desc.data, desc.size, kUpb_MiniTablePlatform_Native, ctx->arena, + scratch_data, scratch_size, ctx->status); + if (!ret) _upb_DefBuilder_FailJmp(ctx); + return ret; +} + void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { - for (int i = 0; i < upb_MessageDef_FieldCount(m); i++) { + for (int i = 0; i < m->field_count; i++) { upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_Field(m, i); _upb_FieldDef_Resolve(ctx, upb_MessageDef_FullName(m), f); } + if (!ctx->layout) { + m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); + if (!m->layout) _upb_DefBuilder_OomErr(ctx); + } + m->in_message_set = false; for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); @@ -334,8 +380,6 @@ void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { } } - if (!ctx->layout) _upb_FieldDef_MakeLayout(ctx, m); - for (int i = 0; i < 
upb_MessageDef_NestedMessageCount(m); i++) { upb_MessageDef* n = (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); _upb_MessageDef_Resolve(ctx, n); @@ -385,42 +429,50 @@ void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, if (!ok) _upb_DefBuilder_OomErr(ctx); } -static void msgdef_create_nested(upb_DefBuilder* ctx, - const google_protobuf_DescriptorProto* msg_proto, - upb_MessageDef* m) { - size_t n; - - const google_protobuf_EnumDescriptorProto* const* enums = - google_protobuf_DescriptorProto_enum_type(msg_proto, &n); - m->nested_enum_count = n; - m->nested_enums = _upb_EnumDefs_New(ctx, n, enums, m); - - const google_protobuf_FieldDescriptorProto* const* exts = - google_protobuf_DescriptorProto_extension(msg_proto, &n); - m->nested_ext_count = n; - m->nested_exts = _upb_FieldDefs_New(ctx, n, exts, m->full_name, m, true); +void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m) { + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); + const upb_EnumDef* sub_e = upb_FieldDef_EnumSubDef(f); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + upb_MiniTable* mt = (upb_MiniTable*)upb_MessageDef_MiniTable(m); + + UPB_ASSERT(layout_index < m->field_count); + upb_MiniTable_Field* mt_f = + (upb_MiniTable_Field*)&m->layout->fields[layout_index]; + if (sub_m) { + if (!mt->subs) { + _upb_DefBuilder_Errf(ctx, "invalid submsg for (%s)", m->full_name); + } + UPB_ASSERT(mt_f); + UPB_ASSERT(sub_m->layout); + upb_MiniTable_SetSubMessage(mt, mt_f, sub_m->layout); + } else if (_upb_FieldDef_IsClosedEnum(f)) { + upb_MiniTable_SetSubEnum(mt, mt_f, _upb_EnumDef_MiniTable(sub_e)); + } + } - const google_protobuf_DescriptorProto* const* msgs = - google_protobuf_DescriptorProto_nested_type(msg_proto, &n); - m->nested_msg_count = n; - m->nested_msgs = _upb_MessageDefs_New(ctx, n, msgs, m); + for (int i = 0; i < 
m->nested_msg_count; i++) { + _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + } } static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, const google_protobuf_DescriptorProto* msg_proto, const upb_MessageDef* containing_type, - const upb_MessageDef* _m) { - upb_MessageDef* m = (upb_MessageDef*)_m; + upb_MessageDef* m) { const google_protobuf_OneofDescriptorProto* const* oneofs; const google_protobuf_FieldDescriptorProto* const* fields; const google_protobuf_DescriptorProto_ExtensionRange* const* ext_ranges; - size_t n_oneof, n_field, n_ext_range; + size_t n_oneof, n_field, n_ext_range, n_enum, n_ext, n_msg; upb_StringView name; // Must happen before _upb_DefBuilder_Add() m->file = _upb_DefBuilder_File(ctx); m->containing_type = containing_type; + m->is_sorted = true; name = google_protobuf_DescriptorProto_name(msg_proto); _upb_DefBuilder_CheckIdentNotFull(ctx, name); @@ -430,8 +482,7 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); - ext_ranges = - google_protobuf_DescriptorProto_extension_range(msg_proto, &n_ext_range); + ext_ranges = google_protobuf_DescriptorProto_extension_range(msg_proto, &n_ext_range); bool ok = upb_inttable_init(&m->itof, ctx->arena); if (!ok) _upb_DefBuilder_OomErr(ctx); @@ -456,7 +507,8 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, m->oneofs = _upb_OneofDefs_New(ctx, n_oneof, oneofs, m); m->field_count = n_field; - m->fields = _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, false); + m->fields = + _upb_FieldDefs_New(ctx, n_field, fields, m->full_name, m, &m->is_sorted); m->ext_range_count = n_ext_range; m->ext_ranges = _upb_ExtensionRanges_New(ctx, n_ext_range, ext_ranges, m); @@ -466,7 +518,21 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, assign_msg_wellknowntype(m); 
upb_inttable_compact(&m->itof, ctx->arena); - msgdef_create_nested(ctx, msg_proto, m); + + const google_protobuf_EnumDescriptorProto* const* enums = + google_protobuf_DescriptorProto_enum_type(msg_proto, &n_enum); + m->nested_enum_count = n_enum; + m->nested_enums = _upb_EnumDefs_New(ctx, n_enum, enums, m); + + const google_protobuf_FieldDescriptorProto* const* exts = + google_protobuf_DescriptorProto_extension(msg_proto, &n_ext); + m->nested_ext_count = n_ext; + m->nested_exts = _upb_FieldDefs_New(ctx, n_ext, exts, m->full_name, m, NULL); + + const google_protobuf_DescriptorProto* const* msgs = + google_protobuf_DescriptorProto_nested_type(msg_proto, &n_msg); + m->nested_msg_count = n_msg; + m->nested_msgs = _upb_MessageDefs_New(ctx, n_msg, msgs, m); } // Allocate and initialize an array of |n| message defs. @@ -477,6 +543,7 @@ upb_MessageDef* _upb_MessageDefs_New( const char* name = containing_type ? containing_type->full_name : _upb_FileDef_RawPackage(ctx->file); + upb_MessageDef* m = _upb_DefBuilder_Alloc(ctx, sizeof(upb_MessageDef) * n); for (int i = 0; i < n; i++) { create_msgdef(ctx, name, protos[i], containing_type, &m[i]); diff --git a/upb/reflection/message_def.h b/upb/reflection/message_def.h index d8b8382128..fd79494d24 100644 --- a/upb/reflection/message_def.h +++ b/upb/reflection/message_def.h @@ -157,6 +157,10 @@ bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t size, upb_value v, upb_Arena* a); void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, const upb_FieldDef* f); +bool _upb_MessageDef_IsSorted(const upb_MessageDef* m); +bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n); +void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, + const upb_MessageDef* m); void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m); // Allocate and initialize an array of |n| message defs. 
diff --git a/upb/reflection/mini_descriptor_encode.c b/upb/reflection/mini_descriptor_encode.c index d2ad8c99b9..a5c507d46f 100644 --- a/upb/reflection/mini_descriptor_encode.c +++ b/upb/reflection/mini_descriptor_encode.c @@ -121,21 +121,18 @@ static uint64_t upb_Message_Modifiers(const upb_MessageDef* m) { /******************************************************************************/ -// Sort by field number. -static int upb_MiniDescriptor_CompareFields(const void* a, const void* b) { - const upb_FieldDef* A = *(void**)a; - const upb_FieldDef* B = *(void**)b; - if (upb_FieldDef_Number(A) < upb_FieldDef_Number(B)) return -1; - if (upb_FieldDef_Number(A) > upb_FieldDef_Number(B)) return 1; - return 0; -} - -bool _upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, - const upb_EnumValueDef** sorted, - upb_Arena* a, upb_StringView* out) { +bool upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a, + upb_StringView* out) { DescState s; upb_DescState_Init(&s); + const upb_EnumValueDef** sorted = NULL; + if (!_upb_EnumDef_IsSorted(e)) { + sorted = _upb_EnumValueDefs_Sorted(upb_EnumDef_Value(e, 0), + upb_EnumDef_ValueCount(e), a); + if (!sorted) return false; + } + upb_MtDataEncoder_StartEnum(&s.e); // Duplicate values are allowed but we only encode each value once. @@ -165,8 +162,8 @@ bool _upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, return true; } -bool _upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, - upb_StringView* out) { +bool upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, + upb_StringView* out) { UPB_ASSERT(upb_FieldDef_IsExtension(f)); DescState s; @@ -190,28 +187,27 @@ bool _upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, return true; } -bool _upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, - upb_StringView* out) { +// If the field numbers happen to be defined in ascending order then |sorted| +// should be NULL. 
Otherwise it must point to an array containing pointers to +// the field defs in sorted order. +bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out) { DescState s; upb_DescState_Init(&s); - // Make a copy. - const size_t field_count = upb_MessageDef_FieldCount(m); - const upb_FieldDef** sorted = - (const upb_FieldDef**)upb_Arena_Malloc(a, field_count * sizeof(void*)); - if (!sorted) return false; - - // Sort the copy. - for (size_t i = 0; i < field_count; i++) { - sorted[i] = upb_MessageDef_Field(m, i); + const upb_FieldDef** sorted = NULL; + if (!_upb_MessageDef_IsSorted(m)) { + sorted = _upb_FieldDefs_Sorted(upb_MessageDef_Field(m, 0), + upb_MessageDef_FieldCount(m), a); + if (!sorted) return false; } - qsort(sorted, field_count, sizeof(void*), upb_MiniDescriptor_CompareFields); if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_StartMessage(&s.e, s.ptr, upb_Message_Modifiers(m)); - for (size_t i = 0; i < field_count; i++) { - const upb_FieldDef* f = sorted[i]; + const int field_count = upb_MessageDef_FieldCount(m); + for (int i = 0; i < field_count; i++) { + const upb_FieldDef* f = sorted ? 
sorted[i] : upb_MessageDef_Field(m, i); const upb_FieldType type = upb_FieldDef_Type(f); const int number = upb_FieldDef_Number(f); const uint64_t modifiers = upb_Field_Modifiers(f); @@ -242,20 +238,3 @@ bool _upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, out->size = s.ptr - s.buf; return true; } - -/******************************************************************************/ - -bool upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a, - upb_StringView* out) { - return _upb_EnumDef_MiniDescriptor(e, a, out); -} - -bool upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, - upb_StringView* out) { - return _upb_MiniDescriptor_EncodeField(f, a, out); -} - -bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, - upb_StringView* out) { - return _upb_MiniDescriptor_EncodeMessage(m, a, out); -} diff --git a/upb/reflection/mini_descriptor_encode.h b/upb/reflection/mini_descriptor_encode.h index a9677abfd0..dcb42a4604 100644 --- a/upb/reflection/mini_descriptor_encode.h +++ b/upb/reflection/mini_descriptor_encode.h @@ -50,19 +50,6 @@ bool upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, upb_StringView* out); -// EVERYTHING BELOW THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// - -// Creates and returns a mini descriptor string for an enum, or NULL on error. -// If the values in the enum happen to be defined in ascending order (when cast -// to uint32_t) then |sorted| should be NULL. Otherwise it must point to an -// array containing pointers to the enum value defs in sorted order. 
-bool _upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, - const upb_EnumValueDef** sorted, - upb_Arena* a, upb_StringView* out); - -bool _upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, - upb_StringView* out); - #ifdef __cplusplus } /* extern "C" */ #endif From 1e3deb013df2cefc0049bfc8f89bee9655c172fa Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Wed, 28 Sep 2022 10:20:45 -0700 Subject: [PATCH 32/35] move message/field modifiers functions out of mini_descriptor_encode.c PiperOrigin-RevId: 477486367 --- upb/reflection/field_def.c | 34 +++++++++++++++--- upb/reflection/field_def.h | 4 +-- upb/reflection/message_def.c | 12 +++++++ upb/reflection/message_def.h | 1 + upb/reflection/mini_descriptor_encode.c | 46 +++---------------------- 5 files changed, 48 insertions(+), 49 deletions(-) diff --git a/upb/reflection/field_def.c b/upb/reflection/field_def.c index 3dabea3b94..f7ac728445 100644 --- a/upb/reflection/field_def.c +++ b/upb/reflection/field_def.c @@ -268,6 +268,32 @@ bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) { int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f) { return f->layout_index; } +uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f) { + uint64_t out = f->is_packed_ ? 
kUpb_FieldModifier_IsPacked : 0; + + switch (f->label_) { + case kUpb_Label_Optional: + if (!upb_FieldDef_HasPresence(f)) { + out |= kUpb_FieldModifier_IsProto3Singular; + } + break; + case kUpb_Label_Repeated: + out |= kUpb_FieldModifier_IsRepeated; + break; + case kUpb_Label_Required: + out |= kUpb_FieldModifier_IsRequired; + break; + } + + if (f->type_ == kUpb_FieldType_Enum) { + const upb_FileDef* file_def = upb_EnumDef_File(upb_FieldDef_EnumSubDef(f)); + if (upb_FileDef_Syntax(file_def) == kUpb_Syntax_Proto2) { + out |= kUpb_FieldModifier_IsClosedEnum; + } + } + return out; +} + bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; } bool upb_FieldDef_HasPresence(const upb_FieldDef* f) { @@ -295,14 +321,14 @@ bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) { return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f); } -bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { - return upb_FieldDef_Label(f) == kUpb_Label_Required; -} - bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) { return upb_FieldDef_Label(f) == kUpb_Label_Repeated; } +bool upb_FieldDef_IsRequired(const upb_FieldDef* f) { + return upb_FieldDef_Label(f) == kUpb_Label_Required; +} + bool upb_FieldDef_IsString(const upb_FieldDef* f) { return upb_FieldDef_CType(f) == kUpb_CType_String || upb_FieldDef_CType(f) == kUpb_CType_Bytes; diff --git a/upb/reflection/field_def.h b/upb/reflection/field_def.h index 6c52149a50..dbfa5e70ec 100644 --- a/upb/reflection/field_def.h +++ b/upb/reflection/field_def.h @@ -84,9 +84,7 @@ const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable( bool _upb_FieldDef_IsClosedEnum(const upb_FieldDef* f); bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f); int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f); - -void _upb_FieldDef_MakeLayout(upb_DefBuilder* ctx, const upb_MessageDef* m); - +uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f); void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, 
upb_FieldDef* f); diff --git a/upb/reflection/message_def.c b/upb/reflection/message_def.c index 427b64003a..3e4ce06a63 100644 --- a/upb/reflection/message_def.c +++ b/upb/reflection/message_def.c @@ -458,6 +458,18 @@ void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, } } +uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { + uint64_t out = 0; + if (upb_FileDef_Syntax(m->file) == kUpb_Syntax_Proto3) { + out |= kUpb_MessageModifier_ValidateUtf8; + out |= kUpb_MessageModifier_DefaultIsPacked; + } + if (m->ext_range_count) { + out |= kUpb_MessageModifier_IsExtendable; + } + return out; +} + static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, const google_protobuf_DescriptorProto* msg_proto, const upb_MessageDef* containing_type, diff --git a/upb/reflection/message_def.h b/upb/reflection/message_def.h index fd79494d24..d9924c92e3 100644 --- a/upb/reflection/message_def.h +++ b/upb/reflection/message_def.h @@ -161,6 +161,7 @@ bool _upb_MessageDef_IsSorted(const upb_MessageDef* m); bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n); void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, const upb_MessageDef* m); +uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m); void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m); // Allocate and initialize an array of |n| message defs. diff --git a/upb/reflection/mini_descriptor_encode.c b/upb/reflection/mini_descriptor_encode.c index a5c507d46f..ce2e7a86a3 100644 --- a/upb/reflection/mini_descriptor_encode.c +++ b/upb/reflection/mini_descriptor_encode.c @@ -82,45 +82,6 @@ static bool upb_DescState_Grow(DescState* d, upb_Arena* a) { /******************************************************************************/ -// Copied from upbc/protoc-gen-upb.cc TODO(salo): can we consolidate? 
-static uint64_t upb_Field_Modifiers(const upb_FieldDef* f) { - uint64_t out = 0; - if (upb_FieldDef_IsRepeated(f)) { - out |= kUpb_FieldModifier_IsRepeated; - } - if (upb_FieldDef_IsPacked(f)) { - out |= kUpb_FieldModifier_IsPacked; - } - if (upb_FieldDef_Type(f) == kUpb_FieldType_Enum) { - const upb_FileDef* file_def = upb_EnumDef_File(upb_FieldDef_EnumSubDef(f)); - if (upb_FileDef_Syntax(file_def) == kUpb_Syntax_Proto2) { - out |= kUpb_FieldModifier_IsClosedEnum; - } - } - if (upb_FieldDef_IsOptional(f) && !upb_FieldDef_HasPresence(f)) { - out |= kUpb_FieldModifier_IsProto3Singular; - } - if (upb_FieldDef_IsRequired(f)) { - out |= kUpb_FieldModifier_IsRequired; - } - return out; -} - -static uint64_t upb_Message_Modifiers(const upb_MessageDef* m) { - uint64_t out = 0; - const upb_FileDef* file_def = upb_MessageDef_File(m); - if (upb_FileDef_Syntax(file_def) == kUpb_Syntax_Proto3) { - out |= kUpb_MessageModifier_ValidateUtf8; - out |= kUpb_MessageModifier_DefaultIsPacked; - } - if (upb_MessageDef_ExtensionRangeCount(m)) { - out |= kUpb_MessageModifier_IsExtendable; - } - return out; -} - -/******************************************************************************/ - bool upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, upb_Arena* a, upb_StringView* out) { DescState s; @@ -174,7 +135,7 @@ bool upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, upb_Arena* a, const upb_FieldType type = upb_FieldDef_Type(f); const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = upb_Field_Modifiers(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers); @@ -203,14 +164,15 @@ bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, upb_Arena* a, } if (!upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_StartMessage(&s.e, s.ptr, upb_Message_Modifiers(m)); + s.ptr = + upb_MtDataEncoder_StartMessage(&s.e, s.ptr, 
_upb_MessageDef_Modifiers(m)); const int field_count = upb_MessageDef_FieldCount(m); for (int i = 0; i < field_count; i++) { const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); const upb_FieldType type = upb_FieldDef_Type(f); const int number = upb_FieldDef_Number(f); - const uint64_t modifiers = upb_Field_Modifiers(f); + const uint64_t modifiers = _upb_FieldDef_Modifiers(f); if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers); From d5bd55cde184f45f58c8654c961fe808cdc37e62 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 29 Sep 2022 11:05:54 -0700 Subject: [PATCH 33/35] Treat unlinked sub-messages in the MiniTable as unknown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an observable behavior change in the decoder. After submitting this CL, clients of the decoder can assume that any unlinked sub-messages will be treated as unknown, rather than crashing. Unlinked sub-messages must never have values present in the message. We can verify this with asserts. Since the values are never set, the encoder should never encounter data for any unlinked sub-message. 
``` name old cpu/op new cpu/op delta BM_ArenaOneAlloc 18.3ns ± 9% 17.9ns ± 2% ~ (p=0.690 n=5+5) BM_ArenaInitialBlockOneAlloc 6.40ns ± 1% 6.68ns ±10% ~ (p=0.730 n=4+5) BM_LoadAdsDescriptor_Upb 5.09ms ± 2% 5.03ms ± 3% ~ (p=0.222 n=5+5) BM_LoadAdsDescriptor_Upb 5.45ms ± 3% 5.43ms ± 1% ~ (p=0.905 n=5+4) BM_LoadAdsDescriptor_Proto2 10.9ms ± 1% 10.8ms ± 1% -1.09% (p=0.016 n=5+4) BM_LoadAdsDescriptor_Proto2 11.3ms ± 9% 11.1ms ± 3% ~ (p=0.841 n=5+5) BM_Parse_Upb_FileDesc 11.2µs ± 3% 11.3µs ± 3% ~ (p=0.222 n=5+5) BM_Parse_Upb_FileDesc 10.3µs ± 5% 10.5µs ± 5% ~ (p=0.310 n=5+5) BM_Parse_Upb_FileDesc 11.4µs ±18% 11.0µs ± 2% ~ (p=1.000 n=5+5) BM_Parse_Upb_FileDesc 10.5µs ±17% 10.6µs ±19% ~ (p=0.421 n=5+5) BM_Parse_Proto2 20.5µs ± 2% 20.2µs ± 2% ~ (p=0.222 n=5+5) BM_Parse_Proto2 10.8µs ± 2% 10.9µs ± 4% ~ (p=0.841 n=5+5) BM_Parse_Proto2 10.5µs ± 3% 10.6µs ± 3% ~ (p=0.690 n=5+5) BM_Parse_Proto2 9.22µs ± 2% 9.23µs ± 3% ~ (p=1.000 n=5+5) BM_SerializeDescriptor_Proto2 6.05µs ± 3% 5.90µs ± 3% ~ (p=0.222 n=5+5) BM_SerializeDescriptor_Upb 10.2µs ± 3% 10.6µs ±14% ~ (p=0.841 n=5+5) name old time/op new time/op delta BM_ArenaOneAlloc 18.3ns ± 9% 17.9ns ± 2% ~ (p=0.841 n=5+5) BM_ArenaInitialBlockOneAlloc 6.42ns ± 1% 6.69ns ±10% ~ (p=0.730 n=4+5) BM_LoadAdsDescriptor_Upb 5.10ms ± 2% 5.05ms ± 3% ~ (p=0.222 n=5+5) BM_LoadAdsDescriptor_Upb 5.47ms ± 3% 5.45ms ± 1% ~ (p=0.905 n=5+4) BM_LoadAdsDescriptor_Proto2 10.9ms ± 1% 10.8ms ± 1% -1.11% (p=0.016 n=5+4) BM_LoadAdsDescriptor_Proto2 11.4ms ± 9% 11.1ms ± 3% ~ (p=0.841 n=5+5) BM_Parse_Upb_FileDesc 11.2µs ± 3% 11.3µs ± 3% ~ (p=0.222 n=5+5) BM_Parse_Upb_FileDesc 10.3µs ± 5% 10.5µs ± 5% ~ (p=0.151 n=5+5) BM_Parse_Upb_FileDesc 11.5µs ±18% 11.0µs ± 2% ~ (p=1.000 n=5+5) BM_Parse_Upb_FileDesc 10.5µs ±17% 10.7µs ±19% ~ (p=0.421 n=5+5) BM_Parse_Proto2 20.6µs ± 2% 20.3µs ± 2% ~ (p=0.222 n=5+5) BM_Parse_Proto2 10.9µs ± 2% 10.9µs ± 4% ~ (p=0.841 n=5+5) BM_Parse_Proto2 10.6µs ± 3% 10.6µs ± 3% ~ (p=0.690 n=5+5) BM_Parse_Proto2 9.24µs ± 2% 9.25µs ± 3% ~ (p=1.000 
n=5+5) BM_SerializeDescriptor_Proto2 6.07µs ± 3% 5.91µs ± 3% ~ (p=0.222 n=5+5) BM_SerializeDescriptor_Upb 10.3µs ± 3% 10.6µs ±14% ~ (p=0.841 n=5+5) name old INSTRUCTIONS/op new INSTRUCTIONS/op delta BM_ArenaOneAlloc 201 ± 0% 201 ± 0% ~ (p=0.841 n=5+5) BM_ArenaInitialBlockOneAlloc 69.0 ± 0% 69.0 ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Upb 33.9M ± 0% 34.1M ± 0% +0.66% (p=0.008 n=5+5) BM_LoadAdsDescriptor_Upb 35.6M ± 0% 35.8M ± 0% +0.64% (p=0.008 n=5+5) BM_LoadAdsDescriptor_Proto2 70.8M ± 0% 70.8M ± 0% ~ (p=0.548 n=5+5) BM_LoadAdsDescriptor_Proto2 71.6M ± 0% 71.6M ± 0% ~ (p=0.151 n=5+5) BM_Parse_Upb_FileDesc 137k ± 0% 141k ± 0% +2.87% (p=0.008 n=5+5) BM_Parse_Upb_FileDesc 125k ± 0% 128k ± 0% +2.83% (p=0.008 n=5+5) BM_Parse_Upb_FileDesc 135k ± 0% 139k ± 0% +2.89% (p=0.008 n=5+5) BM_Parse_Upb_FileDesc 124k ± 0% 127k ± 0% +2.85% (p=0.016 n=5+4) BM_Parse_Proto2 201k ± 0% 201k ± 0% ~ (p=0.222 n=5+5) BM_Parse_Proto2 107k ± 0% 107k ± 0% ~ (p=1.000 n=5+5) BM_Parse_Proto2 105k ± 0% 105k ± 0% ~ (p=0.286 n=5+4) BM_Parse_Proto2 86.5k ± 0% 86.5k ± 0% ~ (p=0.222 n=5+5) BM_SerializeDescriptor_Proto2 60.3k ± 0% 60.3k ± 0% ~ (p=0.071 n=5+5) BM_SerializeDescriptor_Upb 111k ± 0% 111k ± 0% ~ (p=0.841 n=5+5) name old CYCLES/op new CYCLES/op delta BM_ArenaOneAlloc 60.0 ± 7% 58.8 ± 0% -2.15% (p=0.016 n=5+5) BM_ArenaInitialBlockOneAlloc 21.0 ± 0% 21.0 ± 0% ~ (p=1.000 n=5+5) BM_LoadAdsDescriptor_Upb 16.9M ± 0% 16.9M ± 0% ~ (p=0.056 n=5+5) BM_LoadAdsDescriptor_Upb 17.9M ± 1% 18.0M ± 1% ~ (p=0.095 n=5+5) BM_LoadAdsDescriptor_Proto2 35.9M ± 1% 35.8M ± 1% ~ (p=0.421 n=5+5) BM_LoadAdsDescriptor_Proto2 36.5M ± 0% 36.5M ± 0% ~ (p=0.841 n=5+5) BM_Parse_Upb_FileDesc 37.2k ± 0% 37.3k ± 0% ~ (p=0.222 n=5+5) BM_Parse_Upb_FileDesc 34.1k ± 0% 34.7k ± 0% +1.66% (p=0.008 n=5+5) BM_Parse_Upb_FileDesc 36.4k ± 0% 36.7k ± 0% +0.83% (p=0.008 n=5+5) BM_Parse_Upb_FileDesc 33.3k ± 1% 34.1k ± 1% +2.39% (p=0.008 n=5+5) BM_Parse_Proto2 68.1k ± 1% 68.0k ± 1% ~ (p=0.421 n=5+5) BM_Parse_Proto2 36.0k ± 1% 36.1k 
± 1% ~ (p=0.841 n=5+5) BM_Parse_Proto2 35.3k ± 1% 35.5k ± 1% ~ (p=0.151 n=5+5) BM_Parse_Proto2 30.7k ± 0% 30.9k ± 1% ~ (p=0.151 n=5+5) BM_SerializeDescriptor_Proto2 20.3k ± 2% 19.7k ± 3% ~ (p=0.151 n=5+5) BM_SerializeDescriptor_Upb 33.6k ± 0% 33.7k ± 2% ~ (p=1.000 n=5+5) name old allocs/op new allocs/op delta BM_ArenaOneAlloc 1.19 ± 0% 1.19 ± 0% ~ (all samples are equal) BM_ArenaInitialBlockOneAlloc 0.19 ± 0% 0.19 ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Upb 6.00k ± 0% 6.00k ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Upb 5.99k ± 0% 5.99k ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Proto2 77.8k ± 0% 77.8k ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Proto2 79.0k ± 0% 79.0k ± 0% ~ (all samples are equal) BM_Parse_Upb_FileDesc 7.19 ± 0% 7.19 ± 0% ~ (all samples are equal) BM_Parse_Upb_FileDesc 7.19 ± 0% 7.19 ± 0% ~ (all samples are equal) BM_Parse_Upb_FileDesc 0.19 ± 0% 0.19 ± 0% ~ (all samples are equal) BM_Parse_Upb_FileDesc 0.19 ± 0% 0.19 ± 0% ~ (all samples are equal) BM_Parse_Proto2 765 ± 0% 765 ± 0% ~ (all samples are equal) BM_Parse_Proto2 10.2 ± 0% 10.2 ± 0% ~ (all samples are equal) BM_Parse_Proto2 1.19 ± 0% 1.19 ± 0% ~ (all samples are equal) BM_Parse_Proto2 1.19 ± 0% 1.19 ± 0% ~ (all samples are equal) BM_SerializeDescriptor_Proto2 0.19 ± 0% 0.19 ± 0% ~ (all samples are equal) BM_SerializeDescriptor_Upb 0.19 ± 0% 0.19 ± 0% ~ (all samples are equal) name old peak-mem(Bytes)/op new peak-mem(Bytes)/op delta BM_ArenaOneAlloc 344 ± 0% 344 ± 0% ~ (all samples are equal) BM_ArenaInitialBlockOneAlloc 112 ± 0% 112 ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Upb 9.64M ± 0% 9.64M ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Upb 9.70M ± 0% 9.70M ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Proto2 6.38M ± 0% 6.38M ± 0% ~ (all samples are equal) BM_LoadAdsDescriptor_Proto2 6.44M ± 0% 6.44M ± 0% ~ (all samples are equal) BM_Parse_Upb_FileDesc 36.5k ± 0% 36.5k ± 0% ~ (all samples are equal) BM_Parse_Upb_FileDesc 
36.5k ± 0% 36.5k ± 0% ~ (all samples are equal) BM_Parse_Upb_FileDesc 112 ± 0% 112 ± 0% ~ (all samples are equal) BM_Parse_Upb_FileDesc 112 ± 0% 112 ± 0% ~ (all samples are equal) BM_Parse_Proto2 35.8k ± 0% 35.8k ± 0% ~ (all samples are equal) BM_Parse_Proto2 40.8k ± 0% 40.8k ± 0% ~ (all samples are equal) BM_Parse_Proto2 112 ± 0% 112 ± 0% ~ (all samples are equal) BM_Parse_Proto2 112 ± 0% 112 ± 0% ~ (all samples are equal) BM_SerializeDescriptor_Proto2 112 ± 0% 112 ± 0% ~ (all samples are equal) BM_SerializeDescriptor_Upb 112 ± 0% 112 ± 0% ~ (all samples are equal) name old speed new speed delta BM_LoadAdsDescriptor_Upb 147MB/s ± 2% 148MB/s ± 3% ~ (p=0.222 n=5+5) BM_LoadAdsDescriptor_Upb 137MB/s ± 3% 137MB/s ± 1% ~ (p=0.905 n=5+4) BM_LoadAdsDescriptor_Proto2 68.6MB/s ± 1% 69.3MB/s ± 1% +1.10% (p=0.016 n=5+4) BM_LoadAdsDescriptor_Proto2 66.0MB/s ± 9% 67.4MB/s ± 3% ~ (p=0.841 n=5+5) BM_Parse_Upb_FileDesc 675MB/s ± 3% 667MB/s ± 3% ~ (p=0.222 n=5+5) BM_Parse_Upb_FileDesc 730MB/s ± 5% 718MB/s ± 5% ~ (p=0.310 n=5+5) BM_Parse_Upb_FileDesc 663MB/s ±16% 685MB/s ± 2% ~ (p=1.000 n=5+5) BM_Parse_Upb_FileDesc 723MB/s ±15% 712MB/s ±16% ~ (p=0.421 n=5+5) BM_Parse_Proto2 367MB/s ± 2% 372MB/s ± 2% ~ (p=0.222 n=5+5) BM_Parse_Proto2 694MB/s ± 2% 691MB/s ± 4% ~ (p=0.841 n=5+5) BM_Parse_Proto2 714MB/s ± 3% 709MB/s ± 3% ~ (p=0.690 n=5+5) BM_Parse_Proto2 816MB/s ± 2% 816MB/s ± 3% ~ (p=1.000 n=5+5) BM_SerializeDescriptor_Proto2 1.24GB/s ± 3% 1.28GB/s ± 3% ~ (p=0.222 n=5+5) BM_SerializeDescriptor_Upb 734MB/s ± 3% 713MB/s ±13% ~ (p=0.841 n=5+5) ``` PiperOrigin-RevId: 477770562 --- upb/decode.c | 25 +++++++++++++++++++------ upb/mini_table.h | 10 ++++++++++ upb/mini_table_accessors.h | 8 ++++++-- upb/mini_table_accessors_test.cc | 4 +++- upb/reflection/message.c | 8 ++++++++ 5 files changed, 46 insertions(+), 9 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index c103ade4db..7892127445 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -940,7 +940,8 @@ int _upb_Decoder_GetVarintOp(const 
upb_MiniTable_Field* field) { return kVarintOps[field->descriptortype]; } -int _upb_Decoder_GetDelimitedOp(const upb_MiniTable_Field* field) { +int _upb_Decoder_GetDelimitedOp(const upb_MiniTable* mt, + const upb_MiniTable_Field* field) { enum { kRepeatedBase = 19 }; static const int8_t kDelimitedOps[] = { @@ -991,13 +992,24 @@ int _upb_Decoder_GetDelimitedOp(const upb_MiniTable_Field* field) { int ndx = field->descriptortype; if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase; - return kDelimitedOps[ndx]; + int op = kDelimitedOps[ndx]; + + // If sub-message is not linked, treat as unknown. + if (op == kUpb_DecodeOp_SubMessage && + !(field->mode & kUpb_LabelFlags_IsExtension)) { + const upb_MiniTable_Sub* sub = &mt->subs[field->submsg_index]; + if (!sub->submsg) { + op = kUpb_DecodeOp_UnknownField; + } + } + + return op; } UPB_FORCEINLINE static const char* _upb_Decoder_DecodeWireValue( - upb_Decoder* d, const char* ptr, const upb_MiniTable_Field* field, - int wire_type, wireval* val, int* op) { + upb_Decoder* d, const char* ptr, const upb_MiniTable* mt, + const upb_MiniTable_Field* field, int wire_type, wireval* val, int* op) { static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) | (1 << kUpb_FieldType_Fixed32) | (1 << kUpb_FieldType_SFixed32); @@ -1030,7 +1042,7 @@ static const char* _upb_Decoder_DecodeWireValue( return ptr + 8; case kUpb_WireType_Delimited: ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); - *op = _upb_Decoder_GetDelimitedOp(field); + *op = _upb_Decoder_GetDelimitedOp(mt, field); return ptr; case kUpb_WireType_StartGroup: val->uint32_val = field->number; @@ -1189,7 +1201,8 @@ static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr, } field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index); - ptr = _upb_Decoder_DecodeWireValue(d, ptr, field, wire_type, &val, &op); + ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val, + &op); if (op >= 0) { ptr = 
_upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val); diff --git a/upb/mini_table.h b/upb/mini_table.h index 0e2599516c..59f2a71554 100644 --- a/upb/mini_table.h +++ b/upb/mini_table.h @@ -135,9 +135,19 @@ typedef enum { upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len, upb_MiniTablePlatform platform, upb_Arena* arena, upb_Status* status); + +// Links a sub-message field to a MiniTable for that sub-message. If a +// sub-message field is not linked, it will be treated as an unknown field +// during parsing, and setting the field will not be allowed. It is possible +// to link the message field later, at which point it will no longer be treated +// as unknown. However there is no synchronization for this operation, which +// means parallel mutation requires external synchronization. void upb_MiniTable_SetSubMessage(upb_MiniTable* table, upb_MiniTable_Field* field, const upb_MiniTable* sub); + +// Links an enum field to a MiniTable for that enum. All enum fields must +// be linked prior to parsing. 
void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field, const upb_MiniTable_Enum* sub); diff --git a/upb/mini_table_accessors.h b/upb/mini_table_accessors.h index a0abdc83b9..0c3ec7cf3b 100644 --- a/upb/mini_table_accessors.h +++ b/upb/mini_table_accessors.h @@ -192,10 +192,12 @@ UPB_INLINE const upb_Message* upb_MiniTable_GetMessage( } UPB_INLINE void upb_MiniTable_SetMessage(upb_Message* msg, + const upb_MiniTable* mini_table, const upb_MiniTable_Field* field, upb_Message* sub_message) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || field->descriptortype == kUpb_FieldType_Group); + UPB_ASSERT(mini_table->subs[field->submsg_index].submsg); _upb_MiniTable_SetPresence(msg, field); *UPB_PTR_AT(msg, field->offset, const upb_Message*) = sub_message; } @@ -207,8 +209,10 @@ UPB_INLINE upb_Message* upb_MiniTable_GetMutableMessage( field->descriptortype == kUpb_FieldType_Group); upb_Message* sub_message = *UPB_PTR_AT(msg, field->offset, upb_Message*); if (!sub_message) { - sub_message = - _upb_Message_New(mini_table->subs[field->submsg_index].submsg, arena); + const upb_MiniTable* sub_mini_table = + mini_table->subs[field->submsg_index].submsg; + UPB_ASSERT(sub_mini_table); + sub_message = _upb_Message_New(sub_mini_table, arena); *UPB_PTR_AT(msg, field->offset, upb_Message*) = sub_message; _upb_MiniTable_SetPresence(msg, field); } diff --git a/upb/mini_table_accessors_test.cc b/upb/mini_table_accessors_test.cc index 3b57c7a8fa..77bfb4417c 100644 --- a/upb/mini_table_accessors_test.cc +++ b/upb/mini_table_accessors_test.cc @@ -289,7 +289,9 @@ TEST(GeneratedCode, SubMessage) { upb_Message* new_nested_message = protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_new(arena); upb_MiniTable_SetInt32(new_nested_message, nested_message_a_field, 123); - upb_MiniTable_SetMessage(msg, optional_message_field, new_nested_message); + upb_MiniTable_SetMessage( + msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init, + 
optional_message_field, new_nested_message); upb_Message* mutable_message = upb_MiniTable_GetMutableMessage( msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init, diff --git a/upb/reflection/message.c b/upb/reflection/message.c index 6cd92d08d2..94eb51660a 100644 --- a/upb/reflection/message.c +++ b/upb/reflection/message.c @@ -183,6 +183,14 @@ bool upb_Message_Set(upb_Message* msg, const upb_FieldDef* f, memcpy(&ext->data, &val, sizeof(val)); } else { const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f); + + // Building reflection should always cause all sub-message types to be + // linked, but double-check here just for extra assurance. + UPB_ASSERT(!upb_FieldDef_IsSubMessage(f) || + upb_MessageDef_MiniTable(upb_FieldDef_ContainingType(f)) + ->subs[field->submsg_index] + .submsg); + char* mem = UPB_PTR_AT(msg, field->offset, char); memcpy(mem, &val, get_field_size(field)); if (field->presence > 0) { From 71895396100cdffab8692431a31036a55b02077b Mon Sep 17 00:00:00 2001 From: Adam Cozzette Date: Thu, 29 Sep 2022 20:46:08 +0000 Subject: [PATCH 34/35] Rename generated_file_staleness_test() to just staleness_test() This renaming is something we have been planning on doing, and I would like to do it now because I'm getting ready to rely on this staleness_test() macro from the main protobuf repo. 
--- cmake/BUILD.bazel | 4 ++-- cmake/build_defs.bzl | 2 +- cmake/make_cmakelists.py | 2 +- cmake/staleness_test.py | 2 +- cmake/staleness_test_lib.py | 5 ++--- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cmake/BUILD.bazel b/cmake/BUILD.bazel index 10572f052c..2be41e5f00 100644 --- a/cmake/BUILD.bazel +++ b/cmake/BUILD.bazel @@ -25,7 +25,7 @@ load( ":build_defs.bzl", - "generated_file_staleness_test", + "staleness_test", ) load( "//bazel:build_defs.bzl", @@ -68,7 +68,7 @@ genrule( cmd = "cp $(SRCS) $(@D)/generated-in/google/protobuf", ) -generated_file_staleness_test( +staleness_test( name = "test_generated_files", outs = [ "CMakeLists.txt", diff --git a/cmake/build_defs.bzl b/cmake/build_defs.bzl index 9e30af952a..82c37b7efc 100644 --- a/cmake/build_defs.bzl +++ b/cmake/build_defs.bzl @@ -25,7 +25,7 @@ """Bazel support functions related to CMake support.""" -def generated_file_staleness_test(name, outs, generated_pattern, **kwargs): +def staleness_test(name, outs, generated_pattern, **kwargs): """Tests that checked-in file(s) match the contents of generated file(s). The resulting test will verify that all output files exist and have the diff --git a/cmake/make_cmakelists.py b/cmake/make_cmakelists.py index 496fa2f4fc..9aff9e4520 100755 --- a/cmake/make_cmakelists.py +++ b/cmake/make_cmakelists.py @@ -152,7 +152,7 @@ class BuildFileFunctions(object): def cc_proto_library(self, **kwargs): pass - def generated_file_staleness_test(self, **kwargs): + def staleness_test(self, **kwargs): pass def upb_amalgamation(self, **kwargs): diff --git a/cmake/staleness_test.py b/cmake/staleness_test.py index 58f1833c90..a21501f7f6 100644 --- a/cmake/staleness_test.py +++ b/cmake/staleness_test.py @@ -25,7 +25,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -"""The py_test() script for generated_file_staleness_test() rules. 
+"""The py_test() script for staleness_test() rules. Note that this file is preprocessed! The INSERT_<...> text below is replaced with the actual list of files before we actually run the script. diff --git a/cmake/staleness_test_lib.py b/cmake/staleness_test_lib.py index 0748969c34..171d2be0fc 100644 --- a/cmake/staleness_test_lib.py +++ b/cmake/staleness_test_lib.py @@ -25,10 +25,9 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -"""Shared code for validating generated_file_staleness_test() rules. +"""Shared code for validating staleness_test() rules. -This code is used by test scripts generated from -generated_file_staleness_test() rules. +This code is used by test scripts generated from staleness_test() rules. """ from __future__ import absolute_import From 4a9e1ba66216f0f143275ff1df88c13abb16cc2e Mon Sep 17 00:00:00 2001 From: Adam Cozzette Date: Thu, 29 Sep 2022 22:14:05 +0000 Subject: [PATCH 35/35] Make staleness_test() macro usable in other Bazel workspaces This commit makes a couple changes to allow staleness_test() to be used from outside the upb repo: - Fully qualify references to upb targets and wrap them in a Label() constructor. See here for details: https://bazel.build/extending/macros#label-resolution - Make the :staleness_test_lib target public. --- cmake/BUILD.bazel | 3 +++ cmake/build_defs.bzl | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cmake/BUILD.bazel b/cmake/BUILD.bazel index 2be41e5f00..a4d4ff88ea 100644 --- a/cmake/BUILD.bazel +++ b/cmake/BUILD.bazel @@ -40,6 +40,9 @@ py_library( name = "staleness_test_lib", testonly = 1, srcs = ["staleness_test_lib.py"], + # This is public only for use by the staleness_test() macro. Please do not + # depend on this target directly. 
+ visibility = ["//visibility:public"], ) py_binary( diff --git a/cmake/build_defs.bzl b/cmake/build_defs.bzl index 82c37b7efc..eddeaf0d32 100644 --- a/cmake/build_defs.bzl +++ b/cmake/build_defs.bzl @@ -42,7 +42,7 @@ def staleness_test(name, outs, generated_pattern, **kwargs): """ script_name = name + ".py" - script_src = ":staleness_test.py" + script_src = Label("//cmake:staleness_test.py") # Filter out non-existing rules so Blaze doesn't error out before we even # run the test. @@ -57,7 +57,7 @@ def staleness_test(name, outs, generated_pattern, **kwargs): outs = [script_name], srcs = [script_src], testonly = 1, - cmd = "cat $(location " + script_src + ") > $@; " + + cmd = "cp $< $@; " + "sed -i.bak -e 's|INSERT_FILE_LIST_HERE|" + "\\\n ".join(file_list) + "|' $@", ) @@ -67,7 +67,7 @@ def staleness_test(name, outs, generated_pattern, **kwargs): data = existing_outs + [generated_pattern % file for file in outs], python_version = "PY3", deps = [ - ":staleness_test_lib", + Label("//cmake:staleness_test_lib"), ], **kwargs )