diff --git a/python/descriptor.c b/python/descriptor.c index e45e6897c3..51aa8c62e1 100644 --- a/python/descriptor.c +++ b/python/descriptor.c @@ -514,6 +514,16 @@ static PyObject* PyUpb_Descriptor_GetEnumValuesByName(PyObject* _self, const upb_enumdef* e = upb_msgdef_nestedenum(self->def, i); int value_count = upb_enumdef_valuecount(e); for (int j = 0; j < value_count; j++) { + // Collisions should be impossible here, as uniqueness is checked by + // protoc (this is an invariant of the protobuf language). However this + // uniqueness constraint is not currently checked by upb/def.c at load + // time, so if the user supplies a manually-constructed descriptor that + // does not respect this constraint, a collision could be possible and the + // last-defined enumerator would win. This could be seen as an argument + // for having upb actually build the table at load time, thus checking the + // constraint proactively, but upb is always checking a subset of the full + // validation performed by C++, and we have to pick and choose the biggest + // bang for the buck. const upb_enumvaldef* ev = upb_enumdef_value(e, j); const char* name = upb_enumvaldef_name(ev); PyObject* val = PyLong_FromLong(upb_enumvaldef_number(ev)); diff --git a/python/descriptor_pool.c b/python/descriptor_pool.c index 1b3c7d46d9..95e40bc05b 100644 --- a/python/descriptor_pool.c +++ b/python/descriptor_pool.c @@ -159,13 +159,26 @@ static bool PyUpb_DescriptorPool_TryLoadFilename(PyUpb_DescriptorPool* self, } bool PyUpb_DescriptorPool_CheckNoDatabase(PyObject* _self) { - PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self; - if (self->db) { - PyErr_SetString( - PyExc_ValueError, - "Cannot call Add on a DescriptorPool that uses a DescriptorDatabase. " - "Add your file to the underlying database."); - return false; + return true; +} + +static bool PyUpb_DescriptorPool_LoadDependentFiles( + PyUpb_DescriptorPool* self, google_protobuf_FileDescriptorProto* proto) { + // Load dependent files if necessary. + size_t n; + const upb_strview* deps = + google_protobuf_FileDescriptorProto_dependency(proto, &n); + for (size_t i = 0; i < n; i++) { + const upb_filedef* dep = + upb_symtab_lookupfile2(self->symtab, deps[i].data, deps[i].size); + if (!dep) { + PyObject* filename = + PyUnicode_FromStringAndSize(deps[i].data, deps[i].size); + if (!filename) return false; + bool ok = PyUpb_DescriptorPool_TryLoadFilename(self, filename); + Py_DECREF(filename); + if (!ok) return false; + } } return true; } @@ -173,12 +186,12 @@ bool PyUpb_DescriptorPool_CheckNoDatabase(PyObject* _self) { static PyObject* PyUpb_DescriptorPool_DoAddSerializedFile( PyObject* _self, PyObject* serialized_pb) { PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self; - char* buf; - Py_ssize_t size; upb_arena* arena = upb_arena_new(); if (!arena) PYUPB_RETURN_OOM; PyObject* result = NULL; + char* buf; + Py_ssize_t size; if (PyBytes_AsStringAndSize(serialized_pb, &buf, &size) < 0) { goto done; } @@ -212,21 +225,7 @@ static PyObject* PyUpb_DescriptorPool_DoAddSerializedFile( } if (self->db) { - // Load dependent files if necessary. - size_t n; - const upb_strview* deps = - google_protobuf_FileDescriptorProto_dependency(proto, &n); - for (size_t i = 0; i < n; i++) { - const upb_filedef* dep = - upb_symtab_lookupfile2(self->symtab, deps[i].data, deps[i].size); - if (!dep) { - PyObject* filename = - PyUnicode_FromStringAndSize(deps[i].data, deps[i].size); - if (!filename) goto done; - if (!PyUpb_DescriptorPool_TryLoadFilename(self, filename)) goto done; - Py_DECREF(filename); - } - } + if (!PyUpb_DescriptorPool_LoadDependentFiles(self, proto)) goto done; } upb_status status; @@ -274,15 +273,29 @@ static PyObject* PyUpb_DescriptorPool_DoAdd(PyObject* _self, * Adds the given serialized FileDescriptorProto to the pool. */ static PyObject* PyUpb_DescriptorPool_AddSerializedFile( - PyObject * self, PyObject * serialized_pb) { - if (!PyUpb_DescriptorPool_CheckNoDatabase(self)) return NULL; - return PyUpb_DescriptorPool_DoAddSerializedFile(self, serialized_pb); + PyObject * _self, PyObject * serialized_pb) { + PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self; + if (self->db) { + PyErr_SetString( + PyExc_ValueError, + "Cannot call AddSerializedFile on a DescriptorPool that uses a " + "DescriptorDatabase. Add your file to the underlying database."); + return false; + } + return PyUpb_DescriptorPool_DoAddSerializedFile(_self, serialized_pb); } -static PyObject* PyUpb_DescriptorPool_Add(PyObject* self, +static PyObject* PyUpb_DescriptorPool_Add(PyObject* _self, PyObject* file_desc) { - if (!PyUpb_DescriptorPool_CheckNoDatabase(self)) return NULL; - return PyUpb_DescriptorPool_DoAdd(self, file_desc); + PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)_self; + if (self->db) { + PyErr_SetString( + PyExc_ValueError, + "Cannot call Add on a DescriptorPool that uses a DescriptorDatabase. " + "Add your file to the underlying database."); + return false; + } + return PyUpb_DescriptorPool_DoAdd(_self, file_desc); } /* diff --git a/upb/def.c b/upb/def.c index 57c4cc71a2..494a897a68 100644 --- a/upb/def.c +++ b/upb/def.c @@ -140,7 +140,7 @@ struct upb_enumdef { struct upb_enumvaldef { const google_protobuf_EnumValueOptions *opts; - const upb_enumdef *enum_; + const upb_enumdef *parent; const char *full_name; int32_t number; }; @@ -453,7 +453,7 @@ bool upb_enumvaldef_hasoptions(const upb_enumvaldef *e) { } const upb_enumdef *upb_enumvaldef_enum(const upb_enumvaldef *ev) { - return ev->enum_; + return ev->parent; } const char *upb_enumvaldef_fullname(const upb_enumvaldef *ev) { @@ -469,7 +469,8 @@ int32_t upb_enumvaldef_number(const upb_enumvaldef *ev) { } uint32_t upb_enumvaldef_index(const upb_enumvaldef *ev) { - return ev - ev->enum_->values; + // Compute index in our parent's array. + return ev - ev->parent->values; } /* upb_extrange ***************************************************************/ @@ -1030,6 +1031,7 @@ int upb_oneofdef_numfields(const upb_oneofdef *o) { } uint32_t upb_oneofdef_index(const upb_oneofdef *o) { + // Compute index in our parent's array. return o - o->parent->oneofs; } @@ -2057,10 +2059,9 @@ static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) { } static bool upb_symtab_TryGetChar(const char** src, const char* end, char* ch) { - const char* ptr = *src + 1; - if (ptr == end) return false; - *ch = *ptr; - *src = ptr; + if (*src == end) return false; + *ch = **src; + *src += 1; return true; } @@ -2068,11 +2069,11 @@ static char upb_symtab_TryGetHexDigit(symtab_addctx* ctx, const upb_fielddef* f, const char** src, const char* end) { char ch; if (!upb_symtab_TryGetChar(src, end, &ch)) return -1; - if (ch >= '0' && ch <= '9') { + if ('0' <= ch && ch <= '9') { return ch - '0'; } ch = upb_ascii_lower(ch); - if (ch >= 'a' && ch <= 'f') { + if ('a' <= ch && ch <= 'f') { return ch - 'a' + 0xa; } *src -= 1; // Char wasn't actually a hex digit. @@ -2103,23 +2104,22 @@ static char upb_symtab_ParseHexEscape(symtab_addctx* ctx, const upb_fielddef* f, char upb_symtab_TryGetOctalDigit(const char** src, const char* end) { char ch; if (!upb_symtab_TryGetChar(src, end, &ch)) return -1; - if (ch >= '0' && ch <= '7') { + if ('0' <= ch && ch <= '7') { return ch - '0'; } - *src -= 1; // Char wasn't actually a octal digit. + *src -= 1; // Char wasn't actually an octal digit. return -1; } static char upb_symtab_ParseOctalEscape(symtab_addctx* ctx, - const upb_fielddef* f, char first, + const upb_fielddef* f, const char** src, const char* end) { - char ch = first - '0'; - char digit; - if ((digit = upb_symtab_TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; - } - if ((digit = upb_symtab_TryGetOctalDigit(src, end)) >= 0) { - ch = (ch << 3) | digit; + char ch = 0; + for (int i = 0; i < 3; i++) { + char digit; + if ((digit = upb_symtab_TryGetOctalDigit(src, end)) >= 0) { + ch = (ch << 3) | digit; + } } return ch; } @@ -2166,7 +2166,8 @@ static char upb_symtab_ParseEscape(symtab_addctx* ctx, const upb_fielddef* f, case '5': case '6': case '7': - return upb_symtab_ParseOctalEscape(ctx, f, ch, src, end); + *src -= 1; + return upb_symtab_ParseOctalEscape(ctx, f, src, end); } symtab_errf(ctx, "Unknown escape sequence: \\%c", ch); } @@ -2179,11 +2180,12 @@ static str_t* unescape(symtab_addctx* ctx, const upb_fielddef* f, const char* src = data; const char* end = data + len; - for (; src < end; dst++, src++) { + while (src < end) { if (*src == '\\') { - *dst = upb_symtab_ParseEscape(ctx, f, &src, end); + src++; + *dst++ = upb_symtab_ParseEscape(ctx, f, &src, end); } else { - *dst = *src; + *dst++ = *src++; } } @@ -2640,7 +2642,7 @@ static void create_enumvaldef( upb_strview name = google_protobuf_EnumValueDescriptorProto_name(val_proto); upb_value v = upb_value_constptr(val); - val->enum_ = e; /* Must happen prior to symtab_add(). */ + val->parent = e; /* Must happen prior to symtab_add(). */ val->full_name = makefullname(ctx, prefix, name); val->number = google_protobuf_EnumValueDescriptorProto_number(val_proto); symtab_add(ctx, val->full_name, pack_def(val, UPB_DEFTYPE_ENUMVAL)); @@ -2713,6 +2715,10 @@ static void create_enumdef( } } +static void msgdef_create_nested( + symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto, + upb_msgdef* m); + static void create_msgdef(symtab_addctx *ctx, const char *prefix, const google_protobuf_DescriptorProto *msg_proto, const upb_msgdef *containing_type, @@ -2720,10 +2726,8 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, upb_msgdef *m = (upb_msgdef*)_m; const google_protobuf_OneofDescriptorProto *const *oneofs; const google_protobuf_FieldDescriptorProto *const *fields; - const google_protobuf_EnumDescriptorProto *const *enums; - const google_protobuf_DescriptorProto *const *msgs; const google_protobuf_DescriptorProto_ExtensionRange *const *ext_ranges; - size_t i, n_oneof, n_field, n_ext_range, n; + size_t i, n_oneof, n_field, n_ext_range; upb_strview name; m->file = ctx->file; /* Must happen prior to symtab_add(). */ @@ -2765,7 +2769,8 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, m->field_count = n_field; m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field); for (i = 0; i < n_field; i++) { - create_fielddef(ctx, m->full_name, m, fields[i], &m->fields[i], false); + create_fielddef(ctx, m->full_name, m, fields[i], &m->fields[i], + /* is_extension= */ false); } m->ext_range_count = n_ext_range; @@ -2797,29 +2802,38 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); upb_inttable_compact(&m->itof, ctx->arena); + msgdef_create_nested(ctx, msg_proto, m); +} - /* This message is built. Now build nested entities. */ +static void msgdef_create_nested( + symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto, + upb_msgdef* m) { + size_t n; - enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n); + const google_protobuf_EnumDescriptorProto* const* enums = + google_protobuf_DescriptorProto_enum_type(msg_proto, &n); m->nested_enum_count = n; m->nested_enums = symtab_alloc(ctx, sizeof(*m->nested_enums) * n); - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { m->nested_enum_count = i + 1; create_enumdef(ctx, m->full_name, enums[i], m, &m->nested_enums[i]); } - fields = google_protobuf_DescriptorProto_extension(msg_proto, &n); + const google_protobuf_FieldDescriptorProto* const* exts = + google_protobuf_DescriptorProto_extension(msg_proto, &n); m->nested_ext_count = n; m->nested_exts = symtab_alloc(ctx, sizeof(*m->nested_exts) * n); - for (i = 0; i < n; i++) { - create_fielddef(ctx, m->full_name, m, fields[i], &m->nested_exts[i], true); + for (size_t i = 0; i < n; i++) { + create_fielddef(ctx, m->full_name, m, exts[i], &m->nested_exts[i], + /* is_extension= */ true); ((upb_fielddef*)&m->nested_exts[i])->index_ = i; } - msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); + const google_protobuf_DescriptorProto* const* msgs = + google_protobuf_DescriptorProto_nested_type(msg_proto, &n); m->nested_msg_count = n; m->nested_msgs = symtab_alloc(ctx, sizeof(*m->nested_msgs) * n); - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { create_msgdef(ctx, m->full_name, msgs[i], m, &m->nested_msgs[i]); } } @@ -3126,7 +3140,7 @@ static void build_filedef( file->top_lvl_exts = symtab_alloc(ctx, sizeof(*file->top_lvl_exts) * n); for (i = 0; i < n; i++) { create_fielddef(ctx, file->package, NULL, exts[i], &file->top_lvl_exts[i], - true); + /* is_extension= */ true); ((upb_fielddef*)&file->top_lvl_exts[i])->index_ = i; }