diff --git a/BUILD b/BUILD index 4616a49a28..29a996205d 100644 --- a/BUILD +++ b/BUILD @@ -128,6 +128,26 @@ cc_library( ], ) +cc_library( + name = "mini_descriptor", + srcs = [ + "upb/mini_descriptor.c", + ], + hdrs = [ + "upb/mini_descriptor.h", + ], + copts = UPB_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + ":descriptor_upb_proto", + ":mini_table", + ":port", + ":reflection", + ":table", + ":upb", + ], +) + cc_library( name = "mini_table_internal", hdrs = ["upb/msg_internal.h"], diff --git a/upb/mini_descriptor.c b/upb/mini_descriptor.c new file mode 100644 index 0000000000..8004964b19 --- /dev/null +++ b/upb/mini_descriptor.c @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2009-2022, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "upb/mini_descriptor.h"
+
+// NOTE(review): the six system header names below were missing from this copy
+// of the patch and are reconstructed from what the file uses (bool, size_t,
+// fixed-width integers, qsort, memcpy) -- confirm against upstream.
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "upb/def.h"
+#include "upb/mini_table.h"
+
+// Must be last.
+#include "upb/port_def.inc"
+
+/* DescState ******************************************************************/
+
+// Manages the storage for mini descriptor strings as they are being encoded.
+// TODO(b/234740652): Move some of this state directly into the encoder, maybe.
+
+typedef struct {
+  upb_MtDataEncoder e;  // Encoder state; `e.end` tracks the end of `buf`.
+  size_t bufsize;       // Capacity of `buf` in bytes.
+  char* buf;            // Start of the output buffer (NULL until first Grow).
+  char* ptr;            // Current write position inside `buf`.
+} DescState;
+
+// Resets `d` to an empty state. No memory is allocated until the first call
+// to upb_DescState_Grow().
+static void upb_DescState_Init(DescState* d) {
+  d->bufsize = kUpb_MtDataEncoder_MinSize * 2;
+  d->buf = NULL;
+  d->ptr = NULL;
+}
+
+// Ensures that at least kUpb_MtDataEncoder_MinSize bytes are available at
+// `d->ptr`, allocating the buffer on first use and doubling it when needed.
+// Returns false if the arena allocation fails; in that case `d` is unchanged.
+static bool upb_DescState_Grow(DescState* d, upb_Arena* a) {
+  if (!d->buf) {
+    d->buf = upb_Arena_Malloc(a, d->bufsize);
+    if (!d->buf) return false;
+    d->ptr = d->buf;
+    d->e.end = d->buf + d->bufsize;
+  }
+
+  // Computed only once the buffer exists, so we never subtract NULL pointers,
+  // and kept as size_t so the offset is not narrowed.
+  const size_t used = (size_t)(d->ptr - d->buf);
+
+  if (d->bufsize - used < kUpb_MtDataEncoder_MinSize) {
+    // Commit the new buffer only after the reallocation has succeeded.
+    const size_t newsize = d->bufsize * 2;
+    char* newbuf = upb_Arena_Realloc(a, d->buf, d->bufsize, newsize);
+    if (!newbuf) return false;
+    d->buf = newbuf;
+    d->bufsize = newsize;
+    d->ptr = newbuf + used;
+    d->e.end = newbuf + newsize;
+  }
+
+  return true;
+}
+
+// Stores a view of everything written so far (buf .. ptr) into `*str`.
+static void upb_DescState_Emit(const DescState* d, upb_StringView* str) {
+  *str = upb_StringView_FromDataAndSize(d->buf, d->ptr - d->buf);
+}
+
+/******************************************************************************/
+
+// Type and Field accessors.
+
+// True for every field type except string, bytes, group and message.
+static inline bool upb_Type_IsPackable(upb_FieldType type) {
+  return (type != kUpb_FieldType_String && type != kUpb_FieldType_Group &&
+          type != kUpb_FieldType_Message && type != kUpb_FieldType_Bytes);
+}
+
+// True if the field descriptor carries a oneof index.
+static inline bool upb_Field_IsOneof(const google_protobuf_FieldDescriptorProto* f) {
+  return google_protobuf_FieldDescriptorProto_has_oneof_index(f);
+}
+
+static inline bool upb_Field_IsOptional(const google_protobuf_FieldDescriptorProto* f) {
+  const upb_Label label = google_protobuf_FieldDescriptorProto_label(f);
+  return label == kUpb_Label_Optional;
+}
+
+static inline bool upb_Field_IsRepeated(const google_protobuf_FieldDescriptorProto* f) {
+  const upb_Label label = google_protobuf_FieldDescriptorProto_label(f);
+  return label == kUpb_Label_Repeated;
+}
+
+static inline bool upb_Field_IsRequired(const google_protobuf_FieldDescriptorProto* f) {
+  const upb_Label label = google_protobuf_FieldDescriptorProto_label(f);
+  return label == kUpb_Label_Required;
+}
+
+// True if the field is repeated and its type is packable.
+static inline bool upb_Field_IsPackable(const google_protobuf_FieldDescriptorProto* f) {
+  if (!upb_Field_IsRepeated(f)) return false;
+
+  const upb_FieldType type = google_protobuf_FieldDescriptorProto_type(f);
+  return upb_Type_IsPackable(type);
+}
+
+// True if the field is actually packed. Under proto2 a packable field is
+// packed only when its options say so; under any other syntax it is packed
+// unless its options explicitly set `packed` to false.
+static bool upb_Field_IsPacked(const google_protobuf_FieldDescriptorProto* f,
+                               upb_Syntax syntax) {
+  if (!upb_Field_IsPackable(f)) return false;
+
+  const bool has_options = google_protobuf_FieldDescriptorProto_has_options(f);
+  const google_protobuf_FieldOptions* options = google_protobuf_FieldDescriptorProto_options(f);
+
+  switch (syntax) {
+    case kUpb_Syntax_Proto2:
+      if (!has_options) return false;
+      break;
+
+    default:
+      if (!has_options) return true;
+      if (!google_protobuf_FieldOptions_has_packed(options)) return true;
+      break;
+  }
+
+  return google_protobuf_FieldOptions_packed(options);
+}
+
+// Returns the field's oneof index; only meaningful if upb_Field_IsOneof(f).
+static inline int Field_OneofIndex(const google_protobuf_FieldDescriptorProto* f) {
+  return google_protobuf_FieldDescriptorProto_oneof_index(f);
+}
+
+// True if the field tracks presence: never for repeated fields; otherwise for
+// messages, groups, oneof members, and every field of a proto2 file.
+static bool upb_Field_HasPresence(const google_protobuf_FieldDescriptorProto* f,
+                                  upb_Syntax syntax) {
+  if (upb_Field_IsRepeated(f)) return false;
+
+  const upb_FieldType type = google_protobuf_FieldDescriptorProto_type(f);
+  return type == kUpb_FieldType_Message || type == kUpb_FieldType_Group ||
+         upb_Field_IsOneof(f) || syntax == kUpb_Syntax_Proto2;
+}
+
+// Computes the kUpb_FieldModifier_* bit set describing `f` under `syntax`.
+// NOTE(review): this function has external linkage but is not declared in
+// mini_descriptor.h -- confirm whether it is meant to be static.
+uint64_t upb_Field_Modifier(const google_protobuf_FieldDescriptorProto* f,
+                            upb_Syntax syntax) {
+  uint64_t out = 0;
+  if (upb_Field_IsRepeated(f)) {
+    out |= kUpb_FieldModifier_IsRepeated;
+  }
+  if (upb_Field_IsPacked(f, syntax)) {
+    out |= kUpb_FieldModifier_IsPacked;
+  }
+  if (google_protobuf_FieldDescriptorProto_type(f) == kUpb_FieldType_Enum &&
+      syntax == kUpb_Syntax_Proto2) {
+    out |= kUpb_FieldModifier_IsClosedEnum;
+  }
+  if (upb_Field_IsOptional(f) && !upb_Field_HasPresence(f, syntax)) {
+    out |= kUpb_FieldModifier_IsProto3Singular;
+  }
+  if (upb_Field_IsRequired(f)) {
+    out |= kUpb_FieldModifier_IsRequired;
+  }
+  return out;
+}
+
+/******************************************************************************/
+
+// Sort by enum value. Numbers are compared as unsigned, so negative values
+// order after all non-negative ones.
+static int upb_MiniDescriptor_CompareEnums(const void* a, const void* b) {
+  // qsort() hands us pointers to the array elements, which are themselves
+  // pointers; the casts keep every level const.
+  const google_protobuf_EnumValueDescriptorProto* A =
+      *(const google_protobuf_EnumValueDescriptorProto* const*)a;
+  const google_protobuf_EnumValueDescriptorProto* B =
+      *(const google_protobuf_EnumValueDescriptorProto* const*)b;
+  const uint32_t numA = (uint32_t)google_protobuf_EnumValueDescriptorProto_number(A);
+  const uint32_t numB = (uint32_t)google_protobuf_EnumValueDescriptorProto_number(B);
+  if (numA < numB) return -1;
+  if (numA > numB) return 1;
+  return 0;
+}
+
+// Sort by field number.
+static int upb_MiniDescriptor_CompareFields(const void* a, const void* b) {
+  // qsort() passes pointers to the array elements (which are pointers).
+  const google_protobuf_FieldDescriptorProto* A =
+      *(const google_protobuf_FieldDescriptorProto* const*)a;
+  const google_protobuf_FieldDescriptorProto* B =
+      *(const google_protobuf_FieldDescriptorProto* const*)b;
+  if (google_protobuf_FieldDescriptorProto_number(A) <
+      google_protobuf_FieldDescriptorProto_number(B))
+    return -1;
+  if (google_protobuf_FieldDescriptorProto_number(A) >
+      google_protobuf_FieldDescriptorProto_number(B))
+    return 1;
+  return 0;
+}
+
+// Sort first by oneof index then by field number. Fields that are not part of
+// a oneof are treated as having index -1.
+static int upb_MiniDescriptor_CompareOneofs(const void* a, const void* b) {
+  const google_protobuf_FieldDescriptorProto* A =
+      *(const google_protobuf_FieldDescriptorProto* const*)a;
+  const google_protobuf_FieldDescriptorProto* B =
+      *(const google_protobuf_FieldDescriptorProto* const*)b;
+  const int indexA = upb_Field_IsOneof(A) ? Field_OneofIndex(A) : -1;
+  const int indexB = upb_Field_IsOneof(B) ? Field_OneofIndex(B) : -1;
+  if (indexA < indexB) return -1;
+  if (indexA > indexB) return 1;
+  if (google_protobuf_FieldDescriptorProto_number(A) <
+      google_protobuf_FieldDescriptorProto_number(B))
+    return -1;
+  if (google_protobuf_FieldDescriptorProto_number(A) >
+      google_protobuf_FieldDescriptorProto_number(B))
+    return 1;
+  return 0;
+}
+
+// Encodes the values of `enum_type`, in ascending (unsigned) numeric order, as
+// a mini descriptor string allocated from `a`. Returns a view with NULL data
+// and zero size if an allocation fails.
+upb_StringView upb_MiniDescriptor_EncodeEnum(
+    const google_protobuf_EnumDescriptorProto* enum_type, upb_Arena* a) {
+  upb_StringView out;
+  out.data = NULL;
+  out.size = 0;
+
+  size_t len = 0;
+  const google_protobuf_EnumValueDescriptorProto* const* value_types =
+      google_protobuf_EnumDescriptorProto_value(enum_type, &len);
+
+  // Copy and sort. Skipped for an empty enum so that we neither depend on
+  // what a zero-byte allocation returns nor copy from a possibly-NULL array.
+  google_protobuf_EnumValueDescriptorProto** sorted = NULL;
+  if (len) {
+    sorted = upb_gmalloc(len * sizeof(void*));
+    if (!sorted) goto err;
+    memcpy(sorted, value_types, len * sizeof(void*));
+    qsort(sorted, len, sizeof(void*), upb_MiniDescriptor_CompareEnums);
+  }
+
+  DescState s;
+  upb_DescState_Init(&s);
+
+  upb_MtDataEncoder_StartEnum(&s.e);
+
+  for (size_t i = 0; i < len; i++) {
+    if (!upb_DescState_Grow(&s, a)) goto err;
+    const uint32_t number = google_protobuf_EnumValueDescriptorProto_number(sorted[i]);
+    s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, number);
+    UPB_ASSERT(s.ptr);
+  }
+
+  if (!upb_DescState_Grow(&s, a)) goto err;
+  s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr);
+  UPB_ASSERT(s.ptr);
+
+  upb_DescState_Emit(&s, &out);
+
+err:
+  if (sorted) upb_gfree(sorted);
+  return out;
+}
+
+// Encodes the single extension field `extension_type` as a mini descriptor
+// string allocated from `a`. Returns a view with NULL data and zero size if an
+// allocation fails.
+upb_StringView upb_MiniDescriptor_EncodeExtension(
+    const google_protobuf_FieldDescriptorProto* extension_type, upb_Syntax syntax,
+    upb_Arena* a) {
+  upb_StringView out;
+  out.data = NULL;
+  out.size = 0;
+
+  DescState s;
+  upb_DescState_Init(&s);
+
+  // Each encoder call returns the advanced write position. It must be stored
+  // back into s.ptr, otherwise upb_DescState_Emit() sees nothing written and
+  // produces an empty string.
+  if (!upb_DescState_Grow(&s, a)) goto err;
+  s.ptr = upb_MtDataEncoder_StartMessage(&s.e, s.ptr, 0);
+  UPB_ASSERT(s.ptr);
+
+  const upb_FieldType type = google_protobuf_FieldDescriptorProto_type(extension_type);
+  const int number = google_protobuf_FieldDescriptorProto_number(extension_type);
+  const uint64_t modifier = upb_Field_Modifier(extension_type, syntax);
+
+  if (!upb_DescState_Grow(&s, a)) goto err;
+  s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifier);
+  UPB_ASSERT(s.ptr);
+
+  upb_DescState_Emit(&s, &out);
+
+err:
+  return out;
+}
+
+// Encodes `message_type` as a mini descriptor string allocated from `a`: first
+// every field in ascending field-number order, then the oneof groups (ordered
+// by oneof index, members by field number). Returns a view with NULL data and
+// zero size if an allocation fails.
+upb_StringView upb_MiniDescriptor_EncodeMessage(
+    const google_protobuf_DescriptorProto* message_type, upb_Syntax syntax,
+    upb_Arena* a) {
+  upb_StringView out;
+  out.data = NULL;
+  out.size = 0;
+
+  size_t len = 0;
+  const google_protobuf_FieldDescriptorProto* const* field_types =
+      google_protobuf_DescriptorProto_field(message_type, &len);
+
+  // Copy and sort. Skipped for a message without fields so that we neither
+  // depend on what a zero-byte allocation returns nor copy from a
+  // possibly-NULL array.
+  google_protobuf_FieldDescriptorProto** sorted = NULL;
+  if (len) {
+    sorted = upb_gmalloc(len * sizeof(void*));
+    if (!sorted) goto err;
+    memcpy(sorted, field_types, len * sizeof(void*));
+    qsort(sorted, len, sizeof(void*), upb_MiniDescriptor_CompareFields);
+  }
+
+  DescState s;
+  upb_DescState_Init(&s);
+
+  if (!upb_DescState_Grow(&s, a)) goto err;
+  s.ptr = upb_MtDataEncoder_StartMessage(&s.e, s.ptr, 0);
+  UPB_ASSERT(s.ptr);
+
+  // Encode the fields.
+  size_t oneof_fields = 0;
+  for (size_t i = 0; i < len; i++) {
+    google_protobuf_FieldDescriptorProto* field_type = sorted[i];
+    if (upb_Field_IsOneof(field_type)) {
+      // Put all oneof fields at the beginning of the list for the next pass.
+      // This is safe in place because oneof_fields <= i, so only entries that
+      // were already encoded get overwritten.
+      sorted[oneof_fields++] = field_type;
+    }
+
+    const upb_FieldType type = google_protobuf_FieldDescriptorProto_type(field_type);
+    const int number = google_protobuf_FieldDescriptorProto_number(field_type);
+    const uint64_t modifier = upb_Field_Modifier(field_type, syntax);
+
+    if (!upb_DescState_Grow(&s, a)) goto err;
+    s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifier);
+    UPB_ASSERT(s.ptr);
+  }
+
+  if (oneof_fields > 1) {
+    qsort(sorted, oneof_fields, sizeof(void*), upb_MiniDescriptor_CompareOneofs);
+  }
+
+  // Encode the oneofs. The first `oneof_fields` entries of `sorted` are all
+  // oneof members, grouped by oneof index.
+  int previous_index = -1;
+  for (size_t i = 0; i < oneof_fields; i++) {
+    google_protobuf_FieldDescriptorProto* field_type = sorted[i];
+
+    const int index = Field_OneofIndex(field_type);
+    if (previous_index != index) {
+      if (!upb_DescState_Grow(&s, a)) goto err;
+      s.ptr = upb_MtDataEncoder_StartOneof(&s.e, s.ptr);
+      UPB_ASSERT(s.ptr);
+
+      previous_index = index;
+    }
+
+    if (!upb_DescState_Grow(&s, a)) goto err;
+    s.ptr = upb_MtDataEncoder_PutOneofField(
+        &s.e, s.ptr, google_protobuf_FieldDescriptorProto_number(field_type));
+    UPB_ASSERT(s.ptr);
+  }
+
+  upb_DescState_Emit(&s, &out);
+
+err:
+  if (sorted) upb_gfree(sorted);
+  return out;
+}
diff --git a/upb/mini_descriptor.h b/upb/mini_descriptor.h
new file mode 100644
index 0000000000..0a53a723e8
--- /dev/null
+++ b/upb/mini_descriptor.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2009-2022, Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UPB_MINI_DESCRIPTOR_H_
+#define UPB_MINI_DESCRIPTOR_H_
+
+#include "upb/def.h"
+#include "upb/upb.h"
+
+// Must be last.
+#include "upb/port_def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** upb_MiniDescriptor ********************************************************/
+
+// Each encoder below returns a string view whose bytes are allocated from the
+// arena `a`. If an allocation fails, the returned view has NULL data and zero
+// size.
+
+// Encodes the values of `enum_type`, sorted by number, as a mini descriptor.
+upb_StringView upb_MiniDescriptor_EncodeEnum(
+    const google_protobuf_EnumDescriptorProto* enum_type, upb_Arena* a);
+
+// Encodes the single extension field `extension_type` as a mini descriptor.
+// `syntax` is the syntax of the file that declares the extension; it
+// influences the field modifiers (packed, closed enum, presence).
+upb_StringView upb_MiniDescriptor_EncodeExtension(
+    const google_protobuf_FieldDescriptorProto* extension_type, upb_Syntax syntax,
+    upb_Arena* a);
+
+// Encodes the fields of `message_type`, sorted by field number and followed by
+// its oneof groups, as a mini descriptor. `syntax` is the syntax of the file
+// that declares the message.
+upb_StringView upb_MiniDescriptor_EncodeMessage(
+    const google_protobuf_DescriptorProto* message_type, upb_Syntax syntax,
+    upb_Arena* a);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port_undef.inc"
+
+#endif /* UPB_MINI_DESCRIPTOR_H_ */
diff --git a/upbc/BUILD b/upbc/BUILD
index 0b98f2902e..d32ffa45a0 100644
--- a/upbc/BUILD
+++ b/upbc/BUILD
@@ -27,9 +27,45 @@ load(
     "//bazel:build_defs.bzl",
     "UPB_DEFAULT_CPPOPTS",
 )
+load(
+    "//bazel:upb_proto_library.bzl",
+    "upb_proto_library",
+    "upb_proto_reflection_library",
+)
 
 licenses(["notice"])
 
+proto_library(
+    name = "code_generator_request",
+    srcs = ["code_generator_request.proto"],
+    visibility = ["//:friends"],
+    deps = ["@com_google_protobuf//:compiler_plugin_proto"],
+)
+
+upb_proto_library(
+    name = "code_generator_request_upb_proto",
+    visibility = ["//:friends"],
+    deps = [":code_generator_request"],
+)
+
+upb_proto_reflection_library( + name = "code_generator_request_upb_proto_reflection", + visibility = ["//:friends"], + deps = [":code_generator_request"], +) + +upb_proto_library( + name = "plugin_upb_proto", + visibility = ["//:friends"], + deps = ["@com_google_protobuf//:compiler_plugin_proto"], +) + +upb_proto_reflection_library( + name = "plugin_upb_proto_reflection", + visibility = ["//:friends"], + deps = ["@com_google_protobuf//:compiler_plugin_proto"], +) + cc_library( name = "common", srcs = ["common.cc"], @@ -76,3 +112,29 @@ cc_binary( "@com_google_protobuf//:protoc_lib", ], ) + +cc_binary( + name = "protoc-gen-upbdev", + srcs = [ + "code_generator_request.c", + "code_generator_request.h", + "protoc-gen-upbdev.cc", + "subprocess.cc", + "subprocess.h", + ], + copts = UPB_DEFAULT_CPPOPTS, + visibility = ["//visibility:public"], + deps = [ + ":code_generator_request_upb_proto", + ":code_generator_request_upb_proto_reflection", + ":plugin_upb_proto", + ":plugin_upb_proto_reflection", + "//:json", + "//:mini_descriptor", + "//:mini_table", + "//:port", + "//:reflection", + "//:upb", + "@com_google_absl//absl/strings", + ], +) diff --git a/upbc/code_generator_request.c b/upbc/code_generator_request.c new file mode 100644 index 0000000000..95b8347a07 --- /dev/null +++ b/upbc/code_generator_request.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2009-2022, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "upbc/code_generator_request.h"
+
+// NOTE(review): the seven system header names below were missing from this
+// copy of the patch and are reconstructed from what the file uses (jmp_buf,
+// bool, size_t, fixed-width integers, memcpy) -- confirm against upstream.
+#include <inttypes.h>
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "google/protobuf/compiler/plugin.upb.h"
+#include "upb/def.h"
+#include "upb/mini_descriptor.h"
+#include "upb/mini_table.h"
+
+// Must be last.
+#include "upb/port_def.inc"
+
+// Error codes passed to UPB_LONGJMP() while scraping. They start at 1 because
+// a zero result from UPB_SETJMP() means "no error". The numeric value is what
+// ends up in the status message reported to the caller.
+enum {
+  kErrArenaMalloc = 1,     // An arena allocation failed.
+  kErrEnumName,            // An enum descriptor has no name.
+  kErrExtensionName,       // An extension descriptor has no name.
+  kErrFieldName,           // Presumably: a field descriptor has no name.
+  kErrFilePackage,         // A file descriptor has no package.
+  kErrMapCollision,        // A fully qualified name was seen twice.
+  kErrMiniDescriptorsSet,  // Inserting into the mini descriptor map failed.
+  kErrStateGrow,           // A piece of scraper state could not grow.
+};
+
+/* upbc_PathState *************************************************************/
+
+// Manages the current fully qualified path name as we dig down into a proto.
+// Basically just a string that grows and shrinks like a stack.
+
+typedef struct {
+  size_t len;       // Number of bytes of `path` currently in use.
+  char path[4000];  // TODO(salo): make this dynamic
+} upbc_PathState;
+
+static void upbc_PathState_Init(upbc_PathState* p) { p->len = 0; }
+
+// Appends `name` to the path, preceded by a '.' separator unless the path is
+// empty. Returns false, leaving the path untouched, if the result would not
+// fit in the fixed-size buffer.
+static bool upbc_PathState_Push(upbc_PathState* p, upb_StringView name) {
+  const size_t sep = p->len ? 1 : 0;
+  const size_t avail = sizeof(p->path) - p->len;
+  if (sep > avail || name.size > avail - sep) return false;
+
+  if (sep) {
+    p->path[p->len++] = '.';
+  }
+  if (name.size) {
+    memcpy(&p->path[p->len], name.data, name.size);
+    p->len += name.size;
+  }
+  return true;
+}
+
+// Removes `name`, which must be the most recently pushed component, together
+// with the separator in front of it (if any).
+static void upbc_PathState_Pop(upbc_PathState* p, upb_StringView name) {
+  p->len -= name.size;
+  if (p->len) {
+    p->len--;
+  }
+}
+
+// Returns a view of the current path. The view aliases the internal buffer
+// and is invalidated by the next Push or Pop.
+static upb_StringView upbc_PathState_String(const upbc_PathState* p) {
+  return upb_StringView_FromDataAndSize(p->path, p->len);
+}
+
+/******************************************************************************/
+
+// Kitchen sink storage for the mini descriptor state.
+
+typedef struct {
+  upb_Arena* a;       // Arena that owns `out` and every encoded string.
+  upb_Syntax syntax;  // Syntax of the file currently being scraped.
+
+  upbc_CodeGeneratorRequest* out;  // The request being built.
+
+  jmp_buf err;  // Jump target for the kErr* codes.
+
+  upbc_PathState path;  // Fully qualified name of the current entity.
+} upbc_ScrapeState;
+
+// Prepares `s` for scraping. `s->err` must already have been armed with
+// UPB_SETJMP(), because this function jumps to it if the output message
+// cannot be allocated.
+static void upbc_ScrapeState_Init(upbc_ScrapeState* s, upb_Arena* a) {
+  s->a = a;
+  s->syntax = kUpb_Syntax_Proto2;  // Overwritten for each file scraped.
+
+  upbc_PathState_Init(&s->path);
+
+  s->out = upbc_CodeGeneratorRequest_new(a);
+  if (!s->out) UPB_LONGJMP(s->err, kErrArenaMalloc);
+}
+
+// Descends into `name`. Jumps to `s->err` if the path buffer is full or if the
+// resulting fully qualified name already has a mini descriptor.
+static void upbc_ScrapeState_Push(upbc_ScrapeState* s, upb_StringView name) {
+  if (!upbc_PathState_Push(&s->path, name)) {
+    UPB_LONGJMP(s->err, kErrStateGrow);
+  }
+
+  const upb_StringView key = upbc_PathState_String(&s->path);
+  if (upbc_CodeGeneratorRequest_mini_descriptors_get(s->out, key, NULL)) {
+    UPB_LONGJMP(s->err, kErrMapCollision);
+  }
+}
+
+static void upbc_ScrapeState_Pop(upbc_ScrapeState* s, upb_StringView name) {
+  upbc_PathState_Pop(&s->path, name);
+}
+
+// Records `encoding` as the mini descriptor of the current path. Jumps to
+// `s->err` if the encoding failed or the map insertion fails.
+static void upbc_ScrapeState_String(upbc_ScrapeState* s,
+                                    upb_StringView encoding) {
+  // The upb_MiniDescriptor_Encode*() functions report an allocation failure
+  // by returning a view with NULL data; do not store that as a descriptor.
+  if (!encoding.data) UPB_LONGJMP(s->err, kErrStateGrow);
+
+  const upb_StringView path = upbc_PathState_String(&s->path);
+  bool ok = upbc_CodeGeneratorRequest_mini_descriptors_set(s->out, path,
+                                                           encoding, s->a);
+  if (!ok) UPB_LONGJMP(s->err, kErrMiniDescriptorsSet);
+}
+
+/******************************************************************************/
+
+// File accessors.
+
+// Returns kUpb_Syntax_Proto3 if the file declares syntax "proto3"; every other
+// case, including a missing syntax, is treated as proto2.
+static upb_Syntax upbc_File_Syntax(const google_protobuf_FileDescriptorProto* file) {
+  if (google_protobuf_FileDescriptorProto_has_syntax(file)) {
+    const upb_StringView syntax = google_protobuf_FileDescriptorProto_syntax(file);
+    const upb_StringView proto3 = upb_StringView_FromString("proto3");
+    if (upb_StringView_IsEqual(syntax, proto3)) return kUpb_Syntax_Proto3;
+  }
+  return kUpb_Syntax_Proto2;
+}
+
+/******************************************************************************/
+
+// Forward declaration.
+static void upbc_Scrape_Messages(upbc_ScrapeState*,
+                                 const google_protobuf_DescriptorProto* const*, size_t);
+
+// Encodes one enum and stores the result under its fully qualified name.
+static void upbc_Scrape_Enum(upbc_ScrapeState* s,
+                             const google_protobuf_EnumDescriptorProto* enum_type) {
+  if (!google_protobuf_EnumDescriptorProto_has_name(enum_type)) {
+    UPB_LONGJMP(s->err, kErrEnumName);
+  }
+  const upb_StringView name = google_protobuf_EnumDescriptorProto_name(enum_type);
+
+  upbc_ScrapeState_Push(s, name);
+
+  const upb_StringView encoding =
+      upb_MiniDescriptor_EncodeEnum(enum_type, s->a);
+
+  upbc_ScrapeState_String(s, encoding);
+  upbc_ScrapeState_Pop(s, name);
+}
+
+static void upbc_Scrape_Enums(
+    upbc_ScrapeState* s, const google_protobuf_EnumDescriptorProto* const* enum_types,
+    size_t len) {
+  for (size_t i = 0; i < len; i++) {
+    upbc_Scrape_Enum(s, enum_types[i]);
+  }
+}
+
+// Encodes one extension and stores the result under its fully qualified name.
+static void upbc_Scrape_Extension(
+    upbc_ScrapeState* s, const google_protobuf_FieldDescriptorProto* extension_type) {
+  if (!google_protobuf_FieldDescriptorProto_has_name(extension_type)) {
+    UPB_LONGJMP(s->err, kErrExtensionName);
+  }
+  const upb_StringView name = google_protobuf_FieldDescriptorProto_name(extension_type);
+
+  upbc_ScrapeState_Push(s, name);
+
+  const upb_StringView encoding =
+      upb_MiniDescriptor_EncodeExtension(extension_type, s->syntax, s->a);
+
+  upbc_ScrapeState_String(s, encoding);
+  upbc_ScrapeState_Pop(s, name);
+}
+
+// Takes the state first, like every other upbc_Scrape_*() helper.
+static void upbc_Scrape_Extensions(
+    upbc_ScrapeState* s,
+    const google_protobuf_FieldDescriptorProto* const* extension_types, size_t len) {
+  for (size_t i = 0; i < len; i++) {
+    upbc_Scrape_Extension(s, extension_types[i]);
+  }
+}
+
+// Scrapes every enum, extension and message of one file. The file's package
+// is the root of all fully qualified names; a file without a package is
+// reported as kErrFilePackage.
+static void upbc_Scrape_File(upbc_ScrapeState* s,
+                             const google_protobuf_FileDescriptorProto* file_type) {
+  if (!google_protobuf_FileDescriptorProto_has_package(file_type)) {
+    UPB_LONGJMP(s->err, kErrFilePackage);
+  }
+  const upb_StringView package = google_protobuf_FileDescriptorProto_package(file_type);
+  upbc_ScrapeState_Push(s, package);
+
+  s->syntax = upbc_File_Syntax(file_type);
+
+  size_t len = 0;
+  const google_protobuf_EnumDescriptorProto* const* enum_types =
+      google_protobuf_FileDescriptorProto_enum_type(file_type, &len);
+  upbc_Scrape_Enums(s, enum_types, len);
+
+  const google_protobuf_FieldDescriptorProto* const* extension_types =
+      google_protobuf_FileDescriptorProto_extension(file_type, &len);
+  upbc_Scrape_Extensions(s, extension_types, len);
+
+  const google_protobuf_DescriptorProto* const* message_types =
+      google_protobuf_FileDescriptorProto_message_type(file_type, &len);
+  upbc_Scrape_Messages(s, message_types, len);
+
+  upbc_ScrapeState_Pop(s, package);
+}
+
+static void upbc_Scrape_Files(
+    upbc_ScrapeState* s, const google_protobuf_FileDescriptorProto* const* file_types,
+    size_t len) {
+  for (size_t i = 0; i < len; i++) {
+    upbc_Scrape_File(s, file_types[i]);
+  }
+}
+
+// Encodes one message and then recurses into its nested enums, extensions
+// and messages.
+// NOTE(review): a message without a name is skipped silently, whereas unnamed
+// enums and extensions are errors -- confirm that this is intended.
+static void upbc_Scrape_Message(upbc_ScrapeState* s,
+                                const google_protobuf_DescriptorProto* message_type) {
+  if (!google_protobuf_DescriptorProto_has_name(message_type)) return;
+
+  const upb_StringView name = google_protobuf_DescriptorProto_name(message_type);
+  upbc_ScrapeState_Push(s, name);
+
+  const upb_StringView encoding =
+      upb_MiniDescriptor_EncodeMessage(message_type, s->syntax, s->a);
+  upbc_ScrapeState_String(s, encoding);
+
+  size_t len = 0;
+  const google_protobuf_EnumDescriptorProto* const* enum_types =
+      google_protobuf_DescriptorProto_enum_type(message_type, &len);
+  upbc_Scrape_Enums(s, enum_types, len);
+
+  const google_protobuf_FieldDescriptorProto* const* extension_types =
+      google_protobuf_DescriptorProto_extension(message_type, &len);
+  upbc_Scrape_Extensions(s, extension_types, len);
+
+  const google_protobuf_DescriptorProto* const* nested_types =
+      google_protobuf_DescriptorProto_nested_type(message_type, &len);
+  upbc_Scrape_Messages(s, nested_types, len);
+
+  upbc_ScrapeState_Pop(s, name);
+}
+
+static void upbc_Scrape_Messages(
+    upbc_ScrapeState* s, const google_protobuf_DescriptorProto* const* message_types,
+    size_t len) {
+  for (size_t i = 0; i < len; i++) {
+    upbc_Scrape_Message(s, message_types[i]);
+  }
+}
+
+// Wraps `request` in a new upbc_CodeGeneratorRequest allocated from `a` and
+// fills in the mini descriptor of every enum, extension and message found in
+// the request's proto files. On failure, returns NULL and stores a message
+// containing the numeric kErr* code in `status`.
+upbc_CodeGeneratorRequest* upbc_MakeCodeGeneratorRequest(
+    google_protobuf_compiler_CodeGeneratorRequest* request, upb_Arena* a,
+    upb_Status* status) {
+  upbc_ScrapeState s;
+  // Every helper above reports errors by jumping back to this point.
+  int err = UPB_SETJMP(s.err);
+  if (err) {
+    upb_Status_SetErrorFormat(status, "%s(): error %d", __func__, err);
+    return NULL;
+  }
+  upbc_ScrapeState_Init(&s, a);
+
+  size_t len = 0;
+  const google_protobuf_FileDescriptorProto* const* file_types =
+      google_protobuf_compiler_CodeGeneratorRequest_proto_file(request, &len);
+  upbc_Scrape_Files(&s, file_types, len);
+
+  upbc_CodeGeneratorRequest_set_request(s.out, request);
+  return s.out;
+}
diff --git a/upbc/code_generator_request.h b/upbc/code_generator_request.h
new file mode 100644
index 0000000000..747d98d84d
--- /dev/null
+++ b/upbc/code_generator_request.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2009-2022, Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPBC_CODE_GENERATOR_REQUEST_H_ +#define UPBC_CODE_GENERATOR_REQUEST_H_ + +#include "upb/def.h" +#include "upb/upb.h" +#include "upbc/code_generator_request.upb.h" + +// Must be last. 
+#include "upb/port_def.inc"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Wraps `request` in a new upbc_CodeGeneratorRequest allocated from `a`,
+// adding the mini descriptors of the entities declared in the request's proto
+// files. Returns NULL and sets `s` on failure.
+upbc_CodeGeneratorRequest* upbc_MakeCodeGeneratorRequest(
+    struct google_protobuf_compiler_CodeGeneratorRequest* request, upb_Arena* a,
+    upb_Status* s);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#include "upb/port_undef.inc"
+
+#endif /* UPBC_CODE_GENERATOR_REQUEST_H_ */
diff --git a/upbc/code_generator_request.proto b/upbc/code_generator_request.proto
new file mode 100644
index 0000000000..5491b13693
--- /dev/null
+++ b/upbc/code_generator_request.proto
@@ -0,0 +1,13 @@
+syntax = "proto2";
+
+package upbc;
+
+import "google/protobuf/compiler/plugin.proto";
+
+message CodeGeneratorRequest {
+  // The pb sent by protoc to its plugins.
+  optional google.protobuf.compiler.CodeGeneratorRequest request = 1;
+
+  // Mini descriptors for the above pb, keyed by the fully qualified names.
+  map<string, string> mini_descriptors = 2;
+}
diff --git a/upbc/protoc-gen-upbdev.cc b/upbc/protoc-gen-upbdev.cc
new file mode 100644
index 0000000000..e711ec75b5
--- /dev/null
+++ b/upbc/protoc-gen-upbdev.cc
@@ -0,0 +1,144 @@
+// Copyright (c) 2009-2022, Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in the
+//       documentation and/or other materials provided with the distribution.
+//     * Neither the name of Google LLC nor the
+//       names of its contributors may be used to endorse or promote products
+//       derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <assert.h>
+
+#include <iostream>
+#include <string>
+
+#include "google/protobuf/compiler/plugin.upb.h"
+#include "google/protobuf/compiler/plugin.upbdefs.h"
+#include "upb/json_decode.h"
+#include "upb/json_encode.h"
+#include "upb/mini_descriptor.h"
+#include "upb/upb.h"
+#include "upbc/code_generator_request.h"
+#include "upbc/code_generator_request.upb.h"
+#include "upbc/code_generator_request.upbdefs.h"
+#include "upbc/subprocess.h"
+
+// Plugin launched when the incoming request carries no parameter.
+static constexpr char kDefaultPlugin[] = "protoc_dart_plugin";
+
+// Encodes `request` as JSON and returns it as a std::string.
+//
+// upb_JsonEncode() is called twice: the first call passes no buffer and is
+// used only to obtain the encoded size; the second call writes into a buffer
+// of that size (plus one byte) allocated from arena `a`.
+// Encoding failures are only caught by assert().
+static std::string JsonEncode(const upbc_CodeGeneratorRequest* request,
+                              upb_Arena* a) {
+  upb_DefPool* s = upb_DefPool_New();
+  const upb_MessageDef* m = upbc_CodeGeneratorRequest_getmsgdef(s);
+
+  upb_Status status;
+  upb_Status_Clear(&status);
+
+  const size_t json_size = upb_JsonEncode(request, m, s, 0, NULL, 0, &status);
+  assert(upb_Status_IsOk(&status));
+
+  char* json_buf = (char*)upb_Arena_Malloc(a, json_size + 1);
+
+  (void)upb_JsonEncode(request, m, s, 0, json_buf, json_size + 1, &status);
+  assert(upb_Status_IsOk(&status));
+
+  upb_DefPool_Free(s);
+
+  return std::string(json_buf, json_size);
+}
+
+// Decodes the JSON text in `json` into a new CodeGeneratorResponse allocated
+// from arena `a` and returns it.  Decoding failures are only caught by
+// assert().
+static google_protobuf_compiler_CodeGeneratorResponse* JsonDecode(
+    const std::string& json, upb_Arena* a) {
+  google_protobuf_compiler_CodeGeneratorResponse* response =
+      google_protobuf_compiler_CodeGeneratorResponse_new(a);
+
+  upb_DefPool* s = upb_DefPool_New();
+  const upb_MessageDef* m =
+      google_protobuf_compiler_CodeGeneratorResponse_getmsgdef(s);
+
+  upb_Status status;
+  upb_Status_Clear(&status);
+
+  (void)upb_JsonDecode(json.c_str(), json.size(), response, m, s, 0, a,
+                       &status);
+  assert(upb_Status_IsOk(&status));
+
+  upb_DefPool_Free(s);
+
+  return response;
+}
+
+// Serializes `response` to the binary wire format using arena `a` and returns
+// the bytes as a std::string.  Returns an empty string if the serializer
+// hands back no buffer.
+static std::string Serialize(
+    const google_protobuf_compiler_CodeGeneratorResponse* response,
+    upb_Arena* a) {
+  size_t len = 0;
+  const char* buf = google_protobuf_compiler_CodeGeneratorResponse_serialize(
+      response, a, &len);
+  // Never construct a std::string from a null pointer.
+  if (buf == nullptr) return std::string();
+  return std::string(buf, len);
+}
+
+// Reads a binary CodeGeneratorRequest from stdin, wraps it in a
+// upbc_CodeGeneratorRequest, exchanges JSON with the plugin subprocess and
+// writes the plugin's response to stdout in binary form.
+// Returns 0 on success and -1 on any failure (after reporting it on stderr).
+int main() {
+  upb_Arena* a = upb_Arena_New();
+
+  // Read (binary) stdin into a string.
+  const std::string input = {std::istreambuf_iterator<char>(std::cin),
+                             std::istreambuf_iterator<char>()};
+
+  // Parse the request.
+  auto inner_request = google_protobuf_compiler_CodeGeneratorRequest_parse(
+      input.c_str(), input.size(), a);
+  if (inner_request == nullptr) {
+    // Nothing below can work without a request; fail instead of passing a
+    // null message to the accessors.
+    std::cerr << "Failed to parse CodeGeneratorRequest from stdin."
+              << std::endl;
+    upb_Arena_Free(a);
+    return -1;
+  }
+
+  // Check the request for a plugin name.
+  std::string plugin = kDefaultPlugin;
+  if (google_protobuf_compiler_CodeGeneratorRequest_has_parameter(
+          inner_request)) {
+    auto param =
+        google_protobuf_compiler_CodeGeneratorRequest_parameter(inner_request);
+    plugin = std::string(param.data, param.size);
+  }
+
+  // Wrap the request inside a upbc_CodeGeneratorRequest.
+  upb_Status status;
+  upb_Status_Clear(&status);
+  auto outer_request = upbc_MakeCodeGeneratorRequest(inner_request, a, &status);
+  if (!upb_Status_IsOk(&status)) {
+    std::cerr << status.msg << std::endl;
+    upb_Arena_Free(a);
+    return -1;
+  }
+
+  const std::string json_request = JsonEncode(outer_request, a);
+
+  // Launch the subprocess.
+  upbc::Subprocess subprocess;
+  subprocess.Start(plugin, upbc::Subprocess::SEARCH_PATH);
+
+  // Exchange JSON strings with the subprocess.
+  std::string json_response, error;
+  const bool ok = subprocess.Communicate(json_request, &json_response, &error);
+  if (!ok) {
+    // Say why the exchange failed, then dump the JSON request to stderr if we
+    // can't launch the next plugin.
+    std::cerr << error << std::endl;
+    std::cerr << json_request << std::endl;
+    upb_Arena_Free(a);
+    return -1;
+  }
+
+  // Decode and serialize the JSON response.
+  const auto response = JsonDecode(json_response, a);
+  const std::string output = Serialize(response, a);
+
+  // Question: Is this sufficient for sending reliably to stdout?
+  std::cout << output;
+
+  upb_Arena_Free(a);
+  return 0;
+}
diff --git a/upbc/subprocess.cc b/upbc/subprocess.cc
new file mode 100644
index 0000000000..e46abc86fc
--- /dev/null
+++ b/upbc/subprocess.cc
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 2009-2022, Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Shamelessly copied from the protobuf compiler's subprocess.cc
+// except this version passes strings instead of Messages.
+
+#include "upbc/subprocess.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+#ifndef _WIN32
+#include <errno.h>
+#include <signal.h>
+#include <sys/select.h>
+#include <sys/wait.h>
+#endif
+
+#include "absl/strings/substitute.h"
+#include "upb/upb.h"
+
+/* Must be last. */
+#include "upb/port_def.inc"
+
+namespace upbc {
+
+namespace {
+// Returns a malloc()-allocated copy of the NUL-terminated string `s`, or
+// nullptr if the allocation fails.  The caller releases it with free().
+char* portable_strdup(const char* s) {
+  char* ns = (char*)malloc(strlen(s) + 1);
+  if (ns != nullptr) {
+    strcpy(ns, s);
+  }
+  return ns;
+}
+}  // namespace
+
+#ifdef _WIN32
+
+// NOTE(review): GOOGLE_LOG / GOOGLE_CHECK are protobuf logging macros and no
+// header included by this file obviously declares them -- confirm that the
+// Windows build has them in scope.
+
+// Closes `handle`, logging a fatal error if CloseHandle() fails.
+static void CloseHandleOrDie(HANDLE handle) {
+  if (!CloseHandle(handle)) {
+    GOOGLE_LOG(FATAL) << "CloseHandle: "
+                      << Subprocess::Win32ErrorMessage(GetLastError());
+  }
+}
+
+Subprocess::Subprocess()
+    : process_start_error_(ERROR_SUCCESS),
+      child_handle_(nullptr),
+      child_stdin_(nullptr),
+      child_stdout_(nullptr) {}
+
+// Closes whichever pipe handles are still open.
+Subprocess::~Subprocess() {
+  if (child_stdin_ != nullptr) {
+    CloseHandleOrDie(child_stdin_);
+  }
+  if (child_stdout_ != nullptr) {
+    CloseHandleOrDie(child_stdout_);
+  }
+}
+
+// Creates the stdin/stdout pipes and launches `program`.  A CreateProcessA()
+// failure is not reported here: the error code is stored in
+// process_start_error_ and surfaced by Communicate().
+void Subprocess::Start(const std::string& program, SearchMode search_mode) {
+  // Create the pipes.
+  HANDLE stdin_pipe_read;
+  HANDLE stdin_pipe_write;
+  HANDLE stdout_pipe_read;
+  HANDLE stdout_pipe_write;
+
+  if (!CreatePipe(&stdin_pipe_read, &stdin_pipe_write, nullptr, 0)) {
+    GOOGLE_LOG(FATAL) << "CreatePipe: " << Win32ErrorMessage(GetLastError());
+  }
+  if (!CreatePipe(&stdout_pipe_read, &stdout_pipe_write, nullptr, 0)) {
+    GOOGLE_LOG(FATAL) << "CreatePipe: " << Win32ErrorMessage(GetLastError());
+  }
+
+  // Make child side of the pipes inheritable.
+  if (!SetHandleInformation(stdin_pipe_read, HANDLE_FLAG_INHERIT,
+                            HANDLE_FLAG_INHERIT)) {
+    GOOGLE_LOG(FATAL) << "SetHandleInformation: "
+                      << Win32ErrorMessage(GetLastError());
+  }
+  if (!SetHandleInformation(stdout_pipe_write, HANDLE_FLAG_INHERIT,
+                            HANDLE_FLAG_INHERIT)) {
+    GOOGLE_LOG(FATAL) << "SetHandleInformation: "
+                      << Win32ErrorMessage(GetLastError());
+  }
+
+  // Setup STARTUPINFO to redirect handles.
+  STARTUPINFOA startup_info;
+  ZeroMemory(&startup_info, sizeof(startup_info));
+  startup_info.cb = sizeof(startup_info);
+  startup_info.dwFlags = STARTF_USESTDHANDLES;
+  startup_info.hStdInput = stdin_pipe_read;
+  startup_info.hStdOutput = stdout_pipe_write;
+  startup_info.hStdError = GetStdHandle(STD_ERROR_HANDLE);
+
+  if (startup_info.hStdError == INVALID_HANDLE_VALUE) {
+    GOOGLE_LOG(FATAL) << "GetStdHandle: " << Win32ErrorMessage(GetLastError());
+  }
+
+  // Invoking cmd.exe allows for '.bat' files from the path as well as '.exe'.
+  // Using a malloc'ed string because CreateProcess() can mutate its second
+  // parameter.
+  char* command_line =
+      portable_strdup(("cmd.exe /c \"" + program + "\"").c_str());
+
+  // Create the process.
+  PROCESS_INFORMATION process_info;
+
+  if (CreateProcessA((search_mode == SEARCH_PATH) ? nullptr : program.c_str(),
+                     (search_mode == SEARCH_PATH) ? command_line : nullptr,
+                     nullptr,  // process security attributes
+                     nullptr,  // thread security attributes
+                     TRUE,     // inherit handles?
+                     0,        // obscure creation flags
+                     nullptr,  // environment (inherit from parent)
+                     nullptr,  // current directory (inherit from parent)
+                     &startup_info, &process_info)) {
+    child_handle_ = process_info.hProcess;
+    CloseHandleOrDie(process_info.hThread);
+    child_stdin_ = stdin_pipe_write;
+    child_stdout_ = stdout_pipe_read;
+  } else {
+    process_start_error_ = GetLastError();
+    CloseHandleOrDie(stdin_pipe_write);
+    CloseHandleOrDie(stdout_pipe_read);
+  }
+
+  // The parent never uses the child's ends of the pipes.
+  CloseHandleOrDie(stdin_pipe_read);
+  CloseHandleOrDie(stdout_pipe_write);
+  free(command_line);
+}
+
+// Writes `input_data` to the child's stdin while appending everything the
+// child writes on stdout to `*output_data`, then waits for the child to exit.
+// Returns true if the child exited with status 0; otherwise returns false and
+// stores a description in `*error`.
+bool Subprocess::Communicate(const std::string& input_data,
+                             std::string* output_data, std::string* error) {
+  if (process_start_error_ != ERROR_SUCCESS) {
+    *error = Win32ErrorMessage(process_start_error_);
+    return false;
+  }
+
+  GOOGLE_CHECK(child_handle_ != nullptr) << "Must call Start() first.";
+
+  // Number of bytes of input_data already written to the child.
+  size_t input_pos = 0;
+
+  while (child_stdout_ != nullptr) {
+    HANDLE handles[2];
+    int handle_count = 0;
+
+    if (child_stdin_ != nullptr) {
+      handles[handle_count++] = child_stdin_;
+    }
+    if (child_stdout_ != nullptr) {
+      handles[handle_count++] = child_stdout_;
+    }
+
+    DWORD wait_result =
+        WaitForMultipleObjects(handle_count, handles, FALSE, INFINITE);
+
+    HANDLE signaled_handle = nullptr;
+    if (wait_result >= WAIT_OBJECT_0 &&
+        wait_result < WAIT_OBJECT_0 + handle_count) {
+      signaled_handle = handles[wait_result - WAIT_OBJECT_0];
+    } else if (wait_result == WAIT_FAILED) {
+      GOOGLE_LOG(FATAL) << "WaitForMultipleObjects: "
+                        << Win32ErrorMessage(GetLastError());
+    } else {
+      GOOGLE_LOG(FATAL) << "WaitForMultipleObjects: Unexpected return code: "
+                        << wait_result;
+    }
+
+    if (signaled_handle == child_stdin_) {
+      DWORD n;
+      if (!WriteFile(child_stdin_, input_data.data() + input_pos,
+                     input_data.size() - input_pos, &n, nullptr)) {
+        // Child closed pipe.  Presumably it will report an error later.
+        // Pretend we're done for now.
+        input_pos = input_data.size();
+      } else {
+        input_pos += n;
+      }
+
+      if (input_pos == input_data.size()) {
+        // We're done writing.  Close.
+        CloseHandleOrDie(child_stdin_);
+        child_stdin_ = nullptr;
+      }
+    } else if (signaled_handle == child_stdout_) {
+      char buffer[4096];
+      DWORD n;
+
+      if (!ReadFile(child_stdout_, buffer, sizeof(buffer), &n, nullptr)) {
+        // We're done reading.  Close.
+        CloseHandleOrDie(child_stdout_);
+        child_stdout_ = nullptr;
+      } else {
+        output_data->append(buffer, n);
+      }
+    }
+  }
+
+  if (child_stdin_ != nullptr) {
+    // Child did not finish reading input before it closed the output.
+    // Presumably it exited with an error.
+    CloseHandleOrDie(child_stdin_);
+    child_stdin_ = nullptr;
+  }
+
+  DWORD wait_result = WaitForSingleObject(child_handle_, INFINITE);
+
+  if (wait_result == WAIT_FAILED) {
+    GOOGLE_LOG(FATAL) << "WaitForSingleObject: "
+                      << Win32ErrorMessage(GetLastError());
+  } else if (wait_result != WAIT_OBJECT_0) {
+    GOOGLE_LOG(FATAL) << "WaitForSingleObject: Unexpected return code: "
+                      << wait_result;
+  }
+
+  DWORD exit_code;
+  if (!GetExitCodeProcess(child_handle_, &exit_code)) {
+    GOOGLE_LOG(FATAL) << "GetExitCodeProcess: "
+                      << Win32ErrorMessage(GetLastError());
+  }
+
+  CloseHandleOrDie(child_handle_);
+  child_handle_ = nullptr;
+
+  if (exit_code != 0) {
+    *error = absl::Substitute("Plugin failed with status code $0.", exit_code);
+    return false;
+  }
+
+  return true;
+}
+
+// Returns the system-provided (US English) message for `error_code`, or a
+// generic message containing the code if the system cannot format it.
+std::string Subprocess::Win32ErrorMessage(DWORD error_code) {
+  // Stays null if FormatMessageA() fails, so it must be initialized.
+  char* message = nullptr;
+
+  // With FORMAT_MESSAGE_ALLOCATE_BUFFER the function allocates the buffer and
+  // stores its address through the lpBuffer argument; that is why the address
+  // of `message` is passed, cast to LPSTR.
+  const DWORD length = FormatMessageA(
+      FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
+          FORMAT_MESSAGE_IGNORE_INSERTS,
+      nullptr, error_code, MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US),
+      (LPSTR)&message,  // NOT A BUG!
+      0, nullptr);
+
+  if (length == 0 || message == nullptr) {
+    return absl::Substitute("Unknown Win32 error $0.", error_code);
+  }
+
+  std::string result = message;
+  LocalFree(message);
+  return result;
+}
+
+// ===================================================================
+
+#else  // _WIN32
+
+Subprocess::Subprocess()
+    : child_pid_(-1), child_stdin_(-1), child_stdout_(-1) {}
+
+// Closes whichever pipe descriptors are still open.
+Subprocess::~Subprocess() {
+  if (child_stdin_ != -1) {
+    close(child_stdin_);
+  }
+  if (child_stdout_ != -1) {
+    close(child_stdout_);
+  }
+}
+
+// Creates the stdin/stdout pipes, forks, and executes `program` in the child.
+// If fork() fails, the error is printed to stderr, every resource created
+// here is released, and the object is left with no child; Communicate() then
+// returns false.
+void Subprocess::Start(const std::string& program, SearchMode search_mode) {
+  // Note that we assume that there are no other threads, thus we don't have to
+  // do crazy stuff like using socket pairs or avoiding libc locks.
+
+  // [0] is read end, [1] is write end.
+  int stdin_pipe[2];
+  int stdout_pipe[2];
+
+  int p0 = pipe(stdin_pipe);
+  int p1 = pipe(stdout_pipe);
+  UPB_ASSERT(p0 != -1);
+  UPB_ASSERT(p1 != -1);
+
+  char* argv[2] = {portable_strdup(program.c_str()), nullptr};
+
+  child_pid_ = fork();
+  if (child_pid_ == -1) {
+    std::cerr << "fork: " << strerror(errno) << std::endl;
+
+    // No child exists, so nothing else will ever release these.
+    free(argv[0]);
+    if (p0 != -1) {
+      close(stdin_pipe[0]);
+      close(stdin_pipe[1]);
+    }
+    if (p1 != -1) {
+      close(stdout_pipe[0]);
+      close(stdout_pipe[1]);
+    }
+  } else if (child_pid_ == 0) {
+    // We are the child.
+    dup2(stdin_pipe[0], STDIN_FILENO);
+    dup2(stdout_pipe[1], STDOUT_FILENO);
+
+    close(stdin_pipe[0]);
+    close(stdin_pipe[1]);
+    close(stdout_pipe[0]);
+    close(stdout_pipe[1]);
+
+    switch (search_mode) {
+      case SEARCH_PATH:
+        execvp(argv[0], argv);
+        break;
+      case EXACT_NAME:
+        execv(argv[0], argv);
+        break;
+    }
+
+    // Write directly to STDERR_FILENO to avoid stdio code paths that may do
+    // stuff that is unsafe here.
+    int ignored;
+    ignored = write(STDERR_FILENO, argv[0], strlen(argv[0]));
+    const char* message =
+        ": program not found or is not executable\n"
+        "Please specify a program using absolute path or make sure "
+        "the program is available in your PATH system variable\n";
+    ignored = write(STDERR_FILENO, message, strlen(message));
+    (void)ignored;
+
+    // Must use _exit() rather than exit() to avoid flushing output buffers
+    // that will also be flushed by the parent.
+    _exit(1);
+  } else {
+    free(argv[0]);
+
+    // The parent never uses the child's ends of the pipes.
+    close(stdin_pipe[0]);
+    close(stdout_pipe[1]);
+
+    child_stdin_ = stdin_pipe[1];
+    child_stdout_ = stdout_pipe[0];
+  }
+}
+
+// Writes `input_data` to the child's stdin while appending everything the
+// child writes on stdout to `*output_data`, then waits for the child to exit.
+// Returns true if the child exited with status 0.  Returns false and stores a
+// description in `*error` if no child is running, if waiting for the child
+// fails, or if the child exited with a non-zero status or was killed by a
+// signal.
+bool Subprocess::Communicate(const std::string& input_data,
+                             std::string* output_data, std::string* error) {
+  if (child_pid_ == -1 || child_stdin_ == -1) {
+    // Without a child, waitpid() below would be called with pid -1 and could
+    // never succeed; report the problem instead of relying on an assertion
+    // that may be compiled out.
+    *error = "Subprocess is not running: Start() was not called or failed.";
+    return false;
+  }
+
+  // The "sighandler_t" typedef is GNU-specific, so define our own.
+  typedef void SignalHandler(int);
+
+  // Make sure SIGPIPE is disabled so that if the child dies it doesn't kill us.
+  SignalHandler* old_pipe_handler = signal(SIGPIPE, SIG_IGN);
+
+  // Number of bytes of input_data already written to the child.
+  size_t input_pos = 0;
+  int max_fd = std::max(child_stdin_, child_stdout_);
+
+  while (child_stdout_ != -1) {
+    fd_set read_fds;
+    fd_set write_fds;
+    FD_ZERO(&read_fds);
+    FD_ZERO(&write_fds);
+    if (child_stdout_ != -1) {
+      FD_SET(child_stdout_, &read_fds);
+    }
+    if (child_stdin_ != -1) {
+      FD_SET(child_stdin_, &write_fds);
+    }
+
+    if (select(max_fd + 1, &read_fds, &write_fds, nullptr, nullptr) < 0) {
+      if (errno == EINTR) {
+        // Interrupted by signal.  Try again.
+        continue;
+      } else {
+        std::cerr << "select: " << strerror(errno) << std::endl;
+        UPB_ASSERT(0);
+      }
+    }
+
+    if (child_stdin_ != -1 && FD_ISSET(child_stdin_, &write_fds)) {
+      ssize_t n = write(child_stdin_, input_data.data() + input_pos,
+                        input_data.size() - input_pos);
+      if (n < 0) {
+        // Child closed pipe.  Presumably it will report an error later.
+        // Pretend we're done for now.
+        input_pos = input_data.size();
+      } else {
+        input_pos += (size_t)n;
+      }
+
+      if (input_pos == input_data.size()) {
+        // We're done writing.  Close.
+        close(child_stdin_);
+        child_stdin_ = -1;
+      }
+    }
+
+    if (child_stdout_ != -1 && FD_ISSET(child_stdout_, &read_fds)) {
+      char buffer[4096];
+      ssize_t n = read(child_stdout_, buffer, sizeof(buffer));
+
+      if (n > 0) {
+        output_data->append(buffer, (size_t)n);
+      } else {
+        // We're done reading.  Close.
+        close(child_stdout_);
+        child_stdout_ = -1;
+      }
+    }
+  }
+
+  if (child_stdin_ != -1) {
+    // Child did not finish reading input before it closed the output.
+    // Presumably it exited with an error.
+    close(child_stdin_);
+    child_stdin_ = -1;
+  }
+
+  int status;
+  while (waitpid(child_pid_, &status, 0) == -1) {
+    if (errno != EINTR) {
+      // Retrying cannot help for any other error; report it rather than
+      // looping forever when assertions are compiled out.
+      *error = absl::Substitute("waitpid: $0", strerror(errno));
+      signal(SIGPIPE, old_pipe_handler);
+      return false;
+    }
+  }
+
+  // Restore SIGPIPE handling.
+  signal(SIGPIPE, old_pipe_handler);
+
+  if (WIFEXITED(status)) {
+    if (WEXITSTATUS(status) != 0) {
+      int error_code = WEXITSTATUS(status);
+      *error =
+          absl::Substitute("Plugin failed with status code $0.", error_code);
+      return false;
+    }
+  } else if (WIFSIGNALED(status)) {
+    int signal = WTERMSIG(status);
+    *error = absl::Substitute("Plugin killed by signal $0.", signal);
+    return false;
+  } else {
+    *error = "Neither WEXITSTATUS nor WTERMSIG is true?";
+    return false;
+  }
+
+  return true;
+}
+
+#endif  // !_WIN32
+
+}  // namespace upbc
diff --git a/upbc/subprocess.h b/upbc/subprocess.h
new file mode 100644
index 0000000000..14276d1320
--- /dev/null
+++ b/upbc/subprocess.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2009-2022, Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Shamelessly copied from the protobuf compiler's subprocess.h
+// except this version passes strings instead of Messages.
+
+#ifndef UPBC_SUBPROCESS_H_
+#define UPBC_SUBPROCESS_H_
+
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN  // right...
+#endif
+#include <windows.h>
+#else  // _WIN32
+#include <sys/types.h>
+#include <unistd.h>
+#endif  // !_WIN32
+#include <string>
+
+namespace upbc {
+
+// Utility class for launching sub-processes.
+class Subprocess {
+ public:
+  Subprocess();
+  ~Subprocess();
+
+  // The object owns the pipe handles / file descriptors and closes them in
+  // its destructor, so a copy would close them a second time.
+  Subprocess(const Subprocess&) = delete;
+  Subprocess& operator=(const Subprocess&) = delete;
+
+  enum SearchMode {
+    SEARCH_PATH,  // Use PATH environment variable.
+    EXACT_NAME    // Program is an exact file name; don't use the PATH.
+  };
+
+  // Start the subprocess.  Currently we don't provide a way to specify
+  // arguments as protoc plugins don't have any.
+  void Start(const std::string& program, SearchMode search_mode);
+
+  // Pipe the input message to the subprocess's stdin, then close the pipe.
+  // Meanwhile, read from the subprocess's stdout and copy into *output_data.
+  // All this is done carefully to avoid deadlocks.
+  // Returns true if successful.  On any sort of error, returns false and sets
+  // *error to a description of the problem.
+  bool Communicate(const std::string& input_data, std::string* output_data,
+                   std::string* error);
+
+#ifdef _WIN32
+  // Given an error code, returns a human-readable error message.  This is
+  // defined here so that CommandLineInterface can share it.
+  static std::string Win32ErrorMessage(DWORD error_code);
+#endif
+
+ private:
+#ifdef _WIN32
+  DWORD process_start_error_;
+  HANDLE child_handle_;
+
+  // The file handles for our end of the child's pipes.  We close each and
+  // set it to NULL when no longer needed.
+  HANDLE child_stdin_;
+  HANDLE child_stdout_;
+
+#else   // _WIN32
+  pid_t child_pid_;
+
+  // The file descriptors for our end of the child's pipes.  We close each and
+  // set it to -1 when no longer needed.
+  int child_stdin_;
+  int child_stdout_;
+
+#endif  // !_WIN32
+};
+
+}  // namespace upbc
+
+#endif  // UPBC_SUBPROCESS_H_