diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index 4e75233c2b..99db650a26 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -8186,6 +8186,15 @@ static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, *op = kUpb_DecodeOp_UnknownField; } +UPB_FORCEINLINE /* Decode-op fixup for UTF-8 enforcement: writes kUpb_DecodeOp_String into *op when the field mode has kUpb_LabelFlags_IsAlternate set and d->options contains kUpb_DecodeOption_AlwaysValidateUtf8; otherwise *op is left unchanged. The only caller in this patch invokes it when op is kUpb_DecodeOp_Bytes. NOTE(review): IsAlternate presumably identifies a string field that would otherwise be decoded without validation - confirm against the MiniTable field definition. */ +static void _upb_Decoder_MaybeVerifyUtf8(upb_Decoder* d, + const upb_MiniTableField* field, + int* op) { + if ((field->UPB_ONLYBITS(mode) & kUpb_LabelFlags_IsAlternate) && + UPB_UNLIKELY(d->options & kUpb_DecodeOption_AlwaysValidateUtf8)) + *op = kUpb_DecodeOp_String; +} + static int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, const upb_MiniTableField* field) { enum { kRepeatedBase = 19 }; @@ -8242,6 +8251,8 @@ static int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, if (op == kUpb_DecodeOp_SubMessage) { _upb_Decoder_CheckUnlinked(d, mt, field, &op); + } else if (op == kUpb_DecodeOp_Bytes) { /* op selected the Bytes path; let the AlwaysValidateUtf8 option redirect it to the String op */ + _upb_Decoder_MaybeVerifyUtf8(d, field, &op); } return op; diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index ae22247b56..872581a1bf 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -3985,6 +3985,16 @@ enum { * be created by the parser or the message-copying logic in message/copy.h. */ kUpb_DecodeOption_ExperimentalAllowUnlinked = 4, + + /* EXPERIMENTAL: + * + * If set, decoding will enforce UTF-8 validation for string fields, even for + * proto2 or fields with `features.utf8_validation = NONE`. Normally, only + * proto3 string fields will be validated for UTF-8. Decoding will return + * kUpb_DecodeStatus_BadUtf8 for non-UTF-8 strings, which is the same behavior + * as non-UTF-8 proto3 string fields. 
+ */ + kUpb_DecodeOption_AlwaysValidateUtf8 = 8, /* bit flag following ExperimentalAllowUnlinked = 4; the decoder tests it with a bitwise AND on d->options */ }; UPB_INLINE uint32_t upb_DecodeOptions_MaxDepth(uint16_t depth) { diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index 237232d282..6c848d83fe 100644 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -7702,6 +7702,15 @@ static void _upb_Decoder_CheckUnlinked(upb_Decoder* d, const upb_MiniTable* mt, *op = kUpb_DecodeOp_UnknownField; } +UPB_FORCEINLINE /* Decode-op fixup for UTF-8 enforcement: writes kUpb_DecodeOp_String into *op when the field mode has kUpb_LabelFlags_IsAlternate set and d->options contains kUpb_DecodeOption_AlwaysValidateUtf8; otherwise *op is left unchanged. The only caller in this patch invokes it when op is kUpb_DecodeOp_Bytes. NOTE(review): IsAlternate presumably identifies a string field that would otherwise be decoded without validation - confirm against the MiniTable field definition. */ +static void _upb_Decoder_MaybeVerifyUtf8(upb_Decoder* d, + const upb_MiniTableField* field, + int* op) { + if ((field->UPB_ONLYBITS(mode) & kUpb_LabelFlags_IsAlternate) && + UPB_UNLIKELY(d->options & kUpb_DecodeOption_AlwaysValidateUtf8)) + *op = kUpb_DecodeOp_String; +} + static int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, const upb_MiniTableField* field) { enum { kRepeatedBase = 19 }; @@ -7758,6 +7767,8 @@ static int _upb_Decoder_GetDelimitedOp(upb_Decoder* d, const upb_MiniTable* mt, if (op == kUpb_DecodeOp_SubMessage) { _upb_Decoder_CheckUnlinked(d, mt, field, &op); + } else if (op == kUpb_DecodeOp_Bytes) { /* op selected the Bytes path; let the AlwaysValidateUtf8 option redirect it to the String op */ + _upb_Decoder_MaybeVerifyUtf8(d, field, &op); } return op; diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index 433d25c131..397804a339 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -3987,6 +3987,16 @@ enum { * be created by the parser or the message-copying logic in message/copy.h. */ kUpb_DecodeOption_ExperimentalAllowUnlinked = 4, + + /* EXPERIMENTAL: + * + * If set, decoding will enforce UTF-8 validation for string fields, even for + * proto2 or fields with `features.utf8_validation = NONE`. Normally, only + * proto3 string fields will be validated for UTF-8. Decoding will return + * kUpb_DecodeStatus_BadUtf8 for non-UTF-8 strings, which is the same behavior + * as non-UTF-8 proto3 string fields. 
+ */ + kUpb_DecodeOption_AlwaysValidateUtf8 = 8, /* bit flag following ExperimentalAllowUnlinked = 4; the decoder tests it with a bitwise AND on d->options */ }; UPB_INLINE uint32_t upb_DecodeOptions_MaxDepth(uint16_t depth) {