diff --git a/python/pb_unit_tests/message_test_wrapper.py b/python/pb_unit_tests/message_test_wrapper.py index d73997b81e..105a763c65 100644 --- a/python/pb_unit_tests/message_test_wrapper.py +++ b/python/pb_unit_tests/message_test_wrapper.py @@ -35,11 +35,10 @@ message_test.MessageTest.testExtendInt32WithNothing_proto3.__unittest_expecting_ message_test.MessageTest.testExtendStringWithNothing_proto2.__unittest_expecting_failure__ = True message_test.MessageTest.testExtendStringWithNothing_proto3.__unittest_expecting_failure__ = True -# Our float printing suffers from not having dtoa(). +# Python/C++ customizes the C++ TextFormat to always print trailing ".0" for +# floats. upb doesn't do this, it matches C++ TextFormat. message_test.MessageTest.testFloatPrinting_proto2.__unittest_expecting_failure__ = True message_test.MessageTest.testFloatPrinting_proto3.__unittest_expecting_failure__ = True -message_test.MessageTest.testHighPrecisionDoublePrinting_proto2.__unittest_expecting_failure__ = True -message_test.MessageTest.testHighPrecisionDoublePrinting_proto3.__unittest_expecting_failure__ = True # For these tests we are throwing the correct error, only the text of the error # message is a mismatch. For technical reasons around the limited API, matching diff --git a/upb/json_encode.c b/upb/json_encode.c index a63b3c63d5..64826e34dd 100644 --- a/upb/json_encode.c +++ b/upb/json_encode.c @@ -38,6 +38,7 @@ #include "upb/decode.h" #include "upb/reflection.h" +#include "upb/upb_internal.h" /* Must be last. */ #include "upb/port_def.inc" @@ -305,7 +306,7 @@ static void jsonenc_string(jsonenc* e, upb_StringView str) { jsonenc_putstr(e, "\""); } -static void jsonenc_double(jsonenc* e, const char* fmt, double val) { +static bool upb_JsonEncode_HandleSpecialDoubles(jsonenc* e, double val) { if (val == INFINITY) { jsonenc_putstr(e, "\"Infinity\""); } else if (val == -INFINITY) { @@ -313,18 +314,23 @@ static void jsonenc_double(jsonenc* e, const char* fmt, double val) { } else if (val != val) { jsonenc_putstr(e, "\"NaN\""); } else { - char* p = e->ptr; - jsonenc_printf(e, fmt, val); - - /* printf() is dependent on locales; sadly there is no easy and portable way - * to avoid this. This little post-processing step will translate 1,2 -> 1.2 - * since JSON needs the latter. Arguably a hack, but it is simple and the - * alternatives are far more complicated, platform-dependent, and/or larger - * in code size. */ - for (char* end = e->ptr; p < end; p++) { - if (*p == ',') *p = '.'; - } + return false; } + return true; +} + +static void upb_JsonEncode_Double(jsonenc* e, double val) { + if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return; + char buf[32]; + _upb_EncodeRoundTripDouble(val, buf, sizeof(buf)); + jsonenc_putstr(e, buf); +} + +static void upb_JsonEncode_Float(jsonenc* e, float val) { + if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return; + char buf[32]; + _upb_EncodeRoundTripFloat(val, buf, sizeof(buf)); + jsonenc_putstr(e, buf); } static void jsonenc_wrapper(jsonenc* e, const upb_Message* msg, @@ -517,7 +523,7 @@ static void jsonenc_value(jsonenc* e, const upb_Message* msg, jsonenc_putstr(e, "null"); break; case 2: - jsonenc_double(e, "%.17g", val.double_val); + upb_JsonEncode_Double(e, val.double_val); break; case 3: jsonenc_string(e, val.str_val); @@ -582,10 +588,10 @@ static void jsonenc_scalar(jsonenc* e, upb_MessageValue val, jsonenc_putstr(e, val.bool_val ? "true" : "false"); break; case kUpb_CType_Float: - jsonenc_double(e, "%.9g", val.float_val); + upb_JsonEncode_Float(e, val.float_val); break; case kUpb_CType_Double: - jsonenc_double(e, "%.17g", val.double_val); + upb_JsonEncode_Double(e, val.double_val); break; case kUpb_CType_Int32: jsonenc_printf(e, "%" PRId32, val.int32_val); diff --git a/upb/text_encode.c b/upb/text_encode.c index 5067a27461..9612375dba 100644 --- a/upb/text_encode.c +++ b/upb/text_encode.c @@ -35,6 +35,7 @@ #include #include "upb/reflection.h" +#include "upb/upb_internal.h" // Must be last. #include "upb/port_def.inc" @@ -177,12 +178,18 @@ static void txtenc_field(txtenc* e, upb_MessageValue val, case kUpb_CType_Bool: txtenc_putstr(e, val.bool_val ? "true" : "false"); break; - case kUpb_CType_Float: - txtenc_printf(e, "%f", val.float_val); + case kUpb_CType_Float: { + char buf[32]; + _upb_EncodeRoundTripFloat(val.float_val, buf, sizeof(buf)); + txtenc_putstr(e, buf); break; - case kUpb_CType_Double: - txtenc_printf(e, "%f", val.double_val); + } + case kUpb_CType_Double: { + char buf[32]; + _upb_EncodeRoundTripDouble(val.double_val, buf, sizeof(buf)); + txtenc_putstr(e, buf); break; + } case kUpb_CType_Int32: txtenc_printf(e, "%" PRId32, val.int32_val); break; diff --git a/upb/upb.c b/upb/upb.c index 43ca2fd1fc..a702d48e55 100644 --- a/upb/upb.c +++ b/upb/upb.c @@ -26,6 +26,7 @@ */ #include +#include #include #include #include @@ -326,3 +327,37 @@ bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) { r2->parent = r1; return true; } + +/* Miscellaneous utilities ****************************************************/ + +static void upb_FixLocale(char* p) { + /* printf() is dependent on locales; sadly there is no easy and portable way + * to avoid this. This little post-processing step will translate 1,2 -> 1.2 + * since JSON needs the latter. Arguably a hack, but it is simple and the + * alternatives are far more complicated, platform-dependent, and/or larger + * in code size. */ + for (; *p; p++) { + if (*p == ',') *p = '.'; + } +} + + +void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size) { + assert(size >= kUpb_RoundTripBufferSize); + snprintf(buf, size, "%.*g", DBL_DIG, val); + if (strtod(buf, NULL) != val) { + snprintf(buf, size, "%.*g", DBL_DIG + 2, val); + assert(strtod(buf, NULL) == val); + } + upb_FixLocale(buf); +} + +void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size) { + assert(size >= kUpb_RoundTripBufferSize); + snprintf(buf, size, "%.*g", FLT_DIG, val); + if (strtof(buf, NULL) != val) { + snprintf(buf, size, "%.*g", FLT_DIG + 3, val); + assert(strtof(buf, NULL) == val); + } + upb_FixLocale(buf); +} diff --git a/upb/upb_internal.h b/upb/upb_internal.h index 74f7c79f0c..d14a175836 100644 --- a/upb/upb_internal.h +++ b/upb/upb_internal.h @@ -55,4 +55,16 @@ struct upb_Arena { mem_block *freelist, *freelist_tail; }; +// Encodes a float or double that is round-trippable, but as short as possible. +// These routines are not fully optimal (not guaranteed to be shortest), but are +// short-ish and match the implementation that has been used in protobuf since +// the beginning. +// +// The given buffer size must be at least kUpb_RoundTripBufferSize. +enum { + kUpb_RoundTripBufferSize = 32 +}; +void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size); +void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size); + #endif /* UPB_INT_H_ */