Ported protobuf's dtoa() function for text format and JSON.

3 years ago · bc7b5dcadf
parent 43e36bb49f
commit bc7b5dcadf
5 changed files with 81 additions and 22 deletions
--- a/python/pb_unit_tests/message_test_wrapper.py
+++ b/python/pb_unit_tests/message_test_wrapper.py
@ -35,11 +35,10 @@ message_test.MessageTest.testExtendInt32WithNothing_proto3.__unittest_expecting_
 message_test.MessageTest.testExtendStringWithNothing_proto2.__unittest_expecting_failure__ = True
 message_test.MessageTest.testExtendStringWithNothing_proto3.__unittest_expecting_failure__ = True

-# Our float printing suffers from not having dtoa().
+# Python/C++ customizes the C++ TextFormat to always print trailing ".0" for
+# floats. upb doesn't do this, it matches C++ TextFormat.
 message_test.MessageTest.testFloatPrinting_proto2.__unittest_expecting_failure__ = True
 message_test.MessageTest.testFloatPrinting_proto3.__unittest_expecting_failure__ = True
-message_test.MessageTest.testHighPrecisionDoublePrinting_proto2.__unittest_expecting_failure__ = True
-message_test.MessageTest.testHighPrecisionDoublePrinting_proto3.__unittest_expecting_failure__ = True

 # For these tests we are throwing the correct error, only the text of the error
 # message is a mismatch.  For technical reasons around the limited API, matching
--- a/upb/json_encode.c
+++ b/upb/json_encode.c
@ -38,6 +38,7 @@

 #include "upb/decode.h"
 #include "upb/reflection.h"
+#include "upb/upb_internal.h"

 /* Must be last. */
 #include "upb/port_def.inc"
@ -305,7 +306,7 @@ static void jsonenc_string(jsonenc* e, upb_StringView str) {
  jsonenc_putstr(e, "\"");
 }

-static void jsonenc_double(jsonenc* e, const char* fmt, double val) {
+static bool upb_JsonEncode_HandleSpecialDoubles(jsonenc* e, double val) {
  if (val == INFINITY) {
    jsonenc_putstr(e, "\"Infinity\"");
  } else if (val == -INFINITY) {
@ -313,18 +314,23 @@ static void jsonenc_double(jsonenc* e, const char* fmt, double val) {
  } else if (val != val) {
    jsonenc_putstr(e, "\"NaN\"");
  } else {
-    char* p = e->ptr;
-    jsonenc_printf(e, fmt, val);
-
-    /* printf() is dependent on locales; sadly there is no easy and portable way
-     * to avoid this. This little post-processing step will translate 1,2 -> 1.2
-     * since JSON needs the latter. Arguably a hack, but it is simple and the
-     * alternatives are far more complicated, platform-dependent, and/or larger
-     * in code size. */
-    for (char* end = e->ptr; p < end; p++) {
-      if (*p == ',') *p = '.';
-    }
+    return false;
  }
+  return true;
+}
+
+static void upb_JsonEncode_Double(jsonenc* e, double val) {
+  if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return;
+  char buf[32];
+  _upb_EncodeRoundTripDouble(val, buf, sizeof(buf));
+  jsonenc_putstr(e, buf);
+}
+
+static void upb_JsonEncode_Float(jsonenc* e, float val) {
+  if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return;
+  char buf[32];
+  _upb_EncodeRoundTripFloat(val, buf, sizeof(buf));
+  jsonenc_putstr(e, buf);
 }

 static void jsonenc_wrapper(jsonenc* e, const upb_Message* msg,
@ -517,7 +523,7 @@ static void jsonenc_value(jsonenc* e, const upb_Message* msg,
      jsonenc_putstr(e, "null");
      break;
    case 2:
-      jsonenc_double(e, "%.17g", val.double_val);
+      upb_JsonEncode_Double(e, val.double_val);
      break;
    case 3:
      jsonenc_string(e, val.str_val);
@ -582,10 +588,10 @@ static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
      jsonenc_putstr(e, val.bool_val ? "true" : "false");
      break;
    case kUpb_CType_Float:
-      jsonenc_double(e, "%.9g", val.float_val);
+      upb_JsonEncode_Float(e, val.float_val);
      break;
    case kUpb_CType_Double:
-      jsonenc_double(e, "%.17g", val.double_val);
+      upb_JsonEncode_Double(e, val.double_val);
      break;
    case kUpb_CType_Int32:
      jsonenc_printf(e, "%" PRId32, val.int32_val);
--- a/upb/text_encode.c
+++ b/upb/text_encode.c
@ -35,6 +35,7 @@
 #include <string.h>

 #include "upb/reflection.h"
+#include "upb/upb_internal.h"

 // Must be last.
 #include "upb/port_def.inc"
@ -177,12 +178,18 @@ static void txtenc_field(txtenc* e, upb_MessageValue val,
    case kUpb_CType_Bool:
      txtenc_putstr(e, val.bool_val ? "true" : "false");
      break;
-    case kUpb_CType_Float:
-      txtenc_printf(e, "%f", val.float_val);
+    case kUpb_CType_Float: {
+      char buf[32];
+      _upb_EncodeRoundTripFloat(val.float_val, buf, sizeof(buf));
+      txtenc_putstr(e, buf);
      break;
-    case kUpb_CType_Double:
-      txtenc_printf(e, "%f", val.double_val);
+    }
+    case kUpb_CType_Double: {
+      char buf[32];
+      _upb_EncodeRoundTripDouble(val.double_val, buf, sizeof(buf));
+      txtenc_putstr(e, buf);
      break;
+    }
    case kUpb_CType_Int32:
      txtenc_printf(e, "%" PRId32, val.int32_val);
      break;
--- a/upb/upb.c
+++ b/upb/upb.c
@ -26,6 +26,7 @@
 */

 #include <errno.h>
+#include <float.h>
 #include <stdarg.h>
 #include <stddef.h>
 #include <stdint.h>
@ -326,3 +327,37 @@ bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
  r2->parent = r1;
  return true;
 }
+
+/* Miscellaneous utilities ****************************************************/
+
+static void upb_FixLocale(char* p) {
+  /* printf() is dependent on locales; sadly there is no easy and portable way
+   * to avoid this. This little post-processing step will translate 1,2 -> 1.2
+   * since JSON needs the latter. Arguably a hack, but it is simple and the
+   * alternatives are far more complicated, platform-dependent, and/or larger
+   * in code size. */
+  for (; *p; p++) {
+    if (*p == ',') *p = '.';
+  }
+}
+
+
+void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size) {
+  assert(size >= kUpb_RoundTripBufferSize);
+  snprintf(buf, size, "%.*g", DBL_DIG, val);
+  if (strtod(buf, NULL) != val) {
+    snprintf(buf, size, "%.*g", DBL_DIG + 2, val);
+    assert(strtod(buf, NULL) == val);
+  }
+  upb_FixLocale(buf);
+}
+
+void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size) {
+  assert(size >= kUpb_RoundTripBufferSize);
+  snprintf(buf, size, "%.*g", FLT_DIG, val);
+  if (strtof(buf, NULL) != val) {
+    snprintf(buf, size, "%.*g", FLT_DIG + 3, val);
+    assert(strtof(buf, NULL) == val);
+  }
+  upb_FixLocale(buf);
+}
--- a/upb/upb_internal.h
+++ b/upb/upb_internal.h
@ -55,4 +55,16 @@ struct upb_Arena {
  mem_block *freelist, *freelist_tail;
 };

+// Encodes a float or double that is round-trippable, but as short as possible.
+// These routines are not fully optimal (not guaranteed to be shortest), but are
+// short-ish and match the implementation that has been used in protobuf since
+// the beginning.
+//
+// The given buffer size must be at least kUpb_RoundTripBufferSize.
+enum {
+  kUpb_RoundTripBufferSize = 32
+};
+void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size);
+void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size);
+
 #endif /* UPB_INT_H_ */