Fix tests (and src) such that all tests pass again.

16 years ago · 946dcf4a5d
parent b07b1165d3
commit 946dcf4a5d
5 changed files with 169 additions and 74 deletions
--- a/1
+++ b/1
@ -16,6 +16,7 @@ clean:
 src/libupb.a: $(OBJ)
 	ar rcs src/libupb.a $(OBJ)
 tests/test_table: src/libupb.a
+tests/tests: src/libupb.a
 tools/upbc: src/libupb.a
 benchmark/benchmark: src/libupb.a benchmark/google_messages.pb.h benchmark/google_messages.pb.o benchmark/benchmark.o
 	$(CXX) $(CPPFLAGS) -o benchmark/benchmark benchmark/google_messages.pb.o benchmark/benchmark.cc src/libupb.a -lm -lprotobuf -lpthread
--- a/26
+++ b/26
@ -4,6 +4,32 @@ upb - a minimalist implementation of protocol buffers.
 - For API documentation, see the header files.
 - To build type "make".

+
+ROADMAP OF THE SOURCE
+=====================
+
+benchmark/
+  Benchmarks of upb and other protocol buffer implementations.
+descriptor/
+  Files that describe the format of Protocol Buffer "descriptors", which are
+  protocol buffers that describe the format of other protocol buffers.  These
+  are used extensively inside upb.
+labs/
+  Code that is not part of upb, but contains efficiency-related experiments
+  about alternate ways of implementing things.  When possible, these are
+  benchmarked by the tests in benchmark/.  We also test these with the tests
+  in tests/, to ensure that the alternate implementations are actually correct.
+src/
+  The core source directory.  Builds into src/libupb.a.
+tests/
+  Unit tests.
+tools/
+  Command-line tools like the upb compiler.
+
+
+CONTACT
+=======
+
 Author: Joshua Haberman (joshua@reverberate.org, haberman@google.com)
 See LICENSE for copyright information.

--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@ -35,50 +35,21 @@ struct upb_type_info upb_type_info[] = {

 /* This is called by the inline version of the function if the varint turns out
 * to be >= 2 bytes. */
-upb_status_t upb_get_v_uint64_t_full(uint8_t *restrict buf, uint8_t *end,
-                                     uint64_t *restrict val,
+upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
                                     uint8_t **outbuf)
 {
+  /* Decodes a varint from [buf, end) into *val, low 7-bit group first.
+   * At most 10 bytes (70 bits of shift) are consumed, so the shift amount
+   * never reaches the width of uint64_t.
+   *
+   * Returns UPB_STATUS_OK and stores the position after the varint in
+   * *outbuf when a terminating byte (high bit clear) is found within 10
+   * bytes.  Returns UPB_ERROR_UNTERMINATED_VARINT when 10 bytes were read
+   * and the last still had its continuation bit set, and
+   * UPB_STATUS_NEED_MORE_DATA when the buffer ran out before that.
+   * *outbuf is left untouched on any non-OK return. */
  uint8_t last = 0x80;
  *val = 0;
-  for(int bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
+  int bitpos;
+  for(bitpos = 0; bitpos < 70 && buf < end && (last & 0x80); buf++, bitpos += 7)
    *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
-  if(last & 0x80) return UPB_STATUS_NEED_MORE_DATA;
+  if(last & 0x80)  /* Stopped without seeing a terminating byte. */
+    return bitpos >= 70 ? UPB_ERROR_UNTERMINATED_VARINT : UPB_STATUS_NEED_MORE_DATA;
  *outbuf = buf;
  return UPB_STATUS_OK;
 }

-static upb_status_t skip_v_uint64_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf)
-{
-  /* TODO: fix and optimize. */
-  uint8_t last = 0x80;
-  for(; buf < end && (last & 0x80); buf++) {
-    last = *buf;
-  }
-
-  if(last & 0x80) {
-    return UPB_ERROR_UNTERMINATED_VARINT;
-  }
-  *outbuf = buf;
-  return UPB_STATUS_OK;
-}
-
-static upb_status_t skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf)
-{
-  uint8_t *uint32_end = buf + sizeof(uint32_t);
-  if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA;
-  *outbuf = uint32_end;
-  return UPB_STATUS_OK;
-}
-
-static upb_status_t skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf)
-{
-  uint8_t *uint64_end = buf + sizeof(uint64_t);
-  if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA;
-  *outbuf = uint64_end;
-  return UPB_STATUS_OK;
-}
-
 upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
                                  union upb_wire_value *wv, uint8_t **outbuf)
 {
@ -94,9 +65,9 @@ static upb_status_t skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t
                                    uint8_t **outbuf)
 {
  switch(wt) {
-    case UPB_WIRE_TYPE_VARINT: return skip_v_uint64_t(buf, end, outbuf);
-    case UPB_WIRE_TYPE_64BIT:  return skip_f_uint64_t(buf, end, outbuf);
-    case UPB_WIRE_TYPE_32BIT:  return skip_f_uint32_t(buf, end, outbuf);
+    case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64_t(buf, end, outbuf);
+    case UPB_WIRE_TYPE_64BIT:  return upb_skip_f_uint64_t(buf, end, outbuf);
+    case UPB_WIRE_TYPE_32BIT:  return upb_skip_f_uint32_t(buf, end, outbuf);
    case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
    case UPB_WIRE_TYPE_END_GROUP: return UPB_STATUS_OK;
    default: return UPB_ERROR_ILLEGAL;
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@ -208,6 +208,38 @@ INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end,
  return UPB_STATUS_OK;
 }

+/* Skips over a varint in [buf, end) without decoding it.  At most 10 bytes
+ * are examined.
+ *
+ * Returns UPB_STATUS_OK and stores the position after the varint in *outbuf
+ * when a byte with the high bit clear is found within 10 bytes.  Returns
+ * UPB_ERROR_UNTERMINATED_VARINT when 10 bytes were examined and all had the
+ * continuation bit set, and UPB_STATUS_NEED_MORE_DATA when the buffer ended
+ * before that.  *outbuf is left untouched on any non-OK return. */
+INLINE upb_status_t upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end,
+                                        uint8_t **outbuf)
+{
+  int nbytes = 0;
+  uint8_t last = 0x80;
+  /* Count bytes instead of forming buf + 10, which could point past the end
+   * of a short buffer. */
+  for(; nbytes < 10 && buf < end && (last & 0x80); buf++, nbytes++)
+    last = *buf;
+  if(last & 0x80)  /* Stopped without seeing a terminating byte. */
+    return nbytes >= 10 ? UPB_ERROR_UNTERMINATED_VARINT : UPB_STATUS_NEED_MORE_DATA;
+  *outbuf = buf;
+  return UPB_STATUS_OK;
+}
+
+/* Skips a fixed 32-bit value.  Returns UPB_STATUS_NEED_MORE_DATA when fewer
+ * than sizeof(uint32_t) bytes remain in [buf, end); otherwise stores the
+ * position after the value in *outbuf and returns UPB_STATUS_OK. */
+INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end,
+                                        uint8_t **outbuf)
+{
+  /* Compare the remaining length rather than forming buf + 4 first, so no
+   * pointer past the end of the buffer is ever computed. */
+  if(end - buf < (int)sizeof(uint32_t)) return UPB_STATUS_NEED_MORE_DATA;
+  *outbuf = buf + sizeof(uint32_t);
+  return UPB_STATUS_OK;
+}
+
+/* Skips a fixed 64-bit value.  Returns UPB_STATUS_NEED_MORE_DATA when fewer
+ * than sizeof(uint64_t) bytes remain in [buf, end); otherwise stores the
+ * position after the value in *outbuf and returns UPB_STATUS_OK. */
+INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end,
+                                        uint8_t **outbuf)
+{
+  /* Compare the remaining length rather than forming buf + 8 first, so no
+   * pointer past the end of the buffer is ever computed. */
+  if(end - buf < (int)sizeof(uint64_t)) return UPB_STATUS_NEED_MORE_DATA;
+  *outbuf = buf + sizeof(uint64_t);
+  return UPB_STATUS_OK;
+}
+
+
 /* Functions to read .proto values. *******************************************/

 /* These functions read the appropriate wire value for a given .proto type
--- a/tests/tests.c
+++ b/tests/tests.c
@ -3,14 +3,8 @@
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include "descriptor.c"
-#include "upb_context.c"
-#include "upb_enum.c"
-#include "upb_msg.c"
-#include "upb_parse.c"
-#include "upb_serialize.c"
-#include "upb_table.c"
-#include "upb_text.c"
+#include "upb_parse.h"
+#include "upb_context.h"

 int num_assertions = 0;
 #define ASSERT(expr) do { \
@ -25,10 +19,15 @@ static void test_get_v_uint64_t()
    uint8_t name[] = bytes; \
    uint8_t *name ## _buf = name; \
    uint64_t name ## _val = 0; \
-    status = upb_get_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _val, &name ## _buf); \
+    status = upb_get_v_uint64_t(name, name + sizeof(name) - 1, &name ## _val, &name ## _buf); \
    ASSERT(status == UPB_STATUS_OK); \
    ASSERT(name ## _val == val); \
    ASSERT(name ## _buf == name + sizeof(name) - 1);  /* - 1 for NULL */ \
+    /* Test NEED_MORE_DATA. */ \
+    if(sizeof(name) > 2) { \
+      status = upb_get_v_uint64_t(name, name + sizeof(name) - 2, &name ## _val, &name ## _buf); \
+      ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \
+    } \
  }

  TEST(zero,   "\x00",                                                      0ULL);
@ -42,18 +41,37 @@ static void test_get_v_uint64_t()
  TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01",            0x3fdf9f1e1c181ULL);
  TEST(nineb,  "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03",      0x303fdf9f1e1c181ULL);
  TEST(tenb,   "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
+#undef TEST

-  uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01};
-  uint8_t *elevenbyte_buf = elevenbyte;
-  uint64_t elevenbyte_val = 0;
-  upb_status_t status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_val, &elevenbyte_buf);
+  uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+  uint8_t *twelvebyte_buf = twelvebyte;
+  uint64_t twelvebyte_val = 0;
+  upb_status_t status;
+  /* A varint that terminates before hitting the end of the provided buffer,
+   * but in too many bytes (11 instead of 10). */
+  status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_val, &twelvebyte_buf);
  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
-  status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_val, &elevenbyte_buf);
-  /* Byte 10 is 0x80, so we know it's unterminated. */
+
+  /* A varint that terminates simultaneously with the end of the provided
+   * buffer, but in too many bytes (11 instead of 10). */
+  status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_val, &twelvebyte_buf);
  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
-  status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_val, &elevenbyte_buf);
+
+  /* A varint whose buffer ends on exactly the byte where the varint must
+   * terminate, but the final byte does not terminate.  The absolutely most
+   * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
+   * by this point that the varint does not properly terminate.  But we also
+   * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
+   * compromise overall correctness -- clients who supply more data later will
+   * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
+   * more data to supply will (rightly) conclude that their protobuf is corrupt.
+   */
+  status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_val, &twelvebyte_buf);
+  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT ||
+         status == UPB_STATUS_NEED_MORE_DATA);
+
+  status = upb_get_v_uint64_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_val, &twelvebyte_buf);
  ASSERT(status == UPB_STATUS_NEED_MORE_DATA);
-#undef TEST
 }

 static void test_get_v_uint32_t()
@ -63,10 +81,15 @@ static void test_get_v_uint32_t()
    uint8_t name[] = bytes; \
    uint8_t *name ## _buf = name; \
    uint32_t name ## _val = 0; \
-    status = upb_get_v_uint32_t(name ## _buf, name + sizeof(name), &name ## _val, &name ## _buf); \
+    status = upb_get_v_uint32_t(name, name + sizeof(name), &name ## _val, &name ## _buf); \
    ASSERT(status == UPB_STATUS_OK); \
    ASSERT(name ## _val == val); \
    ASSERT(name ## _buf == name + sizeof(name) - 1);  /* - 1 for NULL */ \
+    /* Test NEED_MORE_DATA. */ \
+    if(sizeof(name) > 2) { \
+      status = upb_get_v_uint32_t(name, name + sizeof(name) - 2, &name ## _val, &name ## _buf); \
+      ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \
+    } \
  }

  TEST(zero,   "\x00",                                              0UL);
@ -81,18 +104,37 @@ static void test_get_v_uint32_t()
  TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01",         0xf1e1c181UL);
  TEST(nineb,  "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03",     0xf1e1c181UL);
  TEST(tenb,   "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0xf1e1c181UL);
+#undef TEST

-  uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01};
-  uint8_t *elevenbyte_buf = elevenbyte;
-  uint64_t elevenbyte_val = 0;
-  upb_status_t status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_val, &elevenbyte_buf);
+  uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+  uint8_t *twelvebyte_buf = twelvebyte;
+  uint32_t twelvebyte_val = 0;
+  upb_status_t status;
+  /* A varint that terminates before hitting the end of the provided buffer,
+   * but in too many bytes (11 instead of 10). */
+  status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_val, &twelvebyte_buf);
  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
-  status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_val, &elevenbyte_buf);
-  /* Byte 10 is 0x80, so we know it's unterminated. */
+
+  /* A varint that terminates simultaneously with the end of the provided
+   * buffer, but in too many bytes (11 instead of 10). */
+  status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_val, &twelvebyte_buf);
  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
-  status = upb_get_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_val, &elevenbyte_buf);
+
+  /* A varint whose buffer ends on exactly the byte where the varint must
+   * terminate, but the final byte does not terminate.  The absolutely most
+   * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
+   * by this point that the varint does not properly terminate.  But we also
+   * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
+   * compromise overall correctness -- clients who supply more data later will
+   * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
+   * more data to supply will (rightly) conclude that their protobuf is corrupt.
+   */
+  status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_val, &twelvebyte_buf);
+  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT ||
+         status == UPB_STATUS_NEED_MORE_DATA);
+
+  status = upb_get_v_uint32_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_val, &twelvebyte_buf);
  ASSERT(status == UPB_STATUS_NEED_MORE_DATA);
-#undef TEST
 }

 static void test_skip_v_uint64_t()
@ -101,9 +143,14 @@ static void test_skip_v_uint64_t()
    upb_status_t status; \
    uint8_t name[] = bytes; \
    uint8_t *name ## _buf = name; \
-    status = skip_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _buf); \
+    status = upb_skip_v_uint64_t(name ## _buf, name + sizeof(name), &name ## _buf); \
    ASSERT(status == UPB_STATUS_OK); \
    ASSERT(name ## _buf == name + sizeof(name) - 1);  /* - 1 for NULL */ \
+    /* Test NEED_MORE_DATA. */ \
+    if(sizeof(name) > 2) { \
+      status = upb_skip_v_uint64_t(name, name + sizeof(name) - 2, &name ## _buf); \
+      ASSERT(status == UPB_STATUS_NEED_MORE_DATA); \
+    } \
  }

  TEST(zero,   "\x00");
@ -117,18 +164,36 @@ static void test_skip_v_uint64_t()
  TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01");
  TEST(nineb,  "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03");
  TEST(tenb,   "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07");
+#undef TEST

-  uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01};
-  uint8_t *elevenbyte_buf = elevenbyte;
-  upb_status_t status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_buf);
-  printf("%d\n", status);
+  uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+  uint8_t *twelvebyte_buf = twelvebyte;
+  upb_status_t status;
+  /* A varint that terminates before hitting the end of the provided buffer,
+   * but in too many bytes (11 instead of 10). */
+  status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 12, &twelvebyte_buf);
  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
-  status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_buf);
-  /* Byte 10 is 0x80, so we know it's unterminated. */
+
+  /* A varint that terminates simultaneously with the end of the provided
+   * buffer, but in too many bytes (11 instead of 10). */
+  status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 11, &twelvebyte_buf);
  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT);
-  status = skip_v_uint64_t(elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_buf);
+
+  /* A varint whose buffer ends on exactly the byte where the varint must
+   * terminate, but the final byte does not terminate.  The absolutely most
+   * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
+   * by this point that the varint does not properly terminate.  But we also
+   * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
+   * compromise overall correctness -- clients who supply more data later will
+   * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
+   * more data to supply will (rightly) conclude that their protobuf is corrupt.
+   */
+  status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 10, &twelvebyte_buf);
+  ASSERT(status == UPB_ERROR_UNTERMINATED_VARINT ||
+         status == UPB_STATUS_NEED_MORE_DATA);
+
+  status = upb_skip_v_uint64_t(twelvebyte_buf, twelvebyte + 9, &twelvebyte_buf);
  ASSERT(status == UPB_STATUS_NEED_MORE_DATA);
-#undef TEST
 }

 static void test_get_f_uint32_t()
@ -150,7 +215,7 @@ static void test_get_f_uint32_t()
  uint8_t threeb[] = {0x00, 0x00, 0x00};
  uint8_t *threeb_buf = threeb;
  uint32_t threeb_val;
-  upb_status_t status = upb_get_f_uint32_t(threeb_buf, threeb + sizeof(threeb), &threeb_val, &threeb_buf);
+  upb_status_t status = upb_get_f_uint32_t(threeb, threeb + sizeof(threeb), &threeb_val, &threeb_buf);
  ASSERT(status == UPB_STATUS_NEED_MORE_DATA);

 #undef TEST