From 6b808a4072f727f18cd6de86cbc19d10f47170ba Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 3 Jun 2020 11:43:59 -0700 Subject: [PATCH] Fixed all UBSan issues and added UBSan CI checks. --- generated_for_cmake/upb/json/parser.c | 142 +++++++++++++------------- kokoro/ubuntu/build.sh | 4 + upb/def.c | 2 +- upb/encode.c | 12 +-- upb/json/parser.rl | 4 +- upb/json_decode.c | 2 +- upb/json_encode.c | 2 +- upb/msg.c | 11 +- upb/pb/varint.int.h | 3 +- upb/table.c | 4 +- upb/text_encode.c | 2 +- 11 files changed, 98 insertions(+), 90 deletions(-) diff --git a/generated_for_cmake/upb/json/parser.c b/generated_for_cmake/upb/json/parser.c index 1423add9c5..34571880f4 100644 --- a/generated_for_cmake/upb/json/parser.c +++ b/generated_for_cmake/upb/json/parser.c @@ -644,7 +644,9 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, } if (p->accumulated != p->accumulate_buf) { - memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); + if (p->accumulated_len) { + memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); + } p->accumulated = p->accumulate_buf; } @@ -2573,11 +2575,11 @@ static bool does_fieldmask_end(upb_json_parser *p) { * final state once, when the closing '"' is seen. */ -#line 2778 "upb/json/parser.rl" +#line 2780 "upb/json/parser.rl" -#line 2581 "upb/json/parser.c" +#line 2583 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 1, 1, 3, 1, 4, 1, 6, 1, 7, 1, 8, 1, @@ -2832,7 +2834,7 @@ static const int json_en_value_machine = 78; static const int json_en_main = 1; -#line 2781 "upb/json/parser.rl" +#line 2783 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -2855,7 +2857,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 2859 "upb/json/parser.c" +#line 2861 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -2930,147 +2932,147 @@ _match: switch ( *_acts++ ) { case 1: -#line 2586 "upb/json/parser.rl" +#line 2588 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 2: -#line 2588 "upb/json/parser.rl" +#line 2590 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 23;goto _again;} } break; case 3: -#line 2592 "upb/json/parser.rl" +#line 2594 "upb/json/parser.rl" { start_text(parser, p); } break; case 4: -#line 2593 "upb/json/parser.rl" +#line 2595 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 5: -#line 2599 "upb/json/parser.rl" +#line 2601 "upb/json/parser.rl" { start_hex(parser); } break; case 6: -#line 2600 "upb/json/parser.rl" +#line 2602 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 7: -#line 2601 "upb/json/parser.rl" +#line 2603 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 8: -#line 2607 "upb/json/parser.rl" +#line 2609 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 9: -#line 2613 "upb/json/parser.rl" +#line 2615 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 10: -#line 2618 "upb/json/parser.rl" +#line 2620 "upb/json/parser.rl" { start_year(parser, p); } break; case 11: -#line 2619 "upb/json/parser.rl" +#line 2621 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_year(parser, p)); } break; case 12: -#line 2623 "upb/json/parser.rl" +#line 2625 "upb/json/parser.rl" { start_month(parser, p); } break; case 13: -#line 2624 "upb/json/parser.rl" +#line 2626 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_month(parser, p)); } break; case 14: -#line 2628 "upb/json/parser.rl" +#line 2630 "upb/json/parser.rl" { start_day(parser, p); } break; case 15: -#line 2629 "upb/json/parser.rl" +#line 2631 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_day(parser, p)); } break; case 16: -#line 2633 "upb/json/parser.rl" +#line 2635 "upb/json/parser.rl" { start_hour(parser, p); } break; case 17: -#line 2634 "upb/json/parser.rl" +#line 2636 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hour(parser, p)); } break; case 18: -#line 2638 "upb/json/parser.rl" +#line 2640 "upb/json/parser.rl" { start_minute(parser, p); } break; case 19: -#line 2639 "upb/json/parser.rl" +#line 2641 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_minute(parser, p)); } break; case 20: -#line 2643 "upb/json/parser.rl" +#line 2645 "upb/json/parser.rl" { start_second(parser, p); } break; case 21: -#line 2644 "upb/json/parser.rl" +#line 2646 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_second(parser, p)); } break; case 22: -#line 2649 "upb/json/parser.rl" +#line 2651 "upb/json/parser.rl" { start_duration_base(parser, p); } break; case 23: -#line 2650 "upb/json/parser.rl" +#line 2652 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_duration_base(parser, p)); } break; case 24: -#line 2652 "upb/json/parser.rl" +#line 2654 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 25: -#line 2657 "upb/json/parser.rl" +#line 2659 "upb/json/parser.rl" { start_timestamp_base(parser); } break; case 26: -#line 2659 "upb/json/parser.rl" +#line 2661 "upb/json/parser.rl" { start_timestamp_fraction(parser, p); } break; case 27: -#line 2660 "upb/json/parser.rl" +#line 2662 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } break; case 28: -#line 2662 "upb/json/parser.rl" +#line 2664 "upb/json/parser.rl" { start_timestamp_zone(parser, p); } break; case 29: -#line 2663 "upb/json/parser.rl" +#line 2665 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } break; case 30: -#line 2665 "upb/json/parser.rl" +#line 2667 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 31: -#line 2670 "upb/json/parser.rl" +#line 2672 "upb/json/parser.rl" { start_fieldmask_path_text(parser, p); } break; case 32: -#line 2671 "upb/json/parser.rl" +#line 2673 "upb/json/parser.rl" { end_fieldmask_path_text(parser, p); } break; case 33: -#line 2676 "upb/json/parser.rl" +#line 2678 "upb/json/parser.rl" { start_fieldmask_path(parser); } break; case 34: -#line 2677 "upb/json/parser.rl" +#line 2679 "upb/json/parser.rl" { end_fieldmask_path(parser); } break; case 35: -#line 2683 "upb/json/parser.rl" +#line 2685 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 36: -#line 2688 "upb/json/parser.rl" +#line 2690 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { {stack[top++] = cs; cs = 47;goto _again;} @@ -3084,11 +3086,11 @@ _match: } break; case 37: -#line 2701 "upb/json/parser.rl" +#line 2703 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 78;goto _again;} } break; case 38: -#line 2706 "upb/json/parser.rl" +#line 2708 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { start_any_member(parser, p); @@ -3098,11 +3100,11 @@ _match: } break; case 39: -#line 2713 "upb/json/parser.rl" +#line 2715 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 40: -#line 2716 "upb/json/parser.rl" +#line 2718 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { end_any_member(parser, p); @@ -3112,7 +3114,7 @@ _match: } break; case 41: -#line 2727 "upb/json/parser.rl" +#line 2729 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { start_any_object(parser, p); @@ -3122,7 +3124,7 @@ _match: } break; case 42: -#line 2736 "upb/json/parser.rl" +#line 2738 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { CHECK_RETURN_TOP(end_any_object(parser, p)); @@ -3132,54 +3134,54 @@ _match: } break; case 43: -#line 2748 "upb/json/parser.rl" +#line 2750 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 44: -#line 2752 "upb/json/parser.rl" +#line 2754 "upb/json/parser.rl" { end_array(parser); } break; case 45: -#line 2757 "upb/json/parser.rl" +#line 2759 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_number(parser, p)); } break; case 46: -#line 2758 "upb/json/parser.rl" +#line 2760 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 47: -#line 2760 "upb/json/parser.rl" +#line 2762 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 48: -#line 2761 "upb/json/parser.rl" +#line 2763 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 49: -#line 2763 "upb/json/parser.rl" +#line 2765 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, true)); } break; case 50: -#line 2765 "upb/json/parser.rl" +#line 2767 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, false)); } break; case 51: -#line 2767 "upb/json/parser.rl" +#line 2769 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_null(parser)); } break; case 52: -#line 2769 "upb/json/parser.rl" +#line 2771 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject_full(parser)); } break; case 53: -#line 2770 "upb/json/parser.rl" +#line 2772 "upb/json/parser.rl" { end_subobject_full(parser); } break; case 54: -#line 2775 "upb/json/parser.rl" +#line 2777 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 3183 "upb/json/parser.c" +#line 3185 "upb/json/parser.c" } } @@ -3196,32 +3198,32 @@ _again: while ( __nacts-- > 0 ) { switch ( *__acts++ ) { case 0: -#line 2584 "upb/json/parser.rl" +#line 2586 "upb/json/parser.rl" { p--; {cs = stack[--top]; if ( p == pe ) goto _test_eof; goto _again;} } break; case 46: -#line 2758 "upb/json/parser.rl" +#line 2760 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 49: -#line 2763 "upb/json/parser.rl" +#line 2765 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, true)); } break; case 50: -#line 2765 "upb/json/parser.rl" +#line 2767 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, false)); } break; case 51: -#line 2767 "upb/json/parser.rl" +#line 2769 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_null(parser)); } break; case 53: -#line 2770 "upb/json/parser.rl" +#line 2772 "upb/json/parser.rl" { end_subobject_full(parser); } break; -#line 3225 "upb/json/parser.c" +#line 3227 "upb/json/parser.c" } } } @@ -3229,7 +3231,7 @@ goto _again;} } _out: {} } -#line 2803 "upb/json/parser.rl" +#line 2805 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p); @@ -3272,13 +3274,13 @@ static void json_parser_reset(upb_json_parser *p) { /* Emit Ragel initialization of the parser. */ -#line 3276 "upb/json/parser.c" +#line 3278 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 2845 "upb/json/parser.rl" +#line 2847 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); diff --git a/kokoro/ubuntu/build.sh b/kokoro/ubuntu/build.sh index e73deef0ab..3642f2ce41 100644 --- a/kokoro/ubuntu/build.sh +++ b/kokoro/ubuntu/build.sh @@ -19,4 +19,8 @@ if [[ $(uname) = "Linux" ]]; then # Verify the ASAN build. Have to exclude test_conformance_upb as protobuf # currently leaks memory in the conformance test runner. bazel test --copt=-fsanitize=address --linkopt=-fsanitize=address --test_output=errors -- :all -:test_conformance_upb + + # Verify the UBSan build. Have to exclude Lua as the version we are using + # fails some UBSan tests. + CC=clang CXX=clang++ bazel test -c dbg --copt=-fsanitize=undefined --copt=-fno-sanitize=function,vptr --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1 -- :all -:test_lua fi diff --git a/upb/def.c b/upb/def.c index 50dccd8ff1..20b2be9969 100644 --- a/upb/def.c +++ b/upb/def.c @@ -18,7 +18,7 @@ static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) { str_t *ret = upb_malloc(alloc, sizeof(*ret) + len); if (!ret) return NULL; ret->len = len; - memcpy(ret->str, data, len); + if (len) memcpy(ret->str, data, len); ret->str[len] = '\0'; return ret; } diff --git a/upb/encode.c b/upb/encode.c index 010f5c5c39..87162325ad 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -335,13 +335,11 @@ static bool upb_encode_map(upb_encstate *e, const char *field_mem, size_t size; upb_strview key = upb_strtable_iter_key(&i); const upb_value val = upb_strtable_iter_value(&i); - const void *keyp = - map->key_size == UPB_MAPTYPE_STRING ? (void *)&key : key.data; - const void *valp = - map->val_size == UPB_MAPTYPE_STRING ? upb_value_getptr(val) : &val; - - CHK(upb_encode_scalarfield(e, valp, entry, val_field, false)); - CHK(upb_encode_scalarfield(e, keyp, entry, key_field, false)); + upb_map_entry ent; + _upb_map_fromkey(key, &ent.k, map->key_size); + _upb_map_fromvalue(val, &ent.v, map->val_size); + CHK(upb_encode_scalarfield(e, &ent.v, entry, val_field, false)); + CHK(upb_encode_scalarfield(e, &ent.k, entry, key_field, false)); size = (e->limit - e->ptr) - pre_len; CHK(upb_put_varint(e, size)); CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); diff --git a/upb/json/parser.rl b/upb/json/parser.rl index eccd53039f..d5ffb65425 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -642,7 +642,9 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, } if (p->accumulated != p->accumulate_buf) { - memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); + if (p->accumulated_len) { + memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); + } p->accumulated = p->accumulate_buf; } diff --git a/upb/json_decode.c b/upb/json_decode.c index e49ee854c8..dd8e58254f 100644 --- a/upb/json_decode.c +++ b/upb/json_decode.c @@ -473,7 +473,7 @@ static void jsondec_skipval(jsondec *d) { /* Base64 decoding for bytes fields. ******************************************/ -static int jsondec_base64_tablelookup(const char ch) { +static unsigned int jsondec_base64_tablelookup(const char ch) { /* Table includes the normal base64 chars plus the URL-safe variant. */ const signed char table[256] = { -1, -1, -1, -1, -1, -1, -1, diff --git a/upb/json_encode.c b/upb/json_encode.c index 04c9594ed7..4bd9b599d9 100644 --- a/upb/json_encode.c +++ b/upb/json_encode.c @@ -52,7 +52,7 @@ static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) { memcpy(e->ptr, data, len); e->ptr += len; } else { - memcpy(e->ptr, data, have); + if (have) memcpy(e->ptr, data, have); e->ptr += have; e->overflow += (len - have); } diff --git a/upb/msg.c b/upb/msg.c index fd6c59b0b2..25747c8481 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -35,16 +35,17 @@ static size_t upb_msg_sizeof(const upb_msglayout *l) { return l->size + upb_msg_internalsize(l); } -static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { - return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal); +static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { + ptrdiff_t size = sizeof(upb_msg_internal); + return UPB_PTR_AT(msg, -size, upb_msg_internal); } -static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { - return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal); +static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { + return (upb_msg_internal*)upb_msg_getinternal_const(msg); } void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) { - size_t internal = upb_msg_internalsize(l); + ptrdiff_t internal = upb_msg_internalsize(l); void *mem = UPB_PTR_AT(msg, -internal, char); memset(mem, 0, l->size + internal); } diff --git a/upb/pb/varint.int.h b/upb/pb/varint.int.h index 59158d3b75..8ab0d9f141 100644 --- a/upb/pb/varint.int.h +++ b/upb/pb/varint.int.h @@ -111,7 +111,8 @@ UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) { UPB_INLINE int upb_value_size(uint64_t val) { #ifdef __GNUC__ - int high_bit = 63 - __builtin_clzll(val); /* 0-based, undef if val == 0. */ + /* 0-based, undef if val == 0. */ + int high_bit = val ? 63 - __builtin_clzll(val) : 0; #else int high_bit = 0; uint64_t tmp = val; diff --git a/upb/table.c b/upb/table.c index c5ca3ac49b..084b6a2bbf 100644 --- a/upb/table.c +++ b/upb/table.c @@ -263,7 +263,7 @@ static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1); if (str == NULL) return 0; memcpy(str, &len, sizeof(uint32_t)); - memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); + if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); str[sizeof(uint32_t) + k2.str.len] = '\0'; return (uintptr_t)str; } @@ -277,7 +277,7 @@ static uint32_t strhash(upb_tabkey key) { static bool streql(upb_tabkey k1, lookupkey_t k2) { uint32_t len; char *str = upb_tabstr(k1, &len); - return len == k2.str.len && memcmp(str, k2.str.str, len) == 0; + return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0); } bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) { diff --git a/upb/text_encode.c b/upb/text_encode.c index d426e67ea2..3ebd9cff74 100644 --- a/upb/text_encode.c +++ b/upb/text_encode.c @@ -27,7 +27,7 @@ static void txtenc_putbytes(txtenc *e, const void *data, size_t len) { memcpy(e->ptr, data, len); e->ptr += len; } else { - memcpy(e->ptr, data, have); + if (have) memcpy(e->ptr, data, have); e->ptr += have; e->overflow += (len - have); }