From 88b1026d26551b3451eb46dfb65510c0f4ca6588 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 7 Nov 2019 14:10:12 -0800 Subject: [PATCH] Fixed leap year handling by reworking upb_mktime() -> upb_timegm(). The new function name also better reflects the semantics of the function. Like timegm(), this function always converts to/from UTC, not local time. --- generated_for_cmake/upb/json/parser.c | 206 ++++++++++++-------------- upb/json/parser.rl | 68 ++++----- 2 files changed, 123 insertions(+), 151 deletions(-) diff --git a/generated_for_cmake/upb/json/parser.c b/generated_for_cmake/upb/json/parser.c index f72e945881..035ad7c29f 100644 --- a/generated_for_cmake/upb/json/parser.c +++ b/generated_for_cmake/upb/json/parser.c @@ -1689,46 +1689,32 @@ static void start_timestamp_zone(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } -#define EPOCH_YEAR 1970 -#define TM_YEAR_BASE 1900 - -static bool isleap(int year) { - return (year % 4) == 0 && (year % 100 != 0 || (year % 400) == 0); -} - -const unsigned short int __mon_yday[2][13] = { - /* Normal years. */ - { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, - /* Leap years. */ - { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } -}; - -int64_t epoch(int year, int yday, int hour, int min, int sec) { - int64_t years = year - EPOCH_YEAR; - - int64_t leap_days = years / 4 - years / 100 + years / 400; - - int64_t days = years * 365 + yday + leap_days; - int64_t hours = days * 24 + hour; - int64_t mins = hours * 60 + min; - int64_t secs = mins * 60 + sec; - return secs; -} - - -static int64_t upb_mktime(const struct tm *tp) { - int sec = tp->tm_sec; - int min = tp->tm_min; - int hour = tp->tm_hour; - int mday = tp->tm_mday; - int mon = tp->tm_mon; - int year = tp->tm_year + TM_YEAR_BASE; - - /* Calculate day of year from year, month, and day of month. */ - int mon_yday = ((__mon_yday[isleap(year)][mon]) - 1); - int yday = mon_yday + mday; - - return epoch(year, yday, hour, min, sec); +static int div_round_up2(int n, int d) { + return (n + d - 1) / d; +} + +/* epoch_days(1970, 1, 1) == 1970-01-01 == 0. */ +static int epoch_days(int year, int month, int day) { + static const uint16_t month_yday[12] = {0, 31, 59, 90, 120, 151, + 181, 212, 243, 273, 304, 334}; + int febs_since_0 = month > 2 ? year + 1 : year; + int leap_days_since_0 = div_round_up2(febs_since_0, 4) - + div_round_up2(febs_since_0, 100) + + div_round_up2(febs_since_0, 400); + int days_since_0 = + 365 * year + month_yday[month - 1] + (day - 1) + leap_days_since_0; + + /* Convert from 0-epoch (0001-01-01 BC) to Unix Epoch (1970-01-01 AD). + * Since the "BC" system does not have a year zero, 1 BC == year zero. */ + return days_since_0 - 719528; +} + +static int64_t upb_timegm(const struct tm *tp) { + int64_t ret = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday); + ret = (ret * 24) + tp->tm_hour; + ret = (ret * 60) + tp->tm_min; + ret = (ret * 60) + tp->tm_sec; + return ret; } static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) { @@ -1758,7 +1744,7 @@ static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) { } /* Normalize tm */ - seconds = upb_mktime(&p->tm); + seconds = upb_timegm(&p->tm); /* Check timestamp boundary */ if (seconds < -62135596800) { @@ -2589,11 +2575,11 @@ static bool does_fieldmask_end(upb_json_parser *p) { * final state once, when the closing '"' is seen. */ -#line 2794 "upb/json/parser.rl" +#line 2780 "upb/json/parser.rl" -#line 2597 "upb/json/parser.c" +#line 2583 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 1, 1, 3, 1, 4, 1, 6, 1, 7, 1, 8, 1, @@ -2848,7 +2834,7 @@ static const int json_en_value_machine = 78; static const int json_en_main = 1; -#line 2797 "upb/json/parser.rl" +#line 2783 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -2871,7 +2857,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 2875 "upb/json/parser.c" +#line 2861 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -2946,147 +2932,147 @@ _match: switch ( *_acts++ ) { case 1: -#line 2602 "upb/json/parser.rl" +#line 2588 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 2: -#line 2604 "upb/json/parser.rl" +#line 2590 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 23;goto _again;} } break; case 3: -#line 2608 "upb/json/parser.rl" +#line 2594 "upb/json/parser.rl" { start_text(parser, p); } break; case 4: -#line 2609 "upb/json/parser.rl" +#line 2595 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 5: -#line 2615 "upb/json/parser.rl" +#line 2601 "upb/json/parser.rl" { start_hex(parser); } break; case 6: -#line 2616 "upb/json/parser.rl" +#line 2602 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 7: -#line 2617 "upb/json/parser.rl" +#line 2603 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 8: -#line 2623 "upb/json/parser.rl" +#line 2609 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 9: -#line 2629 "upb/json/parser.rl" +#line 2615 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 10: -#line 2634 "upb/json/parser.rl" +#line 2620 "upb/json/parser.rl" { start_year(parser, p); } break; case 11: -#line 2635 "upb/json/parser.rl" +#line 2621 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_year(parser, p)); } break; case 12: -#line 2639 "upb/json/parser.rl" +#line 2625 "upb/json/parser.rl" { start_month(parser, p); } break; case 13: -#line 2640 "upb/json/parser.rl" +#line 2626 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_month(parser, p)); } break; case 14: -#line 2644 "upb/json/parser.rl" +#line 2630 "upb/json/parser.rl" { start_day(parser, p); } break; case 15: -#line 2645 "upb/json/parser.rl" +#line 2631 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_day(parser, p)); } break; case 16: -#line 2649 "upb/json/parser.rl" +#line 2635 "upb/json/parser.rl" { start_hour(parser, p); } break; case 17: -#line 2650 "upb/json/parser.rl" +#line 2636 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hour(parser, p)); } break; case 18: -#line 2654 "upb/json/parser.rl" +#line 2640 "upb/json/parser.rl" { start_minute(parser, p); } break; case 19: -#line 2655 "upb/json/parser.rl" +#line 2641 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_minute(parser, p)); } break; case 20: -#line 2659 "upb/json/parser.rl" +#line 2645 "upb/json/parser.rl" { start_second(parser, p); } break; case 21: -#line 2660 "upb/json/parser.rl" +#line 2646 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_second(parser, p)); } break; case 22: -#line 2665 "upb/json/parser.rl" +#line 2651 "upb/json/parser.rl" { start_duration_base(parser, p); } break; case 23: -#line 2666 "upb/json/parser.rl" +#line 2652 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_duration_base(parser, p)); } break; case 24: -#line 2668 "upb/json/parser.rl" +#line 2654 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 25: -#line 2673 "upb/json/parser.rl" +#line 2659 "upb/json/parser.rl" { start_timestamp_base(parser); } break; case 26: -#line 2675 "upb/json/parser.rl" +#line 2661 "upb/json/parser.rl" { start_timestamp_fraction(parser, p); } break; case 27: -#line 2676 "upb/json/parser.rl" +#line 2662 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } break; case 28: -#line 2678 "upb/json/parser.rl" +#line 2664 "upb/json/parser.rl" { start_timestamp_zone(parser, p); } break; case 29: -#line 2679 "upb/json/parser.rl" +#line 2665 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } break; case 30: -#line 2681 "upb/json/parser.rl" +#line 2667 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 31: -#line 2686 "upb/json/parser.rl" +#line 2672 "upb/json/parser.rl" { start_fieldmask_path_text(parser, p); } break; case 32: -#line 2687 "upb/json/parser.rl" +#line 2673 "upb/json/parser.rl" { end_fieldmask_path_text(parser, p); } break; case 33: -#line 2692 "upb/json/parser.rl" +#line 2678 "upb/json/parser.rl" { start_fieldmask_path(parser); } break; case 34: -#line 2693 "upb/json/parser.rl" +#line 2679 "upb/json/parser.rl" { end_fieldmask_path(parser); } break; case 35: -#line 2699 "upb/json/parser.rl" +#line 2685 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 36: -#line 2704 "upb/json/parser.rl" +#line 2690 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { {stack[top++] = cs; cs = 47;goto _again;} @@ -3100,11 +3086,11 @@ _match: } break; case 37: -#line 2717 "upb/json/parser.rl" +#line 2703 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 78;goto _again;} } break; case 38: -#line 2722 "upb/json/parser.rl" +#line 2708 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { start_any_member(parser, p); @@ -3114,11 +3100,11 @@ _match: } break; case 39: -#line 2729 "upb/json/parser.rl" +#line 2715 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 40: -#line 2732 "upb/json/parser.rl" +#line 2718 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { end_any_member(parser, p); @@ -3128,7 +3114,7 @@ _match: } break; case 41: -#line 2743 "upb/json/parser.rl" +#line 2729 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { start_any_object(parser, p); @@ -3138,7 +3124,7 @@ _match: } break; case 42: -#line 2752 "upb/json/parser.rl" +#line 2738 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { CHECK_RETURN_TOP(end_any_object(parser, p)); @@ -3148,54 +3134,54 @@ _match: } break; case 43: -#line 2764 "upb/json/parser.rl" +#line 2750 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 44: -#line 2768 "upb/json/parser.rl" +#line 2754 "upb/json/parser.rl" { end_array(parser); } break; case 45: -#line 2773 "upb/json/parser.rl" +#line 2759 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_number(parser, p)); } break; case 46: -#line 2774 "upb/json/parser.rl" +#line 2760 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 47: -#line 2776 "upb/json/parser.rl" +#line 2762 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 48: -#line 2777 "upb/json/parser.rl" +#line 2763 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 49: -#line 2779 "upb/json/parser.rl" +#line 2765 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, true)); } break; case 50: -#line 2781 "upb/json/parser.rl" +#line 2767 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, false)); } break; case 51: -#line 2783 "upb/json/parser.rl" +#line 2769 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_null(parser)); } break; case 52: -#line 2785 "upb/json/parser.rl" +#line 2771 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject_full(parser)); } break; case 53: -#line 2786 "upb/json/parser.rl" +#line 2772 "upb/json/parser.rl" { end_subobject_full(parser); } break; case 54: -#line 2791 "upb/json/parser.rl" +#line 2777 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 3199 "upb/json/parser.c" +#line 3185 "upb/json/parser.c" } } @@ -3212,32 +3198,32 @@ _again: while ( __nacts-- > 0 ) { switch ( *__acts++ ) { case 0: -#line 2600 "upb/json/parser.rl" +#line 2586 "upb/json/parser.rl" { p--; {cs = stack[--top]; if ( p == pe ) goto _test_eof; goto _again;} } break; case 46: -#line 2774 "upb/json/parser.rl" +#line 2760 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 49: -#line 2779 "upb/json/parser.rl" +#line 2765 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, true)); } break; case 50: -#line 2781 "upb/json/parser.rl" +#line 2767 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, false)); } break; case 51: -#line 2783 "upb/json/parser.rl" +#line 2769 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_null(parser)); } break; case 53: -#line 2786 "upb/json/parser.rl" +#line 2772 "upb/json/parser.rl" { end_subobject_full(parser); } break; -#line 3241 "upb/json/parser.c" +#line 3227 "upb/json/parser.c" } } } @@ -3245,7 +3231,7 @@ goto _again;} } _out: {} } -#line 2819 "upb/json/parser.rl" +#line 2805 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p); @@ -3288,13 +3274,13 @@ static void json_parser_reset(upb_json_parser *p) { /* Emit Ragel initialization of the parser. */ -#line 3292 "upb/json/parser.c" +#line 3278 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 2861 "upb/json/parser.rl" +#line 2847 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 2641dda31c..bf38916765 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -1687,46 +1687,32 @@ static void start_timestamp_zone(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } -#define EPOCH_YEAR 1970 -#define TM_YEAR_BASE 1900 - -static bool isleap(int year) { - return (year % 4) == 0 && (year % 100 != 0 || (year % 400) == 0); -} - -const unsigned short int __mon_yday[2][13] = { - /* Normal years. */ - { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, - /* Leap years. */ - { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } -}; - -int64_t epoch(int year, int yday, int hour, int min, int sec) { - int64_t years = year - EPOCH_YEAR; - - int64_t leap_days = years / 4 - years / 100 + years / 400; - - int64_t days = years * 365 + yday + leap_days; - int64_t hours = days * 24 + hour; - int64_t mins = hours * 60 + min; - int64_t secs = mins * 60 + sec; - return secs; -} - - -static int64_t upb_mktime(const struct tm *tp) { - int sec = tp->tm_sec; - int min = tp->tm_min; - int hour = tp->tm_hour; - int mday = tp->tm_mday; - int mon = tp->tm_mon; - int year = tp->tm_year + TM_YEAR_BASE; - - /* Calculate day of year from year, month, and day of month. */ - int mon_yday = ((__mon_yday[isleap(year)][mon]) - 1); - int yday = mon_yday + mday; - - return epoch(year, yday, hour, min, sec); +static int div_round_up2(int n, int d) { + return (n + d - 1) / d; +} + +/* epoch_days(1970, 1, 1) == 1970-01-01 == 0. */ +static int epoch_days(int year, int month, int day) { + static const uint16_t month_yday[12] = {0, 31, 59, 90, 120, 151, + 181, 212, 243, 273, 304, 334}; + int febs_since_0 = month > 2 ? year + 1 : year; + int leap_days_since_0 = div_round_up2(febs_since_0, 4) - + div_round_up2(febs_since_0, 100) + + div_round_up2(febs_since_0, 400); + int days_since_0 = + 365 * year + month_yday[month - 1] + (day - 1) + leap_days_since_0; + + /* Convert from 0-epoch (0001-01-01 BC) to Unix Epoch (1970-01-01 AD). + * Since the "BC" system does not have a year zero, 1 BC == year zero. */ + return days_since_0 - 719528; +} + +static int64_t upb_timegm(const struct tm *tp) { + int64_t ret = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday); + ret = (ret * 24) + tp->tm_hour; + ret = (ret * 60) + tp->tm_min; + ret = (ret * 60) + tp->tm_sec; + return ret; } static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) { @@ -1756,7 +1742,7 @@ static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) { } /* Normalize tm */ - seconds = upb_mktime(&p->tm); + seconds = upb_timegm(&p->tm); /* Check timestamp boundary */ if (seconds < -62135596800) {