From f0c56f9bc49b5015293eb5242a1982107b7c8b39 Mon Sep 17 00:00:00 2001
From: Samuel Freilich <sfreilich@google.com>
Date: Wed, 27 Sep 2023 17:59:53 -0700
Subject: [PATCH] Ensure Timestamp.ToDatetime(tz) has correct offset

The previous code did not work because timedelta arithmetic does not do timezone conversions. The result of adding a timedelta to a datetime has the same fixed UTC offset as the original datetime. This resulted in the correct timezone not being applied by `Timestamp.ToDatetime(tz)` whenever the UTC offset for the timezone at the represented moment was not the same as the UTC offset for that timezone at the epoch.

Instead, construct the datetime directly from the seconds part of the timestamp. It would be nice to include the nanoseconds as well (truncated to datetime's millisecond precision, but without unnecessary loss of precision). However, that doesn't work, since there isn't a way to construct a datetime from an integer-milliseconds timestamp, just float-seconds, which doesn't allow some datetime values to round-trip correctly. (This does assume that `tzinfo.utcoffset(dt).microseconds` is always zero, and that the value returned by `tzinfo.utcoffset(dt)` doesn't change mid-second. Neither of these is necessarily the case (see https://github.com/python/cpython/issues/49538), though I'd hope they hold in practice.)

This does take some care to still handle non-standard Timestamps where nanos is more than 1,000,000,000 (i.e. more than a second), since previously ToDatetime handled that, as do the other To* conversion methods.

The bug doesn't manifest for UTC (or any fixed-offset timezone), so it can be worked around in a way that will be correct before and after the fix by replacing `ts.ToDatetime(tz)` with `ts.ToDatetime(datetime.timezone.utc).astimezone(tz)`.

PiperOrigin-RevId: 569012931
---
 .../protobuf/internal/well_known_types.py     | 42 +++++++--
 .../internal/well_known_types_test.py         | 94 +++++++++++++++++--
 2 files changed, 118 insertions(+), 18 deletions(-)

diff --git a/python/google/protobuf/internal/well_known_types.py b/python/google/protobuf/internal/well_known_types.py
index ed66a16da9..349e605ac2 100644
--- a/python/google/protobuf/internal/well_known_types.py
+++ b/python/google/protobuf/internal/well_known_types.py
@@ -67,11 +67,6 @@ class Any(object):
     return '/' in self.type_url and self.TypeName() == descriptor.full_name
 
 
-_EPOCH_DATETIME_NAIVE = datetime.datetime.utcfromtimestamp(0)
-_EPOCH_DATETIME_AWARE = datetime.datetime.fromtimestamp(
-    0, tz=datetime.timezone.utc)
-
-
 class Timestamp(object):
   """Class for Timestamp message type."""
 
@@ -223,13 +218,40 @@ class Timestamp(object):
 
       Otherwise, returns a timezone-aware datetime in the input timezone.
     """
-    delta = datetime.timedelta(
-        seconds=self.seconds,
-        microseconds=_RoundTowardZero(self.nanos, _NANOS_PER_MICROSECOND))
+    # This could be made simpler and more efficient if there was a way to
+    # construct a datetime from a microseconds-since-epoch integer. For now, we
+    # can construct the datetime from the timestamp in seconds, then set the
+    # microseconds separately to avoid an unnecessary loss of precision (beyond
+    # truncating nanosecond precision to micro). This ensures that datetimes
+    # round-trip correctly (at least if timezone offset is not sub-second and
+    # does not change mid-second).
+
+    # Take care to handle Timestamps where |nanos| > 1s consistent with previous
+    # behavior.
+    #
+    # TODO: b/301980950 - Instead, strictly check that self.nanos is in the
+    # expected range.
+    seconds = self.seconds + self.nanos // _NANOS_PER_SECOND
     if tzinfo is None:
-      return _EPOCH_DATETIME_NAIVE + delta
+      # utcfromtimestamp will be deprecated in 3.12, so avoiding it even though
+      # this requires a call to replace.
+      dt = datetime.datetime.fromtimestamp(
+          seconds, datetime.timezone.utc
+      ).replace(tzinfo=None)
     else:
-      return _EPOCH_DATETIME_AWARE.astimezone(tzinfo) + delta
+      dt = datetime.datetime.fromtimestamp(seconds, tzinfo)
+    if self.nanos != 0:
+      nanos = _RoundTowardZero(
+          self.nanos % _NANOS_PER_SECOND, _NANOS_PER_MICROSECOND
+      )
+      # This gets the correct result if tzinfo.utcoffset neither affects nor
+      # is affected by dt.microsecond, i.e. the offset is not sub-second and
+      # never changes mid-second. It doesn't violate the contract of tzinfo for
+      # either of those to be the case, though one would hope not to run into
+      # that in a situation where it would matter.
+      if nanos != 0:
+        dt = dt.replace(microsecond=nanos)
+    return dt
 
   def FromDatetime(self, dt):
     """Converts datetime to Timestamp.
diff --git a/python/google/protobuf/internal/well_known_types_test.py b/python/google/protobuf/internal/well_known_types_test.py
index 144ae292e5..dfc9ea5eae 100644
--- a/python/google/protobuf/internal/well_known_types_test.py
+++ b/python/google/protobuf/internal/well_known_types_test.py
@@ -28,9 +28,11 @@ try:
   import zoneinfo  # pylint:disable=g-import-not-at-top
   _TZ_JAPAN = zoneinfo.ZoneInfo('Japan')
   _TZ_PACIFIC = zoneinfo.ZoneInfo('US/Pacific')
+  has_zoneinfo = True
 except ImportError:
   _TZ_JAPAN = datetime.timezone(datetime.timedelta(hours=9), 'Japan')
   _TZ_PACIFIC = datetime.timezone(datetime.timedelta(hours=-8), 'US/Pacific')
+  has_zoneinfo = False
 
 
 class TimeUtilTestBase(_parameterized.TestCase):
@@ -216,20 +218,18 @@ class TimeUtilTest(TimeUtilTestBase):
     message.FromNanoseconds(-1999)
     self.assertEqual(-1, message.ToMicroseconds())
 
-  def testTimezoneNaiveDatetimeConversion(self):
+  def testTimezoneNaiveDatetimeConversionNearEpoch(self):
     message = timestamp_pb2.Timestamp()
     naive_utc_epoch = datetime.datetime(1970, 1, 1)
     message.FromDatetime(naive_utc_epoch)
     self.assertEqual(0, message.seconds)
     self.assertEqual(0, message.nanos)
-
     self.assertEqual(naive_utc_epoch, message.ToDatetime())
 
     naive_epoch_morning = datetime.datetime(1970, 1, 1, 8, 0, 0, 1)
     message.FromDatetime(naive_epoch_morning)
     self.assertEqual(8 * 3600, message.seconds)
     self.assertEqual(1000, message.nanos)
-
     self.assertEqual(naive_epoch_morning, message.ToDatetime())
 
     message.FromMilliseconds(1999)
@@ -239,13 +239,26 @@ class TimeUtilTest(TimeUtilTestBase):
     self.assertEqual(datetime.datetime(1970, 1, 1, 0, 0, 1, 999000),
                      message.ToDatetime())
 
+  def testTimezoneNaiveDatetimeConversionWhereTimestampLosesPrecision(self):
+    ts = timestamp_pb2.Timestamp()
     naive_future = datetime.datetime(2555, 2, 22, 1, 2, 3, 456789)
-    message.FromDatetime(naive_future)
-    self.assertEqual(naive_future, message.ToDatetime())
+    # The float timestamp for this datetime does not represent the integer
+    # millisecond value with full precision.
+    self.assertNotEqual(
+        naive_future.astimezone(datetime.timezone.utc),
+        datetime.datetime.fromtimestamp(
+            naive_future.timestamp(), datetime.timezone.utc
+        ),
+    )
+    # It still round-trips correctly.
+    ts.FromDatetime(naive_future)
+    self.assertEqual(naive_future, ts.ToDatetime())
 
-    naive_end_of_time = datetime.datetime.max
-    message.FromDatetime(naive_end_of_time)
-    self.assertEqual(naive_end_of_time, message.ToDatetime())
+  def testTimezoneNaiveMaxDatetimeConversion(self):
+    ts = timestamp_pb2.Timestamp()
+    naive_end_of_time = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+    ts.FromDatetime(naive_end_of_time)
+    self.assertEqual(naive_end_of_time, ts.ToDatetime())
 
   # Two hours after the Unix Epoch, around the world.
   @_parameterized.named_parameters(
@@ -279,6 +292,71 @@ class TimeUtilTest(TimeUtilTestBase):
         aware_datetime)
     self.assertEqual(tzinfo, aware_datetime.tzinfo)
 
+  @unittest.skipIf(
+      not has_zoneinfo,
+      'Versions without zoneinfo use a fixed-offset timezone that does not'
+      ' demonstrate this problem.',
+  )
+  def testDatetimeConversionWithDifferentUtcOffsetThanEpoch(self):
+    # This timezone has a different UTC offset at this date than at the epoch.
+    # The datetime returned by FromDatetime needs to have the correct offset
+    # for the moment represented.
+    tz = _TZ_PACIFIC
+    dt = datetime.datetime(2016, 6, 26, tzinfo=tz)
+    epoch_dt = datetime.datetime.fromtimestamp(
+        0, tz=datetime.timezone.utc
+    ).astimezone(tz)
+    self.assertNotEqual(dt.utcoffset(), epoch_dt.utcoffset())
+    ts = timestamp_pb2.Timestamp()
+    ts.FromDatetime(dt)
+    self.assertEqual(dt, ts.ToDatetime(tzinfo=dt.tzinfo))
+
+  def testTimezoneAwareDatetimeConversionWhereTimestampLosesPrecision(self):
+    tz = _TZ_PACIFIC
+    ts = timestamp_pb2.Timestamp()
+    tz_aware_future = datetime.datetime(2555, 2, 22, 1, 2, 3, 456789, tzinfo=tz)
+    # The float timestamp for this datetime does not represent the integer
+    # millisecond value with full precision.
+    self.assertNotEqual(
+        tz_aware_future,
+        datetime.datetime.fromtimestamp(tz_aware_future.timestamp(), tz),
+    )
+    # It still round-trips correctly.
+    ts.FromDatetime(tz_aware_future)
+    self.assertEqual(tz_aware_future, ts.ToDatetime(tz))
+
+  def testTimezoneAwareMaxDatetimeConversion(self):
+    tz = _TZ_PACIFIC
+    ts = timestamp_pb2.Timestamp()
+    tz_aware_end_of_time = datetime.datetime(
+        9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+    )
+    ts.FromDatetime(tz_aware_end_of_time.astimezone(tz))
+    self.assertEqual(tz_aware_end_of_time, ts.ToDatetime(tz))
+
+  def testNanosOneSecond(self):
+    # TODO: b/301980950 - Test error behavior instead once ToDatetime validates
+    # that nanos are in expected range.
+    tz = _TZ_PACIFIC
+    ts = timestamp_pb2.Timestamp(nanos=1_000_000_000)
+    self.assertEqual(ts.ToDatetime(), datetime.datetime(1970, 1, 1, 0, 0, 1))
+    self.assertEqual(
+        ts.ToDatetime(tz), datetime.datetime(1969, 12, 31, 16, 0, 1, tzinfo=tz)
+    )
+
+  def testNanosNegativeOneSecond(self):
+    # TODO: b/301980950 - Test error behavior instead once ToDatetime validates
+    # that nanos are in expected range.
+    tz = _TZ_PACIFIC
+    ts = timestamp_pb2.Timestamp(nanos=-1_000_000_000)
+    self.assertEqual(
+        ts.ToDatetime(), datetime.datetime(1969, 12, 31, 23, 59, 59)
+    )
+    self.assertEqual(
+        ts.ToDatetime(tz),
+        datetime.datetime(1969, 12, 31, 15, 59, 59, tzinfo=tz),
+    )
+
   def testTimedeltaConversion(self):
     message = duration_pb2.Duration()
     message.FromNanoseconds(1999999999)