Use a more general and simpler (but slower) approach in Timestamp.ToDatetime

The attempt at a more optimized approach doesn't round-trip all values of `datetime` on all platforms because `datetime.fromtimestamp(tzinfo)` is limited by the range of values accepted by `time.gmtime`, which can be substantially narrower than `datetime.min` to `datetime.max`. (The documentation notes that either `OverflowError` or `OSError` can be raised in that case, and that the range is often limited to the years 1970 through 2038, versus 1 through 9999. See also https://github.com/python/cpython/issues/110042; the use of `gmtime` here seems unnecessary when the tzinfo supports the entire range.) So, supporting that whole range would require fallback logic that uses this general approach anyway, which in turn would require a redundant set of tests for error behavior, amounting to a reimplementation of the whole function. In addition, `datetime.fromtimestamp` doesn't support the full precision of `datetime` (https://github.com/python/cpython/issues/109849), which required adding additional code and an additional assumption (that tz offsets are not sub-second and that the tz offset does not change mid-second). Added test cases for `datetime.min` in addition to the ones for `datetime.max`. Adjusted the examples and variable names slightly. PiperOrigin-RevId: 569259168
1 year ago · f5e6fb02cc
parent 30b6251dea
commit f5e6fb02cc
2 changed files with 43 additions and 42 deletions
--- a/python/google/protobuf/internal/well_known_types.py
+++ b/python/google/protobuf/internal/well_known_types.py
@ -34,6 +34,11 @@ _MICROS_PER_SECOND = 1000000
 _SECONDS_PER_DAY = 24 * 3600
 _DURATION_SECONDS_MAX = 315576000000

+_EPOCH_DATETIME_NAIVE = datetime.datetime(1970, 1, 1, tzinfo=None)
+_EPOCH_DATETIME_AWARE = _EPOCH_DATETIME_NAIVE.replace(
+    tzinfo=datetime.timezone.utc
+)
+

 class Any(object):
  """Class for Any Message type."""
@ -218,40 +223,21 @@ class Timestamp(object):

      Otherwise, returns a timezone-aware datetime in the input timezone.
    """
-    # This could be made simpler and more efficient if there was a way to
-    # construct a datetime from a microseconds-since-epoch integer. For now, we
-    # can construct the datetime from the timestamp in seconds, then set the
-    # microseconds separately to avoid an unnecessary loss of precision (beyond
-    # truncating nanosecond precision to micro). This ensures that datetimes
-    # round-trip correctly (at least if timezone offset is not sub-second and
-    # does not change mid-second).
-
-    # Take care to handle Timestamps where |nanos| > 1s consistent with previous
-    # behavior.
-    #
-    # TODO: b/301980950 - Instead, strictly check that self.nanos is in the
-    # expected range.
-    seconds = self.seconds + self.nanos // _NANOS_PER_SECOND
+    # Using datetime.fromtimestamp for this would avoid constructing an extra
+    # timedelta object and possibly an extra datetime. Unfortunately, that has
+    # the disadvantage of not handling the full precision (on all platforms, see
+    # https://github.com/python/cpython/issues/109849) or full range (on some
+    # platforms, see https://github.com/python/cpython/issues/110042) of
+    # datetime.
+    delta = datetime.timedelta(
+        seconds=self.seconds,
+        microseconds=_RoundTowardZero(self.nanos, _NANOS_PER_MICROSECOND),
+    )
    if tzinfo is None:
-      # utcfromtimestamp will be deprecated in 3.12, so avoiding it even though
-      # this requires a call to replace.
-      dt = datetime.datetime.fromtimestamp(
-          seconds, datetime.timezone.utc
-      ).replace(tzinfo=None)
+      return _EPOCH_DATETIME_NAIVE + delta
    else:
-      dt = datetime.datetime.fromtimestamp(seconds, tzinfo)
-    if self.nanos != 0:
-      nanos = _RoundTowardZero(
-          self.nanos % _NANOS_PER_SECOND, _NANOS_PER_MICROSECOND
-      )
-      # This gets the correct result if tzinfo.utcoffset neither affects nor
-      # is affected by dt.microsecond, i.e. the offset is not sub-second and
-      # never changes mid-second. It doesn't violate the contract of tzinfo for
-      # either of those to be the case, though one would hope not to run into
-      # that in a situation where it would matter.
-      if nanos != 0:
-        dt = dt.replace(microsecond=nanos)
-    return dt
+      # Note the tz conversion has to come after the timedelta arithmetic.
+      return (_EPOCH_DATETIME_AWARE + delta).astimezone(tzinfo)

  def FromDatetime(self, dt):
    """Converts datetime to Timestamp.
--- a/python/google/protobuf/internal/well_known_types_test.py
+++ b/python/google/protobuf/internal/well_known_types_test.py
@ -14,10 +14,10 @@ import datetime
 import unittest

 from google.protobuf import any_pb2
+from google.protobuf.internal import any_test_pb2
 from google.protobuf import duration_pb2
 from google.protobuf import struct_pb2
 from google.protobuf import timestamp_pb2
-from google.protobuf.internal import any_test_pb2
 from google.protobuf.internal import well_known_types
 from google.protobuf import text_format
 from google.protobuf.internal import _parameterized
@ -256,10 +256,15 @@ class TimeUtilTest(TimeUtilTestBase):

  def testTimezoneNaiveMaxDatetimeConversion(self):
    ts = timestamp_pb2.Timestamp()
-    naive_end_of_time = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
-    ts.FromDatetime(naive_end_of_time)
-    # TODO Re-enable once windows issue is fixed
-    # self.assertEqual(naive_end_of_time, ts.ToDatetime())
+    naive_max_datetime = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+    ts.FromDatetime(naive_max_datetime)
+    self.assertEqual(naive_max_datetime, ts.ToDatetime())
+
+  def testTimezoneNaiveMinDatetimeConversion(self):
+    ts = timestamp_pb2.Timestamp()
+    naive_min_datetime = datetime.datetime(1, 1, 1)
+    ts.FromDatetime(naive_min_datetime)
+    self.assertEqual(naive_min_datetime, ts.ToDatetime())

  # Two hours after the Unix Epoch, around the world.
  @_parameterized.named_parameters(
@ -327,14 +332,24 @@ class TimeUtilTest(TimeUtilTestBase):
    self.assertEqual(tz_aware_future, ts.ToDatetime(tz))

  def testTimezoneAwareMaxDatetimeConversion(self):
-    tz = _TZ_PACIFIC
    ts = timestamp_pb2.Timestamp()
-    tz_aware_end_of_time = datetime.datetime(
+    tz_aware_max_datetime = datetime.datetime(
        9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
    )
-    ts.FromDatetime(tz_aware_end_of_time.astimezone(tz))
-    # TODO Re-enable once windows issue is fixed
-    # self.assertEqual(tz_aware_end_of_time, ts.ToDatetime(tz))
+    ts.FromDatetime(tz_aware_max_datetime)
+    self.assertEqual(
+        tz_aware_max_datetime, ts.ToDatetime(datetime.timezone.utc)
+    )
+
+  def testTimezoneAwareMinDatetimeConversion(self):
+    ts = timestamp_pb2.Timestamp()
+    tz_aware_min_datetime = datetime.datetime(
+        1, 1, 1, tzinfo=datetime.timezone.utc
+    )
+    ts.FromDatetime(tz_aware_min_datetime)
+    self.assertEqual(
+        tz_aware_min_datetime, ts.ToDatetime(datetime.timezone.utc)
+    )

  def testNanosOneSecond(self):
    # TODO: b/301980950 - Test error behavior instead once ToDatetime validates