From 51dfbe708fa98782982d70d12e83237d0f04cf6e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:10:05 -0700 Subject: [PATCH 01/13] Make pytz an optional dependency --- .circleci/config.yml | 2 +- .github/workflows/unit-tests.yml | 8 +++--- ci/deps/actions-310-minimum_versions.yaml | 2 +- ci/deps/actions-310.yaml | 2 +- ci/deps/actions-311-downstream_compat.yaml | 2 +- ci/deps/actions-311-numpydev.yaml | 1 - ci/deps/actions-311-pyarrownightly.yaml | 1 - ci/deps/actions-311.yaml | 2 +- ci/deps/actions-312.yaml | 2 +- ci/deps/actions-pypy-39.yaml | 1 - ci/deps/circle-311-arm64.yaml | 2 +- ci/meta.yaml | 1 - doc/source/getting_started/install.rst | 12 +++++++- doc/source/whatsnew/v3.0.0.rst | 31 ++++++++++++++++++++ environment.yml | 2 +- pandas/__init__.py | 2 +- pandas/_libs/tslibs/conversion.pyx | 5 ++-- pandas/_libs/tslibs/strptime.pyx | 6 ++-- pandas/_libs/tslibs/timezones.pyx | 33 ++++++++-------------- pandas/_libs/tslibs/tzconversion.pyx | 20 +++++++------ pandas/compat/_optional.py | 1 + pandas/core/dtypes/dtypes.py | 6 ++-- pandas/io/json/_table_schema.py | 6 ++-- pandas/util/_print_versions.py | 1 - pyproject.toml | 3 +- requirements-dev.txt | 2 +- 26 files changed, 95 insertions(+), 61 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4acc6473e6add..bb7773c3096a3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -54,7 +54,7 @@ jobs: /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev . 
~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 - python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 + python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir export PANDAS_CI=1 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 600ffd56b6d56..7b6635214abe9 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -230,7 +230,7 @@ jobs: . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true" - python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 + python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir export PANDAS_CI=1 @@ -268,7 +268,7 @@ jobs: /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev . 
~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 - python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 + python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir @@ -289,7 +289,7 @@ jobs: # In general, this will remain frozen(present, but not running) until: # - The next unreleased Python version has released beta 1 # - This version should be available on GitHub Actions. - # - Our required build/runtime dependencies(numpy, pytz, Cython, python-dateutil) + # - Our required build/runtime dependencies(numpy, Cython, python-dateutil) # support that unreleased Python version. # To unfreeze, comment out the ``if: false`` condition, and make sure you update # the name of the workflow and Python version in actions/setup-python ``python-version:`` @@ -342,7 +342,7 @@ jobs: python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy python -m pip install versioneer[toml] - python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov + python -m pip install python-dateutil tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov python -m pip install -ve . 
--no-build-isolation --no-index --no-deps --config-settings=setup-args="--werror" python -m pip list diff --git a/ci/deps/actions-310-minimum_versions.yaml b/ci/deps/actions-310-minimum_versions.yaml index a9c205d24d212..9026ad52df01c 100644 --- a/ci/deps/actions-310-minimum_versions.yaml +++ b/ci/deps/actions-310-minimum_versions.yaml @@ -23,7 +23,6 @@ dependencies: # required dependencies - python-dateutil=2.8.2 - numpy=1.23.5 - - pytz=2020.1 # optional dependencies - beautifulsoup4=4.11.2 @@ -49,6 +48,7 @@ dependencies: - pyreadstat=1.2.0 - pytables=3.8.0 - python-calamine=0.1.7 + - pytz=2023.4 - pyxlsb=1.0.10 - s3fs=2022.11.0 - scipy=1.10.0 diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index ed7dfe1a3c17e..3ee54cc75bfec 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -20,7 +20,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -46,6 +45,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index dd1d341c70a9b..d4382b37f338e 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -22,7 +22,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -48,6 +47,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/deps/actions-311-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml index 61a0eabbf133c..68f7ef4e3630e 100644 --- a/ci/deps/actions-311-numpydev.yaml +++ b/ci/deps/actions-311-numpydev.yaml @@ -18,7 +18,6 @@ dependencies: # pandas dependencies - python-dateutil - - pytz - pip - pip: diff --git 
a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml index 5455b9b84b034..893345a49e3c4 100644 --- a/ci/deps/actions-311-pyarrownightly.yaml +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -19,7 +19,6 @@ dependencies: # required dependencies - python-dateutil - numpy<2 - - pytz - pip - pip: diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 388116439f944..b754c8af3da0e 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -20,7 +20,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -46,6 +45,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml index 1d9f8aa3b092a..3efc13e6101a0 100644 --- a/ci/deps/actions-312.yaml +++ b/ci/deps/actions-312.yaml @@ -20,7 +20,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -46,6 +45,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/deps/actions-pypy-39.yaml b/ci/deps/actions-pypy-39.yaml index d9c8dd81b7c33..1f1944e5a0909 100644 --- a/ci/deps/actions-pypy-39.yaml +++ b/ci/deps/actions-pypy-39.yaml @@ -22,6 +22,5 @@ dependencies: # required - numpy - python-dateutil - - pytz - pip: - tzdata>=2022.7 diff --git a/ci/deps/circle-311-arm64.yaml b/ci/deps/circle-311-arm64.yaml index 1c31d353699f8..09283d8529ce3 100644 --- a/ci/deps/circle-311-arm64.yaml +++ b/ci/deps/circle-311-arm64.yaml @@ -21,7 +21,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -47,6 +46,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - 
pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/meta.yaml b/ci/meta.yaml index aac5593e493b7..9631a41e6cbd7 100644 --- a/ci/meta.yaml +++ b/ci/meta.yaml @@ -37,7 +37,6 @@ requirements: - numpy >=1.21.6 # [py<311] - numpy >=1.23.2 # [py>=311] - python-dateutil >=2.8.2 - - pytz >=2020.1 - python-tzdata >=2022.7 test: diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 86ce05fde547b..c5144c4e4f412 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -205,7 +205,6 @@ Package Minimum support ================================================================ ========================== `NumPy `__ 1.23.5 `python-dateutil `__ 2.8.2 -`pytz `__ 2020.1 `tzdata `__ 2022.7 ================================================================ ========================== @@ -419,3 +418,14 @@ Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= Zstandard 0.19.0 compression Zstandard compression ========================= ================== =============== ============================================================= + +Timezone +^^^^^^^^ + +Installable with ``pip install "pandas[timezone]"`` + +========================= ================== =================== ============================================================= +Dependency Minimum Version pip extra Notes +========================= ================== =================== ============================================================= +pytz 2023.4 timezone Alternative timezone library to ``zoneinfo``. 
+========================= ================== =================== ============================================================= diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f7039021ff276..d823902227f04 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -213,6 +213,8 @@ Optional libraries below the lowest tested version may still work, but are not c +------------------------+---------------------+ | Package | New Minimum Version | +========================+=====================+ +| pytz | 2023.4 | ++------------------------+---------------------+ | fastparquet | 2023.10.0 | +------------------------+---------------------+ | adbc-driver-postgresql | 0.10.0 | @@ -222,6 +224,35 @@ Optional libraries below the lowest tested version may still work, but are not c See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. +.. _whatsnew_300.api_breaking.pytz: + +``pytz`` now an optional dependency +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas now uses :py:mod:`zoneinfo` from the standard library as the default timezone implementation when passing a timezone +string to various methods. (:issue:`34916`) + +*Old behavior:* + +.. code-block:: ipython + In [1]: ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") + In [2]: ts.tz + + +*New behavior:* + +.. ipython:: python + ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") + ts.tz + +``pytz`` timezone objects are still supported when passed directly, but they will no longer be returned by default +from string inputs. Moreover, ``pytz`` is no longer a required dependency of pandas, but can be installed +with the pip extra ``pip install pandas[timezone]``. + + +Additionally, pandas no longer throws ``pytz`` exceptions for timezone operations leading to ambiguous or nonexistent +times. These operations will now yield + .. 
_whatsnew_300.api_breaking.other: Other API changes diff --git a/environment.yml b/environment.yml index dcc7aa5280b2c..7c8c9ca614747 100644 --- a/environment.yml +++ b/environment.yml @@ -24,7 +24,6 @@ dependencies: # required dependencies - python-dateutil - numpy<2 - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -50,6 +49,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/pandas/__init__.py b/pandas/__init__.py index 3ee6f6abf97bf..05547e50bbb37 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -3,7 +3,7 @@ __docformat__ = "restructuredtext" # Let users know if they're missing any of our hard dependencies -_hard_dependencies = ("numpy", "pytz", "dateutil") +_hard_dependencies = ("numpy", "dateutil") _missing_dependencies = [] for _dependency in _hard_dependencies: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 3a55f5fa0c003..65f95ad2078d7 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -69,6 +69,7 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.timezones cimport ( get_utcoffset, is_utc, + treat_tz_as_pytz, ) from pandas._libs.tslibs.tzconversion cimport ( Localizer, @@ -742,11 +743,11 @@ cdef datetime _localize_pydatetime(datetime dt, tzinfo tz): identically, i.e. discards nanos from Timestamps. It also assumes that the `tz` input is not None. 
""" - try: + if treat_tz_as_pytz(tz): # datetime.replace with pytz may be incorrect result # TODO: try to respect `fold` attribute return tz.localize(dt, is_dst=None) - except AttributeError: + else: return dt.replace(tzinfo=tz) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 43279051e2a30..ccb1a1d6870f7 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -16,6 +16,7 @@ FUNCTIONS: strptime -- Calculates the time struct represented by the passed-in string """ from datetime import timezone +import zoneinfo from cpython.datetime cimport ( PyDate_Check, @@ -38,7 +39,6 @@ from _thread import allocate_lock as _thread_allocate_lock import re import numpy as np -import pytz cimport numpy as cnp from numpy cimport ( @@ -747,7 +747,7 @@ cdef tzinfo _parse_with_format( week_of_year_start = 0 elif parse_code == 17: # e.g. val='2011-12-30T00:00:00.000000UTC'; fmt='%Y-%m-%dT%H:%M:%S.%f%Z' - tz = pytz.timezone(found_dict["Z"]) + tz = zoneinfo.ZoneInfo(found_dict["Z"]) elif parse_code == 19: # e.g. val='March 1, 2018 12:00:00+0400'; fmt='%B %d, %Y %H:%M:%S%z' tz = parse_timezone_directive(found_dict["z"]) @@ -837,7 +837,7 @@ class TimeRE(_TimeRE): if key == "Z": # lazy computation if self._Z is None: - self._Z = self.__seqToRE(pytz.all_timezones, "Z") + self._Z = self.__seqToRE(zoneinfo.available_timezones(), "Z") # Note: handling Z is the key difference vs using the stdlib # _strptime.TimeRE. test_to_datetime_parse_tzname_or_tzoffset with # fmt='%Y-%m-%d %H:%M:%S %Z' fails with the stdlib version. 
diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 6292b6ce0fd1d..756bd91bc821d 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -2,17 +2,10 @@ from datetime import ( timedelta, timezone, ) +import zoneinfo from pandas.compat._optional import import_optional_dependency -try: - # py39+ - import zoneinfo - from zoneinfo import ZoneInfo -except ImportError: - zoneinfo = None - ZoneInfo = None - from cpython.datetime cimport ( datetime, timedelta, @@ -28,8 +21,8 @@ from dateutil.tz import ( tzutc as _dateutil_tzutc, ) import numpy as np -import pytz -from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo + +pytz = import_optional_dependency("pytz", errors="ignore") cimport numpy as cnp from numpy cimport int64_t @@ -45,7 +38,7 @@ from pandas._libs.tslibs.util cimport ( cdef int64_t NPY_NAT = get_nat() cdef tzinfo utc_stdlib = timezone.utc -cdef tzinfo utc_pytz = pytz.utc +cdef object utc_pytz = pytz.UTC if pytz else None cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc() cdef tzinfo utc_zoneinfo = None @@ -56,13 +49,13 @@ cdef tzinfo utc_zoneinfo = None cdef bint is_utc_zoneinfo(tzinfo tz): # Workaround for cases with missing tzdata # https://github.com/pandas-dev/pandas/pull/46425#discussion_r830633025 - if tz is None or zoneinfo is None: + if tz is None: return False global utc_zoneinfo if utc_zoneinfo is None: try: - utc_zoneinfo = ZoneInfo("UTC") + utc_zoneinfo = zoneinfo.ZoneInfo("UTC") except zoneinfo.ZoneInfoNotFoundError: return False # Warn if tzdata is too old, even if there is a system tzdata to alert @@ -74,18 +67,16 @@ cdef bint is_utc_zoneinfo(tzinfo tz): cpdef inline bint is_utc(tzinfo tz): return ( - tz is utc_pytz - or tz is utc_stdlib + tz is utc_stdlib or isinstance(tz, _dateutil_tzutc) or tz is utc_dateutil_str or is_utc_zoneinfo(tz) + or (utc_pytz is not None and tz is utc_pytz) ) cdef bint is_zoneinfo(tzinfo tz): - if ZoneInfo is None: 
- return False - return isinstance(tz, ZoneInfo) + return isinstance(tz, zoneinfo.ZoneInfo) cdef bint is_tzlocal(tzinfo tz): @@ -166,7 +157,7 @@ cpdef inline tzinfo maybe_get_tz(object tz): elif tz == "UTC" or tz == "utc": tz = utc_stdlib else: - tz = pytz.timezone(tz) + tz = zoneinfo.ZoneInfo(tz) elif is_integer_object(tz): tz = timezone(timedelta(seconds=tz)) elif isinstance(tz, tzinfo): @@ -205,7 +196,7 @@ cdef object tz_cache_key(tzinfo tz): the same tz file). Also, pytz objects are not always hashable so we use str(tz) instead. """ - if isinstance(tz, _pytz_BaseTzInfo): + if pytz is not None and isinstance(tz, pytz.tzinfo.BaseTzInfo): return tz.zone elif isinstance(tz, _dateutil_tzfile): if ".tar.gz" in tz._filename: @@ -239,7 +230,7 @@ cpdef inline bint is_fixed_offset(tzinfo tz): return 1 else: return 0 - elif treat_tz_as_pytz(tz): + elif treat_tz_as_pytz(tz) and pytz is not None: if (len(tz._transition_info) == 0 and len(tz._utc_transition_times) == 0): return 1 diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e3facd3d9599b..c100f315e9a19 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -15,7 +15,6 @@ from cython cimport Py_ssize_t import_datetime() import numpy as np -import pytz cimport numpy as cnp from numpy cimport ( @@ -196,8 +195,8 @@ def tz_localize_to_utc( NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns, ): """ - Localize tzinfo-naive i8 to given time zone (using pytz). If - there are ambiguities in the values, raise AmbiguousTimeError. + Localize tzinfo-naive i8 to given time zone. If + there are ambiguities in the values, raise ValueError. 
Parameters ---------- @@ -368,7 +367,7 @@ timedelta-like} result[i] = NPY_NAT else: stamp = _render_tstamp(val, creso=creso) - raise pytz.AmbiguousTimeError( + raise ValueError( f"Cannot infer dst time from {stamp}, try using the " "'ambiguous' argument" ) @@ -428,7 +427,10 @@ timedelta-like} result[i] = NPY_NAT else: stamp = _render_tstamp(val, creso=creso) - raise pytz.NonExistentTimeError(stamp) + raise ValueError( + f"{stamp} is a nonexistent time due to daylight savings time. " + "Try using the 'nonexistent' argument." + ) return result.base # .base to get underlying ndarray @@ -631,7 +633,7 @@ cdef ndarray[int64_t] _get_dst_hours( if trans_idx.size == 1: # see test_tz_localize_to_utc_ambiguous_infer stamp = _render_tstamp(vals[trans_idx[0]], creso=creso) - raise pytz.AmbiguousTimeError( + raise ValueError( f"Cannot infer dst time from {stamp} as there " "are no repeated times" ) @@ -653,14 +655,16 @@ cdef ndarray[int64_t] _get_dst_hours( if grp.size == 1 or np.all(delta > 0): # see test_tz_localize_to_utc_ambiguous_infer stamp = _render_tstamp(vals[grp[0]], creso=creso) - raise pytz.AmbiguousTimeError(stamp) + raise ValueError( + f"{stamp} is an ambiguous time and cannot be inferred." + ) # Find the index for the switch and pull from a for dst and b # for standard switch_idxs = (delta <= 0).nonzero()[0] if switch_idxs.size > 1: # see test_tz_localize_to_utc_ambiguous_infer - raise pytz.AmbiguousTimeError( + raise ValueError( f"There are {switch_idxs.size} dst switches when " "there should only be 1." 
) diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index b62a4c8dcc8c8..6e82b75879aa1 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -43,6 +43,7 @@ "pyreadstat": "1.2.0", "pytest": "7.3.2", "python-calamine": "0.1.7", + "pytz": "2023.4", "pyxlsb": "1.0.10", "s3fs": "2022.11.0", "scipy": "1.10.0", diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 5213be8b69016..307dd45fd3ee7 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -18,9 +18,9 @@ cast, ) import warnings +import zoneinfo import numpy as np -import pytz from pandas._config.config import get_option @@ -789,7 +789,7 @@ def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None) -> None: tz = timezones.maybe_get_tz(tz) tz = timezones.tz_standardize(tz) elif tz is not None: - raise pytz.UnknownTimeZoneError(tz) + raise zoneinfo.ZoneInfoNotFoundError(tz) if tz is None: raise TypeError("A 'tz' is required.") @@ -882,7 +882,7 @@ def construct_from_string(cls, string: str_type) -> DatetimeTZDtype: return cls(unit=d["unit"], tz=d["tz"]) except (KeyError, TypeError, ValueError) as err: # KeyError if maybe_get_tz tries and fails to get a - # pytz timezone (actually pytz.UnknownTimeZoneError). + # zoneinfo timezone (actually zoneinfo.ZoneInfoNotFoundError). 
# TypeError if we pass a nonsense tz; # ValueError if we pass a unit other than "ns" raise TypeError(msg) from err diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index b44aecff79779..bb985447d6658 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -144,11 +144,11 @@ def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]: field["freq"] = dtype.freq.freqstr elif isinstance(dtype, DatetimeTZDtype): if timezones.is_utc(dtype.tz): - # timezone.utc has no "zone" attr field["tz"] = "UTC" else: - # error: "tzinfo" has no attribute "zone" - field["tz"] = dtype.tz.zone # type: ignore[attr-defined] + zone = timezones.get_timezone(dtype.tz) + if isinstance(zone, str): + field["tz"] = zone elif isinstance(dtype, ExtensionDtype): field["extDtype"] = dtype.name return field diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index c4fec39594407..7c853e0f5bcae 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -67,7 +67,6 @@ def _get_dependency_info() -> dict[str, JSONSerializable]: "pandas", # required "numpy", - "pytz", "dateutil", # install / build, "setuptools", diff --git a/pyproject.toml b/pyproject.toml index 661e8efbb95fc..0a3e7175350a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,6 @@ dependencies = [ "numpy>=1.23.5; python_version<'3.12'", "numpy>=1.26.0; python_version>='3.12'", "python-dateutil>=2.8.2", - "pytz>=2020.1", "tzdata>=2022.7" ] classifiers = [ @@ -81,6 +80,7 @@ plot = ['matplotlib>=3.6.3'] output-formatting = ['jinja2>=3.1.2', 'tabulate>=0.9.0'] clipboard = ['PyQt5>=5.15.9', 'qtpy>=2.3.0'] compression = ['zstandard>=0.19.0'] +timezone = ['pytz>=2023.4'] all = ['adbc-driver-postgresql>=0.10.0', 'adbc-driver-sqlite>=0.8.0', 'beautifulsoup4>=4.11.2', @@ -107,6 +107,7 @@ all = ['adbc-driver-postgresql>=0.10.0', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0', 'python-calamine>=0.1.7', + 'pytz>=2023.4', 
'pyxlsb>=1.0.10', 'qtpy>=2.3.0', 'scipy>=1.10.0', diff --git a/requirements-dev.txt b/requirements-dev.txt index f5da7f70ccdba..a2dcbb065ba70 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,7 +15,6 @@ PyQt5>=5.15.9 coverage python-dateutil numpy<2 -pytz beautifulsoup4>=4.11.2 blosc bottleneck>=1.3.6 @@ -39,6 +38,7 @@ pymysql>=1.0.2 pyreadstat>=1.2.0 tables>=3.8.0 python-calamine>=0.1.7 +pytz>=2023.4 pyxlsb>=1.0.10 s3fs>=2022.11.0 scipy>=1.10.0 From ff76e49544108b1d1f9d848b2e63b9d60b35f616 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:25:14 -0700 Subject: [PATCH 02/13] Start to address tests --- pandas/tests/indexes/datetimes/test_timezones.py | 14 +++++--------- pandas/tests/tseries/offsets/test_dst.py | 6 +++--- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index e4b8a909add0d..8d9340818b511 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -184,11 +184,8 @@ def test_dti_tz_nat(self, tzstr): assert isna(idx[1]) assert idx[0].tzinfo is not None - @pytest.mark.parametrize("tzstr", ["pytz/US/Eastern", "dateutil/US/Eastern"]) + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) def test_utc_box_timestamp_and_localize(self, tzstr): - if tzstr.startswith("pytz/"): - pytest.importorskip("pytz") - tzstr = tzstr.removeprefix("pytz/") tz = timezones.maybe_get_tz(tzstr) rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc") @@ -203,11 +200,10 @@ def test_utc_box_timestamp_and_localize(self, tzstr): # right tzinfo rng = date_range("3/13/2012", "3/14/2012", freq="h", tz="utc") rng_eastern = rng.tz_convert(tzstr) - # test not valid for dateutil timezones. 
- # assert 'EDT' in repr(rng_eastern[0].tzinfo) - assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr( - rng_eastern[0].tzinfo - ) + if "dateutil" in tzstr: + assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr( + rng_eastern[0].tzinfo + ) @pytest.mark.parametrize( "tz", [zoneinfo.ZoneInfo("US/Central"), gettz("US/Central")] diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index dfdc69c0fe18e..a420bda397162 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -256,10 +256,10 @@ def test_all_offset_classes(self, tup): ], ) def test_nontick_offset_with_ambiguous_time_error(original_dt, target_dt, offset, tz): - # .apply for non-Tick offsets throws AmbiguousTimeError when the target dt + # .apply for non-Tick offsets throws ValueError when the target dt # is dst-ambiguous - localized_dt = original_dt.tz_localize(pytz.timezone(tz)) + localized_dt = original_dt.tz_localize(tz) msg = f"Cannot infer dst time from {target_dt}, try using the 'ambiguous' argument" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): localized_dt + offset From 10df94e66236ef62f712825c15c44157d5a32143 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:39:40 -0700 Subject: [PATCH 03/13] Fix tests --- doc/source/user_guide/timeseries.rst | 4 ++-- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_libs/tslibs/conversion.pyx | 8 +++++++- pandas/_libs/tslibs/nattype.pyx | 16 ++++++++-------- pandas/_libs/tslibs/timestamps.pyx | 16 ++++++++-------- pandas/core/arrays/datetimelike.py | 4 ++-- pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/generic.py | 4 ++-- pandas/core/indexes/datetimes.py | 5 ++--- .../datetimes/methods/test_tz_localize.py | 17 ++++++++--------- .../indexes/datetimes/test_constructors.py | 10 +++++----- 
.../tests/indexes/datetimes/test_date_range.py | 5 ++--- .../scalar/timestamp/methods/test_round.py | 5 ++--- .../timestamp/methods/test_tz_localize.py | 16 ++++++---------- .../tests/scalar/timestamp/test_constructors.py | 11 +++++------ .../tests/series/accessors/test_dt_accessor.py | 5 ++--- pandas/tests/series/methods/test_tz_localize.py | 9 ++++----- pandas/tests/tseries/offsets/test_dst.py | 4 ++-- .../tseries/offsets/test_offsets_properties.py | 5 ++--- pandas/tests/tslibs/test_tzconversion.py | 17 ++++++++++------- 20 files changed, 82 insertions(+), 85 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 0845417e4910d..4299dca4774b9 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2569,7 +2569,7 @@ Ambiguous times when localizing because daylight savings time (DST) in a local time zone causes some times to occur twice within one day ("clocks fall back"). The following options are available: -* ``'raise'``: Raises a ``pytz.AmbiguousTimeError`` (the default behavior) +* ``'raise'``: Raises a ``ValueError`` (the default behavior) * ``'infer'``: Attempt to determine the correct offset base on the monotonicity of the timestamps * ``'NaT'``: Replaces ambiguous times with ``NaT`` * ``bool``: ``True`` represents a DST time, ``False`` represents non-DST time. An array-like of ``bool`` values is supported for a sequence of times. @@ -2604,7 +2604,7 @@ A DST transition may also shift the local time ahead by 1 hour creating nonexist local times ("clocks spring forward"). The behavior of localizing a timeseries with nonexistent times can be controlled by the ``nonexistent`` argument. 
The following options are available: -* ``'raise'``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) +* ``'raise'``: Raises a ``ValueError`` (the default behavior) * ``'NaT'``: Replaces nonexistent times with ``NaT`` * ``'shift_forward'``: Shifts nonexistent times forward to the closest real time * ``'shift_backward'``: Shifts nonexistent times backward to the closest real time diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d823902227f04..d939652ea7198 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -251,7 +251,7 @@ with the pip extra ``pip install pandas[timezone]``. Additionally, pandas no longer throws ``pytz`` exceptions for timezone operations leading to ambiguous or nonexistent -times. These operations will now yield +times. These cases will now raise a ``ValueError``. .. _whatsnew_300.api_breaking.other: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 65f95ad2078d7..f128ce1de40b4 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -744,9 +744,15 @@ cdef datetime _localize_pydatetime(datetime dt, tzinfo tz): It also assumes that the `tz` input is not None. 
""" if treat_tz_as_pytz(tz): + import pytz + # datetime.replace with pytz may be incorrect result # TODO: try to respect `fold` attribute - return tz.localize(dt, is_dst=None) + try: + return tz.localize(dt, is_dst=None) + except (pytz.AmbiguousTimeError, pytz.NonExistentTimeError) as err: + # As of pandas 3.0, we raise ValueErrors instead of pytz exceptions + raise ValueError(str(err)) from err else: return dt.replace(tzinfo=tz) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 27a371ef43832..17eb5f64c6d8a 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1018,7 +1018,7 @@ class NaTType(_NaT): * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' @@ -1031,7 +1031,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Returns @@ -1119,7 +1119,7 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' @@ -1132,7 +1132,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. 
* timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Raises @@ -1214,7 +1214,7 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' @@ -1227,7 +1227,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Raises @@ -1378,7 +1378,7 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ default 'raise' @@ -1393,7 +1393,7 @@ default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. 
Returns diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 93715c907d182..729596736cb73 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2084,7 +2084,7 @@ class Timestamp(_Timestamp): * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' @@ -2097,7 +2097,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Returns @@ -2187,7 +2187,7 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' @@ -2200,7 +2200,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Raises @@ -2282,7 +2282,7 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). 
* 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' @@ -2295,7 +2295,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Raises @@ -2410,7 +2410,7 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ default 'raise' @@ -2425,7 +2425,7 @@ default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Returns diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c90ff410b4b93..b2c66c917557c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1780,7 +1780,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: a non-DST time (note that this flag is only applicable for ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous + - 'raise' will raise a ValueError if there are ambiguous times. 
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise' @@ -1793,7 +1793,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: closest existing time - 'NaT' will return NaT where there are nonexistent times - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are + - 'raise' will raise a ValueError if there are nonexistent times. Returns diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 34d25f04b69e1..8dda64993c0a6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -972,7 +972,7 @@ def tz_localize( non-DST time (note that this flag is only applicable for ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous + - 'raise' will raise a ValueError if there are ambiguous times. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ @@ -986,7 +986,7 @@ def tz_localize( closest existing time - 'NaT' will return NaT where there are nonexistent times - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are + - 'raise' will raise a ValueError if there are nonexistent times. Returns diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b4908ad7a2158..08dd380682f0b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10554,7 +10554,7 @@ def tz_localize( a non-DST time (note that this flag is only applicable for ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous + - 'raise' will raise a ValueError if there are ambiguous times. 
nonexistent : str, default 'raise' A nonexistent time does not exist in a particular timezone @@ -10566,7 +10566,7 @@ def tz_localize( closest existing time - 'NaT' will return NaT where there are nonexistent times - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are + - 'raise' will raise a ValueError if there are nonexistent times. Returns diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 00a929724ed4c..412ef8a4b1e51 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -6,7 +6,6 @@ import warnings import numpy as np -import pytz from pandas._libs import ( NaT, @@ -162,7 +161,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin): non-DST time (note that this flag is only applicable for ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous times. + - 'raise' will raise a ValueError if there are ambiguous times. dayfirst : bool, default False If True, parse dates in `data` with the day first order. 
yearfirst : bool, default False @@ -591,7 +590,7 @@ def get_loc(self, key): elif isinstance(key, str): try: parsed, reso = self._parse_with_reso(key) - except (ValueError, pytz.NonExistentTimeError) as err: + except ValueError as err: raise KeyError(key) from err self._disallow_mismatched_indexing(parsed) diff --git a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py index c6697fd169e8a..78a79ac7d1546 100644 --- a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py +++ b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py @@ -9,7 +9,6 @@ from dateutil.tz import gettz import numpy as np import pytest -import pytz from pandas import ( DatetimeIndex, @@ -69,10 +68,10 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"] index = DatetimeIndex(times) tz = "US/Eastern" - with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)): + with pytest.raises(ValueError, match="|".join(times)): index.tz_localize(tz=tz) - with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)): + with pytest.raises(ValueError, match="|".join(times)): index.tz_localize(tz=tz, nonexistent="raise") result = index.tz_localize(tz=tz, nonexistent="NaT") @@ -85,7 +84,7 @@ def test_dti_tz_localize_ambiguous_infer(self, tz): # November 6, 2011, fall back, repeat 2 AM hour # With no repeated hours, we cannot infer the transition dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour()) - with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + with pytest.raises(ValueError, match="Cannot infer dst time"): dr.tz_localize(tz) def test_dti_tz_localize_ambiguous_infer2(self, tz, unit): @@ -117,7 +116,7 @@ def test_dti_tz_localize_ambiguous_infer3(self, tz): def test_dti_tz_localize_ambiguous_times(self, tz): # March 13, 2011, spring forward, skip from 2 AM to 3 AM dr = date_range(datetime(2011, 
3, 13, 1, 30), periods=3, freq=offsets.Hour()) - with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:30:00"): + with pytest.raises(ValueError, match="2011-03-13 02:30:00"): dr.tz_localize(tz) # after dst transition, it works @@ -127,7 +126,7 @@ def test_dti_tz_localize_ambiguous_times(self, tz): # November 6, 2011, fall back, repeat 2 AM hour dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=offsets.Hour()) - with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + with pytest.raises(ValueError, match="Cannot infer dst time"): dr.tz_localize(tz) # UTC is OK @@ -163,11 +162,11 @@ def test_dti_tz_localize(self, prefix): tm.assert_numpy_array_equal(dti3.values, dti_utc.values) dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms") - with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + with pytest.raises(ValueError, match="Cannot infer dst time"): dti.tz_localize(tzstr) dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms") - with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:00:00"): + with pytest.raises(ValueError, match="2011-03-13 02:00:00"): dti.tz_localize(tzstr) def test_dti_tz_localize_utc_conversion(self, tz): @@ -184,7 +183,7 @@ def test_dti_tz_localize_utc_conversion(self, tz): # DST ambiguity, this should fail rng = date_range("3/11/2012", "3/12/2012", freq="30min") # Is this really how it should fail?? 
- with pytest.raises(pytz.NonExistentTimeError, match="2012-03-11 02:00:00"): + with pytest.raises(ValueError, match="2012-03-11 02:00:00"): rng.tz_localize(tz) def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index aba440ceeb56b..8da88b97f9ea8 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -14,7 +14,6 @@ from dateutil.tz import gettz import numpy as np import pytest -import pytz from pandas._libs.tslibs import ( astype_overflowsafe, @@ -750,7 +749,7 @@ def test_disallow_setting_tz(self): [ None, "America/Los_Angeles", - pytz.timezone("America/Los_Angeles"), + zoneinfo.ZoneInfo("America/Los_Angeles"), Timestamp("2000", tz="America/Los_Angeles").tz, ], ) @@ -765,8 +764,8 @@ def test_constructor_start_end_with_tz(self, tz): freq="D", ) tm.assert_index_equal(result, expected) - # Especially assert that the timezone is consistent for pytz - assert pytz.timezone("America/Los_Angeles") is result.tz + # Especially assert that the timezone is consistent for zoneinfo + assert zoneinfo.ZoneInfo("America/Los_Angeles") is result.tz @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"]) def test_constructor_with_non_normalized_pytz(self, tz): @@ -984,6 +983,7 @@ def test_dti_ambiguous_matches_timestamp(self, tz, use_str, box_cls, request): # GH#47471 check that we get the same raising behavior in the DTI # constructor and Timestamp constructor if isinstance(tz, str) and tz.startswith("pytz/"): + pytz = pytest.importorskip("pytz") tz = pytz.timezone(tz.removeprefix("pytz/")) dtstr = "2013-11-03 01:59:59.999999" item = dtstr @@ -1000,7 +1000,7 @@ def test_dti_ambiguous_matches_timestamp(self, tz, use_str, box_cls, request): mark = pytest.mark.xfail(reason="We implicitly get fold=0.") request.applymarker(mark) - with pytest.raises(pytz.AmbiguousTimeError, 
match=dtstr): + with pytest.raises(ValueError, match=dtstr): box_cls(item, tz=tz) @pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"]) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index ee1c906efea73..74d597bd7ff26 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -11,7 +11,6 @@ import numpy as np import pytest -import pytz from pandas._libs.tslibs import timezones from pandas._libs.tslibs.offsets import ( @@ -861,7 +860,7 @@ def test_date_range_ambiguous_endpoint(self, tz): # construction with an ambiguous end-point # GH#11626 - with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + with pytest.raises(ValueError, match="Cannot infer dst time"): date_range( "2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", freq="h" ) @@ -885,7 +884,7 @@ def test_date_range_ambiguous_endpoint(self, tz): def test_date_range_nonexistent_endpoint(self, tz, option, expected): # construction with an nonexistent end-point - with pytest.raises(pytz.NonExistentTimeError, match="2019-03-10 02:00:00"): + with pytest.raises(ValueError, match="2019-03-10 02:00:00"): date_range( "2019-03-10 00:00", "2019-03-10 02:00", tz="US/Pacific", freq="h" ) diff --git a/pandas/tests/scalar/timestamp/methods/test_round.py b/pandas/tests/scalar/timestamp/methods/test_round.py index 2fb0e1a8d3397..944aa55727217 100644 --- a/pandas/tests/scalar/timestamp/methods/test_round.py +++ b/pandas/tests/scalar/timestamp/methods/test_round.py @@ -4,7 +4,6 @@ ) import numpy as np import pytest -import pytz from pandas._libs import lib from pandas._libs.tslibs import ( @@ -182,7 +181,7 @@ def test_round_dst_border_ambiguous(self, method, unit): assert result is NaT msg = "Cannot infer dst time" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): getattr(ts, method)("h", ambiguous="raise") 
@pytest.mark.parametrize( @@ -205,7 +204,7 @@ def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): assert result is NaT msg = "2018-03-11 02:00:00" - with pytest.raises(pytz.NonExistentTimeError, match=msg): + with pytest.raises(ValueError, match=msg): getattr(ts, method)(freq, nonexistent="raise") @pytest.mark.parametrize( diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py index 90dc8d77608cb..cb7ac5fa6f1da 100644 --- a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py +++ b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py @@ -4,11 +4,6 @@ from dateutil.tz import gettz import pytest -import pytz -from pytz.exceptions import ( - AmbiguousTimeError, - NonExistentTimeError, -) from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime @@ -54,13 +49,14 @@ def test_tz_localize_ambiguous_bool(self, unit, tz): # make sure that we are correctly accepting bool values as ambiguous # GH#14402 if isinstance(tz, str) and tz.startswith("pytz/"): + pytz = pytest.importorskip("pytz") tz = pytz.timezone(tz.removeprefix("pytz/")) ts = Timestamp("2015-11-01 01:00:03").as_unit(unit) expected0 = Timestamp("2015-11-01 01:00:03-0500", tz=tz) expected1 = Timestamp("2015-11-01 01:00:03-0600", tz=tz) msg = "Cannot infer dst time from 2015-11-01 01:00:03" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): ts.tz_localize(tz) result = ts.tz_localize(tz, ambiguous=True) @@ -105,10 +101,10 @@ def test_tz_localize_ambiguous(self): def test_tz_localize_nonexistent(self, stamp, tz): # GH#13057 ts = Timestamp(stamp) - with pytest.raises(NonExistentTimeError, match=stamp): + with pytest.raises(ValueError, match=stamp): ts.tz_localize(tz) # GH 22644 - with pytest.raises(NonExistentTimeError, match=stamp): + with pytest.raises(ValueError, match=stamp): ts.tz_localize(tz, nonexistent="raise") 
assert ts.tz_localize(tz, nonexistent="NaT") is NaT @@ -154,7 +150,7 @@ def test_tz_localize_ambiguous_raise(self): # GH#13057 ts = Timestamp("2015-11-1 01:00") msg = "Cannot infer dst time from 2015-11-01 01:00:00," - with pytest.raises(AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): ts.tz_localize("US/Pacific", ambiguous="raise") def test_tz_localize_nonexistent_invalid_arg(self, warsaw): @@ -330,7 +326,7 @@ def test_timestamp_tz_localize_nonexistent_raise(self, warsaw, unit): tz = warsaw ts = Timestamp("2015-03-29 02:20:00").as_unit(unit) msg = "2015-03-29 02:20:00" - with pytest.raises(pytz.NonExistentTimeError, match=msg): + with pytest.raises(ValueError, match=msg): ts.tz_localize(tz, nonexistent="raise") msg = ( "The nonexistent argument must be one of 'raise', 'NaT', " diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 39f302c3357de..2c97c4a32e0aa 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -15,7 +15,6 @@ ) import numpy as np import pytest -import pytz from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime @@ -747,7 +746,7 @@ def test_constructor_tz_or_tzinfo(self): tz="UTC", ), Timestamp(2000, 1, 2, 3, 4, 5, 6, None, nanosecond=1), - Timestamp(2000, 1, 2, 3, 4, 5, 6, tz=pytz.UTC, nanosecond=1), + Timestamp(2000, 1, 2, 3, 4, 5, 6, tz=timezone.utc, nanosecond=1), ], ) def test_constructor_nanosecond(self, result): @@ -904,7 +903,7 @@ def test_raise_tz_and_tzinfo_in_datetime_input(self, box): Timestamp(box(**kwargs), tz="US/Pacific") msg = "Cannot pass a datetime or Timestamp" with pytest.raises(ValueError, match=msg): - Timestamp(box(**kwargs), tzinfo=pytz.timezone("US/Pacific")) + Timestamp(box(**kwargs), tzinfo=zoneinfo.ZoneInfo("US/Pacific")) def test_dont_convert_dateutil_utc_to_default_utc(self): result = Timestamp(datetime(2018, 
1, 1), tz=tzutc()) @@ -948,7 +947,7 @@ def test_timestamp_constructor_near_dst_boundary(self): assert result == expected msg = "Cannot infer dst time from 2015-10-25 02:00:00" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): Timestamp("2015-10-25 02:00", tz=tz) result = Timestamp("2017-03-26 01:00", tz="Europe/Paris") @@ -956,7 +955,7 @@ def test_timestamp_constructor_near_dst_boundary(self): assert result == expected msg = "2017-03-26 02:00" - with pytest.raises(pytz.NonExistentTimeError, match=msg): + with pytest.raises(ValueError, match=msg): Timestamp("2017-03-26 02:00", tz="Europe/Paris") # GH#11708 @@ -975,7 +974,7 @@ def test_timestamp_constructor_near_dst_boundary(self): assert result == expected msg = "2017-03-26 02:00" - with pytest.raises(pytz.NonExistentTimeError, match=msg): + with pytest.raises(ValueError, match=msg): Timestamp("2017-03-26 02:00", tz="Europe/Paris") result = Timestamp("2017-03-26 02:00:00+0100", tz="Europe/Paris") diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 49ae0a60e6608..22f4d8e01eee4 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -9,7 +9,6 @@ import numpy as np import pytest -import pytz from pandas._libs.tslibs.timezones import maybe_get_tz @@ -350,7 +349,7 @@ def test_dt_round_tz_ambiguous(self, method): tm.assert_series_equal(result, expected) # raise - with tm.external_error_raised(pytz.AmbiguousTimeError): + with tm.external_error_raised(ValueError): getattr(df1.date.dt, method)("h", ambiguous="raise") @pytest.mark.parametrize( @@ -372,7 +371,7 @@ def test_dt_round_tz_nonexistent(self, method, ts_str, freq): expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) tm.assert_series_equal(result, expected) - with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"): + with pytest.raises(ValueError, 
match="2018-03-11 02:00:00"): getattr(ser.dt, method)(freq, nonexistent="raise") @pytest.mark.parametrize("freq", ["ns", "us", "1000us"]) diff --git a/pandas/tests/series/methods/test_tz_localize.py b/pandas/tests/series/methods/test_tz_localize.py index 45620a721f442..53288e8a1f8e7 100644 --- a/pandas/tests/series/methods/test_tz_localize.py +++ b/pandas/tests/series/methods/test_tz_localize.py @@ -1,7 +1,6 @@ from datetime import timezone import pytest -import pytz from pandas._libs.tslibs import timezones @@ -28,7 +27,7 @@ def test_series_tz_localize_ambiguous_bool(self): expected0 = Series([expected0]) expected1 = Series([expected1]) - with tm.external_error_raised(pytz.AmbiguousTimeError): + with tm.external_error_raised(ValueError): ser.dt.tz_localize("US/Central") result = ser.dt.tz_localize("US/Central", ambiguous=True) @@ -79,11 +78,11 @@ def test_tz_localize_nonexistent(self, warsaw, method, exp, unit): df = ser.to_frame() if method == "raise": - with tm.external_error_raised(pytz.NonExistentTimeError): + with tm.external_error_raised(ValueError): dti.tz_localize(tz, nonexistent=method) - with tm.external_error_raised(pytz.NonExistentTimeError): + with tm.external_error_raised(ValueError): ser.tz_localize(tz, nonexistent=method) - with tm.external_error_raised(pytz.NonExistentTimeError): + with tm.external_error_raised(ValueError): df.tz_localize(tz, nonexistent=method) elif exp == "invalid": diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index a420bda397162..e75958843040d 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -108,13 +108,13 @@ def _test_offset( "second": "2013-11-03 01:59:01.999999", "microsecond": "2013-11-03 01:59:59.000001", }[offset_name] - with pytest.raises(pytz.AmbiguousTimeError, match=err_msg): + with pytest.raises(ValueError, match=err_msg): tstart + offset # While we're here, let's check that we get the same behavior in a # 
vectorized path dti = DatetimeIndex([tstart]) warn_msg = "Non-vectorized DateOffset" - with pytest.raises(pytz.AmbiguousTimeError, match=err_msg): + with pytest.raises(ValueError, match=err_msg): with tm.assert_produces_warning(performance_warning, match=warn_msg): dti + offset return diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py index 99a6a583dd3e9..f9213b3c1cd7b 100644 --- a/pandas/tests/tseries/offsets/test_offsets_properties.py +++ b/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -13,7 +13,6 @@ given, ) import pytest -import pytz import pandas as pd from pandas._testing._hypothesis import ( @@ -34,11 +33,11 @@ def test_on_offset_implementations(dt, offset): # (dt + offset) - offset == dt try: compare = (dt + offset) - offset - except (pytz.NonExistentTimeError, pytz.AmbiguousTimeError): + except ValueError: # When dt + offset does not exist or is DST-ambiguous, assume(False) to # indicate to hypothesis that this is not a valid test case # DST-ambiguous example (GH41906): - # dt = datetime.datetime(1900, 1, 1, tzinfo=pytz.timezone('Africa/Kinshasa')) + # dt=datetime.datetime(1900, 1, 1, tzinfo=zoneinfo.ZoneInfo('Africa/Kinshasa')) # offset = MonthBegin(66) assume(False) diff --git a/pandas/tests/tslibs/test_tzconversion.py b/pandas/tests/tslibs/test_tzconversion.py index c1a56ffb71b02..f32829b4e0b21 100644 --- a/pandas/tests/tslibs/test_tzconversion.py +++ b/pandas/tests/tslibs/test_tzconversion.py @@ -1,6 +1,7 @@ +import zoneinfo + import numpy as np import pytest -import pytz from pandas._libs.tslibs.tzconversion import tz_localize_to_utc @@ -11,13 +12,15 @@ def test_tz_localize_to_utc_ambiguous_infer(self): val = 1_320_541_200_000_000_000 vals = np.array([val, val - 1, val], dtype=np.int64) - with pytest.raises(pytz.AmbiguousTimeError, match="2011-11-06 01:00:00"): - tz_localize_to_utc(vals, pytz.timezone("US/Eastern"), ambiguous="infer") + with 
pytest.raises(ValueError, match="2011-11-06 01:00:00"): + tz_localize_to_utc(vals, zoneinfo.ZoneInfo("US/Eastern"), ambiguous="infer") - with pytest.raises(pytz.AmbiguousTimeError, match="are no repeated times"): - tz_localize_to_utc(vals[:1], pytz.timezone("US/Eastern"), ambiguous="infer") + with pytest.raises(ValueError, match="are no repeated times"): + tz_localize_to_utc( + vals[:1], zoneinfo.ZoneInfo("US/Eastern"), ambiguous="infer" + ) vals[1] += 1 msg = "There are 2 dst switches when there should only be 1" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): - tz_localize_to_utc(vals, pytz.timezone("US/Eastern"), ambiguous="infer") + with pytest.raises(ValueError, match=msg): + tz_localize_to_utc(vals, zoneinfo.ZoneInfo("US/Eastern"), ambiguous="infer") From b76fca724e29d564639b01bf5a83e1127eb824c4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:39:55 -0700 Subject: [PATCH 04/13] Fix tests --- pandas/tests/tseries/offsets/test_offsets_properties.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py index f9213b3c1cd7b..943434e515828 100644 --- a/pandas/tests/tseries/offsets/test_offsets_properties.py +++ b/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -37,7 +37,7 @@ def test_on_offset_implementations(dt, offset): # When dt + offset does not exist or is DST-ambiguous, assume(False) to # indicate to hypothesis that this is not a valid test case # DST-ambiguous example (GH41906): - # dt=datetime.datetime(1900, 1, 1, tzinfo=zoneinfo.ZoneInfo('Africa/Kinshasa')) + # dt = datetime.datetime(1900, 1, 1, tzinfo=ZoneInfo('Africa/Kinshasa')) # offset = MonthBegin(66) assume(False) From 7d1b37bd12181e68fbb3505ab0c985a00776faf5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 18:52:26 
-0700 Subject: [PATCH 05/13] Fix test, import optional pytz in conftest --- pandas/conftest.py | 48 +++++++++++++++------------------ pandas/tests/test_downstream.py | 2 +- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index c3bfc8c06ad8a..5e105cd508e63 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -32,7 +32,10 @@ import gc import operator import os -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Any, +) import uuid from dateutil.tz import ( @@ -43,11 +46,8 @@ from hypothesis import strategies as st import numpy as np import pytest -from pytz import ( - FixedOffset, - utc, -) +from pandas.compat._optional import import_optional_dependency import pandas.util._test_decorators as td from pandas.core.dtypes.dtypes import ( @@ -93,12 +93,7 @@ del pa has_pyarrow = True -import zoneinfo - -try: - zoneinfo.ZoneInfo("UTC") -except zoneinfo.ZoneInfoNotFoundError: - zoneinfo = None # type: ignore[assignment] +pytz = import_optional_dependency("pytz", errors="ignore") # ---------------------------------------------------------------- @@ -1196,19 +1191,19 @@ def deco(*args): "UTC-02:15", tzutc(), tzlocal(), - FixedOffset(300), - FixedOffset(0), - FixedOffset(-300), timezone.utc, timezone(timedelta(hours=1)), timezone(timedelta(hours=-1), name="foo"), ] -if zoneinfo is not None: +if pytz is not None: TIMEZONES.extend( - [ - zoneinfo.ZoneInfo("US/Pacific"), # type: ignore[list-item] - zoneinfo.ZoneInfo("UTC"), # type: ignore[list-item] - ] + ( + pytz.FixedOffset(300), + pytz.FixedOffset(0), + pytz.FixedOffset(-300), + pytz.timezone("US/Pacific"), + pytz.timezone("UTC"), + ) ) TIMEZONE_IDS = [repr(i) for i in TIMEZONES] @@ -1231,9 +1226,10 @@ def tz_aware_fixture(request): return request.param -_UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc] -if zoneinfo is not None: - _UTCS.append(zoneinfo.ZoneInfo("UTC")) +_UTCS = ["utc", "dateutil/UTC", tzutc(), timezone.utc] + +if 
pytz is not None: + _UTCS.append(pytz.utc) @pytest.fixture(params=_UTCS) @@ -1995,12 +1991,12 @@ def using_infer_string() -> bool: return pd.options.future.infer_string is True -warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] -if zoneinfo is not None: - warsaws.append(zoneinfo.ZoneInfo("Europe/Warsaw")) # type: ignore[arg-type] +_warsaws: list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] +if pytz is not None: + _warsaws.append(pytz.timezone("Europe/Warsaw")) # type: ignore[arg-type] -@pytest.fixture(params=warsaws) +@pytest.fixture(params=_warsaws) def warsaw(request) -> str: """ tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo. diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index ee26fdae74960..18df76ddd8ed8 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -218,7 +218,7 @@ def test_missing_required_dependency(): subprocess.check_output(call, stderr=subprocess.STDOUT) output = exc.value.stdout.decode() - for name in ["numpy", "pytz", "dateutil"]: + for name in ["numpy", "dateutil"]: assert name in output From a6d703e9e812e4454406543606f45cdb657b23f4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 18:54:27 -0700 Subject: [PATCH 06/13] Fix formatting --- doc/source/whatsnew/v3.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d939652ea7198..c8b1887be0de9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -235,6 +235,7 @@ string to various methods. (:issue:`34916`) *Old behavior:* .. code-block:: ipython + In [1]: ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") In [2]: ts.tz @@ -242,6 +243,7 @@ string to various methods. (:issue:`34916`) *New behavior:* .. 
ipython:: python + ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") ts.tz From 215398775ed6c324130dd61cfadadcbd2c118890 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 18:55:38 -0700 Subject: [PATCH 07/13] Change minimum --- doc/source/getting_started/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index c5144c4e4f412..8e6cb9e9a132d 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -427,5 +427,5 @@ Installable with ``pip install "pandas[timezone]"`` ========================= ================== =================== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =================== ============================================================= -pytz 2023.3 timezone Alternative timezone library to ``zoneinfo``. +pytz 2023.4 timezone Alternative timezone library to ``zoneinfo``. 
========================= ================== =================== ============================================================= From ee34f5ae636821b346e0f42e0b29fee340675905 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 19:59:50 -0700 Subject: [PATCH 08/13] remove type ignore --- pandas/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 5e105cd508e63..9dd9cc5b43fda 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1993,7 +1993,7 @@ def using_infer_string() -> bool: _warsaws: list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] if pytz is not None: - _warsaws.append(pytz.timezone("Europe/Warsaw")) # type: ignore[arg-type] + _warsaws.append(pytz.timezone("Europe/Warsaw")) @pytest.fixture(params=_warsaws) From 74263d3dc1fd7db66b7d8635b63d6c2780fce84e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 24 Jun 2024 20:17:06 -0700 Subject: [PATCH 09/13] another pa under 17 --- pandas/tests/io/test_parquet.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 930df8abea30f..6f6781cbab0e8 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -952,7 +952,10 @@ def test_timestamp_nanoseconds(self, pa): def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): pytest.importorskip("pyarrow", "11.0.0") - if timezone_aware_date_list.tzinfo != datetime.timezone.utc: + if ( + timezone_aware_date_list.tzinfo != datetime.timezone.utc + and pa_version_under17p0 + ): request.applymarker( pytest.mark.xfail( reason="temporary skip this test until it is properly resolved: " From 6690eaaa4a63fb3a15a69b9392876a11887e147d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 26 Jun 2024 13:53:21 
-0700 Subject: [PATCH 10/13] Address comments --- pandas/_libs/tslibs/timezones.pyx | 5 +++-- .../io/data/excel/test_boolean_types.xlsx | Bin 5279 -> 5280 bytes 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 756bd91bc821d..36b644ffc826d 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -38,10 +38,11 @@ from pandas._libs.tslibs.util cimport ( cdef int64_t NPY_NAT = get_nat() cdef tzinfo utc_stdlib = timezone.utc -cdef object utc_pytz = pytz.UTC if pytz else None +cdef tzinfo utc_pytz = pytz.UTC if pytz else None cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc() cdef tzinfo utc_zoneinfo = None +cdef type ZoneInfo = zoneinfo.ZoneInfo # ---------------------------------------------------------------------- @@ -76,7 +77,7 @@ cpdef inline bint is_utc(tzinfo tz): cdef bint is_zoneinfo(tzinfo tz): - return isinstance(tz, zoneinfo.ZoneInfo) + return isinstance(tz, ZoneInfo) cdef bint is_tzlocal(tzinfo tz): diff --git a/pandas/tests/io/data/excel/test_boolean_types.xlsx b/pandas/tests/io/data/excel/test_boolean_types.xlsx index 234703c32f0abe61516c3e44aa35275242d14f08..632f7c86b6a26537b6444f7a181c34ef254044be 100644 GIT binary patch delta 334 zcmV-U0kQs{DWEB^9}fkWxt{DLlOzu-e=a5n+kv*^)IgyrExie8lZaYBNNOFweHABh z638jX)y#f7yQrS;RG0jL^U~-BKnJ}@m|vlL*qDa@(M72$o{(`4$xOS#;AebVLe1A?>g0Mu6ZLI#8g4$9b0Hvp0ZK{$|6dTsV~sgN2*clQ_9|UQWio@3X!f ztucR&zm~7>rc@B15ir?1d~w| g8nc2Bnga!wxt{DLlOYsT0cDeR6e9)-5&!@I04ArJB>(^b delta 325 zcmV-L0lNO6DW55@9}fi$(B>2*lOzu-e+i)o+kv*^)IgywExie8lZaYBNNOFweHABh z67VU<)y#f7yQp05Rh#^Q^U~-F|vlL*qDa$BKY2$o{(`3@BGS#;Xy&hf{GtbtyRI4p=a*F7b5{57#mu*73v}3F4`b`o!C5kz*f-BnF_z zxM$^a_WaVUP+gE>o{~jc%oAwKgX~^BgGvJu}_7l|qvt Date: Mon, 1 Jul 2024 12:16:57 -0700 Subject: [PATCH 11/13] Undo file --- .../io/data/excel/test_boolean_types.xlsx | Bin 5280 -> 5279 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff 
--git a/pandas/tests/io/data/excel/test_boolean_types.xlsx b/pandas/tests/io/data/excel/test_boolean_types.xlsx index 632f7c86b6a26537b6444f7a181c34ef254044be..234703c32f0abe61516c3e44aa35275242d14f08 100644 GIT binary patch delta 325 zcmV-L0lNO6DW55@9}fi$(B>2*lOzu-e+i)o+kv*^)IgywExie8lZaYBNNOFweHABh z67VU<)y#f7yQp05Rh#^Q^U~-F|vlL*qDa$BKY2$o{(`3@BGS#;Xy&hf{GtbtyRI4p=a*F7b5{57#mu*73v}3F4`b`o!C5kz*f-BnF_z zxM$^a_WaVUP+gE>o{~jc%oAwKgX~^BgGvJu}_7l|qvtKnJ}@m|vlL*qDa@(M72$o{(`4$xOS#;AebVLe1A?>g0Mu6ZLI#8g4$9b0Hvp0ZK{$|6dTsV~sgN2*clQ_9|UQWio@3X!f ztucR&zm~7>rc@B15ir?1d~w| g8nc2Bnga!wxt{DLlOYsT0cDeR6e9)-5&!@I04ArJB>(^b From c59f52e04bb62c7afd069e319fcb673df17168fa Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:39:09 -0700 Subject: [PATCH 12/13] Fix pyarrow 17 test --- pandas/tests/io/test_parquet.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 6f6781cbab0e8..936432f139c0d 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -11,6 +11,7 @@ from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( + pa_version_under10p1, pa_version_under11p0, pa_version_under13p0, pa_version_under15p0, @@ -954,12 +955,14 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): if ( timezone_aware_date_list.tzinfo != datetime.timezone.utc - and pa_version_under17p0 + and not pa_version_under10p1 ): request.applymarker( pytest.mark.xfail( - reason="temporary skip this test until it is properly resolved: " - "https://github.com/pandas-dev/pandas/issues/37286" + reason=( + "pyarrow returns pytz.FixedOffset while pandas " + "constructs datetime.timezone https://github.com/pandas-dev/pandas/issues/37286" + ) ) ) idx = 5 * [timezone_aware_date_list] From 9a0b4c1971d4c7d6210fbe35735b61515943e27d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke 
<10647082+mroeschke@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:20:45 -0700 Subject: [PATCH 13/13] Test xpasses on pyarrow 18 --- pandas/compat/__init__.py | 2 ++ pandas/compat/pyarrow.py | 2 ++ pandas/tests/io/test_parquet.py | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 288559d386a71..756c209661fbb 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -33,6 +33,7 @@ pa_version_under14p1, pa_version_under16p0, pa_version_under17p0, + pa_version_under18p0, ) if TYPE_CHECKING: @@ -157,6 +158,7 @@ def is_ci_environment() -> bool: "pa_version_under14p1", "pa_version_under16p0", "pa_version_under17p0", + "pa_version_under18p0", "HAS_PYARROW", "IS64", "ISMUSL", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index ebfc0d69d9655..bd009b544f31e 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -17,6 +17,7 @@ pa_version_under15p0 = _palv < Version("15.0.0") pa_version_under16p0 = _palv < Version("16.0.0") pa_version_under17p0 = _palv < Version("17.0.0") + pa_version_under18p0 = _palv < Version("18.0.0") HAS_PYARROW = True except ImportError: pa_version_under10p1 = True @@ -28,4 +29,5 @@ pa_version_under15p0 = True pa_version_under16p0 = True pa_version_under17p0 = True + pa_version_under18p0 = True HAS_PYARROW = False diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3f8cbc8ce5ef1..500393b716d9f 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -13,11 +13,11 @@ from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( - pa_version_under10p1, pa_version_under11p0, pa_version_under13p0, pa_version_under15p0, pa_version_under17p0, + pa_version_under18p0, ) import pandas as pd @@ -958,7 +958,7 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): if ( timezone_aware_date_list.tzinfo != datetime.timezone.utc - 
and not pa_version_under10p1 + and pa_version_under18p0 ): request.applymarker( pytest.mark.xfail(