diff --git a/.circleci/config.yml b/.circleci/config.yml index b6a5a00429d9a..1c4f33925c999 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -56,7 +56,7 @@ jobs: /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 - python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 + python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir export PANDAS_CI=1 diff --git a/.devcontainer.json b/.devcontainer.json index 7c5d009260c64..54ddfa1a130f8 100644 --- a/.devcontainer.json +++ b/.devcontainer.json @@ -8,7 +8,6 @@ // Use 'settings' to set *default* container specific settings.json values on container create. // You can edit these settings after create using File > Preferences > Settings > Remote. "settings": { - "terminal.integrated.shell.linux": "/bin/bash", "python.pythonPath": "/usr/local/bin/python", "python.formatting.provider": "black", "python.linting.enabled": true, diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index ceeebfcd1c90c..3eb68bdd2a15c 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -14,3 +14,9 @@ runs: condarc-file: ci/.condarc cache-environment: true cache-downloads: true + + - name: Uninstall pyarrow + if: ${{ env.REMOVE_PYARROW == '1' }} + run: | + micromamba remove -y pyarrow + shell: bash -el {0} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a085d0265a1a5..68b7573f01501 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -29,6 +29,7 @@ jobs: env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml] # Prevent the include jobs from overriding other jobs pattern: [""] + pandas_future_infer_string: ["0"] include: - name: "Downstream Compat" env_file: actions-311-downstream_compat.yaml @@ -58,6 +59,9 @@ jobs: # It will be temporarily activated during tests with locale.setlocale extra_loc: "zh_CN" - name: "Future infer strings" + env_file: actions-312.yaml + pandas_future_infer_string: "1" + - name: "Future infer strings (without pyarrow)" env_file: actions-311.yaml pandas_future_infer_string: "1" - name: "Pypy" @@ -85,9 +89,10 @@ jobs: NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }} # Clipboard tests QT_QPA_PLATFORM: offscreen + REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }} concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}} + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }} cancel-in-progress: true services: @@ -231,7 +236,7 @@ jobs: . 
~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true" - python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 + python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir export PANDAS_CI=1 @@ -269,7 +274,7 @@ jobs: /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 - python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 + python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" python -m pip list --no-cache-dir @@ -290,7 +295,7 @@ jobs: # In general, this will remain frozen(present, but not running) until: # - The next unreleased Python version has released beta 1 # - This version should be available on GitHub Actions. - # - Our required build/runtime dependencies(numpy, pytz, Cython, python-dateutil) + # - Our required build/runtime dependencies(numpy, Cython, python-dateutil) # support that unreleased Python version. # To unfreeze, comment out the ``if: false`` condition, and make sure you update # the name of the workflow and Python version in actions/setup-python ``python-version:`` @@ -343,7 +348,7 @@ jobs: python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy python -m pip install versioneer[toml] - python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov + python -m pip install python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov python -m pip install -ve . --no-build-isolation --no-index --no-deps --config-settings=setup-args="--werror" python -m pip list diff --git a/.gitpod.yml b/.gitpod.yml index 9222639136a17..9ff349747a33e 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -14,7 +14,7 @@ tasks: cp gitpod/settings.json .vscode/settings.json git fetch --tags python -m pip install -ve . --no-build-isolation --config-settings editable-verbose=true - pre-commit install + pre-commit install --install-hooks command: | python -m pip install -ve . --no-build-isolation --config-settings editable-verbose=true echo "✨ Pre-build complete! 
You can close this terminal ✨ " diff --git a/Dockerfile b/Dockerfile index 0fcbcee92295c..dead3a494e52d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM python:3.10.8 WORKDIR /home/pandas RUN apt-get update && apt-get -y upgrade -RUN apt-get install -y build-essential +RUN apt-get install -y build-essential bash-completion # hdf5 needed for pytables installation # libgles2-mesa needed for pytest-qt @@ -12,4 +12,6 @@ RUN python -m pip install --upgrade pip COPY requirements-dev.txt /tmp RUN python -m pip install -r /tmp/requirements-dev.txt RUN git config --global --add safe.directory /home/pandas + +ENV SHELL "/bin/bash" CMD ["/bin/bash"] diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 973e31815cf63..a9a4daa2e2059 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -70,20 +70,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then --format=actions \ -i ES01 `# For now it is ok if docstrings are missing the extended summary` \ -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \ - -i "pandas.MultiIndex.names SA01" \ -i "pandas.MultiIndex.reorder_levels RT03,SA01" \ - -i "pandas.MultiIndex.sortlevel PR07,SA01" \ -i "pandas.MultiIndex.to_frame RT03" \ -i "pandas.NA SA01" \ -i "pandas.NaT SA01" \ - -i "pandas.Period.asfreq SA01" \ -i "pandas.Period.freq GL08" \ -i "pandas.Period.freqstr SA01" \ - -i "pandas.Period.month SA01" \ -i "pandas.Period.ordinal GL08" \ -i "pandas.Period.strftime PR01,SA01" \ -i "pandas.Period.to_timestamp SA01" \ - -i "pandas.Period.year SA01" \ -i "pandas.PeriodDtype SA01" \ -i "pandas.PeriodDtype.freq SA01" \ -i "pandas.PeriodIndex.day SA01" \ @@ -158,28 +153,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.sparse.sp_values SA01" \ -i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \ -i "pandas.Series.std PR01,RT03,SA01" \ - -i "pandas.Series.str.capitalize RT03" \ - -i "pandas.Series.str.casefold RT03" \ - -i "pandas.Series.str.center RT03,SA01" \ - -i "pandas.Series.str.decode PR07,RT03,SA01" \ - -i "pandas.Series.str.encode PR07,RT03,SA01" \ - -i "pandas.Series.str.index RT03" \ - -i "pandas.Series.str.ljust RT03,SA01" \ - -i "pandas.Series.str.lower RT03" \ - -i "pandas.Series.str.lstrip RT03" \ -i "pandas.Series.str.match RT03" \ -i "pandas.Series.str.normalize RT03,SA01" \ - -i "pandas.Series.str.partition RT03" \ -i "pandas.Series.str.repeat SA01" \ -i "pandas.Series.str.replace SA01" \ - -i "pandas.Series.str.rindex RT03" \ - -i "pandas.Series.str.rjust RT03,SA01" \ - -i "pandas.Series.str.rpartition RT03" \ - -i "pandas.Series.str.rstrip RT03" \ - -i "pandas.Series.str.strip RT03" \ - -i "pandas.Series.str.swapcase RT03" \ - -i "pandas.Series.str.title RT03" \ - -i "pandas.Series.str.upper RT03" \ -i "pandas.Series.str.wrap RT03,SA01" \ -i "pandas.Series.str.zfill RT03" \ -i "pandas.Series.struct.dtypes SA01" \ @@ -229,13 +206,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Timestamp.to_julian_date SA01" \ -i "pandas.Timestamp.today SA01" \ -i "pandas.Timestamp.toordinal SA01" \ - -i "pandas.Timestamp.tz_localize SA01" \ -i "pandas.Timestamp.tzinfo GL08" \ - -i "pandas.Timestamp.tzname SA01" \ - -i "pandas.Timestamp.unit SA01" \ - -i "pandas.Timestamp.utcfromtimestamp PR01,SA01" \ - -i "pandas.Timestamp.utcoffset SA01" \ - -i "pandas.Timestamp.utctimetuple SA01" \ -i "pandas.Timestamp.value GL08" \ -i "pandas.Timestamp.year GL08" \ -i "pandas.api.extensions.ExtensionArray._pad_or_backfill PR01,RT03,SA01" \ @@ 
-259,7 +230,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.api.extensions.ExtensionArray.view SA01" \ -i "pandas.api.interchange.from_dataframe RT03,SA01" \ -i "pandas.api.types.is_bool PR01,SA01" \ - -i "pandas.api.types.is_bool_dtype SA01" \ -i "pandas.api.types.is_categorical_dtype SA01" \ -i "pandas.api.types.is_complex PR01,SA01" \ -i "pandas.api.types.is_complex_dtype SA01" \ @@ -421,156 +391,103 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.set_eng_float_format RT03,SA01" \ -i "pandas.testing.assert_extension_array_equal SA01" \ -i "pandas.tseries.offsets.BDay PR02,SA01" \ - -i "pandas.tseries.offsets.BQuarterBegin PR02" \ - -i "pandas.tseries.offsets.BQuarterBegin.freqstr SA01" \ -i "pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.BQuarterBegin.n GL08" \ - -i "pandas.tseries.offsets.BQuarterBegin.nanos GL08" \ -i "pandas.tseries.offsets.BQuarterBegin.normalize GL08" \ -i "pandas.tseries.offsets.BQuarterBegin.rule_code GL08" \ -i "pandas.tseries.offsets.BQuarterBegin.startingMonth GL08" \ - -i "pandas.tseries.offsets.BQuarterEnd.freqstr SA01" \ -i "pandas.tseries.offsets.BQuarterEnd.is_on_offset GL08" \ -i "pandas.tseries.offsets.BQuarterEnd.n GL08" \ - -i "pandas.tseries.offsets.BQuarterEnd.nanos GL08" \ -i "pandas.tseries.offsets.BQuarterEnd.normalize GL08" \ -i "pandas.tseries.offsets.BQuarterEnd.rule_code GL08" \ -i "pandas.tseries.offsets.BQuarterEnd.startingMonth GL08" \ - -i "pandas.tseries.offsets.BYearBegin.freqstr SA01" \ -i "pandas.tseries.offsets.BYearBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.BYearBegin.month GL08" \ -i "pandas.tseries.offsets.BYearBegin.n GL08" \ - -i "pandas.tseries.offsets.BYearBegin.nanos GL08" \ -i "pandas.tseries.offsets.BYearBegin.normalize GL08" \ - -i "pandas.tseries.offsets.BYearBegin.rule_code GL08" \ - -i "pandas.tseries.offsets.BYearEnd PR02" \ - -i "pandas.tseries.offsets.BYearEnd.freqstr SA01" \ -i "pandas.tseries.offsets.BYearEnd.is_on_offset GL08" \ -i "pandas.tseries.offsets.BYearEnd.month GL08" \ -i "pandas.tseries.offsets.BYearEnd.n GL08" \ - -i "pandas.tseries.offsets.BYearEnd.nanos GL08" \ -i "pandas.tseries.offsets.BYearEnd.normalize GL08" \ - -i "pandas.tseries.offsets.BYearEnd.rule_code GL08" \ -i "pandas.tseries.offsets.BusinessDay PR02,SA01" \ -i "pandas.tseries.offsets.BusinessDay.calendar GL08" \ - -i "pandas.tseries.offsets.BusinessDay.freqstr SA01" \ -i "pandas.tseries.offsets.BusinessDay.holidays GL08" \ -i "pandas.tseries.offsets.BusinessDay.is_on_offset GL08" \ -i "pandas.tseries.offsets.BusinessDay.n GL08" \ - -i "pandas.tseries.offsets.BusinessDay.nanos GL08" \ -i "pandas.tseries.offsets.BusinessDay.normalize GL08" \ - -i "pandas.tseries.offsets.BusinessDay.rule_code GL08" \ -i "pandas.tseries.offsets.BusinessDay.weekmask GL08" \ -i "pandas.tseries.offsets.BusinessHour PR02,SA01" \ -i "pandas.tseries.offsets.BusinessHour.calendar GL08" \ -i "pandas.tseries.offsets.BusinessHour.end GL08" \ - -i "pandas.tseries.offsets.BusinessHour.freqstr SA01" \ -i "pandas.tseries.offsets.BusinessHour.holidays GL08" \ -i "pandas.tseries.offsets.BusinessHour.is_on_offset GL08" \ -i "pandas.tseries.offsets.BusinessHour.n GL08" \ - -i "pandas.tseries.offsets.BusinessHour.nanos GL08" \ -i "pandas.tseries.offsets.BusinessHour.normalize GL08" \ - -i "pandas.tseries.offsets.BusinessHour.rule_code GL08" \ -i "pandas.tseries.offsets.BusinessHour.start GL08" \ -i "pandas.tseries.offsets.BusinessHour.weekmask GL08" \ - -i 
"pandas.tseries.offsets.BusinessMonthBegin.freqstr SA01" \ -i "pandas.tseries.offsets.BusinessMonthBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.BusinessMonthBegin.n GL08" \ - -i "pandas.tseries.offsets.BusinessMonthBegin.nanos GL08" \ -i "pandas.tseries.offsets.BusinessMonthBegin.normalize GL08" \ - -i "pandas.tseries.offsets.BusinessMonthBegin.rule_code GL08" \ - -i "pandas.tseries.offsets.BusinessMonthEnd.freqstr SA01" \ -i "pandas.tseries.offsets.BusinessMonthEnd.is_on_offset GL08" \ -i "pandas.tseries.offsets.BusinessMonthEnd.n GL08" \ - -i "pandas.tseries.offsets.BusinessMonthEnd.nanos GL08" \ -i "pandas.tseries.offsets.BusinessMonthEnd.normalize GL08" \ - -i "pandas.tseries.offsets.BusinessMonthEnd.rule_code GL08" \ -i "pandas.tseries.offsets.CBMonthBegin PR02" \ -i "pandas.tseries.offsets.CBMonthEnd PR02" \ -i "pandas.tseries.offsets.CDay PR02,SA01" \ -i "pandas.tseries.offsets.CustomBusinessDay PR02,SA01" \ -i "pandas.tseries.offsets.CustomBusinessDay.calendar GL08" \ - -i "pandas.tseries.offsets.CustomBusinessDay.freqstr SA01" \ -i "pandas.tseries.offsets.CustomBusinessDay.holidays GL08" \ -i "pandas.tseries.offsets.CustomBusinessDay.is_on_offset GL08" \ -i "pandas.tseries.offsets.CustomBusinessDay.n GL08" \ - -i "pandas.tseries.offsets.CustomBusinessDay.nanos GL08" \ -i "pandas.tseries.offsets.CustomBusinessDay.normalize GL08" \ - -i "pandas.tseries.offsets.CustomBusinessDay.rule_code GL08" \ -i "pandas.tseries.offsets.CustomBusinessDay.weekmask GL08" \ -i "pandas.tseries.offsets.CustomBusinessHour PR02,SA01" \ -i "pandas.tseries.offsets.CustomBusinessHour.calendar GL08" \ -i "pandas.tseries.offsets.CustomBusinessHour.end GL08" \ - -i "pandas.tseries.offsets.CustomBusinessHour.freqstr SA01" \ -i "pandas.tseries.offsets.CustomBusinessHour.holidays GL08" \ -i "pandas.tseries.offsets.CustomBusinessHour.is_on_offset GL08" \ -i "pandas.tseries.offsets.CustomBusinessHour.n GL08" \ - -i "pandas.tseries.offsets.CustomBusinessHour.nanos GL08" \ -i "pandas.tseries.offsets.CustomBusinessHour.normalize GL08" \ - -i "pandas.tseries.offsets.CustomBusinessHour.rule_code GL08" \ -i "pandas.tseries.offsets.CustomBusinessHour.start GL08" \ -i "pandas.tseries.offsets.CustomBusinessHour.weekmask GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthBegin PR02" \ -i "pandas.tseries.offsets.CustomBusinessMonthBegin.calendar GL08" \ - -i "pandas.tseries.offsets.CustomBusinessMonthBegin.freqstr SA01" \ -i "pandas.tseries.offsets.CustomBusinessMonthBegin.holidays GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthBegin.is_on_offset SA01" \ -i "pandas.tseries.offsets.CustomBusinessMonthBegin.m_offset GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthBegin.n GL08" \ - -i "pandas.tseries.offsets.CustomBusinessMonthBegin.nanos GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthBegin.normalize GL08" \ - -i "pandas.tseries.offsets.CustomBusinessMonthBegin.rule_code GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthBegin.weekmask GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthEnd PR02" \ -i "pandas.tseries.offsets.CustomBusinessMonthEnd.calendar GL08" \ - -i "pandas.tseries.offsets.CustomBusinessMonthEnd.freqstr SA01" \ -i "pandas.tseries.offsets.CustomBusinessMonthEnd.holidays GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthEnd.is_on_offset SA01" \ -i "pandas.tseries.offsets.CustomBusinessMonthEnd.m_offset GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthEnd.n GL08" \ - -i "pandas.tseries.offsets.CustomBusinessMonthEnd.nanos GL08" \ -i 
"pandas.tseries.offsets.CustomBusinessMonthEnd.normalize GL08" \ - -i "pandas.tseries.offsets.CustomBusinessMonthEnd.rule_code GL08" \ -i "pandas.tseries.offsets.CustomBusinessMonthEnd.weekmask GL08" \ - -i "pandas.tseries.offsets.DateOffset PR02" \ - -i "pandas.tseries.offsets.DateOffset.freqstr SA01" \ -i "pandas.tseries.offsets.DateOffset.is_on_offset GL08" \ -i "pandas.tseries.offsets.DateOffset.n GL08" \ - -i "pandas.tseries.offsets.DateOffset.nanos GL08" \ -i "pandas.tseries.offsets.DateOffset.normalize GL08" \ - -i "pandas.tseries.offsets.DateOffset.rule_code GL08" \ - -i "pandas.tseries.offsets.Day.freqstr SA01" \ -i "pandas.tseries.offsets.Day.is_on_offset GL08" \ -i "pandas.tseries.offsets.Day.n GL08" \ - -i "pandas.tseries.offsets.Day.nanos SA01" \ -i "pandas.tseries.offsets.Day.normalize GL08" \ - -i "pandas.tseries.offsets.Day.rule_code GL08" \ - -i "pandas.tseries.offsets.Easter PR02" \ - -i "pandas.tseries.offsets.Easter.freqstr SA01" \ -i "pandas.tseries.offsets.Easter.is_on_offset GL08" \ -i "pandas.tseries.offsets.Easter.n GL08" \ - -i "pandas.tseries.offsets.Easter.nanos GL08" \ -i "pandas.tseries.offsets.Easter.normalize GL08" \ - -i "pandas.tseries.offsets.Easter.rule_code GL08" \ - -i "pandas.tseries.offsets.FY5253 PR02" \ - -i "pandas.tseries.offsets.FY5253.freqstr SA01" \ -i "pandas.tseries.offsets.FY5253.get_rule_code_suffix GL08" \ -i "pandas.tseries.offsets.FY5253.get_year_end GL08" \ -i "pandas.tseries.offsets.FY5253.is_on_offset GL08" \ -i "pandas.tseries.offsets.FY5253.n GL08" \ - -i "pandas.tseries.offsets.FY5253.nanos GL08" \ -i "pandas.tseries.offsets.FY5253.normalize GL08" \ -i "pandas.tseries.offsets.FY5253.rule_code GL08" \ -i "pandas.tseries.offsets.FY5253.startingMonth GL08" \ -i "pandas.tseries.offsets.FY5253.variation GL08" \ -i "pandas.tseries.offsets.FY5253.weekday GL08" \ - -i "pandas.tseries.offsets.FY5253Quarter PR02" \ - -i "pandas.tseries.offsets.FY5253Quarter.freqstr SA01" \ -i "pandas.tseries.offsets.FY5253Quarter.get_rule_code_suffix GL08" \ -i "pandas.tseries.offsets.FY5253Quarter.get_weeks GL08" \ -i "pandas.tseries.offsets.FY5253Quarter.is_on_offset GL08" \ -i "pandas.tseries.offsets.FY5253Quarter.n GL08" \ - -i "pandas.tseries.offsets.FY5253Quarter.nanos GL08" \ -i "pandas.tseries.offsets.FY5253Quarter.normalize GL08" \ -i "pandas.tseries.offsets.FY5253Quarter.qtr_with_extra_week GL08" \ -i "pandas.tseries.offsets.FY5253Quarter.rule_code GL08" \ @@ -578,139 +495,80 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.tseries.offsets.FY5253Quarter.variation GL08" \ -i "pandas.tseries.offsets.FY5253Quarter.weekday GL08" \ -i "pandas.tseries.offsets.FY5253Quarter.year_has_extra_week GL08" \ - -i "pandas.tseries.offsets.Hour PR02" \ - -i "pandas.tseries.offsets.Hour.freqstr SA01" \ -i "pandas.tseries.offsets.Hour.is_on_offset GL08" \ -i "pandas.tseries.offsets.Hour.n GL08" \ - -i "pandas.tseries.offsets.Hour.nanos SA01" \ -i "pandas.tseries.offsets.Hour.normalize GL08" \ - -i "pandas.tseries.offsets.Hour.rule_code GL08" \ - -i "pandas.tseries.offsets.LastWeekOfMonth PR02,SA01" \ - -i "pandas.tseries.offsets.LastWeekOfMonth.freqstr SA01" \ + -i "pandas.tseries.offsets.LastWeekOfMonth SA01" \ -i "pandas.tseries.offsets.LastWeekOfMonth.is_on_offset GL08" \ -i "pandas.tseries.offsets.LastWeekOfMonth.n GL08" \ - -i "pandas.tseries.offsets.LastWeekOfMonth.nanos GL08" \ -i "pandas.tseries.offsets.LastWeekOfMonth.normalize GL08" \ - -i "pandas.tseries.offsets.LastWeekOfMonth.rule_code GL08" \ -i 
"pandas.tseries.offsets.LastWeekOfMonth.week GL08" \ -i "pandas.tseries.offsets.LastWeekOfMonth.weekday GL08" \ - -i "pandas.tseries.offsets.Micro PR02" \ - -i "pandas.tseries.offsets.Micro.freqstr SA01" \ -i "pandas.tseries.offsets.Micro.is_on_offset GL08" \ -i "pandas.tseries.offsets.Micro.n GL08" \ - -i "pandas.tseries.offsets.Micro.nanos SA01" \ -i "pandas.tseries.offsets.Micro.normalize GL08" \ - -i "pandas.tseries.offsets.Micro.rule_code GL08" \ - -i "pandas.tseries.offsets.Milli PR02" \ - -i "pandas.tseries.offsets.Milli.freqstr SA01" \ -i "pandas.tseries.offsets.Milli.is_on_offset GL08" \ -i "pandas.tseries.offsets.Milli.n GL08" \ - -i "pandas.tseries.offsets.Milli.nanos SA01" \ -i "pandas.tseries.offsets.Milli.normalize GL08" \ - -i "pandas.tseries.offsets.Milli.rule_code GL08" \ - -i "pandas.tseries.offsets.Minute PR02" \ - -i "pandas.tseries.offsets.Minute.freqstr SA01" \ -i "pandas.tseries.offsets.Minute.is_on_offset GL08" \ -i "pandas.tseries.offsets.Minute.n GL08" \ - -i "pandas.tseries.offsets.Minute.nanos SA01" \ -i "pandas.tseries.offsets.Minute.normalize GL08" \ - -i "pandas.tseries.offsets.Minute.rule_code GL08" \ - -i "pandas.tseries.offsets.MonthBegin PR02" \ - -i "pandas.tseries.offsets.MonthBegin.freqstr SA01" \ -i "pandas.tseries.offsets.MonthBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.MonthBegin.n GL08" \ - -i "pandas.tseries.offsets.MonthBegin.nanos GL08" \ -i "pandas.tseries.offsets.MonthBegin.normalize GL08" \ - -i "pandas.tseries.offsets.MonthBegin.rule_code GL08" \ - -i "pandas.tseries.offsets.MonthEnd.freqstr SA01" \ -i "pandas.tseries.offsets.MonthEnd.is_on_offset GL08" \ -i "pandas.tseries.offsets.MonthEnd.n GL08" \ - -i "pandas.tseries.offsets.MonthEnd.nanos GL08" \ -i "pandas.tseries.offsets.MonthEnd.normalize GL08" \ - -i "pandas.tseries.offsets.MonthEnd.rule_code GL08" \ - -i "pandas.tseries.offsets.Nano PR02" \ - -i "pandas.tseries.offsets.Nano.freqstr SA01" \ -i "pandas.tseries.offsets.Nano.is_on_offset GL08" \ - -i "pandas.tseries.offsets.Nano.n GL08" \ - -i "pandas.tseries.offsets.Nano.nanos SA01" \ -i "pandas.tseries.offsets.Nano.normalize GL08" \ - -i "pandas.tseries.offsets.Nano.rule_code GL08" \ - -i "pandas.tseries.offsets.QuarterBegin PR02" \ - -i "pandas.tseries.offsets.QuarterBegin.freqstr SA01" \ + -i "pandas.tseries.offsets.Nano.n GL08" \ -i "pandas.tseries.offsets.QuarterBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.QuarterBegin.n GL08" \ - -i "pandas.tseries.offsets.QuarterBegin.nanos GL08" \ -i "pandas.tseries.offsets.QuarterBegin.normalize GL08" \ -i "pandas.tseries.offsets.QuarterBegin.rule_code GL08" \ -i "pandas.tseries.offsets.QuarterBegin.startingMonth GL08" \ - -i "pandas.tseries.offsets.QuarterEnd.freqstr SA01" \ -i "pandas.tseries.offsets.QuarterEnd.is_on_offset GL08" \ -i "pandas.tseries.offsets.QuarterEnd.n GL08" \ - -i "pandas.tseries.offsets.QuarterEnd.nanos GL08" \ -i "pandas.tseries.offsets.QuarterEnd.normalize GL08" \ -i "pandas.tseries.offsets.QuarterEnd.rule_code GL08" \ -i "pandas.tseries.offsets.QuarterEnd.startingMonth GL08" \ - -i "pandas.tseries.offsets.Second PR02" \ - -i "pandas.tseries.offsets.Second.freqstr SA01" \ -i "pandas.tseries.offsets.Second.is_on_offset GL08" \ -i "pandas.tseries.offsets.Second.n GL08" \ - -i "pandas.tseries.offsets.Second.nanos SA01" \ -i "pandas.tseries.offsets.Second.normalize GL08" \ - -i "pandas.tseries.offsets.Second.rule_code GL08" \ - -i "pandas.tseries.offsets.SemiMonthBegin PR02,SA01" \ + -i "pandas.tseries.offsets.SemiMonthBegin SA01" \ -i 
"pandas.tseries.offsets.SemiMonthBegin.day_of_month GL08" \ - -i "pandas.tseries.offsets.SemiMonthBegin.freqstr SA01" \ -i "pandas.tseries.offsets.SemiMonthBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.SemiMonthBegin.n GL08" \ - -i "pandas.tseries.offsets.SemiMonthBegin.nanos GL08" \ -i "pandas.tseries.offsets.SemiMonthBegin.normalize GL08" \ -i "pandas.tseries.offsets.SemiMonthBegin.rule_code GL08" \ -i "pandas.tseries.offsets.SemiMonthEnd SA01" \ -i "pandas.tseries.offsets.SemiMonthEnd.day_of_month GL08" \ - -i "pandas.tseries.offsets.SemiMonthEnd.freqstr SA01" \ -i "pandas.tseries.offsets.SemiMonthEnd.is_on_offset GL08" \ -i "pandas.tseries.offsets.SemiMonthEnd.n GL08" \ - -i "pandas.tseries.offsets.SemiMonthEnd.nanos GL08" \ -i "pandas.tseries.offsets.SemiMonthEnd.normalize GL08" \ -i "pandas.tseries.offsets.SemiMonthEnd.rule_code GL08" \ -i "pandas.tseries.offsets.Tick GL08" \ - -i "pandas.tseries.offsets.Tick.freqstr SA01" \ -i "pandas.tseries.offsets.Tick.is_on_offset GL08" \ -i "pandas.tseries.offsets.Tick.n GL08" \ - -i "pandas.tseries.offsets.Tick.nanos SA01" \ -i "pandas.tseries.offsets.Tick.normalize GL08" \ - -i "pandas.tseries.offsets.Tick.rule_code GL08" \ - -i "pandas.tseries.offsets.Week PR02" \ - -i "pandas.tseries.offsets.Week.freqstr SA01" \ -i "pandas.tseries.offsets.Week.is_on_offset GL08" \ -i "pandas.tseries.offsets.Week.n GL08" \ - -i "pandas.tseries.offsets.Week.nanos GL08" \ -i "pandas.tseries.offsets.Week.normalize GL08" \ - -i "pandas.tseries.offsets.Week.rule_code GL08" \ -i "pandas.tseries.offsets.Week.weekday GL08" \ - -i "pandas.tseries.offsets.WeekOfMonth PR02,SA01" \ - -i "pandas.tseries.offsets.WeekOfMonth.freqstr SA01" \ + -i "pandas.tseries.offsets.WeekOfMonth SA01" \ -i "pandas.tseries.offsets.WeekOfMonth.is_on_offset GL08" \ -i "pandas.tseries.offsets.WeekOfMonth.n GL08" \ - -i "pandas.tseries.offsets.WeekOfMonth.nanos GL08" \ -i "pandas.tseries.offsets.WeekOfMonth.normalize GL08" \ - -i "pandas.tseries.offsets.WeekOfMonth.rule_code GL08" \ -i "pandas.tseries.offsets.WeekOfMonth.week GL08" \ -i "pandas.tseries.offsets.WeekOfMonth.weekday GL08" \ - -i "pandas.tseries.offsets.YearBegin.freqstr SA01" \ -i "pandas.tseries.offsets.YearBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.YearBegin.month GL08" \ -i "pandas.tseries.offsets.YearBegin.n GL08" \ - -i "pandas.tseries.offsets.YearBegin.nanos GL08" \ -i "pandas.tseries.offsets.YearBegin.normalize GL08" \ - -i "pandas.tseries.offsets.YearBegin.rule_code GL08" \ - -i "pandas.tseries.offsets.YearEnd.freqstr SA01" \ -i "pandas.tseries.offsets.YearEnd.is_on_offset GL08" \ -i "pandas.tseries.offsets.YearEnd.month GL08" \ -i "pandas.tseries.offsets.YearEnd.n GL08" \ - -i "pandas.tseries.offsets.YearEnd.nanos GL08" \ -i "pandas.tseries.offsets.YearEnd.normalize GL08" \ - -i "pandas.tseries.offsets.YearEnd.rule_code GL08" \ -i "pandas.util.hash_pandas_object PR07,SA01" # There should be no backslash in the final line, please keep this comment in the last ignored function RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/ci/deps/actions-310-minimum_versions.yaml b/ci/deps/actions-310-minimum_versions.yaml index 0c46f476893dd..e670356c95637 100644 --- a/ci/deps/actions-310-minimum_versions.yaml +++ b/ci/deps/actions-310-minimum_versions.yaml @@ -23,7 +23,6 @@ dependencies: # required dependencies - python-dateutil=2.8.2 - numpy=1.23.5 - - pytz=2020.1 # optional dependencies - beautifulsoup4=4.11.2 @@ -49,6 +48,7 @@ dependencies: - pyreadstat=1.2.0 - pytables=3.8.0 - python-calamine=0.1.7 + - 
pytz=2023.4 - pyxlsb=1.0.10 - s3fs=2022.11.0 - scipy=1.10.0 diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 0af46752f5b3d..c33c0344e742f 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -21,7 +21,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -47,6 +46,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index 1a842c7212c1f..8692b6e35ab2d 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -22,7 +22,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -48,6 +47,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/deps/actions-311-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml index 748cfa861ec32..996ce5cd9ab94 100644 --- a/ci/deps/actions-311-numpydev.yaml +++ b/ci/deps/actions-311-numpydev.yaml @@ -18,7 +18,6 @@ dependencies: # pandas dependencies - python-dateutil - - pytz - pip - pip: diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml index 469fb1bfb9138..434f1d4f7fed2 100644 --- a/ci/deps/actions-311-pyarrownightly.yaml +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -19,7 +19,6 @@ dependencies: # required dependencies - python-dateutil - numpy<2 - - pytz - pip - pip: diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 75394e2c8e109..8e7d9aba7878d 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -21,7 +21,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -47,6 +46,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml index d4b43ddef3601..6c97960a62d40 100644 --- a/ci/deps/actions-312.yaml +++ b/ci/deps/actions-312.yaml @@ -21,7 +21,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -47,6 +46,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/deps/actions-pypy-39.yaml b/ci/deps/actions-pypy-39.yaml index b0ae9f1e48473..c157d2e65c001 100644 --- a/ci/deps/actions-pypy-39.yaml +++ b/ci/deps/actions-pypy-39.yaml @@ -22,6 +22,5 @@ dependencies: # required - numpy - python-dateutil - - pytz - pip: - tzdata>=2022.7 diff --git a/ci/deps/circle-311-arm64.yaml b/ci/deps/circle-311-arm64.yaml index 18535d81e6985..c86534871b3d2 100644 --- a/ci/deps/circle-311-arm64.yaml +++ b/ci/deps/circle-311-arm64.yaml @@ -21,7 +21,6 @@ dependencies: # required dependencies - python-dateutil - numpy - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -47,6 +46,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/ci/meta.yaml b/ci/meta.yaml index b76bef2f630b7..9d434991b12c1 100644 --- a/ci/meta.yaml +++ b/ci/meta.yaml 
@@ -37,7 +37,6 @@ requirements: - numpy >=1.21.6 # [py<311] - numpy >=1.23.2 # [py>=311] - python-dateutil >=2.8.2 - - pytz >=2020.1 - python-tzdata >=2022.7 test: diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index 28129440b86d7..277f407ae4418 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -762,8 +762,7 @@ install pandas) by typing:: your installation is probably fine and you can start contributing! Often it is worth running only a subset of tests first around your changes before running the -entire suite (tip: you can use the `pandas-coverage app `_) -to find out which tests hit the lines of code you've modified, and then run only those). +entire suite. The easiest way to do this is with:: diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 0b8c1e16dce0e..e174eea00ca60 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -142,7 +142,7 @@ backticks. The following are considered inline code: With several mistakes in the docstring. - It has a blank like after the signature ``def func():``. + It has a blank line after the signature ``def func():``. The text 'Some function' should go in the line after the opening quotes of the docstring, not in the same line. diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 86ce05fde547b..8e6cb9e9a132d 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -205,7 +205,6 @@ Package Minimum support ================================================================ ========================== `NumPy `__ 1.23.5 `python-dateutil `__ 2.8.2 -`pytz `__ 2020.1 `tzdata `__ 2022.7 ================================================================ ========================== @@ -419,3 +418,14 @@ Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= Zstandard 0.19.0 compression Zstandard compression ========================= ================== =============== ============================================================= + +Timezone +^^^^^^^^ + +Installable with ``pip install "pandas[timezone]"`` + +========================= ================== =================== ============================================================= +Dependency Minimum Version pip extra Notes +========================= ================== =================== ============================================================= +pytz 2023.4 timezone Alternative timezone library to ``zoneinfo``. +========================= ================== =================== ============================================================= diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 0845417e4910d..4299dca4774b9 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2569,7 +2569,7 @@ Ambiguous times when localizing because daylight savings time (DST) in a local time zone causes some times to occur twice within one day ("clocks fall back"). 
The following options are available: -* ``'raise'``: Raises a ``pytz.AmbiguousTimeError`` (the default behavior) +* ``'raise'``: Raises a ``ValueError`` (the default behavior) * ``'infer'``: Attempt to determine the correct offset base on the monotonicity of the timestamps * ``'NaT'``: Replaces ambiguous times with ``NaT`` * ``bool``: ``True`` represents a DST time, ``False`` represents non-DST time. An array-like of ``bool`` values is supported for a sequence of times. @@ -2604,7 +2604,7 @@ A DST transition may also shift the local time ahead by 1 hour creating nonexist local times ("clocks spring forward"). The behavior of localizing a timeseries with nonexistent times can be controlled by the ``nonexistent`` argument. The following options are available: -* ``'raise'``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) +* ``'raise'``: Raises a ``ValueError`` (the default behavior) * ``'NaT'``: Replaces nonexistent times with ``NaT`` * ``'shift_forward'``: Shifts nonexistent times forward to the closest real time * ``'shift_backward'``: Shifts nonexistent times backward to the closest real time diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3de65fe6f682c..f25edd39cf7da 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -50,8 +50,10 @@ Other enhancements - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) +- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) +- Support passing an :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) @@ -220,6 +222,8 @@ Optional libraries below the lowest tested version may still work, but are not c +------------------------+---------------------+ | Package | New Minimum Version | +========================+=====================+ +| pytz | 2023.4 | ++------------------------+---------------------+ | fastparquet | 2023.10.0 | +------------------------+---------------------+ | adbc-driver-postgresql | 0.10.0 | @@ -229,6 +233,37 @@ Optional libraries below the lowest tested version may still work, but are not c See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. +.. _whatsnew_300.api_breaking.pytz: + +``pytz`` now an optional dependency +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas now uses :py:mod:`zoneinfo` from the standard library as the default timezone implementation when passing a timezone +string to various methods. (:issue:`34916`) + +*Old behavior:* + +.. code-block:: ipython + + In [1]: ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") In [2]: ts.tz + <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD> + +*New behavior:* + +..
ipython:: python + + ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific") + ts.tz + +``pytz`` timezone objects are still supported when passed directly, but they will no longer be returned by default +from string inputs. Moreover, ``pytz`` is no longer a required dependency of pandas, but can be installed +with the pip extra ``pip install pandas[timezone]``. + + +Additionally, pandas no longer throws ``pytz`` exceptions for timezone operations leading to ambiguous or nonexistent +times. These cases will now raise a ``ValueError``. + .. _whatsnew_300.api_breaking.other: Other API changes @@ -618,7 +653,9 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ +- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) +- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) @@ -631,6 +668,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) +- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`) Styler diff --git a/environment.yml b/environment.yml index e5646af07c45c..34bc0591ca8df 100644 --- a/environment.yml +++ b/environment.yml @@ -24,7 +24,6 @@ dependencies: # required dependencies - python-dateutil - numpy<2 - - pytz # optional dependencies - beautifulsoup4>=4.11.2 @@ -50,6 +49,7 @@ dependencies: - pyreadstat>=1.2.0 - pytables>=3.8.0 - python-calamine>=0.1.7 + - pytz>=2023.4 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 - scipy>=1.10.0 diff --git a/pandas/__init__.py b/pandas/__init__.py index 3ee6f6abf97bf..05547e50bbb37 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -3,7 +3,7 @@ __docformat__ = "restructuredtext" # Let users know if they're missing any of our hard dependencies -_hard_dependencies = ("numpy", "pytz", "dateutil") +_hard_dependencies = ("numpy", "dateutil") _missing_dependencies = [] for _dependency in _hard_dependencies: diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 51794ec04b29e..4ed2d4c3be692 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -426,6 +426,11 @@ def option_context(*args) -> Generator[None, None, None]: None No return value. + Yields + ------ + None + No yield value. + See Also -------- get_option : Retrieve the value of the specified option. 
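For illustration (a minimal sketch, not part of the patch): the behavior change described in the whatsnew entry above, assuming pandas built from this branch on Python 3.9+, where ``zoneinfo`` is in the standard library:

    import zoneinfo

    import pandas as pd

    # Timezone strings now resolve to zoneinfo.ZoneInfo rather than a pytz zone
    ts = pd.Timestamp(2024, 1, 1).tz_localize("US/Pacific")
    assert isinstance(ts.tz, zoneinfo.ZoneInfo)

    # Nonexistent (and ambiguous) local times now raise ValueError instead of
    # pytz.NonExistentTimeError / pytz.AmbiguousTimeError; 02:30 on 2015-03-29
    # does not exist in Europe/Warsaw ("clocks spring forward")
    try:
        pd.Timestamp("2015-03-29 02:30:00").tz_localize("Europe/Warsaw")
    except ValueError as err:
        print(err)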
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 0fadbbbed2c72..a635dd33f8420 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -69,6 +69,7 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.timezones cimport ( get_utcoffset, is_utc, + treat_tz_as_pytz, ) from pandas._libs.tslibs.tzconversion cimport ( Localizer, @@ -747,11 +748,17 @@ cdef datetime _localize_pydatetime(datetime dt, tzinfo tz): identically, i.e. discards nanos from Timestamps. It also assumes that the `tz` input is not None. """ - try: + if treat_tz_as_pytz(tz): + import pytz + # datetime.replace with pytz may be incorrect result # TODO: try to respect `fold` attribute - return tz.localize(dt, is_dst=None) - except AttributeError: + try: + return tz.localize(dt, is_dst=None) + except (pytz.AmbiguousTimeError, pytz.NonExistentTimeError) as err: + # As of pandas 3.0, we raise ValueErrors instead of pytz exceptions + raise ValueError(str(err)) from err + else: return dt.replace(tzinfo=tz) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 6ae5a96c428c2..3cb4dda1cd273 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -602,7 +602,24 @@ class NaTType(_NaT): utctimetuple = _make_error_func( "utctimetuple", """ - Return UTC time tuple, compatible with time.localtime(). + Return UTC time tuple, compatible with `time.localtime()`. + + This method converts the Timestamp to UTC and returns a time tuple + containing 9 components: year, month, day, hour, minute, second, + weekday, day of year, and DST flag. This is particularly useful for + converting a Timestamp to a format compatible with time module functions. + + Returns + ------- + time.struct_time + A time.struct_time object representing the UTC time. + + See Also + -------- + datetime.datetime.utctimetuple : + Return UTC time tuple, compatible with time.localtime(). + Timestamp.timetuple : Return time tuple of local time. + time.struct_time : Time tuple structure used by time functions. Examples -------- @@ -619,6 +636,22 @@ class NaTType(_NaT): """ Return utc offset. + This method returns the difference between UTC and the local time + as a `timedelta` object. It is useful for understanding the time + difference between the current timezone and UTC. + + Returns + ------- + timedelta + The difference between UTC and the local time as a `timedelta` object. + + See Also + -------- + datetime.datetime.utcoffset : + Standard library method to get the UTC offset of a datetime object. + Timestamp.tzname : Return the name of the timezone. + Timestamp.dst : Return the daylight saving time (DST) adjustment. + Examples -------- >>> ts = pd.Timestamp('2023-01-01 10:00:00', tz='Europe/Brussels') @@ -633,6 +666,13 @@ class NaTType(_NaT): """ Return time zone name. + This method returns the name of the Timestamp's time zone as a string. + + See Also + -------- + Timestamp.tzinfo : Returns the timezone information of the Timestamp. + Timestamp.tz_convert : Convert timezone-aware Timestamp to another time zone. + Examples -------- >>> ts = pd.Timestamp('2023-01-01 10:00:00', tz='Europe/Brussels') @@ -772,6 +812,21 @@ class NaTType(_NaT): Construct a timezone-aware UTC datetime from a POSIX timestamp. + This method creates a timezone-aware UTC datetime object from a + POSIX timestamp. + + Parameters + ---------- + ts : float + POSIX timestamp.
+ + See Also + -------- + Timestamp.tzname : Return time zone name. + Timestamp.utcnow : Return a new Timestamp representing UTC day and time. + Timestamp.fromtimestamp : Transform timestamp[, tz] to tz's local + time from POSIX timestamp. + Notes ----- Timestamp.utcfromtimestamp behavior differs from datetime.utcfromtimestamp @@ -1052,9 +1107,9 @@ class NaTType(_NaT): * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. - nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. @@ -1065,7 +1120,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Returns @@ -1153,9 +1208,9 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. - nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. @@ -1166,7 +1221,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Raises @@ -1248,9 +1303,9 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. - nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ timedelta}, default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. @@ -1261,7 +1316,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Raises @@ -1412,9 +1467,9 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time.
- nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. @@ -1427,7 +1482,7 @@ default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Returns @@ -1439,6 +1494,13 @@ default 'raise' TypeError If the Timestamp is tz-aware and tz is not None. + See Also + -------- + Timestamp.tzinfo : Returns the timezone information of the Timestamp. + Timestamp.tz_convert : Convert timezone-aware Timestamp to another time zone. + DatetimeIndex.tz_localize : Localize a DatetimeIndex to a specific time zone. + datetime.datetime.astimezone : Convert a datetime object to another time zone. + Examples -------- Create a naive timestamp object: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 554c4f109f1c5..c48acc07b34db 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -595,6 +595,24 @@ cdef class BaseOffset: @property def rule_code(self) -> str: + """ + Return a string representing the base frequency. + + See Also + -------- + tseries.offsets.Hour.rule_code : + Returns a string representing the base frequency of 'h'. + tseries.offsets.Day.rule_code : + Returns a string representing the base frequency of 'D'. + + Examples + -------- + >>> pd.offsets.Hour().rule_code + 'h' + + >>> pd.offsets.Week(5).rule_code + 'W' + """ return self._prefix @cache_readonly @@ -602,6 +620,17 @@ cdef class BaseOffset: """ Return a string representing the frequency. + See Also + -------- + tseries.offsets.BusinessDay.freqstr : + Return a string representing an offset frequency in Business Days. + tseries.offsets.BusinessHour.freqstr : + Return a string representing an offset frequency in Business Hours. + tseries.offsets.Week.freqstr : + Return a string representing an offset frequency in Weeks. + tseries.offsets.Hour.freqstr : + Return a string representing an offset frequency in Hours. + Examples -------- >>> pd.DateOffset(5).freqstr @@ -779,6 +808,26 @@ cdef class BaseOffset: @property def nanos(self): + """ + Returns an integer of the total number of nanoseconds for fixed frequencies. + + Raises + ------ + ValueError + If the frequency is non-fixed. + + See Also + -------- + tseries.offsets.Hour.nanos : + Returns an integer of the total number of nanoseconds. + tseries.offsets.Day.nanos : + Returns an integer of the total number of nanoseconds. + + Examples + -------- + >>> pd.offsets.Week(n=1).nanos + Traceback (most recent call last): + ValueError: Week: weekday=None is a non-fixed frequency + """ raise ValueError(f"{self} is a non-fixed frequency") # ------------------------------------------------------------------ @@ -986,12 +1035,14 @@ cdef class Tick(SingleConstructorOffset): @property def nanos(self) -> int64_t: """ - Return an integer of the total number of nanoseconds. + Returns an integer of the total number of nanoseconds. - Raises - ------ - ValueError - If the frequency is non-fixed. + See Also + -------- + tseries.offsets.Hour.nanos : + Returns an integer of the total number of nanoseconds. + tseries.offsets.Day.nanos : + Returns an integer of the total number of nanoseconds.
Examples -------- @@ -1147,7 +1198,7 @@ cdef class Hour(Tick): """ Offset ``n`` hours. - Parameters + Attributes ---------- n : int, default 1 The number of hours represented. @@ -1183,7 +1234,7 @@ cdef class Minute(Tick): """ Offset ``n`` minutes. - Parameters + Attributes ---------- n : int, default 1 The number of minutes represented. @@ -1219,7 +1270,7 @@ cdef class Second(Tick): """ Offset ``n`` seconds. - Parameters + Attributes ---------- n : int, default 1 The number of seconds represented. @@ -1255,7 +1306,7 @@ cdef class Milli(Tick): """ Offset ``n`` milliseconds. - Parameters + Attributes ---------- n : int, default 1 The number of milliseconds represented. @@ -1292,7 +1343,7 @@ cdef class Micro(Tick): """ Offset ``n`` microseconds. - Parameters + Attributes ---------- n : int, default 1 The number of microseconds represented. @@ -1329,7 +1380,7 @@ cdef class Nano(Tick): """ Offset ``n`` nanoseconds. - Parameters + Attributes ---------- n : int, default 1 The number of nanoseconds represented. @@ -1616,7 +1667,7 @@ class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): Besides, adding a DateOffsets specified by the singular form of the date component can be used to replace certain component of the timestamp. - Parameters + Attributes ---------- n : int, default 1 The number of time periods the offset represents. @@ -2426,6 +2477,24 @@ cdef class WeekOfMonthMixin(SingleConstructorOffset): @property def rule_code(self) -> str: + """ + Return a string representing the base frequency. + + See Also + -------- + tseries.offsets.Hour.rule_code : + Returns a string representing the base frequency of 'h'. + tseries.offsets.Day.rule_code : + Returns a string representing the base frequency of 'D'. + + Examples + -------- + >>> pd.offsets.Week(5).rule_code + 'W' + + >>> pd.offsets.WeekOfMonth(n=1, week=0, weekday=0).rule_code + 'WOM-1MON' + """ weekday = int_to_weekday.get(self.weekday, "") if self.week == -1: # LastWeekOfMonth @@ -2472,6 +2541,24 @@ cdef class YearOffset(SingleConstructorOffset): @property def rule_code(self) -> str: + """ + Return a string representing the base frequency. + + See Also + -------- + tseries.offsets.Hour.rule_code : + Returns a string representing the base frequency of 'h'. + tseries.offsets.Day.rule_code : + Returns a string representing the base frequency of 'D'. + + Examples + -------- + >>> pd.tseries.offsets.YearBegin(n=1, month=2).rule_code + 'YS-FEB' + + >>> pd.tseries.offsets.YearEnd(n=1, month=6).rule_code + 'YE-JUN' + """ month = MONTH_ALIASES[self.month] return f"{self._prefix}-{month}" @@ -2506,7 +2593,7 @@ cdef class BYearEnd(YearOffset): """ DateOffset increments between the last business day of the year. - Parameters + Attributes ---------- n : int, default 1 The number of years represented. @@ -2804,7 +2891,7 @@ cdef class BQuarterBegin(QuarterOffset): startingMonth = 2 corresponds to dates like 2/01/2007, 5/01/2007, ... startingMonth = 3 corresponds to dates like 3/01/2007, 6/01/2007, ... - Parameters + Attributes ---------- n : int, default 1 The number of quarters represented. @@ -2886,7 +2973,7 @@ cdef class QuarterBegin(QuarterOffset): startingMonth = 2 corresponds to dates like 2/01/2007, 5/01/2007, ... startingMonth = 3 corresponds to dates like 3/01/2007, 6/01/2007, ... - Parameters + Attributes ---------- n : int, default 1 The number of quarters represented. @@ -2984,7 +3071,7 @@ cdef class MonthBegin(MonthOffset): MonthBegin goes to the next date which is a start of the month. 
- Parameters + Attributes ---------- n : int, default 1 The number of months represented. @@ -3272,7 +3359,7 @@ cdef class SemiMonthBegin(SemiMonthOffset): """ Two DateOffset's per month repeating on the first day of the month & day_of_month. - Parameters + Attributes ---------- n : int, default 1 The number of months represented. @@ -3304,7 +3391,7 @@ cdef class Week(SingleConstructorOffset): """ Weekly offset. - Parameters + Attributes ---------- n : int, default 1 The number of weeks represented. @@ -3458,6 +3545,24 @@ cdef class Week(SingleConstructorOffset): @property def rule_code(self) -> str: + """ + Return a string representing the base frequency. + + See Also + -------- + tseries.offsets.Hour.rule_code : + Returns a string representing the base frequency of 'h'. + tseries.offsets.Day.rule_code : + Returns a string representing the base frequency of 'D'. + + Examples + -------- + >>> pd.offsets.Hour().rule_code + 'h' + + >>> pd.offsets.Week(5).rule_code + 'W' + """ suffix = "" if self.weekday is not None: weekday = int_to_weekday[self.weekday] @@ -3477,7 +3582,7 @@ cdef class WeekOfMonth(WeekOfMonthMixin): """ Describes monthly dates like "the Tuesday of the 2nd week of each month". - Parameters + Attributes ---------- n : int, default 1 The number of months represented. @@ -3554,7 +3659,7 @@ cdef class LastWeekOfMonth(WeekOfMonthMixin): """ For example "the last Tuesday of each month". - Parameters + Attributes ---------- n : int, default 1 The number of months represented. @@ -3694,7 +3799,7 @@ cdef class FY5253(FY5253Mixin): """ X is a specific day of the week. Y is a certain month of the year - Parameters + Attributes ---------- n : int The number of fiscal years represented. @@ -3897,7 +4002,7 @@ cdef class FY5253Quarter(FY5253Mixin): startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... - Parameters + Attributes ---------- n : int The number of business quarters represented. @@ -4132,7 +4237,7 @@ cdef class Easter(SingleConstructorOffset): Right now uses the revised method which is valid in years 1583-4099. - Parameters + Attributes ---------- n : int, default 1 The number of years represented. diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c6ba97fe9f1a2..4f5dfc75a20bf 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1913,20 +1913,58 @@ cdef class _Period(PeriodMixin): Parameters ---------- freq : str, BaseOffset - The desired frequency. If passing a `str`, it needs to be a - valid :ref:`period alias <timeseries.period_aliases>`. + The target frequency to convert the Period object to. + If a string is provided, + it must be a valid :ref:`period alias <timeseries.period_aliases>`. + how : {'E', 'S', 'end', 'start'}, default 'end' - Start or end of the timespan. + Specifies whether to align the period to the start or end of the interval: + - 'E' or 'end': Align to the end of the interval. + - 'S' or 'start': Align to the start of the interval. Returns ------- - resampled : Period + Period : Period object with the specified frequency, aligned to the start or end of the interval as specified by ``how``. + + See Also + -------- + Period.end_time : Return the end Timestamp. + Period.start_time : Return the start Timestamp. + Period.dayofyear : Return the day of the year. + Period.dayofweek : Return the day of the week.
        Examples
        --------
-        >>> period = pd.Period('2023-1-1', freq='D')
+        Convert a daily period to an hourly period, aligning to the end of the day:
+
+        >>> period = pd.Period('2023-01-01', freq='D')
         >>> period.asfreq('h')
         Period('2023-01-01 23:00', 'h')
+
+        Convert a monthly period to a daily period, aligning to the start of the month:
+
+        >>> period = pd.Period('2023-01', freq='M')
+        >>> period.asfreq('D', how='start')
+        Period('2023-01-01', 'D')
+
+        Convert a yearly period to a monthly period, aligning to the last month:
+
+        >>> period = pd.Period('2023', freq='Y')
+        >>> period.asfreq('M', how='end')
+        Period('2023-12', 'M')
+
+        Convert a monthly period to an hourly period,
+        aligning to the first day of the month:
+
+        >>> period = pd.Period('2023-01', freq='M')
+        >>> period.asfreq('h', how='start')
+        Period('2023-01-01 00:00', 'h')
+
+        Convert a weekly period to a daily period, aligning to the last day of the week:
+
+        >>> period = pd.Period('2023-08-01', freq='W')
+        >>> period.asfreq('D', how='end')
+        Period('2023-08-06', 'D')
         """
         freq = self._maybe_convert_freq(freq)
         how = validate_end_alias(how)
@@ -2000,11 +2038,44 @@ cdef class _Period(PeriodMixin):
         """
         Return the year this Period falls on.

+        Returns
+        -------
+        int
+
+        See Also
+        --------
+        Period.month : Get the month of the year for the given Period.
+        Period.day : Return the day of the month the Period falls on.
+
+        Notes
+        -----
+        The year is based on the `ordinal` and `base` attributes of the Period.
+
         Examples
         --------
-        >>> period = pd.Period('2022-01', 'M')
+        Create a Period object for January 2023 and get the year:
+
+        >>> period = pd.Period('2023-01', 'M')
         >>> period.year
-        2022
+        2023
+
+        Create a Period object for 1 January 2023 and get the year:
+
+        >>> period = pd.Period('2023', 'D')
+        >>> period.year
+        2023
+
+        Get the year for a period representing a quarter:
+
+        >>> period = pd.Period('2023Q2', 'Q')
+        >>> period.year
+        2023
+
+        Handle the case where the Period is missing (NaT), which results in `NaN`:
+
+        >>> period = pd.Period('nan', 'M')
+        >>> period.year
+        nan
         """
         base = self._dtype._dtype_code
         return pyear(self.ordinal, base)
@@ -2014,11 +2085,45 @@ cdef class _Period(PeriodMixin):
         """
         Return the month this Period falls on.

+        Returns
+        -------
+        int
+
+        See Also
+        --------
+        Period.week : Get the week of the year for the given Period.
+        Period.year : Return the year this Period falls on.
+        Period.day : Return the day of the month this Period falls on.
+
+        Notes
+        -----
+        The month is based on the `ordinal` and `base` attributes of the Period.
+
        Examples
        --------
+        Create a Period object for January 2022 and get the month:
+
        >>> period = pd.Period('2022-01', 'M')
        >>> period.month
        1
+
+        For a yearly Period, the month of the period's end (December) is returned:
+
+        >>> period = pd.Period('2022', 'Y')
+        >>> period.month
+        12
+
+        When only a year is given with a monthly frequency, the month defaults
+        to January:
+
+        >>> period = pd.Period('2022', 'M')
+        >>> period.month
+        1
+
+        Handle the case where the Period is missing (NaT), which results in `NaN`:
+
+        >>> period = pd.Period('nan', 'M')
+        >>> period.month
+        nan
        """
        base = self._dtype._dtype_code
        return pmonth(self.ordinal, base)
diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx
index 43279051e2a30..ccb1a1d6870f7 100644
--- a/pandas/_libs/tslibs/strptime.pyx
+++ b/pandas/_libs/tslibs/strptime.pyx
@@ -16,6 +16,7 @@ FUNCTIONS:
 strptime -- Calculates the time struct represented by the passed-in string
 """
 from datetime import timezone
+import zoneinfo

 from cpython.datetime cimport (
     PyDate_Check,
@@ -38,7 +39,6 @@ from _thread import allocate_lock as _thread_allocate_lock
 import re

 import numpy as np
-import pytz

 cimport numpy as cnp
 from numpy cimport (
@@ -747,7 +747,7 @@ cdef tzinfo _parse_with_format(
             week_of_year_start = 0
         elif parse_code == 17:
             # e.g. val='2011-12-30T00:00:00.000000UTC'; fmt='%Y-%m-%dT%H:%M:%S.%f%Z'
-            tz = pytz.timezone(found_dict["Z"])
+            tz = zoneinfo.ZoneInfo(found_dict["Z"])
         elif parse_code == 19:
             # e.g. val='March 1, 2018 12:00:00+0400'; fmt='%B %d, %Y %H:%M:%S%z'
             tz = parse_timezone_directive(found_dict["z"])
@@ -837,7 +837,7 @@ class TimeRE(_TimeRE):
         if key == "Z":
             # lazy computation
             if self._Z is None:
-                self._Z = self.__seqToRE(pytz.all_timezones, "Z")
+                self._Z = self.__seqToRE(zoneinfo.available_timezones(), "Z")
             # Note: handling Z is the key difference vs using the stdlib
             # _strptime.TimeRE. test_to_datetime_parse_tzname_or_tzoffset with
             # fmt='%Y-%m-%d %H:%M:%S %Z' fails with the stdlib version.
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index 7cb9c852ea1e3..1cbb24084a62b 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -254,6 +254,28 @@ cdef class _Timestamp(ABCTimestamp):
         """
         The abbreviation associated with self._creso.

+        This property returns a string abbreviation of the time unit of the
+        Timestamp's resolution, i.e. the smallest time unit that can be
+        represented by this Timestamp object.
+
+        Returns
+        -------
+        str
+            A string abbreviation of the Timestamp's resolution unit:
+            - 's' for second
+            - 'ms' for millisecond
+            - 'us' for microsecond
+            - 'ns' for nanosecond
+
+        See Also
+        --------
+        Timestamp.resolution : Return resolution of the Timestamp.
+        Timedelta : A duration expressing the difference between two dates or times.
+
         Examples
         --------
         >>> pd.Timestamp("2020-01-01 12:34:56").unit
@@ -1590,6 +1612,21 @@ class Timestamp(_Timestamp):

         Construct a timezone-aware UTC datetime from a POSIX timestamp.

+        This method constructs a timezone-aware UTC Timestamp from a POSIX
+        timestamp, interpreting the value as seconds since the epoch in UTC.
+
+        Parameters
+        ----------
+        ts : float
+            POSIX timestamp.
+
+        See Also
+        --------
+        Timestamp.tzname : Return time zone name.
+        Timestamp.utcnow : Return a new Timestamp representing UTC day and time.
+        Timestamp.fromtimestamp : Transform timestamp[, tz] to tz's local
+            time from POSIX timestamp.
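With ``pytz.timezone`` lookups replaced by ``zoneinfo.ZoneInfo`` in the strptime path above, a ``%Z`` name should now resolve to a stdlib zone. A sketch reusing the value/format from the source comment; the exact tzinfo repr is an assumption and tzdata must be available:

    >>> import pandas as pd
    >>> ts = pd.to_datetime('2011-12-30T00:00:00.000000UTC',
    ...                     format='%Y-%m-%dT%H:%M:%S.%f%Z')
    >>> ts.tzinfo
    zoneinfo.ZoneInfo(key='UTC')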
+
        Notes
        -----
        Timestamp.utcfromtimestamp behavior differs from datetime.utcfromtimestamp
@@ -1765,6 +1802,13 @@ class Timestamp(_Timestamp):
         """
         Return time zone name.

+        This method returns the name of the Timestamp's time zone as a string.
+
+        See Also
+        --------
+        Timestamp.tzinfo : Returns the timezone information of the Timestamp.
+        Timestamp.tz_convert : Convert timezone-aware Timestamp to another time zone.
+
         Examples
         --------
         >>> ts = pd.Timestamp('2023-01-01 10:00:00', tz='Europe/Brussels')
@@ -1779,6 +1823,22 @@ class Timestamp(_Timestamp):
         """
         Return utc offset.

+        This method returns the difference between UTC and the local time
+        as a `timedelta` object. It is useful for understanding the time
+        difference between the Timestamp's timezone and UTC.
+
+        Returns
+        -------
+        timedelta
+            The difference between UTC and the local time as a `timedelta` object.
+
+        See Also
+        --------
+        datetime.datetime.utcoffset :
+            Standard library method to get the UTC offset of a datetime object.
+        Timestamp.tzname : Return the name of the timezone.
+        Timestamp.dst : Return the daylight saving time (DST) adjustment.
+
         Examples
         --------
         >>> ts = pd.Timestamp('2023-01-01 10:00:00', tz='Europe/Brussels')
@@ -1791,7 +1851,24 @@ class Timestamp(_Timestamp):

     def utctimetuple(self):
         """
-        Return UTC time tuple, compatible with time.localtime().
+        Return UTC time tuple, compatible with `time.localtime()`.
+
+        This method converts the Timestamp to UTC and returns a time tuple
+        containing 9 components: year, month, day, hour, minute, second,
+        weekday, day of year, and DST flag. This is particularly useful for
+        converting a Timestamp to a format compatible with time module functions.
+
+        Returns
+        -------
+        time.struct_time
+            A time.struct_time object representing the UTC time.
+
+        See Also
+        --------
+        datetime.datetime.utctimetuple :
+            Return UTC time tuple, compatible with time.localtime().
+        Timestamp.timetuple : Return time tuple of local time.
+        time.struct_time : Time tuple structure used by time functions.

         Examples
         --------
@@ -2134,9 +2211,9 @@ class Timestamp(_Timestamp):
               * bool contains flags to determine if time is dst or not (note
                 that this flag is only applicable for ambiguous fall dst dates).
               * 'NaT' will return NaT for an ambiguous time.
-              * 'raise' will raise an AmbiguousTimeError for an ambiguous time.
+              * 'raise' will raise a ValueError for an ambiguous time.

-        nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \
+        nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \
timedelta}, default 'raise'
             A nonexistent time does not exist in a particular timezone
             where clocks moved forward due to DST.
@@ -2147,7 +2224,7 @@ timedelta}, default 'raise'
               closest existing time.
             * 'NaT' will return NaT where there are nonexistent times.
             * timedelta objects will shift nonexistent times by the timedelta.
-            * 'raise' will raise an NonExistentTimeError if there are
+            * 'raise' will raise a ValueError if there are
               nonexistent times.

         Returns
@@ -2237,9 +2314,9 @@ timedelta}, default 'raise'
               * bool contains flags to determine if time is dst or not (note
                 that this flag is only applicable for ambiguous fall dst dates).
               * 'NaT' will return NaT for an ambiguous time.
-              * 'raise' will raise an AmbiguousTimeError for an ambiguous time.
+              * 'raise' will raise a ValueError for an ambiguous time.
- nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ + nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. @@ -2250,7 +2327,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Raises @@ -2332,9 +2409,9 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. - nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ + nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \ timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. @@ -2345,7 +2422,7 @@ timedelta}, default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Raises @@ -2471,9 +2548,9 @@ timedelta}, default 'raise' * bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates). * 'NaT' will return NaT for an ambiguous time. - * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + * 'raise' will raise a ValueError for an ambiguous time. - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. @@ -2486,7 +2563,7 @@ default 'raise' closest existing time. * 'NaT' will return NaT where there are nonexistent times. * timedelta objects will shift nonexistent times by the timedelta. - * 'raise' will raise an NonExistentTimeError if there are + * 'raise' will raise a ValueError if there are nonexistent times. Returns @@ -2498,6 +2575,13 @@ default 'raise' TypeError If the Timestamp is tz-aware and tz is not None. + See Also + -------- + Timestamp.tzinfo : Returns the timezone information of the Timestamp. + Timestamp.tz_convert : Convert timezone-aware Timestamp to another time zone. + DatetimeIndex.tz_localize : Localize a DatetimeIndex to a specific time zone. + datetime.datetime.astimezone : Convert a datetime object to another time zone. 
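Since the pytz exception types are dropped, ambiguous and nonexistent local times now surface as plain ``ValueError``. A sketch of the nonexistent case; the message text follows the wording added in tzconversion.pyx further below, and the exact rendering of the stamp is an assumption:

    >>> import pandas as pd
    >>> pd.Timestamp('2015-03-29 02:30:00').tz_localize('Europe/Warsaw')
    Traceback (most recent call last):
      ...
    ValueError: 2015-03-29 02:30:00 is a nonexistent time due to daylight savings time. Try using the 'nonexistent' argument.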
+ Examples -------- Create a naive timestamp object: diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 6292b6ce0fd1d..36b644ffc826d 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -2,17 +2,10 @@ from datetime import ( timedelta, timezone, ) +import zoneinfo from pandas.compat._optional import import_optional_dependency -try: - # py39+ - import zoneinfo - from zoneinfo import ZoneInfo -except ImportError: - zoneinfo = None - ZoneInfo = None - from cpython.datetime cimport ( datetime, timedelta, @@ -28,8 +21,8 @@ from dateutil.tz import ( tzutc as _dateutil_tzutc, ) import numpy as np -import pytz -from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo + +pytz = import_optional_dependency("pytz", errors="ignore") cimport numpy as cnp from numpy cimport int64_t @@ -45,10 +38,11 @@ from pandas._libs.tslibs.util cimport ( cdef int64_t NPY_NAT = get_nat() cdef tzinfo utc_stdlib = timezone.utc -cdef tzinfo utc_pytz = pytz.utc +cdef tzinfo utc_pytz = pytz.UTC if pytz else None cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc() cdef tzinfo utc_zoneinfo = None +cdef type ZoneInfo = zoneinfo.ZoneInfo # ---------------------------------------------------------------------- @@ -56,13 +50,13 @@ cdef tzinfo utc_zoneinfo = None cdef bint is_utc_zoneinfo(tzinfo tz): # Workaround for cases with missing tzdata # https://github.com/pandas-dev/pandas/pull/46425#discussion_r830633025 - if tz is None or zoneinfo is None: + if tz is None: return False global utc_zoneinfo if utc_zoneinfo is None: try: - utc_zoneinfo = ZoneInfo("UTC") + utc_zoneinfo = zoneinfo.ZoneInfo("UTC") except zoneinfo.ZoneInfoNotFoundError: return False # Warn if tzdata is too old, even if there is a system tzdata to alert @@ -74,17 +68,15 @@ cdef bint is_utc_zoneinfo(tzinfo tz): cpdef inline bint is_utc(tzinfo tz): return ( - tz is utc_pytz - or tz is utc_stdlib + tz is utc_stdlib or isinstance(tz, _dateutil_tzutc) or tz is utc_dateutil_str or is_utc_zoneinfo(tz) + or (utc_pytz is not None and tz is utc_pytz) ) cdef bint is_zoneinfo(tzinfo tz): - if ZoneInfo is None: - return False return isinstance(tz, ZoneInfo) @@ -166,7 +158,7 @@ cpdef inline tzinfo maybe_get_tz(object tz): elif tz == "UTC" or tz == "utc": tz = utc_stdlib else: - tz = pytz.timezone(tz) + tz = zoneinfo.ZoneInfo(tz) elif is_integer_object(tz): tz = timezone(timedelta(seconds=tz)) elif isinstance(tz, tzinfo): @@ -205,7 +197,7 @@ cdef object tz_cache_key(tzinfo tz): the same tz file). Also, pytz objects are not always hashable so we use str(tz) instead. 
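One user-visible effect of routing string zones through ``zoneinfo`` in ``maybe_get_tz``: a timezone given by name should now come back as a stdlib ``ZoneInfo`` rather than a pytz zone. A sketch, assuming tzdata is installed:

    >>> import pandas as pd
    >>> pd.Timestamp('2024-01-01', tz='US/Pacific').tz
    zoneinfo.ZoneInfo(key='US/Pacific')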
""" - if isinstance(tz, _pytz_BaseTzInfo): + if pytz is not None and isinstance(tz, pytz.tzinfo.BaseTzInfo): return tz.zone elif isinstance(tz, _dateutil_tzfile): if ".tar.gz" in tz._filename: @@ -239,7 +231,7 @@ cpdef inline bint is_fixed_offset(tzinfo tz): return 1 else: return 0 - elif treat_tz_as_pytz(tz): + elif treat_tz_as_pytz(tz) and pytz is not None: if (len(tz._transition_info) == 0 and len(tz._utc_transition_times) == 0): return 1 diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e3facd3d9599b..c100f315e9a19 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -15,7 +15,6 @@ from cython cimport Py_ssize_t import_datetime() import numpy as np -import pytz cimport numpy as cnp from numpy cimport ( @@ -196,8 +195,8 @@ def tz_localize_to_utc( NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns, ): """ - Localize tzinfo-naive i8 to given time zone (using pytz). If - there are ambiguities in the values, raise AmbiguousTimeError. + Localize tzinfo-naive i8 to given time zone. If + there are ambiguities in the values, raise ValueError. Parameters ---------- @@ -368,7 +367,7 @@ timedelta-like} result[i] = NPY_NAT else: stamp = _render_tstamp(val, creso=creso) - raise pytz.AmbiguousTimeError( + raise ValueError( f"Cannot infer dst time from {stamp}, try using the " "'ambiguous' argument" ) @@ -428,7 +427,10 @@ timedelta-like} result[i] = NPY_NAT else: stamp = _render_tstamp(val, creso=creso) - raise pytz.NonExistentTimeError(stamp) + raise ValueError( + f"{stamp} is a nonexistent time due to daylight savings time. " + "Try using the 'nonexistent' argument." + ) return result.base # .base to get underlying ndarray @@ -631,7 +633,7 @@ cdef ndarray[int64_t] _get_dst_hours( if trans_idx.size == 1: # see test_tz_localize_to_utc_ambiguous_infer stamp = _render_tstamp(vals[trans_idx[0]], creso=creso) - raise pytz.AmbiguousTimeError( + raise ValueError( f"Cannot infer dst time from {stamp} as there " "are no repeated times" ) @@ -653,14 +655,16 @@ cdef ndarray[int64_t] _get_dst_hours( if grp.size == 1 or np.all(delta > 0): # see test_tz_localize_to_utc_ambiguous_infer stamp = _render_tstamp(vals[grp[0]], creso=creso) - raise pytz.AmbiguousTimeError(stamp) + raise ValueError( + f"{stamp} is an ambiguous time and cannot be inferred." + ) # Find the index for the switch and pull from a for dst and b # for standard switch_idxs = (delta <= 0).nonzero()[0] if switch_idxs.size > 1: # see test_tz_localize_to_utc_ambiguous_infer - raise pytz.AmbiguousTimeError( + raise ValueError( f"There are {switch_idxs.size} dst switches when " "there should only be 1." 
) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 288559d386a71..756c209661fbb 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -33,6 +33,7 @@ pa_version_under14p1, pa_version_under16p0, pa_version_under17p0, + pa_version_under18p0, ) if TYPE_CHECKING: @@ -157,6 +158,7 @@ def is_ci_environment() -> bool: "pa_version_under14p1", "pa_version_under16p0", "pa_version_under17p0", + "pa_version_under18p0", "HAS_PYARROW", "IS64", "ISMUSL", diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 06082e71af32a..6b90389a62056 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -43,6 +43,7 @@ "pyreadstat": "1.2.0", "pytest": "7.3.2", "python-calamine": "0.1.7", + "pytz": "2023.4", "pyxlsb": "1.0.10", "s3fs": "2022.11.0", "scipy": "1.10.0", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index ebfc0d69d9655..bd009b544f31e 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -17,6 +17,7 @@ pa_version_under15p0 = _palv < Version("15.0.0") pa_version_under16p0 = _palv < Version("16.0.0") pa_version_under17p0 = _palv < Version("17.0.0") + pa_version_under18p0 = _palv < Version("18.0.0") HAS_PYARROW = True except ImportError: pa_version_under10p1 = True @@ -28,4 +29,5 @@ pa_version_under15p0 = True pa_version_under16p0 = True pa_version_under17p0 = True + pa_version_under18p0 = True HAS_PYARROW = False diff --git a/pandas/conftest.py b/pandas/conftest.py index 7c485515f0784..d11213f1164bc 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -32,7 +32,10 @@ import gc import operator import os -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Any, +) import uuid from dateutil.tz import ( @@ -43,11 +46,8 @@ from hypothesis import strategies as st import numpy as np import pytest -from pytz import ( - FixedOffset, - utc, -) +from pandas.compat._optional import import_optional_dependency import pandas.util._test_decorators as td from pandas.core.dtypes.dtypes import ( @@ -92,12 +92,7 @@ del pa has_pyarrow = True -import zoneinfo - -try: - zoneinfo.ZoneInfo("UTC") -except zoneinfo.ZoneInfoNotFoundError: - zoneinfo = None # type: ignore[assignment] +pytz = import_optional_dependency("pytz", errors="ignore") # ---------------------------------------------------------------- @@ -1199,19 +1194,19 @@ def deco(*args): "UTC-02:15", tzutc(), tzlocal(), - FixedOffset(300), - FixedOffset(0), - FixedOffset(-300), timezone.utc, timezone(timedelta(hours=1)), timezone(timedelta(hours=-1), name="foo"), ] -if zoneinfo is not None: +if pytz is not None: TIMEZONES.extend( - [ - zoneinfo.ZoneInfo("US/Pacific"), # type: ignore[list-item] - zoneinfo.ZoneInfo("UTC"), # type: ignore[list-item] - ] + ( + pytz.FixedOffset(300), + pytz.FixedOffset(0), + pytz.FixedOffset(-300), + pytz.timezone("US/Pacific"), + pytz.timezone("UTC"), + ) ) TIMEZONE_IDS = [repr(i) for i in TIMEZONES] @@ -1234,9 +1229,10 @@ def tz_aware_fixture(request): return request.param -_UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc] -if zoneinfo is not None: - _UTCS.append(zoneinfo.ZoneInfo("UTC")) +_UTCS = ["utc", "dateutil/UTC", tzutc(), timezone.utc] + +if pytz is not None: + _UTCS.append(pytz.utc) @pytest.fixture(params=_UTCS) @@ -2046,12 +2042,12 @@ def using_infer_string() -> bool: return pd.options.future.infer_string is True -warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] -if zoneinfo is not None: - warsaws.append(zoneinfo.ZoneInfo("Europe/Warsaw")) # type: ignore[arg-type] +_warsaws: 
list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] +if pytz is not None: + _warsaws.append(pytz.timezone("Europe/Warsaw")) -@pytest.fixture(params=warsaws) +@pytest.fixture(params=_warsaws) def warsaw(request) -> str: """ tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo. diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 948836bf6a51d..56f8adda93251 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1529,9 +1529,7 @@ def safe_sort( order2 = sorter.argsort() if verify: mask = (codes < -len(values)) | (codes >= len(values)) - codes[mask] = 0 - else: - mask = None + codes[mask] = -1 new_codes = take_nd(order2, codes, fill_value=-1) else: reverse_indexer = np.empty(len(sorter), dtype=int) @@ -1540,14 +1538,6 @@ def safe_sort( # may deal with them here without performance loss using `mode='wrap'` new_codes = reverse_indexer.take(codes, mode="wrap") - if use_na_sentinel: - mask = codes == -1 - if verify: - mask = mask | (codes < -len(values)) | (codes >= len(values)) - - if use_na_sentinel and mask is not None: - np.putmask(new_codes, mask, -1) - return ordered, ensure_platform_int(new_codes) diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py index 5c933294fb944..b2f78182b9bf0 100644 --- a/pandas/core/array_algos/quantile.py +++ b/pandas/core/array_algos/quantile.py @@ -94,9 +94,9 @@ def quantile_with_mask( flat = np.array([fill_value] * len(qs)) result = np.repeat(flat, len(values)).reshape(len(values), len(qs)) else: - result = _nanpercentile( + result = _nanquantile( values, - qs * 100.0, + qs, na_value=fill_value, mask=mask, interpolation=interpolation, @@ -108,7 +108,7 @@ def quantile_with_mask( return result -def _nanpercentile_1d( +def _nanquantile_1d( values: np.ndarray, mask: npt.NDArray[np.bool_], qs: npt.NDArray[np.float64], @@ -116,7 +116,7 @@ def _nanpercentile_1d( interpolation: str, ) -> Scalar | np.ndarray: """ - Wrapper for np.percentile that skips missing values, specialized to + Wrapper for np.quantile that skips missing values, specialized to 1-dimensional case. Parameters @@ -142,7 +142,7 @@ def _nanpercentile_1d( # equiv: 'np.array([na_value] * len(qs))' but much faster return np.full(len(qs), na_value) - return np.percentile( + return np.quantile( values, qs, # error: No overload variant of "percentile" matches argument @@ -152,7 +152,7 @@ def _nanpercentile_1d( ) -def _nanpercentile( +def _nanquantile( values: np.ndarray, qs: npt.NDArray[np.float64], *, @@ -161,7 +161,7 @@ def _nanpercentile( interpolation: str, ): """ - Wrapper for np.percentile that skips missing values. + Wrapper for np.quantile that skips missing values. 
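As an aside on the rename: it tracks the underlying switch from ``np.percentile`` to ``np.quantile``, which takes fractions in [0, 1] instead of percentages (hence the ``qs * 100.0`` scaling dropped above). The two NumPy functions are otherwise equivalent:

    >>> import numpy as np
    >>> a = np.array([1.0, 2.0, 3.0, 4.0])
    >>> float(np.quantile(a, 0.25)) == float(np.percentile(a, 25))
    True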
Parameters ---------- @@ -180,7 +180,7 @@ def _nanpercentile( if values.dtype.kind in "mM": # need to cast to integer to avoid rounding errors in numpy - result = _nanpercentile( + result = _nanquantile( values.view("i8"), qs=qs, na_value=na_value.view("i8"), @@ -196,7 +196,7 @@ def _nanpercentile( # Caller is responsible for ensuring mask shape match assert mask.shape == values.shape result = [ - _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation) + _nanquantile_1d(val, m, qs, na_value, interpolation=interpolation) for (val, m) in zip(list(values), list(mask)) ] if values.dtype.kind == "f": @@ -215,7 +215,7 @@ def _nanpercentile( result = result.astype(values.dtype, copy=False) return result else: - return np.percentile( + return np.quantile( values, qs, axis=1, diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d07bfeda50e1d..e95fa441e18fb 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -709,7 +709,13 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray: if isinstance( other, (ArrowExtensionArray, np.ndarray, list, BaseMaskedArray) ) or isinstance(getattr(other, "dtype", None), CategoricalDtype): - result = pc_func(self._pa_array, self._box_pa(other)) + try: + result = pc_func(self._pa_array, self._box_pa(other)) + except pa.ArrowNotImplementedError: + # TODO: could this be wrong if other is object dtype? + # in which case we need to operate pointwise? + result = ops.invalid_comparison(self, other, op) + result = pa.array(result, type=pa.bool_()) elif is_scalar(other): try: result = pc_func(self._pa_array, self._box_pa(other)) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ad0bde3abbdd4..fbe1677b95b33 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -19,6 +19,7 @@ import numpy as np +from pandas._config import using_string_dtype from pandas._config.config import get_option from pandas._libs import ( @@ -1759,6 +1760,10 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: dtype='object') """ result = self._format_native_types(date_format=date_format, na_rep=np.nan) + if using_string_dtype(): + from pandas import StringDtype + + return pd_array(result, dtype=StringDtype(na_value=np.nan)) # type: ignore[return-value] return result.astype(object, copy=False) @@ -1781,7 +1786,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: a non-DST time (note that this flag is only applicable for ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous + - 'raise' will raise a ValueError if there are ambiguous times. nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise' @@ -1794,7 +1799,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: closest existing time - 'NaT' will return NaT where there are nonexistent times - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are + - 'raise' will raise a ValueError if there are nonexistent times. 
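A sketch of how the ``strftime`` change above is expected to surface once the ``future.infer_string`` option is enabled; an isinstance check is used rather than a dtype repr (which may vary), and the Index-level behaviour also relies on the indexes/datetimes.py change later in this patch:

    >>> import pandas as pd
    >>> pd.set_option('future.infer_string', True)
    >>> idx = pd.date_range('2023-01-01', periods=2)
    >>> isinstance(idx.strftime('%Y-%m-%d').dtype, pd.StringDtype)
    True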
Returns diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index dddfc440109d3..201c449185057 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,6 +15,7 @@ import numpy as np +from pandas._config import using_string_dtype from pandas._config.config import get_option from pandas._libs import ( @@ -158,15 +159,8 @@ def f(self): # these return a boolean by-definition return result - if field in self._object_ops: - result = fields.get_date_name_field(values, field, reso=self._creso) - result = self._maybe_mask_results(result, fill_value=None) - - else: - result = fields.get_date_field(values, field, reso=self._creso) - result = self._maybe_mask_results( - result, fill_value=None, convert="float64" - ) + result = fields.get_date_field(values, field, reso=self._creso) + result = self._maybe_mask_results(result, fill_value=None, convert="float64") return result @@ -243,7 +237,6 @@ def _scalar_type(self) -> type[Timestamp]: "is_year_end", "is_leap_year", ] - _object_ops: list[str] = ["freq", "tz"] _field_ops: list[str] = [ "year", "month", @@ -264,7 +257,7 @@ def _scalar_type(self) -> type[Timestamp]: ] _other_ops: list[str] = ["date", "time", "timetz"] _datetimelike_ops: list[str] = ( - _field_ops + _object_ops + _bool_ops + _other_ops + ["unit"] + _field_ops + _bool_ops + _other_ops + ["unit", "freq", "tz"] ) _datetimelike_methods: list[str] = [ "to_period", @@ -972,7 +965,7 @@ def tz_localize( non-DST time (note that this flag is only applicable for ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous + - 'raise' will raise a ValueError if there are ambiguous times. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ @@ -986,7 +979,7 @@ def tz_localize( closest existing time - 'NaT' will return NaT where there are nonexistent times - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are + - 'raise' will raise a ValueError if there are nonexistent times. 
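For reference, the non-raising alternatives documented above keep working as before; a sketch of ``nonexistent='shift_forward'`` across a spring-forward gap:

    >>> import pandas as pd
    >>> pd.Timestamp('2015-03-29 02:30:00').tz_localize(
    ...     'Europe/Warsaw', nonexistent='shift_forward')
    Timestamp('2015-03-29 03:00:00+0200', tz='Europe/Warsaw')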
Returns @@ -1340,6 +1333,13 @@ def month_name(self, locale=None) -> npt.NDArray[np.object_]: values, "month_name", locale=locale, reso=self._creso ) result = self._maybe_mask_results(result, fill_value=None) + if using_string_dtype(): + from pandas import ( + StringDtype, + array as pd_array, + ) + + return pd_array(result, dtype=StringDtype(na_value=np.nan)) # type: ignore[return-value] return result def day_name(self, locale=None) -> npt.NDArray[np.object_]: @@ -1401,6 +1401,14 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]: values, "day_name", locale=locale, reso=self._creso ) result = self._maybe_mask_results(result, fill_value=None) + if using_string_dtype(): + # TODO: no tests that check for dtype of result as of 2024-08-15 + from pandas import ( + StringDtype, + array as pd_array, + ) + + return pd_array(result, dtype=StringDtype(na_value=np.nan)) # type: ignore[return-value] return result @property diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 07eb91e0cb13b..03712f75db0c7 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -557,7 +557,3 @@ def _wrap_ndarray_result(self, result: np.ndarray): return TimedeltaArray._simple_new(result, dtype=result.dtype) return type(self)(result) - - # ------------------------------------------------------------------------ - # String methods interface - _str_na_value = np.nan diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 8a4fd9fc1b34d..823084c3e9982 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -140,12 +140,16 @@ def __init__( # infer defaults if storage is None: if na_value is not libmissing.NA: - if HAS_PYARROW: - storage = "pyarrow" - else: - storage = "python" + storage = get_option("mode.string_storage") + if storage == "auto": + if HAS_PYARROW: + storage = "pyarrow" + else: + storage = "python" else: storage = get_option("mode.string_storage") + if storage == "auto": + storage = "python" if storage == "pyarrow_numpy": # TODO raise a deprecation warning @@ -346,6 +350,55 @@ def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self: raise ValueError return cls._from_sequence(scalars, dtype=dtype) + def _str_map( + self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True + ): + if self.dtype.na_value is np.nan: + return self._str_map_nan_semantics(f, na_value=na_value, dtype=dtype) + + from pandas.arrays import BooleanArray + + if dtype is None: + dtype = self.dtype + if na_value is None: + na_value = self.dtype.na_value + + mask = isna(self) + arr = np.asarray(self) + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: type[IntegerArray | BooleanArray] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + elif dtype == np.dtype("bool"): + # GH#55736 + na_value = bool(na_value) + result = lib.map_infer_mask( + arr, + f, + mask.view("uint8"), + convert=False, + na_value=na_value, + # error: Argument 1 to "dtype" has incompatible type + # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected + # "Type[object]" + dtype=np.dtype(cast(type, dtype)), + ) + + if not na_value_is_na: + mask[:] = False + + return constructor(result, mask) + + else: + return self._str_map_str_or_object(dtype, na_value, arr, f, mask) + def _str_map_str_or_object( self, dtype, @@ -353,7 +406,6 @@ def _str_map_str_or_object( arr: np.ndarray, f, mask: npt.NDArray[np.bool_], - 
convert: bool, ): # _str_map helper for case where dtype is either string dtype or object if is_string_dtype(dtype) and not is_object_dtype(dtype): @@ -377,6 +429,45 @@ def _str_map_str_or_object( # -> We don't know the result type. E.g. `.get` can return anything. return lib.map_infer_mask(arr, f, mask.view("uint8")) + def _str_map_nan_semantics(self, f, na_value=None, dtype: Dtype | None = None): + if dtype is None: + dtype = self.dtype + if na_value is None: + na_value = self.dtype.na_value + + mask = isna(self) + arr = np.asarray(self) + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + na_value_is_na = isna(na_value) + if na_value_is_na: + if is_integer_dtype(dtype): + na_value = 0 + else: + na_value = True + + result = lib.map_infer_mask( + arr, + f, + mask.view("uint8"), + convert=False, + na_value=na_value, + dtype=np.dtype(cast(type, dtype)), + ) + if na_value_is_na and mask.any(): + # TODO: we could alternatively do this check before map_infer_mask + # and adjust the dtype/na_value we pass there. Which is more + # performant? + if is_integer_dtype(dtype): + result = result.astype("float64") + else: + result = result.astype("object") + result[mask] = np.nan + return result + + else: + return self._str_map_str_or_object(dtype, na_value, arr, f, mask) + # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" @@ -655,6 +746,12 @@ def _reduce( axis: AxisInt | None = 0, **kwargs, ): + if self.dtype.na_value is np.nan and name in ["any", "all"]: + if name == "any": + return nanops.nanany(self._ndarray, skipna=skipna) + else: + return nanops.nanall(self._ndarray, skipna=skipna) + if name in ["min", "max"]: result = getattr(self, name)(skipna=skipna, axis=axis) if keepdims: @@ -663,6 +760,12 @@ def _reduce( raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any: + if self.dtype.na_value is np.nan and result is libmissing.NA: + # the masked_reductions use pd.NA -> convert to np.nan + return np.nan + return super()._wrap_reduction_result(axis, result) + def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: nv.validate_min((), kwargs) result = masked_reductions.min( @@ -680,8 +783,11 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: def value_counts(self, dropna: bool = True) -> Series: from pandas.core.algorithms import value_counts_internal as value_counts - result = value_counts(self._ndarray, sort=False, dropna=dropna).astype("Int64") + result = value_counts(self._ndarray, sort=False, dropna=dropna) result.index = result.index.astype(self.dtype) + + if self.dtype.na_value is libmissing.NA: + result = result.astype("Int64") return result def memory_usage(self, deep: bool = False) -> int: @@ -732,104 +838,15 @@ def _cmp_method(self, other, op): # logical result = np.zeros(len(self._ndarray), dtype="bool") result[valid] = op(self._ndarray[valid], other) - return BooleanArray(result, mask) - - _arith_method = _cmp_method - - # ------------------------------------------------------------------------ - # String methods interface - # error: Incompatible types in assignment (expression has type "NAType", - # base class "NumpyExtensionArray" defined the type as "float") - _str_na_value = libmissing.NA # type: ignore[assignment] - - def _str_map_nan_semantics( - self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True - ): - if dtype is None: - dtype = self.dtype - if 
na_value is None: - na_value = self.dtype.na_value - - mask = isna(self) - arr = np.asarray(self) - convert = convert and not np.all(mask) - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - na_value_is_na = isna(na_value) - if na_value_is_na: - if is_integer_dtype(dtype): - na_value = 0 + res_arr = BooleanArray(result, mask) + if self.dtype.na_value is np.nan: + if op == operator.ne: + return res_arr.to_numpy(np.bool_, na_value=True) else: - na_value = True + return res_arr.to_numpy(np.bool_, na_value=False) + return res_arr - result = lib.map_infer_mask( - arr, - f, - mask.view("uint8"), - convert=False, - na_value=na_value, - dtype=np.dtype(cast(type, dtype)), - ) - if na_value_is_na and mask.any(): - if is_integer_dtype(dtype): - result = result.astype("float64") - else: - result = result.astype("object") - result[mask] = np.nan - return result - - else: - return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert) - - def _str_map( - self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True - ): - if self.dtype.na_value is np.nan: - return self._str_map_nan_semantics( - f, na_value=na_value, dtype=dtype, convert=convert - ) - - from pandas.arrays import BooleanArray - - if dtype is None: - dtype = StringDtype(storage="python") - if na_value is None: - na_value = self.dtype.na_value - - mask = isna(self) - arr = np.asarray(self) - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: type[IntegerArray | BooleanArray] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - - na_value_is_na = isna(na_value) - if na_value_is_na: - na_value = 1 - elif dtype == np.dtype("bool"): - na_value = bool(na_value) - result = lib.map_infer_mask( - arr, - f, - mask.view("uint8"), - convert=False, - na_value=na_value, - # error: Argument 1 to "dtype" has incompatible type - # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected - # "Type[object]" - dtype=np.dtype(cast(type, dtype)), - ) - - if not na_value_is_na: - mask[:] = False - - return constructor(result, mask) - - else: - return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert) + _arith_method = _cmp_method class StringArrayNumpySemantics(StringArray): @@ -861,38 +878,3 @@ def _from_backing_data(self, arr: np.ndarray) -> StringArrayNumpySemantics: # need to override NumpyExtensionArray._from_backing_data to ensure # we always preserve the dtype return NDArrayBacked._from_backing_data(self, arr) - - def _reduce( - self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs - ): - if name in ["any", "all"]: - if name == "any": - return nanops.nanany(self._ndarray, skipna=skipna) - else: - return nanops.nanall(self._ndarray, skipna=skipna) - else: - return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs) - - def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any: - # the masked_reductions use pd.NA - if result is libmissing.NA: - return np.nan - return super()._wrap_reduction_result(axis, result) - - def _cmp_method(self, other, op): - result = super()._cmp_method(other, op) - if op == operator.ne: - return result.to_numpy(np.bool_, na_value=True) - else: - return result.to_numpy(np.bool_, na_value=False) - - def value_counts(self, dropna: bool = True) -> Series: - from pandas.core.algorithms import value_counts_internal as value_counts - - result = value_counts(self._ndarray, sort=False, dropna=dropna) - result.index = result.index.astype(self.dtype) - return result - - # 
------------------------------------------------------------------------ - # String methods interface - _str_na_value = np.nan diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 4893883d3ad12..67114815341b6 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -1,12 +1,10 @@ from __future__ import annotations -from functools import partial import operator import re from typing import ( TYPE_CHECKING, Union, - cast, ) import numpy as np @@ -23,8 +21,6 @@ ) from pandas.core.dtypes.common import ( - is_bool_dtype, - is_integer_dtype, is_scalar, pandas_dtype, ) @@ -39,7 +35,6 @@ BaseStringArray, StringDtype, ) -from pandas.core.ops import invalid_comparison from pandas.core.strings.object_array import ObjectStringArrayMixin if not pa_version_under10p1: @@ -133,18 +128,22 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr def __init__(self, values) -> None: _chk_pyarrow_available() - if isinstance(values, (pa.Array, pa.ChunkedArray)) and pa.types.is_string( - values.type + if isinstance(values, (pa.Array, pa.ChunkedArray)) and ( + pa.types.is_string(values.type) + or ( + pa.types.is_dictionary(values.type) + and ( + pa.types.is_string(values.type.value_type) + or pa.types.is_large_string(values.type.value_type) + ) + ) ): values = pc.cast(values, pa.large_string()) super().__init__(values) self._dtype = StringDtype(storage=self._storage, na_value=self._na_value) - if not pa.types.is_large_string(self._pa_array.type) and not ( - pa.types.is_dictionary(self._pa_array.type) - and pa.types.is_large_string(self._pa_array.type.value_type) - ): + if not pa.types.is_large_string(self._pa_array.type): raise ValueError( "ArrowStringArray requires a PyArrow (chunked) array of " "large_string type" @@ -216,12 +215,17 @@ def dtype(self) -> StringDtype: # type: ignore[override] return self._dtype def insert(self, loc: int, item) -> ArrowStringArray: + if self.dtype.na_value is np.nan and item is np.nan: + item = libmissing.NA if not isinstance(item, str) and item is not libmissing.NA: raise TypeError("Scalar must be NA or str") return super().insert(loc, item) - @classmethod - def _result_converter(cls, values, na=None): + def _result_converter(self, values, na=None): + if self.dtype.na_value is np.nan: + if not isna(na): + values = values.fill_null(bool(na)) + return ArrowExtensionArray(values).to_numpy(na_value=np.nan) return BooleanDtype().__from_arrow__(values) def _maybe_convert_setitem_value(self, value): @@ -275,102 +279,7 @@ def astype(self, dtype, copy: bool = True): # ------------------------------------------------------------------------ # String methods interface - # error: Incompatible types in assignment (expression has type "NAType", - # base class "ObjectStringArrayMixin" defined the type as "float") - _str_na_value = libmissing.NA # type: ignore[assignment] - - def _str_map_nan_semantics( - self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True - ): - if dtype is None: - dtype = self.dtype - if na_value is None: - na_value = self.dtype.na_value - - mask = isna(self) - arr = np.asarray(self) - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - if is_integer_dtype(dtype): - na_value = np.nan - else: - na_value = False - - dtype = np.dtype(cast(type, dtype)) - if mask.any(): - # numpy int/bool dtypes cannot hold NaNs so we must convert to - # float64 for int (to match maybe_convert_objects) or - # object for bool (again to match maybe_convert_objects) - if 
is_integer_dtype(dtype): - dtype = np.dtype("float64") - else: - dtype = np.dtype(object) - result = lib.map_infer_mask( - arr, - f, - mask.view("uint8"), - convert=False, - na_value=na_value, - dtype=dtype, - ) - return result - - else: - return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert) - - def _str_map( - self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True - ): - if self.dtype.na_value is np.nan: - return self._str_map_nan_semantics( - f, na_value=na_value, dtype=dtype, convert=convert - ) - - # TODO: de-duplicate with StringArray method. This method is moreless copy and - # paste. - - from pandas.arrays import ( - BooleanArray, - IntegerArray, - ) - - if dtype is None: - dtype = self.dtype - if na_value is None: - na_value = self.dtype.na_value - - mask = isna(self) - arr = np.asarray(self) - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: type[IntegerArray | BooleanArray] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - - na_value_is_na = isna(na_value) - if na_value_is_na: - na_value = 1 - result = lib.map_infer_mask( - arr, - f, - mask.view("uint8"), - convert=False, - na_value=na_value, - # error: Argument 1 to "dtype" has incompatible type - # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected - # "Type[object]" - dtype=np.dtype(cast(type, dtype)), - ) - - if not na_value_is_na: - mask[:] = False - - return constructor(result, mask) - - else: - return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert) + _str_map = BaseStringArray._str_map def _str_contains( self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True @@ -587,11 +496,30 @@ def _str_get_dummies(self, sep: str = "|"): return dummies.astype(np.int64, copy=False), labels def _convert_int_dtype(self, result): + if self.dtype.na_value is np.nan: + if isinstance(result, pa.Array): + result = result.to_numpy(zero_copy_only=False) + else: + result = result.to_numpy() + if result.dtype == np.int32: + result = result.astype(np.int64) + return result + return Int64Dtype().__from_arrow__(result) def _reduce( self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs ): + if self.dtype.na_value is np.nan and name in ["any", "all"]: + if not skipna: + nas = pc.is_null(self._pa_array) + arr = pc.or_kleene(nas, pc.not_equal(self._pa_array, "")) + else: + arr = pc.not_equal(self._pa_array, "") + return ArrowExtensionArray(arr)._reduce( + name, skipna=skipna, keepdims=keepdims, **kwargs + ) + result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs) if name in ("argmin", "argmax") and isinstance(result, pa.Array): return self._convert_int_dtype(result) @@ -622,70 +550,31 @@ def _rank( ) ) - -class ArrowStringArrayNumpySemantics(ArrowStringArray): - _storage = "pyarrow" - _na_value = np.nan - - @classmethod - def _result_converter(cls, values, na=None): - if not isna(na): - values = values.fill_null(bool(na)) - return ArrowExtensionArray(values).to_numpy(na_value=np.nan) - - def __getattribute__(self, item): - # ArrowStringArray and we both inherit from ArrowExtensionArray, which - # creates inheritance problems (Diamond inheritance) - if item in ArrowStringArrayMixin.__dict__ and item not in ( - "_pa_array", - "__dict__", - ): - return partial(getattr(ArrowStringArrayMixin, item), self) - return super().__getattribute__(item) - - def _convert_int_dtype(self, result): - if isinstance(result, pa.Array): - result = 
result.to_numpy(zero_copy_only=False) - else: - result = result.to_numpy() - if result.dtype == np.int32: - result = result.astype(np.int64) + def value_counts(self, dropna: bool = True) -> Series: + result = super().value_counts(dropna=dropna) + if self.dtype.na_value is np.nan: + res_values = result._values.to_numpy() + return result._constructor( + res_values, index=result.index, name=result.name, copy=False + ) return result def _cmp_method(self, other, op): - try: - result = super()._cmp_method(other, op) - except pa.ArrowNotImplementedError: - return invalid_comparison(self, other, op) - if op == operator.ne: - return result.to_numpy(np.bool_, na_value=True) - else: - return result.to_numpy(np.bool_, na_value=False) - - def value_counts(self, dropna: bool = True) -> Series: - from pandas import Series - - result = super().value_counts(dropna) - return Series( - result._values.to_numpy(), index=result.index, name=result.name, copy=False - ) - - def _reduce( - self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs - ): - if name in ["any", "all"]: - if not skipna: - nas = pc.is_null(self._pa_array) - arr = pc.or_kleene(nas, pc.not_equal(self._pa_array, "")) + result = super()._cmp_method(other, op) + if self.dtype.na_value is np.nan: + if op == operator.ne: + return result.to_numpy(np.bool_, na_value=True) else: - arr = pc.not_equal(self._pa_array, "") - return ArrowExtensionArray(arr)._reduce( - name, skipna=skipna, keepdims=keepdims, **kwargs - ) - else: - return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs) + return result.to_numpy(np.bool_, na_value=False) + return result - def insert(self, loc: int, item) -> ArrowStringArrayNumpySemantics: - if item is np.nan: - item = libmissing.NA - return super().insert(loc, item) # type: ignore[return-value] + +class ArrowStringArrayNumpySemantics(ArrowStringArray): + _na_value = np.nan + _str_get = ArrowStringArrayMixin._str_get + _str_removesuffix = ArrowStringArrayMixin._str_removesuffix + _str_capitalize = ArrowStringArrayMixin._str_capitalize + _str_pad = ArrowStringArrayMixin._str_pad + _str_title = ArrowStringArrayMixin._str_title + _str_swapcase = ArrowStringArrayMixin._str_swapcase + _str_slice_replace = ArrowStringArrayMixin._str_slice_replace diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 83cc2871f5459..b2cfbe7338c0d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -152,9 +152,8 @@ def _scalar_type(self) -> type[Timedelta]: # define my properties & methods for delegation _other_ops: list[str] = [] _bool_ops: list[str] = [] - _object_ops: list[str] = ["freq"] _field_ops: list[str] = ["days", "seconds", "microseconds", "nanoseconds"] - _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + ["unit"] + _datetimelike_ops: list[str] = _field_ops + _bool_ops + ["unit", "freq"] _datetimelike_methods: list[str] = [ "to_pytimedelta", "total_seconds", diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index e62cda0dfe8d0..e4eefb570fd95 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -452,13 +452,12 @@ def is_terminal() -> bool: string_storage_doc = """ : string - The default storage for StringDtype. This option is ignored if - ``future.infer_string`` is set to True. + The default storage for StringDtype. 
""" def is_valid_string_storage(value: Any) -> None: - legal_values = ["python", "pyarrow"] + legal_values = ["auto", "python", "pyarrow"] if value not in legal_values: msg = "Value must be one of python|pyarrow" if value == "pyarrow_numpy": @@ -473,7 +472,7 @@ def is_valid_string_storage(value: Any) -> None: with cf.config_prefix("mode"): cf.register_option( "string_storage", - "python", + "auto", string_storage_doc, # validator=is_one_of_factory(["python", "pyarrow"]), validator=is_valid_string_storage, diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 162f6a4d30f3f..3394bf091e228 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1014,10 +1014,8 @@ def convert_dtypes( Back-end data type applied to the resultant :class:`DataFrame` (still experimental). Behaviour is as follows: - * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). + * ``"numpy_nullable"``: returns nullable-dtype * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. .. versionadded:: 2.0 @@ -1025,6 +1023,8 @@ def convert_dtypes( ------- np.dtype, or ExtensionDtype """ + from pandas.core.arrays.string_ import StringDtype + inferred_dtype: str | DtypeObj if ( @@ -1103,6 +1103,13 @@ def convert_dtypes( # If we couldn't do anything else, then we retain the dtype inferred_dtype = input_array.dtype + elif ( + convert_string + and isinstance(input_array.dtype, StringDtype) + and input_array.dtype.na_value is np.nan + ): + inferred_dtype = pandas_dtype_func("string") + else: inferred_dtype = input_array.dtype diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 64b5278424192..bcf1ade9b0320 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1274,6 +1274,10 @@ def is_bool_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a boolean dtype. + This function verifies whether a given object is a boolean data type. The input + can be an array or a dtype object. Accepted array types include instances + of ``np.array``, ``pd.Series``, ``pd.Index``, and similar array-like structures. + Parameters ---------- arr_or_dtype : array-like or dtype @@ -1284,6 +1288,10 @@ def is_bool_dtype(arr_or_dtype) -> bool: boolean Whether or not the array or dtype is of a boolean dtype. + See Also + -------- + api.types.is_bool : Check if an object is a boolean. + Notes ----- An ExtensionArray is considered boolean when the ``_is_boolean`` diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3aeab96e03163..c0587d36bcb5a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -18,9 +18,9 @@ cast, ) import warnings +import zoneinfo import numpy as np -import pytz from pandas._config.config import get_option @@ -789,7 +789,7 @@ def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None) -> None: tz = timezones.maybe_get_tz(tz) tz = timezones.tz_standardize(tz) elif tz is not None: - raise pytz.UnknownTimeZoneError(tz) + raise zoneinfo.ZoneInfoNotFoundError(tz) if tz is None: raise TypeError("A 'tz' is required.") @@ -882,7 +882,7 @@ def construct_from_string(cls, string: str_type) -> DatetimeTZDtype: return cls(unit=d["unit"], tz=d["tz"]) except (KeyError, TypeError, ValueError) as err: # KeyError if maybe_get_tz tries and fails to get a - # pytz timezone (actually pytz.UnknownTimeZoneError). + # zoneinfo timezone (actually zoneinfo.ZoneInfoNotFoundError). 
# TypeError if we pass a nonsense tz; # ValueError if we pass a unit other than "ns" raise TypeError(msg) from err diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b8039746d9952..1e6608b0d87f3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6406,7 +6406,7 @@ def dropna( thresh : int, optional Require that many non-NA values. Cannot be combined with how. - subset : column label or sequence of labels, optional + subset : column label or iterable of labels, optional Labels along other axis to consider, e.g. if you are dropping rows these would be a list of columns to include. inplace : bool, default False @@ -6536,7 +6536,7 @@ def dropna( @overload def drop_duplicates( self, - subset: Hashable | Sequence[Hashable] | None = ..., + subset: Hashable | Iterable[Hashable] | None = ..., *, keep: DropKeep = ..., inplace: Literal[True], @@ -6546,7 +6546,7 @@ def drop_duplicates( @overload def drop_duplicates( self, - subset: Hashable | Sequence[Hashable] | None = ..., + subset: Hashable | Iterable[Hashable] | None = ..., *, keep: DropKeep = ..., inplace: Literal[False] = ..., @@ -6556,7 +6556,7 @@ def drop_duplicates( @overload def drop_duplicates( self, - subset: Hashable | Sequence[Hashable] | None = ..., + subset: Hashable | Iterable[Hashable] | None = ..., *, keep: DropKeep = ..., inplace: bool = ..., @@ -6565,7 +6565,7 @@ def drop_duplicates( def drop_duplicates( self, - subset: Hashable | Sequence[Hashable] | None = None, + subset: Hashable | Iterable[Hashable] | None = None, *, keep: DropKeep = "first", inplace: bool = False, @@ -6579,7 +6579,7 @@ def drop_duplicates( Parameters ---------- - subset : column label or sequence of labels, optional + subset : column label or iterable of labels, optional Only consider certain columns for identifying duplicates, by default use all of the columns. keep : {'first', 'last', ``False``}, default 'first' @@ -6669,7 +6669,7 @@ def drop_duplicates( def duplicated( self, - subset: Hashable | Sequence[Hashable] | None = None, + subset: Hashable | Iterable[Hashable] | None = None, keep: DropKeep = "first", ) -> Series: """ @@ -6679,7 +6679,7 @@ def duplicated( Parameters ---------- - subset : column label or sequence of labels, optional + subset : column label or iterable of labels, optional Only consider certain columns for identifying duplicates, by default use all of the columns. keep : {'first', 'last', False}, default 'first' @@ -6771,10 +6771,7 @@ def f(vals) -> tuple[np.ndarray, int]: return labels.astype("i8"), len(shape) if subset is None: - # https://github.com/pandas-dev/pandas/issues/28770 - # Incompatible types in assignment (expression has type "Index", variable - # has type "Sequence[Any]") - subset = self.columns # type: ignore[assignment] + subset = self.columns elif ( not np.iterable(subset) or isinstance(subset, str) @@ -6795,7 +6792,7 @@ def f(vals) -> tuple[np.ndarray, int]: if len(subset) == 1 and self.columns.is_unique: # GH#45236 This is faster than get_group_index below - result = self[subset[0]].duplicated(keep) + result = self[next(iter(subset))].duplicated(keep) result.name = None else: vals = (col.values for name, col in self.items() if name in subset) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8a6fc69d47cc3..0f0078fc3398b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6670,10 +6670,10 @@ def convert_dtypes( Back-end data type applied to the resultant :class:`DataFrame` or :class:`Series` (still experimental). 
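Before the behaviour list that follows, a usage sketch of the ``dtype_backend`` keyword with the default backend:

    >>> import pandas as pd
    >>> df = pd.DataFrame({'a': [1, 2, None]})
    >>> df.convert_dtypes(dtype_backend='numpy_nullable').dtypes
    a    Int64
    dtype: object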
Behaviour is as follows: - * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - or :class:`Series` (default). + * ``"numpy_nullable"``: returns nullable-dtype-backed + :class:`DataFrame` or :class:`Series`. * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame or Series. + :class:`DataFrame` or :class:`Series`. .. versionadded:: 2.0 @@ -10570,7 +10570,7 @@ def tz_localize( a non-DST time (note that this flag is only applicable for ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous + - 'raise' will raise a ValueError if there are ambiguous times. nonexistent : str, default 'raise' A nonexistent time does not exist in a particular timezone @@ -10582,7 +10582,7 @@ closest existing time - 'NaT' will return NaT where there are nonexistent times - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are + - 'raise' will raise a ValueError if there are nonexistent times. Returns diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 00a929724ed4c..3b3cda8f7cd33 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -6,7 +6,6 @@ import warnings import numpy as np -import pytz from pandas._libs import ( NaT, @@ -162,7 +161,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin): non-DST time (note that this flag is only applicable for ambiguous times) - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous times. + - 'raise' will raise a ValueError if there are ambiguous times. dayfirst : bool, default False If True, parse dates in `data` with the day first order.
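
Reviewer note: the docstring updates above track the pytz removal; ambiguous and nonexistent local times are now documented to raise the stdlib ``ValueError``. A sketch of the documented behaviour (02:30 occurs twice on this date in this zone, so localizing it is ambiguous)::

    import pandas as pd

    ts = pd.Timestamp("2018-10-28 02:30:00")
    try:
        ts.tz_localize("Europe/Berlin")  # ambiguous='raise' is the default
    except ValueError as err:
        print(err)
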
yearfirst : bool, default False @@ -264,7 +263,7 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]: @doc(DatetimeArray.strftime) def strftime(self, date_format) -> Index: arr = self._data.strftime(date_format) - return Index(arr, name=self.name, dtype=object) + return Index(arr, name=self.name, dtype=arr.dtype) @doc(DatetimeArray.tz_convert) def tz_convert(self, tz) -> Self: @@ -591,7 +590,7 @@ def get_loc(self, key): elif isinstance(key, str): try: parsed, reso = self._parse_with_reso(key) - except (ValueError, pytz.NonExistentTimeError) as err: + except ValueError as err: raise KeyError(key) from err self._disallow_mismatched_indexing(parsed) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 48d5e59250f35..2eeacfb769be4 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -74,7 +74,7 @@ def fget(self): return type(self)._simple_new(result, name=self.name) elif isinstance(result, ABCDataFrame): return result.set_index(self) - return Index(result, name=self.name) + return Index(result, name=self.name, dtype=result.dtype) return result def fset(self, value) -> None: @@ -101,7 +101,7 @@ def method(self, *args, **kwargs): # type: ignore[misc] return type(self)._simple_new(result, name=self.name) elif isinstance(result, ABCDataFrame): return result.set_index(self) - return Index(result, name=self.name) + return Index(result, name=self.name, dtype=result.dtype) return result # error: "property" has no attribute "__name__" diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0900121ab717f..c3d4ad721c830 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1636,6 +1636,17 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None: doc=""" Names of levels in MultiIndex. + This attribute provides access to the names of the levels in a `MultiIndex`. + The names are stored as a `FrozenList`, which is an immutable list-like + container. Each name corresponds to a level in the `MultiIndex`, and can be + used to identify or manipulate the levels individually. + + See Also + -------- + MultiIndex.set_names : Set Index or MultiIndex name. + MultiIndex.rename : Rename specific levels in a MultiIndex. + Index.names : Get names on index. + Examples -------- >>> mi = pd.MultiIndex.from_arrays( @@ -2681,8 +2692,15 @@ def sortlevel( """ Sort MultiIndex at the requested level. - The result will respect the original ordering of the associated - factor at that level. + This method is useful when dealing with MultiIndex objects, allowing for + sorting at a specific level of the index. The function preserves the + relative ordering of data within the same level while sorting + the overall MultiIndex. The method provides flexibility with the `ascending` + parameter to define the sort order and with the `sort_remaining` parameter to + control whether the remaining levels should also be sorted. Sorting a + MultiIndex can be crucial when performing operations that require ordered + indices, such as grouping or merging datasets. The `na_position` argument is + important in handling missing values consistently across different levels. Parameters ---------- @@ -2692,7 +2710,9 @@ def sortlevel( ascending : bool, default True False to sort in descending order. Can also be a list to specify a directed ordering. 
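
Reviewer note: the ``dtype=arr.dtype`` / ``dtype=result.dtype`` changes above stop coercing wrapped results to ``object``; the test updates later in this diff expect ``strftime`` to come back as the NaN-variant string dtype once ``future.infer_string`` is active. Roughly::

    import pandas as pd

    idx = pd.date_range("2024-01-01", periods=2)
    out = idx.strftime("%Y-%m-%d")
    # Previously always object dtype; now the Index keeps whatever dtype the
    # underlying array produced (a string dtype under future.infer_string).
    print(out.dtype)
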
- sort_remaining : sort by the remaining levels after level + sort_remaining : bool, default True + If True, sorts by the remaining levels after sorting by the specified + `level`. na_position : {'first' or 'last'}, default 'first' Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. @@ -2706,6 +2726,13 @@ indexer : np.ndarray[np.intp] Indices of output values in original index. + See Also + -------- + MultiIndex : A multi-level, or hierarchical, index object for pandas objects. + Index.sort_values : Sort Index values. + DataFrame.sort_index : Sort DataFrame by the index. + Series.sort_index : Sort Series by the index. + Examples -------- >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]]) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 149bef6258bfa..dfb96162f0ac1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -512,7 +512,11 @@ def convert(self) -> list[Block]: convert_non_numeric=True, ) refs = None - if res_values is values: + if ( + res_values is values + or isinstance(res_values, NumpyExtensionArray) + and res_values._ndarray is values + ): refs = self.refs res_values = ensure_block_shape(res_values, self.ndim) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 6836ba3f65691..c005a1ce26e4b 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -379,6 +379,11 @@ def concat( 0 1 2 1 3 4 """ + if ignore_index and keys is not None: + raise ValueError( + f"Cannot set {ignore_index=} and specify keys. Either should be used." + ) + if copy is not lib.no_default: warnings.warn( "The copy keyword is deprecated and will be removed in a future " diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 18517199f073c..b3f946f289891 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -358,7 +358,16 @@ def qcut( x_idx = _preprocess_for_cut(x) x_idx, _ = _coerce_to_type(x_idx) - quantiles = np.linspace(0, 1, q + 1) if is_integer(q) else q + if is_integer(q): + quantiles = np.linspace(0, 1, q + 1) + # Round up rather than to nearest if not representable in base 2 + np.putmask( + quantiles, + q * quantiles != np.arange(q + 1), + np.nextafter(quantiles, 1), + ) + else: + quantiles = q bins = x_idx.to_series().dropna().quantile(quantiles) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 8e6183c43480f..1014c9559afaf 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -969,6 +969,8 @@ def rsplit(self, pat=None, *, n=-1, expand: bool = False): Returns ------- DataFrame/MultiIndex or Series/Index of objects + Returns appropriate type based on `expand` parameter with strings + split based on the `pat` parameter. See Also -------- @@ -1749,6 +1751,18 @@ def pad( Returns ------- Series/Index of objects. + A Series or Index where the strings are modified by :meth:`str.%(method)s`. + + See Also + -------- + Series.str.rjust : Fills the left side of strings with an arbitrary + character. + Series.str.ljust : Fills the right side of strings with an arbitrary + character. + Series.str.center : Fills both sides of strings with an arbitrary + character. + Series.str.zfill : Pad strings in the Series/Index by prepending '0' + character. Examples -------- @@ -2024,11 +2038,19 @@ def decode(self, encoding, errors: str = "strict"): Parameters ---------- encoding : str + Specifies the encoding to be used.
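
Reviewer note: the new ``concat`` guard above replaces a silently ignored keyword combination with an explicit error. For example::

    import pandas as pd

    df = pd.DataFrame({"a": [1]})
    try:
        pd.concat([df, df], ignore_index=True, keys=["x", "y"])
    except ValueError as err:
        print(err)  # Cannot set ignore_index=True and specify keys. ...
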
errors : str, optional + Specifies the error handling scheme. + Possible values are those supported by :meth:`bytes.decode`. Returns ------- Series or Index + A Series or Index with decoded strings. + + See Also + -------- + Series.str.encode : Encodes strings into bytes in a Series/Index. Examples -------- @@ -2063,11 +2085,19 @@ def encode(self, encoding, errors: str = "strict"): Parameters ---------- encoding : str + Specifies the encoding to be used. errors : str, optional + Specifies the error handling scheme. + Possible values are those supported by :meth:`str.encode`. Returns ------- Series/Index of objects + A Series or Index with strings encoded into bytes. + + See Also + -------- + Series.str.decode : Decodes bytes into strings in a Series/Index. Examples -------- @@ -2099,6 +2129,7 @@ def encode(self, encoding, errors: str = "strict"): Returns ------- Series or Index of object + Series or Index with the strings being stripped from the %(side)s. See Also -------- @@ -3092,6 +3123,8 @@ def normalize(self, form): Returns ------- Series or Index of object + Returns a Series or an Index of the %(side)s indexes + in each string of the input. See Also -------- @@ -3207,7 +3240,8 @@ def len(self): Returns ------- - Series or Index of object + Series or Index of objects + A Series or Index where the strings are modified by :meth:`str.%(method)s`. See Also -------- diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 290a28ab60ae1..100afa956bd24 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -37,8 +37,6 @@ class ObjectStringArrayMixin(BaseStringArrayMethods): String Methods operating on object-dtype ndarrays. """ - _str_na_value = np.nan - def __len__(self) -> int: # For typing, _str_map relies on the object being sized. raise NotImplementedError @@ -56,7 +54,7 @@ def _str_map( na_value : Scalar, optional The value to set for NA values. Might also be used for the fill value if the callable `f` raises an exception. - This defaults to ``self._str_na_value`` which is ``np.nan`` + This defaults to ``self.dtype.na_value`` which is ``np.nan`` for object-dtype and Categorical and ``pd.NA`` for StringArray. dtype : Dtype, optional The dtype of the result array. @@ -66,7 +64,7 @@ def _str_map( if dtype is None: dtype = np.dtype("object") if na_value is None: - na_value = self._str_na_value + na_value = self.dtype.na_value # type: ignore[attr-defined] if not len(self): return np.array([], dtype=dtype) @@ -272,7 +270,7 @@ def f(x): return x.get(i) elif len(x) > i >= -len(x): return x[i] - return self._str_na_value + return self.dtype.na_value # type: ignore[attr-defined] return self._str_map(f) @@ -466,7 +464,7 @@ def _str_removesuffix(self, suffix: str): def _str_extract(self, pat: str, flags: int = 0, expand: bool = True): regex = re.compile(pat, flags=flags) - na_value = self._str_na_value + na_value = self.dtype.na_value # type: ignore[attr-defined] if not expand: diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 26e73794af298..982851d0557c3 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -99,8 +99,8 @@ def to_numeric( is to not use nullable data types. 
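
Reviewer note: the fleshed-out ``decode``/``encode`` docstrings above describe an inverse pair. For example::

    import pandas as pd

    s = pd.Series(["café", "naïve"])
    encoded = s.str.encode("utf-8")    # Series of bytes objects
    decoded = encoded.str.decode("utf-8")
    assert list(decoded) == list(s)    # the pair round-trips
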
If specified, the behavior is as follows: - * ``"numpy_nullable"``: returns with nullable-dtype-backed - * ``"pyarrow"``: returns with pyarrow-backed nullable :class:`ArrowDtype` + * ``"numpy_nullable"``: returns nullable-dtype-backed object + * ``"pyarrow"``: returns pyarrow-backed nullable object .. versionadded:: 2.0 diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 5a0a8c321e629..2ed241f0b9bca 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -38,14 +38,15 @@ def read_clipboard( A string or regex delimiter. The default of ``'\\s+'`` denotes one or more whitespace characters. - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable', 'pyarrow'} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f83f9cb1c8d74..ef52107c283e9 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -267,14 +267,15 @@ Rows at the end to skip (0-indexed). {storage_options} -dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' +dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 @@ -1728,14 +1729,15 @@ def parse( comment string and the end of the current line is ignored. skipfooter : int, default 0 Rows at the end to skip (0-indexed). - dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' + dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 **kwds : dict, optional diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 3df3e77a851a3..aaae9857b4fae 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -92,14 +92,15 @@ def read_feather( Whether to parallelize reading using multiple threads.
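
Reviewer note: the reworded ``to_numeric`` bullets above only change the container dtype, not the parsing itself. For example::

    import pandas as pd

    s = pd.Series(["1", "2", None])
    pd.to_numeric(s)                                  # float64, NaN for missing
    pd.to_numeric(s, dtype_backend="numpy_nullable")  # Int64, pd.NA for missing
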
{storage_options} - dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' + dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: - * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 diff --git a/pandas/io/html.py b/pandas/io/html.py index 4b8bc48130fab..c9897f628fdc9 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -1131,14 +1131,15 @@ def read_html( .. versionadded:: 1.5.0 - dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' + dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index b29ead1d14b1d..d077b9e0c4568 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -652,14 +652,15 @@ def read_json( {storage_options} - dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' + dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index d966e38fa11a5..9d250ee5c08ce 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -144,11 +144,11 @@ def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]: field["freq"] = dtype.freq.freqstr elif isinstance(dtype, DatetimeTZDtype): if timezones.is_utc(dtype.tz): - # timezone.utc has no "zone" attr field["tz"] = "UTC" else: - # error: "tzinfo" has no attribute "zone" - field["tz"] = dtype.tz.zone # type: ignore[attr-defined] + zone = timezones.get_timezone(dtype.tz) + if isinstance(zone, str): + field["tz"] = zone elif isinstance(dtype, ExtensionDtype): field["extDtype"] = dtype.name return field diff --git a/pandas/io/orc.py b/pandas/io/orc.py index b297164d5d108..f179dafc919e5 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -61,14 +61,15 @@ def read_orc( Output always follows the ordering of the file and not the columns list. 
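
Reviewer note: the ``_table_schema`` change above derives the ``tz`` field via ``timezones.get_timezone`` so that pytz- and zoneinfo-built timezones both serialize to their string name. Observable through the public helper::

    import pandas as pd
    from pandas.io.json import build_table_schema

    df = pd.DataFrame({"ts": pd.date_range("2024-01-01", periods=1, tz="US/Eastern")})
    schema = build_table_schema(df)
    # the "ts" field carries {"tz": "US/Eastern"} regardless of the tz backend
    print(schema["fields"][-1])
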
This mirrors the original behaviour of :external+pyarrow:py:meth:`pyarrow.orc.ORCFile.read`. - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable', 'pyarrow'} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 77a9cc3fca644..24415299e799b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -542,14 +542,15 @@ def read_parquet( .. versionadded:: 1.3.0 - dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' + dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 0cca1ebdb8c8f..6e933f94cf0ba 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -268,6 +268,18 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): Number of lines at bottom of file to skip (Unsupported with ``engine='c'``). nrows : int, optional Number of rows of file to read. Useful for reading pieces of large files. + Refers to the number of data rows in the returned DataFrame, excluding: + + * The header row containing column names. + * Rows before the header row, if ``header=1`` or larger. + + Example usage: + + * To read the first 999,999 (non-header) rows: + ``read_csv(..., nrows=999999)`` + + * To read rows 1,000,000 through 1,999,999: + ``read_csv(..., skiprows=1000000, nrows=999999)`` na_values : Hashable, Iterable of Hashable or dict of {{Hashable : Iterable}}, optional Additional strings to recognize as ``NA``/``NaN``. If ``dict`` passed, specific per-column ``NA`` values. By default the following values are interpreted as @@ -438,14 +450,14 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): {storage_options} -dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' +dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` :class:`DataFrame` .. 
versionadded:: 2.0 diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 313ffa79cbd09..e597463aee453 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -36,14 +36,15 @@ def read_spss( Return a subset of the columns. If None, return all columns. convert_categoricals : bool, default is True Convert categorical columns into pd.Categorical. - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable', 'pyarrow'} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed + nullable :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 4fd7de7a28855..99dd06568fa01 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -306,14 +306,15 @@ def read_sql_table( chunksize : int, default None If specified, returns an iterator where `chunksize` is the number of rows to include in each chunk. - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable', 'pyarrow'} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 @@ -443,14 +444,15 @@ def read_sql_query( {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. .. versionadded:: 1.3.0 - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable', 'pyarrow'} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 @@ -586,14 +588,15 @@ def read_sql( chunksize : int, default None If specified, return an iterator where `chunksize` is the number of rows to include in each chunk. - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable', 'pyarrow'} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. 
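
Reviewer note: the same ``dtype_backend`` rewording is applied to every reader touched by this diff; concretely, for the SQL entry points::

    import sqlite3

    import pandas as pd

    con = sqlite3.connect(":memory:")
    pd.DataFrame({"a": [1, None]}).to_sql("t", con, index=False)

    pd.read_sql("SELECT * FROM t", con)  # default: float64 with NaN
    # opting in to the nullable backend yields masked dtypes holding pd.NA
    pd.read_sql("SELECT * FROM t", con, dtype_backend="numpy_nullable")
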
+ * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 dtype : Type name or dict of columns @@ -1683,14 +1686,15 @@ def read_table( chunksize : int, default None If specified, return an iterator where `chunksize` is the number of rows to include in each chunk. - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable', 'pyarrow'} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 @@ -2148,14 +2152,15 @@ def read_table( schema of the SQL database object. chunksize : int, default None Raises NotImplementedError - dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' + dtype_backend : {'numpy_nullable', 'pyarrow'} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 8c7381a926e72..0fcf27af42fde 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -959,14 +959,15 @@ def read_xml( {storage_options} - dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' + dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + (still experimental). If not specified, the default behavior + is to not use nullable data types. If specified, the behavior + is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). - * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + * ``"pyarrow"``: returns pyarrow-backed nullable + :class:`ArrowDtype` :class:`DataFrame` .. 
versionadded:: 2.0 diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index fb7d785a94bc4..9a7e563332a42 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -546,7 +546,7 @@ def _maybe_right_yaxis(self, ax: Axes, axes_num: int) -> Axes: new_ax.set_yscale("log") elif self.logy == "sym" or self.loglog == "sym": new_ax.set_yscale("symlog") - return new_ax # type: ignore[return-value] + return new_ax @final @cache_readonly diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index b0475b64a844e..3be3562d23cd6 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -6,6 +6,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd @@ -1245,6 +1247,9 @@ def test_agg_multiple_mixed(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_agg_multiple_mixed_raises(): # GH 20909 mdf = DataFrame( diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 3137d3ff50954..ba970e328ae40 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -12,6 +12,9 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW from pandas.errors import SpecificationError from pandas import ( @@ -209,6 +212,10 @@ def transform(row): data.apply(transform, axis=1) +# we should raise a proper TypeError instead of propagating the pyarrow error +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) @pytest.mark.parametrize( "df, func, expected", tm.get_cython_table_params( @@ -229,6 +236,10 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_str df.agg(func, axis=axis) +# we should raise a proper TypeError instead of propagating the pyarrow error +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) @pytest.mark.parametrize( "series, func, expected", chain( diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py index 6ac0b49f0e4e7..6bbe5100e8826 100644 --- a/pandas/tests/apply/test_numba.py +++ b/pandas/tests/apply/test_numba.py @@ -104,6 +104,7 @@ def test_numba_nonunique_unsupported(apply_axis): def test_numba_unsupported_dtypes(apply_axis): + pytest.importorskip("pyarrow") f = lambda x: x df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]}) df["c"] = df["c"].astype("double[pyarrow]") diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 4b5156d0007bb..899ea1910d055 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -8,6 +8,9 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td import pandas as pd @@ -315,6 +318,9 @@ def test_add(self): expected = pd.Index(["1a", "1b", "1c"]) tm.assert_index_equal("1" + index, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_sub_fail(self, using_infer_string): index = pd.Index([str(i) for i in range(10)]) diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py 
b/pandas/tests/arrays/boolean/test_arithmetic.py index 0c4fcf149eb20..4dbd8eb9f5ca7 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -3,6 +3,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd import pandas._testing as tm @@ -90,6 +94,9 @@ def test_op_int8(left_array, right_array, opname): # ----------------------------------------------------------------------------- +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string): # invalid ops diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index dca33dffa3996..52fd80cd196e0 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -6,7 +6,10 @@ from pandas._config import using_string_dtype -from pandas.compat import PYPY +from pandas.compat import ( + HAS_PYARROW, + PYPY, +) from pandas import ( Categorical, @@ -296,7 +299,9 @@ def test_nbytes(self): exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories assert cat.nbytes == exp - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" + ) def test_memory_usage(self): cat = Categorical([1, 2, 3]) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 6752a503016f8..d7eb6800e5d07 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -8,6 +8,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -442,7 +444,9 @@ def test_constructor_str_unknown(self): with pytest.raises(ValueError, match="Unknown dtype"): Categorical([1, 2], dtype="foo") - @pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings" + ) def test_constructor_np_strs(self): # GH#31499 Hashtable.map_locations needs to work on np.str_ objects cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) diff --git a/pandas/tests/arrays/integer/test_reduction.py b/pandas/tests/arrays/integer/test_reduction.py index db04862e4ea07..e485c7f79b475 100644 --- a/pandas/tests/arrays/integer/test_reduction.py +++ b/pandas/tests/arrays/integer/test_reduction.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -102,9 +104,10 @@ def test_groupby_reductions(op, expected): ["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")], ], ) -def test_mixed_reductions(op, expected, using_infer_string): - if op in ["any", "all"] and using_infer_string: - expected = expected.astype("bool") +def test_mixed_reductions(request, op, expected, using_infer_string): + if op in ["any", "all"] and using_infer_string and HAS_PYARROW: + # TODO(infer_string) inconsistent result type + request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) df = DataFrame( { "A": ["a", "b", "b"], diff --git a/pandas/tests/arrays/string_/test_string_arrow.py 
b/pandas/tests/arrays/string_/test_string_arrow.py index 7d4aae0f7bb4e..cfba32c62f206 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -4,7 +4,6 @@ import numpy as np import pytest -from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td import pandas as pd @@ -27,11 +26,10 @@ def test_eq_all_na(): tm.assert_extension_array_equal(result, expected) -def test_config(string_storage, request, using_infer_string): - if using_infer_string and string_storage == "python" and HAS_PYARROW: - # string storage with na_value=NaN always uses pyarrow if available - # -> does not yet honor the option - request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) +def test_config(string_storage, using_infer_string): + # with the default string_storage setting + # always "python" at the moment + assert StringDtype().storage == "python" with pd.option_context("string_storage", string_storage): assert StringDtype().storage == string_storage @@ -88,19 +86,18 @@ def test_constructor_not_string_type_value_dictionary_raises(chunked): ArrowStringArray(arr) -@pytest.mark.xfail( - reason="dict conversion does not seem to be implemented for large string in arrow" -) +@pytest.mark.parametrize("string_type", ["string", "large_string"]) @pytest.mark.parametrize("chunked", [True, False]) -def test_constructor_valid_string_type_value_dictionary(chunked): +def test_constructor_valid_string_type_value_dictionary(string_type, chunked): pa = pytest.importorskip("pyarrow") - arr = pa.array(["1", "2", "3"], pa.large_string()).dictionary_encode() + arr = pa.array(["1", "2", "3"], getattr(pa, string_type)()).dictionary_encode() if chunked: arr = pa.chunked_array(arr) arr = ArrowStringArray(arr) - assert pa.types.is_string(arr._pa_array.type.value_type) + # dictionary type gets converted to dense large string array + assert pa.types.is_large_string(arr._pa_array.type) def test_constructor_from_list(): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 5834b268be2be..59ff4f3122e8f 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -891,20 +891,24 @@ def test_concat_same_type_different_freq(self, unit): tm.assert_datetime_array_equal(result, expected) - def test_strftime(self, arr1d): + def test_strftime(self, arr1d, using_infer_string): arr = arr1d result = arr.strftime("%Y %b") expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) - tm.assert_numpy_array_equal(result, expected) + if using_infer_string: + expected = pd.array(expected, dtype=pd.StringDtype(na_value=np.nan)) + tm.assert_equal(result, expected) - def test_strftime_nat(self): + def test_strftime_nat(self, using_infer_string): # GH 29578 arr = DatetimeIndex(["2019-01-01", NaT])._data result = arr.strftime("%Y-%m-%d") expected = np.array(["2019-01-01", np.nan], dtype=object) - tm.assert_numpy_array_equal(result, expected) + if using_infer_string: + expected = pd.array(expected, dtype=pd.StringDtype(na_value=np.nan)) + tm.assert_equal(result, expected) class TestTimedeltaArray(SharedTests): @@ -1161,20 +1165,24 @@ def test_array_interface(self, arr1d): expected = np.asarray(arr).astype("S20") tm.assert_numpy_array_equal(result, expected) - def test_strftime(self, arr1d): + def test_strftime(self, arr1d, using_infer_string): arr = arr1d result = arr.strftime("%Y") expected = np.array([per.strftime("%Y") for per in arr], dtype=object) - 
tm.assert_numpy_array_equal(result, expected) + if using_infer_string: + expected = pd.array(expected, dtype=pd.StringDtype(na_value=np.nan)) + tm.assert_equal(result, expected) - def test_strftime_nat(self): + def test_strftime_nat(self, using_infer_string): # GH 29578 arr = PeriodArray(PeriodIndex(["2019-01-01", NaT], dtype="period[D]")) result = arr.strftime("%Y-%m-%d") expected = np.array(["2019-01-01", np.nan], dtype=object) - tm.assert_numpy_array_equal(result, expected) + if using_infer_string: + expected = pd.array(expected, dtype=pd.StringDtype(na_value=np.nan)) + tm.assert_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index dd6bf3c7521f8..13a3ff048c79e 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd @@ -20,6 +24,7 @@ SparseArray, TimedeltaArray, ) +from pandas.core.arrays.string_ import StringArrayNumpySemantics from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics @@ -218,7 +223,9 @@ def test_iter_box_period(self): ) def test_values_consistent(arr, expected_type, dtype, using_infer_string): if using_infer_string and dtype == "object": - expected_type = ArrowStringArrayNumpySemantics + expected_type = ( + ArrowStringArrayNumpySemantics if HAS_PYARROW else StringArrayNumpySemantics + ) l_values = Series(arr)._values r_values = pd.Index(arr)._values assert type(l_values) is expected_type @@ -355,6 +362,9 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request): tm.assert_numpy_array_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize("as_series", [True, False]) @pytest.mark.parametrize( "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 8724f62de1534..de56d5e4a07ee 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -5,6 +5,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.compat.pyarrow import pa_version_under12p0 import pandas.util._test_decorators as td @@ -197,7 +198,7 @@ def test_astype_arrow_timestamp(): assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_convert_dtypes_infer_objects(): ser = Series(["a", "b", "c"]) ser_orig = ser.copy() @@ -213,7 +214,7 @@ def test_convert_dtypes_infer_objects(): tm.assert_series_equal(ser, ser_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_convert_dtypes(): df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}) df_orig = df.copy() diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index d2e2d43b0a42b..dd4dd154f74b0 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -3,6 +3,8 @@ from 
pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( DataFrame, Index, @@ -14,7 +16,7 @@ from pandas.tests.copy_view.util import get_array -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_concat_frames(): df = DataFrame({"b": ["a"] * 3}) df2 = DataFrame({"a": ["a"] * 3}) @@ -33,7 +35,7 @@ def test_concat_frames(): tm.assert_frame_equal(df, df_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_concat_frames_updating_input(): df = DataFrame({"b": ["a"] * 3}) df2 = DataFrame({"a": ["a"] * 3}) @@ -153,7 +155,7 @@ def test_concat_copy_keyword(): assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize( "func", [ @@ -249,7 +251,7 @@ def test_merge_copy_keyword(): assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_join_on_key(): df_index = Index(["a", "b", "c"], name="key") @@ -277,7 +279,7 @@ def test_join_on_key(): tm.assert_frame_equal(df2, df2_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_join_multiple_dataframes_on_key(): df_index = Index(["a", "b", "c"], name="key") diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index f80e9b7dcf838..fc57178b897b9 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -3,6 +3,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( NA, DataFrame, @@ -121,7 +123,7 @@ def test_interpolate_cannot_with_object_dtype(): df.interpolate() -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_interpolate_object_convert_no_op(): df = DataFrame({"a": ["a", "b", "c"], "b": 1}) arr_a = get_array(df, "a") diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 3716df8fbf855..92e1ba750fae2 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -3,6 +3,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -714,7 +716,7 @@ def test_head_tail(method): tm.assert_frame_equal(df, df_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_infer_objects(): df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"}) df_orig = df.copy() @@ -730,6 +732,9 @@ def test_infer_objects(): tm.assert_frame_equal(df, df_orig) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_infer_objects_no_reference(): df = DataFrame( { @@ -899,7 +904,7 @@ def test_sort_values_inplace(obj, kwargs): 
tm.assert_equal(view, obj_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize("decimals", [-1, 0, 1]) def test_round(decimals): df = DataFrame({"a": [1, 2], "b": "c"}) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index c1120ccfea635..58c979fb05089 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -3,6 +3,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( Categorical, DataFrame, @@ -59,7 +61,7 @@ def test_replace_regex_inplace_refs(): tm.assert_frame_equal(view, df_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_replace_regex_inplace(): df = DataFrame({"a": ["aaa", "bbb"]}) arr = get_array(df, "a") @@ -257,7 +259,7 @@ def test_replace_empty_list(): assert not df2._mgr._has_no_reference(0) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize("value", ["d", None]) def test_replace_object_list_inplace(value): df = DataFrame({"a": ["a", "b", "c"]}) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 4bf97b1fd8494..2c2dff7a957fe 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -3,6 +3,7 @@ import numpy as np import pytest +from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td from pandas.core.dtypes.astype import astype_array @@ -802,13 +803,17 @@ def test_pandas_dtype_ea_not_instance(): def test_pandas_dtype_string_dtypes(string_storage): - # TODO(infer_string) remove skip if "python" is supported - pytest.importorskip("pyarrow") + with pd.option_context("future.infer_string", True): + # with the default string_storage setting + result = pandas_dtype("str") + assert result == pd.StringDtype( + "pyarrow" if HAS_PYARROW else "python", na_value=np.nan + ) + with pd.option_context("future.infer_string", True): with pd.option_context("string_storage", string_storage): result = pandas_dtype("str") - # TODO(infer_string) hardcoded to pyarrow until python is supported - assert result == pd.StringDtype("pyarrow", na_value=np.nan) + assert result == pd.StringDtype(string_storage, na_value=np.nan) with pd.option_context("future.infer_string", False): with pd.option_context("string_storage", string_storage): diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index fad2560265d21..ff9f3cbed64a2 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -7,6 +7,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.common import is_string_dtype import pandas as pd @@ -140,6 +142,12 @@ class BaseArithmeticOpsTests(BaseOpsUtil): series_array_exc: type[Exception] | None = TypeError divmod_exc: type[Exception] | None = TypeError + # TODO(infer_string) need to remove import of pyarrow + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # series & scalar if all_arithmetic_operators == "__rmod__" and 
is_string_dtype(data.dtype): @@ -149,6 +157,11 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): ser = pd.Series(data) self.check_opname(ser, op_name, ser.iloc[0]) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): @@ -158,12 +171,22 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): df = pd.DataFrame({"A": data}) self.check_opname(df, op_name, data[0]) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators ser = pd.Series(data) self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_divmod(self, data): ser = pd.Series(data) self._check_divmod_op(ser, divmod, 1) @@ -179,6 +202,7 @@ def test_divmod_series_array(self, data, data_for_twos): other = pd.Series(other) self._check_divmod_op(other, ops.rdivmod, ser) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_add_series_with_extension_array(self, data): # Check adding an ExtensionArray to a Series of the same dtype matches # the behavior of adding the arrays directly and then wrapping in a diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 8f8af607585df..c3d4b83f731a3 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -140,6 +140,7 @@ def test_map(self, data, na_action): result = data.map(lambda x: x, na_action=na_action) tm.assert_extension_array_equal(result, data) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): # frame & scalar op_name = all_arithmetic_operators @@ -151,6 +152,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ) super().test_arith_frame_with_scalar(data, op_name) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): op_name = all_arithmetic_operators if op_name == "__rmod__": diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 79cfb736941d6..1b251a5118681 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -19,6 +19,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.dtypes import NumpyEADtype import pandas as pd @@ -255,6 +257,7 @@ def test_insert_invalid(self, data, invalid_scalar): frame_scalar_exc = None series_array_exc = None + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_divmod(self, data): divmod_exc = None if data.dtype.kind == "O": @@ -262,6 +265,7 @@ def test_divmod(self, data): self.divmod_exc = divmod_exc super().test_divmod(data) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_divmod_series_array(self, data): ser = pd.Series(data) exc = None @@ -270,6 
+274,7 @@ def test_divmod_series_array(self, data): self.divmod_exc = exc self._check_divmod_op(ser, divmod, data) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators series_scalar_exc = None @@ -283,6 +288,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) self.series_scalar_exc = series_scalar_exc super().test_arith_series_with_scalar(data, all_arithmetic_operators) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_array(self, data, all_arithmetic_operators): opname = all_arithmetic_operators series_array_exc = None @@ -291,6 +297,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): self.series_array_exc = series_array_exc super().test_arith_series_with_array(data, all_arithmetic_operators) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators frame_scalar_exc = None diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 826ac2be3339b..8ce4e8725d632 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -12,6 +12,7 @@ from pandas._config import using_string_dtype from pandas._libs import iNaT +from pandas.compat import HAS_PYARROW from pandas.errors import InvalidIndexError from pandas.core.dtypes.common import is_integer @@ -1148,7 +1149,9 @@ def test_loc_setitem_datetimelike_with_inference(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" + ) def test_getitem_boolean_indexing_mixed(self): df = DataFrame( { diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 1d7b3e12b2e86..32a827c25c77a 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -6,6 +6,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.common import is_scalar import pandas as pd @@ -938,6 +940,9 @@ def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype): obj.mask(mask, null) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) @given(data=OPTIONAL_ONE_OF_ALL) def test_where_inplace_casting(data): # GH 22051 @@ -1018,6 +1023,9 @@ def test_where_producing_ea_cond_for_np_dtype(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "replacement", [0.001, True, "snake", None, datetime(2022, 5, 4)] ) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 59779234b46d9..e7f6e5d625d3e 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -3,21 +3,15 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd import pandas._testing as tm class TestConvertDtypes: - # TODO convert_dtypes should not use NaN variant of string dtype, 
but always NA - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")] ) - def test_convert_dtypes( - self, convert_integer, expected, string_storage, using_infer_string - ): + def test_convert_dtypes(self, convert_integer, expected, string_storage): # Specific types are tested in tests/series/test_dtypes.py # Just check that it works for DataFrame here df = pd.DataFrame( @@ -182,7 +176,6 @@ def test_convert_dtypes_pyarrow_timestamp(self): result = expected.convert_dtypes(dtype_backend="pyarrow") tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_convert_dtypes_avoid_block_splitting(self): # GH#55341 df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": "a"}) @@ -197,7 +190,6 @@ def test_convert_dtypes_avoid_block_splitting(self): tm.assert_frame_equal(result, expected) assert result._mgr.nblocks == 2 - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_convert_dtypes_from_arrow(self): # GH#56581 df = pd.DataFrame([["a", datetime.time(18, 12)]], columns=["a", "b"]) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 419fb75cb3669..7feb3b6fd816d 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -476,3 +476,41 @@ def test_drop_duplicates_non_boolean_ignore_index(arg): msg = '^For argument "ignore_index" expected type bool, received type .*.$' with pytest.raises(ValueError, match=msg): df.drop_duplicates(ignore_index=arg) + + +def test_drop_duplicates_set(): + # GH#59237 + df = DataFrame( + { + "AAA": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates({"AAA"}) + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates({"AAA"}, keep="last") + expected = df.loc[[6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates({"AAA"}, keep=False) + expected = df.loc[[]] + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + expected = df.loc[[0, 1, 2, 3]] + result = df.drop_duplicates({"AAA", "B"}) + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates({"AAA", "B"}, keep="last") + expected = df.loc[[0, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates({"AAA", "B"}, keep=False) + expected = df.loc[[0]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index a4319f8a8ae7f..aad43b7a77ac7 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -10,6 +10,7 @@ from pandas._config import using_string_dtype from pandas.compat import ( + HAS_PYARROW, IS64, PYPY, ) @@ -520,7 +521,7 @@ def test_info_int_columns(): assert result == expected -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_memory_usage_empty_no_warning(): # GH#50066 df = DataFrame(index=["a", "b"]) diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 79aabbcc83bbf..4e8e267523439 
100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -6,10 +6,13 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.algos import ( Infinity, NegInfinity, ) +from pandas.compat import HAS_PYARROW from pandas import ( DataFrame, @@ -464,9 +467,18 @@ def test_rank_inf_nans_na_option( ], ) def test_rank_object_first( - self, frame_or_series, na_option, ascending, expected, using_infer_string + self, + request, + frame_or_series, + na_option, + ascending, + expected, + using_infer_string, ): obj = frame_or_series(["foo", "foo", None, "foo"]) + if using_string_dtype() and not HAS_PYARROW and isinstance(obj, Series): + request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) + result = obj.rank(method="first", na_option=na_option, ascending=ascending) expected = frame_or_series(expected) if using_infer_string and isinstance(obj, Series): diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index 4136d641ef67f..7670b53f23173 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd import pandas._testing as tm @@ -132,6 +136,9 @@ def test_data_frame_value_counts_dropna_true(nulls_fixture): tm.assert_series_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) def test_data_frame_value_counts_dropna_false(nulls_fixture): # GH 41334 df = pd.DataFrame( diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index e8ef0592ac432..f8219e68a72da 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -8,6 +8,8 @@ from pandas._config import using_string_dtype from pandas._config.config import option_context +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -113,7 +115,9 @@ def test_not_hashable(self): with pytest.raises(TypeError, match=msg): hash(empty_frame) - @pytest.mark.xfail(using_string_dtype(), reason="surrogates not allowed") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="surrogates not allowed" + ) def test_column_name_contains_unicode_surrogate(self): # GH 25509 colname = "\ud83d" diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 734bfc8b30053..e41a3b27e592c 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -13,6 +13,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -1542,7 +1544,9 @@ def test_comparisons(self, simple_frame, float_frame, func): with pytest.raises(ValueError, match=msg): func(simple_frame, simple_frame[:2]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" + ) def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne): # GH 11565 df = DataFrame( diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a210af94561f9..0176a36fe78d7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -24,6 +24,7 
@@ from pandas._config import using_string_dtype from pandas._libs import lib +from pandas.compat import HAS_PYARROW from pandas.compat.numpy import np_version_gt2 from pandas.errors import IntCastingNaNError @@ -299,7 +300,7 @@ def test_constructor_dtype_nocast_view_2d_array(self): df2 = DataFrame(df.values, dtype=df[0].dtype) assert df2._mgr.blocks[0].values.flags.c_contiguous - @pytest.mark.xfail(using_string_dtype(), reason="conversion copies") + @pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="conversion copies") def test_1d_object_array_does_not_copy(self): # https://github.com/pandas-dev/pandas/issues/39272 arr = np.array(["a", "b"], dtype="object") diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py index ad54cfaf9d927..6788721e8a72e 100644 --- a/pandas/tests/frame/test_logical_ops.py +++ b/pandas/tests/frame/test_logical_ops.py @@ -4,6 +4,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + from pandas import ( CategoricalIndex, DataFrame, @@ -96,6 +100,9 @@ def test_logical_ops_int_frame(self): res_ser = df1a_int["A"] | df1a_bool["A"] tm.assert_series_equal(res_ser, df1a_bool["A"]) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_logical_ops_invalid(self, using_infer_string): # GH#5808 diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 4c355ed92b6c3..1d667d35db253 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -226,6 +226,7 @@ def float_frame_with_na(): class TestDataFrameAnalytics: # --------------------------------------------------------------------- # Reductions + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "opname", @@ -431,6 +432,7 @@ def test_stat_operators_attempt_obj_array(self, method, df, axis): expected[expected.isna()] = None tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"]) def test_mixed_ops(self, op): # GH#16116 @@ -532,7 +534,7 @@ def test_mean_mixed_string_decimal(self): df = DataFrame(d) with pytest.raises( - TypeError, match="unsupported operand type|does not support" + TypeError, match="unsupported operand type|does not support|Cannot perform" ): df.mean() result = df[["A", "C"]].mean() @@ -690,6 +692,7 @@ def test_mode_dropna(self, dropna, expected): expected = DataFrame(expected) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_mode_sortwarning(self, using_infer_string): # Check for the warning that is raised when the mode # results cannot be sorted @@ -979,7 +982,7 @@ def test_sum_mixed_datetime(self): def test_mean_corner(self, float_frame, float_string_frame): # unit test when we have object data - msg = "Could not convert|does not support" + msg = "Could not convert|does not support|Cannot perform" with pytest.raises(TypeError, match=msg): float_string_frame.mean(axis=0) @@ -1093,6 +1096,7 @@ def test_idxmin_empty(self, index, skipna, axis): expected = Series(dtype=index.dtype) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) 
@pytest.mark.parametrize("numeric_only", [True, False]) def test_idxmin_numeric_only(self, numeric_only): df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")}) @@ -1143,6 +1147,7 @@ def test_idxmax_empty(self, index, skipna, axis): expected = Series(dtype=index.dtype) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("numeric_only", [True, False]) def test_idxmax_numeric_only(self, numeric_only): df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")}) @@ -1964,7 +1969,7 @@ def test_minmax_extensionarray(method, numeric_only): def test_frame_mixed_numeric_object_with_timestamp(ts_value): # GH 13912 df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]}) - with pytest.raises(TypeError, match="does not support operation"): + with pytest.raises(TypeError, match="does not support operation|Cannot perform"): df.sum() diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index 1887fa61ad081..5bbe047078c6e 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -5,6 +5,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.compat.numpy import np_version_gte1p25 import pandas as pd @@ -42,6 +43,11 @@ def test_neg_object(self, df, expected): tm.assert_frame_equal(-df, expected) tm.assert_series_equal(-df["a"], expected["a"]) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) @pytest.mark.parametrize( "df_data", [ @@ -130,7 +136,9 @@ def test_pos_object(self, df_data): tm.assert_frame_equal(+df, df) tm.assert_series_equal(+df["a"], df["a"]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" + ) @pytest.mark.filterwarnings("ignore:Applying:DeprecationWarning") def test_pos_object_raises(self): # GH#21380 diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 14d3dbd6fa496..18802ebd002fc 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -9,6 +9,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td from pandas import ( @@ -500,6 +501,9 @@ def test_dropna_combinations( tm.assert_series_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, expected_data, expected_index", [ diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 791f279bffc94..11b874d0b1608 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -8,6 +8,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.errors import SpecificationError import pandas.util._test_decorators as td @@ -1407,6 +1408,10 @@ def g(group): tm.assert_series_equal(result, expected) +# TODO harmonize error messages +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize("grouper", ["A", ["A", "B"]]) def test_set_group_name(df, grouper, using_infer_string): def f(group): diff --git 
a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index d42aa06d6bbfe..02071acf378dd 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -3,6 +3,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.compat.pyarrow import pa_version_under10p1 from pandas.core.dtypes.missing import na_value_for_dtype @@ -12,6 +13,9 @@ from pandas.tests.groupby import get_groupby_method_args +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -55,6 +59,9 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_one_group( tm.assert_frame_equal(grouped, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -131,6 +138,9 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs): tm.assert_frame_equal(grouped, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, idx, expected", [ @@ -205,6 +215,9 @@ def test_groupby_dataframe_slice_then_transform(dropna, index): tm.assert_series_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -286,6 +299,9 @@ def test_groupby_dropna_datetime_like_data( tm.assert_frame_equal(grouped, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, data, selected_data, levels", [ @@ -371,6 +387,9 @@ def test_groupby_dropna_with_multiindex_input(input_index, keys, series): tm.assert_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_groupby_nan_included(): # GH 35646 data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} diff --git a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py index c6697fd169e8a..78a79ac7d1546 100644 --- a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py +++ b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py @@ -9,7 +9,6 @@ from dateutil.tz import gettz import numpy as np import pytest -import pytz from pandas import ( DatetimeIndex, @@ -69,10 +68,10 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"] index = DatetimeIndex(times) tz = "US/Eastern" - with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)): + with pytest.raises(ValueError, match="|".join(times)): index.tz_localize(tz=tz) - with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)): + with pytest.raises(ValueError, match="|".join(times)): index.tz_localize(tz=tz, nonexistent="raise") result = index.tz_localize(tz=tz, nonexistent="NaT") @@ -85,7 +84,7 @@ def test_dti_tz_localize_ambiguous_infer(self, tz): # November 6, 2011, fall back, repeat 2 AM hour # With no repeated hours, we cannot infer the transition dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour()) - with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + with pytest.raises(ValueError, 
match="Cannot infer dst time"): dr.tz_localize(tz) def test_dti_tz_localize_ambiguous_infer2(self, tz, unit): @@ -117,7 +116,7 @@ def test_dti_tz_localize_ambiguous_infer3(self, tz): def test_dti_tz_localize_ambiguous_times(self, tz): # March 13, 2011, spring forward, skip from 2 AM to 3 AM dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=offsets.Hour()) - with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:30:00"): + with pytest.raises(ValueError, match="2011-03-13 02:30:00"): dr.tz_localize(tz) # after dst transition, it works @@ -127,7 +126,7 @@ def test_dti_tz_localize_ambiguous_times(self, tz): # November 6, 2011, fall back, repeat 2 AM hour dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=offsets.Hour()) - with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + with pytest.raises(ValueError, match="Cannot infer dst time"): dr.tz_localize(tz) # UTC is OK @@ -163,11 +162,11 @@ def test_dti_tz_localize(self, prefix): tm.assert_numpy_array_equal(dti3.values, dti_utc.values) dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms") - with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + with pytest.raises(ValueError, match="Cannot infer dst time"): dti.tz_localize(tzstr) dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms") - with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:00:00"): + with pytest.raises(ValueError, match="2011-03-13 02:00:00"): dti.tz_localize(tzstr) def test_dti_tz_localize_utc_conversion(self, tz): @@ -184,7 +183,7 @@ def test_dti_tz_localize_utc_conversion(self, tz): # DST ambiguity, this should fail rng = date_range("3/11/2012", "3/12/2012", freq="30min") # Is this really how it should fail?? 
- with pytest.raises(pytz.NonExistentTimeError, match="2012-03-11 02:00:00"): + with pytest.raises(ValueError, match="2012-03-11 02:00:00"): rng.tz_localize(tz) def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index aba440ceeb56b..8da88b97f9ea8 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -14,7 +14,6 @@ from dateutil.tz import gettz import numpy as np import pytest -import pytz from pandas._libs.tslibs import ( astype_overflowsafe, @@ -750,7 +749,7 @@ def test_disallow_setting_tz(self): [ None, "America/Los_Angeles", - pytz.timezone("America/Los_Angeles"), + zoneinfo.ZoneInfo("America/Los_Angeles"), Timestamp("2000", tz="America/Los_Angeles").tz, ], ) @@ -765,8 +764,8 @@ def test_constructor_start_end_with_tz(self, tz): freq="D", ) tm.assert_index_equal(result, expected) - # Especially assert that the timezone is consistent for pytz - assert pytz.timezone("America/Los_Angeles") is result.tz + # Especially assert that the timezone is consistent for zoneinfo + assert zoneinfo.ZoneInfo("America/Los_Angeles") is result.tz @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"]) def test_constructor_with_non_normalized_pytz(self, tz): @@ -984,6 +983,7 @@ def test_dti_ambiguous_matches_timestamp(self, tz, use_str, box_cls, request): # GH#47471 check that we get the same raising behavior in the DTI # constructor and Timestamp constructor if isinstance(tz, str) and tz.startswith("pytz/"): + pytz = pytest.importorskip("pytz") tz = pytz.timezone(tz.removeprefix("pytz/")) dtstr = "2013-11-03 01:59:59.999999" item = dtstr @@ -1000,7 +1000,7 @@ def test_dti_ambiguous_matches_timestamp(self, tz, use_str, box_cls, request): mark = pytest.mark.xfail(reason="We implicitly get fold=0.") request.applymarker(mark) - with pytest.raises(pytz.AmbiguousTimeError, match=dtstr): + with pytest.raises(ValueError, match=dtstr): box_cls(item, tz=tz) @pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"]) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index b37b5cf74b347..e09883e95ecec 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -11,7 +11,6 @@ import numpy as np import pytest -import pytz from pandas._libs.tslibs import timezones from pandas._libs.tslibs.offsets import ( @@ -881,7 +880,7 @@ def test_date_range_ambiguous_endpoint(self, tz): # construction with an ambiguous end-point # GH#11626 - with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): + with pytest.raises(ValueError, match="Cannot infer dst time"): date_range( "2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", freq="h" ) @@ -905,7 +904,7 @@ def test_date_range_nonexistent_endpoint(self, tz, option, expected): # construction with a nonexistent end-point - with pytest.raises(pytz.NonExistentTimeError, match="2019-03-10 02:00:00"): + with pytest.raises(ValueError, match="2019-03-10 02:00:00"): date_range( "2019-03-10 00:00", "2019-03-10 02:00", tz="US/Pacific", freq="h" ) @@ -1259,6 +1258,24 @@ def test_range_with_timezone_and_custombusinessday(self, start, period, expected expected = DatetimeIndex(expected).as_unit("ns") tm.assert_index_equal(result, expected) + def 
test_data_range_custombusinessday_partial_time(self, unit): + # GH#57456 + offset = offsets.CustomBusinessDay(weekmask="Sun Mon Tue") + start = datetime(2024, 2, 6, 23) + # end datetime is partial and not in the offset + end = datetime(2024, 2, 14, 14) + result = date_range(start, end, freq=offset, unit=unit) + expected = DatetimeIndex( + [ + "2024-02-06 23:00:00", + "2024-02-11 23:00:00", + "2024-02-12 23:00:00", + "2024-02-13 23:00:00", + ], + dtype=f"M8[{unit}]", + ) + tm.assert_index_equal(result, expected) + class TestDateRangeNonNano: def test_date_range_reso_validation(self): diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index e4b8a909add0d..8d9340818b511 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -184,11 +184,8 @@ def test_dti_tz_nat(self, tzstr): assert isna(idx[1]) assert idx[0].tzinfo is not None - @pytest.mark.parametrize("tzstr", ["pytz/US/Eastern", "dateutil/US/Eastern"]) + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) def test_utc_box_timestamp_and_localize(self, tzstr): - if tzstr.startswith("pytz/"): - pytest.importorskip("pytz") - tzstr = tzstr.removeprefix("pytz/") tz = timezones.maybe_get_tz(tzstr) rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc") @@ -203,11 +200,10 @@ def test_utc_box_timestamp_and_localize(self, tzstr): # right tzinfo rng = date_range("3/13/2012", "3/14/2012", freq="h", tz="utc") rng_eastern = rng.tz_convert(tzstr) - # test not valid for dateutil timezones. - # assert 'EDT' in repr(rng_eastern[0].tzinfo) - assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr( - rng_eastern[0].tzinfo - ) + if "dateutil" in tzstr: + assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr( + rng_eastern[0].tzinfo + ) @pytest.mark.parametrize( "tz", [zoneinfo.ZoneInfo("US/Central"), gettz("US/Central")] diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index 1eeeebd6b8ca9..e3428d1060dbe 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -3,10 +3,13 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.missing import ( NA, is_matching_na, ) +from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td import pandas as pd @@ -29,6 +32,9 @@ def test_get_indexer_strings(self, method, expected): tm.assert_numpy_array_equal(actual, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_get_indexer_strings_raises(self, using_infer_string): index = Index(["b", "c"]) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 0911f2aec74d6..7ec66100b7291 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -8,7 +8,12 @@ import numpy as np import pytest -from pandas.compat import IS64 +from pandas._config import using_string_dtype + +from pandas.compat import ( + HAS_PYARROW, + IS64, +) from pandas.errors import InvalidIndexError import pandas.util._test_decorators as td @@ -71,6 +76,9 @@ def test_constructor_casting(self, index): tm.assert_contains_all(arr, new_index) tm.assert_index_equal(index, new_index) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_constructor_copy(self, using_infer_string): index = 
Index(list("abc"), name="name") arr = np.array(index) @@ -335,6 +343,11 @@ def test_constructor_empty_special(self, empty, klass): def test_view_with_args(self, index): index.view("i8") + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) @pytest.mark.parametrize( "index", [ @@ -817,6 +830,11 @@ def test_isin(self, values, index, expected): expected = np.array(expected, dtype=bool) tm.assert_numpy_array_equal(result, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_isin_nan_common_object( self, nulls_fixture, nulls_fixture2, using_infer_string ): diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 6d01ba6adc87a..9993a21d93f12 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -9,6 +9,7 @@ from pandas._config import using_string_dtype from pandas._libs.tslibs import Timestamp +from pandas.compat import HAS_PYARROW from pandas.core.dtypes.common import ( is_integer_dtype, @@ -245,6 +246,11 @@ def test_repr_max_seq_item_setting(self, simple_index): repr(idx) assert "..." not in str(idx) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_ensure_copied_data(self, index): # Check the "copy" argument of each Index.__new__ is honoured diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 247501f1504e7..e007b8c4e97ac 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -16,6 +16,7 @@ from pandas._config import using_string_dtype from pandas._libs import index as libindex +from pandas.compat import HAS_PYARROW from pandas.errors import IndexingError import pandas as pd @@ -1388,6 +1389,9 @@ def test_loc_setitem_categorical_values_partial_column_slice(self): df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_loc_setitem_single_row_categorical(self, using_infer_string): # GH#25495 df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 65a52bc8e0794..b831ec3bb2c6a 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -30,10 +30,6 @@ read_csv, ) import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - StringArray, -) read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"] engine_params = [ @@ -692,43 +688,33 @@ def test_dtype_backend_and_dtype(self, read_ext, tmp_excel): ) tm.assert_frame_equal(result, df) - @pytest.mark.xfail( - using_string_dtype(), reason="infer_string takes precedence", strict=False - ) def test_dtype_backend_string(self, read_ext, string_storage, tmp_excel): # GH#36712 if read_ext in (".xlsb", ".xls"): pytest.skip(f"No engine for filetype: '{read_ext}'") - pa = pytest.importorskip("pyarrow") + df = DataFrame( + { + "a": np.array(["a", "b"], dtype=np.object_), + "b": np.array(["x", pd.NA], dtype=np.object_), + } + ) + df.to_excel(tmp_excel, sheet_name="test", index=False) with pd.option_context("mode.string_storage", string_storage): - df = DataFrame( - { - "a": 
np.array(["a", "b"], dtype=np.object_), - "b": np.array(["x", pd.NA], dtype=np.object_), - } - ) - df.to_excel(tmp_excel, sheet_name="test", index=False) result = pd.read_excel( tmp_excel, sheet_name="test", dtype_backend="numpy_nullable" ) - if string_storage == "python": - expected = DataFrame( - { - "a": StringArray(np.array(["a", "b"], dtype=np.object_)), - "b": StringArray(np.array(["x", pd.NA], dtype=np.object_)), - } - ) - else: - expected = DataFrame( - { - "a": ArrowStringArray(pa.array(["a", "b"])), - "b": ArrowStringArray(pa.array(["x", None])), - } - ) - tm.assert_frame_equal(result, expected) + expected = DataFrame( + { + "a": Series(["a", "b"], dtype=pd.StringDtype(string_storage)), + "b": Series(["x", None], dtype=pd.StringDtype(string_storage)), + } + ) + # the storage of the str columns' Index is also affected by the + # string_storage setting -> ignore that for checking the result + tm.assert_frame_equal(result, expected, check_column_type=False) @pytest.mark.parametrize("dtypes, exp_value", [({}, 1), ({"a.1": "int64"}, 1)]) def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index e1cdfb8bfa7e3..44266ae9a62a5 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -282,7 +282,6 @@ def test_excel_multindex_roundtrip( ) tm.assert_frame_equal(df, act, check_names=check_names) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_read_excel_parse_dates(self, tmp_excel): # see gh-11544, gh-12051 df = DataFrame( diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index b0e4712e8bb3d..d28c7c566d851 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -347,6 +347,7 @@ def test_styler_bar_with_NA_values(): def test_style_bar_with_pyarrow_NA_values(): + pytest.importorskip("pyarrow") data = """name,age,test1,test2,teacher Adam,15,95.0,80,Ashby Bob,16,81.0,82,Ashby diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 1bc227369a968..3d07c0219691e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -28,11 +28,6 @@ read_json, ) import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - StringArray, -) -from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics from pandas.io.json import ujson_dumps @@ -2143,12 +2138,10 @@ def test_json_uint64(self): result = df.to_json(orient="split") assert result == expected - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_read_json_dtype_backend( self, string_storage, dtype_backend, orient, using_infer_string ): # GH#50750 - pa = pytest.importorskip("pyarrow") df = DataFrame( { "a": Series([1, np.nan, 3], dtype="Int64"), @@ -2162,30 +2155,18 @@ def test_read_json_dtype_backend( } ) - if using_infer_string: - string_array = ArrowStringArrayNumpySemantics(pa.array(["a", "b", "c"])) - string_array_na = ArrowStringArrayNumpySemantics(pa.array(["a", "b", None])) - elif string_storage == "python": - string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_)) - string_array_na = StringArray(np.array(["a", "b", NA], dtype=np.object_)) - - elif dtype_backend == "pyarrow": - pa = pytest.importorskip("pyarrow") - from pandas.arrays import ArrowExtensionArray - - string_array = ArrowExtensionArray(pa.array(["a", 
"b", "c"])) - string_array_na = ArrowExtensionArray(pa.array(["a", "b", None])) - - else: - string_array = ArrowStringArray(pa.array(["a", "b", "c"])) - string_array_na = ArrowStringArray(pa.array(["a", "b", None])) - out = df.to_json(orient=orient) with pd.option_context("mode.string_storage", string_storage): result = read_json( StringIO(out), dtype_backend=dtype_backend, orient=orient ) + if dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") + string_dtype = pd.ArrowDtype(pa.string()) + else: + string_dtype = pd.StringDtype(string_storage) + expected = DataFrame( { "a": Series([1, np.nan, 3], dtype="Int64"), @@ -2194,12 +2175,13 @@ def test_read_json_dtype_backend( "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": Series([True, False, NA], dtype="boolean"), "f": Series([True, False, True], dtype="boolean"), - "g": string_array, - "h": string_array_na, + "g": Series(["a", "b", "c"], dtype=string_dtype), + "h": Series(["a", "b", None], dtype=string_dtype), } ) if dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") from pandas.arrays import ArrowExtensionArray expected = DataFrame( @@ -2212,7 +2194,9 @@ def test_read_json_dtype_backend( if orient == "values": expected.columns = list(range(8)) - tm.assert_frame_equal(result, expected) + # the storage of the str columns' Index is also affected by the + # string_storage setting -> ignore that for checking the result + tm.assert_frame_equal(result, expected, check_column_type=False) @pytest.mark.parametrize("orient", ["split", "records", "index"]) def test_read_json_nullable_series(self, string_storage, dtype_backend, orient): diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 6d5f870f07206..90f77a7024235 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -4,6 +4,7 @@ import pytest +from pandas.compat import HAS_PYARROW from pandas.compat._optional import VERSIONS from pandas import ( @@ -117,7 +118,15 @@ def csv1(datapath): _py_parsers_only = [_pythonParser] _c_parsers_only = [_cParserHighMemory, _cParserLowMemory] -_pyarrow_parsers_only = [pytest.param(_pyarrowParser, marks=pytest.mark.single_cpu)] +_pyarrow_parsers_only = [ + pytest.param( + _pyarrowParser, + marks=[ + pytest.mark.single_cpu, + pytest.mark.skipif(not HAS_PYARROW, reason="pyarrow is not installed"), + ], + ) +] _all_parsers = [*_c_parsers_only, *_py_parsers_only, *_pyarrow_parsers_only] @@ -181,7 +190,16 @@ def _get_all_parser_float_precision_combinations(): parser = parser.values[0] for precision in parser.float_precision_choices: # Re-wrap in pytest.param for pyarrow - mark = pytest.mark.single_cpu if parser.engine == "pyarrow" else () + mark = ( + [ + pytest.mark.single_cpu, + pytest.mark.skipif( + not HAS_PYARROW, reason="pyarrow is not installed" + ), + ] + if parser.engine == "pyarrow" + else () + ) param = pytest.param((parser(), precision), marks=mark) params.append(param) ids.append(f"{parser_id}-{precision}") diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 3f410a13c8f80..07f29518b7881 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -19,11 +19,7 @@ Timestamp, ) import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - IntegerArray, - StringArray, -) +from pandas.core.arrays import IntegerArray pytestmark = pytest.mark.filterwarnings( "ignore:Passing a BlockManager to 
DataFrame:DeprecationWarning" @@ -463,11 +459,8 @@ def test_dtype_backend_and_dtype(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_dtype_backend_string(all_parsers, string_storage): # GH#36712 - pa = pytest.importorskip("pyarrow") - with pd.option_context("mode.string_storage", string_storage): parser = all_parsers @@ -477,21 +470,13 @@ def test_dtype_backend_string(all_parsers, string_storage): """ result = parser.read_csv(StringIO(data), dtype_backend="numpy_nullable") - if string_storage == "python": - expected = DataFrame( - { - "a": StringArray(np.array(["a", "b"], dtype=np.object_)), - "b": StringArray(np.array(["x", pd.NA], dtype=np.object_)), - } - ) - else: - expected = DataFrame( - { - "a": ArrowStringArray(pa.array(["a", "b"])), - "b": ArrowStringArray(pa.array(["x", None])), - } - ) - tm.assert_frame_equal(result, expected) + expected = DataFrame( + { + "a": pd.array(["a", "b"], dtype=pd.StringDtype(string_storage)), + "b": pd.array(["x", pd.NA], dtype=pd.StringDtype(string_storage)), + }, + ) + tm.assert_frame_equal(result, expected) def test_dtype_backend_ea_dtype_specified(all_parsers): @@ -507,7 +492,6 @@ def test_dtype_backend_ea_dtype_specified(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_dtype_backend_pyarrow(all_parsers, request): # GH#36712 pa = pytest.importorskip("pyarrow") diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index b7b4a77c9e048..6243185294894 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -13,8 +13,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import EmptyDataError import pandas as pd @@ -23,10 +21,6 @@ DatetimeIndex, ) import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - StringArray, -) from pandas.io.common import urlopen from pandas.io.parsers import ( @@ -941,39 +935,30 @@ def test_widths_and_usecols(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_dtype_backend(string_storage, dtype_backend): # GH#50289 - if string_storage == "python": - arr = StringArray(np.array(["a", "b"], dtype=np.object_)) - arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_)) - elif dtype_backend == "pyarrow": - pa = pytest.importorskip("pyarrow") - from pandas.arrays import ArrowExtensionArray - - arr = ArrowExtensionArray(pa.array(["a", "b"])) - arr_na = ArrowExtensionArray(pa.array([None, "a"])) - else: - pa = pytest.importorskip("pyarrow") - arr = ArrowStringArray(pa.array(["a", "b"])) - arr_na = ArrowStringArray(pa.array([None, "a"])) - data = """a b c d e f g h i 1 2.5 True a 3 4.5 False b True 6 7.5 a""" with pd.option_context("mode.string_storage", string_storage): result = read_fwf(StringIO(data), dtype_backend=dtype_backend) + if dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") + string_dtype = pd.ArrowDtype(pa.string()) + else: + string_dtype = pd.StringDtype(string_storage) + expected = DataFrame( { "a": pd.Series([1, 3], dtype="Int64"), "b": pd.Series([2.5, 4.5], dtype="Float64"), "c": pd.Series([True, False], dtype="boolean"), - "d": arr, + "d": pd.Series(["a", "b"], dtype=string_dtype), "e": pd.Series([pd.NA, True], dtype="boolean"), "f": pd.Series([pd.NA, 
6], dtype="Int64"), "g": pd.Series([pd.NA, 7.5], dtype="Float64"), - "h": arr_na, + "h": pd.Series([None, "a"], dtype=string_dtype), "i": pd.Series([pd.NA, pd.NA], dtype="Int64"), } ) @@ -989,7 +974,9 @@ def test_dtype_backend(string_storage, dtype_backend): ) expected["i"] = ArrowExtensionArray(pa.array([None, None])) - tm.assert_frame_equal(result, expected) + # the storage of the str columns' Index is also affected by the + # string_storage setting -> ignore that for checking the result + tm.assert_frame_equal(result, expected, check_column_type=False) def test_invalid_dtype_backend(): diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 923b880004c26..541cc39606047 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -19,10 +19,6 @@ read_clipboard, ) import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - StringArray, -) from pandas.io.clipboard import ( CheckedCall, @@ -358,23 +354,15 @@ def test_read_clipboard_dtype_backend( self, clipboard, string_storage, dtype_backend, engine ): # GH#50502 - if string_storage == "pyarrow" or dtype_backend == "pyarrow": - pa = pytest.importorskip("pyarrow") - - if string_storage == "python": - string_array = StringArray(np.array(["x", "y"], dtype=np.object_)) - string_array_na = StringArray(np.array(["x", NA], dtype=np.object_)) - - elif dtype_backend == "pyarrow" and engine != "c": + if dtype_backend == "pyarrow": pa = pytest.importorskip("pyarrow") - from pandas.arrays import ArrowExtensionArray - - string_array = ArrowExtensionArray(pa.array(["x", "y"])) - string_array_na = ArrowExtensionArray(pa.array(["x", None])) - + if engine == "c" and string_storage == "pyarrow": + # TODO avoid this exception? + string_dtype = pd.ArrowDtype(pa.large_string()) + else: + string_dtype = pd.ArrowDtype(pa.string()) else: - string_array = ArrowStringArray(pa.array(["x", "y"])) - string_array_na = ArrowStringArray(pa.array(["x", None])) + string_dtype = pd.StringDtype(string_storage) text = """a,b,c,d,e,f,g,h,i x,1,4.0,x,2,4.0,,True,False @@ -386,10 +374,10 @@ def test_read_clipboard_dtype_backend( expected = DataFrame( { - "a": string_array, + "a": Series(["x", "y"], dtype=string_dtype), "b": Series([1, 2], dtype="Int64"), "c": Series([4.0, 5.0], dtype="Float64"), - "d": string_array_na, + "d": Series(["x", None], dtype=string_dtype), "e": Series([2, NA], dtype="Int64"), "f": Series([4.0, NA], dtype="Float64"), "g": Series([NA, NA], dtype="Int64"), diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 5aa8f1c69fe44..6dd4368f09cc8 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -9,10 +9,6 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - StringArray, -) from pandas.io.feather_format import read_feather, to_feather # isort:skip @@ -184,25 +180,17 @@ def test_read_feather_dtype_backend(self, string_storage, dtype_backend): } ) - if string_storage == "python": - string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_)) - string_array_na = StringArray(np.array(["a", "b", pd.NA], dtype=np.object_)) - - elif dtype_backend == "pyarrow": - from pandas.arrays import ArrowExtensionArray - - string_array = ArrowExtensionArray(pa.array(["a", "b", "c"])) - string_array_na = ArrowExtensionArray(pa.array(["a", "b", None])) - - else: - string_array = ArrowStringArray(pa.array(["a", "b", "c"])) - string_array_na = 
ArrowStringArray(pa.array(["a", "b", None])) - with tm.ensure_clean() as path: to_feather(df, path) with pd.option_context("mode.string_storage", string_storage): result = read_feather(path, dtype_backend=dtype_backend) + if dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") + string_dtype = pd.ArrowDtype(pa.string()) + else: + string_dtype = pd.StringDtype(string_storage) + expected = pd.DataFrame( { "a": pd.Series([1, np.nan, 3], dtype="Int64"), @@ -211,8 +199,8 @@ def test_read_feather_dtype_backend(self, string_storage, dtype_backend): "d": pd.Series([1.5, 2.0, 2.5], dtype="Float64"), "e": pd.Series([True, False, pd.NA], dtype="boolean"), "f": pd.Series([True, False, True], dtype="boolean"), - "g": string_array, - "h": string_array_na, + "g": pd.Series(["a", "b", "c"], dtype=string_dtype), + "h": pd.Series(["a", "b", None], dtype=string_dtype), } ) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 164646aedf464..73e9933e3681b 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -13,8 +13,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat import is_platform_windows import pandas.util._test_decorators as td @@ -31,17 +29,9 @@ to_datetime, ) import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - StringArray, -) from pandas.io.common import file_path_to_url -pytestmark = pytest.mark.xfail( - using_string_dtype(), reason="TODO(infer_string)", strict=False -) - @pytest.fixture( params=[ @@ -156,7 +146,7 @@ def test_to_html_compat(self, flavor_read_html): df = ( DataFrame( np.random.default_rng(2).random((4, 3)), - columns=pd.Index(list("abc"), dtype=object), + columns=pd.Index(list("abc")), ) .map("{:.3f}".format) .astype(float) @@ -182,24 +172,16 @@ def test_dtype_backend(self, string_storage, dtype_backend, flavor_read_html): } ) - if string_storage == "python": - string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_)) - string_array_na = StringArray(np.array(["a", "b", NA], dtype=np.object_)) - elif dtype_backend == "pyarrow": - pa = pytest.importorskip("pyarrow") - from pandas.arrays import ArrowExtensionArray - - string_array = ArrowExtensionArray(pa.array(["a", "b", "c"])) - string_array_na = ArrowExtensionArray(pa.array(["a", "b", None])) - else: - pa = pytest.importorskip("pyarrow") - string_array = ArrowStringArray(pa.array(["a", "b", "c"])) - string_array_na = ArrowStringArray(pa.array(["a", "b", None])) - out = df.to_html(index=False) with pd.option_context("mode.string_storage", string_storage): result = flavor_read_html(StringIO(out), dtype_backend=dtype_backend)[0] + if dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") + string_dtype = pd.ArrowDtype(pa.string()) + else: + string_dtype = pd.StringDtype(string_storage) + expected = DataFrame( { "a": Series([1, np.nan, 3], dtype="Int64"), @@ -208,8 +190,8 @@ def test_dtype_backend(self, string_storage, dtype_backend, flavor_read_html): "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": Series([True, False, NA], dtype="boolean"), "f": Series([True, False, True], dtype="boolean"), - "g": string_array, - "h": string_array_na, + "g": Series(["a", "b", "c"], dtype=string_dtype), + "h": Series(["a", "b", None], dtype=string_dtype), } ) @@ -225,7 +207,9 @@ def test_dtype_backend(self, string_storage, dtype_backend, flavor_read_html): } ) - tm.assert_frame_equal(result, expected) + # the storage of the str columns' Index is also affected by the + # 
string_storage setting -> ignore that for checking the result + tm.assert_frame_equal(result, expected, check_column_type=False) @pytest.mark.network @pytest.mark.single_cpu diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 561c718ea5851..ec087eab0cf14 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -17,6 +17,7 @@ pa_version_under13p0, pa_version_under15p0, pa_version_under17p0, + pa_version_under18p0, ) import pandas as pd @@ -955,11 +956,16 @@ def test_timestamp_nanoseconds(self, pa): def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): pytest.importorskip("pyarrow", "11.0.0") - if timezone_aware_date_list.tzinfo != datetime.timezone.utc: + if ( + timezone_aware_date_list.tzinfo != datetime.timezone.utc + and pa_version_under18p0 + ): request.applymarker( pytest.mark.xfail( - reason="temporary skip this test until it is properly resolved: " - "https://github.com/pandas-dev/pandas/issues/37286" + reason=( + "pyarrow returns pytz.FixedOffset while pandas " + "constructs datetime.timezone https://github.com/pandas-dev/pandas/issues/37286" + ) ) ) idx = 5 * [timezone_aware_date_list] @@ -1172,6 +1178,10 @@ def test_duplicate_columns(self, fp): msg = "Cannot create parquet dataset with duplicate column names" self.check_error_on_write(df, fp, ValueError, msg) + @pytest.mark.xfail( + Version(np.__version__) >= Version("2.0.0"), + reason="fastparquet uses np.float_ in numpy2", + ) def test_bool_with_none(self, fp): df = pd.DataFrame({"a": [True, None, False]}) expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16") diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a21893f66722a..980c88f070b89 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -39,10 +39,6 @@ to_timedelta, ) import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - StringArray, -) from pandas.util.version import Version from pandas.io import sql @@ -3661,24 +3657,13 @@ def dtype_backend_data() -> DataFrame: @pytest.fixture def dtype_backend_expected(): - def func(storage, dtype_backend, conn_name) -> DataFrame: - string_array: StringArray | ArrowStringArray - string_array_na: StringArray | ArrowStringArray - if storage == "python": - string_array = StringArray(np.array(["a", "b", "c"], dtype=np.object_)) - string_array_na = StringArray(np.array(["a", "b", pd.NA], dtype=np.object_)) - - elif dtype_backend == "pyarrow": + def func(string_storage, dtype_backend, conn_name) -> DataFrame: + string_dtype: pd.StringDtype | pd.ArrowDtype + if dtype_backend == "pyarrow": pa = pytest.importorskip("pyarrow") - from pandas.arrays import ArrowExtensionArray - - string_array = ArrowExtensionArray(pa.array(["a", "b", "c"])) # type: ignore[assignment] - string_array_na = ArrowExtensionArray(pa.array(["a", "b", None])) # type: ignore[assignment] - + string_dtype = pd.ArrowDtype(pa.string()) else: - pa = pytest.importorskip("pyarrow") - string_array = ArrowStringArray(pa.array(["a", "b", "c"])) - string_array_na = ArrowStringArray(pa.array(["a", "b", None])) + string_dtype = pd.StringDtype(string_storage) df = DataFrame( { @@ -3688,8 +3673,8 @@ def func(storage, dtype_backend, conn_name) -> DataFrame: "d": Series([1.5, 2.0, 2.5], dtype="Float64"), "e": Series([True, False, pd.NA], dtype="boolean"), "f": Series([True, False, True], dtype="boolean"), - "g": string_array, - "h": string_array_na, + "g": Series(["a", "b", "c"], dtype=string_dtype), + "h": 
Series(["a", "b", None], dtype=string_dtype), } ) if dtype_backend == "pyarrow": diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 036a5d6265dd7..5c07a56c9fb3f 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -14,8 +14,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat import WASM from pandas.compat._optional import import_optional_dependency from pandas.errors import ( @@ -31,11 +29,6 @@ Series, ) import pandas._testing as tm -from pandas.core.arrays import ( - ArrowStringArray, - StringArray, -) -from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics from pandas.io.common import get_handle from pandas.io.xml import read_xml @@ -1992,7 +1985,6 @@ def test_s3_parser_consistency(s3_public_bucket_with_data, s3so): tm.assert_frame_equal(df_lxml, df_etree) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_read_xml_nullable_dtypes( parser, string_storage, dtype_backend, using_infer_string ): @@ -2023,36 +2015,21 @@ def test_read_xml_nullable_dtypes( """ - if using_infer_string: - pa = pytest.importorskip("pyarrow") - string_array = ArrowStringArrayNumpySemantics(pa.array(["x", "y"])) - string_array_na = ArrowStringArrayNumpySemantics(pa.array(["x", None])) - - elif string_storage == "python": - string_array = StringArray(np.array(["x", "y"], dtype=np.object_)) - string_array_na = StringArray(np.array(["x", NA], dtype=np.object_)) + with pd.option_context("mode.string_storage", string_storage): + result = read_xml(StringIO(data), parser=parser, dtype_backend=dtype_backend) - elif dtype_backend == "pyarrow": + if dtype_backend == "pyarrow": pa = pytest.importorskip("pyarrow") - from pandas.arrays import ArrowExtensionArray - - string_array = ArrowExtensionArray(pa.array(["x", "y"])) - string_array_na = ArrowExtensionArray(pa.array(["x", None])) - + string_dtype = pd.ArrowDtype(pa.string()) else: - pa = pytest.importorskip("pyarrow") - string_array = ArrowStringArray(pa.array(["x", "y"])) - string_array_na = ArrowStringArray(pa.array(["x", None])) - - with pd.option_context("mode.string_storage", string_storage): - result = read_xml(StringIO(data), parser=parser, dtype_backend=dtype_backend) + string_dtype = pd.StringDtype(string_storage) expected = DataFrame( { - "a": string_array, + "a": Series(["x", "y"], dtype=string_dtype), "b": Series([1, 2], dtype="Int64"), "c": Series([4.0, 5.0], dtype="Float64"), - "d": string_array_na, + "d": Series(["x", None], dtype=string_dtype), "e": Series([2, NA], dtype="Int64"), "f": Series([4.0, NA], dtype="Float64"), "g": Series([NA, NA], dtype="Int64"), @@ -2073,7 +2050,9 @@ def test_read_xml_nullable_dtypes( ) expected["g"] = ArrowExtensionArray(pa.array([None, None])) - tm.assert_frame_equal(result, expected) + # the storage of the str columns' Index is also affected by the + # string_storage setting -> ignore that for checking the result + tm.assert_frame_equal(result, expected, check_column_type=False) def test_invalid_dtype_backend(): diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index b381c4fce8430..b39f953da1ee6 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -45,6 +45,7 @@ _check_visible, get_y_axis, ) +from pandas.util.version import Version from pandas.io.formats.printing import pprint_thing @@ -2465,8 +2466,14 @@ def 
test_group_subplot_invalid_column_name(self): d = {"a": np.arange(10), "b": np.arange(10)} df = DataFrame(d) - with pytest.raises(ValueError, match=r"Column label\(s\) \['bad_name'\]"): - df.plot(subplots=[("a", "bad_name")]) + if Version(np.__version__) < Version("2.0.0"): + with pytest.raises(ValueError, match=r"Column label\(s\) \['bad_name'\]"): + df.plot(subplots=[("a", "bad_name")]) + else: + with pytest.raises( + ValueError, match=r"Column label\(s\) \[np\.str\_\('bad_name'\)\]" + ): + df.plot(subplots=[("a", "bad_name")]) def test_group_subplot_duplicated_column(self): d = {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)} diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 66799732be064..26fecef6ed0e6 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -9,6 +9,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( Categorical, @@ -1204,6 +1206,9 @@ def test_idxminmax_object_dtype(self, using_infer_string): with pytest.raises(TypeError, match=msg): ser3.idxmin(skipna=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_idxminmax_object_frame(self): # GH#4279 df = DataFrame([["zimm", 2.5], ["biff", 1.0], ["bid", 12.0]]) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index b2caa1fadd1a5..8af224f1ad64f 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -939,3 +939,14 @@ def test_concat_with_series_and_frame_returns_rangeindex_columns(): result = concat([ser, df]) expected = DataFrame([0, 1, 2], index=[0, 0, 1]) tm.assert_frame_equal(result, expected, check_column_type=True) + + +def test_concat_with_moot_ignore_index_and_keys(): + df1 = DataFrame([[0]]) + df2 = DataFrame([[42]]) + + ignore_index = True + keys = ["df1", "df2"] + msg = f"Cannot set {ignore_index=} and specify keys. Either should be used." + with pytest.raises(ValueError, match=msg): + concat([df1, df2], keys=keys, ignore_index=ignore_index) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index ad704d87a491b..cbee85f4aede9 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2998,3 +2998,15 @@ def test_merge_datetime_and_timedelta(how): ) with pytest.raises(ValueError, match=re.escape(msg)): right.merge(left, on="key", how=how) + + +def test_merge_on_all_nan_column(): + # GH#59421 + left = DataFrame({"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan], "z": [4, 5, 6]}) + right = DataFrame({"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan], "zz": [4, 5, 6]}) + result = left.merge(right, on=["x", "y"], how="outer") + # Should not trigger array bounds error with bounds checking or asan enabled. 
+ expected = DataFrame( + {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan], "z": [4, 5, 6], "zz": [4, 5, 6]} + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 5f769db7f8acf..b2e9f26e1c407 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -307,3 +307,15 @@ def test_qcut_nullable_integer(q, any_numeric_ea_dtype): expected = qcut(arr.astype(float), q) tm.assert_categorical_equal(result, expected) + + +@pytest.mark.parametrize("scale", [1.0, 1 / 3, 17.0]) +@pytest.mark.parametrize("q", [3, 7, 9]) +@pytest.mark.parametrize("precision", [1, 3, 16]) +def test_qcut_contains(scale, q, precision): + # GH-59355 + arr = (scale * np.arange(q + 1)).round(precision) + result = qcut(arr, q, precision=precision) + + for value, bucket in zip(arr, result): + assert value in bucket diff --git a/pandas/tests/scalar/timestamp/methods/test_round.py b/pandas/tests/scalar/timestamp/methods/test_round.py index 2fb0e1a8d3397..944aa55727217 100644 --- a/pandas/tests/scalar/timestamp/methods/test_round.py +++ b/pandas/tests/scalar/timestamp/methods/test_round.py @@ -4,7 +4,6 @@ ) import numpy as np import pytest -import pytz from pandas._libs import lib from pandas._libs.tslibs import ( @@ -182,7 +181,7 @@ def test_round_dst_border_ambiguous(self, method, unit): assert result is NaT msg = "Cannot infer dst time" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): getattr(ts, method)("h", ambiguous="raise") @pytest.mark.parametrize( @@ -205,7 +204,7 @@ def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): assert result is NaT msg = "2018-03-11 02:00:00" - with pytest.raises(pytz.NonExistentTimeError, match=msg): + with pytest.raises(ValueError, match=msg): getattr(ts, method)(freq, nonexistent="raise") @pytest.mark.parametrize( diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py index 90dc8d77608cb..cb7ac5fa6f1da 100644 --- a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py +++ b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py @@ -4,11 +4,6 @@ from dateutil.tz import gettz import pytest -import pytz -from pytz.exceptions import ( - AmbiguousTimeError, - NonExistentTimeError, -) from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime @@ -54,13 +49,14 @@ def test_tz_localize_ambiguous_bool(self, unit, tz): # make sure that we are correctly accepting bool values as ambiguous # GH#14402 if isinstance(tz, str) and tz.startswith("pytz/"): + pytz = pytest.importorskip("pytz") tz = pytz.timezone(tz.removeprefix("pytz/")) ts = Timestamp("2015-11-01 01:00:03").as_unit(unit) expected0 = Timestamp("2015-11-01 01:00:03-0500", tz=tz) expected1 = Timestamp("2015-11-01 01:00:03-0600", tz=tz) msg = "Cannot infer dst time from 2015-11-01 01:00:03" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): ts.tz_localize(tz) result = ts.tz_localize(tz, ambiguous=True) @@ -105,10 +101,10 @@ def test_tz_localize_ambiguous(self): def test_tz_localize_nonexistent(self, stamp, tz): # GH#13057 ts = Timestamp(stamp) - with pytest.raises(NonExistentTimeError, match=stamp): + with pytest.raises(ValueError, match=stamp): ts.tz_localize(tz) # GH 22644 - with pytest.raises(NonExistentTimeError, match=stamp): + with pytest.raises(ValueError, match=stamp): ts.tz_localize(tz, 
nonexistent="raise") assert ts.tz_localize(tz, nonexistent="NaT") is NaT @@ -154,7 +150,7 @@ def test_tz_localize_ambiguous_raise(self): # GH#13057 ts = Timestamp("2015-11-1 01:00") msg = "Cannot infer dst time from 2015-11-01 01:00:00," - with pytest.raises(AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): ts.tz_localize("US/Pacific", ambiguous="raise") def test_tz_localize_nonexistent_invalid_arg(self, warsaw): @@ -330,7 +326,7 @@ def test_timestamp_tz_localize_nonexistent_raise(self, warsaw, unit): tz = warsaw ts = Timestamp("2015-03-29 02:20:00").as_unit(unit) msg = "2015-03-29 02:20:00" - with pytest.raises(pytz.NonExistentTimeError, match=msg): + with pytest.raises(ValueError, match=msg): ts.tz_localize(tz, nonexistent="raise") msg = ( "The nonexistent argument must be one of 'raise', 'NaT', " diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 39f302c3357de..2c97c4a32e0aa 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -15,7 +15,6 @@ ) import numpy as np import pytest -import pytz from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime @@ -747,7 +746,7 @@ def test_constructor_tz_or_tzinfo(self): tz="UTC", ), Timestamp(2000, 1, 2, 3, 4, 5, 6, None, nanosecond=1), - Timestamp(2000, 1, 2, 3, 4, 5, 6, tz=pytz.UTC, nanosecond=1), + Timestamp(2000, 1, 2, 3, 4, 5, 6, tz=timezone.utc, nanosecond=1), ], ) def test_constructor_nanosecond(self, result): @@ -904,7 +903,7 @@ def test_raise_tz_and_tzinfo_in_datetime_input(self, box): Timestamp(box(**kwargs), tz="US/Pacific") msg = "Cannot pass a datetime or Timestamp" with pytest.raises(ValueError, match=msg): - Timestamp(box(**kwargs), tzinfo=pytz.timezone("US/Pacific")) + Timestamp(box(**kwargs), tzinfo=zoneinfo.ZoneInfo("US/Pacific")) def test_dont_convert_dateutil_utc_to_default_utc(self): result = Timestamp(datetime(2018, 1, 1), tz=tzutc()) @@ -948,7 +947,7 @@ def test_timestamp_constructor_near_dst_boundary(self): assert result == expected msg = "Cannot infer dst time from 2015-10-25 02:00:00" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): Timestamp("2015-10-25 02:00", tz=tz) result = Timestamp("2017-03-26 01:00", tz="Europe/Paris") @@ -956,7 +955,7 @@ def test_timestamp_constructor_near_dst_boundary(self): assert result == expected msg = "2017-03-26 02:00" - with pytest.raises(pytz.NonExistentTimeError, match=msg): + with pytest.raises(ValueError, match=msg): Timestamp("2017-03-26 02:00", tz="Europe/Paris") # GH#11708 @@ -975,7 +974,7 @@ def test_timestamp_constructor_near_dst_boundary(self): assert result == expected msg = "2017-03-26 02:00" - with pytest.raises(pytz.NonExistentTimeError, match=msg): + with pytest.raises(ValueError, match=msg): Timestamp("2017-03-26 02:00", tz="Europe/Paris") result = Timestamp("2017-03-26 02:00:00+0100", tz="Europe/Paris") diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 3e1ece6b7f59e..9b9a8ea3600ae 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -9,7 +9,6 @@ import numpy as np import pytest -import pytz from pandas._config import using_string_dtype @@ -28,6 +27,7 @@ Period, PeriodIndex, Series, + StringDtype, TimedeltaIndex, date_range, period_range, @@ -352,7 +352,7 @@ def 
test_dt_round_tz_ambiguous(self, method): tm.assert_series_equal(result, expected) # raise - with tm.external_error_raised(pytz.AmbiguousTimeError): + with tm.external_error_raised(ValueError): getattr(df1.date.dt, method)("h", ambiguous="raise") @pytest.mark.parametrize( @@ -374,7 +374,7 @@ def test_dt_round_tz_nonexistent(self, method, ts_str, freq): expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) tm.assert_series_equal(result, expected) - with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"): + with pytest.raises(ValueError, match="2018-03-11 02:00:00"): getattr(ser.dt, method)(freq, nonexistent="raise") @pytest.mark.parametrize("freq", ["ns", "us", "1000us"]) @@ -514,7 +514,6 @@ def test_dt_accessor_datetime_name_accessors(self, time_locale): ser = pd.concat([ser, Series([pd.NaT])]) assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_strftime(self): # GH 10086 ser = Series(date_range("20130101", periods=5)) @@ -585,10 +584,9 @@ def test_strftime_period_days(self, using_infer_string): dtype="=U10", ) if using_infer_string: - expected = expected.astype("str") + expected = expected.astype(StringDtype(na_value=np.nan)) tm.assert_index_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_strftime_dt64_microsecond_resolution(self): ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)]) result = ser.dt.strftime("%Y-%m-%d %H:%M:%S") @@ -621,7 +619,6 @@ def test_strftime_period_minutes(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "data", [ @@ -644,7 +641,7 @@ def test_strftime_all_nat(self, data): ser = Series(data) with tm.assert_produces_warning(None): result = ser.dt.strftime("%Y-%m-%d") - expected = Series([np.nan], dtype=object) + expected = Series([np.nan], dtype="str") tm.assert_series_equal(result, expected) def test_valid_dt_with_missing_values(self): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 742091d761d62..71ba2dab671ef 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -10,6 +10,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import IndexingError @@ -822,6 +823,11 @@ def test_mask_key(self, obj, key, expected, raises, val, indexer_sli): else: indexer_sli(obj)[mask] = val + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_series_where(self, obj, key, expected, raises, val, is_inplace): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 4a8af259b4134..90c4056a39e84 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs import lib import pandas as pd @@ -12,7 +10,6 @@ class TestSeriesConvertDtypes: - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "data, maindtype, expected_default, expected_other", 
[ @@ -223,9 +220,9 @@ def test_convert_dtypes( and params[0] and not params[1] ): - # If we would convert with convert strings then infer_objects converts - # with the option - expected_dtype = "string[pyarrow_numpy]" + # If convert_string=False and infer_objects=True, we end up with the + # default string dtype instead of preserving object for string data + expected_dtype = pd.StringDtype(na_value=np.nan) expected = pd.Series(data, dtype=expected_dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_tz_localize.py b/pandas/tests/series/methods/test_tz_localize.py index 45620a721f442..53288e8a1f8e7 100644 --- a/pandas/tests/series/methods/test_tz_localize.py +++ b/pandas/tests/series/methods/test_tz_localize.py @@ -1,7 +1,6 @@ from datetime import timezone import pytest -import pytz from pandas._libs.tslibs import timezones @@ -28,7 +27,7 @@ def test_series_tz_localize_ambiguous_bool(self): expected0 = Series([expected0]) expected1 = Series([expected1]) - with tm.external_error_raised(pytz.AmbiguousTimeError): + with tm.external_error_raised(ValueError): ser.dt.tz_localize("US/Central") result = ser.dt.tz_localize("US/Central", ambiguous=True) @@ -79,11 +78,11 @@ def test_tz_localize_nonexistent(self, warsaw, method, exp, unit): df = ser.to_frame() if method == "raise": - with tm.external_error_raised(pytz.NonExistentTimeError): + with tm.external_error_raised(ValueError): dti.tz_localize(tz, nonexistent=method) - with tm.external_error_raised(pytz.NonExistentTimeError): + with tm.external_error_raised(ValueError): ser.tz_localize(tz, nonexistent=method) - with tm.external_error_raised(pytz.NonExistentTimeError): + with tm.external_error_raised(ValueError): df.tz_localize(tz, nonexistent=method) elif exp == "invalid": diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index a63ffbbd3a5a1..79a55eb357f87 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -4,6 +4,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -160,6 +164,9 @@ def test_attrs(self): result = s + 1 assert result.attrs == {"version": 1} + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_inspect_getmembers(self): # GH38782 pytest.importorskip("jinja2") diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 939bf888fd61b..baed3ba936699 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -6,7 +6,10 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( + ArrowDtype, DataFrame, Index, Series, @@ -143,6 +146,9 @@ def test_logical_operators_int_dtype_with_bool(self): expected = Series([False, True, True, True]) tm.assert_series_equal(result, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_logical_operators_int_dtype_with_object(self, using_infer_string): # GH#9016: support bitwise op for integer types s_0123 = Series(range(4), dtype="int64") @@ -518,18 +524,38 @@ def test_int_dtype_different_index_not_bool(self): result = ser1 ^ ser2 tm.assert_series_equal(result, expected) + # TODO: this belongs in comparison tests def test_pyarrow_numpy_string_invalid(self): # GH#56008 - pytest.importorskip("pyarrow") + pa = 
pytest.importorskip("pyarrow") ser = Series([False, True]) ser2 = Series(["a", "b"], dtype="string[pyarrow_numpy]") result = ser == ser2 - expected = Series(False, index=ser.index) - tm.assert_series_equal(result, expected) + expected_eq = Series(False, index=ser.index) + tm.assert_series_equal(result, expected_eq) result = ser != ser2 - expected = Series(True, index=ser.index) - tm.assert_series_equal(result, expected) + expected_ne = Series(True, index=ser.index) + tm.assert_series_equal(result, expected_ne) with pytest.raises(TypeError, match="Invalid comparison"): ser > ser2 + + # GH#59505 + ser3 = ser2.astype("string[pyarrow]") + result3_eq = ser3 == ser + tm.assert_series_equal(result3_eq, expected_eq.astype("bool[pyarrow]")) + result3_ne = ser3 != ser + tm.assert_series_equal(result3_ne, expected_ne.astype("bool[pyarrow]")) + + with pytest.raises(TypeError, match="Invalid comparison"): + ser > ser3 + + ser4 = ser2.astype(ArrowDtype(pa.string())) + result4_eq = ser4 == ser + tm.assert_series_equal(result4_eq, expected_eq.astype("bool[pyarrow]")) + result4_ne = ser4 != ser + tm.assert_series_equal(result4_ne, expected_ne.astype("bool[pyarrow]")) + + with pytest.raises(TypeError, match="Invalid comparison"): + ser > ser4 diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 0bc3092d30b43..7bbb902e14a36 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import Series import pandas._testing as tm @@ -162,6 +166,9 @@ def test_validate_stat_keepdims(): np.sum(ser, keepdims=True) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_mean_with_convertible_string_raises(using_infer_string): # GH#44008 ser = Series(["1", "2"]) @@ -181,6 +188,9 @@ def test_mean_with_convertible_string_raises(using_infer_string): df.mean() +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_mean_dont_convert_j_to_complex(): # GH#36703 df = pd.DataFrame([{"db": "J", "numeric": 123}]) @@ -199,6 +209,9 @@ def test_mean_dont_convert_j_to_complex(): np.mean(df["db"].astype("string").array) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_median_with_convertible_string_raises(): # GH#34671 this _could_ return a string "2", but definitely not float 2.0 msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support" diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index ee26fdae74960..18df76ddd8ed8 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -218,7 +218,7 @@ def test_missing_required_dependency(): subprocess.check_output(call, stderr=subprocess.STDOUT) output = exc.value.stdout.decode() - for name in ["numpy", "pytz", "dateutil"]: + for name in ["numpy", "dateutil"]: assert name in output diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 2a225bda953cf..869d41efa6c28 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -408,6 +408,13 @@ def test_codes_out_of_bound(self): tm.assert_numpy_array_equal(result, expected) tm.assert_numpy_array_equal(result_codes, expected_codes) + @pytest.mark.parametrize("codes", [[-1, -1], [2, -1], [2, 2]]) + def 
test_codes_empty_array_out_of_bound(self, codes): + empty_values = np.array([]) + expected_codes = -np.ones_like(codes, dtype=np.intp) + _, result_codes = safe_sort(empty_values, codes) + tm.assert_numpy_array_equal(result_codes, expected_codes) + def test_mixed_integer(self): values = np.array(["b", 1, 0, "a", 0, "b"], dtype=object) result = safe_sort(values) diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index dfdc69c0fe18e..e75958843040d 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -108,13 +108,13 @@ def _test_offset( "second": "2013-11-03 01:59:01.999999", "microsecond": "2013-11-03 01:59:59.000001", }[offset_name] - with pytest.raises(pytz.AmbiguousTimeError, match=err_msg): + with pytest.raises(ValueError, match=err_msg): tstart + offset # While we're here, let's check that we get the same behavior in a # vectorized path dti = DatetimeIndex([tstart]) warn_msg = "Non-vectorized DateOffset" - with pytest.raises(pytz.AmbiguousTimeError, match=err_msg): + with pytest.raises(ValueError, match=err_msg): with tm.assert_produces_warning(performance_warning, match=warn_msg): dti + offset return @@ -256,10 +256,10 @@ def test_all_offset_classes(self, tup): ], ) def test_nontick_offset_with_ambiguous_time_error(original_dt, target_dt, offset, tz): - # .apply for non-Tick offsets throws AmbiguousTimeError when the target dt + # .apply for non-Tick offsets throws ValueError when the target dt # is dst-ambiguous - localized_dt = original_dt.tz_localize(pytz.timezone(tz)) + localized_dt = original_dt.tz_localize(tz) msg = f"Cannot infer dst time from {target_dt}, try using the 'ambiguous' argument" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): + with pytest.raises(ValueError, match=msg): localized_dt + offset diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py index 99a6a583dd3e9..943434e515828 100644 --- a/pandas/tests/tseries/offsets/test_offsets_properties.py +++ b/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -13,7 +13,6 @@ given, ) import pytest -import pytz import pandas as pd from pandas._testing._hypothesis import ( @@ -34,11 +33,11 @@ def test_on_offset_implementations(dt, offset): # (dt + offset) - offset == dt try: compare = (dt + offset) - offset - except (pytz.NonExistentTimeError, pytz.AmbiguousTimeError): + except ValueError: # When dt + offset does not exist or is DST-ambiguous, assume(False) to # indicate to hypothesis that this is not a valid test case # DST-ambiguous example (GH41906): - # dt = datetime.datetime(1900, 1, 1, tzinfo=pytz.timezone('Africa/Kinshasa')) + # dt = datetime.datetime(1900, 1, 1, tzinfo=ZoneInfo('Africa/Kinshasa')) # offset = MonthBegin(66) assume(False) diff --git a/pandas/tests/tslibs/test_tzconversion.py b/pandas/tests/tslibs/test_tzconversion.py index c1a56ffb71b02..f32829b4e0b21 100644 --- a/pandas/tests/tslibs/test_tzconversion.py +++ b/pandas/tests/tslibs/test_tzconversion.py @@ -1,6 +1,7 @@ +import zoneinfo + import numpy as np import pytest -import pytz from pandas._libs.tslibs.tzconversion import tz_localize_to_utc @@ -11,13 +12,15 @@ def test_tz_localize_to_utc_ambiguous_infer(self): val = 1_320_541_200_000_000_000 vals = np.array([val, val - 1, val], dtype=np.int64) - with pytest.raises(pytz.AmbiguousTimeError, match="2011-11-06 01:00:00"): - tz_localize_to_utc(vals, pytz.timezone("US/Eastern"), ambiguous="infer") + with 
pytest.raises(ValueError, match="2011-11-06 01:00:00"): + tz_localize_to_utc(vals, zoneinfo.ZoneInfo("US/Eastern"), ambiguous="infer") - with pytest.raises(pytz.AmbiguousTimeError, match="are no repeated times"): - tz_localize_to_utc(vals[:1], pytz.timezone("US/Eastern"), ambiguous="infer") + with pytest.raises(ValueError, match="are no repeated times"): + tz_localize_to_utc( + vals[:1], zoneinfo.ZoneInfo("US/Eastern"), ambiguous="infer" + ) vals[1] += 1 msg = "There are 2 dst switches when there should only be 1" - with pytest.raises(pytz.AmbiguousTimeError, match=msg): - tz_localize_to_utc(vals, pytz.timezone("US/Eastern"), ambiguous="infer") + with pytest.raises(ValueError, match=msg): + tz_localize_to_utc(vals, zoneinfo.ZoneInfo("US/Eastern"), ambiguous="infer") diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index af3194b5085c4..17b92427f0d5d 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -6,7 +6,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import ( + HAS_PYARROW, IS64, is_platform_arm, is_platform_power, @@ -1326,6 +1329,9 @@ def test_rolling_corr_timedelta_index(index, window): tm.assert_almost_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_groupby_rolling_nan_included(): # GH 35542 data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 7e18ebe40cfa8..bd20660bdbba6 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -67,7 +67,6 @@ def _get_dependency_info() -> dict[str, JSONSerializable]: "pandas", # required "numpy", - "pytz", "dateutil", # install / build, "pip", diff --git a/pyproject.toml b/pyproject.toml index cc5cc1cf84d0c..645ded35f3d18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,6 @@ dependencies = [ "numpy>=1.23.5; python_version<'3.12'", "numpy>=1.26.0; python_version>='3.12'", "python-dateutil>=2.8.2", - "pytz>=2020.1", "tzdata>=2022.7" ] classifiers = [ @@ -81,6 +80,7 @@ plot = ['matplotlib>=3.6.3'] output-formatting = ['jinja2>=3.1.2', 'tabulate>=0.9.0'] clipboard = ['PyQt5>=5.15.9', 'qtpy>=2.3.0'] compression = ['zstandard>=0.19.0'] +timezone = ['pytz>=2023.4'] all = ['adbc-driver-postgresql>=0.10.0', 'adbc-driver-sqlite>=0.8.0', 'beautifulsoup4>=4.11.2', @@ -107,6 +107,7 @@ all = ['adbc-driver-postgresql>=0.10.0', 'pytest>=7.3.2', 'pytest-xdist>=3.4.0', 'python-calamine>=0.1.7', + 'pytz>=2023.4', 'pyxlsb>=1.0.10', 'qtpy>=2.3.0', 'scipy>=1.10.0', diff --git a/requirements-dev.txt b/requirements-dev.txt index dbfd7c6bf7bf5..52d2553fc4001 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,7 +15,6 @@ PyQt5>=5.15.9 coverage python-dateutil numpy<2 -pytz beautifulsoup4>=4.11.2 blosc bottleneck>=1.3.6 @@ -39,6 +38,7 @@ pymysql>=1.0.2 pyreadstat>=1.2.0 tables>=3.8.0 python-calamine>=0.1.7 +pytz>=2023.4 pyxlsb>=1.0.10 s3fs>=2022.11.0 scipy>=1.10.0 diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html index aa4bfc92ce8a8..4c66f28818abd 100644 --- a/web/pandas/_templates/layout.html +++ b/web/pandas/_templates/layout.html @@ -73,8 +73,8 @@
diff --git a/web/pandas/community/ecosystem.md b/web/pandas/community/ecosystem.md index 49ece5564c300..c14996211bb8b 100644 --- a/web/pandas/community/ecosystem.md +++ b/web/pandas/community/ecosystem.md @@ -360,6 +360,13 @@ Deltalake python package lets you access tables stored in JVM. It provides the ``delta_table.to_pyarrow_table().to_pandas()`` method to convert any Delta table into Pandas dataframe. +### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas) + +pandas-gbq provides high-performance reads and writes to and from +[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0), +these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`. +Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq` instead. + ## Out-of-core ### [Bodo](https://bodo.ai/) @@ -513,6 +520,13 @@ Arrays](https://awkward-array.org/) inside pandas' Series and DataFrame. It also provides an accessor for using awkward functions on Series that are of awkward type. +### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas) + +db-dtypes provides extension types for working with types like +DATE, TIME, and JSON from database systems. This package is used +by pandas-gbq to provide natural dtypes for BigQuery data types without +a natural numpy type. + ### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/) Pandas-Genomics provides an extension type and extension array for working