
Commit

fix typos (#59665)
typos
musvaage authored Aug 30, 2024
1 parent 952cbb6 commit 5593886
Showing 50 changed files with 65 additions and 65 deletions.
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/indexing_engines.py
@@ -87,7 +87,7 @@ def setup(self, engine_and_dtype, index_type, unique, N):
arr = np.array([1, 2, 3], dtype=dtype).repeat(N)

self.data = engine(arr)
# code belows avoids populating the mapping etc. while timing.
# code below avoids populating the mapping etc. while timing.
self.data.get_loc(2)

self.key_middle = arr[len(arr) // 2]
@@ -140,7 +140,7 @@ def setup(self, engine_and_dtype, index_type, unique, N):
mask[-1] = True

self.data = engine(BaseMaskedArray(arr, mask))
# code belows avoids populating the mapping etc. while timing.
# code below avoids populating the mapping etc. while timing.
self.data.get_loc(2)

self.key_middle = arr[len(arr) // 2]
@@ -169,7 +169,7 @@ def setup(self, index_type):
}[index_type]

self.data = libindex.ObjectEngine(arr)
# code belows avoids populating the mapping etc. while timing.
# code below avoids populating the mapping etc. while timing.
self.data.get_loc("b")

def time_get_loc(self, index_type):
2 changes: 1 addition & 1 deletion doc/source/development/contributing_codebase.rst
@@ -605,7 +605,7 @@ The ``temp_file`` pytest fixture creates a temporary file :py:class:`Pathlib` ob
pd.DataFrame([1]).to_csv(str(temp_file))
Please reference `pytest's documentation <https://docs.pytest.org/en/latest/how-to/tmp_path.html#the-default-base-temporary-directory>`_
for the file retension policy.
for the file retention policy.
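As a rough sketch of how the ``temp_file`` fixture described above might appear in a test (the test name and round-trip check are invented for illustration; only the ``to_csv`` call comes from the docs):

    import pandas as pd

    def test_csv_round_trip(temp_file):
        # temp_file is the pytest fixture from the pandas test suite; it yields
        # a unique pathlib.Path, so no manual cleanup is needed.
        df = pd.DataFrame([1])
        df.to_csv(str(temp_file))
        # read the file back to confirm the write succeeded
        result = pd.read_csv(temp_file, index_col=0)
        assert result.iloc[0, 0] == 1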

Testing involving network connectivity
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2 changes: 1 addition & 1 deletion doc/source/development/debugging_extensions.rst
@@ -30,7 +30,7 @@ By specifying ``builddir="debug"`` all of the targets will be built and placed i
Using Docker
------------

To simplify the debugging process, pandas has created a Docker image with a debug build of Python and the gdb/Cython debuggers pre-installed. You may either ``docker pull pandas/pandas-debug`` to get access to this image or build it from the ``tooling/debug`` folder locallly.
To simplify the debugging process, pandas has created a Docker image with a debug build of Python and the gdb/Cython debuggers pre-installed. You may either ``docker pull pandas/pandas-debug`` to get access to this image or build it from the ``tooling/debug`` folder locally.

You can then mount your pandas repository into this image via:

2 changes: 1 addition & 1 deletion doc/source/getting_started/index.rst
@@ -613,7 +613,7 @@ the pandas-equivalent operations compared to software you already know:

Users of `Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__
or other spreadsheet programs will find that many of the concepts are
transferrable to pandas.
transferable to pandas.

+++

2 changes: 1 addition & 1 deletion doc/source/user_guide/cookbook.rst
@@ -914,7 +914,7 @@ Using TimeGrouper and another grouping to create subgroups, then apply a custom
<https://stackoverflow.com/questions/15408156/resampling-with-custom-periods>`__

`Resample intraday frame without adding new days
<https://stackoverflow.com/questions/14898574/resample-intrday-pandas-dataframe-without-add-new-days>`__
<https://stackoverflow.com/questions/14898574/resample-intraday-pandas-dataframe-without-add-new-days>`__

`Resample minute data
<https://stackoverflow.com/questions/14861023/resampling-minute-data>`__
4 changes: 2 additions & 2 deletions doc/source/user_guide/io.rst
@@ -169,7 +169,7 @@ dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFram
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

The dtype_backends are still experimential.
The dtype_backends are still experiential.

.. versionadded:: 2.0
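As context for the ``dtype_backend`` option in the hunk above, a minimal sketch (the column names and data are invented; the pyarrow call assumes the pyarrow package is installed):

    import io

    import pandas as pd

    csv_data = "a,b\n1,x\n,y"

    # nullable NumPy-backed extension dtypes (e.g. Int64) instead of plain numpy dtypes
    df_nullable = pd.read_csv(io.StringIO(csv_data), dtype_backend="numpy_nullable")

    # pyarrow-backed ArrowDtype columns for every column
    df_arrow = pd.read_csv(io.StringIO(csv_data), dtype_backend="pyarrow")

    print(df_nullable.dtypes)  # nullable extension dtypes; missing values are pd.NA
    print(df_arrow.dtypes)     # ArrowDtype-backed columns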

@@ -2893,7 +2893,7 @@ Read in the content of the "books.xml" as instance of ``StringIO`` or
df
Even read XML from AWS S3 buckets such as NIH NCBI PMC Article Datasets providing
Biomedical and Life Science Jorurnals:
Biomedical and Life Science Journals:

.. code-block:: python
2 changes: 1 addition & 1 deletion doc/source/user_guide/style.ipynb
@@ -1182,7 +1182,7 @@
"Some styling functions are common enough that we've \"built them in\" to the `Styler`, so you don't have to write them and apply them yourself. The current list of such functions is:\n",
"\n",
" - [.highlight_null][nullfunc]: for use with identifying missing data. \n",
" - [.highlight_min][minfunc] and [.highlight_max][maxfunc]: for use with identifying extremeties in data.\n",
" - [.highlight_min][minfunc] and [.highlight_max][maxfunc]: for use with identifying extremities in data.\n",
" - [.highlight_between][betweenfunc] and [.highlight_quantile][quantilefunc]: for use with identifying classes within data.\n",
" - [.background_gradient][bgfunc]: a flexible method for highlighting cells based on their, or other, values on a numeric scale.\n",
" - [.text_gradient][textfunc]: similar method for highlighting text based on their, or other, values on a numeric scale.\n",
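A small illustration of the built-in ``Styler`` helpers named in the notebook cell above (the frame and colors are made up):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1.0, np.nan, 3.0], "b": [4.0, 5.0, 2.0]})

    styled = (
        df.style.highlight_null(color="lightgray")  # mark missing data
        .highlight_max(color="lightgreen")          # per-column maxima
        .highlight_min(color="salmon")              # per-column minima
    )
    html = styled.to_html()  # render, e.g. for a notebook or report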
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.21.1.rst
@@ -141,7 +141,7 @@ IO
Plotting
^^^^^^^^

- Bug in ``DataFrame.plot()`` and ``Series.plot()`` with :class:`DatetimeIndex` where a figure generated by them is not pickleable in Python 3 (:issue:`18439`)
- Bug in ``DataFrame.plot()`` and ``Series.plot()`` with :class:`DatetimeIndex` where a figure generated by them is not picklable in Python 3 (:issue:`18439`)

GroupBy/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
@@ -1159,7 +1159,7 @@ IO
- Bug in :meth:`DataFrame.to_html` where header numbers would ignore display options when rounding (:issue:`17280`)
- Bug in :func:`read_hdf` where reading a table from an HDF5 file written directly with PyTables fails with a ``ValueError`` when using a sub-selection via the ``start`` or ``stop`` arguments (:issue:`11188`)
- Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`)
- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`)
- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested workarounds (:issue:`25772`)
- Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`)
- Improved the ``col_space`` parameter in :meth:`DataFrame.to_html` to accept a string so CSS length values can be set correctly (:issue:`25941`)
- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
@@ -900,7 +900,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed ``pandas.plotting._matplotlib.tsplot``, use :meth:`Series.plot` instead (:issue:`19980`)
- ``pandas.tseries.converter.register`` has been moved to :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18307`)
- :meth:`Series.plot` no longer accepts positional arguments, pass keyword arguments instead (:issue:`30003`)
- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allows ``figsize="default"``, specify figure size by passinig a tuple instead (:issue:`30003`)
- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allows ``figsize="default"``, specify figure size by passing a tuple instead (:issue:`30003`)
- Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`)
- :class:`TimedeltaIndex` and :class:`DatetimeIndex` no longer accept non-nanosecond dtype strings like "timedelta64" or "datetime64", use "timedelta64[ns]" and "datetime64[ns]" instead (:issue:`24806`)
- Changed the default "skipna" argument in :func:`pandas.api.types.infer_dtype` from ``False`` to ``True`` (:issue:`24050`)
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
@@ -1130,7 +1130,7 @@ Performance improvements
- Performance improvement in :func:`to_datetime` when using ``'%Y%m%d'`` format (:issue:`17410`)
- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`)
- Performance improvement in :meth:`Series.median` for nullable dtypes (:issue:`50838`)
- Performance improvement in :func:`read_csv` when passing :func:`to_datetime` lambda-function to ``date_parser`` and inputs have mixed timezone offsetes (:issue:`35296`)
- Performance improvement in :func:`read_csv` when passing :func:`to_datetime` lambda-function to ``date_parser`` and inputs have mixed timezone offsets (:issue:`35296`)
- Performance improvement in :func:`isna` and :func:`isnull` (:issue:`50658`)
- Performance improvement in :meth:`.SeriesGroupBy.value_counts` with categorical dtype (:issue:`46202`)
- Fixed a reference leak in :func:`read_hdf` (:issue:`37441`)
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/nattype.pyx
@@ -1821,7 +1821,7 @@ default 'raise'

def as_unit(self, str unit, bint round_ok=True) -> "NaTType":
"""
Convert the underlying int64 representaton to the given unit.
Convert the underlying int64 representation to the given unit.

Parameters
----------
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/np_datetime.pxd
@@ -34,7 +34,7 @@ cdef extern from "numpy/ndarraytypes.h":
NPY_FR_as
NPY_FR_GENERIC

int64_t NPY_DATETIME_NAT # elswhere we call this NPY_NAT
int64_t NPY_DATETIME_NAT # elsewhere we call this NPY_NAT


cdef extern from "pandas/datetime/pd_datetime.h":
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
@@ -792,7 +792,7 @@ cdef class BaseOffset:

def __getstate__(self):
"""
Return a pickleable state
Return a picklable state
"""
state = {}
state["n"] = self.n
@@ -1456,7 +1456,7 @@ cdef class RelativeDeltaOffset(BaseOffset):

def __getstate__(self):
"""
Return a pickleable state
Return a picklable state
"""
# RelativeDeltaOffset (technically DateOffset) is the only non-cdef
# class, so the only one with __dict__
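Since both hunks in this file touch ``__getstate__`` docstrings about picklable state, a quick sanity-check sketch (the offset chosen here is arbitrary):

    import pickle

    import pandas as pd

    offset = pd.offsets.BusinessDay(n=3)
    restored = pickle.loads(pickle.dumps(offset))
    # the restored offset should compare equal and keep its parameters
    assert restored == offset
    assert restored.n == 3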
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/timestamps.pyx
@@ -321,7 +321,7 @@ cdef class _Timestamp(ABCTimestamp):
def _from_dt64(cls, dt64: np.datetime64):
# construct a Timestamp from a np.datetime64 object, keeping the
# resolution of the input.
# This is herely mainly so we can incrementally implement non-nano
# This is here mainly so we can incrementally implement non-nano
# (e.g. only tznaive at first)
cdef:
int64_t value
@@ -1359,7 +1359,7 @@ cdef class _Timestamp(ABCTimestamp):
def as_unit(self, str unit, bint round_ok=True):
"""
Convert the underlying int64 representaton to the given unit.
Convert the underlying int64 representation to the given unit.

Parameters
----------
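For the ``as_unit`` docstring touched above, a brief sketch of what the conversion does (the timestamp value is invented):

    import pandas as pd

    ts = pd.Timestamp("2023-01-01 12:34:56.789123456")  # nanosecond resolution
    print(ts.unit)                # 'ns'
    print(ts.as_unit("ms").unit)  # 'ms'; sub-millisecond precision is dropped
    # with round_ok=False, a conversion that would lose precision raises instead:
    # ts.as_unit("s", round_ok=False)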
4 changes: 2 additions & 2 deletions pandas/_typing.py
@@ -429,7 +429,7 @@ def closed(self) -> bool:
SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
NaPosition = Literal["first", "last"]

# Arguments for nsmalles and n_largest
# Arguments for nsmallest and nlargest
NsmallestNlargestKeep = Literal["first", "last", "all"]

# quantile interpolation
@@ -524,7 +524,7 @@ def closed(self) -> bool:
None,
]

# maintaine the sub-type of any hashable sequence
# maintain the sub-type of any hashable sequence
SequenceT = TypeVar("SequenceT", bound=Sequence[Hashable])

SliceType = Optional[Hashable]
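``NsmallestNlargestKeep`` in the hunk above types the ``keep`` argument of ``Series.nsmallest``/``nlargest``; for illustration:

    import pandas as pd

    s = pd.Series([1, 2, 2, 3])

    s.nsmallest(2, keep="first")  # 1 and the first 2
    s.nsmallest(2, keep="last")   # 1 and the last 2
    s.nsmallest(2, keep="all")    # 1 and both 2s: ties at the cutoff are all kept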
2 changes: 1 addition & 1 deletion pandas/_version.py
@@ -1,5 +1,5 @@
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# git-archive tarball (such as those provided by github's download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
2 changes: 1 addition & 1 deletion pandas/core/arraylike.py
@@ -522,7 +522,7 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar
# so calls DataFrame.min (without ever getting here) with the np.min
# default of axis=None, which DataFrame.min catches and changes to axis=0.
# np.minimum.reduce(df) gets here bc axis is not in kwargs,
# so we set axis=0 to match the behaviorof np.minimum.reduce(df.values)
# so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
kwargs["axis"] = 0

# By default, numpy's reductions do not skip NaNs, so we have to
2 changes: 1 addition & 1 deletion pandas/core/arrays/base.py
@@ -1784,7 +1784,7 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
--------
This gives view on the underlying data of an ``ExtensionArray`` and is not a
copy. Modifications on either the view or the original ``ExtensionArray``
will be reflectd on the underlying data:
will be reflected on the underlying data:
>>> arr = pd.array([1, 2, 3])
>>> arr2 = arr.view()
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_arrow.py
@@ -257,7 +257,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
result = pc.is_in(
self._pa_array, value_set=pa.array(value_set, type=self._pa_array.type)
)
# pyarrow 2.0.0 returned nulls, so we explicily specify dtype to convert nulls
# pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
# to False
return np.array(result, dtype=np.bool_)

2 changes: 1 addition & 1 deletion pandas/core/dtypes/cast.py
@@ -1378,7 +1378,7 @@ def common_dtype_categorical_compat(
# TODO: more generally, could do `not can_hold_na(dtype)`
if lib.is_np_dtype(dtype, "iu"):
for obj in objs:
# We don't want to accientally allow e.g. "categorical" str here
# We don't want to accidentally allow e.g. "categorical" str here
obj_dtype = getattr(obj, "dtype", None)
if isinstance(obj_dtype, CategoricalDtype):
if isinstance(obj, ABCIndex):
6 changes: 3 additions & 3 deletions pandas/core/frame.py
@@ -2199,7 +2199,7 @@ def maybe_reorder(
) -> tuple[list[ArrayLike], Index, Index | None]:
"""
If our desired 'columns' do not match the data's pre-existing 'arr_columns',
we re-order our arrays. This is like a pre-emptive (cheap) reindex.
we re-order our arrays. This is like a preemptive (cheap) reindex.
"""
if len(arrays):
length = len(arrays[0])
@@ -4484,7 +4484,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
You can refer to column names that are not valid Python variable names
by surrounding them in backticks. Thus, column names containing spaces
or punctuations (besides underscores) or starting with digits must be
or punctuation (besides underscores) or starting with digits must be
surrounded by backticks. (For example, a column named "Area (cm^2)" would
be referenced as ```Area (cm^2)```). Column names which are Python keywords
(like "if", "for", "import", etc) cannot be used.
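A short sketch of the backtick rule described above, using an invented frame:

    import pandas as pd

    df = pd.DataFrame({"Area (cm^2)": [10.0, 25.5, 3.2], "id": [1, 2, 3]})

    # names containing spaces or punctuation must be wrapped in backticks
    result = df.query("`Area (cm^2)` > 5 and id != 3")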
@@ -12360,7 +12360,7 @@ def std(
--------
Series.std : Return standard deviation over Series values.
DataFrame.mean : Return the mean of the values over the requested axis.
DataFrame.mediam : Return the mediam of the values over the requested axis.
DataFrame.median : Return the median of the values over the requested axis.
DataFrame.mode : Get the mode(s) of each element along the requested axis.
DataFrame.sum : Return the sum of the values over the requested axis.
2 changes: 1 addition & 1 deletion pandas/core/generic.py
@@ -6994,7 +6994,7 @@ def fillna(
f'you passed a "{type(value).__name__}"'
)

# set the default here, so functions examining the signaure
# set the default here, so functions examining the signature
# can detect if something was set (e.g. in groupby) (GH9221)
if axis is None:
axis = 0
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
@@ -2943,7 +2943,7 @@ def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index
"""
With mismatched timezones, cast both to UTC.
"""
# Caller is responsibelf or checking
# Caller is responsible for checking
# `self.dtype != other.dtype`
if (
isinstance(self, ABCDatetimeIndex)
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
@@ -1478,7 +1478,7 @@ def round(self, decimals: int) -> Self:
"""
Rounds the values.
If the block is not of an integer or float dtype, nothing happens.
This is consistent with DataFrame.round behavivor.
This is consistent with DataFrame.round behavior.
(Note: Series.round would raise)
Parameters
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
@@ -621,7 +621,7 @@ def reorder_arrays(
arrays: list[ArrayLike], arr_columns: Index, columns: Index | None, length: int
) -> tuple[list[ArrayLike], Index]:
"""
Pre-emptively (cheaply) reindex arrays with new columns.
Preemptively (cheaply) reindex arrays with new columns.
"""
# reorder according to the columns
if columns is not None:
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
@@ -1362,7 +1362,7 @@ def _iset_split_block(
"""Removes columns from a block by splitting the block.
Avoids copying the whole block through slicing and updates the manager
after determinint the new block structure. Optionally adds a new block,
after determining the new block structure. Optionally adds a new block,
otherwise has to be done by the caller.
Parameters
2 changes: 1 addition & 1 deletion pandas/core/reshape/concat.py
@@ -201,7 +201,7 @@ def concat(
be very expensive relative to the actual data concatenation.
sort : bool, default False
Sort non-concatenation axis. One exception to this is when the
non-concatentation axis is a DatetimeIndex and join='outer' and the axis is
non-concatenation axis is a DatetimeIndex and join='outer' and the axis is
not already aligned. In that case, the non-concatenation axis is always
sorted lexicographically.
copy : bool, default False
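For the ``sort`` parameter documented in the hunk above, a minimal sketch with made-up frames:

    import pandas as pd

    df1 = pd.DataFrame({"b": [1], "a": [2]})
    df2 = pd.DataFrame({"c": [3], "a": [4]})

    # join="outer" unions the columns; that union is the non-concatenation axis here
    pd.concat([df1, df2], join="outer", sort=False)  # column order follows appearance
    pd.concat([df1, df2], join="outer", sort=True)   # columns sorted lexicographically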
2 changes: 1 addition & 1 deletion pandas/errors/__init__.py
@@ -378,7 +378,7 @@ class InvalidIndexError(Exception):

class DataError(Exception):
"""
Exceptionn raised when performing an operation on non-numerical data.
Exception raised when performing an operation on non-numerical data.
For example, calling ``ohlc`` on a non-numerical column or a function
on a rolling window.
2 changes: 1 addition & 1 deletion pandas/io/formats/style.py
@@ -342,7 +342,7 @@ def concat(self, other: Styler) -> Styler:
keys ``data``, ``row_heading`` and ``row`` will be prepended with
``foot0_``. If more concats are chained, their styles will be prepended
with ``foot1_``, ''foot_2'', etc., and if a concatenated style have
another concatanated style, the second style will be prepended with
another concatenated style, the second style will be prepended with
``foot{parent}_foot{child}_``.
A common use case is to concatenate user defined functions with
8 changes: 4 additions & 4 deletions pandas/io/formats/style_render.py
@@ -1064,7 +1064,7 @@ def format(
.. warning::
`Styler.format` is ignored when using the output format `Styler.to_excel`,
since Excel and Python have inherrently different formatting structures.
since Excel and Python have inherently different formatting structures.
However, it is possible to use the `number-format` pseudo CSS attribute
to force Excel permissible formatting. See examples.
@@ -1312,7 +1312,7 @@ def format_index(
.. warning::
`Styler.format_index` is ignored when using the output format
`Styler.to_excel`, since Excel and Python have inherrently different
`Styler.to_excel`, since Excel and Python have inherently different
formatting structures.
However, it is possible to use the `number-format` pseudo CSS attribute
to force Excel permissible formatting. See documentation for `Styler.format`.
@@ -1649,7 +1649,7 @@ def format_index_names(
.. warning::
`Styler.format_index_names` is ignored when using the output format
`Styler.to_excel`, since Excel and Python have inherrently different
`Styler.to_excel`, since Excel and Python have inherently different
formatting structures.
Examples
@@ -2410,7 +2410,7 @@ def _parse_latex_header_span(
r"""
Refactor the cell `display_value` if a 'colspan' or 'rowspan' attribute is present.
'rowspan' and 'colspan' do not occur simultaneouly. If they are detected then
'rowspan' and 'colspan' do not occur simultaneously. If they are detected then
the `display_value` is altered to a LaTeX `multirow` or `multicol` command
respectively, with the appropriate cell-span.
