From 5593886ee384893c35565dfa29eaa956ccbe1186 Mon Sep 17 00:00:00 2001 From: musvaage <112724366+musvaage@users.noreply.github.com> Date: Fri, 30 Aug 2024 20:08:14 +0200 Subject: [PATCH] fix typos (#59665) typos --- asv_bench/benchmarks/indexing_engines.py | 6 +++--- doc/source/development/contributing_codebase.rst | 2 +- doc/source/development/debugging_extensions.rst | 2 +- doc/source/getting_started/index.rst | 2 +- doc/source/user_guide/cookbook.rst | 2 +- doc/source/user_guide/io.rst | 4 ++-- doc/source/user_guide/style.ipynb | 2 +- doc/source/whatsnew/v0.21.1.rst | 2 +- doc/source/whatsnew/v0.25.0.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 2 +- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/_libs/tslibs/nattype.pyx | 2 +- pandas/_libs/tslibs/np_datetime.pxd | 2 +- pandas/_libs/tslibs/offsets.pyx | 4 ++-- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- pandas/_typing.py | 4 ++-- pandas/_version.py | 2 +- pandas/core/arraylike.py | 2 +- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/string_arrow.py | 2 +- pandas/core/dtypes/cast.py | 2 +- pandas/core/frame.py | 6 +++--- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/core/internals/construction.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/core/reshape/concat.py | 2 +- pandas/errors/__init__.py | 2 +- pandas/io/formats/style.py | 2 +- pandas/io/formats/style_render.py | 8 ++++---- pandas/io/pytables.py | 2 +- pandas/tests/apply/test_numba.py | 2 +- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/extension/base/dtype.py | 2 +- pandas/tests/extension/base/io.py | 2 +- pandas/tests/frame/methods/test_rank.py | 2 +- pandas/tests/frame/test_api.py | 2 +- pandas/tests/indexes/datetimes/test_arithmetic.py | 2 +- pandas/tests/indexes/datetimes/test_constructors.py | 2 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/io/formats/style/test_matplotlib.py | 4 ++-- pandas/tests/io/parser/test_header.py | 4 ++-- 
pandas/tests/io/xml/test_to_xml.py | 4 ++-- pandas/tests/plotting/frame/test_frame_subplots.py | 2 +- pandas/tests/test_aggregation.py | 2 +- web/pandas/community/blog/2019-user-survey.md | 2 +- web/pandas/community/ecosystem.md | 2 +- web/pandas/pdeps/0010-required-pyarrow-dependency.md | 4 ++-- .../pdeps/0012-compact-and-reversible-JSON-interface.md | 2 +- 50 files changed, 65 insertions(+), 65 deletions(-) diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py index fd3d0f0b9cf2e..5e3c593e269cb 100644 --- a/asv_bench/benchmarks/indexing_engines.py +++ b/asv_bench/benchmarks/indexing_engines.py @@ -87,7 +87,7 @@ def setup(self, engine_and_dtype, index_type, unique, N): arr = np.array([1, 2, 3], dtype=dtype).repeat(N) self.data = engine(arr) - # code belows avoids populating the mapping etc. while timing. + # code below avoids populating the mapping etc. while timing. self.data.get_loc(2) self.key_middle = arr[len(arr) // 2] @@ -140,7 +140,7 @@ def setup(self, engine_and_dtype, index_type, unique, N): mask[-1] = True self.data = engine(BaseMaskedArray(arr, mask)) - # code belows avoids populating the mapping etc. while timing. + # code below avoids populating the mapping etc. while timing. self.data.get_loc(2) self.key_middle = arr[len(arr) // 2] @@ -169,7 +169,7 @@ def setup(self, index_type): }[index_type] self.data = libindex.ObjectEngine(arr) - # code belows avoids populating the mapping etc. while timing. + # code below avoids populating the mapping etc. while timing. 
self.data.get_loc("b") def time_get_loc(self, index_type): diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index 277f407ae4418..9d5a992e911b6 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -605,7 +605,7 @@ The ``temp_file`` pytest fixture creates a temporary file :py:class:`Pathlib` ob pd.DataFrame([1]).to_csv(str(temp_file)) Please reference `pytest's documentation `_ -for the file retension policy. +for the file retention policy. Testing involving network connectivity ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index 376d7b21cab52..0ea1c112cb55b 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -30,7 +30,7 @@ By specifying ``builddir="debug"`` all of the targets will be built and placed i Using Docker ------------ -To simplify the debugging process, pandas has created a Docker image with a debug build of Python and the gdb/Cython debuggers pre-installed. You may either ``docker pull pandas/pandas-debug`` to get access to this image or build it from the ``tooling/debug`` folder locallly. +To simplify the debugging process, pandas has created a Docker image with a debug build of Python and the gdb/Cython debuggers pre-installed. You may either ``docker pull pandas/pandas-debug`` to get access to this image or build it from the ``tooling/debug`` folder locally. 
You can then mount your pandas repository into this image via: diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst index 9f29f7f4f4406..36ed553d9d88e 100644 --- a/doc/source/getting_started/index.rst +++ b/doc/source/getting_started/index.rst @@ -613,7 +613,7 @@ the pandas-equivalent operations compared to software you already know: Users of `Excel `__ or other spreadsheet programs will find that many of the concepts are - transferrable to pandas. + transferable to pandas. +++ diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 3dfc6534f2b64..42430fb1fbba0 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -914,7 +914,7 @@ Using TimeGrouper and another grouping to create subgroups, then apply a custom `__ `Resample intraday frame without adding new days -`__ +`__ `Resample minute data `__ diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index be40710a9e307..b996e1706ca2f 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -169,7 +169,7 @@ dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFram implementation when "numpy_nullable" is set, pyarrow is used for all dtypes if "pyarrow" is set. - The dtype_backends are still experimential. + The dtype_backends are still experimental. .. versionadded:: 2.0 @@ -2893,7 +2893,7 @@ Read in the content of the "books.xml" as instance of ``StringIO`` or df Even read XML from AWS S3 buckets such as NIH NCBI PMC Article Datasets providing -Biomedical and Life Science Jorurnals: +Biomedical and Life Science Journals: ..
code-block:: python diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index f4a55280cd1f1..daecfce6ecebc 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -1182,7 +1182,7 @@ "Some styling functions are common enough that we've \"built them in\" to the `Styler`, so you don't have to write them and apply them yourself. The current list of such functions is:\n", "\n", " - [.highlight_null][nullfunc]: for use with identifying missing data. \n", - " - [.highlight_min][minfunc] and [.highlight_max][maxfunc]: for use with identifying extremeties in data.\n", + " - [.highlight_min][minfunc] and [.highlight_max][maxfunc]: for use with identifying extremities in data.\n", " - [.highlight_between][betweenfunc] and [.highlight_quantile][quantilefunc]: for use with identifying classes within data.\n", " - [.background_gradient][bgfunc]: a flexible method for highlighting cells based on their, or other, values on a numeric scale.\n", " - [.text_gradient][textfunc]: similar method for highlighting text based on their, or other, values on a numeric scale.\n", diff --git a/doc/source/whatsnew/v0.21.1.rst b/doc/source/whatsnew/v0.21.1.rst index e217e1a75efc5..bb08d85b5a052 100644 --- a/doc/source/whatsnew/v0.21.1.rst +++ b/doc/source/whatsnew/v0.21.1.rst @@ -141,7 +141,7 @@ IO Plotting ^^^^^^^^ -- Bug in ``DataFrame.plot()`` and ``Series.plot()`` with :class:`DatetimeIndex` where a figure generated by them is not pickleable in Python 3 (:issue:`18439`) +- Bug in ``DataFrame.plot()`` and ``Series.plot()`` with :class:`DatetimeIndex` where a figure generated by them is not picklable in Python 3 (:issue:`18439`) GroupBy/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 50be28a912cf6..243714651e3b9 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1159,7 +1159,7 @@ IO - Bug in :meth:`DataFrame.to_html` 
where header numbers would ignore display options when rounding (:issue:`17280`) - Bug in :func:`read_hdf` where reading a table from an HDF5 file written directly with PyTables fails with a ``ValueError`` when using a sub-selection via the ``start`` or ``stop`` arguments (:issue:`11188`) - Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`) -- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`) +- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested workarounds (:issue:`25772`) - Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`) - Improved the ``col_space`` parameter in :meth:`DataFrame.to_html` to accept a string so CSS length values can be set correctly (:issue:`25941`) - Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5dbf6f1c60598..98cb9c4ad7b45 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -900,7 +900,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- Removed ``pandas.plotting._matplotlib.tsplot``, use :meth:`Series.plot` instead (:issue:`19980`) - ``pandas.tseries.converter.register`` has been moved to :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18307`) - :meth:`Series.plot` no longer accepts positional arguments, pass keyword arguments instead (:issue:`30003`) -- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allows ``figsize="default"``, specify figure size by passinig a tuple instead (:issue:`30003`) +- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allows ``figsize="default"``, specify figure size by passing a tuple instead (:issue:`30003`) - Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`) - :class:`TimedeltaIndex` and :class:`DatetimeIndex` no longer accept non-nanosecond dtype strings like "timedelta64" or "datetime64", use "timedelta64[ns]" and "datetime64[ns]" instead (:issue:`24806`) - Changed the default "skipna" argument in :func:`pandas.api.types.infer_dtype` from ``False`` to ``True`` (:issue:`24050`) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index cacbf8452ba32..ddcd69c3fd962 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1130,7 +1130,7 @@ Performance improvements - Performance improvement in :func:`to_datetime` when using ``'%Y%m%d'`` format (:issue:`17410`) - Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`) - Performance improvement in :meth:`Series.median` for nullable dtypes (:issue:`50838`) -- Performance improvement in :func:`read_csv` when passing :func:`to_datetime` lambda-function to ``date_parser`` and inputs have mixed timezone offsetes (:issue:`35296`) +- Performance improvement in :func:`read_csv` when passing :func:`to_datetime` lambda-function to ``date_parser`` and inputs have mixed timezone offsets (:issue:`35296`) - Performance improvement in :func:`isna` and 
:func:`isnull` (:issue:`50658`) - Performance improvement in :meth:`.SeriesGroupBy.value_counts` with categorical dtype (:issue:`46202`) - Fixed a reference leak in :func:`read_hdf` (:issue:`37441`) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 25d9510bcd543..60afc1acdc297 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1821,7 +1821,7 @@ default 'raise' def as_unit(self, str unit, bint round_ok=True) -> "NaTType": """ - Convert the underlying int64 representaton to the given unit. + Convert the underlying int64 representation to the given unit. Parameters ---------- diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index cb2658d343772..43240046c6500 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -34,7 +34,7 @@ cdef extern from "numpy/ndarraytypes.h": NPY_FR_as NPY_FR_GENERIC - int64_t NPY_DATETIME_NAT # elswhere we call this NPY_NAT + int64_t NPY_DATETIME_NAT # elsewhere we call this NPY_NAT cdef extern from "pandas/datetime/pd_datetime.h": diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index c48acc07b34db..043c029ec900c 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -792,7 +792,7 @@ cdef class BaseOffset: def __getstate__(self): """ - Return a pickleable state + Return a picklable state """ state = {} state["n"] = self.n @@ -1456,7 +1456,7 @@ cdef class RelativeDeltaOffset(BaseOffset): def __getstate__(self): """ - Return a pickleable state + Return a picklable state """ # RelativeDeltaOffset (technically DateOffset) is the only non-cdef # class, so the only one with __dict__ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index bdcb5e751c2a8..b3811eb644dc5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -321,7 +321,7 @@ cdef class 
_Timestamp(ABCTimestamp): def _from_dt64(cls, dt64: np.datetime64): # construct a Timestamp from a np.datetime64 object, keeping the # resolution of the input. - # This is herely mainly so we can incrementally implement non-nano + # This is here mainly so we can incrementally implement non-nano # (e.g. only tznaive at first) cdef: int64_t value @@ -1359,7 +1359,7 @@ cdef class _Timestamp(ABCTimestamp): def as_unit(self, str unit, bint round_ok=True): """ - Convert the underlying int64 representaton to the given unit. + Convert the underlying int64 representation to the given unit. Parameters ---------- diff --git a/pandas/_typing.py b/pandas/_typing.py index d43e6e900546d..c1769126a5776 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -429,7 +429,7 @@ def closed(self) -> bool: SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"] NaPosition = Literal["first", "last"] -# Arguments for nsmalles and n_largest +# Arguments for nsmallest and nlargest NsmallestNlargestKeep = Literal["first", "last", "all"] # quantile interpolation @@ -524,7 +524,7 @@ def closed(self) -> bool: None, ] -# maintaine the sub-type of any hashable sequence +# maintain the sub-type of any hashable sequence SequenceT = TypeVar("SequenceT", bound=Sequence[Hashable]) SliceType = Optional[Hashable] diff --git a/pandas/_version.py b/pandas/_version.py index b32c9e67fdbb6..c5e3c16d3f773 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -1,5 +1,5 @@ # This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag +# git-archive tarball (such as those provided by github's download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. 
diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 03c73489bd3d8..f70bb0743aa0f 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -522,7 +522,7 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar # so calls DataFrame.min (without ever getting here) with the np.min # default of axis=None, which DataFrame.min catches and changes to axis=0. # np.minimum.reduce(df) gets here bc axis is not in kwargs, - # so we set axis=0 to match the behaviorof np.minimum.reduce(df.values) + # so we set axis=0 to match the behavior of np.minimum.reduce(df.values) kwargs["axis"] = 0 # By default, numpy's reductions do not skip NaNs, so we have to diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 2124f86b03b9c..536c7303a2f92 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1784,7 +1784,7 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: -------- This gives view on the underlying data of an ``ExtensionArray`` and is not a copy. 
Modifications on either the view or the original ``ExtensionArray`` - will be reflectd on the underlying data: + will be reflected on the underlying data: >>> arr = pd.array([1, 2, 3]) >>> arr2 = arr.view() diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index cfc892b9e3648..e552f1960bf8c 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -257,7 +257,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: result = pc.is_in( self._pa_array, value_set=pa.array(value_set, type=self._pa_array.type) ) - # pyarrow 2.0.0 returned nulls, so we explicily specify dtype to convert nulls + # pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls # to False return np.array(result, dtype=np.bool_) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3394bf091e228..6ba07b1761557 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1378,7 +1378,7 @@ def common_dtype_categorical_compat( # TODO: more generally, could do `not can_hold_na(dtype)` if lib.is_np_dtype(dtype, "iu"): for obj in objs: - # We don't want to accientally allow e.g. "categorical" str here + # We don't want to accidentally allow e.g. "categorical" str here obj_dtype = getattr(obj, "dtype", None) if isinstance(obj_dtype, CategoricalDtype): if isinstance(obj, ABCIndex): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b84fb33af26e5..f47acf579d79c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2199,7 +2199,7 @@ def maybe_reorder( ) -> tuple[list[ArrayLike], Index, Index | None]: """ If our desired 'columns' do not match the data's pre-existing 'arr_columns', - we re-order our arrays. This is like a pre-emptive (cheap) reindex. + we re-order our arrays. This is like a preemptive (cheap) reindex. 
""" if len(arrays): length = len(arrays[0]) @@ -4484,7 +4484,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No You can refer to column names that are not valid Python variable names by surrounding them in backticks. Thus, column names containing spaces - or punctuations (besides underscores) or starting with digits must be + or punctuation (besides underscores) or starting with digits must be surrounded by backticks. (For example, a column named "Area (cm^2)" would be referenced as ```Area (cm^2)```). Column names which are Python keywords (like "if", "for", "import", etc) cannot be used. @@ -12360,7 +12360,7 @@ def std( -------- Series.std : Return standard deviation over Series values. DataFrame.mean : Return the mean of the values over the requested axis. - DataFrame.mediam : Return the mediam of the values over the requested axis. + DataFrame.median : Return the median of the values over the requested axis. DataFrame.mode : Get the mode(s) of each element along the requested axis. DataFrame.sum : Return the sum of the values over the requested axis. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3109b67a4fc43..dce462f3eabb1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6994,7 +6994,7 @@ def fillna( f'you passed a "{type(value).__name__}"' ) - # set the default here, so functions examining the signaure + # set the default here, so functions examining the signature # can detect if something was set (e.g. in groupby) (GH9221) if axis is None: axis = 0 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c8dbea1fd39ea..582e1f96fa562 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2943,7 +2943,7 @@ def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index """ With mismatched timezones, cast both to UTC. 
""" - # Caller is responsibelf or checking + # Caller is responsible for checking # `self.dtype != other.dtype` if ( isinstance(self, ABCDatetimeIndex) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index dfb96162f0ac1..dced92ba04520 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1478,7 +1478,7 @@ def round(self, decimals: int) -> Self: """ Rounds the values. If the block is not of an integer or float dtype, nothing happens. - This is consistent with DataFrame.round behavivor. + This is consistent with DataFrame.round behavior. (Note: Series.round would raise) Parameters diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 535397871588c..07465e7b87fcd 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -621,7 +621,7 @@ def reorder_arrays( arrays: list[ArrayLike], arr_columns: Index, columns: Index | None, length: int ) -> tuple[list[ArrayLike], Index]: """ - Pre-emptively (cheaply) reindex arrays with new columns. + Preemptively (cheaply) reindex arrays with new columns. """ # reorder according to the columns if columns is not None: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index bade5fd1bdcf2..aa4a785519051 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1362,7 +1362,7 @@ def _iset_split_block( """Removes columns from a block by splitting the block. Avoids copying the whole block through slicing and updates the manager - after determinint the new block structure. Optionally adds a new block, + after determining the new block structure. Optionally adds a new block, otherwise has to be done by the caller. 
Parameters diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c005a1ce26e4b..cfe83111b6e38 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -201,7 +201,7 @@ def concat( be very expensive relative to the actual data concatenation. sort : bool, default False Sort non-concatenation axis. One exception to this is when the - non-concatentation axis is a DatetimeIndex and join='outer' and the axis is + non-concatenation axis is a DatetimeIndex and join='outer' and the axis is not already aligned. In that case, the non-concatenation axis is always sorted lexicographically. copy : bool, default False diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index c8863e1b39c94..6d571031636b5 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -378,7 +378,7 @@ class InvalidIndexError(Exception): class DataError(Exception): """ - Exceptionn raised when performing an operation on non-numerical data. + Exception raised when performing an operation on non-numerical data. For example, calling ``ohlc`` on a non-numerical column or a function on a rolling window. diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 82bc0301fed3a..6e5ae09485951 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -342,7 +342,7 @@ def concat(self, other: Styler) -> Styler: keys ``data``, ``row_heading`` and ``row`` will be prepended with ``foot0_``. If more concats are chained, their styles will be prepended with ``foot1_``, ''foot_2'', etc., and if a concatenated style have - another concatanated style, the second style will be prepended with + another concatenated style, the second style will be prepended with ``foot{parent}_foot{child}_``. 
A common use case is to concatenate user defined functions with diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index ec718f2a1276f..8a6383f7e8f82 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1064,7 +1064,7 @@ def format( .. warning:: `Styler.format` is ignored when using the output format `Styler.to_excel`, - since Excel and Python have inherrently different formatting structures. + since Excel and Python have inherently different formatting structures. However, it is possible to use the `number-format` pseudo CSS attribute to force Excel permissible formatting. See examples. @@ -1312,7 +1312,7 @@ def format_index( .. warning:: `Styler.format_index` is ignored when using the output format - `Styler.to_excel`, since Excel and Python have inherrently different + `Styler.to_excel`, since Excel and Python have inherently different formatting structures. However, it is possible to use the `number-format` pseudo CSS attribute to force Excel permissible formatting. See documentation for `Styler.format`. @@ -1649,7 +1649,7 @@ def format_index_names( .. warning:: `Styler.format_index_names` is ignored when using the output format - `Styler.to_excel`, since Excel and Python have inherrently different + `Styler.to_excel`, since Excel and Python have inherently different formatting structures. Examples @@ -2410,7 +2410,7 @@ def _parse_latex_header_span( r""" Refactor the cell `display_value` if a 'colspan' or 'rowspan' attribute is present. - 'rowspan' and 'colspan' do not occur simultaneouly. If they are detected then + 'rowspan' and 'colspan' do not occur simultaneously. If they are detected then the `display_value` is altered to a LaTeX `multirow` or `multicol` command respectively, with the appropriate cell-span. 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 618254fee9259..be7b8dc6640ba 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3580,7 +3580,7 @@ def is_transposed(self) -> bool: @property def data_orientation(self) -> tuple[int, ...]: - """return a tuple of my permutated axes, non_indexable at the front""" + """return a tuple of my permuted axes, non_indexable at the front""" return tuple( itertools.chain( [int(a[0]) for a in self.non_index_axes], diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py index 6bbe5100e8826..d86eeadbaa0fe 100644 --- a/pandas/tests/apply/test_numba.py +++ b/pandas/tests/apply/test_numba.py @@ -73,7 +73,7 @@ def test_numba_vs_python_reductions(reduction, apply_axis): @pytest.mark.parametrize("colnames", [[1, 2, 3], [1.0, 2.0, 3.0]]) def test_numba_numeric_colnames(colnames): - # Check that numeric column names lower properly and can be indxed on + # Check that numeric column names lower properly and can be indexed on df = DataFrame( np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.int64), columns=colnames ) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index b1d7c701e1267..da444b55490f0 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1925,7 +1925,7 @@ def test_is_scalar_pandas_containers(self): assert not is_scalar(pd.array([1, 2, 3])) def test_is_scalar_number(self): - # Number() is not recognied by PyNumber_Check, so by extension + # Number() is not recognized by PyNumber_Check, so by extension # is not recognized by is_scalar, but instances of non-abstract # subclasses are. 
diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index c7b768f6e3c88..38478ed3c40ae 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -85,7 +85,7 @@ def test_str(self, dtype): def test_eq(self, dtype): assert dtype == dtype.name - assert dtype != "anonther_type" + assert dtype != "another_type" def test_construct_from_string_own_name(self, dtype): result = dtype.construct_from_string(dtype.name) diff --git a/pandas/tests/extension/base/io.py b/pandas/tests/extension/base/io.py index 3a6f2eb5ba8b1..f7367fceeb52f 100644 --- a/pandas/tests/extension/base/io.py +++ b/pandas/tests/extension/base/io.py @@ -13,7 +13,7 @@ class BaseParsingTests: def test_EA_types(self, engine, data, request): if isinstance(data.dtype, pd.CategoricalDtype): # in parsers.pyx _convert_with_dtype there is special-casing for - # Categorical that pre-empts _from_sequence_of_strings + # Categorical that preempts _from_sequence_of_strings pass elif isinstance(data.dtype, pd.core.dtypes.dtypes.NumpyEADtype): # These get unwrapped internally so are treated as numpy dtypes diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 4e8e267523439..4b1435babe6b1 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -396,7 +396,7 @@ def test_rank_inf_and_nan(self, contents, dtype, frame_or_series): # Insert nans at random positions if underlying dtype has missing # value. Then adjust the expected order by adding nans accordingly # This is for testing whether rank calculation is affected - # when values are interwined with nan values. + # when values are intertwined with nan values. 
values = np.array(contents, dtype=dtype) exp_order = np.array(range(len(values)), dtype="float64") + 1.0 if dtype in dtype_na_map: diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index f8219e68a72da..3fb994f2e0aff 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -87,7 +87,7 @@ def test_tab_completion(self): assert isinstance(df.__getitem__("A"), DataFrame) def test_display_max_dir_items(self): - # display.max_dir_items increaes the number of columns that are in __dir__. + # display.max_dir_items increases the number of columns that are in __dir__. columns = ["a" + str(i) for i in range(420)] values = [range(420), range(420)] df = DataFrame(values, columns=columns) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 3a7c418b27de6..bac849301d1f7 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -1,5 +1,5 @@ # Arithmetic tests specific to DatetimeIndex are generally about `freq` -# rentention or inference. Other arithmetic tests belong in +# retention or inference. 
Other arithmetic tests belong in # tests/arithmetic/test_datetime64.py import pytest diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 8da88b97f9ea8..c418b2a18008b 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -122,7 +122,7 @@ def test_dti_with_period_data_raises(self): to_datetime(period_array(data)) def test_dti_with_timedelta64_data_raises(self): - # GH#23675 deprecated, enforrced in GH#29794 + # GH#23675 deprecated, enforced in GH#29794 data = np.array([0], dtype="m8[ns]") msg = r"timedelta64\[ns\] cannot be converted to datetime64" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index ef8c0e432ca49..f7ada06e3ecb2 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -676,7 +676,7 @@ def run_tests(df, rhs, right_loc, right_iloc): cols = ["jim", "joe", "jolie", "joline"] df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64") - # right hand side; permute the indices and multiplpy by -2 + # right hand side; permute the indices and multiply by -2 rhs = -2 * df.iloc[3:0:-1, 2:0:-1] # expected `right` result; just multiply by -2 diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index 296fb20d855c4..490bd45bfb2ee 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -225,8 +225,8 @@ def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, ex @pytest.mark.parametrize( "gmap, axis, exp_gmap", [ - (Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # revrse the index - (Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # revrse the cols + (Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # reverse the index + 
(Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # reverse the cols (Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx (Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col ], diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index c6efbd8059138..d333aef723de2 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -538,7 +538,7 @@ def test_mangles_multi_index(all_parsers, data, expected): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # TypeError: an integer is requireds +@xfail_pyarrow # TypeError: an integer is required @pytest.mark.parametrize("index_col", [None, [0]]) @pytest.mark.parametrize( "columns", [None, (["", "Unnamed"]), (["Unnamed", ""]), (["Unnamed", "NotUnnamed"])] @@ -670,7 +670,7 @@ def test_header_none_and_on_bad_lines_skip(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # TypeError: an integer is requireds +@xfail_pyarrow # TypeError: an integer is required def test_header_missing_rows(all_parsers): # GH#47400 parser = all_parsers diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 62cc33376c630..50fef2c5eb4eb 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -33,7 +33,7 @@ # [X] - KeyError: "...is not included in namespaces" # [X] - KeyError: "no valid column" # [X] - ValueError: "To use stylesheet, you need lxml installed..." -# [] - OSError: (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) +# [] - OSError: (NEED PERMISSION ISSUE, DISK FULL, ETC.) # [X] - FileNotFoundError: "No such file or directory" # [X] - PermissionError: "Forbidden" @@ -41,7 +41,7 @@ # [X] - TypeError: "...is not a valid type for attr_cols" # [X] - TypeError: "...is not a valid type for elem_cols" # [X] - LookupError: "unknown encoding" -# [] - OSError: (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) 
+# [] - OSError: (NEED PERMISSION ISSUE, DISK FULL, ETC.) # [X] - FileNotFoundError: "No such file or directory" # [X] - KeyError: "...is not included in namespaces" # [X] - KeyError: "no valid column" diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index a98f4b56ebf4d..b44725a01fe23 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -327,7 +327,7 @@ def test_subplots_multiple_axes_error(self): def test_subplots_multiple_axes_2_dim(self, layout, exp_layout): # GH 5353, 6970, GH 7069 # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value + # invalid layout should not affect to input and return value # (show warning is tested in # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes _, axes = mpl.pyplot.subplots(2, 2) diff --git a/pandas/tests/test_aggregation.py b/pandas/tests/test_aggregation.py index 7695c953712ed..3a01805cc2365 100644 --- a/pandas/tests/test_aggregation.py +++ b/pandas/tests/test_aggregation.py @@ -10,7 +10,7 @@ def test_maybe_mangle_lambdas_passthrough(): assert maybe_mangle_lambdas("mean") == "mean" assert maybe_mangle_lambdas(lambda x: x).__name__ == "" - # don't mangel single lambda. + # don't mangle single lambda. assert maybe_mangle_lambdas([lambda x: x])[0].__name__ == "" diff --git a/web/pandas/community/blog/2019-user-survey.md b/web/pandas/community/blog/2019-user-survey.md index 312ee49bdf387..821fdd01acf65 100644 --- a/web/pandas/community/blog/2019-user-survey.md +++ b/web/pandas/community/blog/2019-user-survey.md @@ -77,7 +77,7 @@ For environment isolation, [conda](https://conda.io/en/latest/) was the most pop ![png]({{ base_url }}/static/img/blog/2019-user-survey/2019_13_0.png) -Most repondents are Python 3 only. +Most respondents are Python 3 only. 
diff --git a/web/pandas/community/ecosystem.md b/web/pandas/community/ecosystem.md index c14996211bb8b..73a3cb6429790 100644 --- a/web/pandas/community/ecosystem.md +++ b/web/pandas/community/ecosystem.md @@ -587,7 +587,7 @@ See installation and usage instructions on the [GitHub page](https://github.com/ Hamilton is a declarative dataflow framework that came out of Stitch Fix. It was designed to help one manage a Pandas code base, specifically with respect to feature engineering for machine learning models. -It prescibes an opinionated paradigm, that ensures all code is: +It prescribes an opinionated paradigm, that ensures all code is: * unit testable * integration testing friendly diff --git a/web/pandas/pdeps/0010-required-pyarrow-dependency.md b/web/pandas/pdeps/0010-required-pyarrow-dependency.md index 4d6e928ce68bd..d586c46e243f8 100644 --- a/web/pandas/pdeps/0010-required-pyarrow-dependency.md +++ b/web/pandas/pdeps/0010-required-pyarrow-dependency.md @@ -40,7 +40,7 @@ PyArrow is an optional dependency of pandas that provides a wide range of supple data types within the `ExtensionArray` interface - Since pandas 2.0.0, all I/O readers have the option to return PyArrow-backed data types, and many methods now utilize PyArrow compute functions to -accelerate PyArrow-backed data in pandas, notibly string and datetime types. +accelerate PyArrow-backed data in pandas, notably string and datetime types. 
As of pandas 2.0, one can feasibly utilize PyArrow as an alternative data representation to NumPy with advantages such as: @@ -117,7 +117,7 @@ In[4]: %timeit ser_string.str.startswith("a") ### Immediate User Benefit 2: Nested Datatypes -Currently, if you try storing `dict`s in a pandas `Series`, you will again get the horrendeous `object` dtype: +Currently, if you try storing `dict`s in a pandas `Series`, you will again get the horrendous `object` dtype: ```python In [6]: pd.Series([{'a': 1, 'b': 2}, {'a': 2, 'b': 99}]) Out[6]: diff --git a/web/pandas/pdeps/0012-compact-and-reversible-JSON-interface.md b/web/pandas/pdeps/0012-compact-and-reversible-JSON-interface.md index f49193462a44a..71f669825f979 100644 --- a/web/pandas/pdeps/0012-compact-and-reversible-JSON-interface.md +++ b/web/pandas/pdeps/0012-compact-and-reversible-JSON-interface.md @@ -448,7 +448,7 @@ To conclude, ## Core team decision -Vote was open from september-11 to setpember-26: +Vote was open from september-11 to september-26: - Final tally is 0 approvals, 5 abstentions, 7 disapprove. The quorum has been met. The PDEP fails.