From 9b633dc39aa32c101538f15baf296f77a970a41c Mon Sep 17 00:00:00 2001
From: co63oc
Date: Wed, 24 Jan 2024 11:56:45 +0800
Subject: [PATCH] Fix invovled involved (#61063)

---
 python/paddle/incubate/autograd/functional.py  | 18 +++++++++---------
 .../fleet/parameter_server/ir/pserver_pass.py  |  2 +-
 .../distributed/utils/io/save_for_auto.py      |  2 +-
 .../nn/functional/fused_gate_attention.py      |  2 +-
 python/paddle/incubate/operators/unzip.py      |  2 +-
 .../optimizer/functional/line_search.py        |  2 +-
 .../incubate/optimizer/line_search_dygraph.py  |  4 ++--
 python/paddle/incubate/optimizer/lookahead.py  |  4 ++--
 python/paddle/incubate/tensor/manipulation.py  |  2 +-
 python/paddle/io/dataloader/dataloader_iter.py |  4 ++--
 10 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/python/paddle/incubate/autograd/functional.py b/python/paddle/incubate/autograd/functional.py
index 9490a10e1ec8d0..6348b18083961d 100644
--- a/python/paddle/incubate/autograd/functional.py
+++ b/python/paddle/incubate/autograd/functional.py
@@ -31,7 +31,7 @@ def vjp(func, xs, v=None):
             returns a sequence of Tensors or a Tensor.
         xs(Tensor|Sequence[Tensor]): Used as positional arguments to evaluate
             ``func``. ``xs`` is accepted as one Tensor or a sequence of Tensors.
-        v(Tensor|Sequence[Tensor]|None, optional): The cotangent vector invovled
+        v(Tensor|Sequence[Tensor]|None, optional): The cotangent vector involved
            in the VJP computation. ``v`` matches the size and shape of
            ``func`` 's output. Defaults to None, which is equivalent to all
            ones the same size of ``func`` 's output.
@@ -67,8 +67,8 @@
     """
     _check_inputs(func, xs, v)
 
-    # ``_seprate`` breaks the dependencies between ``xs`` and other
-    # variables. See more ``_seprate`` .
+    # ``_separate`` breaks the dependencies between ``xs`` and other
+    # variables. See more ``_separate`` .
     if framework.in_dygraph_mode() or not utils.prim_enabled():
         xs, v = _separate(xs), _separate(v)
     ys = func(*xs) if isinstance(xs, typing.Sequence) else func(xs)
@@ -91,7 +91,7 @@ def jvp(func, xs, v=None):
         xs(Tensor|Sequence[Tensor]): Used as positional arguments to evaluate
             ``func``. The ``xs`` is accepted as one Tensor or a Sequence of
             Tensors.
-        v(Tensor|Sequence[Tensor]|None, Optional): The tangent vector invovled
+        v(Tensor|Sequence[Tensor]|None, Optional): The tangent vector involved
            in the JVP computation. The ``v`` matches the size and shape of
            ``xs`` . Default value is None and in this case is equivalent to all
            ones the same size of ``xs`` .
@@ -127,8 +127,8 @@
     """
     _check_inputs(func, xs, v)
 
-    # ``_seprate`` breaks the dependencies between ``xs`` and other
-    # variables. See more ``_seprate`` .
+    # ``_separate`` breaks the dependencies between ``xs`` and other
+    # variables. See more ``_separate`` .
     if framework.in_dygraph_mode() or not utils.prim_enabled():
         xs, v = _separate(xs), _separate(v)
     ys = func(*xs) if isinstance(xs, typing.Sequence) else func(xs)
@@ -153,7 +153,7 @@ def _double_backward_trick(ys, xs, v):
 
 def _zeros_like_with_grad(xs):
     """Create a zero or zeros sequence Tensor like ``xs`` with a flag
-    ``stop_graident=False`` .
+    ``stop_gradient=False`` .
     """
     if not isinstance(xs, typing.Sequence):
         ys = paddle.zeros_like(xs)
@@ -309,7 +309,7 @@ def _jac_func(*xs):
                 not is_batched and jac.shape[0] != 1
             ):
                 raise RuntimeError(
-                    "The function given to Hessian shoud return as single element Tensor or batched single element Tensor."
+                    "The function given to Hessian should return as single element Tensor or batched single element Tensor."
                 )
             return jac[:, 0, :] if is_batched else jac[0, :]
 
@@ -485,7 +485,7 @@ def _multi_index(indexes, shape):
 
     Currently supporting following input format:
         * ([positive|negative|slice], ...), the right-most elements can be
-        omited.
+        omitted.
 
     The standard format after converted is slice tuple which contains N elements:
         * ([positive|slice], ..., [positive|slice])
diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/ir/pserver_pass.py b/python/paddle/incubate/distributed/fleet/parameter_server/ir/pserver_pass.py
index 32c4b3398b4b25..01b3a3bd53b927 100644
--- a/python/paddle/incubate/distributed/fleet/parameter_server/ir/pserver_pass.py
+++ b/python/paddle/incubate/distributed/fleet/parameter_server/ir/pserver_pass.py
@@ -896,7 +896,7 @@ def add_large_scale_op(
         entry_attr = get_entry_attr(param)
 
         if fuse:
-            # remove origin optimzier op
+            # remove origin optimizer op
             opt_block._remove_op(opt_idx)
 
             # training/infer
diff --git a/python/paddle/incubate/distributed/utils/io/save_for_auto.py b/python/paddle/incubate/distributed/utils/io/save_for_auto.py
index 9f7f88852e197d..2999ae5b2dd9ba 100644
--- a/python/paddle/incubate/distributed/utils/io/save_for_auto.py
+++ b/python/paddle/incubate/distributed/utils/io/save_for_auto.py
@@ -121,7 +121,7 @@ def _save_param_attr(state_dict_, path, dims_mapping_dict=None):
     save params' attr dict
     Args:
         state_dict_:
-            state for which to save attrs, when the state is optimzier state, the master and LRScheduler will be reomoved.
+            state for which to save attrs, when the state is optimizer state, the master and LRScheduler will be removed.
         path:
             path to save
         dims_mapping_dict:
diff --git a/python/paddle/incubate/nn/functional/fused_gate_attention.py b/python/paddle/incubate/nn/functional/fused_gate_attention.py
index 560d6717fda912..1c2c89ea634704 100644
--- a/python/paddle/incubate/nn/functional/fused_gate_attention.py
+++ b/python/paddle/incubate/nn/functional/fused_gate_attention.py
@@ -34,7 +34,7 @@ def fused_gate_attention(
     use_flash_attn=False,
 ):
     r"""
-    Attention mapps queries and a set of key-value pairs to outputs, and
+    Attention maps queries and a set of key-value pairs to outputs, and
     Gate Attention performs multiple parallel attention to jointly attending to
     information from different representation subspaces. This API only
     support self_attention. The pseudo code is as follows:
diff --git a/python/paddle/incubate/operators/unzip.py b/python/paddle/incubate/operators/unzip.py
index 3eb33804153052..ba1e16babaaeb0 100644
--- a/python/paddle/incubate/operators/unzip.py
+++ b/python/paddle/incubate/operators/unzip.py
@@ -21,7 +21,7 @@ def unzip(input, lod, len):
 
     **unzip layers**
 
-    unzip 'input' accroding to 'lod'
+    unzip 'input' according to 'lod'
 
     Args:
         input (Variable): The zipped input
diff --git a/python/paddle/incubate/optimizer/functional/line_search.py b/python/paddle/incubate/optimizer/functional/line_search.py
index 9fb30855ff988f..65ebbafccae713 100644
--- a/python/paddle/incubate/optimizer/functional/line_search.py
+++ b/python/paddle/incubate/optimizer/functional/line_search.py
@@ -30,7 +30,7 @@ def cubic_interpolation_(x1, f1, g1, x2, f2, g2):
         x1, f1, g1: point1's position, value and gradient.
         x2, f2, g2: point2's position, value and gradient.
     Returns:
-        min_pos: the minimun point between the specified points in the cubic curve.
+        min_pos: the minimum point between the specified points in the cubic curve.
""" xmin, xmax = paddle.static.nn.cond( x1 <= x2, lambda: (x1, x2), lambda: (x2, x1) diff --git a/python/paddle/incubate/optimizer/line_search_dygraph.py b/python/paddle/incubate/optimizer/line_search_dygraph.py index 12bc62dfab6eb8..5d3aaa2c18b2a9 100644 --- a/python/paddle/incubate/optimizer/line_search_dygraph.py +++ b/python/paddle/incubate/optimizer/line_search_dygraph.py @@ -17,7 +17,7 @@ def _cubic_interpolate(x1, f1, g1, x2, f2, g2, bounds=None): r"""Cubic interpolation between (x1, f1, g1) and (x2, f2, g2). - Use two points and their gradient to determine a cubic function and get the minimun point + Use two points and their gradient to determine a cubic function and get the minimum point between them in the cubic curve. Reference: @@ -30,7 +30,7 @@ def _cubic_interpolate(x1, f1, g1, x2, f2, g2, bounds=None): bounds: bounds of interpolation area Returns: - min_pos: the minimun point between the specified points in the cubic curve. + min_pos: the minimum point between the specified points in the cubic curve. """ # Compute bounds of interpolation area if bounds is not None: diff --git a/python/paddle/incubate/optimizer/lookahead.py b/python/paddle/incubate/optimizer/lookahead.py index 12cb00ba7a3ff0..a06b2e7d2b5ef7 100644 --- a/python/paddle/incubate/optimizer/lookahead.py +++ b/python/paddle/incubate/optimizer/lookahead.py @@ -42,8 +42,8 @@ class LookAhead(Optimizer): Args: inner_optimizer (Optimizer): The optimizer that update fast params step by step. - alpha (float, optinal): The learning rate of Lookahead. The default value is 0.5. - k (int, optinal): The slow params is updated every k steps. The default value is 5. + alpha (float, optional): The learning rate of Lookahead. The default value is 0.5. + k (int, optional): The slow params is updated every k steps. The default value is 5. name (str, optional): Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. The default value is None. diff --git a/python/paddle/incubate/tensor/manipulation.py b/python/paddle/incubate/tensor/manipulation.py index a1615fd2b8d209..8afd98e42258ae 100644 --- a/python/paddle/incubate/tensor/manipulation.py +++ b/python/paddle/incubate/tensor/manipulation.py @@ -25,7 +25,7 @@ def _npu_identity(x, format=-1): """ - This OP takes in the Tensor :attr:`x` and change it to ouptut with + This OP takes in the Tensor :attr:`x` and change it to output with aclFormat with int value. This API is only used for Ascend NPU. Args: diff --git a/python/paddle/io/dataloader/dataloader_iter.py b/python/paddle/io/dataloader/dataloader_iter.py index d8ba4a7685d307..9249175bdd54cc 100644 --- a/python/paddle/io/dataloader/dataloader_iter.py +++ b/python/paddle/io/dataloader/dataloader_iter.py @@ -49,11 +49,11 @@ # NOTE: fix `terminate called without an active exception` # if for loop break and program exit immediately(with no model # layers processing) after iterate **the first few data** in -# distributed lauch mode, distributed launch will call +# distributed launch mode, distributed launch will call # terminate() to kill main process on each devices, but thread # is still iterating to fullfill blocking queue caches, which # may cause thread error `terminate called without an active -# exception` for terminate is a strong singal and `__del__` +# exception` for terminate is a strong signal and `__del__` # of DataLoader may not be called, so we add a global link to # the last DataLoader instance to call `__del__` to clean up # resources